faiss 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +103 -3
- data/ext/faiss/ext.cpp +99 -32
- data/ext/faiss/extconf.rb +12 -2
- data/lib/faiss/ext.bundle +0 -0
- data/lib/faiss/index.rb +3 -3
- data/lib/faiss/index_binary.rb +3 -3
- data/lib/faiss/kmeans.rb +1 -1
- data/lib/faiss/pca_matrix.rb +2 -2
- data/lib/faiss/product_quantizer.rb +3 -3
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/AutoTune.cpp +719 -0
- data/vendor/faiss/AutoTune.h +212 -0
- data/vendor/faiss/Clustering.cpp +261 -0
- data/vendor/faiss/Clustering.h +101 -0
- data/vendor/faiss/IVFlib.cpp +339 -0
- data/vendor/faiss/IVFlib.h +132 -0
- data/vendor/faiss/Index.cpp +171 -0
- data/vendor/faiss/Index.h +261 -0
- data/vendor/faiss/Index2Layer.cpp +437 -0
- data/vendor/faiss/Index2Layer.h +85 -0
- data/vendor/faiss/IndexBinary.cpp +77 -0
- data/vendor/faiss/IndexBinary.h +163 -0
- data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
- data/vendor/faiss/IndexBinaryFlat.h +54 -0
- data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
- data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
- data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
- data/vendor/faiss/IndexBinaryHNSW.h +56 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
- data/vendor/faiss/IndexBinaryIVF.h +211 -0
- data/vendor/faiss/IndexFlat.cpp +508 -0
- data/vendor/faiss/IndexFlat.h +175 -0
- data/vendor/faiss/IndexHNSW.cpp +1090 -0
- data/vendor/faiss/IndexHNSW.h +170 -0
- data/vendor/faiss/IndexIVF.cpp +909 -0
- data/vendor/faiss/IndexIVF.h +353 -0
- data/vendor/faiss/IndexIVFFlat.cpp +502 -0
- data/vendor/faiss/IndexIVFFlat.h +118 -0
- data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
- data/vendor/faiss/IndexIVFPQ.h +161 -0
- data/vendor/faiss/IndexIVFPQR.cpp +219 -0
- data/vendor/faiss/IndexIVFPQR.h +65 -0
- data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
- data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
- data/vendor/faiss/IndexLSH.cpp +225 -0
- data/vendor/faiss/IndexLSH.h +87 -0
- data/vendor/faiss/IndexLattice.cpp +143 -0
- data/vendor/faiss/IndexLattice.h +68 -0
- data/vendor/faiss/IndexPQ.cpp +1188 -0
- data/vendor/faiss/IndexPQ.h +199 -0
- data/vendor/faiss/IndexPreTransform.cpp +288 -0
- data/vendor/faiss/IndexPreTransform.h +91 -0
- data/vendor/faiss/IndexReplicas.cpp +123 -0
- data/vendor/faiss/IndexReplicas.h +76 -0
- data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
- data/vendor/faiss/IndexScalarQuantizer.h +127 -0
- data/vendor/faiss/IndexShards.cpp +317 -0
- data/vendor/faiss/IndexShards.h +100 -0
- data/vendor/faiss/InvertedLists.cpp +623 -0
- data/vendor/faiss/InvertedLists.h +334 -0
- data/vendor/faiss/LICENSE +21 -0
- data/vendor/faiss/MatrixStats.cpp +252 -0
- data/vendor/faiss/MatrixStats.h +62 -0
- data/vendor/faiss/MetaIndexes.cpp +351 -0
- data/vendor/faiss/MetaIndexes.h +126 -0
- data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
- data/vendor/faiss/OnDiskInvertedLists.h +127 -0
- data/vendor/faiss/VectorTransform.cpp +1157 -0
- data/vendor/faiss/VectorTransform.h +322 -0
- data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
- data/vendor/faiss/c_api/AutoTune_c.h +64 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
- data/vendor/faiss/c_api/Clustering_c.h +117 -0
- data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
- data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
- data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
- data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
- data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
- data/vendor/faiss/c_api/IndexShards_c.h +42 -0
- data/vendor/faiss/c_api/Index_c.cpp +105 -0
- data/vendor/faiss/c_api/Index_c.h +183 -0
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
- data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
- data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
- data/vendor/faiss/c_api/clone_index_c.h +32 -0
- data/vendor/faiss/c_api/error_c.h +42 -0
- data/vendor/faiss/c_api/error_impl.cpp +27 -0
- data/vendor/faiss/c_api/error_impl.h +16 -0
- data/vendor/faiss/c_api/faiss_c.h +58 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
- data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
- data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
- data/vendor/faiss/c_api/index_factory_c.h +30 -0
- data/vendor/faiss/c_api/index_io_c.cpp +42 -0
- data/vendor/faiss/c_api/index_io_c.h +50 -0
- data/vendor/faiss/c_api/macros_impl.h +110 -0
- data/vendor/faiss/clone_index.cpp +147 -0
- data/vendor/faiss/clone_index.h +38 -0
- data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
- data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
- data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
- data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
- data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
- data/vendor/faiss/gpu/GpuCloner.h +82 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
- data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
- data/vendor/faiss/gpu/GpuDistance.h +52 -0
- data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
- data/vendor/faiss/gpu/GpuIndex.h +148 -0
- data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
- data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
- data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
- data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
- data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
- data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
- data/vendor/faiss/gpu/GpuResources.cpp +52 -0
- data/vendor/faiss/gpu/GpuResources.h +73 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
- data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
- data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
- data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
- data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
- data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
- data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
- data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
- data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
- data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
- data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
- data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
- data/vendor/faiss/gpu/test/TestUtils.h +93 -0
- data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
- data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
- data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
- data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
- data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
- data/vendor/faiss/gpu/utils/Timer.h +52 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
- data/vendor/faiss/impl/FaissAssert.h +95 -0
- data/vendor/faiss/impl/FaissException.cpp +66 -0
- data/vendor/faiss/impl/FaissException.h +71 -0
- data/vendor/faiss/impl/HNSW.cpp +818 -0
- data/vendor/faiss/impl/HNSW.h +275 -0
- data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
- data/vendor/faiss/impl/PolysemousTraining.h +158 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
- data/vendor/faiss/impl/ProductQuantizer.h +242 -0
- data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
- data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
- data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
- data/vendor/faiss/impl/ThreadedIndex.h +80 -0
- data/vendor/faiss/impl/index_read.cpp +793 -0
- data/vendor/faiss/impl/index_write.cpp +558 -0
- data/vendor/faiss/impl/io.cpp +142 -0
- data/vendor/faiss/impl/io.h +98 -0
- data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
- data/vendor/faiss/impl/lattice_Zn.h +199 -0
- data/vendor/faiss/index_factory.cpp +392 -0
- data/vendor/faiss/index_factory.h +25 -0
- data/vendor/faiss/index_io.h +75 -0
- data/vendor/faiss/misc/test_blas.cpp +84 -0
- data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
- data/vendor/faiss/tests/test_merge.cpp +258 -0
- data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
- data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
- data/vendor/faiss/tests/test_params_override.cpp +231 -0
- data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
- data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
- data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
- data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
- data/vendor/faiss/utils/Heap.cpp +122 -0
- data/vendor/faiss/utils/Heap.h +495 -0
- data/vendor/faiss/utils/WorkerThread.cpp +126 -0
- data/vendor/faiss/utils/WorkerThread.h +61 -0
- data/vendor/faiss/utils/distances.cpp +765 -0
- data/vendor/faiss/utils/distances.h +243 -0
- data/vendor/faiss/utils/distances_simd.cpp +809 -0
- data/vendor/faiss/utils/extra_distances.cpp +336 -0
- data/vendor/faiss/utils/extra_distances.h +54 -0
- data/vendor/faiss/utils/hamming-inl.h +472 -0
- data/vendor/faiss/utils/hamming.cpp +792 -0
- data/vendor/faiss/utils/hamming.h +220 -0
- data/vendor/faiss/utils/random.cpp +192 -0
- data/vendor/faiss/utils/random.h +60 -0
- data/vendor/faiss/utils/utils.cpp +783 -0
- data/vendor/faiss/utils/utils.h +181 -0
- metadata +216 -2
@@ -0,0 +1,161 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
// -*- c++ -*-
|
9
|
+
|
10
|
+
#ifndef FAISS_INDEX_IVFPQ_H
|
11
|
+
#define FAISS_INDEX_IVFPQ_H
|
12
|
+
|
13
|
+
|
14
|
+
#include <vector>
|
15
|
+
|
16
|
+
#include <faiss/IndexIVF.h>
|
17
|
+
#include <faiss/IndexPQ.h>
|
18
|
+
|
19
|
+
|
20
|
+
namespace faiss {
|
21
|
+
|
22
|
+
/// Search parameters for IVFPQ indexes; the two fields carry the same names
/// as the search-time fields of IndexIVFPQ.
struct IVFPQSearchParameters : IVFSearchParameters {
    /// use table computation or on-the-fly?
    size_t scan_table_threshold;

    /// Hamming threshold for polysemous filtering
    int polysemous_ht;

    ~IVFPQSearchParameters() {}
};
|
27
|
+
|
28
|
+
|
29
|
+
/** Inverted file with Product Quantizer encoding. Each residual
|
30
|
+
* vector is encoded as a product quantizer code.
|
31
|
+
*/
|
32
|
+
struct IndexIVFPQ: IndexIVF {
|
33
|
+
bool by_residual; ///< Encode residual or plain vector?
|
34
|
+
|
35
|
+
ProductQuantizer pq; ///< produces the codes
|
36
|
+
|
37
|
+
bool do_polysemous_training; ///< reorder PQ centroids after training?
|
38
|
+
PolysemousTraining *polysemous_training; ///< if NULL, use default
|
39
|
+
|
40
|
+
// search-time parameters
|
41
|
+
size_t scan_table_threshold; ///< use table computation or on-the-fly?
|
42
|
+
int polysemous_ht; ///< Hamming thresh for polysemous filtering
|
43
|
+
|
44
|
+
/** Precompute table that speed up query preprocessing at some
|
45
|
+
* memory cost
|
46
|
+
* =-1: force disable
|
47
|
+
* =0: decide heuristically (default: use tables only if they are
|
48
|
+
* < precomputed_tables_max_bytes)
|
49
|
+
* =1: tables that work for all quantizers (size 256 * nlist * M)
|
50
|
+
* =2: specific version for MultiIndexQuantizer (much more compact)
|
51
|
+
*/
|
52
|
+
int use_precomputed_table; ///< if by_residual, build precompute tables
|
53
|
+
static size_t precomputed_table_max_bytes;
|
54
|
+
|
55
|
+
/// if use_precompute_table
|
56
|
+
/// size nlist * pq.M * pq.ksub
|
57
|
+
std::vector <float> precomputed_table;
|
58
|
+
|
59
|
+
IndexIVFPQ (
|
60
|
+
Index * quantizer, size_t d, size_t nlist,
|
61
|
+
size_t M, size_t nbits_per_idx);
|
62
|
+
|
63
|
+
void add_with_ids(idx_t n, const float* x, const idx_t* xids = nullptr)
|
64
|
+
override;
|
65
|
+
|
66
|
+
void encode_vectors(idx_t n, const float* x,
|
67
|
+
const idx_t *list_nos,
|
68
|
+
uint8_t * codes,
|
69
|
+
bool include_listnos = false) const override;
|
70
|
+
|
71
|
+
void sa_decode (idx_t n, const uint8_t *bytes,
|
72
|
+
float *x) const override;
|
73
|
+
|
74
|
+
|
75
|
+
/// same as add_core, also:
|
76
|
+
/// - output 2nd level residuals if residuals_2 != NULL
|
77
|
+
/// - use precomputed list numbers if precomputed_idx != NULL
|
78
|
+
void add_core_o (idx_t n, const float *x,
|
79
|
+
const idx_t *xids, float *residuals_2,
|
80
|
+
const idx_t *precomputed_idx = nullptr);
|
81
|
+
|
82
|
+
/// trains the product quantizer
|
83
|
+
void train_residual(idx_t n, const float* x) override;
|
84
|
+
|
85
|
+
/// same as train_residual, also output 2nd level residuals
|
86
|
+
void train_residual_o (idx_t n, const float *x, float *residuals_2);
|
87
|
+
|
88
|
+
void reconstruct_from_offset (int64_t list_no, int64_t offset,
|
89
|
+
float* recons) const override;
|
90
|
+
|
91
|
+
/** Find exact duplicates in the dataset.
|
92
|
+
*
|
93
|
+
* the duplicates are returned in pre-allocated arrays (see the
|
94
|
+
* max sizes).
|
95
|
+
*
|
96
|
+
* @params lims limits between groups of duplicates
|
97
|
+
* (max size ntotal / 2 + 1)
|
98
|
+
* @params ids ids[lims[i]] : ids[lims[i+1]-1] is a group of
|
99
|
+
* duplicates (max size ntotal)
|
100
|
+
* @return n number of groups found
|
101
|
+
*/
|
102
|
+
size_t find_duplicates (idx_t *ids, size_t *lims) const;
|
103
|
+
|
104
|
+
// map a vector to a binary code knowning the index
|
105
|
+
void encode (idx_t key, const float * x, uint8_t * code) const;
|
106
|
+
|
107
|
+
/** Encode multiple vectors
|
108
|
+
*
|
109
|
+
* @param n nb vectors to encode
|
110
|
+
* @param keys posting list ids for those vectors (size n)
|
111
|
+
* @param x vectors (size n * d)
|
112
|
+
* @param codes output codes (size n * code_size)
|
113
|
+
* @param compute_keys if false, assume keys are precomputed,
|
114
|
+
* otherwise compute them
|
115
|
+
*/
|
116
|
+
void encode_multiple (size_t n, idx_t *keys,
|
117
|
+
const float * x, uint8_t * codes,
|
118
|
+
bool compute_keys = false) const;
|
119
|
+
|
120
|
+
/// inverse of encode_multiple
|
121
|
+
void decode_multiple (size_t n, const idx_t *keys,
|
122
|
+
const uint8_t * xcodes, float * x) const;
|
123
|
+
|
124
|
+
InvertedListScanner *get_InvertedListScanner (bool store_pairs)
|
125
|
+
const override;
|
126
|
+
|
127
|
+
/// build precomputed table
|
128
|
+
void precompute_table ();
|
129
|
+
|
130
|
+
IndexIVFPQ ();
|
131
|
+
|
132
|
+
};
|
133
|
+
|
134
|
+
|
135
|
+
/// Statistics are robust to internal threading, but not if
/// IndexIVFPQ::search_preassigned is called by multiple threads.
struct IndexIVFPQStats {
    /// nb of refines (IVFPQR)
    size_t nrefine;

    /// nb of passed Hamming distance tests (for polysemous)
    size_t n_hamming_pass;

    // timings measured with the CPU RTC, on all threads
    size_t search_cycles;
    /// only for IVFPQR
    size_t refine_cycles;

    IndexIVFPQStats() { reset(); }

    void reset();
};

/// global variable that collects them all
extern IndexIVFPQStats indexIVFPQ_stats;
|
154
|
+
|
155
|
+
|
156
|
+
|
157
|
+
|
158
|
+
} // namespace faiss
|
159
|
+
|
160
|
+
|
161
|
+
#endif
|
@@ -0,0 +1,219 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
// -*- c++ -*-
|
9
|
+
|
10
|
+
#include <faiss/IndexIVFPQR.h>
|
11
|
+
|
12
|
+
#include <faiss/utils/Heap.h>
|
13
|
+
#include <faiss/utils/utils.h>
|
14
|
+
#include <faiss/utils/distances.h>
|
15
|
+
|
16
|
+
#include <faiss/impl/FaissAssert.h>
|
17
|
+
|
18
|
+
|
19
|
+
namespace faiss {
|
20
|
+
|
21
|
+
/*****************************************
|
22
|
+
* IndexIVFPQR implementation
|
23
|
+
******************************************/
|
24
|
+
|
25
|
+
/// Builds the IVFPQ base index and the refinement quantizer; the coarse
/// search factor starts at 4 and residual encoding is switched on.
IndexIVFPQR::IndexIVFPQR(
        Index* quantizer,
        size_t d,
        size_t nlist,
        size_t M,
        size_t nbits_per_idx,
        size_t M_refine,
        size_t nbits_per_idx_refine)
    : IndexIVFPQ(quantizer, d, nlist, M, nbits_per_idx),
      refine_pq(d, M_refine, nbits_per_idx_refine),
      k_factor(4)
{
    by_residual = true;
}
|
35
|
+
|
36
|
+
/// Default constructor: k_factor is 1 and residual encoding is switched on;
/// everything else is left to the base-class and member default constructors.
IndexIVFPQR::IndexIVFPQR()
    : k_factor(1)
{
    by_residual = true;
}
|
41
|
+
|
42
|
+
|
43
|
+
|
44
|
+
void IndexIVFPQR::reset()
|
45
|
+
{
|
46
|
+
IndexIVFPQ::reset();
|
47
|
+
refine_codes.clear();
|
48
|
+
}
|
49
|
+
|
50
|
+
|
51
|
+
|
52
|
+
|
53
|
+
/** Trains both product quantizers.
 *
 * The base quantizer is trained through train_residual_o, which also fills
 * a scratch buffer with the 2nd level residuals; the refinement quantizer
 * (refine_pq) is then trained on that buffer.
 *
 * @param n  number of training vectors
 * @param x  training vectors (n * d floats are allocated for the residuals)
 */
void IndexIVFPQR::train_residual (idx_t n, const float *x)
{
    // scratch buffer receiving the 2nd level residuals, freed on scope exit
    float * residual_2 = new float [n * d];
    ScopeDeleter <float> del(residual_2);

    train_residual_o (n, x, residual_2);

    if (verbose) {
        // size_t needs %zu; idx_t is 64-bit, so print it through long long
        // instead of assuming it matches `long` on every platform.
        printf ("training %zux%zu 2nd level PQ quantizer on %lld %dD-vectors\n",
                refine_pq.M, refine_pq.ksub,
                static_cast<long long> (n), d);
    }

    refine_pq.cp.max_points_per_centroid = 1000;
    refine_pq.cp.verbose = verbose;

    refine_pq.train (n, residual_2);
}
|
71
|
+
|
72
|
+
|
73
|
+
/// Adds n vectors with optional ids; forwards to add_core without
/// precomputed list assignments.
void IndexIVFPQR::add_with_ids(idx_t n, const float* x, const idx_t* xids)
{
    const idx_t* no_precomputed_idx = nullptr;
    add_core(n, x, xids, no_precomputed_idx);
}
|
76
|
+
|
77
|
+
/// Adds n vectors through add_core_o, then encodes the 2nd level residuals
/// it produced with refine_pq and appends those codes to refine_codes.
void IndexIVFPQR::add_core(idx_t n, const float* x, const idx_t* xids,
                           const idx_t* precomputed_idx)
{
    // scratch buffer for the 2nd level residuals, released on scope exit
    float* second_level = new float[n * d];
    ScopeDeleter<float> second_level_guard(second_level);

    // remember where the new refinement codes start
    const idx_t first_new = ntotal;

    add_core_o(n, x, xids, second_level, precomputed_idx);

    // ntotal has been updated by add_core_o: grow the code storage to match
    const size_t refine_code_size = refine_pq.code_size;
    refine_codes.resize(ntotal * refine_code_size);

    uint8_t* dest = refine_codes.data() + first_new * refine_code_size;
    refine_pq.compute_codes(second_level, dest, n);
}
|
94
|
+
#define TIC t0 = get_cycles()
|
95
|
+
#define TOC get_cycles () - t0
|
96
|
+
|
97
|
+
|
98
|
+
void IndexIVFPQR::search_preassigned (idx_t n, const float *x, idx_t k,
|
99
|
+
const idx_t *idx,
|
100
|
+
const float *L1_dis,
|
101
|
+
float *distances, idx_t *labels,
|
102
|
+
bool store_pairs,
|
103
|
+
const IVFSearchParameters *params
|
104
|
+
) const
|
105
|
+
{
|
106
|
+
uint64_t t0;
|
107
|
+
TIC;
|
108
|
+
size_t k_coarse = long(k * k_factor);
|
109
|
+
idx_t *coarse_labels = new idx_t [k_coarse * n];
|
110
|
+
ScopeDeleter<idx_t> del1 (coarse_labels);
|
111
|
+
{ // query with quantizer levels 1 and 2.
|
112
|
+
float *coarse_distances = new float [k_coarse * n];
|
113
|
+
ScopeDeleter<float> del(coarse_distances);
|
114
|
+
|
115
|
+
IndexIVFPQ::search_preassigned (
|
116
|
+
n, x, k_coarse,
|
117
|
+
idx, L1_dis, coarse_distances, coarse_labels,
|
118
|
+
true, params);
|
119
|
+
}
|
120
|
+
|
121
|
+
|
122
|
+
indexIVFPQ_stats.search_cycles += TOC;
|
123
|
+
|
124
|
+
TIC;
|
125
|
+
|
126
|
+
// 3rd level refinement
|
127
|
+
size_t n_refine = 0;
|
128
|
+
#pragma omp parallel reduction(+ : n_refine)
|
129
|
+
{
|
130
|
+
// tmp buffers
|
131
|
+
float *residual_1 = new float [2 * d];
|
132
|
+
ScopeDeleter<float> del (residual_1);
|
133
|
+
float *residual_2 = residual_1 + d;
|
134
|
+
#pragma omp for
|
135
|
+
for (idx_t i = 0; i < n; i++) {
|
136
|
+
const float *xq = x + i * d;
|
137
|
+
const idx_t * shortlist = coarse_labels + k_coarse * i;
|
138
|
+
float * heap_sim = distances + k * i;
|
139
|
+
idx_t * heap_ids = labels + k * i;
|
140
|
+
maxheap_heapify (k, heap_sim, heap_ids);
|
141
|
+
|
142
|
+
for (int j = 0; j < k_coarse; j++) {
|
143
|
+
idx_t sl = shortlist[j];
|
144
|
+
|
145
|
+
if (sl == -1) continue;
|
146
|
+
|
147
|
+
int list_no = sl >> 32;
|
148
|
+
int ofs = sl & 0xffffffff;
|
149
|
+
|
150
|
+
assert (list_no >= 0 && list_no < nlist);
|
151
|
+
assert (ofs >= 0 && ofs < invlists->list_size (list_no));
|
152
|
+
|
153
|
+
// 1st level residual
|
154
|
+
quantizer->compute_residual (xq, residual_1, list_no);
|
155
|
+
|
156
|
+
// 2nd level residual
|
157
|
+
const uint8_t * l2code =
|
158
|
+
invlists->get_single_code (list_no, ofs);
|
159
|
+
|
160
|
+
pq.decode (l2code, residual_2);
|
161
|
+
for (int l = 0; l < d; l++)
|
162
|
+
residual_2[l] = residual_1[l] - residual_2[l];
|
163
|
+
|
164
|
+
// 3rd level residual's approximation
|
165
|
+
idx_t id = invlists->get_single_id (list_no, ofs);
|
166
|
+
assert (0 <= id && id < ntotal);
|
167
|
+
refine_pq.decode (&refine_codes [id * refine_pq.code_size],
|
168
|
+
residual_1);
|
169
|
+
|
170
|
+
float dis = fvec_L2sqr (residual_1, residual_2, d);
|
171
|
+
|
172
|
+
if (dis < heap_sim[0]) {
|
173
|
+
maxheap_pop (k, heap_sim, heap_ids);
|
174
|
+
idx_t id_or_pair = store_pairs ? sl : id;
|
175
|
+
maxheap_push (k, heap_sim, heap_ids, dis, id_or_pair);
|
176
|
+
}
|
177
|
+
n_refine ++;
|
178
|
+
}
|
179
|
+
maxheap_reorder (k, heap_sim, heap_ids);
|
180
|
+
}
|
181
|
+
}
|
182
|
+
indexIVFPQ_stats.nrefine += n_refine;
|
183
|
+
indexIVFPQ_stats.refine_cycles += TOC;
|
184
|
+
}
|
185
|
+
|
186
|
+
void IndexIVFPQR::reconstruct_from_offset (int64_t list_no, int64_t offset,
|
187
|
+
float* recons) const
|
188
|
+
{
|
189
|
+
IndexIVFPQ::reconstruct_from_offset (list_no, offset, recons);
|
190
|
+
|
191
|
+
idx_t id = invlists->get_single_id (list_no, offset);
|
192
|
+
assert (0 <= id && id < ntotal);
|
193
|
+
|
194
|
+
std::vector<float> r3(d);
|
195
|
+
refine_pq.decode (&refine_codes [id * refine_pq.code_size], r3.data());
|
196
|
+
for (int i = 0; i < d; ++i) {
|
197
|
+
recons[i] += r3[i];
|
198
|
+
}
|
199
|
+
}
|
200
|
+
|
201
|
+
/// Merges another IndexIVFPQR into this one: performs the IndexIVF merge,
/// then moves the other index's refinement codes to the end of ours.
/// Throws if other_in is not an IndexIVFPQR.
void IndexIVFPQR::merge_from(IndexIVF& other_in, idx_t add_id)
{
    IndexIVFPQR* other = dynamic_cast<IndexIVFPQR*>(&other_in);
    FAISS_THROW_IF_NOT(other);

    IndexIVF::merge_from(other_in, add_id);

    std::vector<uint8_t>& incoming = other->refine_codes;
    refine_codes.insert(refine_codes.end(), incoming.begin(), incoming.end());
    // the other index gives up its codes
    incoming.clear();
}
|
213
|
+
|
214
|
+
/// Removal is not supported for this index type: always throws.
size_t IndexIVFPQR::remove_ids(const IDSelector& /*sel*/)
{
    FAISS_THROW_MSG("not implemented");
    // not reached when the macro throws; kept to satisfy the return type
    return 0;
}
|
218
|
+
|
219
|
+
} // namespace faiss
|
@@ -0,0 +1,65 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
// -*- c++ -*-
|
9
|
+
|
10
|
+
#pragma once
|
11
|
+
|
12
|
+
#include <vector>
|
13
|
+
|
14
|
+
#include <faiss/IndexIVFPQ.h>
|
15
|
+
|
16
|
+
|
17
|
+
namespace faiss {
|
18
|
+
|
19
|
+
|
20
|
+
|
21
|
+
/** Index with an additional level of PQ refinement */
|
22
|
+
struct IndexIVFPQR: IndexIVFPQ {
|
23
|
+
ProductQuantizer refine_pq; ///< 3rd level quantizer
|
24
|
+
std::vector <uint8_t> refine_codes; ///< corresponding codes
|
25
|
+
|
26
|
+
/// factor between k requested in search and the k requested from the IVFPQ
|
27
|
+
float k_factor;
|
28
|
+
|
29
|
+
IndexIVFPQR (
|
30
|
+
Index * quantizer, size_t d, size_t nlist,
|
31
|
+
size_t M, size_t nbits_per_idx,
|
32
|
+
size_t M_refine, size_t nbits_per_idx_refine);
|
33
|
+
|
34
|
+
void reset() override;
|
35
|
+
|
36
|
+
size_t remove_ids(const IDSelector& sel) override;
|
37
|
+
|
38
|
+
/// trains the two product quantizers
|
39
|
+
void train_residual(idx_t n, const float* x) override;
|
40
|
+
|
41
|
+
void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
|
42
|
+
|
43
|
+
/// same as add_with_ids, but optionally use the precomputed list ids
|
44
|
+
void add_core (idx_t n, const float *x, const idx_t *xids,
|
45
|
+
const idx_t *precomputed_idx = nullptr);
|
46
|
+
|
47
|
+
void reconstruct_from_offset (int64_t list_no, int64_t offset,
|
48
|
+
float* recons) const override;
|
49
|
+
|
50
|
+
void merge_from (IndexIVF &other, idx_t add_id) override;
|
51
|
+
|
52
|
+
|
53
|
+
void search_preassigned (idx_t n, const float *x, idx_t k,
|
54
|
+
const idx_t *assign,
|
55
|
+
const float *centroid_dis,
|
56
|
+
float *distances, idx_t *labels,
|
57
|
+
bool store_pairs,
|
58
|
+
const IVFSearchParameters *params=nullptr
|
59
|
+
) const override;
|
60
|
+
|
61
|
+
IndexIVFPQR();
|
62
|
+
};
|
63
|
+
|
64
|
+
|
65
|
+
} // namespace faiss
|
@@ -0,0 +1,331 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
// -*- c++ -*-
|
9
|
+
|
10
|
+
|
11
|
+
#include <faiss/IndexIVFSpectralHash.h>
|
12
|
+
|
13
|
+
#include <memory>
|
14
|
+
#include <algorithm>
|
15
|
+
#include <stdint.h>
|
16
|
+
|
17
|
+
#include <faiss/utils/hamming.h>
|
18
|
+
#include <faiss/utils/utils.h>
|
19
|
+
#include <faiss/impl/FaissAssert.h>
|
20
|
+
#include <faiss/impl/AuxIndexStructures.h>
|
21
|
+
#include <faiss/VectorTransform.h>
|
22
|
+
|
23
|
+
namespace faiss {
|
24
|
+
|
25
|
+
|
26
|
+
/// Builds an IVF index with (nbit + 7) / 8 byte codes and an owned random
/// rotation (seed 1234) as the vector transform. The code size must be a
/// multiple of 4 bytes, otherwise the constructor throws. The index starts
/// untrained.
IndexIVFSpectralHash::IndexIVFSpectralHash(
        Index* quantizer,
        size_t d,
        size_t nlist,
        int nbit,
        float period)
    : IndexIVF(quantizer, d, nlist, (nbit + 7) / 8, METRIC_L2),
      nbit(nbit),
      period(period),
      threshold_type(Thresh_global)
{
    FAISS_THROW_IF_NOT (code_size % 4 == 0);

    RandomRotationMatrix* rotation = new RandomRotationMatrix(d, nbit);
    rotation->init(1234);

    // the index takes ownership of the transform (released in the destructor)
    vt = rotation;
    own_fields = true;
    is_trained = false;
}
|
39
|
+
|
40
|
+
/// Default constructor: no transform, nothing owned, zero bits and period,
/// global thresholding.
IndexIVFSpectralHash::IndexIVFSpectralHash()
    : IndexIVF(),
      vt(nullptr),
      own_fields(false),
      nbit(0),
      period(0),
      threshold_type(Thresh_global)
{
}
|
44
|
+
|
45
|
+
/// Releases the vector transform, but only when this index owns it.
IndexIVFSpectralHash::~IndexIVFSpectralHash()
{
    if (!own_fields) {
        return;
    }
    delete vt;
}
|
51
|
+
|
52
|
+
namespace {
|
53
|
+
|
54
|
+
|
55
|
+
/** Median of the n values in x.
 *
 * The array is sorted in place as a side effect. For an even count the
 * result is the mean of the two middle values.
 *
 * @param n  number of values; 0 is accepted and yields 0
 * @param x  values, reordered (sorted ascending) by this call
 * @return   the median, or 0 when n == 0
 */
float median (size_t n, float *x) {
    // Guard the empty case: without it, n / 2 - 1 underflows for n == 0
    // and the function reads out of bounds.
    if (n == 0) {
        return 0.0f;
    }
    std::sort(x, x + n);
    if (n % 2 == 1) {
        return x [n / 2];
    } else {
        return (x [n / 2 - 1] + x [n / 2]) / 2;
    }
}
|
63
|
+
|
64
|
+
}
|
65
|
+
|
66
|
+
|
67
|
+
/** Trains the vector transform (if needed) and the per-list thresholds.
 *
 * - Thresh_global: nothing beyond the transform is trained.
 * - Thresh_centroid / Thresh_centroid_half: thresholds are the transformed
 *   centroids (shifted by 0.25 * period for the "half" variant).
 * - otherwise: thresholds are the per-list, per-bit medians of the
 *   transformed training vectors.
 *
 * In the non-global cases `trained` ends up with nlist * nbit entries,
 * laid out as trained[list_no * nbit + bit].
 *
 * @param n  number of training vectors
 * @param x  training vectors
 */
void IndexIVFSpectralHash::train_residual (idx_t n, const float *x)
{
    if (!vt->is_trained) {
        vt->train (n, x);
    }

    if (threshold_type == Thresh_global) {
        // nothing to do
        return;
    } else if (threshold_type == Thresh_centroid ||
               threshold_type == Thresh_centroid_half) {
        // convert all centroids with vt
        std::vector<float> centroids (nlist * d);
        quantizer->reconstruct_n (0, nlist, centroids.data());
        trained.resize(nlist * nbit);
        vt->apply_noalloc (nlist, centroids.data(), trained.data());
        if (threshold_type == Thresh_centroid_half) {
            for (size_t i = 0; i < nlist * nbit; i++) {
                trained[i] -= 0.25 * period;
            }
        }
        return;
    }
    // otherwise train medians

    // assign
    std::unique_ptr<idx_t []> idx (new idx_t [n]);
    quantizer->assign (n, x, idx.get());

    // histogram of list sizes ...
    std::vector<size_t> sizes(nlist + 1);
    for (size_t i = 0; i < n; i++) {
        FAISS_THROW_IF_NOT (idx[i] >= 0);
        sizes[idx[i]]++;
    }

    // ... turned into the start offset of each list
    size_t ofs = 0;
    for (int j = 0; j < nlist; j++) {
        size_t o0 = ofs;
        ofs += sizes[j];
        sizes[j] = o0;
    }

    // transform
    std::unique_ptr<float []> xt (vt->apply (n, x));

    // transpose + reorder: xo holds nbit rows of n values, each row grouped
    // by list; after this loop sizes[j] is the END offset of list j
    std::unique_ptr<float []> xo (new float[n * nbit]);

    for (size_t i = 0; i < n; i++) {
        size_t idest = sizes[idx[i]]++;
        for (size_t j = 0; j < nbit; j++) {
            xo[idest + n * j] = xt[i * nbit + j];
        }
    }

    // One threshold per (list, bit). Sizing by n instead of nlist would
    // overflow the buffer below whenever n < nlist.
    trained.resize (nlist * nbit);
    // compute medians
    // NOTE(review): there is no enclosing `omp parallel` in this function,
    // so this worksharing loop presumably runs serially - confirm intent.
#pragma omp for
    for (int i = 0; i < nlist; i++) {
        size_t i0 = i == 0 ? 0 : sizes[i - 1];
        size_t i1 = sizes[i];
        for (int j = 0; j < nbit; j++) {
            float *xoi = xo.get() + i0 + n * j;
            if (i0 == i1) { // nothing to train
                trained[i * nbit + j] = 0.0;
            } else if (i1 == i0 + 1) {
                trained[i * nbit + j] = xoi[0];
            } else {
                trained[i * nbit + j] = median(i1 - i0, xoi);
            }
        }
    }
}
|
140
|
+
|
141
|
+
|
142
|
+
namespace {
|
143
|
+
|
144
|
+
/** Turns nbit floats into a packed bit code.
 *
 * Bit i is the parity of floor((x[i] - c[i]) * freq); it is stored in
 * byte i / 8 at bit position i % 8. All (nbit + 7) / 8 output bytes are
 * cleared first.
 *
 * @param nbit   number of bits / input components
 * @param freq   multiplier applied before taking the floor
 * @param x      input values (nbit entries)
 * @param c      per-component offsets subtracted from x (nbit entries)
 * @param codes  output, (nbit + 7) / 8 bytes
 */
void binarize_with_freq(size_t nbit, float freq,
                        const float *x, const float *c,
                        uint8_t *codes)
{
    const size_t nbytes = (nbit + 7) / 8;
    memset (codes, 0, nbytes);

    for (size_t b = 0; b < nbit; ++b) {
        const float shifted = x[b] - c[b];
        const int cell = int(floor(shifted * freq));
        // odd cells set the bit, even cells leave it cleared
        if (cell & 1) {
            codes[b / 8] |= 1 << (b % 8);
        }
    }
}
|
156
|
+
|
157
|
+
|
158
|
+
};
|
159
|
+
|
160
|
+
|
161
|
+
|
162
|
+
/** Encodes n vectors into binary codes of code_size bytes each.
 *
 * The vectors are first transformed with vt, then binarized against either
 * an all-zero threshold (Thresh_global) or the trained thresholds of the
 * vector's list. Vectors with a negative list number are skipped and their
 * output bytes are left untouched.
 *
 * Throws if the index is not trained or if include_listnos is requested.
 */
void IndexIVFSpectralHash::encode_vectors(idx_t n, const float* x_in,
                                          const idx_t *list_nos,
                                          uint8_t * codes,
                                          bool include_listnos) const
{
    FAISS_THROW_IF_NOT (is_trained);
    float freq = 2.0 / period;

    FAISS_THROW_IF_NOT_MSG (!include_listnos, "listnos encoding not supported");

    // transform with vt; the returned buffer is owned here
    std::unique_ptr<float []> x (vt->apply (n, x_in));

#pragma omp parallel
    {
        // per-thread zero thresholds, used for the global mode
        std::vector<float> zero (nbit);
        const bool global_thresholds = (threshold_type == Thresh_global);

        // the vectors are split between the threads
#pragma omp for
        for (size_t i = 0; i < n; i++) {
            int64_t list_no = list_nos [i];

            if (list_no < 0) {
                continue;
            }

            const float *c = global_thresholds
                    ? zero.data()
                    : trained.data() + list_no * nbit;

            const float *xi = x.get() + i * nbit;
            uint8_t *code_i = codes + i * code_size;
            binarize_with_freq (nbit, freq, xi, c, code_i);
        }
    }
}
|
198
|
+
|
199
|
+
namespace {
|
200
|
+
|
201
|
+
|
202
|
+
template<class HammingComputer>
|
203
|
+
struct IVFScanner: InvertedListScanner {
|
204
|
+
|
205
|
+
// copied from index structure
|
206
|
+
const IndexIVFSpectralHash *index;
|
207
|
+
size_t code_size;
|
208
|
+
size_t nbit;
|
209
|
+
bool store_pairs;
|
210
|
+
|
211
|
+
float period, freq;
|
212
|
+
std::vector<float> q;
|
213
|
+
std::vector<float> zero;
|
214
|
+
std::vector<uint8_t> qcode;
|
215
|
+
HammingComputer hc;
|
216
|
+
|
217
|
+
using idx_t = Index::idx_t;
|
218
|
+
|
219
|
+
IVFScanner (const IndexIVFSpectralHash * index,
|
220
|
+
bool store_pairs):
|
221
|
+
index (index),
|
222
|
+
code_size(index->code_size),
|
223
|
+
nbit(index->nbit),
|
224
|
+
store_pairs(store_pairs),
|
225
|
+
period(index->period), freq(2.0 / index->period),
|
226
|
+
q(nbit), zero(nbit), qcode(code_size),
|
227
|
+
hc(qcode.data(), code_size)
|
228
|
+
{
|
229
|
+
}
|
230
|
+
|
231
|
+
|
232
|
+
void set_query (const float *query) override {
|
233
|
+
FAISS_THROW_IF_NOT(query);
|
234
|
+
FAISS_THROW_IF_NOT(q.size() == nbit);
|
235
|
+
index->vt->apply_noalloc (1, query, q.data());
|
236
|
+
|
237
|
+
if (index->threshold_type ==
|
238
|
+
IndexIVFSpectralHash::Thresh_global) {
|
239
|
+
binarize_with_freq
|
240
|
+
(nbit, freq, q.data(), zero.data(), qcode.data());
|
241
|
+
hc.set (qcode.data(), code_size);
|
242
|
+
}
|
243
|
+
}
|
244
|
+
|
245
|
+
idx_t list_no;
|
246
|
+
|
247
|
+
void set_list (idx_t list_no, float /*coarse_dis*/) override {
|
248
|
+
this->list_no = list_no;
|
249
|
+
if (index->threshold_type != IndexIVFSpectralHash::Thresh_global) {
|
250
|
+
const float *c = index->trained.data() + list_no * nbit;
|
251
|
+
binarize_with_freq (nbit, freq, q.data(), c, qcode.data());
|
252
|
+
hc.set (qcode.data(), code_size);
|
253
|
+
}
|
254
|
+
}
|
255
|
+
|
256
|
+
float distance_to_code (const uint8_t *code) const final {
|
257
|
+
return hc.hamming (code);
|
258
|
+
}
|
259
|
+
|
260
|
+
size_t scan_codes (size_t list_size,
|
261
|
+
const uint8_t *codes,
|
262
|
+
const idx_t *ids,
|
263
|
+
float *simi, idx_t *idxi,
|
264
|
+
size_t k) const override
|
265
|
+
{
|
266
|
+
size_t nup = 0;
|
267
|
+
for (size_t j = 0; j < list_size; j++) {
|
268
|
+
|
269
|
+
float dis = hc.hamming (codes);
|
270
|
+
|
271
|
+
if (dis < simi [0]) {
|
272
|
+
maxheap_pop (k, simi, idxi);
|
273
|
+
int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
|
274
|
+
maxheap_push (k, simi, idxi, dis, id);
|
275
|
+
nup++;
|
276
|
+
}
|
277
|
+
codes += code_size;
|
278
|
+
}
|
279
|
+
return nup;
|
280
|
+
}
|
281
|
+
|
282
|
+
void scan_codes_range (size_t list_size,
|
283
|
+
const uint8_t *codes,
|
284
|
+
const idx_t *ids,
|
285
|
+
float radius,
|
286
|
+
RangeQueryResult & res) const override
|
287
|
+
{
|
288
|
+
for (size_t j = 0; j < list_size; j++) {
|
289
|
+
float dis = hc.hamming (codes);
|
290
|
+
if (dis < radius) {
|
291
|
+
int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
|
292
|
+
res.add (dis, id);
|
293
|
+
}
|
294
|
+
codes += code_size;
|
295
|
+
}
|
296
|
+
}
|
297
|
+
|
298
|
+
|
299
|
+
};
|
300
|
+
|
301
|
+
} // anonymous namespace
|
302
|
+
|
303
|
+
/// Creates a scanner specialised for this index's code size: dedicated
/// Hamming computers for 4, 8, 16, 20, 32 and 64 bytes, generic ones for
/// other multiples of 8 or 4, and an exception otherwise. The scanner is
/// allocated with new and returned as a raw pointer.
InvertedListScanner* IndexIVFSpectralHash::get_InvertedListScanner
    (bool store_pairs) const
{
    switch (code_size) {
    case 4:
        return new IVFScanner<HammingComputer4> (this, store_pairs);
    case 8:
        return new IVFScanner<HammingComputer8> (this, store_pairs);
    case 16:
        return new IVFScanner<HammingComputer16> (this, store_pairs);
    case 20:
        return new IVFScanner<HammingComputer20> (this, store_pairs);
    case 32:
        return new IVFScanner<HammingComputer32> (this, store_pairs);
    case 64:
        return new IVFScanner<HammingComputer64> (this, store_pairs);
    default:
        break;
    }

    // no dedicated computer: fall back on the generic ones
    if (code_size % 8 == 0) {
        return new IVFScanner<HammingComputerM8>(this, store_pairs);
    }
    if (code_size % 4 == 0) {
        return new IVFScanner<HammingComputerM4>(this, store_pairs);
    }
    FAISS_THROW_MSG("not supported");
}
|
328
|
+
|
329
|
+
|
330
|
+
|
331
|
+
} // namespace faiss
|