faiss 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +9 -2
- data/ext/faiss/index.cpp +1 -1
- data/ext/faiss/index_binary.cpp +2 -2
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +7 -7
- data/vendor/faiss/faiss/AutoTune.h +1 -2
- data/vendor/faiss/faiss/Clustering.cpp +39 -22
- data/vendor/faiss/faiss/Clustering.h +40 -21
- data/vendor/faiss/faiss/IVFlib.cpp +26 -12
- data/vendor/faiss/faiss/Index.cpp +1 -1
- data/vendor/faiss/faiss/Index.h +40 -10
- data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
- data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
- data/vendor/faiss/faiss/IndexBinary.h +8 -19
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +107 -188
- data/vendor/faiss/faiss/IndexFastScan.cpp +95 -146
- data/vendor/faiss/faiss/IndexFastScan.h +9 -8
- data/vendor/faiss/faiss/IndexFlat.cpp +206 -10
- data/vendor/faiss/faiss/IndexFlat.h +20 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +170 -5
- data/vendor/faiss/faiss/IndexFlatCodes.h +23 -4
- data/vendor/faiss/faiss/IndexHNSW.cpp +231 -382
- data/vendor/faiss/faiss/IndexHNSW.h +62 -49
- data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
- data/vendor/faiss/faiss/IndexIDMap.h +24 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +162 -56
- data/vendor/faiss/faiss/IndexIVF.h +46 -6
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +33 -26
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +6 -2
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +502 -401
- data/vendor/faiss/faiss/IndexIVFFastScan.h +63 -26
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
- data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +79 -125
- data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +39 -52
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
- data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
- data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
- data/vendor/faiss/faiss/IndexLattice.cpp +1 -19
- data/vendor/faiss/faiss/IndexLattice.h +3 -22
- data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -33
- data/vendor/faiss/faiss/IndexNNDescent.h +1 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
- data/vendor/faiss/faiss/IndexNSG.h +11 -11
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +56 -0
- data/vendor/faiss/faiss/IndexNeuralNetCodec.h +49 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
- data/vendor/faiss/faiss/IndexPQ.h +1 -4
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRefine.cpp +54 -24
- data/vendor/faiss/faiss/IndexRefine.h +7 -0
- data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +25 -17
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
- data/vendor/faiss/faiss/IndexShards.cpp +21 -29
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
- data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
- data/vendor/faiss/faiss/MatrixStats.h +21 -9
- data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
- data/vendor/faiss/faiss/MetricType.h +7 -2
- data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
- data/vendor/faiss/faiss/VectorTransform.h +7 -7
- data/vendor/faiss/faiss/clone_index.cpp +15 -10
- data/vendor/faiss/faiss/clone_index.h +3 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +95 -17
- data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +152 -0
- data/vendor/faiss/faiss/cppcontrib/factory_tools.h +24 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +83 -30
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +123 -8
- data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +13 -0
- data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +30 -12
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +282 -0
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +14 -9
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +20 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
- data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +142 -17
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +26 -21
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +7 -1
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +8 -5
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +332 -40
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +4 -1
- data/vendor/faiss/faiss/gpu/utils/Timer.h +1 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +26 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +10 -3
- data/vendor/faiss/faiss/impl/DistanceComputer.h +70 -1
- data/vendor/faiss/faiss/impl/FaissAssert.h +4 -2
- data/vendor/faiss/faiss/impl/FaissException.h +13 -34
- data/vendor/faiss/faiss/impl/HNSW.cpp +605 -186
- data/vendor/faiss/faiss/impl/HNSW.h +52 -30
- data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +11 -9
- data/vendor/faiss/faiss/impl/LookupTableScaler.h +34 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +42 -27
- data/vendor/faiss/faiss/impl/NSG.cpp +0 -29
- data/vendor/faiss/faiss/impl/NSG.h +1 -1
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -22
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +6 -2
- data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
- data/vendor/faiss/faiss/impl/ResultHandler.h +347 -172
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1104 -147
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +3 -8
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +285 -42
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx512.h +248 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
- data/vendor/faiss/faiss/impl/index_read.cpp +74 -34
- data/vendor/faiss/faiss/impl/index_read_utils.h +37 -0
- data/vendor/faiss/faiss/impl/index_write.cpp +88 -51
- data/vendor/faiss/faiss/impl/io.cpp +23 -15
- data/vendor/faiss/faiss/impl/io.h +4 -4
- data/vendor/faiss/faiss/impl/io_macros.h +6 -0
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +40 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +14 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +487 -49
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +481 -225
- data/vendor/faiss/faiss/index_factory.cpp +41 -20
- data/vendor/faiss/faiss/index_io.h +12 -5
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +28 -8
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +3 -0
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +10 -2
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +73 -17
- data/vendor/faiss/faiss/invlists/InvertedLists.h +26 -8
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +24 -9
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +2 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +4 -4
- data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
- data/vendor/faiss/faiss/utils/Heap.h +105 -0
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +342 -0
- data/vendor/faiss/faiss/utils/NeuralNet.h +147 -0
- data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
- data/vendor/faiss/faiss/utils/bf16.h +36 -0
- data/vendor/faiss/faiss/utils/distances.cpp +147 -123
- data/vendor/faiss/faiss/utils/distances.h +86 -9
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
- data/vendor/faiss/faiss/utils/distances_simd.cpp +1589 -243
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +70 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +85 -137
- data/vendor/faiss/faiss/utils/extra_distances.h +3 -2
- data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
- data/vendor/faiss/faiss/utils/fp16.h +2 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +163 -111
- data/vendor/faiss/faiss/utils/hamming.h +58 -0
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +19 -88
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +58 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
- data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
- data/vendor/faiss/faiss/utils/prefetch.h +77 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
- data/vendor/faiss/faiss/utils/random.cpp +43 -0
- data/vendor/faiss/faiss/utils/random.h +25 -0
- data/vendor/faiss/faiss/utils/simdlib.h +10 -1
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +296 -0
- data/vendor/faiss/faiss/utils/simdlib_neon.h +77 -79
- data/vendor/faiss/faiss/utils/simdlib_ppc64.h +1084 -0
- data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
- data/vendor/faiss/faiss/utils/sorting.h +27 -0
- data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +176 -0
- data/vendor/faiss/faiss/utils/utils.cpp +120 -7
- data/vendor/faiss/faiss/utils/utils.h +60 -20
- metadata +23 -4
- data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +0 -102
|
@@ -5,8 +5,6 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
// -*- c++ -*-
|
|
9
|
-
|
|
10
8
|
#pragma once
|
|
11
9
|
|
|
12
10
|
#include <queue>
|
|
@@ -42,10 +40,13 @@ namespace faiss {
|
|
|
42
40
|
struct VisitedTable;
|
|
43
41
|
struct DistanceComputer; // from AuxIndexStructures
|
|
44
42
|
struct HNSWStats;
|
|
43
|
+
template <class C>
|
|
44
|
+
struct ResultHandler;
|
|
45
45
|
|
|
46
46
|
struct SearchParametersHNSW : SearchParameters {
|
|
47
47
|
int efSearch = 16;
|
|
48
48
|
bool check_relative_distance = true;
|
|
49
|
+
bool bounded_queue = true;
|
|
49
50
|
|
|
50
51
|
~SearchParametersHNSW() {}
|
|
51
52
|
};
|
|
@@ -54,6 +55,9 @@ struct HNSW {
|
|
|
54
55
|
/// internal storage of vectors (32 bits: this is expensive)
|
|
55
56
|
using storage_idx_t = int32_t;
|
|
56
57
|
|
|
58
|
+
// for now we do only these distances
|
|
59
|
+
using C = CMax<float, int64_t>;
|
|
60
|
+
|
|
57
61
|
typedef std::pair<float, storage_idx_t> Node;
|
|
58
62
|
|
|
59
63
|
/** Heap structure that allows fast
|
|
@@ -138,9 +142,6 @@ struct HNSW {
|
|
|
138
142
|
/// enough?
|
|
139
143
|
bool check_relative_distance = true;
|
|
140
144
|
|
|
141
|
-
/// number of entry points in levels > 0.
|
|
142
|
-
int upper_beam = 1;
|
|
143
|
-
|
|
144
145
|
/// use bounded queue during exploration
|
|
145
146
|
bool search_bounded_queue = true;
|
|
146
147
|
|
|
@@ -181,7 +182,8 @@ struct HNSW {
|
|
|
181
182
|
float d_nearest,
|
|
182
183
|
int level,
|
|
183
184
|
omp_lock_t* locks,
|
|
184
|
-
VisitedTable& vt
|
|
185
|
+
VisitedTable& vt,
|
|
186
|
+
bool keep_max_size_level0 = false);
|
|
185
187
|
|
|
186
188
|
/** add point pt_id on all levels <= pt_level and build the link
|
|
187
189
|
* structure for them. */
|
|
@@ -190,29 +192,27 @@ struct HNSW {
|
|
|
190
192
|
int pt_level,
|
|
191
193
|
int pt_id,
|
|
192
194
|
std::vector<omp_lock_t>& locks,
|
|
193
|
-
VisitedTable& vt
|
|
195
|
+
VisitedTable& vt,
|
|
196
|
+
bool keep_max_size_level0 = false);
|
|
194
197
|
|
|
195
198
|
/// search interface for 1 point, single thread
|
|
196
199
|
HNSWStats search(
|
|
197
200
|
DistanceComputer& qdis,
|
|
198
|
-
|
|
199
|
-
idx_t* I,
|
|
200
|
-
float* D,
|
|
201
|
+
ResultHandler<C>& res,
|
|
201
202
|
VisitedTable& vt,
|
|
202
203
|
const SearchParametersHNSW* params = nullptr) const;
|
|
203
204
|
|
|
204
205
|
/// search only in level 0 from a given vertex
|
|
205
206
|
void search_level_0(
|
|
206
207
|
DistanceComputer& qdis,
|
|
207
|
-
|
|
208
|
-
idx_t* idxi,
|
|
209
|
-
float* simi,
|
|
208
|
+
ResultHandler<C>& res,
|
|
210
209
|
idx_t nprobe,
|
|
211
210
|
const storage_idx_t* nearest_i,
|
|
212
211
|
const float* nearest_d,
|
|
213
212
|
int search_type,
|
|
214
213
|
HNSWStats& search_stats,
|
|
215
|
-
VisitedTable& vt
|
|
214
|
+
VisitedTable& vt,
|
|
215
|
+
const SearchParametersHNSW* params = nullptr) const;
|
|
216
216
|
|
|
217
217
|
void reset();
|
|
218
218
|
|
|
@@ -225,38 +225,60 @@ struct HNSW {
|
|
|
225
225
|
DistanceComputer& qdis,
|
|
226
226
|
std::priority_queue<NodeDistFarther>& input,
|
|
227
227
|
std::vector<NodeDistFarther>& output,
|
|
228
|
-
int max_size
|
|
228
|
+
int max_size,
|
|
229
|
+
bool keep_max_size_level0 = false);
|
|
230
|
+
|
|
231
|
+
void permute_entries(const idx_t* map);
|
|
229
232
|
};
|
|
230
233
|
|
|
231
234
|
struct HNSWStats {
|
|
232
|
-
size_t n1
|
|
233
|
-
size_t
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
size_t n1 = 0,
|
|
238
|
-
size_t n2 = 0,
|
|
239
|
-
size_t n3 = 0,
|
|
240
|
-
size_t ndis = 0,
|
|
241
|
-
size_t nreorder = 0)
|
|
242
|
-
: n1(n1), n2(n2), n3(n3), ndis(ndis), nreorder(nreorder) {}
|
|
235
|
+
size_t n1 = 0; /// number of vectors searched
|
|
236
|
+
size_t n2 =
|
|
237
|
+
0; /// number of queries for which the candidate list is exhausted
|
|
238
|
+
size_t ndis = 0; /// number of distances computed
|
|
239
|
+
size_t nhops = 0; /// number of hops aka number of edges traversed
|
|
243
240
|
|
|
244
241
|
void reset() {
|
|
245
|
-
n1 = n2 =
|
|
242
|
+
n1 = n2 = 0;
|
|
246
243
|
ndis = 0;
|
|
247
|
-
|
|
244
|
+
nhops = 0;
|
|
248
245
|
}
|
|
249
246
|
|
|
250
247
|
void combine(const HNSWStats& other) {
|
|
251
248
|
n1 += other.n1;
|
|
252
249
|
n2 += other.n2;
|
|
253
|
-
n3 += other.n3;
|
|
254
250
|
ndis += other.ndis;
|
|
255
|
-
|
|
251
|
+
nhops += other.nhops;
|
|
256
252
|
}
|
|
257
253
|
};
|
|
258
254
|
|
|
259
255
|
// global var that collects them all
|
|
260
256
|
FAISS_API extern HNSWStats hnsw_stats;
|
|
261
257
|
|
|
258
|
+
int search_from_candidates(
|
|
259
|
+
const HNSW& hnsw,
|
|
260
|
+
DistanceComputer& qdis,
|
|
261
|
+
ResultHandler<HNSW::C>& res,
|
|
262
|
+
HNSW::MinimaxHeap& candidates,
|
|
263
|
+
VisitedTable& vt,
|
|
264
|
+
HNSWStats& stats,
|
|
265
|
+
int level,
|
|
266
|
+
int nres_in = 0,
|
|
267
|
+
const SearchParametersHNSW* params = nullptr);
|
|
268
|
+
|
|
269
|
+
HNSWStats greedy_update_nearest(
|
|
270
|
+
const HNSW& hnsw,
|
|
271
|
+
DistanceComputer& qdis,
|
|
272
|
+
int level,
|
|
273
|
+
HNSW::storage_idx_t& nearest,
|
|
274
|
+
float& d_nearest);
|
|
275
|
+
|
|
276
|
+
std::priority_queue<HNSW::Node> search_from_candidate_unbounded(
|
|
277
|
+
const HNSW& hnsw,
|
|
278
|
+
const HNSW::Node& node,
|
|
279
|
+
DistanceComputer& qdis,
|
|
280
|
+
int ef,
|
|
281
|
+
VisitedTable* vt,
|
|
282
|
+
HNSWStats& stats);
|
|
283
|
+
|
|
262
284
|
} // namespace faiss
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
#include <unordered_set>
|
|
11
11
|
#include <vector>
|
|
12
12
|
|
|
13
|
-
#include <faiss/
|
|
13
|
+
#include <faiss/MetricType.h>
|
|
14
14
|
|
|
15
15
|
/** IDSelector is intended to define a subset of vectors to handle (for removal
|
|
16
16
|
* or as subset to search) */
|
|
@@ -140,7 +140,7 @@ struct IDSelectorAnd : IDSelector {
|
|
|
140
140
|
: lhs(lhs), rhs(rhs) {}
|
|
141
141
|
bool is_member(idx_t id) const final {
|
|
142
142
|
return lhs->is_member(id) && rhs->is_member(id);
|
|
143
|
-
}
|
|
143
|
+
}
|
|
144
144
|
virtual ~IDSelectorAnd() {}
|
|
145
145
|
};
|
|
146
146
|
|
|
@@ -153,7 +153,7 @@ struct IDSelectorOr : IDSelector {
|
|
|
153
153
|
: lhs(lhs), rhs(rhs) {}
|
|
154
154
|
bool is_member(idx_t id) const final {
|
|
155
155
|
return lhs->is_member(id) || rhs->is_member(id);
|
|
156
|
-
}
|
|
156
|
+
}
|
|
157
157
|
virtual ~IDSelectorOr() {}
|
|
158
158
|
};
|
|
159
159
|
|
|
@@ -166,7 +166,7 @@ struct IDSelectorXOr : IDSelector {
|
|
|
166
166
|
: lhs(lhs), rhs(rhs) {}
|
|
167
167
|
bool is_member(idx_t id) const final {
|
|
168
168
|
return lhs->is_member(id) ^ rhs->is_member(id);
|
|
169
|
-
}
|
|
169
|
+
}
|
|
170
170
|
virtual ~IDSelectorXOr() {}
|
|
171
171
|
};
|
|
172
172
|
|
|
@@ -104,10 +104,10 @@ int dgemm_(
|
|
|
104
104
|
|
|
105
105
|
namespace {
|
|
106
106
|
|
|
107
|
-
void fmat_inverse(float* a,
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
std::vector<
|
|
107
|
+
void fmat_inverse(float* a, FINTEGER n) {
|
|
108
|
+
FINTEGER info;
|
|
109
|
+
FINTEGER lwork = n * n;
|
|
110
|
+
std::vector<FINTEGER> ipiv(n);
|
|
111
111
|
std::vector<float> workspace(lwork);
|
|
112
112
|
|
|
113
113
|
sgetrf_(&n, &n, a, &n, ipiv.data(), &info);
|
|
@@ -123,10 +123,10 @@ void dfvec_add(size_t d, const double* a, const float* b, double* c) {
|
|
|
123
123
|
}
|
|
124
124
|
}
|
|
125
125
|
|
|
126
|
-
void dmat_inverse(double* a,
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
std::vector<
|
|
126
|
+
void dmat_inverse(double* a, FINTEGER n) {
|
|
127
|
+
FINTEGER info;
|
|
128
|
+
FINTEGER lwork = n * n;
|
|
129
|
+
std::vector<FINTEGER> ipiv(n);
|
|
130
130
|
std::vector<double> workspace(lwork);
|
|
131
131
|
|
|
132
132
|
dgetrf_(&n, &n, a, &n, ipiv.data(), &info);
|
|
@@ -628,7 +628,9 @@ void LocalSearchQuantizer::icm_encode_step(
|
|
|
628
628
|
{
|
|
629
629
|
size_t binary_idx = (other_m + 1) * M * K * K +
|
|
630
630
|
m * K * K + code2 * K + code;
|
|
631
|
-
_mm_prefetch(
|
|
631
|
+
_mm_prefetch(
|
|
632
|
+
(const char*)(binaries + binary_idx),
|
|
633
|
+
_MM_HINT_T0);
|
|
632
634
|
}
|
|
633
635
|
}
|
|
634
636
|
#endif
|
|
@@ -38,6 +38,23 @@ struct DummyScaler {
|
|
|
38
38
|
return simd16uint16(0);
|
|
39
39
|
}
|
|
40
40
|
|
|
41
|
+
#ifdef __AVX512F__
|
|
42
|
+
inline simd64uint8 lookup(const simd64uint8&, const simd64uint8&) const {
|
|
43
|
+
FAISS_THROW_MSG("DummyScaler::lookup should not be called.");
|
|
44
|
+
return simd64uint8(0);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
inline simd32uint16 scale_lo(const simd64uint8&) const {
|
|
48
|
+
FAISS_THROW_MSG("DummyScaler::scale_lo should not be called.");
|
|
49
|
+
return simd32uint16(0);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
inline simd32uint16 scale_hi(const simd64uint8&) const {
|
|
53
|
+
FAISS_THROW_MSG("DummyScaler::scale_hi should not be called.");
|
|
54
|
+
return simd32uint16(0);
|
|
55
|
+
}
|
|
56
|
+
#endif
|
|
57
|
+
|
|
41
58
|
template <class dist_t>
|
|
42
59
|
inline dist_t scale_one(const dist_t&) const {
|
|
43
60
|
FAISS_THROW_MSG("DummyScaler::scale_one should not be called.");
|
|
@@ -67,6 +84,23 @@ struct NormTableScaler {
|
|
|
67
84
|
return (simd16uint16(res) >> 8) * scale_simd;
|
|
68
85
|
}
|
|
69
86
|
|
|
87
|
+
#ifdef __AVX512F__
|
|
88
|
+
inline simd64uint8 lookup(const simd64uint8& lut, const simd64uint8& c)
|
|
89
|
+
const {
|
|
90
|
+
return lut.lookup_4_lanes(c);
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
inline simd32uint16 scale_lo(const simd64uint8& res) const {
|
|
94
|
+
auto scale_simd_wide = simd32uint16(scale_simd, scale_simd);
|
|
95
|
+
return simd32uint16(res) * scale_simd_wide;
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
inline simd32uint16 scale_hi(const simd64uint8& res) const {
|
|
99
|
+
auto scale_simd_wide = simd32uint16(scale_simd, scale_simd);
|
|
100
|
+
return (simd32uint16(res) >> 8) * scale_simd_wide;
|
|
101
|
+
}
|
|
102
|
+
#endif
|
|
103
|
+
|
|
70
104
|
// for non-SIMD implem 2, 3, 4
|
|
71
105
|
template <class dist_t>
|
|
72
106
|
inline dist_t scale_one(const dist_t& x) const {
|
|
@@ -154,15 +154,20 @@ NNDescent::NNDescent(const int d, const int K) : K(K), d(d) {
|
|
|
154
154
|
NNDescent::~NNDescent() {}
|
|
155
155
|
|
|
156
156
|
void NNDescent::join(DistanceComputer& qdis) {
|
|
157
|
+
idx_t check_period = InterruptCallback::get_period_hint(d * search_L);
|
|
158
|
+
for (idx_t i0 = 0; i0 < (idx_t)ntotal; i0 += check_period) {
|
|
159
|
+
idx_t i1 = std::min(i0 + check_period, (idx_t)ntotal);
|
|
157
160
|
#pragma omp parallel for default(shared) schedule(dynamic, 100)
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
161
|
+
for (idx_t n = i0; n < i1; n++) {
|
|
162
|
+
graph[n].join([&](int i, int j) {
|
|
163
|
+
if (i != j) {
|
|
164
|
+
float dist = qdis.symmetric_dis(i, j);
|
|
165
|
+
graph[i].insert(j, dist);
|
|
166
|
+
graph[j].insert(i, dist);
|
|
167
|
+
}
|
|
168
|
+
});
|
|
169
|
+
}
|
|
170
|
+
InterruptCallback::check();
|
|
166
171
|
}
|
|
167
172
|
}
|
|
168
173
|
|
|
@@ -195,8 +200,9 @@ void NNDescent::update() {
|
|
|
195
200
|
int l = 0;
|
|
196
201
|
|
|
197
202
|
while ((l < maxl) && (c < S)) {
|
|
198
|
-
if (nn.pool[l].flag)
|
|
203
|
+
if (nn.pool[l].flag) {
|
|
199
204
|
++c;
|
|
205
|
+
}
|
|
200
206
|
++l;
|
|
201
207
|
}
|
|
202
208
|
nn.M = l;
|
|
@@ -305,8 +311,9 @@ void NNDescent::generate_eval_set(
|
|
|
305
311
|
for (int i = 0; i < c.size(); i++) {
|
|
306
312
|
std::vector<Neighbor> tmp;
|
|
307
313
|
for (int j = 0; j < N; j++) {
|
|
308
|
-
if (c[i] == j)
|
|
314
|
+
if (c[i] == j) {
|
|
309
315
|
continue; // skip itself
|
|
316
|
+
}
|
|
310
317
|
float dist = qdis.symmetric_dis(c[i], j);
|
|
311
318
|
tmp.push_back(Neighbor(j, dist, true));
|
|
312
319
|
}
|
|
@@ -360,8 +367,9 @@ void NNDescent::init_graph(DistanceComputer& qdis) {
|
|
|
360
367
|
|
|
361
368
|
for (int j = 0; j < S; j++) {
|
|
362
369
|
int id = tmp[j];
|
|
363
|
-
if (id == i)
|
|
370
|
+
if (id == i) {
|
|
364
371
|
continue;
|
|
372
|
+
}
|
|
365
373
|
float dist = qdis.symmetric_dis(i, id);
|
|
366
374
|
|
|
367
375
|
graph[i].pool.push_back(Neighbor(id, dist, true));
|
|
@@ -374,6 +382,10 @@ void NNDescent::init_graph(DistanceComputer& qdis) {
|
|
|
374
382
|
|
|
375
383
|
void NNDescent::build(DistanceComputer& qdis, const int n, bool verbose) {
|
|
376
384
|
FAISS_THROW_IF_NOT_MSG(L >= K, "L should be >= K in NNDescent.build");
|
|
385
|
+
FAISS_THROW_IF_NOT_FMT(
|
|
386
|
+
n > NUM_EVAL_POINTS,
|
|
387
|
+
"NNDescent.build cannot build a graph smaller than %d",
|
|
388
|
+
int(NUM_EVAL_POINTS));
|
|
377
389
|
|
|
378
390
|
if (verbose) {
|
|
379
391
|
printf("Parameters: K=%d, S=%d, R=%d, L=%d, iter=%d\n",
|
|
@@ -403,7 +415,7 @@ void NNDescent::build(DistanceComputer& qdis, const int n, bool verbose) {
|
|
|
403
415
|
has_built = true;
|
|
404
416
|
|
|
405
417
|
if (verbose) {
|
|
406
|
-
printf("
|
|
418
|
+
printf("Added %d points into the index\n", ntotal);
|
|
407
419
|
}
|
|
408
420
|
}
|
|
409
421
|
|
|
@@ -414,30 +426,30 @@ void NNDescent::search(
|
|
|
414
426
|
float* dists,
|
|
415
427
|
VisitedTable& vt) const {
|
|
416
428
|
FAISS_THROW_IF_NOT_MSG(has_built, "The index is not build yet.");
|
|
417
|
-
int
|
|
429
|
+
int L_2 = std::max(search_L, topk);
|
|
418
430
|
|
|
419
431
|
// candidate pool, the K best items is the result.
|
|
420
|
-
std::vector<Neighbor> retset(
|
|
432
|
+
std::vector<Neighbor> retset(L_2 + 1);
|
|
421
433
|
|
|
422
|
-
// Randomly choose
|
|
423
|
-
std::vector<int> init_ids(
|
|
434
|
+
// Randomly choose L_2 points to initialize the candidate pool
|
|
435
|
+
std::vector<int> init_ids(L_2);
|
|
424
436
|
std::mt19937 rng(random_seed);
|
|
425
437
|
|
|
426
|
-
gen_random(rng, init_ids.data(),
|
|
427
|
-
for (int i = 0; i <
|
|
438
|
+
gen_random(rng, init_ids.data(), L_2, ntotal);
|
|
439
|
+
for (int i = 0; i < L_2; i++) {
|
|
428
440
|
int id = init_ids[i];
|
|
429
441
|
float dist = qdis(id);
|
|
430
442
|
retset[i] = Neighbor(id, dist, true);
|
|
431
443
|
}
|
|
432
444
|
|
|
433
445
|
// Maintain the candidate pool in ascending order
|
|
434
|
-
std::sort(retset.begin(), retset.begin() +
|
|
446
|
+
std::sort(retset.begin(), retset.begin() + L_2);
|
|
435
447
|
|
|
436
448
|
int k = 0;
|
|
437
449
|
|
|
438
|
-
// Stop until the smallest position updated is >=
|
|
439
|
-
while (k <
|
|
440
|
-
int nk =
|
|
450
|
+
// Stop until the smallest position updated is >= L_2
|
|
451
|
+
while (k < L_2) {
|
|
452
|
+
int nk = L_2;
|
|
441
453
|
|
|
442
454
|
if (retset[k].flag) {
|
|
443
455
|
retset[k].flag = false;
|
|
@@ -445,25 +457,28 @@ void NNDescent::search(
|
|
|
445
457
|
|
|
446
458
|
for (int m = 0; m < K; ++m) {
|
|
447
459
|
int id = final_graph[n * K + m];
|
|
448
|
-
if (vt.get(id))
|
|
460
|
+
if (vt.get(id)) {
|
|
449
461
|
continue;
|
|
462
|
+
}
|
|
450
463
|
|
|
451
464
|
vt.set(id);
|
|
452
465
|
float dist = qdis(id);
|
|
453
|
-
if (dist >= retset[
|
|
466
|
+
if (dist >= retset[L_2 - 1].distance) {
|
|
454
467
|
continue;
|
|
468
|
+
}
|
|
455
469
|
|
|
456
470
|
Neighbor nn(id, dist, true);
|
|
457
|
-
int r = insert_into_pool(retset.data(),
|
|
471
|
+
int r = insert_into_pool(retset.data(), L_2, nn);
|
|
458
472
|
|
|
459
473
|
if (r < nk)
|
|
460
474
|
nk = r;
|
|
461
475
|
}
|
|
462
476
|
}
|
|
463
|
-
if (nk <= k)
|
|
477
|
+
if (nk <= k) {
|
|
464
478
|
k = nk;
|
|
465
|
-
else
|
|
479
|
+
} else {
|
|
466
480
|
++k;
|
|
481
|
+
}
|
|
467
482
|
}
|
|
468
483
|
for (size_t i = 0; i < topk; i++) {
|
|
469
484
|
indices[i] = retset[i].id;
|
|
@@ -25,35 +25,6 @@ namespace {
|
|
|
25
25
|
// It needs to be smaller than 0
|
|
26
26
|
constexpr int EMPTY_ID = -1;
|
|
27
27
|
|
|
28
|
-
/* Wrap the distance computer into one that negates the
|
|
29
|
-
distances. This makes supporting INNER_PRODUCE search easier */
|
|
30
|
-
|
|
31
|
-
struct NegativeDistanceComputer : DistanceComputer {
|
|
32
|
-
/// owned by this
|
|
33
|
-
DistanceComputer* basedis;
|
|
34
|
-
|
|
35
|
-
explicit NegativeDistanceComputer(DistanceComputer* basedis)
|
|
36
|
-
: basedis(basedis) {}
|
|
37
|
-
|
|
38
|
-
void set_query(const float* x) override {
|
|
39
|
-
basedis->set_query(x);
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
/// compute distance of vector i to current query
|
|
43
|
-
float operator()(idx_t i) override {
|
|
44
|
-
return -(*basedis)(i);
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
/// compute distance between two stored vectors
|
|
48
|
-
float symmetric_dis(idx_t i, idx_t j) override {
|
|
49
|
-
return -basedis->symmetric_dis(i, j);
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
~NegativeDistanceComputer() override {
|
|
53
|
-
delete basedis;
|
|
54
|
-
}
|
|
55
|
-
};
|
|
56
|
-
|
|
57
28
|
} // namespace
|
|
58
29
|
|
|
59
30
|
DistanceComputer* storage_distance_computer(const Index* storage) {
|
|
@@ -54,7 +54,7 @@ namespace nsg {
|
|
|
54
54
|
|
|
55
55
|
template <class node_t>
|
|
56
56
|
struct Graph {
|
|
57
|
-
node_t* data; ///< the flattened adjacency matrix
|
|
57
|
+
node_t* data; ///< the flattened adjacency matrix, size N-by-K
|
|
58
58
|
int K; ///< nb of neighbors per node
|
|
59
59
|
int N; ///< total nb of nodes
|
|
60
60
|
bool own_fields; ///< the underlying data owned by itself or not
|
|
@@ -12,11 +12,11 @@
|
|
|
12
12
|
#include <omp.h>
|
|
13
13
|
#include <stdint.h>
|
|
14
14
|
|
|
15
|
+
#include <algorithm>
|
|
15
16
|
#include <cmath>
|
|
16
17
|
#include <cstdlib>
|
|
17
18
|
#include <cstring>
|
|
18
|
-
|
|
19
|
-
#include <algorithm>
|
|
19
|
+
#include <memory>
|
|
20
20
|
|
|
21
21
|
#include <faiss/utils/distances.h>
|
|
22
22
|
#include <faiss/utils/hamming.h>
|
|
@@ -683,18 +683,21 @@ struct RankingScore2 : Score3Computer<float, double> {
|
|
|
683
683
|
double accum_gt_weight_diff(
|
|
684
684
|
const std::vector<int>& a,
|
|
685
685
|
const std::vector<int>& b) {
|
|
686
|
-
|
|
686
|
+
const auto nb_2 = b.size();
|
|
687
|
+
const auto na = a.size();
|
|
687
688
|
|
|
688
689
|
double accu = 0;
|
|
689
|
-
|
|
690
|
-
for (
|
|
691
|
-
|
|
692
|
-
while (j <
|
|
690
|
+
size_t j = 0;
|
|
691
|
+
for (size_t i = 0; i < na; i++) {
|
|
692
|
+
const auto ai = a[i];
|
|
693
|
+
while (j < nb_2 && ai >= b[j]) {
|
|
693
694
|
j++;
|
|
695
|
+
}
|
|
694
696
|
|
|
695
697
|
double accu_i = 0;
|
|
696
|
-
for (
|
|
698
|
+
for (auto k = j; k < b.size(); k++) {
|
|
697
699
|
accu_i += rank_weight(b[k] - ai);
|
|
700
|
+
}
|
|
698
701
|
|
|
699
702
|
accu += rank_weight(ai) * accu_i;
|
|
700
703
|
}
|
|
@@ -882,14 +885,13 @@ void PolysemousTraining::optimize_ranking(
|
|
|
882
885
|
|
|
883
886
|
double t0 = getmillisecs();
|
|
884
887
|
|
|
885
|
-
PermutationObjective
|
|
888
|
+
std::unique_ptr<PermutationObjective> obj(new RankingScore2(
|
|
886
889
|
nbits,
|
|
887
890
|
nq,
|
|
888
891
|
nb,
|
|
889
892
|
codes.data(),
|
|
890
893
|
codes.data() + nq,
|
|
891
|
-
gt_distances.data());
|
|
892
|
-
ScopeDeleter1<PermutationObjective> del(obj);
|
|
894
|
+
gt_distances.data()));
|
|
893
895
|
|
|
894
896
|
if (verbose > 0) {
|
|
895
897
|
printf(" m=%d, nq=%zd, nb=%zd, initialize RankingScore "
|
|
@@ -900,7 +902,7 @@ void PolysemousTraining::optimize_ranking(
|
|
|
900
902
|
getmillisecs() - t0);
|
|
901
903
|
}
|
|
902
904
|
|
|
903
|
-
SimulatedAnnealingOptimizer optim(obj, *this);
|
|
905
|
+
SimulatedAnnealingOptimizer optim(obj.get(), *this);
|
|
904
906
|
|
|
905
907
|
if (log_pattern.size()) {
|
|
906
908
|
char fname[256];
|
|
@@ -61,6 +61,7 @@ void ProductQuantizer::set_derived_values() {
|
|
|
61
61
|
"The dimension of the vector (d) should be a multiple of the number of subquantizers (M)");
|
|
62
62
|
dsub = d / M;
|
|
63
63
|
code_size = (nbits * M + 7) / 8;
|
|
64
|
+
FAISS_THROW_IF_MSG(nbits > 24, "nbits larger than 24 is not practical.");
|
|
64
65
|
ksub = 1 << nbits;
|
|
65
66
|
centroids.resize(d * ksub);
|
|
66
67
|
verbose = false;
|
|
@@ -135,11 +136,10 @@ void ProductQuantizer::train(size_t n, const float* x) {
|
|
|
135
136
|
}
|
|
136
137
|
}
|
|
137
138
|
|
|
138
|
-
float
|
|
139
|
-
ScopeDeleter<float> del(xslice);
|
|
139
|
+
std::unique_ptr<float[]> xslice(new float[n * dsub]);
|
|
140
140
|
for (int m = 0; m < M; m++) {
|
|
141
141
|
for (int j = 0; j < n; j++)
|
|
142
|
-
memcpy(xslice + j * dsub,
|
|
142
|
+
memcpy(xslice.get() + j * dsub,
|
|
143
143
|
x + j * d + m * dsub,
|
|
144
144
|
dsub * sizeof(float));
|
|
145
145
|
|
|
@@ -153,11 +153,19 @@ void ProductQuantizer::train(size_t n, const float* x) {
|
|
|
153
153
|
switch (final_train_type) {
|
|
154
154
|
case Train_hypercube:
|
|
155
155
|
init_hypercube(
|
|
156
|
-
dsub,
|
|
156
|
+
dsub,
|
|
157
|
+
nbits,
|
|
158
|
+
n,
|
|
159
|
+
xslice.get(),
|
|
160
|
+
clus.centroids.data());
|
|
157
161
|
break;
|
|
158
162
|
case Train_hypercube_pca:
|
|
159
163
|
init_hypercube_pca(
|
|
160
|
-
dsub,
|
|
164
|
+
dsub,
|
|
165
|
+
nbits,
|
|
166
|
+
n,
|
|
167
|
+
xslice.get(),
|
|
168
|
+
clus.centroids.data());
|
|
161
169
|
break;
|
|
162
170
|
case Train_hot_start:
|
|
163
171
|
memcpy(clus.centroids.data(),
|
|
@@ -172,7 +180,7 @@ void ProductQuantizer::train(size_t n, const float* x) {
|
|
|
172
180
|
printf("Training PQ slice %d/%zd\n", m, M);
|
|
173
181
|
}
|
|
174
182
|
IndexFlatL2 index(dsub);
|
|
175
|
-
clus.train(n, xslice, assign_index ? *assign_index : index);
|
|
183
|
+
clus.train(n, xslice.get(), assign_index ? *assign_index : index);
|
|
176
184
|
set_params(clus.centroids.data(), m);
|
|
177
185
|
}
|
|
178
186
|
|
|
@@ -306,7 +314,8 @@ void ProductQuantizer::decode(const uint8_t* code, float* x) const {
|
|
|
306
314
|
}
|
|
307
315
|
|
|
308
316
|
void ProductQuantizer::decode(const uint8_t* code, float* x, size_t n) const {
|
|
309
|
-
|
|
317
|
+
#pragma omp parallel for if (n > 100)
|
|
318
|
+
for (int64_t i = 0; i < n; i++) {
|
|
310
319
|
this->decode(code + code_size * i, x + d * i);
|
|
311
320
|
}
|
|
312
321
|
}
|
|
@@ -342,21 +351,20 @@ void ProductQuantizer::compute_codes_with_assign_index(
|
|
|
342
351
|
assign_index->reset();
|
|
343
352
|
assign_index->add(ksub, get_centroids(m, 0));
|
|
344
353
|
size_t bs = 65536;
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
idx_t
|
|
348
|
-
ScopeDeleter<idx_t> del2(assign);
|
|
354
|
+
|
|
355
|
+
std::unique_ptr<float[]> xslice(new float[bs * dsub]);
|
|
356
|
+
std::unique_ptr<idx_t[]> assign(new idx_t[bs]);
|
|
349
357
|
|
|
350
358
|
for (size_t i0 = 0; i0 < n; i0 += bs) {
|
|
351
359
|
size_t i1 = std::min(i0 + bs, n);
|
|
352
360
|
|
|
353
361
|
for (size_t i = i0; i < i1; i++) {
|
|
354
|
-
memcpy(xslice + (i - i0) * dsub,
|
|
362
|
+
memcpy(xslice.get() + (i - i0) * dsub,
|
|
355
363
|
x + i * d + m * dsub,
|
|
356
364
|
dsub * sizeof(float));
|
|
357
365
|
}
|
|
358
366
|
|
|
359
|
-
assign_index->assign(i1 - i0, xslice, assign);
|
|
367
|
+
assign_index->assign(i1 - i0, xslice.get(), assign.get());
|
|
360
368
|
|
|
361
369
|
if (nbits == 8) {
|
|
362
370
|
uint8_t* c = codes + code_size * i0 + m;
|
|
@@ -405,15 +413,14 @@ void ProductQuantizer::compute_codes(const float* x, uint8_t* codes, size_t n)
|
|
|
405
413
|
for (int64_t i = 0; i < n; i++)
|
|
406
414
|
compute_code(x + i * d, codes + i * code_size);
|
|
407
415
|
|
|
408
|
-
} else { //
|
|
409
|
-
float
|
|
410
|
-
|
|
411
|
-
compute_distance_tables(n, x, dis_tables);
|
|
416
|
+
} else { // worthwhile to use BLAS
|
|
417
|
+
std::unique_ptr<float[]> dis_tables(new float[n * ksub * M]);
|
|
418
|
+
compute_distance_tables(n, x, dis_tables.get());
|
|
412
419
|
|
|
413
420
|
#pragma omp parallel for
|
|
414
421
|
for (int64_t i = 0; i < n; i++) {
|
|
415
422
|
uint8_t* code = codes + i * code_size;
|
|
416
|
-
const float* tab = dis_tables + i * ksub * M;
|
|
423
|
+
const float* tab = dis_tables.get() + i * ksub * M;
|
|
417
424
|
compute_code_from_distance_table(tab, code);
|
|
418
425
|
}
|
|
419
426
|
}
|
|
@@ -774,10 +781,6 @@ void ProductQuantizer::search_ip(
|
|
|
774
781
|
init_finalize_heap);
|
|
775
782
|
}
|
|
776
783
|
|
|
777
|
-
static float sqr(float x) {
|
|
778
|
-
return x * x;
|
|
779
|
-
}
|
|
780
|
-
|
|
781
784
|
void ProductQuantizer::compute_sdc_table() {
|
|
782
785
|
sdc_table.resize(M * ksub * ksub);
|
|
783
786
|
|