faiss 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +80 -0
- data/vendor/faiss/c_api/AutoTune_c.h +2 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -6
- data/vendor/faiss/c_api/IndexShards_c.h +1 -4
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +4 -2
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +1 -1
- data/vendor/faiss/demos/demo_imi_flat.cpp +5 -2
- data/vendor/faiss/demos/demo_imi_pq.cpp +6 -2
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +7 -2
- data/vendor/faiss/{AutoTune.cpp → faiss/AutoTune.cpp} +9 -9
- data/vendor/faiss/{AutoTune.h → faiss/AutoTune.h} +0 -0
- data/vendor/faiss/{Clustering.cpp → faiss/Clustering.cpp} +13 -12
- data/vendor/faiss/{Clustering.h → faiss/Clustering.h} +0 -0
- data/vendor/faiss/{DirectMap.cpp → faiss/DirectMap.cpp} +0 -0
- data/vendor/faiss/{DirectMap.h → faiss/DirectMap.h} +0 -0
- data/vendor/faiss/{IVFlib.cpp → faiss/IVFlib.cpp} +86 -11
- data/vendor/faiss/{IVFlib.h → faiss/IVFlib.h} +26 -8
- data/vendor/faiss/{Index.cpp → faiss/Index.cpp} +0 -0
- data/vendor/faiss/{Index.h → faiss/Index.h} +1 -1
- data/vendor/faiss/{Index2Layer.cpp → faiss/Index2Layer.cpp} +12 -11
- data/vendor/faiss/{Index2Layer.h → faiss/Index2Layer.h} +0 -0
- data/vendor/faiss/{IndexBinary.cpp → faiss/IndexBinary.cpp} +2 -1
- data/vendor/faiss/{IndexBinary.h → faiss/IndexBinary.h} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.cpp → faiss/IndexBinaryFlat.cpp} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.h → faiss/IndexBinaryFlat.h} +0 -0
- data/vendor/faiss/{IndexBinaryFromFloat.cpp → faiss/IndexBinaryFromFloat.cpp} +1 -0
- data/vendor/faiss/{IndexBinaryFromFloat.h → faiss/IndexBinaryFromFloat.h} +0 -0
- data/vendor/faiss/{IndexBinaryHNSW.cpp → faiss/IndexBinaryHNSW.cpp} +1 -2
- data/vendor/faiss/{IndexBinaryHNSW.h → faiss/IndexBinaryHNSW.h} +0 -0
- data/vendor/faiss/{IndexBinaryHash.cpp → faiss/IndexBinaryHash.cpp} +16 -7
- data/vendor/faiss/{IndexBinaryHash.h → faiss/IndexBinaryHash.h} +2 -1
- data/vendor/faiss/{IndexBinaryIVF.cpp → faiss/IndexBinaryIVF.cpp} +10 -16
- data/vendor/faiss/{IndexBinaryIVF.h → faiss/IndexBinaryIVF.h} +1 -1
- data/vendor/faiss/{IndexFlat.cpp → faiss/IndexFlat.cpp} +0 -0
- data/vendor/faiss/{IndexFlat.h → faiss/IndexFlat.h} +0 -0
- data/vendor/faiss/{IndexHNSW.cpp → faiss/IndexHNSW.cpp} +63 -32
- data/vendor/faiss/{IndexHNSW.h → faiss/IndexHNSW.h} +0 -0
- data/vendor/faiss/{IndexIVF.cpp → faiss/IndexIVF.cpp} +129 -46
- data/vendor/faiss/{IndexIVF.h → faiss/IndexIVF.h} +7 -3
- data/vendor/faiss/{IndexIVFFlat.cpp → faiss/IndexIVFFlat.cpp} +6 -5
- data/vendor/faiss/{IndexIVFFlat.h → faiss/IndexIVFFlat.h} +0 -0
- data/vendor/faiss/{IndexIVFPQ.cpp → faiss/IndexIVFPQ.cpp} +9 -8
- data/vendor/faiss/{IndexIVFPQ.h → faiss/IndexIVFPQ.h} +4 -2
- data/vendor/faiss/{IndexIVFPQR.cpp → faiss/IndexIVFPQR.cpp} +3 -1
- data/vendor/faiss/{IndexIVFPQR.h → faiss/IndexIVFPQR.h} +0 -0
- data/vendor/faiss/{IndexIVFSpectralHash.cpp → faiss/IndexIVFSpectralHash.cpp} +1 -1
- data/vendor/faiss/{IndexIVFSpectralHash.h → faiss/IndexIVFSpectralHash.h} +0 -0
- data/vendor/faiss/{IndexLSH.cpp → faiss/IndexLSH.cpp} +0 -0
- data/vendor/faiss/{IndexLSH.h → faiss/IndexLSH.h} +0 -0
- data/vendor/faiss/{IndexLattice.cpp → faiss/IndexLattice.cpp} +0 -0
- data/vendor/faiss/{IndexLattice.h → faiss/IndexLattice.h} +0 -0
- data/vendor/faiss/{IndexPQ.cpp → faiss/IndexPQ.cpp} +6 -6
- data/vendor/faiss/{IndexPQ.h → faiss/IndexPQ.h} +3 -1
- data/vendor/faiss/{IndexPreTransform.cpp → faiss/IndexPreTransform.cpp} +0 -0
- data/vendor/faiss/{IndexPreTransform.h → faiss/IndexPreTransform.h} +0 -0
- data/vendor/faiss/{IndexReplicas.cpp → faiss/IndexReplicas.cpp} +102 -10
- data/vendor/faiss/{IndexReplicas.h → faiss/IndexReplicas.h} +6 -0
- data/vendor/faiss/{IndexScalarQuantizer.cpp → faiss/IndexScalarQuantizer.cpp} +3 -3
- data/vendor/faiss/{IndexScalarQuantizer.h → faiss/IndexScalarQuantizer.h} +0 -0
- data/vendor/faiss/{IndexShards.cpp → faiss/IndexShards.cpp} +37 -12
- data/vendor/faiss/{IndexShards.h → faiss/IndexShards.h} +3 -4
- data/vendor/faiss/{InvertedLists.cpp → faiss/InvertedLists.cpp} +2 -2
- data/vendor/faiss/{InvertedLists.h → faiss/InvertedLists.h} +1 -0
- data/vendor/faiss/{MatrixStats.cpp → faiss/MatrixStats.cpp} +0 -0
- data/vendor/faiss/{MatrixStats.h → faiss/MatrixStats.h} +0 -0
- data/vendor/faiss/{MetaIndexes.cpp → faiss/MetaIndexes.cpp} +5 -3
- data/vendor/faiss/{MetaIndexes.h → faiss/MetaIndexes.h} +0 -0
- data/vendor/faiss/{MetricType.h → faiss/MetricType.h} +0 -0
- data/vendor/faiss/{OnDiskInvertedLists.cpp → faiss/OnDiskInvertedLists.cpp} +141 -3
- data/vendor/faiss/{OnDiskInvertedLists.h → faiss/OnDiskInvertedLists.h} +27 -7
- data/vendor/faiss/{VectorTransform.cpp → faiss/VectorTransform.cpp} +4 -3
- data/vendor/faiss/{VectorTransform.h → faiss/VectorTransform.h} +0 -0
- data/vendor/faiss/{clone_index.cpp → faiss/clone_index.cpp} +0 -0
- data/vendor/faiss/{clone_index.h → faiss/clone_index.h} +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.cpp +14 -14
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.h +6 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuDistance.h +12 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuFaissAssert.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndex.h +3 -9
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexBinaryFlat.h +7 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexFlat.h +35 -10
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVF.h +1 -2
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFFlat.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFPQ.h +21 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFScalarQuantizer.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndicesOptions.h +0 -0
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +200 -0
- data/vendor/faiss/faiss/gpu/GpuResources.h +264 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +572 -0
- data/vendor/faiss/{gpu → faiss/gpu}/StandardGpuResources.h +83 -15
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper-inl.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfClustering.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfIVFPQAdd.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/WriteIndex.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexBinaryFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexFlat.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFPQ.cpp +141 -52
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuMemoryException.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.cpp +4 -2
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/demo_ivfpq_indexing_gpu.cpp +7 -5
- data/vendor/faiss/{gpu → faiss/gpu}/utils/DeviceUtils.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +213 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StackDeviceMemory.h +25 -40
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StaticUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.cpp +1 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.h +3 -1
- data/vendor/faiss/{impl → faiss/impl}/FaissAssert.h +1 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.cpp +26 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.h +4 -0
- data/vendor/faiss/{impl → faiss/impl}/HNSW.cpp +26 -26
- data/vendor/faiss/{impl → faiss/impl}/HNSW.h +19 -11
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.cpp +1 -1
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer-inl.h +0 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.cpp +9 -9
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.cpp +63 -39
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex-inl.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/index_read.cpp +99 -116
- data/vendor/faiss/{impl → faiss/impl}/index_write.cpp +15 -50
- data/vendor/faiss/{impl → faiss/impl}/io.cpp +15 -10
- data/vendor/faiss/{impl → faiss/impl}/io.h +22 -8
- data/vendor/faiss/faiss/impl/io_macros.h +57 -0
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.cpp +52 -36
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.h +3 -3
- data/vendor/faiss/faiss/impl/platform_macros.h +24 -0
- data/vendor/faiss/{index_factory.cpp → faiss/index_factory.cpp} +33 -12
- data/vendor/faiss/{index_factory.h → faiss/index_factory.h} +0 -0
- data/vendor/faiss/{index_io.h → faiss/index_io.h} +55 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +112 -0
- data/vendor/faiss/faiss/python/python_callbacks.h +45 -0
- data/vendor/faiss/{utils → faiss/utils}/Heap.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/Heap.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.cpp +0 -0
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/distances.cpp +28 -13
- data/vendor/faiss/{utils → faiss/utils}/distances.h +2 -1
- data/vendor/faiss/{utils → faiss/utils}/distances_simd.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/hamming-inl.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/hamming.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/hamming.h +7 -1
- data/vendor/faiss/{utils → faiss/utils}/random.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/random.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/utils.cpp +27 -28
- data/vendor/faiss/{utils → faiss/utils}/utils.h +4 -0
- data/vendor/faiss/misc/test_blas.cpp +4 -1
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -2
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +6 -1
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +4 -1
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +6 -4
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +12 -5
- data/vendor/faiss/tests/test_merge.cpp +6 -3
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +7 -2
- data/vendor/faiss/tests/test_pairs_decoding.cpp +5 -1
- data/vendor/faiss/tests/test_params_override.cpp +7 -2
- data/vendor/faiss/tests/test_sliding_ivf.cpp +10 -4
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +14 -8
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +11 -7
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +12 -7
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +6 -3
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +7 -3
- metadata +154 -153
- data/vendor/faiss/gpu/GpuResources.cpp +0 -52
- data/vendor/faiss/gpu/GpuResources.h +0 -73
- data/vendor/faiss/gpu/StandardGpuResources.cpp +0 -303
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +0 -77
- data/vendor/faiss/gpu/utils/DeviceMemory.h +0 -71
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +0 -89
- data/vendor/faiss/gpu/utils/MemorySpace.h +0 -44
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +0 -239
@@ -19,6 +19,7 @@
|
|
19
19
|
#include <faiss/InvertedLists.h>
|
20
20
|
#include <faiss/DirectMap.h>
|
21
21
|
#include <faiss/Clustering.h>
|
22
|
+
#include <faiss/impl/platform_macros.h>
|
22
23
|
#include <faiss/utils/Heap.h>
|
23
24
|
|
24
25
|
|
@@ -68,6 +69,7 @@ struct Level1Quantizer {
|
|
68
69
|
struct IVFSearchParameters {
|
69
70
|
size_t nprobe; ///< number of probes at query time
|
70
71
|
size_t max_codes; ///< max nb of codes to visit to do a query
|
72
|
+
IVFSearchParameters(): nprobe(1), max_codes(0) {}
|
71
73
|
virtual ~IVFSearchParameters () {}
|
72
74
|
};
|
73
75
|
|
@@ -96,7 +98,7 @@ struct InvertedListScanner;
|
|
96
98
|
* the distance estimation from the query to database vectors.
|
97
99
|
*/
|
98
100
|
struct IndexIVF: Index, Level1Quantizer {
|
99
|
-
///
|
101
|
+
/// Access to the actual data
|
100
102
|
InvertedLists *invlists;
|
101
103
|
bool own_invlists;
|
102
104
|
|
@@ -194,7 +196,9 @@ struct IndexIVF: Index, Level1Quantizer {
|
|
194
196
|
|
195
197
|
void range_search_preassigned(idx_t nx, const float *x, float radius,
|
196
198
|
const idx_t *keys, const float *coarse_dis,
|
197
|
-
RangeSearchResult *result
|
199
|
+
RangeSearchResult *result,
|
200
|
+
bool store_pairs=false,
|
201
|
+
const IVFSearchParameters *params=nullptr) const;
|
198
202
|
|
199
203
|
/// get a scanner for this index (store_pairs means ignore labels)
|
200
204
|
virtual InvertedListScanner *get_InvertedListScanner (
|
@@ -363,7 +367,7 @@ struct IndexIVFStats {
|
|
363
367
|
};
|
364
368
|
|
365
369
|
// global var that collects them all
|
366
|
-
extern IndexIVFStats indexIVF_stats;
|
370
|
+
FAISS_API extern IndexIVFStats indexIVF_stats;
|
367
371
|
|
368
372
|
|
369
373
|
} // namespace faiss
|
@@ -9,6 +9,7 @@
|
|
9
9
|
|
10
10
|
#include <faiss/IndexIVFFlat.h>
|
11
11
|
|
12
|
+
#include <cinttypes>
|
12
13
|
#include <cstdio>
|
13
14
|
|
14
15
|
#include <faiss/IndexFlat.h>
|
@@ -75,7 +76,7 @@ void IndexIVFFlat::add_core (idx_t n, const float * x, const int64_t *xids,
|
|
75
76
|
}
|
76
77
|
|
77
78
|
if (verbose) {
|
78
|
-
printf("IndexIVFFlat::add_core: added %
|
79
|
+
printf("IndexIVFFlat::add_core: added %" PRId64 " / %" PRId64 " vectors\n",
|
79
80
|
n_add, n);
|
80
81
|
}
|
81
82
|
ntotal += n;
|
@@ -247,8 +248,8 @@ void IndexIVFFlatDedup::train(idx_t n, const float* x)
|
|
247
248
|
}
|
248
249
|
}
|
249
250
|
if (verbose) {
|
250
|
-
printf ("IndexIVFFlatDedup::train: train on %
|
251
|
-
"(was %
|
251
|
+
printf ("IndexIVFFlatDedup::train: train on %" PRId64 " points after dedup "
|
252
|
+
"(was %" PRId64 " points)\n", n2, n);
|
252
253
|
}
|
253
254
|
IndexIVFFlat::train (n2, x2);
|
254
255
|
}
|
@@ -303,8 +304,8 @@ void IndexIVFFlatDedup::add_with_ids(
|
|
303
304
|
n_add++;
|
304
305
|
}
|
305
306
|
if (verbose) {
|
306
|
-
printf("IndexIVFFlat::add_with_ids: added %
|
307
|
-
" (out of which %
|
307
|
+
printf("IndexIVFFlat::add_with_ids: added %" PRId64 " / %" PRId64 " vectors"
|
308
|
+
" (out of which %" PRId64 " are duplicates)\n",
|
308
309
|
n_add, na, n_dup);
|
309
310
|
}
|
310
311
|
ntotal += n_add;
|
File without changes
|
@@ -9,6 +9,7 @@
|
|
9
9
|
|
10
10
|
#include <faiss/IndexIVFPQ.h>
|
11
11
|
|
12
|
+
#include <cinttypes>
|
12
13
|
#include <cmath>
|
13
14
|
#include <cstdio>
|
14
15
|
#include <cassert>
|
@@ -91,7 +92,7 @@ void IndexIVFPQ::train_residual_o (idx_t n, const float *x, float *residuals_2)
|
|
91
92
|
trainset = x;
|
92
93
|
}
|
93
94
|
if (verbose)
|
94
|
-
printf ("training %zdx%zd product quantizer on %
|
95
|
+
printf ("training %zdx%zd product quantizer on %" PRId64 " vectors in %dD\n",
|
95
96
|
pq.M, pq.ksub, n, d);
|
96
97
|
pq.verbose = verbose;
|
97
98
|
pq.train (n, trainset);
|
@@ -140,9 +141,9 @@ void IndexIVFPQ::train_residual_o (idx_t n, const float *x, float *residuals_2)
|
|
140
141
|
void IndexIVFPQ::encode (idx_t key, const float * x, uint8_t * code) const
|
141
142
|
{
|
142
143
|
if (by_residual) {
|
143
|
-
float residual_vec
|
144
|
-
quantizer->compute_residual (x, residual_vec, key);
|
145
|
-
pq.compute_code (residual_vec, code);
|
144
|
+
std::vector<float> residual_vec(d);
|
145
|
+
quantizer->compute_residual (x, residual_vec.data(), key);
|
146
|
+
pq.compute_code (residual_vec.data(), code);
|
146
147
|
}
|
147
148
|
else pq.compute_code (x, code);
|
148
149
|
}
|
@@ -240,7 +241,7 @@ void IndexIVFPQ::sa_decode (idx_t n, const uint8_t *codes,
|
|
240
241
|
std::vector<float> residual (d);
|
241
242
|
|
242
243
|
#pragma omp for
|
243
|
-
for (
|
244
|
+
for (idx_t i = 0; i < n; i++) {
|
244
245
|
const uint8_t *code = codes + i * (code_size + coarse_size);
|
245
246
|
int64_t list_no = decode_listno (code);
|
246
247
|
float *xi = x + i * d;
|
@@ -265,7 +266,7 @@ void IndexIVFPQ::add_core_o (idx_t n, const float * x, const idx_t *xids,
|
|
265
266
|
for (idx_t i0 = 0; i0 < n; i0 += bs) {
|
266
267
|
idx_t i1 = std::min(i0 + bs, n);
|
267
268
|
if (verbose) {
|
268
|
-
printf("IndexIVFPQ::add_core_o: adding %
|
269
|
+
printf("IndexIVFPQ::add_core_o: adding %" PRId64 ":%" PRId64 " / %" PRId64 "\n",
|
269
270
|
i0, i1, n);
|
270
271
|
}
|
271
272
|
add_core_o (i1 - i0, x + i0 * d,
|
@@ -341,7 +342,7 @@ void IndexIVFPQ::add_core_o (idx_t n, const float * x, const idx_t *xids,
|
|
341
342
|
if(verbose) {
|
342
343
|
char comment[100] = {0};
|
343
344
|
if (n_ignore > 0)
|
344
|
-
snprintf (comment, 100, "(%
|
345
|
+
snprintf (comment, 100, "(%zd vectors ignored)", n_ignore);
|
345
346
|
printf(" add_core times: %.3f %.3f %.3f %s\n",
|
346
347
|
t1 - t0, t2 - t1, t3 - t2, comment);
|
347
348
|
}
|
@@ -425,7 +426,7 @@ void IndexIVFPQ::precompute_table ()
|
|
425
426
|
if (verbose) {
|
426
427
|
printf(
|
427
428
|
"IndexIVFPQ::precompute_table: not precomputing table, "
|
428
|
-
"it would be too big: %
|
429
|
+
"it would be too big: %zd bytes (max %zd)\n",
|
429
430
|
table_size, precomputed_table_max_bytes);
|
430
431
|
use_precomputed_table = 0;
|
431
432
|
}
|
@@ -15,6 +15,7 @@
|
|
15
15
|
|
16
16
|
#include <faiss/IndexIVF.h>
|
17
17
|
#include <faiss/IndexPQ.h>
|
18
|
+
#include <faiss/impl/platform_macros.h>
|
18
19
|
|
19
20
|
|
20
21
|
namespace faiss {
|
@@ -22,6 +23,7 @@ namespace faiss {
|
|
22
23
|
struct IVFPQSearchParameters: IVFSearchParameters {
|
23
24
|
size_t scan_table_threshold; ///< use table computation or on-the-fly?
|
24
25
|
int polysemous_ht; ///< Hamming thresh for polysemous filtering
|
26
|
+
IVFPQSearchParameters (): scan_table_threshold(0), polysemous_ht(0) {}
|
25
27
|
~IVFPQSearchParameters () {}
|
26
28
|
};
|
27
29
|
|
@@ -29,7 +31,7 @@ struct IVFPQSearchParameters: IVFSearchParameters {
|
|
29
31
|
/** Inverted file with Product Quantizer encoding. Each residual
|
30
32
|
* vector is encoded as a product quantizer code.
|
31
33
|
*/
|
32
|
-
struct IndexIVFPQ: IndexIVF {
|
34
|
+
struct FAISS_API IndexIVFPQ: IndexIVF {
|
33
35
|
bool by_residual; ///< Encode residual or plain vector?
|
34
36
|
|
35
37
|
ProductQuantizer pq; ///< produces the codes
|
@@ -149,7 +151,7 @@ struct IndexIVFPQStats {
|
|
149
151
|
};
|
150
152
|
|
151
153
|
// global var that collects them all
|
152
|
-
extern IndexIVFPQStats indexIVFPQ_stats;
|
154
|
+
FAISS_API extern IndexIVFPQStats indexIVFPQ_stats;
|
153
155
|
|
154
156
|
|
155
157
|
|
@@ -9,6 +9,8 @@
|
|
9
9
|
|
10
10
|
#include <faiss/IndexIVFPQR.h>
|
11
11
|
|
12
|
+
#include <cinttypes>
|
13
|
+
|
12
14
|
#include <faiss/utils/Heap.h>
|
13
15
|
#include <faiss/utils/utils.h>
|
14
16
|
#include <faiss/utils/distances.h>
|
@@ -59,7 +61,7 @@ void IndexIVFPQR::train_residual (idx_t n, const float *x)
|
|
59
61
|
train_residual_o (n, x, residual_2);
|
60
62
|
|
61
63
|
if (verbose)
|
62
|
-
printf ("training %zdx%zd 2nd level PQ quantizer on %
|
64
|
+
printf ("training %zdx%zd 2nd level PQ quantizer on %" PRId64 " %dD-vectors\n",
|
63
65
|
refine_pq.M, refine_pq.ksub, n, d);
|
64
66
|
|
65
67
|
refine_pq.cp.max_points_per_centroid = 1000;
|
File without changes
|
@@ -178,7 +178,7 @@ void IndexIVFSpectralHash::encode_vectors(idx_t n, const float* x_in,
|
|
178
178
|
|
179
179
|
// each thread takes care of a subset of lists
|
180
180
|
#pragma omp for
|
181
|
-
for (
|
181
|
+
for (idx_t i = 0; i < n; i++) {
|
182
182
|
int64_t list_no = list_nos [i];
|
183
183
|
|
184
184
|
if (list_no >= 0) {
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
@@ -9,7 +9,7 @@
|
|
9
9
|
|
10
10
|
#include <faiss/IndexPQ.h>
|
11
11
|
|
12
|
-
|
12
|
+
#include <cinttypes>
|
13
13
|
#include <cstddef>
|
14
14
|
#include <cstring>
|
15
15
|
#include <cstdio>
|
@@ -59,7 +59,7 @@ void IndexPQ::train (idx_t n, const float *x)
|
|
59
59
|
if (ntrain_perm > n / 4)
|
60
60
|
ntrain_perm = n / 4;
|
61
61
|
if (verbose) {
|
62
|
-
printf ("PQ training on %
|
62
|
+
printf ("PQ training on %" PRId64 " points, remains %" PRId64 " points: "
|
63
63
|
"training polysemous on %s\n",
|
64
64
|
n - ntrain_perm, ntrain_perm,
|
65
65
|
ntrain_perm == 0 ? "centroids" : "these");
|
@@ -522,8 +522,8 @@ void IndexPQ::hamming_distance_histogram (idx_t n, const float *x,
|
|
522
522
|
hamdis_t *distances = new hamdis_t [nb * bs];
|
523
523
|
ScopeDeleter<hamdis_t> del (distances);
|
524
524
|
#pragma omp for
|
525
|
-
for (
|
526
|
-
// printf ("dis stats: %
|
525
|
+
for (idx_t q0 = 0; q0 < n; q0 += bs) {
|
526
|
+
// printf ("dis stats: %zd/%zd\n", q0, n);
|
527
527
|
size_t q1 = q0 + bs;
|
528
528
|
if (q1 > n) q1 = n;
|
529
529
|
|
@@ -835,7 +835,7 @@ struct MinSumK {
|
|
835
835
|
x += ldx;
|
836
836
|
}
|
837
837
|
|
838
|
-
{ //
|
838
|
+
{ // initial result: take min for all elements
|
839
839
|
T sum = 0;
|
840
840
|
terms[0] = 0;
|
841
841
|
mark_seen (0);
|
@@ -955,7 +955,7 @@ void MultiIndexQuantizer::search (idx_t n, const float *x, idx_t k,
|
|
955
955
|
for (idx_t i0 = 0; i0 < n; i0 += bs) {
|
956
956
|
idx_t i1 = std::min(i0 + bs, n);
|
957
957
|
if (verbose) {
|
958
|
-
printf("MultiIndexQuantizer::search: %
|
958
|
+
printf("MultiIndexQuantizer::search: %" PRId64 ":%" PRId64 " / %" PRId64 "\n",
|
959
959
|
i0, i1, n);
|
960
960
|
}
|
961
961
|
search (i1 - i0, x + i0 * d, k,
|
@@ -17,6 +17,8 @@
|
|
17
17
|
#include <faiss/Index.h>
|
18
18
|
#include <faiss/impl/ProductQuantizer.h>
|
19
19
|
#include <faiss/impl/PolysemousTraining.h>
|
20
|
+
#include <faiss/impl/platform_macros.h>
|
21
|
+
|
20
22
|
|
21
23
|
namespace faiss {
|
22
24
|
|
@@ -138,7 +140,7 @@ struct IndexPQStats {
|
|
138
140
|
void reset ();
|
139
141
|
};
|
140
142
|
|
141
|
-
extern IndexPQStats indexPQ_stats;
|
143
|
+
FAISS_API extern IndexPQStats indexPQ_stats;
|
142
144
|
|
143
145
|
|
144
146
|
|
File without changes
|
File without changes
|
@@ -5,6 +5,8 @@
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
6
6
|
*/
|
7
7
|
|
8
|
+
#include <cinttypes>
|
9
|
+
|
8
10
|
#include <faiss/IndexReplicas.h>
|
9
11
|
#include <faiss/impl/FaissAssert.h>
|
10
12
|
|
@@ -36,33 +38,65 @@ IndexReplicasTemplate<IndexT>::onAfterAddIndex(IndexT* index) {
|
|
36
38
|
FAISS_THROW_IF_NOT_FMT(index->ntotal == existing->ntotal,
|
37
39
|
"IndexReplicas: newly added index does "
|
38
40
|
"not have same number of vectors as prior index; "
|
39
|
-
"prior index has %
|
41
|
+
"prior index has %" PRId64 " vectors, new index has %" PRId64,
|
40
42
|
existing->ntotal, index->ntotal);
|
41
43
|
|
42
44
|
FAISS_THROW_IF_NOT_MSG(index->is_trained == existing->is_trained,
|
43
45
|
"IndexReplicas: newly added index does "
|
44
46
|
"not have same train status as prior index");
|
47
|
+
|
48
|
+
FAISS_THROW_IF_NOT_MSG(index->d == existing->d,
|
49
|
+
"IndexReplicas: newly added index does "
|
50
|
+
"not have same dimension as prior index");
|
45
51
|
} else {
|
46
|
-
|
47
|
-
// (dimension is handled in ThreadedIndex)
|
48
|
-
this->ntotal = index->ntotal;
|
49
|
-
this->verbose = index->verbose;
|
50
|
-
this->is_trained = index->is_trained;
|
51
|
-
this->metric_type = index->metric_type;
|
52
|
+
syncWithSubIndexes();
|
52
53
|
}
|
53
54
|
}
|
54
55
|
|
56
|
+
template <typename IndexT>
|
57
|
+
void
|
58
|
+
IndexReplicasTemplate<IndexT>::onAfterRemoveIndex(IndexT* index) {
|
59
|
+
syncWithSubIndexes();
|
60
|
+
}
|
61
|
+
|
55
62
|
template <typename IndexT>
|
56
63
|
void
|
57
64
|
IndexReplicasTemplate<IndexT>::train(idx_t n, const component_t* x) {
|
58
|
-
|
65
|
+
auto fn =
|
66
|
+
[n, x](int i, IndexT* index) {
|
67
|
+
if (index->verbose) {
|
68
|
+
printf("begin train replica %d on %" PRId64 " points\n", i, n);
|
69
|
+
}
|
70
|
+
|
71
|
+
index->train(n, x);
|
72
|
+
|
73
|
+
if (index->verbose) {
|
74
|
+
printf("end train replica %d\n", i);
|
75
|
+
}
|
76
|
+
};
|
77
|
+
|
78
|
+
this->runOnIndex(fn);
|
79
|
+
syncWithSubIndexes();
|
59
80
|
}
|
60
81
|
|
61
82
|
template <typename IndexT>
|
62
83
|
void
|
63
84
|
IndexReplicasTemplate<IndexT>::add(idx_t n, const component_t* x) {
|
64
|
-
|
65
|
-
|
85
|
+
auto fn =
|
86
|
+
[n, x](int i, IndexT* index) {
|
87
|
+
if (index->verbose) {
|
88
|
+
printf("begin add replica %d on %" PRId64 " points\n", i, n);
|
89
|
+
}
|
90
|
+
|
91
|
+
index->add(n, x);
|
92
|
+
|
93
|
+
if (index->verbose) {
|
94
|
+
printf("end add replica %d\n", i);
|
95
|
+
}
|
96
|
+
};
|
97
|
+
|
98
|
+
this->runOnIndex(fn);
|
99
|
+
syncWithSubIndexes();
|
66
100
|
}
|
67
101
|
|
68
102
|
template <typename IndexT>
|
@@ -105,17 +139,75 @@ IndexReplicasTemplate<IndexT>::search(idx_t n,
|
|
105
139
|
if (base < n) {
|
106
140
|
auto numForIndex = std::min(queriesPerIndex, n - base);
|
107
141
|
|
142
|
+
if (index->verbose) {
|
143
|
+
printf("begin search replica %d on %" PRId64 " points\n",
|
144
|
+
i, numForIndex);
|
145
|
+
}
|
146
|
+
|
108
147
|
index->search(numForIndex,
|
109
148
|
x + base * componentsPerVec,
|
110
149
|
k,
|
111
150
|
distances + base * k,
|
112
151
|
labels + base * k);
|
152
|
+
|
153
|
+
if (index->verbose) {
|
154
|
+
printf("end search replica %d\n", i);
|
155
|
+
}
|
113
156
|
}
|
114
157
|
};
|
115
158
|
|
116
159
|
this->runOnIndex(fn);
|
117
160
|
}
|
118
161
|
|
162
|
+
// FIXME: assumes that nothing is currently running on the sub-indexes, which is
|
163
|
+
// true with the normal API, but should use the runOnIndex API instead
|
164
|
+
template <typename IndexT>
|
165
|
+
void
|
166
|
+
IndexReplicasTemplate<IndexT>::syncWithSubIndexes() {
|
167
|
+
if (!this->count()) {
|
168
|
+
this->is_trained = false;
|
169
|
+
this->ntotal = 0;
|
170
|
+
|
171
|
+
return;
|
172
|
+
}
|
173
|
+
|
174
|
+
auto firstIndex = this->at(0);
|
175
|
+
this->metric_type = firstIndex->metric_type;
|
176
|
+
this->is_trained = firstIndex->is_trained;
|
177
|
+
this->ntotal = firstIndex->ntotal;
|
178
|
+
|
179
|
+
for (int i = 1; i < this->count(); ++i) {
|
180
|
+
auto index = this->at(i);
|
181
|
+
FAISS_THROW_IF_NOT(this->metric_type == index->metric_type);
|
182
|
+
FAISS_THROW_IF_NOT(this->d == index->d);
|
183
|
+
FAISS_THROW_IF_NOT(this->is_trained == index->is_trained);
|
184
|
+
FAISS_THROW_IF_NOT(this->ntotal == index->ntotal);
|
185
|
+
}
|
186
|
+
}
|
187
|
+
|
188
|
+
// No metric_type for IndexBinary
|
189
|
+
template <>
|
190
|
+
void
|
191
|
+
IndexReplicasTemplate<IndexBinary>::syncWithSubIndexes() {
|
192
|
+
if (!this->count()) {
|
193
|
+
this->is_trained = false;
|
194
|
+
this->ntotal = 0;
|
195
|
+
|
196
|
+
return;
|
197
|
+
}
|
198
|
+
|
199
|
+
auto firstIndex = this->at(0);
|
200
|
+
this->is_trained = firstIndex->is_trained;
|
201
|
+
this->ntotal = firstIndex->ntotal;
|
202
|
+
|
203
|
+
for (int i = 1; i < this->count(); ++i) {
|
204
|
+
auto index = this->at(i);
|
205
|
+
FAISS_THROW_IF_NOT(this->d == index->d);
|
206
|
+
FAISS_THROW_IF_NOT(this->is_trained == index->is_trained);
|
207
|
+
FAISS_THROW_IF_NOT(this->ntotal == index->ntotal);
|
208
|
+
}
|
209
|
+
}
|
210
|
+
|
119
211
|
// explicit instantiations
|
120
212
|
template struct IndexReplicasTemplate<Index>;
|
121
213
|
template struct IndexReplicasTemplate<IndexBinary>;
|