faiss 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +80 -0
- data/vendor/faiss/c_api/AutoTune_c.h +2 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -6
- data/vendor/faiss/c_api/IndexShards_c.h +1 -4
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +4 -2
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +1 -1
- data/vendor/faiss/demos/demo_imi_flat.cpp +5 -2
- data/vendor/faiss/demos/demo_imi_pq.cpp +6 -2
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +7 -2
- data/vendor/faiss/{AutoTune.cpp → faiss/AutoTune.cpp} +9 -9
- data/vendor/faiss/{AutoTune.h → faiss/AutoTune.h} +0 -0
- data/vendor/faiss/{Clustering.cpp → faiss/Clustering.cpp} +13 -12
- data/vendor/faiss/{Clustering.h → faiss/Clustering.h} +0 -0
- data/vendor/faiss/{DirectMap.cpp → faiss/DirectMap.cpp} +0 -0
- data/vendor/faiss/{DirectMap.h → faiss/DirectMap.h} +0 -0
- data/vendor/faiss/{IVFlib.cpp → faiss/IVFlib.cpp} +86 -11
- data/vendor/faiss/{IVFlib.h → faiss/IVFlib.h} +26 -8
- data/vendor/faiss/{Index.cpp → faiss/Index.cpp} +0 -0
- data/vendor/faiss/{Index.h → faiss/Index.h} +1 -1
- data/vendor/faiss/{Index2Layer.cpp → faiss/Index2Layer.cpp} +12 -11
- data/vendor/faiss/{Index2Layer.h → faiss/Index2Layer.h} +0 -0
- data/vendor/faiss/{IndexBinary.cpp → faiss/IndexBinary.cpp} +2 -1
- data/vendor/faiss/{IndexBinary.h → faiss/IndexBinary.h} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.cpp → faiss/IndexBinaryFlat.cpp} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.h → faiss/IndexBinaryFlat.h} +0 -0
- data/vendor/faiss/{IndexBinaryFromFloat.cpp → faiss/IndexBinaryFromFloat.cpp} +1 -0
- data/vendor/faiss/{IndexBinaryFromFloat.h → faiss/IndexBinaryFromFloat.h} +0 -0
- data/vendor/faiss/{IndexBinaryHNSW.cpp → faiss/IndexBinaryHNSW.cpp} +1 -2
- data/vendor/faiss/{IndexBinaryHNSW.h → faiss/IndexBinaryHNSW.h} +0 -0
- data/vendor/faiss/{IndexBinaryHash.cpp → faiss/IndexBinaryHash.cpp} +16 -7
- data/vendor/faiss/{IndexBinaryHash.h → faiss/IndexBinaryHash.h} +2 -1
- data/vendor/faiss/{IndexBinaryIVF.cpp → faiss/IndexBinaryIVF.cpp} +10 -16
- data/vendor/faiss/{IndexBinaryIVF.h → faiss/IndexBinaryIVF.h} +1 -1
- data/vendor/faiss/{IndexFlat.cpp → faiss/IndexFlat.cpp} +0 -0
- data/vendor/faiss/{IndexFlat.h → faiss/IndexFlat.h} +0 -0
- data/vendor/faiss/{IndexHNSW.cpp → faiss/IndexHNSW.cpp} +63 -32
- data/vendor/faiss/{IndexHNSW.h → faiss/IndexHNSW.h} +0 -0
- data/vendor/faiss/{IndexIVF.cpp → faiss/IndexIVF.cpp} +129 -46
- data/vendor/faiss/{IndexIVF.h → faiss/IndexIVF.h} +7 -3
- data/vendor/faiss/{IndexIVFFlat.cpp → faiss/IndexIVFFlat.cpp} +6 -5
- data/vendor/faiss/{IndexIVFFlat.h → faiss/IndexIVFFlat.h} +0 -0
- data/vendor/faiss/{IndexIVFPQ.cpp → faiss/IndexIVFPQ.cpp} +9 -8
- data/vendor/faiss/{IndexIVFPQ.h → faiss/IndexIVFPQ.h} +4 -2
- data/vendor/faiss/{IndexIVFPQR.cpp → faiss/IndexIVFPQR.cpp} +3 -1
- data/vendor/faiss/{IndexIVFPQR.h → faiss/IndexIVFPQR.h} +0 -0
- data/vendor/faiss/{IndexIVFSpectralHash.cpp → faiss/IndexIVFSpectralHash.cpp} +1 -1
- data/vendor/faiss/{IndexIVFSpectralHash.h → faiss/IndexIVFSpectralHash.h} +0 -0
- data/vendor/faiss/{IndexLSH.cpp → faiss/IndexLSH.cpp} +0 -0
- data/vendor/faiss/{IndexLSH.h → faiss/IndexLSH.h} +0 -0
- data/vendor/faiss/{IndexLattice.cpp → faiss/IndexLattice.cpp} +0 -0
- data/vendor/faiss/{IndexLattice.h → faiss/IndexLattice.h} +0 -0
- data/vendor/faiss/{IndexPQ.cpp → faiss/IndexPQ.cpp} +6 -6
- data/vendor/faiss/{IndexPQ.h → faiss/IndexPQ.h} +3 -1
- data/vendor/faiss/{IndexPreTransform.cpp → faiss/IndexPreTransform.cpp} +0 -0
- data/vendor/faiss/{IndexPreTransform.h → faiss/IndexPreTransform.h} +0 -0
- data/vendor/faiss/{IndexReplicas.cpp → faiss/IndexReplicas.cpp} +102 -10
- data/vendor/faiss/{IndexReplicas.h → faiss/IndexReplicas.h} +6 -0
- data/vendor/faiss/{IndexScalarQuantizer.cpp → faiss/IndexScalarQuantizer.cpp} +3 -3
- data/vendor/faiss/{IndexScalarQuantizer.h → faiss/IndexScalarQuantizer.h} +0 -0
- data/vendor/faiss/{IndexShards.cpp → faiss/IndexShards.cpp} +37 -12
- data/vendor/faiss/{IndexShards.h → faiss/IndexShards.h} +3 -4
- data/vendor/faiss/{InvertedLists.cpp → faiss/InvertedLists.cpp} +2 -2
- data/vendor/faiss/{InvertedLists.h → faiss/InvertedLists.h} +1 -0
- data/vendor/faiss/{MatrixStats.cpp → faiss/MatrixStats.cpp} +0 -0
- data/vendor/faiss/{MatrixStats.h → faiss/MatrixStats.h} +0 -0
- data/vendor/faiss/{MetaIndexes.cpp → faiss/MetaIndexes.cpp} +5 -3
- data/vendor/faiss/{MetaIndexes.h → faiss/MetaIndexes.h} +0 -0
- data/vendor/faiss/{MetricType.h → faiss/MetricType.h} +0 -0
- data/vendor/faiss/{OnDiskInvertedLists.cpp → faiss/OnDiskInvertedLists.cpp} +141 -3
- data/vendor/faiss/{OnDiskInvertedLists.h → faiss/OnDiskInvertedLists.h} +27 -7
- data/vendor/faiss/{VectorTransform.cpp → faiss/VectorTransform.cpp} +4 -3
- data/vendor/faiss/{VectorTransform.h → faiss/VectorTransform.h} +0 -0
- data/vendor/faiss/{clone_index.cpp → faiss/clone_index.cpp} +0 -0
- data/vendor/faiss/{clone_index.h → faiss/clone_index.h} +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.cpp +14 -14
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.h +6 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuDistance.h +12 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuFaissAssert.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndex.h +3 -9
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexBinaryFlat.h +7 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexFlat.h +35 -10
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVF.h +1 -2
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFFlat.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFPQ.h +21 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFScalarQuantizer.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndicesOptions.h +0 -0
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +200 -0
- data/vendor/faiss/faiss/gpu/GpuResources.h +264 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +572 -0
- data/vendor/faiss/{gpu → faiss/gpu}/StandardGpuResources.h +83 -15
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper-inl.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfClustering.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfIVFPQAdd.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/WriteIndex.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexBinaryFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexFlat.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFPQ.cpp +141 -52
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuMemoryException.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.cpp +4 -2
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/demo_ivfpq_indexing_gpu.cpp +7 -5
- data/vendor/faiss/{gpu → faiss/gpu}/utils/DeviceUtils.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +213 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StackDeviceMemory.h +25 -40
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StaticUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.cpp +1 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.h +3 -1
- data/vendor/faiss/{impl → faiss/impl}/FaissAssert.h +1 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.cpp +26 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.h +4 -0
- data/vendor/faiss/{impl → faiss/impl}/HNSW.cpp +26 -26
- data/vendor/faiss/{impl → faiss/impl}/HNSW.h +19 -11
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.cpp +1 -1
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer-inl.h +0 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.cpp +9 -9
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.cpp +63 -39
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex-inl.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/index_read.cpp +99 -116
- data/vendor/faiss/{impl → faiss/impl}/index_write.cpp +15 -50
- data/vendor/faiss/{impl → faiss/impl}/io.cpp +15 -10
- data/vendor/faiss/{impl → faiss/impl}/io.h +22 -8
- data/vendor/faiss/faiss/impl/io_macros.h +57 -0
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.cpp +52 -36
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.h +3 -3
- data/vendor/faiss/faiss/impl/platform_macros.h +24 -0
- data/vendor/faiss/{index_factory.cpp → faiss/index_factory.cpp} +33 -12
- data/vendor/faiss/{index_factory.h → faiss/index_factory.h} +0 -0
- data/vendor/faiss/{index_io.h → faiss/index_io.h} +55 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +112 -0
- data/vendor/faiss/faiss/python/python_callbacks.h +45 -0
- data/vendor/faiss/{utils → faiss/utils}/Heap.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/Heap.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.cpp +0 -0
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/distances.cpp +28 -13
- data/vendor/faiss/{utils → faiss/utils}/distances.h +2 -1
- data/vendor/faiss/{utils → faiss/utils}/distances_simd.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/hamming-inl.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/hamming.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/hamming.h +7 -1
- data/vendor/faiss/{utils → faiss/utils}/random.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/random.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/utils.cpp +27 -28
- data/vendor/faiss/{utils → faiss/utils}/utils.h +4 -0
- data/vendor/faiss/misc/test_blas.cpp +4 -1
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -2
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +6 -1
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +4 -1
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +6 -4
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +12 -5
- data/vendor/faiss/tests/test_merge.cpp +6 -3
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +7 -2
- data/vendor/faiss/tests/test_pairs_decoding.cpp +5 -1
- data/vendor/faiss/tests/test_params_override.cpp +7 -2
- data/vendor/faiss/tests/test_sliding_ivf.cpp +10 -4
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +14 -8
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +11 -7
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +12 -7
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +6 -3
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +7 -3
- metadata +154 -153
- data/vendor/faiss/gpu/GpuResources.cpp +0 -52
- data/vendor/faiss/gpu/GpuResources.h +0 -73
- data/vendor/faiss/gpu/StandardGpuResources.cpp +0 -303
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +0 -77
- data/vendor/faiss/gpu/utils/DeviceMemory.h +0 -71
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +0 -89
- data/vendor/faiss/gpu/utils/MemorySpace.h +0 -44
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +0 -239
@@ -19,6 +19,7 @@
|
|
19
19
|
#include <faiss/InvertedLists.h>
|
20
20
|
#include <faiss/DirectMap.h>
|
21
21
|
#include <faiss/Clustering.h>
|
22
|
+
#include <faiss/impl/platform_macros.h>
|
22
23
|
#include <faiss/utils/Heap.h>
|
23
24
|
|
24
25
|
|
@@ -68,6 +69,7 @@ struct Level1Quantizer {
|
|
68
69
|
struct IVFSearchParameters {
|
69
70
|
size_t nprobe; ///< number of probes at query time
|
70
71
|
size_t max_codes; ///< max nb of codes to visit to do a query
|
72
|
+
IVFSearchParameters(): nprobe(1), max_codes(0) {}
|
71
73
|
virtual ~IVFSearchParameters () {}
|
72
74
|
};
|
73
75
|
|
@@ -96,7 +98,7 @@ struct InvertedListScanner;
|
|
96
98
|
* the distance estimation from the query to database vectors.
|
97
99
|
*/
|
98
100
|
struct IndexIVF: Index, Level1Quantizer {
|
99
|
-
///
|
101
|
+
/// Access to the actual data
|
100
102
|
InvertedLists *invlists;
|
101
103
|
bool own_invlists;
|
102
104
|
|
@@ -194,7 +196,9 @@ struct IndexIVF: Index, Level1Quantizer {
|
|
194
196
|
|
195
197
|
void range_search_preassigned(idx_t nx, const float *x, float radius,
|
196
198
|
const idx_t *keys, const float *coarse_dis,
|
197
|
-
RangeSearchResult *result
|
199
|
+
RangeSearchResult *result,
|
200
|
+
bool store_pairs=false,
|
201
|
+
const IVFSearchParameters *params=nullptr) const;
|
198
202
|
|
199
203
|
/// get a scanner for this index (store_pairs means ignore labels)
|
200
204
|
virtual InvertedListScanner *get_InvertedListScanner (
|
@@ -363,7 +367,7 @@ struct IndexIVFStats {
|
|
363
367
|
};
|
364
368
|
|
365
369
|
// global var that collects them all
|
366
|
-
extern IndexIVFStats indexIVF_stats;
|
370
|
+
FAISS_API extern IndexIVFStats indexIVF_stats;
|
367
371
|
|
368
372
|
|
369
373
|
} // namespace faiss
|
@@ -9,6 +9,7 @@
|
|
9
9
|
|
10
10
|
#include <faiss/IndexIVFFlat.h>
|
11
11
|
|
12
|
+
#include <cinttypes>
|
12
13
|
#include <cstdio>
|
13
14
|
|
14
15
|
#include <faiss/IndexFlat.h>
|
@@ -75,7 +76,7 @@ void IndexIVFFlat::add_core (idx_t n, const float * x, const int64_t *xids,
|
|
75
76
|
}
|
76
77
|
|
77
78
|
if (verbose) {
|
78
|
-
printf("IndexIVFFlat::add_core: added %
|
79
|
+
printf("IndexIVFFlat::add_core: added %" PRId64 " / %" PRId64 " vectors\n",
|
79
80
|
n_add, n);
|
80
81
|
}
|
81
82
|
ntotal += n;
|
@@ -247,8 +248,8 @@ void IndexIVFFlatDedup::train(idx_t n, const float* x)
|
|
247
248
|
}
|
248
249
|
}
|
249
250
|
if (verbose) {
|
250
|
-
printf ("IndexIVFFlatDedup::train: train on %
|
251
|
-
"(was %
|
251
|
+
printf ("IndexIVFFlatDedup::train: train on %" PRId64 " points after dedup "
|
252
|
+
"(was %" PRId64 " points)\n", n2, n);
|
252
253
|
}
|
253
254
|
IndexIVFFlat::train (n2, x2);
|
254
255
|
}
|
@@ -303,8 +304,8 @@ void IndexIVFFlatDedup::add_with_ids(
|
|
303
304
|
n_add++;
|
304
305
|
}
|
305
306
|
if (verbose) {
|
306
|
-
printf("IndexIVFFlat::add_with_ids: added %
|
307
|
-
" (out of which %
|
307
|
+
printf("IndexIVFFlat::add_with_ids: added %" PRId64 " / %" PRId64 " vectors"
|
308
|
+
" (out of which %" PRId64 " are duplicates)\n",
|
308
309
|
n_add, na, n_dup);
|
309
310
|
}
|
310
311
|
ntotal += n_add;
|
File without changes
|
@@ -9,6 +9,7 @@
|
|
9
9
|
|
10
10
|
#include <faiss/IndexIVFPQ.h>
|
11
11
|
|
12
|
+
#include <cinttypes>
|
12
13
|
#include <cmath>
|
13
14
|
#include <cstdio>
|
14
15
|
#include <cassert>
|
@@ -91,7 +92,7 @@ void IndexIVFPQ::train_residual_o (idx_t n, const float *x, float *residuals_2)
|
|
91
92
|
trainset = x;
|
92
93
|
}
|
93
94
|
if (verbose)
|
94
|
-
printf ("training %zdx%zd product quantizer on %
|
95
|
+
printf ("training %zdx%zd product quantizer on %" PRId64 " vectors in %dD\n",
|
95
96
|
pq.M, pq.ksub, n, d);
|
96
97
|
pq.verbose = verbose;
|
97
98
|
pq.train (n, trainset);
|
@@ -140,9 +141,9 @@ void IndexIVFPQ::train_residual_o (idx_t n, const float *x, float *residuals_2)
|
|
140
141
|
void IndexIVFPQ::encode (idx_t key, const float * x, uint8_t * code) const
|
141
142
|
{
|
142
143
|
if (by_residual) {
|
143
|
-
float residual_vec
|
144
|
-
quantizer->compute_residual (x, residual_vec, key);
|
145
|
-
pq.compute_code (residual_vec, code);
|
144
|
+
std::vector<float> residual_vec(d);
|
145
|
+
quantizer->compute_residual (x, residual_vec.data(), key);
|
146
|
+
pq.compute_code (residual_vec.data(), code);
|
146
147
|
}
|
147
148
|
else pq.compute_code (x, code);
|
148
149
|
}
|
@@ -240,7 +241,7 @@ void IndexIVFPQ::sa_decode (idx_t n, const uint8_t *codes,
|
|
240
241
|
std::vector<float> residual (d);
|
241
242
|
|
242
243
|
#pragma omp for
|
243
|
-
for (
|
244
|
+
for (idx_t i = 0; i < n; i++) {
|
244
245
|
const uint8_t *code = codes + i * (code_size + coarse_size);
|
245
246
|
int64_t list_no = decode_listno (code);
|
246
247
|
float *xi = x + i * d;
|
@@ -265,7 +266,7 @@ void IndexIVFPQ::add_core_o (idx_t n, const float * x, const idx_t *xids,
|
|
265
266
|
for (idx_t i0 = 0; i0 < n; i0 += bs) {
|
266
267
|
idx_t i1 = std::min(i0 + bs, n);
|
267
268
|
if (verbose) {
|
268
|
-
printf("IndexIVFPQ::add_core_o: adding %
|
269
|
+
printf("IndexIVFPQ::add_core_o: adding %" PRId64 ":%" PRId64 " / %" PRId64 "\n",
|
269
270
|
i0, i1, n);
|
270
271
|
}
|
271
272
|
add_core_o (i1 - i0, x + i0 * d,
|
@@ -341,7 +342,7 @@ void IndexIVFPQ::add_core_o (idx_t n, const float * x, const idx_t *xids,
|
|
341
342
|
if(verbose) {
|
342
343
|
char comment[100] = {0};
|
343
344
|
if (n_ignore > 0)
|
344
|
-
snprintf (comment, 100, "(%
|
345
|
+
snprintf (comment, 100, "(%zd vectors ignored)", n_ignore);
|
345
346
|
printf(" add_core times: %.3f %.3f %.3f %s\n",
|
346
347
|
t1 - t0, t2 - t1, t3 - t2, comment);
|
347
348
|
}
|
@@ -425,7 +426,7 @@ void IndexIVFPQ::precompute_table ()
|
|
425
426
|
if (verbose) {
|
426
427
|
printf(
|
427
428
|
"IndexIVFPQ::precompute_table: not precomputing table, "
|
428
|
-
"it would be too big: %
|
429
|
+
"it would be too big: %zd bytes (max %zd)\n",
|
429
430
|
table_size, precomputed_table_max_bytes);
|
430
431
|
use_precomputed_table = 0;
|
431
432
|
}
|
@@ -15,6 +15,7 @@
|
|
15
15
|
|
16
16
|
#include <faiss/IndexIVF.h>
|
17
17
|
#include <faiss/IndexPQ.h>
|
18
|
+
#include <faiss/impl/platform_macros.h>
|
18
19
|
|
19
20
|
|
20
21
|
namespace faiss {
|
@@ -22,6 +23,7 @@ namespace faiss {
|
|
22
23
|
struct IVFPQSearchParameters: IVFSearchParameters {
|
23
24
|
size_t scan_table_threshold; ///< use table computation or on-the-fly?
|
24
25
|
int polysemous_ht; ///< Hamming thresh for polysemous filtering
|
26
|
+
IVFPQSearchParameters (): scan_table_threshold(0), polysemous_ht(0) {}
|
25
27
|
~IVFPQSearchParameters () {}
|
26
28
|
};
|
27
29
|
|
@@ -29,7 +31,7 @@ struct IVFPQSearchParameters: IVFSearchParameters {
|
|
29
31
|
/** Inverted file with Product Quantizer encoding. Each residual
|
30
32
|
* vector is encoded as a product quantizer code.
|
31
33
|
*/
|
32
|
-
struct IndexIVFPQ: IndexIVF {
|
34
|
+
struct FAISS_API IndexIVFPQ: IndexIVF {
|
33
35
|
bool by_residual; ///< Encode residual or plain vector?
|
34
36
|
|
35
37
|
ProductQuantizer pq; ///< produces the codes
|
@@ -149,7 +151,7 @@ struct IndexIVFPQStats {
|
|
149
151
|
};
|
150
152
|
|
151
153
|
// global var that collects them all
|
152
|
-
extern IndexIVFPQStats indexIVFPQ_stats;
|
154
|
+
FAISS_API extern IndexIVFPQStats indexIVFPQ_stats;
|
153
155
|
|
154
156
|
|
155
157
|
|
@@ -9,6 +9,8 @@
|
|
9
9
|
|
10
10
|
#include <faiss/IndexIVFPQR.h>
|
11
11
|
|
12
|
+
#include <cinttypes>
|
13
|
+
|
12
14
|
#include <faiss/utils/Heap.h>
|
13
15
|
#include <faiss/utils/utils.h>
|
14
16
|
#include <faiss/utils/distances.h>
|
@@ -59,7 +61,7 @@ void IndexIVFPQR::train_residual (idx_t n, const float *x)
|
|
59
61
|
train_residual_o (n, x, residual_2);
|
60
62
|
|
61
63
|
if (verbose)
|
62
|
-
printf ("training %zdx%zd 2nd level PQ quantizer on %
|
64
|
+
printf ("training %zdx%zd 2nd level PQ quantizer on %" PRId64 " %dD-vectors\n",
|
63
65
|
refine_pq.M, refine_pq.ksub, n, d);
|
64
66
|
|
65
67
|
refine_pq.cp.max_points_per_centroid = 1000;
|
File without changes
|
@@ -178,7 +178,7 @@ void IndexIVFSpectralHash::encode_vectors(idx_t n, const float* x_in,
|
|
178
178
|
|
179
179
|
// each thread takes care of a subset of lists
|
180
180
|
#pragma omp for
|
181
|
-
for (
|
181
|
+
for (idx_t i = 0; i < n; i++) {
|
182
182
|
int64_t list_no = list_nos [i];
|
183
183
|
|
184
184
|
if (list_no >= 0) {
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
@@ -9,7 +9,7 @@
|
|
9
9
|
|
10
10
|
#include <faiss/IndexPQ.h>
|
11
11
|
|
12
|
-
|
12
|
+
#include <cinttypes>
|
13
13
|
#include <cstddef>
|
14
14
|
#include <cstring>
|
15
15
|
#include <cstdio>
|
@@ -59,7 +59,7 @@ void IndexPQ::train (idx_t n, const float *x)
|
|
59
59
|
if (ntrain_perm > n / 4)
|
60
60
|
ntrain_perm = n / 4;
|
61
61
|
if (verbose) {
|
62
|
-
printf ("PQ training on %
|
62
|
+
printf ("PQ training on %" PRId64 " points, remains %" PRId64 " points: "
|
63
63
|
"training polysemous on %s\n",
|
64
64
|
n - ntrain_perm, ntrain_perm,
|
65
65
|
ntrain_perm == 0 ? "centroids" : "these");
|
@@ -522,8 +522,8 @@ void IndexPQ::hamming_distance_histogram (idx_t n, const float *x,
|
|
522
522
|
hamdis_t *distances = new hamdis_t [nb * bs];
|
523
523
|
ScopeDeleter<hamdis_t> del (distances);
|
524
524
|
#pragma omp for
|
525
|
-
for (
|
526
|
-
// printf ("dis stats: %
|
525
|
+
for (idx_t q0 = 0; q0 < n; q0 += bs) {
|
526
|
+
// printf ("dis stats: %zd/%zd\n", q0, n);
|
527
527
|
size_t q1 = q0 + bs;
|
528
528
|
if (q1 > n) q1 = n;
|
529
529
|
|
@@ -835,7 +835,7 @@ struct MinSumK {
|
|
835
835
|
x += ldx;
|
836
836
|
}
|
837
837
|
|
838
|
-
{ //
|
838
|
+
{ // initial result: take min for all elements
|
839
839
|
T sum = 0;
|
840
840
|
terms[0] = 0;
|
841
841
|
mark_seen (0);
|
@@ -955,7 +955,7 @@ void MultiIndexQuantizer::search (idx_t n, const float *x, idx_t k,
|
|
955
955
|
for (idx_t i0 = 0; i0 < n; i0 += bs) {
|
956
956
|
idx_t i1 = std::min(i0 + bs, n);
|
957
957
|
if (verbose) {
|
958
|
-
printf("MultiIndexQuantizer::search: %
|
958
|
+
printf("MultiIndexQuantizer::search: %" PRId64 ":%" PRId64 " / %" PRId64 "\n",
|
959
959
|
i0, i1, n);
|
960
960
|
}
|
961
961
|
search (i1 - i0, x + i0 * d, k,
|
@@ -17,6 +17,8 @@
|
|
17
17
|
#include <faiss/Index.h>
|
18
18
|
#include <faiss/impl/ProductQuantizer.h>
|
19
19
|
#include <faiss/impl/PolysemousTraining.h>
|
20
|
+
#include <faiss/impl/platform_macros.h>
|
21
|
+
|
20
22
|
|
21
23
|
namespace faiss {
|
22
24
|
|
@@ -138,7 +140,7 @@ struct IndexPQStats {
|
|
138
140
|
void reset ();
|
139
141
|
};
|
140
142
|
|
141
|
-
extern IndexPQStats indexPQ_stats;
|
143
|
+
FAISS_API extern IndexPQStats indexPQ_stats;
|
142
144
|
|
143
145
|
|
144
146
|
|
File without changes
|
File without changes
|
@@ -5,6 +5,8 @@
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
6
6
|
*/
|
7
7
|
|
8
|
+
#include <cinttypes>
|
9
|
+
|
8
10
|
#include <faiss/IndexReplicas.h>
|
9
11
|
#include <faiss/impl/FaissAssert.h>
|
10
12
|
|
@@ -36,33 +38,65 @@ IndexReplicasTemplate<IndexT>::onAfterAddIndex(IndexT* index) {
|
|
36
38
|
FAISS_THROW_IF_NOT_FMT(index->ntotal == existing->ntotal,
|
37
39
|
"IndexReplicas: newly added index does "
|
38
40
|
"not have same number of vectors as prior index; "
|
39
|
-
"prior index has %
|
41
|
+
"prior index has %" PRId64 " vectors, new index has %" PRId64,
|
40
42
|
existing->ntotal, index->ntotal);
|
41
43
|
|
42
44
|
FAISS_THROW_IF_NOT_MSG(index->is_trained == existing->is_trained,
|
43
45
|
"IndexReplicas: newly added index does "
|
44
46
|
"not have same train status as prior index");
|
47
|
+
|
48
|
+
FAISS_THROW_IF_NOT_MSG(index->d == existing->d,
|
49
|
+
"IndexReplicas: newly added index does "
|
50
|
+
"not have same dimension as prior index");
|
45
51
|
} else {
|
46
|
-
|
47
|
-
// (dimension is handled in ThreadedIndex)
|
48
|
-
this->ntotal = index->ntotal;
|
49
|
-
this->verbose = index->verbose;
|
50
|
-
this->is_trained = index->is_trained;
|
51
|
-
this->metric_type = index->metric_type;
|
52
|
+
syncWithSubIndexes();
|
52
53
|
}
|
53
54
|
}
|
54
55
|
|
56
|
+
template <typename IndexT>
|
57
|
+
void
|
58
|
+
IndexReplicasTemplate<IndexT>::onAfterRemoveIndex(IndexT* index) {
|
59
|
+
syncWithSubIndexes();
|
60
|
+
}
|
61
|
+
|
55
62
|
template <typename IndexT>
|
56
63
|
void
|
57
64
|
IndexReplicasTemplate<IndexT>::train(idx_t n, const component_t* x) {
|
58
|
-
|
65
|
+
auto fn =
|
66
|
+
[n, x](int i, IndexT* index) {
|
67
|
+
if (index->verbose) {
|
68
|
+
printf("begin train replica %d on %" PRId64 " points\n", i, n);
|
69
|
+
}
|
70
|
+
|
71
|
+
index->train(n, x);
|
72
|
+
|
73
|
+
if (index->verbose) {
|
74
|
+
printf("end train replica %d\n", i);
|
75
|
+
}
|
76
|
+
};
|
77
|
+
|
78
|
+
this->runOnIndex(fn);
|
79
|
+
syncWithSubIndexes();
|
59
80
|
}
|
60
81
|
|
61
82
|
template <typename IndexT>
|
62
83
|
void
|
63
84
|
IndexReplicasTemplate<IndexT>::add(idx_t n, const component_t* x) {
|
64
|
-
|
65
|
-
|
85
|
+
auto fn =
|
86
|
+
[n, x](int i, IndexT* index) {
|
87
|
+
if (index->verbose) {
|
88
|
+
printf("begin add replica %d on %" PRId64 " points\n", i, n);
|
89
|
+
}
|
90
|
+
|
91
|
+
index->add(n, x);
|
92
|
+
|
93
|
+
if (index->verbose) {
|
94
|
+
printf("end add replica %d\n", i);
|
95
|
+
}
|
96
|
+
};
|
97
|
+
|
98
|
+
this->runOnIndex(fn);
|
99
|
+
syncWithSubIndexes();
|
66
100
|
}
|
67
101
|
|
68
102
|
template <typename IndexT>
|
@@ -105,17 +139,75 @@ IndexReplicasTemplate<IndexT>::search(idx_t n,
|
|
105
139
|
if (base < n) {
|
106
140
|
auto numForIndex = std::min(queriesPerIndex, n - base);
|
107
141
|
|
142
|
+
if (index->verbose) {
|
143
|
+
printf("begin search replica %d on %" PRId64 " points\n",
|
144
|
+
i, numForIndex);
|
145
|
+
}
|
146
|
+
|
108
147
|
index->search(numForIndex,
|
109
148
|
x + base * componentsPerVec,
|
110
149
|
k,
|
111
150
|
distances + base * k,
|
112
151
|
labels + base * k);
|
152
|
+
|
153
|
+
if (index->verbose) {
|
154
|
+
printf("end search replica %d\n", i);
|
155
|
+
}
|
113
156
|
}
|
114
157
|
};
|
115
158
|
|
116
159
|
this->runOnIndex(fn);
|
117
160
|
}
|
118
161
|
|
162
|
+
// FIXME: assumes that nothing is currently running on the sub-indexes, which is
|
163
|
+
// true with the normal API, but should use the runOnIndex API instead
|
164
|
+
template <typename IndexT>
|
165
|
+
void
|
166
|
+
IndexReplicasTemplate<IndexT>::syncWithSubIndexes() {
|
167
|
+
if (!this->count()) {
|
168
|
+
this->is_trained = false;
|
169
|
+
this->ntotal = 0;
|
170
|
+
|
171
|
+
return;
|
172
|
+
}
|
173
|
+
|
174
|
+
auto firstIndex = this->at(0);
|
175
|
+
this->metric_type = firstIndex->metric_type;
|
176
|
+
this->is_trained = firstIndex->is_trained;
|
177
|
+
this->ntotal = firstIndex->ntotal;
|
178
|
+
|
179
|
+
for (int i = 1; i < this->count(); ++i) {
|
180
|
+
auto index = this->at(i);
|
181
|
+
FAISS_THROW_IF_NOT(this->metric_type == index->metric_type);
|
182
|
+
FAISS_THROW_IF_NOT(this->d == index->d);
|
183
|
+
FAISS_THROW_IF_NOT(this->is_trained == index->is_trained);
|
184
|
+
FAISS_THROW_IF_NOT(this->ntotal == index->ntotal);
|
185
|
+
}
|
186
|
+
}
|
187
|
+
|
188
|
+
// No metric_type for IndexBinary
|
189
|
+
template <>
|
190
|
+
void
|
191
|
+
IndexReplicasTemplate<IndexBinary>::syncWithSubIndexes() {
|
192
|
+
if (!this->count()) {
|
193
|
+
this->is_trained = false;
|
194
|
+
this->ntotal = 0;
|
195
|
+
|
196
|
+
return;
|
197
|
+
}
|
198
|
+
|
199
|
+
auto firstIndex = this->at(0);
|
200
|
+
this->is_trained = firstIndex->is_trained;
|
201
|
+
this->ntotal = firstIndex->ntotal;
|
202
|
+
|
203
|
+
for (int i = 1; i < this->count(); ++i) {
|
204
|
+
auto index = this->at(i);
|
205
|
+
FAISS_THROW_IF_NOT(this->d == index->d);
|
206
|
+
FAISS_THROW_IF_NOT(this->is_trained == index->is_trained);
|
207
|
+
FAISS_THROW_IF_NOT(this->ntotal == index->ntotal);
|
208
|
+
}
|
209
|
+
}
|
210
|
+
|
119
211
|
// explicit instantiations
|
120
212
|
template struct IndexReplicasTemplate<Index>;
|
121
213
|
template struct IndexReplicasTemplate<IndexBinary>;
|