faiss 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +80 -0
- data/vendor/faiss/c_api/AutoTune_c.h +2 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -6
- data/vendor/faiss/c_api/IndexShards_c.h +1 -4
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +4 -2
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +1 -1
- data/vendor/faiss/demos/demo_imi_flat.cpp +5 -2
- data/vendor/faiss/demos/demo_imi_pq.cpp +6 -2
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +7 -2
- data/vendor/faiss/{AutoTune.cpp → faiss/AutoTune.cpp} +9 -9
- data/vendor/faiss/{AutoTune.h → faiss/AutoTune.h} +0 -0
- data/vendor/faiss/{Clustering.cpp → faiss/Clustering.cpp} +13 -12
- data/vendor/faiss/{Clustering.h → faiss/Clustering.h} +0 -0
- data/vendor/faiss/{DirectMap.cpp → faiss/DirectMap.cpp} +0 -0
- data/vendor/faiss/{DirectMap.h → faiss/DirectMap.h} +0 -0
- data/vendor/faiss/{IVFlib.cpp → faiss/IVFlib.cpp} +86 -11
- data/vendor/faiss/{IVFlib.h → faiss/IVFlib.h} +26 -8
- data/vendor/faiss/{Index.cpp → faiss/Index.cpp} +0 -0
- data/vendor/faiss/{Index.h → faiss/Index.h} +1 -1
- data/vendor/faiss/{Index2Layer.cpp → faiss/Index2Layer.cpp} +12 -11
- data/vendor/faiss/{Index2Layer.h → faiss/Index2Layer.h} +0 -0
- data/vendor/faiss/{IndexBinary.cpp → faiss/IndexBinary.cpp} +2 -1
- data/vendor/faiss/{IndexBinary.h → faiss/IndexBinary.h} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.cpp → faiss/IndexBinaryFlat.cpp} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.h → faiss/IndexBinaryFlat.h} +0 -0
- data/vendor/faiss/{IndexBinaryFromFloat.cpp → faiss/IndexBinaryFromFloat.cpp} +1 -0
- data/vendor/faiss/{IndexBinaryFromFloat.h → faiss/IndexBinaryFromFloat.h} +0 -0
- data/vendor/faiss/{IndexBinaryHNSW.cpp → faiss/IndexBinaryHNSW.cpp} +1 -2
- data/vendor/faiss/{IndexBinaryHNSW.h → faiss/IndexBinaryHNSW.h} +0 -0
- data/vendor/faiss/{IndexBinaryHash.cpp → faiss/IndexBinaryHash.cpp} +16 -7
- data/vendor/faiss/{IndexBinaryHash.h → faiss/IndexBinaryHash.h} +2 -1
- data/vendor/faiss/{IndexBinaryIVF.cpp → faiss/IndexBinaryIVF.cpp} +10 -16
- data/vendor/faiss/{IndexBinaryIVF.h → faiss/IndexBinaryIVF.h} +1 -1
- data/vendor/faiss/{IndexFlat.cpp → faiss/IndexFlat.cpp} +0 -0
- data/vendor/faiss/{IndexFlat.h → faiss/IndexFlat.h} +0 -0
- data/vendor/faiss/{IndexHNSW.cpp → faiss/IndexHNSW.cpp} +63 -32
- data/vendor/faiss/{IndexHNSW.h → faiss/IndexHNSW.h} +0 -0
- data/vendor/faiss/{IndexIVF.cpp → faiss/IndexIVF.cpp} +129 -46
- data/vendor/faiss/{IndexIVF.h → faiss/IndexIVF.h} +7 -3
- data/vendor/faiss/{IndexIVFFlat.cpp → faiss/IndexIVFFlat.cpp} +6 -5
- data/vendor/faiss/{IndexIVFFlat.h → faiss/IndexIVFFlat.h} +0 -0
- data/vendor/faiss/{IndexIVFPQ.cpp → faiss/IndexIVFPQ.cpp} +9 -8
- data/vendor/faiss/{IndexIVFPQ.h → faiss/IndexIVFPQ.h} +4 -2
- data/vendor/faiss/{IndexIVFPQR.cpp → faiss/IndexIVFPQR.cpp} +3 -1
- data/vendor/faiss/{IndexIVFPQR.h → faiss/IndexIVFPQR.h} +0 -0
- data/vendor/faiss/{IndexIVFSpectralHash.cpp → faiss/IndexIVFSpectralHash.cpp} +1 -1
- data/vendor/faiss/{IndexIVFSpectralHash.h → faiss/IndexIVFSpectralHash.h} +0 -0
- data/vendor/faiss/{IndexLSH.cpp → faiss/IndexLSH.cpp} +0 -0
- data/vendor/faiss/{IndexLSH.h → faiss/IndexLSH.h} +0 -0
- data/vendor/faiss/{IndexLattice.cpp → faiss/IndexLattice.cpp} +0 -0
- data/vendor/faiss/{IndexLattice.h → faiss/IndexLattice.h} +0 -0
- data/vendor/faiss/{IndexPQ.cpp → faiss/IndexPQ.cpp} +6 -6
- data/vendor/faiss/{IndexPQ.h → faiss/IndexPQ.h} +3 -1
- data/vendor/faiss/{IndexPreTransform.cpp → faiss/IndexPreTransform.cpp} +0 -0
- data/vendor/faiss/{IndexPreTransform.h → faiss/IndexPreTransform.h} +0 -0
- data/vendor/faiss/{IndexReplicas.cpp → faiss/IndexReplicas.cpp} +102 -10
- data/vendor/faiss/{IndexReplicas.h → faiss/IndexReplicas.h} +6 -0
- data/vendor/faiss/{IndexScalarQuantizer.cpp → faiss/IndexScalarQuantizer.cpp} +3 -3
- data/vendor/faiss/{IndexScalarQuantizer.h → faiss/IndexScalarQuantizer.h} +0 -0
- data/vendor/faiss/{IndexShards.cpp → faiss/IndexShards.cpp} +37 -12
- data/vendor/faiss/{IndexShards.h → faiss/IndexShards.h} +3 -4
- data/vendor/faiss/{InvertedLists.cpp → faiss/InvertedLists.cpp} +2 -2
- data/vendor/faiss/{InvertedLists.h → faiss/InvertedLists.h} +1 -0
- data/vendor/faiss/{MatrixStats.cpp → faiss/MatrixStats.cpp} +0 -0
- data/vendor/faiss/{MatrixStats.h → faiss/MatrixStats.h} +0 -0
- data/vendor/faiss/{MetaIndexes.cpp → faiss/MetaIndexes.cpp} +5 -3
- data/vendor/faiss/{MetaIndexes.h → faiss/MetaIndexes.h} +0 -0
- data/vendor/faiss/{MetricType.h → faiss/MetricType.h} +0 -0
- data/vendor/faiss/{OnDiskInvertedLists.cpp → faiss/OnDiskInvertedLists.cpp} +141 -3
- data/vendor/faiss/{OnDiskInvertedLists.h → faiss/OnDiskInvertedLists.h} +27 -7
- data/vendor/faiss/{VectorTransform.cpp → faiss/VectorTransform.cpp} +4 -3
- data/vendor/faiss/{VectorTransform.h → faiss/VectorTransform.h} +0 -0
- data/vendor/faiss/{clone_index.cpp → faiss/clone_index.cpp} +0 -0
- data/vendor/faiss/{clone_index.h → faiss/clone_index.h} +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.cpp +14 -14
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.h +6 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuDistance.h +12 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuFaissAssert.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndex.h +3 -9
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexBinaryFlat.h +7 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexFlat.h +35 -10
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVF.h +1 -2
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFFlat.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFPQ.h +21 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFScalarQuantizer.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndicesOptions.h +0 -0
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +200 -0
- data/vendor/faiss/faiss/gpu/GpuResources.h +264 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +572 -0
- data/vendor/faiss/{gpu → faiss/gpu}/StandardGpuResources.h +83 -15
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper-inl.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfClustering.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfIVFPQAdd.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/WriteIndex.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexBinaryFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexFlat.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFPQ.cpp +141 -52
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuMemoryException.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.cpp +4 -2
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/demo_ivfpq_indexing_gpu.cpp +7 -5
- data/vendor/faiss/{gpu → faiss/gpu}/utils/DeviceUtils.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +213 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StackDeviceMemory.h +25 -40
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StaticUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.cpp +1 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.h +3 -1
- data/vendor/faiss/{impl → faiss/impl}/FaissAssert.h +1 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.cpp +26 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.h +4 -0
- data/vendor/faiss/{impl → faiss/impl}/HNSW.cpp +26 -26
- data/vendor/faiss/{impl → faiss/impl}/HNSW.h +19 -11
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.cpp +1 -1
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer-inl.h +0 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.cpp +9 -9
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.cpp +63 -39
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex-inl.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/index_read.cpp +99 -116
- data/vendor/faiss/{impl → faiss/impl}/index_write.cpp +15 -50
- data/vendor/faiss/{impl → faiss/impl}/io.cpp +15 -10
- data/vendor/faiss/{impl → faiss/impl}/io.h +22 -8
- data/vendor/faiss/faiss/impl/io_macros.h +57 -0
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.cpp +52 -36
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.h +3 -3
- data/vendor/faiss/faiss/impl/platform_macros.h +24 -0
- data/vendor/faiss/{index_factory.cpp → faiss/index_factory.cpp} +33 -12
- data/vendor/faiss/{index_factory.h → faiss/index_factory.h} +0 -0
- data/vendor/faiss/{index_io.h → faiss/index_io.h} +55 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +112 -0
- data/vendor/faiss/faiss/python/python_callbacks.h +45 -0
- data/vendor/faiss/{utils → faiss/utils}/Heap.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/Heap.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.cpp +0 -0
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/distances.cpp +28 -13
- data/vendor/faiss/{utils → faiss/utils}/distances.h +2 -1
- data/vendor/faiss/{utils → faiss/utils}/distances_simd.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/hamming-inl.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/hamming.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/hamming.h +7 -1
- data/vendor/faiss/{utils → faiss/utils}/random.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/random.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/utils.cpp +27 -28
- data/vendor/faiss/{utils → faiss/utils}/utils.h +4 -0
- data/vendor/faiss/misc/test_blas.cpp +4 -1
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -2
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +6 -1
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +4 -1
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +6 -4
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +12 -5
- data/vendor/faiss/tests/test_merge.cpp +6 -3
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +7 -2
- data/vendor/faiss/tests/test_pairs_decoding.cpp +5 -1
- data/vendor/faiss/tests/test_params_override.cpp +7 -2
- data/vendor/faiss/tests/test_sliding_ivf.cpp +10 -4
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +14 -8
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +11 -7
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +12 -7
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +6 -3
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +7 -3
- metadata +154 -153
- data/vendor/faiss/gpu/GpuResources.cpp +0 -52
- data/vendor/faiss/gpu/GpuResources.h +0 -73
- data/vendor/faiss/gpu/StandardGpuResources.cpp +0 -303
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +0 -77
- data/vendor/faiss/gpu/utils/DeviceMemory.h +0 -71
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +0 -89
- data/vendor/faiss/gpu/utils/MemorySpace.h +0 -44
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +0 -239
@@ -65,9 +65,15 @@ class IndexReplicasTemplate : public ThreadedIndex<IndexT> {
|
|
65
65
|
/// reconstructs from the first index
|
66
66
|
void reconstruct(idx_t, component_t *v) const override;
|
67
67
|
|
68
|
+
/// Synchronize the top-level index (IndexReplicas) with data in the sub-indices
|
69
|
+
void syncWithSubIndexes();
|
70
|
+
|
68
71
|
protected:
|
69
72
|
/// Called just after an index is added
|
70
73
|
void onAfterAddIndex(IndexT* index) override;
|
74
|
+
|
75
|
+
/// Called just after an index is removed
|
76
|
+
void onAfterRemoveIndex(IndexT* index) override;
|
71
77
|
};
|
72
78
|
|
73
79
|
using IndexReplicas = IndexReplicasTemplate<Index>;
|
@@ -77,7 +77,7 @@ void IndexScalarQuantizer::search(
|
|
77
77
|
ScopeDeleter1<InvertedListScanner> del(scanner);
|
78
78
|
|
79
79
|
#pragma omp for
|
80
|
-
for (
|
80
|
+
for (idx_t i = 0; i < n; i++) {
|
81
81
|
float * D = distances + k * i;
|
82
82
|
idx_t * I = labels + k * i;
|
83
83
|
// re-order heap
|
@@ -197,7 +197,7 @@ void IndexIVFScalarQuantizer::encode_vectors(idx_t n, const float* x,
|
|
197
197
|
std::vector<float> residual (d);
|
198
198
|
|
199
199
|
#pragma omp for
|
200
|
-
for (
|
200
|
+
for (idx_t i = 0; i < n; i++) {
|
201
201
|
int64_t list_no = list_nos [i];
|
202
202
|
if (list_no >= 0) {
|
203
203
|
const float *xi = x + i * d;
|
@@ -227,7 +227,7 @@ void IndexIVFScalarQuantizer::sa_decode (idx_t n, const uint8_t *codes,
|
|
227
227
|
std::vector<float> residual (d);
|
228
228
|
|
229
229
|
#pragma omp for
|
230
|
-
for (
|
230
|
+
for (idx_t i = 0; i < n; i++) {
|
231
231
|
const uint8_t *code = codes + i * (code_size + coarse_size);
|
232
232
|
int64_t list_no = decode_listno (code);
|
233
233
|
float *xi = x + i * d;
|
File without changes
|
@@ -9,6 +9,7 @@
|
|
9
9
|
|
10
10
|
#include <faiss/IndexShards.h>
|
11
11
|
|
12
|
+
#include <cinttypes>
|
12
13
|
#include <cstdio>
|
13
14
|
#include <functional>
|
14
15
|
|
@@ -132,18 +133,20 @@ IndexShardsTemplate<IndexT>::IndexShardsTemplate(bool threaded,
|
|
132
133
|
template <typename IndexT>
|
133
134
|
void
|
134
135
|
IndexShardsTemplate<IndexT>::onAfterAddIndex(IndexT* index /* unused */) {
|
135
|
-
|
136
|
+
syncWithSubIndexes();
|
136
137
|
}
|
137
138
|
|
138
139
|
template <typename IndexT>
|
139
140
|
void
|
140
141
|
IndexShardsTemplate<IndexT>::onAfterRemoveIndex(IndexT* index /* unused */) {
|
141
|
-
|
142
|
+
syncWithSubIndexes();
|
142
143
|
}
|
143
144
|
|
145
|
+
// FIXME: assumes that nothing is currently running on the sub-indexes, which is
|
146
|
+
// true with the normal API, but should use the runOnIndex API instead
|
144
147
|
template <typename IndexT>
|
145
148
|
void
|
146
|
-
IndexShardsTemplate<IndexT>::
|
149
|
+
IndexShardsTemplate<IndexT>::syncWithSubIndexes() {
|
147
150
|
if (!this->count()) {
|
148
151
|
this->is_trained = false;
|
149
152
|
this->ntotal = 0;
|
@@ -160,6 +163,31 @@ IndexShardsTemplate<IndexT>::sync_with_shard_indexes() {
|
|
160
163
|
auto index = this->at(i);
|
161
164
|
FAISS_THROW_IF_NOT(this->metric_type == index->metric_type);
|
162
165
|
FAISS_THROW_IF_NOT(this->d == index->d);
|
166
|
+
FAISS_THROW_IF_NOT(this->is_trained == index->is_trained);
|
167
|
+
|
168
|
+
this->ntotal += index->ntotal;
|
169
|
+
}
|
170
|
+
}
|
171
|
+
|
172
|
+
// No metric_type for IndexBinary
|
173
|
+
template <>
|
174
|
+
void
|
175
|
+
IndexShardsTemplate<IndexBinary>::syncWithSubIndexes() {
|
176
|
+
if (!this->count()) {
|
177
|
+
this->is_trained = false;
|
178
|
+
this->ntotal = 0;
|
179
|
+
|
180
|
+
return;
|
181
|
+
}
|
182
|
+
|
183
|
+
auto firstIndex = this->at(0);
|
184
|
+
this->is_trained = firstIndex->is_trained;
|
185
|
+
this->ntotal = firstIndex->ntotal;
|
186
|
+
|
187
|
+
for (int i = 1; i < this->count(); ++i) {
|
188
|
+
auto index = this->at(i);
|
189
|
+
FAISS_THROW_IF_NOT(this->d == index->d);
|
190
|
+
FAISS_THROW_IF_NOT(this->is_trained == index->is_trained);
|
163
191
|
|
164
192
|
this->ntotal += index->ntotal;
|
165
193
|
}
|
@@ -172,7 +200,7 @@ IndexShardsTemplate<IndexT>::train(idx_t n,
|
|
172
200
|
auto fn =
|
173
201
|
[n, x](int no, IndexT *index) {
|
174
202
|
if (index->verbose) {
|
175
|
-
printf("begin train shard %d on %
|
203
|
+
printf("begin train shard %d on %" PRId64 " points\n", no, n);
|
176
204
|
}
|
177
205
|
|
178
206
|
index->train(n, x);
|
@@ -183,7 +211,7 @@ IndexShardsTemplate<IndexT>::train(idx_t n,
|
|
183
211
|
};
|
184
212
|
|
185
213
|
this->runOnIndex(fn);
|
186
|
-
|
214
|
+
syncWithSubIndexes();
|
187
215
|
}
|
188
216
|
|
189
217
|
template <typename IndexT>
|
@@ -237,7 +265,7 @@ IndexShardsTemplate<IndexT>::add_with_ids(idx_t n,
|
|
237
265
|
auto x0 = x + i0 * components_per_vec;
|
238
266
|
|
239
267
|
if (index->verbose) {
|
240
|
-
printf ("begin add shard %d on %
|
268
|
+
printf ("begin add shard %d on %" PRId64 " points\n", no, n);
|
241
269
|
}
|
242
270
|
|
243
271
|
if (ids) {
|
@@ -247,15 +275,12 @@ IndexShardsTemplate<IndexT>::add_with_ids(idx_t n,
|
|
247
275
|
}
|
248
276
|
|
249
277
|
if (index->verbose) {
|
250
|
-
printf ("end add shard %d on %
|
278
|
+
printf ("end add shard %d on %" PRId64 " points\n", no, i1 - i0);
|
251
279
|
}
|
252
280
|
};
|
253
281
|
|
254
282
|
this->runOnIndex(fn);
|
255
|
-
|
256
|
-
// This is safe to do here because the current thread controls execution in
|
257
|
-
// all threads, and nothing else is happening
|
258
|
-
this->ntotal += n;
|
283
|
+
syncWithSubIndexes();
|
259
284
|
}
|
260
285
|
|
261
286
|
template <typename IndexT>
|
@@ -273,7 +298,7 @@ IndexShardsTemplate<IndexT>::search(idx_t n,
|
|
273
298
|
auto fn =
|
274
299
|
[n, k, x, &all_distances, &all_labels](int no, const IndexT *index) {
|
275
300
|
if (index->verbose) {
|
276
|
-
printf ("begin query shard %d on %
|
301
|
+
printf ("begin query shard %d on %" PRId64 " points\n", no, n);
|
277
302
|
}
|
278
303
|
|
279
304
|
index->search (n, x, k,
|
@@ -79,12 +79,11 @@ struct IndexShardsTemplate : public ThreadedIndex<IndexT> {
|
|
79
79
|
|
80
80
|
void train(idx_t n, const component_t* x) override;
|
81
81
|
|
82
|
-
// update metric_type and ntotal. Call if you changes something in
|
83
|
-
// the shard indexes.
|
84
|
-
void sync_with_shard_indexes();
|
85
|
-
|
86
82
|
bool successive_ids;
|
87
83
|
|
84
|
+
/// Synchronize the top-level index (IndexShards) with data in the sub-indices
|
85
|
+
void syncWithSubIndexes();
|
86
|
+
|
88
87
|
protected:
|
89
88
|
/// Called just after an index is added
|
90
89
|
void onAfterAddIndex(IndexT* index) override;
|
@@ -274,7 +274,7 @@ const uint8_t * HStackInvertedLists::get_single_code (
|
|
274
274
|
}
|
275
275
|
offset -= sz;
|
276
276
|
}
|
277
|
-
FAISS_THROW_FMT ("offset %
|
277
|
+
FAISS_THROW_FMT ("offset %zd unknown", offset);
|
278
278
|
}
|
279
279
|
|
280
280
|
|
@@ -309,7 +309,7 @@ Index::idx_t HStackInvertedLists::get_single_id (
|
|
309
309
|
}
|
310
310
|
offset -= sz;
|
311
311
|
}
|
312
|
-
FAISS_THROW_FMT ("offset %
|
312
|
+
FAISS_THROW_FMT ("offset %zd unknown", offset);
|
313
313
|
}
|
314
314
|
|
315
315
|
|
@@ -208,6 +208,7 @@ struct ArrayInvertedLists: InvertedLists {
|
|
208
208
|
* that can be stacked horizontally, vertically and sliced.
|
209
209
|
*****************************************************************/
|
210
210
|
|
211
|
+
/// invlists that fail for all write functions
|
211
212
|
struct ReadOnlyInvertedLists: InvertedLists {
|
212
213
|
|
213
214
|
ReadOnlyInvertedLists (size_t nlist, size_t code_size):
|
File without changes
|
File without changes
|
@@ -9,7 +9,9 @@
|
|
9
9
|
|
10
10
|
#include <faiss/MetaIndexes.h>
|
11
11
|
|
12
|
+
#include <cinttypes>
|
12
13
|
#include <cstdio>
|
14
|
+
#include <limits>
|
13
15
|
#include <stdint.h>
|
14
16
|
|
15
17
|
#include <faiss/impl/FaissAssert.h>
|
@@ -199,7 +201,7 @@ void IndexIDMap2Template<IndexT>::reconstruct
|
|
199
201
|
try {
|
200
202
|
this->index->reconstruct (rev_map.at (key), recons);
|
201
203
|
} catch (const std::out_of_range& e) {
|
202
|
-
FAISS_THROW_FMT ("key %
|
204
|
+
FAISS_THROW_FMT ("key %" PRId64 " not found", key);
|
203
205
|
}
|
204
206
|
}
|
205
207
|
|
@@ -274,7 +276,7 @@ void IndexSplitVectors::search (
|
|
274
276
|
float *distances1 = no == 0 ? distances : all_distances + no * k * n;
|
275
277
|
idx_t *labels1 = no == 0 ? labels : all_labels + no * k * n;
|
276
278
|
if (index->verbose)
|
277
|
-
printf ("begin query shard %d on %
|
279
|
+
printf ("begin query shard %d on %" PRId64 " points\n", no, n);
|
278
280
|
const Index * sub_index = index->sub_indexes[no];
|
279
281
|
int64_t sub_d = sub_index->d, d = index->d;
|
280
282
|
idx_t ofs = 0;
|
@@ -319,7 +321,7 @@ void IndexSplitVectors::search (
|
|
319
321
|
distances[j] += distances_i[j];
|
320
322
|
} else {
|
321
323
|
labels[j] = -1;
|
322
|
-
distances[j] =
|
324
|
+
distances[j] = std::numeric_limits<float>::quiet_NaN();
|
323
325
|
}
|
324
326
|
}
|
325
327
|
}
|
File without changes
|
File without changes
|
@@ -16,10 +16,14 @@
|
|
16
16
|
#include <sys/mman.h>
|
17
17
|
#include <unistd.h>
|
18
18
|
#include <sys/types.h>
|
19
|
+
#include <sys/stat.h>
|
19
20
|
|
20
21
|
#include <faiss/impl/FaissAssert.h>
|
21
22
|
#include <faiss/utils/utils.h>
|
22
23
|
|
24
|
+
#include <faiss/impl/io.h>
|
25
|
+
#include <faiss/impl/io_macros.h>
|
26
|
+
|
23
27
|
|
24
28
|
namespace faiss {
|
25
29
|
|
@@ -320,7 +324,7 @@ void OnDiskInvertedLists::update_totsize (size_t new_size)
|
|
320
324
|
totsize = new_size;
|
321
325
|
|
322
326
|
// create file
|
323
|
-
printf ("resizing %s to %
|
327
|
+
printf ("resizing %s to %zd bytes\n", filename.c_str(), totsize);
|
324
328
|
|
325
329
|
int err = truncate (filename.c_str(), totsize);
|
326
330
|
|
@@ -341,7 +345,7 @@ void OnDiskInvertedLists::update_totsize (size_t new_size)
|
|
341
345
|
|
342
346
|
#define INVALID_OFFSET (size_t)(-1)
|
343
347
|
|
344
|
-
|
348
|
+
OnDiskOneList::OnDiskOneList ():
|
345
349
|
size (0), capacity (0), offset (INVALID_OFFSET)
|
346
350
|
{}
|
347
351
|
|
@@ -640,7 +644,7 @@ size_t OnDiskInvertedLists::merge_from (const InvertedLists **ils, int n_il,
|
|
640
644
|
nmerged++;
|
641
645
|
double t1 = getmillisecs();
|
642
646
|
if (t1 - last_t > 500) {
|
643
|
-
printf("merged %
|
647
|
+
printf("merged %zd lists in %.3f s\r",
|
644
648
|
nmerged, (t1 - t0) / 1000.0);
|
645
649
|
fflush(stdout);
|
646
650
|
last_t = t1;
|
@@ -656,6 +660,12 @@ size_t OnDiskInvertedLists::merge_from (const InvertedLists **ils, int n_il,
|
|
656
660
|
}
|
657
661
|
|
658
662
|
|
663
|
+
size_t OnDiskInvertedLists::merge_from_1 (const InvertedLists *ils, bool verbose)
|
664
|
+
{
|
665
|
+
return merge_from (&ils, 1, verbose);
|
666
|
+
}
|
667
|
+
|
668
|
+
|
659
669
|
void OnDiskInvertedLists::crop_invlists(size_t l0, size_t l1)
|
660
670
|
{
|
661
671
|
FAISS_THROW_IF_NOT(0 <= l0 && l0 <= l1 && l1 <= nlist);
|
@@ -668,6 +678,134 @@ void OnDiskInvertedLists::crop_invlists(size_t l0, size_t l1)
|
|
668
678
|
nlist = l1 - l0;
|
669
679
|
}
|
670
680
|
|
681
|
+
/*******************************************************
|
682
|
+
* I/O support via callbacks
|
683
|
+
*******************************************************/
|
684
|
+
|
685
|
+
|
686
|
+
|
687
|
+
|
688
|
+
OnDiskInvertedListsIOHook::OnDiskInvertedListsIOHook():
|
689
|
+
InvertedListsIOHook("ilod", typeid(OnDiskInvertedLists).name())
|
690
|
+
{}
|
691
|
+
|
692
|
+
|
693
|
+
void OnDiskInvertedListsIOHook::write(const InvertedLists *ils, IOWriter *f) const
|
694
|
+
{
|
695
|
+
uint32_t h = fourcc ("ilod");
|
696
|
+
WRITE1 (h);
|
697
|
+
WRITE1 (ils->nlist);
|
698
|
+
WRITE1 (ils->code_size);
|
699
|
+
const OnDiskInvertedLists *od = dynamic_cast<const OnDiskInvertedLists*> (ils);
|
700
|
+
// this is a POD object
|
701
|
+
WRITEVECTOR (od->lists);
|
702
|
+
|
703
|
+
{
|
704
|
+
std::vector<OnDiskInvertedLists::Slot> v(
|
705
|
+
od->slots.begin(), od->slots.end());
|
706
|
+
WRITEVECTOR(v);
|
707
|
+
}
|
708
|
+
{
|
709
|
+
std::vector<char> x(od->filename.begin(), od->filename.end());
|
710
|
+
WRITEVECTOR(x);
|
711
|
+
}
|
712
|
+
WRITE1(od->totsize);
|
713
|
+
}
|
714
|
+
|
715
|
+
InvertedLists * OnDiskInvertedListsIOHook::read(IOReader *f, int io_flags) const
|
716
|
+
{
|
717
|
+
OnDiskInvertedLists *od = new OnDiskInvertedLists();
|
718
|
+
od->read_only = io_flags & IO_FLAG_READ_ONLY;
|
719
|
+
READ1 (od->nlist);
|
720
|
+
READ1 (od->code_size);
|
721
|
+
// this is a POD object
|
722
|
+
READVECTOR (od->lists);
|
723
|
+
{
|
724
|
+
std::vector<OnDiskInvertedLists::Slot> v;
|
725
|
+
READVECTOR(v);
|
726
|
+
od->slots.assign(v.begin(), v.end());
|
727
|
+
}
|
728
|
+
{
|
729
|
+
std::vector<char> x;
|
730
|
+
READVECTOR(x);
|
731
|
+
od->filename.assign(x.begin(), x.end());
|
732
|
+
|
733
|
+
if (io_flags & IO_FLAG_ONDISK_SAME_DIR) {
|
734
|
+
FileIOReader *reader = dynamic_cast<FileIOReader*>(f);
|
735
|
+
FAISS_THROW_IF_NOT_MSG (
|
736
|
+
reader, "IO_FLAG_ONDISK_SAME_DIR only supported "
|
737
|
+
"when reading from file");
|
738
|
+
std::string indexname = reader->name;
|
739
|
+
std::string dirname = "./";
|
740
|
+
size_t slash = indexname.find_last_of('/');
|
741
|
+
if (slash != std::string::npos) {
|
742
|
+
dirname = indexname.substr(0, slash + 1);
|
743
|
+
}
|
744
|
+
std::string filename = od->filename;
|
745
|
+
slash = filename.find_last_of('/');
|
746
|
+
if (slash != std::string::npos) {
|
747
|
+
filename = filename.substr(slash + 1);
|
748
|
+
}
|
749
|
+
filename = dirname + filename;
|
750
|
+
printf("IO_FLAG_ONDISK_SAME_DIR: "
|
751
|
+
"updating ondisk filename from %s to %s\n",
|
752
|
+
od->filename.c_str(), filename.c_str());
|
753
|
+
od->filename = filename;
|
754
|
+
}
|
755
|
+
|
756
|
+
}
|
757
|
+
READ1(od->totsize);
|
758
|
+
od->do_mmap();
|
759
|
+
return od;
|
760
|
+
}
|
761
|
+
|
762
|
+
/** read from an ArrayInvertedLists into this inverted list type */
|
763
|
+
InvertedLists * OnDiskInvertedListsIOHook::read_ArrayInvertedLists(
|
764
|
+
IOReader *f, int /* io_flags */,
|
765
|
+
size_t nlist, size_t code_size,
|
766
|
+
const std::vector<size_t> &sizes) const
|
767
|
+
{
|
768
|
+
auto ails = new OnDiskInvertedLists ();
|
769
|
+
ails->nlist = nlist;
|
770
|
+
ails->code_size = code_size;
|
771
|
+
ails->read_only = true;
|
772
|
+
ails->lists.resize (nlist);
|
773
|
+
|
774
|
+
FileIOReader *reader = dynamic_cast<FileIOReader*>(f);
|
775
|
+
FAISS_THROW_IF_NOT_MSG(reader, "mmap only supported for File objects");
|
776
|
+
FILE *fdesc = reader->f;
|
777
|
+
size_t o0 = ftell(fdesc);
|
778
|
+
size_t o = o0;
|
779
|
+
{ // do the mmap
|
780
|
+
struct stat buf;
|
781
|
+
int ret = fstat (fileno(fdesc), &buf);
|
782
|
+
FAISS_THROW_IF_NOT_FMT (ret == 0,
|
783
|
+
"fstat failed: %s", strerror(errno));
|
784
|
+
ails->totsize = buf.st_size;
|
785
|
+
ails->ptr = (uint8_t*)mmap (nullptr, ails->totsize,
|
786
|
+
PROT_READ, MAP_SHARED,
|
787
|
+
fileno(fdesc), 0);
|
788
|
+
FAISS_THROW_IF_NOT_FMT (ails->ptr != MAP_FAILED,
|
789
|
+
"could not mmap: %s",
|
790
|
+
strerror(errno));
|
791
|
+
}
|
792
|
+
|
793
|
+
FAISS_THROW_IF_NOT(o <= ails->totsize);
|
794
|
+
|
795
|
+
for (size_t i = 0; i < ails->nlist; i++) {
|
796
|
+
OnDiskInvertedLists::List & l = ails->lists[i];
|
797
|
+
l.size = l.capacity = sizes[i];
|
798
|
+
l.offset = o;
|
799
|
+
o += l.size * (sizeof(OnDiskInvertedLists::idx_t) +
|
800
|
+
ails->code_size);
|
801
|
+
}
|
802
|
+
// resume normal reading of file
|
803
|
+
fseek (fdesc, o, SEEK_SET);
|
804
|
+
|
805
|
+
return ails;
|
806
|
+
}
|
807
|
+
|
808
|
+
|
671
809
|
|
672
810
|
|
673
811
|
|
@@ -12,14 +12,26 @@
|
|
12
12
|
|
13
13
|
#include <vector>
|
14
14
|
#include <list>
|
15
|
+
#include <typeinfo>
|
15
16
|
|
16
17
|
#include <faiss/IndexIVF.h>
|
17
18
|
|
19
|
+
#include <faiss/index_io.h>
|
20
|
+
|
18
21
|
namespace faiss {
|
19
22
|
|
20
23
|
|
21
24
|
struct LockLevels;
|
22
25
|
|
26
|
+
|
27
|
+
struct OnDiskOneList {
|
28
|
+
size_t size; // size of inverted list (entries)
|
29
|
+
size_t capacity; // allocated size (entries)
|
30
|
+
size_t offset; // offset in buffer (bytes)
|
31
|
+
OnDiskOneList ();
|
32
|
+
};
|
33
|
+
|
34
|
+
|
23
35
|
/** On-disk storage of inverted lists.
|
24
36
|
*
|
25
37
|
* The data is stored in a mmapped chunk of memory (base pointer ptr,
|
@@ -49,13 +61,7 @@ struct LockLevels;
|
|
49
61
|
* lists in parallel.
|
50
62
|
*/
|
51
63
|
struct OnDiskInvertedLists: InvertedLists {
|
52
|
-
|
53
|
-
struct List {
|
54
|
-
size_t size; // size of inverted list (entries)
|
55
|
-
size_t capacity; // allocated size (entries)
|
56
|
-
size_t offset; // offset in buffer (bytes)
|
57
|
-
List ();
|
58
|
-
};
|
64
|
+
using List = OnDiskOneList;
|
59
65
|
|
60
66
|
// size nlist
|
61
67
|
std::vector<List> lists;
|
@@ -95,6 +101,9 @@ struct OnDiskInvertedLists: InvertedLists {
|
|
95
101
|
// allocating slots)
|
96
102
|
size_t merge_from (const InvertedLists **ils, int n_il, bool verbose=false);
|
97
103
|
|
104
|
+
/// same as merge_from for a single invlist
|
105
|
+
size_t merge_from_1 (const InvertedLists *il, bool verbose=false);
|
106
|
+
|
98
107
|
/// restrict the inverted lists to l0:l1 without touching the mmapped region
|
99
108
|
void crop_invlists(size_t l0, size_t l1);
|
100
109
|
|
@@ -121,6 +130,17 @@ struct OnDiskInvertedLists: InvertedLists {
|
|
121
130
|
OnDiskInvertedLists ();
|
122
131
|
};
|
123
132
|
|
133
|
+
struct OnDiskInvertedListsIOHook: InvertedListsIOHook {
|
134
|
+
OnDiskInvertedListsIOHook();
|
135
|
+
void write(const InvertedLists *ils, IOWriter *f) const override;
|
136
|
+
InvertedLists * read(IOReader *f, int io_flags) const override;
|
137
|
+
InvertedLists * read_ArrayInvertedLists(
|
138
|
+
IOReader *f, int io_flags,
|
139
|
+
size_t nlist, size_t code_size,
|
140
|
+
const std::vector<size_t> &sizes) const override;
|
141
|
+
};
|
142
|
+
|
143
|
+
|
124
144
|
|
125
145
|
} // namespace faiss
|
126
146
|
|