faiss 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +80 -0
- data/vendor/faiss/c_api/AutoTune_c.h +2 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -6
- data/vendor/faiss/c_api/IndexShards_c.h +1 -4
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +4 -2
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +1 -1
- data/vendor/faiss/demos/demo_imi_flat.cpp +5 -2
- data/vendor/faiss/demos/demo_imi_pq.cpp +6 -2
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +7 -2
- data/vendor/faiss/{AutoTune.cpp → faiss/AutoTune.cpp} +9 -9
- data/vendor/faiss/{AutoTune.h → faiss/AutoTune.h} +0 -0
- data/vendor/faiss/{Clustering.cpp → faiss/Clustering.cpp} +13 -12
- data/vendor/faiss/{Clustering.h → faiss/Clustering.h} +0 -0
- data/vendor/faiss/{DirectMap.cpp → faiss/DirectMap.cpp} +0 -0
- data/vendor/faiss/{DirectMap.h → faiss/DirectMap.h} +0 -0
- data/vendor/faiss/{IVFlib.cpp → faiss/IVFlib.cpp} +86 -11
- data/vendor/faiss/{IVFlib.h → faiss/IVFlib.h} +26 -8
- data/vendor/faiss/{Index.cpp → faiss/Index.cpp} +0 -0
- data/vendor/faiss/{Index.h → faiss/Index.h} +1 -1
- data/vendor/faiss/{Index2Layer.cpp → faiss/Index2Layer.cpp} +12 -11
- data/vendor/faiss/{Index2Layer.h → faiss/Index2Layer.h} +0 -0
- data/vendor/faiss/{IndexBinary.cpp → faiss/IndexBinary.cpp} +2 -1
- data/vendor/faiss/{IndexBinary.h → faiss/IndexBinary.h} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.cpp → faiss/IndexBinaryFlat.cpp} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.h → faiss/IndexBinaryFlat.h} +0 -0
- data/vendor/faiss/{IndexBinaryFromFloat.cpp → faiss/IndexBinaryFromFloat.cpp} +1 -0
- data/vendor/faiss/{IndexBinaryFromFloat.h → faiss/IndexBinaryFromFloat.h} +0 -0
- data/vendor/faiss/{IndexBinaryHNSW.cpp → faiss/IndexBinaryHNSW.cpp} +1 -2
- data/vendor/faiss/{IndexBinaryHNSW.h → faiss/IndexBinaryHNSW.h} +0 -0
- data/vendor/faiss/{IndexBinaryHash.cpp → faiss/IndexBinaryHash.cpp} +16 -7
- data/vendor/faiss/{IndexBinaryHash.h → faiss/IndexBinaryHash.h} +2 -1
- data/vendor/faiss/{IndexBinaryIVF.cpp → faiss/IndexBinaryIVF.cpp} +10 -16
- data/vendor/faiss/{IndexBinaryIVF.h → faiss/IndexBinaryIVF.h} +1 -1
- data/vendor/faiss/{IndexFlat.cpp → faiss/IndexFlat.cpp} +0 -0
- data/vendor/faiss/{IndexFlat.h → faiss/IndexFlat.h} +0 -0
- data/vendor/faiss/{IndexHNSW.cpp → faiss/IndexHNSW.cpp} +63 -32
- data/vendor/faiss/{IndexHNSW.h → faiss/IndexHNSW.h} +0 -0
- data/vendor/faiss/{IndexIVF.cpp → faiss/IndexIVF.cpp} +129 -46
- data/vendor/faiss/{IndexIVF.h → faiss/IndexIVF.h} +7 -3
- data/vendor/faiss/{IndexIVFFlat.cpp → faiss/IndexIVFFlat.cpp} +6 -5
- data/vendor/faiss/{IndexIVFFlat.h → faiss/IndexIVFFlat.h} +0 -0
- data/vendor/faiss/{IndexIVFPQ.cpp → faiss/IndexIVFPQ.cpp} +9 -8
- data/vendor/faiss/{IndexIVFPQ.h → faiss/IndexIVFPQ.h} +4 -2
- data/vendor/faiss/{IndexIVFPQR.cpp → faiss/IndexIVFPQR.cpp} +3 -1
- data/vendor/faiss/{IndexIVFPQR.h → faiss/IndexIVFPQR.h} +0 -0
- data/vendor/faiss/{IndexIVFSpectralHash.cpp → faiss/IndexIVFSpectralHash.cpp} +1 -1
- data/vendor/faiss/{IndexIVFSpectralHash.h → faiss/IndexIVFSpectralHash.h} +0 -0
- data/vendor/faiss/{IndexLSH.cpp → faiss/IndexLSH.cpp} +0 -0
- data/vendor/faiss/{IndexLSH.h → faiss/IndexLSH.h} +0 -0
- data/vendor/faiss/{IndexLattice.cpp → faiss/IndexLattice.cpp} +0 -0
- data/vendor/faiss/{IndexLattice.h → faiss/IndexLattice.h} +0 -0
- data/vendor/faiss/{IndexPQ.cpp → faiss/IndexPQ.cpp} +6 -6
- data/vendor/faiss/{IndexPQ.h → faiss/IndexPQ.h} +3 -1
- data/vendor/faiss/{IndexPreTransform.cpp → faiss/IndexPreTransform.cpp} +0 -0
- data/vendor/faiss/{IndexPreTransform.h → faiss/IndexPreTransform.h} +0 -0
- data/vendor/faiss/{IndexReplicas.cpp → faiss/IndexReplicas.cpp} +102 -10
- data/vendor/faiss/{IndexReplicas.h → faiss/IndexReplicas.h} +6 -0
- data/vendor/faiss/{IndexScalarQuantizer.cpp → faiss/IndexScalarQuantizer.cpp} +3 -3
- data/vendor/faiss/{IndexScalarQuantizer.h → faiss/IndexScalarQuantizer.h} +0 -0
- data/vendor/faiss/{IndexShards.cpp → faiss/IndexShards.cpp} +37 -12
- data/vendor/faiss/{IndexShards.h → faiss/IndexShards.h} +3 -4
- data/vendor/faiss/{InvertedLists.cpp → faiss/InvertedLists.cpp} +2 -2
- data/vendor/faiss/{InvertedLists.h → faiss/InvertedLists.h} +1 -0
- data/vendor/faiss/{MatrixStats.cpp → faiss/MatrixStats.cpp} +0 -0
- data/vendor/faiss/{MatrixStats.h → faiss/MatrixStats.h} +0 -0
- data/vendor/faiss/{MetaIndexes.cpp → faiss/MetaIndexes.cpp} +5 -3
- data/vendor/faiss/{MetaIndexes.h → faiss/MetaIndexes.h} +0 -0
- data/vendor/faiss/{MetricType.h → faiss/MetricType.h} +0 -0
- data/vendor/faiss/{OnDiskInvertedLists.cpp → faiss/OnDiskInvertedLists.cpp} +141 -3
- data/vendor/faiss/{OnDiskInvertedLists.h → faiss/OnDiskInvertedLists.h} +27 -7
- data/vendor/faiss/{VectorTransform.cpp → faiss/VectorTransform.cpp} +4 -3
- data/vendor/faiss/{VectorTransform.h → faiss/VectorTransform.h} +0 -0
- data/vendor/faiss/{clone_index.cpp → faiss/clone_index.cpp} +0 -0
- data/vendor/faiss/{clone_index.h → faiss/clone_index.h} +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.cpp +14 -14
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.h +6 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuDistance.h +12 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuFaissAssert.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndex.h +3 -9
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexBinaryFlat.h +7 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexFlat.h +35 -10
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVF.h +1 -2
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFFlat.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFPQ.h +21 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFScalarQuantizer.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndicesOptions.h +0 -0
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +200 -0
- data/vendor/faiss/faiss/gpu/GpuResources.h +264 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +572 -0
- data/vendor/faiss/{gpu → faiss/gpu}/StandardGpuResources.h +83 -15
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper-inl.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfClustering.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfIVFPQAdd.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/WriteIndex.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexBinaryFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexFlat.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFPQ.cpp +141 -52
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuMemoryException.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.cpp +4 -2
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/demo_ivfpq_indexing_gpu.cpp +7 -5
- data/vendor/faiss/{gpu → faiss/gpu}/utils/DeviceUtils.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +213 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StackDeviceMemory.h +25 -40
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StaticUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.cpp +1 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.h +3 -1
- data/vendor/faiss/{impl → faiss/impl}/FaissAssert.h +1 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.cpp +26 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.h +4 -0
- data/vendor/faiss/{impl → faiss/impl}/HNSW.cpp +26 -26
- data/vendor/faiss/{impl → faiss/impl}/HNSW.h +19 -11
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.cpp +1 -1
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer-inl.h +0 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.cpp +9 -9
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.cpp +63 -39
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex-inl.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/index_read.cpp +99 -116
- data/vendor/faiss/{impl → faiss/impl}/index_write.cpp +15 -50
- data/vendor/faiss/{impl → faiss/impl}/io.cpp +15 -10
- data/vendor/faiss/{impl → faiss/impl}/io.h +22 -8
- data/vendor/faiss/faiss/impl/io_macros.h +57 -0
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.cpp +52 -36
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.h +3 -3
- data/vendor/faiss/faiss/impl/platform_macros.h +24 -0
- data/vendor/faiss/{index_factory.cpp → faiss/index_factory.cpp} +33 -12
- data/vendor/faiss/{index_factory.h → faiss/index_factory.h} +0 -0
- data/vendor/faiss/{index_io.h → faiss/index_io.h} +55 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +112 -0
- data/vendor/faiss/faiss/python/python_callbacks.h +45 -0
- data/vendor/faiss/{utils → faiss/utils}/Heap.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/Heap.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.cpp +0 -0
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/distances.cpp +28 -13
- data/vendor/faiss/{utils → faiss/utils}/distances.h +2 -1
- data/vendor/faiss/{utils → faiss/utils}/distances_simd.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/hamming-inl.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/hamming.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/hamming.h +7 -1
- data/vendor/faiss/{utils → faiss/utils}/random.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/random.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/utils.cpp +27 -28
- data/vendor/faiss/{utils → faiss/utils}/utils.h +4 -0
- data/vendor/faiss/misc/test_blas.cpp +4 -1
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -2
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +6 -1
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +4 -1
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +6 -4
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +12 -5
- data/vendor/faiss/tests/test_merge.cpp +6 -3
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +7 -2
- data/vendor/faiss/tests/test_pairs_decoding.cpp +5 -1
- data/vendor/faiss/tests/test_params_override.cpp +7 -2
- data/vendor/faiss/tests/test_sliding_ivf.cpp +10 -4
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +14 -8
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +11 -7
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +12 -7
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +6 -3
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +7 -3
- metadata +154 -153
- data/vendor/faiss/gpu/GpuResources.cpp +0 -52
- data/vendor/faiss/gpu/GpuResources.h +0 -73
- data/vendor/faiss/gpu/StandardGpuResources.cpp +0 -303
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +0 -77
- data/vendor/faiss/gpu/utils/DeviceMemory.h +0 -71
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +0 -89
- data/vendor/faiss/gpu/utils/MemorySpace.h +0 -44
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +0 -239
@@ -65,9 +65,15 @@ class IndexReplicasTemplate : public ThreadedIndex<IndexT> {
|
|
65
65
|
/// reconstructs from the first index
|
66
66
|
void reconstruct(idx_t, component_t *v) const override;
|
67
67
|
|
68
|
+
/// Synchronize the top-level index (IndexReplicas) with data in the sub-indices
|
69
|
+
void syncWithSubIndexes();
|
70
|
+
|
68
71
|
protected:
|
69
72
|
/// Called just after an index is added
|
70
73
|
void onAfterAddIndex(IndexT* index) override;
|
74
|
+
|
75
|
+
/// Called just after an index is removed
|
76
|
+
void onAfterRemoveIndex(IndexT* index) override;
|
71
77
|
};
|
72
78
|
|
73
79
|
using IndexReplicas = IndexReplicasTemplate<Index>;
|
@@ -77,7 +77,7 @@ void IndexScalarQuantizer::search(
|
|
77
77
|
ScopeDeleter1<InvertedListScanner> del(scanner);
|
78
78
|
|
79
79
|
#pragma omp for
|
80
|
-
for (
|
80
|
+
for (idx_t i = 0; i < n; i++) {
|
81
81
|
float * D = distances + k * i;
|
82
82
|
idx_t * I = labels + k * i;
|
83
83
|
// re-order heap
|
@@ -197,7 +197,7 @@ void IndexIVFScalarQuantizer::encode_vectors(idx_t n, const float* x,
|
|
197
197
|
std::vector<float> residual (d);
|
198
198
|
|
199
199
|
#pragma omp for
|
200
|
-
for (
|
200
|
+
for (idx_t i = 0; i < n; i++) {
|
201
201
|
int64_t list_no = list_nos [i];
|
202
202
|
if (list_no >= 0) {
|
203
203
|
const float *xi = x + i * d;
|
@@ -227,7 +227,7 @@ void IndexIVFScalarQuantizer::sa_decode (idx_t n, const uint8_t *codes,
|
|
227
227
|
std::vector<float> residual (d);
|
228
228
|
|
229
229
|
#pragma omp for
|
230
|
-
for (
|
230
|
+
for (idx_t i = 0; i < n; i++) {
|
231
231
|
const uint8_t *code = codes + i * (code_size + coarse_size);
|
232
232
|
int64_t list_no = decode_listno (code);
|
233
233
|
float *xi = x + i * d;
|
File without changes
|
@@ -9,6 +9,7 @@
|
|
9
9
|
|
10
10
|
#include <faiss/IndexShards.h>
|
11
11
|
|
12
|
+
#include <cinttypes>
|
12
13
|
#include <cstdio>
|
13
14
|
#include <functional>
|
14
15
|
|
@@ -132,18 +133,20 @@ IndexShardsTemplate<IndexT>::IndexShardsTemplate(bool threaded,
|
|
132
133
|
template <typename IndexT>
|
133
134
|
void
|
134
135
|
IndexShardsTemplate<IndexT>::onAfterAddIndex(IndexT* index /* unused */) {
|
135
|
-
|
136
|
+
syncWithSubIndexes();
|
136
137
|
}
|
137
138
|
|
138
139
|
template <typename IndexT>
|
139
140
|
void
|
140
141
|
IndexShardsTemplate<IndexT>::onAfterRemoveIndex(IndexT* index /* unused */) {
|
141
|
-
|
142
|
+
syncWithSubIndexes();
|
142
143
|
}
|
143
144
|
|
145
|
+
// FIXME: assumes that nothing is currently running on the sub-indexes, which is
|
146
|
+
// true with the normal API, but should use the runOnIndex API instead
|
144
147
|
template <typename IndexT>
|
145
148
|
void
|
146
|
-
IndexShardsTemplate<IndexT>::sync_with_shard_indexes() {
|
149
|
+
IndexShardsTemplate<IndexT>::syncWithSubIndexes() {
|
147
150
|
if (!this->count()) {
|
148
151
|
this->is_trained = false;
|
149
152
|
this->ntotal = 0;
|
@@ -160,6 +163,31 @@ IndexShardsTemplate<IndexT>::sync_with_shard_indexes() {
|
|
160
163
|
auto index = this->at(i);
|
161
164
|
FAISS_THROW_IF_NOT(this->metric_type == index->metric_type);
|
162
165
|
FAISS_THROW_IF_NOT(this->d == index->d);
|
166
|
+
FAISS_THROW_IF_NOT(this->is_trained == index->is_trained);
|
167
|
+
|
168
|
+
this->ntotal += index->ntotal;
|
169
|
+
}
|
170
|
+
}
|
171
|
+
|
172
|
+
// No metric_type for IndexBinary
|
173
|
+
template <>
|
174
|
+
void
|
175
|
+
IndexShardsTemplate<IndexBinary>::syncWithSubIndexes() {
|
176
|
+
if (!this->count()) {
|
177
|
+
this->is_trained = false;
|
178
|
+
this->ntotal = 0;
|
179
|
+
|
180
|
+
return;
|
181
|
+
}
|
182
|
+
|
183
|
+
auto firstIndex = this->at(0);
|
184
|
+
this->is_trained = firstIndex->is_trained;
|
185
|
+
this->ntotal = firstIndex->ntotal;
|
186
|
+
|
187
|
+
for (int i = 1; i < this->count(); ++i) {
|
188
|
+
auto index = this->at(i);
|
189
|
+
FAISS_THROW_IF_NOT(this->d == index->d);
|
190
|
+
FAISS_THROW_IF_NOT(this->is_trained == index->is_trained);
|
163
191
|
|
164
192
|
this->ntotal += index->ntotal;
|
165
193
|
}
|
@@ -172,7 +200,7 @@ IndexShardsTemplate<IndexT>::train(idx_t n,
|
|
172
200
|
auto fn =
|
173
201
|
[n, x](int no, IndexT *index) {
|
174
202
|
if (index->verbose) {
|
175
|
-
printf("begin train shard %d on %
|
203
|
+
printf("begin train shard %d on %" PRId64 " points\n", no, n);
|
176
204
|
}
|
177
205
|
|
178
206
|
index->train(n, x);
|
@@ -183,7 +211,7 @@ IndexShardsTemplate<IndexT>::train(idx_t n,
|
|
183
211
|
};
|
184
212
|
|
185
213
|
this->runOnIndex(fn);
|
186
|
-
|
214
|
+
syncWithSubIndexes();
|
187
215
|
}
|
188
216
|
|
189
217
|
template <typename IndexT>
|
@@ -237,7 +265,7 @@ IndexShardsTemplate<IndexT>::add_with_ids(idx_t n,
|
|
237
265
|
auto x0 = x + i0 * components_per_vec;
|
238
266
|
|
239
267
|
if (index->verbose) {
|
240
|
-
printf ("begin add shard %d on %
|
268
|
+
printf ("begin add shard %d on %" PRId64 " points\n", no, n);
|
241
269
|
}
|
242
270
|
|
243
271
|
if (ids) {
|
@@ -247,15 +275,12 @@ IndexShardsTemplate<IndexT>::add_with_ids(idx_t n,
|
|
247
275
|
}
|
248
276
|
|
249
277
|
if (index->verbose) {
|
250
|
-
printf ("end add shard %d on %
|
278
|
+
printf ("end add shard %d on %" PRId64 " points\n", no, i1 - i0);
|
251
279
|
}
|
252
280
|
};
|
253
281
|
|
254
282
|
this->runOnIndex(fn);
|
255
|
-
|
256
|
-
// This is safe to do here because the current thread controls execution in
|
257
|
-
// all threads, and nothing else is happening
|
258
|
-
this->ntotal += n;
|
283
|
+
syncWithSubIndexes();
|
259
284
|
}
|
260
285
|
|
261
286
|
template <typename IndexT>
|
@@ -273,7 +298,7 @@ IndexShardsTemplate<IndexT>::search(idx_t n,
|
|
273
298
|
auto fn =
|
274
299
|
[n, k, x, &all_distances, &all_labels](int no, const IndexT *index) {
|
275
300
|
if (index->verbose) {
|
276
|
-
printf ("begin query shard %d on %
|
301
|
+
printf ("begin query shard %d on %" PRId64 " points\n", no, n);
|
277
302
|
}
|
278
303
|
|
279
304
|
index->search (n, x, k,
|
@@ -79,12 +79,11 @@ struct IndexShardsTemplate : public ThreadedIndex<IndexT> {
|
|
79
79
|
|
80
80
|
void train(idx_t n, const component_t* x) override;
|
81
81
|
|
82
|
-
// update metric_type and ntotal. Call if you change something in
|
83
|
-
// the shard indexes.
|
84
|
-
void sync_with_shard_indexes();
|
85
|
-
|
86
82
|
bool successive_ids;
|
87
83
|
|
84
|
+
/// Synchronize the top-level index (IndexShards) with data in the sub-indices
|
85
|
+
void syncWithSubIndexes();
|
86
|
+
|
88
87
|
protected:
|
89
88
|
/// Called just after an index is added
|
90
89
|
void onAfterAddIndex(IndexT* index) override;
|
@@ -274,7 +274,7 @@ const uint8_t * HStackInvertedLists::get_single_code (
|
|
274
274
|
}
|
275
275
|
offset -= sz;
|
276
276
|
}
|
277
|
-
FAISS_THROW_FMT ("offset %
|
277
|
+
FAISS_THROW_FMT ("offset %zd unknown", offset);
|
278
278
|
}
|
279
279
|
|
280
280
|
|
@@ -309,7 +309,7 @@ Index::idx_t HStackInvertedLists::get_single_id (
|
|
309
309
|
}
|
310
310
|
offset -= sz;
|
311
311
|
}
|
312
|
-
FAISS_THROW_FMT ("offset %
|
312
|
+
FAISS_THROW_FMT ("offset %zd unknown", offset);
|
313
313
|
}
|
314
314
|
|
315
315
|
|
@@ -208,6 +208,7 @@ struct ArrayInvertedLists: InvertedLists {
|
|
208
208
|
* that can be stacked horizontally, vertically and sliced.
|
209
209
|
*****************************************************************/
|
210
210
|
|
211
|
+
/// invlists that fail for all write functions
|
211
212
|
struct ReadOnlyInvertedLists: InvertedLists {
|
212
213
|
|
213
214
|
ReadOnlyInvertedLists (size_t nlist, size_t code_size):
|
File without changes
|
File without changes
|
@@ -9,7 +9,9 @@
|
|
9
9
|
|
10
10
|
#include <faiss/MetaIndexes.h>
|
11
11
|
|
12
|
+
#include <cinttypes>
|
12
13
|
#include <cstdio>
|
14
|
+
#include <limits>
|
13
15
|
#include <stdint.h>
|
14
16
|
|
15
17
|
#include <faiss/impl/FaissAssert.h>
|
@@ -199,7 +201,7 @@ void IndexIDMap2Template<IndexT>::reconstruct
|
|
199
201
|
try {
|
200
202
|
this->index->reconstruct (rev_map.at (key), recons);
|
201
203
|
} catch (const std::out_of_range& e) {
|
202
|
-
FAISS_THROW_FMT ("key %
|
204
|
+
FAISS_THROW_FMT ("key %" PRId64 " not found", key);
|
203
205
|
}
|
204
206
|
}
|
205
207
|
|
@@ -274,7 +276,7 @@ void IndexSplitVectors::search (
|
|
274
276
|
float *distances1 = no == 0 ? distances : all_distances + no * k * n;
|
275
277
|
idx_t *labels1 = no == 0 ? labels : all_labels + no * k * n;
|
276
278
|
if (index->verbose)
|
277
|
-
printf ("begin query shard %d on %
|
279
|
+
printf ("begin query shard %d on %" PRId64 " points\n", no, n);
|
278
280
|
const Index * sub_index = index->sub_indexes[no];
|
279
281
|
int64_t sub_d = sub_index->d, d = index->d;
|
280
282
|
idx_t ofs = 0;
|
@@ -319,7 +321,7 @@ void IndexSplitVectors::search (
|
|
319
321
|
distances[j] += distances_i[j];
|
320
322
|
} else {
|
321
323
|
labels[j] = -1;
|
322
|
-
distances[j] =
|
324
|
+
distances[j] = std::numeric_limits<float>::quiet_NaN();
|
323
325
|
}
|
324
326
|
}
|
325
327
|
}
|
File without changes
|
File without changes
|
@@ -16,10 +16,14 @@
|
|
16
16
|
#include <sys/mman.h>
|
17
17
|
#include <unistd.h>
|
18
18
|
#include <sys/types.h>
|
19
|
+
#include <sys/stat.h>
|
19
20
|
|
20
21
|
#include <faiss/impl/FaissAssert.h>
|
21
22
|
#include <faiss/utils/utils.h>
|
22
23
|
|
24
|
+
#include <faiss/impl/io.h>
|
25
|
+
#include <faiss/impl/io_macros.h>
|
26
|
+
|
23
27
|
|
24
28
|
namespace faiss {
|
25
29
|
|
@@ -320,7 +324,7 @@ void OnDiskInvertedLists::update_totsize (size_t new_size)
|
|
320
324
|
totsize = new_size;
|
321
325
|
|
322
326
|
// create file
|
323
|
-
printf ("resizing %s to %
|
327
|
+
printf ("resizing %s to %zd bytes\n", filename.c_str(), totsize);
|
324
328
|
|
325
329
|
int err = truncate (filename.c_str(), totsize);
|
326
330
|
|
@@ -341,7 +345,7 @@ void OnDiskInvertedLists::update_totsize (size_t new_size)
|
|
341
345
|
|
342
346
|
#define INVALID_OFFSET (size_t)(-1)
|
343
347
|
|
344
|
-
|
348
|
+
OnDiskOneList::OnDiskOneList ():
|
345
349
|
size (0), capacity (0), offset (INVALID_OFFSET)
|
346
350
|
{}
|
347
351
|
|
@@ -640,7 +644,7 @@ size_t OnDiskInvertedLists::merge_from (const InvertedLists **ils, int n_il,
|
|
640
644
|
nmerged++;
|
641
645
|
double t1 = getmillisecs();
|
642
646
|
if (t1 - last_t > 500) {
|
643
|
-
printf("merged %
|
647
|
+
printf("merged %zd lists in %.3f s\r",
|
644
648
|
nmerged, (t1 - t0) / 1000.0);
|
645
649
|
fflush(stdout);
|
646
650
|
last_t = t1;
|
@@ -656,6 +660,12 @@ size_t OnDiskInvertedLists::merge_from (const InvertedLists **ils, int n_il,
|
|
656
660
|
}
|
657
661
|
|
658
662
|
|
663
|
+
size_t OnDiskInvertedLists::merge_from_1 (const InvertedLists *ils, bool verbose)
|
664
|
+
{
|
665
|
+
return merge_from (&ils, 1, verbose);
|
666
|
+
}
|
667
|
+
|
668
|
+
|
659
669
|
void OnDiskInvertedLists::crop_invlists(size_t l0, size_t l1)
|
660
670
|
{
|
661
671
|
FAISS_THROW_IF_NOT(0 <= l0 && l0 <= l1 && l1 <= nlist);
|
@@ -668,6 +678,134 @@ void OnDiskInvertedLists::crop_invlists(size_t l0, size_t l1)
|
|
668
678
|
nlist = l1 - l0;
|
669
679
|
}
|
670
680
|
|
681
|
+
/*******************************************************
|
682
|
+
* I/O support via callbacks
|
683
|
+
*******************************************************/
|
684
|
+
|
685
|
+
|
686
|
+
|
687
|
+
|
688
|
+
OnDiskInvertedListsIOHook::OnDiskInvertedListsIOHook():
|
689
|
+
InvertedListsIOHook("ilod", typeid(OnDiskInvertedLists).name())
|
690
|
+
{}
|
691
|
+
|
692
|
+
|
693
|
+
void OnDiskInvertedListsIOHook::write(const InvertedLists *ils, IOWriter *f) const
|
694
|
+
{
|
695
|
+
uint32_t h = fourcc ("ilod");
|
696
|
+
WRITE1 (h);
|
697
|
+
WRITE1 (ils->nlist);
|
698
|
+
WRITE1 (ils->code_size);
|
699
|
+
const OnDiskInvertedLists *od = dynamic_cast<const OnDiskInvertedLists*> (ils);
|
700
|
+
// this is a POD object
|
701
|
+
WRITEVECTOR (od->lists);
|
702
|
+
|
703
|
+
{
|
704
|
+
std::vector<OnDiskInvertedLists::Slot> v(
|
705
|
+
od->slots.begin(), od->slots.end());
|
706
|
+
WRITEVECTOR(v);
|
707
|
+
}
|
708
|
+
{
|
709
|
+
std::vector<char> x(od->filename.begin(), od->filename.end());
|
710
|
+
WRITEVECTOR(x);
|
711
|
+
}
|
712
|
+
WRITE1(od->totsize);
|
713
|
+
}
|
714
|
+
|
715
|
+
InvertedLists * OnDiskInvertedListsIOHook::read(IOReader *f, int io_flags) const
|
716
|
+
{
|
717
|
+
OnDiskInvertedLists *od = new OnDiskInvertedLists();
|
718
|
+
od->read_only = io_flags & IO_FLAG_READ_ONLY;
|
719
|
+
READ1 (od->nlist);
|
720
|
+
READ1 (od->code_size);
|
721
|
+
// this is a POD object
|
722
|
+
READVECTOR (od->lists);
|
723
|
+
{
|
724
|
+
std::vector<OnDiskInvertedLists::Slot> v;
|
725
|
+
READVECTOR(v);
|
726
|
+
od->slots.assign(v.begin(), v.end());
|
727
|
+
}
|
728
|
+
{
|
729
|
+
std::vector<char> x;
|
730
|
+
READVECTOR(x);
|
731
|
+
od->filename.assign(x.begin(), x.end());
|
732
|
+
|
733
|
+
if (io_flags & IO_FLAG_ONDISK_SAME_DIR) {
|
734
|
+
FileIOReader *reader = dynamic_cast<FileIOReader*>(f);
|
735
|
+
FAISS_THROW_IF_NOT_MSG (
|
736
|
+
reader, "IO_FLAG_ONDISK_SAME_DIR only supported "
|
737
|
+
"when reading from file");
|
738
|
+
std::string indexname = reader->name;
|
739
|
+
std::string dirname = "./";
|
740
|
+
size_t slash = indexname.find_last_of('/');
|
741
|
+
if (slash != std::string::npos) {
|
742
|
+
dirname = indexname.substr(0, slash + 1);
|
743
|
+
}
|
744
|
+
std::string filename = od->filename;
|
745
|
+
slash = filename.find_last_of('/');
|
746
|
+
if (slash != std::string::npos) {
|
747
|
+
filename = filename.substr(slash + 1);
|
748
|
+
}
|
749
|
+
filename = dirname + filename;
|
750
|
+
printf("IO_FLAG_ONDISK_SAME_DIR: "
|
751
|
+
"updating ondisk filename from %s to %s\n",
|
752
|
+
od->filename.c_str(), filename.c_str());
|
753
|
+
od->filename = filename;
|
754
|
+
}
|
755
|
+
|
756
|
+
}
|
757
|
+
READ1(od->totsize);
|
758
|
+
od->do_mmap();
|
759
|
+
return od;
|
760
|
+
}
|
761
|
+
|
762
|
+
/** read from an ArrayInvertedLists into this invertedlist type */
|
763
|
+
InvertedLists * OnDiskInvertedListsIOHook::read_ArrayInvertedLists(
|
764
|
+
IOReader *f, int /* io_flags */,
|
765
|
+
size_t nlist, size_t code_size,
|
766
|
+
const std::vector<size_t> &sizes) const
|
767
|
+
{
|
768
|
+
auto ails = new OnDiskInvertedLists ();
|
769
|
+
ails->nlist = nlist;
|
770
|
+
ails->code_size = code_size;
|
771
|
+
ails->read_only = true;
|
772
|
+
ails->lists.resize (nlist);
|
773
|
+
|
774
|
+
FileIOReader *reader = dynamic_cast<FileIOReader*>(f);
|
775
|
+
FAISS_THROW_IF_NOT_MSG(reader, "mmap only supported for File objects");
|
776
|
+
FILE *fdesc = reader->f;
|
777
|
+
size_t o0 = ftell(fdesc);
|
778
|
+
size_t o = o0;
|
779
|
+
{ // do the mmap
|
780
|
+
struct stat buf;
|
781
|
+
int ret = fstat (fileno(fdesc), &buf);
|
782
|
+
FAISS_THROW_IF_NOT_FMT (ret == 0,
|
783
|
+
"fstat failed: %s", strerror(errno));
|
784
|
+
ails->totsize = buf.st_size;
|
785
|
+
ails->ptr = (uint8_t*)mmap (nullptr, ails->totsize,
|
786
|
+
PROT_READ, MAP_SHARED,
|
787
|
+
fileno(fdesc), 0);
|
788
|
+
FAISS_THROW_IF_NOT_FMT (ails->ptr != MAP_FAILED,
|
789
|
+
"could not mmap: %s",
|
790
|
+
strerror(errno));
|
791
|
+
}
|
792
|
+
|
793
|
+
FAISS_THROW_IF_NOT(o <= ails->totsize);
|
794
|
+
|
795
|
+
for (size_t i = 0; i < ails->nlist; i++) {
|
796
|
+
OnDiskInvertedLists::List & l = ails->lists[i];
|
797
|
+
l.size = l.capacity = sizes[i];
|
798
|
+
l.offset = o;
|
799
|
+
o += l.size * (sizeof(OnDiskInvertedLists::idx_t) +
|
800
|
+
ails->code_size);
|
801
|
+
}
|
802
|
+
// resume normal reading of file
|
803
|
+
fseek (fdesc, o, SEEK_SET);
|
804
|
+
|
805
|
+
return ails;
|
806
|
+
}
|
807
|
+
|
808
|
+
|
671
809
|
|
672
810
|
|
673
811
|
|
@@ -12,14 +12,26 @@
|
|
12
12
|
|
13
13
|
#include <vector>
|
14
14
|
#include <list>
|
15
|
+
#include <typeinfo>
|
15
16
|
|
16
17
|
#include <faiss/IndexIVF.h>
|
17
18
|
|
19
|
+
#include <faiss/index_io.h>
|
20
|
+
|
18
21
|
namespace faiss {
|
19
22
|
|
20
23
|
|
21
24
|
struct LockLevels;
|
22
25
|
|
26
|
+
|
27
|
+
struct OnDiskOneList {
|
28
|
+
size_t size; // size of inverted list (entries)
|
29
|
+
size_t capacity; // allocated size (entries)
|
30
|
+
size_t offset; // offset in buffer (bytes)
|
31
|
+
OnDiskOneList ();
|
32
|
+
};
|
33
|
+
|
34
|
+
|
23
35
|
/** On-disk storage of inverted lists.
|
24
36
|
*
|
25
37
|
* The data is stored in a mmapped chunk of memory (base pointer ptr,
|
@@ -49,13 +61,7 @@ struct LockLevels;
|
|
49
61
|
* lists in parallel.
|
50
62
|
*/
|
51
63
|
struct OnDiskInvertedLists: InvertedLists {
|
52
|
-
|
53
|
-
struct List {
|
54
|
-
size_t size; // size of inverted list (entries)
|
55
|
-
size_t capacity; // allocated size (entries)
|
56
|
-
size_t offset; // offset in buffer (bytes)
|
57
|
-
List ();
|
58
|
-
};
|
64
|
+
using List = OnDiskOneList;
|
59
65
|
|
60
66
|
// size nlist
|
61
67
|
std::vector<List> lists;
|
@@ -95,6 +101,9 @@ struct OnDiskInvertedLists: InvertedLists {
|
|
95
101
|
// allocating slots)
|
96
102
|
size_t merge_from (const InvertedLists **ils, int n_il, bool verbose=false);
|
97
103
|
|
104
|
+
/// same as merge_from for a single invlist
|
105
|
+
size_t merge_from_1 (const InvertedLists *il, bool verbose=false);
|
106
|
+
|
98
107
|
/// restrict the inverted lists to l0:l1 without touching the mmapped region
|
99
108
|
void crop_invlists(size_t l0, size_t l1);
|
100
109
|
|
@@ -121,6 +130,17 @@ struct OnDiskInvertedLists: InvertedLists {
|
|
121
130
|
OnDiskInvertedLists ();
|
122
131
|
};
|
123
132
|
|
133
|
+
struct OnDiskInvertedListsIOHook: InvertedListsIOHook {
|
134
|
+
OnDiskInvertedListsIOHook();
|
135
|
+
void write(const InvertedLists *ils, IOWriter *f) const override;
|
136
|
+
InvertedLists * read(IOReader *f, int io_flags) const override;
|
137
|
+
InvertedLists * read_ArrayInvertedLists(
|
138
|
+
IOReader *f, int io_flags,
|
139
|
+
size_t nlist, size_t code_size,
|
140
|
+
const std::vector<size_t> &sizes) const override;
|
141
|
+
};
|
142
|
+
|
143
|
+
|
124
144
|
|
125
145
|
} // namespace faiss
|
126
146
|
|