faiss 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +80 -0
- data/vendor/faiss/c_api/AutoTune_c.h +2 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -6
- data/vendor/faiss/c_api/IndexShards_c.h +1 -4
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +4 -2
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +1 -1
- data/vendor/faiss/demos/demo_imi_flat.cpp +5 -2
- data/vendor/faiss/demos/demo_imi_pq.cpp +6 -2
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +7 -2
- data/vendor/faiss/{AutoTune.cpp → faiss/AutoTune.cpp} +9 -9
- data/vendor/faiss/{AutoTune.h → faiss/AutoTune.h} +0 -0
- data/vendor/faiss/{Clustering.cpp → faiss/Clustering.cpp} +13 -12
- data/vendor/faiss/{Clustering.h → faiss/Clustering.h} +0 -0
- data/vendor/faiss/{DirectMap.cpp → faiss/DirectMap.cpp} +0 -0
- data/vendor/faiss/{DirectMap.h → faiss/DirectMap.h} +0 -0
- data/vendor/faiss/{IVFlib.cpp → faiss/IVFlib.cpp} +86 -11
- data/vendor/faiss/{IVFlib.h → faiss/IVFlib.h} +26 -8
- data/vendor/faiss/{Index.cpp → faiss/Index.cpp} +0 -0
- data/vendor/faiss/{Index.h → faiss/Index.h} +1 -1
- data/vendor/faiss/{Index2Layer.cpp → faiss/Index2Layer.cpp} +12 -11
- data/vendor/faiss/{Index2Layer.h → faiss/Index2Layer.h} +0 -0
- data/vendor/faiss/{IndexBinary.cpp → faiss/IndexBinary.cpp} +2 -1
- data/vendor/faiss/{IndexBinary.h → faiss/IndexBinary.h} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.cpp → faiss/IndexBinaryFlat.cpp} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.h → faiss/IndexBinaryFlat.h} +0 -0
- data/vendor/faiss/{IndexBinaryFromFloat.cpp → faiss/IndexBinaryFromFloat.cpp} +1 -0
- data/vendor/faiss/{IndexBinaryFromFloat.h → faiss/IndexBinaryFromFloat.h} +0 -0
- data/vendor/faiss/{IndexBinaryHNSW.cpp → faiss/IndexBinaryHNSW.cpp} +1 -2
- data/vendor/faiss/{IndexBinaryHNSW.h → faiss/IndexBinaryHNSW.h} +0 -0
- data/vendor/faiss/{IndexBinaryHash.cpp → faiss/IndexBinaryHash.cpp} +16 -7
- data/vendor/faiss/{IndexBinaryHash.h → faiss/IndexBinaryHash.h} +2 -1
- data/vendor/faiss/{IndexBinaryIVF.cpp → faiss/IndexBinaryIVF.cpp} +10 -16
- data/vendor/faiss/{IndexBinaryIVF.h → faiss/IndexBinaryIVF.h} +1 -1
- data/vendor/faiss/{IndexFlat.cpp → faiss/IndexFlat.cpp} +0 -0
- data/vendor/faiss/{IndexFlat.h → faiss/IndexFlat.h} +0 -0
- data/vendor/faiss/{IndexHNSW.cpp → faiss/IndexHNSW.cpp} +63 -32
- data/vendor/faiss/{IndexHNSW.h → faiss/IndexHNSW.h} +0 -0
- data/vendor/faiss/{IndexIVF.cpp → faiss/IndexIVF.cpp} +129 -46
- data/vendor/faiss/{IndexIVF.h → faiss/IndexIVF.h} +7 -3
- data/vendor/faiss/{IndexIVFFlat.cpp → faiss/IndexIVFFlat.cpp} +6 -5
- data/vendor/faiss/{IndexIVFFlat.h → faiss/IndexIVFFlat.h} +0 -0
- data/vendor/faiss/{IndexIVFPQ.cpp → faiss/IndexIVFPQ.cpp} +9 -8
- data/vendor/faiss/{IndexIVFPQ.h → faiss/IndexIVFPQ.h} +4 -2
- data/vendor/faiss/{IndexIVFPQR.cpp → faiss/IndexIVFPQR.cpp} +3 -1
- data/vendor/faiss/{IndexIVFPQR.h → faiss/IndexIVFPQR.h} +0 -0
- data/vendor/faiss/{IndexIVFSpectralHash.cpp → faiss/IndexIVFSpectralHash.cpp} +1 -1
- data/vendor/faiss/{IndexIVFSpectralHash.h → faiss/IndexIVFSpectralHash.h} +0 -0
- data/vendor/faiss/{IndexLSH.cpp → faiss/IndexLSH.cpp} +0 -0
- data/vendor/faiss/{IndexLSH.h → faiss/IndexLSH.h} +0 -0
- data/vendor/faiss/{IndexLattice.cpp → faiss/IndexLattice.cpp} +0 -0
- data/vendor/faiss/{IndexLattice.h → faiss/IndexLattice.h} +0 -0
- data/vendor/faiss/{IndexPQ.cpp → faiss/IndexPQ.cpp} +6 -6
- data/vendor/faiss/{IndexPQ.h → faiss/IndexPQ.h} +3 -1
- data/vendor/faiss/{IndexPreTransform.cpp → faiss/IndexPreTransform.cpp} +0 -0
- data/vendor/faiss/{IndexPreTransform.h → faiss/IndexPreTransform.h} +0 -0
- data/vendor/faiss/{IndexReplicas.cpp → faiss/IndexReplicas.cpp} +102 -10
- data/vendor/faiss/{IndexReplicas.h → faiss/IndexReplicas.h} +6 -0
- data/vendor/faiss/{IndexScalarQuantizer.cpp → faiss/IndexScalarQuantizer.cpp} +3 -3
- data/vendor/faiss/{IndexScalarQuantizer.h → faiss/IndexScalarQuantizer.h} +0 -0
- data/vendor/faiss/{IndexShards.cpp → faiss/IndexShards.cpp} +37 -12
- data/vendor/faiss/{IndexShards.h → faiss/IndexShards.h} +3 -4
- data/vendor/faiss/{InvertedLists.cpp → faiss/InvertedLists.cpp} +2 -2
- data/vendor/faiss/{InvertedLists.h → faiss/InvertedLists.h} +1 -0
- data/vendor/faiss/{MatrixStats.cpp → faiss/MatrixStats.cpp} +0 -0
- data/vendor/faiss/{MatrixStats.h → faiss/MatrixStats.h} +0 -0
- data/vendor/faiss/{MetaIndexes.cpp → faiss/MetaIndexes.cpp} +5 -3
- data/vendor/faiss/{MetaIndexes.h → faiss/MetaIndexes.h} +0 -0
- data/vendor/faiss/{MetricType.h → faiss/MetricType.h} +0 -0
- data/vendor/faiss/{OnDiskInvertedLists.cpp → faiss/OnDiskInvertedLists.cpp} +141 -3
- data/vendor/faiss/{OnDiskInvertedLists.h → faiss/OnDiskInvertedLists.h} +27 -7
- data/vendor/faiss/{VectorTransform.cpp → faiss/VectorTransform.cpp} +4 -3
- data/vendor/faiss/{VectorTransform.h → faiss/VectorTransform.h} +0 -0
- data/vendor/faiss/{clone_index.cpp → faiss/clone_index.cpp} +0 -0
- data/vendor/faiss/{clone_index.h → faiss/clone_index.h} +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.cpp +14 -14
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.h +6 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuDistance.h +12 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuFaissAssert.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndex.h +3 -9
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexBinaryFlat.h +7 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexFlat.h +35 -10
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVF.h +1 -2
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFFlat.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFPQ.h +21 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFScalarQuantizer.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndicesOptions.h +0 -0
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +200 -0
- data/vendor/faiss/faiss/gpu/GpuResources.h +264 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +572 -0
- data/vendor/faiss/{gpu → faiss/gpu}/StandardGpuResources.h +83 -15
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper-inl.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfClustering.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfIVFPQAdd.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/WriteIndex.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexBinaryFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexFlat.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFPQ.cpp +141 -52
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuMemoryException.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.cpp +4 -2
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/demo_ivfpq_indexing_gpu.cpp +7 -5
- data/vendor/faiss/{gpu → faiss/gpu}/utils/DeviceUtils.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +213 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StackDeviceMemory.h +25 -40
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StaticUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.cpp +1 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.h +3 -1
- data/vendor/faiss/{impl → faiss/impl}/FaissAssert.h +1 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.cpp +26 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.h +4 -0
- data/vendor/faiss/{impl → faiss/impl}/HNSW.cpp +26 -26
- data/vendor/faiss/{impl → faiss/impl}/HNSW.h +19 -11
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.cpp +1 -1
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer-inl.h +0 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.cpp +9 -9
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.cpp +63 -39
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex-inl.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/index_read.cpp +99 -116
- data/vendor/faiss/{impl → faiss/impl}/index_write.cpp +15 -50
- data/vendor/faiss/{impl → faiss/impl}/io.cpp +15 -10
- data/vendor/faiss/{impl → faiss/impl}/io.h +22 -8
- data/vendor/faiss/faiss/impl/io_macros.h +57 -0
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.cpp +52 -36
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.h +3 -3
- data/vendor/faiss/faiss/impl/platform_macros.h +24 -0
- data/vendor/faiss/{index_factory.cpp → faiss/index_factory.cpp} +33 -12
- data/vendor/faiss/{index_factory.h → faiss/index_factory.h} +0 -0
- data/vendor/faiss/{index_io.h → faiss/index_io.h} +55 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +112 -0
- data/vendor/faiss/faiss/python/python_callbacks.h +45 -0
- data/vendor/faiss/{utils → faiss/utils}/Heap.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/Heap.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.cpp +0 -0
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/distances.cpp +28 -13
- data/vendor/faiss/{utils → faiss/utils}/distances.h +2 -1
- data/vendor/faiss/{utils → faiss/utils}/distances_simd.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/hamming-inl.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/hamming.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/hamming.h +7 -1
- data/vendor/faiss/{utils → faiss/utils}/random.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/random.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/utils.cpp +27 -28
- data/vendor/faiss/{utils → faiss/utils}/utils.h +4 -0
- data/vendor/faiss/misc/test_blas.cpp +4 -1
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -2
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +6 -1
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +4 -1
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +6 -4
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +12 -5
- data/vendor/faiss/tests/test_merge.cpp +6 -3
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +7 -2
- data/vendor/faiss/tests/test_pairs_decoding.cpp +5 -1
- data/vendor/faiss/tests/test_params_override.cpp +7 -2
- data/vendor/faiss/tests/test_sliding_ivf.cpp +10 -4
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +14 -8
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +11 -7
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +12 -7
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +6 -3
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +7 -3
- metadata +154 -153
- data/vendor/faiss/gpu/GpuResources.cpp +0 -52
- data/vendor/faiss/gpu/GpuResources.h +0 -73
- data/vendor/faiss/gpu/StandardGpuResources.cpp +0 -303
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +0 -77
- data/vendor/faiss/gpu/utils/DeviceMemory.h +0 -71
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +0 -89
- data/vendor/faiss/gpu/utils/MemorySpace.h +0 -44
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +0 -239
@@ -29,7 +29,7 @@ struct ScalarQuantizer {
|
|
29
29
|
QT_8bit_uniform, ///< same, shared range for all dimensions
|
30
30
|
QT_4bit_uniform,
|
31
31
|
QT_fp16,
|
32
|
-
QT_8bit_direct,
|
32
|
+
QT_8bit_direct, ///< fast indexing of uint8s
|
33
33
|
QT_6bit, ///< 6 bits per component
|
34
34
|
};
|
35
35
|
|
File without changes
|
File without changes
|
@@ -12,13 +12,16 @@
|
|
12
12
|
#include <cstdio>
|
13
13
|
#include <cstdlib>
|
14
14
|
|
15
|
-
#include <sys/mman.h>
|
16
15
|
#include <sys/types.h>
|
17
16
|
#include <sys/stat.h>
|
18
|
-
|
17
|
+
|
18
|
+
#ifndef _MSC_VER
|
19
|
+
#include <sys/mman.h>
|
20
|
+
#endif // !_MSC_VER
|
19
21
|
|
20
22
|
#include <faiss/impl/FaissAssert.h>
|
21
23
|
#include <faiss/impl/io.h>
|
24
|
+
#include <faiss/impl/io_macros.h>
|
22
25
|
#include <faiss/utils/hamming.h>
|
23
26
|
|
24
27
|
#include <faiss/IndexFlat.h>
|
@@ -36,46 +39,19 @@
|
|
36
39
|
#include <faiss/IndexScalarQuantizer.h>
|
37
40
|
#include <faiss/IndexHNSW.h>
|
38
41
|
#include <faiss/IndexLattice.h>
|
39
|
-
|
40
|
-
#include <faiss/OnDiskInvertedLists.h>
|
41
42
|
#include <faiss/IndexBinaryFlat.h>
|
42
43
|
#include <faiss/IndexBinaryFromFloat.h>
|
43
44
|
#include <faiss/IndexBinaryHNSW.h>
|
44
45
|
#include <faiss/IndexBinaryIVF.h>
|
45
46
|
#include <faiss/IndexBinaryHash.h>
|
46
47
|
|
48
|
+
#ifndef _MSC_VER
|
49
|
+
#include <faiss/OnDiskInvertedLists.h>
|
50
|
+
#endif // !_MSC_VER
|
47
51
|
|
48
52
|
|
49
53
|
namespace faiss {
|
50
54
|
|
51
|
-
/*************************************************************
|
52
|
-
* I/O macros
|
53
|
-
*
|
54
|
-
* we use macros so that we have a line number to report in abort
|
55
|
-
* (). This makes debugging a lot easier. The IOReader or IOWriter is
|
56
|
-
* always called f and thus is not passed in as a macro parameter.
|
57
|
-
**************************************************************/
|
58
|
-
|
59
|
-
|
60
|
-
#define READANDCHECK(ptr, n) { \
|
61
|
-
size_t ret = (*f)(ptr, sizeof(*(ptr)), n); \
|
62
|
-
FAISS_THROW_IF_NOT_FMT(ret == (n), \
|
63
|
-
"read error in %s: %ld != %ld (%s)", \
|
64
|
-
f->name.c_str(), ret, size_t(n), strerror(errno)); \
|
65
|
-
}
|
66
|
-
|
67
|
-
#define READ1(x) READANDCHECK(&(x), 1)
|
68
|
-
|
69
|
-
// will fail if we write 256G of data at once...
|
70
|
-
#define READVECTOR(vec) { \
|
71
|
-
long size; \
|
72
|
-
READANDCHECK (&size, 1); \
|
73
|
-
FAISS_THROW_IF_NOT (size >= 0 && size < (1L << 40)); \
|
74
|
-
(vec).resize (size); \
|
75
|
-
READANDCHECK ((vec).data (), size); \
|
76
|
-
}
|
77
|
-
|
78
|
-
|
79
55
|
|
80
56
|
/*************************************************************
|
81
57
|
* Read
|
@@ -202,7 +178,7 @@ InvertedLists *read_InvertedLists (IOReader *f, int io_flags) {
|
|
202
178
|
fprintf(stderr, "read_InvertedLists:"
|
203
179
|
" WARN! inverted lists not stored with IVF object\n");
|
204
180
|
return nullptr;
|
205
|
-
} else if (h == fourcc ("ilar") && !(io_flags & IO_FLAG_MMAP)) {
|
181
|
+
} else if (h == fourcc ("ilar") && !(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
|
206
182
|
auto ails = new ArrayInvertedLists (0, 0);
|
207
183
|
READ1 (ails->nlist);
|
208
184
|
READ1 (ails->code_size);
|
@@ -222,95 +198,31 @@ InvertedLists *read_InvertedLists (IOReader *f, int io_flags) {
|
|
222
198
|
}
|
223
199
|
}
|
224
200
|
return ails;
|
225
|
-
} else if (h == fourcc ("ilar") && (io_flags & IO_FLAG_MMAP)) {
|
226
|
-
// then we load it as an OnDiskInvertedLists
|
227
201
|
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
202
|
+
#ifdef _MSC_VER
|
203
|
+
} else {
|
204
|
+
FAISS_THROW_MSG("Unsupported inverted list format for Windows");
|
205
|
+
}
|
206
|
+
#else
|
207
|
+
} else if (h == fourcc ("ilar") && (io_flags & IO_FLAG_SKIP_IVF_DATA)) {
|
208
|
+
// code is always ilxx where xx is specific to the type of invlists we want
|
209
|
+
// so we get the 16 high bits from the io_flag and the 16 low bits as "il"
|
210
|
+
int h2 = (io_flags & 0xffff0000) | (fourcc("il__") & 0x0000ffff);
|
211
|
+
size_t nlist, code_size;
|
212
|
+
READ1 (nlist);
|
213
|
+
READ1 (code_size);
|
214
|
+
std::vector<size_t> sizes (nlist);
|
238
215
|
read_ArrayInvertedLists_sizes (f, sizes);
|
239
|
-
|
240
|
-
|
241
|
-
struct stat buf;
|
242
|
-
int ret = fstat (fileno(fdesc), &buf);
|
243
|
-
FAISS_THROW_IF_NOT_FMT (ret == 0,
|
244
|
-
"fstat failed: %s", strerror(errno));
|
245
|
-
ails->totsize = buf.st_size;
|
246
|
-
ails->ptr = (uint8_t*)mmap (nullptr, ails->totsize,
|
247
|
-
PROT_READ, MAP_SHARED,
|
248
|
-
fileno(fdesc), 0);
|
249
|
-
FAISS_THROW_IF_NOT_FMT (ails->ptr != MAP_FAILED,
|
250
|
-
"could not mmap: %s",
|
251
|
-
strerror(errno));
|
252
|
-
}
|
253
|
-
|
254
|
-
for (size_t i = 0; i < ails->nlist; i++) {
|
255
|
-
OnDiskInvertedLists::List & l = ails->lists[i];
|
256
|
-
l.size = l.capacity = sizes[i];
|
257
|
-
l.offset = o;
|
258
|
-
o += l.size * (sizeof(OnDiskInvertedLists::idx_t) +
|
259
|
-
ails->code_size);
|
260
|
-
}
|
261
|
-
FAISS_THROW_IF_NOT(o <= ails->totsize);
|
262
|
-
// resume normal reading of file
|
263
|
-
fseek (fdesc, o, SEEK_SET);
|
264
|
-
return ails;
|
265
|
-
} else if (h == fourcc ("ilod")) {
|
266
|
-
OnDiskInvertedLists *od = new OnDiskInvertedLists();
|
267
|
-
od->read_only = io_flags & IO_FLAG_READ_ONLY;
|
268
|
-
READ1 (od->nlist);
|
269
|
-
READ1 (od->code_size);
|
270
|
-
// this is a POD object
|
271
|
-
READVECTOR (od->lists);
|
272
|
-
{
|
273
|
-
std::vector<OnDiskInvertedLists::Slot> v;
|
274
|
-
READVECTOR(v);
|
275
|
-
od->slots.assign(v.begin(), v.end());
|
276
|
-
}
|
277
|
-
{
|
278
|
-
std::vector<char> x;
|
279
|
-
READVECTOR(x);
|
280
|
-
od->filename.assign(x.begin(), x.end());
|
281
|
-
|
282
|
-
if (io_flags & IO_FLAG_ONDISK_SAME_DIR) {
|
283
|
-
FileIOReader *reader = dynamic_cast<FileIOReader*>(f);
|
284
|
-
FAISS_THROW_IF_NOT_MSG (
|
285
|
-
reader, "IO_FLAG_ONDISK_SAME_DIR only supported "
|
286
|
-
"when reading from file");
|
287
|
-
std::string indexname = reader->name;
|
288
|
-
std::string dirname = "./";
|
289
|
-
size_t slash = indexname.find_last_of('/');
|
290
|
-
if (slash != std::string::npos) {
|
291
|
-
dirname = indexname.substr(0, slash + 1);
|
292
|
-
}
|
293
|
-
std::string filename = od->filename;
|
294
|
-
slash = filename.find_last_of('/');
|
295
|
-
if (slash != std::string::npos) {
|
296
|
-
filename = filename.substr(slash + 1);
|
297
|
-
}
|
298
|
-
filename = dirname + filename;
|
299
|
-
printf("IO_FLAG_ONDISK_SAME_DIR: "
|
300
|
-
"updating ondisk filename from %s to %s\n",
|
301
|
-
od->filename.c_str(), filename.c_str());
|
302
|
-
od->filename = filename;
|
303
|
-
}
|
304
|
-
|
305
|
-
}
|
306
|
-
READ1(od->totsize);
|
307
|
-
od->do_mmap();
|
308
|
-
return od;
|
216
|
+
return InvertedListsIOHook::lookup(h2)->read_ArrayInvertedLists(
|
217
|
+
f, io_flags, nlist, code_size, sizes);
|
309
218
|
} else {
|
310
|
-
|
219
|
+
return InvertedListsIOHook::lookup(h)->read(f, io_flags);
|
311
220
|
}
|
221
|
+
#endif // !_MSC_VER
|
222
|
+
|
312
223
|
}
|
313
224
|
|
225
|
+
|
314
226
|
static void read_InvertedLists (
|
315
227
|
IndexIVF *ivf, IOReader *f, int io_flags) {
|
316
228
|
InvertedLists *ils = read_InvertedLists (f, io_flags);
|
@@ -885,5 +797,76 @@ IndexBinary *read_index_binary (const char *fname, int io_flags) {
|
|
885
797
|
return idx;
|
886
798
|
}
|
887
799
|
|
800
|
+
#ifndef _MSC_VER
|
801
|
+
|
802
|
+
/**********************************************************
|
803
|
+
* InvertedListsIOHooks
|
804
|
+
**********************************************************/
|
805
|
+
|
806
|
+
InvertedListsIOHook::InvertedListsIOHook(
|
807
|
+
const std::string & key, const std::string & classname):
|
808
|
+
key(key), classname(classname)
|
809
|
+
{}
|
810
|
+
|
811
|
+
namespace {
|
812
|
+
|
813
|
+
/// std::vector that deletes its contents
|
814
|
+
struct IOHookTable: std::vector<InvertedListsIOHook*> {
|
815
|
+
|
816
|
+
IOHookTable() {
|
817
|
+
push_back(new OnDiskInvertedListsIOHook());
|
818
|
+
}
|
819
|
+
|
820
|
+
~IOHookTable() {
|
821
|
+
for (auto x: *this) {
|
822
|
+
delete x;
|
823
|
+
}
|
824
|
+
}
|
825
|
+
};
|
826
|
+
|
827
|
+
static IOHookTable InvertedListsIOHook_table;
|
828
|
+
|
829
|
+
} // anonymous namespace
|
830
|
+
|
831
|
+
InvertedListsIOHook* InvertedListsIOHook::lookup(int h)
|
832
|
+
{
|
833
|
+
for(const auto & callback: InvertedListsIOHook_table) {
|
834
|
+
if (h == fourcc(callback->key)) {
|
835
|
+
return callback;
|
836
|
+
}
|
837
|
+
}
|
838
|
+
FAISS_THROW_FMT ("read_InvertedLists: could not load ArrayInvertedLists as %04x", h);
|
839
|
+
}
|
840
|
+
|
841
|
+
InvertedListsIOHook* InvertedListsIOHook::lookup_classname(const std::string & classname)
|
842
|
+
{
|
843
|
+
for(const auto & callback: InvertedListsIOHook_table) {
|
844
|
+
if (callback->classname == classname) {
|
845
|
+
return callback;
|
846
|
+
}
|
847
|
+
}
|
848
|
+
FAISS_THROW_FMT ("read_InvertedLists: could not find classname %s", classname.c_str());
|
849
|
+
}
|
850
|
+
|
851
|
+
void InvertedListsIOHook::add_callback(InvertedListsIOHook *cb)
|
852
|
+
{
|
853
|
+
InvertedListsIOHook_table.push_back(cb);
|
854
|
+
}
|
855
|
+
|
856
|
+
void InvertedListsIOHook::print_callbacks()
|
857
|
+
{
|
858
|
+
printf("registered %zd InvertedListsIOHooks:\n",
|
859
|
+
InvertedListsIOHook_table.size());
|
860
|
+
for(const auto & cb: InvertedListsIOHook_table) {
|
861
|
+
printf("%08x %s %s\n",
|
862
|
+
fourcc(cb->key.c_str()),
|
863
|
+
cb->key.c_str(),
|
864
|
+
cb->classname.c_str());
|
865
|
+
}
|
866
|
+
}
|
867
|
+
|
868
|
+
#endif // !_MSC_VER
|
869
|
+
|
870
|
+
|
888
871
|
|
889
872
|
} // namespace faiss
|
@@ -12,13 +12,16 @@
|
|
12
12
|
#include <cstdio>
|
13
13
|
#include <cstdlib>
|
14
14
|
|
15
|
-
#include <sys/mman.h>
|
16
15
|
#include <sys/types.h>
|
17
16
|
#include <sys/stat.h>
|
18
|
-
|
17
|
+
|
18
|
+
#ifndef _MSC_VER
|
19
|
+
#include <sys/mman.h>
|
20
|
+
#endif // !_MSC_VER
|
19
21
|
|
20
22
|
#include <faiss/impl/FaissAssert.h>
|
21
23
|
#include <faiss/impl/io.h>
|
24
|
+
#include <faiss/impl/io_macros.h>
|
22
25
|
#include <faiss/utils/hamming.h>
|
23
26
|
|
24
27
|
#include <faiss/IndexFlat.h>
|
@@ -37,13 +40,15 @@
|
|
37
40
|
#include <faiss/IndexHNSW.h>
|
38
41
|
#include <faiss/IndexLattice.h>
|
39
42
|
|
40
|
-
#include <faiss/OnDiskInvertedLists.h>
|
41
43
|
#include <faiss/IndexBinaryFlat.h>
|
42
44
|
#include <faiss/IndexBinaryFromFloat.h>
|
43
45
|
#include <faiss/IndexBinaryHNSW.h>
|
44
46
|
#include <faiss/IndexBinaryIVF.h>
|
45
47
|
#include <faiss/IndexBinaryHash.h>
|
46
48
|
|
49
|
+
#ifndef _MSC_VER
|
50
|
+
#include <faiss/OnDiskInvertedLists.h>
|
51
|
+
#endif // !_MSC_VER
|
47
52
|
|
48
53
|
|
49
54
|
/*************************************************************
|
@@ -68,37 +73,9 @@
|
|
68
73
|
* leak memory.
|
69
74
|
**************************************************************/
|
70
75
|
|
71
|
-
|
72
|
-
|
73
76
|
namespace faiss {
|
74
77
|
|
75
78
|
|
76
|
-
/*************************************************************
|
77
|
-
* I/O macros
|
78
|
-
*
|
79
|
-
* we use macros so that we have a line number to report in abort
|
80
|
-
* (). This makes debugging a lot easier. The IOReader or IOWriter is
|
81
|
-
* always called f and thus is not passed in as a macro parameter.
|
82
|
-
**************************************************************/
|
83
|
-
|
84
|
-
|
85
|
-
#define WRITEANDCHECK(ptr, n) { \
|
86
|
-
size_t ret = (*f)(ptr, sizeof(*(ptr)), n); \
|
87
|
-
FAISS_THROW_IF_NOT_FMT(ret == (n), \
|
88
|
-
"write error in %s: %ld != %ld (%s)", \
|
89
|
-
f->name.c_str(), ret, size_t(n), strerror(errno)); \
|
90
|
-
}
|
91
|
-
|
92
|
-
#define WRITE1(x) WRITEANDCHECK(&(x), 1)
|
93
|
-
|
94
|
-
#define WRITEVECTOR(vec) { \
|
95
|
-
size_t size = (vec).size (); \
|
96
|
-
WRITEANDCHECK (&size, 1); \
|
97
|
-
WRITEANDCHECK ((vec).data (), size); \
|
98
|
-
}
|
99
|
-
|
100
|
-
|
101
|
-
|
102
79
|
/*************************************************************
|
103
80
|
* Write
|
104
81
|
**************************************************************/
|
@@ -239,31 +216,19 @@ void write_InvertedLists (const InvertedLists *ils, IOWriter *f) {
|
|
239
216
|
WRITEANDCHECK (ails->ids[i].data(), n);
|
240
217
|
}
|
241
218
|
}
|
242
|
-
|
243
|
-
|
244
|
-
uint32_t h = fourcc ("ilod");
|
245
|
-
WRITE1 (h);
|
246
|
-
WRITE1 (ils->nlist);
|
247
|
-
WRITE1 (ils->code_size);
|
248
|
-
// this is a POD object
|
249
|
-
WRITEVECTOR (od->lists);
|
219
|
+
#ifndef _MSC_VER
|
220
|
+
} else {
|
250
221
|
|
251
|
-
|
252
|
-
|
253
|
-
od->slots.begin(), od->slots.end());
|
254
|
-
WRITEVECTOR(v);
|
255
|
-
}
|
256
|
-
{
|
257
|
-
std::vector<char> x(od->filename.begin(), od->filename.end());
|
258
|
-
WRITEVECTOR(x);
|
259
|
-
}
|
260
|
-
WRITE1(od->totsize);
|
222
|
+
InvertedListsIOHook::lookup_classname(
|
223
|
+
typeid(*ils).name())->write(ils, f);
|
261
224
|
|
262
|
-
|
225
|
+
/*
|
263
226
|
fprintf(stderr, "WARN! write_InvertedLists: unsupported invlist type, "
|
264
227
|
"saving null invlist\n");
|
265
228
|
uint32_t h = fourcc ("il00");
|
266
229
|
WRITE1 (h);
|
230
|
+
*/
|
231
|
+
#endif // !_MSC_VER
|
267
232
|
}
|
268
233
|
}
|
269
234
|
|
@@ -7,6 +7,7 @@
|
|
7
7
|
|
8
8
|
// -*- c++ -*-
|
9
9
|
|
10
|
+
#include <algorithm>
|
10
11
|
#include <cstring>
|
11
12
|
#include <cassert>
|
12
13
|
|
@@ -135,8 +136,8 @@ int FileIOWriter::fileno() {
|
|
135
136
|
* IO buffer
|
136
137
|
***********************************************************************/
|
137
138
|
|
138
|
-
BufferedIOReader::BufferedIOReader(IOReader *reader, size_t bsz
|
139
|
-
reader(reader), bsz(bsz),
|
139
|
+
BufferedIOReader::BufferedIOReader(IOReader *reader, size_t bsz):
|
140
|
+
reader(reader), bsz(bsz), ofs(0), ofs2(0), b0(0), b1(0), buffer(bsz)
|
140
141
|
{
|
141
142
|
}
|
142
143
|
|
@@ -156,15 +157,12 @@ size_t BufferedIOReader::operator()(void *ptr, size_t unitsize, size_t nitems)
|
|
156
157
|
size -= nb;
|
157
158
|
}
|
158
159
|
|
159
|
-
if (size > totsz - ofs) {
|
160
|
-
size = totsz - ofs;
|
161
|
-
}
|
162
160
|
// while we would like to have more data
|
163
161
|
while (size > 0) {
|
164
162
|
assert (b0 == b1); // buffer empty on input
|
165
163
|
// try to read from main reader
|
166
164
|
b0 = 0;
|
167
|
-
b1 = (*reader)(buffer.data(), 1,
|
165
|
+
b1 = (*reader)(buffer.data(), 1, bsz);
|
168
166
|
|
169
167
|
if (b1 == 0) {
|
170
168
|
// no more bytes available
|
@@ -180,12 +178,13 @@ size_t BufferedIOReader::operator()(void *ptr, size_t unitsize, size_t nitems)
|
|
180
178
|
dst += nb2;
|
181
179
|
size -= nb2;
|
182
180
|
}
|
181
|
+
ofs2 += nb;
|
183
182
|
return nb / unitsize;
|
184
183
|
}
|
185
184
|
|
186
185
|
|
187
186
|
BufferedIOWriter::BufferedIOWriter(IOWriter *writer, size_t bsz):
|
188
|
-
writer(writer), bsz(bsz), b0(0), buffer(bsz)
|
187
|
+
writer(writer), bsz(bsz), ofs2(0), b0(0), buffer(bsz)
|
189
188
|
{
|
190
189
|
}
|
191
190
|
|
@@ -222,7 +221,7 @@ size_t BufferedIOWriter::operator()(const void *ptr, size_t unitsize, size_t nit
|
|
222
221
|
src += nb1;
|
223
222
|
size -= nb1;
|
224
223
|
}
|
225
|
-
|
224
|
+
ofs2 += nb;
|
226
225
|
return nb / unitsize;
|
227
226
|
}
|
228
227
|
|
@@ -230,7 +229,7 @@ BufferedIOWriter::~BufferedIOWriter()
|
|
230
229
|
{
|
231
230
|
size_t ofs = 0;
|
232
231
|
while(ofs != b0) {
|
233
|
-
printf("Destructor write %
|
232
|
+
// printf("Destructor write %zd \n", b0 - ofs);
|
234
233
|
size_t written = (*writer)(buffer.data() + ofs, 1, b0 - ofs);
|
235
234
|
FAISS_THROW_IF_NOT(written > 0);
|
236
235
|
ofs += written;
|
@@ -242,11 +241,17 @@ BufferedIOWriter::~BufferedIOWriter()
|
|
242
241
|
|
243
242
|
|
244
243
|
|
245
|
-
uint32_t fourcc (const
|
244
|
+
uint32_t fourcc (const char sx[4]) {
|
246
245
|
assert(4 == strlen(sx));
|
247
246
|
const unsigned char *x = (unsigned char*)sx;
|
248
247
|
return x[0] | x[1] << 8 | x[2] << 16 | x[3] << 24;
|
249
248
|
}
|
250
249
|
|
250
|
+
uint32_t fourcc (const std::string & sx) {
|
251
|
+
assert(sx.length() == 4);
|
252
|
+
const unsigned char *x = (unsigned char*)sx.c_str();
|
253
|
+
return x[0] | x[1] << 8 | x[2] << 16 | x[3] << 24;
|
254
|
+
}
|
255
|
+
|
251
256
|
|
252
257
|
} // namespace faiss
|