faiss 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +80 -0
- data/vendor/faiss/c_api/AutoTune_c.h +2 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -6
- data/vendor/faiss/c_api/IndexShards_c.h +1 -4
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +4 -2
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +1 -1
- data/vendor/faiss/demos/demo_imi_flat.cpp +5 -2
- data/vendor/faiss/demos/demo_imi_pq.cpp +6 -2
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +7 -2
- data/vendor/faiss/{AutoTune.cpp → faiss/AutoTune.cpp} +9 -9
- data/vendor/faiss/{AutoTune.h → faiss/AutoTune.h} +0 -0
- data/vendor/faiss/{Clustering.cpp → faiss/Clustering.cpp} +13 -12
- data/vendor/faiss/{Clustering.h → faiss/Clustering.h} +0 -0
- data/vendor/faiss/{DirectMap.cpp → faiss/DirectMap.cpp} +0 -0
- data/vendor/faiss/{DirectMap.h → faiss/DirectMap.h} +0 -0
- data/vendor/faiss/{IVFlib.cpp → faiss/IVFlib.cpp} +86 -11
- data/vendor/faiss/{IVFlib.h → faiss/IVFlib.h} +26 -8
- data/vendor/faiss/{Index.cpp → faiss/Index.cpp} +0 -0
- data/vendor/faiss/{Index.h → faiss/Index.h} +1 -1
- data/vendor/faiss/{Index2Layer.cpp → faiss/Index2Layer.cpp} +12 -11
- data/vendor/faiss/{Index2Layer.h → faiss/Index2Layer.h} +0 -0
- data/vendor/faiss/{IndexBinary.cpp → faiss/IndexBinary.cpp} +2 -1
- data/vendor/faiss/{IndexBinary.h → faiss/IndexBinary.h} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.cpp → faiss/IndexBinaryFlat.cpp} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.h → faiss/IndexBinaryFlat.h} +0 -0
- data/vendor/faiss/{IndexBinaryFromFloat.cpp → faiss/IndexBinaryFromFloat.cpp} +1 -0
- data/vendor/faiss/{IndexBinaryFromFloat.h → faiss/IndexBinaryFromFloat.h} +0 -0
- data/vendor/faiss/{IndexBinaryHNSW.cpp → faiss/IndexBinaryHNSW.cpp} +1 -2
- data/vendor/faiss/{IndexBinaryHNSW.h → faiss/IndexBinaryHNSW.h} +0 -0
- data/vendor/faiss/{IndexBinaryHash.cpp → faiss/IndexBinaryHash.cpp} +16 -7
- data/vendor/faiss/{IndexBinaryHash.h → faiss/IndexBinaryHash.h} +2 -1
- data/vendor/faiss/{IndexBinaryIVF.cpp → faiss/IndexBinaryIVF.cpp} +10 -16
- data/vendor/faiss/{IndexBinaryIVF.h → faiss/IndexBinaryIVF.h} +1 -1
- data/vendor/faiss/{IndexFlat.cpp → faiss/IndexFlat.cpp} +0 -0
- data/vendor/faiss/{IndexFlat.h → faiss/IndexFlat.h} +0 -0
- data/vendor/faiss/{IndexHNSW.cpp → faiss/IndexHNSW.cpp} +63 -32
- data/vendor/faiss/{IndexHNSW.h → faiss/IndexHNSW.h} +0 -0
- data/vendor/faiss/{IndexIVF.cpp → faiss/IndexIVF.cpp} +129 -46
- data/vendor/faiss/{IndexIVF.h → faiss/IndexIVF.h} +7 -3
- data/vendor/faiss/{IndexIVFFlat.cpp → faiss/IndexIVFFlat.cpp} +6 -5
- data/vendor/faiss/{IndexIVFFlat.h → faiss/IndexIVFFlat.h} +0 -0
- data/vendor/faiss/{IndexIVFPQ.cpp → faiss/IndexIVFPQ.cpp} +9 -8
- data/vendor/faiss/{IndexIVFPQ.h → faiss/IndexIVFPQ.h} +4 -2
- data/vendor/faiss/{IndexIVFPQR.cpp → faiss/IndexIVFPQR.cpp} +3 -1
- data/vendor/faiss/{IndexIVFPQR.h → faiss/IndexIVFPQR.h} +0 -0
- data/vendor/faiss/{IndexIVFSpectralHash.cpp → faiss/IndexIVFSpectralHash.cpp} +1 -1
- data/vendor/faiss/{IndexIVFSpectralHash.h → faiss/IndexIVFSpectralHash.h} +0 -0
- data/vendor/faiss/{IndexLSH.cpp → faiss/IndexLSH.cpp} +0 -0
- data/vendor/faiss/{IndexLSH.h → faiss/IndexLSH.h} +0 -0
- data/vendor/faiss/{IndexLattice.cpp → faiss/IndexLattice.cpp} +0 -0
- data/vendor/faiss/{IndexLattice.h → faiss/IndexLattice.h} +0 -0
- data/vendor/faiss/{IndexPQ.cpp → faiss/IndexPQ.cpp} +6 -6
- data/vendor/faiss/{IndexPQ.h → faiss/IndexPQ.h} +3 -1
- data/vendor/faiss/{IndexPreTransform.cpp → faiss/IndexPreTransform.cpp} +0 -0
- data/vendor/faiss/{IndexPreTransform.h → faiss/IndexPreTransform.h} +0 -0
- data/vendor/faiss/{IndexReplicas.cpp → faiss/IndexReplicas.cpp} +102 -10
- data/vendor/faiss/{IndexReplicas.h → faiss/IndexReplicas.h} +6 -0
- data/vendor/faiss/{IndexScalarQuantizer.cpp → faiss/IndexScalarQuantizer.cpp} +3 -3
- data/vendor/faiss/{IndexScalarQuantizer.h → faiss/IndexScalarQuantizer.h} +0 -0
- data/vendor/faiss/{IndexShards.cpp → faiss/IndexShards.cpp} +37 -12
- data/vendor/faiss/{IndexShards.h → faiss/IndexShards.h} +3 -4
- data/vendor/faiss/{InvertedLists.cpp → faiss/InvertedLists.cpp} +2 -2
- data/vendor/faiss/{InvertedLists.h → faiss/InvertedLists.h} +1 -0
- data/vendor/faiss/{MatrixStats.cpp → faiss/MatrixStats.cpp} +0 -0
- data/vendor/faiss/{MatrixStats.h → faiss/MatrixStats.h} +0 -0
- data/vendor/faiss/{MetaIndexes.cpp → faiss/MetaIndexes.cpp} +5 -3
- data/vendor/faiss/{MetaIndexes.h → faiss/MetaIndexes.h} +0 -0
- data/vendor/faiss/{MetricType.h → faiss/MetricType.h} +0 -0
- data/vendor/faiss/{OnDiskInvertedLists.cpp → faiss/OnDiskInvertedLists.cpp} +141 -3
- data/vendor/faiss/{OnDiskInvertedLists.h → faiss/OnDiskInvertedLists.h} +27 -7
- data/vendor/faiss/{VectorTransform.cpp → faiss/VectorTransform.cpp} +4 -3
- data/vendor/faiss/{VectorTransform.h → faiss/VectorTransform.h} +0 -0
- data/vendor/faiss/{clone_index.cpp → faiss/clone_index.cpp} +0 -0
- data/vendor/faiss/{clone_index.h → faiss/clone_index.h} +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.cpp +14 -14
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.h +6 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuDistance.h +12 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuFaissAssert.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndex.h +3 -9
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexBinaryFlat.h +7 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexFlat.h +35 -10
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVF.h +1 -2
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFFlat.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFPQ.h +21 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFScalarQuantizer.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndicesOptions.h +0 -0
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +200 -0
- data/vendor/faiss/faiss/gpu/GpuResources.h +264 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +572 -0
- data/vendor/faiss/{gpu → faiss/gpu}/StandardGpuResources.h +83 -15
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper-inl.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfClustering.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfIVFPQAdd.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/WriteIndex.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexBinaryFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexFlat.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFPQ.cpp +141 -52
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuMemoryException.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.cpp +4 -2
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/demo_ivfpq_indexing_gpu.cpp +7 -5
- data/vendor/faiss/{gpu → faiss/gpu}/utils/DeviceUtils.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +213 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StackDeviceMemory.h +25 -40
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StaticUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.cpp +1 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.h +3 -1
- data/vendor/faiss/{impl → faiss/impl}/FaissAssert.h +1 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.cpp +26 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.h +4 -0
- data/vendor/faiss/{impl → faiss/impl}/HNSW.cpp +26 -26
- data/vendor/faiss/{impl → faiss/impl}/HNSW.h +19 -11
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.cpp +1 -1
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer-inl.h +0 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.cpp +9 -9
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.cpp +63 -39
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex-inl.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/index_read.cpp +99 -116
- data/vendor/faiss/{impl → faiss/impl}/index_write.cpp +15 -50
- data/vendor/faiss/{impl → faiss/impl}/io.cpp +15 -10
- data/vendor/faiss/{impl → faiss/impl}/io.h +22 -8
- data/vendor/faiss/faiss/impl/io_macros.h +57 -0
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.cpp +52 -36
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.h +3 -3
- data/vendor/faiss/faiss/impl/platform_macros.h +24 -0
- data/vendor/faiss/{index_factory.cpp → faiss/index_factory.cpp} +33 -12
- data/vendor/faiss/{index_factory.h → faiss/index_factory.h} +0 -0
- data/vendor/faiss/{index_io.h → faiss/index_io.h} +55 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +112 -0
- data/vendor/faiss/faiss/python/python_callbacks.h +45 -0
- data/vendor/faiss/{utils → faiss/utils}/Heap.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/Heap.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.cpp +0 -0
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/distances.cpp +28 -13
- data/vendor/faiss/{utils → faiss/utils}/distances.h +2 -1
- data/vendor/faiss/{utils → faiss/utils}/distances_simd.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/hamming-inl.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/hamming.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/hamming.h +7 -1
- data/vendor/faiss/{utils → faiss/utils}/random.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/random.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/utils.cpp +27 -28
- data/vendor/faiss/{utils → faiss/utils}/utils.h +4 -0
- data/vendor/faiss/misc/test_blas.cpp +4 -1
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -2
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +6 -1
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +4 -1
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +6 -4
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +12 -5
- data/vendor/faiss/tests/test_merge.cpp +6 -3
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +7 -2
- data/vendor/faiss/tests/test_pairs_decoding.cpp +5 -1
- data/vendor/faiss/tests/test_params_override.cpp +7 -2
- data/vendor/faiss/tests/test_sliding_ivf.cpp +10 -4
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +14 -8
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +11 -7
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +12 -7
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +6 -3
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +7 -3
- metadata +154 -153
- data/vendor/faiss/gpu/GpuResources.cpp +0 -52
- data/vendor/faiss/gpu/GpuResources.h +0 -73
- data/vendor/faiss/gpu/StandardGpuResources.cpp +0 -303
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +0 -77
- data/vendor/faiss/gpu/utils/DeviceMemory.h +0 -71
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +0 -89
- data/vendor/faiss/gpu/utils/MemorySpace.h +0 -44
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +0 -239
@@ -29,7 +29,7 @@ struct ScalarQuantizer {
|
|
29
29
|
QT_8bit_uniform, ///< same, shared range for all dimensions
|
30
30
|
QT_4bit_uniform,
|
31
31
|
QT_fp16,
|
32
|
-
QT_8bit_direct,
|
32
|
+
QT_8bit_direct, ///< fast indexing of uint8s
|
33
33
|
QT_6bit, ///< 6 bits per component
|
34
34
|
};
|
35
35
|
|
File without changes
|
File without changes
|
@@ -12,13 +12,16 @@
|
|
12
12
|
#include <cstdio>
|
13
13
|
#include <cstdlib>
|
14
14
|
|
15
|
-
#include <sys/mman.h>
|
16
15
|
#include <sys/types.h>
|
17
16
|
#include <sys/stat.h>
|
18
|
-
|
17
|
+
|
18
|
+
#ifndef _MSC_VER
|
19
|
+
#include <sys/mman.h>
|
20
|
+
#endif // !_MSC_VER
|
19
21
|
|
20
22
|
#include <faiss/impl/FaissAssert.h>
|
21
23
|
#include <faiss/impl/io.h>
|
24
|
+
#include <faiss/impl/io_macros.h>
|
22
25
|
#include <faiss/utils/hamming.h>
|
23
26
|
|
24
27
|
#include <faiss/IndexFlat.h>
|
@@ -36,46 +39,19 @@
|
|
36
39
|
#include <faiss/IndexScalarQuantizer.h>
|
37
40
|
#include <faiss/IndexHNSW.h>
|
38
41
|
#include <faiss/IndexLattice.h>
|
39
|
-
|
40
|
-
#include <faiss/OnDiskInvertedLists.h>
|
41
42
|
#include <faiss/IndexBinaryFlat.h>
|
42
43
|
#include <faiss/IndexBinaryFromFloat.h>
|
43
44
|
#include <faiss/IndexBinaryHNSW.h>
|
44
45
|
#include <faiss/IndexBinaryIVF.h>
|
45
46
|
#include <faiss/IndexBinaryHash.h>
|
46
47
|
|
48
|
+
#ifndef _MSC_VER
|
49
|
+
#include <faiss/OnDiskInvertedLists.h>
|
50
|
+
#endif // !_MSC_VER
|
47
51
|
|
48
52
|
|
49
53
|
namespace faiss {
|
50
54
|
|
51
|
-
/*************************************************************
|
52
|
-
* I/O macros
|
53
|
-
*
|
54
|
-
* we use macros so that we have a line number to report in abort
|
55
|
-
* (). This makes debugging a lot easier. The IOReader or IOWriter is
|
56
|
-
* always called f and thus is not passed in as a macro parameter.
|
57
|
-
**************************************************************/
|
58
|
-
|
59
|
-
|
60
|
-
#define READANDCHECK(ptr, n) { \
|
61
|
-
size_t ret = (*f)(ptr, sizeof(*(ptr)), n); \
|
62
|
-
FAISS_THROW_IF_NOT_FMT(ret == (n), \
|
63
|
-
"read error in %s: %ld != %ld (%s)", \
|
64
|
-
f->name.c_str(), ret, size_t(n), strerror(errno)); \
|
65
|
-
}
|
66
|
-
|
67
|
-
#define READ1(x) READANDCHECK(&(x), 1)
|
68
|
-
|
69
|
-
// will fail if we write 256G of data at once...
|
70
|
-
#define READVECTOR(vec) { \
|
71
|
-
long size; \
|
72
|
-
READANDCHECK (&size, 1); \
|
73
|
-
FAISS_THROW_IF_NOT (size >= 0 && size < (1L << 40)); \
|
74
|
-
(vec).resize (size); \
|
75
|
-
READANDCHECK ((vec).data (), size); \
|
76
|
-
}
|
77
|
-
|
78
|
-
|
79
55
|
|
80
56
|
/*************************************************************
|
81
57
|
* Read
|
@@ -202,7 +178,7 @@ InvertedLists *read_InvertedLists (IOReader *f, int io_flags) {
|
|
202
178
|
fprintf(stderr, "read_InvertedLists:"
|
203
179
|
" WARN! inverted lists not stored with IVF object\n");
|
204
180
|
return nullptr;
|
205
|
-
} else if (h == fourcc ("ilar") && !(io_flags &
|
181
|
+
} else if (h == fourcc ("ilar") && !(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
|
206
182
|
auto ails = new ArrayInvertedLists (0, 0);
|
207
183
|
READ1 (ails->nlist);
|
208
184
|
READ1 (ails->code_size);
|
@@ -222,95 +198,31 @@ InvertedLists *read_InvertedLists (IOReader *f, int io_flags) {
|
|
222
198
|
}
|
223
199
|
}
|
224
200
|
return ails;
|
225
|
-
} else if (h == fourcc ("ilar") && (io_flags & IO_FLAG_MMAP)) {
|
226
|
-
// then we load it as an OnDiskInvertedLists
|
227
201
|
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
202
|
+
#ifdef _MSC_VER
|
203
|
+
} else {
|
204
|
+
FAISS_THROW_MSG("Unsupported inverted list format for Windows");
|
205
|
+
}
|
206
|
+
#else
|
207
|
+
} else if (h == fourcc ("ilar") && (io_flags & IO_FLAG_SKIP_IVF_DATA)) {
|
208
|
+
// code is always ilxx where xx is specific to the type of invlists we want
|
209
|
+
// so we get the 16 high bits from the io_flag and the 16 low bits as "il"
|
210
|
+
int h2 = (io_flags & 0xffff0000) | (fourcc("il__") & 0x0000ffff);
|
211
|
+
size_t nlist, code_size;
|
212
|
+
READ1 (nlist);
|
213
|
+
READ1 (code_size);
|
214
|
+
std::vector<size_t> sizes (nlist);
|
238
215
|
read_ArrayInvertedLists_sizes (f, sizes);
|
239
|
-
|
240
|
-
|
241
|
-
struct stat buf;
|
242
|
-
int ret = fstat (fileno(fdesc), &buf);
|
243
|
-
FAISS_THROW_IF_NOT_FMT (ret == 0,
|
244
|
-
"fstat failed: %s", strerror(errno));
|
245
|
-
ails->totsize = buf.st_size;
|
246
|
-
ails->ptr = (uint8_t*)mmap (nullptr, ails->totsize,
|
247
|
-
PROT_READ, MAP_SHARED,
|
248
|
-
fileno(fdesc), 0);
|
249
|
-
FAISS_THROW_IF_NOT_FMT (ails->ptr != MAP_FAILED,
|
250
|
-
"could not mmap: %s",
|
251
|
-
strerror(errno));
|
252
|
-
}
|
253
|
-
|
254
|
-
for (size_t i = 0; i < ails->nlist; i++) {
|
255
|
-
OnDiskInvertedLists::List & l = ails->lists[i];
|
256
|
-
l.size = l.capacity = sizes[i];
|
257
|
-
l.offset = o;
|
258
|
-
o += l.size * (sizeof(OnDiskInvertedLists::idx_t) +
|
259
|
-
ails->code_size);
|
260
|
-
}
|
261
|
-
FAISS_THROW_IF_NOT(o <= ails->totsize);
|
262
|
-
// resume normal reading of file
|
263
|
-
fseek (fdesc, o, SEEK_SET);
|
264
|
-
return ails;
|
265
|
-
} else if (h == fourcc ("ilod")) {
|
266
|
-
OnDiskInvertedLists *od = new OnDiskInvertedLists();
|
267
|
-
od->read_only = io_flags & IO_FLAG_READ_ONLY;
|
268
|
-
READ1 (od->nlist);
|
269
|
-
READ1 (od->code_size);
|
270
|
-
// this is a POD object
|
271
|
-
READVECTOR (od->lists);
|
272
|
-
{
|
273
|
-
std::vector<OnDiskInvertedLists::Slot> v;
|
274
|
-
READVECTOR(v);
|
275
|
-
od->slots.assign(v.begin(), v.end());
|
276
|
-
}
|
277
|
-
{
|
278
|
-
std::vector<char> x;
|
279
|
-
READVECTOR(x);
|
280
|
-
od->filename.assign(x.begin(), x.end());
|
281
|
-
|
282
|
-
if (io_flags & IO_FLAG_ONDISK_SAME_DIR) {
|
283
|
-
FileIOReader *reader = dynamic_cast<FileIOReader*>(f);
|
284
|
-
FAISS_THROW_IF_NOT_MSG (
|
285
|
-
reader, "IO_FLAG_ONDISK_SAME_DIR only supported "
|
286
|
-
"when reading from file");
|
287
|
-
std::string indexname = reader->name;
|
288
|
-
std::string dirname = "./";
|
289
|
-
size_t slash = indexname.find_last_of('/');
|
290
|
-
if (slash != std::string::npos) {
|
291
|
-
dirname = indexname.substr(0, slash + 1);
|
292
|
-
}
|
293
|
-
std::string filename = od->filename;
|
294
|
-
slash = filename.find_last_of('/');
|
295
|
-
if (slash != std::string::npos) {
|
296
|
-
filename = filename.substr(slash + 1);
|
297
|
-
}
|
298
|
-
filename = dirname + filename;
|
299
|
-
printf("IO_FLAG_ONDISK_SAME_DIR: "
|
300
|
-
"updating ondisk filename from %s to %s\n",
|
301
|
-
od->filename.c_str(), filename.c_str());
|
302
|
-
od->filename = filename;
|
303
|
-
}
|
304
|
-
|
305
|
-
}
|
306
|
-
READ1(od->totsize);
|
307
|
-
od->do_mmap();
|
308
|
-
return od;
|
216
|
+
return InvertedListsIOHook::lookup(h2)->read_ArrayInvertedLists(
|
217
|
+
f, io_flags, nlist, code_size, sizes);
|
309
218
|
} else {
|
310
|
-
|
219
|
+
return InvertedListsIOHook::lookup(h)->read(f, io_flags);
|
311
220
|
}
|
221
|
+
#endif // !_MSC_VER
|
222
|
+
|
312
223
|
}
|
313
224
|
|
225
|
+
|
314
226
|
static void read_InvertedLists (
|
315
227
|
IndexIVF *ivf, IOReader *f, int io_flags) {
|
316
228
|
InvertedLists *ils = read_InvertedLists (f, io_flags);
|
@@ -885,5 +797,76 @@ IndexBinary *read_index_binary (const char *fname, int io_flags) {
|
|
885
797
|
return idx;
|
886
798
|
}
|
887
799
|
|
800
|
+
#ifndef _MSC_VER
|
801
|
+
|
802
|
+
/**********************************************************
|
803
|
+
* InvertedListIOHook's
|
804
|
+
**********************************************************/
|
805
|
+
|
806
|
+
InvertedListsIOHook::InvertedListsIOHook(
|
807
|
+
const std::string & key, const std::string & classname):
|
808
|
+
key(key), classname(classname)
|
809
|
+
{}
|
810
|
+
|
811
|
+
namespace {
|
812
|
+
|
813
|
+
/// std::vector that deletes its contents
|
814
|
+
struct IOHookTable: std::vector<InvertedListsIOHook*> {
|
815
|
+
|
816
|
+
IOHookTable() {
|
817
|
+
push_back(new OnDiskInvertedListsIOHook());
|
818
|
+
}
|
819
|
+
|
820
|
+
~IOHookTable() {
|
821
|
+
for (auto x: *this) {
|
822
|
+
delete x;
|
823
|
+
}
|
824
|
+
}
|
825
|
+
};
|
826
|
+
|
827
|
+
static IOHookTable InvertedListsIOHook_table;
|
828
|
+
|
829
|
+
} // anonymous namepsace
|
830
|
+
|
831
|
+
InvertedListsIOHook* InvertedListsIOHook::lookup(int h)
|
832
|
+
{
|
833
|
+
for(const auto & callback: InvertedListsIOHook_table) {
|
834
|
+
if (h == fourcc(callback->key)) {
|
835
|
+
return callback;
|
836
|
+
}
|
837
|
+
}
|
838
|
+
FAISS_THROW_FMT ("read_InvertedLists: could not load ArrayInvertedLists as %04x", h);
|
839
|
+
}
|
840
|
+
|
841
|
+
InvertedListsIOHook* InvertedListsIOHook::lookup_classname(const std::string & classname)
|
842
|
+
{
|
843
|
+
for(const auto & callback: InvertedListsIOHook_table) {
|
844
|
+
if (callback->classname == classname) {
|
845
|
+
return callback;
|
846
|
+
}
|
847
|
+
}
|
848
|
+
FAISS_THROW_FMT ("read_InvertedLists: could not find classname %s", classname.c_str());
|
849
|
+
}
|
850
|
+
|
851
|
+
void InvertedListsIOHook::add_callback(InvertedListsIOHook *cb)
|
852
|
+
{
|
853
|
+
InvertedListsIOHook_table.push_back(cb);
|
854
|
+
}
|
855
|
+
|
856
|
+
void InvertedListsIOHook::print_callbacks()
|
857
|
+
{
|
858
|
+
printf("registered %zd InvertedListsIOHooks:\n",
|
859
|
+
InvertedListsIOHook_table.size());
|
860
|
+
for(const auto & cb: InvertedListsIOHook_table) {
|
861
|
+
printf("%08x %s %s\n",
|
862
|
+
fourcc(cb->key.c_str()),
|
863
|
+
cb->key.c_str(),
|
864
|
+
cb->classname.c_str());
|
865
|
+
}
|
866
|
+
}
|
867
|
+
|
868
|
+
#endif // !_MSC_VER
|
869
|
+
|
870
|
+
|
888
871
|
|
889
872
|
} // namespace faiss
|
@@ -12,13 +12,16 @@
|
|
12
12
|
#include <cstdio>
|
13
13
|
#include <cstdlib>
|
14
14
|
|
15
|
-
#include <sys/mman.h>
|
16
15
|
#include <sys/types.h>
|
17
16
|
#include <sys/stat.h>
|
18
|
-
|
17
|
+
|
18
|
+
#ifndef _MSC_VER
|
19
|
+
#include <sys/mman.h>
|
20
|
+
#endif // !_MSC_VER
|
19
21
|
|
20
22
|
#include <faiss/impl/FaissAssert.h>
|
21
23
|
#include <faiss/impl/io.h>
|
24
|
+
#include <faiss/impl/io_macros.h>
|
22
25
|
#include <faiss/utils/hamming.h>
|
23
26
|
|
24
27
|
#include <faiss/IndexFlat.h>
|
@@ -37,13 +40,15 @@
|
|
37
40
|
#include <faiss/IndexHNSW.h>
|
38
41
|
#include <faiss/IndexLattice.h>
|
39
42
|
|
40
|
-
#include <faiss/OnDiskInvertedLists.h>
|
41
43
|
#include <faiss/IndexBinaryFlat.h>
|
42
44
|
#include <faiss/IndexBinaryFromFloat.h>
|
43
45
|
#include <faiss/IndexBinaryHNSW.h>
|
44
46
|
#include <faiss/IndexBinaryIVF.h>
|
45
47
|
#include <faiss/IndexBinaryHash.h>
|
46
48
|
|
49
|
+
#ifndef _MSC_VER
|
50
|
+
#include <faiss/OnDiskInvertedLists.h>
|
51
|
+
#endif // !_MSC_VER
|
47
52
|
|
48
53
|
|
49
54
|
/*************************************************************
|
@@ -68,37 +73,9 @@
|
|
68
73
|
* leak memory.
|
69
74
|
**************************************************************/
|
70
75
|
|
71
|
-
|
72
|
-
|
73
76
|
namespace faiss {
|
74
77
|
|
75
78
|
|
76
|
-
/*************************************************************
|
77
|
-
* I/O macros
|
78
|
-
*
|
79
|
-
* we use macros so that we have a line number to report in abort
|
80
|
-
* (). This makes debugging a lot easier. The IOReader or IOWriter is
|
81
|
-
* always called f and thus is not passed in as a macro parameter.
|
82
|
-
**************************************************************/
|
83
|
-
|
84
|
-
|
85
|
-
#define WRITEANDCHECK(ptr, n) { \
|
86
|
-
size_t ret = (*f)(ptr, sizeof(*(ptr)), n); \
|
87
|
-
FAISS_THROW_IF_NOT_FMT(ret == (n), \
|
88
|
-
"write error in %s: %ld != %ld (%s)", \
|
89
|
-
f->name.c_str(), ret, size_t(n), strerror(errno)); \
|
90
|
-
}
|
91
|
-
|
92
|
-
#define WRITE1(x) WRITEANDCHECK(&(x), 1)
|
93
|
-
|
94
|
-
#define WRITEVECTOR(vec) { \
|
95
|
-
size_t size = (vec).size (); \
|
96
|
-
WRITEANDCHECK (&size, 1); \
|
97
|
-
WRITEANDCHECK ((vec).data (), size); \
|
98
|
-
}
|
99
|
-
|
100
|
-
|
101
|
-
|
102
79
|
/*************************************************************
|
103
80
|
* Write
|
104
81
|
**************************************************************/
|
@@ -239,31 +216,19 @@ void write_InvertedLists (const InvertedLists *ils, IOWriter *f) {
|
|
239
216
|
WRITEANDCHECK (ails->ids[i].data(), n);
|
240
217
|
}
|
241
218
|
}
|
242
|
-
|
243
|
-
|
244
|
-
uint32_t h = fourcc ("ilod");
|
245
|
-
WRITE1 (h);
|
246
|
-
WRITE1 (ils->nlist);
|
247
|
-
WRITE1 (ils->code_size);
|
248
|
-
// this is a POD object
|
249
|
-
WRITEVECTOR (od->lists);
|
219
|
+
#ifndef _MSC_VER
|
220
|
+
} else {
|
250
221
|
|
251
|
-
|
252
|
-
|
253
|
-
od->slots.begin(), od->slots.end());
|
254
|
-
WRITEVECTOR(v);
|
255
|
-
}
|
256
|
-
{
|
257
|
-
std::vector<char> x(od->filename.begin(), od->filename.end());
|
258
|
-
WRITEVECTOR(x);
|
259
|
-
}
|
260
|
-
WRITE1(od->totsize);
|
222
|
+
InvertedListsIOHook::lookup_classname(
|
223
|
+
typeid(*ils).name())->write(ils, f);
|
261
224
|
|
262
|
-
|
225
|
+
/*
|
263
226
|
fprintf(stderr, "WARN! write_InvertedLists: unsupported invlist type, "
|
264
227
|
"saving null invlist\n");
|
265
228
|
uint32_t h = fourcc ("il00");
|
266
229
|
WRITE1 (h);
|
230
|
+
*/
|
231
|
+
#endif // !_MSC_VER
|
267
232
|
}
|
268
233
|
}
|
269
234
|
|
@@ -7,6 +7,7 @@
|
|
7
7
|
|
8
8
|
// -*- c++ -*-
|
9
9
|
|
10
|
+
#include <algorithm>
|
10
11
|
#include <cstring>
|
11
12
|
#include <cassert>
|
12
13
|
|
@@ -135,8 +136,8 @@ int FileIOWriter::fileno() {
|
|
135
136
|
* IO buffer
|
136
137
|
***********************************************************************/
|
137
138
|
|
138
|
-
BufferedIOReader::BufferedIOReader(IOReader *reader, size_t bsz
|
139
|
-
reader(reader), bsz(bsz),
|
139
|
+
BufferedIOReader::BufferedIOReader(IOReader *reader, size_t bsz):
|
140
|
+
reader(reader), bsz(bsz), ofs(0), ofs2(0), b0(0), b1(0), buffer(bsz)
|
140
141
|
{
|
141
142
|
}
|
142
143
|
|
@@ -156,15 +157,12 @@ size_t BufferedIOReader::operator()(void *ptr, size_t unitsize, size_t nitems)
|
|
156
157
|
size -= nb;
|
157
158
|
}
|
158
159
|
|
159
|
-
if (size > totsz - ofs) {
|
160
|
-
size = totsz - ofs;
|
161
|
-
}
|
162
160
|
// while we would like to have more data
|
163
161
|
while (size > 0) {
|
164
162
|
assert (b0 == b1); // buffer empty on input
|
165
163
|
// try to read from main reader
|
166
164
|
b0 = 0;
|
167
|
-
b1 = (*reader)(buffer.data(), 1,
|
165
|
+
b1 = (*reader)(buffer.data(), 1, bsz);
|
168
166
|
|
169
167
|
if (b1 == 0) {
|
170
168
|
// no more bytes available
|
@@ -180,12 +178,13 @@ size_t BufferedIOReader::operator()(void *ptr, size_t unitsize, size_t nitems)
|
|
180
178
|
dst += nb2;
|
181
179
|
size -= nb2;
|
182
180
|
}
|
181
|
+
ofs2 += nb;
|
183
182
|
return nb / unitsize;
|
184
183
|
}
|
185
184
|
|
186
185
|
|
187
186
|
BufferedIOWriter::BufferedIOWriter(IOWriter *writer, size_t bsz):
|
188
|
-
writer(writer), bsz(bsz), b0(0), buffer(bsz)
|
187
|
+
writer(writer), bsz(bsz), ofs2(0), b0(0), buffer(bsz)
|
189
188
|
{
|
190
189
|
}
|
191
190
|
|
@@ -222,7 +221,7 @@ size_t BufferedIOWriter::operator()(const void *ptr, size_t unitsize, size_t nit
|
|
222
221
|
src += nb1;
|
223
222
|
size -= nb1;
|
224
223
|
}
|
225
|
-
|
224
|
+
ofs2 += nb;
|
226
225
|
return nb / unitsize;
|
227
226
|
}
|
228
227
|
|
@@ -230,7 +229,7 @@ BufferedIOWriter::~BufferedIOWriter()
|
|
230
229
|
{
|
231
230
|
size_t ofs = 0;
|
232
231
|
while(ofs != b0) {
|
233
|
-
printf("Destructor write %
|
232
|
+
// printf("Destructor write %zd \n", b0 - ofs);
|
234
233
|
size_t written = (*writer)(buffer.data() + ofs, 1, b0 - ofs);
|
235
234
|
FAISS_THROW_IF_NOT(written > 0);
|
236
235
|
ofs += written;
|
@@ -242,11 +241,17 @@ BufferedIOWriter::~BufferedIOWriter()
|
|
242
241
|
|
243
242
|
|
244
243
|
|
245
|
-
uint32_t fourcc (const
|
244
|
+
uint32_t fourcc (const char sx[4]) {
|
246
245
|
assert(4 == strlen(sx));
|
247
246
|
const unsigned char *x = (unsigned char*)sx;
|
248
247
|
return x[0] | x[1] << 8 | x[2] << 16 | x[3] << 24;
|
249
248
|
}
|
250
249
|
|
250
|
+
uint32_t fourcc (const std::string & sx) {
|
251
|
+
assert(sx.length() == 4);
|
252
|
+
const unsigned char *x = (unsigned char*)sx.c_str();
|
253
|
+
return x[0] | x[1] << 8 | x[2] << 16 | x[3] << 24;
|
254
|
+
}
|
255
|
+
|
251
256
|
|
252
257
|
} // namespace faiss
|