faiss 0.2.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +1 -1
- data/README.md +7 -7
- data/ext/faiss/extconf.rb +6 -3
- data/ext/faiss/numo.hpp +4 -4
- data/ext/faiss/utils.cpp +1 -1
- data/ext/faiss/utils.h +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +365 -194
- data/vendor/faiss/faiss/Clustering.h +102 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
- data/vendor/faiss/faiss/Index2Layer.h +22 -36
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
- data/vendor/faiss/faiss/IndexFlat.h +42 -59
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
- data/vendor/faiss/faiss/IndexIVF.h +169 -118
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
- data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
- data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
- data/vendor/faiss/faiss/IndexLSH.h +20 -38
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
- data/vendor/faiss/faiss/IndexPQ.h +64 -82
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
- data/vendor/faiss/faiss/IndexRefine.h +32 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
- data/vendor/faiss/faiss/VectorTransform.h +64 -89
- data/vendor/faiss/faiss/clone_index.cpp +78 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
- data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
- data/vendor/faiss/faiss/impl/io.cpp +76 -95
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +60 -29
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +619 -397
- data/vendor/faiss/faiss/index_factory.h +8 -6
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +305 -312
- data/vendor/faiss/faiss/utils/distances.h +170 -122
- data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +54 -49
- metadata +29 -4
|
@@ -8,150 +8,141 @@
|
|
|
8
8
|
// -*- c++ -*-
|
|
9
9
|
|
|
10
10
|
#include <algorithm>
|
|
11
|
-
#include <cstring>
|
|
12
11
|
#include <cassert>
|
|
12
|
+
#include <cstring>
|
|
13
13
|
|
|
14
|
-
#include <faiss/impl/io.h>
|
|
15
14
|
#include <faiss/impl/FaissAssert.h>
|
|
16
|
-
|
|
15
|
+
#include <faiss/impl/io.h>
|
|
17
16
|
|
|
18
17
|
namespace faiss {
|
|
19
18
|
|
|
20
|
-
|
|
21
19
|
/***********************************************************************
|
|
22
20
|
* IO functions
|
|
23
21
|
***********************************************************************/
|
|
24
22
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
{
|
|
28
|
-
FAISS_THROW_MSG ("IOReader does not support memory mapping");
|
|
23
|
+
int IOReader::fileno() {
|
|
24
|
+
FAISS_THROW_MSG("IOReader does not support memory mapping");
|
|
29
25
|
}
|
|
30
26
|
|
|
31
|
-
int IOWriter::fileno
|
|
32
|
-
|
|
33
|
-
FAISS_THROW_MSG ("IOWriter does not support memory mapping");
|
|
27
|
+
int IOWriter::fileno() {
|
|
28
|
+
FAISS_THROW_MSG("IOWriter does not support memory mapping");
|
|
34
29
|
}
|
|
35
30
|
|
|
36
31
|
/***********************************************************************
|
|
37
32
|
* IO Vector
|
|
38
33
|
***********************************************************************/
|
|
39
34
|
|
|
40
|
-
|
|
41
|
-
size_t VectorIOWriter::operator()(
|
|
42
|
-
const void *ptr, size_t size, size_t nitems)
|
|
43
|
-
{
|
|
35
|
+
size_t VectorIOWriter::operator()(const void* ptr, size_t size, size_t nitems) {
|
|
44
36
|
size_t bytes = size * nitems;
|
|
45
37
|
if (bytes > 0) {
|
|
46
38
|
size_t o = data.size();
|
|
47
39
|
data.resize(o + bytes);
|
|
48
|
-
memcpy
|
|
40
|
+
memcpy(&data[o], ptr, size * nitems);
|
|
49
41
|
}
|
|
50
42
|
return nitems;
|
|
51
43
|
}
|
|
52
44
|
|
|
53
|
-
size_t VectorIOReader::operator()(
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
if (rp >= data.size()) return 0;
|
|
45
|
+
size_t VectorIOReader::operator()(void* ptr, size_t size, size_t nitems) {
|
|
46
|
+
if (rp >= data.size())
|
|
47
|
+
return 0;
|
|
57
48
|
size_t nremain = (data.size() - rp) / size;
|
|
58
|
-
if (nremain < nitems)
|
|
49
|
+
if (nremain < nitems)
|
|
50
|
+
nitems = nremain;
|
|
59
51
|
if (size * nitems > 0) {
|
|
60
|
-
memcpy
|
|
52
|
+
memcpy(ptr, &data[rp], size * nitems);
|
|
61
53
|
rp += size * nitems;
|
|
62
54
|
}
|
|
63
55
|
return nitems;
|
|
64
56
|
}
|
|
65
57
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
58
|
/***********************************************************************
|
|
70
59
|
* IO File
|
|
71
60
|
***********************************************************************/
|
|
72
61
|
|
|
62
|
+
FileIOReader::FileIOReader(FILE* rf) : f(rf) {}
|
|
73
63
|
|
|
74
|
-
|
|
75
|
-
FileIOReader::FileIOReader(FILE *rf): f(rf) {}
|
|
76
|
-
|
|
77
|
-
FileIOReader::FileIOReader(const char * fname)
|
|
78
|
-
{
|
|
64
|
+
FileIOReader::FileIOReader(const char* fname) {
|
|
79
65
|
name = fname;
|
|
80
66
|
f = fopen(fname, "rb");
|
|
81
|
-
FAISS_THROW_IF_NOT_FMT
|
|
82
|
-
|
|
67
|
+
FAISS_THROW_IF_NOT_FMT(
|
|
68
|
+
f, "could not open %s for reading: %s", fname, strerror(errno));
|
|
83
69
|
need_close = true;
|
|
84
70
|
}
|
|
85
71
|
|
|
86
|
-
FileIOReader::~FileIOReader()
|
|
72
|
+
FileIOReader::~FileIOReader() {
|
|
87
73
|
if (need_close) {
|
|
88
74
|
int ret = fclose(f);
|
|
89
|
-
if (ret != 0) {// we cannot raise an exception in the destructor
|
|
90
|
-
fprintf(stderr,
|
|
91
|
-
|
|
75
|
+
if (ret != 0) { // we cannot raise an exception in the destructor
|
|
76
|
+
fprintf(stderr,
|
|
77
|
+
"file %s close error: %s",
|
|
78
|
+
name.c_str(),
|
|
79
|
+
strerror(errno));
|
|
92
80
|
}
|
|
93
81
|
}
|
|
94
82
|
}
|
|
95
83
|
|
|
96
|
-
size_t FileIOReader::operator()(void
|
|
84
|
+
size_t FileIOReader::operator()(void* ptr, size_t size, size_t nitems) {
|
|
97
85
|
return fread(ptr, size, nitems, f);
|
|
98
86
|
}
|
|
99
87
|
|
|
100
|
-
int FileIOReader::fileno()
|
|
101
|
-
return ::fileno
|
|
88
|
+
int FileIOReader::fileno() {
|
|
89
|
+
return ::fileno(f);
|
|
102
90
|
}
|
|
103
91
|
|
|
92
|
+
FileIOWriter::FileIOWriter(FILE* wf) : f(wf) {}
|
|
104
93
|
|
|
105
|
-
FileIOWriter::FileIOWriter(
|
|
106
|
-
|
|
107
|
-
FileIOWriter::FileIOWriter(const char * fname)
|
|
108
|
-
{
|
|
94
|
+
FileIOWriter::FileIOWriter(const char* fname) {
|
|
109
95
|
name = fname;
|
|
110
96
|
f = fopen(fname, "wb");
|
|
111
|
-
FAISS_THROW_IF_NOT_FMT
|
|
112
|
-
|
|
97
|
+
FAISS_THROW_IF_NOT_FMT(
|
|
98
|
+
f, "could not open %s for writing: %s", fname, strerror(errno));
|
|
113
99
|
need_close = true;
|
|
114
100
|
}
|
|
115
101
|
|
|
116
|
-
FileIOWriter::~FileIOWriter()
|
|
102
|
+
FileIOWriter::~FileIOWriter() {
|
|
117
103
|
if (need_close) {
|
|
118
104
|
int ret = fclose(f);
|
|
119
105
|
if (ret != 0) {
|
|
120
106
|
// we cannot raise an exception in the destructor
|
|
121
|
-
fprintf(stderr,
|
|
122
|
-
|
|
107
|
+
fprintf(stderr,
|
|
108
|
+
"file %s close error: %s",
|
|
109
|
+
name.c_str(),
|
|
110
|
+
strerror(errno));
|
|
123
111
|
}
|
|
124
112
|
}
|
|
125
113
|
}
|
|
126
114
|
|
|
127
|
-
size_t FileIOWriter::operator()(const void
|
|
115
|
+
size_t FileIOWriter::operator()(const void* ptr, size_t size, size_t nitems) {
|
|
128
116
|
return fwrite(ptr, size, nitems, f);
|
|
129
117
|
}
|
|
130
118
|
|
|
131
|
-
int FileIOWriter::fileno()
|
|
132
|
-
return ::fileno
|
|
119
|
+
int FileIOWriter::fileno() {
|
|
120
|
+
return ::fileno(f);
|
|
133
121
|
}
|
|
134
122
|
|
|
135
123
|
/***********************************************************************
|
|
136
124
|
* IO buffer
|
|
137
125
|
***********************************************************************/
|
|
138
126
|
|
|
139
|
-
BufferedIOReader::BufferedIOReader(IOReader
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
127
|
+
BufferedIOReader::BufferedIOReader(IOReader* reader, size_t bsz)
|
|
128
|
+
: reader(reader),
|
|
129
|
+
bsz(bsz),
|
|
130
|
+
ofs(0),
|
|
131
|
+
ofs2(0),
|
|
132
|
+
b0(0),
|
|
133
|
+
b1(0),
|
|
134
|
+
buffer(bsz) {}
|
|
144
135
|
|
|
145
|
-
size_t BufferedIOReader::operator()(void
|
|
146
|
-
{
|
|
136
|
+
size_t BufferedIOReader::operator()(void* ptr, size_t unitsize, size_t nitems) {
|
|
147
137
|
size_t size = unitsize * nitems;
|
|
148
|
-
if (size == 0)
|
|
149
|
-
|
|
138
|
+
if (size == 0)
|
|
139
|
+
return 0;
|
|
140
|
+
char* dst = (char*)ptr;
|
|
150
141
|
size_t nb;
|
|
151
142
|
|
|
152
143
|
{ // first copy available bytes
|
|
153
144
|
nb = std::min(b1 - b0, size);
|
|
154
|
-
memcpy
|
|
145
|
+
memcpy(dst, buffer.data() + b0, nb);
|
|
155
146
|
b0 += nb;
|
|
156
147
|
dst += nb;
|
|
157
148
|
size -= nb;
|
|
@@ -159,7 +150,7 @@ size_t BufferedIOReader::operator()(void *ptr, size_t unitsize, size_t nitems)
|
|
|
159
150
|
|
|
160
151
|
// while we would like to have more data
|
|
161
152
|
while (size > 0) {
|
|
162
|
-
assert
|
|
153
|
+
assert(b0 == b1); // buffer empty on input
|
|
163
154
|
// try to read from main reader
|
|
164
155
|
b0 = 0;
|
|
165
156
|
b1 = (*reader)(buffer.data(), 1, bsz);
|
|
@@ -172,7 +163,7 @@ size_t BufferedIOReader::operator()(void *ptr, size_t unitsize, size_t nitems)
|
|
|
172
163
|
|
|
173
164
|
// copy remaining bytes
|
|
174
165
|
size_t nb2 = std::min(b1, size);
|
|
175
|
-
memcpy
|
|
166
|
+
memcpy(dst, buffer.data(), nb2);
|
|
176
167
|
b0 = nb2;
|
|
177
168
|
nb += nb2;
|
|
178
169
|
dst += nb2;
|
|
@@ -182,22 +173,22 @@ size_t BufferedIOReader::operator()(void *ptr, size_t unitsize, size_t nitems)
|
|
|
182
173
|
return nb / unitsize;
|
|
183
174
|
}
|
|
184
175
|
|
|
176
|
+
BufferedIOWriter::BufferedIOWriter(IOWriter* writer, size_t bsz)
|
|
177
|
+
: writer(writer), bsz(bsz), ofs2(0), b0(0), buffer(bsz) {}
|
|
185
178
|
|
|
186
|
-
BufferedIOWriter::
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
size_t BufferedIOWriter::operator()(const void *ptr, size_t unitsize, size_t nitems)
|
|
192
|
-
{
|
|
179
|
+
size_t BufferedIOWriter::operator()(
|
|
180
|
+
const void* ptr,
|
|
181
|
+
size_t unitsize,
|
|
182
|
+
size_t nitems) {
|
|
193
183
|
size_t size = unitsize * nitems;
|
|
194
|
-
if (size == 0)
|
|
195
|
-
|
|
184
|
+
if (size == 0)
|
|
185
|
+
return 0;
|
|
186
|
+
const char* src = (const char*)ptr;
|
|
196
187
|
size_t nb;
|
|
197
188
|
|
|
198
189
|
{ // copy as many bytes as possible to buffer
|
|
199
190
|
nb = std::min(bsz - b0, size);
|
|
200
|
-
memcpy
|
|
191
|
+
memcpy(buffer.data() + b0, src, nb);
|
|
201
192
|
b0 += nb;
|
|
202
193
|
src += nb;
|
|
203
194
|
size -= nb;
|
|
@@ -207,15 +198,15 @@ size_t BufferedIOWriter::operator()(const void *ptr, size_t unitsize, size_t nit
|
|
|
207
198
|
// now we need to flush to add more bytes
|
|
208
199
|
size_t ofs = 0;
|
|
209
200
|
do {
|
|
210
|
-
assert
|
|
201
|
+
assert(ofs < 10000000);
|
|
211
202
|
size_t written = (*writer)(buffer.data() + ofs, 1, bsz - ofs);
|
|
212
203
|
FAISS_THROW_IF_NOT(written > 0);
|
|
213
204
|
ofs += written;
|
|
214
|
-
} while(ofs != bsz);
|
|
205
|
+
} while (ofs != bsz);
|
|
215
206
|
|
|
216
207
|
// copy src to buffer
|
|
217
208
|
size_t nb1 = std::min(bsz, size);
|
|
218
|
-
memcpy
|
|
209
|
+
memcpy(buffer.data(), src, nb1);
|
|
219
210
|
b0 = nb1;
|
|
220
211
|
nb += nb1;
|
|
221
212
|
src += nb1;
|
|
@@ -225,37 +216,31 @@ size_t BufferedIOWriter::operator()(const void *ptr, size_t unitsize, size_t nit
|
|
|
225
216
|
return nb / unitsize;
|
|
226
217
|
}
|
|
227
218
|
|
|
228
|
-
BufferedIOWriter::~BufferedIOWriter()
|
|
229
|
-
{
|
|
219
|
+
BufferedIOWriter::~BufferedIOWriter() {
|
|
230
220
|
size_t ofs = 0;
|
|
231
|
-
while(ofs != b0) {
|
|
221
|
+
while (ofs != b0) {
|
|
232
222
|
// printf("Destructor write %zd \n", b0 - ofs);
|
|
233
223
|
size_t written = (*writer)(buffer.data() + ofs, 1, b0 - ofs);
|
|
234
224
|
FAISS_THROW_IF_NOT(written > 0);
|
|
235
225
|
ofs += written;
|
|
236
226
|
}
|
|
237
|
-
|
|
238
227
|
}
|
|
239
228
|
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
uint32_t fourcc (const char sx[4]) {
|
|
245
|
-
FAISS_THROW_IF_NOT (4 == strlen(sx));
|
|
246
|
-
const unsigned char *x = (unsigned char*)sx;
|
|
229
|
+
uint32_t fourcc(const char sx[4]) {
|
|
230
|
+
FAISS_THROW_IF_NOT(4 == strlen(sx));
|
|
231
|
+
const unsigned char* x = (unsigned char*)sx;
|
|
247
232
|
return x[0] | x[1] << 8 | x[2] << 16 | x[3] << 24;
|
|
248
233
|
}
|
|
249
234
|
|
|
250
|
-
uint32_t fourcc
|
|
251
|
-
FAISS_THROW_IF_NOT
|
|
252
|
-
const unsigned char
|
|
235
|
+
uint32_t fourcc(const std::string& sx) {
|
|
236
|
+
FAISS_THROW_IF_NOT(sx.length() == 4);
|
|
237
|
+
const unsigned char* x = (unsigned char*)sx.c_str();
|
|
253
238
|
return x[0] | x[1] << 8 | x[2] << 16 | x[3] << 24;
|
|
254
239
|
}
|
|
255
240
|
|
|
256
241
|
void fourcc_inv(uint32_t x, char str[5]) {
|
|
257
242
|
*(uint32_t*)str = x;
|
|
258
|
-
str[
|
|
243
|
+
str[4] = 0;
|
|
259
244
|
}
|
|
260
245
|
|
|
261
246
|
std::string fourcc_inv(uint32_t x) {
|
|
@@ -264,7 +249,6 @@ std::string fourcc_inv(uint32_t x) {
|
|
|
264
249
|
return std::string(str);
|
|
265
250
|
}
|
|
266
251
|
|
|
267
|
-
|
|
268
252
|
std::string fourcc_inv_printable(uint32_t x) {
|
|
269
253
|
char cstr[5];
|
|
270
254
|
fourcc_inv(x, cstr);
|
|
@@ -282,7 +266,4 @@ std::string fourcc_inv_printable(uint32_t x) {
|
|
|
282
266
|
return str;
|
|
283
267
|
}
|
|
284
268
|
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
269
|
} // namespace faiss
|
|
@@ -16,25 +16,23 @@
|
|
|
16
16
|
|
|
17
17
|
#pragma once
|
|
18
18
|
|
|
19
|
-
#include <string>
|
|
20
19
|
#include <cstdio>
|
|
20
|
+
#include <string>
|
|
21
21
|
#include <vector>
|
|
22
22
|
|
|
23
23
|
#include <faiss/Index.h>
|
|
24
24
|
|
|
25
25
|
namespace faiss {
|
|
26
26
|
|
|
27
|
-
|
|
28
27
|
struct IOReader {
|
|
29
28
|
// name that can be used in error messages
|
|
30
29
|
std::string name;
|
|
31
30
|
|
|
32
31
|
// fread. Returns number of items read or 0 in case of EOF.
|
|
33
|
-
virtual size_t operator()(
|
|
34
|
-
void *ptr, size_t size, size_t nitems) = 0;
|
|
32
|
+
virtual size_t operator()(void* ptr, size_t size, size_t nitems) = 0;
|
|
35
33
|
|
|
36
34
|
// return a file number that can be memory-mapped
|
|
37
|
-
virtual int fileno
|
|
35
|
+
virtual int fileno();
|
|
38
36
|
|
|
39
37
|
virtual ~IOReader() {}
|
|
40
38
|
};
|
|
@@ -44,53 +42,51 @@ struct IOWriter {
|
|
|
44
42
|
std::string name;
|
|
45
43
|
|
|
46
44
|
// fwrite. Return number of items written
|
|
47
|
-
virtual size_t operator()(
|
|
48
|
-
const void *ptr, size_t size, size_t nitems) = 0;
|
|
45
|
+
virtual size_t operator()(const void* ptr, size_t size, size_t nitems) = 0;
|
|
49
46
|
|
|
50
47
|
// return a file number that can be memory-mapped
|
|
51
|
-
virtual int fileno
|
|
48
|
+
virtual int fileno();
|
|
52
49
|
|
|
53
50
|
virtual ~IOWriter() noexcept(false) {}
|
|
54
51
|
};
|
|
55
52
|
|
|
56
|
-
|
|
57
|
-
struct VectorIOReader:IOReader {
|
|
53
|
+
struct VectorIOReader : IOReader {
|
|
58
54
|
std::vector<uint8_t> data;
|
|
59
55
|
size_t rp = 0;
|
|
60
|
-
size_t operator()(void
|
|
56
|
+
size_t operator()(void* ptr, size_t size, size_t nitems) override;
|
|
61
57
|
};
|
|
62
58
|
|
|
63
|
-
struct VectorIOWriter:IOWriter {
|
|
59
|
+
struct VectorIOWriter : IOWriter {
|
|
64
60
|
std::vector<uint8_t> data;
|
|
65
|
-
size_t operator()(const void
|
|
61
|
+
size_t operator()(const void* ptr, size_t size, size_t nitems) override;
|
|
66
62
|
};
|
|
67
63
|
|
|
68
|
-
struct FileIOReader: IOReader {
|
|
69
|
-
FILE
|
|
64
|
+
struct FileIOReader : IOReader {
|
|
65
|
+
FILE* f = nullptr;
|
|
70
66
|
bool need_close = false;
|
|
71
67
|
|
|
72
|
-
FileIOReader(FILE
|
|
68
|
+
FileIOReader(FILE* rf);
|
|
73
69
|
|
|
74
|
-
FileIOReader(const char
|
|
70
|
+
FileIOReader(const char* fname);
|
|
75
71
|
|
|
76
72
|
~FileIOReader() override;
|
|
77
73
|
|
|
78
|
-
size_t operator()(void
|
|
74
|
+
size_t operator()(void* ptr, size_t size, size_t nitems) override;
|
|
79
75
|
|
|
80
76
|
int fileno() override;
|
|
81
77
|
};
|
|
82
78
|
|
|
83
|
-
struct FileIOWriter: IOWriter {
|
|
84
|
-
FILE
|
|
79
|
+
struct FileIOWriter : IOWriter {
|
|
80
|
+
FILE* f = nullptr;
|
|
85
81
|
bool need_close = false;
|
|
86
82
|
|
|
87
|
-
FileIOWriter(FILE
|
|
83
|
+
FileIOWriter(FILE* wf);
|
|
88
84
|
|
|
89
|
-
FileIOWriter(const char
|
|
85
|
+
FileIOWriter(const char* fname);
|
|
90
86
|
|
|
91
87
|
~FileIOWriter() override;
|
|
92
88
|
|
|
93
|
-
size_t operator()(const void
|
|
89
|
+
size_t operator()(const void* ptr, size_t size, size_t nitems) override;
|
|
94
90
|
|
|
95
91
|
int fileno() override;
|
|
96
92
|
};
|
|
@@ -103,12 +99,9 @@ struct FileIOWriter: IOWriter {
|
|
|
103
99
|
* the read/write functions.
|
|
104
100
|
*******************************************************/
|
|
105
101
|
|
|
106
|
-
|
|
107
|
-
|
|
108
102
|
/** wraps an ioreader to make buffered reads to avoid too small reads */
|
|
109
|
-
struct BufferedIOReader: IOReader {
|
|
110
|
-
|
|
111
|
-
IOReader *reader;
|
|
103
|
+
struct BufferedIOReader : IOReader {
|
|
104
|
+
IOReader* reader;
|
|
112
105
|
size_t bsz;
|
|
113
106
|
size_t ofs; ///< offset in input stream
|
|
114
107
|
size_t ofs2; ///< number of bytes returned to caller
|
|
@@ -119,37 +112,34 @@ struct BufferedIOReader: IOReader {
|
|
|
119
112
|
* @param bsz buffer size (bytes). Reads will be done in batches of
|
|
120
113
|
* this size
|
|
121
114
|
*/
|
|
122
|
-
explicit BufferedIOReader(IOReader
|
|
115
|
+
explicit BufferedIOReader(IOReader* reader, size_t bsz = 1024 * 1024);
|
|
123
116
|
|
|
124
|
-
size_t operator()(void
|
|
117
|
+
size_t operator()(void* ptr, size_t size, size_t nitems) override;
|
|
125
118
|
};
|
|
126
119
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
IOWriter *writer;
|
|
120
|
+
struct BufferedIOWriter : IOWriter {
|
|
121
|
+
IOWriter* writer;
|
|
131
122
|
size_t bsz;
|
|
132
123
|
size_t ofs;
|
|
133
|
-
size_t ofs2;
|
|
134
|
-
size_t b0;
|
|
124
|
+
size_t ofs2; ///< number of bytes received from caller
|
|
125
|
+
size_t b0; ///< amount of data in buffer
|
|
135
126
|
std::vector<char> buffer;
|
|
136
127
|
|
|
137
|
-
explicit BufferedIOWriter(IOWriter
|
|
128
|
+
explicit BufferedIOWriter(IOWriter* writer, size_t bsz = 1024 * 1024);
|
|
138
129
|
|
|
139
|
-
size_t operator()(const void
|
|
130
|
+
size_t operator()(const void* ptr, size_t size, size_t nitems) override;
|
|
140
131
|
|
|
141
132
|
// flushes
|
|
142
133
|
~BufferedIOWriter() override;
|
|
143
134
|
};
|
|
144
135
|
|
|
145
136
|
/// cast a 4-character string to a uint32_t that can be written and read easily
|
|
146
|
-
uint32_t fourcc
|
|
147
|
-
uint32_t fourcc
|
|
137
|
+
uint32_t fourcc(const char sx[4]);
|
|
138
|
+
uint32_t fourcc(const std::string& sx);
|
|
148
139
|
|
|
149
140
|
// decoding of fourcc (int32 -> string)
|
|
150
141
|
void fourcc_inv(uint32_t x, char str[5]);
|
|
151
142
|
std::string fourcc_inv(uint32_t x);
|
|
152
143
|
std::string fourcc_inv_printable(uint32_t x);
|
|
153
144
|
|
|
154
|
-
|
|
155
145
|
} // namespace faiss
|
|
@@ -15,43 +15,74 @@
|
|
|
15
15
|
* always called f and thus is not passed in as a macro parameter.
|
|
16
16
|
**************************************************************/
|
|
17
17
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
size_t ret = (*f)(ptr, sizeof(*(ptr)), n);
|
|
21
|
-
FAISS_THROW_IF_NOT_FMT(
|
|
22
|
-
|
|
23
|
-
|
|
18
|
+
#define READANDCHECK(ptr, n) \
|
|
19
|
+
{ \
|
|
20
|
+
size_t ret = (*f)(ptr, sizeof(*(ptr)), n); \
|
|
21
|
+
FAISS_THROW_IF_NOT_FMT( \
|
|
22
|
+
ret == (n), \
|
|
23
|
+
"read error in %s: %zd != %zd (%s)", \
|
|
24
|
+
f->name.c_str(), \
|
|
25
|
+
ret, \
|
|
26
|
+
size_t(n), \
|
|
27
|
+
strerror(errno)); \
|
|
24
28
|
}
|
|
25
29
|
|
|
26
|
-
#define READ1(x)
|
|
30
|
+
#define READ1(x) READANDCHECK(&(x), 1)
|
|
27
31
|
|
|
28
32
|
// will fail if we write 256G of data at once...
|
|
29
|
-
#define READVECTOR(vec)
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
#define READSTRING(s)
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
WRITEANDCHECK
|
|
33
|
+
#define READVECTOR(vec) \
|
|
34
|
+
{ \
|
|
35
|
+
size_t size; \
|
|
36
|
+
READANDCHECK(&size, 1); \
|
|
37
|
+
FAISS_THROW_IF_NOT(size >= 0 && size < (uint64_t{1} << 40)); \
|
|
38
|
+
(vec).resize(size); \
|
|
39
|
+
READANDCHECK((vec).data(), size); \
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
#define READSTRING(s) \
|
|
43
|
+
{ \
|
|
44
|
+
size_t size = (s).size(); \
|
|
45
|
+
WRITEANDCHECK(&size, 1); \
|
|
46
|
+
WRITEANDCHECK((s).c_str(), size); \
|
|
42
47
|
}
|
|
43
48
|
|
|
44
|
-
#define WRITEANDCHECK(ptr, n)
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
+
#define WRITEANDCHECK(ptr, n) \
|
|
50
|
+
{ \
|
|
51
|
+
size_t ret = (*f)(ptr, sizeof(*(ptr)), n); \
|
|
52
|
+
FAISS_THROW_IF_NOT_FMT( \
|
|
53
|
+
ret == (n), \
|
|
54
|
+
"write error in %s: %zd != %zd (%s)", \
|
|
55
|
+
f->name.c_str(), \
|
|
56
|
+
ret, \
|
|
57
|
+
size_t(n), \
|
|
58
|
+
strerror(errno)); \
|
|
49
59
|
}
|
|
50
60
|
|
|
51
61
|
#define WRITE1(x) WRITEANDCHECK(&(x), 1)
|
|
52
62
|
|
|
53
|
-
#define WRITEVECTOR(vec)
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
WRITEANDCHECK
|
|
63
|
+
#define WRITEVECTOR(vec) \
|
|
64
|
+
{ \
|
|
65
|
+
size_t size = (vec).size(); \
|
|
66
|
+
WRITEANDCHECK(&size, 1); \
|
|
67
|
+
WRITEANDCHECK((vec).data(), size); \
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
// read/write xb vector for backwards compatibility of IndexFlat
|
|
71
|
+
|
|
72
|
+
#define WRITEXBVECTOR(vec) \
|
|
73
|
+
{ \
|
|
74
|
+
FAISS_THROW_IF_NOT((vec).size() % 4 == 0); \
|
|
75
|
+
size_t size = (vec).size() / 4; \
|
|
76
|
+
WRITEANDCHECK(&size, 1); \
|
|
77
|
+
WRITEANDCHECK((vec).data(), size * 4); \
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
#define READXBVECTOR(vec) \
|
|
81
|
+
{ \
|
|
82
|
+
size_t size; \
|
|
83
|
+
READANDCHECK(&size, 1); \
|
|
84
|
+
FAISS_THROW_IF_NOT(size >= 0 && size < (uint64_t{1} << 40)); \
|
|
85
|
+
size *= 4; \
|
|
86
|
+
(vec).resize(size); \
|
|
87
|
+
READANDCHECK((vec).data(), size); \
|
|
57
88
|
}
|