faiss 0.2.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +1 -1
- data/README.md +7 -7
- data/ext/faiss/extconf.rb +6 -3
- data/ext/faiss/numo.hpp +4 -4
- data/ext/faiss/utils.cpp +1 -1
- data/ext/faiss/utils.h +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +365 -194
- data/vendor/faiss/faiss/Clustering.h +102 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
- data/vendor/faiss/faiss/Index2Layer.h +22 -36
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
- data/vendor/faiss/faiss/IndexFlat.h +42 -59
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
- data/vendor/faiss/faiss/IndexIVF.h +169 -118
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
- data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
- data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
- data/vendor/faiss/faiss/IndexLSH.h +20 -38
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
- data/vendor/faiss/faiss/IndexPQ.h +64 -82
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
- data/vendor/faiss/faiss/IndexRefine.h +32 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
- data/vendor/faiss/faiss/VectorTransform.h +64 -89
- data/vendor/faiss/faiss/clone_index.cpp +78 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
- data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
- data/vendor/faiss/faiss/impl/io.cpp +76 -95
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +60 -29
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +619 -397
- data/vendor/faiss/faiss/index_factory.h +8 -6
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +305 -312
- data/vendor/faiss/faiss/utils/distances.h +170 -122
- data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +54 -49
- metadata +29 -4
@@ -15,9 +15,8 @@
|
|
15
15
|
* the interface.
|
16
16
|
*/
|
17
17
|
|
18
|
-
#include <vector>
|
19
18
|
#include <faiss/Index.h>
|
20
|
-
|
19
|
+
#include <vector>
|
21
20
|
|
22
21
|
namespace faiss {
|
23
22
|
|
@@ -31,10 +30,10 @@ namespace faiss {
|
|
31
30
|
struct InvertedLists {
|
32
31
|
typedef Index::idx_t idx_t;
|
33
32
|
|
34
|
-
size_t nlist;
|
35
|
-
size_t code_size;
|
33
|
+
size_t nlist; ///< number of possible key values
|
34
|
+
size_t code_size; ///< code size per vector in bytes
|
36
35
|
|
37
|
-
InvertedLists
|
36
|
+
InvertedLists(size_t nlist, size_t code_size);
|
38
37
|
|
39
38
|
/// used for BlockInvertedLists, where the codes are packed into groups
|
40
39
|
/// and the individual code size is meaningless
|
@@ -51,70 +50,77 @@ struct InvertedLists {
|
|
51
50
|
*
|
52
51
|
* @return codes size list_size * code_size
|
53
52
|
*/
|
54
|
-
virtual const uint8_t
|
53
|
+
virtual const uint8_t* get_codes(size_t list_no) const = 0;
|
55
54
|
|
56
55
|
/** get the ids for an inverted list
|
57
56
|
* must be released by release_ids
|
58
57
|
*
|
59
58
|
* @return ids size list_size
|
60
59
|
*/
|
61
|
-
virtual const idx_t
|
60
|
+
virtual const idx_t* get_ids(size_t list_no) const = 0;
|
62
61
|
|
63
62
|
/// release codes returned by get_codes (default implementation is nop)
|
64
|
-
virtual void release_codes
|
63
|
+
virtual void release_codes(size_t list_no, const uint8_t* codes) const;
|
65
64
|
|
66
65
|
/// release ids returned by get_ids
|
67
|
-
virtual void release_ids
|
66
|
+
virtual void release_ids(size_t list_no, const idx_t* ids) const;
|
68
67
|
|
69
68
|
/// @return a single id in an inverted list
|
70
|
-
virtual idx_t get_single_id
|
69
|
+
virtual idx_t get_single_id(size_t list_no, size_t offset) const;
|
71
70
|
|
72
71
|
/// @return a single code in an inverted list
|
73
72
|
/// (should be deallocated with release_codes)
|
74
|
-
virtual const uint8_t
|
75
|
-
size_t list_no, size_t offset) const;
|
73
|
+
virtual const uint8_t* get_single_code(size_t list_no, size_t offset) const;
|
76
74
|
|
77
75
|
/// prepare the following lists (default does nothing)
|
78
76
|
/// a list can be -1 hence the signed long
|
79
|
-
virtual void prefetch_lists
|
77
|
+
virtual void prefetch_lists(const idx_t* list_nos, int nlist) const;
|
80
78
|
|
81
79
|
/*************************
|
82
80
|
* writing functions */
|
83
81
|
|
84
82
|
/// add one entry to an inverted list
|
85
|
-
virtual size_t add_entry
|
86
|
-
const uint8_t *code);
|
83
|
+
virtual size_t add_entry(size_t list_no, idx_t theid, const uint8_t* code);
|
87
84
|
|
88
|
-
virtual size_t add_entries
|
89
|
-
|
90
|
-
|
85
|
+
virtual size_t add_entries(
|
86
|
+
size_t list_no,
|
87
|
+
size_t n_entry,
|
88
|
+
const idx_t* ids,
|
89
|
+
const uint8_t* code) = 0;
|
91
90
|
|
92
|
-
virtual void update_entry
|
93
|
-
|
91
|
+
virtual void update_entry(
|
92
|
+
size_t list_no,
|
93
|
+
size_t offset,
|
94
|
+
idx_t id,
|
95
|
+
const uint8_t* code);
|
94
96
|
|
95
|
-
virtual void update_entries
|
96
|
-
|
97
|
+
virtual void update_entries(
|
98
|
+
size_t list_no,
|
99
|
+
size_t offset,
|
100
|
+
size_t n_entry,
|
101
|
+
const idx_t* ids,
|
102
|
+
const uint8_t* code) = 0;
|
97
103
|
|
98
|
-
virtual void resize
|
104
|
+
virtual void resize(size_t list_no, size_t new_size) = 0;
|
99
105
|
|
100
|
-
virtual void reset
|
106
|
+
virtual void reset();
|
101
107
|
|
102
108
|
/// move all entries from oivf (empty on output)
|
103
|
-
void merge_from
|
109
|
+
void merge_from(InvertedLists* oivf, size_t add_id);
|
104
110
|
|
105
|
-
virtual ~InvertedLists
|
111
|
+
virtual ~InvertedLists();
|
106
112
|
|
107
113
|
/*************************
|
108
114
|
* statistics */
|
109
115
|
|
110
116
|
/// 1= perfectly balanced, >1: imbalanced
|
111
|
-
double imbalance_factor
|
117
|
+
double imbalance_factor() const;
|
112
118
|
|
113
119
|
/// display some stats about the inverted lists
|
114
|
-
void print_stats
|
120
|
+
void print_stats() const;
|
115
121
|
|
116
122
|
/// sum up list sizes
|
117
|
-
size_t compute_ntotal
|
123
|
+
size_t compute_ntotal() const;
|
118
124
|
|
119
125
|
/**************************************
|
120
126
|
* Scoped inverted lists (for automatic deallocation)
|
@@ -138,71 +144,76 @@ struct InvertedLists {
|
|
138
144
|
*/
|
139
145
|
|
140
146
|
struct ScopedIds {
|
141
|
-
const InvertedLists
|
142
|
-
const idx_t
|
147
|
+
const InvertedLists* il;
|
148
|
+
const idx_t* ids;
|
143
149
|
size_t list_no;
|
144
150
|
|
145
|
-
ScopedIds
|
146
|
-
|
147
|
-
{}
|
151
|
+
ScopedIds(const InvertedLists* il, size_t list_no)
|
152
|
+
: il(il), ids(il->get_ids(list_no)), list_no(list_no) {}
|
148
153
|
|
149
|
-
const idx_t
|
154
|
+
const idx_t* get() {
|
155
|
+
return ids;
|
156
|
+
}
|
150
157
|
|
151
|
-
idx_t operator
|
158
|
+
idx_t operator[](size_t i) const {
|
152
159
|
return ids[i];
|
153
160
|
}
|
154
161
|
|
155
|
-
~ScopedIds
|
156
|
-
il->release_ids
|
162
|
+
~ScopedIds() {
|
163
|
+
il->release_ids(list_no, ids);
|
157
164
|
}
|
158
165
|
};
|
159
166
|
|
160
167
|
struct ScopedCodes {
|
161
|
-
const InvertedLists
|
162
|
-
const uint8_t
|
168
|
+
const InvertedLists* il;
|
169
|
+
const uint8_t* codes;
|
163
170
|
size_t list_no;
|
164
171
|
|
165
|
-
ScopedCodes
|
166
|
-
|
167
|
-
{}
|
172
|
+
ScopedCodes(const InvertedLists* il, size_t list_no)
|
173
|
+
: il(il), codes(il->get_codes(list_no)), list_no(list_no) {}
|
168
174
|
|
169
|
-
ScopedCodes
|
170
|
-
|
171
|
-
|
172
|
-
|
175
|
+
ScopedCodes(const InvertedLists* il, size_t list_no, size_t offset)
|
176
|
+
: il(il),
|
177
|
+
codes(il->get_single_code(list_no, offset)),
|
178
|
+
list_no(list_no) {}
|
173
179
|
|
174
|
-
const uint8_t
|
180
|
+
const uint8_t* get() {
|
181
|
+
return codes;
|
182
|
+
}
|
175
183
|
|
176
|
-
~ScopedCodes
|
177
|
-
il->release_codes
|
184
|
+
~ScopedCodes() {
|
185
|
+
il->release_codes(list_no, codes);
|
178
186
|
}
|
179
187
|
};
|
180
|
-
|
181
|
-
|
182
188
|
};
|
183
189
|
|
184
|
-
|
185
190
|
/// simple (default) implementation as an array of inverted lists
|
186
|
-
struct ArrayInvertedLists: InvertedLists {
|
187
|
-
std::vector
|
188
|
-
std::vector
|
191
|
+
struct ArrayInvertedLists : InvertedLists {
|
192
|
+
std::vector<std::vector<uint8_t>> codes; // binary codes, size nlist
|
193
|
+
std::vector<std::vector<idx_t>> ids; ///< Inverted lists for indexes
|
189
194
|
|
190
|
-
ArrayInvertedLists
|
195
|
+
ArrayInvertedLists(size_t nlist, size_t code_size);
|
191
196
|
|
192
197
|
size_t list_size(size_t list_no) const override;
|
193
|
-
const uint8_t
|
194
|
-
const idx_t
|
198
|
+
const uint8_t* get_codes(size_t list_no) const override;
|
199
|
+
const idx_t* get_ids(size_t list_no) const override;
|
195
200
|
|
196
|
-
size_t add_entries
|
197
|
-
|
198
|
-
|
201
|
+
size_t add_entries(
|
202
|
+
size_t list_no,
|
203
|
+
size_t n_entry,
|
204
|
+
const idx_t* ids,
|
205
|
+
const uint8_t* code) override;
|
199
206
|
|
200
|
-
void update_entries
|
201
|
-
|
207
|
+
void update_entries(
|
208
|
+
size_t list_no,
|
209
|
+
size_t offset,
|
210
|
+
size_t n_entry,
|
211
|
+
const idx_t* ids,
|
212
|
+
const uint8_t* code) override;
|
202
213
|
|
203
|
-
void resize
|
214
|
+
void resize(size_t list_no, size_t new_size) override;
|
204
215
|
|
205
|
-
|
216
|
+
~ArrayInvertedLists() override;
|
206
217
|
};
|
207
218
|
|
208
219
|
/*****************************************************************
|
@@ -213,154 +224,143 @@ struct ArrayInvertedLists: InvertedLists {
|
|
213
224
|
*****************************************************************/
|
214
225
|
|
215
226
|
/// invlists that fail for all write functions
|
216
|
-
struct ReadOnlyInvertedLists: InvertedLists {
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
227
|
+
struct ReadOnlyInvertedLists : InvertedLists {
|
228
|
+
ReadOnlyInvertedLists(size_t nlist, size_t code_size)
|
229
|
+
: InvertedLists(nlist, code_size) {}
|
230
|
+
|
231
|
+
size_t add_entries(
|
232
|
+
size_t list_no,
|
233
|
+
size_t n_entry,
|
234
|
+
const idx_t* ids,
|
235
|
+
const uint8_t* code) override;
|
236
|
+
|
237
|
+
void update_entries(
|
238
|
+
size_t list_no,
|
239
|
+
size_t offset,
|
240
|
+
size_t n_entry,
|
241
|
+
const idx_t* ids,
|
242
|
+
const uint8_t* code) override;
|
243
|
+
|
244
|
+
void resize(size_t list_no, size_t new_size) override;
|
230
245
|
};
|
231
246
|
|
232
|
-
|
233
247
|
/// Horizontal stack of inverted lists
|
234
|
-
struct HStackInvertedLists: ReadOnlyInvertedLists {
|
235
|
-
|
236
|
-
std::vector<const InvertedLists *>ils;
|
248
|
+
struct HStackInvertedLists : ReadOnlyInvertedLists {
|
249
|
+
std::vector<const InvertedLists*> ils;
|
237
250
|
|
238
251
|
/// build InvertedLists by concatenating nil of them
|
239
|
-
HStackInvertedLists
|
252
|
+
HStackInvertedLists(int nil, const InvertedLists** ils);
|
240
253
|
|
241
254
|
size_t list_size(size_t list_no) const override;
|
242
|
-
const uint8_t
|
243
|
-
const idx_t
|
244
|
-
|
245
|
-
void prefetch_lists (const idx_t *list_nos, int nlist) const override;
|
255
|
+
const uint8_t* get_codes(size_t list_no) const override;
|
256
|
+
const idx_t* get_ids(size_t list_no) const override;
|
246
257
|
|
247
|
-
void
|
248
|
-
void release_ids (size_t list_no, const idx_t *ids) const override;
|
258
|
+
void prefetch_lists(const idx_t* list_nos, int nlist) const override;
|
249
259
|
|
250
|
-
|
260
|
+
void release_codes(size_t list_no, const uint8_t* codes) const override;
|
261
|
+
void release_ids(size_t list_no, const idx_t* ids) const override;
|
251
262
|
|
252
|
-
|
253
|
-
size_t list_no, size_t offset) const override;
|
263
|
+
idx_t get_single_id(size_t list_no, size_t offset) const override;
|
254
264
|
|
265
|
+
const uint8_t* get_single_code(size_t list_no, size_t offset)
|
266
|
+
const override;
|
255
267
|
};
|
256
268
|
|
257
269
|
using ConcatenatedInvertedLists = HStackInvertedLists;
|
258
270
|
|
259
|
-
|
260
271
|
/// vertical slice of indexes in another InvertedLists
|
261
|
-
struct SliceInvertedLists: ReadOnlyInvertedLists {
|
262
|
-
const InvertedLists
|
272
|
+
struct SliceInvertedLists : ReadOnlyInvertedLists {
|
273
|
+
const InvertedLists* il;
|
263
274
|
idx_t i0, i1;
|
264
275
|
|
265
|
-
SliceInvertedLists(const InvertedLists
|
276
|
+
SliceInvertedLists(const InvertedLists* il, idx_t i0, idx_t i1);
|
266
277
|
|
267
278
|
size_t list_size(size_t list_no) const override;
|
268
|
-
const uint8_t
|
269
|
-
const idx_t
|
279
|
+
const uint8_t* get_codes(size_t list_no) const override;
|
280
|
+
const idx_t* get_ids(size_t list_no) const override;
|
270
281
|
|
271
|
-
void release_codes
|
272
|
-
void release_ids
|
282
|
+
void release_codes(size_t list_no, const uint8_t* codes) const override;
|
283
|
+
void release_ids(size_t list_no, const idx_t* ids) const override;
|
273
284
|
|
274
|
-
idx_t get_single_id
|
285
|
+
idx_t get_single_id(size_t list_no, size_t offset) const override;
|
275
286
|
|
276
|
-
const uint8_t
|
277
|
-
|
287
|
+
const uint8_t* get_single_code(size_t list_no, size_t offset)
|
288
|
+
const override;
|
278
289
|
|
279
|
-
void prefetch_lists
|
290
|
+
void prefetch_lists(const idx_t* list_nos, int nlist) const override;
|
280
291
|
};
|
281
292
|
|
282
|
-
|
283
|
-
|
284
|
-
std::vector<const InvertedLists *>ils;
|
293
|
+
struct VStackInvertedLists : ReadOnlyInvertedLists {
|
294
|
+
std::vector<const InvertedLists*> ils;
|
285
295
|
std::vector<idx_t> cumsz;
|
286
296
|
|
287
297
|
/// build InvertedLists by concatenating nil of them
|
288
|
-
VStackInvertedLists
|
298
|
+
VStackInvertedLists(int nil, const InvertedLists** ils);
|
289
299
|
|
290
300
|
size_t list_size(size_t list_no) const override;
|
291
|
-
const uint8_t
|
292
|
-
const idx_t
|
301
|
+
const uint8_t* get_codes(size_t list_no) const override;
|
302
|
+
const idx_t* get_ids(size_t list_no) const override;
|
293
303
|
|
294
|
-
void release_codes
|
295
|
-
void release_ids
|
304
|
+
void release_codes(size_t list_no, const uint8_t* codes) const override;
|
305
|
+
void release_ids(size_t list_no, const idx_t* ids) const override;
|
296
306
|
|
297
|
-
idx_t get_single_id
|
307
|
+
idx_t get_single_id(size_t list_no, size_t offset) const override;
|
298
308
|
|
299
|
-
const uint8_t
|
300
|
-
|
301
|
-
|
302
|
-
void prefetch_lists (const idx_t *list_nos, int nlist) const override;
|
309
|
+
const uint8_t* get_single_code(size_t list_no, size_t offset)
|
310
|
+
const override;
|
303
311
|
|
312
|
+
void prefetch_lists(const idx_t* list_nos, int nlist) const override;
|
304
313
|
};
|
305
314
|
|
306
|
-
|
307
315
|
/** use the first inverted lists if they are non-empty otherwise use the second
|
308
316
|
*
|
309
317
|
* This is useful if il1 has a few inverted lists that are too long,
|
310
318
|
* and that il0 has replacement lists for those, with empty lists for
|
311
319
|
* the others. */
|
312
|
-
struct MaskedInvertedLists: ReadOnlyInvertedLists {
|
313
|
-
|
314
|
-
const InvertedLists
|
315
|
-
const InvertedLists *il1;
|
320
|
+
struct MaskedInvertedLists : ReadOnlyInvertedLists {
|
321
|
+
const InvertedLists* il0;
|
322
|
+
const InvertedLists* il1;
|
316
323
|
|
317
|
-
MaskedInvertedLists
|
318
|
-
const InvertedLists *il1);
|
324
|
+
MaskedInvertedLists(const InvertedLists* il0, const InvertedLists* il1);
|
319
325
|
|
320
326
|
size_t list_size(size_t list_no) const override;
|
321
|
-
const uint8_t
|
322
|
-
const idx_t
|
327
|
+
const uint8_t* get_codes(size_t list_no) const override;
|
328
|
+
const idx_t* get_ids(size_t list_no) const override;
|
323
329
|
|
324
|
-
void release_codes
|
325
|
-
void release_ids
|
330
|
+
void release_codes(size_t list_no, const uint8_t* codes) const override;
|
331
|
+
void release_ids(size_t list_no, const idx_t* ids) const override;
|
326
332
|
|
327
|
-
idx_t get_single_id
|
333
|
+
idx_t get_single_id(size_t list_no, size_t offset) const override;
|
328
334
|
|
329
|
-
const uint8_t
|
330
|
-
|
331
|
-
|
332
|
-
void prefetch_lists (const idx_t *list_nos, int nlist) const override;
|
335
|
+
const uint8_t* get_single_code(size_t list_no, size_t offset)
|
336
|
+
const override;
|
333
337
|
|
338
|
+
void prefetch_lists(const idx_t* list_nos, int nlist) const override;
|
334
339
|
};
|
335
340
|
|
336
|
-
|
337
341
|
/** if the inverted list in il is smaller than maxsize then return it,
|
338
342
|
* otherwise return an empty invlist */
|
339
|
-
struct StopWordsInvertedLists: ReadOnlyInvertedLists {
|
340
|
-
|
341
|
-
const InvertedLists *il0;
|
343
|
+
struct StopWordsInvertedLists : ReadOnlyInvertedLists {
|
344
|
+
const InvertedLists* il0;
|
342
345
|
size_t maxsize;
|
343
346
|
|
344
|
-
StopWordsInvertedLists
|
347
|
+
StopWordsInvertedLists(const InvertedLists* il, size_t maxsize);
|
345
348
|
|
346
349
|
size_t list_size(size_t list_no) const override;
|
347
|
-
const uint8_t
|
348
|
-
const idx_t
|
350
|
+
const uint8_t* get_codes(size_t list_no) const override;
|
351
|
+
const idx_t* get_ids(size_t list_no) const override;
|
349
352
|
|
350
|
-
void release_codes
|
351
|
-
void release_ids
|
353
|
+
void release_codes(size_t list_no, const uint8_t* codes) const override;
|
354
|
+
void release_ids(size_t list_no, const idx_t* ids) const override;
|
352
355
|
|
353
|
-
idx_t get_single_id
|
356
|
+
idx_t get_single_id(size_t list_no, size_t offset) const override;
|
354
357
|
|
355
|
-
const uint8_t
|
356
|
-
|
357
|
-
|
358
|
-
void prefetch_lists (const idx_t *list_nos, int nlist) const override;
|
358
|
+
const uint8_t* get_single_code(size_t list_no, size_t offset)
|
359
|
+
const override;
|
359
360
|
|
361
|
+
void prefetch_lists(const idx_t* list_nos, int nlist) const override;
|
360
362
|
};
|
361
363
|
|
362
|
-
|
363
364
|
} // namespace faiss
|
364
365
|
|
365
|
-
|
366
366
|
#endif
|
@@ -7,9 +7,9 @@
|
|
7
7
|
|
8
8
|
#include <faiss/invlists/InvertedListsIOHook.h>
|
9
9
|
|
10
|
+
#include <faiss/impl/FaissAssert.h>
|
10
11
|
#include <faiss/impl/io.h>
|
11
12
|
#include <faiss/impl/io_macros.h>
|
12
|
-
#include <faiss/impl/FaissAssert.h>
|
13
13
|
|
14
14
|
#include <faiss/invlists/BlockInvertedLists.h>
|
15
15
|
|
@@ -17,24 +17,21 @@
|
|
17
17
|
#include <faiss/invlists/OnDiskInvertedLists.h>
|
18
18
|
#endif // !_MSC_VER
|
19
19
|
|
20
|
-
|
21
20
|
namespace faiss {
|
22
21
|
|
23
|
-
|
24
22
|
/**********************************************************
|
25
23
|
* InvertedListsIOHook's
|
26
24
|
**********************************************************/
|
27
25
|
|
28
26
|
InvertedListsIOHook::InvertedListsIOHook(
|
29
|
-
const std::string
|
30
|
-
|
31
|
-
{}
|
27
|
+
const std::string& key,
|
28
|
+
const std::string& classname)
|
29
|
+
: key(key), classname(classname) {}
|
32
30
|
|
33
31
|
namespace {
|
34
32
|
|
35
33
|
/// std::vector that deletes its contents
|
36
|
-
struct IOHookTable: std::vector<InvertedListsIOHook*> {
|
37
|
-
|
34
|
+
struct IOHookTable : std::vector<InvertedListsIOHook*> {
|
38
35
|
IOHookTable() {
|
39
36
|
#ifndef _MSC_VER
|
40
37
|
push_back(new OnDiskInvertedListsIOHook());
|
@@ -43,7 +40,7 @@ struct IOHookTable: std::vector<InvertedListsIOHook*> {
|
|
43
40
|
}
|
44
41
|
|
45
42
|
~IOHookTable() {
|
46
|
-
for (auto x: *this) {
|
43
|
+
for (auto x : *this) {
|
47
44
|
delete x;
|
48
45
|
}
|
49
46
|
}
|
@@ -51,44 +48,41 @@ struct IOHookTable: std::vector<InvertedListsIOHook*> {
|
|
51
48
|
|
52
49
|
static IOHookTable InvertedListsIOHook_table;
|
53
50
|
|
54
|
-
} //
|
51
|
+
} // namespace
|
55
52
|
|
56
|
-
InvertedListsIOHook* InvertedListsIOHook::lookup(int h)
|
57
|
-
{
|
58
|
-
for(const auto & callback: InvertedListsIOHook_table) {
|
53
|
+
InvertedListsIOHook* InvertedListsIOHook::lookup(int h) {
|
54
|
+
for (const auto& callback : InvertedListsIOHook_table) {
|
59
55
|
if (h == fourcc(callback->key)) {
|
60
56
|
return callback;
|
61
57
|
}
|
62
58
|
}
|
63
|
-
FAISS_THROW_FMT
|
64
|
-
|
65
|
-
|
66
|
-
|
59
|
+
FAISS_THROW_FMT(
|
60
|
+
"read_InvertedLists: could not load ArrayInvertedLists as "
|
61
|
+
"%08x (\"%s\")",
|
62
|
+
h,
|
63
|
+
fourcc_inv_printable(h).c_str());
|
67
64
|
}
|
68
65
|
|
69
|
-
InvertedListsIOHook* InvertedListsIOHook::lookup_classname(
|
70
|
-
{
|
71
|
-
for(const auto
|
66
|
+
InvertedListsIOHook* InvertedListsIOHook::lookup_classname(
|
67
|
+
const std::string& classname) {
|
68
|
+
for (const auto& callback : InvertedListsIOHook_table) {
|
72
69
|
if (callback->classname == classname) {
|
73
70
|
return callback;
|
74
71
|
}
|
75
72
|
}
|
76
|
-
FAISS_THROW_FMT
|
73
|
+
FAISS_THROW_FMT(
|
77
74
|
"read_InvertedLists: could not find classname %s",
|
78
|
-
classname.c_str()
|
79
|
-
);
|
75
|
+
classname.c_str());
|
80
76
|
}
|
81
77
|
|
82
|
-
void InvertedListsIOHook::add_callback(InvertedListsIOHook
|
83
|
-
{
|
78
|
+
void InvertedListsIOHook::add_callback(InvertedListsIOHook* cb) {
|
84
79
|
InvertedListsIOHook_table.push_back(cb);
|
85
80
|
}
|
86
81
|
|
87
|
-
void InvertedListsIOHook::print_callbacks()
|
88
|
-
{
|
82
|
+
void InvertedListsIOHook::print_callbacks() {
|
89
83
|
printf("registered %zd InvertedListsIOHooks:\n",
|
90
|
-
|
91
|
-
for(const auto
|
84
|
+
InvertedListsIOHook_table.size());
|
85
|
+
for (const auto& cb : InvertedListsIOHook_table) {
|
92
86
|
printf("%08x %s %s\n",
|
93
87
|
fourcc(cb->key.c_str()),
|
94
88
|
cb->key.c_str(),
|
@@ -96,11 +90,12 @@ void InvertedListsIOHook::print_callbacks()
|
|
96
90
|
}
|
97
91
|
}
|
98
92
|
|
99
|
-
InvertedLists
|
100
|
-
IOReader
|
101
|
-
|
102
|
-
|
103
|
-
|
93
|
+
InvertedLists* InvertedListsIOHook::read_ArrayInvertedLists(
|
94
|
+
IOReader*,
|
95
|
+
int,
|
96
|
+
size_t,
|
97
|
+
size_t,
|
98
|
+
const std::vector<size_t>&) const {
|
104
99
|
FAISS_THROW_FMT("read to array not implemented for %s", classname.c_str());
|
105
100
|
}
|
106
101
|
|
@@ -7,14 +7,12 @@
|
|
7
7
|
|
8
8
|
#pragma once
|
9
9
|
|
10
|
-
#include <string>
|
11
|
-
#include <faiss/invlists/InvertedLists.h>
|
12
10
|
#include <faiss/impl/io.h>
|
13
|
-
|
11
|
+
#include <faiss/invlists/InvertedLists.h>
|
12
|
+
#include <string>
|
14
13
|
|
15
14
|
namespace faiss {
|
16
15
|
|
17
|
-
|
18
16
|
/** Callbacks to handle other types of InvertedList objects.
|
19
17
|
*
|
20
18
|
* The callbacks should be registered with add_callback before calling
|
@@ -26,38 +24,39 @@ namespace faiss {
|
|
26
24
|
* - the class name (as given by typeid.name) at write time
|
27
25
|
*/
|
28
26
|
struct InvertedListsIOHook {
|
29
|
-
const std::string key;
|
27
|
+
const std::string key; ///< string version of the fourcc
|
30
28
|
const std::string classname; ///< typeid.name
|
31
29
|
|
32
|
-
InvertedListsIOHook(const std::string
|
30
|
+
InvertedListsIOHook(const std::string& key, const std::string& classname);
|
33
31
|
|
34
32
|
/// write the index to the IOWriter (including the fourcc)
|
35
|
-
virtual void write(const InvertedLists
|
33
|
+
virtual void write(const InvertedLists* ils, IOWriter* f) const = 0;
|
36
34
|
|
37
35
|
/// called when the fourcc matches this class's fourcc
|
38
|
-
virtual InvertedLists
|
36
|
+
virtual InvertedLists* read(IOReader* f, int io_flags) const = 0;
|
39
37
|
|
40
38
|
/** read from an ArrayInvertedLists into this invertedlist type.
|
41
|
-
* For this to work, the callback has to be enabled and the io_flag has to
|
42
|
-
* IO_FLAG_SKIP_IVF_DATA | (16 upper bits of the fourcc)
|
39
|
+
* For this to work, the callback has to be enabled and the io_flag has to
|
40
|
+
* be set to IO_FLAG_SKIP_IVF_DATA | (16 upper bits of the fourcc)
|
43
41
|
*
|
44
42
|
* (default implementation fails)
|
45
43
|
*/
|
46
|
-
virtual InvertedLists
|
47
|
-
IOReader
|
48
|
-
|
49
|
-
|
44
|
+
virtual InvertedLists* read_ArrayInvertedLists(
|
45
|
+
IOReader* f,
|
46
|
+
int io_flags,
|
47
|
+
size_t nlist,
|
48
|
+
size_t code_size,
|
49
|
+
const std::vector<size_t>& sizes) const;
|
50
50
|
|
51
51
|
virtual ~InvertedListsIOHook() {}
|
52
52
|
|
53
53
|
/**************************** Manage the set of callbacks ******/
|
54
54
|
|
55
55
|
// transfers ownership
|
56
|
-
static void add_callback(InvertedListsIOHook
|
56
|
+
static void add_callback(InvertedListsIOHook*);
|
57
57
|
static void print_callbacks();
|
58
58
|
static InvertedListsIOHook* lookup(int h);
|
59
|
-
static InvertedListsIOHook* lookup_classname(const std::string
|
60
|
-
|
59
|
+
static InvertedListsIOHook* lookup_classname(const std::string& classname);
|
61
60
|
};
|
62
61
|
|
63
|
-
} // namespace faiss
|
62
|
+
} // namespace faiss
|