faiss 0.2.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +1 -1
- data/README.md +7 -7
- data/ext/faiss/extconf.rb +6 -3
- data/ext/faiss/numo.hpp +4 -4
- data/ext/faiss/utils.cpp +1 -1
- data/ext/faiss/utils.h +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +365 -194
- data/vendor/faiss/faiss/Clustering.h +102 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
- data/vendor/faiss/faiss/Index2Layer.h +22 -36
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
- data/vendor/faiss/faiss/IndexFlat.h +42 -59
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
- data/vendor/faiss/faiss/IndexIVF.h +169 -118
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
- data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
- data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
- data/vendor/faiss/faiss/IndexLSH.h +20 -38
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
- data/vendor/faiss/faiss/IndexPQ.h +64 -82
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
- data/vendor/faiss/faiss/IndexRefine.h +32 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
- data/vendor/faiss/faiss/VectorTransform.h +64 -89
- data/vendor/faiss/faiss/clone_index.cpp +78 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
- data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
- data/vendor/faiss/faiss/impl/io.cpp +76 -95
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +60 -29
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +619 -397
- data/vendor/faiss/faiss/index_factory.h +8 -6
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +305 -312
- data/vendor/faiss/faiss/utils/distances.h +170 -122
- data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +54 -49
- metadata +29 -4
|
@@ -10,30 +10,27 @@
|
|
|
10
10
|
#ifndef FAISS_INDEX_IVF_H
|
|
11
11
|
#define FAISS_INDEX_IVF_H
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
#include <vector>
|
|
15
|
-
#include <unordered_map>
|
|
16
13
|
#include <stdint.h>
|
|
14
|
+
#include <unordered_map>
|
|
15
|
+
#include <vector>
|
|
17
16
|
|
|
18
|
-
#include <faiss/Index.h>
|
|
19
|
-
#include <faiss/invlists/InvertedLists.h>
|
|
20
|
-
#include <faiss/invlists/DirectMap.h>
|
|
21
17
|
#include <faiss/Clustering.h>
|
|
18
|
+
#include <faiss/Index.h>
|
|
22
19
|
#include <faiss/impl/platform_macros.h>
|
|
20
|
+
#include <faiss/invlists/DirectMap.h>
|
|
21
|
+
#include <faiss/invlists/InvertedLists.h>
|
|
23
22
|
#include <faiss/utils/Heap.h>
|
|
24
23
|
|
|
25
|
-
|
|
26
24
|
namespace faiss {
|
|
27
25
|
|
|
28
|
-
|
|
29
26
|
/** Encapsulates a quantizer object for the IndexIVF
|
|
30
27
|
*
|
|
31
28
|
* The class isolates the fields that are independent of the storage
|
|
32
29
|
* of the lists (especially training)
|
|
33
30
|
*/
|
|
34
31
|
struct Level1Quantizer {
|
|
35
|
-
Index
|
|
36
|
-
size_t nlist;
|
|
32
|
+
Index* quantizer; ///< quantizer that maps vectors to inverted lists
|
|
33
|
+
size_t nlist; ///< number of possible key values
|
|
37
34
|
|
|
38
35
|
/**
|
|
39
36
|
* = 0: use the quantizer as index in a kmeans training
|
|
@@ -41,40 +38,37 @@ struct Level1Quantizer {
|
|
|
41
38
|
* = 2: kmeans training on a flat index + add the centroids to the quantizer
|
|
42
39
|
*/
|
|
43
40
|
char quantizer_trains_alone;
|
|
44
|
-
bool own_fields;
|
|
41
|
+
bool own_fields; ///< whether object owns the quantizer (false by default)
|
|
45
42
|
|
|
46
43
|
ClusteringParameters cp; ///< to override default clustering params
|
|
47
|
-
Index
|
|
44
|
+
Index* clustering_index; ///< to override index used during clustering
|
|
48
45
|
|
|
49
46
|
/// Trains the quantizer and calls train_residual to train sub-quantizers
|
|
50
|
-
void train_q1
|
|
51
|
-
|
|
52
|
-
|
|
47
|
+
void train_q1(
|
|
48
|
+
size_t n,
|
|
49
|
+
const float* x,
|
|
50
|
+
bool verbose,
|
|
51
|
+
MetricType metric_type);
|
|
53
52
|
|
|
54
53
|
/// compute the number of bytes required to store list ids
|
|
55
|
-
size_t coarse_code_size
|
|
56
|
-
void encode_listno
|
|
57
|
-
Index::idx_t decode_listno
|
|
58
|
-
|
|
59
|
-
Level1Quantizer (Index * quantizer, size_t nlist);
|
|
54
|
+
size_t coarse_code_size() const;
|
|
55
|
+
void encode_listno(Index::idx_t list_no, uint8_t* code) const;
|
|
56
|
+
Index::idx_t decode_listno(const uint8_t* code) const;
|
|
60
57
|
|
|
61
|
-
Level1Quantizer
|
|
58
|
+
Level1Quantizer(Index* quantizer, size_t nlist);
|
|
62
59
|
|
|
63
|
-
|
|
60
|
+
Level1Quantizer();
|
|
64
61
|
|
|
62
|
+
~Level1Quantizer();
|
|
65
63
|
};
|
|
66
64
|
|
|
67
|
-
|
|
68
|
-
|
|
69
65
|
struct IVFSearchParameters {
|
|
70
|
-
size_t nprobe;
|
|
71
|
-
size_t max_codes;
|
|
72
|
-
IVFSearchParameters(): nprobe(1), max_codes(0) {}
|
|
73
|
-
virtual ~IVFSearchParameters
|
|
66
|
+
size_t nprobe; ///< number of probes at query time
|
|
67
|
+
size_t max_codes; ///< max nb of codes to visit to do a query
|
|
68
|
+
IVFSearchParameters() : nprobe(1), max_codes(0) {}
|
|
69
|
+
virtual ~IVFSearchParameters() {}
|
|
74
70
|
};
|
|
75
71
|
|
|
76
|
-
|
|
77
|
-
|
|
78
72
|
struct InvertedListScanner;
|
|
79
73
|
struct IndexIVFStats;
|
|
80
74
|
|
|
@@ -98,15 +92,15 @@ struct IndexIVFStats;
|
|
|
98
92
|
* Sub-classes implement a post-filtering of the index that refines
|
|
99
93
|
* the distance estimation from the query to database vectors.
|
|
100
94
|
*/
|
|
101
|
-
struct IndexIVF: Index, Level1Quantizer {
|
|
95
|
+
struct IndexIVF : Index, Level1Quantizer {
|
|
102
96
|
/// Access to the actual data
|
|
103
|
-
InvertedLists
|
|
97
|
+
InvertedLists* invlists;
|
|
104
98
|
bool own_invlists;
|
|
105
99
|
|
|
106
|
-
size_t code_size;
|
|
100
|
+
size_t code_size; ///< code size per vector in bytes
|
|
107
101
|
|
|
108
|
-
size_t nprobe;
|
|
109
|
-
size_t max_codes;
|
|
102
|
+
size_t nprobe; ///< number of probes at query time
|
|
103
|
+
size_t max_codes; ///< max nb of codes to visit to do a query
|
|
110
104
|
|
|
111
105
|
/** Parallel mode determines how queries are parallelized with OpenMP
|
|
112
106
|
*
|
|
@@ -127,12 +121,14 @@ struct IndexIVF: Index, Level1Quantizer {
|
|
|
127
121
|
|
|
128
122
|
/** The Inverted file takes a quantizer (an Index) on input,
|
|
129
123
|
* which implements the function mapping a vector to a list
|
|
130
|
-
* identifier.
|
|
131
|
-
* be deleted while the IndexIVF is in use.
|
|
124
|
+
* identifier.
|
|
132
125
|
*/
|
|
133
|
-
IndexIVF
|
|
134
|
-
|
|
135
|
-
|
|
126
|
+
IndexIVF(
|
|
127
|
+
Index* quantizer,
|
|
128
|
+
size_t d,
|
|
129
|
+
size_t nlist,
|
|
130
|
+
size_t code_size,
|
|
131
|
+
MetricType metric = METRIC_L2);
|
|
136
132
|
|
|
137
133
|
void reset() override;
|
|
138
134
|
|
|
@@ -145,6 +141,19 @@ struct IndexIVF: Index, Level1Quantizer {
|
|
|
145
141
|
/// default implementation that calls encode_vectors
|
|
146
142
|
void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
|
|
147
143
|
|
|
144
|
+
/** Implementation of vector addition where the vector assignments are
|
|
145
|
+
* predefined. The default implementation hands over the code extraction to
|
|
146
|
+
* encode_vectors.
|
|
147
|
+
*
|
|
148
|
+
* @param precomputed_idx quantization indices for the input vectors
|
|
149
|
+
* (size n)
|
|
150
|
+
*/
|
|
151
|
+
virtual void add_core(
|
|
152
|
+
idx_t n,
|
|
153
|
+
const float* x,
|
|
154
|
+
const idx_t* xids,
|
|
155
|
+
const idx_t* precomputed_idx);
|
|
156
|
+
|
|
148
157
|
/** Encodes a set of vectors as they would appear in the inverted lists
|
|
149
158
|
*
|
|
150
159
|
* @param list_nos inverted list ids as returned by the
|
|
@@ -154,14 +163,23 @@ struct IndexIVF: Index, Level1Quantizer {
|
|
|
154
163
|
* include the list ids in the code (in this case add
|
|
155
164
|
* ceil(log8(nlist)) to the code size)
|
|
156
165
|
*/
|
|
157
|
-
virtual void encode_vectors(
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
166
|
+
virtual void encode_vectors(
|
|
167
|
+
idx_t n,
|
|
168
|
+
const float* x,
|
|
169
|
+
const idx_t* list_nos,
|
|
170
|
+
uint8_t* codes,
|
|
171
|
+
bool include_listno = false) const = 0;
|
|
172
|
+
|
|
173
|
+
/** Add vectors that are computed with the standalone codec
|
|
174
|
+
*
|
|
175
|
+
* @param codes codes to add size n * sa_code_size()
|
|
176
|
+
* @param xids corresponding ids, size n
|
|
177
|
+
*/
|
|
178
|
+
void add_sa_codes(idx_t n, const uint8_t* codes, const idx_t* xids);
|
|
161
179
|
|
|
162
180
|
/// Sub-classes that encode the residuals can train their encoders here
|
|
163
181
|
/// does nothing by default
|
|
164
|
-
virtual void train_residual
|
|
182
|
+
virtual void train_residual(idx_t n, const float* x);
|
|
165
183
|
|
|
166
184
|
/** search a set of vectors, that are pre-quantized by the IVF
|
|
167
185
|
* quantizer. Fill in the corresponding heaps with the query
|
|
@@ -182,36 +200,53 @@ struct IndexIVF: Index, Level1Quantizer {
|
|
|
182
200
|
* @param params used to override the object's search parameters
|
|
183
201
|
* @param stats search stats to be updated (can be null)
|
|
184
202
|
*/
|
|
185
|
-
virtual void search_preassigned
|
|
186
|
-
idx_t n,
|
|
187
|
-
const
|
|
188
|
-
|
|
203
|
+
virtual void search_preassigned(
|
|
204
|
+
idx_t n,
|
|
205
|
+
const float* x,
|
|
206
|
+
idx_t k,
|
|
207
|
+
const idx_t* assign,
|
|
208
|
+
const float* centroid_dis,
|
|
209
|
+
float* distances,
|
|
210
|
+
idx_t* labels,
|
|
189
211
|
bool store_pairs,
|
|
190
|
-
const IVFSearchParameters
|
|
191
|
-
IndexIVFStats
|
|
192
|
-
) const;
|
|
212
|
+
const IVFSearchParameters* params = nullptr,
|
|
213
|
+
IndexIVFStats* stats = nullptr) const;
|
|
193
214
|
|
|
194
215
|
/** assign the vectors, then call search_preassigned */
|
|
195
|
-
void search
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
216
|
+
void search(
|
|
217
|
+
idx_t n,
|
|
218
|
+
const float* x,
|
|
219
|
+
idx_t k,
|
|
220
|
+
float* distances,
|
|
221
|
+
idx_t* labels) const override;
|
|
222
|
+
|
|
223
|
+
void range_search(
|
|
224
|
+
idx_t n,
|
|
225
|
+
const float* x,
|
|
226
|
+
float radius,
|
|
227
|
+
RangeSearchResult* result) const override;
|
|
200
228
|
|
|
201
229
|
void range_search_preassigned(
|
|
202
|
-
idx_t nx,
|
|
203
|
-
const
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
const
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
230
|
+
idx_t nx,
|
|
231
|
+
const float* x,
|
|
232
|
+
float radius,
|
|
233
|
+
const idx_t* keys,
|
|
234
|
+
const float* coarse_dis,
|
|
235
|
+
RangeSearchResult* result,
|
|
236
|
+
bool store_pairs = false,
|
|
237
|
+
const IVFSearchParameters* params = nullptr,
|
|
238
|
+
IndexIVFStats* stats = nullptr) const;
|
|
239
|
+
|
|
240
|
+
/** Get a scanner for this index (store_pairs means ignore labels)
|
|
241
|
+
*
|
|
242
|
+
* The default search implementation uses this to compute the distances
|
|
243
|
+
*/
|
|
244
|
+
virtual InvertedListScanner* get_InvertedListScanner(
|
|
245
|
+
bool store_pairs = false) const;
|
|
212
246
|
|
|
213
|
-
/** reconstruct a vector. Works only if maintain_direct_map is set to 1 or 2
|
|
214
|
-
|
|
247
|
+
/** reconstruct a vector. Works only if maintain_direct_map is set to 1 or 2
|
|
248
|
+
*/
|
|
249
|
+
void reconstruct(idx_t key, float* recons) const override;
|
|
215
250
|
|
|
216
251
|
/** Update a subset of vectors.
|
|
217
252
|
*
|
|
@@ -221,7 +256,7 @@ struct IndexIVF: Index, Level1Quantizer {
|
|
|
221
256
|
* @param idx vector indices to update, size nv
|
|
222
257
|
* @param v vectors of new values, size nv*d
|
|
223
258
|
*/
|
|
224
|
-
virtual void update_vectors
|
|
259
|
+
virtual void update_vectors(int nv, const idx_t* idx, const float* v);
|
|
225
260
|
|
|
226
261
|
/** Reconstruct a subset of the indexed vectors.
|
|
227
262
|
*
|
|
@@ -243,9 +278,13 @@ struct IndexIVF: Index, Level1Quantizer {
|
|
|
243
278
|
*
|
|
244
279
|
* @param recons reconstructed vectors size (n, k, d)
|
|
245
280
|
*/
|
|
246
|
-
void search_and_reconstruct
|
|
247
|
-
|
|
248
|
-
|
|
281
|
+
void search_and_reconstruct(
|
|
282
|
+
idx_t n,
|
|
283
|
+
const float* x,
|
|
284
|
+
idx_t k,
|
|
285
|
+
float* distances,
|
|
286
|
+
idx_t* labels,
|
|
287
|
+
float* recons) const override;
|
|
249
288
|
|
|
250
289
|
/** Reconstruct a vector given the location in terms of (inv list index +
|
|
251
290
|
* inv list offset) instead of the id.
|
|
@@ -254,9 +293,10 @@ struct IndexIVF: Index, Level1Quantizer {
|
|
|
254
293
|
* the inv list offset is computed by search_preassigned() with
|
|
255
294
|
* `store_pairs` set.
|
|
256
295
|
*/
|
|
257
|
-
virtual void reconstruct_from_offset
|
|
258
|
-
|
|
259
|
-
|
|
296
|
+
virtual void reconstruct_from_offset(
|
|
297
|
+
int64_t list_no,
|
|
298
|
+
int64_t offset,
|
|
299
|
+
float* recons) const;
|
|
260
300
|
|
|
261
301
|
/// Dataset manipulation functions
|
|
262
302
|
|
|
@@ -265,12 +305,12 @@ struct IndexIVF: Index, Level1Quantizer {
|
|
|
265
305
|
/** check that the two indexes are compatible (ie, they are
|
|
266
306
|
* trained in the same way and have the same
|
|
267
307
|
* parameters). Otherwise throw. */
|
|
268
|
-
void check_compatible_for_merge
|
|
308
|
+
void check_compatible_for_merge(const IndexIVF& other) const;
|
|
269
309
|
|
|
270
310
|
/** moves the entries from another dataset to self. On output,
|
|
271
311
|
* other is empty. add_id is added to all moved ids (for
|
|
272
312
|
* sequential ids, this would be this->ntotal */
|
|
273
|
-
virtual void merge_from
|
|
313
|
+
virtual void merge_from(IndexIVF& other, idx_t add_id);
|
|
274
314
|
|
|
275
315
|
/** copy a subset of the entries index to the other index
|
|
276
316
|
*
|
|
@@ -279,34 +319,36 @@ struct IndexIVF: Index, Level1Quantizer {
|
|
|
279
319
|
* if subset_type == 2: copies inverted lists such that a1
|
|
280
320
|
* elements are left before and a2 elements are after
|
|
281
321
|
*/
|
|
282
|
-
virtual void copy_subset_to
|
|
283
|
-
|
|
322
|
+
virtual void copy_subset_to(
|
|
323
|
+
IndexIVF& other,
|
|
324
|
+
int subset_type,
|
|
325
|
+
idx_t a1,
|
|
326
|
+
idx_t a2) const;
|
|
284
327
|
|
|
285
328
|
~IndexIVF() override;
|
|
286
329
|
|
|
287
|
-
size_t get_list_size
|
|
288
|
-
|
|
330
|
+
size_t get_list_size(size_t list_no) const {
|
|
331
|
+
return invlists->list_size(list_no);
|
|
332
|
+
}
|
|
289
333
|
|
|
290
334
|
/** initialize a direct map
|
|
291
335
|
*
|
|
292
336
|
* @param new_maintain_direct_map if true, create a direct map,
|
|
293
337
|
* else clear it
|
|
294
338
|
*/
|
|
295
|
-
void make_direct_map
|
|
296
|
-
|
|
297
|
-
void set_direct_map_type (DirectMap::Type type);
|
|
339
|
+
void make_direct_map(bool new_maintain_direct_map = true);
|
|
298
340
|
|
|
341
|
+
void set_direct_map_type(DirectMap::Type type);
|
|
299
342
|
|
|
300
343
|
/// replace the inverted lists, old one is deallocated if own_invlists
|
|
301
|
-
void replace_invlists
|
|
344
|
+
void replace_invlists(InvertedLists* il, bool own = false);
|
|
302
345
|
|
|
303
346
|
/* The standalone codec interface (except sa_decode that is specific) */
|
|
304
|
-
size_t sa_code_size
|
|
347
|
+
size_t sa_code_size() const override;
|
|
305
348
|
|
|
306
|
-
void sa_encode
|
|
307
|
-
uint8_t *bytes) const override;
|
|
349
|
+
void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
|
|
308
350
|
|
|
309
|
-
IndexIVF
|
|
351
|
+
IndexIVF();
|
|
310
352
|
};
|
|
311
353
|
|
|
312
354
|
struct RangeQueryResult;
|
|
@@ -316,20 +358,28 @@ struct RangeQueryResult;
|
|
|
316
358
|
* distance_to_code and scan_codes can be called in multiple
|
|
317
359
|
* threads */
|
|
318
360
|
struct InvertedListScanner {
|
|
319
|
-
|
|
320
361
|
using idx_t = Index::idx_t;
|
|
321
362
|
|
|
363
|
+
idx_t list_no = -1; ///< remember current list
|
|
364
|
+
bool keep_max = false; ///< keep maximum instead of minimum
|
|
365
|
+
/// store positions in invlists rather than labels
|
|
366
|
+
bool store_pairs = false;
|
|
367
|
+
|
|
368
|
+
/// used in default implementation of scan_codes
|
|
369
|
+
size_t code_size = 0;
|
|
370
|
+
|
|
322
371
|
/// from now on we handle this query.
|
|
323
|
-
virtual void set_query
|
|
372
|
+
virtual void set_query(const float* query_vector) = 0;
|
|
324
373
|
|
|
325
374
|
/// following codes come from this inverted list
|
|
326
|
-
virtual void set_list
|
|
375
|
+
virtual void set_list(idx_t list_no, float coarse_dis) = 0;
|
|
327
376
|
|
|
328
377
|
/// compute a single query-to-code distance
|
|
329
|
-
virtual float distance_to_code
|
|
378
|
+
virtual float distance_to_code(const uint8_t* code) const = 0;
|
|
330
379
|
|
|
331
380
|
/** scan a set of codes, compute distances to current query and
|
|
332
|
-
* update heap of results if necessary.
|
|
381
|
+
* update heap of results if necessary. Default implementation
|
|
382
|
+
* calls distance_to_code.
|
|
333
383
|
*
|
|
334
384
|
* @param n number of codes to scan
|
|
335
385
|
* @param codes codes to scan (n * code_size)
|
|
@@ -339,45 +389,46 @@ struct InvertedListScanner {
|
|
|
339
389
|
* @param k heap size
|
|
340
390
|
* @return number of heap updates performed
|
|
341
391
|
*/
|
|
342
|
-
virtual size_t scan_codes
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
392
|
+
virtual size_t scan_codes(
|
|
393
|
+
size_t n,
|
|
394
|
+
const uint8_t* codes,
|
|
395
|
+
const idx_t* ids,
|
|
396
|
+
float* distances,
|
|
397
|
+
idx_t* labels,
|
|
398
|
+
size_t k) const;
|
|
347
399
|
|
|
348
400
|
/** scan a set of codes, compute distances to current query and
|
|
349
401
|
* update results if distances are below radius
|
|
350
402
|
*
|
|
351
403
|
* (default implementation fails) */
|
|
352
|
-
virtual void scan_codes_range
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
404
|
+
virtual void scan_codes_range(
|
|
405
|
+
size_t n,
|
|
406
|
+
const uint8_t* codes,
|
|
407
|
+
const idx_t* ids,
|
|
408
|
+
float radius,
|
|
409
|
+
RangeQueryResult& result) const;
|
|
410
|
+
|
|
411
|
+
virtual ~InvertedListScanner() {}
|
|
360
412
|
};
|
|
361
413
|
|
|
362
|
-
|
|
363
414
|
struct IndexIVFStats {
|
|
364
|
-
size_t nq;
|
|
365
|
-
size_t nlist;
|
|
366
|
-
size_t ndis;
|
|
367
|
-
size_t nheap_updates;
|
|
415
|
+
size_t nq; // nb of queries run
|
|
416
|
+
size_t nlist; // nb of inverted lists scanned
|
|
417
|
+
size_t ndis; // nb of distances computed
|
|
418
|
+
size_t nheap_updates; // nb of times the heap was updated
|
|
368
419
|
double quantization_time; // time spent quantizing vectors (in ms)
|
|
369
420
|
double search_time; // time spent searching lists (in ms)
|
|
370
421
|
|
|
371
|
-
IndexIVFStats
|
|
372
|
-
|
|
373
|
-
|
|
422
|
+
IndexIVFStats() {
|
|
423
|
+
reset();
|
|
424
|
+
}
|
|
425
|
+
void reset();
|
|
426
|
+
void add(const IndexIVFStats& other);
|
|
374
427
|
};
|
|
375
428
|
|
|
376
429
|
// global var that collects them all
|
|
377
430
|
FAISS_API extern IndexIVFStats indexIVF_stats;
|
|
378
431
|
|
|
379
|
-
|
|
380
432
|
} // namespace faiss
|
|
381
433
|
|
|
382
|
-
|
|
383
434
|
#endif
|