faiss 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +334 -195
- data/vendor/faiss/faiss/Clustering.h +88 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
- data/vendor/faiss/faiss/Index2Layer.h +22 -22
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
- data/vendor/faiss/faiss/IndexFlat.h +35 -46
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
- data/vendor/faiss/faiss/IndexIVF.h +146 -113
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
- data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
- data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
- data/vendor/faiss/faiss/IndexLSH.h +21 -26
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
- data/vendor/faiss/faiss/IndexPQ.h +64 -67
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
- data/vendor/faiss/faiss/IndexRefine.h +22 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
- data/vendor/faiss/faiss/IndexResidual.h +152 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
- data/vendor/faiss/faiss/VectorTransform.h +61 -89
- data/vendor/faiss/faiss/clone_index.cpp +77 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
- data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
- data/vendor/faiss/faiss/impl/io.cpp +75 -94
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +40 -29
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +269 -218
- data/vendor/faiss/faiss/index_factory.h +6 -7
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +301 -310
- data/vendor/faiss/faiss/utils/distances.h +133 -118
- data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +53 -48
- metadata +20 -2
data/vendor/faiss/faiss/IVFlib.h
CHANGED
@@ -16,35 +16,34 @@
|
|
16
16
|
* IndexIVFs embedded within an IndexPreTransform.
|
17
17
|
*/
|
18
18
|
|
19
|
-
#include <vector>
|
20
19
|
#include <faiss/IndexIVF.h>
|
20
|
+
#include <vector>
|
21
21
|
|
22
|
-
namespace faiss {
|
23
|
-
|
22
|
+
namespace faiss {
|
23
|
+
namespace ivflib {
|
24
24
|
|
25
25
|
/** check if two indexes have the same parameters and are trained in
|
26
26
|
* the same way, otherwise throw. */
|
27
|
-
void check_compatible_for_merge
|
28
|
-
const Index * index2);
|
27
|
+
void check_compatible_for_merge(const Index* index1, const Index* index2);
|
29
28
|
|
30
29
|
/** get an IndexIVF from an index. The index may be an IndexIVF or
|
31
30
|
* some wrapper class that encloses an IndexIVF
|
32
31
|
*
|
33
32
|
* throws an exception if this is not the case.
|
34
33
|
*/
|
35
|
-
const IndexIVF
|
36
|
-
IndexIVF
|
34
|
+
const IndexIVF* extract_index_ivf(const Index* index);
|
35
|
+
IndexIVF* extract_index_ivf(Index* index);
|
37
36
|
|
38
37
|
/// same as above but returns nullptr instead of throwing on failure
|
39
|
-
const IndexIVF
|
40
|
-
IndexIVF
|
38
|
+
const IndexIVF* try_extract_index_ivf(const Index* index);
|
39
|
+
IndexIVF* try_extract_index_ivf(Index* index);
|
41
40
|
|
42
41
|
/** Merge index1 into index0. Works on IndexIVF's and IndexIVF's
|
43
42
|
* embedded in an IndexPreTransform. On output, the index1 is empty.
|
44
43
|
*
|
45
44
|
* @param shift_ids: translate the ids from index1 to index0->prev_ntotal
|
46
45
|
*/
|
47
|
-
void merge_into(Index
|
46
|
+
void merge_into(Index* index0, Index* index1, bool shift_ids);
|
48
47
|
|
49
48
|
typedef Index::idx_t idx_t;
|
50
49
|
|
@@ -57,9 +56,7 @@ typedef Index::idx_t idx_t;
|
|
57
56
|
* @param centroid_ids
|
58
57
|
* cluster id each object belongs to, size num_objects
|
59
58
|
*/
|
60
|
-
void search_centroid(Index
|
61
|
-
const float* x, int n,
|
62
|
-
idx_t* centroid_ids);
|
59
|
+
void search_centroid(Index* index, const float* x, int n, idx_t* centroid_ids);
|
63
60
|
|
64
61
|
/* Returns the cluster the embeddings belong to.
|
65
62
|
*
|
@@ -71,25 +68,25 @@ void search_centroid(Index *index,
|
|
71
68
|
* centroid ids corresponding to the results (size n * k)
|
72
69
|
* other arguments are the same as the standard search function
|
73
70
|
*/
|
74
|
-
void search_and_return_centroids(
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
71
|
+
void search_and_return_centroids(
|
72
|
+
Index* index,
|
73
|
+
size_t n,
|
74
|
+
const float* xin,
|
75
|
+
long k,
|
76
|
+
float* distances,
|
77
|
+
idx_t* labels,
|
78
|
+
idx_t* query_centroid_ids,
|
79
|
+
idx_t* result_centroid_ids);
|
83
80
|
|
84
81
|
/** A set of IndexIVFs concatenated together in a FIFO fashion.
|
85
82
|
* at each "step", the oldest index slice is removed and a new index is added.
|
86
83
|
*/
|
87
84
|
struct SlidingIndexWindow {
|
88
85
|
/// common index that contains the sliding window
|
89
|
-
Index
|
86
|
+
Index* index;
|
90
87
|
|
91
88
|
/// InvertedLists of index
|
92
|
-
ArrayInvertedLists
|
89
|
+
ArrayInvertedLists* ils;
|
93
90
|
|
94
91
|
/// number of slices currently in index
|
95
92
|
int n_slice;
|
@@ -98,27 +95,23 @@ struct SlidingIndexWindow {
|
|
98
95
|
size_t nlist;
|
99
96
|
|
100
97
|
/// cumulative list sizes at each slice
|
101
|
-
std::vector<std::vector<size_t
|
98
|
+
std::vector<std::vector<size_t>> sizes;
|
102
99
|
|
103
100
|
/// index should be initially empty and trained
|
104
|
-
SlidingIndexWindow
|
101
|
+
SlidingIndexWindow(Index* index);
|
105
102
|
|
106
103
|
/** Add one index to the current index and remove the oldest one.
|
107
104
|
*
|
108
105
|
* @param sub_index slice to swap in (can be NULL)
|
109
106
|
* @param remove_oldest if true, remove the oldest slices */
|
110
|
-
void step(const Index
|
111
|
-
|
107
|
+
void step(const Index* sub_index, bool remove_oldest);
|
112
108
|
};
|
113
109
|
|
114
|
-
|
115
110
|
/// Get a subset of inverted lists [i0, i1)
|
116
|
-
ArrayInvertedLists
|
117
|
-
long i0, long i1);
|
111
|
+
ArrayInvertedLists* get_invlist_range(const Index* index, long i0, long i1);
|
118
112
|
|
119
113
|
/// Set a subset of inverted lists
|
120
|
-
void set_invlist_range
|
121
|
-
ArrayInvertedLists * src);
|
114
|
+
void set_invlist_range(Index* index, long i0, long i1, ArrayInvertedLists* src);
|
122
115
|
|
123
116
|
/** search an IndexIVF, possibly embedded in an IndexPreTransform with
|
124
117
|
* given parameters. This is a way to set the nprobe and get
|
@@ -130,25 +123,29 @@ void set_invlist_range (Index *index, long i0, long i1,
|
|
130
123
|
* [1]: coarse quantization,
|
131
124
|
* [2]: list scanning
|
132
125
|
*/
|
133
|
-
void search_with_parameters
|
134
|
-
const Index
|
135
|
-
idx_t n,
|
136
|
-
float
|
137
|
-
|
138
|
-
|
139
|
-
|
126
|
+
void search_with_parameters(
|
127
|
+
const Index* index,
|
128
|
+
idx_t n,
|
129
|
+
const float* x,
|
130
|
+
idx_t k,
|
131
|
+
float* distances,
|
132
|
+
idx_t* labels,
|
133
|
+
const IVFSearchParameters* params,
|
134
|
+
size_t* nb_dis = nullptr,
|
135
|
+
double* ms_per_stage = nullptr);
|
140
136
|
|
141
137
|
/** same as search_with_parameters but for range search */
|
142
|
-
void range_search_with_parameters
|
143
|
-
const Index
|
144
|
-
idx_t n,
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
}
|
138
|
+
void range_search_with_parameters(
|
139
|
+
const Index* index,
|
140
|
+
idx_t n,
|
141
|
+
const float* x,
|
142
|
+
float radius,
|
143
|
+
RangeSearchResult* result,
|
144
|
+
const IVFSearchParameters* params,
|
145
|
+
size_t* nb_dis = nullptr,
|
146
|
+
double* ms_per_stage = nullptr);
|
147
|
+
|
148
|
+
} // namespace ivflib
|
149
|
+
} // namespace faiss
|
153
150
|
|
154
151
|
#endif
|
data/vendor/faiss/faiss/Index.cpp
CHANGED
@@ -15,156 +15,138 @@
|
|
15
15
|
|
16
16
|
#include <cstring>
|
17
17
|
|
18
|
-
|
19
18
|
namespace faiss {
|
20
19
|
|
21
|
-
Index::~Index
|
22
|
-
{
|
23
|
-
}
|
24
|
-
|
20
|
+
Index::~Index() {}
|
25
21
|
|
26
22
|
void Index::train(idx_t /*n*/, const float* /*x*/) {
|
27
23
|
// does nothing by default
|
28
24
|
}
|
29
25
|
|
30
|
-
|
31
|
-
|
32
|
-
RangeSearchResult *) const
|
33
|
-
{
|
34
|
-
FAISS_THROW_MSG ("range search not implemented");
|
26
|
+
void Index::range_search(idx_t, const float*, float, RangeSearchResult*) const {
|
27
|
+
FAISS_THROW_MSG("range search not implemented");
|
35
28
|
}
|
36
29
|
|
37
|
-
void Index::assign
|
38
|
-
|
39
|
-
|
40
|
-
search (n, x, k, distances.data(), labels);
|
30
|
+
void Index::assign(idx_t n, const float* x, idx_t* labels, idx_t k) const {
|
31
|
+
std::vector<float> distances(n * k);
|
32
|
+
search(n, x, k, distances.data(), labels);
|
41
33
|
}
|
42
34
|
|
43
35
|
void Index::add_with_ids(
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
36
|
+
idx_t /*n*/,
|
37
|
+
const float* /*x*/,
|
38
|
+
const idx_t* /*xids*/) {
|
39
|
+
FAISS_THROW_MSG("add_with_ids not implemented for this type of index");
|
48
40
|
}
|
49
41
|
|
50
42
|
size_t Index::remove_ids(const IDSelector& /*sel*/) {
|
51
|
-
|
52
|
-
|
43
|
+
FAISS_THROW_MSG("remove_ids not implemented for this type of index");
|
44
|
+
return -1;
|
53
45
|
}
|
54
46
|
|
55
|
-
|
56
|
-
|
57
|
-
FAISS_THROW_MSG ("reconstruct not implemented for this type of index");
|
47
|
+
void Index::reconstruct(idx_t, float*) const {
|
48
|
+
FAISS_THROW_MSG("reconstruct not implemented for this type of index");
|
58
49
|
}
|
59
50
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
}
|
51
|
+
void Index::reconstruct_n(idx_t i0, idx_t ni, float* recons) const {
|
52
|
+
for (idx_t i = 0; i < ni; i++) {
|
53
|
+
reconstruct(i0 + i, recons + i * d);
|
54
|
+
}
|
65
55
|
}
|
66
56
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
57
|
+
void Index::search_and_reconstruct(
|
58
|
+
idx_t n,
|
59
|
+
const float* x,
|
60
|
+
idx_t k,
|
61
|
+
float* distances,
|
62
|
+
idx_t* labels,
|
63
|
+
float* recons) const {
|
64
|
+
FAISS_THROW_IF_NOT(k > 0);
|
65
|
+
|
66
|
+
search(n, x, k, distances, labels);
|
67
|
+
for (idx_t i = 0; i < n; ++i) {
|
68
|
+
for (idx_t j = 0; j < k; ++j) {
|
69
|
+
idx_t ij = i * k + j;
|
70
|
+
idx_t key = labels[ij];
|
71
|
+
float* reconstructed = recons + ij * d;
|
72
|
+
if (key < 0) {
|
73
|
+
// Fill with NaNs
|
74
|
+
memset(reconstructed, -1, sizeof(*reconstructed) * d);
|
75
|
+
} else {
|
76
|
+
reconstruct(key, reconstructed);
|
77
|
+
}
|
78
|
+
}
|
83
79
|
}
|
84
|
-
}
|
85
80
|
}
|
86
81
|
|
87
|
-
void Index::compute_residual
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
}
|
82
|
+
void Index::compute_residual(const float* x, float* residual, idx_t key) const {
|
83
|
+
reconstruct(key, residual);
|
84
|
+
for (size_t i = 0; i < d; i++) {
|
85
|
+
residual[i] = x[i] - residual[i];
|
86
|
+
}
|
93
87
|
}
|
94
88
|
|
95
|
-
void Index::compute_residual_n
|
96
|
-
|
97
|
-
|
89
|
+
void Index::compute_residual_n(
|
90
|
+
idx_t n,
|
91
|
+
const float* xs,
|
92
|
+
float* residuals,
|
93
|
+
const idx_t* keys) const {
|
98
94
|
#pragma omp parallel for
|
99
|
-
|
100
|
-
|
101
|
-
|
95
|
+
for (idx_t i = 0; i < n; ++i) {
|
96
|
+
compute_residual(&xs[i * d], &residuals[i * d], keys[i]);
|
97
|
+
}
|
102
98
|
}
|
103
99
|
|
104
|
-
|
105
|
-
|
106
|
-
size_t Index::sa_code_size () const
|
107
|
-
{
|
108
|
-
FAISS_THROW_MSG ("standalone codec not implemented for this type of index");
|
100
|
+
size_t Index::sa_code_size() const {
|
101
|
+
FAISS_THROW_MSG("standalone codec not implemented for this type of index");
|
109
102
|
}
|
110
103
|
|
111
|
-
void Index::sa_encode
|
112
|
-
|
113
|
-
{
|
114
|
-
FAISS_THROW_MSG ("standalone codec not implemented for this type of index");
|
104
|
+
void Index::sa_encode(idx_t, const float*, uint8_t*) const {
|
105
|
+
FAISS_THROW_MSG("standalone codec not implemented for this type of index");
|
115
106
|
}
|
116
107
|
|
117
|
-
void Index::sa_decode
|
118
|
-
|
119
|
-
{
|
120
|
-
FAISS_THROW_MSG ("standalone codec not implemented for this type of index");
|
108
|
+
void Index::sa_decode(idx_t, const uint8_t*, float*) const {
|
109
|
+
FAISS_THROW_MSG("standalone codec not implemented for this type of index");
|
121
110
|
}
|
122
111
|
|
123
|
-
|
124
112
|
namespace {
|
125
113
|
|
126
|
-
|
127
114
|
// storage that explicitly reconstructs vectors before computing distances
|
128
115
|
struct GenericDistanceComputer : DistanceComputer {
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
}
|
139
|
-
|
140
|
-
float operator () (idx_t i) override {
|
141
|
-
storage.reconstruct(i, buf.data());
|
142
|
-
return fvec_L2sqr(q, buf.data(), d);
|
143
|
-
}
|
144
|
-
|
145
|
-
float symmetric_dis(idx_t i, idx_t j) override {
|
146
|
-
storage.reconstruct(i, buf.data());
|
147
|
-
storage.reconstruct(j, buf.data() + d);
|
148
|
-
return fvec_L2sqr(buf.data() + d, buf.data(), d);
|
149
|
-
}
|
150
|
-
|
151
|
-
void set_query(const float *x) override {
|
152
|
-
q = x;
|
153
|
-
}
|
116
|
+
size_t d;
|
117
|
+
const Index& storage;
|
118
|
+
std::vector<float> buf;
|
119
|
+
const float* q;
|
120
|
+
|
121
|
+
explicit GenericDistanceComputer(const Index& storage) : storage(storage) {
|
122
|
+
d = storage.d;
|
123
|
+
buf.resize(d * 2);
|
124
|
+
}
|
154
125
|
|
155
|
-
|
126
|
+
float operator()(idx_t i) override {
|
127
|
+
storage.reconstruct(i, buf.data());
|
128
|
+
return fvec_L2sqr(q, buf.data(), d);
|
129
|
+
}
|
156
130
|
|
131
|
+
float symmetric_dis(idx_t i, idx_t j) override {
|
132
|
+
storage.reconstruct(i, buf.data());
|
133
|
+
storage.reconstruct(j, buf.data() + d);
|
134
|
+
return fvec_L2sqr(buf.data() + d, buf.data(), d);
|
135
|
+
}
|
157
136
|
|
158
|
-
|
137
|
+
void set_query(const float* x) override {
|
138
|
+
q = x;
|
139
|
+
}
|
140
|
+
};
|
159
141
|
|
142
|
+
} // namespace
|
160
143
|
|
161
|
-
DistanceComputer
|
144
|
+
DistanceComputer* Index::get_distance_computer() const {
|
162
145
|
if (metric_type == METRIC_L2) {
|
163
146
|
return new GenericDistanceComputer(*this);
|
164
147
|
} else {
|
165
|
-
FAISS_THROW_MSG
|
148
|
+
FAISS_THROW_MSG("get_distance_computer() not implemented");
|
166
149
|
}
|
167
150
|
}
|
168
151
|
|
169
|
-
|
170
|
-
}
|
152
|
+
} // namespace faiss
|
data/vendor/faiss/faiss/Index.h
CHANGED
@@ -12,13 +12,13 @@
|
|
12
12
|
|
13
13
|
#include <faiss/MetricType.h>
|
14
14
|
#include <cstdio>
|
15
|
-
#include <typeinfo>
|
16
|
-
#include <string>
|
17
15
|
#include <sstream>
|
16
|
+
#include <string>
|
17
|
+
#include <typeinfo>
|
18
18
|
|
19
19
|
#define FAISS_VERSION_MAJOR 1
|
20
20
|
#define FAISS_VERSION_MINOR 7
|
21
|
-
#define FAISS_VERSION_PATCH
|
21
|
+
#define FAISS_VERSION_PATCH 1
|
22
22
|
|
23
23
|
/**
|
24
24
|
* @namespace faiss
|
@@ -36,7 +36,6 @@
|
|
36
36
|
* an n*d matrix, which implies a row-major storage.
|
37
37
|
*/
|
38
38
|
|
39
|
-
|
40
39
|
namespace faiss {
|
41
40
|
|
42
41
|
/// Forward declarations see AuxIndexStructures.h
|
@@ -50,13 +49,13 @@ struct DistanceComputer;
|
|
50
49
|
* although the internal representation may vary.
|
51
50
|
*/
|
52
51
|
struct Index {
|
53
|
-
using idx_t = int64_t;
|
52
|
+
using idx_t = int64_t; ///< all indices are this type
|
54
53
|
using component_t = float;
|
55
54
|
using distance_t = float;
|
56
55
|
|
57
|
-
int d;
|
58
|
-
idx_t ntotal;
|
59
|
-
bool verbose;
|
56
|
+
int d; ///< vector dimension
|
57
|
+
idx_t ntotal; ///< total nb of indexed vectors
|
58
|
+
bool verbose; ///< verbosity level
|
60
59
|
|
61
60
|
/// set if the Index does not require training, or if training is
|
62
61
|
/// done already
|
@@ -64,18 +63,17 @@ struct Index {
|
|
64
63
|
|
65
64
|
/// type of metric this index uses for search
|
66
65
|
MetricType metric_type;
|
67
|
-
float metric_arg;
|
66
|
+
float metric_arg; ///< argument of the metric type
|
68
67
|
|
69
|
-
explicit Index
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
virtual ~Index ();
|
68
|
+
explicit Index(idx_t d = 0, MetricType metric = METRIC_L2)
|
69
|
+
: d(d),
|
70
|
+
ntotal(0),
|
71
|
+
verbose(false),
|
72
|
+
is_trained(true),
|
73
|
+
metric_type(metric),
|
74
|
+
metric_arg(0) {}
|
78
75
|
|
76
|
+
virtual ~Index();
|
79
77
|
|
80
78
|
/** Perform training on a representative set of vectors
|
81
79
|
*
|
@@ -87,11 +85,11 @@ struct Index {
|
|
87
85
|
/** Add n vectors of dimension d to the index.
|
88
86
|
*
|
89
87
|
* Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
|
90
|
-
* This function slices the input vectors in
|
88
|
+
* This function slices the input vectors in chunks smaller than
|
91
89
|
* blocksize_add and calls add_core.
|
92
90
|
* @param x input matrix, size n * d
|
93
91
|
*/
|
94
|
-
virtual void add
|
92
|
+
virtual void add(idx_t n, const float* x) = 0;
|
95
93
|
|
96
94
|
/** Same as add, but stores xids instead of sequential ids.
|
97
95
|
*
|
@@ -100,7 +98,7 @@ struct Index {
|
|
100
98
|
*
|
101
99
|
* @param xids if non-null, ids to store for the vectors (size n)
|
102
100
|
*/
|
103
|
-
virtual void add_with_ids
|
101
|
+
virtual void add_with_ids(idx_t n, const float* x, const idx_t* xids);
|
104
102
|
|
105
103
|
/** query n vectors of dimension d to the index.
|
106
104
|
*
|
@@ -111,8 +109,12 @@ struct Index {
|
|
111
109
|
* @param labels output labels of the NNs, size n*k
|
112
110
|
* @param distances output pairwise distances, size n*k
|
113
111
|
*/
|
114
|
-
virtual void search
|
115
|
-
|
112
|
+
virtual void search(
|
113
|
+
idx_t n,
|
114
|
+
const float* x,
|
115
|
+
idx_t k,
|
116
|
+
float* distances,
|
117
|
+
idx_t* labels) const = 0;
|
116
118
|
|
117
119
|
/** query n vectors of dimension d to the index.
|
118
120
|
*
|
@@ -124,8 +126,11 @@ struct Index {
|
|
124
126
|
* @param radius search radius
|
125
127
|
* @param result result table
|
126
128
|
*/
|
127
|
-
virtual void range_search
|
128
|
-
|
129
|
+
virtual void range_search(
|
130
|
+
idx_t n,
|
131
|
+
const float* x,
|
132
|
+
float radius,
|
133
|
+
RangeSearchResult* result) const;
|
129
134
|
|
130
135
|
/** return the indexes of the k vectors closest to the query x.
|
131
136
|
*
|
@@ -133,7 +138,8 @@ struct Index {
|
|
133
138
|
* @param x input vectors to search, size n * d
|
134
139
|
* @param labels output labels of the NNs, size n*k
|
135
140
|
*/
|
136
|
-
virtual void assign
|
141
|
+
virtual void assign(idx_t n, const float* x, idx_t* labels, idx_t k = 1)
|
142
|
+
const;
|
137
143
|
|
138
144
|
/// removes all elements from the database.
|
139
145
|
virtual void reset() = 0;
|
@@ -141,7 +147,7 @@ struct Index {
|
|
141
147
|
/** removes IDs from the index. Not supported by all
|
142
148
|
* indexes. Returns the number of elements removed.
|
143
149
|
*/
|
144
|
-
virtual size_t remove_ids
|
150
|
+
virtual size_t remove_ids(const IDSelector& sel);
|
145
151
|
|
146
152
|
/** Reconstruct a stored vector (or an approximation if lossy coding)
|
147
153
|
*
|
@@ -149,14 +155,14 @@ struct Index {
|
|
149
155
|
* @param key id of the vector to reconstruct
|
150
156
|
* @param recons reconstructed vector (size d)
|
151
157
|
*/
|
152
|
-
virtual void reconstruct
|
158
|
+
virtual void reconstruct(idx_t key, float* recons) const;
|
153
159
|
|
154
160
|
/** Reconstruct vectors i0 to i0 + ni - 1
|
155
161
|
*
|
156
162
|
* this function may not be defined for some indexes
|
157
163
|
* @param recons reconstructed vector (size ni * d)
|
158
164
|
*/
|
159
|
-
virtual void reconstruct_n
|
165
|
+
virtual void reconstruct_n(idx_t i0, idx_t ni, float* recons) const;
|
160
166
|
|
161
167
|
/** Similar to search, but also reconstructs the stored vectors (or an
|
162
168
|
* approximation in the case of lossy coding) for the search results.
|
@@ -166,9 +172,13 @@ struct Index {
|
|
166
172
|
*
|
167
173
|
* @param recons reconstructed vectors size (n, k, d)
|
168
174
|
**/
|
169
|
-
virtual void search_and_reconstruct
|
170
|
-
|
171
|
-
|
175
|
+
virtual void search_and_reconstruct(
|
176
|
+
idx_t n,
|
177
|
+
const float* x,
|
178
|
+
idx_t k,
|
179
|
+
float* distances,
|
180
|
+
idx_t* labels,
|
181
|
+
float* recons) const;
|
172
182
|
|
173
183
|
/** Computes a residual vector after indexing encoding.
|
174
184
|
*
|
@@ -181,8 +191,8 @@ struct Index {
|
|
181
191
|
* @param residual output residual vector, size d
|
182
192
|
* @param key encoded index, as returned by search and assign
|
183
193
|
*/
|
184
|
-
virtual void compute_residual
|
185
|
-
|
194
|
+
virtual void compute_residual(const float* x, float* residual, idx_t key)
|
195
|
+
const;
|
186
196
|
|
187
197
|
/** Computes a residual vector after indexing encoding (batch form).
|
188
198
|
* Equivalent to calling compute_residual for each vector.
|
@@ -197,9 +207,11 @@ struct Index {
|
|
197
207
|
* @param residuals output residual vectors, size (n x d)
|
198
208
|
* @param keys encoded index, as returned by search and assign
|
199
209
|
*/
|
200
|
-
virtual void compute_residual_n
|
201
|
-
|
202
|
-
|
210
|
+
virtual void compute_residual_n(
|
211
|
+
idx_t n,
|
212
|
+
const float* xs,
|
213
|
+
float* residuals,
|
214
|
+
const idx_t* keys) const;
|
203
215
|
|
204
216
|
/** Get a DistanceComputer (defined in AuxIndexStructures) object
|
205
217
|
* for this kind of index.
|
@@ -207,13 +219,12 @@ struct Index {
|
|
207
219
|
* DistanceComputer is implemented for indexes that support random
|
208
220
|
* access of their vectors.
|
209
221
|
*/
|
210
|
-
virtual DistanceComputer
|
211
|
-
|
222
|
+
virtual DistanceComputer* get_distance_computer() const;
|
212
223
|
|
213
224
|
/* The standalone codec interface */
|
214
225
|
|
215
226
|
/** size of the produced codes in bytes */
|
216
|
-
virtual size_t sa_code_size
|
227
|
+
virtual size_t sa_code_size() const;
|
217
228
|
|
218
229
|
/** encode a set of vectors
|
219
230
|
*
|
@@ -221,8 +232,7 @@ struct Index {
|
|
221
232
|
* @param x input vectors, size n * d
|
222
233
|
* @param bytes output encoded vectors, size n * sa_code_size()
|
223
234
|
*/
|
224
|
-
virtual void sa_encode
|
225
|
-
uint8_t *bytes) const;
|
235
|
+
virtual void sa_encode(idx_t n, const float* x, uint8_t* bytes) const;
|
226
236
|
|
227
237
|
/** decode a set of vectors
|
228
238
|
*
|
@@ -230,13 +240,9 @@ struct Index {
|
|
230
240
|
* @param bytes input encoded vectors, size n * sa_code_size()
|
231
241
|
* @param x output vectors, size n * d
|
232
242
|
*/
|
233
|
-
virtual void sa_decode
|
234
|
-
float *x) const;
|
235
|
-
|
236
|
-
|
243
|
+
virtual void sa_decode(idx_t n, const uint8_t* bytes, float* x) const;
|
237
244
|
};
|
238
245
|
|
239
|
-
}
|
240
|
-
|
246
|
+
} // namespace faiss
|
241
247
|
|
242
248
|
#endif
|