faiss 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +334 -195
- data/vendor/faiss/faiss/Clustering.h +88 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
- data/vendor/faiss/faiss/Index2Layer.h +22 -22
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
- data/vendor/faiss/faiss/IndexFlat.h +35 -46
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
- data/vendor/faiss/faiss/IndexIVF.h +146 -113
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
- data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
- data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
- data/vendor/faiss/faiss/IndexLSH.h +21 -26
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
- data/vendor/faiss/faiss/IndexPQ.h +64 -67
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
- data/vendor/faiss/faiss/IndexRefine.h +22 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
- data/vendor/faiss/faiss/IndexResidual.h +152 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
- data/vendor/faiss/faiss/VectorTransform.h +61 -89
- data/vendor/faiss/faiss/clone_index.cpp +77 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
- data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
- data/vendor/faiss/faiss/impl/io.cpp +75 -94
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +40 -29
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +269 -218
- data/vendor/faiss/faiss/index_factory.h +6 -7
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +301 -310
- data/vendor/faiss/faiss/utils/distances.h +133 -118
- data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +53 -48
- metadata +20 -2
|
@@ -5,8 +5,6 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
// -*- c++ -*-
|
|
9
|
-
|
|
10
8
|
#ifndef FAISS_INDEX_PQ_H
|
|
11
9
|
#define FAISS_INDEX_PQ_H
|
|
12
10
|
|
|
@@ -15,18 +13,15 @@
|
|
|
15
13
|
#include <vector>
|
|
16
14
|
|
|
17
15
|
#include <faiss/Index.h>
|
|
18
|
-
#include <faiss/impl/ProductQuantizer.h>
|
|
19
16
|
#include <faiss/impl/PolysemousTraining.h>
|
|
17
|
+
#include <faiss/impl/ProductQuantizer.h>
|
|
20
18
|
#include <faiss/impl/platform_macros.h>
|
|
21
19
|
|
|
22
|
-
|
|
23
20
|
namespace faiss {
|
|
24
21
|
|
|
25
|
-
|
|
26
22
|
/** Index based on a product quantizer. Stored vectors are
|
|
27
23
|
* approximated by PQ codes. */
|
|
28
|
-
struct IndexPQ: Index {
|
|
29
|
-
|
|
24
|
+
struct IndexPQ : Index {
|
|
30
25
|
/// The product quantizer used to encode the vectors
|
|
31
26
|
ProductQuantizer pq;
|
|
32
27
|
|
|
@@ -39,23 +34,23 @@ struct IndexPQ: Index {
|
|
|
39
34
|
* @param M number of subquantizers
|
|
40
35
|
* @param nbits number of bit per subvector index
|
|
41
36
|
*/
|
|
42
|
-
IndexPQ
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
37
|
+
IndexPQ(int d, ///< dimensionality of the input vectors
|
|
38
|
+
size_t M, ///< number of subquantizers
|
|
39
|
+
size_t nbits, ///< number of bit per subvector index
|
|
40
|
+
MetricType metric = METRIC_L2);
|
|
46
41
|
|
|
47
|
-
IndexPQ
|
|
42
|
+
IndexPQ();
|
|
48
43
|
|
|
49
44
|
void train(idx_t n, const float* x) override;
|
|
50
45
|
|
|
51
46
|
void add(idx_t n, const float* x) override;
|
|
52
47
|
|
|
53
48
|
void search(
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
49
|
+
idx_t n,
|
|
50
|
+
const float* x,
|
|
51
|
+
idx_t k,
|
|
52
|
+
float* distances,
|
|
53
|
+
idx_t* labels) const override;
|
|
59
54
|
|
|
60
55
|
void reset() override;
|
|
61
56
|
|
|
@@ -66,16 +61,13 @@ struct IndexPQ: Index {
|
|
|
66
61
|
size_t remove_ids(const IDSelector& sel) override;
|
|
67
62
|
|
|
68
63
|
/* The standalone codec interface */
|
|
69
|
-
size_t sa_code_size
|
|
64
|
+
size_t sa_code_size() const override;
|
|
70
65
|
|
|
71
|
-
void sa_encode
|
|
72
|
-
uint8_t *bytes) const override;
|
|
66
|
+
void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
|
|
73
67
|
|
|
74
|
-
void sa_decode
|
|
75
|
-
float *x) const override;
|
|
68
|
+
void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
|
|
76
69
|
|
|
77
|
-
|
|
78
|
-
DistanceComputer * get_distance_computer() const override;
|
|
70
|
+
DistanceComputer* get_distance_computer() const override;
|
|
79
71
|
|
|
80
72
|
/******************************************************
|
|
81
73
|
* Polysemous codes implementation
|
|
@@ -87,12 +79,12 @@ struct IndexPQ: Index {
|
|
|
87
79
|
|
|
88
80
|
/// how to perform the search in search_core
|
|
89
81
|
enum Search_type_t {
|
|
90
|
-
ST_PQ,
|
|
91
|
-
ST_HE,
|
|
92
|
-
ST_generalized_HE,
|
|
93
|
-
ST_SDC,
|
|
94
|
-
ST_polysemous,
|
|
95
|
-
ST_polysemous_generalize,
|
|
82
|
+
ST_PQ, ///< asymmetric product quantizer (default)
|
|
83
|
+
ST_HE, ///< Hamming distance on codes
|
|
84
|
+
ST_generalized_HE, ///< nb of same codes
|
|
85
|
+
ST_SDC, ///< symmetric product quantizer (SDC)
|
|
86
|
+
ST_polysemous, ///< HE filter (using ht) + PQ combination
|
|
87
|
+
ST_polysemous_generalize, ///< Filter on generalized Hamming
|
|
96
88
|
};
|
|
97
89
|
|
|
98
90
|
Search_type_t search_type;
|
|
@@ -105,16 +97,23 @@ struct IndexPQ: Index {
|
|
|
105
97
|
int polysemous_ht;
|
|
106
98
|
|
|
107
99
|
// actual polysemous search
|
|
108
|
-
void search_core_polysemous
|
|
109
|
-
|
|
100
|
+
void search_core_polysemous(
|
|
101
|
+
idx_t n,
|
|
102
|
+
const float* x,
|
|
103
|
+
idx_t k,
|
|
104
|
+
float* distances,
|
|
105
|
+
idx_t* labels) const;
|
|
110
106
|
|
|
111
107
|
/// prepare query for a polysemous search, but instead of
|
|
112
108
|
/// computing the result, just get the histogram of Hamming
|
|
113
109
|
/// distances. May be computed on a provided dataset if xb != NULL
|
|
114
110
|
/// @param dist_histogram (M * nbits + 1)
|
|
115
|
-
void hamming_distance_histogram
|
|
116
|
-
|
|
117
|
-
|
|
111
|
+
void hamming_distance_histogram(
|
|
112
|
+
idx_t n,
|
|
113
|
+
const float* x,
|
|
114
|
+
idx_t nb,
|
|
115
|
+
const float* xb,
|
|
116
|
+
int64_t* dist_histogram);
|
|
118
117
|
|
|
119
118
|
/** compute pairwise distances between queries and database
|
|
120
119
|
*
|
|
@@ -122,80 +121,78 @@ struct IndexPQ: Index {
|
|
|
122
121
|
* @param x query vector, size n * d
|
|
123
122
|
* @param dis output distances, size n * ntotal
|
|
124
123
|
*/
|
|
125
|
-
void hamming_distance_table
|
|
126
|
-
int32_t *dis) const;
|
|
127
|
-
|
|
124
|
+
void hamming_distance_table(idx_t n, const float* x, int32_t* dis) const;
|
|
128
125
|
};
|
|
129
126
|
|
|
130
|
-
|
|
131
127
|
/// statistics are robust to internal threading, but not if
|
|
132
128
|
/// IndexPQ::search is called by multiple threads
|
|
133
129
|
struct IndexPQStats {
|
|
134
|
-
size_t nq;
|
|
135
|
-
size_t ncode;
|
|
130
|
+
size_t nq; // nb of queries run
|
|
131
|
+
size_t ncode; // nb of codes visited
|
|
136
132
|
|
|
137
133
|
size_t n_hamming_pass; // nb of passed Hamming distance tests (for polysemy)
|
|
138
134
|
|
|
139
|
-
IndexPQStats
|
|
140
|
-
|
|
135
|
+
IndexPQStats() {
|
|
136
|
+
reset();
|
|
137
|
+
}
|
|
138
|
+
void reset();
|
|
141
139
|
};
|
|
142
140
|
|
|
143
141
|
FAISS_API extern IndexPQStats indexPQ_stats;
|
|
144
142
|
|
|
145
|
-
|
|
146
|
-
|
|
147
143
|
/** Quantizer where centroids are virtual: they are the Cartesian
|
|
148
144
|
* product of sub-centroids. */
|
|
149
|
-
struct MultiIndexQuantizer: Index
|
|
145
|
+
struct MultiIndexQuantizer : Index {
|
|
150
146
|
ProductQuantizer pq;
|
|
151
147
|
|
|
152
|
-
MultiIndexQuantizer
|
|
153
|
-
|
|
154
|
-
|
|
148
|
+
MultiIndexQuantizer(
|
|
149
|
+
int d, ///< dimension of the input vectors
|
|
150
|
+
size_t M, ///< number of subquantizers
|
|
151
|
+
size_t nbits); ///< number of bit per subvector index
|
|
155
152
|
|
|
156
153
|
void train(idx_t n, const float* x) override;
|
|
157
154
|
|
|
158
155
|
void search(
|
|
159
|
-
|
|
160
|
-
|
|
156
|
+
idx_t n,
|
|
157
|
+
const float* x,
|
|
158
|
+
idx_t k,
|
|
159
|
+
float* distances,
|
|
160
|
+
idx_t* labels) const override;
|
|
161
161
|
|
|
162
162
|
/// add and reset will crash at runtime
|
|
163
163
|
void add(idx_t n, const float* x) override;
|
|
164
164
|
void reset() override;
|
|
165
165
|
|
|
166
|
-
MultiIndexQuantizer
|
|
166
|
+
MultiIndexQuantizer() {}
|
|
167
167
|
|
|
168
168
|
void reconstruct(idx_t key, float* recons) const override;
|
|
169
169
|
};
|
|
170
170
|
|
|
171
|
-
|
|
172
171
|
/** MultiIndexQuantizer where the PQ assignmnet is performed by sub-indexes
|
|
173
172
|
*/
|
|
174
|
-
struct MultiIndexQuantizer2: MultiIndexQuantizer {
|
|
175
|
-
|
|
173
|
+
struct MultiIndexQuantizer2 : MultiIndexQuantizer {
|
|
176
174
|
/// M Indexes on d / M dimensions
|
|
177
175
|
std::vector<Index*> assign_indexes;
|
|
178
176
|
bool own_fields;
|
|
179
177
|
|
|
180
|
-
MultiIndexQuantizer2
|
|
181
|
-
int d, size_t M, size_t nbits,
|
|
182
|
-
Index **indexes);
|
|
178
|
+
MultiIndexQuantizer2(int d, size_t M, size_t nbits, Index** indexes);
|
|
183
179
|
|
|
184
|
-
MultiIndexQuantizer2
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
180
|
+
MultiIndexQuantizer2(
|
|
181
|
+
int d,
|
|
182
|
+
size_t nbits,
|
|
183
|
+
Index* assign_index_0,
|
|
184
|
+
Index* assign_index_1);
|
|
188
185
|
|
|
189
186
|
void train(idx_t n, const float* x) override;
|
|
190
187
|
|
|
191
188
|
void search(
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
189
|
+
idx_t n,
|
|
190
|
+
const float* x,
|
|
191
|
+
idx_t k,
|
|
192
|
+
float* distances,
|
|
193
|
+
idx_t* labels) const override;
|
|
195
194
|
};
|
|
196
195
|
|
|
197
|
-
|
|
198
196
|
} // namespace faiss
|
|
199
197
|
|
|
200
|
-
|
|
201
198
|
#endif
|
|
@@ -5,24 +5,21 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
|
|
9
8
|
#include <faiss/IndexPQFastScan.h>
|
|
10
9
|
|
|
10
|
+
#include <limits.h>
|
|
11
11
|
#include <cassert>
|
|
12
12
|
#include <memory>
|
|
13
|
-
#include <limits.h>
|
|
14
13
|
|
|
15
14
|
#include <omp.h>
|
|
16
15
|
|
|
17
|
-
|
|
18
16
|
#include <faiss/impl/FaissAssert.h>
|
|
19
|
-
#include <faiss/utils/utils.h>
|
|
20
17
|
#include <faiss/utils/random.h>
|
|
18
|
+
#include <faiss/utils/utils.h>
|
|
21
19
|
|
|
20
|
+
#include <faiss/impl/pq4_fast_scan.h>
|
|
22
21
|
#include <faiss/impl/simd_result_handlers.h>
|
|
23
22
|
#include <faiss/utils/quantize_lut.h>
|
|
24
|
-
#include <faiss/impl/pq4_fast_scan.h>
|
|
25
|
-
|
|
26
23
|
|
|
27
24
|
namespace faiss {
|
|
28
25
|
|
|
@@ -33,25 +30,24 @@ inline size_t roundup(size_t a, size_t b) {
|
|
|
33
30
|
}
|
|
34
31
|
|
|
35
32
|
IndexPQFastScan::IndexPQFastScan(
|
|
36
|
-
int d,
|
|
33
|
+
int d,
|
|
34
|
+
size_t M,
|
|
35
|
+
size_t nbits,
|
|
37
36
|
MetricType metric,
|
|
38
|
-
int bbs)
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
37
|
+
int bbs)
|
|
38
|
+
: Index(d, metric),
|
|
39
|
+
pq(d, M, nbits),
|
|
40
|
+
bbs(bbs),
|
|
41
|
+
ntotal2(0),
|
|
42
|
+
M2(roundup(M, 2)) {
|
|
42
43
|
FAISS_THROW_IF_NOT(nbits == 4);
|
|
43
44
|
is_trained = false;
|
|
44
45
|
}
|
|
45
46
|
|
|
46
|
-
IndexPQFastScan::IndexPQFastScan():
|
|
47
|
-
bbs(0), ntotal2(0), M2(0)
|
|
48
|
-
{}
|
|
47
|
+
IndexPQFastScan::IndexPQFastScan() : bbs(0), ntotal2(0), M2(0) {}
|
|
49
48
|
|
|
50
|
-
IndexPQFastScan::IndexPQFastScan(const IndexPQ
|
|
51
|
-
|
|
52
|
-
pq(orig.pq),
|
|
53
|
-
bbs(bbs)
|
|
54
|
-
{
|
|
49
|
+
IndexPQFastScan::IndexPQFastScan(const IndexPQ& orig, int bbs)
|
|
50
|
+
: Index(orig.d, orig.metric_type), pq(orig.pq), bbs(bbs) {
|
|
55
51
|
FAISS_THROW_IF_NOT(orig.pq.nbits == 4);
|
|
56
52
|
ntotal = orig.ntotal;
|
|
57
53
|
is_trained = orig.is_trained;
|
|
@@ -70,16 +66,10 @@ IndexPQFastScan::IndexPQFastScan(const IndexPQ & orig, int bbs):
|
|
|
70
66
|
codes.resize(ntotal2 * M2 / 2);
|
|
71
67
|
|
|
72
68
|
// printf("M=%d M2=%d code_size=%d\n", M, M2, pq.code_size);
|
|
73
|
-
pq4_pack_codes(
|
|
74
|
-
orig.codes.data(),
|
|
75
|
-
ntotal, M,
|
|
76
|
-
ntotal2, bbs, M2,
|
|
77
|
-
codes.get()
|
|
78
|
-
);
|
|
69
|
+
pq4_pack_codes(orig.codes.data(), ntotal, M, ntotal2, bbs, M2, codes.get());
|
|
79
70
|
}
|
|
80
71
|
|
|
81
|
-
void IndexPQFastScan::train
|
|
82
|
-
{
|
|
72
|
+
void IndexPQFastScan::train(idx_t n, const float* x) {
|
|
83
73
|
if (is_trained) {
|
|
84
74
|
return;
|
|
85
75
|
}
|
|
@@ -87,11 +77,10 @@ void IndexPQFastScan::train (idx_t n, const float *x)
|
|
|
87
77
|
is_trained = true;
|
|
88
78
|
}
|
|
89
79
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
FAISS_THROW_IF_NOT (is_trained);
|
|
80
|
+
void IndexPQFastScan::add(idx_t n, const float* x) {
|
|
81
|
+
FAISS_THROW_IF_NOT(is_trained);
|
|
93
82
|
AlignedTable<uint8_t> tmp_codes(n * pq.code_size);
|
|
94
|
-
pq.compute_codes
|
|
83
|
+
pq.compute_codes(x, tmp_codes.get(), n);
|
|
95
84
|
ntotal2 = roundup(ntotal + n, bbs);
|
|
96
85
|
size_t new_size = ntotal2 * M2 / 2;
|
|
97
86
|
size_t old_size = codes.size();
|
|
@@ -100,39 +89,35 @@ void IndexPQFastScan::add (idx_t n, const float *x) {
|
|
|
100
89
|
memset(codes.get() + old_size, 0, new_size - old_size);
|
|
101
90
|
}
|
|
102
91
|
pq4_pack_codes_range(
|
|
103
|
-
|
|
104
|
-
bbs, M2, codes.get()
|
|
105
|
-
);
|
|
92
|
+
tmp_codes.get(), pq.M, ntotal, ntotal + n, bbs, M2, codes.get());
|
|
106
93
|
ntotal += n;
|
|
107
94
|
}
|
|
108
95
|
|
|
109
|
-
void IndexPQFastScan::reset()
|
|
110
|
-
{
|
|
96
|
+
void IndexPQFastScan::reset() {
|
|
111
97
|
codes.resize(0);
|
|
112
98
|
ntotal = 0;
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
99
|
+
}
|
|
116
100
|
|
|
117
101
|
namespace {
|
|
118
102
|
|
|
119
103
|
// from impl/ProductQuantizer.cpp
|
|
120
104
|
template <class C, typename dis_t>
|
|
121
105
|
void pq_estimators_from_tables_generic(
|
|
122
|
-
const ProductQuantizer& pq,
|
|
123
|
-
|
|
124
|
-
const
|
|
125
|
-
|
|
126
|
-
|
|
106
|
+
const ProductQuantizer& pq,
|
|
107
|
+
size_t nbits,
|
|
108
|
+
const uint8_t* codes,
|
|
109
|
+
size_t ncodes,
|
|
110
|
+
const dis_t* dis_table,
|
|
111
|
+
size_t k,
|
|
112
|
+
typename C::T* heap_dis,
|
|
113
|
+
int64_t* heap_ids) {
|
|
127
114
|
using accu_t = typename C::T;
|
|
128
115
|
const size_t M = pq.M;
|
|
129
116
|
const size_t ksub = pq.ksub;
|
|
130
117
|
for (size_t j = 0; j < ncodes; ++j) {
|
|
131
|
-
PQDecoderGeneric decoder(
|
|
132
|
-
codes + j * pq.code_size, nbits
|
|
133
|
-
);
|
|
118
|
+
PQDecoderGeneric decoder(codes + j * pq.code_size, nbits);
|
|
134
119
|
accu_t dis = 0;
|
|
135
|
-
const dis_t
|
|
120
|
+
const dis_t* __restrict dt = dis_table;
|
|
136
121
|
for (size_t m = 0; m < M; m++) {
|
|
137
122
|
uint64_t c = decoder.decode();
|
|
138
123
|
dis += dt[c];
|
|
@@ -146,53 +131,55 @@ void pq_estimators_from_tables_generic(
|
|
|
146
131
|
}
|
|
147
132
|
}
|
|
148
133
|
|
|
149
|
-
|
|
150
134
|
} // anonymous namespace
|
|
151
135
|
|
|
152
|
-
|
|
153
136
|
using namespace quantize_lut;
|
|
154
137
|
|
|
155
138
|
void IndexPQFastScan::compute_quantized_LUT(
|
|
156
|
-
idx_t n,
|
|
157
|
-
|
|
158
|
-
|
|
139
|
+
idx_t n,
|
|
140
|
+
const float* x,
|
|
141
|
+
uint8_t* lut,
|
|
142
|
+
float* normalizers) const {
|
|
159
143
|
size_t dim12 = pq.ksub * pq.M;
|
|
160
|
-
std::unique_ptr<float[]> dis_tables(new float
|
|
144
|
+
std::unique_ptr<float[]> dis_tables(new float[n * dim12]);
|
|
161
145
|
if (metric_type == METRIC_L2) {
|
|
162
|
-
pq.compute_distance_tables
|
|
146
|
+
pq.compute_distance_tables(n, x, dis_tables.get());
|
|
163
147
|
} else {
|
|
164
|
-
pq.compute_inner_prod_tables
|
|
148
|
+
pq.compute_inner_prod_tables(n, x, dis_tables.get());
|
|
165
149
|
}
|
|
166
150
|
|
|
167
|
-
for(uint64_t i = 0; i < n; i++) {
|
|
151
|
+
for (uint64_t i = 0; i < n; i++) {
|
|
168
152
|
round_uint8_per_column(
|
|
169
|
-
dis_tables.get() + i * dim12,
|
|
170
|
-
|
|
171
|
-
|
|
153
|
+
dis_tables.get() + i * dim12,
|
|
154
|
+
pq.M,
|
|
155
|
+
pq.ksub,
|
|
156
|
+
&normalizers[2 * i],
|
|
157
|
+
&normalizers[2 * i + 1]);
|
|
172
158
|
}
|
|
173
159
|
|
|
174
|
-
for(uint64_t i = 0; i < n; i++) {
|
|
175
|
-
const float
|
|
176
|
-
uint8_t
|
|
160
|
+
for (uint64_t i = 0; i < n; i++) {
|
|
161
|
+
const float* t_in = dis_tables.get() + i * dim12;
|
|
162
|
+
uint8_t* t_out = lut + i * M2 * pq.ksub;
|
|
177
163
|
|
|
178
|
-
for(int j = 0; j < dim12; j++) {
|
|
164
|
+
for (int j = 0; j < dim12; j++) {
|
|
179
165
|
t_out[j] = int(t_in[j]);
|
|
180
166
|
}
|
|
181
167
|
memset(t_out + dim12, 0, (M2 - pq.M) * pq.ksub);
|
|
182
168
|
}
|
|
183
169
|
}
|
|
184
170
|
|
|
185
|
-
|
|
186
|
-
|
|
187
171
|
/******************************************************************************
|
|
188
172
|
* Search driver routine
|
|
189
173
|
******************************************************************************/
|
|
190
174
|
|
|
191
|
-
|
|
192
175
|
void IndexPQFastScan::search(
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
176
|
+
idx_t n,
|
|
177
|
+
const float* x,
|
|
178
|
+
idx_t k,
|
|
179
|
+
float* distances,
|
|
180
|
+
idx_t* labels) const {
|
|
181
|
+
FAISS_THROW_IF_NOT(k > 0);
|
|
182
|
+
|
|
196
183
|
if (metric_type == METRIC_L2) {
|
|
197
184
|
search_dispatch_implem<true>(n, x, k, distances, labels);
|
|
198
185
|
} else {
|
|
@@ -200,20 +187,20 @@ void IndexPQFastScan::search(
|
|
|
200
187
|
}
|
|
201
188
|
}
|
|
202
189
|
|
|
203
|
-
|
|
204
|
-
template<bool is_max>
|
|
190
|
+
template <bool is_max>
|
|
205
191
|
void IndexPQFastScan::search_dispatch_implem(
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
192
|
+
idx_t n,
|
|
193
|
+
const float* x,
|
|
194
|
+
idx_t k,
|
|
195
|
+
float* distances,
|
|
196
|
+
idx_t* labels) const {
|
|
197
|
+
using Cfloat = typename std::conditional<
|
|
198
|
+
is_max,
|
|
199
|
+
CMax<float, int64_t>,
|
|
200
|
+
CMin<float, int64_t>>::type;
|
|
201
|
+
|
|
202
|
+
using C = typename std::
|
|
203
|
+
conditional<is_max, CMax<uint16_t, int>, CMin<uint16_t, int>>::type;
|
|
217
204
|
|
|
218
205
|
if (n == 0) {
|
|
219
206
|
return;
|
|
@@ -229,26 +216,24 @@ void IndexPQFastScan::search_dispatch_implem(
|
|
|
229
216
|
impl = 14;
|
|
230
217
|
}
|
|
231
218
|
if (k > 20) {
|
|
232
|
-
impl
|
|
219
|
+
impl++;
|
|
233
220
|
}
|
|
234
221
|
}
|
|
235
222
|
|
|
236
|
-
|
|
237
223
|
if (implem == 1) {
|
|
238
224
|
FAISS_THROW_IF_NOT(orig_codes);
|
|
239
225
|
FAISS_THROW_IF_NOT(is_max);
|
|
240
|
-
float_maxheap_array_t res = {
|
|
241
|
-
|
|
242
|
-
pq.search (x, n, orig_codes, ntotal, &res, true);
|
|
226
|
+
float_maxheap_array_t res = {size_t(n), size_t(k), labels, distances};
|
|
227
|
+
pq.search(x, n, orig_codes, ntotal, &res, true);
|
|
243
228
|
} else if (implem == 2 || implem == 3 || implem == 4) {
|
|
244
229
|
FAISS_THROW_IF_NOT(orig_codes);
|
|
245
230
|
|
|
246
231
|
size_t dim12 = pq.ksub * pq.M;
|
|
247
|
-
std::unique_ptr<float[]> dis_tables(new float
|
|
232
|
+
std::unique_ptr<float[]> dis_tables(new float[n * dim12]);
|
|
248
233
|
if (is_max) {
|
|
249
|
-
pq.compute_distance_tables
|
|
234
|
+
pq.compute_distance_tables(n, x, dis_tables.get());
|
|
250
235
|
} else {
|
|
251
|
-
pq.compute_inner_prod_tables
|
|
236
|
+
pq.compute_inner_prod_tables(n, x, dis_tables.get());
|
|
252
237
|
}
|
|
253
238
|
|
|
254
239
|
std::vector<float> normalizers(n * 2);
|
|
@@ -256,34 +241,39 @@ void IndexPQFastScan::search_dispatch_implem(
|
|
|
256
241
|
if (implem == 2) {
|
|
257
242
|
// default float
|
|
258
243
|
} else if (implem == 3 || implem == 4) {
|
|
259
|
-
for(uint64_t i = 0; i < n; i++) {
|
|
244
|
+
for (uint64_t i = 0; i < n; i++) {
|
|
260
245
|
round_uint8_per_column(
|
|
261
|
-
dis_tables.get() + i * dim12,
|
|
246
|
+
dis_tables.get() + i * dim12,
|
|
247
|
+
pq.M,
|
|
262
248
|
pq.ksub,
|
|
263
|
-
&normalizers[2 * i],
|
|
264
|
-
|
|
249
|
+
&normalizers[2 * i],
|
|
250
|
+
&normalizers[2 * i + 1]);
|
|
265
251
|
}
|
|
266
252
|
}
|
|
267
253
|
|
|
268
254
|
for (int64_t i = 0; i < n; i++) {
|
|
269
|
-
int64_t
|
|
270
|
-
float
|
|
255
|
+
int64_t* heap_ids = labels + i * k;
|
|
256
|
+
float* heap_dis = distances + i * k;
|
|
271
257
|
|
|
272
|
-
heap_heapify<Cfloat>
|
|
258
|
+
heap_heapify<Cfloat>(k, heap_dis, heap_ids);
|
|
273
259
|
|
|
274
260
|
pq_estimators_from_tables_generic<Cfloat>(
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
261
|
+
pq,
|
|
262
|
+
pq.nbits,
|
|
263
|
+
orig_codes,
|
|
264
|
+
ntotal,
|
|
265
|
+
dis_tables.get() + i * dim12,
|
|
266
|
+
k,
|
|
267
|
+
heap_dis,
|
|
268
|
+
heap_ids);
|
|
279
269
|
|
|
280
|
-
heap_reorder<Cfloat>
|
|
270
|
+
heap_reorder<Cfloat>(k, heap_dis, heap_ids);
|
|
281
271
|
|
|
282
272
|
if (implem == 4) {
|
|
283
273
|
float a = normalizers[2 * i];
|
|
284
274
|
float b = normalizers[2 * i + 1];
|
|
285
275
|
|
|
286
|
-
for(int j = 0; j < k; j++) {
|
|
276
|
+
for (int j = 0; j < k; j++) {
|
|
287
277
|
heap_dis[j] = heap_dis[j] / a + b;
|
|
288
278
|
}
|
|
289
279
|
}
|
|
@@ -303,30 +293,30 @@ void IndexPQFastScan::search_dispatch_implem(
|
|
|
303
293
|
for (int slice = 0; slice < nt; slice++) {
|
|
304
294
|
idx_t i0 = n * slice / nt;
|
|
305
295
|
idx_t i1 = n * (slice + 1) / nt;
|
|
306
|
-
float
|
|
307
|
-
idx_t
|
|
296
|
+
float* dis_i = distances + i0 * k;
|
|
297
|
+
idx_t* lab_i = labels + i0 * k;
|
|
308
298
|
if (impl == 12 || impl == 13) {
|
|
309
299
|
search_implem_12<C>(
|
|
310
|
-
|
|
300
|
+
i1 - i0, x + i0 * d, k, dis_i, lab_i, impl);
|
|
311
301
|
} else {
|
|
312
302
|
search_implem_14<C>(
|
|
313
|
-
|
|
303
|
+
i1 - i0, x + i0 * d, k, dis_i, lab_i, impl);
|
|
314
304
|
}
|
|
315
305
|
}
|
|
316
306
|
}
|
|
317
307
|
} else {
|
|
318
308
|
FAISS_THROW_FMT("invalid implem %d impl=%d", implem, impl);
|
|
319
309
|
}
|
|
320
|
-
|
|
321
310
|
}
|
|
322
311
|
|
|
323
|
-
template<class C>
|
|
312
|
+
template <class C>
|
|
324
313
|
void IndexPQFastScan::search_implem_12(
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
314
|
+
idx_t n,
|
|
315
|
+
const float* x,
|
|
316
|
+
idx_t k,
|
|
317
|
+
float* distances,
|
|
318
|
+
idx_t* labels,
|
|
319
|
+
int impl) const {
|
|
330
320
|
FAISS_THROW_IF_NOT(bbs == 32);
|
|
331
321
|
|
|
332
322
|
// handle qbs2 blocking by recursive call
|
|
@@ -335,23 +325,25 @@ void IndexPQFastScan::search_implem_12(
|
|
|
335
325
|
for (int64_t i0 = 0; i0 < n; i0 += qbs2) {
|
|
336
326
|
int64_t i1 = std::min(i0 + qbs2, n);
|
|
337
327
|
search_implem_12<C>(
|
|
338
|
-
i1 - i0,
|
|
339
|
-
|
|
340
|
-
|
|
328
|
+
i1 - i0,
|
|
329
|
+
x + d * i0,
|
|
330
|
+
k,
|
|
331
|
+
distances + i0 * k,
|
|
332
|
+
labels + i0 * k,
|
|
333
|
+
impl);
|
|
341
334
|
}
|
|
342
335
|
return;
|
|
343
336
|
}
|
|
344
337
|
|
|
345
338
|
size_t dim12 = pq.ksub * M2;
|
|
346
339
|
AlignedTable<uint8_t> quantized_dis_tables(n * dim12);
|
|
347
|
-
std::unique_ptr<float
|
|
340
|
+
std::unique_ptr<float[]> normalizers(new float[2 * n]);
|
|
348
341
|
|
|
349
342
|
if (skip & 1) {
|
|
350
343
|
quantized_dis_tables.clear();
|
|
351
344
|
} else {
|
|
352
345
|
compute_quantized_LUT(
|
|
353
|
-
|
|
354
|
-
);
|
|
346
|
+
n, x, quantized_dis_tables.get(), normalizers.get());
|
|
355
347
|
}
|
|
356
348
|
|
|
357
349
|
AlignedTable<uint8_t> LUT(n * dim12);
|
|
@@ -365,9 +357,8 @@ void IndexPQFastScan::search_implem_12(
|
|
|
365
357
|
qbs = pq4_preferred_qbs(n);
|
|
366
358
|
}
|
|
367
359
|
|
|
368
|
-
int LUT_nq =
|
|
369
|
-
|
|
370
|
-
);
|
|
360
|
+
int LUT_nq =
|
|
361
|
+
pq4_pack_LUT_qbs(qbs, M2, quantized_dis_tables.get(), LUT.get());
|
|
371
362
|
FAISS_THROW_IF_NOT(LUT_nq == n);
|
|
372
363
|
|
|
373
364
|
if (k == 1) {
|
|
@@ -377,37 +368,30 @@ void IndexPQFastScan::search_implem_12(
|
|
|
377
368
|
} else {
|
|
378
369
|
handler.disable = bool(skip & 2);
|
|
379
370
|
pq4_accumulate_loop_qbs(
|
|
380
|
-
|
|
381
|
-
codes.get(), LUT.get(),
|
|
382
|
-
handler
|
|
383
|
-
);
|
|
371
|
+
qbs, ntotal2, M2, codes.get(), LUT.get(), handler);
|
|
384
372
|
}
|
|
385
373
|
|
|
386
374
|
handler.to_flat_arrays(distances, labels, normalizers.get());
|
|
387
375
|
|
|
388
376
|
} else if (impl == 12) {
|
|
389
|
-
|
|
390
377
|
std::vector<uint16_t> tmp_dis(n * k);
|
|
391
378
|
std::vector<int32_t> tmp_ids(n * k);
|
|
392
379
|
|
|
393
380
|
if (skip & 4) {
|
|
394
381
|
// skip
|
|
395
382
|
} else {
|
|
396
|
-
HeapHandler<C> handler(
|
|
383
|
+
HeapHandler<C> handler(
|
|
384
|
+
n, tmp_dis.data(), tmp_ids.data(), k, ntotal);
|
|
397
385
|
handler.disable = bool(skip & 2);
|
|
398
386
|
|
|
399
387
|
pq4_accumulate_loop_qbs(
|
|
400
|
-
|
|
401
|
-
codes.get(), LUT.get(),
|
|
402
|
-
handler
|
|
403
|
-
);
|
|
388
|
+
qbs, ntotal2, M2, codes.get(), LUT.get(), handler);
|
|
404
389
|
|
|
405
390
|
if (!(skip & 8)) {
|
|
406
391
|
handler.to_flat_arrays(distances, labels, normalizers.get());
|
|
407
392
|
}
|
|
408
393
|
}
|
|
409
394
|
|
|
410
|
-
|
|
411
395
|
} else { // impl == 13
|
|
412
396
|
|
|
413
397
|
ReservoirHandler<C> handler(n, ntotal, k, 2 * k);
|
|
@@ -417,10 +401,7 @@ void IndexPQFastScan::search_implem_12(
|
|
|
417
401
|
// skip
|
|
418
402
|
} else {
|
|
419
403
|
pq4_accumulate_loop_qbs(
|
|
420
|
-
|
|
421
|
-
codes.get(), LUT.get(),
|
|
422
|
-
handler
|
|
423
|
-
);
|
|
404
|
+
qbs, ntotal2, M2, codes.get(), LUT.get(), handler);
|
|
424
405
|
}
|
|
425
406
|
|
|
426
407
|
if (!(skip & 8)) {
|
|
@@ -431,18 +412,19 @@ void IndexPQFastScan::search_implem_12(
|
|
|
431
412
|
FastScan_stats.t1 += handler.times[1];
|
|
432
413
|
FastScan_stats.t2 += handler.times[2];
|
|
433
414
|
FastScan_stats.t3 += handler.times[3];
|
|
434
|
-
|
|
435
415
|
}
|
|
436
416
|
}
|
|
437
417
|
|
|
438
418
|
FastScanStats FastScan_stats;
|
|
439
419
|
|
|
440
|
-
template<class C>
|
|
420
|
+
template <class C>
|
|
441
421
|
void IndexPQFastScan::search_implem_14(
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
422
|
+
idx_t n,
|
|
423
|
+
const float* x,
|
|
424
|
+
idx_t k,
|
|
425
|
+
float* distances,
|
|
426
|
+
idx_t* labels,
|
|
427
|
+
int impl) const {
|
|
446
428
|
FAISS_THROW_IF_NOT(bbs % 32 == 0);
|
|
447
429
|
|
|
448
430
|
int qbs2 = qbs == 0 ? 4 : qbs;
|
|
@@ -452,23 +434,25 @@ void IndexPQFastScan::search_implem_14(
|
|
|
452
434
|
for (int64_t i0 = 0; i0 < n; i0 += qbs2) {
|
|
453
435
|
int64_t i1 = std::min(i0 + qbs2, n);
|
|
454
436
|
search_implem_14<C>(
|
|
455
|
-
i1 - i0,
|
|
456
|
-
|
|
457
|
-
|
|
437
|
+
i1 - i0,
|
|
438
|
+
x + d * i0,
|
|
439
|
+
k,
|
|
440
|
+
distances + i0 * k,
|
|
441
|
+
labels + i0 * k,
|
|
442
|
+
impl);
|
|
458
443
|
}
|
|
459
444
|
return;
|
|
460
445
|
}
|
|
461
446
|
|
|
462
447
|
size_t dim12 = pq.ksub * M2;
|
|
463
448
|
AlignedTable<uint8_t> quantized_dis_tables(n * dim12);
|
|
464
|
-
std::unique_ptr<float
|
|
449
|
+
std::unique_ptr<float[]> normalizers(new float[2 * n]);
|
|
465
450
|
|
|
466
451
|
if (skip & 1) {
|
|
467
452
|
quantized_dis_tables.clear();
|
|
468
453
|
} else {
|
|
469
454
|
compute_quantized_LUT(
|
|
470
|
-
|
|
471
|
-
);
|
|
455
|
+
n, x, quantized_dis_tables.get(), normalizers.get());
|
|
472
456
|
}
|
|
473
457
|
|
|
474
458
|
AlignedTable<uint8_t> LUT(n * dim12);
|
|
@@ -480,37 +464,30 @@ void IndexPQFastScan::search_implem_14(
|
|
|
480
464
|
// pass
|
|
481
465
|
} else {
|
|
482
466
|
handler.disable = bool(skip & 2);
|
|
483
|
-
pq4_accumulate_loop
|
|
484
|
-
|
|
485
|
-
codes.get(), LUT.get(),
|
|
486
|
-
handler
|
|
487
|
-
);
|
|
467
|
+
pq4_accumulate_loop(
|
|
468
|
+
n, ntotal2, bbs, M2, codes.get(), LUT.get(), handler);
|
|
488
469
|
}
|
|
489
470
|
handler.to_flat_arrays(distances, labels, normalizers.get());
|
|
490
471
|
|
|
491
472
|
} else if (impl == 14) {
|
|
492
|
-
|
|
493
473
|
std::vector<uint16_t> tmp_dis(n * k);
|
|
494
474
|
std::vector<int32_t> tmp_ids(n * k);
|
|
495
475
|
|
|
496
476
|
if (skip & 4) {
|
|
497
477
|
// skip
|
|
498
478
|
} else if (k > 1) {
|
|
499
|
-
HeapHandler<C> handler(
|
|
479
|
+
HeapHandler<C> handler(
|
|
480
|
+
n, tmp_dis.data(), tmp_ids.data(), k, ntotal);
|
|
500
481
|
handler.disable = bool(skip & 2);
|
|
501
482
|
|
|
502
|
-
pq4_accumulate_loop
|
|
503
|
-
|
|
504
|
-
codes.get(), LUT.get(),
|
|
505
|
-
handler
|
|
506
|
-
);
|
|
483
|
+
pq4_accumulate_loop(
|
|
484
|
+
n, ntotal2, bbs, M2, codes.get(), LUT.get(), handler);
|
|
507
485
|
|
|
508
486
|
if (!(skip & 8)) {
|
|
509
487
|
handler.to_flat_arrays(distances, labels, normalizers.get());
|
|
510
488
|
}
|
|
511
489
|
}
|
|
512
490
|
|
|
513
|
-
|
|
514
491
|
} else { // impl == 15
|
|
515
492
|
|
|
516
493
|
ReservoirHandler<C> handler(n, ntotal, k, 2 * k);
|
|
@@ -519,11 +496,8 @@ void IndexPQFastScan::search_implem_14(
|
|
|
519
496
|
if (skip & 4) {
|
|
520
497
|
// skip
|
|
521
498
|
} else {
|
|
522
|
-
pq4_accumulate_loop
|
|
523
|
-
|
|
524
|
-
codes.get(), LUT.get(),
|
|
525
|
-
handler
|
|
526
|
-
);
|
|
499
|
+
pq4_accumulate_loop(
|
|
500
|
+
n, ntotal2, bbs, M2, codes.get(), LUT.get(), handler);
|
|
527
501
|
}
|
|
528
502
|
|
|
529
503
|
if (!(skip & 8)) {
|
|
@@ -532,5 +506,4 @@ void IndexPQFastScan::search_implem_14(
|
|
|
532
506
|
}
|
|
533
507
|
}
|
|
534
508
|
|
|
535
|
-
|
|
536
509
|
} // namespace faiss
|