faiss 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +334 -195
- data/vendor/faiss/faiss/Clustering.h +88 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
- data/vendor/faiss/faiss/Index2Layer.h +22 -22
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
- data/vendor/faiss/faiss/IndexFlat.h +35 -46
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
- data/vendor/faiss/faiss/IndexIVF.h +146 -113
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
- data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
- data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
- data/vendor/faiss/faiss/IndexLSH.h +21 -26
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
- data/vendor/faiss/faiss/IndexPQ.h +64 -67
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
- data/vendor/faiss/faiss/IndexRefine.h +22 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
- data/vendor/faiss/faiss/IndexResidual.h +152 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
- data/vendor/faiss/faiss/VectorTransform.h +61 -89
- data/vendor/faiss/faiss/clone_index.cpp +77 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
- data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
- data/vendor/faiss/faiss/impl/io.cpp +75 -94
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +40 -29
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +269 -218
- data/vendor/faiss/faiss/index_factory.h +6 -7
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +301 -310
- data/vendor/faiss/faiss/utils/distances.h +133 -118
- data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +53 -48
- metadata +20 -2
|
@@ -20,10 +20,8 @@
|
|
|
20
20
|
* parallel compile times. Templates are instanciated explicitly.
|
|
21
21
|
*/
|
|
22
22
|
|
|
23
|
-
|
|
24
23
|
namespace faiss {
|
|
25
24
|
|
|
26
|
-
|
|
27
25
|
/** Pack codes for consumption by the SIMD kernels.
|
|
28
26
|
* The unused bytes are set to 0.
|
|
29
27
|
*
|
|
@@ -36,11 +34,13 @@ namespace faiss {
|
|
|
36
34
|
* @param blocks output array, size nb * nsq / 2.
|
|
37
35
|
*/
|
|
38
36
|
void pq4_pack_codes(
|
|
39
|
-
const uint8_t
|
|
40
|
-
size_t ntotal,
|
|
41
|
-
size_t
|
|
42
|
-
|
|
43
|
-
|
|
37
|
+
const uint8_t* codes,
|
|
38
|
+
size_t ntotal,
|
|
39
|
+
size_t M,
|
|
40
|
+
size_t nb,
|
|
41
|
+
size_t bbs,
|
|
42
|
+
size_t M2,
|
|
43
|
+
uint8_t* blocks);
|
|
44
44
|
|
|
45
45
|
/** Same as pack_codes but write in a given range of the output,
|
|
46
46
|
* leaving the rest untouched. Assumes allocated entries are 0 on input.
|
|
@@ -51,12 +51,13 @@ void pq4_pack_codes(
|
|
|
51
51
|
* @param blocks output array, size at least ceil(i1 / bbs) * bbs * nsq / 2
|
|
52
52
|
*/
|
|
53
53
|
void pq4_pack_codes_range(
|
|
54
|
-
const uint8_t
|
|
54
|
+
const uint8_t* codes,
|
|
55
55
|
size_t M,
|
|
56
|
-
size_t i0,
|
|
57
|
-
size_t
|
|
58
|
-
|
|
59
|
-
|
|
56
|
+
size_t i0,
|
|
57
|
+
size_t i1,
|
|
58
|
+
size_t bbs,
|
|
59
|
+
size_t M2,
|
|
60
|
+
uint8_t* blocks);
|
|
60
61
|
|
|
61
62
|
/** get a single element from a packed codes table
|
|
62
63
|
*
|
|
@@ -64,9 +65,11 @@ void pq4_pack_codes_range(
|
|
|
64
65
|
* @param sq subquantizer (< nsq)
|
|
65
66
|
*/
|
|
66
67
|
uint8_t pq4_get_packed_element(
|
|
67
|
-
const uint8_t
|
|
68
|
-
size_t
|
|
69
|
-
|
|
68
|
+
const uint8_t* data,
|
|
69
|
+
size_t bbs,
|
|
70
|
+
size_t nsq,
|
|
71
|
+
size_t i,
|
|
72
|
+
size_t sq);
|
|
70
73
|
|
|
71
74
|
/** Pack Look-up table for consumption by the kernel.
|
|
72
75
|
*
|
|
@@ -75,13 +78,7 @@ uint8_t pq4_get_packed_element(
|
|
|
75
78
|
* @param src input array, size (nq, 16)
|
|
76
79
|
* @param dest output array, size (nq, 16)
|
|
77
80
|
*/
|
|
78
|
-
void pq4_pack_LUT(
|
|
79
|
-
int nq, int nsq,
|
|
80
|
-
const uint8_t *src,
|
|
81
|
-
uint8_t *dest
|
|
82
|
-
);
|
|
83
|
-
|
|
84
|
-
|
|
81
|
+
void pq4_pack_LUT(int nq, int nsq, const uint8_t* src, uint8_t* dest);
|
|
85
82
|
|
|
86
83
|
/** Loop over database elements and accumulate results into result handler
|
|
87
84
|
*
|
|
@@ -92,16 +89,15 @@ void pq4_pack_LUT(
|
|
|
92
89
|
* @param codes packed codes array
|
|
93
90
|
* @param LUT packed look-up table
|
|
94
91
|
*/
|
|
95
|
-
template<class ResultHandler>
|
|
92
|
+
template <class ResultHandler>
|
|
96
93
|
void pq4_accumulate_loop(
|
|
97
94
|
int nq,
|
|
98
|
-
size_t nb,
|
|
95
|
+
size_t nb,
|
|
96
|
+
int bbs,
|
|
99
97
|
int nsq,
|
|
100
|
-
const uint8_t
|
|
101
|
-
const uint8_t
|
|
102
|
-
ResultHandler
|
|
103
|
-
|
|
104
|
-
|
|
98
|
+
const uint8_t* codes,
|
|
99
|
+
const uint8_t* LUT,
|
|
100
|
+
ResultHandler& res);
|
|
105
101
|
|
|
106
102
|
/* qbs versions, supported only for bbs=32.
|
|
107
103
|
*
|
|
@@ -115,8 +111,7 @@ void pq4_accumulate_loop(
|
|
|
115
111
|
* nq = 3 + 2 + 2 + 1 = 6 queries. For a given total block size, the optimal
|
|
116
112
|
* decomposition into sub-blocks (measured empirically) is given by
|
|
117
113
|
* preferred_qbs().
|
|
118
|
-
*/
|
|
119
|
-
|
|
114
|
+
*/
|
|
120
115
|
|
|
121
116
|
/* compute the number of queries from a base-16 decomposition */
|
|
122
117
|
int pq4_qbs_to_nq(int qbs);
|
|
@@ -133,18 +128,16 @@ int pq4_preferred_qbs(int nq);
|
|
|
133
128
|
* @param dest output array, size (nq, 16)
|
|
134
129
|
* @return nq
|
|
135
130
|
*/
|
|
136
|
-
int pq4_pack_LUT_qbs(
|
|
137
|
-
int fqbs, int nsq,
|
|
138
|
-
const uint8_t *src,
|
|
139
|
-
uint8_t *dest
|
|
140
|
-
);
|
|
131
|
+
int pq4_pack_LUT_qbs(int fqbs, int nsq, const uint8_t* src, uint8_t* dest);
|
|
141
132
|
|
|
142
|
-
/** Same as pq4_pack_LUT_qbs, except the source vectors are remapped with q_map
|
|
133
|
+
/** Same as pq4_pack_LUT_qbs, except the source vectors are remapped with q_map
|
|
134
|
+
*/
|
|
143
135
|
int pq4_pack_LUT_qbs_q_map(
|
|
144
|
-
int qbs,
|
|
145
|
-
|
|
146
|
-
const
|
|
147
|
-
|
|
136
|
+
int qbs,
|
|
137
|
+
int nsq,
|
|
138
|
+
const uint8_t* src,
|
|
139
|
+
const int* q_map,
|
|
140
|
+
uint8_t* dest);
|
|
148
141
|
|
|
149
142
|
/** Run accumulation loop.
|
|
150
143
|
*
|
|
@@ -155,15 +148,13 @@ int pq4_pack_LUT_qbs_q_map(
|
|
|
155
148
|
* @param LUT look-up table (packed)
|
|
156
149
|
* @param res call-back for the resutls
|
|
157
150
|
*/
|
|
158
|
-
template<class ResultHandler>
|
|
151
|
+
template <class ResultHandler>
|
|
159
152
|
void pq4_accumulate_loop_qbs(
|
|
160
153
|
int qbs,
|
|
161
154
|
size_t nb,
|
|
162
155
|
int nsq,
|
|
163
|
-
const uint8_t
|
|
164
|
-
const uint8_t
|
|
165
|
-
ResultHandler
|
|
166
|
-
|
|
167
|
-
|
|
156
|
+
const uint8_t* codes,
|
|
157
|
+
const uint8_t* LUT,
|
|
158
|
+
ResultHandler& res);
|
|
168
159
|
|
|
169
|
-
} // namespace faiss
|
|
160
|
+
} // namespace faiss
|
|
@@ -5,16 +5,13 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
|
|
9
8
|
#include <faiss/impl/pq4_fast_scan.h>
|
|
10
9
|
|
|
11
10
|
#include <faiss/impl/FaissAssert.h>
|
|
12
11
|
#include <faiss/impl/simd_result_handlers.h>
|
|
13
12
|
|
|
14
|
-
|
|
15
13
|
namespace faiss {
|
|
16
14
|
|
|
17
|
-
|
|
18
15
|
using namespace simd_result_handlers;
|
|
19
16
|
|
|
20
17
|
/***************************************************************
|
|
@@ -29,18 +26,17 @@ namespace {
|
|
|
29
26
|
* writes results in a ResultHandler
|
|
30
27
|
*/
|
|
31
28
|
|
|
32
|
-
template<int NQ, int BB, class ResultHandler>
|
|
29
|
+
template <int NQ, int BB, class ResultHandler>
|
|
33
30
|
void kernel_accumulate_block(
|
|
34
31
|
int nsq,
|
|
35
|
-
const uint8_t
|
|
36
|
-
const uint8_t
|
|
37
|
-
ResultHandler
|
|
38
|
-
{
|
|
32
|
+
const uint8_t* codes,
|
|
33
|
+
const uint8_t* LUT,
|
|
34
|
+
ResultHandler& res) {
|
|
39
35
|
// distance accumulators
|
|
40
36
|
simd16uint16 accu[NQ][BB][4];
|
|
41
37
|
|
|
42
|
-
for(int q = 0; q < NQ; q++) {
|
|
43
|
-
for(int b = 0; b < BB; b++) {
|
|
38
|
+
for (int q = 0; q < NQ; q++) {
|
|
39
|
+
for (int b = 0; b < BB; b++) {
|
|
44
40
|
accu[q][b][0].clear();
|
|
45
41
|
accu[q][b][1].clear();
|
|
46
42
|
accu[q][b][2].clear();
|
|
@@ -48,9 +44,9 @@ void kernel_accumulate_block(
|
|
|
48
44
|
}
|
|
49
45
|
}
|
|
50
46
|
|
|
51
|
-
for(int sq = 0; sq < nsq; sq += 2) {
|
|
47
|
+
for (int sq = 0; sq < nsq; sq += 2) {
|
|
52
48
|
simd32uint8 lut_cache[NQ];
|
|
53
|
-
for(int q = 0; q < NQ; q++) {
|
|
49
|
+
for (int q = 0; q < NQ; q++) {
|
|
54
50
|
lut_cache[q] = simd32uint8(LUT);
|
|
55
51
|
LUT += 32;
|
|
56
52
|
}
|
|
@@ -62,7 +58,7 @@ void kernel_accumulate_block(
|
|
|
62
58
|
simd32uint8 chi = simd32uint8(simd16uint16(c) >> 4) & mask;
|
|
63
59
|
simd32uint8 clo = c & mask;
|
|
64
60
|
|
|
65
|
-
for(int q = 0; q < NQ; q++) {
|
|
61
|
+
for (int q = 0; q < NQ; q++) {
|
|
66
62
|
simd32uint8 lut = lut_cache[q];
|
|
67
63
|
simd32uint8 res0 = lut.lookup_2_lanes(clo);
|
|
68
64
|
simd32uint8 res1 = lut.lookup_2_lanes(chi);
|
|
@@ -76,9 +72,8 @@ void kernel_accumulate_block(
|
|
|
76
72
|
}
|
|
77
73
|
}
|
|
78
74
|
|
|
79
|
-
for(int q = 0; q < NQ; q++) {
|
|
75
|
+
for (int q = 0; q < NQ; q++) {
|
|
80
76
|
for (int b = 0; b < BB; b++) {
|
|
81
|
-
|
|
82
77
|
accu[q][b][0] -= accu[q][b][1] << 8;
|
|
83
78
|
simd16uint16 dis0 = combine2x2(accu[q][b][0], accu[q][b][1]);
|
|
84
79
|
|
|
@@ -88,19 +83,15 @@ void kernel_accumulate_block(
|
|
|
88
83
|
res.handle(q, b, dis0, dis1);
|
|
89
84
|
}
|
|
90
85
|
}
|
|
91
|
-
|
|
92
|
-
|
|
93
86
|
}
|
|
94
87
|
|
|
95
|
-
|
|
96
|
-
template<int NQ, int BB, class ResultHandler>
|
|
88
|
+
template <int NQ, int BB, class ResultHandler>
|
|
97
89
|
void accumulate_fixed_blocks(
|
|
98
90
|
size_t nb,
|
|
99
91
|
int nsq,
|
|
100
|
-
const uint8_t
|
|
101
|
-
const uint8_t
|
|
102
|
-
ResultHandler
|
|
103
|
-
{
|
|
92
|
+
const uint8_t* codes,
|
|
93
|
+
const uint8_t* LUT,
|
|
94
|
+
ResultHandler& res) {
|
|
104
95
|
constexpr int bbs = 32 * BB;
|
|
105
96
|
for (int64_t j0 = 0; j0 < nb; j0 += bbs) {
|
|
106
97
|
FixedStorageHandler<NQ, 2 * BB> res2;
|
|
@@ -111,29 +102,28 @@ void accumulate_fixed_blocks(
|
|
|
111
102
|
}
|
|
112
103
|
}
|
|
113
104
|
|
|
114
|
-
|
|
115
105
|
} // anonymous namespace
|
|
116
106
|
|
|
117
|
-
template<class ResultHandler>
|
|
107
|
+
template <class ResultHandler>
|
|
118
108
|
void pq4_accumulate_loop(
|
|
119
109
|
int nq,
|
|
120
|
-
size_t nb,
|
|
110
|
+
size_t nb,
|
|
111
|
+
int bbs,
|
|
121
112
|
int nsq,
|
|
122
|
-
const uint8_t
|
|
123
|
-
const uint8_t
|
|
124
|
-
ResultHandler
|
|
125
|
-
{
|
|
113
|
+
const uint8_t* codes,
|
|
114
|
+
const uint8_t* LUT,
|
|
115
|
+
ResultHandler& res) {
|
|
126
116
|
FAISS_THROW_IF_NOT(is_aligned_pointer(codes));
|
|
127
117
|
FAISS_THROW_IF_NOT(is_aligned_pointer(LUT));
|
|
128
118
|
FAISS_THROW_IF_NOT(bbs % 32 == 0);
|
|
129
119
|
FAISS_THROW_IF_NOT(nb % bbs == 0);
|
|
130
120
|
|
|
131
|
-
#define DISPATCH(NQ, BB)
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
121
|
+
#define DISPATCH(NQ, BB) \
|
|
122
|
+
case NQ * 1000 + BB: \
|
|
123
|
+
accumulate_fixed_blocks<NQ, BB>(nb, nsq, codes, LUT, res); \
|
|
124
|
+
break
|
|
135
125
|
|
|
136
|
-
switch(nq * 1000 + bbs / 32) {
|
|
126
|
+
switch (nq * 1000 + bbs / 32) {
|
|
137
127
|
DISPATCH(1, 1);
|
|
138
128
|
DISPATCH(1, 2);
|
|
139
129
|
DISPATCH(1, 3);
|
|
@@ -143,26 +133,28 @@ void pq4_accumulate_loop(
|
|
|
143
133
|
DISPATCH(2, 2);
|
|
144
134
|
DISPATCH(3, 1);
|
|
145
135
|
DISPATCH(4, 1);
|
|
146
|
-
|
|
147
|
-
|
|
136
|
+
default:
|
|
137
|
+
FAISS_THROW_FMT("nq=%d bbs=%d not instantiated", nq, bbs);
|
|
148
138
|
}
|
|
149
139
|
#undef DISPATCH
|
|
150
|
-
|
|
151
140
|
}
|
|
152
141
|
|
|
153
142
|
// explicit template instantiations
|
|
154
143
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
144
|
+
#define INSTANTIATE_ACCUMULATE(TH, C, with_id_map) \
|
|
145
|
+
template void pq4_accumulate_loop<TH<C, with_id_map>>( \
|
|
146
|
+
int, \
|
|
147
|
+
size_t, \
|
|
148
|
+
int, \
|
|
149
|
+
int, \
|
|
150
|
+
const uint8_t*, \
|
|
151
|
+
const uint8_t*, \
|
|
152
|
+
TH<C, with_id_map>&);
|
|
153
|
+
|
|
154
|
+
#define INSTANTIATE_3(C, with_id_map) \
|
|
155
|
+
INSTANTIATE_ACCUMULATE(SingleResultHandler, C, with_id_map) \
|
|
156
|
+
INSTANTIATE_ACCUMULATE(HeapHandler, C, with_id_map) \
|
|
157
|
+
INSTANTIATE_ACCUMULATE(ReservoirHandler, C, with_id_map)
|
|
166
158
|
|
|
167
159
|
using Csi = CMax<uint16_t, int>;
|
|
168
160
|
INSTANTIATE_3(Csi, false);
|
|
@@ -174,7 +166,4 @@ INSTANTIATE_3(Csl, true);
|
|
|
174
166
|
using CslMin = CMin<uint16_t, int64_t>;
|
|
175
167
|
INSTANTIATE_3(CslMin, true);
|
|
176
168
|
|
|
177
|
-
|
|
178
|
-
|
|
179
169
|
} // namespace faiss
|
|
180
|
-
|
|
@@ -8,13 +8,11 @@
|
|
|
8
8
|
#include <faiss/impl/pq4_fast_scan.h>
|
|
9
9
|
|
|
10
10
|
#include <faiss/impl/FaissAssert.h>
|
|
11
|
-
#include <faiss/utils/simdlib.h>
|
|
12
11
|
#include <faiss/impl/simd_result_handlers.h>
|
|
13
|
-
|
|
12
|
+
#include <faiss/utils/simdlib.h>
|
|
14
13
|
|
|
15
14
|
namespace faiss {
|
|
16
15
|
|
|
17
|
-
|
|
18
16
|
using namespace simd_result_handlers;
|
|
19
17
|
|
|
20
18
|
/************************************************************
|
|
@@ -29,27 +27,25 @@ namespace {
|
|
|
29
27
|
* writes results in a ResultHandler
|
|
30
28
|
*/
|
|
31
29
|
|
|
32
|
-
template<int NQ, class ResultHandler>
|
|
30
|
+
template <int NQ, class ResultHandler>
|
|
33
31
|
void kernel_accumulate_block(
|
|
34
32
|
int nsq,
|
|
35
|
-
const uint8_t
|
|
36
|
-
const uint8_t
|
|
37
|
-
ResultHandler
|
|
38
|
-
{
|
|
33
|
+
const uint8_t* codes,
|
|
34
|
+
const uint8_t* LUT,
|
|
35
|
+
ResultHandler& res) {
|
|
39
36
|
// dummy alloc to keep the windows compiler happy
|
|
40
37
|
constexpr int NQA = NQ > 0 ? NQ : 1;
|
|
41
38
|
// distance accumulators
|
|
42
39
|
simd16uint16 accu[NQA][4];
|
|
43
40
|
|
|
44
|
-
for(int q = 0; q < NQ; q++) {
|
|
45
|
-
for(int b = 0; b < 4; b++) {
|
|
41
|
+
for (int q = 0; q < NQ; q++) {
|
|
42
|
+
for (int b = 0; b < 4; b++) {
|
|
46
43
|
accu[q][b].clear();
|
|
47
44
|
}
|
|
48
45
|
}
|
|
49
46
|
|
|
50
47
|
// _mm_prefetch(codes + 768, 0);
|
|
51
|
-
for(int sq = 0; sq < nsq; sq += 2) {
|
|
52
|
-
|
|
48
|
+
for (int sq = 0; sq < nsq; sq += 2) {
|
|
53
49
|
// prefetch
|
|
54
50
|
simd32uint8 c(codes);
|
|
55
51
|
codes += 32;
|
|
@@ -59,7 +55,7 @@ void kernel_accumulate_block(
|
|
|
59
55
|
simd32uint8 chi = simd32uint8(simd16uint16(c) >> 4) & mask;
|
|
60
56
|
simd32uint8 clo = c & mask;
|
|
61
57
|
|
|
62
|
-
for(int q = 0; q < NQ; q++) {
|
|
58
|
+
for (int q = 0; q < NQ; q++) {
|
|
63
59
|
// load LUTs for 2 quantizers
|
|
64
60
|
simd32uint8 lut(LUT);
|
|
65
61
|
LUT += 32;
|
|
@@ -75,26 +71,23 @@ void kernel_accumulate_block(
|
|
|
75
71
|
}
|
|
76
72
|
}
|
|
77
73
|
|
|
78
|
-
for(int q = 0; q < NQ; q++) {
|
|
74
|
+
for (int q = 0; q < NQ; q++) {
|
|
79
75
|
accu[q][0] -= accu[q][1] << 8;
|
|
80
76
|
simd16uint16 dis0 = combine2x2(accu[q][0], accu[q][1]);
|
|
81
77
|
accu[q][2] -= accu[q][3] << 8;
|
|
82
78
|
simd16uint16 dis1 = combine2x2(accu[q][2], accu[q][3]);
|
|
83
79
|
res.handle(q, 0, dis0, dis1);
|
|
84
80
|
}
|
|
85
|
-
|
|
86
81
|
}
|
|
87
82
|
|
|
88
83
|
// handle at most 4 blocks of queries
|
|
89
|
-
template<int QBS, class ResultHandler>
|
|
84
|
+
template <int QBS, class ResultHandler>
|
|
90
85
|
void accumulate_q_4step(
|
|
91
86
|
size_t ntotal2,
|
|
92
87
|
int nsq,
|
|
93
|
-
const uint8_t
|
|
94
|
-
const uint8_t
|
|
95
|
-
ResultHandler
|
|
96
|
-
{
|
|
97
|
-
|
|
88
|
+
const uint8_t* codes,
|
|
89
|
+
const uint8_t* LUT0,
|
|
90
|
+
ResultHandler& res) {
|
|
98
91
|
constexpr int Q1 = QBS & 15;
|
|
99
92
|
constexpr int Q2 = (QBS >> 4) & 15;
|
|
100
93
|
constexpr int Q3 = (QBS >> 8) & 15;
|
|
@@ -103,7 +96,7 @@ void accumulate_q_4step(
|
|
|
103
96
|
|
|
104
97
|
for (int64_t j0 = 0; j0 < ntotal2; j0 += 32) {
|
|
105
98
|
FixedStorageHandler<SQ, 2> res2;
|
|
106
|
-
const uint8_t
|
|
99
|
+
const uint8_t* LUT = LUT0;
|
|
107
100
|
kernel_accumulate_block<Q1>(nsq, codes, LUT, res2);
|
|
108
101
|
LUT += Q1 * nsq * 16;
|
|
109
102
|
if (Q2 > 0) {
|
|
@@ -126,134 +119,118 @@ void accumulate_q_4step(
|
|
|
126
119
|
}
|
|
127
120
|
}
|
|
128
121
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
template<int NQ, class ResultHandler>
|
|
122
|
+
template <int NQ, class ResultHandler>
|
|
133
123
|
void kernel_accumulate_block_loop(
|
|
134
124
|
size_t ntotal2,
|
|
135
125
|
int nsq,
|
|
136
|
-
const uint8_t
|
|
137
|
-
const uint8_t
|
|
138
|
-
ResultHandler
|
|
139
|
-
{
|
|
140
|
-
|
|
126
|
+
const uint8_t* codes,
|
|
127
|
+
const uint8_t* LUT,
|
|
128
|
+
ResultHandler& res) {
|
|
141
129
|
for (int64_t j0 = 0; j0 < ntotal2; j0 += 32) {
|
|
142
130
|
res.set_block_origin(0, j0);
|
|
143
|
-
kernel_accumulate_block<NQ, ResultHandler>
|
|
144
|
-
|
|
131
|
+
kernel_accumulate_block<NQ, ResultHandler>(
|
|
132
|
+
nsq, codes + j0 * nsq / 2, LUT, res);
|
|
145
133
|
}
|
|
146
|
-
|
|
147
134
|
}
|
|
148
135
|
|
|
149
136
|
// non-template version of accumulate kernel -- dispatches dynamically
|
|
150
|
-
template<class ResultHandler>
|
|
137
|
+
template <class ResultHandler>
|
|
151
138
|
void accumulate(
|
|
152
139
|
int nq,
|
|
153
140
|
size_t ntotal2,
|
|
154
141
|
int nsq,
|
|
155
|
-
const uint8_t
|
|
156
|
-
const uint8_t
|
|
157
|
-
ResultHandler
|
|
158
|
-
{
|
|
159
|
-
|
|
142
|
+
const uint8_t* codes,
|
|
143
|
+
const uint8_t* LUT,
|
|
144
|
+
ResultHandler& res) {
|
|
160
145
|
assert(nsq % 2 == 0);
|
|
161
146
|
assert(is_aligned_pointer(codes));
|
|
162
147
|
assert(is_aligned_pointer(LUT));
|
|
163
148
|
|
|
164
|
-
#define DISPATCH(NQ)
|
|
165
|
-
case NQ:
|
|
166
|
-
kernel_accumulate_block_loop<NQ, ResultHandler>
|
|
167
|
-
|
|
168
|
-
|
|
149
|
+
#define DISPATCH(NQ) \
|
|
150
|
+
case NQ: \
|
|
151
|
+
kernel_accumulate_block_loop<NQ, ResultHandler>( \
|
|
152
|
+
ntotal2, nsq, codes, LUT, res); \
|
|
153
|
+
return
|
|
169
154
|
|
|
170
|
-
switch(nq) {
|
|
155
|
+
switch (nq) {
|
|
171
156
|
DISPATCH(1);
|
|
172
157
|
DISPATCH(2);
|
|
173
158
|
DISPATCH(3);
|
|
174
159
|
DISPATCH(4);
|
|
175
160
|
}
|
|
176
|
-
FAISS_THROW_FMT("accumulate nq=%d not instanciated",
|
|
177
|
-
nq);
|
|
161
|
+
FAISS_THROW_FMT("accumulate nq=%d not instanciated", nq);
|
|
178
162
|
|
|
179
163
|
#undef DISPATCH
|
|
180
164
|
}
|
|
181
165
|
|
|
166
|
+
} // namespace
|
|
182
167
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
template<class ResultHandler>
|
|
168
|
+
template <class ResultHandler>
|
|
188
169
|
void pq4_accumulate_loop_qbs(
|
|
189
170
|
int qbs,
|
|
190
171
|
size_t ntotal2,
|
|
191
172
|
int nsq,
|
|
192
|
-
const uint8_t
|
|
193
|
-
const uint8_t
|
|
194
|
-
ResultHandler
|
|
195
|
-
{
|
|
196
|
-
|
|
173
|
+
const uint8_t* codes,
|
|
174
|
+
const uint8_t* LUT0,
|
|
175
|
+
ResultHandler& res) {
|
|
197
176
|
assert(nsq % 2 == 0);
|
|
198
177
|
assert(is_aligned_pointer(codes));
|
|
199
178
|
assert(is_aligned_pointer(LUT0));
|
|
200
179
|
|
|
201
180
|
// try out optimized versions
|
|
202
|
-
switch(qbs) {
|
|
203
|
-
#define DISPATCH(QBS)
|
|
204
|
-
|
|
205
|
-
|
|
181
|
+
switch (qbs) {
|
|
182
|
+
#define DISPATCH(QBS) \
|
|
183
|
+
case QBS: \
|
|
184
|
+
accumulate_q_4step<QBS>(ntotal2, nsq, codes, LUT0, res); \
|
|
206
185
|
return;
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
186
|
+
DISPATCH(0x3333); // 12
|
|
187
|
+
DISPATCH(0x2333); // 11
|
|
188
|
+
DISPATCH(0x2233); // 10
|
|
189
|
+
DISPATCH(0x333); // 9
|
|
190
|
+
DISPATCH(0x2223); // 9
|
|
191
|
+
DISPATCH(0x233); // 8
|
|
192
|
+
DISPATCH(0x1223); // 8
|
|
193
|
+
DISPATCH(0x223); // 7
|
|
194
|
+
DISPATCH(0x34); // 7
|
|
195
|
+
DISPATCH(0x133); // 7
|
|
196
|
+
DISPATCH(0x6); // 6
|
|
197
|
+
DISPATCH(0x33); // 6
|
|
198
|
+
DISPATCH(0x123); // 6
|
|
199
|
+
DISPATCH(0x222); // 6
|
|
200
|
+
DISPATCH(0x23); // 5
|
|
201
|
+
DISPATCH(0x5); // 5
|
|
202
|
+
DISPATCH(0x13); // 4
|
|
203
|
+
DISPATCH(0x22); // 4
|
|
204
|
+
DISPATCH(0x4); // 4
|
|
205
|
+
DISPATCH(0x3); // 3
|
|
206
|
+
DISPATCH(0x21); // 3
|
|
207
|
+
DISPATCH(0x2); // 2
|
|
208
|
+
DISPATCH(0x1); // 1
|
|
230
209
|
#undef DISPATCH
|
|
231
210
|
}
|
|
232
211
|
|
|
233
212
|
// default implementation where qbs is not known at compile time
|
|
234
213
|
|
|
235
214
|
for (int64_t j0 = 0; j0 < ntotal2; j0 += 32) {
|
|
236
|
-
const uint8_t
|
|
215
|
+
const uint8_t* LUT = LUT0;
|
|
237
216
|
int qi = qbs;
|
|
238
217
|
int i0 = 0;
|
|
239
|
-
while(qi) {
|
|
218
|
+
while (qi) {
|
|
240
219
|
int nq = qi & 15;
|
|
241
220
|
qi >>= 4;
|
|
242
221
|
res.set_block_origin(i0, j0);
|
|
243
|
-
#define DISPATCH(NQ)
|
|
244
|
-
case NQ:
|
|
245
|
-
kernel_accumulate_block<NQ, ResultHandler> \
|
|
246
|
-
(nsq, codes, LUT, res); \
|
|
222
|
+
#define DISPATCH(NQ) \
|
|
223
|
+
case NQ: \
|
|
224
|
+
kernel_accumulate_block<NQ, ResultHandler>(nsq, codes, LUT, res); \
|
|
247
225
|
break
|
|
248
|
-
switch(nq) {
|
|
226
|
+
switch (nq) {
|
|
249
227
|
DISPATCH(1);
|
|
250
228
|
DISPATCH(2);
|
|
251
229
|
DISPATCH(3);
|
|
252
230
|
DISPATCH(4);
|
|
253
231
|
#undef DISPATCH
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
nq);
|
|
232
|
+
default:
|
|
233
|
+
FAISS_THROW_FMT("accumulate nq=%d not instanciated", nq);
|
|
257
234
|
}
|
|
258
235
|
i0 += nq;
|
|
259
236
|
LUT += nq * nsq * 16;
|
|
@@ -262,14 +239,11 @@ void pq4_accumulate_loop_qbs(
|
|
|
262
239
|
}
|
|
263
240
|
}
|
|
264
241
|
|
|
265
|
-
|
|
266
|
-
|
|
267
242
|
// explicit template instantiations
|
|
268
243
|
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
(int, size_t, int, const uint8_t *, const uint8_t *, RH &);
|
|
244
|
+
#define INSTANTIATE_ACCUMULATE_Q(RH) \
|
|
245
|
+
template void pq4_accumulate_loop_qbs<RH>( \
|
|
246
|
+
int, size_t, int, const uint8_t*, const uint8_t*, RH&);
|
|
273
247
|
|
|
274
248
|
using Csi = CMax<uint16_t, int>;
|
|
275
249
|
INSTANTIATE_ACCUMULATE_Q(SingleResultHandler<Csi>)
|
|
@@ -295,7 +269,6 @@ INSTANTIATE_ACCUMULATE_Q(HHCsl2)
|
|
|
295
269
|
INSTANTIATE_ACCUMULATE_Q(RHCsl2)
|
|
296
270
|
INSTANTIATE_ACCUMULATE_Q(SHCsl2)
|
|
297
271
|
|
|
298
|
-
|
|
299
272
|
/***************************************************************
|
|
300
273
|
* Packing functions
|
|
301
274
|
***************************************************************/
|
|
@@ -303,7 +276,7 @@ INSTANTIATE_ACCUMULATE_Q(SHCsl2)
|
|
|
303
276
|
int pq4_qbs_to_nq(int qbs) {
|
|
304
277
|
int i0 = 0;
|
|
305
278
|
int qi = qbs;
|
|
306
|
-
while(qi) {
|
|
279
|
+
while (qi) {
|
|
307
280
|
int nq = qi & 15;
|
|
308
281
|
qi >>= 4;
|
|
309
282
|
i0 += nq;
|
|
@@ -311,29 +284,22 @@ int pq4_qbs_to_nq(int qbs) {
|
|
|
311
284
|
return i0;
|
|
312
285
|
}
|
|
313
286
|
|
|
314
|
-
|
|
315
|
-
|
|
316
287
|
void accumulate_to_mem(
|
|
317
288
|
int nq,
|
|
318
289
|
size_t ntotal2,
|
|
319
290
|
int nsq,
|
|
320
|
-
const uint8_t
|
|
321
|
-
const uint8_t
|
|
322
|
-
uint16_t* accu)
|
|
323
|
-
{
|
|
291
|
+
const uint8_t* codes,
|
|
292
|
+
const uint8_t* LUT,
|
|
293
|
+
uint16_t* accu) {
|
|
324
294
|
FAISS_THROW_IF_NOT(ntotal2 % 32 == 0);
|
|
325
295
|
StoreResultHandler handler(accu, ntotal2);
|
|
326
296
|
accumulate(nq, ntotal2, nsq, codes, LUT, handler);
|
|
327
297
|
}
|
|
328
298
|
|
|
329
|
-
|
|
330
299
|
int pq4_preferred_qbs(int n) {
|
|
331
300
|
// from timmings in P141901742, P141902828
|
|
332
301
|
static int map[12] = {
|
|
333
|
-
|
|
334
|
-
0x23, 0x33, 0x223, 0x233, 0x333,
|
|
335
|
-
0x2233, 0x2333
|
|
336
|
-
};
|
|
302
|
+
0, 1, 2, 3, 0x13, 0x23, 0x33, 0x223, 0x233, 0x333, 0x2233, 0x2333};
|
|
337
303
|
if (n <= 11) {
|
|
338
304
|
return map[n];
|
|
339
305
|
} else if (n <= 24) {
|
|
@@ -348,7 +314,4 @@ int pq4_preferred_qbs(int n) {
|
|
|
348
314
|
}
|
|
349
315
|
}
|
|
350
316
|
|
|
351
|
-
|
|
352
|
-
|
|
353
317
|
} // namespace faiss
|
|
354
|
-
|