faiss 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +9 -2
- data/ext/faiss/index.cpp +1 -1
- data/ext/faiss/index_binary.cpp +2 -2
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +7 -7
- data/vendor/faiss/faiss/AutoTune.h +1 -2
- data/vendor/faiss/faiss/Clustering.cpp +39 -22
- data/vendor/faiss/faiss/Clustering.h +40 -21
- data/vendor/faiss/faiss/IVFlib.cpp +26 -12
- data/vendor/faiss/faiss/Index.cpp +1 -1
- data/vendor/faiss/faiss/Index.h +40 -10
- data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
- data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
- data/vendor/faiss/faiss/IndexBinary.h +8 -19
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +107 -188
- data/vendor/faiss/faiss/IndexFastScan.cpp +95 -146
- data/vendor/faiss/faiss/IndexFastScan.h +9 -8
- data/vendor/faiss/faiss/IndexFlat.cpp +206 -10
- data/vendor/faiss/faiss/IndexFlat.h +20 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +170 -5
- data/vendor/faiss/faiss/IndexFlatCodes.h +23 -4
- data/vendor/faiss/faiss/IndexHNSW.cpp +231 -382
- data/vendor/faiss/faiss/IndexHNSW.h +62 -49
- data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
- data/vendor/faiss/faiss/IndexIDMap.h +24 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +162 -56
- data/vendor/faiss/faiss/IndexIVF.h +46 -6
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +33 -26
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +6 -2
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +502 -401
- data/vendor/faiss/faiss/IndexIVFFastScan.h +63 -26
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
- data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +79 -125
- data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +39 -52
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
- data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
- data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
- data/vendor/faiss/faiss/IndexLattice.cpp +1 -19
- data/vendor/faiss/faiss/IndexLattice.h +3 -22
- data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -33
- data/vendor/faiss/faiss/IndexNNDescent.h +1 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
- data/vendor/faiss/faiss/IndexNSG.h +11 -11
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +56 -0
- data/vendor/faiss/faiss/IndexNeuralNetCodec.h +49 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
- data/vendor/faiss/faiss/IndexPQ.h +1 -4
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRefine.cpp +54 -24
- data/vendor/faiss/faiss/IndexRefine.h +7 -0
- data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +25 -17
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
- data/vendor/faiss/faiss/IndexShards.cpp +21 -29
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
- data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
- data/vendor/faiss/faiss/MatrixStats.h +21 -9
- data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
- data/vendor/faiss/faiss/MetricType.h +7 -2
- data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
- data/vendor/faiss/faiss/VectorTransform.h +7 -7
- data/vendor/faiss/faiss/clone_index.cpp +15 -10
- data/vendor/faiss/faiss/clone_index.h +3 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +95 -17
- data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +152 -0
- data/vendor/faiss/faiss/cppcontrib/factory_tools.h +24 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +83 -30
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +123 -8
- data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +13 -0
- data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +30 -12
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +282 -0
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +14 -9
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +20 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
- data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +142 -17
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +26 -21
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +7 -1
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +8 -5
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +332 -40
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +4 -1
- data/vendor/faiss/faiss/gpu/utils/Timer.h +1 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +26 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +10 -3
- data/vendor/faiss/faiss/impl/DistanceComputer.h +70 -1
- data/vendor/faiss/faiss/impl/FaissAssert.h +4 -2
- data/vendor/faiss/faiss/impl/FaissException.h +13 -34
- data/vendor/faiss/faiss/impl/HNSW.cpp +605 -186
- data/vendor/faiss/faiss/impl/HNSW.h +52 -30
- data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +11 -9
- data/vendor/faiss/faiss/impl/LookupTableScaler.h +34 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +42 -27
- data/vendor/faiss/faiss/impl/NSG.cpp +0 -29
- data/vendor/faiss/faiss/impl/NSG.h +1 -1
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -22
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +6 -2
- data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
- data/vendor/faiss/faiss/impl/ResultHandler.h +347 -172
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1104 -147
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +3 -8
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +285 -42
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx512.h +248 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
- data/vendor/faiss/faiss/impl/index_read.cpp +74 -34
- data/vendor/faiss/faiss/impl/index_read_utils.h +37 -0
- data/vendor/faiss/faiss/impl/index_write.cpp +88 -51
- data/vendor/faiss/faiss/impl/io.cpp +23 -15
- data/vendor/faiss/faiss/impl/io.h +4 -4
- data/vendor/faiss/faiss/impl/io_macros.h +6 -0
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +40 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +14 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +487 -49
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +481 -225
- data/vendor/faiss/faiss/index_factory.cpp +41 -20
- data/vendor/faiss/faiss/index_io.h +12 -5
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +28 -8
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +3 -0
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +10 -2
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +73 -17
- data/vendor/faiss/faiss/invlists/InvertedLists.h +26 -8
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +24 -9
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +2 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +4 -4
- data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
- data/vendor/faiss/faiss/utils/Heap.h +105 -0
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +342 -0
- data/vendor/faiss/faiss/utils/NeuralNet.h +147 -0
- data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
- data/vendor/faiss/faiss/utils/bf16.h +36 -0
- data/vendor/faiss/faiss/utils/distances.cpp +147 -123
- data/vendor/faiss/faiss/utils/distances.h +86 -9
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
- data/vendor/faiss/faiss/utils/distances_simd.cpp +1589 -243
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +70 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +85 -137
- data/vendor/faiss/faiss/utils/extra_distances.h +3 -2
- data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
- data/vendor/faiss/faiss/utils/fp16.h +2 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +163 -111
- data/vendor/faiss/faiss/utils/hamming.h +58 -0
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +19 -88
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +58 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
- data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
- data/vendor/faiss/faiss/utils/prefetch.h +77 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
- data/vendor/faiss/faiss/utils/random.cpp +43 -0
- data/vendor/faiss/faiss/utils/random.h +25 -0
- data/vendor/faiss/faiss/utils/simdlib.h +10 -1
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +296 -0
- data/vendor/faiss/faiss/utils/simdlib_neon.h +77 -79
- data/vendor/faiss/faiss/utils/simdlib_ppc64.h +1084 -0
- data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
- data/vendor/faiss/faiss/utils/sorting.h +27 -0
- data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +176 -0
- data/vendor/faiss/faiss/utils/utils.cpp +120 -7
- data/vendor/faiss/faiss/utils/utils.h +60 -20
- metadata +23 -4
- data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +0 -102
@@ -5,14 +5,13 @@
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
6
6
|
*/
|
7
7
|
|
8
|
-
// -*- c++ -*-
|
9
|
-
|
10
8
|
/*
|
11
9
|
* Implementation of Hamming related functions (distances, smallest distance
|
12
10
|
* selection with regular heap|radix and probabilistic heap|radix).
|
13
11
|
*
|
14
12
|
* IMPLEMENTATION NOTES
|
15
|
-
*
|
13
|
+
* Optimal speed is typically obtained for vector sizes of multiples of 64
|
14
|
+
* bits.
|
16
15
|
*
|
17
16
|
* hamdis_t is used for distances because at this time
|
18
17
|
* it is not clear how we will need to balance
|
@@ -20,15 +19,13 @@
|
|
20
19
|
* - memory usage
|
21
20
|
* - cache-misses when dealing with large volumes of data (lower bits is better)
|
22
21
|
*
|
23
|
-
* The hamdis_t should optimally be compatibe with one of the Torch Storage
|
24
|
-
* (Byte,Short,Long) and therefore should be signed for 2-bytes and 4-bytes
|
25
22
|
*/
|
26
23
|
|
27
24
|
#include <faiss/utils/hamming.h>
|
28
25
|
|
29
|
-
#include <math.h>
|
30
|
-
#include <stdio.h>
|
31
26
|
#include <algorithm>
|
27
|
+
#include <cmath>
|
28
|
+
#include <cstdio>
|
32
29
|
#include <memory>
|
33
30
|
#include <vector>
|
34
31
|
|
@@ -38,8 +35,6 @@
|
|
38
35
|
#include <faiss/utils/approx_topk_hamming/approx_topk_hamming.h>
|
39
36
|
#include <faiss/utils/utils.h>
|
40
37
|
|
41
|
-
static const size_t BLOCKSIZE_QUERY = 8192;
|
42
|
-
|
43
38
|
namespace faiss {
|
44
39
|
|
45
40
|
size_t hamming_batch_size = 65536;
|
@@ -165,9 +160,11 @@ size_t match_hamming_thres(
|
|
165
160
|
return posm;
|
166
161
|
}
|
167
162
|
|
163
|
+
namespace {
|
164
|
+
|
168
165
|
/* Return closest neighbors w.r.t Hamming distance, using a heap. */
|
169
166
|
template <class HammingComputer>
|
170
|
-
|
167
|
+
void hammings_knn_hc(
|
171
168
|
int bytes_per_code,
|
172
169
|
int_maxheap_array_t* __restrict ha,
|
173
170
|
const uint8_t* __restrict bs1,
|
@@ -234,7 +231,7 @@ static void hammings_knn_hc(
|
|
234
231
|
|
235
232
|
/* Return closest neighbors w.r.t Hamming distance, using max count. */
|
236
233
|
template <class HammingComputer>
|
237
|
-
|
234
|
+
void hammings_knn_mc(
|
238
235
|
int bytes_per_code,
|
239
236
|
const uint8_t* __restrict a,
|
240
237
|
const uint8_t* __restrict b,
|
@@ -272,10 +269,10 @@ static void hammings_knn_mc(
|
|
272
269
|
HCounterState<HammingComputer>& csi = cs[i];
|
273
270
|
|
274
271
|
int nres = 0;
|
275
|
-
for (int
|
276
|
-
for (int l = 0; l < csi.counters[
|
277
|
-
labels[i * k + nres] = csi.ids_per_dis[
|
278
|
-
distances[i * k + nres] =
|
272
|
+
for (int b_2 = 0; b_2 < nBuckets && nres < k; b_2++) {
|
273
|
+
for (int l = 0; l < csi.counters[b_2] && nres < k; l++) {
|
274
|
+
labels[i * k + nres] = csi.ids_per_dis[b_2 * k + l];
|
275
|
+
distances[i * k + nres] = b_2;
|
279
276
|
nres++;
|
280
277
|
}
|
281
278
|
}
|
@@ -287,6 +284,63 @@ static void hammings_knn_mc(
|
|
287
284
|
}
|
288
285
|
}
|
289
286
|
|
287
|
+
template <class HammingComputer>
|
288
|
+
void hamming_range_search(
|
289
|
+
const uint8_t* a,
|
290
|
+
const uint8_t* b,
|
291
|
+
size_t na,
|
292
|
+
size_t nb,
|
293
|
+
int radius,
|
294
|
+
size_t code_size,
|
295
|
+
RangeSearchResult* res) {
|
296
|
+
#pragma omp parallel
|
297
|
+
{
|
298
|
+
RangeSearchPartialResult pres(res);
|
299
|
+
|
300
|
+
#pragma omp for
|
301
|
+
for (int64_t i = 0; i < na; i++) {
|
302
|
+
HammingComputer hc(a + i * code_size, code_size);
|
303
|
+
const uint8_t* yi = b;
|
304
|
+
RangeQueryResult& qres = pres.new_result(i);
|
305
|
+
|
306
|
+
for (size_t j = 0; j < nb; j++) {
|
307
|
+
int dis = hc.hamming(yi);
|
308
|
+
if (dis < radius) {
|
309
|
+
qres.add(dis, j);
|
310
|
+
}
|
311
|
+
yi += code_size;
|
312
|
+
}
|
313
|
+
}
|
314
|
+
pres.finalize();
|
315
|
+
}
|
316
|
+
}
|
317
|
+
|
318
|
+
struct Run_hammings_knn_hc {
|
319
|
+
using T = void;
|
320
|
+
template <class HammingComputer, class... Types>
|
321
|
+
void f(Types... args) {
|
322
|
+
hammings_knn_hc<HammingComputer>(args...);
|
323
|
+
}
|
324
|
+
};
|
325
|
+
|
326
|
+
struct Run_hammings_knn_mc {
|
327
|
+
using T = void;
|
328
|
+
template <class HammingComputer, class... Types>
|
329
|
+
void f(Types... args) {
|
330
|
+
hammings_knn_mc<HammingComputer>(args...);
|
331
|
+
}
|
332
|
+
};
|
333
|
+
|
334
|
+
struct Run_hamming_range_search {
|
335
|
+
using T = void;
|
336
|
+
template <class HammingComputer, class... Types>
|
337
|
+
void f(Types... args) {
|
338
|
+
hamming_range_search<HammingComputer>(args...);
|
339
|
+
}
|
340
|
+
};
|
341
|
+
|
342
|
+
} // namespace
|
343
|
+
|
290
344
|
/* Functions to map vectors to bits. Assume proper allocation done beforehand,
|
291
345
|
meaning that b should be able to receive as many bits as x may produce. */
|
292
346
|
|
@@ -310,7 +364,7 @@ void fvec2bitvec(const float* __restrict x, uint8_t* __restrict b, size_t d) {
|
|
310
364
|
}
|
311
365
|
|
312
366
|
/* Same but for n vectors.
|
313
|
-
Ensure that the
|
367
|
+
Ensure that the output b is byte-aligned (pad with 0s). */
|
314
368
|
void fvecs2bitvecs(
|
315
369
|
const float* __restrict x,
|
316
370
|
uint8_t* __restrict b,
|
@@ -437,28 +491,9 @@ void hammings_knn_hc(
|
|
437
491
|
size_t ncodes,
|
438
492
|
int order,
|
439
493
|
ApproxTopK_mode_t approx_topk_mode) {
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
4, ha, a, b, nb, order, true, approx_topk_mode);
|
444
|
-
break;
|
445
|
-
case 8:
|
446
|
-
hammings_knn_hc<faiss::HammingComputer8>(
|
447
|
-
8, ha, a, b, nb, order, true, approx_topk_mode);
|
448
|
-
break;
|
449
|
-
case 16:
|
450
|
-
hammings_knn_hc<faiss::HammingComputer16>(
|
451
|
-
16, ha, a, b, nb, order, true, approx_topk_mode);
|
452
|
-
break;
|
453
|
-
case 32:
|
454
|
-
hammings_knn_hc<faiss::HammingComputer32>(
|
455
|
-
32, ha, a, b, nb, order, true, approx_topk_mode);
|
456
|
-
break;
|
457
|
-
default:
|
458
|
-
hammings_knn_hc<faiss::HammingComputerDefault>(
|
459
|
-
ncodes, ha, a, b, nb, order, true, approx_topk_mode);
|
460
|
-
break;
|
461
|
-
}
|
494
|
+
Run_hammings_knn_hc r;
|
495
|
+
dispatch_HammingComputer(
|
496
|
+
ncodes, r, ncodes, ha, a, b, nb, order, true, approx_topk_mode);
|
462
497
|
}
|
463
498
|
|
464
499
|
void hammings_knn_mc(
|
@@ -470,58 +505,9 @@ void hammings_knn_mc(
|
|
470
505
|
size_t ncodes,
|
471
506
|
int32_t* __restrict distances,
|
472
507
|
int64_t* __restrict labels) {
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
4, a, b, na, nb, k, distances, labels);
|
477
|
-
break;
|
478
|
-
case 8:
|
479
|
-
hammings_knn_mc<faiss::HammingComputer8>(
|
480
|
-
8, a, b, na, nb, k, distances, labels);
|
481
|
-
break;
|
482
|
-
case 16:
|
483
|
-
hammings_knn_mc<faiss::HammingComputer16>(
|
484
|
-
16, a, b, na, nb, k, distances, labels);
|
485
|
-
break;
|
486
|
-
case 32:
|
487
|
-
hammings_knn_mc<faiss::HammingComputer32>(
|
488
|
-
32, a, b, na, nb, k, distances, labels);
|
489
|
-
break;
|
490
|
-
default:
|
491
|
-
hammings_knn_mc<faiss::HammingComputerDefault>(
|
492
|
-
ncodes, a, b, na, nb, k, distances, labels);
|
493
|
-
break;
|
494
|
-
}
|
495
|
-
}
|
496
|
-
template <class HammingComputer>
|
497
|
-
static void hamming_range_search_template(
|
498
|
-
const uint8_t* a,
|
499
|
-
const uint8_t* b,
|
500
|
-
size_t na,
|
501
|
-
size_t nb,
|
502
|
-
int radius,
|
503
|
-
size_t code_size,
|
504
|
-
RangeSearchResult* res) {
|
505
|
-
#pragma omp parallel
|
506
|
-
{
|
507
|
-
RangeSearchPartialResult pres(res);
|
508
|
-
|
509
|
-
#pragma omp for
|
510
|
-
for (int64_t i = 0; i < na; i++) {
|
511
|
-
HammingComputer hc(a + i * code_size, code_size);
|
512
|
-
const uint8_t* yi = b;
|
513
|
-
RangeQueryResult& qres = pres.new_result(i);
|
514
|
-
|
515
|
-
for (size_t j = 0; j < nb; j++) {
|
516
|
-
int dis = hc.hamming(yi);
|
517
|
-
if (dis < radius) {
|
518
|
-
qres.add(dis, j);
|
519
|
-
}
|
520
|
-
yi += code_size;
|
521
|
-
}
|
522
|
-
}
|
523
|
-
pres.finalize();
|
524
|
-
}
|
508
|
+
Run_hammings_knn_mc r;
|
509
|
+
dispatch_HammingComputer(
|
510
|
+
ncodes, r, ncodes, a, b, na, nb, k, distances, labels);
|
525
511
|
}
|
526
512
|
|
527
513
|
void hamming_range_search(
|
@@ -532,27 +518,9 @@ void hamming_range_search(
|
|
532
518
|
int radius,
|
533
519
|
size_t code_size,
|
534
520
|
RangeSearchResult* result) {
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
switch (code_size) {
|
539
|
-
case 4:
|
540
|
-
HC(HammingComputer4);
|
541
|
-
break;
|
542
|
-
case 8:
|
543
|
-
HC(HammingComputer8);
|
544
|
-
break;
|
545
|
-
case 16:
|
546
|
-
HC(HammingComputer16);
|
547
|
-
break;
|
548
|
-
case 32:
|
549
|
-
HC(HammingComputer32);
|
550
|
-
break;
|
551
|
-
default:
|
552
|
-
HC(HammingComputerDefault);
|
553
|
-
break;
|
554
|
-
}
|
555
|
-
#undef HC
|
521
|
+
Run_hamming_range_search r;
|
522
|
+
dispatch_HammingComputer(
|
523
|
+
code_size, r, a, b, na, nb, radius, code_size, result);
|
556
524
|
}
|
557
525
|
|
558
526
|
/* Count number of matches given a max threshold */
|
@@ -711,4 +679,88 @@ void generalized_hammings_knn_hc(
|
|
711
679
|
ha->reorder();
|
712
680
|
}
|
713
681
|
|
682
|
+
void pack_bitstrings(
|
683
|
+
size_t n,
|
684
|
+
size_t M,
|
685
|
+
int nbit,
|
686
|
+
const int32_t* unpacked,
|
687
|
+
uint8_t* packed,
|
688
|
+
size_t code_size) {
|
689
|
+
FAISS_THROW_IF_NOT(code_size >= (M * nbit + 7) / 8);
|
690
|
+
#pragma omp parallel for if (n > 1000)
|
691
|
+
for (int64_t i = 0; i < n; i++) {
|
692
|
+
const int32_t* in = unpacked + i * M;
|
693
|
+
uint8_t* out = packed + i * code_size;
|
694
|
+
BitstringWriter wr(out, code_size);
|
695
|
+
for (int j = 0; j < M; j++) {
|
696
|
+
wr.write(in[j], nbit);
|
697
|
+
}
|
698
|
+
}
|
699
|
+
}
|
700
|
+
|
701
|
+
void pack_bitstrings(
|
702
|
+
size_t n,
|
703
|
+
size_t M,
|
704
|
+
const int32_t* nbit,
|
705
|
+
const int32_t* unpacked,
|
706
|
+
uint8_t* packed,
|
707
|
+
size_t code_size) {
|
708
|
+
int totbit = 0;
|
709
|
+
for (int j = 0; j < M; j++) {
|
710
|
+
totbit += nbit[j];
|
711
|
+
}
|
712
|
+
FAISS_THROW_IF_NOT(code_size >= (totbit + 7) / 8);
|
713
|
+
#pragma omp parallel for if (n > 1000)
|
714
|
+
for (int64_t i = 0; i < n; i++) {
|
715
|
+
const int32_t* in = unpacked + i * M;
|
716
|
+
uint8_t* out = packed + i * code_size;
|
717
|
+
BitstringWriter wr(out, code_size);
|
718
|
+
for (int j = 0; j < M; j++) {
|
719
|
+
wr.write(in[j], nbit[j]);
|
720
|
+
}
|
721
|
+
}
|
722
|
+
}
|
723
|
+
|
724
|
+
void unpack_bitstrings(
|
725
|
+
size_t n,
|
726
|
+
size_t M,
|
727
|
+
int nbit,
|
728
|
+
const uint8_t* packed,
|
729
|
+
size_t code_size,
|
730
|
+
int32_t* unpacked) {
|
731
|
+
FAISS_THROW_IF_NOT(code_size >= (M * nbit + 7) / 8);
|
732
|
+
#pragma omp parallel for if (n > 1000)
|
733
|
+
for (int64_t i = 0; i < n; i++) {
|
734
|
+
const uint8_t* in = packed + i * code_size;
|
735
|
+
int32_t* out = unpacked + i * M;
|
736
|
+
BitstringReader rd(in, code_size);
|
737
|
+
for (int j = 0; j < M; j++) {
|
738
|
+
out[j] = rd.read(nbit);
|
739
|
+
}
|
740
|
+
}
|
741
|
+
}
|
742
|
+
|
743
|
+
void unpack_bitstrings(
|
744
|
+
size_t n,
|
745
|
+
size_t M,
|
746
|
+
const int32_t* nbit,
|
747
|
+
const uint8_t* packed,
|
748
|
+
size_t code_size,
|
749
|
+
int32_t* unpacked) {
|
750
|
+
int totbit = 0;
|
751
|
+
for (int j = 0; j < M; j++) {
|
752
|
+
totbit += nbit[j];
|
753
|
+
}
|
754
|
+
FAISS_THROW_IF_NOT(code_size >= (totbit + 7) / 8);
|
755
|
+
#pragma omp parallel for if (n > 1000)
|
756
|
+
for (int64_t i = 0; i < n; i++) {
|
757
|
+
const uint8_t* in = packed + i * code_size;
|
758
|
+
int32_t* out = unpacked + i * M;
|
759
|
+
BitstringReader rd(in, code_size);
|
760
|
+
for (int j = 0; j < M; j++) {
|
761
|
+
out[j] = rd.read(nbit[j]);
|
762
|
+
}
|
763
|
+
}
|
764
|
+
}
|
765
|
+
|
714
766
|
} // namespace faiss
|
@@ -222,6 +222,64 @@ void generalized_hammings_knn_hc(
|
|
222
222
|
size_t code_size,
|
223
223
|
int ordered = true);
|
224
224
|
|
225
|
+
/** Pack a set of n codes of size M * nbit
|
226
|
+
*
|
227
|
+
* @param n number of codes to pack
|
228
|
+
* @param M number of elementary codes per code
|
229
|
+
* @param nbit number of bits per elementary code
|
230
|
+
* @param unpacked input unpacked codes, size (n, M)
|
231
|
+
* @param packed output packed codes, size (n, code_size)
|
232
|
+
* @param code_size should be >= ceil(M * nbit / 8)
|
233
|
+
*/
|
234
|
+
void pack_bitstrings(
|
235
|
+
size_t n,
|
236
|
+
size_t M,
|
237
|
+
int nbit,
|
238
|
+
const int32_t* unpacked,
|
239
|
+
uint8_t* packed,
|
240
|
+
size_t code_size);
|
241
|
+
|
242
|
+
/** Pack a set of n codes of variable sizes
|
243
|
+
*
|
244
|
+
* @param nbits number of bits per entry (size M)
|
245
|
+
*/
|
246
|
+
void pack_bitstrings(
|
247
|
+
size_t n,
|
248
|
+
size_t M,
|
249
|
+
const int32_t* nbits,
|
250
|
+
const int32_t* unpacked,
|
251
|
+
uint8_t* packed,
|
252
|
+
size_t code_size);
|
253
|
+
|
254
|
+
/** Unpack a set of n codes of size M * nbit
|
255
|
+
*
|
256
|
+
* @param n number of codes to unpack
|
257
|
+
* @param M number of elementary codes per code
|
258
|
+
* @param nbit number of bits per elementary code
|
259
|
+
* @param unpacked output unpacked codes, size (n, M)
|
260
|
+
* @param packed input packed codes, size (n, code_size)
|
261
|
+
* @param code_size should be >= ceil(M * nbit / 8)
|
262
|
+
*/
|
263
|
+
void unpack_bitstrings(
|
264
|
+
size_t n,
|
265
|
+
size_t M,
|
266
|
+
int nbit,
|
267
|
+
const uint8_t* packed,
|
268
|
+
size_t code_size,
|
269
|
+
int32_t* unpacked);
|
270
|
+
|
271
|
+
/** Unpack a set of n codes of variable sizes
|
272
|
+
*
|
273
|
+
* @param nbits number of bits per entry (size M)
|
274
|
+
*/
|
275
|
+
void unpack_bitstrings(
|
276
|
+
size_t n,
|
277
|
+
size_t M,
|
278
|
+
const int32_t* nbits,
|
279
|
+
const uint8_t* packed,
|
280
|
+
size_t code_size,
|
281
|
+
int32_t* unpacked);
|
282
|
+
|
225
283
|
} // namespace faiss
|
226
284
|
|
227
285
|
#include <faiss/utils/hamming-inl.h>
|
@@ -259,8 +259,8 @@ struct HammingComputerDefault {
|
|
259
259
|
set(a8, code_size);
|
260
260
|
}
|
261
261
|
|
262
|
-
void set(const uint8_t*
|
263
|
-
this->a8 =
|
262
|
+
void set(const uint8_t* a8_2, int code_size) {
|
263
|
+
this->a8 = a8_2;
|
264
264
|
quotient8 = code_size / 8;
|
265
265
|
remainder8 = code_size % 8;
|
266
266
|
}
|
@@ -277,24 +277,31 @@ struct HammingComputerDefault {
|
|
277
277
|
len -= 8;
|
278
278
|
accu += popcount64(a64[i] ^ b64[i]);
|
279
279
|
i++;
|
280
|
+
[[fallthrough]];
|
280
281
|
case 7:
|
281
282
|
accu += popcount64(a64[i] ^ b64[i]);
|
282
283
|
i++;
|
284
|
+
[[fallthrough]];
|
283
285
|
case 6:
|
284
286
|
accu += popcount64(a64[i] ^ b64[i]);
|
285
287
|
i++;
|
288
|
+
[[fallthrough]];
|
286
289
|
case 5:
|
287
290
|
accu += popcount64(a64[i] ^ b64[i]);
|
288
291
|
i++;
|
292
|
+
[[fallthrough]];
|
289
293
|
case 4:
|
290
294
|
accu += popcount64(a64[i] ^ b64[i]);
|
291
295
|
i++;
|
296
|
+
[[fallthrough]];
|
292
297
|
case 3:
|
293
298
|
accu += popcount64(a64[i] ^ b64[i]);
|
294
299
|
i++;
|
300
|
+
[[fallthrough]];
|
295
301
|
case 2:
|
296
302
|
accu += popcount64(a64[i] ^ b64[i]);
|
297
303
|
i++;
|
304
|
+
[[fallthrough]];
|
298
305
|
case 1:
|
299
306
|
accu += popcount64(a64[i] ^ b64[i]);
|
300
307
|
i++;
|
@@ -306,18 +313,25 @@ struct HammingComputerDefault {
|
|
306
313
|
switch (remainder8) {
|
307
314
|
case 7:
|
308
315
|
accu += hamdis_tab_ham_bytes[a[6] ^ b[6]];
|
316
|
+
[[fallthrough]];
|
309
317
|
case 6:
|
310
318
|
accu += hamdis_tab_ham_bytes[a[5] ^ b[5]];
|
319
|
+
[[fallthrough]];
|
311
320
|
case 5:
|
312
321
|
accu += hamdis_tab_ham_bytes[a[4] ^ b[4]];
|
322
|
+
[[fallthrough]];
|
313
323
|
case 4:
|
314
324
|
accu += hamdis_tab_ham_bytes[a[3] ^ b[3]];
|
325
|
+
[[fallthrough]];
|
315
326
|
case 3:
|
316
327
|
accu += hamdis_tab_ham_bytes[a[2] ^ b[2]];
|
328
|
+
[[fallthrough]];
|
317
329
|
case 2:
|
318
330
|
accu += hamdis_tab_ham_bytes[a[1] ^ b[1]];
|
331
|
+
[[fallthrough]];
|
319
332
|
case 1:
|
320
333
|
accu += hamdis_tab_ham_bytes[a[0] ^ b[0]];
|
334
|
+
[[fallthrough]];
|
321
335
|
default:
|
322
336
|
break;
|
323
337
|
}
|
@@ -331,93 +345,6 @@ struct HammingComputerDefault {
|
|
331
345
|
}
|
332
346
|
};
|
333
347
|
|
334
|
-
// more inefficient than HammingComputerDefault (obsolete)
|
335
|
-
struct HammingComputerM8 {
|
336
|
-
const uint64_t* a;
|
337
|
-
int n;
|
338
|
-
|
339
|
-
HammingComputerM8() {}
|
340
|
-
|
341
|
-
HammingComputerM8(const uint8_t* a8, int code_size) {
|
342
|
-
set(a8, code_size);
|
343
|
-
}
|
344
|
-
|
345
|
-
void set(const uint8_t* a8, int code_size) {
|
346
|
-
assert(code_size % 8 == 0);
|
347
|
-
a = (uint64_t*)a8;
|
348
|
-
n = code_size / 8;
|
349
|
-
}
|
350
|
-
|
351
|
-
int hamming(const uint8_t* b8) const {
|
352
|
-
const uint64_t* b = (uint64_t*)b8;
|
353
|
-
int accu = 0;
|
354
|
-
for (int i = 0; i < n; i++)
|
355
|
-
accu += popcount64(a[i] ^ b[i]);
|
356
|
-
return accu;
|
357
|
-
}
|
358
|
-
|
359
|
-
inline int get_code_size() const {
|
360
|
-
return n * 8;
|
361
|
-
}
|
362
|
-
};
|
363
|
-
|
364
|
-
// more inefficient than HammingComputerDefault (obsolete)
|
365
|
-
struct HammingComputerM4 {
|
366
|
-
const uint32_t* a;
|
367
|
-
int n;
|
368
|
-
|
369
|
-
HammingComputerM4() {}
|
370
|
-
|
371
|
-
HammingComputerM4(const uint8_t* a4, int code_size) {
|
372
|
-
set(a4, code_size);
|
373
|
-
}
|
374
|
-
|
375
|
-
void set(const uint8_t* a4, int code_size) {
|
376
|
-
assert(code_size % 4 == 0);
|
377
|
-
a = (uint32_t*)a4;
|
378
|
-
n = code_size / 4;
|
379
|
-
}
|
380
|
-
|
381
|
-
int hamming(const uint8_t* b8) const {
|
382
|
-
const uint32_t* b = (uint32_t*)b8;
|
383
|
-
int accu = 0;
|
384
|
-
for (int i = 0; i < n; i++)
|
385
|
-
accu += popcount64(a[i] ^ b[i]);
|
386
|
-
return accu;
|
387
|
-
}
|
388
|
-
|
389
|
-
inline int get_code_size() const {
|
390
|
-
return n * 4;
|
391
|
-
}
|
392
|
-
};
|
393
|
-
|
394
|
-
/***************************************************************************
|
395
|
-
* Equivalence with a template class when code size is known at compile time
|
396
|
-
**************************************************************************/
|
397
|
-
|
398
|
-
// default template
|
399
|
-
template <int CODE_SIZE>
|
400
|
-
struct HammingComputer : HammingComputerDefault {
|
401
|
-
HammingComputer(const uint8_t* a, int code_size)
|
402
|
-
: HammingComputerDefault(a, code_size) {}
|
403
|
-
};
|
404
|
-
|
405
|
-
#define SPECIALIZED_HC(CODE_SIZE) \
|
406
|
-
template <> \
|
407
|
-
struct HammingComputer<CODE_SIZE> : HammingComputer##CODE_SIZE { \
|
408
|
-
HammingComputer(const uint8_t* a) \
|
409
|
-
: HammingComputer##CODE_SIZE(a, CODE_SIZE) {} \
|
410
|
-
}
|
411
|
-
|
412
|
-
SPECIALIZED_HC(4);
|
413
|
-
SPECIALIZED_HC(8);
|
414
|
-
SPECIALIZED_HC(16);
|
415
|
-
SPECIALIZED_HC(20);
|
416
|
-
SPECIALIZED_HC(32);
|
417
|
-
SPECIALIZED_HC(64);
|
418
|
-
|
419
|
-
#undef SPECIALIZED_HC
|
420
|
-
|
421
348
|
/***************************************************************************
|
422
349
|
* generalized Hamming = number of bytes that are different between
|
423
350
|
* two codes.
|