faiss 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +9 -2
- data/ext/faiss/index.cpp +1 -1
- data/ext/faiss/index_binary.cpp +2 -2
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +7 -7
- data/vendor/faiss/faiss/AutoTune.h +0 -1
- data/vendor/faiss/faiss/Clustering.cpp +4 -18
- data/vendor/faiss/faiss/Clustering.h +31 -21
- data/vendor/faiss/faiss/IVFlib.cpp +22 -11
- data/vendor/faiss/faiss/Index.cpp +1 -1
- data/vendor/faiss/faiss/Index.h +20 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
- data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
- data/vendor/faiss/faiss/IndexBinary.h +8 -19
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +106 -187
- data/vendor/faiss/faiss/IndexFastScan.cpp +90 -159
- data/vendor/faiss/faiss/IndexFastScan.h +9 -8
- data/vendor/faiss/faiss/IndexFlat.cpp +195 -3
- data/vendor/faiss/faiss/IndexFlat.h +20 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +11 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +3 -1
- data/vendor/faiss/faiss/IndexHNSW.cpp +112 -316
- data/vendor/faiss/faiss/IndexHNSW.h +12 -48
- data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
- data/vendor/faiss/faiss/IndexIDMap.h +24 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +159 -53
- data/vendor/faiss/faiss/IndexIVF.h +37 -5
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +18 -26
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +3 -2
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +433 -405
- data/vendor/faiss/faiss/IndexIVFFastScan.h +56 -26
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
- data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +78 -122
- data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +18 -50
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
- data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
- data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -4
- data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
- data/vendor/faiss/faiss/IndexNSG.h +10 -10
- data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
- data/vendor/faiss/faiss/IndexPQ.h +1 -4
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
- data/vendor/faiss/faiss/IndexRefine.cpp +49 -19
- data/vendor/faiss/faiss/IndexRefine.h +7 -0
- data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +22 -16
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
- data/vendor/faiss/faiss/IndexShards.cpp +21 -29
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
- data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
- data/vendor/faiss/faiss/MatrixStats.h +21 -9
- data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
- data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
- data/vendor/faiss/faiss/VectorTransform.h +7 -7
- data/vendor/faiss/faiss/clone_index.cpp +15 -10
- data/vendor/faiss/faiss/clone_index.h +3 -0
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +87 -4
- data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +7 -0
- data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
- data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +8 -9
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +18 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
- data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +117 -17
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +267 -40
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -2
- data/vendor/faiss/faiss/impl/DistanceComputer.h +24 -1
- data/vendor/faiss/faiss/impl/FaissException.h +13 -34
- data/vendor/faiss/faiss/impl/HNSW.cpp +321 -70
- data/vendor/faiss/faiss/impl/HNSW.h +9 -8
- data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +3 -1
- data/vendor/faiss/faiss/impl/NNDescent.cpp +29 -19
- data/vendor/faiss/faiss/impl/NSG.h +1 -1
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +24 -22
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
- data/vendor/faiss/faiss/impl/ResultHandler.h +232 -176
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +444 -104
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +0 -8
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +280 -42
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
- data/vendor/faiss/faiss/impl/index_read.cpp +45 -19
- data/vendor/faiss/faiss/impl/index_write.cpp +60 -41
- data/vendor/faiss/faiss/impl/io.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +18 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +3 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +40 -49
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +374 -202
- data/vendor/faiss/faiss/index_factory.cpp +10 -7
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +27 -9
- data/vendor/faiss/faiss/invlists/InvertedLists.h +12 -3
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
- data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
- data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
- data/vendor/faiss/faiss/utils/distances.cpp +128 -74
- data/vendor/faiss/faiss/utils/distances.h +81 -4
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
- data/vendor/faiss/faiss/utils/distances_simd.cpp +428 -70
- data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
- data/vendor/faiss/faiss/utils/fp16.h +2 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +162 -110
- data/vendor/faiss/faiss/utils/hamming.h +58 -0
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +15 -87
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +57 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
- data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
- data/vendor/faiss/faiss/utils/prefetch.h +77 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
- data/vendor/faiss/faiss/utils/simdlib_neon.h +72 -77
- data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
- data/vendor/faiss/faiss/utils/sorting.h +27 -0
- data/vendor/faiss/faiss/utils/utils.cpp +112 -6
- data/vendor/faiss/faiss/utils/utils.h +57 -20
- metadata +10 -3
@@ -0,0 +1,29 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
#pragma once
|
9
|
+
|
10
|
+
#include <arm_neon.h>
|
11
|
+
#include <cstdint>
|
12
|
+
|
13
|
+
namespace faiss {
|
14
|
+
|
15
|
+
inline uint16_t encode_fp16(float x) {
|
16
|
+
float32x4_t fx4 = vdupq_n_f32(x);
|
17
|
+
float16x4_t f16x4 = vcvt_f16_f32(fx4);
|
18
|
+
uint16x4_t ui16x4 = vreinterpret_u16_f16(f16x4);
|
19
|
+
return vduph_lane_u16(ui16x4, 3);
|
20
|
+
}
|
21
|
+
|
22
|
+
inline float decode_fp16(uint16_t x) {
|
23
|
+
uint16x4_t ui16x4 = vdup_n_u16(x);
|
24
|
+
float16x4_t f16x4 = vreinterpret_f16_u16(ui16x4);
|
25
|
+
float32x4_t fx4 = vcvt_f32_f16(f16x4);
|
26
|
+
return vdups_laneq_f32(fx4, 3);
|
27
|
+
}
|
28
|
+
|
29
|
+
} // namespace faiss
|
@@ -5,14 +5,13 @@
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
6
6
|
*/
|
7
7
|
|
8
|
-
// -*- c++ -*-
|
9
|
-
|
10
8
|
/*
|
11
9
|
* Implementation of Hamming related functions (distances, smallest distance
|
12
10
|
* selection with regular heap|radix and probabilistic heap|radix).
|
13
11
|
*
|
14
12
|
* IMPLEMENTATION NOTES
|
15
|
-
*
|
13
|
+
* Optimal speed is typically obtained for vector sizes of multiples of 64
|
14
|
+
* bits.
|
16
15
|
*
|
17
16
|
* hamdis_t is used for distances because at this time
|
18
17
|
* it is not clear how we will need to balance
|
@@ -20,15 +19,13 @@
|
|
20
19
|
* - memory usage
|
21
20
|
* - cache-misses when dealing with large volumes of data (lower bits is better)
|
22
21
|
*
|
23
|
-
* The hamdis_t should optimally be compatibe with one of the Torch Storage
|
24
|
-
* (Byte,Short,Long) and therefore should be signed for 2-bytes and 4-bytes
|
25
22
|
*/
|
26
23
|
|
27
24
|
#include <faiss/utils/hamming.h>
|
28
25
|
|
29
|
-
#include <math.h>
|
30
|
-
#include <stdio.h>
|
31
26
|
#include <algorithm>
|
27
|
+
#include <cmath>
|
28
|
+
#include <cstdio>
|
32
29
|
#include <memory>
|
33
30
|
#include <vector>
|
34
31
|
|
@@ -38,8 +35,6 @@
|
|
38
35
|
#include <faiss/utils/approx_topk_hamming/approx_topk_hamming.h>
|
39
36
|
#include <faiss/utils/utils.h>
|
40
37
|
|
41
|
-
static const size_t BLOCKSIZE_QUERY = 8192;
|
42
|
-
|
43
38
|
namespace faiss {
|
44
39
|
|
45
40
|
size_t hamming_batch_size = 65536;
|
@@ -165,9 +160,11 @@ size_t match_hamming_thres(
|
|
165
160
|
return posm;
|
166
161
|
}
|
167
162
|
|
163
|
+
namespace {
|
164
|
+
|
168
165
|
/* Return closest neighbors w.r.t Hamming distance, using a heap. */
|
169
166
|
template <class HammingComputer>
|
170
|
-
|
167
|
+
void hammings_knn_hc(
|
171
168
|
int bytes_per_code,
|
172
169
|
int_maxheap_array_t* __restrict ha,
|
173
170
|
const uint8_t* __restrict bs1,
|
@@ -234,7 +231,7 @@ static void hammings_knn_hc(
|
|
234
231
|
|
235
232
|
/* Return closest neighbors w.r.t Hamming distance, using max count. */
|
236
233
|
template <class HammingComputer>
|
237
|
-
|
234
|
+
void hammings_knn_mc(
|
238
235
|
int bytes_per_code,
|
239
236
|
const uint8_t* __restrict a,
|
240
237
|
const uint8_t* __restrict b,
|
@@ -272,10 +269,10 @@ static void hammings_knn_mc(
|
|
272
269
|
HCounterState<HammingComputer>& csi = cs[i];
|
273
270
|
|
274
271
|
int nres = 0;
|
275
|
-
for (int
|
276
|
-
for (int l = 0; l < csi.counters[
|
277
|
-
labels[i * k + nres] = csi.ids_per_dis[
|
278
|
-
distances[i * k + nres] =
|
272
|
+
for (int b_2 = 0; b_2 < nBuckets && nres < k; b_2++) {
|
273
|
+
for (int l = 0; l < csi.counters[b_2] && nres < k; l++) {
|
274
|
+
labels[i * k + nres] = csi.ids_per_dis[b_2 * k + l];
|
275
|
+
distances[i * k + nres] = b_2;
|
279
276
|
nres++;
|
280
277
|
}
|
281
278
|
}
|
@@ -287,6 +284,63 @@ static void hammings_knn_mc(
|
|
287
284
|
}
|
288
285
|
}
|
289
286
|
|
287
|
+
template <class HammingComputer>
|
288
|
+
void hamming_range_search(
|
289
|
+
const uint8_t* a,
|
290
|
+
const uint8_t* b,
|
291
|
+
size_t na,
|
292
|
+
size_t nb,
|
293
|
+
int radius,
|
294
|
+
size_t code_size,
|
295
|
+
RangeSearchResult* res) {
|
296
|
+
#pragma omp parallel
|
297
|
+
{
|
298
|
+
RangeSearchPartialResult pres(res);
|
299
|
+
|
300
|
+
#pragma omp for
|
301
|
+
for (int64_t i = 0; i < na; i++) {
|
302
|
+
HammingComputer hc(a + i * code_size, code_size);
|
303
|
+
const uint8_t* yi = b;
|
304
|
+
RangeQueryResult& qres = pres.new_result(i);
|
305
|
+
|
306
|
+
for (size_t j = 0; j < nb; j++) {
|
307
|
+
int dis = hc.hamming(yi);
|
308
|
+
if (dis < radius) {
|
309
|
+
qres.add(dis, j);
|
310
|
+
}
|
311
|
+
yi += code_size;
|
312
|
+
}
|
313
|
+
}
|
314
|
+
pres.finalize();
|
315
|
+
}
|
316
|
+
}
|
317
|
+
|
318
|
+
struct Run_hammings_knn_hc {
|
319
|
+
using T = void;
|
320
|
+
template <class HammingComputer, class... Types>
|
321
|
+
void f(Types... args) {
|
322
|
+
hammings_knn_hc<HammingComputer>(args...);
|
323
|
+
}
|
324
|
+
};
|
325
|
+
|
326
|
+
struct Run_hammings_knn_mc {
|
327
|
+
using T = void;
|
328
|
+
template <class HammingComputer, class... Types>
|
329
|
+
void f(Types... args) {
|
330
|
+
hammings_knn_mc<HammingComputer>(args...);
|
331
|
+
}
|
332
|
+
};
|
333
|
+
|
334
|
+
struct Run_hamming_range_search {
|
335
|
+
using T = void;
|
336
|
+
template <class HammingComputer, class... Types>
|
337
|
+
void f(Types... args) {
|
338
|
+
hamming_range_search<HammingComputer>(args...);
|
339
|
+
}
|
340
|
+
};
|
341
|
+
|
342
|
+
} // namespace
|
343
|
+
|
290
344
|
/* Functions to map vectors to bits. Assume proper allocation done beforehand,
|
291
345
|
meaning that b should be able to receive as many bits as x may produce. */
|
292
346
|
|
@@ -437,28 +491,9 @@ void hammings_knn_hc(
|
|
437
491
|
size_t ncodes,
|
438
492
|
int order,
|
439
493
|
ApproxTopK_mode_t approx_topk_mode) {
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
4, ha, a, b, nb, order, true, approx_topk_mode);
|
444
|
-
break;
|
445
|
-
case 8:
|
446
|
-
hammings_knn_hc<faiss::HammingComputer8>(
|
447
|
-
8, ha, a, b, nb, order, true, approx_topk_mode);
|
448
|
-
break;
|
449
|
-
case 16:
|
450
|
-
hammings_knn_hc<faiss::HammingComputer16>(
|
451
|
-
16, ha, a, b, nb, order, true, approx_topk_mode);
|
452
|
-
break;
|
453
|
-
case 32:
|
454
|
-
hammings_knn_hc<faiss::HammingComputer32>(
|
455
|
-
32, ha, a, b, nb, order, true, approx_topk_mode);
|
456
|
-
break;
|
457
|
-
default:
|
458
|
-
hammings_knn_hc<faiss::HammingComputerDefault>(
|
459
|
-
ncodes, ha, a, b, nb, order, true, approx_topk_mode);
|
460
|
-
break;
|
461
|
-
}
|
494
|
+
Run_hammings_knn_hc r;
|
495
|
+
dispatch_HammingComputer(
|
496
|
+
ncodes, r, ncodes, ha, a, b, nb, order, true, approx_topk_mode);
|
462
497
|
}
|
463
498
|
|
464
499
|
void hammings_knn_mc(
|
@@ -470,58 +505,9 @@ void hammings_knn_mc(
|
|
470
505
|
size_t ncodes,
|
471
506
|
int32_t* __restrict distances,
|
472
507
|
int64_t* __restrict labels) {
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
4, a, b, na, nb, k, distances, labels);
|
477
|
-
break;
|
478
|
-
case 8:
|
479
|
-
hammings_knn_mc<faiss::HammingComputer8>(
|
480
|
-
8, a, b, na, nb, k, distances, labels);
|
481
|
-
break;
|
482
|
-
case 16:
|
483
|
-
hammings_knn_mc<faiss::HammingComputer16>(
|
484
|
-
16, a, b, na, nb, k, distances, labels);
|
485
|
-
break;
|
486
|
-
case 32:
|
487
|
-
hammings_knn_mc<faiss::HammingComputer32>(
|
488
|
-
32, a, b, na, nb, k, distances, labels);
|
489
|
-
break;
|
490
|
-
default:
|
491
|
-
hammings_knn_mc<faiss::HammingComputerDefault>(
|
492
|
-
ncodes, a, b, na, nb, k, distances, labels);
|
493
|
-
break;
|
494
|
-
}
|
495
|
-
}
|
496
|
-
template <class HammingComputer>
|
497
|
-
static void hamming_range_search_template(
|
498
|
-
const uint8_t* a,
|
499
|
-
const uint8_t* b,
|
500
|
-
size_t na,
|
501
|
-
size_t nb,
|
502
|
-
int radius,
|
503
|
-
size_t code_size,
|
504
|
-
RangeSearchResult* res) {
|
505
|
-
#pragma omp parallel
|
506
|
-
{
|
507
|
-
RangeSearchPartialResult pres(res);
|
508
|
-
|
509
|
-
#pragma omp for
|
510
|
-
for (int64_t i = 0; i < na; i++) {
|
511
|
-
HammingComputer hc(a + i * code_size, code_size);
|
512
|
-
const uint8_t* yi = b;
|
513
|
-
RangeQueryResult& qres = pres.new_result(i);
|
514
|
-
|
515
|
-
for (size_t j = 0; j < nb; j++) {
|
516
|
-
int dis = hc.hamming(yi);
|
517
|
-
if (dis < radius) {
|
518
|
-
qres.add(dis, j);
|
519
|
-
}
|
520
|
-
yi += code_size;
|
521
|
-
}
|
522
|
-
}
|
523
|
-
pres.finalize();
|
524
|
-
}
|
508
|
+
Run_hammings_knn_mc r;
|
509
|
+
dispatch_HammingComputer(
|
510
|
+
ncodes, r, ncodes, a, b, na, nb, k, distances, labels);
|
525
511
|
}
|
526
512
|
|
527
513
|
void hamming_range_search(
|
@@ -532,27 +518,9 @@ void hamming_range_search(
|
|
532
518
|
int radius,
|
533
519
|
size_t code_size,
|
534
520
|
RangeSearchResult* result) {
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
switch (code_size) {
|
539
|
-
case 4:
|
540
|
-
HC(HammingComputer4);
|
541
|
-
break;
|
542
|
-
case 8:
|
543
|
-
HC(HammingComputer8);
|
544
|
-
break;
|
545
|
-
case 16:
|
546
|
-
HC(HammingComputer16);
|
547
|
-
break;
|
548
|
-
case 32:
|
549
|
-
HC(HammingComputer32);
|
550
|
-
break;
|
551
|
-
default:
|
552
|
-
HC(HammingComputerDefault);
|
553
|
-
break;
|
554
|
-
}
|
555
|
-
#undef HC
|
521
|
+
Run_hamming_range_search r;
|
522
|
+
dispatch_HammingComputer(
|
523
|
+
code_size, r, a, b, na, nb, radius, code_size, result);
|
556
524
|
}
|
557
525
|
|
558
526
|
/* Count number of matches given a max threshold */
|
@@ -711,4 +679,88 @@ void generalized_hammings_knn_hc(
|
|
711
679
|
ha->reorder();
|
712
680
|
}
|
713
681
|
|
682
|
+
void pack_bitstrings(
|
683
|
+
size_t n,
|
684
|
+
size_t M,
|
685
|
+
int nbit,
|
686
|
+
const int32_t* unpacked,
|
687
|
+
uint8_t* packed,
|
688
|
+
size_t code_size) {
|
689
|
+
FAISS_THROW_IF_NOT(code_size >= (M * nbit + 7) / 8);
|
690
|
+
#pragma omp parallel for if (n > 1000)
|
691
|
+
for (int64_t i = 0; i < n; i++) {
|
692
|
+
const int32_t* in = unpacked + i * M;
|
693
|
+
uint8_t* out = packed + i * code_size;
|
694
|
+
BitstringWriter wr(out, code_size);
|
695
|
+
for (int j = 0; j < M; j++) {
|
696
|
+
wr.write(in[j], nbit);
|
697
|
+
}
|
698
|
+
}
|
699
|
+
}
|
700
|
+
|
701
|
+
void pack_bitstrings(
|
702
|
+
size_t n,
|
703
|
+
size_t M,
|
704
|
+
const int32_t* nbit,
|
705
|
+
const int32_t* unpacked,
|
706
|
+
uint8_t* packed,
|
707
|
+
size_t code_size) {
|
708
|
+
int totbit = 0;
|
709
|
+
for (int j = 0; j < M; j++) {
|
710
|
+
totbit += nbit[j];
|
711
|
+
}
|
712
|
+
FAISS_THROW_IF_NOT(code_size >= (totbit + 7) / 8);
|
713
|
+
#pragma omp parallel for if (n > 1000)
|
714
|
+
for (int64_t i = 0; i < n; i++) {
|
715
|
+
const int32_t* in = unpacked + i * M;
|
716
|
+
uint8_t* out = packed + i * code_size;
|
717
|
+
BitstringWriter wr(out, code_size);
|
718
|
+
for (int j = 0; j < M; j++) {
|
719
|
+
wr.write(in[j], nbit[j]);
|
720
|
+
}
|
721
|
+
}
|
722
|
+
}
|
723
|
+
|
724
|
+
void unpack_bitstrings(
|
725
|
+
size_t n,
|
726
|
+
size_t M,
|
727
|
+
int nbit,
|
728
|
+
const uint8_t* packed,
|
729
|
+
size_t code_size,
|
730
|
+
int32_t* unpacked) {
|
731
|
+
FAISS_THROW_IF_NOT(code_size >= (M * nbit + 7) / 8);
|
732
|
+
#pragma omp parallel for if (n > 1000)
|
733
|
+
for (int64_t i = 0; i < n; i++) {
|
734
|
+
const uint8_t* in = packed + i * code_size;
|
735
|
+
int32_t* out = unpacked + i * M;
|
736
|
+
BitstringReader rd(in, code_size);
|
737
|
+
for (int j = 0; j < M; j++) {
|
738
|
+
out[j] = rd.read(nbit);
|
739
|
+
}
|
740
|
+
}
|
741
|
+
}
|
742
|
+
|
743
|
+
void unpack_bitstrings(
|
744
|
+
size_t n,
|
745
|
+
size_t M,
|
746
|
+
const int32_t* nbit,
|
747
|
+
const uint8_t* packed,
|
748
|
+
size_t code_size,
|
749
|
+
int32_t* unpacked) {
|
750
|
+
int totbit = 0;
|
751
|
+
for (int j = 0; j < M; j++) {
|
752
|
+
totbit += nbit[j];
|
753
|
+
}
|
754
|
+
FAISS_THROW_IF_NOT(code_size >= (totbit + 7) / 8);
|
755
|
+
#pragma omp parallel for if (n > 1000)
|
756
|
+
for (int64_t i = 0; i < n; i++) {
|
757
|
+
const uint8_t* in = packed + i * code_size;
|
758
|
+
int32_t* out = unpacked + i * M;
|
759
|
+
BitstringReader rd(in, code_size);
|
760
|
+
for (int j = 0; j < M; j++) {
|
761
|
+
out[j] = rd.read(nbit[j]);
|
762
|
+
}
|
763
|
+
}
|
764
|
+
}
|
765
|
+
|
714
766
|
} // namespace faiss
|
@@ -222,6 +222,64 @@ void generalized_hammings_knn_hc(
|
|
222
222
|
size_t code_size,
|
223
223
|
int ordered = true);
|
224
224
|
|
225
|
+
/** Pack a set of n codes of size M * nbit
|
226
|
+
*
|
227
|
+
* @param n number of codes to pack
|
228
|
+
* @param M number of elementary codes per code
|
229
|
+
* @param nbit number of bits per elementary code
|
230
|
+
* @param unpacked input unpacked codes, size (n, M)
|
231
|
+
* @param packed output packed codes, size (n, code_size)
|
232
|
+
* @param code_size should be >= ceil(M * nbit / 8)
|
233
|
+
*/
|
234
|
+
void pack_bitstrings(
|
235
|
+
size_t n,
|
236
|
+
size_t M,
|
237
|
+
int nbit,
|
238
|
+
const int32_t* unpacked,
|
239
|
+
uint8_t* packed,
|
240
|
+
size_t code_size);
|
241
|
+
|
242
|
+
/** Pack a set of n codes of variable sizes
|
243
|
+
*
|
244
|
+
* @param nbits number of bits per entry (size M)
|
245
|
+
*/
|
246
|
+
void pack_bitstrings(
|
247
|
+
size_t n,
|
248
|
+
size_t M,
|
249
|
+
const int32_t* nbits,
|
250
|
+
const int32_t* unpacked,
|
251
|
+
uint8_t* packed,
|
252
|
+
size_t code_size);
|
253
|
+
|
254
|
+
/** Unpack a set of n codes of size M * nbit
|
255
|
+
*
|
256
|
+
* @param n number of codes to unpack
|
257
|
+
* @param M number of elementary codes per code
|
258
|
+
* @param nbit number of bits per elementary code
|
259
|
+
* @param unpacked output unpacked codes, size (n, M)
|
260
|
+
* @param packed input packed codes, size (n, code_size)
|
261
|
+
* @param code_size should be >= ceil(M * nbit / 8)
|
262
|
+
*/
|
263
|
+
void unpack_bitstrings(
|
264
|
+
size_t n,
|
265
|
+
size_t M,
|
266
|
+
int nbit,
|
267
|
+
const uint8_t* packed,
|
268
|
+
size_t code_size,
|
269
|
+
int32_t* unpacked);
|
270
|
+
|
271
|
+
/** Unpack a set of n codes of variable sizes
|
272
|
+
*
|
273
|
+
* @param nbits number of bits per entry (size M)
|
274
|
+
*/
|
275
|
+
void unpack_bitstrings(
|
276
|
+
size_t n,
|
277
|
+
size_t M,
|
278
|
+
const int32_t* nbits,
|
279
|
+
const uint8_t* packed,
|
280
|
+
size_t code_size,
|
281
|
+
int32_t* unpacked);
|
282
|
+
|
225
283
|
} // namespace faiss
|
226
284
|
|
227
285
|
#include <faiss/utils/hamming-inl.h>
|
@@ -259,8 +259,8 @@ struct HammingComputerDefault {
|
|
259
259
|
set(a8, code_size);
|
260
260
|
}
|
261
261
|
|
262
|
-
void set(const uint8_t*
|
263
|
-
this->a8 =
|
262
|
+
void set(const uint8_t* a8_2, int code_size) {
|
263
|
+
this->a8 = a8_2;
|
264
264
|
quotient8 = code_size / 8;
|
265
265
|
remainder8 = code_size % 8;
|
266
266
|
}
|
@@ -277,24 +277,31 @@ struct HammingComputerDefault {
|
|
277
277
|
len -= 8;
|
278
278
|
accu += popcount64(a64[i] ^ b64[i]);
|
279
279
|
i++;
|
280
|
+
[[fallthrough]];
|
280
281
|
case 7:
|
281
282
|
accu += popcount64(a64[i] ^ b64[i]);
|
282
283
|
i++;
|
284
|
+
[[fallthrough]];
|
283
285
|
case 6:
|
284
286
|
accu += popcount64(a64[i] ^ b64[i]);
|
285
287
|
i++;
|
288
|
+
[[fallthrough]];
|
286
289
|
case 5:
|
287
290
|
accu += popcount64(a64[i] ^ b64[i]);
|
288
291
|
i++;
|
292
|
+
[[fallthrough]];
|
289
293
|
case 4:
|
290
294
|
accu += popcount64(a64[i] ^ b64[i]);
|
291
295
|
i++;
|
296
|
+
[[fallthrough]];
|
292
297
|
case 3:
|
293
298
|
accu += popcount64(a64[i] ^ b64[i]);
|
294
299
|
i++;
|
300
|
+
[[fallthrough]];
|
295
301
|
case 2:
|
296
302
|
accu += popcount64(a64[i] ^ b64[i]);
|
297
303
|
i++;
|
304
|
+
[[fallthrough]];
|
298
305
|
case 1:
|
299
306
|
accu += popcount64(a64[i] ^ b64[i]);
|
300
307
|
i++;
|
@@ -306,18 +313,25 @@ struct HammingComputerDefault {
|
|
306
313
|
switch (remainder8) {
|
307
314
|
case 7:
|
308
315
|
accu += hamdis_tab_ham_bytes[a[6] ^ b[6]];
|
316
|
+
[[fallthrough]];
|
309
317
|
case 6:
|
310
318
|
accu += hamdis_tab_ham_bytes[a[5] ^ b[5]];
|
319
|
+
[[fallthrough]];
|
311
320
|
case 5:
|
312
321
|
accu += hamdis_tab_ham_bytes[a[4] ^ b[4]];
|
322
|
+
[[fallthrough]];
|
313
323
|
case 4:
|
314
324
|
accu += hamdis_tab_ham_bytes[a[3] ^ b[3]];
|
325
|
+
[[fallthrough]];
|
315
326
|
case 3:
|
316
327
|
accu += hamdis_tab_ham_bytes[a[2] ^ b[2]];
|
328
|
+
[[fallthrough]];
|
317
329
|
case 2:
|
318
330
|
accu += hamdis_tab_ham_bytes[a[1] ^ b[1]];
|
331
|
+
[[fallthrough]];
|
319
332
|
case 1:
|
320
333
|
accu += hamdis_tab_ham_bytes[a[0] ^ b[0]];
|
334
|
+
[[fallthrough]];
|
321
335
|
default:
|
322
336
|
break;
|
323
337
|
}
|
@@ -331,93 +345,6 @@ struct HammingComputerDefault {
|
|
331
345
|
}
|
332
346
|
};
|
333
347
|
|
334
|
-
// more inefficient than HammingComputerDefault (obsolete)
|
335
|
-
struct HammingComputerM8 {
|
336
|
-
const uint64_t* a;
|
337
|
-
int n;
|
338
|
-
|
339
|
-
HammingComputerM8() {}
|
340
|
-
|
341
|
-
HammingComputerM8(const uint8_t* a8, int code_size) {
|
342
|
-
set(a8, code_size);
|
343
|
-
}
|
344
|
-
|
345
|
-
void set(const uint8_t* a8, int code_size) {
|
346
|
-
assert(code_size % 8 == 0);
|
347
|
-
a = (uint64_t*)a8;
|
348
|
-
n = code_size / 8;
|
349
|
-
}
|
350
|
-
|
351
|
-
int hamming(const uint8_t* b8) const {
|
352
|
-
const uint64_t* b = (uint64_t*)b8;
|
353
|
-
int accu = 0;
|
354
|
-
for (int i = 0; i < n; i++)
|
355
|
-
accu += popcount64(a[i] ^ b[i]);
|
356
|
-
return accu;
|
357
|
-
}
|
358
|
-
|
359
|
-
inline int get_code_size() const {
|
360
|
-
return n * 8;
|
361
|
-
}
|
362
|
-
};
|
363
|
-
|
364
|
-
// more inefficient than HammingComputerDefault (obsolete)
|
365
|
-
struct HammingComputerM4 {
|
366
|
-
const uint32_t* a;
|
367
|
-
int n;
|
368
|
-
|
369
|
-
HammingComputerM4() {}
|
370
|
-
|
371
|
-
HammingComputerM4(const uint8_t* a4, int code_size) {
|
372
|
-
set(a4, code_size);
|
373
|
-
}
|
374
|
-
|
375
|
-
void set(const uint8_t* a4, int code_size) {
|
376
|
-
assert(code_size % 4 == 0);
|
377
|
-
a = (uint32_t*)a4;
|
378
|
-
n = code_size / 4;
|
379
|
-
}
|
380
|
-
|
381
|
-
int hamming(const uint8_t* b8) const {
|
382
|
-
const uint32_t* b = (uint32_t*)b8;
|
383
|
-
int accu = 0;
|
384
|
-
for (int i = 0; i < n; i++)
|
385
|
-
accu += popcount64(a[i] ^ b[i]);
|
386
|
-
return accu;
|
387
|
-
}
|
388
|
-
|
389
|
-
inline int get_code_size() const {
|
390
|
-
return n * 4;
|
391
|
-
}
|
392
|
-
};
|
393
|
-
|
394
|
-
/***************************************************************************
|
395
|
-
* Equivalence with a template class when code size is known at compile time
|
396
|
-
**************************************************************************/
|
397
|
-
|
398
|
-
// default template
|
399
|
-
template <int CODE_SIZE>
|
400
|
-
struct HammingComputer : HammingComputerDefault {
|
401
|
-
HammingComputer(const uint8_t* a, int code_size)
|
402
|
-
: HammingComputerDefault(a, code_size) {}
|
403
|
-
};
|
404
|
-
|
405
|
-
#define SPECIALIZED_HC(CODE_SIZE) \
|
406
|
-
template <> \
|
407
|
-
struct HammingComputer<CODE_SIZE> : HammingComputer##CODE_SIZE { \
|
408
|
-
HammingComputer(const uint8_t* a) \
|
409
|
-
: HammingComputer##CODE_SIZE(a, CODE_SIZE) {} \
|
410
|
-
}
|
411
|
-
|
412
|
-
SPECIALIZED_HC(4);
|
413
|
-
SPECIALIZED_HC(8);
|
414
|
-
SPECIALIZED_HC(16);
|
415
|
-
SPECIALIZED_HC(20);
|
416
|
-
SPECIALIZED_HC(32);
|
417
|
-
SPECIALIZED_HC(64);
|
418
|
-
|
419
|
-
#undef SPECIALIZED_HC
|
420
|
-
|
421
348
|
/***************************************************************************
|
422
349
|
* generalized Hamming = number of bytes that are different between
|
423
350
|
* two codes.
|