faiss 0.2.7 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +9 -2
- data/ext/faiss/index.cpp +1 -1
- data/ext/faiss/index_binary.cpp +2 -2
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +7 -7
- data/vendor/faiss/faiss/AutoTune.h +0 -1
- data/vendor/faiss/faiss/Clustering.cpp +4 -18
- data/vendor/faiss/faiss/Clustering.h +31 -21
- data/vendor/faiss/faiss/IVFlib.cpp +22 -11
- data/vendor/faiss/faiss/Index.cpp +1 -1
- data/vendor/faiss/faiss/Index.h +20 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
- data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
- data/vendor/faiss/faiss/IndexBinary.h +8 -19
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +106 -187
- data/vendor/faiss/faiss/IndexFastScan.cpp +90 -159
- data/vendor/faiss/faiss/IndexFastScan.h +9 -8
- data/vendor/faiss/faiss/IndexFlat.cpp +195 -3
- data/vendor/faiss/faiss/IndexFlat.h +20 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +11 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +3 -1
- data/vendor/faiss/faiss/IndexHNSW.cpp +112 -316
- data/vendor/faiss/faiss/IndexHNSW.h +12 -48
- data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
- data/vendor/faiss/faiss/IndexIDMap.h +24 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +159 -53
- data/vendor/faiss/faiss/IndexIVF.h +37 -5
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +18 -26
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +3 -2
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +433 -405
- data/vendor/faiss/faiss/IndexIVFFastScan.h +56 -26
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
- data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +78 -122
- data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +18 -50
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
- data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
- data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -4
- data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
- data/vendor/faiss/faiss/IndexNSG.h +10 -10
- data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
- data/vendor/faiss/faiss/IndexPQ.h +1 -4
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
- data/vendor/faiss/faiss/IndexRefine.cpp +49 -19
- data/vendor/faiss/faiss/IndexRefine.h +7 -0
- data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +22 -16
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
- data/vendor/faiss/faiss/IndexShards.cpp +21 -29
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
- data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
- data/vendor/faiss/faiss/MatrixStats.h +21 -9
- data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
- data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
- data/vendor/faiss/faiss/VectorTransform.h +7 -7
- data/vendor/faiss/faiss/clone_index.cpp +15 -10
- data/vendor/faiss/faiss/clone_index.h +3 -0
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +87 -4
- data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +7 -0
- data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
- data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +8 -9
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +18 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
- data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +117 -17
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +267 -40
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -2
- data/vendor/faiss/faiss/impl/DistanceComputer.h +24 -1
- data/vendor/faiss/faiss/impl/FaissException.h +13 -34
- data/vendor/faiss/faiss/impl/HNSW.cpp +321 -70
- data/vendor/faiss/faiss/impl/HNSW.h +9 -8
- data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +3 -1
- data/vendor/faiss/faiss/impl/NNDescent.cpp +29 -19
- data/vendor/faiss/faiss/impl/NSG.h +1 -1
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +24 -22
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
- data/vendor/faiss/faiss/impl/ResultHandler.h +232 -176
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +444 -104
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +0 -8
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +280 -42
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
- data/vendor/faiss/faiss/impl/index_read.cpp +45 -19
- data/vendor/faiss/faiss/impl/index_write.cpp +60 -41
- data/vendor/faiss/faiss/impl/io.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +18 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +3 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +40 -49
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +374 -202
- data/vendor/faiss/faiss/index_factory.cpp +10 -7
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +27 -9
- data/vendor/faiss/faiss/invlists/InvertedLists.h +12 -3
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
- data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
- data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
- data/vendor/faiss/faiss/utils/distances.cpp +128 -74
- data/vendor/faiss/faiss/utils/distances.h +81 -4
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
- data/vendor/faiss/faiss/utils/distances_simd.cpp +428 -70
- data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
- data/vendor/faiss/faiss/utils/fp16.h +2 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +162 -110
- data/vendor/faiss/faiss/utils/hamming.h +58 -0
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +15 -87
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +57 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
- data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
- data/vendor/faiss/faiss/utils/prefetch.h +77 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
- data/vendor/faiss/faiss/utils/simdlib_neon.h +72 -77
- data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
- data/vendor/faiss/faiss/utils/sorting.h +27 -0
- data/vendor/faiss/faiss/utils/utils.cpp +112 -6
- data/vendor/faiss/faiss/utils/utils.h +57 -20
- metadata +11 -4
@@ -275,24 +275,31 @@ struct HammingComputerDefault {
|
|
275
275
|
len -= 8;
|
276
276
|
accu += popcount64(a64[i] ^ b64[i]);
|
277
277
|
i++;
|
278
|
+
[[fallthrough]];
|
278
279
|
case 7:
|
279
280
|
accu += popcount64(a64[i] ^ b64[i]);
|
280
281
|
i++;
|
282
|
+
[[fallthrough]];
|
281
283
|
case 6:
|
282
284
|
accu += popcount64(a64[i] ^ b64[i]);
|
283
285
|
i++;
|
286
|
+
[[fallthrough]];
|
284
287
|
case 5:
|
285
288
|
accu += popcount64(a64[i] ^ b64[i]);
|
286
289
|
i++;
|
290
|
+
[[fallthrough]];
|
287
291
|
case 4:
|
288
292
|
accu += popcount64(a64[i] ^ b64[i]);
|
289
293
|
i++;
|
294
|
+
[[fallthrough]];
|
290
295
|
case 3:
|
291
296
|
accu += popcount64(a64[i] ^ b64[i]);
|
292
297
|
i++;
|
298
|
+
[[fallthrough]];
|
293
299
|
case 2:
|
294
300
|
accu += popcount64(a64[i] ^ b64[i]);
|
295
301
|
i++;
|
302
|
+
[[fallthrough]];
|
296
303
|
case 1:
|
297
304
|
accu += popcount64(a64[i] ^ b64[i]);
|
298
305
|
i++;
|
@@ -302,20 +309,28 @@ struct HammingComputerDefault {
|
|
302
309
|
const uint8_t* a = a8 + 8 * quotient8;
|
303
310
|
const uint8_t* b = b8 + 8 * quotient8;
|
304
311
|
switch (remainder8) {
|
312
|
+
[[fallthrough]];
|
305
313
|
case 7:
|
306
314
|
accu += hamdis_tab_ham_bytes[a[6] ^ b[6]];
|
315
|
+
[[fallthrough]];
|
307
316
|
case 6:
|
308
317
|
accu += hamdis_tab_ham_bytes[a[5] ^ b[5]];
|
318
|
+
[[fallthrough]];
|
309
319
|
case 5:
|
310
320
|
accu += hamdis_tab_ham_bytes[a[4] ^ b[4]];
|
321
|
+
[[fallthrough]];
|
311
322
|
case 4:
|
312
323
|
accu += hamdis_tab_ham_bytes[a[3] ^ b[3]];
|
324
|
+
[[fallthrough]];
|
313
325
|
case 3:
|
314
326
|
accu += hamdis_tab_ham_bytes[a[2] ^ b[2]];
|
327
|
+
[[fallthrough]];
|
315
328
|
case 2:
|
316
329
|
accu += hamdis_tab_ham_bytes[a[1] ^ b[1]];
|
330
|
+
[[fallthrough]];
|
317
331
|
case 1:
|
318
332
|
accu += hamdis_tab_ham_bytes[a[0] ^ b[0]];
|
333
|
+
[[fallthrough]];
|
319
334
|
default:
|
320
335
|
break;
|
321
336
|
}
|
@@ -329,93 +344,6 @@ struct HammingComputerDefault {
|
|
329
344
|
}
|
330
345
|
};
|
331
346
|
|
332
|
-
// more inefficient than HammingComputerDefault (obsolete)
|
333
|
-
struct HammingComputerM8 {
|
334
|
-
const uint64_t* a;
|
335
|
-
int n;
|
336
|
-
|
337
|
-
HammingComputerM8() {}
|
338
|
-
|
339
|
-
HammingComputerM8(const uint8_t* a8, int code_size) {
|
340
|
-
set(a8, code_size);
|
341
|
-
}
|
342
|
-
|
343
|
-
void set(const uint8_t* a8, int code_size) {
|
344
|
-
assert(code_size % 8 == 0);
|
345
|
-
a = (uint64_t*)a8;
|
346
|
-
n = code_size / 8;
|
347
|
-
}
|
348
|
-
|
349
|
-
int hamming(const uint8_t* b8) const {
|
350
|
-
const uint64_t* b = (uint64_t*)b8;
|
351
|
-
int accu = 0;
|
352
|
-
for (int i = 0; i < n; i++)
|
353
|
-
accu += popcount64(a[i] ^ b[i]);
|
354
|
-
return accu;
|
355
|
-
}
|
356
|
-
|
357
|
-
inline int get_code_size() const {
|
358
|
-
return n * 8;
|
359
|
-
}
|
360
|
-
};
|
361
|
-
|
362
|
-
// more inefficient than HammingComputerDefault (obsolete)
|
363
|
-
struct HammingComputerM4 {
|
364
|
-
const uint32_t* a;
|
365
|
-
int n;
|
366
|
-
|
367
|
-
HammingComputerM4() {}
|
368
|
-
|
369
|
-
HammingComputerM4(const uint8_t* a4, int code_size) {
|
370
|
-
set(a4, code_size);
|
371
|
-
}
|
372
|
-
|
373
|
-
void set(const uint8_t* a4, int code_size) {
|
374
|
-
assert(code_size % 4 == 0);
|
375
|
-
a = (uint32_t*)a4;
|
376
|
-
n = code_size / 4;
|
377
|
-
}
|
378
|
-
|
379
|
-
int hamming(const uint8_t* b8) const {
|
380
|
-
const uint32_t* b = (uint32_t*)b8;
|
381
|
-
int accu = 0;
|
382
|
-
for (int i = 0; i < n; i++)
|
383
|
-
accu += popcount64(a[i] ^ b[i]);
|
384
|
-
return accu;
|
385
|
-
}
|
386
|
-
|
387
|
-
inline int get_code_size() const {
|
388
|
-
return n * 4;
|
389
|
-
}
|
390
|
-
};
|
391
|
-
|
392
|
-
/***************************************************************************
|
393
|
-
* Equivalence with a template class when code size is known at compile time
|
394
|
-
**************************************************************************/
|
395
|
-
|
396
|
-
// default template
|
397
|
-
template <int CODE_SIZE>
|
398
|
-
struct HammingComputer : HammingComputerDefault {
|
399
|
-
HammingComputer(const uint8_t* a, int code_size)
|
400
|
-
: HammingComputerDefault(a, code_size) {}
|
401
|
-
};
|
402
|
-
|
403
|
-
#define SPECIALIZED_HC(CODE_SIZE) \
|
404
|
-
template <> \
|
405
|
-
struct HammingComputer<CODE_SIZE> : HammingComputer##CODE_SIZE { \
|
406
|
-
HammingComputer(const uint8_t* a) \
|
407
|
-
: HammingComputer##CODE_SIZE(a, CODE_SIZE) {} \
|
408
|
-
}
|
409
|
-
|
410
|
-
SPECIALIZED_HC(4);
|
411
|
-
SPECIALIZED_HC(8);
|
412
|
-
SPECIALIZED_HC(16);
|
413
|
-
SPECIALIZED_HC(20);
|
414
|
-
SPECIALIZED_HC(32);
|
415
|
-
SPECIALIZED_HC(64);
|
416
|
-
|
417
|
-
#undef SPECIALIZED_HC
|
418
|
-
|
419
347
|
/***************************************************************************
|
420
348
|
* generalized Hamming = number of bytes that are different between
|
421
349
|
* two codes.
|
@@ -23,4 +23,61 @@
|
|
23
23
|
#include <faiss/utils/hamming_distance/generic-inl.h>
|
24
24
|
#endif
|
25
25
|
|
26
|
+
namespace faiss {
|
27
|
+
|
28
|
+
/***************************************************************************
|
29
|
+
* Equivalence with a template class when code size is known at compile time
|
30
|
+
**************************************************************************/
|
31
|
+
|
32
|
+
// default template
|
33
|
+
template <int CODE_SIZE>
|
34
|
+
struct HammingComputer : HammingComputerDefault {
|
35
|
+
HammingComputer(const uint8_t* a, int code_size)
|
36
|
+
: HammingComputerDefault(a, code_size) {}
|
37
|
+
};
|
38
|
+
|
39
|
+
#define SPECIALIZED_HC(CODE_SIZE) \
|
40
|
+
template <> \
|
41
|
+
struct HammingComputer<CODE_SIZE> : HammingComputer##CODE_SIZE { \
|
42
|
+
HammingComputer(const uint8_t* a) \
|
43
|
+
: HammingComputer##CODE_SIZE(a, CODE_SIZE) {} \
|
44
|
+
}
|
45
|
+
|
46
|
+
SPECIALIZED_HC(4);
|
47
|
+
SPECIALIZED_HC(8);
|
48
|
+
SPECIALIZED_HC(16);
|
49
|
+
SPECIALIZED_HC(20);
|
50
|
+
SPECIALIZED_HC(32);
|
51
|
+
SPECIALIZED_HC(64);
|
52
|
+
|
53
|
+
#undef SPECIALIZED_HC
|
54
|
+
|
55
|
+
/***************************************************************************
|
56
|
+
* Dispatching function that takes a code size and a consumer object
|
57
|
+
* the consumer object should contain a return type T and an operation template
|
58
|
+
* function f() that is called to perform the operation.
|
59
|
+
**************************************************************************/
|
60
|
+
|
61
|
+
template <class Consumer, class... Types>
|
62
|
+
typename Consumer::T dispatch_HammingComputer(
|
63
|
+
int code_size,
|
64
|
+
Consumer& consumer,
|
65
|
+
Types... args) {
|
66
|
+
switch (code_size) {
|
67
|
+
#define DISPATCH_HC(CODE_SIZE) \
|
68
|
+
case CODE_SIZE: \
|
69
|
+
return consumer.template f<HammingComputer##CODE_SIZE>(args...);
|
70
|
+
DISPATCH_HC(4);
|
71
|
+
DISPATCH_HC(8);
|
72
|
+
DISPATCH_HC(16);
|
73
|
+
DISPATCH_HC(20);
|
74
|
+
DISPATCH_HC(32);
|
75
|
+
DISPATCH_HC(64);
|
76
|
+
default:
|
77
|
+
return consumer.template f<HammingComputerDefault>(args...);
|
78
|
+
}
|
79
|
+
}
|
80
|
+
|
81
|
+
} // namespace faiss
|
82
|
+
|
26
83
|
#endif
|
@@ -260,7 +260,6 @@ struct HammingComputer32 {
|
|
260
260
|
}
|
261
261
|
|
262
262
|
inline int hamming(const uint8_t* b8) const {
|
263
|
-
const uint64_t* b = (uint64_t*)b8;
|
264
263
|
uint8x16_t b0 = vld1q_u8(b8);
|
265
264
|
uint8x16_t b1 = vld1q_u8(b8 + 16);
|
266
265
|
|
@@ -338,24 +337,31 @@ struct HammingComputerDefault {
|
|
338
337
|
len -= 8;
|
339
338
|
accu += popcount64(a64[i] ^ b64[i]);
|
340
339
|
i++;
|
340
|
+
[[fallthrough]];
|
341
341
|
case 7:
|
342
342
|
accu += popcount64(a64[i] ^ b64[i]);
|
343
343
|
i++;
|
344
|
+
[[fallthrough]];
|
344
345
|
case 6:
|
345
346
|
accu += popcount64(a64[i] ^ b64[i]);
|
346
347
|
i++;
|
348
|
+
[[fallthrough]];
|
347
349
|
case 5:
|
348
350
|
accu += popcount64(a64[i] ^ b64[i]);
|
349
351
|
i++;
|
352
|
+
[[fallthrough]];
|
350
353
|
case 4:
|
351
354
|
accu += popcount64(a64[i] ^ b64[i]);
|
352
355
|
i++;
|
356
|
+
[[fallthrough]];
|
353
357
|
case 3:
|
354
358
|
accu += popcount64(a64[i] ^ b64[i]);
|
355
359
|
i++;
|
360
|
+
[[fallthrough]];
|
356
361
|
case 2:
|
357
362
|
accu += popcount64(a64[i] ^ b64[i]);
|
358
363
|
i++;
|
364
|
+
[[fallthrough]];
|
359
365
|
case 1:
|
360
366
|
accu += popcount64(a64[i] ^ b64[i]);
|
361
367
|
i++;
|
@@ -367,18 +373,25 @@ struct HammingComputerDefault {
|
|
367
373
|
switch (remainder8) {
|
368
374
|
case 7:
|
369
375
|
accu += hamdis_tab_ham_bytes[a[6] ^ b[6]];
|
376
|
+
[[fallthrough]];
|
370
377
|
case 6:
|
371
378
|
accu += hamdis_tab_ham_bytes[a[5] ^ b[5]];
|
379
|
+
[[fallthrough]];
|
372
380
|
case 5:
|
373
381
|
accu += hamdis_tab_ham_bytes[a[4] ^ b[4]];
|
382
|
+
[[fallthrough]];
|
374
383
|
case 4:
|
375
384
|
accu += hamdis_tab_ham_bytes[a[3] ^ b[3]];
|
385
|
+
[[fallthrough]];
|
376
386
|
case 3:
|
377
387
|
accu += hamdis_tab_ham_bytes[a[2] ^ b[2]];
|
388
|
+
[[fallthrough]];
|
378
389
|
case 2:
|
379
390
|
accu += hamdis_tab_ham_bytes[a[1] ^ b[1]];
|
391
|
+
[[fallthrough]];
|
380
392
|
case 1:
|
381
393
|
accu += hamdis_tab_ham_bytes[a[0] ^ b[0]];
|
394
|
+
[[fallthrough]];
|
382
395
|
default:
|
383
396
|
break;
|
384
397
|
}
|
@@ -392,109 +405,6 @@ struct HammingComputerDefault {
|
|
392
405
|
}
|
393
406
|
};
|
394
407
|
|
395
|
-
// more inefficient than HammingComputerDefault (obsolete)
|
396
|
-
struct HammingComputerM8 {
|
397
|
-
const uint64_t* a;
|
398
|
-
int n;
|
399
|
-
|
400
|
-
HammingComputerM8() {}
|
401
|
-
|
402
|
-
HammingComputerM8(const uint8_t* a8, int code_size) {
|
403
|
-
set(a8, code_size);
|
404
|
-
}
|
405
|
-
|
406
|
-
void set(const uint8_t* a8, int code_size) {
|
407
|
-
assert(code_size % 8 == 0);
|
408
|
-
a = (uint64_t*)a8;
|
409
|
-
n = code_size / 8;
|
410
|
-
}
|
411
|
-
|
412
|
-
int hamming(const uint8_t* b8) const {
|
413
|
-
const uint64_t* b = (uint64_t*)b8;
|
414
|
-
int n4 = (n / 4) * 4;
|
415
|
-
int accu = 0;
|
416
|
-
|
417
|
-
int i = 0;
|
418
|
-
for (; i < n4; i += 4) {
|
419
|
-
accu += ::faiss::hamming<256>(a + i, b + i);
|
420
|
-
}
|
421
|
-
for (; i < n; i++) {
|
422
|
-
accu += popcount64(a[i] ^ b[i]);
|
423
|
-
}
|
424
|
-
return accu;
|
425
|
-
}
|
426
|
-
|
427
|
-
inline int get_code_size() const {
|
428
|
-
return n * 8;
|
429
|
-
}
|
430
|
-
};
|
431
|
-
|
432
|
-
// more inefficient than HammingComputerDefault (obsolete)
|
433
|
-
struct HammingComputerM4 {
|
434
|
-
const uint32_t* a;
|
435
|
-
int n;
|
436
|
-
|
437
|
-
HammingComputerM4() {}
|
438
|
-
|
439
|
-
HammingComputerM4(const uint8_t* a4, int code_size) {
|
440
|
-
set(a4, code_size);
|
441
|
-
}
|
442
|
-
|
443
|
-
void set(const uint8_t* a4, int code_size) {
|
444
|
-
assert(code_size % 4 == 0);
|
445
|
-
a = (uint32_t*)a4;
|
446
|
-
n = code_size / 4;
|
447
|
-
}
|
448
|
-
|
449
|
-
int hamming(const uint8_t* b8) const {
|
450
|
-
const uint32_t* b = (uint32_t*)b8;
|
451
|
-
|
452
|
-
int n8 = (n / 8) * 8;
|
453
|
-
int accu = 0;
|
454
|
-
|
455
|
-
int i = 0;
|
456
|
-
for (; i < n8; i += 8) {
|
457
|
-
accu += ::faiss::hamming<256>(
|
458
|
-
(const uint64_t*)(a + i), (const uint64_t*)(b + i));
|
459
|
-
}
|
460
|
-
for (; i < n; i++) {
|
461
|
-
accu += popcount64(a[i] ^ b[i]);
|
462
|
-
}
|
463
|
-
return accu;
|
464
|
-
}
|
465
|
-
|
466
|
-
inline int get_code_size() const {
|
467
|
-
return n * 4;
|
468
|
-
}
|
469
|
-
};
|
470
|
-
|
471
|
-
/***************************************************************************
|
472
|
-
* Equivalence with a template class when code size is known at compile time
|
473
|
-
**************************************************************************/
|
474
|
-
|
475
|
-
// default template
|
476
|
-
template <int CODE_SIZE>
|
477
|
-
struct HammingComputer : HammingComputerDefault {
|
478
|
-
HammingComputer(const uint8_t* a, int code_size)
|
479
|
-
: HammingComputerDefault(a, code_size) {}
|
480
|
-
};
|
481
|
-
|
482
|
-
#define SPECIALIZED_HC(CODE_SIZE) \
|
483
|
-
template <> \
|
484
|
-
struct HammingComputer<CODE_SIZE> : HammingComputer##CODE_SIZE { \
|
485
|
-
HammingComputer(const uint8_t* a) \
|
486
|
-
: HammingComputer##CODE_SIZE(a, CODE_SIZE) {} \
|
487
|
-
}
|
488
|
-
|
489
|
-
SPECIALIZED_HC(4);
|
490
|
-
SPECIALIZED_HC(8);
|
491
|
-
SPECIALIZED_HC(16);
|
492
|
-
SPECIALIZED_HC(20);
|
493
|
-
SPECIALIZED_HC(32);
|
494
|
-
SPECIALIZED_HC(64);
|
495
|
-
|
496
|
-
#undef SPECIALIZED_HC
|
497
|
-
|
498
408
|
/***************************************************************************
|
499
409
|
* generalized Hamming = number of bytes that are different between
|
500
410
|
* two codes.
|
@@ -206,7 +206,8 @@ typename C::T partition_fuzzy_median3(
|
|
206
206
|
assert(n_eq_1 <= n_eq);
|
207
207
|
}
|
208
208
|
|
209
|
-
int wp =
|
209
|
+
[[maybe_unused]] const int wp =
|
210
|
+
compress_array<C>(vals, ids, n, thresh, n_eq_1);
|
210
211
|
|
211
212
|
assert(wp == q);
|
212
213
|
if (q_out) {
|
@@ -750,8 +751,6 @@ typename C::T partition_fuzzy(
|
|
750
751
|
size_t q_min,
|
751
752
|
size_t q_max,
|
752
753
|
size_t* q_out) {
|
753
|
-
// the code below compiles and runs without AVX2 but it's slower than
|
754
|
-
// the scalar implementation
|
755
754
|
#ifdef __AVX2__
|
756
755
|
constexpr bool is_uint16 = std::is_same<typename C::T, uint16_t>::value;
|
757
756
|
if (is_uint16 && is_aligned_pointer(vals)) {
|
@@ -882,7 +881,7 @@ static const simd32uint8 shifts = simd32uint8::create<
|
|
882
881
|
// 2-bit accumulator: we can add only up to 3 elements
|
883
882
|
// on output we return 2*4-bit results
|
884
883
|
// preproc returns either an index in 0..7 or 0xffff
|
885
|
-
// that
|
884
|
+
// that yields a 0 when used in the table look-up
|
886
885
|
template <int N, class Preproc>
|
887
886
|
void compute_accu2(
|
888
887
|
const uint16_t*& data,
|
@@ -0,0 +1,77 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
#pragma once
|
9
|
+
|
10
|
+
// prefetches
|
11
|
+
|
12
|
+
#ifdef __AVX__
|
13
|
+
|
14
|
+
// AVX
|
15
|
+
|
16
|
+
#include <xmmintrin.h>
|
17
|
+
|
18
|
+
inline void prefetch_L1(const void* address) {
|
19
|
+
_mm_prefetch((const char*)address, _MM_HINT_T0);
|
20
|
+
}
|
21
|
+
inline void prefetch_L2(const void* address) {
|
22
|
+
_mm_prefetch((const char*)address, _MM_HINT_T1);
|
23
|
+
}
|
24
|
+
inline void prefetch_L3(const void* address) {
|
25
|
+
_mm_prefetch((const char*)address, _MM_HINT_T2);
|
26
|
+
}
|
27
|
+
|
28
|
+
#elif defined(__aarch64__)
|
29
|
+
|
30
|
+
// ARM64
|
31
|
+
|
32
|
+
#ifdef _MSC_VER
|
33
|
+
|
34
|
+
// todo: arm on MSVC
|
35
|
+
inline void prefetch_L1(const void* address) {}
|
36
|
+
inline void prefetch_L2(const void* address) {}
|
37
|
+
inline void prefetch_L3(const void* address) {}
|
38
|
+
|
39
|
+
#else
|
40
|
+
// arm on non-MSVC
|
41
|
+
|
42
|
+
inline void prefetch_L1(const void* address) {
|
43
|
+
__builtin_prefetch(address, 0, 3);
|
44
|
+
}
|
45
|
+
inline void prefetch_L2(const void* address) {
|
46
|
+
__builtin_prefetch(address, 0, 2);
|
47
|
+
}
|
48
|
+
inline void prefetch_L3(const void* address) {
|
49
|
+
__builtin_prefetch(address, 0, 1);
|
50
|
+
}
|
51
|
+
#endif
|
52
|
+
|
53
|
+
#else
|
54
|
+
|
55
|
+
// a generic platform
|
56
|
+
|
57
|
+
#ifdef _MSC_VER
|
58
|
+
|
59
|
+
inline void prefetch_L1(const void* address) {}
|
60
|
+
inline void prefetch_L2(const void* address) {}
|
61
|
+
inline void prefetch_L3(const void* address) {}
|
62
|
+
|
63
|
+
#else
|
64
|
+
|
65
|
+
inline void prefetch_L1(const void* address) {
|
66
|
+
__builtin_prefetch(address, 0, 3);
|
67
|
+
}
|
68
|
+
inline void prefetch_L2(const void* address) {
|
69
|
+
__builtin_prefetch(address, 0, 2);
|
70
|
+
}
|
71
|
+
inline void prefetch_L3(const void* address) {
|
72
|
+
__builtin_prefetch(address, 0, 1);
|
73
|
+
}
|
74
|
+
|
75
|
+
#endif
|
76
|
+
|
77
|
+
#endif
|
@@ -24,20 +24,6 @@ namespace quantize_lut {
|
|
24
24
|
|
25
25
|
namespace {
|
26
26
|
|
27
|
-
float round_uint8_and_mul(float* tab, size_t n) {
|
28
|
-
float max = 0;
|
29
|
-
for (int i = 0; i < n; i++) {
|
30
|
-
if (fabs(tab[i]) > max) {
|
31
|
-
max = fabs(tab[i]);
|
32
|
-
}
|
33
|
-
}
|
34
|
-
float multiplier = 127 / max;
|
35
|
-
for (int i = 0; i < n; i++) {
|
36
|
-
tab[i] = floorf(tab[i] * multiplier + 128);
|
37
|
-
}
|
38
|
-
return multiplier;
|
39
|
-
}
|
40
|
-
|
41
27
|
// there can be NaNs in tables, they should be ignored
|
42
28
|
float tab_min(const float* tab, size_t n) {
|
43
29
|
float min = HUGE_VAL;
|
@@ -202,12 +202,6 @@ struct simd16uint16 : simd256bit {
|
|
202
202
|
return simd16uint16(_mm256_cmpeq_epi16(lhs.i, rhs.i));
|
203
203
|
}
|
204
204
|
|
205
|
-
bool is_same(simd16uint16 other) const {
|
206
|
-
const __m256i pcmp = _mm256_cmpeq_epi16(i, other.i);
|
207
|
-
unsigned bitmask = _mm256_movemask_epi8(pcmp);
|
208
|
-
return (bitmask == 0xffffffffU);
|
209
|
-
}
|
210
|
-
|
211
205
|
simd16uint16 operator~() const {
|
212
206
|
return simd16uint16(_mm256_xor_si256(i, _mm256_set1_epi32(-1)));
|
213
207
|
}
|