faiss 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +9 -2
- data/ext/faiss/index.cpp +1 -1
- data/ext/faiss/index_binary.cpp +2 -2
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +7 -7
- data/vendor/faiss/faiss/AutoTune.h +1 -2
- data/vendor/faiss/faiss/Clustering.cpp +39 -22
- data/vendor/faiss/faiss/Clustering.h +40 -21
- data/vendor/faiss/faiss/IVFlib.cpp +26 -12
- data/vendor/faiss/faiss/Index.cpp +1 -1
- data/vendor/faiss/faiss/Index.h +40 -10
- data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
- data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
- data/vendor/faiss/faiss/IndexBinary.h +8 -19
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +107 -188
- data/vendor/faiss/faiss/IndexFastScan.cpp +95 -146
- data/vendor/faiss/faiss/IndexFastScan.h +9 -8
- data/vendor/faiss/faiss/IndexFlat.cpp +206 -10
- data/vendor/faiss/faiss/IndexFlat.h +20 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +170 -5
- data/vendor/faiss/faiss/IndexFlatCodes.h +23 -4
- data/vendor/faiss/faiss/IndexHNSW.cpp +231 -382
- data/vendor/faiss/faiss/IndexHNSW.h +62 -49
- data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
- data/vendor/faiss/faiss/IndexIDMap.h +24 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +162 -56
- data/vendor/faiss/faiss/IndexIVF.h +46 -6
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +33 -26
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +6 -2
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +502 -401
- data/vendor/faiss/faiss/IndexIVFFastScan.h +63 -26
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
- data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +79 -125
- data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +39 -52
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
- data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
- data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
- data/vendor/faiss/faiss/IndexLattice.cpp +1 -19
- data/vendor/faiss/faiss/IndexLattice.h +3 -22
- data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -33
- data/vendor/faiss/faiss/IndexNNDescent.h +1 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
- data/vendor/faiss/faiss/IndexNSG.h +11 -11
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +56 -0
- data/vendor/faiss/faiss/IndexNeuralNetCodec.h +49 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
- data/vendor/faiss/faiss/IndexPQ.h +1 -4
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRefine.cpp +54 -24
- data/vendor/faiss/faiss/IndexRefine.h +7 -0
- data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +25 -17
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
- data/vendor/faiss/faiss/IndexShards.cpp +21 -29
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
- data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
- data/vendor/faiss/faiss/MatrixStats.h +21 -9
- data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
- data/vendor/faiss/faiss/MetricType.h +7 -2
- data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
- data/vendor/faiss/faiss/VectorTransform.h +7 -7
- data/vendor/faiss/faiss/clone_index.cpp +15 -10
- data/vendor/faiss/faiss/clone_index.h +3 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +95 -17
- data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +152 -0
- data/vendor/faiss/faiss/cppcontrib/factory_tools.h +24 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +83 -30
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +123 -8
- data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +13 -0
- data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +30 -12
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +282 -0
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +14 -9
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +20 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
- data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +142 -17
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +26 -21
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +7 -1
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +8 -5
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +332 -40
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +4 -1
- data/vendor/faiss/faiss/gpu/utils/Timer.h +1 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +26 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +10 -3
- data/vendor/faiss/faiss/impl/DistanceComputer.h +70 -1
- data/vendor/faiss/faiss/impl/FaissAssert.h +4 -2
- data/vendor/faiss/faiss/impl/FaissException.h +13 -34
- data/vendor/faiss/faiss/impl/HNSW.cpp +605 -186
- data/vendor/faiss/faiss/impl/HNSW.h +52 -30
- data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +11 -9
- data/vendor/faiss/faiss/impl/LookupTableScaler.h +34 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +42 -27
- data/vendor/faiss/faiss/impl/NSG.cpp +0 -29
- data/vendor/faiss/faiss/impl/NSG.h +1 -1
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -22
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +6 -2
- data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
- data/vendor/faiss/faiss/impl/ResultHandler.h +347 -172
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1104 -147
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +3 -8
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +285 -42
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx512.h +248 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
- data/vendor/faiss/faiss/impl/index_read.cpp +74 -34
- data/vendor/faiss/faiss/impl/index_read_utils.h +37 -0
- data/vendor/faiss/faiss/impl/index_write.cpp +88 -51
- data/vendor/faiss/faiss/impl/io.cpp +23 -15
- data/vendor/faiss/faiss/impl/io.h +4 -4
- data/vendor/faiss/faiss/impl/io_macros.h +6 -0
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +40 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +14 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +487 -49
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +481 -225
- data/vendor/faiss/faiss/index_factory.cpp +41 -20
- data/vendor/faiss/faiss/index_io.h +12 -5
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +28 -8
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +3 -0
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +10 -2
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +73 -17
- data/vendor/faiss/faiss/invlists/InvertedLists.h +26 -8
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +24 -9
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +2 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +4 -4
- data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
- data/vendor/faiss/faiss/utils/Heap.h +105 -0
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +342 -0
- data/vendor/faiss/faiss/utils/NeuralNet.h +147 -0
- data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
- data/vendor/faiss/faiss/utils/bf16.h +36 -0
- data/vendor/faiss/faiss/utils/distances.cpp +147 -123
- data/vendor/faiss/faiss/utils/distances.h +86 -9
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
- data/vendor/faiss/faiss/utils/distances_simd.cpp +1589 -243
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +70 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +85 -137
- data/vendor/faiss/faiss/utils/extra_distances.h +3 -2
- data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
- data/vendor/faiss/faiss/utils/fp16.h +2 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +163 -111
- data/vendor/faiss/faiss/utils/hamming.h +58 -0
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +19 -88
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +58 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
- data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
- data/vendor/faiss/faiss/utils/prefetch.h +77 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
- data/vendor/faiss/faiss/utils/random.cpp +43 -0
- data/vendor/faiss/faiss/utils/random.h +25 -0
- data/vendor/faiss/faiss/utils/simdlib.h +10 -1
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +296 -0
- data/vendor/faiss/faiss/utils/simdlib_neon.h +77 -79
- data/vendor/faiss/faiss/utils/simdlib_ppc64.h +1084 -0
- data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
- data/vendor/faiss/faiss/utils/sorting.h +27 -0
- data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +176 -0
- data/vendor/faiss/faiss/utils/utils.cpp +120 -7
- data/vendor/faiss/faiss/utils/utils.h +60 -20
- metadata +23 -4
- data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +0 -102
|
@@ -166,9 +166,12 @@ struct HammingComputer20 {
|
|
|
166
166
|
void set(const uint8_t* a8, int code_size) {
|
|
167
167
|
assert(code_size == 20);
|
|
168
168
|
const uint64_t* a = (uint64_t*)a8;
|
|
169
|
+
const uint32_t* b = (uint32_t*)a8;
|
|
169
170
|
a0 = a[0];
|
|
170
171
|
a1 = a[1];
|
|
171
|
-
|
|
172
|
+
// can't read a[2] since it is uint64_t, not uint32_t
|
|
173
|
+
// results in AddressSanitizer failure reading past end of array
|
|
174
|
+
a2 = b[4];
|
|
172
175
|
}
|
|
173
176
|
|
|
174
177
|
inline int hamming(const uint8_t* b8) const {
|
|
@@ -275,24 +278,31 @@ struct HammingComputerDefault {
|
|
|
275
278
|
len -= 8;
|
|
276
279
|
accu += popcount64(a64[i] ^ b64[i]);
|
|
277
280
|
i++;
|
|
281
|
+
[[fallthrough]];
|
|
278
282
|
case 7:
|
|
279
283
|
accu += popcount64(a64[i] ^ b64[i]);
|
|
280
284
|
i++;
|
|
285
|
+
[[fallthrough]];
|
|
281
286
|
case 6:
|
|
282
287
|
accu += popcount64(a64[i] ^ b64[i]);
|
|
283
288
|
i++;
|
|
289
|
+
[[fallthrough]];
|
|
284
290
|
case 5:
|
|
285
291
|
accu += popcount64(a64[i] ^ b64[i]);
|
|
286
292
|
i++;
|
|
293
|
+
[[fallthrough]];
|
|
287
294
|
case 4:
|
|
288
295
|
accu += popcount64(a64[i] ^ b64[i]);
|
|
289
296
|
i++;
|
|
297
|
+
[[fallthrough]];
|
|
290
298
|
case 3:
|
|
291
299
|
accu += popcount64(a64[i] ^ b64[i]);
|
|
292
300
|
i++;
|
|
301
|
+
[[fallthrough]];
|
|
293
302
|
case 2:
|
|
294
303
|
accu += popcount64(a64[i] ^ b64[i]);
|
|
295
304
|
i++;
|
|
305
|
+
[[fallthrough]];
|
|
296
306
|
case 1:
|
|
297
307
|
accu += popcount64(a64[i] ^ b64[i]);
|
|
298
308
|
i++;
|
|
@@ -302,20 +312,28 @@ struct HammingComputerDefault {
|
|
|
302
312
|
const uint8_t* a = a8 + 8 * quotient8;
|
|
303
313
|
const uint8_t* b = b8 + 8 * quotient8;
|
|
304
314
|
switch (remainder8) {
|
|
315
|
+
[[fallthrough]];
|
|
305
316
|
case 7:
|
|
306
317
|
accu += hamdis_tab_ham_bytes[a[6] ^ b[6]];
|
|
318
|
+
[[fallthrough]];
|
|
307
319
|
case 6:
|
|
308
320
|
accu += hamdis_tab_ham_bytes[a[5] ^ b[5]];
|
|
321
|
+
[[fallthrough]];
|
|
309
322
|
case 5:
|
|
310
323
|
accu += hamdis_tab_ham_bytes[a[4] ^ b[4]];
|
|
324
|
+
[[fallthrough]];
|
|
311
325
|
case 4:
|
|
312
326
|
accu += hamdis_tab_ham_bytes[a[3] ^ b[3]];
|
|
327
|
+
[[fallthrough]];
|
|
313
328
|
case 3:
|
|
314
329
|
accu += hamdis_tab_ham_bytes[a[2] ^ b[2]];
|
|
330
|
+
[[fallthrough]];
|
|
315
331
|
case 2:
|
|
316
332
|
accu += hamdis_tab_ham_bytes[a[1] ^ b[1]];
|
|
333
|
+
[[fallthrough]];
|
|
317
334
|
case 1:
|
|
318
335
|
accu += hamdis_tab_ham_bytes[a[0] ^ b[0]];
|
|
336
|
+
[[fallthrough]];
|
|
319
337
|
default:
|
|
320
338
|
break;
|
|
321
339
|
}
|
|
@@ -329,93 +347,6 @@ struct HammingComputerDefault {
|
|
|
329
347
|
}
|
|
330
348
|
};
|
|
331
349
|
|
|
332
|
-
// more inefficient than HammingComputerDefault (obsolete)
|
|
333
|
-
struct HammingComputerM8 {
|
|
334
|
-
const uint64_t* a;
|
|
335
|
-
int n;
|
|
336
|
-
|
|
337
|
-
HammingComputerM8() {}
|
|
338
|
-
|
|
339
|
-
HammingComputerM8(const uint8_t* a8, int code_size) {
|
|
340
|
-
set(a8, code_size);
|
|
341
|
-
}
|
|
342
|
-
|
|
343
|
-
void set(const uint8_t* a8, int code_size) {
|
|
344
|
-
assert(code_size % 8 == 0);
|
|
345
|
-
a = (uint64_t*)a8;
|
|
346
|
-
n = code_size / 8;
|
|
347
|
-
}
|
|
348
|
-
|
|
349
|
-
int hamming(const uint8_t* b8) const {
|
|
350
|
-
const uint64_t* b = (uint64_t*)b8;
|
|
351
|
-
int accu = 0;
|
|
352
|
-
for (int i = 0; i < n; i++)
|
|
353
|
-
accu += popcount64(a[i] ^ b[i]);
|
|
354
|
-
return accu;
|
|
355
|
-
}
|
|
356
|
-
|
|
357
|
-
inline int get_code_size() const {
|
|
358
|
-
return n * 8;
|
|
359
|
-
}
|
|
360
|
-
};
|
|
361
|
-
|
|
362
|
-
// more inefficient than HammingComputerDefault (obsolete)
|
|
363
|
-
struct HammingComputerM4 {
|
|
364
|
-
const uint32_t* a;
|
|
365
|
-
int n;
|
|
366
|
-
|
|
367
|
-
HammingComputerM4() {}
|
|
368
|
-
|
|
369
|
-
HammingComputerM4(const uint8_t* a4, int code_size) {
|
|
370
|
-
set(a4, code_size);
|
|
371
|
-
}
|
|
372
|
-
|
|
373
|
-
void set(const uint8_t* a4, int code_size) {
|
|
374
|
-
assert(code_size % 4 == 0);
|
|
375
|
-
a = (uint32_t*)a4;
|
|
376
|
-
n = code_size / 4;
|
|
377
|
-
}
|
|
378
|
-
|
|
379
|
-
int hamming(const uint8_t* b8) const {
|
|
380
|
-
const uint32_t* b = (uint32_t*)b8;
|
|
381
|
-
int accu = 0;
|
|
382
|
-
for (int i = 0; i < n; i++)
|
|
383
|
-
accu += popcount64(a[i] ^ b[i]);
|
|
384
|
-
return accu;
|
|
385
|
-
}
|
|
386
|
-
|
|
387
|
-
inline int get_code_size() const {
|
|
388
|
-
return n * 4;
|
|
389
|
-
}
|
|
390
|
-
};
|
|
391
|
-
|
|
392
|
-
/***************************************************************************
|
|
393
|
-
* Equivalence with a template class when code size is known at compile time
|
|
394
|
-
**************************************************************************/
|
|
395
|
-
|
|
396
|
-
// default template
|
|
397
|
-
template <int CODE_SIZE>
|
|
398
|
-
struct HammingComputer : HammingComputerDefault {
|
|
399
|
-
HammingComputer(const uint8_t* a, int code_size)
|
|
400
|
-
: HammingComputerDefault(a, code_size) {}
|
|
401
|
-
};
|
|
402
|
-
|
|
403
|
-
#define SPECIALIZED_HC(CODE_SIZE) \
|
|
404
|
-
template <> \
|
|
405
|
-
struct HammingComputer<CODE_SIZE> : HammingComputer##CODE_SIZE { \
|
|
406
|
-
HammingComputer(const uint8_t* a) \
|
|
407
|
-
: HammingComputer##CODE_SIZE(a, CODE_SIZE) {} \
|
|
408
|
-
}
|
|
409
|
-
|
|
410
|
-
SPECIALIZED_HC(4);
|
|
411
|
-
SPECIALIZED_HC(8);
|
|
412
|
-
SPECIALIZED_HC(16);
|
|
413
|
-
SPECIALIZED_HC(20);
|
|
414
|
-
SPECIALIZED_HC(32);
|
|
415
|
-
SPECIALIZED_HC(64);
|
|
416
|
-
|
|
417
|
-
#undef SPECIALIZED_HC
|
|
418
|
-
|
|
419
350
|
/***************************************************************************
|
|
420
351
|
* generalized Hamming = number of bytes that are different between
|
|
421
352
|
* two codes.
|
|
@@ -23,4 +23,62 @@
|
|
|
23
23
|
#include <faiss/utils/hamming_distance/generic-inl.h>
|
|
24
24
|
#endif
|
|
25
25
|
|
|
26
|
+
namespace faiss {
|
|
27
|
+
|
|
28
|
+
/***************************************************************************
|
|
29
|
+
* Equivalence with a template class when code size is known at compile time
|
|
30
|
+
**************************************************************************/
|
|
31
|
+
|
|
32
|
+
// default template
|
|
33
|
+
template <int CODE_SIZE>
|
|
34
|
+
struct HammingComputer : HammingComputerDefault {
|
|
35
|
+
HammingComputer(const uint8_t* a, int code_size)
|
|
36
|
+
: HammingComputerDefault(a, code_size) {}
|
|
37
|
+
};
|
|
38
|
+
|
|
39
|
+
#define SPECIALIZED_HC(CODE_SIZE) \
|
|
40
|
+
template <> \
|
|
41
|
+
struct HammingComputer<CODE_SIZE> : HammingComputer##CODE_SIZE { \
|
|
42
|
+
HammingComputer(const uint8_t* a) \
|
|
43
|
+
: HammingComputer##CODE_SIZE(a, CODE_SIZE) {} \
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
SPECIALIZED_HC(4);
|
|
47
|
+
SPECIALIZED_HC(8);
|
|
48
|
+
SPECIALIZED_HC(16);
|
|
49
|
+
SPECIALIZED_HC(20);
|
|
50
|
+
SPECIALIZED_HC(32);
|
|
51
|
+
SPECIALIZED_HC(64);
|
|
52
|
+
|
|
53
|
+
#undef SPECIALIZED_HC
|
|
54
|
+
|
|
55
|
+
/***************************************************************************
|
|
56
|
+
* Dispatching function that takes a code size and a consumer object
|
|
57
|
+
* the consumer object should contain a return type T and an operation template
|
|
58
|
+
* function f() that must be called to perform the operation.
|
|
59
|
+
**************************************************************************/
|
|
60
|
+
|
|
61
|
+
template <class Consumer, class... Types>
|
|
62
|
+
typename Consumer::T dispatch_HammingComputer(
|
|
63
|
+
int code_size,
|
|
64
|
+
Consumer& consumer,
|
|
65
|
+
Types... args) {
|
|
66
|
+
switch (code_size) {
|
|
67
|
+
#define DISPATCH_HC(CODE_SIZE) \
|
|
68
|
+
case CODE_SIZE: \
|
|
69
|
+
return consumer.template f<HammingComputer##CODE_SIZE>(args...);
|
|
70
|
+
DISPATCH_HC(4);
|
|
71
|
+
DISPATCH_HC(8);
|
|
72
|
+
DISPATCH_HC(16);
|
|
73
|
+
DISPATCH_HC(20);
|
|
74
|
+
DISPATCH_HC(32);
|
|
75
|
+
DISPATCH_HC(64);
|
|
76
|
+
default:
|
|
77
|
+
return consumer.template f<HammingComputerDefault>(args...);
|
|
78
|
+
}
|
|
79
|
+
#undef DISPATCH_HC
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
} // namespace faiss
|
|
83
|
+
|
|
26
84
|
#endif
|
|
@@ -260,7 +260,6 @@ struct HammingComputer32 {
|
|
|
260
260
|
}
|
|
261
261
|
|
|
262
262
|
inline int hamming(const uint8_t* b8) const {
|
|
263
|
-
const uint64_t* b = (uint64_t*)b8;
|
|
264
263
|
uint8x16_t b0 = vld1q_u8(b8);
|
|
265
264
|
uint8x16_t b1 = vld1q_u8(b8 + 16);
|
|
266
265
|
|
|
@@ -338,24 +337,31 @@ struct HammingComputerDefault {
|
|
|
338
337
|
len -= 8;
|
|
339
338
|
accu += popcount64(a64[i] ^ b64[i]);
|
|
340
339
|
i++;
|
|
340
|
+
[[fallthrough]];
|
|
341
341
|
case 7:
|
|
342
342
|
accu += popcount64(a64[i] ^ b64[i]);
|
|
343
343
|
i++;
|
|
344
|
+
[[fallthrough]];
|
|
344
345
|
case 6:
|
|
345
346
|
accu += popcount64(a64[i] ^ b64[i]);
|
|
346
347
|
i++;
|
|
348
|
+
[[fallthrough]];
|
|
347
349
|
case 5:
|
|
348
350
|
accu += popcount64(a64[i] ^ b64[i]);
|
|
349
351
|
i++;
|
|
352
|
+
[[fallthrough]];
|
|
350
353
|
case 4:
|
|
351
354
|
accu += popcount64(a64[i] ^ b64[i]);
|
|
352
355
|
i++;
|
|
356
|
+
[[fallthrough]];
|
|
353
357
|
case 3:
|
|
354
358
|
accu += popcount64(a64[i] ^ b64[i]);
|
|
355
359
|
i++;
|
|
360
|
+
[[fallthrough]];
|
|
356
361
|
case 2:
|
|
357
362
|
accu += popcount64(a64[i] ^ b64[i]);
|
|
358
363
|
i++;
|
|
364
|
+
[[fallthrough]];
|
|
359
365
|
case 1:
|
|
360
366
|
accu += popcount64(a64[i] ^ b64[i]);
|
|
361
367
|
i++;
|
|
@@ -367,18 +373,25 @@ struct HammingComputerDefault {
|
|
|
367
373
|
switch (remainder8) {
|
|
368
374
|
case 7:
|
|
369
375
|
accu += hamdis_tab_ham_bytes[a[6] ^ b[6]];
|
|
376
|
+
[[fallthrough]];
|
|
370
377
|
case 6:
|
|
371
378
|
accu += hamdis_tab_ham_bytes[a[5] ^ b[5]];
|
|
379
|
+
[[fallthrough]];
|
|
372
380
|
case 5:
|
|
373
381
|
accu += hamdis_tab_ham_bytes[a[4] ^ b[4]];
|
|
382
|
+
[[fallthrough]];
|
|
374
383
|
case 4:
|
|
375
384
|
accu += hamdis_tab_ham_bytes[a[3] ^ b[3]];
|
|
385
|
+
[[fallthrough]];
|
|
376
386
|
case 3:
|
|
377
387
|
accu += hamdis_tab_ham_bytes[a[2] ^ b[2]];
|
|
388
|
+
[[fallthrough]];
|
|
378
389
|
case 2:
|
|
379
390
|
accu += hamdis_tab_ham_bytes[a[1] ^ b[1]];
|
|
391
|
+
[[fallthrough]];
|
|
380
392
|
case 1:
|
|
381
393
|
accu += hamdis_tab_ham_bytes[a[0] ^ b[0]];
|
|
394
|
+
[[fallthrough]];
|
|
382
395
|
default:
|
|
383
396
|
break;
|
|
384
397
|
}
|
|
@@ -392,109 +405,6 @@ struct HammingComputerDefault {
|
|
|
392
405
|
}
|
|
393
406
|
};
|
|
394
407
|
|
|
395
|
-
// more inefficient than HammingComputerDefault (obsolete)
|
|
396
|
-
struct HammingComputerM8 {
|
|
397
|
-
const uint64_t* a;
|
|
398
|
-
int n;
|
|
399
|
-
|
|
400
|
-
HammingComputerM8() {}
|
|
401
|
-
|
|
402
|
-
HammingComputerM8(const uint8_t* a8, int code_size) {
|
|
403
|
-
set(a8, code_size);
|
|
404
|
-
}
|
|
405
|
-
|
|
406
|
-
void set(const uint8_t* a8, int code_size) {
|
|
407
|
-
assert(code_size % 8 == 0);
|
|
408
|
-
a = (uint64_t*)a8;
|
|
409
|
-
n = code_size / 8;
|
|
410
|
-
}
|
|
411
|
-
|
|
412
|
-
int hamming(const uint8_t* b8) const {
|
|
413
|
-
const uint64_t* b = (uint64_t*)b8;
|
|
414
|
-
int n4 = (n / 4) * 4;
|
|
415
|
-
int accu = 0;
|
|
416
|
-
|
|
417
|
-
int i = 0;
|
|
418
|
-
for (; i < n4; i += 4) {
|
|
419
|
-
accu += ::faiss::hamming<256>(a + i, b + i);
|
|
420
|
-
}
|
|
421
|
-
for (; i < n; i++) {
|
|
422
|
-
accu += popcount64(a[i] ^ b[i]);
|
|
423
|
-
}
|
|
424
|
-
return accu;
|
|
425
|
-
}
|
|
426
|
-
|
|
427
|
-
inline int get_code_size() const {
|
|
428
|
-
return n * 8;
|
|
429
|
-
}
|
|
430
|
-
};
|
|
431
|
-
|
|
432
|
-
// more inefficient than HammingComputerDefault (obsolete)
|
|
433
|
-
struct HammingComputerM4 {
|
|
434
|
-
const uint32_t* a;
|
|
435
|
-
int n;
|
|
436
|
-
|
|
437
|
-
HammingComputerM4() {}
|
|
438
|
-
|
|
439
|
-
HammingComputerM4(const uint8_t* a4, int code_size) {
|
|
440
|
-
set(a4, code_size);
|
|
441
|
-
}
|
|
442
|
-
|
|
443
|
-
void set(const uint8_t* a4, int code_size) {
|
|
444
|
-
assert(code_size % 4 == 0);
|
|
445
|
-
a = (uint32_t*)a4;
|
|
446
|
-
n = code_size / 4;
|
|
447
|
-
}
|
|
448
|
-
|
|
449
|
-
int hamming(const uint8_t* b8) const {
|
|
450
|
-
const uint32_t* b = (uint32_t*)b8;
|
|
451
|
-
|
|
452
|
-
int n8 = (n / 8) * 8;
|
|
453
|
-
int accu = 0;
|
|
454
|
-
|
|
455
|
-
int i = 0;
|
|
456
|
-
for (; i < n8; i += 8) {
|
|
457
|
-
accu += ::faiss::hamming<256>(
|
|
458
|
-
(const uint64_t*)(a + i), (const uint64_t*)(b + i));
|
|
459
|
-
}
|
|
460
|
-
for (; i < n; i++) {
|
|
461
|
-
accu += popcount64(a[i] ^ b[i]);
|
|
462
|
-
}
|
|
463
|
-
return accu;
|
|
464
|
-
}
|
|
465
|
-
|
|
466
|
-
inline int get_code_size() const {
|
|
467
|
-
return n * 4;
|
|
468
|
-
}
|
|
469
|
-
};
|
|
470
|
-
|
|
471
|
-
/***************************************************************************
|
|
472
|
-
* Equivalence with a template class when code size is known at compile time
|
|
473
|
-
**************************************************************************/
|
|
474
|
-
|
|
475
|
-
// default template
|
|
476
|
-
template <int CODE_SIZE>
|
|
477
|
-
struct HammingComputer : HammingComputerDefault {
|
|
478
|
-
HammingComputer(const uint8_t* a, int code_size)
|
|
479
|
-
: HammingComputerDefault(a, code_size) {}
|
|
480
|
-
};
|
|
481
|
-
|
|
482
|
-
#define SPECIALIZED_HC(CODE_SIZE) \
|
|
483
|
-
template <> \
|
|
484
|
-
struct HammingComputer<CODE_SIZE> : HammingComputer##CODE_SIZE { \
|
|
485
|
-
HammingComputer(const uint8_t* a) \
|
|
486
|
-
: HammingComputer##CODE_SIZE(a, CODE_SIZE) {} \
|
|
487
|
-
}
|
|
488
|
-
|
|
489
|
-
SPECIALIZED_HC(4);
|
|
490
|
-
SPECIALIZED_HC(8);
|
|
491
|
-
SPECIALIZED_HC(16);
|
|
492
|
-
SPECIALIZED_HC(20);
|
|
493
|
-
SPECIALIZED_HC(32);
|
|
494
|
-
SPECIALIZED_HC(64);
|
|
495
|
-
|
|
496
|
-
#undef SPECIALIZED_HC
|
|
497
|
-
|
|
498
408
|
/***************************************************************************
|
|
499
409
|
* generalized Hamming = number of bytes that are different between
|
|
500
410
|
* two codes.
|
|
@@ -206,7 +206,8 @@ typename C::T partition_fuzzy_median3(
|
|
|
206
206
|
assert(n_eq_1 <= n_eq);
|
|
207
207
|
}
|
|
208
208
|
|
|
209
|
-
int wp =
|
|
209
|
+
[[maybe_unused]] const int wp =
|
|
210
|
+
compress_array<C>(vals, ids, n, thresh, n_eq_1);
|
|
210
211
|
|
|
211
212
|
assert(wp == q);
|
|
212
213
|
if (q_out) {
|
|
@@ -750,8 +751,6 @@ typename C::T partition_fuzzy(
|
|
|
750
751
|
size_t q_min,
|
|
751
752
|
size_t q_max,
|
|
752
753
|
size_t* q_out) {
|
|
753
|
-
// the code below compiles and runs without AVX2 but it's slower than
|
|
754
|
-
// the scalar implementation
|
|
755
754
|
#ifdef __AVX2__
|
|
756
755
|
constexpr bool is_uint16 = std::is_same<typename C::T, uint16_t>::value;
|
|
757
756
|
if (is_uint16 && is_aligned_pointer(vals)) {
|
|
@@ -882,7 +881,7 @@ static const simd32uint8 shifts = simd32uint8::create<
|
|
|
882
881
|
// 2-bit accumulator: we can add only up to 3 elements
|
|
883
882
|
// on output we return 2*4-bit results
|
|
884
883
|
// preproc returns either an index in 0..7 or 0xffff
|
|
885
|
-
// that
|
|
884
|
+
// that yields a 0 when used in the table look-up
|
|
886
885
|
template <int N, class Preproc>
|
|
887
886
|
void compute_accu2(
|
|
888
887
|
const uint16_t*& data,
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
// prefetches
|
|
11
|
+
|
|
12
|
+
#ifdef __AVX__
|
|
13
|
+
|
|
14
|
+
// AVX
|
|
15
|
+
|
|
16
|
+
#include <xmmintrin.h>
|
|
17
|
+
|
|
18
|
+
inline void prefetch_L1(const void* address) {
|
|
19
|
+
_mm_prefetch((const char*)address, _MM_HINT_T0);
|
|
20
|
+
}
|
|
21
|
+
inline void prefetch_L2(const void* address) {
|
|
22
|
+
_mm_prefetch((const char*)address, _MM_HINT_T1);
|
|
23
|
+
}
|
|
24
|
+
inline void prefetch_L3(const void* address) {
|
|
25
|
+
_mm_prefetch((const char*)address, _MM_HINT_T2);
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
#elif defined(__aarch64__)
|
|
29
|
+
|
|
30
|
+
// ARM64
|
|
31
|
+
|
|
32
|
+
#ifdef _MSC_VER
|
|
33
|
+
|
|
34
|
+
// todo: arm on MSVC
|
|
35
|
+
inline void prefetch_L1(const void* address) {}
|
|
36
|
+
inline void prefetch_L2(const void* address) {}
|
|
37
|
+
inline void prefetch_L3(const void* address) {}
|
|
38
|
+
|
|
39
|
+
#else
|
|
40
|
+
// arm on non-MSVC
|
|
41
|
+
|
|
42
|
+
inline void prefetch_L1(const void* address) {
|
|
43
|
+
__builtin_prefetch(address, 0, 3);
|
|
44
|
+
}
|
|
45
|
+
inline void prefetch_L2(const void* address) {
|
|
46
|
+
__builtin_prefetch(address, 0, 2);
|
|
47
|
+
}
|
|
48
|
+
inline void prefetch_L3(const void* address) {
|
|
49
|
+
__builtin_prefetch(address, 0, 1);
|
|
50
|
+
}
|
|
51
|
+
#endif
|
|
52
|
+
|
|
53
|
+
#else
|
|
54
|
+
|
|
55
|
+
// a generic platform
|
|
56
|
+
|
|
57
|
+
#ifdef _MSC_VER
|
|
58
|
+
|
|
59
|
+
inline void prefetch_L1(const void* address) {}
|
|
60
|
+
inline void prefetch_L2(const void* address) {}
|
|
61
|
+
inline void prefetch_L3(const void* address) {}
|
|
62
|
+
|
|
63
|
+
#else
|
|
64
|
+
|
|
65
|
+
inline void prefetch_L1(const void* address) {
|
|
66
|
+
__builtin_prefetch(address, 0, 3);
|
|
67
|
+
}
|
|
68
|
+
inline void prefetch_L2(const void* address) {
|
|
69
|
+
__builtin_prefetch(address, 0, 2);
|
|
70
|
+
}
|
|
71
|
+
inline void prefetch_L3(const void* address) {
|
|
72
|
+
__builtin_prefetch(address, 0, 1);
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
#endif
|
|
76
|
+
|
|
77
|
+
#endif
|
|
@@ -24,20 +24,6 @@ namespace quantize_lut {
|
|
|
24
24
|
|
|
25
25
|
namespace {
|
|
26
26
|
|
|
27
|
-
float round_uint8_and_mul(float* tab, size_t n) {
|
|
28
|
-
float max = 0;
|
|
29
|
-
for (int i = 0; i < n; i++) {
|
|
30
|
-
if (fabs(tab[i]) > max) {
|
|
31
|
-
max = fabs(tab[i]);
|
|
32
|
-
}
|
|
33
|
-
}
|
|
34
|
-
float multiplier = 127 / max;
|
|
35
|
-
for (int i = 0; i < n; i++) {
|
|
36
|
-
tab[i] = floorf(tab[i] * multiplier + 128);
|
|
37
|
-
}
|
|
38
|
-
return multiplier;
|
|
39
|
-
}
|
|
40
|
-
|
|
41
27
|
// there can be NaNs in tables, they should be ignored
|
|
42
28
|
float tab_min(const float* tab, size_t n) {
|
|
43
29
|
float min = HUGE_VAL;
|
|
@@ -54,6 +54,37 @@ double RandomGenerator::rand_double() {
|
|
|
54
54
|
return mt() / double(mt.max());
|
|
55
55
|
}
|
|
56
56
|
|
|
57
|
+
SplitMix64RandomGenerator::SplitMix64RandomGenerator(int64_t seed)
|
|
58
|
+
: state{static_cast<uint64_t>(seed)} {}
|
|
59
|
+
|
|
60
|
+
int SplitMix64RandomGenerator::rand_int() {
|
|
61
|
+
return next() & 0x7fffffff;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
int64_t SplitMix64RandomGenerator::rand_int64() {
|
|
65
|
+
uint64_t value = next();
|
|
66
|
+
return static_cast<int64_t>(value & 0x7fffffffffffffffULL);
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
int SplitMix64RandomGenerator::rand_int(int max) {
|
|
70
|
+
return next() % max;
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
float SplitMix64RandomGenerator::rand_float() {
|
|
74
|
+
return next() / float(std::numeric_limits<uint64_t>::max());
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
double SplitMix64RandomGenerator::rand_double() {
|
|
78
|
+
return next() / double(std::numeric_limits<uint64_t>::max());
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
uint64_t SplitMix64RandomGenerator::next() {
|
|
82
|
+
uint64_t z = (state += 0x9e3779b97f4a7c15ULL);
|
|
83
|
+
z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9ULL;
|
|
84
|
+
z = (z ^ (z >> 27)) * 0x94d049bb133111ebULL;
|
|
85
|
+
return z ^ (z >> 31);
|
|
86
|
+
}
|
|
87
|
+
|
|
57
88
|
/***********************************************************************
|
|
58
89
|
* Random functions in this C file only exist because Torch
|
|
59
90
|
* counterparts are slow and not multi-threaded. Typical use is for
|
|
@@ -162,6 +193,18 @@ void rand_perm(int* perm, size_t n, int64_t seed) {
|
|
|
162
193
|
}
|
|
163
194
|
}
|
|
164
195
|
|
|
196
|
+
void rand_perm_splitmix64(int* perm, size_t n, int64_t seed) {
|
|
197
|
+
for (size_t i = 0; i < n; i++)
|
|
198
|
+
perm[i] = i;
|
|
199
|
+
|
|
200
|
+
SplitMix64RandomGenerator rng(seed);
|
|
201
|
+
|
|
202
|
+
for (size_t i = 0; i + 1 < n; i++) {
|
|
203
|
+
int i2 = i + rng.rand_int(n - i);
|
|
204
|
+
std::swap(perm[i], perm[i2]);
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
|
|
165
208
|
void byte_rand(uint8_t* x, size_t n, int64_t seed) {
|
|
166
209
|
// only try to parallelize on large enough arrays
|
|
167
210
|
const size_t nblock = n < 1024 ? 1 : 1024;
|
|
@@ -43,6 +43,30 @@ struct RandomGenerator {
|
|
|
43
43
|
explicit RandomGenerator(int64_t seed = 1234);
|
|
44
44
|
};
|
|
45
45
|
|
|
46
|
+
/// fast random generator that cannot be used in multithreaded contexts.
|
|
47
|
+
/// based on https://prng.di.unimi.it/
|
|
48
|
+
struct SplitMix64RandomGenerator {
|
|
49
|
+
uint64_t state;
|
|
50
|
+
|
|
51
|
+
/// random positive integer
|
|
52
|
+
int rand_int();
|
|
53
|
+
|
|
54
|
+
/// random int64_t
|
|
55
|
+
int64_t rand_int64();
|
|
56
|
+
|
|
57
|
+
/// generate random integer between 0 and max-1
|
|
58
|
+
int rand_int(int max);
|
|
59
|
+
|
|
60
|
+
/// between 0 and 1
|
|
61
|
+
float rand_float();
|
|
62
|
+
|
|
63
|
+
double rand_double();
|
|
64
|
+
|
|
65
|
+
explicit SplitMix64RandomGenerator(int64_t seed = 1234);
|
|
66
|
+
|
|
67
|
+
uint64_t next();
|
|
68
|
+
};
|
|
69
|
+
|
|
46
70
|
/* Generate an array of uniform random floats / multi-threaded implementation */
|
|
47
71
|
void float_rand(float* x, size_t n, int64_t seed);
|
|
48
72
|
void float_randn(float* x, size_t n, int64_t seed);
|
|
@@ -53,6 +77,7 @@ void int64_rand_max(int64_t* x, size_t n, uint64_t max, int64_t seed);
|
|
|
53
77
|
|
|
54
78
|
/* random permutation */
|
|
55
79
|
void rand_perm(int* perm, size_t n, int64_t seed);
|
|
80
|
+
void rand_perm_splitmix64(int* perm, size_t n, int64_t seed);
|
|
56
81
|
|
|
57
82
|
/* Random set of vectors with intrinsic dimensionality 10 that is harder to
|
|
58
83
|
* index than a subspace of dim 10 but easier than uniform data in dimension d
|
|
@@ -14,7 +14,12 @@
|
|
|
14
14
|
* functions.
|
|
15
15
|
*/
|
|
16
16
|
|
|
17
|
-
#
|
|
17
|
+
#if defined(__AVX512F__)
|
|
18
|
+
|
|
19
|
+
#include <faiss/utils/simdlib_avx2.h>
|
|
20
|
+
#include <faiss/utils/simdlib_avx512.h>
|
|
21
|
+
|
|
22
|
+
#elif defined(__AVX2__)
|
|
18
23
|
|
|
19
24
|
#include <faiss/utils/simdlib_avx2.h>
|
|
20
25
|
|
|
@@ -22,6 +27,10 @@
|
|
|
22
27
|
|
|
23
28
|
#include <faiss/utils/simdlib_neon.h>
|
|
24
29
|
|
|
30
|
+
#elif defined(__PPC64__)
|
|
31
|
+
|
|
32
|
+
#include <faiss/utils/simdlib_ppc64.h>
|
|
33
|
+
|
|
25
34
|
#else
|
|
26
35
|
|
|
27
36
|
// emulated = all operations are implemented as scalars
|
|
@@ -202,12 +202,6 @@ struct simd16uint16 : simd256bit {
|
|
|
202
202
|
return simd16uint16(_mm256_cmpeq_epi16(lhs.i, rhs.i));
|
|
203
203
|
}
|
|
204
204
|
|
|
205
|
-
bool is_same(simd16uint16 other) const {
|
|
206
|
-
const __m256i pcmp = _mm256_cmpeq_epi16(i, other.i);
|
|
207
|
-
unsigned bitmask = _mm256_movemask_epi8(pcmp);
|
|
208
|
-
return (bitmask == 0xffffffffU);
|
|
209
|
-
}
|
|
210
|
-
|
|
211
205
|
simd16uint16 operator~() const {
|
|
212
206
|
return simd16uint16(_mm256_xor_si256(i, _mm256_set1_epi32(-1)));
|
|
213
207
|
}
|