faiss 0.5.2 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +1 -1
- data/ext/faiss/ext.cpp +1 -1
- data/ext/faiss/extconf.rb +5 -6
- data/ext/faiss/index_binary.cpp +76 -17
- data/ext/faiss/{index.cpp → index_rb.cpp} +108 -35
- data/ext/faiss/kmeans.cpp +12 -9
- data/ext/faiss/numo.hpp +11 -9
- data/ext/faiss/pca_matrix.cpp +10 -8
- data/ext/faiss/product_quantizer.cpp +14 -12
- data/ext/faiss/{utils.cpp → utils_rb.cpp} +10 -3
- data/ext/faiss/{utils.h → utils_rb.h} +6 -0
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +130 -11
- data/vendor/faiss/faiss/AutoTune.h +14 -1
- data/vendor/faiss/faiss/Clustering.cpp +59 -10
- data/vendor/faiss/faiss/Clustering.h +12 -0
- data/vendor/faiss/faiss/IVFlib.cpp +31 -28
- data/vendor/faiss/faiss/Index.cpp +20 -8
- data/vendor/faiss/faiss/Index.h +25 -3
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +19 -24
- data/vendor/faiss/faiss/IndexBinary.cpp +1 -0
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +9 -4
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +45 -11
- data/vendor/faiss/faiss/IndexFastScan.cpp +35 -22
- data/vendor/faiss/faiss/IndexFastScan.h +10 -1
- data/vendor/faiss/faiss/IndexFlat.cpp +193 -136
- data/vendor/faiss/faiss/IndexFlat.h +16 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +46 -22
- data/vendor/faiss/faiss/IndexFlatCodes.h +7 -1
- data/vendor/faiss/faiss/IndexHNSW.cpp +24 -50
- data/vendor/faiss/faiss/IndexHNSW.h +14 -12
- data/vendor/faiss/faiss/IndexIDMap.cpp +1 -1
- data/vendor/faiss/faiss/IndexIVF.cpp +76 -49
- data/vendor/faiss/faiss/IndexIVF.h +14 -4
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +11 -8
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +2 -2
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +25 -14
- data/vendor/faiss/faiss/IndexIVFFastScan.h +26 -22
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +10 -61
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +39 -111
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +89 -147
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +37 -5
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +42 -30
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -2
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +246 -97
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +32 -29
- data/vendor/faiss/faiss/IndexLSH.cpp +8 -6
- data/vendor/faiss/faiss/IndexLattice.cpp +29 -24
- data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +2 -1
- data/vendor/faiss/faiss/IndexNSG.h +0 -2
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +1 -1
- data/vendor/faiss/faiss/IndexPQ.cpp +19 -10
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +26 -13
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -2
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +132 -78
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +14 -12
- data/vendor/faiss/faiss/IndexRefine.cpp +0 -30
- data/vendor/faiss/faiss/IndexShards.cpp +3 -4
- data/vendor/faiss/faiss/MetricType.h +16 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +120 -0
- data/vendor/faiss/faiss/VectorTransform.h +23 -0
- data/vendor/faiss/faiss/clone_index.cpp +7 -4
- data/vendor/faiss/faiss/{cppcontrib/factory_tools.cpp → factory_tools.cpp} +1 -1
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +37 -11
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +0 -28
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +367 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.h +107 -0
- data/vendor/faiss/faiss/impl/CodePacker.cpp +4 -0
- data/vendor/faiss/faiss/impl/CodePacker.h +11 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +83 -0
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.h +47 -0
- data/vendor/faiss/faiss/impl/FaissAssert.h +60 -2
- data/vendor/faiss/faiss/impl/HNSW.cpp +25 -34
- data/vendor/faiss/faiss/impl/HNSW.h +8 -6
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +34 -27
- data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -1
- data/vendor/faiss/faiss/impl/NSG.cpp +6 -5
- data/vendor/faiss/faiss/impl/NSG.h +17 -7
- data/vendor/faiss/faiss/impl/Panorama.cpp +53 -46
- data/vendor/faiss/faiss/impl/Panorama.h +22 -6
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +16 -5
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +70 -58
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +92 -0
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +93 -31
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +12 -28
- data/vendor/faiss/faiss/impl/RaBitQuantizer.h +3 -10
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +15 -41
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +0 -4
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +14 -9
- data/vendor/faiss/faiss/impl/ResultHandler.h +131 -50
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +67 -2358
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +0 -2
- data/vendor/faiss/faiss/impl/VisitedTable.cpp +42 -0
- data/vendor/faiss/faiss/impl/VisitedTable.h +69 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +158 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +829 -471
- data/vendor/faiss/faiss/impl/index_read_utils.h +0 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +17 -8
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +47 -20
- data/vendor/faiss/faiss/impl/mapped_io.cpp +9 -2
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +7 -2
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +11 -3
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +19 -13
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +29 -21
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx2.h → pq_code_distance/pq_code_distance-avx2.cpp} +42 -215
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx512.h → pq_code_distance/pq_code_distance-avx512.cpp} +68 -107
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +141 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +23 -0
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-sve.h → pq_code_distance/pq_code_distance-sve.cpp} +57 -144
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +9 -6
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +121 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +136 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +280 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +164 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +94 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +455 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +430 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +329 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +467 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +203 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +42 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +139 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +18 -18
- data/vendor/faiss/faiss/index_factory.cpp +35 -16
- data/vendor/faiss/faiss/index_io.h +29 -3
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +7 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +9 -19
- data/vendor/faiss/faiss/svs/IndexSVSFlat.h +2 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +2 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +9 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +9 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +46 -0
- data/vendor/faiss/faiss/utils/Heap.h +21 -0
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +10 -7
- data/vendor/faiss/faiss/utils/distances.cpp +141 -23
- data/vendor/faiss/faiss/utils/distances.h +98 -0
- data/vendor/faiss/faiss/utils/distances_dispatch.h +170 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +74 -3511
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +164 -157
- data/vendor/faiss/faiss/utils/extra_distances.cpp +52 -95
- data/vendor/faiss/faiss/utils/extra_distances.h +47 -1
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -1
- data/vendor/faiss/faiss/utils/partitioning.cpp +1 -1
- data/vendor/faiss/faiss/utils/pq_code_distance.h +251 -0
- data/vendor/faiss/faiss/utils/rabitq_simd.h +260 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +150 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +568 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +153 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +1185 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +1092 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +391 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +322 -0
- data/vendor/faiss/faiss/utils/simd_levels.h +91 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +12 -1
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +69 -0
- data/vendor/faiss/faiss/utils/simdlib_neon.h +6 -0
- data/vendor/faiss/faiss/utils/sorting.cpp +4 -4
- data/vendor/faiss/faiss/utils/utils.cpp +16 -9
- metadata +47 -18
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +0 -81
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +0 -186
- /data/vendor/faiss/faiss/{cppcontrib/factory_tools.h → factory_tools.h} +0 -0
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
#include <cstdint>
|
|
11
|
+
#include <string>
|
|
12
|
+
|
|
13
|
+
#include <faiss/impl/platform_macros.h>
|
|
14
|
+
|
|
15
|
+
namespace faiss {
|
|
16
|
+
|
|
17
|
+
#define COMPILE_SIMD_NONE
|
|
18
|
+
|
|
19
|
+
enum class SIMDLevel {
|
|
20
|
+
NONE,
|
|
21
|
+
// x86
|
|
22
|
+
AVX2,
|
|
23
|
+
AVX512,
|
|
24
|
+
AVX512_SPR, // Sapphire Rapids: AVX512 + BF16 + FP16 + VNNI
|
|
25
|
+
// arm & aarch64
|
|
26
|
+
ARM_NEON,
|
|
27
|
+
ARM_SVE, // Scalable Vector Extension (ARMv8.2+)
|
|
28
|
+
|
|
29
|
+
COUNT
|
|
30
|
+
};
|
|
31
|
+
|
|
32
|
+
/// Number of float32 lanes for a given SIMD level.
|
|
33
|
+
template <SIMDLevel SL>
|
|
34
|
+
constexpr int simd_width() {
|
|
35
|
+
if constexpr (SL == SIMDLevel::AVX512 || SL == SIMDLevel::AVX512_SPR)
|
|
36
|
+
return 16;
|
|
37
|
+
else if constexpr (SL == SIMDLevel::AVX2 || SL == SIMDLevel::ARM_NEON)
|
|
38
|
+
return 8;
|
|
39
|
+
else
|
|
40
|
+
return 1;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
/// Convert SIMDLevel to string. Throws FaissException for invalid level.
|
|
44
|
+
std::string to_string(SIMDLevel level);
|
|
45
|
+
|
|
46
|
+
/// Parse string to SIMDLevel. Throws FaissException for invalid strings.
|
|
47
|
+
SIMDLevel to_simd_level(const std::string& level_str);
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Current SIMD configuration.
|
|
51
|
+
*
|
|
52
|
+
* This class provides a uniform API for querying and setting the SIMD level,
|
|
53
|
+
* regardless of whether faiss was built with Dynamic Dispatch (DD) or static
|
|
54
|
+
* SIMD selection.
|
|
55
|
+
*
|
|
56
|
+
* In DD mode:
|
|
57
|
+
* - get_level() returns the runtime-detected or user-set level
|
|
58
|
+
* - set_level() changes the runtime level (if level is supported)
|
|
59
|
+
* - supported_simd_levels() returns bitmask of all compiled-in levels
|
|
60
|
+
*
|
|
61
|
+
* In static mode:
|
|
62
|
+
* - get_level() returns the compiled-in level
|
|
63
|
+
* - set_level() succeeds only if level matches compiled-in level
|
|
64
|
+
* - supported_simd_levels() returns bitmask with single level
|
|
65
|
+
*/
|
|
66
|
+
struct FAISS_API SIMDConfig {
|
|
67
|
+
static SIMDLevel level;
|
|
68
|
+
|
|
69
|
+
/// Returns bitmask of supported SIMD levels (1 << SIMDLevel).
|
|
70
|
+
static uint64_t supported_simd_levels;
|
|
71
|
+
|
|
72
|
+
static SIMDLevel auto_detect_simd_level();
|
|
73
|
+
|
|
74
|
+
SIMDConfig(const char** faiss_simd_level_env = nullptr);
|
|
75
|
+
|
|
76
|
+
/// Set the SIMD level. Throws FaissException if level is not supported.
|
|
77
|
+
static void set_level(SIMDLevel level);
|
|
78
|
+
static SIMDLevel get_level();
|
|
79
|
+
static std::string get_level_name();
|
|
80
|
+
|
|
81
|
+
/// Check if a SIMD level is available (compiled in).
|
|
82
|
+
static bool is_simd_level_available(SIMDLevel level);
|
|
83
|
+
|
|
84
|
+
/// Returns the SIMD level via the dispatch mechanism.
|
|
85
|
+
/// In DD mode, uses DISPATCH_SIMDLevel internally.
|
|
86
|
+
/// In static mode, returns the compiled-in level.
|
|
87
|
+
/// Useful for verification: get_level() == get_dispatched_level()
|
|
88
|
+
static SIMDLevel get_dispatched_level();
|
|
89
|
+
};
|
|
90
|
+
|
|
91
|
+
} // namespace faiss
|
|
@@ -794,10 +794,21 @@ inline simd8float32 getlow128(simd8float32 a, simd8float32 b) {
|
|
|
794
794
|
return simd8float32(_mm256_permute2f128_ps(a.f, b.f, 0 | 2 << 4));
|
|
795
795
|
}
|
|
796
796
|
|
|
797
|
-
inline simd8float32 gethigh128(simd8float32 a, simd8float32 b) {
|
|
797
|
+
inline simd8float32 gethigh128(const simd8float32& a, const simd8float32& b) {
|
|
798
798
|
return simd8float32(_mm256_permute2f128_ps(a.f, b.f, 1 | 3 << 4));
|
|
799
799
|
}
|
|
800
800
|
|
|
801
|
+
// horizontal add: sum all 8 floats in the register
|
|
802
|
+
inline float horizontal_add(const simd8float32& a) {
|
|
803
|
+
__m128 sum = _mm_add_ps(
|
|
804
|
+
_mm256_castps256_ps128(a.f), _mm256_extractf128_ps(a.f, 1));
|
|
805
|
+
__m128 v0 = _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 0, 3, 2));
|
|
806
|
+
__m128 v1 = _mm_add_ps(sum, v0);
|
|
807
|
+
__m128 v2 = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(0, 0, 0, 1));
|
|
808
|
+
__m128 v3 = _mm_add_ps(v1, v2);
|
|
809
|
+
return _mm_cvtss_f32(v3);
|
|
810
|
+
}
|
|
811
|
+
|
|
801
812
|
} // namespace
|
|
802
813
|
|
|
803
814
|
} // namespace faiss
|
|
@@ -293,4 +293,73 @@ struct simd64uint8 : simd512bit {
|
|
|
293
293
|
}
|
|
294
294
|
};
|
|
295
295
|
|
|
296
|
+
/// vector of 16 32-bit floats
|
|
297
|
+
struct simd16float32 : simd512bit {
|
|
298
|
+
simd16float32() {}
|
|
299
|
+
|
|
300
|
+
explicit simd16float32(__m512 f) : simd512bit(f) {}
|
|
301
|
+
|
|
302
|
+
explicit simd16float32(float x) : simd512bit(_mm512_set1_ps(x)) {}
|
|
303
|
+
|
|
304
|
+
explicit simd16float32(const float* x) : simd512bit(_mm512_loadu_ps(x)) {}
|
|
305
|
+
|
|
306
|
+
void clear() {
|
|
307
|
+
f = _mm512_setzero_ps();
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
void storeu(float* ptr) const {
|
|
311
|
+
_mm512_storeu_ps(ptr, f);
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
void loadu(const float* ptr) {
|
|
315
|
+
f = _mm512_loadu_ps(ptr);
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
void store(float* ptr) const {
|
|
319
|
+
_mm512_storeu_ps(ptr, f);
|
|
320
|
+
}
|
|
321
|
+
|
|
322
|
+
simd16float32 operator*(const simd16float32& other) const {
|
|
323
|
+
return simd16float32(_mm512_mul_ps(f, other.f));
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
simd16float32 operator+(const simd16float32& other) const {
|
|
327
|
+
return simd16float32(_mm512_add_ps(f, other.f));
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
simd16float32 operator-(const simd16float32& other) const {
|
|
331
|
+
return simd16float32(_mm512_sub_ps(f, other.f));
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
simd16float32& operator+=(const simd16float32& other) {
|
|
335
|
+
f = _mm512_add_ps(f, other.f);
|
|
336
|
+
return *this;
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
std::string tostring() const {
|
|
340
|
+
float tab[16];
|
|
341
|
+
storeu(tab);
|
|
342
|
+
char res[1000];
|
|
343
|
+
char* ptr = res;
|
|
344
|
+
for (int i = 0; i < 16; i++) {
|
|
345
|
+
ptr += sprintf(ptr, "%g,", tab[i]);
|
|
346
|
+
}
|
|
347
|
+
ptr[-1] = 0;
|
|
348
|
+
return std::string(res);
|
|
349
|
+
}
|
|
350
|
+
};
|
|
351
|
+
|
|
352
|
+
// compute a * b + c
|
|
353
|
+
inline simd16float32 fmadd(
|
|
354
|
+
const simd16float32& a,
|
|
355
|
+
const simd16float32& b,
|
|
356
|
+
const simd16float32& c) {
|
|
357
|
+
return simd16float32(_mm512_fmadd_ps(a.f, b.f, c.f));
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
// horizontal add: sum all 16 floats in the register
|
|
361
|
+
inline float horizontal_add(const simd16float32& a) {
|
|
362
|
+
return _mm512_reduce_add_ps(a.f);
|
|
363
|
+
}
|
|
364
|
+
|
|
296
365
|
} // namespace faiss
|
|
@@ -1355,6 +1355,12 @@ simd8float32 gethigh128(const simd8float32& a, const simd8float32& b) {
|
|
|
1355
1355
|
return simd8float32{float32x4x2_t{a.data.val[1], b.data.val[1]}};
|
|
1356
1356
|
}
|
|
1357
1357
|
|
|
1358
|
+
// horizontal add: sum all 8 floats in the register
|
|
1359
|
+
inline float horizontal_add(const simd8float32& a) {
|
|
1360
|
+
float32x4_t sum = vaddq_f32(a.data.val[0], a.data.val[1]);
|
|
1361
|
+
return vaddvq_f32(sum);
|
|
1362
|
+
}
|
|
1363
|
+
|
|
1358
1364
|
} // namespace
|
|
1359
1365
|
|
|
1360
1366
|
} // namespace faiss
|
|
@@ -795,15 +795,15 @@ void hashtable_int64_to_int64_lookup(
|
|
|
795
795
|
#pragma omp parallel for
|
|
796
796
|
for (int64_t i = 0; i < n; i++) {
|
|
797
797
|
int64_t k = keys[i];
|
|
798
|
-
int64_t
|
|
799
|
-
size_t slot =
|
|
798
|
+
int64_t hashValue = hash_function(k) & mask;
|
|
799
|
+
size_t slot = hashValue;
|
|
800
800
|
|
|
801
801
|
if (tab[2 * slot] == -1) { // not in table
|
|
802
802
|
vals[i] = -1;
|
|
803
803
|
} else if (tab[2 * slot] == k) { // found!
|
|
804
804
|
vals[i] = tab[2 * slot + 1];
|
|
805
805
|
} else { // need to search in [k0, k1)
|
|
806
|
-
size_t bucket =
|
|
806
|
+
size_t bucket = hashValue >> (log2_capacity - log2_nbucket);
|
|
807
807
|
size_t k0 = bucket << (log2_capacity - log2_nbucket);
|
|
808
808
|
size_t k1 = (bucket + 1) << (log2_capacity - log2_nbucket);
|
|
809
809
|
for (;;) {
|
|
@@ -815,7 +815,7 @@ void hashtable_int64_to_int64_lookup(
|
|
|
815
815
|
if (slot == k1) {
|
|
816
816
|
slot = k0;
|
|
817
817
|
}
|
|
818
|
-
if (slot ==
|
|
818
|
+
if (slot == hashValue) { // bucket is full and not found
|
|
819
819
|
vals[i] = -1;
|
|
820
820
|
break;
|
|
821
821
|
}
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
// -*- c++ -*-
|
|
9
9
|
|
|
10
10
|
#include <faiss/Index.h>
|
|
11
|
+
#include <faiss/utils/simd_levels.h>
|
|
11
12
|
#include <faiss/utils/utils.h>
|
|
12
13
|
|
|
13
14
|
#include <cassert>
|
|
@@ -115,16 +116,22 @@ std::string get_compile_options() {
|
|
|
115
116
|
options += "OPTIMIZE ";
|
|
116
117
|
#endif
|
|
117
118
|
|
|
118
|
-
#ifdef
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
119
|
+
#ifdef FAISS_ENABLE_DD
|
|
120
|
+
// Dynamic Dispatch mode: report DD and all available SIMD levels
|
|
121
|
+
options += "DD ";
|
|
122
|
+
int supported = SIMDConfig::supported_simd_levels;
|
|
123
|
+
for (int i = 0; i < static_cast<int>(SIMDLevel::COUNT); ++i) {
|
|
124
|
+
auto level = static_cast<SIMDLevel>(i);
|
|
125
|
+
if ((supported & (1 << i)) && level != SIMDLevel::NONE) {
|
|
126
|
+
options += to_string(level) + " ";
|
|
127
|
+
}
|
|
128
|
+
}
|
|
126
129
|
#else
|
|
127
|
-
|
|
130
|
+
// Static mode: report the compiled-in SIMD level
|
|
131
|
+
SIMDLevel level = SIMDConfig::get_level();
|
|
132
|
+
if (level != SIMDLevel::NONE) {
|
|
133
|
+
options += to_string(level) + " ";
|
|
134
|
+
}
|
|
128
135
|
#endif
|
|
129
136
|
|
|
130
137
|
#ifdef FAISS_ENABLE_SVS
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: faiss
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.6.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
@@ -15,28 +15,28 @@ dependencies:
|
|
|
15
15
|
requirements:
|
|
16
16
|
- - ">="
|
|
17
17
|
- !ruby/object:Gem::Version
|
|
18
|
-
version:
|
|
18
|
+
version: 4.11.5
|
|
19
19
|
type: :runtime
|
|
20
20
|
prerelease: false
|
|
21
21
|
version_requirements: !ruby/object:Gem::Requirement
|
|
22
22
|
requirements:
|
|
23
23
|
- - ">="
|
|
24
24
|
- !ruby/object:Gem::Version
|
|
25
|
-
version:
|
|
25
|
+
version: 4.11.5
|
|
26
26
|
- !ruby/object:Gem::Dependency
|
|
27
|
-
name: numo-narray
|
|
27
|
+
name: numo-narray-alt
|
|
28
28
|
requirement: !ruby/object:Gem::Requirement
|
|
29
29
|
requirements:
|
|
30
30
|
- - ">="
|
|
31
31
|
- !ruby/object:Gem::Version
|
|
32
|
-
version: '0'
|
|
32
|
+
version: '0.10'
|
|
33
33
|
type: :runtime
|
|
34
34
|
prerelease: false
|
|
35
35
|
version_requirements: !ruby/object:Gem::Requirement
|
|
36
36
|
requirements:
|
|
37
37
|
- - ">="
|
|
38
38
|
- !ruby/object:Gem::Version
|
|
39
|
-
version: '0'
|
|
39
|
+
version: '0.10'
|
|
40
40
|
email: andrew@ankane.org
|
|
41
41
|
executables: []
|
|
42
42
|
extensions:
|
|
@@ -48,14 +48,14 @@ files:
|
|
|
48
48
|
- README.md
|
|
49
49
|
- ext/faiss/ext.cpp
|
|
50
50
|
- ext/faiss/extconf.rb
|
|
51
|
-
- ext/faiss/index.cpp
|
|
52
51
|
- ext/faiss/index_binary.cpp
|
|
52
|
+
- ext/faiss/index_rb.cpp
|
|
53
53
|
- ext/faiss/kmeans.cpp
|
|
54
54
|
- ext/faiss/numo.hpp
|
|
55
55
|
- ext/faiss/pca_matrix.cpp
|
|
56
56
|
- ext/faiss/product_quantizer.cpp
|
|
57
|
-
- ext/faiss/
|
|
58
|
-
- ext/faiss/
|
|
57
|
+
- ext/faiss/utils_rb.cpp
|
|
58
|
+
- ext/faiss/utils_rb.h
|
|
59
59
|
- lib/faiss.rb
|
|
60
60
|
- lib/faiss/version.rb
|
|
61
61
|
- vendor/faiss/LICENSE
|
|
@@ -165,8 +165,6 @@ files:
|
|
|
165
165
|
- vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h
|
|
166
166
|
- vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h
|
|
167
167
|
- vendor/faiss/faiss/cppcontrib/detail/UintReader.h
|
|
168
|
-
- vendor/faiss/faiss/cppcontrib/factory_tools.cpp
|
|
169
|
-
- vendor/faiss/faiss/cppcontrib/factory_tools.h
|
|
170
168
|
- vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h
|
|
171
169
|
- vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h
|
|
172
170
|
- vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h
|
|
@@ -175,6 +173,8 @@ files:
|
|
|
175
173
|
- vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h
|
|
176
174
|
- vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h
|
|
177
175
|
- vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h
|
|
176
|
+
- vendor/faiss/faiss/factory_tools.cpp
|
|
177
|
+
- vendor/faiss/faiss/factory_tools.h
|
|
178
178
|
- vendor/faiss/faiss/gpu/GpuAutoTune.cpp
|
|
179
179
|
- vendor/faiss/faiss/gpu/GpuAutoTune.h
|
|
180
180
|
- vendor/faiss/faiss/gpu/GpuCloner.cpp
|
|
@@ -231,8 +231,12 @@ files:
|
|
|
231
231
|
- vendor/faiss/faiss/impl/AdditiveQuantizer.h
|
|
232
232
|
- vendor/faiss/faiss/impl/AuxIndexStructures.cpp
|
|
233
233
|
- vendor/faiss/faiss/impl/AuxIndexStructures.h
|
|
234
|
+
- vendor/faiss/faiss/impl/ClusteringInitialization.cpp
|
|
235
|
+
- vendor/faiss/faiss/impl/ClusteringInitialization.h
|
|
234
236
|
- vendor/faiss/faiss/impl/CodePacker.cpp
|
|
235
237
|
- vendor/faiss/faiss/impl/CodePacker.h
|
|
238
|
+
- vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp
|
|
239
|
+
- vendor/faiss/faiss/impl/CodePackerRaBitQ.h
|
|
236
240
|
- vendor/faiss/faiss/impl/DistanceComputer.h
|
|
237
241
|
- vendor/faiss/faiss/impl/FaissAssert.h
|
|
238
242
|
- vendor/faiss/faiss/impl/FaissException.cpp
|
|
@@ -276,11 +280,9 @@ files:
|
|
|
276
280
|
- vendor/faiss/faiss/impl/ScalarQuantizer.h
|
|
277
281
|
- vendor/faiss/faiss/impl/ThreadedIndex-inl.h
|
|
278
282
|
- vendor/faiss/faiss/impl/ThreadedIndex.h
|
|
279
|
-
- vendor/faiss/faiss/impl/
|
|
280
|
-
- vendor/faiss/faiss/impl/
|
|
281
|
-
- vendor/faiss/faiss/impl/
|
|
282
|
-
- vendor/faiss/faiss/impl/code_distance/code_distance-sve.h
|
|
283
|
-
- vendor/faiss/faiss/impl/code_distance/code_distance.h
|
|
283
|
+
- vendor/faiss/faiss/impl/VisitedTable.cpp
|
|
284
|
+
- vendor/faiss/faiss/impl/VisitedTable.h
|
|
285
|
+
- vendor/faiss/faiss/impl/expanded_scanners.h
|
|
284
286
|
- vendor/faiss/faiss/impl/index_read.cpp
|
|
285
287
|
- vendor/faiss/faiss/impl/index_read_utils.h
|
|
286
288
|
- vendor/faiss/faiss/impl/index_write.cpp
|
|
@@ -299,8 +301,25 @@ files:
|
|
|
299
301
|
- vendor/faiss/faiss/impl/pq4_fast_scan.h
|
|
300
302
|
- vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp
|
|
301
303
|
- vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp
|
|
304
|
+
- vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-avx2.cpp
|
|
305
|
+
- vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-avx512.cpp
|
|
306
|
+
- vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp
|
|
307
|
+
- vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h
|
|
308
|
+
- vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp
|
|
302
309
|
- vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp
|
|
303
310
|
- vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h
|
|
311
|
+
- vendor/faiss/faiss/impl/scalar_quantizer/codecs.h
|
|
312
|
+
- vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h
|
|
313
|
+
- vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h
|
|
314
|
+
- vendor/faiss/faiss/impl/scalar_quantizer/scanners.h
|
|
315
|
+
- vendor/faiss/faiss/impl/scalar_quantizer/similarities.h
|
|
316
|
+
- vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp
|
|
317
|
+
- vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp
|
|
318
|
+
- vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h
|
|
319
|
+
- vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp
|
|
320
|
+
- vendor/faiss/faiss/impl/scalar_quantizer/training.cpp
|
|
321
|
+
- vendor/faiss/faiss/impl/scalar_quantizer/training.h
|
|
322
|
+
- vendor/faiss/faiss/impl/simd_dispatch.h
|
|
304
323
|
- vendor/faiss/faiss/impl/simd_result_handlers.h
|
|
305
324
|
- vendor/faiss/faiss/impl/svs_io.cpp
|
|
306
325
|
- vendor/faiss/faiss/impl/svs_io.h
|
|
@@ -345,6 +364,7 @@ files:
|
|
|
345
364
|
- vendor/faiss/faiss/utils/bf16.h
|
|
346
365
|
- vendor/faiss/faiss/utils/distances.cpp
|
|
347
366
|
- vendor/faiss/faiss/utils/distances.h
|
|
367
|
+
- vendor/faiss/faiss/utils/distances_dispatch.h
|
|
348
368
|
- vendor/faiss/faiss/utils/distances_fused/avx512.cpp
|
|
349
369
|
- vendor/faiss/faiss/utils/distances_fused/avx512.h
|
|
350
370
|
- vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp
|
|
@@ -371,12 +391,21 @@ files:
|
|
|
371
391
|
- vendor/faiss/faiss/utils/ordered_key_value.h
|
|
372
392
|
- vendor/faiss/faiss/utils/partitioning.cpp
|
|
373
393
|
- vendor/faiss/faiss/utils/partitioning.h
|
|
394
|
+
- vendor/faiss/faiss/utils/pq_code_distance.h
|
|
374
395
|
- vendor/faiss/faiss/utils/prefetch.h
|
|
375
396
|
- vendor/faiss/faiss/utils/quantize_lut.cpp
|
|
376
397
|
- vendor/faiss/faiss/utils/quantize_lut.h
|
|
377
398
|
- vendor/faiss/faiss/utils/rabitq_simd.h
|
|
378
399
|
- vendor/faiss/faiss/utils/random.cpp
|
|
379
400
|
- vendor/faiss/faiss/utils/random.h
|
|
401
|
+
- vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp
|
|
402
|
+
- vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp
|
|
403
|
+
- vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h
|
|
404
|
+
- vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp
|
|
405
|
+
- vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp
|
|
406
|
+
- vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h
|
|
407
|
+
- vendor/faiss/faiss/utils/simd_levels.cpp
|
|
408
|
+
- vendor/faiss/faiss/utils/simd_levels.h
|
|
380
409
|
- vendor/faiss/faiss/utils/simdlib.h
|
|
381
410
|
- vendor/faiss/faiss/utils/simdlib_avx2.h
|
|
382
411
|
- vendor/faiss/faiss/utils/simdlib_avx512.h
|
|
@@ -400,14 +429,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
400
429
|
requirements:
|
|
401
430
|
- - ">="
|
|
402
431
|
- !ruby/object:Gem::Version
|
|
403
|
-
version: '3.
|
|
432
|
+
version: '3.3'
|
|
404
433
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
405
434
|
requirements:
|
|
406
435
|
- - ">="
|
|
407
436
|
- !ruby/object:Gem::Version
|
|
408
437
|
version: '0'
|
|
409
438
|
requirements: []
|
|
410
|
-
rubygems_version: 4.0.
|
|
439
|
+
rubygems_version: 4.0.6
|
|
411
440
|
specification_version: 4
|
|
412
441
|
summary: Efficient similarity search and clustering for Ruby
|
|
413
442
|
test_files: []
|
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
-
*
|
|
4
|
-
* This source code is licensed under the MIT license found in the
|
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
#pragma once
|
|
9
|
-
|
|
10
|
-
#include <cstddef>
|
|
11
|
-
#include <cstdint>
|
|
12
|
-
|
|
13
|
-
namespace faiss {
|
|
14
|
-
|
|
15
|
-
/// Returns the distance to a single code.
|
|
16
|
-
template <typename PQDecoderT>
|
|
17
|
-
inline float distance_single_code_generic(
|
|
18
|
-
// number of subquantizers
|
|
19
|
-
const size_t M,
|
|
20
|
-
// number of bits per quantization index
|
|
21
|
-
const size_t nbits,
|
|
22
|
-
// precomputed distances, layout (M, ksub)
|
|
23
|
-
const float* sim_table,
|
|
24
|
-
// the code
|
|
25
|
-
const uint8_t* code) {
|
|
26
|
-
PQDecoderT decoder(code, nbits);
|
|
27
|
-
const size_t ksub = 1 << nbits;
|
|
28
|
-
|
|
29
|
-
const float* tab = sim_table;
|
|
30
|
-
float result = 0;
|
|
31
|
-
|
|
32
|
-
for (size_t m = 0; m < M; m++) {
|
|
33
|
-
result += tab[decoder.decode()];
|
|
34
|
-
tab += ksub;
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
return result;
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
/// Combines 4 operations of distance_single_code()
|
|
41
|
-
/// General-purpose version.
|
|
42
|
-
template <typename PQDecoderT>
|
|
43
|
-
inline void distance_four_codes_generic(
|
|
44
|
-
// number of subquantizers
|
|
45
|
-
const size_t M,
|
|
46
|
-
// number of bits per quantization index
|
|
47
|
-
const size_t nbits,
|
|
48
|
-
// precomputed distances, layout (M, ksub)
|
|
49
|
-
const float* sim_table,
|
|
50
|
-
// codes
|
|
51
|
-
const uint8_t* __restrict code0,
|
|
52
|
-
const uint8_t* __restrict code1,
|
|
53
|
-
const uint8_t* __restrict code2,
|
|
54
|
-
const uint8_t* __restrict code3,
|
|
55
|
-
// computed distances
|
|
56
|
-
float& result0,
|
|
57
|
-
float& result1,
|
|
58
|
-
float& result2,
|
|
59
|
-
float& result3) {
|
|
60
|
-
PQDecoderT decoder0(code0, nbits);
|
|
61
|
-
PQDecoderT decoder1(code1, nbits);
|
|
62
|
-
PQDecoderT decoder2(code2, nbits);
|
|
63
|
-
PQDecoderT decoder3(code3, nbits);
|
|
64
|
-
const size_t ksub = 1 << nbits;
|
|
65
|
-
|
|
66
|
-
const float* tab = sim_table;
|
|
67
|
-
result0 = 0;
|
|
68
|
-
result1 = 0;
|
|
69
|
-
result2 = 0;
|
|
70
|
-
result3 = 0;
|
|
71
|
-
|
|
72
|
-
for (size_t m = 0; m < M; m++) {
|
|
73
|
-
result0 += tab[decoder0.decode()];
|
|
74
|
-
result1 += tab[decoder1.decode()];
|
|
75
|
-
result2 += tab[decoder2.decode()];
|
|
76
|
-
result3 += tab[decoder3.decode()];
|
|
77
|
-
tab += ksub;
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
} // namespace faiss
|