RubyGems - faiss - Versions diffs - 0.3.1 → 0.3.2 - Mend

faiss 0.3.1 → 0.3.2

Files changed (119) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/faiss/AutoTune.h +1 -1
data/vendor/faiss/faiss/Clustering.cpp +35 -4
data/vendor/faiss/faiss/Clustering.h +10 -1
data/vendor/faiss/faiss/IVFlib.cpp +4 -1
data/vendor/faiss/faiss/Index.h +21 -6
data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -1
data/vendor/faiss/faiss/IndexFastScan.cpp +22 -4
data/vendor/faiss/faiss/IndexFlat.cpp +11 -7
data/vendor/faiss/faiss/IndexFlatCodes.cpp +159 -5
data/vendor/faiss/faiss/IndexFlatCodes.h +20 -3
data/vendor/faiss/faiss/IndexHNSW.cpp +143 -90
data/vendor/faiss/faiss/IndexHNSW.h +52 -3
data/vendor/faiss/faiss/IndexIVF.cpp +3 -3
data/vendor/faiss/faiss/IndexIVF.h +9 -1
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +15 -0
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +3 -0
data/vendor/faiss/faiss/IndexIVFFastScan.cpp +130 -57
data/vendor/faiss/faiss/IndexIVFFastScan.h +14 -7
data/vendor/faiss/faiss/IndexIVFPQ.cpp +1 -3
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +21 -2
data/vendor/faiss/faiss/IndexLattice.cpp +1 -19
data/vendor/faiss/faiss/IndexLattice.h +3 -22
data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -29
data/vendor/faiss/faiss/IndexNNDescent.h +1 -1
data/vendor/faiss/faiss/IndexNSG.h +1 -1
data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +56 -0
data/vendor/faiss/faiss/IndexNeuralNetCodec.h +49 -0
data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
data/vendor/faiss/faiss/IndexRefine.cpp +5 -5
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +3 -1
data/vendor/faiss/faiss/MetricType.h +7 -2
data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +95 -17
data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +152 -0
data/vendor/faiss/faiss/cppcontrib/factory_tools.h +24 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +83 -30
data/vendor/faiss/faiss/gpu/GpuCloner.cpp +36 -4
data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +6 -0
data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -1
data/vendor/faiss/faiss/gpu/GpuIndex.h +2 -8
data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +282 -0
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +6 -0
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +2 -0
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +25 -0
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +26 -21
data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +6 -0
data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +8 -5
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +65 -0
data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
data/vendor/faiss/faiss/gpu/utils/Timer.cpp +4 -1
data/vendor/faiss/faiss/gpu/utils/Timer.h +1 -1
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +25 -0
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +9 -1
data/vendor/faiss/faiss/impl/DistanceComputer.h +46 -0
data/vendor/faiss/faiss/impl/FaissAssert.h +4 -2
data/vendor/faiss/faiss/impl/HNSW.cpp +358 -190
data/vendor/faiss/faiss/impl/HNSW.h +43 -22
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +8 -8
data/vendor/faiss/faiss/impl/LookupTableScaler.h +34 -0
data/vendor/faiss/faiss/impl/NNDescent.cpp +13 -8
data/vendor/faiss/faiss/impl/NSG.cpp +0 -29
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +1 -0
data/vendor/faiss/faiss/impl/ProductQuantizer.h +5 -1
data/vendor/faiss/faiss/impl/ResultHandler.h +151 -32
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +719 -102
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +3 -0
data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +5 -0
data/vendor/faiss/faiss/impl/code_distance/code_distance-avx512.h +248 -0
data/vendor/faiss/faiss/impl/index_read.cpp +29 -15
data/vendor/faiss/faiss/impl/index_read_utils.h +37 -0
data/vendor/faiss/faiss/impl/index_write.cpp +28 -10
data/vendor/faiss/faiss/impl/io.cpp +13 -5
data/vendor/faiss/faiss/impl/io.h +4 -4
data/vendor/faiss/faiss/impl/io_macros.h +6 -0
data/vendor/faiss/faiss/impl/platform_macros.h +22 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +11 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +1 -1
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +448 -1
data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +5 -5
data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
data/vendor/faiss/faiss/impl/simd_result_handlers.h +143 -59
data/vendor/faiss/faiss/index_factory.cpp +31 -13
data/vendor/faiss/faiss/index_io.h +12 -5
data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +28 -8
data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +3 -0
data/vendor/faiss/faiss/invlists/DirectMap.cpp +9 -1
data/vendor/faiss/faiss/invlists/InvertedLists.cpp +55 -17
data/vendor/faiss/faiss/invlists/InvertedLists.h +18 -9
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +21 -6
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +2 -1
data/vendor/faiss/faiss/python/python_callbacks.cpp +3 -3
data/vendor/faiss/faiss/utils/Heap.h +105 -0
data/vendor/faiss/faiss/utils/NeuralNet.cpp +342 -0
data/vendor/faiss/faiss/utils/NeuralNet.h +147 -0
data/vendor/faiss/faiss/utils/bf16.h +36 -0
data/vendor/faiss/faiss/utils/distances.cpp +58 -88
data/vendor/faiss/faiss/utils/distances.h +5 -5
data/vendor/faiss/faiss/utils/distances_simd.cpp +997 -9
data/vendor/faiss/faiss/utils/extra_distances-inl.h +70 -0
data/vendor/faiss/faiss/utils/extra_distances.cpp +85 -137
data/vendor/faiss/faiss/utils/extra_distances.h +3 -2
data/vendor/faiss/faiss/utils/hamming.cpp +1 -1
data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +4 -1
data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +2 -1
data/vendor/faiss/faiss/utils/random.cpp +43 -0
data/vendor/faiss/faiss/utils/random.h +25 -0
data/vendor/faiss/faiss/utils/simdlib.h +10 -1
data/vendor/faiss/faiss/utils/simdlib_avx512.h +296 -0
data/vendor/faiss/faiss/utils/simdlib_neon.h +5 -2
data/vendor/faiss/faiss/utils/simdlib_ppc64.h +1084 -0
data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +176 -0
data/vendor/faiss/faiss/utils/utils.cpp +10 -3
data/vendor/faiss/faiss/utils/utils.h +3 -0
metadata +16 -4
data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +0 -102

data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h ADDED Viewed

@@ -0,0 +1,176 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+// This file contains transposing kernels for AVX512 for tiny float/int32
+// matrices, such as 16x2.
+#ifdef __AVX512F__
+#include <immintrin.h>
+namespace faiss {
+// 16x2 -> 2x16: o0 receives the even-indexed floats of (i0, i1), o1 the odd ones
+inline void transpose_16x2(
+        const __m512 i0,
+        const __m512 i1,
+        __m512& o0,
+        __m512& o1) {
+    // assume we have the following input (32 floats, read as a row-major 16x2 matrix):
+    // i0:  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
+    // i1: 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+    // r0 (128-bit lanes 0,2 of i0, then of i1): 0  1  2  3  8  9 10 11 16 17 18 19 24 25 26 27
+    const __m512 r0 = _mm512_shuffle_f32x4(i0, i1, _MM_SHUFFLE(2, 0, 2, 0));
+    // r1 (128-bit lanes 1,3 of i0, then of i1): 4  5  6  7 12 13 14 15 20 21 22 23 28 29 30 31
+    const __m512 r1 = _mm512_shuffle_f32x4(i0, i1, _MM_SHUFFLE(3, 1, 3, 1));
+    // o0 (column 0): 0  2  4  6  8 10 12 14 16 18 20 22 24 26 28 30
+    o0 = _mm512_shuffle_ps(r0, r1, _MM_SHUFFLE(2, 0, 2, 0));
+    // o1 (column 1): 1  3  5  7  9 11 13 15 17 19 21 23 25 27 29 31
+    o1 = _mm512_shuffle_ps(r0, r1, _MM_SHUFFLE(3, 1, 3, 1));
+}
+// 16x4 -> 4x16: oK receives column K of the row-major 16x4 matrix held in i0..i3
+inline void transpose_16x4(
+        const __m512 i0,
+        const __m512 i1,
+        const __m512 i2,
+        const __m512 i3,
+        __m512& o0,
+        __m512& o1,
+        __m512& o2,
+        __m512& o3) {
+    // assume that we have the following input (64 floats, 4 per matrix row):
+    // i0:  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
+    // i1: 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+    // i2: 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
+    // i3: 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
+    // r0:  0  1  2  3  8  9 10 11 16 17 18 19 24 25 26 27
+    const __m512 r0 = _mm512_shuffle_f32x4(i0, i1, _MM_SHUFFLE(2, 0, 2, 0));
+    // r1:  4  5  6  7 12 13 14 15 20 21 22 23 28 29 30 31
+    const __m512 r1 = _mm512_shuffle_f32x4(i0, i1, _MM_SHUFFLE(3, 1, 3, 1));
+    // r2: 32 33 34 35 40 41 42 43 48 49 50 51 56 57 58 59
+    const __m512 r2 = _mm512_shuffle_f32x4(i2, i3, _MM_SHUFFLE(2, 0, 2, 0));
+    // r3: 36 37 38 39 44 45 46 47 52 53 54 55 60 61 62 63
+    const __m512 r3 = _mm512_shuffle_f32x4(i2, i3, _MM_SHUFFLE(3, 1, 3, 1));
+    // t0:  0  2  4  6  8 10 12 14 16 18 20 22 24 26 28 30
+    const __m512 t0 = _mm512_shuffle_ps(r0, r1, _MM_SHUFFLE(2, 0, 2, 0));
+    // t1:  1  3  5  7  9 11 13 15 17 19 21 23 25 27 29 31
+    const __m512 t1 = _mm512_shuffle_ps(r0, r1, _MM_SHUFFLE(3, 1, 3, 1));
+    // t2: 32 34 36 38 40 42 44 46 48 50 52 54 56 58 60 62
+    const __m512 t2 = _mm512_shuffle_ps(r2, r3, _MM_SHUFFLE(2, 0, 2, 0));
+    // t3: 33 35 37 39 41 43 45 47 49 51 53 55 57 59 61 63
+    const __m512 t3 = _mm512_shuffle_ps(r2, r3, _MM_SHUFFLE(3, 1, 3, 1));
+    const __m512i idx0 = _mm512_set_epi32(
+            30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); // listed from element 15 down to 0; indices >= 16 select from the second source
+    const __m512i idx1 = _mm512_set_epi32(
+            31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1); // same layout as idx0, picking the odd positions
+    // o0: 0 4  8 12 16 20 24 28 32 36 40 44 48 52 56 60
+    o0 = _mm512_permutex2var_ps(t0, idx0, t2);
+    // o1: 1 5  9 13 17 21 25 29 33 37 41 45 49 53 57 61
+    o1 = _mm512_permutex2var_ps(t1, idx0, t3);
+    // o2: 2 6 10 14 18 22 26 30 34 38 42 46 50 54 58 62
+    o2 = _mm512_permutex2var_ps(t0, idx1, t2);
+    // o3: 3 7 11 15 19 23 27 31 35 39 43 47 51 55 59 63
+    o3 = _mm512_permutex2var_ps(t1, idx1, t3);
+}
+// 16x8 -> 8x16 transpose: oK receives column K of the row-major 16x8 matrix held in i0..i7
+inline void transpose_16x8(
+        const __m512 i0,
+        const __m512 i1,
+        const __m512 i2,
+        const __m512 i3,
+        const __m512 i4,
+        const __m512 i5,
+        const __m512 i6,
+        const __m512 i7,
+        __m512& o0,
+        __m512& o1,
+        __m512& o2,
+        __m512& o3,
+        __m512& o4,
+        __m512& o5,
+        __m512& o6,
+        __m512& o7) {
+    // assume that we have the following input (128 floats, 8 per matrix row):
+    // i0:   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
+    // i1:  16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31
+    // i2:  32  33  34  35  36  37  38  39  40  41  42  43  44  45  46  47
+    // i3:  48  49  50  51  52  53  54  55  56  57  58  59  60  61  62  63
+    // i4:  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79
+    // i5:  80  81  82  83  84  85  86  87  88  89  90  91  92  93  94  95
+    // i6:  96  97  98  99 100 101 102 103 104 105 106 107 108 109 110 111
+    // i7: 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
+    // r0:  0  16   1  17   4  20   5  21   8  24   9  25  12  28  13  29
+    const __m512 r0 = _mm512_unpacklo_ps(i0, i1);
+    // r1:  2  18   3  19   6  22   7  23  10  26  11  27  14  30  15  31
+    const __m512 r1 = _mm512_unpackhi_ps(i0, i1);
+    // r2: 32  48  33  49  36  52  37  53  40  56  41  57  44  60  45  61
+    const __m512 r2 = _mm512_unpacklo_ps(i2, i3);
+    // r3: 34  50  35  51  38  54  39  55  42  58  43  59  46  62  47  63
+    const __m512 r3 = _mm512_unpackhi_ps(i2, i3);
+    // r4: 64  80  65  81  68  84  69  85  72  88  73  89  76  92  77  93
+    const __m512 r4 = _mm512_unpacklo_ps(i4, i5);
+    // r5: 66  82  67  83  70  86  71  87  74  90  75  91  78  94  79  95
+    const __m512 r5 = _mm512_unpackhi_ps(i4, i5);
+    // r6: 96 112  97 113 100 116 101 117 104 120 105 121 108 124 109 125
+    const __m512 r6 = _mm512_unpacklo_ps(i6, i7);
+    // r7: 98 114  99 115 102 118 103 119 106 122 107 123 110 126 111 127
+    const __m512 r7 = _mm512_unpackhi_ps(i6, i7);
+    // t0:  0  16  32  48   4  20  36  52   8  24  40  56  12  28  44  60
+    const __m512 t0 = _mm512_shuffle_ps(r0, r2, _MM_SHUFFLE(1, 0, 1, 0));
+    // t1:  1  17  33  49   5  21  37  53   9  25  41  57  13  29  45  61
+    const __m512 t1 = _mm512_shuffle_ps(r0, r2, _MM_SHUFFLE(3, 2, 3, 2));
+    // t2:  2  18  34  50   6  22  38  54  10  26  42  58  14  30  46  62
+    const __m512 t2 = _mm512_shuffle_ps(r1, r3, _MM_SHUFFLE(1, 0, 1, 0));
+    // t3:  3  19  35  51   7  23  39  55  11  27  43  59  15  31  47  63
+    const __m512 t3 = _mm512_shuffle_ps(r1, r3, _MM_SHUFFLE(3, 2, 3, 2));
+    // t4: 64  80  96 112  68  84 100 116  72  88 104 120  76  92 108 124
+    const __m512 t4 = _mm512_shuffle_ps(r4, r6, _MM_SHUFFLE(1, 0, 1, 0));
+    // t5: 65  81  97 113  69  85 101 117  73  89 105 121  77  93 109 125
+    const __m512 t5 = _mm512_shuffle_ps(r4, r6, _MM_SHUFFLE(3, 2, 3, 2));
+    // t6: 66  82  98 114  70  86 102 118  74  90 106 122  78  94 110 126
+    const __m512 t6 = _mm512_shuffle_ps(r5, r7, _MM_SHUFFLE(1, 0, 1, 0));
+    // t7: 67  83  99 115  71  87 103 119  75  91 107 123  79  95 111 127
+    const __m512 t7 = _mm512_shuffle_ps(r5, r7, _MM_SHUFFLE(3, 2, 3, 2));
+    const __m512i idx0 = _mm512_set_epi32(
+            27, 19, 26, 18, 25, 17, 24, 16, 11, 3, 10, 2, 9, 1, 8, 0); // listed from element 15 down to 0; indices >= 16 select from the second source
+    const __m512i idx1 = _mm512_set_epi32(
+            31, 23, 30, 22, 29, 21, 28, 20, 15, 7, 14, 6, 13, 5, 12, 4); // same layout as idx0, shifted by 4 positions within each source
+    // o0:  0   8  16  24  32  40  48  56  64  72  80  88  96 104 112 120
+    o0 = _mm512_permutex2var_ps(t0, idx0, t4);
+    // o1:  1   9  17  25  33  41  49  57  65  73  81  89  97 105 113 121
+    o1 = _mm512_permutex2var_ps(t1, idx0, t5);
+    // o2:  2  10  18  26  34  42  50  58  66  74  82  90  98 106 114 122
+    o2 = _mm512_permutex2var_ps(t2, idx0, t6);
+    // o3:  3  11  19  27  35  43  51  59  67  75  83  91  99 107 115 123
+    o3 = _mm512_permutex2var_ps(t3, idx0, t7);
+    // o4:  4  12  20  28  36  44  52  60  68  76  84  92 100 108 116 124
+    o4 = _mm512_permutex2var_ps(t0, idx1, t4);
+    // o5:  5  13  21  29  37  45  53  61  69  77  85  93 101 109 117 125
+    o5 = _mm512_permutex2var_ps(t1, idx1, t5);
+    // o6:  6  14  22  30  38  46  54  62  70  78  86  94 102 110 118 126
+    o6 = _mm512_permutex2var_ps(t2, idx1, t6);
+    // o7:  7  15  23  31  39  47  55  63  71  79  87  95 103 111 119 127
+    o7 = _mm512_permutex2var_ps(t3, idx1, t7);
+}
+} // namespace faiss
+#endif

data/vendor/faiss/faiss/utils/utils.cpp CHANGED Viewed

@@ -7,6 +7,7 @@
 // -*- c++ -*-
+#include <faiss/Index.h>
 #include <faiss/utils/utils.h>
 #include <cassert>
@@ -114,10 +115,12 @@ std::string get_compile_options() {
     options += "OPTIMIZE ";
 #endif
-#ifdef __AVX2__
-    options += "AVX2 ";
-#elif __AVX512F__
+#ifdef __AVX512F__
     options += "AVX512 ";
+#elif defined(__AVX2__)
+    options += "AVX2 ";
+#elif defined(__ARM_FEATURE_SVE)
+    options += "SVE NEON ";
 #elif defined(__aarch64__)
     options += "NEON ";
 #else
@@ -129,6 +132,10 @@ std::string get_compile_options() {
     return options;
 }
+std::string get_version() { // returns the library version as a std::string
+    return VERSION_STRING; // VERSION_STRING is not defined in this hunk; presumably provided by <faiss/Index.h> (include added at the top of this diff) - confirm
+}
 #ifdef _MSC_VER
 double getmillisecs() {
     LARGE_INTEGER ts;

data/vendor/faiss/faiss/utils/utils.h CHANGED Viewed

@@ -37,6 +37,9 @@ std::string get_compile_options();
  * Get some stats about the system
  **************************************************/
+// Expose FAISS version as a string
+std::string get_version();
 /// ms elapsed since some arbitrary epoch
 double getmillisecs();

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: faiss
 version: !ruby/object:Gem::Version
-  version: 0.3.1
+  version: 0.3.2
 platform: ruby
 authors:
 - Andrew Kane
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-03-14 00:00:00.000000000 Z
+date: 2024-10-06 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rice
@@ -125,6 +125,8 @@ files:
 - vendor/faiss/faiss/IndexNNDescent.h
 - vendor/faiss/faiss/IndexNSG.cpp
 - vendor/faiss/faiss/IndexNSG.h
+- vendor/faiss/faiss/IndexNeuralNetCodec.cpp
+- vendor/faiss/faiss/IndexNeuralNetCodec.h
 - vendor/faiss/faiss/IndexPQ.cpp
 - vendor/faiss/faiss/IndexPQ.h
 - vendor/faiss/faiss/IndexPQFastScan.cpp
@@ -155,6 +157,8 @@ files:
 - vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h
 - vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h
 - vendor/faiss/faiss/cppcontrib/detail/UintReader.h
+- vendor/faiss/faiss/cppcontrib/factory_tools.cpp
+- vendor/faiss/faiss/cppcontrib/factory_tools.h
 - vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h
 - vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h
 - vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h
@@ -173,6 +177,7 @@ files:
 - vendor/faiss/faiss/gpu/GpuIcmEncoder.h
 - vendor/faiss/faiss/gpu/GpuIndex.h
 - vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h
+- vendor/faiss/faiss/gpu/GpuIndexCagra.h
 - vendor/faiss/faiss/gpu/GpuIndexFlat.h
 - vendor/faiss/faiss/gpu/GpuIndexIVF.h
 - vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h
@@ -247,10 +252,11 @@ files:
 - vendor/faiss/faiss/impl/ThreadedIndex-inl.h
 - vendor/faiss/faiss/impl/ThreadedIndex.h
 - vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h
+- vendor/faiss/faiss/impl/code_distance/code_distance-avx512.h
 - vendor/faiss/faiss/impl/code_distance/code_distance-generic.h
 - vendor/faiss/faiss/impl/code_distance/code_distance.h
-- vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h
 - vendor/faiss/faiss/impl/index_read.cpp
+- vendor/faiss/faiss/impl/index_read_utils.h
 - vendor/faiss/faiss/impl/index_write.cpp
 - vendor/faiss/faiss/impl/io.cpp
 - vendor/faiss/faiss/impl/io.h
@@ -285,6 +291,8 @@ files:
 - vendor/faiss/faiss/utils/AlignedTable.h
 - vendor/faiss/faiss/utils/Heap.cpp
 - vendor/faiss/faiss/utils/Heap.h
+- vendor/faiss/faiss/utils/NeuralNet.cpp
+- vendor/faiss/faiss/utils/NeuralNet.h
 - vendor/faiss/faiss/utils/WorkerThread.cpp
 - vendor/faiss/faiss/utils/WorkerThread.h
 - vendor/faiss/faiss/utils/approx_topk/approx_topk.h
@@ -292,6 +300,7 @@ files:
 - vendor/faiss/faiss/utils/approx_topk/generic.h
 - vendor/faiss/faiss/utils/approx_topk/mode.h
 - vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h
+- vendor/faiss/faiss/utils/bf16.h
 - vendor/faiss/faiss/utils/distances.cpp
 - vendor/faiss/faiss/utils/distances.h
 - vendor/faiss/faiss/utils/distances_fused/avx512.cpp
@@ -326,11 +335,14 @@ files:
 - vendor/faiss/faiss/utils/random.h
 - vendor/faiss/faiss/utils/simdlib.h
 - vendor/faiss/faiss/utils/simdlib_avx2.h
+- vendor/faiss/faiss/utils/simdlib_avx512.h
 - vendor/faiss/faiss/utils/simdlib_emulated.h
 - vendor/faiss/faiss/utils/simdlib_neon.h
+- vendor/faiss/faiss/utils/simdlib_ppc64.h
 - vendor/faiss/faiss/utils/sorting.cpp
 - vendor/faiss/faiss/utils/sorting.h
 - vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h
+- vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h
 - vendor/faiss/faiss/utils/utils.cpp
 - vendor/faiss/faiss/utils/utils.h
 homepage: https://github.com/ankane/faiss-ruby
@@ -352,7 +364,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.5.3
+rubygems_version: 3.5.16
 signing_key:
 specification_version: 4
 summary: Efficient similarity search and clustering for Ruby

data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h DELETED Viewed

@@ -1,102 +0,0 @@
-/**
- * Copyright (c) Facebook, Inc. and its affiliates.
- *
- * This source code is licensed under the MIT license found in the
- * LICENSE file in the root directory of this source tree.
- */
-// // // AVX-512 version. It is not used, but let it be for the future
-// // // needs.
-// // template <class SearchResultType, typename T = PQDecoder>
-// // typename std::enable_if<(std::is_same<T, PQDecoder8>::value), void>::
-// //         type distance_four_codes(
-// //     const uint8_t* __restrict code0,
-// //     const uint8_t* __restrict code1,
-// //     const uint8_t* __restrict code2,
-// //     const uint8_t* __restrict code3,
-// //     float& result0,
-// //     float& result1,
-// //     float& result2,
-// //     float& result3
-// // ) const {
-// //     result0 = 0;
-// //     result1 = 0;
-// //     result2 = 0;
-// //     result3 = 0;
-// //     size_t m = 0;
-// //     const size_t pqM16 = pq.M / 16;
-// //     constexpr intptr_t N = 4;
-// //     const float* tab = sim_table;
-// //     if (pqM16 > 0) {
-// //         // process 16 values per loop
-// //         const __m512i ksub = _mm512_set1_epi32(pq.ksub);
-// //         __m512i offsets_0 = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7,
-// //              8, 9, 10, 11, 12, 13, 14, 15);
-// //         offsets_0 = _mm512_mullo_epi32(offsets_0, ksub);
-// //         // accumulators of partial sums
-// //         __m512 partialSums[N];
-// //         for (intptr_t j = 0; j < N; j++) {
-// //             partialSums[j] = _mm512_setzero_ps();
-// //         }
-// //         // loop
-// //         for (m = 0; m < pqM16 * 16; m += 16) {
-// //             // load 16 uint8 values
-// //             __m128i mm1[N];
-// //             mm1[0] = _mm_loadu_si128((const __m128i_u*)(code0 + m));
-// //             mm1[1] = _mm_loadu_si128((const __m128i_u*)(code1 + m));
-// //             mm1[2] = _mm_loadu_si128((const __m128i_u*)(code2 + m));
-// //             mm1[3] = _mm_loadu_si128((const __m128i_u*)(code3 + m));
-// //             // process first 8 codes
-// //             for (intptr_t j = 0; j < N; j++) {
-// //                 // convert uint8 values (low part of __m128i) to int32
-// //                 // values
-// //                 const __m512i idx1 = _mm512_cvtepu8_epi32(mm1[j]);
-// //                 // add offsets
-// //                 const __m512i indices_to_read_from =
-// //                     _mm512_add_epi32(idx1, offsets_0);
-// //                 // gather 8 values, similar to 8 operations of
-// // //                    tab[idx]
-// //                 __m512 collected =
-// //                        _mm512_i32gather_ps(
-// //                             indices_to_read_from, tab, sizeof(float));
-// //                 // collect partial sums
-// //                 partialSums[j] = _mm512_add_ps(partialSums[j],
-// //                    collected);
-// //             }
-// //             tab += pq.ksub * 16;
-// //         }
-// //         // horizontal sum for partialSum
-// //         result0 += _mm512_reduce_add_ps(partialSums[0]);
-// //         result1 += _mm512_reduce_add_ps(partialSums[1]);
-// //         result2 += _mm512_reduce_add_ps(partialSums[2]);
-// //         result3 += _mm512_reduce_add_ps(partialSums[3]);
-// //     }
-// //     //
-// //     if (m < pq.M) {
-// //         // process leftovers
-// //         PQDecoder decoder0(code0 + m, pq.nbits);
-// //         PQDecoder decoder1(code1 + m, pq.nbits);
-// //         PQDecoder decoder2(code2 + m, pq.nbits);
-// //         PQDecoder decoder3(code3 + m, pq.nbits);
-// //         for (; m < pq.M; m++) {
-// //             result0 += tab[decoder0.decode()];
-// //             result1 += tab[decoder1.decode()];
-// //             result2 += tab[decoder2.decode()];
-// //             result3 += tab[decoder3.decode()];
-// //             tab += pq.ksub;
-// //         }
-// //     }
-// // }