RubyGems - faiss - Versions diffs - 0.2.6 → 0.2.7 - Mend

faiss 0.2.6 → 0.2.7

Files changed (189) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/ext/faiss/extconf.rb +1 -1
data/lib/faiss/version.rb +1 -1
data/lib/faiss.rb +2 -2
data/vendor/faiss/faiss/AutoTune.cpp +15 -4
data/vendor/faiss/faiss/AutoTune.h +0 -1
data/vendor/faiss/faiss/Clustering.cpp +1 -5
data/vendor/faiss/faiss/Clustering.h +0 -2
data/vendor/faiss/faiss/IVFlib.h +0 -2
data/vendor/faiss/faiss/Index.h +1 -2
data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
data/vendor/faiss/faiss/IndexBinary.h +0 -1
data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
data/vendor/faiss/faiss/IndexFastScan.h +5 -1
data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
data/vendor/faiss/faiss/IndexFlat.h +1 -1
data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
data/vendor/faiss/faiss/IndexHNSW.h +0 -1
data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
data/vendor/faiss/faiss/IndexIDMap.h +0 -2
data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
data/vendor/faiss/faiss/IndexIVF.h +121 -61
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
data/vendor/faiss/faiss/IndexReplicas.h +0 -1
data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
data/vendor/faiss/faiss/IndexShards.cpp +26 -109
data/vendor/faiss/faiss/IndexShards.h +2 -3
data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
data/vendor/faiss/faiss/MetaIndexes.h +29 -0
data/vendor/faiss/faiss/MetricType.h +14 -0
data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
data/vendor/faiss/faiss/VectorTransform.h +1 -3
data/vendor/faiss/faiss/clone_index.cpp +232 -18
data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
data/vendor/faiss/faiss/impl/HNSW.h +6 -9
data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
data/vendor/faiss/faiss/impl/NSG.h +4 -7
data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
data/vendor/faiss/faiss/index_factory.cpp +8 -10
data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
data/vendor/faiss/faiss/utils/Heap.h +35 -1
data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
data/vendor/faiss/faiss/utils/distances.cpp +61 -7
data/vendor/faiss/faiss/utils/distances.h +11 -0
data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
data/vendor/faiss/faiss/utils/fp16.h +7 -0
data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
data/vendor/faiss/faiss/utils/hamming.h +21 -10
data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
data/vendor/faiss/faiss/utils/sorting.h +71 -0
data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
data/vendor/faiss/faiss/utils/utils.cpp +4 -176
data/vendor/faiss/faiss/utils/utils.h +2 -9
metadata +29 -3
data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26

data/vendor/faiss/faiss/utils/sorting.h ADDED Viewed

@@ -0,0 +1,71 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+#include <faiss/impl/platform_macros.h>
+namespace faiss {
+/** Indirect sort of a floating-point array
+ *
+ * @param n     size of the array
+ * @param vals  array to sort, size n
+ * @param perm  output: permutation of [0..n-1], such that
+ *              vals[perm[i + 1]] >= vals[perm[i]]
+ */
+void fvec_argsort(size_t n, const float* vals, size_t* perm);
+/** Same contract as fvec_argsort (same parameters and output), parallelized */
+void fvec_argsort_parallel(size_t n, const float* vals, size_t* perm);
+/// increase verbosity of the bucket_sort functions
+FAISS_API extern int bucket_sort_verbose;
+/** Bucket sort of a list of nval values into nbucket buckets
+ *
+ * @param vals     values to sort, size nval, max value nbucket - 1
+ * @param lims     output limits of buckets, size nbucket + 1
+ * @param perm     output buckets, the elements of bucket
+ *                 i are in perm[lims[i]:lims[i + 1]]
+ * @param nt       number of threads (0 = pure sequential code)
+ */
+void bucket_sort(
+        size_t nval,
+        const uint64_t* vals,
+        uint64_t nbucket,
+        int64_t* lims,
+        int64_t* perm,
+        int nt = 0);
+/** in-place bucket sort (with attention to memory=>int32)
+ * on input the values are in a nrow * ncol matrix
+ * we want to store the row numbers in the output.
+ *
+ * @param vals     positive values to sort, size nrow * ncol,
+ *                 max value nbucket - 1
+ * @param lims     output limits of buckets, size nbucket + 1
+ * @param nt       number of threads (0 = pure sequential code)
+ */
+void matrix_bucket_sort_inplace(
+        size_t nrow,
+        size_t ncol,
+        int32_t* vals,
+        int32_t nbucket,
+        int64_t* lims,
+        int nt = 0);
+/// same as the int32 overload above, with int64 elements (vals and nbucket are int64_t)
+void matrix_bucket_sort_inplace(
+        size_t nrow,
+        size_t ncol,
+        int64_t* vals,
+        int64_t nbucket,
+        int64_t* lims,
+        int nt = 0);
+} // namespace faiss

data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h ADDED Viewed

@@ -0,0 +1,165 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+// This file contains transposing kernels for AVX2 for
+// tiny float/int32 matrices, such as 8x2.
+#ifdef __AVX2__
+#include <immintrin.h>
+namespace faiss {
+// 8x2 -> 2x8: i0, i1 hold 8 rows of 2 32-bit elements each; o0 receives column 0 and o1 column 1
+inline void transpose_8x2(
+        const __m256 i0,
+        const __m256 i1,
+        __m256& o0,
+        __m256& o1) {
+    // say, we have the following as an input (each entry reads <row><column>):
+    // i0:  00 01 10 11 20 21 30 31
+    // i1:  40 41 50 51 60 61 70 71
+    // low 128-bit halves of i0 and i1: 00 01 10 11 40 41 50 51
+    const __m256 r0 = _mm256_permute2f128_ps(i0, i1, _MM_SHUFFLE(0, 2, 0, 0));
+    // high 128-bit halves of i0 and i1: 20 21 30 31 60 61 70 71
+    const __m256 r1 = _mm256_permute2f128_ps(i0, i1, _MM_SHUFFLE(0, 3, 0, 1));
+    // even elements of r0/r1 (column 0): 00 10 20 30 40 50 60 70
+    o0 = _mm256_shuffle_ps(r0, r1, _MM_SHUFFLE(2, 0, 2, 0));
+    // odd elements of r0/r1 (column 1): 01 11 21 31 41 51 61 71
+    o1 = _mm256_shuffle_ps(r0, r1, _MM_SHUFFLE(3, 1, 3, 1));
+}
+// 8x4 -> 4x8: i0..i3 hold 8 rows of 4 32-bit elements each; o0..o3 receive columns 0..3
+inline void transpose_8x4(
+        const __m256 i0,
+        const __m256 i1,
+        const __m256 i2,
+        const __m256 i3,
+        __m256& o0,
+        __m256& o1,
+        __m256& o2,
+        __m256& o3) {
+    // say, we have the following as an input (each entry reads <row><column>):
+    // i0:  00 01 02 03 10 11 12 13
+    // i1:  20 21 22 23 30 31 32 33
+    // i2:  40 41 42 43 50 51 52 53
+    // i3:  60 61 62 63 70 71 72 73
+    // step 1, regroup 128-bit halves: 00 01 02 03 40 41 42 43
+    const __m256 r0 = _mm256_permute2f128_ps(i0, i2, _MM_SHUFFLE(0, 2, 0, 0));
+    // 20 21 22 23 60 61 62 63
+    const __m256 r1 = _mm256_permute2f128_ps(i1, i3, _MM_SHUFFLE(0, 2, 0, 0));
+    // 10 11 12 13 50 51 52 53
+    const __m256 r2 = _mm256_permute2f128_ps(i0, i2, _MM_SHUFFLE(0, 3, 0, 1));
+    // 30 31 32 33 70 71 72 73
+    const __m256 r3 = _mm256_permute2f128_ps(i1, i3, _MM_SHUFFLE(0, 3, 0, 1));
+    // step 2, split even/odd columns: 00 02 10 12 40 42 50 52
+    const __m256 t0 = _mm256_shuffle_ps(r0, r2, _MM_SHUFFLE(2, 0, 2, 0));
+    // 01 03 11 13 41 43 51 53
+    const __m256 t1 = _mm256_shuffle_ps(r0, r2, _MM_SHUFFLE(3, 1, 3, 1));
+    // 20 22 30 32 60 62 70 72
+    const __m256 t2 = _mm256_shuffle_ps(r1, r3, _MM_SHUFFLE(2, 0, 2, 0));
+    // 21 23 31 33 61 63 71 73
+    const __m256 t3 = _mm256_shuffle_ps(r1, r3, _MM_SHUFFLE(3, 1, 3, 1));
+    // step 3, gather one column per output: 00 10 20 30 40 50 60 70
+    o0 = _mm256_shuffle_ps(t0, t2, _MM_SHUFFLE(2, 0, 2, 0));
+    // 01 11 21 31 41 51 61 71
+    o1 = _mm256_shuffle_ps(t1, t3, _MM_SHUFFLE(2, 0, 2, 0));
+    // 02 12 22 32 42 52 62 72
+    o2 = _mm256_shuffle_ps(t0, t2, _MM_SHUFFLE(3, 1, 3, 1));
+    // 03 13 23 33 43 53 63 73
+    o3 = _mm256_shuffle_ps(t1, t3, _MM_SHUFFLE(3, 1, 3, 1));
+}
+inline void transpose_8x8(
+        const __m256 i0,
+        const __m256 i1,
+        const __m256 i2,
+        const __m256 i3,
+        const __m256 i4,
+        const __m256 i5,
+        const __m256 i6,
+        const __m256 i7,
+        __m256& o0,
+        __m256& o1,
+        __m256& o2,
+        __m256& o3,
+        __m256& o4,
+        __m256& o5,
+        __m256& o6,
+        __m256& o7) {
+    // 8x8 -> 8x8 transpose: i0..i7 are the rows, o0..o7 receive the columns. Say, we have the following as an input (each entry reads <row><column>):
+    // i0:  00 01 02 03 04 05 06 07
+    // i1:  10 11 12 13 14 15 16 17
+    // i2:  20 21 22 23 24 25 26 27
+    // i3:  30 31 32 33 34 35 36 37
+    // i4:  40 41 42 43 44 45 46 47
+    // i5:  50 51 52 53 54 55 56 57
+    // i6:  60 61 62 63 64 65 66 67
+    // i7:  70 71 72 73 74 75 76 77
+    // step 1, interleave adjacent row pairs within each 128-bit half: 00 10 01 11 04 14 05 15
+    const __m256 r0 = _mm256_unpacklo_ps(i0, i1);
+    // 02 12 03 13 06 16 07 17
+    const __m256 r1 = _mm256_unpackhi_ps(i0, i1);
+    // 20 30 21 31 24 34 25 35
+    const __m256 r2 = _mm256_unpacklo_ps(i2, i3);
+    // 22 32 23 33 26 36 27 37
+    const __m256 r3 = _mm256_unpackhi_ps(i2, i3);
+    // 40 50 41 51 44 54 45 55
+    const __m256 r4 = _mm256_unpacklo_ps(i4, i5);
+    // 42 52 43 53 46 56 47 57
+    const __m256 r5 = _mm256_unpackhi_ps(i4, i5);
+    // 60 70 61 71 64 74 65 75
+    const __m256 r6 = _mm256_unpacklo_ps(i6, i7);
+    // 62 72 63 73 66 76 67 77
+    const __m256 r7 = _mm256_unpackhi_ps(i6, i7);
+    // step 2, join 2-element pairs into 4-element column segments: 00 10 20 30 04 14 24 34
+    const __m256 rr0 = _mm256_shuffle_ps(r0, r2, _MM_SHUFFLE(1, 0, 1, 0));
+    // 01 11 21 31 05 15 25 35
+    const __m256 rr1 = _mm256_shuffle_ps(r0, r2, _MM_SHUFFLE(3, 2, 3, 2));
+    // 02 12 22 32 06 16 26 36
+    const __m256 rr2 = _mm256_shuffle_ps(r1, r3, _MM_SHUFFLE(1, 0, 1, 0));
+    // 03 13 23 33 07 17 27 37
+    const __m256 rr3 = _mm256_shuffle_ps(r1, r3, _MM_SHUFFLE(3, 2, 3, 2));
+    // 40 50 60 70 44 54 64 74
+    const __m256 rr4 = _mm256_shuffle_ps(r4, r6, _MM_SHUFFLE(1, 0, 1, 0));
+    // 41 51 61 71 45 55 65 75
+    const __m256 rr5 = _mm256_shuffle_ps(r4, r6, _MM_SHUFFLE(3, 2, 3, 2));
+    // 42 52 62 72 46 56 66 76
+    const __m256 rr6 = _mm256_shuffle_ps(r5, r7, _MM_SHUFFLE(1, 0, 1, 0));
+    // 43 53 63 73 47 57 67 77
+    const __m256 rr7 = _mm256_shuffle_ps(r5, r7, _MM_SHUFFLE(3, 2, 3, 2));
+    // step 3, pair 128-bit halves (0x20 = both low halves, 0x31 = both high halves): 00 10 20 30 40 50 60 70
+    o0 = _mm256_permute2f128_ps(rr0, rr4, 0x20);
+    // 01 11 21 31 41 51 61 71
+    o1 = _mm256_permute2f128_ps(rr1, rr5, 0x20);
+    // 02 12 22 32 42 52 62 72
+    o2 = _mm256_permute2f128_ps(rr2, rr6, 0x20);
+    // 03 13 23 33 43 53 63 73
+    o3 = _mm256_permute2f128_ps(rr3, rr7, 0x20);
+    // 04 14 24 34 44 54 64 74
+    o4 = _mm256_permute2f128_ps(rr0, rr4, 0x31);
+    // 05 15 25 35 45 55 65 75
+    o5 = _mm256_permute2f128_ps(rr1, rr5, 0x31);
+    // 06 16 26 36 46 56 66 76
+    o6 = _mm256_permute2f128_ps(rr2, rr6, 0x31);
+    // 07 17 27 37 47 57 67 77
+    o7 = _mm256_permute2f128_ps(rr3, rr7, 0x31);
+}
+} // namespace faiss
+#endif

data/vendor/faiss/faiss/utils/utils.cpp CHANGED Viewed

@@ -423,185 +423,13 @@ void bincode_hist(size_t n, size_t nbits, const uint8_t* codes, int* hist) {
     }
 }
-size_t ivec_checksum(size_t n, const int* a) {
+size_t ivec_checksum(size_t n, const int32_t* asigned) {
+    const uint32_t* a = reinterpret_cast<const uint32_t*>(asigned);
     size_t cs = 112909;
-    while (n--)
+    while (n--) {
         cs = cs * 65713 + a[n] * 1686049;
-    return cs;
-}
-namespace {
-struct ArgsortComparator {
-    const float* vals;
-    bool operator()(const size_t a, const size_t b) const {
-        return vals[a] < vals[b];
-    }
-};
-struct SegmentS {
-    size_t i0; // begin pointer in the permutation array
-    size_t i1; // end
-    size_t len() const {
-        return i1 - i0;
-    }
-};
-// see https://en.wikipedia.org/wiki/Merge_algorithm#Parallel_merge
-// extended to > 1 merge thread
-// merges 2 ranges that should be consecutive on the source into
-// the union of the two on the destination
-template <typename T>
-void parallel_merge(
-        const T* src,
-        T* dst,
-        SegmentS& s1,
-        SegmentS& s2,
-        int nt,
-        const ArgsortComparator& comp) {
-    if (s2.len() > s1.len()) { // make sure that s1 larger than s2
-        std::swap(s1, s2);
-    }
-    // compute sub-ranges for each thread
-    std::vector<SegmentS> s1s(nt), s2s(nt), sws(nt);
-    s2s[0].i0 = s2.i0;
-    s2s[nt - 1].i1 = s2.i1;
-    // not sure parallel actually helps here
-#pragma omp parallel for num_threads(nt)
-    for (int t = 0; t < nt; t++) {
-        s1s[t].i0 = s1.i0 + s1.len() * t / nt;
-        s1s[t].i1 = s1.i0 + s1.len() * (t + 1) / nt;
-        if (t + 1 < nt) {
-            T pivot = src[s1s[t].i1];
-            size_t i0 = s2.i0, i1 = s2.i1;
-            while (i0 + 1 < i1) {
-                size_t imed = (i1 + i0) / 2;
-                if (comp(pivot, src[imed])) {
-                    i1 = imed;
-                } else {
-                    i0 = imed;
-                }
-            }
-            s2s[t].i1 = s2s[t + 1].i0 = i1;
-        }
-    }
-    s1.i0 = std::min(s1.i0, s2.i0);
-    s1.i1 = std::max(s1.i1, s2.i1);
-    s2 = s1;
-    sws[0].i0 = s1.i0;
-    for (int t = 0; t < nt; t++) {
-        sws[t].i1 = sws[t].i0 + s1s[t].len() + s2s[t].len();
-        if (t + 1 < nt) {
-            sws[t + 1].i0 = sws[t].i1;
-        }
-    }
-    assert(sws[nt - 1].i1 == s1.i1);
-    // do the actual merging
-#pragma omp parallel for num_threads(nt)
-    for (int t = 0; t < nt; t++) {
-        SegmentS sw = sws[t];
-        SegmentS s1t = s1s[t];
-        SegmentS s2t = s2s[t];
-        if (s1t.i0 < s1t.i1 && s2t.i0 < s2t.i1) {
-            for (;;) {
-                // assert (sw.len() == s1t.len() + s2t.len());
-                if (comp(src[s1t.i0], src[s2t.i0])) {
-                    dst[sw.i0++] = src[s1t.i0++];
-                    if (s1t.i0 == s1t.i1)
-                        break;
-                } else {
-                    dst[sw.i0++] = src[s2t.i0++];
-                    if (s2t.i0 == s2t.i1)
-                        break;
-                }
-            }
-        }
-        if (s1t.len() > 0) {
-            assert(s1t.len() == sw.len());
-            memcpy(dst + sw.i0, src + s1t.i0, s1t.len() * sizeof(dst[0]));
-        } else if (s2t.len() > 0) {
-            assert(s2t.len() == sw.len());
-            memcpy(dst + sw.i0, src + s2t.i0, s2t.len() * sizeof(dst[0]));
-        }
     }
-}
-}; // namespace
-void fvec_argsort(size_t n, const float* vals, size_t* perm) {
-    for (size_t i = 0; i < n; i++)
-        perm[i] = i;
-    ArgsortComparator comp = {vals};
-    std::sort(perm, perm + n, comp);
-}
-void fvec_argsort_parallel(size_t n, const float* vals, size_t* perm) {
-    size_t* perm2 = new size_t[n];
-    // 2 result tables, during merging, flip between them
-    size_t *permB = perm2, *permA = perm;
-    int nt = omp_get_max_threads();
-    { // prepare correct permutation so that the result ends in perm
-      // at final iteration
-        int nseg = nt;
-        while (nseg > 1) {
-            nseg = (nseg + 1) / 2;
-            std::swap(permA, permB);
-        }
-    }
-#pragma omp parallel
-    for (size_t i = 0; i < n; i++)
-        permA[i] = i;
-    ArgsortComparator comp = {vals};
-    std::vector<SegmentS> segs(nt);
-    // independent sorts
-#pragma omp parallel for
-    for (int t = 0; t < nt; t++) {
-        size_t i0 = t * n / nt;
-        size_t i1 = (t + 1) * n / nt;
-        SegmentS seg = {i0, i1};
-        std::sort(permA + seg.i0, permA + seg.i1, comp);
-        segs[t] = seg;
-    }
-    int prev_nested = omp_get_nested();
-    omp_set_nested(1);
-    int nseg = nt;
-    while (nseg > 1) {
-        int nseg1 = (nseg + 1) / 2;
-        int sub_nt = nseg % 2 == 0 ? nt : nt - 1;
-        int sub_nseg1 = nseg / 2;
-#pragma omp parallel for num_threads(nseg1)
-        for (int s = 0; s < nseg; s += 2) {
-            if (s + 1 == nseg) { // otherwise isolated segment
-                memcpy(permB + segs[s].i0,
-                       permA + segs[s].i0,
-                       segs[s].len() * sizeof(size_t));
-            } else {
-                int t0 = s * sub_nt / sub_nseg1;
-                int t1 = (s + 1) * sub_nt / sub_nseg1;
-                printf("merge %d %d, %d threads\n", s, s + 1, t1 - t0);
-                parallel_merge(
-                        permA, permB, segs[s], segs[s + 1], t1 - t0, comp);
-            }
-        }
-        for (int s = 0; s < nseg; s += 2)
-            segs[s / 2] = segs[s];
-        nseg = nseg1;
-        std::swap(permA, permB);
-    }
-    assert(permA == perm);
-    omp_set_nested(prev_nested);
-    delete[] perm2;
+    return cs;
 }
 const float* fvecs_maybe_subsample(

data/vendor/faiss/faiss/utils/utils.h CHANGED Viewed

@@ -19,10 +19,7 @@
 #include <stdint.h>
 #include <string>
-#ifdef _MSC_VER
-#define strtok_r strtok_s
-#endif // _MSC_VER
+#include <faiss/impl/platform_macros.h>
 #include <faiss/utils/Heap.h>
 namespace faiss {
@@ -113,10 +110,6 @@ double imbalance_factor(int n, int k, const int64_t* assign);
 /// same, takes a histogram as input
 double imbalance_factor(int k, const int* hist);
-void fvec_argsort(size_t n, const float* vals, size_t* perm);
-void fvec_argsort_parallel(size_t n, const float* vals, size_t* perm);
 /// compute histogram on v
 int ivec_hist(size_t n, const int* v, int vmax, int* hist);
@@ -128,7 +121,7 @@ int ivec_hist(size_t n, const int* v, int vmax, int* hist);
 void bincode_hist(size_t n, size_t nbits, const uint8_t* codes, int* hist);
 /// compute a checksum on a table.
-size_t ivec_checksum(size_t n, const int* a);
+size_t ivec_checksum(size_t n, const int32_t* a);
 /** random subsamples a set of vectors if there are too many of them
  *

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: faiss
 version: !ruby/object:Gem::Version
-  version: 0.2.6
+  version: 0.2.7
 platform: ruby
 authors:
 - Andrew Kane
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-04-11 00:00:00.000000000 Z
+date: 2023-04-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rice
@@ -139,6 +139,8 @@ files:
 - vendor/faiss/faiss/IndexScalarQuantizer.h
 - vendor/faiss/faiss/IndexShards.cpp
 - vendor/faiss/faiss/IndexShards.h
+- vendor/faiss/faiss/IndexShardsIVF.cpp
+- vendor/faiss/faiss/IndexShardsIVF.h
 - vendor/faiss/faiss/MatrixStats.cpp
 - vendor/faiss/faiss/MatrixStats.h
 - vendor/faiss/faiss/MetaIndexes.cpp
@@ -163,7 +165,6 @@ files:
 - vendor/faiss/faiss/gpu/GpuAutoTune.h
 - vendor/faiss/faiss/gpu/GpuCloner.cpp
 - vendor/faiss/faiss/gpu/GpuCloner.h
-- vendor/faiss/faiss/gpu/GpuClonerOptions.cpp
 - vendor/faiss/faiss/gpu/GpuClonerOptions.h
 - vendor/faiss/faiss/gpu/GpuDistance.h
 - vendor/faiss/faiss/gpu/GpuFaissAssert.h
@@ -210,6 +211,8 @@ files:
 - vendor/faiss/faiss/impl/AdditiveQuantizer.h
 - vendor/faiss/faiss/impl/AuxIndexStructures.cpp
 - vendor/faiss/faiss/impl/AuxIndexStructures.h
+- vendor/faiss/faiss/impl/CodePacker.cpp
+- vendor/faiss/faiss/impl/CodePacker.h
 - vendor/faiss/faiss/impl/DistanceComputer.h
 - vendor/faiss/faiss/impl/FaissAssert.h
 - vendor/faiss/faiss/impl/FaissException.cpp
@@ -240,6 +243,10 @@ files:
 - vendor/faiss/faiss/impl/ScalarQuantizer.h
 - vendor/faiss/faiss/impl/ThreadedIndex-inl.h
 - vendor/faiss/faiss/impl/ThreadedIndex.h
+- vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h
+- vendor/faiss/faiss/impl/code_distance/code_distance-generic.h
+- vendor/faiss/faiss/impl/code_distance/code_distance.h
+- vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h
 - vendor/faiss/faiss/impl/index_read.cpp
 - vendor/faiss/faiss/impl/index_write.cpp
 - vendor/faiss/faiss/impl/io.cpp
@@ -275,8 +282,19 @@ files:
 - vendor/faiss/faiss/utils/Heap.h
 - vendor/faiss/faiss/utils/WorkerThread.cpp
 - vendor/faiss/faiss/utils/WorkerThread.h
+- vendor/faiss/faiss/utils/approx_topk/approx_topk.h
+- vendor/faiss/faiss/utils/approx_topk/avx2-inl.h
+- vendor/faiss/faiss/utils/approx_topk/generic.h
+- vendor/faiss/faiss/utils/approx_topk/mode.h
+- vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h
 - vendor/faiss/faiss/utils/distances.cpp
 - vendor/faiss/faiss/utils/distances.h
+- vendor/faiss/faiss/utils/distances_fused/avx512.cpp
+- vendor/faiss/faiss/utils/distances_fused/avx512.h
+- vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp
+- vendor/faiss/faiss/utils/distances_fused/distances_fused.h
+- vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp
+- vendor/faiss/faiss/utils/distances_fused/simdlib_based.h
 - vendor/faiss/faiss/utils/distances_simd.cpp
 - vendor/faiss/faiss/utils/extra_distances-inl.h
 - vendor/faiss/faiss/utils/extra_distances.cpp
@@ -287,6 +305,11 @@ files:
 - vendor/faiss/faiss/utils/hamming-inl.h
 - vendor/faiss/faiss/utils/hamming.cpp
 - vendor/faiss/faiss/utils/hamming.h
+- vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h
+- vendor/faiss/faiss/utils/hamming_distance/common.h
+- vendor/faiss/faiss/utils/hamming_distance/generic-inl.h
+- vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h
+- vendor/faiss/faiss/utils/hamming_distance/neon-inl.h
 - vendor/faiss/faiss/utils/ordered_key_value.h
 - vendor/faiss/faiss/utils/partitioning.cpp
 - vendor/faiss/faiss/utils/partitioning.h
@@ -298,6 +321,9 @@ files:
 - vendor/faiss/faiss/utils/simdlib_avx2.h
 - vendor/faiss/faiss/utils/simdlib_emulated.h
 - vendor/faiss/faiss/utils/simdlib_neon.h
+- vendor/faiss/faiss/utils/sorting.cpp
+- vendor/faiss/faiss/utils/sorting.h
+- vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h
 - vendor/faiss/faiss/utils/utils.cpp
 - vendor/faiss/faiss/utils/utils.h
 homepage: https://github.com/ankane/faiss-ruby

data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp DELETED Viewed

@@ -1,26 +0,0 @@
-/**
- * Copyright (c) Facebook, Inc. and its affiliates.
- *
- * This source code is licensed under the MIT license found in the
- * LICENSE file in the root directory of this source tree.
- */
-#include <faiss/gpu/GpuClonerOptions.h>
-namespace faiss {
-namespace gpu {
-GpuClonerOptions::GpuClonerOptions()
-        : indicesOptions(INDICES_64_BIT),
-          useFloat16CoarseQuantizer(false),
-          useFloat16(false),
-          usePrecomputed(false),
-          reserveVecs(0),
-          storeTransposed(false),
-          verbose(false) {}
-GpuMultipleClonerOptions::GpuMultipleClonerOptions()
-        : shard(false), shard_type(1) {}
-} // namespace gpu
-} // namespace faiss