faiss 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (199) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +16 -4
  5. data/ext/faiss/ext.cpp +12 -308
  6. data/ext/faiss/extconf.rb +6 -3
  7. data/ext/faiss/index.cpp +189 -0
  8. data/ext/faiss/index_binary.cpp +75 -0
  9. data/ext/faiss/kmeans.cpp +40 -0
  10. data/ext/faiss/numo.hpp +867 -0
  11. data/ext/faiss/pca_matrix.cpp +33 -0
  12. data/ext/faiss/product_quantizer.cpp +53 -0
  13. data/ext/faiss/utils.cpp +13 -0
  14. data/ext/faiss/utils.h +5 -0
  15. data/lib/faiss.rb +0 -5
  16. data/lib/faiss/version.rb +1 -1
  17. data/vendor/faiss/faiss/AutoTune.cpp +36 -33
  18. data/vendor/faiss/faiss/AutoTune.h +6 -3
  19. data/vendor/faiss/faiss/Clustering.cpp +16 -12
  20. data/vendor/faiss/faiss/Index.cpp +3 -4
  21. data/vendor/faiss/faiss/Index.h +3 -3
  22. data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
  23. data/vendor/faiss/faiss/IndexBinary.h +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
  25. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
  26. data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
  27. data/vendor/faiss/faiss/IndexFlat.h +0 -51
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
  29. data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
  30. data/vendor/faiss/faiss/IndexIVF.h +22 -15
  31. data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
  32. data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
  33. data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
  34. data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
  35. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
  36. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
  37. data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
  38. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  39. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
  40. data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
  41. data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
  42. data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
  43. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
  44. data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
  45. data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
  46. data/vendor/faiss/faiss/IndexRefine.h +73 -0
  47. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
  48. data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
  49. data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
  50. data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
  51. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
  52. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
  53. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
  54. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
  55. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
  56. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
  57. data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
  58. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
  59. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
  60. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
  61. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
  62. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
  63. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
  64. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
  65. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
  66. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
  67. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
  68. data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
  69. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
  70. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
  71. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
  72. data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
  73. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
  74. data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
  75. data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
  76. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
  77. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
  78. data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
  79. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
  80. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
  81. data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
  82. data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
  83. data/vendor/faiss/faiss/impl/io.cpp +33 -2
  84. data/vendor/faiss/faiss/impl/io.h +7 -2
  85. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
  86. data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
  87. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
  88. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
  89. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
  90. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
  91. data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
  92. data/vendor/faiss/faiss/index_factory.cpp +112 -7
  93. data/vendor/faiss/faiss/index_io.h +1 -48
  94. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
  95. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
  96. data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
  97. data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
  98. data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
  99. data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
  100. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
  101. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
  102. data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
  103. data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
  104. data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
  105. data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
  106. data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
  107. data/vendor/faiss/faiss/utils/Heap.h +61 -50
  108. data/vendor/faiss/faiss/utils/distances.cpp +164 -319
  109. data/vendor/faiss/faiss/utils/distances.h +28 -20
  110. data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
  111. data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
  112. data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
  113. data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
  114. data/vendor/faiss/faiss/utils/hamming.h +2 -7
  115. data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
  116. data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
  117. data/vendor/faiss/faiss/utils/partitioning.h +69 -0
  118. data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
  119. data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
  120. data/vendor/faiss/faiss/utils/simdlib.h +31 -0
  121. data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
  122. data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
  123. metadata +54 -149
  124. data/lib/faiss/index.rb +0 -20
  125. data/lib/faiss/index_binary.rb +0 -20
  126. data/lib/faiss/kmeans.rb +0 -15
  127. data/lib/faiss/pca_matrix.rb +0 -15
  128. data/lib/faiss/product_quantizer.rb +0 -22
  129. data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
  130. data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
  131. data/vendor/faiss/c_api/AutoTune_c.h +0 -66
  132. data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
  133. data/vendor/faiss/c_api/Clustering_c.h +0 -123
  134. data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
  135. data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
  136. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
  137. data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
  138. data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
  139. data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
  140. data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
  141. data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
  142. data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
  143. data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
  144. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
  145. data/vendor/faiss/c_api/IndexShards_c.h +0 -39
  146. data/vendor/faiss/c_api/Index_c.cpp +0 -105
  147. data/vendor/faiss/c_api/Index_c.h +0 -183
  148. data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
  149. data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
  150. data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
  151. data/vendor/faiss/c_api/clone_index_c.h +0 -32
  152. data/vendor/faiss/c_api/error_c.h +0 -42
  153. data/vendor/faiss/c_api/error_impl.cpp +0 -27
  154. data/vendor/faiss/c_api/error_impl.h +0 -16
  155. data/vendor/faiss/c_api/faiss_c.h +0 -58
  156. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
  157. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
  158. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
  159. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
  160. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
  161. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
  162. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
  163. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
  164. data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
  165. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
  166. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
  167. data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
  168. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
  169. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
  170. data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
  171. data/vendor/faiss/c_api/index_factory_c.h +0 -30
  172. data/vendor/faiss/c_api/index_io_c.cpp +0 -42
  173. data/vendor/faiss/c_api/index_io_c.h +0 -50
  174. data/vendor/faiss/c_api/macros_impl.h +0 -110
  175. data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
  176. data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
  177. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
  178. data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
  179. data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
  180. data/vendor/faiss/misc/test_blas.cpp +0 -87
  181. data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
  182. data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
  183. data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
  184. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
  185. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
  186. data/vendor/faiss/tests/test_merge.cpp +0 -260
  187. data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
  188. data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
  189. data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
  190. data/vendor/faiss/tests/test_params_override.cpp +0 -236
  191. data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
  192. data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
  193. data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
  194. data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
  195. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
  196. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
  197. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
  198. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
  199. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -0,0 +1,51 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <stdint.h>
11
+ #include <vector>
12
+
13
+ // Utilities for bit packing and unpacking CPU non-interleaved and GPU
14
+ // interleaved by 32 encodings
15
+ namespace faiss { namespace gpu {
16
+
17
+ // Unpacks arbitrary bitwidth codes to a whole number of bytes per code
18
+ // The layout of the input is (v0 d0)(v0 d1) ... (v0 dD)(v1 d0) ...
19
+ // (bit packed)
20
+ // The layout of the output is the same (byte packed to roundUp(bitsPerCode, 8)
21
+ // / 8 bytes)
22
+ std::vector<uint8_t> unpackNonInterleaved(std::vector<uint8_t> data,
23
+ int numVecs,
24
+ int dims,
25
+ int bitsPerCode);
26
+
27
+ // Unpacks arbitrary bitwidth codes to a whole number of bytes per scalar code
28
+ // The layout of the input is (v0 d0)(v1 d0) ... (v31 d0)(v0 d1) ...
29
+ // (bit packed)
30
+ // The layout of the output is (v0 d0)(v0 d1) ... (v0 dD)(v1 d0) ...
31
+ // (byte packed)
32
+ std::vector<uint8_t> unpackInterleaved(std::vector<uint8_t> data,
33
+ int numVecs,
34
+ int dims,
35
+ int bitsPerCode);
36
+
37
+ // Packs data in the byte packed non-interleaved form to bit packed
38
+ // non-interleaved form
39
+ std::vector<uint8_t> packNonInterleaved(std::vector<uint8_t> data,
40
+ int numVecs,
41
+ int dims,
42
+ int bitsPerCode);
43
+
44
+ // Packs data in the byte packed non-interleaved form to bit packed
45
+ // interleaved form
46
+ std::vector<uint8_t> packInterleaved(std::vector<uint8_t> data,
47
+ int numVecs,
48
+ int dims,
49
+ int bitsPerCode);
50
+
51
+ } } // namespace
@@ -14,17 +14,17 @@ namespace faiss { namespace gpu {
14
14
  // Utility function to translate (list id, offset) to a user index on
15
15
  // the CPU. In a cpp in order to use OpenMP
16
16
  void ivfOffsetToUserIndex(
17
- long* indices,
17
+ Index::idx_t* indices,
18
18
  int numLists,
19
19
  int queries,
20
20
  int k,
21
- const std::vector<std::vector<long>>& listOffsetToUserIndex) {
21
+ const std::vector<std::vector<Index::idx_t>>& listOffsetToUserIndex) {
22
22
  FAISS_ASSERT(numLists == listOffsetToUserIndex.size());
23
23
 
24
24
  #pragma omp parallel for
25
25
  for (int q = 0; q < queries; ++q) {
26
26
  for (int r = 0; r < k; ++r) {
27
- long offsetIndex = indices[q * k + r];
27
+ auto offsetIndex = indices[q * k + r];
28
28
 
29
29
  if (offsetIndex < 0) continue;
30
30
 
@@ -8,6 +8,7 @@
8
8
 
9
9
  #pragma once
10
10
 
11
+ #include <faiss/Index.h>
11
12
  #include <vector>
12
13
 
13
14
  namespace faiss { namespace gpu {
@@ -15,10 +16,10 @@ namespace faiss { namespace gpu {
15
16
  /// Utility function to translate (list id, offset) to a user index on
16
17
  /// the CPU. In a cpp in order to use OpenMP.
17
18
  void ivfOffsetToUserIndex(
18
- long* indices,
19
+ Index::idx_t* indices,
19
20
  int numLists,
20
21
  int queries,
21
22
  int k,
22
- const std::vector<std::vector<long>>& listOffsetToUserIndex);
23
+ const std::vector<std::vector<Index::idx_t>>& listOffsetToUserIndex);
23
24
 
24
25
  } } // namespace
@@ -0,0 +1,274 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/gpu/impl/InterleavedCodes.h>
10
+ #include <faiss/gpu/utils/StaticUtils.h>
11
+ #include <faiss/gpu/test/TestUtils.h>
12
+ #include <cmath>
13
+ #include <gtest/gtest.h>
14
+ #include <random>
15
+ #include <sstream>
16
+ #include <vector>
17
+
18
+ TEST(TestCodePacking, NonInterleavedCodes_UnpackPack) {
19
+ using namespace faiss::gpu;
20
+
21
+ // We are fine using non-fixed seeds here, the results should be fully
22
+ // deterministic
23
+ auto seed = std::random_device()();
24
+ std::mt19937 gen(seed);
25
+ std::uniform_int_distribution<uint8_t> dist;
26
+
27
+ std::cout << "seed " << seed << "\n";
28
+
29
+ for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
30
+ for (auto dims : {1, 7, 8, 31, 32}) {
31
+ for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
32
+ std::cout << bitsPerCode << " " << dims << " " << numVecs << "\n";
33
+
34
+ int srcVecSize = utils::divUp(dims * bitsPerCode, 8);
35
+ std::vector<uint8_t> data(numVecs * srcVecSize);
36
+
37
+ for (auto& v : data) {
38
+ v = dist(gen);
39
+ }
40
+
41
+ // currently unimplemented
42
+ EXPECT_FALSE(bitsPerCode > 8 && bitsPerCode % 8 != 0);
43
+
44
+ // Due to bit packing, mask out bits that should be zero based on
45
+ // dimensions we shouldn't have present
46
+ int vectorSizeBits = dims * bitsPerCode;
47
+ int vectorSizeBytes = utils::divUp(vectorSizeBits, 8);
48
+ int remainder = vectorSizeBits % 8;
49
+
50
+ if (remainder > 0) {
51
+ uint8_t mask = 0xff >> (8 - remainder);
52
+
53
+ for (int i = 0; i < numVecs; ++i) {
54
+ int lastVecByte = (i + 1) * vectorSizeBytes - 1;
55
+ data[lastVecByte] &= mask;
56
+ }
57
+ }
58
+
59
+ auto up = unpackNonInterleaved(data, numVecs, dims, bitsPerCode);
60
+ auto p = packNonInterleaved(up, numVecs, dims, bitsPerCode);
61
+
62
+ EXPECT_EQ(data, p);
63
+ }
64
+ }
65
+ }
66
+ }
67
+
68
+ TEST(TestCodePacking, NonInterleavedCodes_PackUnpack) {
69
+ using namespace faiss::gpu;
70
+
71
+ // We are fine using non-fixed seeds here, the results should be fully
72
+ // deterministic
73
+ std::random_device rd;
74
+ std::mt19937 gen(rd());
75
+ std::uniform_int_distribution<uint8_t> dist;
76
+
77
+ for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
78
+ for (auto dims : {1, 7, 8, 31, 32}) {
79
+ for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
80
+ std::cout << bitsPerCode << " " << dims << " " << numVecs << "\n";
81
+
82
+ std::vector<uint8_t> data(numVecs * dims * utils::divUp(bitsPerCode, 8));
83
+
84
+ // currently unimplemented
85
+ EXPECT_FALSE(bitsPerCode > 8 && bitsPerCode % 8 != 0);
86
+
87
+ // Mask out high bits we shouldn't have based on code size
88
+ uint8_t mask = bitsPerCode < 8 ? (0xff >> (8 - bitsPerCode)) : 0xff;
89
+
90
+ for (auto& v : data) {
91
+ v = dist(gen) & mask;
92
+ }
93
+
94
+ auto p = packNonInterleaved(data, numVecs, dims, bitsPerCode);
95
+ auto up = unpackNonInterleaved(p, numVecs, dims, bitsPerCode);
96
+
97
+ EXPECT_EQ(data, up);
98
+ }
99
+ }
100
+ }
101
+ }
102
+
103
+ TEST(TestCodePacking, InterleavedCodes_UnpackPack) {
104
+ using namespace faiss::gpu;
105
+
106
+ // We are fine using non-fixed seeds here, the results should be fully
107
+ // deterministic
108
+ std::random_device rd;
109
+ std::mt19937 gen(rd());
110
+ std::uniform_int_distribution<uint8_t> dist;
111
+
112
+ for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
113
+ for (auto dims : {1, 7, 8, 31, 32}) {
114
+ for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
115
+ std::cout << bitsPerCode << " " << dims << " " << numVecs << "\n";
116
+
117
+ int blocks = utils::divUp(numVecs, 32);
118
+ int bytesPerDimBlock = 32 * bitsPerCode / 8;
119
+ int bytesPerBlock = bytesPerDimBlock * dims;
120
+ int size = blocks * bytesPerBlock;
121
+
122
+ std::vector<uint8_t> data(size);
123
+
124
+ if (bitsPerCode == 8 || bitsPerCode == 16 || bitsPerCode == 32) {
125
+ int bytesPerCode = bitsPerCode / 8;
126
+
127
+ for (int i = 0; i < blocks; ++i) {
128
+ for (int j = 0; j < dims; ++j) {
129
+ for (int k = 0; k < 32; ++k) {
130
+ for (int l = 0; l < bytesPerCode; ++l) {
131
+ int vec = i * 32 + k;
132
+ if (vec < numVecs) {
133
+ data[i * bytesPerBlock +
134
+ j * bytesPerDimBlock +
135
+ k * bytesPerCode + l] = dist(gen);
136
+ }
137
+ }
138
+ }
139
+ }
140
+ }
141
+ } else if (bitsPerCode < 8) {
142
+ for (int i = 0; i < blocks; ++i) {
143
+ for (int j = 0; j < dims; ++j) {
144
+ for (int k = 0; k < bytesPerDimBlock; ++k) {
145
+ int loVec = i * 32 + (k * 8) / bitsPerCode;
146
+ int hiVec = loVec + 1;
147
+ int hiVec2 = hiVec + 1;
148
+
149
+ uint8_t lo = loVec < numVecs ?
150
+ dist(gen) & (0xff >> (8 - bitsPerCode)) : 0;
151
+ uint8_t hi = hiVec < numVecs ?
152
+ dist(gen) & (0xff >> (8 - bitsPerCode)) : 0;
153
+ uint8_t hi2 = hiVec2 < numVecs ?
154
+ dist(gen) & (0xff >> (8 - bitsPerCode)) : 0;
155
+
156
+ uint8_t v = 0;
157
+ if (bitsPerCode == 4) {
158
+ v = lo | (hi << 4);
159
+ } else if (bitsPerCode == 5) {
160
+ switch (k % 5) {
161
+ case 0:
162
+ // 5 msbs of lower as vOut lsbs
163
+ // 3 lsbs of upper as vOut msbs
164
+ v = (lo & 0x1f) | (hi << 5);
165
+ break;
166
+ case 1:
167
+ // 2 msbs of lower as vOut lsbs
168
+ // 5 lsbs of upper as vOut msbs
169
+ // 1 lsbs of upper2 as vOut msb
170
+ v = (lo >> 3) | (hi << 2) | (hi2 << 7);
171
+ break;
172
+ case 2:
173
+ // 4 msbs of lower as vOut lsbs
174
+ // 4 lsbs of upper as vOut msbs
175
+ v = (lo >> 1) | (hi << 4);
176
+ break;
177
+ case 3:
178
+ // 1 msbs of lower as vOut lsbs
179
+ // 5 lsbs of upper as vOut msbs
180
+ // 2 lsbs of upper2 as vOut msb
181
+ v = (lo >> 4) | (hi << 1) | (hi2 << 6);
182
+ break;
183
+ case 4:
184
+ // 3 msbs of lower as vOut lsbs
185
+ // 5 lsbs of upper as vOut msbs
186
+ v = (lo >> 2) | (hi << 3);
187
+ break;
188
+ }
189
+ } else if (bitsPerCode == 6) {
190
+ switch (k % 3) {
191
+ case 0:
192
+ // 6 msbs of lower as vOut lsbs
193
+ // 2 lsbs of upper as vOut msbs
194
+ v = (lo & 0x3f) | (hi << 6);
195
+ break;
196
+ case 1:
197
+ // 4 msbs of lower as vOut lsbs
198
+ // 4 lsbs of upper as vOut msbs
199
+ v = (lo >> 2) | (hi << 4);
200
+ break;
201
+ case 2:
202
+ // 2 msbs of lower as vOut lsbs
203
+ // 6 lsbs of upper as vOut msbs
204
+ v = (lo >> 4) | (hi << 2);
205
+ break;
206
+ }
207
+ } else {
208
+ // unimplemented
209
+ EXPECT_TRUE(false);
210
+ }
211
+
212
+ data[i * bytesPerBlock + j * bytesPerDimBlock + k] = v;
213
+ }
214
+ }
215
+ }
216
+ } else {
217
+ // unimplemented
218
+ EXPECT_TRUE(false);
219
+ }
220
+
221
+ auto up = unpackInterleaved(data, numVecs, dims, bitsPerCode);
222
+ auto p = packInterleaved(up, numVecs, dims, bitsPerCode);
223
+
224
+ EXPECT_EQ(data, p);
225
+ }
226
+ }
227
+ }
228
+ }
229
+
230
+ TEST(TestCodePacking, InterleavedCodes_PackUnpack) {
231
+ using namespace faiss::gpu;
232
+
233
+ // We are fine using non-fixed seeds here, the results should be fully
234
+ // deterministic
235
+ std::random_device rd;
236
+ std::mt19937 gen(rd());
237
+ std::uniform_int_distribution<uint8_t> dist;
238
+
239
+ for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
240
+ for (auto dims : {1, 7, 8, 31, 32}) {
241
+ for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
242
+ std::cout << bitsPerCode << " " << dims << " " << numVecs << "\n";
243
+
244
+ std::vector<uint8_t> data(numVecs * dims * utils::divUp(bitsPerCode, 8));
245
+
246
+ if (bitsPerCode == 8 || bitsPerCode == 16 || bitsPerCode == 32) {
247
+ for (auto& v : data) {
248
+ v = dist(gen);
249
+ }
250
+ } else if (bitsPerCode < 8) {
251
+ uint8_t mask = 0xff >> (8 - bitsPerCode);
252
+
253
+ for (auto& v : data) {
254
+ v = dist(gen) & mask;
255
+ }
256
+ } else {
257
+ // unimplemented
258
+ EXPECT_TRUE(false);
259
+ }
260
+
261
+ auto p = packInterleaved(data, numVecs, dims, bitsPerCode);
262
+ auto up = unpackInterleaved(p, numVecs, dims, bitsPerCode);
263
+
264
+ EXPECT_EQ(data, up);
265
+ }
266
+ }
267
+ }
268
+ }
269
+
270
+ int main(int argc, char** argv) {
271
+ testing::InitGoogleTest(&argc, argv);
272
+
273
+ return RUN_ALL_TESTS();
274
+ }
@@ -206,6 +206,8 @@ void copyToTest(bool useFloat16CoarseQuantizer) {
206
206
  EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
207
207
  EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
208
208
 
209
+ testIVFEquality(cpuIndex, gpuIndex);
210
+
209
211
  // Query both objects; results should be equivalent
210
212
  bool compFloat16 = useFloat16CoarseQuantizer;
211
213
  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
@@ -255,6 +257,8 @@ void copyFromTest(bool useFloat16CoarseQuantizer) {
255
257
  EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
256
258
  EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
257
259
 
260
+ testIVFEquality(cpuIndex, gpuIndex);
261
+
258
262
  // Query both objects; results should be equivalent
259
263
  bool compFloat16 = useFloat16CoarseQuantizer;
260
264
  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
@@ -466,9 +470,10 @@ TEST(TestGpuIndexIVFFlat, AddNaN) {
466
470
  std::vector<float> nans(numNans * opt.dim,
467
471
  std::numeric_limits<float>::quiet_NaN());
468
472
 
469
- // Make one vector valid, which should actually add
473
+ // Make one vector valid (not the first vector, in order to test offset
474
+ // issues), which should actually add
470
475
  for (int i = 0; i < opt.dim; ++i) {
471
- nans[i] = 0.0f;
476
+ nans[opt.dim + i] = i;
472
477
  }
473
478
 
474
479
  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
@@ -426,6 +426,8 @@ TEST(TestGpuIndexIVFPQ, CopyTo) {
426
426
  EXPECT_EQ(cpuIndex.pq.nbits, gpuIndex.getBitsPerCode());
427
427
  EXPECT_EQ(gpuIndex.getBitsPerCode(), opt.bitsPerCode);
428
428
 
429
+ testIVFEquality(cpuIndex, gpuIndex);
430
+
429
431
  // Query both objects; results should be equivalent
430
432
  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
431
433
  opt.numQuery, opt.dim, opt.k, opt.toString(),
@@ -458,7 +460,7 @@ TEST(TestGpuIndexIVFPQ, CopyFrom) {
458
460
 
459
461
  // Use garbage values to see if we overwrite them
460
462
  faiss::gpu::GpuIndexIVFPQ
461
- gpuIndex(&res, 1, 1, 1, 1, faiss::METRIC_L2, config);
463
+ gpuIndex(&res, 1, 1, 1, 8, faiss::METRIC_L2, config);
462
464
  gpuIndex.setNumProbes(1);
463
465
 
464
466
  gpuIndex.copyFrom(&cpuIndex);
@@ -476,6 +478,8 @@ TEST(TestGpuIndexIVFPQ, CopyFrom) {
476
478
  EXPECT_EQ(cpuIndex.pq.nbits, gpuIndex.getBitsPerCode());
477
479
  EXPECT_EQ(gpuIndex.getBitsPerCode(), opt.bitsPerCode);
478
480
 
481
+ testIVFEquality(cpuIndex, gpuIndex);
482
+
479
483
  // Query both objects; results should be equivalent
480
484
  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
481
485
  opt.numQuery, opt.dim, opt.k, opt.toString(),
@@ -0,0 +1,231 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/IndexFlat.h>
10
+ #include <faiss/IndexScalarQuantizer.h>
11
+ #include <faiss/gpu/GpuIndexIVFScalarQuantizer.h>
12
+ #include <faiss/gpu/StandardGpuResources.h>
13
+ #include <faiss/gpu/utils/DeviceUtils.h>
14
+ #include <faiss/gpu/test/TestUtils.h>
15
+ #include <cmath>
16
+ #include <gtest/gtest.h>
17
+ #include <sstream>
18
+ #include <vector>
19
+
20
+ constexpr float kF32MaxRelErr = 0.03f;
21
+
22
+ struct Options {
23
+ Options() {
24
+ numAdd = 2 * faiss::gpu::randVal(2000, 5000);
25
+ dim = faiss::gpu::randVal(64, 200);
26
+
27
+ numCentroids = std::sqrt((float) numAdd / 2);
28
+ numTrain = numCentroids * 40;
29
+ nprobe = faiss::gpu::randVal(std::min(10, numCentroids), numCentroids);
30
+ numQuery = faiss::gpu::randVal(32, 100);
31
+
32
+ // Due to the approximate nature of the query and of floating point
33
+ // differences between GPU and CPU, to stay within our error bounds, only
34
+ // use a small k
35
+ k = std::min(faiss::gpu::randVal(10, 30), numAdd / 40);
36
+ indicesOpt = faiss::gpu::randSelect({
37
+ faiss::gpu::INDICES_CPU,
38
+ faiss::gpu::INDICES_32_BIT,
39
+ faiss::gpu::INDICES_64_BIT});
40
+
41
+ device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
42
+ }
43
+
44
+ std::string toString() const {
45
+ std::stringstream str;
46
+ str << "IVFFlat device " << device
47
+ << " numVecs " << numAdd
48
+ << " dim " << dim
49
+ << " numCentroids " << numCentroids
50
+ << " nprobe " << nprobe
51
+ << " numQuery " << numQuery
52
+ << " k " << k
53
+ << " indicesOpt " << indicesOpt;
54
+
55
+ return str.str();
56
+ }
57
+
58
+ int numAdd;
59
+ int dim;
60
+ int numCentroids;
61
+ int numTrain;
62
+ int nprobe;
63
+ int numQuery;
64
+ int k;
65
+ int device;
66
+ faiss::gpu::IndicesOptions indicesOpt;
67
+ };
68
+
69
+ void runCopyToTest(faiss::ScalarQuantizer::QuantizerType qtype) {
70
+ using namespace faiss;
71
+ using namespace faiss::gpu;
72
+
73
+ Options opt;
74
+ std::vector<float> trainVecs = randVecs(opt.numTrain, opt.dim);
75
+ std::vector<float> addVecs = randVecs(opt.numAdd, opt.dim);
76
+
77
+ StandardGpuResources res;
78
+ res.noTempMemory();
79
+
80
+ auto config = GpuIndexIVFScalarQuantizerConfig();
81
+ config.device = opt.device;
82
+
83
+ GpuIndexIVFScalarQuantizer gpuIndex(&res,
84
+ opt.dim,
85
+ opt.numCentroids,
86
+ qtype,
87
+ METRIC_L2,
88
+ true,
89
+ config);
90
+ gpuIndex.train(opt.numTrain, trainVecs.data());
91
+ gpuIndex.add(opt.numAdd, addVecs.data());
92
+ gpuIndex.setNumProbes(opt.nprobe);
93
+
94
+ // use garbage values to see if we overwrite then
95
+ IndexFlatL2 cpuQuantizer(1);
96
+ IndexIVFScalarQuantizer cpuIndex(&cpuQuantizer, 1, 1,
97
+ ScalarQuantizer::QuantizerType::QT_6bit,
98
+ METRIC_L2);
99
+ cpuIndex.nprobe = 1;
100
+
101
+ gpuIndex.copyTo(&cpuIndex);
102
+
103
+ EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
104
+ EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
105
+
106
+ EXPECT_EQ(cpuIndex.d, gpuIndex.d);
107
+ EXPECT_EQ(cpuIndex.quantizer->d, gpuIndex.quantizer->d);
108
+ EXPECT_EQ(cpuIndex.d, opt.dim);
109
+ EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
110
+ EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
111
+
112
+ testIVFEquality(cpuIndex, gpuIndex);
113
+
114
+ // Query both objects; results should be equivalent
115
+ compareIndices(cpuIndex, gpuIndex,
116
+ opt.numQuery, opt.dim, opt.k, opt.toString(),
117
+ kF32MaxRelErr,
118
+ 0.1f,
119
+ 0.015f);
120
+ }
121
+
122
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyTo_fp16) {
123
+ runCopyToTest(faiss::ScalarQuantizer::QuantizerType::QT_fp16);
124
+ }
125
+
126
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyTo_8bit) {
127
+ runCopyToTest(faiss::ScalarQuantizer::QuantizerType::QT_8bit);
128
+ }
129
+
130
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyTo_8bit_uniform) {
131
+ runCopyToTest(faiss::ScalarQuantizer::QuantizerType::QT_8bit_uniform);
132
+ }
133
+
134
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyTo_6bit) {
135
+ runCopyToTest(faiss::ScalarQuantizer::QuantizerType::QT_6bit);
136
+ }
137
+
138
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyTo_4bit) {
139
+ runCopyToTest(faiss::ScalarQuantizer::QuantizerType::QT_4bit);
140
+ }
141
+
142
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyTo_4bit_uniform) {
143
+ runCopyToTest(faiss::ScalarQuantizer::QuantizerType::QT_4bit_uniform);
144
+ }
145
+
146
+ void runCopyFromTest(faiss::ScalarQuantizer::QuantizerType qtype) {
147
+ using namespace faiss;
148
+ using namespace faiss::gpu;
149
+
150
+ Options opt;
151
+ std::vector<float> trainVecs = randVecs(opt.numTrain, opt.dim);
152
+ std::vector<float> addVecs = randVecs(opt.numAdd, opt.dim);
153
+
154
+ IndexFlatL2 cpuQuantizer(opt.dim);
155
+ IndexIVFScalarQuantizer cpuIndex(&cpuQuantizer, opt.dim, opt.numCentroids,
156
+ qtype,
157
+ METRIC_L2);
158
+
159
+ cpuIndex.nprobe = opt.nprobe;
160
+ cpuIndex.train(opt.numTrain, trainVecs.data());
161
+ cpuIndex.add(opt.numAdd, addVecs.data());
162
+
163
+ // use garbage values to see if we overwrite then
164
+ StandardGpuResources res;
165
+ res.noTempMemory();
166
+
167
+ auto config = GpuIndexIVFScalarQuantizerConfig();
168
+ config.device = opt.device;
169
+
170
+ GpuIndexIVFScalarQuantizer gpuIndex(
171
+ &res,
172
+ 1,
173
+ 1,
174
+ ScalarQuantizer::QuantizerType::QT_4bit,
175
+ METRIC_L2,
176
+ false,
177
+ config);
178
+ gpuIndex.setNumProbes(1);
179
+
180
+ gpuIndex.copyFrom(&cpuIndex);
181
+
182
+ EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
183
+ EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
184
+
185
+ EXPECT_EQ(cpuIndex.d, gpuIndex.d);
186
+ EXPECT_EQ(cpuIndex.d, opt.dim);
187
+ EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
188
+ EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
189
+
190
+ testIVFEquality(cpuIndex, gpuIndex);
191
+
192
+ // Query both objects; results should be equivalent
193
+ compareIndices(cpuIndex, gpuIndex,
194
+ opt.numQuery, opt.dim, opt.k, opt.toString(),
195
+ kF32MaxRelErr,
196
+ 0.1f,
197
+ 0.015f);
198
+ }
199
+
200
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyFrom_fp16) {
201
+ runCopyFromTest(faiss::ScalarQuantizer::QuantizerType::QT_fp16);
202
+ }
203
+
204
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyFrom_8bit) {
205
+ runCopyFromTest(faiss::ScalarQuantizer::QuantizerType::QT_8bit);
206
+ }
207
+
208
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyFrom_8bit_uniform) {
209
+ runCopyFromTest(faiss::ScalarQuantizer::QuantizerType::QT_8bit_uniform);
210
+ }
211
+
212
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyFrom_6bit) {
213
+ runCopyFromTest(faiss::ScalarQuantizer::QuantizerType::QT_6bit);
214
+ }
215
+
216
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyFrom_4bit) {
217
+ runCopyFromTest(faiss::ScalarQuantizer::QuantizerType::QT_4bit);
218
+ }
219
+
220
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyFrom_4bit_uniform) {
221
+ runCopyFromTest(faiss::ScalarQuantizer::QuantizerType::QT_4bit_uniform);
222
+ }
223
+
224
+ int main(int argc, char** argv) {
225
+ testing::InitGoogleTest(&argc, argv);
226
+
227
+ // just run with a fixed test seed
228
+ faiss::gpu::setTestSeed(100);
229
+
230
+ return RUN_ALL_TESTS();
231
+ }