faiss 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +1 -1
  6. data/lib/faiss/version.rb +1 -1
  7. data/vendor/faiss/faiss/AutoTune.cpp +36 -33
  8. data/vendor/faiss/faiss/AutoTune.h +6 -3
  9. data/vendor/faiss/faiss/Clustering.cpp +16 -12
  10. data/vendor/faiss/faiss/Index.cpp +3 -4
  11. data/vendor/faiss/faiss/Index.h +3 -3
  12. data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
  13. data/vendor/faiss/faiss/IndexBinary.h +1 -1
  14. data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
  15. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
  16. data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
  17. data/vendor/faiss/faiss/IndexFlat.h +0 -51
  18. data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
  19. data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
  20. data/vendor/faiss/faiss/IndexIVF.h +22 -15
  21. data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
  22. data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
  23. data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
  24. data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
  25. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
  26. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
  27. data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
  28. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  29. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
  30. data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
  31. data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
  32. data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
  33. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
  34. data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
  35. data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
  36. data/vendor/faiss/faiss/IndexRefine.h +73 -0
  37. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
  38. data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
  39. data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
  40. data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
  41. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
  42. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
  43. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
  44. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
  45. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
  46. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
  47. data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
  48. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
  49. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
  50. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
  51. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
  52. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
  53. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
  54. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
  55. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
  56. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
  57. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
  58. data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
  59. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
  60. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
  61. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
  62. data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
  63. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
  64. data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
  65. data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
  66. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
  67. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
  68. data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
  69. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
  70. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
  71. data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
  72. data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
  73. data/vendor/faiss/faiss/impl/io.cpp +33 -2
  74. data/vendor/faiss/faiss/impl/io.h +7 -2
  75. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
  76. data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
  77. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
  78. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
  79. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
  80. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
  81. data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
  82. data/vendor/faiss/faiss/index_factory.cpp +112 -7
  83. data/vendor/faiss/faiss/index_io.h +1 -48
  84. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
  85. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
  86. data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
  87. data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
  88. data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
  89. data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
  90. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
  91. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
  92. data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
  93. data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
  94. data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
  95. data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
  96. data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
  97. data/vendor/faiss/faiss/utils/Heap.h +61 -50
  98. data/vendor/faiss/faiss/utils/distances.cpp +164 -319
  99. data/vendor/faiss/faiss/utils/distances.h +28 -20
  100. data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
  101. data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
  102. data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
  103. data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
  104. data/vendor/faiss/faiss/utils/hamming.h +2 -7
  105. data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
  106. data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
  107. data/vendor/faiss/faiss/utils/partitioning.h +69 -0
  108. data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
  109. data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
  110. data/vendor/faiss/faiss/utils/simdlib.h +31 -0
  111. data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
  112. data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
  113. metadata +43 -141
  114. data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
  115. data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
  116. data/vendor/faiss/c_api/AutoTune_c.h +0 -66
  117. data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
  118. data/vendor/faiss/c_api/Clustering_c.h +0 -123
  119. data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
  120. data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
  121. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
  122. data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
  123. data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
  124. data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
  125. data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
  126. data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
  127. data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
  128. data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
  129. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
  130. data/vendor/faiss/c_api/IndexShards_c.h +0 -39
  131. data/vendor/faiss/c_api/Index_c.cpp +0 -105
  132. data/vendor/faiss/c_api/Index_c.h +0 -183
  133. data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
  134. data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
  135. data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
  136. data/vendor/faiss/c_api/clone_index_c.h +0 -32
  137. data/vendor/faiss/c_api/error_c.h +0 -42
  138. data/vendor/faiss/c_api/error_impl.cpp +0 -27
  139. data/vendor/faiss/c_api/error_impl.h +0 -16
  140. data/vendor/faiss/c_api/faiss_c.h +0 -58
  141. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
  142. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
  143. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
  144. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
  145. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
  146. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
  147. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
  148. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
  149. data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
  150. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
  151. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
  152. data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
  153. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
  154. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
  155. data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
  156. data/vendor/faiss/c_api/index_factory_c.h +0 -30
  157. data/vendor/faiss/c_api/index_io_c.cpp +0 -42
  158. data/vendor/faiss/c_api/index_io_c.h +0 -50
  159. data/vendor/faiss/c_api/macros_impl.h +0 -110
  160. data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
  161. data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
  162. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
  163. data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
  164. data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
  165. data/vendor/faiss/misc/test_blas.cpp +0 -87
  166. data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
  167. data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
  168. data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
  169. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
  170. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
  171. data/vendor/faiss/tests/test_merge.cpp +0 -260
  172. data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
  173. data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
  174. data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
  175. data/vendor/faiss/tests/test_params_override.cpp +0 -236
  176. data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
  177. data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
  178. data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
  179. data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
  180. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
  181. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
  182. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
  183. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
  184. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -0,0 +1,51 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <stdint.h>
11
+ #include <vector>
12
+
13
+ // Utilities for bit packing and unpacking CPU non-interleaved and GPU
14
+ // interleaved by 32 encodings
15
+ namespace faiss { namespace gpu {
16
+
17
+ // Unpacks arbitrary bitwidth codes to a whole number of bytes per code
18
+ // The layout of the input is (v0 d0)(v0 d1) ... (v0 dD)(v1 d0) ...
19
+ // (bit packed)
20
+ // The layout of the output is the same (byte packed to roundUp(bitsPerCode, 8)
21
+ // / 8 bytes)
22
+ std::vector<uint8_t> unpackNonInterleaved(std::vector<uint8_t> data,
23
+ int numVecs,
24
+ int dims,
25
+ int bitsPerCode);
26
+
27
+ // Unpacks arbitrary bitwidth codes to a whole number of bytes per scalar code
28
+ // The layout of the input is (v0 d0)(v1 d0) ... (v31 d0)(v0 d1) ...
29
+ // (bit packed)
30
+ // The layout of the output is (v0 d0)(v0 d1) ... (v0 dD)(v1 d0) ...
31
+ // (byte packed)
32
+ std::vector<uint8_t> unpackInterleaved(std::vector<uint8_t> data,
33
+ int numVecs,
34
+ int dims,
35
+ int bitsPerCode);
36
+
37
+ // Packs data in the byte packed non-interleaved form to bit packed
38
+ // non-interleaved form
39
+ std::vector<uint8_t> packNonInterleaved(std::vector<uint8_t> data,
40
+ int numVecs,
41
+ int dims,
42
+ int bitsPerCode);
43
+
44
+ // Packs data in the byte packed non-interleaved form to bit packed
45
+ // interleaved form
46
+ std::vector<uint8_t> packInterleaved(std::vector<uint8_t> data,
47
+ int numVecs,
48
+ int dims,
49
+ int bitsPerCode);
50
+
51
+ } } // namespace
@@ -14,17 +14,17 @@ namespace faiss { namespace gpu {
14
14
  // Utility function to translate (list id, offset) to a user index on
15
15
  // the CPU. In a cpp in order to use OpenMP
16
16
  void ivfOffsetToUserIndex(
17
- long* indices,
17
+ Index::idx_t* indices,
18
18
  int numLists,
19
19
  int queries,
20
20
  int k,
21
- const std::vector<std::vector<long>>& listOffsetToUserIndex) {
21
+ const std::vector<std::vector<Index::idx_t>>& listOffsetToUserIndex) {
22
22
  FAISS_ASSERT(numLists == listOffsetToUserIndex.size());
23
23
 
24
24
  #pragma omp parallel for
25
25
  for (int q = 0; q < queries; ++q) {
26
26
  for (int r = 0; r < k; ++r) {
27
- long offsetIndex = indices[q * k + r];
27
+ auto offsetIndex = indices[q * k + r];
28
28
 
29
29
  if (offsetIndex < 0) continue;
30
30
 
@@ -8,6 +8,7 @@
8
8
 
9
9
  #pragma once
10
10
 
11
+ #include <faiss/Index.h>
11
12
  #include <vector>
12
13
 
13
14
  namespace faiss { namespace gpu {
@@ -15,10 +16,10 @@ namespace faiss { namespace gpu {
15
16
  /// Utility function to translate (list id, offset) to a user index on
16
17
  /// the CPU. In a cpp in order to use OpenMP.
17
18
  void ivfOffsetToUserIndex(
18
- long* indices,
19
+ Index::idx_t* indices,
19
20
  int numLists,
20
21
  int queries,
21
22
  int k,
22
- const std::vector<std::vector<long>>& listOffsetToUserIndex);
23
+ const std::vector<std::vector<Index::idx_t>>& listOffsetToUserIndex);
23
24
 
24
25
  } } // namespace
@@ -0,0 +1,274 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/gpu/impl/InterleavedCodes.h>
10
+ #include <faiss/gpu/utils/StaticUtils.h>
11
+ #include <faiss/gpu/test/TestUtils.h>
12
+ #include <cmath>
13
+ #include <gtest/gtest.h>
14
+ #include <random>
15
+ #include <sstream>
16
+ #include <vector>
17
+
18
+ TEST(TestCodePacking, NonInterleavedCodes_UnpackPack) {
19
+ using namespace faiss::gpu;
20
+
21
+ // We are fine using non-fixed seeds here, the results should be fully
22
+ // deterministic
23
+ auto seed = std::random_device()();
24
+ std::mt19937 gen(seed);
25
+ std::uniform_int_distribution<uint8_t> dist;
26
+
27
+ std::cout << "seed " << seed << "\n";
28
+
29
+ for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
30
+ for (auto dims : {1, 7, 8, 31, 32}) {
31
+ for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
32
+ std::cout << bitsPerCode << " " << dims << " " << numVecs << "\n";
33
+
34
+ int srcVecSize = utils::divUp(dims * bitsPerCode, 8);
35
+ std::vector<uint8_t> data(numVecs * srcVecSize);
36
+
37
+ for (auto& v : data) {
38
+ v = dist(gen);
39
+ }
40
+
41
+ // currently unimplemented
42
+ EXPECT_FALSE(bitsPerCode > 8 && bitsPerCode % 8 != 0);
43
+
44
+ // Due to bit packing, mask out bits that should be zero based on
45
+ // dimensions we shouldn't have present
46
+ int vectorSizeBits = dims * bitsPerCode;
47
+ int vectorSizeBytes = utils::divUp(vectorSizeBits, 8);
48
+ int remainder = vectorSizeBits % 8;
49
+
50
+ if (remainder > 0) {
51
+ uint8_t mask = 0xff >> (8 - remainder);
52
+
53
+ for (int i = 0; i < numVecs; ++i) {
54
+ int lastVecByte = (i + 1) * vectorSizeBytes - 1;
55
+ data[lastVecByte] &= mask;
56
+ }
57
+ }
58
+
59
+ auto up = unpackNonInterleaved(data, numVecs, dims, bitsPerCode);
60
+ auto p = packNonInterleaved(up, numVecs, dims, bitsPerCode);
61
+
62
+ EXPECT_EQ(data, p);
63
+ }
64
+ }
65
+ }
66
+ }
67
+
68
+ TEST(TestCodePacking, NonInterleavedCodes_PackUnpack) {
69
+ using namespace faiss::gpu;
70
+
71
+ // We are fine using non-fixed seeds here, the results should be fully
72
+ // deterministic
73
+ std::random_device rd;
74
+ std::mt19937 gen(rd());
75
+ std::uniform_int_distribution<uint8_t> dist;
76
+
77
+ for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
78
+ for (auto dims : {1, 7, 8, 31, 32}) {
79
+ for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
80
+ std::cout << bitsPerCode << " " << dims << " " << numVecs << "\n";
81
+
82
+ std::vector<uint8_t> data(numVecs * dims * utils::divUp(bitsPerCode, 8));
83
+
84
+ // currently unimplemented
85
+ EXPECT_FALSE(bitsPerCode > 8 && bitsPerCode % 8 != 0);
86
+
87
+ // Mask out high bits we shouldn't have based on code size
88
+ uint8_t mask = bitsPerCode < 8 ? (0xff >> (8 - bitsPerCode)) : 0xff;
89
+
90
+ for (auto& v : data) {
91
+ v = dist(gen) & mask;
92
+ }
93
+
94
+ auto p = packNonInterleaved(data, numVecs, dims, bitsPerCode);
95
+ auto up = unpackNonInterleaved(p, numVecs, dims, bitsPerCode);
96
+
97
+ EXPECT_EQ(data, up);
98
+ }
99
+ }
100
+ }
101
+ }
102
+
103
+ TEST(TestCodePacking, InterleavedCodes_UnpackPack) {
104
+ using namespace faiss::gpu;
105
+
106
+ // We are fine using non-fixed seeds here, the results should be fully
107
+ // deterministic
108
+ std::random_device rd;
109
+ std::mt19937 gen(rd());
110
+ std::uniform_int_distribution<uint8_t> dist;
111
+
112
+ for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
113
+ for (auto dims : {1, 7, 8, 31, 32}) {
114
+ for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
115
+ std::cout << bitsPerCode << " " << dims << " " << numVecs << "\n";
116
+
117
+ int blocks = utils::divUp(numVecs, 32);
118
+ int bytesPerDimBlock = 32 * bitsPerCode / 8;
119
+ int bytesPerBlock = bytesPerDimBlock * dims;
120
+ int size = blocks * bytesPerBlock;
121
+
122
+ std::vector<uint8_t> data(size);
123
+
124
+ if (bitsPerCode == 8 || bitsPerCode == 16 || bitsPerCode == 32) {
125
+ int bytesPerCode = bitsPerCode / 8;
126
+
127
+ for (int i = 0; i < blocks; ++i) {
128
+ for (int j = 0; j < dims; ++j) {
129
+ for (int k = 0; k < 32; ++k) {
130
+ for (int l = 0; l < bytesPerCode; ++l) {
131
+ int vec = i * 32 + k;
132
+ if (vec < numVecs) {
133
+ data[i * bytesPerBlock +
134
+ j * bytesPerDimBlock +
135
+ k * bytesPerCode + l] = dist(gen);
136
+ }
137
+ }
138
+ }
139
+ }
140
+ }
141
+ } else if (bitsPerCode < 8) {
142
+ for (int i = 0; i < blocks; ++i) {
143
+ for (int j = 0; j < dims; ++j) {
144
+ for (int k = 0; k < bytesPerDimBlock; ++k) {
145
+ int loVec = i * 32 + (k * 8) / bitsPerCode;
146
+ int hiVec = loVec + 1;
147
+ int hiVec2 = hiVec + 1;
148
+
149
+ uint8_t lo = loVec < numVecs ?
150
+ dist(gen) & (0xff >> (8 - bitsPerCode)) : 0;
151
+ uint8_t hi = hiVec < numVecs ?
152
+ dist(gen) & (0xff >> (8 - bitsPerCode)) : 0;
153
+ uint8_t hi2 = hiVec2 < numVecs ?
154
+ dist(gen) & (0xff >> (8 - bitsPerCode)) : 0;
155
+
156
+ uint8_t v = 0;
157
+ if (bitsPerCode == 4) {
158
+ v = lo | (hi << 4);
159
+ } else if (bitsPerCode == 5) {
160
+ switch (k % 5) {
161
+ case 0:
162
+ // 5 msbs of lower as vOut lsbs
163
+ // 3 lsbs of upper as vOut msbs
164
+ v = (lo & 0x1f) | (hi << 5);
165
+ break;
166
+ case 1:
167
+ // 2 msbs of lower as vOut lsbs
168
+ // 5 lsbs of upper as vOut msbs
169
+ // 1 lsbs of upper2 as vOut msb
170
+ v = (lo >> 3) | (hi << 2) | (hi2 << 7);
171
+ break;
172
+ case 2:
173
+ // 4 msbs of lower as vOut lsbs
174
+ // 4 lsbs of upper as vOut msbs
175
+ v = (lo >> 1) | (hi << 4);
176
+ break;
177
+ case 3:
178
+ // 1 msbs of lower as vOut lsbs
179
+ // 5 lsbs of upper as vOut msbs
180
+ // 2 lsbs of upper2 as vOut msb
181
+ v = (lo >> 4) | (hi << 1) | (hi2 << 6);
182
+ break;
183
+ case 4:
184
+ // 3 msbs of lower as vOut lsbs
185
+ // 5 lsbs of upper as vOut msbs
186
+ v = (lo >> 2) | (hi << 3);
187
+ break;
188
+ }
189
+ } else if (bitsPerCode == 6) {
190
+ switch (k % 3) {
191
+ case 0:
192
+ // 6 msbs of lower as vOut lsbs
193
+ // 2 lsbs of upper as vOut msbs
194
+ v = (lo & 0x3f) | (hi << 6);
195
+ break;
196
+ case 1:
197
+ // 4 msbs of lower as vOut lsbs
198
+ // 4 lsbs of upper as vOut msbs
199
+ v = (lo >> 2) | (hi << 4);
200
+ break;
201
+ case 2:
202
+ // 2 msbs of lower as vOut lsbs
203
+ // 6 lsbs of upper as vOut msbs
204
+ v = (lo >> 4) | (hi << 2);
205
+ break;
206
+ }
207
+ } else {
208
+ // unimplemented
209
+ EXPECT_TRUE(false);
210
+ }
211
+
212
+ data[i * bytesPerBlock + j * bytesPerDimBlock + k] = v;
213
+ }
214
+ }
215
+ }
216
+ } else {
217
+ // unimplemented
218
+ EXPECT_TRUE(false);
219
+ }
220
+
221
+ auto up = unpackInterleaved(data, numVecs, dims, bitsPerCode);
222
+ auto p = packInterleaved(up, numVecs, dims, bitsPerCode);
223
+
224
+ EXPECT_EQ(data, p);
225
+ }
226
+ }
227
+ }
228
+ }
229
+
230
+ TEST(TestCodePacking, InterleavedCodes_PackUnpack) {
231
+ using namespace faiss::gpu;
232
+
233
+ // We are fine using non-fixed seeds here, the results should be fully
234
+ // deterministic
235
+ std::random_device rd;
236
+ std::mt19937 gen(rd());
237
+ std::uniform_int_distribution<uint8_t> dist;
238
+
239
+ for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
240
+ for (auto dims : {1, 7, 8, 31, 32}) {
241
+ for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
242
+ std::cout << bitsPerCode << " " << dims << " " << numVecs << "\n";
243
+
244
+ std::vector<uint8_t> data(numVecs * dims * utils::divUp(bitsPerCode, 8));
245
+
246
+ if (bitsPerCode == 8 || bitsPerCode == 16 || bitsPerCode == 32) {
247
+ for (auto& v : data) {
248
+ v = dist(gen);
249
+ }
250
+ } else if (bitsPerCode < 8) {
251
+ uint8_t mask = 0xff >> (8 - bitsPerCode);
252
+
253
+ for (auto& v : data) {
254
+ v = dist(gen) & mask;
255
+ }
256
+ } else {
257
+ // unimplemented
258
+ EXPECT_TRUE(false);
259
+ }
260
+
261
+ auto p = packInterleaved(data, numVecs, dims, bitsPerCode);
262
+ auto up = unpackInterleaved(p, numVecs, dims, bitsPerCode);
263
+
264
+ EXPECT_EQ(data, up);
265
+ }
266
+ }
267
+ }
268
+ }
269
+
270
+ int main(int argc, char** argv) {
271
+ testing::InitGoogleTest(&argc, argv);
272
+
273
+ return RUN_ALL_TESTS();
274
+ }
@@ -206,6 +206,8 @@ void copyToTest(bool useFloat16CoarseQuantizer) {
206
206
  EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
207
207
  EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
208
208
 
209
+ testIVFEquality(cpuIndex, gpuIndex);
210
+
209
211
  // Query both objects; results should be equivalent
210
212
  bool compFloat16 = useFloat16CoarseQuantizer;
211
213
  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
@@ -255,6 +257,8 @@ void copyFromTest(bool useFloat16CoarseQuantizer) {
255
257
  EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
256
258
  EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
257
259
 
260
+ testIVFEquality(cpuIndex, gpuIndex);
261
+
258
262
  // Query both objects; results should be equivalent
259
263
  bool compFloat16 = useFloat16CoarseQuantizer;
260
264
  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
@@ -466,9 +470,10 @@ TEST(TestGpuIndexIVFFlat, AddNaN) {
466
470
  std::vector<float> nans(numNans * opt.dim,
467
471
  std::numeric_limits<float>::quiet_NaN());
468
472
 
469
- // Make one vector valid, which should actually add
473
+ // Make one vector valid (not the first vector, in order to test offset
474
+ // issues), which should actually add
470
475
  for (int i = 0; i < opt.dim; ++i) {
471
- nans[i] = 0.0f;
476
+ nans[opt.dim + i] = i;
472
477
  }
473
478
 
474
479
  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
@@ -426,6 +426,8 @@ TEST(TestGpuIndexIVFPQ, CopyTo) {
426
426
  EXPECT_EQ(cpuIndex.pq.nbits, gpuIndex.getBitsPerCode());
427
427
  EXPECT_EQ(gpuIndex.getBitsPerCode(), opt.bitsPerCode);
428
428
 
429
+ testIVFEquality(cpuIndex, gpuIndex);
430
+
429
431
  // Query both objects; results should be equivalent
430
432
  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
431
433
  opt.numQuery, opt.dim, opt.k, opt.toString(),
@@ -458,7 +460,7 @@ TEST(TestGpuIndexIVFPQ, CopyFrom) {
458
460
 
459
461
  // Use garbage values to see if we overwrite them
460
462
  faiss::gpu::GpuIndexIVFPQ
461
- gpuIndex(&res, 1, 1, 1, 1, faiss::METRIC_L2, config);
463
+ gpuIndex(&res, 1, 1, 1, 8, faiss::METRIC_L2, config);
462
464
  gpuIndex.setNumProbes(1);
463
465
 
464
466
  gpuIndex.copyFrom(&cpuIndex);
@@ -476,6 +478,8 @@ TEST(TestGpuIndexIVFPQ, CopyFrom) {
476
478
  EXPECT_EQ(cpuIndex.pq.nbits, gpuIndex.getBitsPerCode());
477
479
  EXPECT_EQ(gpuIndex.getBitsPerCode(), opt.bitsPerCode);
478
480
 
481
+ testIVFEquality(cpuIndex, gpuIndex);
482
+
479
483
  // Query both objects; results should be equivalent
480
484
  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
481
485
  opt.numQuery, opt.dim, opt.k, opt.toString(),
@@ -0,0 +1,231 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/IndexFlat.h>
10
+ #include <faiss/IndexScalarQuantizer.h>
11
+ #include <faiss/gpu/GpuIndexIVFScalarQuantizer.h>
12
+ #include <faiss/gpu/StandardGpuResources.h>
13
+ #include <faiss/gpu/utils/DeviceUtils.h>
14
+ #include <faiss/gpu/test/TestUtils.h>
15
+ #include <cmath>
16
+ #include <gtest/gtest.h>
17
+ #include <sstream>
18
+ #include <vector>
19
+
20
+ constexpr float kF32MaxRelErr = 0.03f;
21
+
22
+ struct Options {
23
+ Options() {
24
+ numAdd = 2 * faiss::gpu::randVal(2000, 5000);
25
+ dim = faiss::gpu::randVal(64, 200);
26
+
27
+ numCentroids = std::sqrt((float) numAdd / 2);
28
+ numTrain = numCentroids * 40;
29
+ nprobe = faiss::gpu::randVal(std::min(10, numCentroids), numCentroids);
30
+ numQuery = faiss::gpu::randVal(32, 100);
31
+
32
+ // Due to the approximate nature of the query and of floating point
33
+ // differences between GPU and CPU, to stay within our error bounds, only
34
+ // use a small k
35
+ k = std::min(faiss::gpu::randVal(10, 30), numAdd / 40);
36
+ indicesOpt = faiss::gpu::randSelect({
37
+ faiss::gpu::INDICES_CPU,
38
+ faiss::gpu::INDICES_32_BIT,
39
+ faiss::gpu::INDICES_64_BIT});
40
+
41
+ device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
42
+ }
43
+
44
+ std::string toString() const {
45
+ std::stringstream str;
46
+ str << "IVFFlat device " << device
47
+ << " numVecs " << numAdd
48
+ << " dim " << dim
49
+ << " numCentroids " << numCentroids
50
+ << " nprobe " << nprobe
51
+ << " numQuery " << numQuery
52
+ << " k " << k
53
+ << " indicesOpt " << indicesOpt;
54
+
55
+ return str.str();
56
+ }
57
+
58
+ int numAdd;
59
+ int dim;
60
+ int numCentroids;
61
+ int numTrain;
62
+ int nprobe;
63
+ int numQuery;
64
+ int k;
65
+ int device;
66
+ faiss::gpu::IndicesOptions indicesOpt;
67
+ };
68
+
69
+ void runCopyToTest(faiss::ScalarQuantizer::QuantizerType qtype) {
70
+ using namespace faiss;
71
+ using namespace faiss::gpu;
72
+
73
+ Options opt;
74
+ std::vector<float> trainVecs = randVecs(opt.numTrain, opt.dim);
75
+ std::vector<float> addVecs = randVecs(opt.numAdd, opt.dim);
76
+
77
+ StandardGpuResources res;
78
+ res.noTempMemory();
79
+
80
+ auto config = GpuIndexIVFScalarQuantizerConfig();
81
+ config.device = opt.device;
82
+
83
+ GpuIndexIVFScalarQuantizer gpuIndex(&res,
84
+ opt.dim,
85
+ opt.numCentroids,
86
+ qtype,
87
+ METRIC_L2,
88
+ true,
89
+ config);
90
+ gpuIndex.train(opt.numTrain, trainVecs.data());
91
+ gpuIndex.add(opt.numAdd, addVecs.data());
92
+ gpuIndex.setNumProbes(opt.nprobe);
93
+
94
+ // use garbage values to see if we overwrite them
95
+ IndexFlatL2 cpuQuantizer(1);
96
+ IndexIVFScalarQuantizer cpuIndex(&cpuQuantizer, 1, 1,
97
+ ScalarQuantizer::QuantizerType::QT_6bit,
98
+ METRIC_L2);
99
+ cpuIndex.nprobe = 1;
100
+
101
+ gpuIndex.copyTo(&cpuIndex);
102
+
103
+ EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
104
+ EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
105
+
106
+ EXPECT_EQ(cpuIndex.d, gpuIndex.d);
107
+ EXPECT_EQ(cpuIndex.quantizer->d, gpuIndex.quantizer->d);
108
+ EXPECT_EQ(cpuIndex.d, opt.dim);
109
+ EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
110
+ EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
111
+
112
+ testIVFEquality(cpuIndex, gpuIndex);
113
+
114
+ // Query both objects; results should be equivalent
115
+ compareIndices(cpuIndex, gpuIndex,
116
+ opt.numQuery, opt.dim, opt.k, opt.toString(),
117
+ kF32MaxRelErr,
118
+ 0.1f,
119
+ 0.015f);
120
+ }
121
+
122
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyTo_fp16) {
123
+ runCopyToTest(faiss::ScalarQuantizer::QuantizerType::QT_fp16);
124
+ }
125
+
126
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyTo_8bit) {
127
+ runCopyToTest(faiss::ScalarQuantizer::QuantizerType::QT_8bit);
128
+ }
129
+
130
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyTo_8bit_uniform) {
131
+ runCopyToTest(faiss::ScalarQuantizer::QuantizerType::QT_8bit_uniform);
132
+ }
133
+
134
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyTo_6bit) {
135
+ runCopyToTest(faiss::ScalarQuantizer::QuantizerType::QT_6bit);
136
+ }
137
+
138
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyTo_4bit) {
139
+ runCopyToTest(faiss::ScalarQuantizer::QuantizerType::QT_4bit);
140
+ }
141
+
142
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyTo_4bit_uniform) {
143
+ runCopyToTest(faiss::ScalarQuantizer::QuantizerType::QT_4bit_uniform);
144
+ }
145
+
146
+ void runCopyFromTest(faiss::ScalarQuantizer::QuantizerType qtype) {
147
+ using namespace faiss;
148
+ using namespace faiss::gpu;
149
+
150
+ Options opt;
151
+ std::vector<float> trainVecs = randVecs(opt.numTrain, opt.dim);
152
+ std::vector<float> addVecs = randVecs(opt.numAdd, opt.dim);
153
+
154
+ IndexFlatL2 cpuQuantizer(opt.dim);
155
+ IndexIVFScalarQuantizer cpuIndex(&cpuQuantizer, opt.dim, opt.numCentroids,
156
+ qtype,
157
+ METRIC_L2);
158
+
159
+ cpuIndex.nprobe = opt.nprobe;
160
+ cpuIndex.train(opt.numTrain, trainVecs.data());
161
+ cpuIndex.add(opt.numAdd, addVecs.data());
162
+
163
+ // use garbage values to see if we overwrite them
164
+ StandardGpuResources res;
165
+ res.noTempMemory();
166
+
167
+ auto config = GpuIndexIVFScalarQuantizerConfig();
168
+ config.device = opt.device;
169
+
170
+ GpuIndexIVFScalarQuantizer gpuIndex(
171
+ &res,
172
+ 1,
173
+ 1,
174
+ ScalarQuantizer::QuantizerType::QT_4bit,
175
+ METRIC_L2,
176
+ false,
177
+ config);
178
+ gpuIndex.setNumProbes(1);
179
+
180
+ gpuIndex.copyFrom(&cpuIndex);
181
+
182
+ EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
183
+ EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
184
+
185
+ EXPECT_EQ(cpuIndex.d, gpuIndex.d);
186
+ EXPECT_EQ(cpuIndex.d, opt.dim);
187
+ EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
188
+ EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
189
+
190
+ testIVFEquality(cpuIndex, gpuIndex);
191
+
192
+ // Query both objects; results should be equivalent
193
+ compareIndices(cpuIndex, gpuIndex,
194
+ opt.numQuery, opt.dim, opt.k, opt.toString(),
195
+ kF32MaxRelErr,
196
+ 0.1f,
197
+ 0.015f);
198
+ }
199
+
200
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyFrom_fp16) {
201
+ runCopyFromTest(faiss::ScalarQuantizer::QuantizerType::QT_fp16);
202
+ }
203
+
204
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyFrom_8bit) {
205
+ runCopyFromTest(faiss::ScalarQuantizer::QuantizerType::QT_8bit);
206
+ }
207
+
208
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyFrom_8bit_uniform) {
209
+ runCopyFromTest(faiss::ScalarQuantizer::QuantizerType::QT_8bit_uniform);
210
+ }
211
+
212
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyFrom_6bit) {
213
+ runCopyFromTest(faiss::ScalarQuantizer::QuantizerType::QT_6bit);
214
+ }
215
+
216
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyFrom_4bit) {
217
+ runCopyFromTest(faiss::ScalarQuantizer::QuantizerType::QT_4bit);
218
+ }
219
+
220
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyFrom_4bit_uniform) {
221
+ runCopyFromTest(faiss::ScalarQuantizer::QuantizerType::QT_4bit_uniform);
222
+ }
223
+
224
+ int main(int argc, char** argv) {
225
+ testing::InitGoogleTest(&argc, argv);
226
+
227
+ // just run with a fixed test seed
228
+ faiss::gpu::setTestSeed(100);
229
+
230
+ return RUN_ALL_TESTS();
231
+ }