faiss 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (199) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +16 -4
  5. data/ext/faiss/ext.cpp +12 -308
  6. data/ext/faiss/extconf.rb +6 -3
  7. data/ext/faiss/index.cpp +189 -0
  8. data/ext/faiss/index_binary.cpp +75 -0
  9. data/ext/faiss/kmeans.cpp +40 -0
  10. data/ext/faiss/numo.hpp +867 -0
  11. data/ext/faiss/pca_matrix.cpp +33 -0
  12. data/ext/faiss/product_quantizer.cpp +53 -0
  13. data/ext/faiss/utils.cpp +13 -0
  14. data/ext/faiss/utils.h +5 -0
  15. data/lib/faiss.rb +0 -5
  16. data/lib/faiss/version.rb +1 -1
  17. data/vendor/faiss/faiss/AutoTune.cpp +36 -33
  18. data/vendor/faiss/faiss/AutoTune.h +6 -3
  19. data/vendor/faiss/faiss/Clustering.cpp +16 -12
  20. data/vendor/faiss/faiss/Index.cpp +3 -4
  21. data/vendor/faiss/faiss/Index.h +3 -3
  22. data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
  23. data/vendor/faiss/faiss/IndexBinary.h +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
  25. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
  26. data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
  27. data/vendor/faiss/faiss/IndexFlat.h +0 -51
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
  29. data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
  30. data/vendor/faiss/faiss/IndexIVF.h +22 -15
  31. data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
  32. data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
  33. data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
  34. data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
  35. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
  36. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
  37. data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
  38. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  39. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
  40. data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
  41. data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
  42. data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
  43. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
  44. data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
  45. data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
  46. data/vendor/faiss/faiss/IndexRefine.h +73 -0
  47. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
  48. data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
  49. data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
  50. data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
  51. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
  52. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
  53. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
  54. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
  55. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
  56. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
  57. data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
  58. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
  59. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
  60. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
  61. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
  62. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
  63. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
  64. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
  65. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
  66. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
  67. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
  68. data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
  69. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
  70. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
  71. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
  72. data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
  73. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
  74. data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
  75. data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
  76. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
  77. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
  78. data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
  79. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
  80. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
  81. data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
  82. data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
  83. data/vendor/faiss/faiss/impl/io.cpp +33 -2
  84. data/vendor/faiss/faiss/impl/io.h +7 -2
  85. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
  86. data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
  87. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
  88. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
  89. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
  90. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
  91. data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
  92. data/vendor/faiss/faiss/index_factory.cpp +112 -7
  93. data/vendor/faiss/faiss/index_io.h +1 -48
  94. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
  95. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
  96. data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
  97. data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
  98. data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
  99. data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
  100. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
  101. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
  102. data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
  103. data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
  104. data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
  105. data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
  106. data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
  107. data/vendor/faiss/faiss/utils/Heap.h +61 -50
  108. data/vendor/faiss/faiss/utils/distances.cpp +164 -319
  109. data/vendor/faiss/faiss/utils/distances.h +28 -20
  110. data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
  111. data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
  112. data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
  113. data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
  114. data/vendor/faiss/faiss/utils/hamming.h +2 -7
  115. data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
  116. data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
  117. data/vendor/faiss/faiss/utils/partitioning.h +69 -0
  118. data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
  119. data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
  120. data/vendor/faiss/faiss/utils/simdlib.h +31 -0
  121. data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
  122. data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
  123. metadata +54 -149
  124. data/lib/faiss/index.rb +0 -20
  125. data/lib/faiss/index_binary.rb +0 -20
  126. data/lib/faiss/kmeans.rb +0 -15
  127. data/lib/faiss/pca_matrix.rb +0 -15
  128. data/lib/faiss/product_quantizer.rb +0 -22
  129. data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
  130. data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
  131. data/vendor/faiss/c_api/AutoTune_c.h +0 -66
  132. data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
  133. data/vendor/faiss/c_api/Clustering_c.h +0 -123
  134. data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
  135. data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
  136. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
  137. data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
  138. data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
  139. data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
  140. data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
  141. data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
  142. data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
  143. data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
  144. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
  145. data/vendor/faiss/c_api/IndexShards_c.h +0 -39
  146. data/vendor/faiss/c_api/Index_c.cpp +0 -105
  147. data/vendor/faiss/c_api/Index_c.h +0 -183
  148. data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
  149. data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
  150. data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
  151. data/vendor/faiss/c_api/clone_index_c.h +0 -32
  152. data/vendor/faiss/c_api/error_c.h +0 -42
  153. data/vendor/faiss/c_api/error_impl.cpp +0 -27
  154. data/vendor/faiss/c_api/error_impl.h +0 -16
  155. data/vendor/faiss/c_api/faiss_c.h +0 -58
  156. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
  157. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
  158. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
  159. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
  160. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
  161. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
  162. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
  163. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
  164. data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
  165. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
  166. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
  167. data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
  168. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
  169. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
  170. data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
  171. data/vendor/faiss/c_api/index_factory_c.h +0 -30
  172. data/vendor/faiss/c_api/index_io_c.cpp +0 -42
  173. data/vendor/faiss/c_api/index_io_c.h +0 -50
  174. data/vendor/faiss/c_api/macros_impl.h +0 -110
  175. data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
  176. data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
  177. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
  178. data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
  179. data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
  180. data/vendor/faiss/misc/test_blas.cpp +0 -87
  181. data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
  182. data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
  183. data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
  184. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
  185. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
  186. data/vendor/faiss/tests/test_merge.cpp +0 -260
  187. data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
  188. data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
  189. data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
  190. data/vendor/faiss/tests/test_params_override.cpp +0 -236
  191. data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
  192. data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
  193. data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
  194. data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
  195. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
  196. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
  197. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
  198. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
  199. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -0,0 +1,51 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <stdint.h>
11
+ #include <vector>
12
+
13
+ // Utilities for bit packing and unpacking CPU non-interleaved and GPU
14
+ // interleaved by 32 encodings
15
+ namespace faiss { namespace gpu {
16
+
17
+ // Unpacks arbitrary bitwidth codes to a whole number of bytes per code
18
+ // The layout of the input is (v0 d0)(v0 d1) ... (v0 dD)(v1 d0) ...
19
+ // (bit packed)
20
+ // The layout of the output is the same (byte packed to roundUp(bitsPerCode, 8)
21
+ // / 8 bytes)
22
+ std::vector<uint8_t> unpackNonInterleaved(std::vector<uint8_t> data,
23
+ int numVecs,
24
+ int dims,
25
+ int bitsPerCode);
26
+
27
+ // Unpacks arbitrary bitwidth codes to a whole number of bytes per scalar code
28
+ // The layout of the input is (v0 d0)(v1 d0) ... (v31 d0)(v0 d1) ...
29
+ // (bit packed)
30
+ // The layout of the output is (v0 d0)(v0 d1) ... (v0 dD)(v1 d0) ...
31
+ // (byte packed)
32
+ std::vector<uint8_t> unpackInterleaved(std::vector<uint8_t> data,
33
+ int numVecs,
34
+ int dims,
35
+ int bitsPerCode);
36
+
37
+ // Packs data in the byte packed non-interleaved form to bit packed
38
+ // non-interleaved form
39
+ std::vector<uint8_t> packNonInterleaved(std::vector<uint8_t> data,
40
+ int numVecs,
41
+ int dims,
42
+ int bitsPerCode);
43
+
44
+ // Packs data in the byte packed non-interleaved form to bit packed
45
+ // interleaved form
46
+ std::vector<uint8_t> packInterleaved(std::vector<uint8_t> data,
47
+ int numVecs,
48
+ int dims,
49
+ int bitsPerCode);
50
+
51
+ } } // namespace
@@ -14,17 +14,17 @@ namespace faiss { namespace gpu {
14
14
  // Utility function to translate (list id, offset) to a user index on
15
15
  // the CPU. In a cpp in order to use OpenMP
16
16
  void ivfOffsetToUserIndex(
17
- long* indices,
17
+ Index::idx_t* indices,
18
18
  int numLists,
19
19
  int queries,
20
20
  int k,
21
- const std::vector<std::vector<long>>& listOffsetToUserIndex) {
21
+ const std::vector<std::vector<Index::idx_t>>& listOffsetToUserIndex) {
22
22
  FAISS_ASSERT(numLists == listOffsetToUserIndex.size());
23
23
 
24
24
  #pragma omp parallel for
25
25
  for (int q = 0; q < queries; ++q) {
26
26
  for (int r = 0; r < k; ++r) {
27
- long offsetIndex = indices[q * k + r];
27
+ auto offsetIndex = indices[q * k + r];
28
28
 
29
29
  if (offsetIndex < 0) continue;
30
30
 
@@ -8,6 +8,7 @@
8
8
 
9
9
  #pragma once
10
10
 
11
+ #include <faiss/Index.h>
11
12
  #include <vector>
12
13
 
13
14
  namespace faiss { namespace gpu {
@@ -15,10 +16,10 @@ namespace faiss { namespace gpu {
15
16
  /// Utility function to translate (list id, offset) to a user index on
16
17
  /// the CPU. In a cpp in order to use OpenMP.
17
18
  void ivfOffsetToUserIndex(
18
- long* indices,
19
+ Index::idx_t* indices,
19
20
  int numLists,
20
21
  int queries,
21
22
  int k,
22
- const std::vector<std::vector<long>>& listOffsetToUserIndex);
23
+ const std::vector<std::vector<Index::idx_t>>& listOffsetToUserIndex);
23
24
 
24
25
  } } // namespace
@@ -0,0 +1,274 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/gpu/impl/InterleavedCodes.h>
10
+ #include <faiss/gpu/utils/StaticUtils.h>
11
+ #include <faiss/gpu/test/TestUtils.h>
12
+ #include <cmath>
13
+ #include <gtest/gtest.h>
14
+ #include <random>
15
+ #include <sstream>
16
+ #include <vector>
17
+
18
+ TEST(TestCodePacking, NonInterleavedCodes_UnpackPack) {
19
+ using namespace faiss::gpu;
20
+
21
+ // We are fine using non-fixed seeds here, the results should be fully
22
+ // deterministic
23
+ auto seed = std::random_device()();
24
+ std::mt19937 gen(seed);
25
+ std::uniform_int_distribution<uint8_t> dist;
26
+
27
+ std::cout << "seed " << seed << "\n";
28
+
29
+ for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
30
+ for (auto dims : {1, 7, 8, 31, 32}) {
31
+ for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
32
+ std::cout << bitsPerCode << " " << dims << " " << numVecs << "\n";
33
+
34
+ int srcVecSize = utils::divUp(dims * bitsPerCode, 8);
35
+ std::vector<uint8_t> data(numVecs * srcVecSize);
36
+
37
+ for (auto& v : data) {
38
+ v = dist(gen);
39
+ }
40
+
41
+ // currently unimplemented
42
+ EXPECT_FALSE(bitsPerCode > 8 && bitsPerCode % 8 != 0);
43
+
44
+ // Due to bit packing, mask out bits that should be zero based on
45
+ // dimensions we shouldn't have present
46
+ int vectorSizeBits = dims * bitsPerCode;
47
+ int vectorSizeBytes = utils::divUp(vectorSizeBits, 8);
48
+ int remainder = vectorSizeBits % 8;
49
+
50
+ if (remainder > 0) {
51
+ uint8_t mask = 0xff >> (8 - remainder);
52
+
53
+ for (int i = 0; i < numVecs; ++i) {
54
+ int lastVecByte = (i + 1) * vectorSizeBytes - 1;
55
+ data[lastVecByte] &= mask;
56
+ }
57
+ }
58
+
59
+ auto up = unpackNonInterleaved(data, numVecs, dims, bitsPerCode);
60
+ auto p = packNonInterleaved(up, numVecs, dims, bitsPerCode);
61
+
62
+ EXPECT_EQ(data, p);
63
+ }
64
+ }
65
+ }
66
+ }
67
+
68
+ TEST(TestCodePacking, NonInterleavedCodes_PackUnpack) {
69
+ using namespace faiss::gpu;
70
+
71
+ // We are fine using non-fixed seeds here, the results should be fully
72
+ // deterministic
73
+ std::random_device rd;
74
+ std::mt19937 gen(rd());
75
+ std::uniform_int_distribution<uint8_t> dist;
76
+
77
+ for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
78
+ for (auto dims : {1, 7, 8, 31, 32}) {
79
+ for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
80
+ std::cout << bitsPerCode << " " << dims << " " << numVecs << "\n";
81
+
82
+ std::vector<uint8_t> data(numVecs * dims * utils::divUp(bitsPerCode, 8));
83
+
84
+ // currently unimplemented
85
+ EXPECT_FALSE(bitsPerCode > 8 && bitsPerCode % 8 != 0);
86
+
87
+ // Mask out high bits we shouldn't have based on code size
88
+ uint8_t mask = bitsPerCode < 8 ? (0xff >> (8 - bitsPerCode)) : 0xff;
89
+
90
+ for (auto& v : data) {
91
+ v = dist(gen) & mask;
92
+ }
93
+
94
+ auto p = packNonInterleaved(data, numVecs, dims, bitsPerCode);
95
+ auto up = unpackNonInterleaved(p, numVecs, dims, bitsPerCode);
96
+
97
+ EXPECT_EQ(data, up);
98
+ }
99
+ }
100
+ }
101
+ }
102
+
103
+ TEST(TestCodePacking, InterleavedCodes_UnpackPack) {
104
+ using namespace faiss::gpu;
105
+
106
+ // We are fine using non-fixed seeds here, the results should be fully
107
+ // deterministic
108
+ std::random_device rd;
109
+ std::mt19937 gen(rd());
110
+ std::uniform_int_distribution<uint8_t> dist;
111
+
112
+ for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
113
+ for (auto dims : {1, 7, 8, 31, 32}) {
114
+ for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
115
+ std::cout << bitsPerCode << " " << dims << " " << numVecs << "\n";
116
+
117
+ int blocks = utils::divUp(numVecs, 32);
118
+ int bytesPerDimBlock = 32 * bitsPerCode / 8;
119
+ int bytesPerBlock = bytesPerDimBlock * dims;
120
+ int size = blocks * bytesPerBlock;
121
+
122
+ std::vector<uint8_t> data(size);
123
+
124
+ if (bitsPerCode == 8 || bitsPerCode == 16 || bitsPerCode == 32) {
125
+ int bytesPerCode = bitsPerCode / 8;
126
+
127
+ for (int i = 0; i < blocks; ++i) {
128
+ for (int j = 0; j < dims; ++j) {
129
+ for (int k = 0; k < 32; ++k) {
130
+ for (int l = 0; l < bytesPerCode; ++l) {
131
+ int vec = i * 32 + k;
132
+ if (vec < numVecs) {
133
+ data[i * bytesPerBlock +
134
+ j * bytesPerDimBlock +
135
+ k * bytesPerCode + l] = dist(gen);
136
+ }
137
+ }
138
+ }
139
+ }
140
+ }
141
+ } else if (bitsPerCode < 8) {
142
+ for (int i = 0; i < blocks; ++i) {
143
+ for (int j = 0; j < dims; ++j) {
144
+ for (int k = 0; k < bytesPerDimBlock; ++k) {
145
+ int loVec = i * 32 + (k * 8) / bitsPerCode;
146
+ int hiVec = loVec + 1;
147
+ int hiVec2 = hiVec + 1;
148
+
149
+ uint8_t lo = loVec < numVecs ?
150
+ dist(gen) & (0xff >> (8 - bitsPerCode)) : 0;
151
+ uint8_t hi = hiVec < numVecs ?
152
+ dist(gen) & (0xff >> (8 - bitsPerCode)) : 0;
153
+ uint8_t hi2 = hiVec2 < numVecs ?
154
+ dist(gen) & (0xff >> (8 - bitsPerCode)) : 0;
155
+
156
+ uint8_t v = 0;
157
+ if (bitsPerCode == 4) {
158
+ v = lo | (hi << 4);
159
+ } else if (bitsPerCode == 5) {
160
+ switch (k % 5) {
161
+ case 0:
162
+ // 5 msbs of lower as vOut lsbs
163
+ // 3 lsbs of upper as vOut msbs
164
+ v = (lo & 0x1f) | (hi << 5);
165
+ break;
166
+ case 1:
167
+ // 2 msbs of lower as vOut lsbs
168
+ // 5 lsbs of upper as vOut msbs
169
+ // 1 lsbs of upper2 as vOut msb
170
+ v = (lo >> 3) | (hi << 2) | (hi2 << 7);
171
+ break;
172
+ case 2:
173
+ // 4 msbs of lower as vOut lsbs
174
+ // 4 lsbs of upper as vOut msbs
175
+ v = (lo >> 1) | (hi << 4);
176
+ break;
177
+ case 3:
178
+ // 1 msbs of lower as vOut lsbs
179
+ // 5 lsbs of upper as vOut msbs
180
+ // 2 lsbs of upper2 as vOut msb
181
+ v = (lo >> 4) | (hi << 1) | (hi2 << 6);
182
+ break;
183
+ case 4:
184
+ // 3 msbs of lower as vOut lsbs
185
+ // 5 lsbs of upper as vOut msbs
186
+ v = (lo >> 2) | (hi << 3);
187
+ break;
188
+ }
189
+ } else if (bitsPerCode == 6) {
190
+ switch (k % 3) {
191
+ case 0:
192
+ // 6 msbs of lower as vOut lsbs
193
+ // 2 lsbs of upper as vOut msbs
194
+ v = (lo & 0x3f) | (hi << 6);
195
+ break;
196
+ case 1:
197
+ // 4 msbs of lower as vOut lsbs
198
+ // 4 lsbs of upper as vOut msbs
199
+ v = (lo >> 2) | (hi << 4);
200
+ break;
201
+ case 2:
202
+ // 2 msbs of lower as vOut lsbs
203
+ // 6 lsbs of upper as vOut msbs
204
+ v = (lo >> 4) | (hi << 2);
205
+ break;
206
+ }
207
+ } else {
208
+ // unimplemented
209
+ EXPECT_TRUE(false);
210
+ }
211
+
212
+ data[i * bytesPerBlock + j * bytesPerDimBlock + k] = v;
213
+ }
214
+ }
215
+ }
216
+ } else {
217
+ // unimplemented
218
+ EXPECT_TRUE(false);
219
+ }
220
+
221
+ auto up = unpackInterleaved(data, numVecs, dims, bitsPerCode);
222
+ auto p = packInterleaved(up, numVecs, dims, bitsPerCode);
223
+
224
+ EXPECT_EQ(data, p);
225
+ }
226
+ }
227
+ }
228
+ }
229
+
230
+ TEST(TestCodePacking, InterleavedCodes_PackUnpack) {
231
+ using namespace faiss::gpu;
232
+
233
+ // We are fine using non-fixed seeds here, the results should be fully
234
+ // deterministic
235
+ std::random_device rd;
236
+ std::mt19937 gen(rd());
237
+ std::uniform_int_distribution<uint8_t> dist;
238
+
239
+ for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
240
+ for (auto dims : {1, 7, 8, 31, 32}) {
241
+ for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
242
+ std::cout << bitsPerCode << " " << dims << " " << numVecs << "\n";
243
+
244
+ std::vector<uint8_t> data(numVecs * dims * utils::divUp(bitsPerCode, 8));
245
+
246
+ if (bitsPerCode == 8 || bitsPerCode == 16 || bitsPerCode == 32) {
247
+ for (auto& v : data) {
248
+ v = dist(gen);
249
+ }
250
+ } else if (bitsPerCode < 8) {
251
+ uint8_t mask = 0xff >> (8 - bitsPerCode);
252
+
253
+ for (auto& v : data) {
254
+ v = dist(gen) & mask;
255
+ }
256
+ } else {
257
+ // unimplemented
258
+ EXPECT_TRUE(false);
259
+ }
260
+
261
+ auto p = packInterleaved(data, numVecs, dims, bitsPerCode);
262
+ auto up = unpackInterleaved(p, numVecs, dims, bitsPerCode);
263
+
264
+ EXPECT_EQ(data, up);
265
+ }
266
+ }
267
+ }
268
+ }
269
+
270
+ int main(int argc, char** argv) {
271
+ testing::InitGoogleTest(&argc, argv);
272
+
273
+ return RUN_ALL_TESTS();
274
+ }
@@ -206,6 +206,8 @@ void copyToTest(bool useFloat16CoarseQuantizer) {
206
206
  EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
207
207
  EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
208
208
 
209
+ testIVFEquality(cpuIndex, gpuIndex);
210
+
209
211
  // Query both objects; results should be equivalent
210
212
  bool compFloat16 = useFloat16CoarseQuantizer;
211
213
  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
@@ -255,6 +257,8 @@ void copyFromTest(bool useFloat16CoarseQuantizer) {
255
257
  EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
256
258
  EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
257
259
 
260
+ testIVFEquality(cpuIndex, gpuIndex);
261
+
258
262
  // Query both objects; results should be equivalent
259
263
  bool compFloat16 = useFloat16CoarseQuantizer;
260
264
  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
@@ -466,9 +470,10 @@ TEST(TestGpuIndexIVFFlat, AddNaN) {
466
470
  std::vector<float> nans(numNans * opt.dim,
467
471
  std::numeric_limits<float>::quiet_NaN());
468
472
 
469
- // Make one vector valid, which should actually add
473
+ // Make one vector valid (not the first vector, in order to test offset
474
+ // issues), which should actually add
470
475
  for (int i = 0; i < opt.dim; ++i) {
471
- nans[i] = 0.0f;
476
+ nans[opt.dim + i] = i;
472
477
  }
473
478
 
474
479
  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
@@ -426,6 +426,8 @@ TEST(TestGpuIndexIVFPQ, CopyTo) {
426
426
  EXPECT_EQ(cpuIndex.pq.nbits, gpuIndex.getBitsPerCode());
427
427
  EXPECT_EQ(gpuIndex.getBitsPerCode(), opt.bitsPerCode);
428
428
 
429
+ testIVFEquality(cpuIndex, gpuIndex);
430
+
429
431
  // Query both objects; results should be equivalent
430
432
  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
431
433
  opt.numQuery, opt.dim, opt.k, opt.toString(),
@@ -458,7 +460,7 @@ TEST(TestGpuIndexIVFPQ, CopyFrom) {
458
460
 
459
461
  // Use garbage values to see if we overwrite them
460
462
  faiss::gpu::GpuIndexIVFPQ
461
- gpuIndex(&res, 1, 1, 1, 1, faiss::METRIC_L2, config);
463
+ gpuIndex(&res, 1, 1, 1, 8, faiss::METRIC_L2, config);
462
464
  gpuIndex.setNumProbes(1);
463
465
 
464
466
  gpuIndex.copyFrom(&cpuIndex);
@@ -476,6 +478,8 @@ TEST(TestGpuIndexIVFPQ, CopyFrom) {
476
478
  EXPECT_EQ(cpuIndex.pq.nbits, gpuIndex.getBitsPerCode());
477
479
  EXPECT_EQ(gpuIndex.getBitsPerCode(), opt.bitsPerCode);
478
480
 
481
+ testIVFEquality(cpuIndex, gpuIndex);
482
+
479
483
  // Query both objects; results should be equivalent
480
484
  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
481
485
  opt.numQuery, opt.dim, opt.k, opt.toString(),
@@ -0,0 +1,231 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/IndexFlat.h>
10
+ #include <faiss/IndexScalarQuantizer.h>
11
+ #include <faiss/gpu/GpuIndexIVFScalarQuantizer.h>
12
+ #include <faiss/gpu/StandardGpuResources.h>
13
+ #include <faiss/gpu/utils/DeviceUtils.h>
14
+ #include <faiss/gpu/test/TestUtils.h>
15
+ #include <cmath>
16
+ #include <gtest/gtest.h>
17
+ #include <sstream>
18
+ #include <vector>
19
+
20
+ constexpr float kF32MaxRelErr = 0.03f;
21
+
22
+ struct Options {
23
+ Options() {
24
+ numAdd = 2 * faiss::gpu::randVal(2000, 5000);
25
+ dim = faiss::gpu::randVal(64, 200);
26
+
27
+ numCentroids = std::sqrt((float) numAdd / 2);
28
+ numTrain = numCentroids * 40;
29
+ nprobe = faiss::gpu::randVal(std::min(10, numCentroids), numCentroids);
30
+ numQuery = faiss::gpu::randVal(32, 100);
31
+
32
+ // Due to the approximate nature of the query and of floating point
33
+ // differences between GPU and CPU, to stay within our error bounds, only
34
+ // use a small k
35
+ k = std::min(faiss::gpu::randVal(10, 30), numAdd / 40);
36
+ indicesOpt = faiss::gpu::randSelect({
37
+ faiss::gpu::INDICES_CPU,
38
+ faiss::gpu::INDICES_32_BIT,
39
+ faiss::gpu::INDICES_64_BIT});
40
+
41
+ device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
42
+ }
43
+
44
+ std::string toString() const {
45
+ std::stringstream str;
46
+ str << "IVFFlat device " << device
47
+ << " numVecs " << numAdd
48
+ << " dim " << dim
49
+ << " numCentroids " << numCentroids
50
+ << " nprobe " << nprobe
51
+ << " numQuery " << numQuery
52
+ << " k " << k
53
+ << " indicesOpt " << indicesOpt;
54
+
55
+ return str.str();
56
+ }
57
+
58
+ int numAdd;
59
+ int dim;
60
+ int numCentroids;
61
+ int numTrain;
62
+ int nprobe;
63
+ int numQuery;
64
+ int k;
65
+ int device;
66
+ faiss::gpu::IndicesOptions indicesOpt;
67
+ };
68
+
69
+ void runCopyToTest(faiss::ScalarQuantizer::QuantizerType qtype) {
70
+ using namespace faiss;
71
+ using namespace faiss::gpu;
72
+
73
+ Options opt;
74
+ std::vector<float> trainVecs = randVecs(opt.numTrain, opt.dim);
75
+ std::vector<float> addVecs = randVecs(opt.numAdd, opt.dim);
76
+
77
+ StandardGpuResources res;
78
+ res.noTempMemory();
79
+
80
+ auto config = GpuIndexIVFScalarQuantizerConfig();
81
+ config.device = opt.device;
82
+
83
+ GpuIndexIVFScalarQuantizer gpuIndex(&res,
84
+ opt.dim,
85
+ opt.numCentroids,
86
+ qtype,
87
+ METRIC_L2,
88
+ true,
89
+ config);
90
+ gpuIndex.train(opt.numTrain, trainVecs.data());
91
+ gpuIndex.add(opt.numAdd, addVecs.data());
92
+ gpuIndex.setNumProbes(opt.nprobe);
93
+
94
+ // use garbage values to see if we overwrite then
95
+ IndexFlatL2 cpuQuantizer(1);
96
+ IndexIVFScalarQuantizer cpuIndex(&cpuQuantizer, 1, 1,
97
+ ScalarQuantizer::QuantizerType::QT_6bit,
98
+ METRIC_L2);
99
+ cpuIndex.nprobe = 1;
100
+
101
+ gpuIndex.copyTo(&cpuIndex);
102
+
103
+ EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
104
+ EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
105
+
106
+ EXPECT_EQ(cpuIndex.d, gpuIndex.d);
107
+ EXPECT_EQ(cpuIndex.quantizer->d, gpuIndex.quantizer->d);
108
+ EXPECT_EQ(cpuIndex.d, opt.dim);
109
+ EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
110
+ EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
111
+
112
+ testIVFEquality(cpuIndex, gpuIndex);
113
+
114
+ // Query both objects; results should be equivalent
115
+ compareIndices(cpuIndex, gpuIndex,
116
+ opt.numQuery, opt.dim, opt.k, opt.toString(),
117
+ kF32MaxRelErr,
118
+ 0.1f,
119
+ 0.015f);
120
+ }
121
+
122
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyTo_fp16) {
123
+ runCopyToTest(faiss::ScalarQuantizer::QuantizerType::QT_fp16);
124
+ }
125
+
126
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyTo_8bit) {
127
+ runCopyToTest(faiss::ScalarQuantizer::QuantizerType::QT_8bit);
128
+ }
129
+
130
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyTo_8bit_uniform) {
131
+ runCopyToTest(faiss::ScalarQuantizer::QuantizerType::QT_8bit_uniform);
132
+ }
133
+
134
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyTo_6bit) {
135
+ runCopyToTest(faiss::ScalarQuantizer::QuantizerType::QT_6bit);
136
+ }
137
+
138
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyTo_4bit) {
139
+ runCopyToTest(faiss::ScalarQuantizer::QuantizerType::QT_4bit);
140
+ }
141
+
142
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyTo_4bit_uniform) {
143
+ runCopyToTest(faiss::ScalarQuantizer::QuantizerType::QT_4bit_uniform);
144
+ }
145
+
146
+ void runCopyFromTest(faiss::ScalarQuantizer::QuantizerType qtype) {
147
+ using namespace faiss;
148
+ using namespace faiss::gpu;
149
+
150
+ Options opt;
151
+ std::vector<float> trainVecs = randVecs(opt.numTrain, opt.dim);
152
+ std::vector<float> addVecs = randVecs(opt.numAdd, opt.dim);
153
+
154
+ IndexFlatL2 cpuQuantizer(opt.dim);
155
+ IndexIVFScalarQuantizer cpuIndex(&cpuQuantizer, opt.dim, opt.numCentroids,
156
+ qtype,
157
+ METRIC_L2);
158
+
159
+ cpuIndex.nprobe = opt.nprobe;
160
+ cpuIndex.train(opt.numTrain, trainVecs.data());
161
+ cpuIndex.add(opt.numAdd, addVecs.data());
162
+
163
+ // use garbage values to see if we overwrite then
164
+ StandardGpuResources res;
165
+ res.noTempMemory();
166
+
167
+ auto config = GpuIndexIVFScalarQuantizerConfig();
168
+ config.device = opt.device;
169
+
170
+ GpuIndexIVFScalarQuantizer gpuIndex(
171
+ &res,
172
+ 1,
173
+ 1,
174
+ ScalarQuantizer::QuantizerType::QT_4bit,
175
+ METRIC_L2,
176
+ false,
177
+ config);
178
+ gpuIndex.setNumProbes(1);
179
+
180
+ gpuIndex.copyFrom(&cpuIndex);
181
+
182
+ EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
183
+ EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
184
+
185
+ EXPECT_EQ(cpuIndex.d, gpuIndex.d);
186
+ EXPECT_EQ(cpuIndex.d, opt.dim);
187
+ EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
188
+ EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
189
+
190
+ testIVFEquality(cpuIndex, gpuIndex);
191
+
192
+ // Query both objects; results should be equivalent
193
+ compareIndices(cpuIndex, gpuIndex,
194
+ opt.numQuery, opt.dim, opt.k, opt.toString(),
195
+ kF32MaxRelErr,
196
+ 0.1f,
197
+ 0.015f);
198
+ }
199
+
200
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyFrom_fp16) {
201
+ runCopyFromTest(faiss::ScalarQuantizer::QuantizerType::QT_fp16);
202
+ }
203
+
204
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyFrom_8bit) {
205
+ runCopyFromTest(faiss::ScalarQuantizer::QuantizerType::QT_8bit);
206
+ }
207
+
208
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyFrom_8bit_uniform) {
209
+ runCopyFromTest(faiss::ScalarQuantizer::QuantizerType::QT_8bit_uniform);
210
+ }
211
+
212
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyFrom_6bit) {
213
+ runCopyFromTest(faiss::ScalarQuantizer::QuantizerType::QT_6bit);
214
+ }
215
+
216
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyFrom_4bit) {
217
+ runCopyFromTest(faiss::ScalarQuantizer::QuantizerType::QT_4bit);
218
+ }
219
+
220
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyFrom_4bit_uniform) {
221
+ runCopyFromTest(faiss::ScalarQuantizer::QuantizerType::QT_4bit_uniform);
222
+ }
223
+
224
+ int main(int argc, char** argv) {
225
+ testing::InitGoogleTest(&argc, argv);
226
+
227
+ // just run with a fixed test seed
228
+ faiss::gpu::setTestSeed(100);
229
+
230
+ return RUN_ALL_TESTS();
231
+ }