faiss 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +1 -1
  6. data/lib/faiss/version.rb +1 -1
  7. data/vendor/faiss/faiss/AutoTune.cpp +36 -33
  8. data/vendor/faiss/faiss/AutoTune.h +6 -3
  9. data/vendor/faiss/faiss/Clustering.cpp +16 -12
  10. data/vendor/faiss/faiss/Index.cpp +3 -4
  11. data/vendor/faiss/faiss/Index.h +3 -3
  12. data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
  13. data/vendor/faiss/faiss/IndexBinary.h +1 -1
  14. data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
  15. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
  16. data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
  17. data/vendor/faiss/faiss/IndexFlat.h +0 -51
  18. data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
  19. data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
  20. data/vendor/faiss/faiss/IndexIVF.h +22 -15
  21. data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
  22. data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
  23. data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
  24. data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
  25. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
  26. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
  27. data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
  28. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  29. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
  30. data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
  31. data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
  32. data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
  33. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
  34. data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
  35. data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
  36. data/vendor/faiss/faiss/IndexRefine.h +73 -0
  37. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
  38. data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
  39. data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
  40. data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
  41. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
  42. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
  43. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
  44. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
  45. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
  46. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
  47. data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
  48. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
  49. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
  50. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
  51. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
  52. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
  53. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
  54. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
  55. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
  56. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
  57. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
  58. data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
  59. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
  60. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
  61. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
  62. data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
  63. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
  64. data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
  65. data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
  66. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
  67. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
  68. data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
  69. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
  70. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
  71. data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
  72. data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
  73. data/vendor/faiss/faiss/impl/io.cpp +33 -2
  74. data/vendor/faiss/faiss/impl/io.h +7 -2
  75. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
  76. data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
  77. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
  78. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
  79. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
  80. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
  81. data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
  82. data/vendor/faiss/faiss/index_factory.cpp +112 -7
  83. data/vendor/faiss/faiss/index_io.h +1 -48
  84. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
  85. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
  86. data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
  87. data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
  88. data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
  89. data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
  90. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
  91. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
  92. data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
  93. data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
  94. data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
  95. data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
  96. data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
  97. data/vendor/faiss/faiss/utils/Heap.h +61 -50
  98. data/vendor/faiss/faiss/utils/distances.cpp +164 -319
  99. data/vendor/faiss/faiss/utils/distances.h +28 -20
  100. data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
  101. data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
  102. data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
  103. data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
  104. data/vendor/faiss/faiss/utils/hamming.h +2 -7
  105. data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
  106. data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
  107. data/vendor/faiss/faiss/utils/partitioning.h +69 -0
  108. data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
  109. data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
  110. data/vendor/faiss/faiss/utils/simdlib.h +31 -0
  111. data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
  112. data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
  113. metadata +43 -141
  114. data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
  115. data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
  116. data/vendor/faiss/c_api/AutoTune_c.h +0 -66
  117. data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
  118. data/vendor/faiss/c_api/Clustering_c.h +0 -123
  119. data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
  120. data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
  121. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
  122. data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
  123. data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
  124. data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
  125. data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
  126. data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
  127. data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
  128. data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
  129. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
  130. data/vendor/faiss/c_api/IndexShards_c.h +0 -39
  131. data/vendor/faiss/c_api/Index_c.cpp +0 -105
  132. data/vendor/faiss/c_api/Index_c.h +0 -183
  133. data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
  134. data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
  135. data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
  136. data/vendor/faiss/c_api/clone_index_c.h +0 -32
  137. data/vendor/faiss/c_api/error_c.h +0 -42
  138. data/vendor/faiss/c_api/error_impl.cpp +0 -27
  139. data/vendor/faiss/c_api/error_impl.h +0 -16
  140. data/vendor/faiss/c_api/faiss_c.h +0 -58
  141. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
  142. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
  143. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
  144. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
  145. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
  146. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
  147. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
  148. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
  149. data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
  150. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
  151. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
  152. data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
  153. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
  154. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
  155. data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
  156. data/vendor/faiss/c_api/index_factory_c.h +0 -30
  157. data/vendor/faiss/c_api/index_io_c.cpp +0 -42
  158. data/vendor/faiss/c_api/index_io_c.h +0 -50
  159. data/vendor/faiss/c_api/macros_impl.h +0 -110
  160. data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
  161. data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
  162. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
  163. data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
  164. data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
  165. data/vendor/faiss/misc/test_blas.cpp +0 -87
  166. data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
  167. data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
  168. data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
  169. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
  170. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
  171. data/vendor/faiss/tests/test_merge.cpp +0 -260
  172. data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
  173. data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
  174. data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
  175. data/vendor/faiss/tests/test_params_override.cpp +0 -236
  176. data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
  177. data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
  178. data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
  179. data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
  180. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
  181. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
  182. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
  183. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
  184. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -0,0 +1,51 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <stdint.h>
11
+ #include <vector>
12
+
13
+ // Utilities for bit packing and unpacking CPU non-interleaved and GPU
14
+ // interleaved by 32 encodings
15
+ namespace faiss { namespace gpu {
16
+
17
+ // Unpacks arbitrary bitwidth codes to a whole number of bytes per code
18
+ // The layout of the input is (v0 d0)(v0 d1) ... (v0 dD)(v1 d0) ...
19
+ // (bit packed)
20
+ // The layout of the output is the same (byte packed to roundUp(bitsPerCode, 8)
21
+ // / 8 bytes)
22
+ std::vector<uint8_t> unpackNonInterleaved(std::vector<uint8_t> data,
23
+ int numVecs,
24
+ int dims,
25
+ int bitsPerCode);
26
+
27
+ // Unpacks arbitrary bitwidth codes to a whole number of bytes per scalar code
28
+ // The layout of the input is (v0 d0)(v1 d0) ... (v31 d0)(v0 d1) ...
29
+ // (bit packed)
30
+ // The layout of the output is (v0 d0)(v0 d1) ... (v0 dD)(v1 d0) ...
31
+ // (byte packed)
32
+ std::vector<uint8_t> unpackInterleaved(std::vector<uint8_t> data,
33
+ int numVecs,
34
+ int dims,
35
+ int bitsPerCode);
36
+
37
+ // Packs data in the byte packed non-interleaved form to bit packed
38
+ // non-interleaved form
39
+ std::vector<uint8_t> packNonInterleaved(std::vector<uint8_t> data,
40
+ int numVecs,
41
+ int dims,
42
+ int bitsPerCode);
43
+
44
+ // Packs data in the byte packed non-interleaved form to bit packed
45
+ // interleaved form
46
+ std::vector<uint8_t> packInterleaved(std::vector<uint8_t> data,
47
+ int numVecs,
48
+ int dims,
49
+ int bitsPerCode);
50
+
51
+ } } // namespace
@@ -14,17 +14,17 @@ namespace faiss { namespace gpu {
14
14
  // Utility function to translate (list id, offset) to a user index on
15
15
  // the CPU. In a cpp in order to use OpenMP
16
16
  void ivfOffsetToUserIndex(
17
- long* indices,
17
+ Index::idx_t* indices,
18
18
  int numLists,
19
19
  int queries,
20
20
  int k,
21
- const std::vector<std::vector<long>>& listOffsetToUserIndex) {
21
+ const std::vector<std::vector<Index::idx_t>>& listOffsetToUserIndex) {
22
22
  FAISS_ASSERT(numLists == listOffsetToUserIndex.size());
23
23
 
24
24
  #pragma omp parallel for
25
25
  for (int q = 0; q < queries; ++q) {
26
26
  for (int r = 0; r < k; ++r) {
27
- long offsetIndex = indices[q * k + r];
27
+ auto offsetIndex = indices[q * k + r];
28
28
 
29
29
  if (offsetIndex < 0) continue;
30
30
 
@@ -8,6 +8,7 @@
8
8
 
9
9
  #pragma once
10
10
 
11
+ #include <faiss/Index.h>
11
12
  #include <vector>
12
13
 
13
14
  namespace faiss { namespace gpu {
@@ -15,10 +16,10 @@ namespace faiss { namespace gpu {
15
16
  /// Utility function to translate (list id, offset) to a user index on
16
17
  /// the CPU. In a cpp in order to use OpenMP.
17
18
  void ivfOffsetToUserIndex(
18
- long* indices,
19
+ Index::idx_t* indices,
19
20
  int numLists,
20
21
  int queries,
21
22
  int k,
22
- const std::vector<std::vector<long>>& listOffsetToUserIndex);
23
+ const std::vector<std::vector<Index::idx_t>>& listOffsetToUserIndex);
23
24
 
24
25
  } } // namespace
@@ -0,0 +1,274 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/gpu/impl/InterleavedCodes.h>
10
+ #include <faiss/gpu/utils/StaticUtils.h>
11
+ #include <faiss/gpu/test/TestUtils.h>
12
+ #include <cmath>
13
+ #include <gtest/gtest.h>
14
+ #include <random>
15
+ #include <sstream>
16
+ #include <vector>
17
+
18
+ TEST(TestCodePacking, NonInterleavedCodes_UnpackPack) {
19
+ using namespace faiss::gpu;
20
+
21
+ // We are fine using non-fixed seeds here, the results should be fully
22
+ // deterministic
23
+ auto seed = std::random_device()();
24
+ std::mt19937 gen(seed);
25
+ std::uniform_int_distribution<uint8_t> dist;
26
+
27
+ std::cout << "seed " << seed << "\n";
28
+
29
+ for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
30
+ for (auto dims : {1, 7, 8, 31, 32}) {
31
+ for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
32
+ std::cout << bitsPerCode << " " << dims << " " << numVecs << "\n";
33
+
34
+ int srcVecSize = utils::divUp(dims * bitsPerCode, 8);
35
+ std::vector<uint8_t> data(numVecs * srcVecSize);
36
+
37
+ for (auto& v : data) {
38
+ v = dist(gen);
39
+ }
40
+
41
+ // currently unimplemented
42
+ EXPECT_FALSE(bitsPerCode > 8 && bitsPerCode % 8 != 0);
43
+
44
+ // Due to bit packing, mask out bits that should be zero based on
45
+ // dimensions we shouldn't have present
46
+ int vectorSizeBits = dims * bitsPerCode;
47
+ int vectorSizeBytes = utils::divUp(vectorSizeBits, 8);
48
+ int remainder = vectorSizeBits % 8;
49
+
50
+ if (remainder > 0) {
51
+ uint8_t mask = 0xff >> (8 - remainder);
52
+
53
+ for (int i = 0; i < numVecs; ++i) {
54
+ int lastVecByte = (i + 1) * vectorSizeBytes - 1;
55
+ data[lastVecByte] &= mask;
56
+ }
57
+ }
58
+
59
+ auto up = unpackNonInterleaved(data, numVecs, dims, bitsPerCode);
60
+ auto p = packNonInterleaved(up, numVecs, dims, bitsPerCode);
61
+
62
+ EXPECT_EQ(data, p);
63
+ }
64
+ }
65
+ }
66
+ }
67
+
68
+ TEST(TestCodePacking, NonInterleavedCodes_PackUnpack) {
69
+ using namespace faiss::gpu;
70
+
71
+ // We are fine using non-fixed seeds here, the results should be fully
72
+ // deterministic
73
+ std::random_device rd;
74
+ std::mt19937 gen(rd());
75
+ std::uniform_int_distribution<uint8_t> dist;
76
+
77
+ for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
78
+ for (auto dims : {1, 7, 8, 31, 32}) {
79
+ for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
80
+ std::cout << bitsPerCode << " " << dims << " " << numVecs << "\n";
81
+
82
+ std::vector<uint8_t> data(numVecs * dims * utils::divUp(bitsPerCode, 8));
83
+
84
+ // currently unimplemented
85
+ EXPECT_FALSE(bitsPerCode > 8 && bitsPerCode % 8 != 0);
86
+
87
+ // Mask out high bits we shouldn't have based on code size
88
+ uint8_t mask = bitsPerCode < 8 ? (0xff >> (8 - bitsPerCode)) : 0xff;
89
+
90
+ for (auto& v : data) {
91
+ v = dist(gen) & mask;
92
+ }
93
+
94
+ auto p = packNonInterleaved(data, numVecs, dims, bitsPerCode);
95
+ auto up = unpackNonInterleaved(p, numVecs, dims, bitsPerCode);
96
+
97
+ EXPECT_EQ(data, up);
98
+ }
99
+ }
100
+ }
101
+ }
102
+
103
+ TEST(TestCodePacking, InterleavedCodes_UnpackPack) {
104
+ using namespace faiss::gpu;
105
+
106
+ // We are fine using non-fixed seeds here, the results should be fully
107
+ // deterministic
108
+ std::random_device rd;
109
+ std::mt19937 gen(rd());
110
+ std::uniform_int_distribution<uint8_t> dist;
111
+
112
+ for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
113
+ for (auto dims : {1, 7, 8, 31, 32}) {
114
+ for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
115
+ std::cout << bitsPerCode << " " << dims << " " << numVecs << "\n";
116
+
117
+ int blocks = utils::divUp(numVecs, 32);
118
+ int bytesPerDimBlock = 32 * bitsPerCode / 8;
119
+ int bytesPerBlock = bytesPerDimBlock * dims;
120
+ int size = blocks * bytesPerBlock;
121
+
122
+ std::vector<uint8_t> data(size);
123
+
124
+ if (bitsPerCode == 8 || bitsPerCode == 16 || bitsPerCode == 32) {
125
+ int bytesPerCode = bitsPerCode / 8;
126
+
127
+ for (int i = 0; i < blocks; ++i) {
128
+ for (int j = 0; j < dims; ++j) {
129
+ for (int k = 0; k < 32; ++k) {
130
+ for (int l = 0; l < bytesPerCode; ++l) {
131
+ int vec = i * 32 + k;
132
+ if (vec < numVecs) {
133
+ data[i * bytesPerBlock +
134
+ j * bytesPerDimBlock +
135
+ k * bytesPerCode + l] = dist(gen);
136
+ }
137
+ }
138
+ }
139
+ }
140
+ }
141
+ } else if (bitsPerCode < 8) {
142
+ for (int i = 0; i < blocks; ++i) {
143
+ for (int j = 0; j < dims; ++j) {
144
+ for (int k = 0; k < bytesPerDimBlock; ++k) {
145
+ int loVec = i * 32 + (k * 8) / bitsPerCode;
146
+ int hiVec = loVec + 1;
147
+ int hiVec2 = hiVec + 1;
148
+
149
+ uint8_t lo = loVec < numVecs ?
150
+ dist(gen) & (0xff >> (8 - bitsPerCode)) : 0;
151
+ uint8_t hi = hiVec < numVecs ?
152
+ dist(gen) & (0xff >> (8 - bitsPerCode)) : 0;
153
+ uint8_t hi2 = hiVec2 < numVecs ?
154
+ dist(gen) & (0xff >> (8 - bitsPerCode)) : 0;
155
+
156
+ uint8_t v = 0;
157
+ if (bitsPerCode == 4) {
158
+ v = lo | (hi << 4);
159
+ } else if (bitsPerCode == 5) {
160
+ switch (k % 5) {
161
+ case 0:
162
+ // 5 msbs of lower as vOut lsbs
163
+ // 3 lsbs of upper as vOut msbs
164
+ v = (lo & 0x1f) | (hi << 5);
165
+ break;
166
+ case 1:
167
+ // 2 msbs of lower as vOut lsbs
168
+ // 5 lsbs of upper as vOut msbs
169
+ // 1 lsbs of upper2 as vOut msb
170
+ v = (lo >> 3) | (hi << 2) | (hi2 << 7);
171
+ break;
172
+ case 2:
173
+ // 4 msbs of lower as vOut lsbs
174
+ // 4 lsbs of upper as vOut msbs
175
+ v = (lo >> 1) | (hi << 4);
176
+ break;
177
+ case 3:
178
+ // 1 msbs of lower as vOut lsbs
179
+ // 5 lsbs of upper as vOut msbs
180
+ // 2 lsbs of upper2 as vOut msb
181
+ v = (lo >> 4) | (hi << 1) | (hi2 << 6);
182
+ break;
183
+ case 4:
184
+ // 3 msbs of lower as vOut lsbs
185
+ // 5 lsbs of upper as vOut msbs
186
+ v = (lo >> 2) | (hi << 3);
187
+ break;
188
+ }
189
+ } else if (bitsPerCode == 6) {
190
+ switch (k % 3) {
191
+ case 0:
192
+ // 6 msbs of lower as vOut lsbs
193
+ // 2 lsbs of upper as vOut msbs
194
+ v = (lo & 0x3f) | (hi << 6);
195
+ break;
196
+ case 1:
197
+ // 4 msbs of lower as vOut lsbs
198
+ // 4 lsbs of upper as vOut msbs
199
+ v = (lo >> 2) | (hi << 4);
200
+ break;
201
+ case 2:
202
+ // 2 msbs of lower as vOut lsbs
203
+ // 6 lsbs of upper as vOut msbs
204
+ v = (lo >> 4) | (hi << 2);
205
+ break;
206
+ }
207
+ } else {
208
+ // unimplemented
209
+ EXPECT_TRUE(false);
210
+ }
211
+
212
+ data[i * bytesPerBlock + j * bytesPerDimBlock + k] = v;
213
+ }
214
+ }
215
+ }
216
+ } else {
217
+ // unimplemented
218
+ EXPECT_TRUE(false);
219
+ }
220
+
221
+ auto up = unpackInterleaved(data, numVecs, dims, bitsPerCode);
222
+ auto p = packInterleaved(up, numVecs, dims, bitsPerCode);
223
+
224
+ EXPECT_EQ(data, p);
225
+ }
226
+ }
227
+ }
228
+ }
229
+
230
+ TEST(TestCodePacking, InterleavedCodes_PackUnpack) {
231
+ using namespace faiss::gpu;
232
+
233
+ // We are fine using non-fixed seeds here, the results should be fully
234
+ // deterministic
235
+ std::random_device rd;
236
+ std::mt19937 gen(rd());
237
+ std::uniform_int_distribution<uint8_t> dist;
238
+
239
+ for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
240
+ for (auto dims : {1, 7, 8, 31, 32}) {
241
+ for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
242
+ std::cout << bitsPerCode << " " << dims << " " << numVecs << "\n";
243
+
244
+ std::vector<uint8_t> data(numVecs * dims * utils::divUp(bitsPerCode, 8));
245
+
246
+ if (bitsPerCode == 8 || bitsPerCode == 16 || bitsPerCode == 32) {
247
+ for (auto& v : data) {
248
+ v = dist(gen);
249
+ }
250
+ } else if (bitsPerCode < 8) {
251
+ uint8_t mask = 0xff >> (8 - bitsPerCode);
252
+
253
+ for (auto& v : data) {
254
+ v = dist(gen) & mask;
255
+ }
256
+ } else {
257
+ // unimplemented
258
+ EXPECT_TRUE(false);
259
+ }
260
+
261
+ auto p = packInterleaved(data, numVecs, dims, bitsPerCode);
262
+ auto up = unpackInterleaved(p, numVecs, dims, bitsPerCode);
263
+
264
+ EXPECT_EQ(data, up);
265
+ }
266
+ }
267
+ }
268
+ }
269
+
270
+ int main(int argc, char** argv) {
271
+ testing::InitGoogleTest(&argc, argv);
272
+
273
+ return RUN_ALL_TESTS();
274
+ }
@@ -206,6 +206,8 @@ void copyToTest(bool useFloat16CoarseQuantizer) {
206
206
  EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
207
207
  EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
208
208
 
209
+ testIVFEquality(cpuIndex, gpuIndex);
210
+
209
211
  // Query both objects; results should be equivalent
210
212
  bool compFloat16 = useFloat16CoarseQuantizer;
211
213
  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
@@ -255,6 +257,8 @@ void copyFromTest(bool useFloat16CoarseQuantizer) {
255
257
  EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
256
258
  EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
257
259
 
260
+ testIVFEquality(cpuIndex, gpuIndex);
261
+
258
262
  // Query both objects; results should be equivalent
259
263
  bool compFloat16 = useFloat16CoarseQuantizer;
260
264
  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
@@ -466,9 +470,10 @@ TEST(TestGpuIndexIVFFlat, AddNaN) {
466
470
  std::vector<float> nans(numNans * opt.dim,
467
471
  std::numeric_limits<float>::quiet_NaN());
468
472
 
469
- // Make one vector valid, which should actually add
473
+ // Make one vector valid (not the first vector, in order to test offset
474
+ // issues), which should actually add
470
475
  for (int i = 0; i < opt.dim; ++i) {
471
- nans[i] = 0.0f;
476
+ nans[opt.dim + i] = i;
472
477
  }
473
478
 
474
479
  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
@@ -426,6 +426,8 @@ TEST(TestGpuIndexIVFPQ, CopyTo) {
426
426
  EXPECT_EQ(cpuIndex.pq.nbits, gpuIndex.getBitsPerCode());
427
427
  EXPECT_EQ(gpuIndex.getBitsPerCode(), opt.bitsPerCode);
428
428
 
429
+ testIVFEquality(cpuIndex, gpuIndex);
430
+
429
431
  // Query both objects; results should be equivalent
430
432
  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
431
433
  opt.numQuery, opt.dim, opt.k, opt.toString(),
@@ -458,7 +460,7 @@ TEST(TestGpuIndexIVFPQ, CopyFrom) {
458
460
 
459
461
  // Use garbage values to see if we overwrite them
460
462
  faiss::gpu::GpuIndexIVFPQ
461
- gpuIndex(&res, 1, 1, 1, 1, faiss::METRIC_L2, config);
463
+ gpuIndex(&res, 1, 1, 1, 8, faiss::METRIC_L2, config);
462
464
  gpuIndex.setNumProbes(1);
463
465
 
464
466
  gpuIndex.copyFrom(&cpuIndex);
@@ -476,6 +478,8 @@ TEST(TestGpuIndexIVFPQ, CopyFrom) {
476
478
  EXPECT_EQ(cpuIndex.pq.nbits, gpuIndex.getBitsPerCode());
477
479
  EXPECT_EQ(gpuIndex.getBitsPerCode(), opt.bitsPerCode);
478
480
 
481
+ testIVFEquality(cpuIndex, gpuIndex);
482
+
479
483
  // Query both objects; results should be equivalent
480
484
  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
481
485
  opt.numQuery, opt.dim, opt.k, opt.toString(),
@@ -0,0 +1,231 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/IndexFlat.h>
10
+ #include <faiss/IndexScalarQuantizer.h>
11
+ #include <faiss/gpu/GpuIndexIVFScalarQuantizer.h>
12
+ #include <faiss/gpu/StandardGpuResources.h>
13
+ #include <faiss/gpu/utils/DeviceUtils.h>
14
+ #include <faiss/gpu/test/TestUtils.h>
15
+ #include <cmath>
16
+ #include <gtest/gtest.h>
17
+ #include <sstream>
18
+ #include <vector>
19
+
20
+ constexpr float kF32MaxRelErr = 0.03f;
21
+
22
+ struct Options {
23
+ Options() {
24
+ numAdd = 2 * faiss::gpu::randVal(2000, 5000);
25
+ dim = faiss::gpu::randVal(64, 200);
26
+
27
+ numCentroids = std::sqrt((float) numAdd / 2);
28
+ numTrain = numCentroids * 40;
29
+ nprobe = faiss::gpu::randVal(std::min(10, numCentroids), numCentroids);
30
+ numQuery = faiss::gpu::randVal(32, 100);
31
+
32
+ // Due to the approximate nature of the query and of floating point
33
+ // differences between GPU and CPU, to stay within our error bounds, only
34
+ // use a small k
35
+ k = std::min(faiss::gpu::randVal(10, 30), numAdd / 40);
36
+ indicesOpt = faiss::gpu::randSelect({
37
+ faiss::gpu::INDICES_CPU,
38
+ faiss::gpu::INDICES_32_BIT,
39
+ faiss::gpu::INDICES_64_BIT});
40
+
41
+ device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
42
+ }
43
+
44
+ std::string toString() const {
45
+ std::stringstream str;
46
+ str << "IVFFlat device " << device
47
+ << " numVecs " << numAdd
48
+ << " dim " << dim
49
+ << " numCentroids " << numCentroids
50
+ << " nprobe " << nprobe
51
+ << " numQuery " << numQuery
52
+ << " k " << k
53
+ << " indicesOpt " << indicesOpt;
54
+
55
+ return str.str();
56
+ }
57
+
58
+ int numAdd;
59
+ int dim;
60
+ int numCentroids;
61
+ int numTrain;
62
+ int nprobe;
63
+ int numQuery;
64
+ int k;
65
+ int device;
66
+ faiss::gpu::IndicesOptions indicesOpt;
67
+ };
68
+
69
+ void runCopyToTest(faiss::ScalarQuantizer::QuantizerType qtype) {
70
+ using namespace faiss;
71
+ using namespace faiss::gpu;
72
+
73
+ Options opt;
74
+ std::vector<float> trainVecs = randVecs(opt.numTrain, opt.dim);
75
+ std::vector<float> addVecs = randVecs(opt.numAdd, opt.dim);
76
+
77
+ StandardGpuResources res;
78
+ res.noTempMemory();
79
+
80
+ auto config = GpuIndexIVFScalarQuantizerConfig();
81
+ config.device = opt.device;
82
+
83
+ GpuIndexIVFScalarQuantizer gpuIndex(&res,
84
+ opt.dim,
85
+ opt.numCentroids,
86
+ qtype,
87
+ METRIC_L2,
88
+ true,
89
+ config);
90
+ gpuIndex.train(opt.numTrain, trainVecs.data());
91
+ gpuIndex.add(opt.numAdd, addVecs.data());
92
+ gpuIndex.setNumProbes(opt.nprobe);
93
+
94
+ // use garbage values to see if we overwrite them
95
+ IndexFlatL2 cpuQuantizer(1);
96
+ IndexIVFScalarQuantizer cpuIndex(&cpuQuantizer, 1, 1,
97
+ ScalarQuantizer::QuantizerType::QT_6bit,
98
+ METRIC_L2);
99
+ cpuIndex.nprobe = 1;
100
+
101
+ gpuIndex.copyTo(&cpuIndex);
102
+
103
+ EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
104
+ EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
105
+
106
+ EXPECT_EQ(cpuIndex.d, gpuIndex.d);
107
+ EXPECT_EQ(cpuIndex.quantizer->d, gpuIndex.quantizer->d);
108
+ EXPECT_EQ(cpuIndex.d, opt.dim);
109
+ EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
110
+ EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
111
+
112
+ testIVFEquality(cpuIndex, gpuIndex);
113
+
114
+ // Query both objects; results should be equivalent
115
+ compareIndices(cpuIndex, gpuIndex,
116
+ opt.numQuery, opt.dim, opt.k, opt.toString(),
117
+ kF32MaxRelErr,
118
+ 0.1f,
119
+ 0.015f);
120
+ }
121
+
122
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyTo_fp16) {
123
+ runCopyToTest(faiss::ScalarQuantizer::QuantizerType::QT_fp16);
124
+ }
125
+
126
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyTo_8bit) {
127
+ runCopyToTest(faiss::ScalarQuantizer::QuantizerType::QT_8bit);
128
+ }
129
+
130
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyTo_8bit_uniform) {
131
+ runCopyToTest(faiss::ScalarQuantizer::QuantizerType::QT_8bit_uniform);
132
+ }
133
+
134
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyTo_6bit) {
135
+ runCopyToTest(faiss::ScalarQuantizer::QuantizerType::QT_6bit);
136
+ }
137
+
138
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyTo_4bit) {
139
+ runCopyToTest(faiss::ScalarQuantizer::QuantizerType::QT_4bit);
140
+ }
141
+
142
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyTo_4bit_uniform) {
143
+ runCopyToTest(faiss::ScalarQuantizer::QuantizerType::QT_4bit_uniform);
144
+ }
145
+
146
+ void runCopyFromTest(faiss::ScalarQuantizer::QuantizerType qtype) {
147
+ using namespace faiss;
148
+ using namespace faiss::gpu;
149
+
150
+ Options opt;
151
+ std::vector<float> trainVecs = randVecs(opt.numTrain, opt.dim);
152
+ std::vector<float> addVecs = randVecs(opt.numAdd, opt.dim);
153
+
154
+ IndexFlatL2 cpuQuantizer(opt.dim);
155
+ IndexIVFScalarQuantizer cpuIndex(&cpuQuantizer, opt.dim, opt.numCentroids,
156
+ qtype,
157
+ METRIC_L2);
158
+
159
+ cpuIndex.nprobe = opt.nprobe;
160
+ cpuIndex.train(opt.numTrain, trainVecs.data());
161
+ cpuIndex.add(opt.numAdd, addVecs.data());
162
+
163
+ // use garbage values to see if we overwrite them
164
+ StandardGpuResources res;
165
+ res.noTempMemory();
166
+
167
+ auto config = GpuIndexIVFScalarQuantizerConfig();
168
+ config.device = opt.device;
169
+
170
+ GpuIndexIVFScalarQuantizer gpuIndex(
171
+ &res,
172
+ 1,
173
+ 1,
174
+ ScalarQuantizer::QuantizerType::QT_4bit,
175
+ METRIC_L2,
176
+ false,
177
+ config);
178
+ gpuIndex.setNumProbes(1);
179
+
180
+ gpuIndex.copyFrom(&cpuIndex);
181
+
182
+ EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
183
+ EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
184
+
185
+ EXPECT_EQ(cpuIndex.d, gpuIndex.d);
186
+ EXPECT_EQ(cpuIndex.d, opt.dim);
187
+ EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
188
+ EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
189
+
190
+ testIVFEquality(cpuIndex, gpuIndex);
191
+
192
+ // Query both objects; results should be equivalent
193
+ compareIndices(cpuIndex, gpuIndex,
194
+ opt.numQuery, opt.dim, opt.k, opt.toString(),
195
+ kF32MaxRelErr,
196
+ 0.1f,
197
+ 0.015f);
198
+ }
199
+
200
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyFrom_fp16) {
201
+ runCopyFromTest(faiss::ScalarQuantizer::QuantizerType::QT_fp16);
202
+ }
203
+
204
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyFrom_8bit) {
205
+ runCopyFromTest(faiss::ScalarQuantizer::QuantizerType::QT_8bit);
206
+ }
207
+
208
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyFrom_8bit_uniform) {
209
+ runCopyFromTest(faiss::ScalarQuantizer::QuantizerType::QT_8bit_uniform);
210
+ }
211
+
212
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyFrom_6bit) {
213
+ runCopyFromTest(faiss::ScalarQuantizer::QuantizerType::QT_6bit);
214
+ }
215
+
216
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyFrom_4bit) {
217
+ runCopyFromTest(faiss::ScalarQuantizer::QuantizerType::QT_4bit);
218
+ }
219
+
220
+ TEST(TestGpuIndexIVFScalarQuantizer, CopyFrom_4bit_uniform) {
221
+ runCopyFromTest(faiss::ScalarQuantizer::QuantizerType::QT_4bit_uniform);
222
+ }
223
+
224
+ int main(int argc, char** argv) {
225
+ testing::InitGoogleTest(&argc, argv);
226
+
227
+ // just run with a fixed test seed
228
+ faiss::gpu::setTestSeed(100);
229
+
230
+ return RUN_ALL_TESTS();
231
+ }