faiss 0.2.6 → 0.2.7

Files changed (189)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/lib/faiss.rb +2 -2
  6. data/vendor/faiss/faiss/AutoTune.cpp +15 -4
  7. data/vendor/faiss/faiss/AutoTune.h +0 -1
  8. data/vendor/faiss/faiss/Clustering.cpp +1 -5
  9. data/vendor/faiss/faiss/Clustering.h +0 -2
  10. data/vendor/faiss/faiss/IVFlib.h +0 -2
  11. data/vendor/faiss/faiss/Index.h +1 -2
  12. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
  13. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
  14. data/vendor/faiss/faiss/IndexBinary.h +0 -1
  15. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
  16. data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
  17. data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
  18. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
  19. data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
  20. data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
  21. data/vendor/faiss/faiss/IndexFastScan.h +5 -1
  22. data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
  23. data/vendor/faiss/faiss/IndexFlat.h +1 -1
  24. data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
  25. data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
  26. data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
  27. data/vendor/faiss/faiss/IndexHNSW.h +0 -1
  28. data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
  29. data/vendor/faiss/faiss/IndexIDMap.h +0 -2
  30. data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
  31. data/vendor/faiss/faiss/IndexIVF.h +121 -61
  32. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
  33. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
  34. data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
  35. data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
  36. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
  37. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
  38. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
  39. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
  40. data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
  41. data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
  42. data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
  43. data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
  44. data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
  45. data/vendor/faiss/faiss/IndexReplicas.h +0 -1
  46. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
  47. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
  48. data/vendor/faiss/faiss/IndexShards.cpp +26 -109
  49. data/vendor/faiss/faiss/IndexShards.h +2 -3
  50. data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
  51. data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
  52. data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
  53. data/vendor/faiss/faiss/MetaIndexes.h +29 -0
  54. data/vendor/faiss/faiss/MetricType.h +14 -0
  55. data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
  56. data/vendor/faiss/faiss/VectorTransform.h +1 -3
  57. data/vendor/faiss/faiss/clone_index.cpp +232 -18
  58. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
  59. data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
  60. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
  61. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
  62. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
  63. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
  64. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
  65. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
  66. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
  67. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
  68. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
  69. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
  70. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
  71. data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
  72. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
  73. data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
  74. data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
  75. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
  76. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
  77. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
  78. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
  79. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
  80. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
  81. data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
  82. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
  83. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
  84. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
  85. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
  86. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
  87. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
  88. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
  89. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
  90. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
  91. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
  92. data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
  93. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
  94. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
  95. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
  96. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
  97. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
  98. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
  99. data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
  100. data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
  101. data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
  102. data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
  103. data/vendor/faiss/faiss/impl/HNSW.h +6 -9
  104. data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
  105. data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
  106. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
  107. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
  108. data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
  109. data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
  110. data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
  111. data/vendor/faiss/faiss/impl/NSG.h +4 -7
  112. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
  113. data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
  114. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
  115. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
  116. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
  117. data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
  118. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
  119. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
  120. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
  121. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
  122. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
  123. data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
  124. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
  125. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
  126. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
  127. data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
  128. data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
  129. data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
  130. data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
  131. data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
  132. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  133. data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
  134. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
  135. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
  136. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
  137. data/vendor/faiss/faiss/index_factory.cpp +8 -10
  138. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
  139. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
  140. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
  141. data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
  142. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
  143. data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
  144. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
  145. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  146. data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
  147. data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
  148. data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
  149. data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
  150. data/vendor/faiss/faiss/utils/Heap.h +35 -1
  151. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
  152. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
  153. data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
  154. data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
  155. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
  156. data/vendor/faiss/faiss/utils/distances.cpp +61 -7
  157. data/vendor/faiss/faiss/utils/distances.h +11 -0
  158. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
  159. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
  160. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
  161. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
  162. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
  163. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
  164. data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
  165. data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
  166. data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
  167. data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
  168. data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
  169. data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
  170. data/vendor/faiss/faiss/utils/fp16.h +7 -0
  171. data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
  172. data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
  173. data/vendor/faiss/faiss/utils/hamming.h +21 -10
  174. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
  175. data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
  176. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
  177. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
  178. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
  179. data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
  180. data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
  181. data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
  182. data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
  183. data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
  184. data/vendor/faiss/faiss/utils/sorting.h +71 -0
  185. data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
  186. data/vendor/faiss/faiss/utils/utils.cpp +4 -176
  187. data/vendor/faiss/faiss/utils/utils.h +2 -9
  188. metadata +29 -3
  189. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26

data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h

@@ -1,4 +1,10 @@
- // (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
+ /**
+  * Copyright (c) Facebook, Inc. and its affiliates.
+  *
+  * This source code is licensed under the MIT license found in the
+  * LICENSE file in the root directory of this source tree.
+  */
+
  #ifndef LEVEL2_NEON_INL_H
  #define LEVEL2_NEON_INL_H

@@ -1940,9 +1946,15 @@ struct Index2LevelDecoderImpl<
  } // namespace

  // Suitable for IVF256,PQ[1]x8
+ // Suitable for IVF256,PQ[1]x10 (such as IVF256,PQ16x10np)
+ // Suitable for IVF256,PQ[1]x12 (such as IVF256,PQ16x12np)
+ // Suitable for IVF256,PQ[1]x16 (such as IVF256,PQ16x16np)
  // Suitable for Residual[1]x8,PQ[2]x8
- // Suitable for IVF[9-16 bit],PQ[1]x8 (such as IVF1024,PQ16np)
- // Suitable for Residual1x[9-16 bit],PQ[1]x8 (such as Residual1x9,PQ8)
+ // Suitable for IVF[2^9-2^16 bit],PQ[1]x8 (such as IVF1024,PQ16np)
+ // Suitable for IVF[2^9-2^16 bit],PQ[1]x10 (such as IVF1024,PQ16x10np)
+ // Suitable for IVF[2^9-2^16 bit],PQ[1]x12 (such as IVF1024,PQ16x12np)
+ // Suitable for IVF[2^9-2^16 bit],PQ[1]x16 (such as IVF1024,PQ16x16np)
+ // Suitable for Residual[1]x[9-16 bit],PQ[2]x[3] (such as Residual2x9,PQ8)
  template <
          intptr_t DIM,
          intptr_t COARSE_SIZE,
@@ -1951,11 +1963,13 @@ template <
          intptr_t FINE_BITS = 8>
  struct Index2LevelDecoder {
      static_assert(
-         COARSE_BITS == 8 || COARSE_BITS == 10 || COARSE_BITS == 16,
-         "Only 8, 10 or 16 bits are currently supported for COARSE_BITS");
+         COARSE_BITS == 8 || COARSE_BITS == 10 || COARSE_BITS == 12 ||
+                 COARSE_BITS == 16,
+         "Only 8, 10, 12 or 16 bits are currently supported for COARSE_BITS");
      static_assert(
-         FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 16,
-         "Only 8, 10 or 16 bits are currently supported for FINE_BITS");
+         FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 12 ||
+                 FINE_BITS == 16,
+         "Only 8, 10, 12 or 16 bits are currently supported for FINE_BITS");

      static constexpr intptr_t dim = DIM;
      static constexpr intptr_t coarseSize = COARSE_SIZE;
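
The practical effect of the relaxed static_asserts: the two-level decoder now accepts 12-bit coarse and fine codes. A minimal compile-time sketch, assuming the faiss::cppcontrib namespace and the usual DIM/COARSE_SIZE/FINE_SIZE/COARSE_BITS/FINE_BITS parameter order (only the first two and the last of these are visible in the hunk); the IVF256,PQ16x12np layout below is a hypothetical example:

#include <faiss/cppcontrib/SaDecodeKernels.h>

// Hypothetical IVF256,PQ16x12np layout: 256-dim vectors, one 8-bit coarse
// code over the full vector, 16-dim fine sub-vectors with 12-bit codes.
// FINE_BITS == 12 tripped the static_assert in 0.2.6 and now instantiates.
using Decoder12 = faiss::cppcontrib::Index2LevelDecoder<
        /*DIM=*/256,
        /*COARSE_SIZE=*/256,
        /*FINE_SIZE=*/16,
        /*COARSE_BITS=*/8,
        /*FINE_BITS=*/12>;

static_assert(Decoder12::dim == 256, "12-bit codes are now accepted");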

data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h

@@ -1,3 +1,10 @@
+ /**
+  * Copyright (c) Facebook, Inc. and its affiliates.
+  *
+  * This source code is licensed under the MIT license found in the
+  * LICENSE file in the root directory of this source tree.
+  */
+
  #pragma once

  #include <cstddef>

data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h

@@ -1,3 +1,10 @@
+ /**
+  * Copyright (c) Facebook, Inc. and its affiliates.
+  *
+  * This source code is licensed under the MIT license found in the
+  * LICENSE file in the root directory of this source tree.
+  */
+
  #pragma once

  #include <cstddef>

data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h

@@ -1,4 +1,9 @@
- // (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
+ /**
+  * Copyright (c) Facebook, Inc. and its affiliates.
+  *
+  * This source code is licensed under the MIT license found in the
+  * LICENSE file in the root directory of this source tree.
+  */

  #ifndef PQ_AVX2_INL_H
  #define PQ_AVX2_INL_H
@@ -1488,12 +1493,14 @@ struct IndexPQDecoderImpl<

  // Suitable for PQ[1]x8
  // Suitable for PQ[1]x10
+ // Suitable for PQ[1]x12
  // Suitable for PQ[1]x16
  template <intptr_t DIM, intptr_t FINE_SIZE, intptr_t FINE_BITS = 8>
  struct IndexPQDecoder {
      static_assert(
-         FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 16,
-         "Only 8, 10 or 16 bits are currently supported for FINE_BITS");
+         FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 12 ||
+                 FINE_BITS == 16,
+         "Only 8, 10, 12 or 16 bits are currently supported for FINE_BITS");

      static constexpr intptr_t dim = DIM;
      static constexpr intptr_t fineSize = FINE_SIZE;
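
The standalone PQ decoder gets the same 12-bit relaxation; its full template signature appears in the hunk above, so only the faiss::cppcontrib namespace is assumed here. A compile-time sketch for a hypothetical PQ16x12 layout over 256-dim vectors:

#include <faiss/cppcontrib/SaDecodeKernels.h>

// Hypothetical PQ16x12: 16-dim fine sub-vectors, 12-bit codes per sub-vector.
using PQDecoder12 = faiss::cppcontrib::IndexPQDecoder<
        /*DIM=*/256,
        /*FINE_SIZE=*/16,
        /*FINE_BITS=*/12>;

static_assert(PQDecoder12::fineSize == 16, "FINE_BITS == 12 now compiles");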

data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h

@@ -1,4 +1,10 @@
- // (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
+ /**
+  * Copyright (c) Facebook, Inc. and its affiliates.
+  *
+  * This source code is licensed under the MIT license found in the
+  * LICENSE file in the root directory of this source tree.
+  */
+
  #ifndef PQ_INL_H
  #define PQ_INL_H


data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h

@@ -1,4 +1,10 @@
- // (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
+ /**
+  * Copyright (c) Facebook, Inc. and its affiliates.
+  *
+  * This source code is licensed under the MIT license found in the
+  * LICENSE file in the root directory of this source tree.
+  */
+
  #ifndef PQ_NEON_INL_H
  #define PQ_NEON_INL_H

@@ -1322,12 +1328,14 @@ struct IndexPQDecoderImpl<

  // Suitable for PQ[1]x8
  // Suitable for PQ[1]x10
+ // Suitable for PQ[1]x12
  // Suitable for PQ[1]x16
  template <intptr_t DIM, intptr_t FINE_SIZE, intptr_t FINE_BITS = 8>
  struct IndexPQDecoder {
      static_assert(
-         FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 16,
-         "Only 8, 10 or 16 bits are currently supported for FINE_BITS");
+         FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 12 ||
+                 FINE_BITS == 16,
+         "Only 8, 10, 12 or 16 bits are currently supported for FINE_BITS");

      static constexpr intptr_t dim = DIM;
      static constexpr intptr_t fineSize = FINE_SIZE;

data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp

@@ -11,6 +11,8 @@
  #include <faiss/IndexPreTransform.h>
  #include <faiss/IndexReplicas.h>
  #include <faiss/IndexShards.h>
+ #include <faiss/IndexShardsIVF.h>
+
  #include <faiss/gpu/GpuIndex.h>
  #include <faiss/gpu/GpuIndexFlat.h>
  #include <faiss/gpu/GpuIndexIVFFlat.h>
@@ -33,7 +35,12 @@ using namespace ::faiss;

  void GpuParameterSpace::initialize(const Index* index) {
      if (DC(IndexPreTransform)) {
-         index = ix->index;
+         initialize(ix->index);
+         return;
+     }
+     if (DC(IndexShardsIVF)) {
+         ParameterSpace::initialize(index);
+         return;
      }
      if (DC(IndexReplicas)) {
          if (ix->count() == 0)
@@ -53,6 +60,14 @@ void GpuParameterSpace::initialize(const Index* index) {
                  break;
              pr.values.push_back(nprobe);
          }
+
+         ParameterSpace ivf_pspace;
+         ivf_pspace.initialize(ix->quantizer);
+
+         for (const ParameterRange& p : ivf_pspace.parameter_ranges) {
+             ParameterRange& pr = add_range("quantizer_" + p.name);
+             pr.values = p.values;
+         }
      }
      // not sure we should call the parent initializer
  }
@@ -72,7 +87,7 @@ void GpuParameterSpace::set_index_parameter(
      }
      if (name == "nprobe") {
          if (DC(GpuIndexIVF)) {
-             ix->setNumProbes(int(val));
+             ix->nprobe = size_t(val);
              return;
          }
      }
@@ -83,6 +98,14 @@ void GpuParameterSpace::set_index_parameter(
          }
      }

+     if (name.find("quantizer_") == 0) {
+         if (DC(GpuIndexIVF)) {
+             std::string sub_name = name.substr(strlen("quantizer_"));
+             set_index_parameter(ix->quantizer, sub_name, val);
+             return;
+         }
+     }
+
      // maybe normal index parameters apply?
      ParameterSpace::set_index_parameter(index, name, val);
  }
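
Net effect: initialize() now recurses into IndexPreTransform, falls back to the CPU logic for IndexShardsIVF, and republishes the coarse quantizer's own tunable parameters under a quantizer_ prefix, which set_index_parameter() strips and forwards. A usage sketch, assuming gpu_index is a GpuIndexIVF whose coarse quantizer is itself a tunable index:

#include <faiss/gpu/GpuAutoTune.h>

void tune(faiss::Index* gpu_index) {
    faiss::gpu::GpuParameterSpace ps;
    ps.initialize(gpu_index); // enumerates nprobe and quantizer_* ranges

    // applied to the IVF index itself (now via the public nprobe field,
    // replacing the removed setNumProbes)
    ps.set_index_parameter(gpu_index, "nprobe", 64);

    // "quantizer_" is stripped and "nprobe" is applied to the index's
    // coarse quantizer
    ps.set_index_parameter(gpu_index, "quantizer_nprobe", 4);
}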

data/vendor/faiss/faiss/gpu/GpuCloner.cpp

@@ -18,6 +18,7 @@
  #include <faiss/IndexPreTransform.h>
  #include <faiss/IndexReplicas.h>
  #include <faiss/IndexScalarQuantizer.h>
+ #include <faiss/IndexShardsIVF.h>
  #include <faiss/MetaIndexes.h>
  #include <faiss/gpu/GpuIndex.h>
  #include <faiss/gpu/GpuIndexFlat.h>
@@ -116,7 +117,6 @@ ToGpuCloner::ToGpuCloner(
          : GpuClonerOptions(options), provider(prov), device(device) {}

  Index* ToGpuCloner::clone_Index(const Index* index) {
-     using idx_t = Index::idx_t;
      if (auto ifl = dynamic_cast<const IndexFlat*>(index)) {
          GpuIndexFlatConfig config;
          config.device = device;
@@ -227,8 +227,8 @@ ToGpuClonerMultiple::ToGpuClonerMultiple(
          std::vector<int>& devices,
          const GpuMultipleClonerOptions& options)
          : GpuMultipleClonerOptions(options) {
-     FAISS_ASSERT(provider.size() == devices.size());
-     for (int i = 0; i < provider.size(); i++) {
+     FAISS_THROW_IF_NOT(provider.size() == devices.size());
+     for (size_t i = 0; i < provider.size(); i++) {
          sub_cloners.push_back(ToGpuCloner(provider[i], devices[i], options));
      }
  }
@@ -241,28 +241,43 @@ ToGpuClonerMultiple::ToGpuClonerMultiple(
  void ToGpuClonerMultiple::copy_ivf_shard(
          const IndexIVF* index_ivf,
          IndexIVF* idx2,
-         long n,
-         long i) {
+         idx_t n,
+         idx_t i) {
      if (shard_type == 2) {
-         long i0 = i * index_ivf->ntotal / n;
-         long i1 = (i + 1) * index_ivf->ntotal / n;
+         idx_t i0 = i * index_ivf->ntotal / n;
+         idx_t i1 = (i + 1) * index_ivf->ntotal / n;

          if (verbose)
              printf("IndexShards shard %ld indices %ld:%ld\n", i, i0, i1);
-         index_ivf->copy_subset_to(*idx2, 2, i0, i1);
+         index_ivf->copy_subset_to(
+                 *idx2, InvertedLists::SUBSET_TYPE_ID_RANGE, i0, i1);
          FAISS_ASSERT(idx2->ntotal == i1 - i0);
      } else if (shard_type == 1) {
          if (verbose)
              printf("IndexShards shard %ld select modulo %ld = %ld\n", i, n, i);
-         index_ivf->copy_subset_to(*idx2, 1, n, i);
+         index_ivf->copy_subset_to(
+                 *idx2, InvertedLists::SUBSET_TYPE_ID_MOD, n, i);
+     } else if (shard_type == 4) {
+         idx_t i0 = i * index_ivf->nlist / n;
+         idx_t i1 = (i + 1) * index_ivf->nlist / n;
+         if (verbose) {
+             printf("IndexShards %ld/%ld select lists %d:%d\n",
+                    i,
+                    n,
+                    int(i0),
+                    int(i1));
+         }
+         index_ivf->copy_subset_to(
+                 *idx2, InvertedLists::SUBSET_TYPE_INVLIST, i0, i1);
      } else {
          FAISS_THROW_FMT("shard_type %d not implemented", shard_type);
      }
  }

  Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
-     long n = sub_cloners.size();
+     idx_t n = sub_cloners.size();

+     auto index_ivf = dynamic_cast<const faiss::IndexIVF*>(index);
      auto index_ivfpq = dynamic_cast<const faiss::IndexIVFPQ*>(index);
      auto index_ivfflat = dynamic_cast<const faiss::IndexIVFFlat*>(index);
      auto index_ivfsq =
@@ -274,16 +289,36 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
              "IndexIVFFlat, IndexIVFScalarQuantizer, "
              "IndexFlat and IndexIVFPQ");

+     // decide what coarse quantizer the sub-indexes are going to have
+     const Index* quantizer = nullptr;
+     std::unique_ptr<Index> new_quantizer;
+     if (index_ivf) {
+         quantizer = index_ivf->quantizer;
+         if (common_ivf_quantizer &&
+             !dynamic_cast<const IndexFlat*>(quantizer)) {
+             // then we flatten the coarse quantizer so that everything remains
+             // on GPU
+             new_quantizer.reset(
+                     new IndexFlat(quantizer->d, quantizer->metric_type));
+             std::vector<float> centroids(quantizer->d * quantizer->ntotal);
+             quantizer->reconstruct_n(0, quantizer->ntotal, centroids.data());
+             new_quantizer->add(quantizer->ntotal, centroids.data());
+             quantizer = new_quantizer.get();
+         }
+     }
+
      std::vector<faiss::Index*> shards(n);

-     for (long i = 0; i < n; i++) {
+     for (idx_t i = 0; i < n; i++) {
          // make a shallow copy
-         if (reserveVecs)
+         if (reserveVecs) {
              sub_cloners[i].reserveVecs = (reserveVecs + n - 1) / n;
-
+         }
+         // note: const_casts here are harmless because the indexes build here
+         // are short-lived, translated immediately to GPU indexes.
          if (index_ivfpq) {
              faiss::IndexIVFPQ idx2(
-                     index_ivfpq->quantizer,
+                     const_cast<Index*>(quantizer),
                      index_ivfpq->d,
                      index_ivfpq->nlist,
                      index_ivfpq->code_size,
@@ -297,7 +332,7 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
              shards[i] = sub_cloners[i].clone_Index(&idx2);
          } else if (index_ivfflat) {
              faiss::IndexIVFFlat idx2(
-                     index_ivfflat->quantizer,
+                     const_cast<Index*>(quantizer),
                      index->d,
                      index_ivfflat->nlist,
                      index_ivfflat->metric_type);
@@ -307,7 +342,7 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
              shards[i] = sub_cloners[i].clone_Index(&idx2);
          } else if (index_ivfsq) {
              faiss::IndexIVFScalarQuantizer idx2(
-                     index_ivfsq->quantizer,
+                     const_cast<Index*>(quantizer),
                      index->d,
                      index_ivfsq->nlist,
                      index_ivfsq->sq.qtype,
@@ -323,40 +358,52 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
              faiss::IndexFlat idx2(index->d, index->metric_type);
              shards[i] = sub_cloners[i].clone_Index(&idx2);
              if (index->ntotal > 0) {
-                 long i0 = index->ntotal * i / n;
-                 long i1 = index->ntotal * (i + 1) / n;
+                 idx_t i0 = index->ntotal * i / n;
+                 idx_t i1 = index->ntotal * (i + 1) / n;
                  shards[i]->add(i1 - i0, index_flat->get_xb() + i0 * index->d);
              }
          }
      }

      bool successive_ids = index_flat != nullptr;
-     faiss::IndexShards* res =
-             new faiss::IndexShards(index->d, true, successive_ids);
+     faiss::IndexShards* res;
+     if (common_ivf_quantizer && index_ivf) {
+         this->shard = false;
+         Index* common_quantizer = clone_Index(index_ivf->quantizer);
+         this->shard = true;
+         IndexShardsIVF* idx = new faiss::IndexShardsIVF(
+                 common_quantizer, index_ivf->nlist, true, false);
+         idx->own_fields = true;
+         idx->own_indices = true;
+         res = idx;
+     } else {
+         res = new faiss::IndexShards(index->d, true, successive_ids);
+         res->own_indices = true;
+     }

      for (int i = 0; i < n; i++) {
          res->add_shard(shards[i]);
      }
-     res->own_fields = true;
      FAISS_ASSERT(index->ntotal == res->ntotal);
      return res;
  }

  Index* ToGpuClonerMultiple::clone_Index(const Index* index) {
-     long n = sub_cloners.size();
-     if (n == 1)
+     idx_t n = sub_cloners.size();
+     if (n == 1) {
          return sub_cloners[0].clone_Index(index);
+     }

      if (dynamic_cast<const IndexFlat*>(index) ||
-         dynamic_cast<const faiss::IndexIVFFlat*>(index) ||
-         dynamic_cast<const faiss::IndexIVFScalarQuantizer*>(index) ||
-         dynamic_cast<const faiss::IndexIVFPQ*>(index)) {
+         dynamic_cast<const IndexIVFFlat*>(index) ||
+         dynamic_cast<const IndexIVFScalarQuantizer*>(index) ||
+         dynamic_cast<const IndexIVFPQ*>(index)) {
          if (!shard) {
              IndexReplicas* res = new IndexReplicas();
              for (auto& sub_cloner : sub_cloners) {
                  res->addIndex(sub_cloner.clone_Index(index));
              }
-             res->own_fields = true;
+             res->own_indices = true;
              return res;
          } else {
              return clone_Index_to_shards(index);
@@ -373,8 +420,8 @@ Index* ToGpuClonerMultiple::clone_Index(const Index* index) {
          for (int m = 0; m < pq.M; m++) {
              // which GPU(s) will be assigned to this sub-quantizer

-             long i0 = m * n / pq.M;
-             long i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1;
+             idx_t i0 = m * n / pq.M;
+             idx_t i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1;
              std::vector<ToGpuCloner> sub_cloners_2;
              sub_cloners_2.insert(
                      sub_cloners_2.begin(),
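
The magic numbers formerly passed to copy_subset_to are replaced by the named subset types from InvertedLists, and the new shard_type 4 splits by whole inverted lists rather than by vector id. A sketch of the same call pattern outside the cloner, assuming src and dst are compatible CPU IndexIVF instances and that copy_subset_to accepts the subset_type constant directly, as in the hunk above:

#include <faiss/IndexIVF.h>
#include <faiss/invlists/InvertedLists.h>

// Copy shard 0 of 4: the first quarter of src's inverted lists,
// mirroring what copy_ivf_shard does for shard_type == 4.
void copy_first_list_quarter(const faiss::IndexIVF& src, faiss::IndexIVF& dst) {
    faiss::idx_t i0 = 0;
    faiss::idx_t i1 = src.nlist / 4;
    src.copy_subset_to(dst, faiss::InvertedLists::SUBSET_TYPE_INVLIST, i0, i1);
}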

data/vendor/faiss/faiss/gpu/GpuCloner.h

@@ -55,8 +55,8 @@ struct ToGpuClonerMultiple : faiss::Cloner, GpuMultipleClonerOptions {
      void copy_ivf_shard(
              const IndexIVF* index_ivf,
              IndexIVF* idx2,
-             long n,
-             long i);
+             idx_t n,
+             idx_t i);

      Index* clone_Index_to_shards(const Index* index);


data/vendor/faiss/faiss/gpu/GpuClonerOptions.h

@@ -14,41 +14,42 @@ namespace gpu {

  /// set some options on how to copy to GPU
  struct GpuClonerOptions {
-     GpuClonerOptions();
-
      /// how should indices be stored on index types that support indices
      /// (anything but GpuIndexFlat*)?
-     IndicesOptions indicesOptions;
+     IndicesOptions indicesOptions = INDICES_64_BIT;

      /// is the coarse quantizer in float16?
-     bool useFloat16CoarseQuantizer;
+     bool useFloat16CoarseQuantizer = false;

      /// for GpuIndexIVFFlat, is storage in float16?
      /// for GpuIndexIVFPQ, are intermediate calculations in float16?
-     bool useFloat16;
+     bool useFloat16 = false;

      /// use precomputed tables?
-     bool usePrecomputed;
+     bool usePrecomputed = false;

      /// reserve vectors in the invfiles?
-     long reserveVecs;
+     long reserveVecs = 0;

      /// For GpuIndexFlat, store data in transposed layout?
-     bool storeTransposed;
+     bool storeTransposed = false;

      /// Set verbose options on the index
-     bool verbose;
+     bool verbose = false;
  };

  struct GpuMultipleClonerOptions : public GpuClonerOptions {
-     GpuMultipleClonerOptions();
-
      /// Whether to shard the index across GPUs, versus replication
      /// across GPUs
-     bool shard;
+     bool shard = false;

      /// IndexIVF::copy_subset_to subset type
-     int shard_type;
+     int shard_type = 1;
+
+     /// set to true if an IndexIVF is to be dispatched to multiple GPUs with a
+     /// single common IVF quantizer, ie. only the inverted lists are sharded on
+     /// the sub-indexes (uses an IndexShardsIVF)
+     bool common_ivf_quantizer = false;
  };

  } // namespace gpu
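
With the out-of-line constructors gone (GpuClonerOptions.cpp is deleted in this release) and the defaults inline, the options struct is usable immediately after default construction. A sketch of sharding one CPU IndexIVF across several GPUs with a shared coarse quantizer, using index_cpu_to_gpu_multiple as declared in GpuCloner.h; the resources and devices vectors are assumed to be set up by the caller:

#include <faiss/gpu/GpuCloner.h>

faiss::Index* shard_to_gpus(
        std::vector<faiss::gpu::GpuResourcesProvider*>& resources,
        std::vector<int>& devices,
        const faiss::Index* cpu_index) {
    faiss::gpu::GpuMultipleClonerOptions options; // header defaults apply
    options.shard = true;                // shard rather than replicate
    options.shard_type = 4;              // split by inverted list
    options.common_ivf_quantizer = true; // one quantizer -> IndexShardsIVF
    return faiss::gpu::index_cpu_to_gpu_multiple(
            resources, devices, cpu_index, &options);
}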

data/vendor/faiss/faiss/gpu/GpuDistance.h

@@ -45,7 +45,8 @@ struct GpuDistanceParams {
                outDistances(nullptr),
                ignoreOutDistances(false),
                outIndicesType(IndicesDataType::I64),
-               outIndices(nullptr) {}
+               outIndices(nullptr),
+               device(-1) {}

      //
      // Search parameters
@@ -76,7 +77,7 @@ struct GpuDistanceParams {
      const void* vectors;
      DistanceDataType vectorType;
      bool vectorsRowMajor;
-     int numVectors;
+     idx_t numVectors;

      /// Precomputed L2 norms for each vector in `vectors`, which can be
      /// optionally provided in advance to speed computation for METRIC_L2
@@ -93,7 +94,7 @@ struct GpuDistanceParams {
      const void* queries;
      DistanceDataType queryType;
      bool queriesRowMajor;
-     int numQueries;
+     idx_t numQueries;

      //
      // Output results
@@ -112,6 +113,17 @@ struct GpuDistanceParams {
      /// innermost (row major). Not used if k == -1 (all pairwise distances)
      IndicesDataType outIndicesType;
      void* outIndices;
+
+     //
+     // Execution information
+     //
+
+     /// On which GPU device should the search run?
+     /// -1 indicates that the current CUDA thread-local device
+     /// (via cudaGetDevice/cudaSetDevice) is used
+     /// Otherwise, an integer 0 <= device < numDevices indicates the device for
+     /// execution
+     int device;
  };

  /// A wrapper for gpu/impl/Distance.cuh to expose direct brute-force k-nearest
@@ -137,13 +149,13 @@ void bruteForceKnn(
          // dims x numVectors, with numVectors innermost
          const float* vectors,
          bool vectorsRowMajor,
-         int numVectors,
+         idx_t numVectors,
          // If queriesRowMajor is true, this is
          // numQueries x dims, with dims innermost; otherwise,
          // dims x numQueries, with numQueries innermost
          const float* queries,
          bool queriesRowMajor,
-         int numQueries,
+         idx_t numQueries,
          int dims,
          int k,
          // A region of memory size numQueries x k, with k
@@ -151,7 +163,7 @@ void bruteForceKnn(
          float* outDistances,
          // A region of memory size numQueries x k, with k
          // innermost (row major)
-         Index::idx_t* outIndices);
+         idx_t* outIndices);

  } // namespace gpu
  } // namespace faiss
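
The new device field plus the idx_t widening change how a brute-force search is set up. A sketch, assuming res is a valid GpuResourcesProvider and the data pointers are allocated by the caller; the field names match the header above, and bfKnn is the search entry point declared alongside GpuDistanceParams:

#include <faiss/gpu/GpuDistance.h>

void knn_on_gpu1(
        faiss::gpu::GpuResourcesProvider* res,
        const float* xb, faiss::idx_t nb,   // database: nb x d, row major
        const float* xq, faiss::idx_t nq,   // queries:  nq x d, row major
        int d, int k,
        float* distances,                   // out: nq x k
        faiss::idx_t* labels) {             // out: nq x k
    faiss::gpu::GpuDistanceParams args;
    args.metric = faiss::METRIC_L2;
    args.k = k;
    args.dims = d;
    args.vectors = xb;
    args.vectorsRowMajor = true;
    args.numVectors = nb; // idx_t now, so counts above 2^31 are representable
    args.queries = xq;
    args.queriesRowMajor = true;
    args.numQueries = nq;
    args.outDistances = distances;
    args.outIndices = labels;
    args.device = 1; // run on GPU 1; -1 keeps the current CUDA device
    faiss::gpu::bfKnn(res, args);
}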

data/vendor/faiss/faiss/gpu/GpuIndex.h

@@ -51,30 +51,31 @@ class GpuIndex : public faiss::Index {
      /// `x` can be resident on the CPU or any GPU; copies are performed
      /// as needed
      /// Handles paged adds if the add set is too large; calls addInternal_
-     void add(Index::idx_t, const float* x) override;
+     void add(idx_t, const float* x) override;

      /// `x` and `ids` can be resident on the CPU or any GPU; copies are
      /// performed as needed
      /// Handles paged adds if the add set is too large; calls addInternal_
-     void add_with_ids(Index::idx_t n, const float* x, const Index::idx_t* ids)
-             override;
+     void add_with_ids(idx_t n, const float* x, const idx_t* ids) override;

      /// `x` and `labels` can be resident on the CPU or any GPU; copies are
      /// performed as needed
      void assign(
-             Index::idx_t n,
+             idx_t n,
              const float* x,
-             Index::idx_t* labels,
-             Index::idx_t k = 1) const override;
+             idx_t* labels,
+             // faiss::Index has idx_t for k
+             idx_t k = 1) const override;

      /// `x`, `distances` and `labels` can be resident on the CPU or any
      /// GPU; copies are performed as needed
      void search(
-             Index::idx_t n,
+             idx_t n,
              const float* x,
-             Index::idx_t k,
+             // faiss::Index has idx_t for k
+             idx_t k,
              float* distances,
-             Index::idx_t* labels,
+             idx_t* labels,
              const SearchParameters* params = nullptr) const override;

      /// `x`, `distances` and `labels` and `recons` can be resident on the CPU or
@@ -82,6 +83,7 @@ class GpuIndex : public faiss::Index {
      void search_and_reconstruct(
              idx_t n,
              const float* x,
+             // faiss::Index has idx_t for k
              idx_t k,
              float* distances,
              idx_t* labels,
@@ -90,16 +92,16 @@ class GpuIndex : public faiss::Index {

      /// Overridden to force GPU indices to provide their own GPU-friendly
      /// implementation
-     void compute_residual(const float* x, float* residual, Index::idx_t key)
+     void compute_residual(const float* x, float* residual, idx_t key)
              const override;

      /// Overridden to force GPU indices to provide their own GPU-friendly
      /// implementation
      void compute_residual_n(
-             Index::idx_t n,
+             idx_t n,
              const float* xs,
              float* residuals,
-             const Index::idx_t* keys) const override;
+             const idx_t* keys) const override;

     protected:
      /// Copy what we need from the CPU equivalent
@@ -114,43 +116,43 @@ class GpuIndex : public faiss::Index {

      /// Overridden to actually perform the add
      /// All data is guaranteed to be resident on our device
-     virtual void addImpl_(int n, const float* x, const Index::idx_t* ids) = 0;
+     virtual void addImpl_(idx_t n, const float* x, const idx_t* ids) = 0;

      /// Overridden to actually perform the search
      /// All data is guaranteed to be resident on our device
      virtual void searchImpl_(
-             int n,
+             idx_t n,
              const float* x,
              int k,
              float* distances,
-             Index::idx_t* labels,
+             idx_t* labels,
              const SearchParameters* params) const = 0;

     private:
      /// Handles paged adds if the add set is too large, passes to
      /// addImpl_ to actually perform the add for the current page
-     void addPaged_(int n, const float* x, const Index::idx_t* ids);
+     void addPaged_(idx_t n, const float* x, const idx_t* ids);

      /// Calls addImpl_ for a single page of GPU-resident data
-     void addPage_(int n, const float* x, const Index::idx_t* ids);
+     void addPage_(idx_t n, const float* x, const idx_t* ids);

      /// Calls searchImpl_ for a single page of GPU-resident data
      void searchNonPaged_(
-             int n,
+             idx_t n,
              const float* x,
              int k,
              float* outDistancesData,
-             Index::idx_t* outIndicesData,
+             idx_t* outIndicesData,
              const SearchParameters* params) const;

      /// Calls searchImpl_ for a single page of GPU-resident data,
      /// handling paging of the data and copies from the CPU
      void searchFromCpuPaged_(
-             int n,
+             idx_t n,
              const float* x,
              int k,
              float* outDistancesData,
-             Index::idx_t* outIndicesData,
+             idx_t* outIndicesData,
              const SearchParameters* params) const;

     protected: