faiss 0.2.6 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/lib/faiss.rb +2 -2
  6. data/vendor/faiss/faiss/AutoTune.cpp +15 -4
  7. data/vendor/faiss/faiss/AutoTune.h +0 -1
  8. data/vendor/faiss/faiss/Clustering.cpp +1 -5
  9. data/vendor/faiss/faiss/Clustering.h +0 -2
  10. data/vendor/faiss/faiss/IVFlib.h +0 -2
  11. data/vendor/faiss/faiss/Index.h +1 -2
  12. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
  13. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
  14. data/vendor/faiss/faiss/IndexBinary.h +0 -1
  15. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
  16. data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
  17. data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
  18. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
  19. data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
  20. data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
  21. data/vendor/faiss/faiss/IndexFastScan.h +5 -1
  22. data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
  23. data/vendor/faiss/faiss/IndexFlat.h +1 -1
  24. data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
  25. data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
  26. data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
  27. data/vendor/faiss/faiss/IndexHNSW.h +0 -1
  28. data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
  29. data/vendor/faiss/faiss/IndexIDMap.h +0 -2
  30. data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
  31. data/vendor/faiss/faiss/IndexIVF.h +121 -61
  32. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
  33. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
  34. data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
  35. data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
  36. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
  37. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
  38. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
  39. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
  40. data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
  41. data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
  42. data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
  43. data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
  44. data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
  45. data/vendor/faiss/faiss/IndexReplicas.h +0 -1
  46. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
  47. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
  48. data/vendor/faiss/faiss/IndexShards.cpp +26 -109
  49. data/vendor/faiss/faiss/IndexShards.h +2 -3
  50. data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
  51. data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
  52. data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
  53. data/vendor/faiss/faiss/MetaIndexes.h +29 -0
  54. data/vendor/faiss/faiss/MetricType.h +14 -0
  55. data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
  56. data/vendor/faiss/faiss/VectorTransform.h +1 -3
  57. data/vendor/faiss/faiss/clone_index.cpp +232 -18
  58. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
  59. data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
  60. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
  61. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
  62. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
  63. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
  64. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
  65. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
  66. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
  67. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
  68. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
  69. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
  70. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
  71. data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
  72. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
  73. data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
  74. data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
  75. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
  76. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
  77. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
  78. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
  79. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
  80. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
  81. data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
  82. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
  83. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
  84. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
  85. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
  86. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
  87. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
  88. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
  89. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
  90. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
  91. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
  92. data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
  93. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
  94. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
  95. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
  96. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
  97. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
  98. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
  99. data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
  100. data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
  101. data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
  102. data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
  103. data/vendor/faiss/faiss/impl/HNSW.h +6 -9
  104. data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
  105. data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
  106. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
  107. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
  108. data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
  109. data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
  110. data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
  111. data/vendor/faiss/faiss/impl/NSG.h +4 -7
  112. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
  113. data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
  114. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
  115. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
  116. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
  117. data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
  118. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
  119. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
  120. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
  121. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
  122. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
  123. data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
  124. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
  125. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
  126. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
  127. data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
  128. data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
  129. data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
  130. data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
  131. data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
  132. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  133. data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
  134. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
  135. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
  136. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
  137. data/vendor/faiss/faiss/index_factory.cpp +8 -10
  138. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
  139. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
  140. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
  141. data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
  142. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
  143. data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
  144. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
  145. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  146. data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
  147. data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
  148. data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
  149. data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
  150. data/vendor/faiss/faiss/utils/Heap.h +35 -1
  151. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
  152. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
  153. data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
  154. data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
  155. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
  156. data/vendor/faiss/faiss/utils/distances.cpp +61 -7
  157. data/vendor/faiss/faiss/utils/distances.h +11 -0
  158. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
  159. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
  160. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
  161. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
  162. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
  163. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
  164. data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
  165. data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
  166. data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
  167. data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
  168. data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
  169. data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
  170. data/vendor/faiss/faiss/utils/fp16.h +7 -0
  171. data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
  172. data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
  173. data/vendor/faiss/faiss/utils/hamming.h +21 -10
  174. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
  175. data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
  176. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
  177. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
  178. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
  179. data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
  180. data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
  181. data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
  182. data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
  183. data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
  184. data/vendor/faiss/faiss/utils/sorting.h +71 -0
  185. data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
  186. data/vendor/faiss/faiss/utils/utils.cpp +4 -176
  187. data/vendor/faiss/faiss/utils/utils.h +2 -9
  188. metadata +29 -3
  189. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26
@@ -1,4 +1,10 @@
1
- // (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
2
8
  #ifndef LEVEL2_NEON_INL_H
3
9
  #define LEVEL2_NEON_INL_H
4
10
 
@@ -1940,9 +1946,15 @@ struct Index2LevelDecoderImpl<
1940
1946
  } // namespace
1941
1947
 
1942
1948
  // Suitable for IVF256,PQ[1]x8
1949
+ // Suitable for IVF256,PQ[1]x10 (such as IVF256,PQ16x10np)
1950
+ // Suitable for IVF256,PQ[1]x12 (such as IVF256,PQ16x12np)
1951
+ // Suitable for IVF256,PQ[1]x16 (such as IVF256,PQ16x16np)
1943
1952
  // Suitable for Residual[1]x8,PQ[2]x8
1944
- // Suitable for IVF[9-16 bit],PQ[1]x8 (such as IVF1024,PQ16np)
1945
- // Suitable for Residual1x[9-16 bit],PQ[1]x8 (such as Residual1x9,PQ8)
1953
+ // Suitable for IVF[2^9-2^16 bit],PQ[1]x8 (such as IVF1024,PQ16np)
1954
+ // Suitable for IVF[2^9-2^16 bit],PQ[1]x10 (such as IVF1024,PQ16x10np)
1955
+ // Suitable for IVF[2^9-2^16 bit],PQ[1]x12 (such as IVF1024,PQ16x12np)
1956
+ // Suitable for IVF[2^9-2^16 bit],PQ[1]x16 (such as IVF1024,PQ16x16np)
1957
+ // Suitable for Residual[1]x[9-16 bit],PQ[2]x[3] (such as Residual2x9,PQ8)
1946
1958
  template <
1947
1959
  intptr_t DIM,
1948
1960
  intptr_t COARSE_SIZE,
@@ -1951,11 +1963,13 @@ template <
1951
1963
  intptr_t FINE_BITS = 8>
1952
1964
  struct Index2LevelDecoder {
1953
1965
  static_assert(
1954
- COARSE_BITS == 8 || COARSE_BITS == 10 || COARSE_BITS == 16,
1955
- "Only 8, 10 or 16 bits are currently supported for COARSE_BITS");
1966
+ COARSE_BITS == 8 || COARSE_BITS == 10 || COARSE_BITS == 12 ||
1967
+ COARSE_BITS == 16,
1968
+ "Only 8, 10, 12 or 16 bits are currently supported for COARSE_BITS");
1956
1969
  static_assert(
1957
- FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 16,
1958
- "Only 8, 10 or 16 bits are currently supported for FINE_BITS");
1970
+ FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 12 ||
1971
+ FINE_BITS == 16,
1972
+ "Only 8, 10, 12 or 16 bits are currently supported for FINE_BITS");
1959
1973
 
1960
1974
  static constexpr intptr_t dim = DIM;
1961
1975
  static constexpr intptr_t coarseSize = COARSE_SIZE;
@@ -1,3 +1,10 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
1
8
  #pragma once
2
9
 
3
10
  #include <cstddef>
@@ -1,3 +1,10 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
1
8
  #pragma once
2
9
 
3
10
  #include <cstddef>
@@ -1,4 +1,9 @@
1
- // (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
2
7
 
3
8
  #ifndef PQ_AVX2_INL_H
4
9
  #define PQ_AVX2_INL_H
@@ -1488,12 +1493,14 @@ struct IndexPQDecoderImpl<
1488
1493
 
1489
1494
  // Suitable for PQ[1]x8
1490
1495
  // Suitable for PQ[1]x10
1496
+ // Suitable for PQ[1]x12
1491
1497
  // Suitable for PQ[1]x16
1492
1498
  template <intptr_t DIM, intptr_t FINE_SIZE, intptr_t FINE_BITS = 8>
1493
1499
  struct IndexPQDecoder {
1494
1500
  static_assert(
1495
- FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 16,
1496
- "Only 8, 10 or 16 bits are currently supported for FINE_BITS");
1501
+ FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 12 ||
1502
+ FINE_BITS == 16,
1503
+ "Only 8, 10, 12 or 16 bits are currently supported for FINE_BITS");
1497
1504
 
1498
1505
  static constexpr intptr_t dim = DIM;
1499
1506
  static constexpr intptr_t fineSize = FINE_SIZE;
@@ -1,4 +1,10 @@
1
- // (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
2
8
  #ifndef PQ_INL_H
3
9
  #define PQ_INL_H
4
10
 
@@ -1,4 +1,10 @@
1
- // (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
2
8
  #ifndef PQ_NEON_INL_H
3
9
  #define PQ_NEON_INL_H
4
10
 
@@ -1322,12 +1328,14 @@ struct IndexPQDecoderImpl<
1322
1328
 
1323
1329
  // Suitable for PQ[1]x8
1324
1330
  // Suitable for PQ[1]x10
1331
+ // Suitable for PQ[1]x12
1325
1332
  // Suitable for PQ[1]x16
1326
1333
  template <intptr_t DIM, intptr_t FINE_SIZE, intptr_t FINE_BITS = 8>
1327
1334
  struct IndexPQDecoder {
1328
1335
  static_assert(
1329
- FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 16,
1330
- "Only 8, 10 or 16 bits are currently supported for FINE_BITS");
1336
+ FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 12 ||
1337
+ FINE_BITS == 16,
1338
+ "Only 8, 10, 12 or 16 bits are currently supported for FINE_BITS");
1331
1339
 
1332
1340
  static constexpr intptr_t dim = DIM;
1333
1341
  static constexpr intptr_t fineSize = FINE_SIZE;
@@ -11,6 +11,8 @@
11
11
  #include <faiss/IndexPreTransform.h>
12
12
  #include <faiss/IndexReplicas.h>
13
13
  #include <faiss/IndexShards.h>
14
+ #include <faiss/IndexShardsIVF.h>
15
+
14
16
  #include <faiss/gpu/GpuIndex.h>
15
17
  #include <faiss/gpu/GpuIndexFlat.h>
16
18
  #include <faiss/gpu/GpuIndexIVFFlat.h>
@@ -33,7 +35,12 @@ using namespace ::faiss;
33
35
 
34
36
  void GpuParameterSpace::initialize(const Index* index) {
35
37
  if (DC(IndexPreTransform)) {
36
- index = ix->index;
38
+ initialize(ix->index);
39
+ return;
40
+ }
41
+ if (DC(IndexShardsIVF)) {
42
+ ParameterSpace::initialize(index);
43
+ return;
37
44
  }
38
45
  if (DC(IndexReplicas)) {
39
46
  if (ix->count() == 0)
@@ -53,6 +60,14 @@ void GpuParameterSpace::initialize(const Index* index) {
53
60
  break;
54
61
  pr.values.push_back(nprobe);
55
62
  }
63
+
64
+ ParameterSpace ivf_pspace;
65
+ ivf_pspace.initialize(ix->quantizer);
66
+
67
+ for (const ParameterRange& p : ivf_pspace.parameter_ranges) {
68
+ ParameterRange& pr = add_range("quantizer_" + p.name);
69
+ pr.values = p.values;
70
+ }
56
71
  }
57
72
  // not sure we should call the parent initializer
58
73
  }
@@ -72,7 +87,7 @@ void GpuParameterSpace::set_index_parameter(
72
87
  }
73
88
  if (name == "nprobe") {
74
89
  if (DC(GpuIndexIVF)) {
75
- ix->setNumProbes(int(val));
90
+ ix->nprobe = size_t(val);
76
91
  return;
77
92
  }
78
93
  }
@@ -83,6 +98,14 @@ void GpuParameterSpace::set_index_parameter(
83
98
  }
84
99
  }
85
100
 
101
+ if (name.find("quantizer_") == 0) {
102
+ if (DC(GpuIndexIVF)) {
103
+ std::string sub_name = name.substr(strlen("quantizer_"));
104
+ set_index_parameter(ix->quantizer, sub_name, val);
105
+ return;
106
+ }
107
+ }
108
+
86
109
  // maybe normal index parameters apply?
87
110
  ParameterSpace::set_index_parameter(index, name, val);
88
111
  }
@@ -18,6 +18,7 @@
18
18
  #include <faiss/IndexPreTransform.h>
19
19
  #include <faiss/IndexReplicas.h>
20
20
  #include <faiss/IndexScalarQuantizer.h>
21
+ #include <faiss/IndexShardsIVF.h>
21
22
  #include <faiss/MetaIndexes.h>
22
23
  #include <faiss/gpu/GpuIndex.h>
23
24
  #include <faiss/gpu/GpuIndexFlat.h>
@@ -116,7 +117,6 @@ ToGpuCloner::ToGpuCloner(
116
117
  : GpuClonerOptions(options), provider(prov), device(device) {}
117
118
 
118
119
  Index* ToGpuCloner::clone_Index(const Index* index) {
119
- using idx_t = Index::idx_t;
120
120
  if (auto ifl = dynamic_cast<const IndexFlat*>(index)) {
121
121
  GpuIndexFlatConfig config;
122
122
  config.device = device;
@@ -227,8 +227,8 @@ ToGpuClonerMultiple::ToGpuClonerMultiple(
227
227
  std::vector<int>& devices,
228
228
  const GpuMultipleClonerOptions& options)
229
229
  : GpuMultipleClonerOptions(options) {
230
- FAISS_ASSERT(provider.size() == devices.size());
231
- for (int i = 0; i < provider.size(); i++) {
230
+ FAISS_THROW_IF_NOT(provider.size() == devices.size());
231
+ for (size_t i = 0; i < provider.size(); i++) {
232
232
  sub_cloners.push_back(ToGpuCloner(provider[i], devices[i], options));
233
233
  }
234
234
  }
@@ -241,28 +241,43 @@ ToGpuClonerMultiple::ToGpuClonerMultiple(
241
241
  void ToGpuClonerMultiple::copy_ivf_shard(
242
242
  const IndexIVF* index_ivf,
243
243
  IndexIVF* idx2,
244
- long n,
245
- long i) {
244
+ idx_t n,
245
+ idx_t i) {
246
246
  if (shard_type == 2) {
247
- long i0 = i * index_ivf->ntotal / n;
248
- long i1 = (i + 1) * index_ivf->ntotal / n;
247
+ idx_t i0 = i * index_ivf->ntotal / n;
248
+ idx_t i1 = (i + 1) * index_ivf->ntotal / n;
249
249
 
250
250
  if (verbose)
251
251
  printf("IndexShards shard %ld indices %ld:%ld\n", i, i0, i1);
252
- index_ivf->copy_subset_to(*idx2, 2, i0, i1);
252
+ index_ivf->copy_subset_to(
253
+ *idx2, InvertedLists::SUBSET_TYPE_ID_RANGE, i0, i1);
253
254
  FAISS_ASSERT(idx2->ntotal == i1 - i0);
254
255
  } else if (shard_type == 1) {
255
256
  if (verbose)
256
257
  printf("IndexShards shard %ld select modulo %ld = %ld\n", i, n, i);
257
- index_ivf->copy_subset_to(*idx2, 1, n, i);
258
+ index_ivf->copy_subset_to(
259
+ *idx2, InvertedLists::SUBSET_TYPE_ID_MOD, n, i);
260
+ } else if (shard_type == 4) {
261
+ idx_t i0 = i * index_ivf->nlist / n;
262
+ idx_t i1 = (i + 1) * index_ivf->nlist / n;
263
+ if (verbose) {
264
+ printf("IndexShards %ld/%ld select lists %d:%d\n",
265
+ i,
266
+ n,
267
+ int(i0),
268
+ int(i1));
269
+ }
270
+ index_ivf->copy_subset_to(
271
+ *idx2, InvertedLists::SUBSET_TYPE_INVLIST, i0, i1);
258
272
  } else {
259
273
  FAISS_THROW_FMT("shard_type %d not implemented", shard_type);
260
274
  }
261
275
  }
262
276
 
263
277
  Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
264
- long n = sub_cloners.size();
278
+ idx_t n = sub_cloners.size();
265
279
 
280
+ auto index_ivf = dynamic_cast<const faiss::IndexIVF*>(index);
266
281
  auto index_ivfpq = dynamic_cast<const faiss::IndexIVFPQ*>(index);
267
282
  auto index_ivfflat = dynamic_cast<const faiss::IndexIVFFlat*>(index);
268
283
  auto index_ivfsq =
@@ -274,16 +289,36 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
274
289
  "IndexIVFFlat, IndexIVFScalarQuantizer, "
275
290
  "IndexFlat and IndexIVFPQ");
276
291
 
292
+ // decide what coarse quantizer the sub-indexes are going to have
293
+ const Index* quantizer = nullptr;
294
+ std::unique_ptr<Index> new_quantizer;
295
+ if (index_ivf) {
296
+ quantizer = index_ivf->quantizer;
297
+ if (common_ivf_quantizer &&
298
+ !dynamic_cast<const IndexFlat*>(quantizer)) {
299
+ // then we flatten the coarse quantizer so that everything remains
300
+ // on GPU
301
+ new_quantizer.reset(
302
+ new IndexFlat(quantizer->d, quantizer->metric_type));
303
+ std::vector<float> centroids(quantizer->d * quantizer->ntotal);
304
+ quantizer->reconstruct_n(0, quantizer->ntotal, centroids.data());
305
+ new_quantizer->add(quantizer->ntotal, centroids.data());
306
+ quantizer = new_quantizer.get();
307
+ }
308
+ }
309
+
277
310
  std::vector<faiss::Index*> shards(n);
278
311
 
279
- for (long i = 0; i < n; i++) {
312
+ for (idx_t i = 0; i < n; i++) {
280
313
  // make a shallow copy
281
- if (reserveVecs)
314
+ if (reserveVecs) {
282
315
  sub_cloners[i].reserveVecs = (reserveVecs + n - 1) / n;
283
-
316
+ }
317
+ // note: const_casts here are harmless because the indexes build here
318
+ // are short-lived, translated immediately to GPU indexes.
284
319
  if (index_ivfpq) {
285
320
  faiss::IndexIVFPQ idx2(
286
- index_ivfpq->quantizer,
321
+ const_cast<Index*>(quantizer),
287
322
  index_ivfpq->d,
288
323
  index_ivfpq->nlist,
289
324
  index_ivfpq->code_size,
@@ -297,7 +332,7 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
297
332
  shards[i] = sub_cloners[i].clone_Index(&idx2);
298
333
  } else if (index_ivfflat) {
299
334
  faiss::IndexIVFFlat idx2(
300
- index_ivfflat->quantizer,
335
+ const_cast<Index*>(quantizer),
301
336
  index->d,
302
337
  index_ivfflat->nlist,
303
338
  index_ivfflat->metric_type);
@@ -307,7 +342,7 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
307
342
  shards[i] = sub_cloners[i].clone_Index(&idx2);
308
343
  } else if (index_ivfsq) {
309
344
  faiss::IndexIVFScalarQuantizer idx2(
310
- index_ivfsq->quantizer,
345
+ const_cast<Index*>(quantizer),
311
346
  index->d,
312
347
  index_ivfsq->nlist,
313
348
  index_ivfsq->sq.qtype,
@@ -323,40 +358,52 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
323
358
  faiss::IndexFlat idx2(index->d, index->metric_type);
324
359
  shards[i] = sub_cloners[i].clone_Index(&idx2);
325
360
  if (index->ntotal > 0) {
326
- long i0 = index->ntotal * i / n;
327
- long i1 = index->ntotal * (i + 1) / n;
361
+ idx_t i0 = index->ntotal * i / n;
362
+ idx_t i1 = index->ntotal * (i + 1) / n;
328
363
  shards[i]->add(i1 - i0, index_flat->get_xb() + i0 * index->d);
329
364
  }
330
365
  }
331
366
  }
332
367
 
333
368
  bool successive_ids = index_flat != nullptr;
334
- faiss::IndexShards* res =
335
- new faiss::IndexShards(index->d, true, successive_ids);
369
+ faiss::IndexShards* res;
370
+ if (common_ivf_quantizer && index_ivf) {
371
+ this->shard = false;
372
+ Index* common_quantizer = clone_Index(index_ivf->quantizer);
373
+ this->shard = true;
374
+ IndexShardsIVF* idx = new faiss::IndexShardsIVF(
375
+ common_quantizer, index_ivf->nlist, true, false);
376
+ idx->own_fields = true;
377
+ idx->own_indices = true;
378
+ res = idx;
379
+ } else {
380
+ res = new faiss::IndexShards(index->d, true, successive_ids);
381
+ res->own_indices = true;
382
+ }
336
383
 
337
384
  for (int i = 0; i < n; i++) {
338
385
  res->add_shard(shards[i]);
339
386
  }
340
- res->own_fields = true;
341
387
  FAISS_ASSERT(index->ntotal == res->ntotal);
342
388
  return res;
343
389
  }
344
390
 
345
391
  Index* ToGpuClonerMultiple::clone_Index(const Index* index) {
346
- long n = sub_cloners.size();
347
- if (n == 1)
392
+ idx_t n = sub_cloners.size();
393
+ if (n == 1) {
348
394
  return sub_cloners[0].clone_Index(index);
395
+ }
349
396
 
350
397
  if (dynamic_cast<const IndexFlat*>(index) ||
351
- dynamic_cast<const faiss::IndexIVFFlat*>(index) ||
352
- dynamic_cast<const faiss::IndexIVFScalarQuantizer*>(index) ||
353
- dynamic_cast<const faiss::IndexIVFPQ*>(index)) {
398
+ dynamic_cast<const IndexIVFFlat*>(index) ||
399
+ dynamic_cast<const IndexIVFScalarQuantizer*>(index) ||
400
+ dynamic_cast<const IndexIVFPQ*>(index)) {
354
401
  if (!shard) {
355
402
  IndexReplicas* res = new IndexReplicas();
356
403
  for (auto& sub_cloner : sub_cloners) {
357
404
  res->addIndex(sub_cloner.clone_Index(index));
358
405
  }
359
- res->own_fields = true;
406
+ res->own_indices = true;
360
407
  return res;
361
408
  } else {
362
409
  return clone_Index_to_shards(index);
@@ -373,8 +420,8 @@ Index* ToGpuClonerMultiple::clone_Index(const Index* index) {
373
420
  for (int m = 0; m < pq.M; m++) {
374
421
  // which GPU(s) will be assigned to this sub-quantizer
375
422
 
376
- long i0 = m * n / pq.M;
377
- long i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1;
423
+ idx_t i0 = m * n / pq.M;
424
+ idx_t i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1;
378
425
  std::vector<ToGpuCloner> sub_cloners_2;
379
426
  sub_cloners_2.insert(
380
427
  sub_cloners_2.begin(),
@@ -55,8 +55,8 @@ struct ToGpuClonerMultiple : faiss::Cloner, GpuMultipleClonerOptions {
55
55
  void copy_ivf_shard(
56
56
  const IndexIVF* index_ivf,
57
57
  IndexIVF* idx2,
58
- long n,
59
- long i);
58
+ idx_t n,
59
+ idx_t i);
60
60
 
61
61
  Index* clone_Index_to_shards(const Index* index);
62
62
 
@@ -14,41 +14,42 @@ namespace gpu {
14
14
 
15
15
  /// set some options on how to copy to GPU
16
16
  struct GpuClonerOptions {
17
- GpuClonerOptions();
18
-
19
17
  /// how should indices be stored on index types that support indices
20
18
  /// (anything but GpuIndexFlat*)?
21
- IndicesOptions indicesOptions;
19
+ IndicesOptions indicesOptions = INDICES_64_BIT;
22
20
 
23
21
  /// is the coarse quantizer in float16?
24
- bool useFloat16CoarseQuantizer;
22
+ bool useFloat16CoarseQuantizer = false;
25
23
 
26
24
  /// for GpuIndexIVFFlat, is storage in float16?
27
25
  /// for GpuIndexIVFPQ, are intermediate calculations in float16?
28
- bool useFloat16;
26
+ bool useFloat16 = false;
29
27
 
30
28
  /// use precomputed tables?
31
- bool usePrecomputed;
29
+ bool usePrecomputed = false;
32
30
 
33
31
  /// reserve vectors in the invfiles?
34
- long reserveVecs;
32
+ long reserveVecs = 0;
35
33
 
36
34
  /// For GpuIndexFlat, store data in transposed layout?
37
- bool storeTransposed;
35
+ bool storeTransposed = false;
38
36
 
39
37
  /// Set verbose options on the index
40
- bool verbose;
38
+ bool verbose = false;
41
39
  };
42
40
 
43
41
  struct GpuMultipleClonerOptions : public GpuClonerOptions {
44
- GpuMultipleClonerOptions();
45
-
46
42
  /// Whether to shard the index across GPUs, versus replication
47
43
  /// across GPUs
48
- bool shard;
44
+ bool shard = false;
49
45
 
50
46
  /// IndexIVF::copy_subset_to subset type
51
- int shard_type;
47
+ int shard_type = 1;
48
+
49
+ /// set to true if an IndexIVF is to be dispatched to multiple GPUs with a
50
+ /// single common IVF quantizer, ie. only the inverted lists are sharded on
51
+ /// the sub-indexes (uses an IndexShardsIVF)
52
+ bool common_ivf_quantizer = false;
52
53
  };
53
54
 
54
55
  } // namespace gpu
@@ -45,7 +45,8 @@ struct GpuDistanceParams {
45
45
  outDistances(nullptr),
46
46
  ignoreOutDistances(false),
47
47
  outIndicesType(IndicesDataType::I64),
48
- outIndices(nullptr) {}
48
+ outIndices(nullptr),
49
+ device(-1) {}
49
50
 
50
51
  //
51
52
  // Search parameters
@@ -76,7 +77,7 @@ struct GpuDistanceParams {
76
77
  const void* vectors;
77
78
  DistanceDataType vectorType;
78
79
  bool vectorsRowMajor;
79
- int numVectors;
80
+ idx_t numVectors;
80
81
 
81
82
  /// Precomputed L2 norms for each vector in `vectors`, which can be
82
83
  /// optionally provided in advance to speed computation for METRIC_L2
@@ -93,7 +94,7 @@ struct GpuDistanceParams {
93
94
  const void* queries;
94
95
  DistanceDataType queryType;
95
96
  bool queriesRowMajor;
96
- int numQueries;
97
+ idx_t numQueries;
97
98
 
98
99
  //
99
100
  // Output results
@@ -112,6 +113,17 @@ struct GpuDistanceParams {
112
113
  /// innermost (row major). Not used if k == -1 (all pairwise distances)
113
114
  IndicesDataType outIndicesType;
114
115
  void* outIndices;
116
+
117
+ //
118
+ // Execution information
119
+ //
120
+
121
+ /// On which GPU device should the search run?
122
+ /// -1 indicates that the current CUDA thread-local device
123
+ /// (via cudaGetDevice/cudaSetDevice) is used
124
+ /// Otherwise, an integer 0 <= device < numDevices indicates the device for
125
+ /// execution
126
+ int device;
115
127
  };
116
128
 
117
129
  /// A wrapper for gpu/impl/Distance.cuh to expose direct brute-force k-nearest
@@ -137,13 +149,13 @@ void bruteForceKnn(
137
149
  // dims x numVectors, with numVectors innermost
138
150
  const float* vectors,
139
151
  bool vectorsRowMajor,
140
- int numVectors,
152
+ idx_t numVectors,
141
153
  // If queriesRowMajor is true, this is
142
154
  // numQueries x dims, with dims innermost; otherwise,
143
155
  // dims x numQueries, with numQueries innermost
144
156
  const float* queries,
145
157
  bool queriesRowMajor,
146
- int numQueries,
158
+ idx_t numQueries,
147
159
  int dims,
148
160
  int k,
149
161
  // A region of memory size numQueries x k, with k
@@ -151,7 +163,7 @@ void bruteForceKnn(
151
163
  float* outDistances,
152
164
  // A region of memory size numQueries x k, with k
153
165
  // innermost (row major)
154
- Index::idx_t* outIndices);
166
+ idx_t* outIndices);
155
167
 
156
168
  } // namespace gpu
157
169
  } // namespace faiss
@@ -51,30 +51,31 @@ class GpuIndex : public faiss::Index {
51
51
  /// `x` can be resident on the CPU or any GPU; copies are performed
52
52
  /// as needed
53
53
  /// Handles paged adds if the add set is too large; calls addInternal_
54
- void add(Index::idx_t, const float* x) override;
54
+ void add(idx_t, const float* x) override;
55
55
 
56
56
  /// `x` and `ids` can be resident on the CPU or any GPU; copies are
57
57
  /// performed as needed
58
58
  /// Handles paged adds if the add set is too large; calls addInternal_
59
- void add_with_ids(Index::idx_t n, const float* x, const Index::idx_t* ids)
60
- override;
59
+ void add_with_ids(idx_t n, const float* x, const idx_t* ids) override;
61
60
 
62
61
  /// `x` and `labels` can be resident on the CPU or any GPU; copies are
63
62
  /// performed as needed
64
63
  void assign(
65
- Index::idx_t n,
64
+ idx_t n,
66
65
  const float* x,
67
- Index::idx_t* labels,
68
- Index::idx_t k = 1) const override;
66
+ idx_t* labels,
67
+ // faiss::Index has idx_t for k
68
+ idx_t k = 1) const override;
69
69
 
70
70
  /// `x`, `distances` and `labels` can be resident on the CPU or any
71
71
  /// GPU; copies are performed as needed
72
72
  void search(
73
- Index::idx_t n,
73
+ idx_t n,
74
74
  const float* x,
75
- Index::idx_t k,
75
+ // faiss::Index has idx_t for k
76
+ idx_t k,
76
77
  float* distances,
77
- Index::idx_t* labels,
78
+ idx_t* labels,
78
79
  const SearchParameters* params = nullptr) const override;
79
80
 
80
81
  /// `x`, `distances` and `labels` and `recons` can be resident on the CPU or
@@ -82,6 +83,7 @@ class GpuIndex : public faiss::Index {
82
83
  void search_and_reconstruct(
83
84
  idx_t n,
84
85
  const float* x,
86
+ // faiss::Index has idx_t for k
85
87
  idx_t k,
86
88
  float* distances,
87
89
  idx_t* labels,
@@ -90,16 +92,16 @@ class GpuIndex : public faiss::Index {
90
92
 
91
93
  /// Overridden to force GPU indices to provide their own GPU-friendly
92
94
  /// implementation
93
- void compute_residual(const float* x, float* residual, Index::idx_t key)
95
+ void compute_residual(const float* x, float* residual, idx_t key)
94
96
  const override;
95
97
 
96
98
  /// Overridden to force GPU indices to provide their own GPU-friendly
97
99
  /// implementation
98
100
  void compute_residual_n(
99
- Index::idx_t n,
101
+ idx_t n,
100
102
  const float* xs,
101
103
  float* residuals,
102
- const Index::idx_t* keys) const override;
104
+ const idx_t* keys) const override;
103
105
 
104
106
  protected:
105
107
  /// Copy what we need from the CPU equivalent
@@ -114,43 +116,43 @@ class GpuIndex : public faiss::Index {
114
116
 
115
117
  /// Overridden to actually perform the add
116
118
  /// All data is guaranteed to be resident on our device
117
- virtual void addImpl_(int n, const float* x, const Index::idx_t* ids) = 0;
119
+ virtual void addImpl_(idx_t n, const float* x, const idx_t* ids) = 0;
118
120
 
119
121
  /// Overridden to actually perform the search
120
122
  /// All data is guaranteed to be resident on our device
121
123
  virtual void searchImpl_(
122
- int n,
124
+ idx_t n,
123
125
  const float* x,
124
126
  int k,
125
127
  float* distances,
126
- Index::idx_t* labels,
128
+ idx_t* labels,
127
129
  const SearchParameters* params) const = 0;
128
130
 
129
131
  private:
130
132
  /// Handles paged adds if the add set is too large, passes to
131
133
  /// addImpl_ to actually perform the add for the current page
132
- void addPaged_(int n, const float* x, const Index::idx_t* ids);
134
+ void addPaged_(idx_t n, const float* x, const idx_t* ids);
133
135
 
134
136
  /// Calls addImpl_ for a single page of GPU-resident data
135
- void addPage_(int n, const float* x, const Index::idx_t* ids);
137
+ void addPage_(idx_t n, const float* x, const idx_t* ids);
136
138
 
137
139
  /// Calls searchImpl_ for a single page of GPU-resident data
138
140
  void searchNonPaged_(
139
- int n,
141
+ idx_t n,
140
142
  const float* x,
141
143
  int k,
142
144
  float* outDistancesData,
143
- Index::idx_t* outIndicesData,
145
+ idx_t* outIndicesData,
144
146
  const SearchParameters* params) const;
145
147
 
146
148
  /// Calls searchImpl_ for a single page of GPU-resident data,
147
149
  /// handling paging of the data and copies from the CPU
148
150
  void searchFromCpuPaged_(
149
- int n,
151
+ idx_t n,
150
152
  const float* x,
151
153
  int k,
152
154
  float* outDistancesData,
153
- Index::idx_t* outIndicesData,
155
+ idx_t* outIndicesData,
154
156
  const SearchParameters* params) const;
155
157
 
156
158
  protected: