faiss 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +9 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +9 -2
  6. data/ext/faiss/index.cpp +1 -1
  7. data/ext/faiss/index_binary.cpp +2 -2
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +7 -7
  11. data/vendor/faiss/faiss/AutoTune.h +1 -2
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -22
  13. data/vendor/faiss/faiss/Clustering.h +40 -21
  14. data/vendor/faiss/faiss/IVFlib.cpp +26 -12
  15. data/vendor/faiss/faiss/Index.cpp +1 -1
  16. data/vendor/faiss/faiss/Index.h +40 -10
  17. data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
  20. data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
  21. data/vendor/faiss/faiss/IndexBinary.h +8 -19
  22. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
  23. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
  24. data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
  26. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +107 -188
  27. data/vendor/faiss/faiss/IndexFastScan.cpp +95 -146
  28. data/vendor/faiss/faiss/IndexFastScan.h +9 -8
  29. data/vendor/faiss/faiss/IndexFlat.cpp +206 -10
  30. data/vendor/faiss/faiss/IndexFlat.h +20 -1
  31. data/vendor/faiss/faiss/IndexFlatCodes.cpp +170 -5
  32. data/vendor/faiss/faiss/IndexFlatCodes.h +23 -4
  33. data/vendor/faiss/faiss/IndexHNSW.cpp +231 -382
  34. data/vendor/faiss/faiss/IndexHNSW.h +62 -49
  35. data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
  36. data/vendor/faiss/faiss/IndexIDMap.h +24 -2
  37. data/vendor/faiss/faiss/IndexIVF.cpp +162 -56
  38. data/vendor/faiss/faiss/IndexIVF.h +46 -6
  39. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +33 -26
  40. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +6 -2
  41. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
  43. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +502 -401
  44. data/vendor/faiss/faiss/IndexIVFFastScan.h +63 -26
  45. data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
  46. data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
  47. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
  48. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
  49. data/vendor/faiss/faiss/IndexIVFPQ.cpp +79 -125
  50. data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
  51. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +39 -52
  52. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
  53. data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
  54. data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
  55. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
  56. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
  57. data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
  58. data/vendor/faiss/faiss/IndexLattice.cpp +1 -19
  59. data/vendor/faiss/faiss/IndexLattice.h +3 -22
  60. data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -33
  61. data/vendor/faiss/faiss/IndexNNDescent.h +1 -1
  62. data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
  63. data/vendor/faiss/faiss/IndexNSG.h +11 -11
  64. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +56 -0
  65. data/vendor/faiss/faiss/IndexNeuralNetCodec.h +49 -0
  66. data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
  67. data/vendor/faiss/faiss/IndexPQ.h +1 -4
  68. data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
  69. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
  70. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  71. data/vendor/faiss/faiss/IndexRefine.cpp +54 -24
  72. data/vendor/faiss/faiss/IndexRefine.h +7 -0
  73. data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
  74. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +25 -17
  75. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
  76. data/vendor/faiss/faiss/IndexShards.cpp +21 -29
  77. data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
  78. data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
  79. data/vendor/faiss/faiss/MatrixStats.h +21 -9
  80. data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
  81. data/vendor/faiss/faiss/MetricType.h +7 -2
  82. data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
  83. data/vendor/faiss/faiss/VectorTransform.h +7 -7
  84. data/vendor/faiss/faiss/clone_index.cpp +15 -10
  85. data/vendor/faiss/faiss/clone_index.h +3 -0
  86. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +95 -17
  87. data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +152 -0
  88. data/vendor/faiss/faiss/cppcontrib/factory_tools.h +24 -0
  89. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +83 -30
  90. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +123 -8
  91. data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
  92. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +13 -0
  93. data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
  94. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -1
  95. data/vendor/faiss/faiss/gpu/GpuIndex.h +30 -12
  96. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +282 -0
  97. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +14 -9
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +20 -3
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
  101. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
  102. data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +142 -17
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
  106. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +26 -21
  107. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +7 -1
  108. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +8 -5
  109. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
  110. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
  111. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +332 -40
  112. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
  113. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
  114. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
  115. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
  116. data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
  117. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +4 -1
  118. data/vendor/faiss/faiss/gpu/utils/Timer.h +1 -1
  119. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
  121. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +26 -1
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +10 -3
  123. data/vendor/faiss/faiss/impl/DistanceComputer.h +70 -1
  124. data/vendor/faiss/faiss/impl/FaissAssert.h +4 -2
  125. data/vendor/faiss/faiss/impl/FaissException.h +13 -34
  126. data/vendor/faiss/faiss/impl/HNSW.cpp +605 -186
  127. data/vendor/faiss/faiss/impl/HNSW.h +52 -30
  128. data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +11 -9
  130. data/vendor/faiss/faiss/impl/LookupTableScaler.h +34 -0
  131. data/vendor/faiss/faiss/impl/NNDescent.cpp +42 -27
  132. data/vendor/faiss/faiss/impl/NSG.cpp +0 -29
  133. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  134. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
  135. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  136. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -22
  137. data/vendor/faiss/faiss/impl/ProductQuantizer.h +6 -2
  138. data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
  139. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
  141. data/vendor/faiss/faiss/impl/ResultHandler.h +347 -172
  142. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1104 -147
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +3 -8
  144. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +285 -42
  145. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx512.h +248 -0
  146. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
  147. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
  148. data/vendor/faiss/faiss/impl/index_read.cpp +74 -34
  149. data/vendor/faiss/faiss/impl/index_read_utils.h +37 -0
  150. data/vendor/faiss/faiss/impl/index_write.cpp +88 -51
  151. data/vendor/faiss/faiss/impl/io.cpp +23 -15
  152. data/vendor/faiss/faiss/impl/io.h +4 -4
  153. data/vendor/faiss/faiss/impl/io_macros.h +6 -0
  154. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  155. data/vendor/faiss/faiss/impl/platform_macros.h +40 -1
  156. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +14 -0
  157. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
  158. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
  159. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +487 -49
  160. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
  161. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
  162. data/vendor/faiss/faiss/impl/simd_result_handlers.h +481 -225
  163. data/vendor/faiss/faiss/index_factory.cpp +41 -20
  164. data/vendor/faiss/faiss/index_io.h +12 -5
  165. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +28 -8
  166. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +3 -0
  167. data/vendor/faiss/faiss/invlists/DirectMap.cpp +10 -2
  168. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +73 -17
  169. data/vendor/faiss/faiss/invlists/InvertedLists.h +26 -8
  170. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +24 -9
  171. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +2 -1
  172. data/vendor/faiss/faiss/python/python_callbacks.cpp +4 -4
  173. data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
  174. data/vendor/faiss/faiss/utils/Heap.h +105 -0
  175. data/vendor/faiss/faiss/utils/NeuralNet.cpp +342 -0
  176. data/vendor/faiss/faiss/utils/NeuralNet.h +147 -0
  177. data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
  178. data/vendor/faiss/faiss/utils/bf16.h +36 -0
  179. data/vendor/faiss/faiss/utils/distances.cpp +147 -123
  180. data/vendor/faiss/faiss/utils/distances.h +86 -9
  181. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
  182. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
  183. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  184. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
  185. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
  186. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
  187. data/vendor/faiss/faiss/utils/distances_simd.cpp +1589 -243
  188. data/vendor/faiss/faiss/utils/extra_distances-inl.h +70 -0
  189. data/vendor/faiss/faiss/utils/extra_distances.cpp +85 -137
  190. data/vendor/faiss/faiss/utils/extra_distances.h +3 -2
  191. data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
  192. data/vendor/faiss/faiss/utils/fp16.h +2 -0
  193. data/vendor/faiss/faiss/utils/hamming.cpp +163 -111
  194. data/vendor/faiss/faiss/utils/hamming.h +58 -0
  195. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
  196. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
  197. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +19 -88
  198. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +58 -0
  199. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
  200. data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
  201. data/vendor/faiss/faiss/utils/prefetch.h +77 -0
  202. data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
  203. data/vendor/faiss/faiss/utils/random.cpp +43 -0
  204. data/vendor/faiss/faiss/utils/random.h +25 -0
  205. data/vendor/faiss/faiss/utils/simdlib.h +10 -1
  206. data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
  207. data/vendor/faiss/faiss/utils/simdlib_avx512.h +296 -0
  208. data/vendor/faiss/faiss/utils/simdlib_neon.h +77 -79
  209. data/vendor/faiss/faiss/utils/simdlib_ppc64.h +1084 -0
  210. data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
  211. data/vendor/faiss/faiss/utils/sorting.h +27 -0
  212. data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +176 -0
  213. data/vendor/faiss/faiss/utils/utils.cpp +120 -7
  214. data/vendor/faiss/faiss/utils/utils.h +60 -20
  215. metadata +23 -4
  216. data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +0 -102

data/vendor/faiss/faiss/gpu/GpuIndexCagra.h
@@ -0,0 +1,282 @@
+ /**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+ /*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ #pragma once
+
+ #include <faiss/IndexIVF.h>
+ #include <faiss/gpu/GpuIndex.h>
+ #include <faiss/gpu/GpuIndexIVFPQ.h>
+
+ namespace faiss {
+ struct IndexHNSWCagra;
+ }
+
+ namespace faiss {
+ namespace gpu {
+
+ class RaftCagra;
+
+ enum class graph_build_algo {
+ /// Use IVF-PQ to build all-neighbors knn graph
+ IVF_PQ,
+ /// Experimental, use NN-Descent to build all-neighbors knn graph
+ NN_DESCENT
+ };
+
+ /// A type for specifying how PQ codebooks are created.
+ enum class codebook_gen { // NOLINT
+ PER_SUBSPACE = 0, // NOLINT
+ PER_CLUSTER = 1, // NOLINT
+ };
+
+ struct IVFPQBuildCagraConfig {
+ ///
+ /// The number of inverted lists (clusters)
+ ///
+ /// Hint: the number of vectors per cluster (`n_rows/n_lists`) should be
+ /// approximately 1,000 to 10,000.
+
+ uint32_t n_lists = 1024;
+ /// The number of iterations searching for kmeans centers (index building).
+ uint32_t kmeans_n_iters = 20;
+ /// The fraction of data to use during iterative kmeans building.
+ double kmeans_trainset_fraction = 0.5;
+ ///
+ /// The bit length of the vector element after compression by PQ.
+ ///
+ /// Possible values: [4, 5, 6, 7, 8].
+ ///
+ /// Hint: the smaller the 'pq_bits', the smaller the index size and the
+ /// better the search performance, but the lower the recall.
+
+ uint32_t pq_bits = 8;
+ ///
+ /// The dimensionality of the vector after compression by PQ. When zero, an
+ /// optimal value is selected using a heuristic.
+ ///
+ /// NB: `pq_dim /// pq_bits` must be a multiple of 8.
+ ///
+ /// Hint: a smaller 'pq_dim' results in a smaller index size and better
+ /// search performance, but lower recall. If 'pq_bits' is 8, 'pq_dim' can be
+ /// set to any number, but multiple of 8 are desirable for good performance.
+ /// If 'pq_bits' is not 8, 'pq_dim' should be a multiple of 8. For good
+ /// performance, it is desirable that 'pq_dim' is a multiple of 32. Ideally,
+ /// 'pq_dim' should be also a divisor of the dataset dim.
+
+ uint32_t pq_dim = 0;
+ /// How PQ codebooks are created.
+ codebook_gen codebook_kind = codebook_gen::PER_SUBSPACE;
+ ///
+ /// Apply a random rotation matrix on the input data and queries even if
+ /// `dim % pq_dim == 0`.
+ ///
+ /// Note: if `dim` is not multiple of `pq_dim`, a random rotation is always
+ /// applied to the input data and queries to transform the working space
+ /// from `dim` to `rot_dim`, which may be slightly larger than the original
+ /// space and and is a multiple of `pq_dim` (`rot_dim % pq_dim == 0`).
+ /// However, this transform is not necessary when `dim` is multiple of
+ /// `pq_dim`
+ /// (`dim == rot_dim`, hence no need in adding "extra" data columns /
+ /// features).
+ ///
+ /// By default, if `dim == rot_dim`, the rotation transform is initialized
+ /// with the identity matrix. When `force_random_rotation == true`, a random
+ /// orthogonal transform matrix is generated regardless of the values of
+ /// `dim` and `pq_dim`.
+
+ bool force_random_rotation = false;
+ ///
+ /// By default, the algorithm allocates more space than necessary for
+ /// individual clusters
+ /// (`list_data`). This allows to amortize the cost of memory allocation and
+ /// reduce the number of data copies during repeated calls to `extend`
+ /// (extending the database).
+ ///
+ /// The alternative is the conservative allocation behavior; when enabled,
+ /// the algorithm always allocates the minimum amount of memory required to
+ /// store the given number of records. Set this flag to `true` if you prefer
+ /// to use as little GPU memory for the database as possible.
+
+ bool conservative_memory_allocation = false;
+ };
+
+ struct IVFPQSearchCagraConfig {
+ /// The number of clusters to search.
+ uint32_t n_probes = 20;
+ ///
+ /// Data type of look up table to be created dynamically at search time.
+ ///
+ /// Possible values: [CUDA_R_32F, CUDA_R_16F, CUDA_R_8U]
+ ///
+ /// The use of low-precision types reduces the amount of shared memory
+ /// required at search time, so fast shared memory kernels can be used even
+ /// for datasets with large dimansionality. Note that the recall is slightly
+ /// degraded when low-precision type is selected.
+
+ cudaDataType_t lut_dtype = CUDA_R_32F;
+ ///
+ /// Storage data type for distance/similarity computed at search time.
+ ///
+ /// Possible values: [CUDA_R_16F, CUDA_R_32F]
+ ///
+ /// If the performance limiter at search time is device memory access,
+ /// selecting FP16 will improve performance slightly.
+
+ cudaDataType_t internal_distance_dtype = CUDA_R_32F;
+ ///
+ /// Preferred fraction of SM's unified memory / L1 cache to be used as
+ /// shared memory.
+ ///
+ /// Possible values: [0.0 - 1.0] as a fraction of the
+ /// `sharedMemPerMultiprocessor`.
+ ///
+ /// One wants to increase the carveout to make sure a good GPU occupancy for
+ /// the main search kernel, but not to keep it too high to leave some memory
+ /// to be used as L1 cache. Note, this value is interpreted only as a hint.
+ /// Moreover, a GPU usually allows only a fixed set of cache configurations,
+ /// so the provided value is rounded up to the nearest configuration. Refer
+ /// to the NVIDIA tuning guide for the target GPU architecture.
+ ///
+ /// Note, this is a low-level tuning parameter that can have drastic
+ /// negative effects on the search performance if tweaked incorrectly.
+
+ double preferred_shmem_carveout = 1.0;
+ };
+
+ struct GpuIndexCagraConfig : public GpuIndexConfig {
+ /// Degree of input graph for pruning.
+ size_t intermediate_graph_degree = 128;
+ /// Degree of output graph.
+ size_t graph_degree = 64;
+ /// ANN algorithm to build knn graph.
+ graph_build_algo build_algo = graph_build_algo::IVF_PQ;
+ /// Number of Iterations to run if building with NN_DESCENT
+ size_t nn_descent_niter = 20;
+
+ IVFPQBuildCagraConfig* ivf_pq_params = nullptr;
+ IVFPQSearchCagraConfig* ivf_pq_search_params = nullptr;
+ };
+
+ enum class search_algo {
+ /// For large batch sizes.
+ SINGLE_CTA,
+ /// For small batch sizes.
+ MULTI_CTA,
+ MULTI_KERNEL,
+ AUTO
+ };
+
+ enum class hash_mode { HASH, SMALL, AUTO };
+
+ struct SearchParametersCagra : SearchParameters {
+ /// Maximum number of queries to search at the same time (batch size). Auto
+ /// select when 0.
+ size_t max_queries = 0;
+
+ /// Number of intermediate search results retained during the search.
+ ///
+ /// This is the main knob to adjust trade off between accuracy and search
+ /// speed. Higher values improve the search accuracy.
+
+ size_t itopk_size = 64;
+
+ /// Upper limit of search iterations. Auto select when 0.
+ size_t max_iterations = 0;
+
+ // In the following we list additional search parameters for fine tuning.
+ // Reasonable default values are automatically chosen.
+
+ /// Which search implementation to use.
+ search_algo algo = search_algo::AUTO;
+
+ /// Number of threads used to calculate a single distance. 4, 8, 16, or 32.
+
+ size_t team_size = 0;
+
+ /// Number of graph nodes to select as the starting point for the search in
+ /// each iteration. aka search width?
+ size_t search_width = 1;
+ /// Lower limit of search iterations.
+ size_t min_iterations = 0;
+
+ /// Thread block size. 0, 64, 128, 256, 512, 1024. Auto selection when 0.
+ size_t thread_block_size = 0;
+ /// Hashmap type. Auto selection when AUTO.
+ hash_mode hashmap_mode = hash_mode::AUTO;
+ /// Lower limit of hashmap bit length. More than 8.
+ size_t hashmap_min_bitlen = 0;
+ /// Upper limit of hashmap fill rate. More than 0.1, less than 0.9.
+ float hashmap_max_fill_rate = 0.5;
+
+ /// Number of iterations of initial random seed node selection. 1 or more.
+
+ uint32_t num_random_samplings = 1;
+ /// Bit mask used for initial random seed node selection.
+ uint64_t seed = 0x128394;
+ };
+
+ struct GpuIndexCagra : public GpuIndex {
+ public:
+ GpuIndexCagra(
+ GpuResourcesProvider* provider,
+ int dims,
+ faiss::MetricType metric = faiss::METRIC_L2,
+ GpuIndexCagraConfig config = GpuIndexCagraConfig());
+
+ /// Trains CAGRA based on the given vector data
+ void train(idx_t n, const float* x) override;
+
+ /// Initialize ourselves from the given CPU index; will overwrite
+ /// all data in ourselves
+ void copyFrom(const faiss::IndexHNSWCagra* index);
+
+ /// Copy ourselves to the given CPU index; will overwrite all data
+ /// in the index instance
+ void copyTo(faiss::IndexHNSWCagra* index) const;
+
+ void reset() override;
+
+ std::vector<idx_t> get_knngraph() const;
+
+ protected:
+ bool addImplRequiresIDs_() const override;
+
+ void addImpl_(idx_t n, const float* x, const idx_t* ids) override;
+
+ /// Called from GpuIndex for search
+ void searchImpl_(
+ idx_t n,
+ const float* x,
+ int k,
+ float* distances,
+ idx_t* labels,
+ const SearchParameters* search_params) const override;
+
+ /// Our configuration options
+ const GpuIndexCagraConfig cagraConfig_;
+
+ /// Instance that we own; contains the inverted lists
+ std::shared_ptr<RaftCagra> index_;
+ };
+
+ } // namespace gpu
+ } // namespace faiss
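
A minimal usage sketch for the GpuIndexCagra declared above (not part of the diff): it assumes the pre-existing faiss::gpu::StandardGpuResources provider, the inherited GpuIndex::search() entry point, and a train-then-search flow, so the call sequence is illustrative rather than authoritative.

#include <faiss/gpu/GpuIndexCagra.h>
#include <faiss/gpu/StandardGpuResources.h>
#include <vector>

void cagra_demo(const float* xb, size_t nb, const float* xq, size_t nq, int d) {
    faiss::gpu::StandardGpuResources res;

    faiss::gpu::GpuIndexCagraConfig config;
    config.graph_degree = 64;                // degree of the output graph
    config.intermediate_graph_degree = 128;  // degree of the graph before pruning
    config.build_algo = faiss::gpu::graph_build_algo::IVF_PQ;

    faiss::gpu::GpuIndexCagra index(&res, d, faiss::METRIC_L2, config);
    index.train(nb, xb);                     // builds the CAGRA graph from the data

    int k = 10;
    std::vector<float> distances(nq * k);
    std::vector<faiss::idx_t> labels(nq * k);

    faiss::gpu::SearchParametersCagra params;
    params.itopk_size = 64;                  // main recall/speed knob per the comment above
    index.search(nq, xq, k, distances.data(), labels.data(), &params);
}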

data/vendor/faiss/faiss/gpu/GpuIndexFlat.h
@@ -24,15 +24,13 @@ namespace gpu {
  class FlatIndex;
 
  struct GpuIndexFlatConfig : public GpuIndexConfig {
- inline GpuIndexFlatConfig() : useFloat16(false) {}
-
  /// Whether or not data is stored as float16
- bool useFloat16;
+ bool ALIGNED(8) useFloat16 = false;
 
  /// Deprecated: no longer used
  /// Previously used to indicate whether internal storage of vectors is
  /// transposed
- bool storeTransposed;
+ bool storeTransposed = false;
  };
 
  /// Wrapper around the GPU implementation that looks like
@@ -115,6 +113,8 @@ class GpuIndexFlat : public GpuIndex {
  }
 
  protected:
+ void resetIndex_(int dims);
+
  /// Flat index does not require IDs as there is no storage available for
  /// them
  bool addImplRequiresIDs_() const override;
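
A short sketch (not part of the diff) of what the default member initializers above enable: the config can simply be value-constructed and only the fields of interest overridden. GpuIndexFlat and StandardGpuResources are pre-existing faiss/gpu types; the float16 flag is the one shown in the hunk.

#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/StandardGpuResources.h>

void build_flat_fp16(const float* xb, faiss::idx_t nb, int d) {
    faiss::gpu::StandardGpuResources res;

    faiss::gpu::GpuIndexFlatConfig config;  // all fields take their new defaults
    config.useFloat16 = true;               // store vectors as float16 on the GPU

    faiss::gpu::GpuIndexFlat index(&res, d, faiss::METRIC_L2, config);
    index.add(nb, xb);
}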

data/vendor/faiss/faiss/gpu/GpuIndexIVF.h
@@ -21,13 +21,17 @@ class GpuIndexFlat;
  class IVFBase;
 
  struct GpuIndexIVFConfig : public GpuIndexConfig {
- inline GpuIndexIVFConfig() : indicesOptions(INDICES_64_BIT) {}
-
  /// Index storage options for the GPU
- IndicesOptions indicesOptions;
+ IndicesOptions indicesOptions = INDICES_64_BIT;
 
  /// Configuration for the coarse quantizer object
  GpuIndexFlatConfig flatConfig;
+
+ /// This flag controls the CPU fallback logic for coarse quantizer
+ /// component of the index. When set to false (default), the cloner will
+ /// throw an exception for indices not implemented on GPU. When set to
+ /// true, it will fallback to a CPU implementation.
+ bool allowCpuCoarseQuantizer = false;
  };
 
  /// Base class of all GPU IVF index types. This (for now) deliberately does not
@@ -75,10 +79,10 @@ class GpuIndexIVF : public GpuIndex, public IndexIVFInterface {
  virtual void updateQuantizer() = 0;
 
  /// Returns the number of inverted lists we're managing
- idx_t getNumLists() const;
+ virtual idx_t getNumLists() const;
 
  /// Returns the number of vectors present in a particular inverted list
- idx_t getListLength(idx_t listId) const;
+ virtual idx_t getListLength(idx_t listId) const;
 
  /// Return the encoded vector data contained in a particular inverted list,
  /// for debugging purposes.
@@ -86,12 +90,13 @@ class GpuIndexIVF : public GpuIndex, public IndexIVFInterface {
  /// GPU-side representation.
  /// Otherwise, it is converted to the CPU format.
  /// compliant format, while the native GPU format may differ.
- std::vector<uint8_t> getListVectorData(idx_t listId, bool gpuFormat = false)
- const;
+ virtual std::vector<uint8_t> getListVectorData(
+ idx_t listId,
+ bool gpuFormat = false) const;
 
  /// Return the vector indices contained in a particular inverted list, for
  /// debugging purposes.
- std::vector<idx_t> getListIndices(idx_t listId) const;
+ virtual std::vector<idx_t> getListIndices(idx_t listId) const;
 
  void search_preassigned(
  idx_t n,
@@ -123,7 +128,7 @@ class GpuIndexIVF : public GpuIndex, public IndexIVFInterface {
  int getCurrentNProbe_(const SearchParameters* params) const;
  void verifyIVFSettings_() const;
  bool addImplRequiresIDs_() const override;
- void trainQuantizer_(idx_t n, const float* x);
+ virtual void trainQuantizer_(idx_t n, const float* x);
 
  /// Called from GpuIndex for add/add_with_ids
  void addImpl_(idx_t n, const float* x, const idx_t* ids) override;
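
A hedged sketch (not part of the diff) of the new allowCpuCoarseQuantizer flag described in the hunk above: copying a CPU IVF index whose coarse quantizer has no GPU implementation. Everything except the new flag is a pre-existing faiss API; the exact construction arguments are illustrative.

#include <faiss/IndexIVFFlat.h>
#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <faiss/gpu/StandardGpuResources.h>

void clone_to_gpu(const faiss::IndexIVFFlat& cpu_index) {
    faiss::gpu::StandardGpuResources res;

    faiss::gpu::GpuIndexIVFFlatConfig config;
    // Default (false) makes the cloner throw for coarse quantizers that are
    // not implemented on GPU; true keeps that component on the CPU instead.
    config.allowCpuCoarseQuantizer = true;

    faiss::gpu::GpuIndexIVFFlat gpu_index(
            &res, cpu_index.d, cpu_index.nlist, cpu_index.metric_type, config);
    gpu_index.copyFrom(&cpu_index);
}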

data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h
@@ -8,6 +8,8 @@
  #pragma once
 
  #include <faiss/gpu/GpuIndexIVF.h>
+ #include <faiss/impl/ScalarQuantizer.h>
+
  #include <memory>
 
  namespace faiss {
@@ -21,13 +23,9 @@ class IVFFlat;
  class GpuIndexFlat;
 
  struct GpuIndexIVFFlatConfig : public GpuIndexIVFConfig {
- inline GpuIndexIVFFlatConfig() : interleavedLayout(true) {}
-
  /// Use the alternative memory layout for the IVF lists
  /// (currently the default)
- bool interleavedLayout;
+ bool interleavedLayout = true;
  };
 
  /// Wrapper around the GPU implementation that looks like
@@ -87,6 +87,23 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
  /// Trains the coarse quantizer based on the given vector data
  void train(idx_t n, const float* x) override;
 
+ void reconstruct_n(idx_t i0, idx_t n, float* out) const override;
+
+ protected:
+ /// Initialize appropriate index
+ void setIndex_(
+ GpuResources* resources,
+ int dim,
+ int nlist,
+ faiss::MetricType metric,
+ float metricArg,
+ bool useResidual,
+ /// Optional ScalarQuantizer
+ faiss::ScalarQuantizer* scalarQ,
+ bool interleavedLayout,
+ IndicesOptions indicesOptions,
+ MemorySpace space);
+
  protected:
  /// Our configuration options
  const GpuIndexIVFFlatConfig ivfFlatConfig_;
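
A small sketch (not part of the diff) of the reconstruct_n override added above: reconstruct_n(i0, ni, recons) is the standard faiss::Index entry point, so stored vectors can now be copied back out of a GpuIndexIVFFlat.

#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <vector>

std::vector<float> dump_vectors(const faiss::gpu::GpuIndexIVFFlat& index) {
    std::vector<float> out(static_cast<size_t>(index.ntotal) * index.d);
    // Decode vectors [0, ntotal) into the output buffer.
    index.reconstruct_n(0, index.ntotal, out.data());
    return out;
}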

data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h
@@ -23,24 +23,19 @@ class GpuIndexFlat;
  class IVFPQ;
 
  struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
- inline GpuIndexIVFPQConfig()
- : useFloat16LookupTables(false),
- usePrecomputedTables(false),
- interleavedLayout(false),
- useMMCodeDistance(false) {}
-
  /// Whether or not float16 residual distance tables are used in the
  /// list scanning kernels. When subQuantizers * 2^bitsPerCode >
  /// 16384, this is required.
- bool useFloat16LookupTables;
+ bool useFloat16LookupTables = false;
 
  /// Whether or not we enable the precomputed table option for
  /// search, which can substantially increase the memory requirement.
- bool usePrecomputedTables;
+ bool usePrecomputedTables = false;
 
  /// Use the alternative memory layout for the IVF lists
- /// WARNING: this is a feature under development, do not use!
- bool interleavedLayout;
+ /// WARNING: this is a feature under development, and is only supported with
+ /// RAFT enabled for the index. Do not use if RAFT is not enabled.
+ bool interleavedLayout = false;
 
  /// Use GEMM-backed computation of PQ code distances for the no precomputed
  /// table version of IVFPQ.
@@ -50,7 +45,7 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
  /// Note that MM code distance is enabled automatically if one uses a number
  /// of dimensions per sub-quantizer that is not natively specialized (an odd
  /// number like 7 or so).
- bool useMMCodeDistance;
+ bool useMMCodeDistance = false;
  };
 
  /// IVFPQ index for the GPU
@@ -139,6 +134,22 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
  ProductQuantizer pq;
 
  protected:
+ /// Initialize appropriate index
+ void setIndex_(
+ GpuResources* resources,
+ int dim,
+ idx_t nlist,
+ faiss::MetricType metric,
+ float metricArg,
+ int numSubQuantizers,
+ int bitsPerSubQuantizer,
+ bool useFloat16LookupTables,
+ bool useMMCodeDistance,
+ bool interleavedLayout,
+ float* pqCentroidData,
+ IndicesOptions indicesOptions,
+ MemorySpace space);
+
  /// Throws errors if configuration settings are improper
  void verifyPQSettings_() const;
 

data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h
@@ -18,11 +18,9 @@ class IVFFlat;
  class GpuIndexFlat;
 
  struct GpuIndexIVFScalarQuantizerConfig : public GpuIndexIVFConfig {
- inline GpuIndexIVFScalarQuantizerConfig() : interleavedLayout(true) {}
-
  /// Use the alternative memory layout for the IVF lists
  /// (currently the default)
- bool interleavedLayout;
+ bool interleavedLayout = true;
  };
 
  /// Wrapper around the GPU implementation that looks like

data/vendor/faiss/faiss/gpu/GpuResources.cpp
@@ -4,6 +4,21 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+ /*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
  #include <faiss/gpu/GpuResources.h>
  #include <faiss/gpu/utils/DeviceUtils.h>
@@ -143,7 +158,7 @@ GpuMemoryReservation::~GpuMemoryReservation() {
  // GpuResources
  //
 
- GpuResources::~GpuResources() {}
+ GpuResources::~GpuResources() = default;
 
  cublasHandle_t GpuResources::getBlasHandleCurrentDevice() {
  return getBlasHandle(getCurrentDevice());
@@ -153,6 +168,12 @@ cudaStream_t GpuResources::getDefaultStreamCurrentDevice() {
  return getDefaultStream(getCurrentDevice());
  }
 
+ #if defined USE_NVIDIA_RAFT
+ raft::device_resources& GpuResources::getRaftHandleCurrentDevice() {
+ return getRaftHandle(getCurrentDevice());
+ }
+ #endif
+
  std::vector<cudaStream_t> GpuResources::getAlternateStreamsCurrentDevice() {
  return getAlternateStreams(getCurrentDevice());
  }
@@ -182,7 +203,7 @@ size_t GpuResources::getTempMemoryAvailableCurrentDevice() const {
  // GpuResourcesProvider
  //
 
- GpuResourcesProvider::~GpuResourcesProvider() {}
+ GpuResourcesProvider::~GpuResourcesProvider() = default;
 
  //
  // GpuResourcesProviderFromResourceInstance
@@ -192,7 +213,7 @@ GpuResourcesProviderFromInstance::GpuResourcesProviderFromInstance(
  std::shared_ptr<GpuResources> p)
  : res_(p) {}
 
- GpuResourcesProviderFromInstance::~GpuResourcesProviderFromInstance() {}
+ GpuResourcesProviderFromInstance::~GpuResourcesProviderFromInstance() = default;
 
  std::shared_ptr<GpuResources> GpuResourcesProviderFromInstance::getResources() {
  return res_;

data/vendor/faiss/faiss/gpu/GpuResources.h
@@ -4,16 +4,37 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+ /*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
  #pragma once
 
  #include <cublas_v2.h>
  #include <cuda_runtime.h>
  #include <faiss/impl/FaissAssert.h>
+
  #include <memory>
  #include <utility>
  #include <vector>
 
+ #if defined USE_NVIDIA_RAFT
+ #include <raft/core/device_resources.hpp>
+ #include <rmm/mr/device/device_memory_resource.hpp>
+ #endif
+
  namespace faiss {
  namespace gpu {
 
@@ -82,11 +103,7 @@ std::string memorySpaceToString(MemorySpace s);
 
  /// Information on what/where an allocation is
  struct AllocInfo {
- inline AllocInfo()
- : type(AllocType::Other),
- device(0),
- space(MemorySpace::Device),
- stream(nullptr) {}
+ inline AllocInfo() {}
 
  inline AllocInfo(AllocType at, int dev, MemorySpace sp, cudaStream_t st)
  : type(at), device(dev), space(sp), stream(st) {}
@@ -95,13 +112,13 @@ struct AllocInfo {
  std::string toString() const;
 
  /// The internal category of the allocation
- AllocType type;
+ AllocType type = AllocType::Other;
 
  /// The device on which the allocation is happening
- int device;
+ int device = 0;
 
  /// The memory space of the allocation
- MemorySpace space;
+ MemorySpace space = MemorySpace::Device;
 
  /// The stream on which new work on the memory will be ordered (e.g., if a
  /// piece of memory cached and to be returned for this call was last used on
@@ -111,7 +128,7 @@ struct AllocInfo {
  ///
  /// The memory manager guarantees that the returned memory is free to use
  /// without data races on this stream specified.
- cudaStream_t stream;
+ cudaStream_t stream = nullptr;
  };
 
  /// Create an AllocInfo for the current device with MemorySpace::Device
@@ -125,7 +142,7 @@ AllocInfo makeSpaceAlloc(AllocType at, MemorySpace sp, cudaStream_t st);
 
  /// Information on what/where an allocation is, along with how big it should be
  struct AllocRequest : public AllocInfo {
- inline AllocRequest() : AllocInfo(), size(0) {}
+ inline AllocRequest() {}
 
  inline AllocRequest(const AllocInfo& info, size_t sz)
  : AllocInfo(info), size(sz) {}
@@ -142,7 +159,11 @@ struct AllocRequest : public AllocInfo {
  std::string toString() const;
 
  /// The size in bytes of the allocation
- size_t size;
+ size_t size = 0;
+
+ #if defined USE_NVIDIA_RAFT
+ rmm::mr::device_memory_resource* mr = nullptr;
+ #endif
  };
 
  /// A RAII object that manages a temporary memory request
@@ -190,6 +211,13 @@ class GpuResources {
  /// given device
  virtual cudaStream_t getDefaultStream(int device) = 0;
 
+ #if defined USE_NVIDIA_RAFT
+ /// Returns the raft handle for the given device which can be used to
+ /// make calls to other raft primitives.
+ virtual raft::device_resources& getRaftHandle(int device) = 0;
+ raft::device_resources& getRaftHandleCurrentDevice();
+ #endif
+
  /// Overrides the default stream for a device to the user-supplied stream.
  /// The resources object does not own this stream (i.e., it will not destroy
  /// it).
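
A hedged sketch (not part of the diff) of reaching the new RAFT handle declared above through an existing StandardGpuResources provider; it assumes a build with RAFT support so that USE_NVIDIA_RAFT is defined, and that raft::device_resources exposes sync_stream() as in upstream RAFT.

#if defined USE_NVIDIA_RAFT
#include <faiss/gpu/StandardGpuResources.h>
#include <raft/core/device_resources.hpp>
#include <memory>

void use_raft_handle() {
    faiss::gpu::StandardGpuResources provider;
    std::shared_ptr<faiss::gpu::GpuResources> res = provider.getResources();

    // Handle for the current device, usable with other RAFT primitives.
    raft::device_resources& handle = res->getRaftHandleCurrentDevice();
    handle.sync_stream();  // e.g. wait for work queued on the handle's stream
}
#endif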