faiss 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (361)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +2 -1
  4. data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
  5. data/ext/faiss/index_binary.cpp +1 -1
  6. data/ext/faiss/kmeans.cpp +1 -1
  7. data/ext/faiss/pca_matrix.cpp +1 -1
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
  10. data/lib/faiss/version.rb +1 -1
  11. data/vendor/faiss/faiss/AutoTune.cpp +93 -80
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -240
  13. data/vendor/faiss/faiss/Clustering.h +6 -0
  14. data/vendor/faiss/faiss/IVFlib.cpp +41 -21
  15. data/vendor/faiss/faiss/Index.cpp +6 -5
  16. data/vendor/faiss/faiss/Index.h +5 -5
  17. data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
  21. data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
  22. data/vendor/faiss/faiss/IndexBinary.h +4 -4
  23. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
  26. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
  27. data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
  28. data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
  29. data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
  30. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
  31. data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
  32. data/vendor/faiss/faiss/IndexFastScan.h +25 -23
  33. data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
  34. data/vendor/faiss/faiss/IndexFlat.h +21 -18
  35. data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
  36. data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
  37. data/vendor/faiss/faiss/IndexHNSW.h +16 -2
  38. data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
  39. data/vendor/faiss/faiss/IndexIDMap.h +9 -7
  40. data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
  41. data/vendor/faiss/faiss/IndexIVF.h +33 -12
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
  44. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
  45. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
  46. data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
  47. data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
  48. data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
  49. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
  50. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
  51. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
  52. data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
  53. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
  54. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
  55. data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
  56. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
  57. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
  58. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
  59. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
  60. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
  61. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  62. data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
  63. data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
  64. data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
  65. data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
  66. data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
  67. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
  68. data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
  69. data/vendor/faiss/faiss/IndexPQ.h +3 -2
  70. data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
  71. data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
  72. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
  73. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  74. data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
  75. data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
  76. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
  77. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
  78. data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
  79. data/vendor/faiss/faiss/IndexRefine.h +4 -4
  80. data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
  81. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
  82. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
  83. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
  84. data/vendor/faiss/faiss/IndexShards.cpp +10 -9
  85. data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
  86. data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
  87. data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
  88. data/vendor/faiss/faiss/MetaIndexes.h +1 -1
  89. data/vendor/faiss/faiss/MetricType.h +14 -7
  90. data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
  91. data/vendor/faiss/faiss/SuperKMeans.h +97 -0
  92. data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
  93. data/vendor/faiss/faiss/VectorTransform.h +16 -16
  94. data/vendor/faiss/faiss/build.cpp +23 -0
  95. data/vendor/faiss/faiss/build.h +15 -0
  96. data/vendor/faiss/faiss/clone_index.cpp +48 -47
  97. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
  98. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
  99. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
  100. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
  101. data/vendor/faiss/faiss/factory_tools.cpp +5 -0
  102. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
  106. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
  107. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
  108. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
  109. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
  110. data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
  111. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
  112. data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
  113. data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
  114. data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
  115. data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
  116. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
  117. data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
  118. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
  119. data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
  120. data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
  121. data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
  122. data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
  123. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
  124. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
  125. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
  126. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
  127. data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
  128. data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
  129. data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
  130. data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
  131. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
  132. data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
  133. data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
  134. data/vendor/faiss/faiss/impl/FaissException.h +50 -3
  135. data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
  136. data/vendor/faiss/faiss/impl/HNSW.h +13 -34
  137. data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
  138. data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
  139. data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
  140. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
  141. data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
  142. data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
  143. data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
  144. data/vendor/faiss/faiss/impl/NSG.h +4 -4
  145. data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
  146. data/vendor/faiss/faiss/impl/Panorama.h +258 -87
  147. data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
  148. data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
  149. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
  150. data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
  151. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
  152. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
  153. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
  154. data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
  155. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
  156. data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
  157. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
  158. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
  159. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  160. data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
  161. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
  162. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
  163. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
  164. data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
  165. data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
  166. data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
  167. data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
  168. data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
  169. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
  170. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
  171. data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
  172. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
  173. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
  174. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
  175. data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
  176. data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
  177. data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
  178. data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
  179. data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
  180. data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
  181. data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
  182. data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
  183. data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
  184. data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
  185. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
  186. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
  187. data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
  188. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
  189. data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
  190. data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
  191. data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
  192. data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
  193. data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
  194. data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
  195. data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
  196. data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
  197. data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
  198. data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
  199. data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
  200. data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
  201. data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
  202. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
  203. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
  204. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
  205. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
  206. data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
  207. data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
  208. data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
  209. data/vendor/faiss/faiss/impl/io.cpp +6 -6
  210. data/vendor/faiss/faiss/impl/io_macros.h +33 -16
  211. data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
  212. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
  213. data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
  214. data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
  215. data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
  216. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
  217. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
  218. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
  219. data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
  220. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
  221. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
  222. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
  223. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
  224. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
  225. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
  226. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
  227. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
  228. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
  229. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
  230. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
  231. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
  232. data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
  233. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
  234. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
  235. data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
  236. data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
  237. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
  238. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
  239. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
  240. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
  241. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
  242. data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
  243. data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
  244. data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
  245. data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
  246. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
  247. data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
  248. data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
  249. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
  250. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
  251. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
  252. data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
  253. data/vendor/faiss/faiss/impl/svs_io.h +8 -2
  254. data/vendor/faiss/faiss/index_factory.cpp +86 -18
  255. data/vendor/faiss/faiss/index_io.h +24 -0
  256. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
  257. data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
  258. data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
  259. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
  260. data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
  261. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
  262. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
  263. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  264. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
  265. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
  266. data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
  267. data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
  268. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
  269. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
  270. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
  271. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
  272. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
  273. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
  274. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
  275. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
  276. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
  277. data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
  278. data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
  279. data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
  280. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
  281. data/vendor/faiss/faiss/utils/distances.cpp +390 -560
  282. data/vendor/faiss/faiss/utils/distances.h +20 -1
  283. data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
  284. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
  285. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
  286. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
  287. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
  288. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
  289. data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
  290. data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
  291. data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
  292. data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
  293. data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
  294. data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
  295. data/vendor/faiss/faiss/utils/hamming.h +92 -2
  296. data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
  297. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
  298. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
  299. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
  300. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
  301. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
  302. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
  303. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
  304. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
  305. data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
  306. data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
  307. data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
  308. data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
  309. data/vendor/faiss/faiss/utils/partitioning.h +31 -0
  310. data/vendor/faiss/faiss/utils/popcount.h +29 -0
  311. data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
  312. data/vendor/faiss/faiss/utils/prefetch.h +2 -2
  313. data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
  314. data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
  315. data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
  316. data/vendor/faiss/faiss/utils/random.cpp +6 -6
  317. data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
  318. data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
  319. data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
  320. data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
  321. data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
  322. data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
  323. data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
  324. data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
  325. data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
  326. data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
  327. data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
  328. data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
  329. data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
  330. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
  331. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
  332. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
  333. data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
  334. data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
  335. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
  336. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
  337. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
  338. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
  339. data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
  340. data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
  341. data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
  342. data/vendor/faiss/faiss/utils/utils.cpp +5 -5
  343. data/vendor/faiss/faiss/utils/utils.h +3 -3
  344. metadata +119 -34
  345. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
  346. data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
  347. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
  348. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
  349. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
  350. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
  351. data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
  352. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
  353. data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
  354. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
  355. data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
  356. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
  357. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
  358. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
  359. data/vendor/faiss/faiss/utils/simdlib.h +0 -42
  360. data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
  361. data/ext/faiss/{utils_rb.h → utils.h} +0 -0
@@ -26,8 +26,8 @@ struct VectorTransform {
26
26
  int d_in; ///! input dimension
27
27
  int d_out; ///! output dimension
28
28
 
29
- explicit VectorTransform(int d_in = 0, int d_out = 0)
30
- : d_in(d_in), d_out(d_out), is_trained(true) {}
29
+ explicit VectorTransform(int d_in_val = 0, int d_out_val = 0)
30
+ : d_in(d_in_val), d_out(d_out_val), is_trained(true) {}
31
31
 
32
32
  /// set if the VectorTransform does not require training, or if
33
33
  /// training is done already
@@ -82,9 +82,9 @@ struct LinearTransform : VectorTransform {
82
82
 
83
83
  /// both d_in > d_out and d_in < d_out are supported
84
84
  explicit LinearTransform(
85
- int d_in = 0,
86
- int d_out = 0,
87
- bool have_bias = false);
85
+ int din = 0,
86
+ int dout = 0,
87
+ bool have_bias_in = false);
88
88
 
89
89
  /// same as apply, but result is pre-allocated
90
90
  void apply_noalloc(idx_t n, const float* x, float* xt) const override;
@@ -114,8 +114,8 @@ struct LinearTransform : VectorTransform {
114
114
  /// Randomly rotate a set of vectors
115
115
  struct RandomRotationMatrix : LinearTransform {
116
116
  /// both d_in > d_out and d_in < d_out are supported
117
- RandomRotationMatrix(int d_in, int d_out)
118
- : LinearTransform(d_in, d_out, false) {}
117
+ RandomRotationMatrix(int d_in_val, int d_out_val)
118
+ : LinearTransform(d_in_val, d_out_val, false) {}
119
119
 
120
120
  /// must be called before the transform is used
121
121
  void init(int seed);
@@ -183,10 +183,10 @@ struct PCAMatrix : LinearTransform {
183
183
 
184
184
  // the final matrix is computed after random rotation and/or whitening
185
185
  explicit PCAMatrix(
186
- int d_in = 0,
187
- int d_out = 0,
188
- float eigen_power = 0,
189
- bool random_rotation = false);
186
+ int din = 0,
187
+ int dout = 0,
188
+ float eigen_power_in = 0,
189
+ bool random_rotation_in = false);
190
190
 
191
191
  /// train on n vectors. If n < d_in then the eigenvector matrix
192
192
  /// will be completed with 0s
@@ -233,7 +233,7 @@ struct ITQTransform : VectorTransform {
233
233
  // concatenation of PCA + ITQ transformation
234
234
  LinearTransform pca_then_itq;
235
235
 
236
- explicit ITQTransform(int d_in = 0, int d_out = 0, bool do_pca = false);
236
+ explicit ITQTransform(int din = 0, int dout = 0, bool do_pca_in = false);
237
237
 
238
238
  void train(idx_t n, const float* x) override;
239
239
 
@@ -267,7 +267,7 @@ struct OPQMatrix : LinearTransform {
267
267
  ProductQuantizer* pq = nullptr;
268
268
 
269
269
  /// if d2 != -1, output vectors of this dimension
270
- explicit OPQMatrix(int d = 0, int M = 1, int d2 = -1);
270
+ explicit OPQMatrix(int d = 0, int M_in = 1, int d2 = -1);
271
271
 
272
272
  void train(idx_t n, const float* x) override;
273
273
  };
@@ -280,12 +280,12 @@ struct RemapDimensionsTransform : VectorTransform {
280
280
  /// -1 -> set output to 0
281
281
  std::vector<int> map;
282
282
 
283
- RemapDimensionsTransform(int d_in, int d_out, const int* map);
283
+ RemapDimensionsTransform(int din, int dout, const int* map);
284
284
 
285
285
  /// remap input to output, skipping or inserting dimensions as needed
286
286
  /// if uniform: distribute dimensions uniformly
287
287
  /// otherwise just take the d_out first ones.
288
- RemapDimensionsTransform(int d_in, int d_out, bool uniform = true);
288
+ RemapDimensionsTransform(int din, int dout, bool uniform = true);
289
289
 
290
290
  void apply_noalloc(idx_t n, const float* x, float* xt) const override;
291
291
 
@@ -301,7 +301,7 @@ struct RemapDimensionsTransform : VectorTransform {
301
301
  struct NormalizationTransform : VectorTransform {
302
302
  float norm;
303
303
 
304
- explicit NormalizationTransform(int d, float norm = 2.0);
304
+ explicit NormalizationTransform(int d, float norm_in = 2.0);
305
305
  NormalizationTransform();
306
306
 
307
307
  void apply_noalloc(idx_t n, const float* x, float* xt) const override;
@@ -0,0 +1,23 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include "faiss/build.h"
9
+
10
+ namespace faiss {
11
+
12
+ bool has_omp() {
13
+ int omp_available = 1;
14
+ // Detect whether OpenMP is enabled by using the 'max' reduction to render
15
+ // the below assignment a no-op. This works:
16
+ // 1) without starting any threads
17
+ // 2) irrespective of the current thread limit
18
+ #pragma omp parallel reduction(max : omp_available) num_threads(1)
19
+ omp_available = 0;
20
+ return omp_available != 0;
21
+ }
22
+
23
+ } // namespace faiss
@@ -0,0 +1,15 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ namespace faiss {
11
+
12
+ // Returns true iff `faiss` was compiled with non-mocked OpenMP support.
13
+ bool has_omp();
14
+
15
+ } // namespace faiss
@@ -102,8 +102,8 @@ IndexIVF* Cloner::clone_IndexIVF(const IndexIVF* ivf) {
102
102
  TRYCLONE(IndexIVFRaBitQ, ivf)
103
103
 
104
104
  TRYCLONE(IndexIVFFlatDedup, ivf)
105
- TRYCLONE(IndexIVFFlat, ivf)
106
105
  TRYCLONE(IndexIVFFlatPanorama, ivf)
106
+ TRYCLONE(IndexIVFFlat, ivf)
107
107
 
108
108
  TRYCLONE(IndexIVFSpectralHash, ivf)
109
109
 
@@ -173,7 +173,7 @@ IndexRowwiseMinMaxBase* clone_IndexRowwiseMinMax(
173
173
  }
174
174
  }
175
175
 
176
- #define TRYCAST(classname) classname* res = dynamic_cast<classname*>(index)
176
+ #define TRYCAST(classname, var) auto* var = dynamic_cast<classname*>(index)
177
177
 
178
178
  void reset_AdditiveQuantizerIndex(Index* index) {
179
179
  auto clone_ProductQuantizers =
@@ -182,50 +182,50 @@ void reset_AdditiveQuantizerIndex(Index* index) {
182
182
  q = dynamic_cast<AdditiveQuantizer*>(clone_Quantizer(q));
183
183
  }
184
184
  };
185
- if (TRYCAST(IndexIVFLocalSearchQuantizerFastScan)) {
186
- res->aq = &res->lsq;
187
- } else if (TRYCAST(IndexIVFResidualQuantizerFastScan)) {
188
- res->aq = &res->rq;
189
- } else if (TRYCAST(IndexIVFProductLocalSearchQuantizerFastScan)) {
190
- res->aq = &res->plsq;
191
- clone_ProductQuantizers(res->plsq.quantizers);
192
- } else if (TRYCAST(IndexIVFProductResidualQuantizerFastScan)) {
193
- res->aq = &res->prq;
194
- clone_ProductQuantizers(res->prq.quantizers);
195
- } else if (TRYCAST(IndexIVFLocalSearchQuantizer)) {
196
- res->aq = &res->lsq;
197
- } else if (TRYCAST(IndexIVFResidualQuantizer)) {
198
- res->aq = &res->rq;
199
- } else if (TRYCAST(IndexIVFProductLocalSearchQuantizer)) {
200
- res->aq = &res->plsq;
201
- clone_ProductQuantizers(res->plsq.quantizers);
202
- } else if (TRYCAST(IndexIVFProductResidualQuantizer)) {
203
- res->aq = &res->prq;
204
- clone_ProductQuantizers(res->prq.quantizers);
205
- } else if (TRYCAST(IndexLocalSearchQuantizerFastScan)) {
206
- res->aq = &res->lsq;
207
- } else if (TRYCAST(IndexResidualQuantizerFastScan)) {
208
- res->aq = &res->rq;
209
- } else if (TRYCAST(IndexProductLocalSearchQuantizerFastScan)) {
210
- res->aq = &res->plsq;
211
- clone_ProductQuantizers(res->plsq.quantizers);
212
- } else if (TRYCAST(IndexProductResidualQuantizerFastScan)) {
213
- res->aq = &res->prq;
214
- clone_ProductQuantizers(res->prq.quantizers);
215
- } else if (TRYCAST(IndexLocalSearchQuantizer)) {
216
- res->aq = &res->lsq;
217
- } else if (TRYCAST(IndexResidualQuantizer)) {
218
- res->aq = &res->rq;
219
- } else if (TRYCAST(IndexProductLocalSearchQuantizer)) {
220
- res->aq = &res->plsq;
221
- clone_ProductQuantizers(res->plsq.quantizers);
222
- } else if (TRYCAST(IndexProductResidualQuantizer)) {
223
- res->aq = &res->prq;
224
- clone_ProductQuantizers(res->prq.quantizers);
225
- } else if (TRYCAST(LocalSearchCoarseQuantizer)) {
226
- res->aq = &res->lsq;
227
- } else if (TRYCAST(ResidualCoarseQuantizer)) {
228
- res->aq = &res->rq;
185
+ if (TRYCAST(IndexIVFLocalSearchQuantizerFastScan, r1)) {
186
+ r1->aq = &r1->lsq;
187
+ } else if (TRYCAST(IndexIVFResidualQuantizerFastScan, r2)) {
188
+ r2->aq = &r2->rq;
189
+ } else if (TRYCAST(IndexIVFProductLocalSearchQuantizerFastScan, r3)) {
190
+ r3->aq = &r3->plsq;
191
+ clone_ProductQuantizers(r3->plsq.quantizers);
192
+ } else if (TRYCAST(IndexIVFProductResidualQuantizerFastScan, r4)) {
193
+ r4->aq = &r4->prq;
194
+ clone_ProductQuantizers(r4->prq.quantizers);
195
+ } else if (TRYCAST(IndexIVFLocalSearchQuantizer, r5)) {
196
+ r5->aq = &r5->lsq;
197
+ } else if (TRYCAST(IndexIVFResidualQuantizer, r6)) {
198
+ r6->aq = &r6->rq;
199
+ } else if (TRYCAST(IndexIVFProductLocalSearchQuantizer, r7)) {
200
+ r7->aq = &r7->plsq;
201
+ clone_ProductQuantizers(r7->plsq.quantizers);
202
+ } else if (TRYCAST(IndexIVFProductResidualQuantizer, r8)) {
203
+ r8->aq = &r8->prq;
204
+ clone_ProductQuantizers(r8->prq.quantizers);
205
+ } else if (TRYCAST(IndexLocalSearchQuantizerFastScan, r9)) {
206
+ r9->aq = &r9->lsq;
207
+ } else if (TRYCAST(IndexResidualQuantizerFastScan, r10)) {
208
+ r10->aq = &r10->rq;
209
+ } else if (TRYCAST(IndexProductLocalSearchQuantizerFastScan, r11)) {
210
+ r11->aq = &r11->plsq;
211
+ clone_ProductQuantizers(r11->plsq.quantizers);
212
+ } else if (TRYCAST(IndexProductResidualQuantizerFastScan, r12)) {
213
+ r12->aq = &r12->prq;
214
+ clone_ProductQuantizers(r12->prq.quantizers);
215
+ } else if (TRYCAST(IndexLocalSearchQuantizer, r13)) {
216
+ r13->aq = &r13->lsq;
217
+ } else if (TRYCAST(IndexResidualQuantizer, r14)) {
218
+ r14->aq = &r14->rq;
219
+ } else if (TRYCAST(IndexProductLocalSearchQuantizer, r15)) {
220
+ r15->aq = &r15->plsq;
221
+ clone_ProductQuantizers(r15->plsq.quantizers);
222
+ } else if (TRYCAST(IndexProductResidualQuantizer, r16)) {
223
+ r16->aq = &r16->prq;
224
+ clone_ProductQuantizers(r16->prq.quantizers);
225
+ } else if (TRYCAST(LocalSearchCoarseQuantizer, r17)) {
226
+ r17->aq = &r17->lsq;
227
+ } else if (TRYCAST(ResidualCoarseQuantizer, r18)) {
228
+ r18->aq = &r18->rq;
229
229
  } else {
230
230
  FAISS_THROW_MSG(
231
231
  "clone not supported for this type of additive quantizer index");
@@ -319,7 +319,7 @@ Index* Cloner::clone_Index(const Index* index) {
319
319
  res->metric_arg = ipt->metric_arg;
320
320
 
321
321
  res->index = clone_Index(ipt->index);
322
- for (int i = 0; i < ipt->chain.size(); i++) {
322
+ for (size_t i = 0; i < ipt->chain.size(); i++) {
323
323
  res->chain.push_back(clone_VectorTransform(ipt->chain[i]));
324
324
  }
325
325
  res->own_fields = true;
@@ -377,6 +377,7 @@ Index* Cloner::clone_Index(const Index* index) {
377
377
  IndexRowwiseMinMaxBase* res = clone_IndexRowwiseMinMax(irmmb);
378
378
  res->own_fields = true;
379
379
  res->index = clone_Index(irmmb->index);
380
+ return res;
380
381
  } else if (
381
382
  dynamic_cast<const IndexAdditiveQuantizerFastScan*>(index) ||
382
383
  dynamic_cast<const IndexAdditiveQuantizer*>(index) ||
@@ -1785,72 +1785,72 @@ struct Index2LevelDecoderImpl<
1785
1785
 
1786
1786
  // process 1 sample
1787
1787
  static void store(
1788
- const float* const __restrict pqCoarseCentroids0,
1789
- const float* const __restrict pqFineCentroids0,
1790
- const uint8_t* const __restrict code0,
1791
- float* const __restrict outputStore) {}
1788
+ const float* const __restrict /*pqCoarseCentroids0*/,
1789
+ const float* const __restrict /*pqFineCentroids0*/,
1790
+ const uint8_t* const __restrict /*code0*/,
1791
+ float* const __restrict /*outputStore*/) {}
1792
1792
 
1793
1793
  // process 1 sample
1794
1794
  static void accum(
1795
- const float* const __restrict pqCoarseCentroids0,
1796
- const float* const __restrict pqFineCentroids0,
1797
- const uint8_t* const __restrict code0,
1798
- const float weight0,
1799
- float* const __restrict outputAccum) {}
1795
+ const float* const __restrict /*pqCoarseCentroids0*/,
1796
+ const float* const __restrict /*pqFineCentroids0*/,
1797
+ const uint8_t* const __restrict /*code0*/,
1798
+ const float /*weight0*/,
1799
+ float* const __restrict /*outputAccum*/) {}
1800
1800
 
1801
1801
  // Process 2 samples.
1802
1802
  // Each code uses its own coarse pq centroids table and fine pq centroids table.
1803
1803
  static void accum(
1804
- const float* const __restrict pqCoarseCentroids0,
1805
- const float* const __restrict pqFineCentroids0,
1806
- const uint8_t* const __restrict code0,
1807
- const float weight0,
1808
- const float* const __restrict pqCoarseCentroids1,
1809
- const float* const __restrict pqFineCentroids1,
1810
- const uint8_t* const __restrict code1,
1811
- const float weight1,
1812
- float* const __restrict outputAccum) {}
1804
+ const float* const __restrict /*pqCoarseCentroids0*/,
1805
+ const float* const __restrict /*pqFineCentroids0*/,
1806
+ const uint8_t* const __restrict /*code0*/,
1807
+ const float /*weight0*/,
1808
+ const float* const __restrict /*pqCoarseCentroids1*/,
1809
+ const float* const __restrict /*pqFineCentroids1*/,
1810
+ const uint8_t* const __restrict /*code1*/,
1811
+ const float /*weight1*/,
1812
+ float* const __restrict /*outputAccum*/) {}
1813
1813
 
1814
1814
  // Process 2 samples.
1815
1815
  // Coarse pq centroids table and fine pq centroids table are shared among codes.
1816
1816
  static void accum(
1817
- const float* const __restrict pqCoarseCentroids,
1818
- const float* const __restrict pqFineCentroids,
1819
- const uint8_t* const __restrict code0,
1820
- const float weight0,
1821
- const uint8_t* const __restrict code1,
1822
- const float weight1,
1823
- float* const __restrict outputAccum) {}
1817
+ const float* const __restrict /*pqCoarseCentroids*/,
1818
+ const float* const __restrict /*pqFineCentroids*/,
1819
+ const uint8_t* const __restrict /*code0*/,
1820
+ const float /*weight0*/,
1821
+ const uint8_t* const __restrict /*code1*/,
1822
+ const float /*weight1*/,
1823
+ float* const __restrict /*outputAccum*/) {}
1824
1824
 
1825
1825
  // Process 3 samples.
1826
1826
  // Each code uses its own coarse pq centroids table and fine pq centroids table.
1827
1827
  static void accum(
1828
- const float* const __restrict pqCoarseCentroids0,
1829
- const float* const __restrict pqFineCentroids0,
1830
- const uint8_t* const __restrict code0,
1831
- const float weight0,
1832
- const float* const __restrict pqCoarseCentroids1,
1833
- const float* const __restrict pqFineCentroids1,
1834
- const uint8_t* const __restrict code1,
1835
- const float weight1,
1836
- const float* const __restrict pqCoarseCentroids2,
1837
- const float* const __restrict pqFineCentroids2,
1838
- const uint8_t* const __restrict code2,
1839
- const float weight2,
1840
- float* const __restrict outputAccum) {}
1828
+ const float* const __restrict /*pqCoarseCentroids0*/,
1829
+ const float* const __restrict /*pqFineCentroids0*/,
1830
+ const uint8_t* const __restrict /*code0*/,
1831
+ const float /*weight0*/,
1832
+ const float* const __restrict /*pqCoarseCentroids1*/,
1833
+ const float* const __restrict /*pqFineCentroids1*/,
1834
+ const uint8_t* const __restrict /*code1*/,
1835
+ const float /*weight1*/,
1836
+ const float* const __restrict /*pqCoarseCentroids2*/,
1837
+ const float* const __restrict /*pqFineCentroids2*/,
1838
+ const uint8_t* const __restrict /*code2*/,
1839
+ const float /*weight2*/,
1840
+ float* const __restrict /*outputAccum*/) {}
1841
1841
 
1842
1842
  // Process 3 samples.
1843
1843
  // Coarse pq centroids table and fine pq centroids table are shared among codes.
1844
1844
  static void accum(
1845
- const float* const __restrict pqCoarseCentroids,
1846
- const float* const __restrict pqFineCentroids,
1847
- const uint8_t* const __restrict code0,
1848
- const float weight0,
1849
- const uint8_t* const __restrict code1,
1850
- const float weight1,
1851
- const uint8_t* const __restrict code2,
1852
- const float weight2,
1853
- float* const __restrict outputAccum) {}
1845
+ const float* const __restrict /*pqCoarseCentroids*/,
1846
+ const float* const __restrict /*pqFineCentroids*/,
1847
+ const uint8_t* const __restrict /*code0*/,
1848
+ const float /*weight0*/,
1849
+ const uint8_t* const __restrict /*code1*/,
1850
+ const float /*weight1*/,
1851
+ const uint8_t* const __restrict /*code2*/,
1852
+ const float /*weight2*/,
1853
+ float* const __restrict /*outputAccum*/) {}
1854
1854
 
1855
1855
  // clang-format on
1856
1856
  };
@@ -8,6 +8,12 @@
8
8
  #ifndef LEVEL2_INL_H
9
9
  #define LEVEL2_INL_H
10
10
 
11
+ // GCC does not recognize #pragma unroll (Clang extension)
12
+ #if defined(__GNUC__) && !defined(__clang__)
13
+ #pragma GCC diagnostic push
14
+ #pragma GCC diagnostic ignored "-Wunknown-pragmas"
15
+ #endif
16
+
11
17
  #include <cstddef>
12
18
  #include <cstdint>
13
19
 
@@ -464,4 +470,9 @@ struct Index2LevelDecoder {
464
470
 
465
471
  } // namespace cppcontrib
466
472
  } // namespace faiss
473
+
474
+ #if defined(__GNUC__) && !defined(__clang__)
475
+ #pragma GCC diagnostic pop
476
+ #endif
477
+
467
478
  #endif // LEVEL2_INL_H
@@ -1428,63 +1428,63 @@ struct IndexPQDecoderImpl<
1428
1428
 
1429
1429
  // process 1 sample
1430
1430
  static void store(
1431
- const float* const __restrict pqFineCentroids0,
1432
- const uint8_t* const __restrict code0,
1433
- float* const __restrict outputStore) {}
1431
+ const float* const __restrict /*pqFineCentroids0*/,
1432
+ const uint8_t* const __restrict /*code0*/,
1433
+ float* const __restrict /*outputStore*/) {}
1434
1434
 
1435
1435
  // process 1 sample
1436
1436
  static void accum(
1437
- const float* const __restrict pqFineCentroids0,
1438
- const uint8_t* const __restrict code0,
1439
- const float weight0,
1440
- float* const __restrict outputAccum) {}
1437
+ const float* const __restrict /*pqFineCentroids0*/,
1438
+ const uint8_t* const __restrict /*code0*/,
1439
+ const float /*weight0*/,
1440
+ float* const __restrict /*outputAccum*/) {}
1441
1441
 
1442
1442
  // Process 2 samples.
1443
1443
  // Each code uses its own fine pq centroids table.
1444
1444
  static void accum(
1445
- const float* const __restrict pqFineCentroids0,
1446
- const uint8_t* const __restrict code0,
1447
- const float weight0,
1448
- const float* const __restrict pqFineCentroids1,
1449
- const uint8_t* const __restrict code1,
1450
- const float weight1,
1451
- float* const __restrict outputAccum) {}
1445
+ const float* const __restrict /*pqFineCentroids0*/,
1446
+ const uint8_t* const __restrict /*code0*/,
1447
+ const float /*weight0*/,
1448
+ const float* const __restrict /*pqFineCentroids1*/,
1449
+ const uint8_t* const __restrict /*code1*/,
1450
+ const float /*weight1*/,
1451
+ float* const __restrict /*outputAccum*/) {}
1452
1452
 
1453
1453
  // Process 2 samples.
1454
1454
  // Fine pq centroids table is shared among codes.
1455
1455
  static void accum(
1456
- const float* const __restrict pqFineCentroids,
1457
- const uint8_t* const __restrict code0,
1458
- const float weight0,
1459
- const uint8_t* const __restrict code1,
1460
- const float weight1,
1461
- float* const __restrict outputAccum) {}
1456
+ const float* const __restrict /*pqFineCentroids*/,
1457
+ const uint8_t* const __restrict /*code0*/,
1458
+ const float /*weight0*/,
1459
+ const uint8_t* const __restrict /*code1*/,
1460
+ const float /*weight1*/,
1461
+ float* const __restrict /*outputAccum*/) {}
1462
1462
 
1463
1463
  // Process 3 samples.
1464
1464
  // Each code uses its own fine pq centroids table.
1465
1465
  static void accum(
1466
- const float* const __restrict pqFineCentroids0,
1467
- const uint8_t* const __restrict code0,
1468
- const float weight0,
1469
- const float* const __restrict pqFineCentroids1,
1470
- const uint8_t* const __restrict code1,
1471
- const float weight1,
1472
- const float* const __restrict pqFineCentroids2,
1473
- const uint8_t* const __restrict code2,
1474
- const float weight2,
1475
- float* const __restrict outputAccum) {}
1466
+ const float* const __restrict /*pqFineCentroids0*/,
1467
+ const uint8_t* const __restrict /*code0*/,
1468
+ const float /*weight0*/,
1469
+ const float* const __restrict /*pqFineCentroids1*/,
1470
+ const uint8_t* const __restrict /*code1*/,
1471
+ const float /*weight1*/,
1472
+ const float* const __restrict /*pqFineCentroids2*/,
1473
+ const uint8_t* const __restrict /*code2*/,
1474
+ const float /*weight2*/,
1475
+ float* const __restrict /*outputAccum*/) {}
1476
1476
 
1477
1477
  // Process 3 samples.
1478
1478
  // Fine pq centroids table is shared among codes.
1479
1479
  static void accum(
1480
- const float* const __restrict pqFineCentroids,
1481
- const uint8_t* const __restrict code0,
1482
- const float weight0,
1483
- const uint8_t* const __restrict code1,
1484
- const float weight1,
1485
- const uint8_t* const __restrict code2,
1486
- const float weight2,
1487
- float* const __restrict outputAccum) {}
1480
+ const float* const __restrict /*pqFineCentroids*/,
1481
+ const uint8_t* const __restrict /*code0*/,
1482
+ const float /*weight0*/,
1483
+ const uint8_t* const __restrict /*code1*/,
1484
+ const float /*weight1*/,
1485
+ const uint8_t* const __restrict /*code2*/,
1486
+ const float /*weight2*/,
1487
+ float* const __restrict /*outputAccum*/) {}
1488
1488
 
1489
1489
  // clang-format on
1490
1490
  };
@@ -8,6 +8,12 @@
8
8
  #ifndef PQ_INL_H
9
9
  #define PQ_INL_H
10
10
 
11
+ // GCC does not recognize #pragma unroll (Clang extension)
12
+ #if defined(__GNUC__) && !defined(__clang__)
13
+ #pragma GCC diagnostic push
14
+ #pragma GCC diagnostic ignored "-Wunknown-pragmas"
15
+ #endif
16
+
11
17
  #include <cstddef>
12
18
  #include <cstdint>
13
19
 
@@ -254,4 +260,9 @@ struct IndexPQDecoder {
254
260
 
255
261
  } // namespace cppcontrib
256
262
  } // namespace faiss
263
+
264
+ #if defined(__GNUC__) && !defined(__clang__)
265
+ #pragma GCC diagnostic pop
266
+ #endif
267
+
257
268
  #endif // PQ_INL_H
@@ -38,6 +38,11 @@ const std::map<faiss::ScalarQuantizer::QuantizerType, std::string> sq_types = {
38
38
  {faiss::ScalarQuantizer::QT_bf16, "SQbf16"},
39
39
  {faiss::ScalarQuantizer::QT_8bit_direct_signed, "SQ8_direct_signed"},
40
40
  {faiss::ScalarQuantizer::QT_8bit_direct, "SQ8_direct"},
41
+ {faiss::ScalarQuantizer::QT_1bit_tqmse, "SQtqmse1"},
42
+ {faiss::ScalarQuantizer::QT_2bit_tqmse, "SQtqmse2"},
43
+ {faiss::ScalarQuantizer::QT_3bit_tqmse, "SQtqmse3"},
44
+ {faiss::ScalarQuantizer::QT_4bit_tqmse, "SQtqmse4"},
45
+ {faiss::ScalarQuantizer::QT_8bit_tqmse, "SQtqmse8"},
41
46
  };
42
47
 
43
48
  int get_hnsw_M(const faiss::IndexHNSW* index) {
@@ -28,6 +28,7 @@
28
28
  #include <faiss/gpu/GpuIndexIVFPQ.h>
29
29
 
30
30
  #include <variant>
31
+ #include <vector>
31
32
  #include "faiss/Index.h"
32
33
 
33
34
  namespace faiss {
@@ -193,14 +194,14 @@ struct GpuIndexCagraConfig : public GpuIndexConfig {
193
194
 
194
195
  enum class search_algo {
195
196
  /// For large batch sizes.
196
- SINGLE_CTA,
197
+ SINGLE_CTA = 0,
197
198
  /// For small batch sizes.
198
- MULTI_CTA,
199
- MULTI_KERNEL,
200
- AUTO
199
+ MULTI_CTA = 1,
200
+ MULTI_KERNEL = 2,
201
+ AUTO = 100
201
202
  };
202
203
 
203
- enum class hash_mode { HASH, SMALL, AUTO };
204
+ enum class hash_mode { HASH = 0, SMALL = 1, AUTO = 100 };
204
205
 
205
206
  struct SearchParametersCagra : SearchParameters {
206
207
  /// Maximum number of queries to search at the same time (batch size). Auto
@@ -33,7 +33,7 @@
33
33
 
34
34
  #if defined USE_NVIDIA_CUVS
35
35
  #include <raft/core/device_resources.hpp>
36
- #include <rmm/mr/device/device_memory_resource.hpp>
36
+ #include <rmm/mr/device_memory_resource.hpp>
37
37
  #endif
38
38
 
39
39
  namespace faiss {
@@ -23,9 +23,9 @@
23
23
 
24
24
  #if defined USE_NVIDIA_CUVS
25
25
  #include <raft/core/device_resources.hpp>
26
- #include <rmm/mr/device/managed_memory_resource.hpp>
27
- #include <rmm/mr/device/per_device_resource.hpp>
28
- #include <rmm/mr/host/pinned_memory_resource.hpp>
26
+ #include <rmm/mr/managed_memory_resource.hpp>
27
+ #include <rmm/mr/per_device_resource.hpp>
28
+ #include <rmm/mr/pinned_host_memory_resource.hpp>
29
29
  #include <memory>
30
30
  #endif
31
31
 
@@ -93,7 +93,7 @@ StandardGpuResourcesImpl::StandardGpuResourcesImpl()
93
93
  :
94
94
  #if defined USE_NVIDIA_CUVS
95
95
  mmr_(new rmm::mr::managed_memory_resource),
96
- pmr_(new rmm::mr::pinned_memory_resource),
96
+ pmr_(new rmm::mr::pinned_host_memory_resource),
97
97
  #endif
98
98
  pinnedMemAlloc_(nullptr),
99
99
  pinnedMemAllocSize_(0),
@@ -164,7 +164,7 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
164
164
 
165
165
  if (pinnedMemAlloc_) {
166
166
  #if defined USE_NVIDIA_CUVS
167
- pmr_->deallocate(pinnedMemAlloc_, pinnedMemAllocSize_);
167
+ pmr_->deallocate_sync(pinnedMemAlloc_, pinnedMemAllocSize_);
168
168
  #else
169
169
  auto err = cudaFreeHost(pinnedMemAlloc_);
170
170
  FAISS_ASSERT_FMT(
@@ -350,7 +350,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
350
350
  // pinned memory allocation
351
351
  if (defaultStreams_.empty() && pinnedMemSize_ > 0) {
352
352
  try {
353
- pinnedMemAlloc_ = pmr_->allocate(pinnedMemSize_);
353
+ pinnedMemAlloc_ = pmr_->allocate_sync(pinnedMemSize_);
354
354
  } catch (const std::bad_alloc& rmm_ex) {
355
355
  FAISS_THROW_MSG("CUDA memory allocation error");
356
356
  }
@@ -549,7 +549,7 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
549
549
  rmm::mr::device_memory_resource* current_mr =
550
550
  rmm::mr::get_per_device_resource(
551
551
  rmm::cuda_device_id{adjReq.device});
552
- p = current_mr->allocate_async(adjReq.size, adjReq.stream);
552
+ p = current_mr->allocate(adjReq.stream, adjReq.size);
553
553
  adjReq.mr = current_mr;
554
554
  } catch (const std::bad_alloc& rmm_ex) {
555
555
  FAISS_THROW_MSG("CUDA memory allocation error");
@@ -584,7 +584,7 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
584
584
  // TODO: change this to use the current device resource once RMM has
585
585
  // a way to retrieve a "guaranteed" managed memory resource for a
586
586
  // device.
587
- p = mmr_->allocate_async(adjReq.size, adjReq.stream);
587
+ p = mmr_->allocate(adjReq.stream, adjReq.size);
588
588
  adjReq.mr = mmr_.get();
589
589
  } catch (const std::bad_alloc& rmm_ex) {
590
590
  FAISS_THROW_MSG("CUDA memory allocation error");
@@ -648,7 +648,7 @@ void StandardGpuResourcesImpl::deallocMemory(int device, void* p) {
648
648
  req.space == MemorySpace::Device ||
649
649
  req.space == MemorySpace::Unified) {
650
650
  #if defined USE_NVIDIA_CUVS
651
- req.mr->deallocate_async(p, req.size, req.stream);
651
+ req.mr->deallocate(req.stream, p, req.size);
652
652
  #else
653
653
  auto err = cudaFree(p);
654
654
  FAISS_ASSERT_FMT(