faiss 0.5.3 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (379) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/ext/faiss/ext.cpp +1 -1
  4. data/ext/faiss/extconf.rb +4 -4
  5. data/ext/faiss/index.cpp +63 -45
  6. data/ext/faiss/index_binary.cpp +37 -27
  7. data/ext/faiss/kmeans.cpp +9 -8
  8. data/ext/faiss/pca_matrix.cpp +9 -7
  9. data/ext/faiss/product_quantizer.cpp +13 -11
  10. data/ext/faiss/utils.cpp +4 -2
  11. data/ext/faiss/utils.h +4 -0
  12. data/lib/faiss/version.rb +1 -1
  13. data/lib/faiss.rb +1 -1
  14. data/vendor/faiss/faiss/AutoTune.cpp +214 -82
  15. data/vendor/faiss/faiss/AutoTune.h +14 -1
  16. data/vendor/faiss/faiss/Clustering.cpp +97 -249
  17. data/vendor/faiss/faiss/Clustering.h +18 -0
  18. data/vendor/faiss/faiss/IVFlib.cpp +67 -44
  19. data/vendor/faiss/faiss/Index.cpp +25 -12
  20. data/vendor/faiss/faiss/Index.h +26 -4
  21. data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
  22. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +68 -61
  23. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
  24. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
  25. data/vendor/faiss/faiss/IndexBinary.cpp +6 -3
  26. data/vendor/faiss/faiss/IndexBinary.h +4 -4
  27. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
  28. data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
  29. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
  30. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +92 -95
  31. data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
  32. data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
  33. data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
  34. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +120 -414
  35. data/vendor/faiss/faiss/IndexFastScan.cpp +105 -129
  36. data/vendor/faiss/faiss/IndexFastScan.h +35 -24
  37. data/vendor/faiss/faiss/IndexFlat.cpp +216 -152
  38. data/vendor/faiss/faiss/IndexFlat.h +32 -14
  39. data/vendor/faiss/faiss/IndexFlatCodes.cpp +88 -41
  40. data/vendor/faiss/faiss/IndexFlatCodes.h +7 -1
  41. data/vendor/faiss/faiss/IndexHNSW.cpp +299 -187
  42. data/vendor/faiss/faiss/IndexHNSW.h +30 -14
  43. data/vendor/faiss/faiss/IndexIDMap.cpp +26 -22
  44. data/vendor/faiss/faiss/IndexIDMap.h +9 -7
  45. data/vendor/faiss/faiss/IndexIVF.cpp +535 -405
  46. data/vendor/faiss/faiss/IndexIVF.h +47 -16
  47. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
  48. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +105 -99
  49. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +6 -3
  50. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +379 -249
  51. data/vendor/faiss/faiss/IndexIVFFastScan.h +65 -60
  52. data/vendor/faiss/faiss/IndexIVFFlat.cpp +41 -124
  53. data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
  54. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +89 -138
  55. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
  56. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
  57. data/vendor/faiss/faiss/IndexIVFPQ.cpp +77 -907
  58. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +184 -122
  59. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
  60. data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -18
  61. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +59 -60
  62. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +4 -3
  63. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +564 -416
  64. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +269 -111
  65. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
  66. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  67. data/vendor/faiss/faiss/IndexLSH.cpp +44 -25
  68. data/vendor/faiss/faiss/IndexLattice.cpp +41 -36
  69. data/vendor/faiss/faiss/IndexNNDescent.cpp +37 -21
  70. data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
  71. data/vendor/faiss/faiss/IndexNSG.cpp +40 -23
  72. data/vendor/faiss/faiss/IndexNSG.h +0 -2
  73. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +32 -12
  74. data/vendor/faiss/faiss/IndexPQ.cpp +129 -213
  75. data/vendor/faiss/faiss/IndexPQ.h +3 -2
  76. data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
  77. data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
  78. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
  79. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  80. data/vendor/faiss/faiss/IndexRaBitQ.cpp +31 -43
  81. data/vendor/faiss/faiss/IndexRaBitQ.h +4 -3
  82. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +135 -317
  83. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +192 -34
  84. data/vendor/faiss/faiss/IndexRefine.cpp +30 -55
  85. data/vendor/faiss/faiss/IndexRefine.h +4 -4
  86. data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
  87. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
  88. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
  89. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
  90. data/vendor/faiss/faiss/IndexShards.cpp +13 -13
  91. data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
  92. data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
  93. data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
  94. data/vendor/faiss/faiss/MetaIndexes.h +1 -1
  95. data/vendor/faiss/faiss/MetricType.h +29 -6
  96. data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
  97. data/vendor/faiss/faiss/SuperKMeans.h +97 -0
  98. data/vendor/faiss/faiss/VectorTransform.cpp +349 -141
  99. data/vendor/faiss/faiss/VectorTransform.h +39 -16
  100. data/vendor/faiss/faiss/build.cpp +23 -0
  101. data/vendor/faiss/faiss/build.h +15 -0
  102. data/vendor/faiss/faiss/clone_index.cpp +55 -51
  103. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
  104. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
  105. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
  106. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
  107. data/vendor/faiss/faiss/{cppcontrib/factory_tools.cpp → factory_tools.cpp} +6 -1
  108. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
  109. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
  110. data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
  111. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
  112. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
  113. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
  114. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
  115. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
  116. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
  117. data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
  118. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
  119. data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
  120. data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
  121. data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
  122. data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
  123. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
  124. data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
  125. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
  126. data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
  127. data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
  128. data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
  129. data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
  130. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +64 -34
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
  132. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -28
  134. data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
  135. data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
  136. data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +367 -0
  137. data/vendor/faiss/faiss/impl/ClusteringInitialization.h +107 -0
  138. data/vendor/faiss/faiss/impl/CodePacker.cpp +7 -3
  139. data/vendor/faiss/faiss/impl/CodePacker.h +11 -3
  140. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +83 -0
  141. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.h +47 -0
  142. data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
  143. data/vendor/faiss/faiss/impl/FaissAssert.h +64 -3
  144. data/vendor/faiss/faiss/impl/FaissException.h +50 -3
  145. data/vendor/faiss/faiss/impl/HNSW.cpp +117 -351
  146. data/vendor/faiss/faiss/impl/HNSW.h +21 -40
  147. data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
  148. data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
  149. data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
  150. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +114 -102
  151. data/vendor/faiss/faiss/impl/NNDescent.cpp +63 -26
  152. data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
  153. data/vendor/faiss/faiss/impl/NSG.cpp +44 -26
  154. data/vendor/faiss/faiss/impl/NSG.h +20 -10
  155. data/vendor/faiss/faiss/impl/Panorama.cpp +76 -52
  156. data/vendor/faiss/faiss/impl/Panorama.h +265 -78
  157. data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
  158. data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
  159. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +62 -37
  160. data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
  161. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
  162. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
  163. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +99 -80
  164. data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
  165. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +135 -37
  166. data/vendor/faiss/faiss/impl/RaBitQUtils.h +148 -21
  167. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +298 -301
  168. data/vendor/faiss/faiss/impl/RaBitQuantizer.h +3 -10
  169. data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +15 -41
  170. data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +0 -4
  171. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +40 -32
  172. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  173. data/vendor/faiss/faiss/impl/ResultHandler.h +218 -113
  174. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +119 -2362
  175. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -3
  176. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
  177. data/vendor/faiss/faiss/impl/VisitedTable.cpp +42 -0
  178. data/vendor/faiss/faiss/impl/VisitedTable.h +76 -0
  179. data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
  180. data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
  181. data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
  182. data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
  183. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
  184. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
  185. data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
  186. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
  187. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
  188. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
  189. data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
  190. data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
  191. data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
  192. data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
  193. data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
  194. data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
  195. data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
  196. data/vendor/faiss/faiss/impl/expanded_scanners.h +163 -0
  197. data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
  198. data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
  199. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
  200. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
  201. data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
  202. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
  203. data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +176 -4
  204. data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
  205. data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
  206. data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
  207. data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
  208. data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
  209. data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
  210. data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -348
  211. data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
  212. data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
  213. data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +290 -142
  214. data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
  215. data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
  216. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
  217. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
  218. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
  219. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
  220. data/vendor/faiss/faiss/impl/index_read.cpp +1950 -505
  221. data/vendor/faiss/faiss/impl/index_read_utils.h +1 -2
  222. data/vendor/faiss/faiss/impl/index_write.cpp +112 -21
  223. data/vendor/faiss/faiss/impl/io.cpp +6 -6
  224. data/vendor/faiss/faiss/impl/io_macros.h +33 -16
  225. data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
  226. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +81 -40
  227. data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
  228. data/vendor/faiss/faiss/impl/mapped_io.cpp +15 -8
  229. data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
  230. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
  231. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
  232. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
  233. data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
  234. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
  235. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
  236. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
  237. data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx2.h → pq_code_distance/pq_code_distance-avx2.h} +43 -220
  238. data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx512.h → pq_code_distance/pq_code_distance-avx512.h} +25 -112
  239. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +59 -0
  240. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
  241. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +256 -0
  242. data/vendor/faiss/faiss/impl/{code_distance/code_distance-sve.h → pq_code_distance/pq_code_distance-sve.cpp} +57 -146
  243. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
  244. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +320 -483
  245. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
  246. data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +121 -0
  247. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +137 -0
  248. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +371 -0
  249. data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +190 -0
  250. data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +94 -0
  251. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +603 -0
  252. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +597 -0
  253. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +388 -0
  254. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +630 -0
  255. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
  256. data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +387 -0
  257. data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +54 -0
  258. data/vendor/faiss/faiss/impl/simd_dispatch.h +173 -0
  259. data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
  260. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +274 -171
  261. data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
  262. data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
  263. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
  264. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +275 -217
  265. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
  266. data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
  267. data/vendor/faiss/faiss/impl/svs_io.h +8 -2
  268. data/vendor/faiss/faiss/index_factory.cpp +115 -28
  269. data/vendor/faiss/faiss/index_io.h +53 -3
  270. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +73 -20
  271. data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
  272. data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
  273. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
  274. data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
  275. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
  276. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +14 -14
  277. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  278. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +9 -19
  279. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
  280. data/vendor/faiss/faiss/svs/IndexSVSFlat.h +2 -0
  281. data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
  282. data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
  283. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
  284. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
  285. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
  286. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
  287. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
  288. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +19 -2
  289. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
  290. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +19 -2
  291. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +14 -0
  292. data/vendor/faiss/faiss/utils/Heap.cpp +56 -10
  293. data/vendor/faiss/faiss/utils/Heap.h +21 -0
  294. data/vendor/faiss/faiss/utils/NeuralNet.cpp +54 -40
  295. data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
  296. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
  297. data/vendor/faiss/faiss/utils/distances.cpp +507 -559
  298. data/vendor/faiss/faiss/utils/distances.h +118 -1
  299. data/vendor/faiss/faiss/utils/distances_dispatch.h +250 -0
  300. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
  301. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
  302. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
  303. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
  304. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
  305. data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
  306. data/vendor/faiss/faiss/utils/distances_simd.cpp +72 -3681
  307. data/vendor/faiss/faiss/utils/extra_distances.cpp +60 -102
  308. data/vendor/faiss/faiss/utils/extra_distances.h +79 -7
  309. data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
  310. data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
  311. data/vendor/faiss/faiss/utils/hamming.h +92 -2
  312. data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
  313. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
  314. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
  315. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
  316. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
  317. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
  318. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
  319. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
  320. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
  321. data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
  322. data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
  323. data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
  324. data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
  325. data/vendor/faiss/faiss/utils/partitioning.h +31 -0
  326. data/vendor/faiss/faiss/utils/popcount.h +29 -0
  327. data/vendor/faiss/faiss/utils/pq_code_distance.h +251 -0
  328. data/vendor/faiss/faiss/utils/prefetch.h +2 -2
  329. data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
  330. data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
  331. data/vendor/faiss/faiss/utils/rabitq_simd.h +124 -343
  332. data/vendor/faiss/faiss/utils/random.cpp +6 -6
  333. data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
  334. data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +154 -0
  335. data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +777 -0
  336. data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +306 -0
  337. data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +1431 -0
  338. data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +1095 -0
  339. data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
  340. data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
  341. data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +392 -0
  342. data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
  343. data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
  344. data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
  345. data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
  346. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
  347. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
  348. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
  349. data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
  350. data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
  351. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
  352. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
  353. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
  354. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
  355. data/vendor/faiss/faiss/utils/simd_levels.cpp +334 -0
  356. data/vendor/faiss/faiss/utils/simd_levels.h +183 -0
  357. data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
  358. data/vendor/faiss/faiss/utils/utils.cpp +21 -14
  359. data/vendor/faiss/faiss/utils/utils.h +3 -3
  360. metadata +156 -42
  361. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
  362. data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
  363. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +0 -81
  364. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +0 -186
  365. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -216
  366. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -224
  367. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
  368. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
  369. data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
  370. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
  371. data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -228
  372. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
  373. data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
  374. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -450
  375. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
  376. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
  377. data/vendor/faiss/faiss/utils/simdlib.h +0 -42
  378. data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -296
  379. data/vendor/faiss/faiss/{cppcontrib/factory_tools.h → factory_tools.h} +0 -0
@@ -5,8 +5,6 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
- // -*- c++ -*-
9
-
10
8
  #pragma once
11
9
 
12
10
  #include <faiss/impl/AuxIndexStructures.h>
@@ -35,6 +33,13 @@ struct ScalarQuantizer : Quantizer {
35
33
  QT_bf16,
36
34
  QT_8bit_direct_signed, ///< fast indexing of signed int8s ranging from
37
35
  ///< [-128 to 127]
36
+ QT_0bit, ///< 0 bits per component, centroid-only distance (for IVF)
37
+ QT_1bit_tqmse, ///< TurboQuant MSE-optimized, 1 bit per component
38
+ QT_2bit_tqmse, ///< TurboQuant MSE-optimized, 2 bits per component
39
+ QT_3bit_tqmse, ///< TurboQuant MSE-optimized, 3 bits per component
40
+ QT_4bit_tqmse, ///< TurboQuant MSE-optimized, 4 bits per component
41
+ QT_8bit_tqmse, ///< TurboQuant MSE-optimized, 8 bits per component
42
+ QT_count
38
43
  };
39
44
 
40
45
  QuantizerType qtype = QT_8bit;
@@ -60,7 +65,7 @@ struct ScalarQuantizer : Quantizer {
60
65
  /// trained values (including the range)
61
66
  std::vector<float> trained;
62
67
 
63
- ScalarQuantizer(size_t d, QuantizerType qtype);
68
+ ScalarQuantizer(size_t d_in, QuantizerType qtype_in);
64
69
  ScalarQuantizer();
65
70
 
66
71
  /// updates internal values based on qtype and d
@@ -102,6 +107,25 @@ struct ScalarQuantizer : Quantizer {
102
107
 
103
108
  virtual float query_to_code(const uint8_t* code) const = 0;
104
109
 
110
+ /// Compute four query-to-code distances in one call. Default loops
111
+ /// query_to_code four times; per-SIMD specializations may batch the
112
+ /// inner dim loop across the four codes to amortize query state and
113
+ /// expose ILP across independent accumulators. dis0..dis3 receive the distances for code_0..code_3, in that order.
114
+ virtual void query_to_codes_batch_4(
115
+ const uint8_t* code_0,
116
+ const uint8_t* code_1,
117
+ const uint8_t* code_2,
118
+ const uint8_t* code_3,
119
+ float& dis0,
120
+ float& dis1,
121
+ float& dis2,
122
+ float& dis3) const {
123
+ dis0 = query_to_code(code_0);
124
+ dis1 = query_to_code(code_1);
125
+ dis2 = query_to_code(code_2);
126
+ dis3 = query_to_code(code_3);
127
+ }
128
+
105
129
  float distance_to_code(const uint8_t* code) final {
106
130
  return query_to_code(code);
107
131
  }
@@ -19,8 +19,8 @@ ThreadedIndex<IndexT>::ThreadedIndex(bool threaded)
19
19
  : ThreadedIndex(0, threaded) {}
20
20
 
21
21
  template <typename IndexT>
22
- ThreadedIndex<IndexT>::ThreadedIndex(int d, bool threaded)
23
- : IndexT(d), isThreaded_(threaded) {}
22
+ ThreadedIndex<IndexT>::ThreadedIndex(int d_in, bool threaded)
23
+ : IndexT(d_in), isThreaded_(threaded) {}
24
24
 
25
25
  template <typename IndexT>
26
26
  ThreadedIndex<IndexT>::~ThreadedIndex() {
@@ -122,11 +122,12 @@ void ThreadedIndex<IndexT>::runOnIndex(std::function<void(int, IndexT*)> f) {
122
122
  if (isThreaded_) {
123
123
  std::vector<std::future<bool>> v;
124
124
 
125
- for (int i = 0; i < this->indices_.size(); ++i) {
125
+ for (size_t i = 0; i < this->indices_.size(); ++i) {
126
126
  auto& p = this->indices_[i];
127
127
  auto indexPtr = p.first;
128
+ int idx = static_cast<int>(i);
128
129
  v.emplace_back(
129
- p.second->add([f, i, indexPtr]() { f(i, indexPtr); }));
130
+ p.second->add([f, idx, indexPtr]() { f(idx, indexPtr); }));
130
131
  }
131
132
 
132
133
  waitAndHandleFutures(v);
@@ -135,13 +136,14 @@ void ThreadedIndex<IndexT>::runOnIndex(std::function<void(int, IndexT*)> f) {
135
136
  // while letting everything else run to completion
136
137
  std::vector<std::pair<int, std::exception_ptr>> exceptions;
137
138
 
138
- for (int i = 0; i < this->indices_.size(); ++i) {
139
+ for (size_t i = 0; i < this->indices_.size(); ++i) {
139
140
  auto& p = this->indices_[i];
140
141
  try {
141
- f(i, p.first);
142
+ f(static_cast<int>(i), p.first);
142
143
  } catch (...) {
143
144
  exceptions.emplace_back(
144
- std::make_pair(i, std::current_exception()));
145
+ std::make_pair(
146
+ static_cast<int>(i), std::current_exception()));
145
147
  }
146
148
  }
147
149
 
@@ -164,10 +166,10 @@ void ThreadedIndex<IndexT>::reset() {
164
166
  }
165
167
 
166
168
  template <typename IndexT>
167
- void ThreadedIndex<IndexT>::onAfterAddIndex(IndexT* index) {}
169
+ void ThreadedIndex<IndexT>::onAfterAddIndex(IndexT* /* index */) {}
168
170
 
169
171
  template <typename IndexT>
170
- void ThreadedIndex<IndexT>::onAfterRemoveIndex(IndexT* index) {}
172
+ void ThreadedIndex<IndexT>::onAfterRemoveIndex(IndexT* /* index */) {}
171
173
 
172
174
  template <typename IndexT>
173
175
  void ThreadedIndex<IndexT>::waitAndHandleFutures(
@@ -176,14 +178,15 @@ void ThreadedIndex<IndexT>::waitAndHandleFutures(
176
178
  // exceptions that are generated
177
179
  std::vector<std::pair<int, std::exception_ptr>> exceptions;
178
180
 
179
- for (int i = 0; i < v.size(); ++i) {
181
+ for (size_t i = 0; i < v.size(); ++i) {
180
182
  auto& fut = v[i];
181
183
 
182
184
  try {
183
185
  fut.get();
184
186
  } catch (...) {
185
187
  exceptions.emplace_back(
186
- std::make_pair(i, std::current_exception()));
188
+ std::make_pair(
189
+ static_cast<int>(i), std::current_exception()));
187
190
  }
188
191
  }
189
192
 
@@ -0,0 +1,42 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include <faiss/impl/VisitedTable.h>
9
+
10
+ #include <cstring>
11
+
12
+ namespace faiss {
13
+
14
+ // The vector strategy is faster for get()/set(), but O(size) to initialize.
15
+ // advance() is O(1) except every 254th call (when visno wraps back to 1), which is O(size).
16
+ // The hash set strategy is a constant factor slower for get()/set(),
17
+ // but O(1) to construct and O(visits) to advance.
18
+ // A size of ~1M seems to be the threshold where the hash set wins; the default below is set to 500000.
19
+ size_t visited_table_hashset_threshold = 500000;
20
+
21
+ VisitedTable::VisitedTable(size_t size, std::optional<bool> use_hashset)
22
+ : visno(use_hashset.value_or(size >= visited_table_hashset_threshold)
23
+ ? 0
24
+ : 1) {
25
+ if (visno != 0) {
26
+ visited.resize(size, 0);
27
+ }
28
+ }
29
+
30
+ void VisitedTable::advance() {
31
+ if (visno == 0) {
32
+ visited_set.clear();
33
+ } else if (visno < 254) {
34
+ // 254 rather than 255 because sometimes we use visno and visno+1
35
+ ++visno;
36
+ } else {
37
+ memset(visited.data(), 0, sizeof(visited[0]) * visited.size());
38
+ visno = 1;
39
+ }
40
+ }
41
+
42
+ } // namespace faiss
@@ -0,0 +1,76 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #ifndef FAISS_VISITED_TABLE_H
9
+ #define FAISS_VISITED_TABLE_H
10
+
11
+ #include <stdint.h>
12
+
13
+ #include <optional>
14
+ #include <unordered_set>
15
+ #include <vector>
16
+
17
+ #include <faiss/impl/platform_macros.h>
18
+ #include <faiss/utils/prefetch.h>
19
+
20
+ namespace faiss {
21
+
22
+ FAISS_API extern size_t visited_table_hashset_threshold;
23
+
24
+ /// A fast, reusable Visited Set for graph search algorithms.
25
+ struct VisitedTable {
26
+ std::vector<uint8_t> visited;
27
+ std::unordered_set<size_t> visited_set;
28
+ uint8_t visno; // 0 if using visited_set, 1..254 if using vector.
29
+
30
+ // If use_hashset is nullopt, the use of a hashset will be determined by
31
+ // size >= visited_table_hashset_threshold.
32
+ explicit VisitedTable(
33
+ size_t size,
34
+ std::optional<bool> use_hashset = std::nullopt);
35
+
36
+ /// set flag #no to true, return whether this changed it.
37
+ bool set(size_t no) {
38
+ if (visno == 0) {
39
+ return visited_set.insert(no).second;
40
+ } else if (visited[no] == visno) {
41
+ return false;
42
+ } else {
43
+ visited[no] = visno;
44
+ return true;
45
+ }
46
+ }
47
+
48
+ /// pre-allocate bucket space to avoid rehashing during repeated set() calls
49
+ void reserve(size_t n) {
50
+ if (visno == 0) {
51
+ visited_set.reserve(n);
52
+ }
53
+ }
54
+
55
+ /// get flag #no
56
+ bool get(size_t no) const {
57
+ if (visno == 0) {
58
+ return visited_set.count(no) != 0;
59
+ } else {
60
+ return visited[no] == visno;
61
+ }
62
+ }
63
+
64
+ void prefetch(size_t no) const {
65
+ if (visno != 0) {
66
+ prefetch_L2(&visited[no]);
67
+ }
68
+ }
69
+
70
+ /// reset all flags to false
71
+ void advance();
72
+ };
73
+
74
+ } // namespace faiss
75
+
76
+ #endif
@@ -0,0 +1,276 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // Approximate top-k search using bucketed heaps with SIMD acceleration.
9
+ //
10
+ // HeapWithBuckets<C, NBUCKETS, N> splits n elements into NBUCKETS buckets,
11
+ // tracks the top-N per bucket using SIMD, then merges into a regular heap.
12
+ // This trades a small accuracy loss for significant speedup.
13
+ //
14
+ // The core idea is the following.
15
+ // Say we need to find beam_size indices with the minimal distance
16
+ // values. It is done via heap (priority_queue) using the following
17
+ // pseudocode:
18
+ //
19
+ // def baseline():
20
+ // distances = np.empty([beam_size * n], dtype=float)
21
+ // indices = np.empty([beam_size * n], dtype=int)
22
+ //
23
+ // heap = Heap(max_heap_size=beam_size)
24
+ //
25
+ // for i in range(0, beam_size * n):
26
+ // heap.push(distances[i], indices[i])
27
+ //
28
+ // Basically, this is what heap_addn() function from utils/Heap.h does.
29
+ //
30
+ // The following scheme can be used for approximate beam search.
31
+ // Say, we need to find elements with min distance.
32
+ // Basically, we split n elements of every beam into NBUCKETS buckets
33
+ // and track the index with the minimal distance for every bucket.
34
+ // This can be effectively SIMD-ed and significantly lowers the number
35
+ // of operations, but yields approximate results for beam_size >= 2.
36
+ //
37
+ // def approximate_v1():
38
+ // distances = np.empty([beam_size * n], dtype=float)
39
+ // indices = np.empty([beam_size * n], dtype=int)
40
+ //
41
+ // heap = Heap(max_heap_size=beam_size)
42
+ //
43
+ // for beam in range(0, beam_size):
44
+ // # The value of 32 is just an example.
45
+ // # The value may be varied: the larger the value is,
46
+ // # the slower and the more precise vs baseline beam search is
47
+ // NBUCKETS = 32
48
+ //
49
+ // local_min_distances = [HUGE_VALF] * NBUCKETS
50
+ // local_min_indices = [0] * NBUCKETS
51
+ //
52
+ // for i in range(0, n / NBUCKETS):
53
+ // for j in range(0, NBUCKETS):
54
+ // idx = beam * n + i * NBUCKETS + j
55
+ // if distances[idx] < local_min_distances[j]:
56
+ // local_min_distances[j] = distances[idx]
57
+ // local_min_indices[j] = indices[idx]
58
+ //
59
+ // for j in range(0, NBUCKETS):
60
+ // heap.push(local_min_distances[j], local_min_indices[j])
61
+ //
62
+ // The accuracy can be improved by tracking min-2 elements for every
63
+ // bucket. Such a min-2 implementation with NBUCKETS buckets provides
64
+ // better accuracy than top-1 implementation with 2 * NBUCKETS buckets.
65
+ // Min-3 is also doable. One can use min-N approach, but I'm not sure
66
+ // whether min-4 and above are practical, because of the lack of SIMD
67
+ // registers (unless AVX-512 version is used).
68
+ //
69
+ // C++ template for top-N implementation is provided. The code
70
+ // assumes that indices[idx] == idx. One can write code that lifts
71
+ // such an assumption easily.
72
+ //
73
+ // Currently, the code that tracks elements with min distances is implemented
74
+ // (Max Heap). Min Heap option can be added easily.
75
+ //
76
+ // Dispatch:
77
+ // AVX2 / ARM_NEON → HeapWithBucketsCMaxFloat (simdlib256-inl.h)
78
+ // NONE (scalar) → HeapWithBucketsGenericCMaxFloat (generic.h)
79
+ //
80
+ // The SIMD definitions live in simdlib256-inl.h (only included by per-ISA
81
+ // .cpp files). Common TUs see only declarations here, so no extern template
82
+ // suppression is needed.
83
+
84
+ #pragma once
85
+
86
+ #include <cstdint>
87
+
88
+ #include <faiss/impl/approx_topk/generic.h>
89
+ #include <faiss/impl/platform_macros.h>
90
+ #include <faiss/impl/simd_dispatch.h>
91
+ #include <faiss/utils/Heap.h>
92
+ #include <faiss/utils/simd_levels.h>
93
+
94
+ // -----------------------------------------------------------------------
95
+ // ApproxTopK_mode_t
96
+ // -----------------------------------------------------------------------
97
+
98
+ /// Represents the mode of use of approximate top-k computations
99
+ /// that allows trading accuracy for speed. So, every option
100
+ /// besides EXACT_TOPK increases the speed.
101
+ ///
102
+ /// B represents the number of buckets.
103
+ /// D is the number of min-k elements to track within every bucket.
104
+ ///
105
+ /// Default option is EXACT_TOPK.
106
+ /// APPROX_TOPK_BUCKETS_B16_D2 is worth starting from, if you'd like
107
+ /// to experiment a bit.
108
+ ///
109
+ /// It seems that only a limited number of combinations are
110
+ /// meaningful, because of the limited supply of SIMD registers.
111
+ /// Also, certain combinations, such as B32_D1 and B16_D1, were concluded
112
+ /// to be not very precise in benchmarks, so they were not introduced.
113
+
114
+ enum ApproxTopK_mode_t : int {
115
+ EXACT_TOPK = 0,
116
+ APPROX_TOPK_BUCKETS_B32_D2 = 1,
117
+ APPROX_TOPK_BUCKETS_B8_D3 = 2,
118
+ APPROX_TOPK_BUCKETS_B16_D2 = 3,
119
+ APPROX_TOPK_BUCKETS_B8_D2 = 4,
120
+ };
121
+
122
+ // -----------------------------------------------------------------------
123
+ // HeapWithBuckets dispatch
124
+ // -----------------------------------------------------------------------
125
+
126
+ namespace faiss {
127
+
128
+ // Primary template: declared, bs_addn NOT defined here.
129
+ // The out-of-line definition lives in simdlib256-inl.h, which is only
130
+ // included by the per-ISA .cpp files (avx2.cpp, neon.cpp).
131
+ template <uint32_t NBUCKETS, uint32_t N, SIMDLevel SL>
132
+ struct HeapWithBucketsCMaxFloat {
133
+ static_assert(
134
+ (NBUCKETS) > 0 && ((NBUCKETS % 8) == 0),
135
+ "Number of buckets needs to be 8, 16, 24, ...");
136
+
137
+ static void addn(
138
+ const uint32_t n,
139
+ const float* const __restrict distances,
140
+ const uint32_t k,
141
+ float* const __restrict bh_val,
142
+ int32_t* const __restrict bh_ids) {
143
+ bs_addn(1, n, distances, k, bh_val, bh_ids);
144
+ }
145
+
146
+ // Declared but not defined — resolved at link time from avx2.cpp/neon.cpp.
147
+ static void bs_addn(
148
+ const uint32_t beam_size,
149
+ const uint32_t n_per_beam,
150
+ const float* const __restrict distances,
151
+ const uint32_t k,
152
+ float* const __restrict bh_val,
153
+ int32_t* const __restrict bh_ids);
154
+ };
155
+
156
+ // NONE specialization: delegates to the scalar generic implementation.
157
+ template <uint32_t NBUCKETS, uint32_t N>
158
+ struct HeapWithBucketsCMaxFloat<NBUCKETS, N, SIMDLevel::NONE> {
159
+ static void addn(
160
+ const uint32_t n,
161
+ const float* const __restrict distances,
162
+ const uint32_t k,
163
+ float* const __restrict bh_val,
164
+ int32_t* const __restrict bh_ids) {
165
+ bs_addn(1, n, distances, k, bh_val, bh_ids);
166
+ }
167
+
168
+ static void bs_addn(
169
+ const uint32_t beam_size,
170
+ const uint32_t n_per_beam,
171
+ const float* const __restrict distances,
172
+ const uint32_t k,
173
+ float* const __restrict bh_val,
174
+ int32_t* const __restrict bh_ids) {
175
+ HeapWithBucketsGenericCMaxFloat<NBUCKETS, N>::bs_addn(
176
+ beam_size, n_per_beam, distances, k, bh_val, bh_ids);
177
+ }
178
+ };
179
+
180
+ // Primary template — not implemented for arbitrary comparators.
181
+ template <typename C, uint32_t NBUCKETS, uint32_t N>
182
+ struct HeapWithBuckets {
183
+ static_assert(
184
+ sizeof(C) == 0,
185
+ "HeapWithBuckets: unsupported comparator type");
186
+ };
187
+
188
+ // Partial specialization for CMax<float, int> that dispatches
189
+ // via with_simd_level_256bit.
190
+ template <uint32_t NBUCKETS, uint32_t N>
191
+ struct HeapWithBuckets<CMax<float, int>, NBUCKETS, N> {
192
+ static void addn(
193
+ const uint32_t n,
194
+ const float* const __restrict distances,
195
+ const uint32_t k,
196
+ float* const __restrict bh_val,
197
+ int32_t* const __restrict bh_ids) {
198
+ bs_addn(1, n, distances, k, bh_val, bh_ids);
199
+ }
200
+
201
+ static void bs_addn(
202
+ const uint32_t beam_size,
203
+ const uint32_t n_per_beam,
204
+ const float* const __restrict distances,
205
+ const uint32_t k,
206
+ float* const __restrict bh_val,
207
+ int32_t* const __restrict bh_ids) {
208
+ with_simd_level_256bit([&]<SIMDLevel SL>() {
209
+ HeapWithBucketsCMaxFloat<NBUCKETS, N, SL>::bs_addn(
210
+ beam_size, n_per_beam, distances, k, bh_val, bh_ids);
211
+ });
212
+ }
213
+ };
214
+
215
+ // -----------------------------------------------------------------------
216
+ // approx_topk_by_mode: consolidates the mode switch + dispatch pattern
217
+ // used by residual_quantizer_encode_steps.cpp and other callers.
218
+ // -----------------------------------------------------------------------
219
+
220
+ // SL-parameterized version for callers that have already resolved the
221
+ // SIMD level (e.g., inside a with_simd_level_256bit lambda).
222
+ template <SIMDLevel SL>
223
+ inline void approx_topk_by_mode(
224
+ ApproxTopK_mode_t mode,
225
+ uint32_t beam_size,
226
+ uint32_t n_per_beam,
227
+ const float* distances,
228
+ uint32_t k,
229
+ float* bh_val,
230
+ int32_t* bh_ids) {
231
+ using C = CMax<float, int>;
232
+ auto approx = [&]<uint32_t NB, uint32_t ND>() {
233
+ HeapWithBucketsCMaxFloat<NB, ND, SL>::bs_addn(
234
+ beam_size, n_per_beam, distances, k, bh_val, bh_ids);
235
+ };
236
+ switch (mode) {
237
+ case ApproxTopK_mode_t::APPROX_TOPK_BUCKETS_B8_D3:
238
+ approx.template operator()<8, 3>();
239
+ break;
240
+ case ApproxTopK_mode_t::APPROX_TOPK_BUCKETS_B8_D2:
241
+ approx.template operator()<8, 2>();
242
+ break;
243
+ case ApproxTopK_mode_t::APPROX_TOPK_BUCKETS_B16_D2:
244
+ approx.template operator()<16, 2>();
245
+ break;
246
+ case ApproxTopK_mode_t::APPROX_TOPK_BUCKETS_B32_D2:
247
+ approx.template operator()<32, 2>();
248
+ break;
249
+ default:
250
+ heap_addn<C>(
251
+ k,
252
+ bh_val,
253
+ bh_ids,
254
+ distances,
255
+ nullptr,
256
+ beam_size * n_per_beam);
257
+ break;
258
+ }
259
+ }
260
+
261
+ // Non-SL wrapper that dispatches via with_simd_level_256bit.
262
+ inline void approx_topk_by_mode(
263
+ ApproxTopK_mode_t mode,
264
+ uint32_t beam_size,
265
+ uint32_t n_per_beam,
266
+ const float* distances,
267
+ uint32_t k,
268
+ float* bh_val,
269
+ int32_t* bh_ids) {
270
+ with_simd_level_256bit([&]<SIMDLevel SL>() {
271
+ approx_topk_by_mode<SL>(
272
+ mode, beam_size, n_per_beam, distances, k, bh_val, bh_ids);
273
+ });
274
+ }
275
+
276
+ } // namespace faiss
@@ -0,0 +1,68 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // Explicit template instantiations of HeapWithBucketsCMaxFloat and
9
+ // accum_and_*_tab for SIMDLevel::AVX2.
10
+
11
+ #ifdef COMPILE_SIMD_AVX2
12
+
13
+ #include <faiss/impl/approx_topk/rq_beam_search_tab-inl.h>
14
+ #include <faiss/impl/approx_topk/simdlib256-inl.h>
15
+ #include <faiss/impl/simdlib/simdlib_avx2.h>
16
+
17
+ namespace faiss {
18
+
19
+ template struct HeapWithBucketsCMaxFloat<8, 3, SIMDLevel::AVX2>;
20
+ template struct HeapWithBucketsCMaxFloat<8, 2, SIMDLevel::AVX2>;
21
+ template struct HeapWithBucketsCMaxFloat<16, 2, SIMDLevel::AVX2>;
22
+ template struct HeapWithBucketsCMaxFloat<16, 1, SIMDLevel::AVX2>;
23
+ template struct HeapWithBucketsCMaxFloat<32, 2, SIMDLevel::AVX2>;
24
+
25
+ #define INSTANTIATE_ACCUM_TAB(M) \
26
+ template void accum_and_store_tab<M, 4, SIMDLevel::AVX2>( \
27
+ size_t, \
28
+ const float* __restrict, \
29
+ const uint64_t* __restrict, \
30
+ const int32_t* __restrict, \
31
+ size_t, \
32
+ size_t, \
33
+ size_t, \
34
+ float* __restrict); \
35
+ template void accum_and_add_tab<M, 4, SIMDLevel::AVX2>( \
36
+ size_t, \
37
+ const float* __restrict, \
38
+ const uint64_t* __restrict, \
39
+ const int32_t* __restrict, \
40
+ size_t, \
41
+ size_t, \
42
+ size_t, \
43
+ float* __restrict); \
44
+ template void accum_and_finalize_tab<M, 4, SIMDLevel::AVX2>( \
45
+ const float* __restrict, \
46
+ const uint64_t* __restrict, \
47
+ const int32_t* __restrict, \
48
+ size_t, \
49
+ size_t, \
50
+ size_t, \
51
+ const float* __restrict, \
52
+ const float* __restrict, \
53
+ float* __restrict);
54
+
55
+ INSTANTIATE_ACCUM_TAB(1)
56
+ INSTANTIATE_ACCUM_TAB(2)
57
+ INSTANTIATE_ACCUM_TAB(3)
58
+ INSTANTIATE_ACCUM_TAB(4)
59
+ INSTANTIATE_ACCUM_TAB(5)
60
+ INSTANTIATE_ACCUM_TAB(6)
61
+ INSTANTIATE_ACCUM_TAB(7)
62
+ INSTANTIATE_ACCUM_TAB(8)
63
+
64
+ #undef INSTANTIATE_ACCUM_TAB
65
+
66
+ } // namespace faiss
67
+
68
+ #endif // COMPILE_SIMD_AVX2
@@ -16,16 +16,12 @@
16
16
 
17
17
  namespace faiss {
18
18
 
19
- // This is the implementation of the idea and it is very slow,
20
- // because a compiler is unable to vectorize it properly.
21
-
22
- template <typename C, uint32_t NBUCKETS, uint32_t N>
23
- struct HeapWithBuckets {
24
- // this case was not implemented yet.
25
- };
19
+ // Scalar (generic) implementation of HeapWithBuckets.
20
+ // This is correct but slow because a compiler is unable to
21
+ // vectorize it properly. Used as the SIMDLevel::NONE fallback.
26
22
 
27
23
  template <uint32_t NBUCKETS, uint32_t N>
28
- struct HeapWithBuckets<CMax<float, int>, NBUCKETS, N> {
24
+ struct HeapWithBucketsGenericCMaxFloat {
29
25
  static void addn(
30
26
  // number of elements
31
27
  const uint32_t n,
@@ -135,4 +131,15 @@ struct HeapWithBuckets<CMax<float, int>, NBUCKETS, N> {
135
131
  }
136
132
  };
137
133
 
134
+ // Legacy name kept for backward compatibility (used when
135
+ // approx_topk.h is not included).
136
+ template <typename C, uint32_t NBUCKETS, uint32_t N>
137
+ struct HeapWithBucketsGeneric {
138
+ // not implemented for arbitrary C
139
+ };
140
+
141
+ template <uint32_t NBUCKETS, uint32_t N>
142
+ struct HeapWithBucketsGeneric<CMax<float, int>, NBUCKETS, N>
143
+ : HeapWithBucketsGenericCMaxFloat<NBUCKETS, N> {};
144
+
138
145
  } // namespace faiss