faiss 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (378)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/ext/faiss/extconf.rb +2 -1
  4. data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
  5. data/ext/faiss/index_binary.cpp +1 -1
  6. data/ext/faiss/kmeans.cpp +1 -1
  7. data/ext/faiss/pca_matrix.cpp +1 -1
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
  10. data/lib/faiss/version.rb +1 -1
  11. data/vendor/faiss/faiss/AutoTune.cpp +93 -80
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -240
  13. data/vendor/faiss/faiss/Clustering.h +6 -0
  14. data/vendor/faiss/faiss/IVFlib.cpp +41 -21
  15. data/vendor/faiss/faiss/Index.cpp +6 -5
  16. data/vendor/faiss/faiss/Index.h +5 -5
  17. data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
  21. data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
  22. data/vendor/faiss/faiss/IndexBinary.h +4 -4
  23. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
  26. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +88 -97
  27. data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
  28. data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
  29. data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
  30. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +89 -417
  31. data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
  32. data/vendor/faiss/faiss/IndexFastScan.h +25 -23
  33. data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
  34. data/vendor/faiss/faiss/IndexFlat.h +21 -18
  35. data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
  36. data/vendor/faiss/faiss/IndexHNSW.cpp +374 -206
  37. data/vendor/faiss/faiss/IndexHNSW.h +16 -2
  38. data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
  39. data/vendor/faiss/faiss/IndexIDMap.h +9 -7
  40. data/vendor/faiss/faiss/IndexIVF.cpp +467 -364
  41. data/vendor/faiss/faiss/IndexIVF.h +33 -12
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +79 -76
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
  44. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
  45. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
  46. data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
  47. data/vendor/faiss/faiss/IndexIVFFlat.cpp +39 -69
  48. data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
  49. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +56 -33
  50. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
  51. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
  52. data/vendor/faiss/faiss/IndexIVFPQ.cpp +73 -846
  53. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
  54. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
  55. data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -20
  56. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +30 -52
  57. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
  58. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
  59. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
  60. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
  61. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  62. data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
  63. data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
  64. data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
  65. data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
  66. data/vendor/faiss/faiss/IndexNSG.cpp +38 -23
  67. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
  68. data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
  69. data/vendor/faiss/faiss/IndexPQ.h +3 -2
  70. data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
  71. data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
  72. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
  73. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  74. data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
  75. data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
  76. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
  77. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
  78. data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
  79. data/vendor/faiss/faiss/IndexRefine.h +4 -4
  80. data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
  81. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
  82. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
  83. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +150 -20
  84. data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -0
  85. data/vendor/faiss/faiss/IndexShards.cpp +10 -9
  86. data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
  87. data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
  88. data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
  89. data/vendor/faiss/faiss/MetaIndexes.h +1 -1
  90. data/vendor/faiss/faiss/MetricType.h +14 -7
  91. data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
  92. data/vendor/faiss/faiss/SuperKMeans.h +97 -0
  93. data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
  94. data/vendor/faiss/faiss/VectorTransform.h +16 -16
  95. data/vendor/faiss/faiss/build.cpp +23 -0
  96. data/vendor/faiss/faiss/build.h +15 -0
  97. data/vendor/faiss/faiss/clone_index.cpp +48 -47
  98. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +1 -1
  99. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
  100. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
  101. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +902 -12
  102. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
  103. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
  104. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +702 -10
  105. data/vendor/faiss/faiss/factory_tools.cpp +9 -0
  106. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
  107. data/vendor/faiss/faiss/gpu/GpuResources.h +3 -2
  108. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +15 -16
  109. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +5 -4
  110. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
  111. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
  112. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
  113. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
  114. data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
  115. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
  116. data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
  117. data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
  118. data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
  119. data/vendor/faiss/faiss/gpu_metal/MetalDistance.h +87 -0
  120. data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
  121. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +58 -0
  122. data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
  123. data/vendor/faiss/faiss/gpu_metal/MetalIndexIVFFlat.h +181 -0
  124. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +111 -0
  125. data/vendor/faiss/faiss/gpu_metal/MetalPythonBridge.h +45 -0
  126. data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
  127. data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
  128. data/vendor/faiss/faiss/gpu_metal/impl/MetalIVFFlat.h +193 -0
  129. data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
  130. data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
  135. data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
  136. data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
  137. data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
  138. data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
  139. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
  140. data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
  141. data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
  142. data/vendor/faiss/faiss/impl/FaissException.h +50 -3
  143. data/vendor/faiss/faiss/impl/HNSW.cpp +639 -507
  144. data/vendor/faiss/faiss/impl/HNSW.h +61 -44
  145. data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
  146. data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
  147. data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
  148. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
  149. data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
  150. data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
  151. data/vendor/faiss/faiss/impl/NSG.cpp +53 -32
  152. data/vendor/faiss/faiss/impl/NSG.h +4 -4
  153. data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
  154. data/vendor/faiss/faiss/impl/Panorama.h +269 -87
  155. data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
  156. data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
  157. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
  158. data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
  159. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
  160. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
  161. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +55 -25
  162. data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
  163. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
  164. data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
  165. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +302 -283
  166. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
  167. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  168. data/vendor/faiss/faiss/impl/ResultHandler.h +100 -75
  169. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +318 -7
  170. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +77 -1
  171. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
  172. data/vendor/faiss/faiss/impl/VisitedTable.cpp +10 -10
  173. data/vendor/faiss/faiss/impl/VisitedTable.h +70 -28
  174. data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
  175. data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
  176. data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
  177. data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
  178. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
  179. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
  180. data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
  181. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
  182. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
  183. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
  184. data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
  185. data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
  186. data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
  187. data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
  188. data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
  189. data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
  190. data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
  191. data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
  192. data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
  193. data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
  194. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
  195. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
  196. data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
  197. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +270 -0
  198. data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
  199. data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
  200. data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
  201. data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
  202. data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
  203. data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
  204. data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
  205. data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
  206. data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
  207. data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
  208. data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
  209. data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
  210. data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
  211. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +83 -0
  212. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +113 -0
  213. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +150 -0
  214. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +142 -0
  215. data/vendor/faiss/faiss/impl/index_read.cpp +1227 -79
  216. data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
  217. data/vendor/faiss/faiss/impl/index_write.cpp +96 -13
  218. data/vendor/faiss/faiss/impl/io.cpp +6 -6
  219. data/vendor/faiss/faiss/impl/io_macros.h +58 -16
  220. data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
  221. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
  222. data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
  223. data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
  224. data/vendor/faiss/faiss/impl/platform_macros.h +15 -4
  225. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
  226. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
  227. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
  228. data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
  229. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +23 -0
  230. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +23 -0
  231. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +23 -0
  232. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
  233. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
  234. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +45 -107
  235. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
  236. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +274 -5
  237. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +10 -7
  238. data/vendor/faiss/faiss/impl/pq_code_distance/pq_scan_impl.h +105 -0
  239. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +70 -0
  240. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
  241. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
  242. data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
  243. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +9 -2
  244. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +419 -19
  245. data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
  246. data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
  247. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +387 -2
  248. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-impl.h +553 -0
  249. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-spr.cpp +559 -0
  250. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +341 -2
  251. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +425 -3
  252. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +290 -2
  253. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +337 -0
  254. data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
  255. data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
  256. data/vendor/faiss/faiss/impl/simd_dispatch.h +157 -66
  257. data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
  258. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
  259. data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
  260. data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
  261. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
  262. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
  263. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
  264. data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
  265. data/vendor/faiss/faiss/impl/svs_io.h +8 -2
  266. data/vendor/faiss/faiss/index_factory.cpp +90 -18
  267. data/vendor/faiss/faiss/index_io.h +40 -0
  268. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
  269. data/vendor/faiss/faiss/invlists/DirectMap.cpp +28 -15
  270. data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
  271. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +170 -86
  272. data/vendor/faiss/faiss/invlists/InvertedLists.h +88 -25
  273. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
  274. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
  275. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  276. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
  277. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
  278. data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
  279. data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
  280. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
  281. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
  282. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
  283. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
  284. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +142 -21
  285. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +33 -7
  286. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.cpp +3 -2
  287. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +2 -1
  288. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +77 -27
  289. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +10 -4
  290. data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
  291. data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
  292. data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
  293. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
  294. data/vendor/faiss/faiss/utils/bf16.h +34 -0
  295. data/vendor/faiss/faiss/utils/distances.cpp +390 -560
  296. data/vendor/faiss/faiss/utils/distances.h +20 -1
  297. data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
  298. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
  299. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
  300. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
  301. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
  302. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
  303. data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
  304. data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -178
  305. data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
  306. data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
  307. data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
  308. data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
  309. data/vendor/faiss/faiss/utils/hamming.h +92 -2
  310. data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
  311. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +16 -0
  312. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
  313. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512_spr.cpp +15 -0
  314. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
  315. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +210 -0
  316. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512_spr.h +171 -0
  317. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
  318. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
  319. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
  320. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
  321. data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
  322. data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
  323. data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
  324. data/vendor/faiss/faiss/utils/partitioning.cpp +66 -989
  325. data/vendor/faiss/faiss/utils/partitioning.h +31 -0
  326. data/vendor/faiss/faiss/utils/popcount.h +29 -0
  327. data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
  328. data/vendor/faiss/faiss/utils/prefetch.h +2 -2
  329. data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
  330. data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
  331. data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
  332. data/vendor/faiss/faiss/utils/random.cpp +6 -6
  333. data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
  334. data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
  335. data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
  336. data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
  337. data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
  338. data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
  339. data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
  340. data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
  341. data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
  342. data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
  343. data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
  344. data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
  345. data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
  346. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1031 -0
  347. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
  348. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
  349. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512_spr.cpp +343 -0
  350. data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
  351. data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
  352. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
  353. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
  354. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
  355. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
  356. data/vendor/faiss/faiss/utils/simd_levels.cpp +29 -7
  357. data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
  358. data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
  359. data/vendor/faiss/faiss/utils/utils.cpp +5 -5
  360. data/vendor/faiss/faiss/utils/utils.h +3 -3
  361. metadata +129 -34
  362. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
  363. data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
  364. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
  365. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
  366. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
  367. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
  368. data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
  369. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
  370. data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
  371. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
  372. data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
  373. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
  374. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
  375. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
  376. data/vendor/faiss/faiss/utils/simdlib.h +0 -42
  377. data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
  378. data/ext/faiss/{utils_rb.h → utils.h} +0 -0
@@ -5,114 +5,52 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
- // This TU provides:
9
- // 1. _impl specializations for NONE (and ARM_NEON), using scalar code.
10
- // 2. Non-templated PQ code distance dispatch wrappers
11
- // (pq_code_distance_single, pq_code_distance_four) declared in
12
- // pq_code_distance.h. These use DISPATCH_SIMDLevel to route to the
13
- // best available SIMD implementation via pq_code_distance_*_impl
14
- // function template specializations defined in the per-SIMD .cpp files.
8
+ // This TU provides non-templated PQ code distance dispatch wrappers
9
+ // (pq_code_distance_8bit_single, pq_code_distance_8bit_four) declared
10
+ // in pq_code_distance-inl.h. These use with_simd_level to route to the
11
+ // best available SIMD implementation via pq_code_distance_8bit_*_impl
12
+ // function template specializations.
13
+ //
14
+ // The NONE and ARM_NEON _impl specializations are defined inline in
15
+ // pq_code_distance-generic.h (included transitively). The AVX2, AVX512,
16
+ // and ARM_SVE specializations are in their respective per-SIMD files.
15
17
 
16
- #include <faiss/impl/pq_code_distance/pq_code_distance-inl.h>
18
+ #include <faiss/impl/pq_code_distance/pq_code_distance-generic.h>
19
+
20
+ #define THE_SIMD_LEVEL SIMDLevel::NONE
21
+ // NOLINTNEXTLINE(facebook-hte-InlineHeader)
22
+ #include <faiss/impl/pq_code_distance/pq_scan_impl.h>
23
+ #undef THE_SIMD_LEVEL
17
24
 
18
25
  namespace faiss {
19
26
  namespace pq_code_distance {
20
27
 
21
- // NONE: use scalar directly.
22
-
23
- // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
24
- template <>
25
- float pq_code_distance_single_impl<SIMDLevel::NONE>(
26
- size_t M,
27
- size_t nbits,
28
- const float* sim_table,
29
- const uint8_t* code) {
30
- return PQCodeDistanceScalar<PQDecoder8>::distance_single_code(
31
- M, nbits, sim_table, code);
32
- }
33
-
34
- // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
35
- template <>
36
- void pq_code_distance_four_impl<SIMDLevel::NONE>(
37
- size_t M,
38
- size_t nbits,
39
- const float* sim_table,
40
- const uint8_t* __restrict code0,
41
- const uint8_t* __restrict code1,
42
- const uint8_t* __restrict code2,
43
- const uint8_t* __restrict code3,
44
- float& result0,
45
- float& result1,
46
- float& result2,
47
- float& result3) {
48
- PQCodeDistanceScalar<PQDecoder8>::distance_four_codes(
49
- M,
50
- nbits,
51
- sim_table,
52
- code0,
53
- code1,
54
- code2,
55
- code3,
56
- result0,
57
- result1,
58
- result2,
59
- result3);
60
- }
61
-
62
- #ifdef COMPILE_SIMD_ARM_NEON
63
- // ARM_NEON: No NEON-optimized PQ code distance exists. Use scalar.
64
-
65
- // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
66
- template <>
67
- float pq_code_distance_single_impl<SIMDLevel::ARM_NEON>(
28
+ void pq_scan_8bit(
68
29
  size_t M,
69
- size_t nbits,
70
- const float* sim_table,
71
- const uint8_t* code) {
72
- return PQCodeDistanceScalar<PQDecoder8>::distance_single_code(
73
- M, nbits, sim_table, code);
74
- }
75
-
76
- // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
77
- template <>
78
- void pq_code_distance_four_impl<SIMDLevel::ARM_NEON>(
79
- size_t M,
80
- size_t nbits,
81
- const float* sim_table,
82
- const uint8_t* __restrict code0,
83
- const uint8_t* __restrict code1,
84
- const uint8_t* __restrict code2,
85
- const uint8_t* __restrict code3,
86
- float& result0,
87
- float& result1,
88
- float& result2,
89
- float& result3) {
90
- PQCodeDistanceScalar<PQDecoder8>::distance_four_codes(
91
- M,
92
- nbits,
93
- sim_table,
94
- code0,
95
- code1,
96
- code2,
97
- code3,
98
- result0,
99
- result1,
100
- result2,
101
- result3);
30
+ const float* dis_table,
31
+ const uint8_t* codes,
32
+ size_t ncodes,
33
+ size_t k,
34
+ float* heap_dis,
35
+ int64_t* heap_ids,
36
+ bool max_heap) {
37
+ with_simd_level([&]<SIMDLevel SL>() {
38
+ pq_scan_8bit_impl<SL>(
39
+ M, dis_table, codes, ncodes, k, heap_dis, heap_ids, max_heap);
40
+ });
102
41
  }
103
- #endif // COMPILE_SIMD_ARM_NEON
104
42
 
105
- float pq_code_distance_single(
43
+ float pq_code_distance_8bit_single(
106
44
  size_t M,
107
- size_t nbits,
108
45
  const float* sim_table,
109
46
  const uint8_t* code) {
110
- DISPATCH_SIMDLevel(pq_code_distance_single_impl, M, nbits, sim_table, code);
47
+ return with_simd_level([&]<SIMDLevel SL>() {
48
+ return pq_code_distance_8bit_single_impl<SL>(M, sim_table, code);
49
+ });
111
50
  }
112
51
 
113
- void pq_code_distance_four(
52
+ void pq_code_distance_8bit_four(
114
53
  size_t M,
115
- size_t nbits,
116
54
  const float* sim_table,
117
55
  const uint8_t* __restrict code0,
118
56
  const uint8_t* __restrict code1,
@@ -122,19 +60,19 @@ void pq_code_distance_four(
122
60
  float& result1,
123
61
  float& result2,
124
62
  float& result3) {
125
- DISPATCH_SIMDLevel(
126
- pq_code_distance_four_impl,
127
- M,
128
- nbits,
129
- sim_table,
130
- code0,
131
- code1,
132
- code2,
133
- code3,
134
- result0,
135
- result1,
136
- result2,
137
- result3);
63
+ with_simd_level([&]<SIMDLevel SL>() {
64
+ pq_code_distance_8bit_four_impl<SL>(
65
+ M,
66
+ sim_table,
67
+ code0,
68
+ code1,
69
+ code2,
70
+ code3,
71
+ result0,
72
+ result1,
73
+ result2,
74
+ result3);
75
+ });
138
76
  }
139
77
 
140
78
  } // namespace pq_code_distance
@@ -0,0 +1,96 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <faiss/impl/pq_code_distance/pq_code_distance-inl.h>
11
+
12
+ namespace faiss {
13
+ namespace pq_code_distance {
14
+
15
+ // NONE: use scalar directly.
16
+
17
+ // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
18
+ template <>
19
+ inline float pq_code_distance_8bit_single_impl<SIMDLevel::NONE>(
20
+ size_t M,
21
+ const float* sim_table,
22
+ const uint8_t* code) {
23
+ return PQCodeDistanceScalar<PQDecoder8>::distance_single_code(
24
+ M, 8, sim_table, code);
25
+ }
26
+
27
+ // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
28
+ template <>
29
+ inline void pq_code_distance_8bit_four_impl<SIMDLevel::NONE>(
30
+ size_t M,
31
+ const float* sim_table,
32
+ const uint8_t* __restrict code0,
33
+ const uint8_t* __restrict code1,
34
+ const uint8_t* __restrict code2,
35
+ const uint8_t* __restrict code3,
36
+ float& result0,
37
+ float& result1,
38
+ float& result2,
39
+ float& result3) {
40
+ PQCodeDistanceScalar<PQDecoder8>::distance_four_codes(
41
+ M,
42
+ 8,
43
+ sim_table,
44
+ code0,
45
+ code1,
46
+ code2,
47
+ code3,
48
+ result0,
49
+ result1,
50
+ result2,
51
+ result3);
52
+ }
53
+
54
+ #ifdef COMPILE_SIMD_ARM_NEON
55
+ // ARM_NEON: No NEON-optimized PQ code distance exists. Use scalar.
56
+
57
+ // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
58
+ template <>
59
+ inline float pq_code_distance_8bit_single_impl<SIMDLevel::ARM_NEON>(
60
+ size_t M,
61
+ const float* sim_table,
62
+ const uint8_t* code) {
63
+ return PQCodeDistanceScalar<PQDecoder8>::distance_single_code(
64
+ M, 8, sim_table, code);
65
+ }
66
+
67
+ // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
68
+ template <>
69
+ inline void pq_code_distance_8bit_four_impl<SIMDLevel::ARM_NEON>(
70
+ size_t M,
71
+ const float* sim_table,
72
+ const uint8_t* __restrict code0,
73
+ const uint8_t* __restrict code1,
74
+ const uint8_t* __restrict code2,
75
+ const uint8_t* __restrict code3,
76
+ float& result0,
77
+ float& result1,
78
+ float& result2,
79
+ float& result3) {
80
+ PQCodeDistanceScalar<PQDecoder8>::distance_four_codes(
81
+ M,
82
+ 8,
83
+ sim_table,
84
+ code0,
85
+ code1,
86
+ code2,
87
+ code3,
88
+ result0,
89
+ result1,
90
+ result2,
91
+ result3);
92
+ }
93
+ #endif // COMPILE_SIMD_ARM_NEON
94
+
95
+ } // namespace pq_code_distance
96
+ } // namespace faiss
@@ -9,15 +9,284 @@
9
9
 
10
10
  /**
11
11
  * @file pq_code_distance-inl.h
12
- * @brief Private header for PQ code distance SIMD implementations.
12
+ * @brief PQ code distance SIMD-dispatched implementations.
13
13
  *
14
14
  * This is a PRIVATE header — do not include in public APIs or user code.
15
15
  * Only faiss internal .cpp files (the per-SIMD implementation files and
16
16
  * pq_code_distance-generic.cpp) should include this header.
17
- *
18
- * This header re-exports the public API (pq_code_distance.h) plus the
19
- * simd_dispatch.h machinery needed by the implementation files.
20
17
  */
21
18
 
19
+ #include <cstddef>
20
+ #include <cstdint>
21
+ #include <type_traits>
22
+
23
+ #include <faiss/impl/ProductQuantizer.h>
24
+ #include <faiss/impl/platform_macros.h>
22
25
  #include <faiss/impl/simd_dispatch.h>
23
- #include <faiss/utils/pq_code_distance.h>
26
+
27
+ namespace faiss {
28
+ namespace pq_code_distance {
29
+
30
+ /*********************************************************************
31
+ * PQCodeDistance — SIMD-dispatched PQ code distance
32
+ *
33
+ * Computes the distance from a PQ-encoded vector to a query vector,
34
+ * given a precomputed table of sub-distances (one per subquantizer
35
+ * per centroid). Originally extracted from IndexIVFPQ.cpp.
36
+ *
37
+ * DESIGN:
38
+ *
39
+ * PQCodeDistance<PQDecoderT, SL> computes PQ code distances at a given
40
+ * SIMD level. The dispatch site (IndexIVFPQ.cpp, IndexPQ.cpp) uses
41
+ * with_simd_level to select SL at runtime, which instantiates
42
+ * PQCodeDistance for ALL decoder types (PQDecoder8, PQDecoder16,
43
+ * PQDecoderGeneric) at the chosen level.
44
+ *
45
+ * Only PQDecoder8 has SIMD-optimized implementations (AVX2, AVX512,
46
+ * ARM_SVE). The other decoders always use scalar code — their decode()
47
+ * method is inherently sequential, so SIMD doesn't help.
48
+ *
49
+ * The primary template is always complete (no forward declarations
50
+ * needed). For PQDecoder8, it delegates to _impl dispatch bridge
51
+ * functions whose specializations are defined in per-SIMD .cpp files
52
+ * and resolved at link time. For other decoders, it uses scalar.
53
+ *
54
+ * ADDING A NEW SIMD LEVEL:
55
+ *
56
+ * 1. Add the level to SIMDLevel enum (simd_levels.h)
57
+ * 2. Add dispatch_config entry (simd_dispatch.bzl)
58
+ * 3. Define pq_code_distance_8bit_single_impl<NEW_LEVEL> and
59
+ * pq_code_distance_8bit_four_impl<NEW_LEVEL> specializations in a
60
+ * new .cpp file compiled with appropriate SIMD flags
61
+ * 4. Add the .cpp to the build (CMakeLists.txt, xplat.bzl)
62
+ *********************************************************************/
63
+
64
/// Scalar PQ code distance implementation.
/// Templated only on decoder type, independent of SIMD level.
/// Used directly by non-PQDecoder8 decoders (PQDecoder16,
/// PQDecoderGeneric) and as fallback for PQDecoder8 at NONE/NEON.
///
/// PQDecoderT must be constructible from (const uint8_t* code, size_t nbits)
/// and provide decode(), whose return value is used as an index into the
/// current row of the distance table.
template <typename PQDecoderT>
struct PQCodeDistanceScalar {
    using PQDecoder = PQDecoderT;

    /// Sum the table entries selected by one PQ code.
    ///
    /// @param M          number of subquantizers (table rows to visit)
    /// @param nbits      bits per sub-code; each table row holds 2^nbits floats
    /// @param sim_table  precomputed distances, layout (M, ksub)
    /// @param code       encoded vector handed to the decoder
    /// @return accumulated distance over the M rows (0 when M == 0)
    static float distance_single_code(
            size_t M,
            size_t nbits,
            const float* sim_table,
            const uint8_t* code) {
        PQDecoderT decoder(code, nbits);
        // Shift in size_t: a plain `1 << nbits` is evaluated as int and is
        // undefined once nbits reaches the width of int.
        const size_t ksub = size_t{1} << nbits;

        const float* tab = sim_table;
        float result = 0;

        for (size_t m = 0; m < M; m++) {
            result += tab[decoder.decode()];
            tab += ksub; // advance to the next subquantizer's row
        }

        return result;
    }

    /// Same computation as distance_single_code() for four codes at once,
    /// walking the table a single time.
    ///
    /// @param M          number of subquantizers (table rows to visit)
    /// @param nbits      bits per sub-code; each table row holds 2^nbits floats
    /// @param sim_table  precomputed distances, layout (M, ksub)
    /// @param code0..code3      the four encoded vectors
    /// @param result0..result3  outputs; reset to 0 and then accumulated, so
    ///                          any previous content is discarded
    static void distance_four_codes(
            size_t M,
            size_t nbits,
            const float* sim_table,
            const uint8_t* __restrict code0,
            const uint8_t* __restrict code1,
            const uint8_t* __restrict code2,
            const uint8_t* __restrict code3,
            float& result0,
            float& result1,
            float& result2,
            float& result3) {
        PQDecoderT decoder0(code0, nbits);
        PQDecoderT decoder1(code1, nbits);
        PQDecoderT decoder2(code2, nbits);
        PQDecoderT decoder3(code3, nbits);
        // Shift in size_t, see distance_single_code().
        const size_t ksub = size_t{1} << nbits;

        const float* tab = sim_table;
        result0 = 0;
        result1 = 0;
        result2 = 0;
        result3 = 0;

        for (size_t m = 0; m < M; m++) {
            result0 += tab[decoder0.decode()];
            result1 += tab[decoder1.decode()];
            result2 += tab[decoder2.decode()];
            result3 += tab[decoder3.decode()];
            tab += ksub; // advance to the next subquantizer's row
        }
    }
};
126
+
127
+ /*********************************************************************
128
+ * Dispatch bridge — function templates for PQDecoder8 SIMD dispatch.
129
+ *
130
+ * Primary declarations only; specializations are defined in per-SIMD
131
+ * .cpp files (AVX2, AVX512, ARM_SVE) and pq_code_distance-generic.cpp
132
+ * (NONE, ARM_NEON). Same pattern as fvec_L2sqr et al. in distances.h.
133
+ *********************************************************************/
134
+
135
+ template <SIMDLevel SL>
136
+ float pq_code_distance_8bit_single_impl(
137
+ size_t M,
138
+ const float* sim_table,
139
+ const uint8_t* code);
140
+
141
+ template <SIMDLevel SL>
142
+ void pq_code_distance_8bit_four_impl(
143
+ size_t M,
144
+ const float* sim_table,
145
+ const uint8_t* __restrict code0,
146
+ const uint8_t* __restrict code1,
147
+ const uint8_t* __restrict code2,
148
+ const uint8_t* __restrict code3,
149
+ float& result0,
150
+ float& result1,
151
+ float& result2,
152
+ float& result3);
153
+
154
+ /// Primary template — always complete.
155
+ /// For PQDecoder8, delegates to _impl dispatch bridges (resolved at
156
+ /// link time to per-SIMD implementations). For other decoders, uses
157
+ /// scalar — their sequential decode() methods don't benefit from SIMD.
158
+ template <typename PQDecoderT, SIMDLevel SL>
159
+ struct PQCodeDistance {
160
+ using PQDecoder = PQDecoderT;
161
+ static constexpr SIMDLevel simd_level = SL;
162
+
163
+ static float distance_single_code(
164
+ size_t M,
165
+ size_t nbits,
166
+ const float* sim_table,
167
+ const uint8_t* code) {
168
+ if constexpr (std::is_same_v<PQDecoderT, PQDecoder8>) {
169
+ return pq_code_distance_8bit_single_impl<SL>(M, sim_table, code);
170
+ } else {
171
+ return PQCodeDistanceScalar<PQDecoderT>::distance_single_code(
172
+ M, nbits, sim_table, code);
173
+ }
174
+ }
175
+
176
+ static void distance_four_codes(
177
+ size_t M,
178
+ size_t nbits,
179
+ const float* sim_table,
180
+ const uint8_t* __restrict code0,
181
+ const uint8_t* __restrict code1,
182
+ const uint8_t* __restrict code2,
183
+ const uint8_t* __restrict code3,
184
+ float& result0,
185
+ float& result1,
186
+ float& result2,
187
+ float& result3) {
188
+ if constexpr (std::is_same_v<PQDecoderT, PQDecoder8>) {
189
+ pq_code_distance_8bit_four_impl<SL>(
190
+ M,
191
+ sim_table,
192
+ code0,
193
+ code1,
194
+ code2,
195
+ code3,
196
+ result0,
197
+ result1,
198
+ result2,
199
+ result3);
200
+ } else {
201
+ PQCodeDistanceScalar<PQDecoderT>::distance_four_codes(
202
+ M,
203
+ nbits,
204
+ sim_table,
205
+ code0,
206
+ code1,
207
+ code2,
208
+ code3,
209
+ result0,
210
+ result1,
211
+ result2,
212
+ result3);
213
+ }
214
+ }
215
+ };
216
+
217
+ /*********************************************************************
218
+ * Non-templated PQ code distance dispatch (PQDecoder8 only).
219
+ *
220
+ * These follow the same pattern as distances.h: the caller does not
221
+ * name a SIMDLevel. Internally they dispatch via with_simd_level
222
+ * to the best available SIMD implementation (DD: runtime detection,
223
+ * static: compile-time selection). Definitions are in
224
+ * pq_code_distance-generic.cpp.
225
+ *********************************************************************/
226
+
227
+ /// Compute PQ distance for a single code, dispatching to the best
228
+ /// available SIMD level.
229
+ FAISS_API float pq_code_distance_8bit_single(
230
+ size_t M,
231
+ const float* sim_table,
232
+ const uint8_t* code);
233
+
234
+ /// Compute PQ distances for four codes simultaneously, dispatching
235
+ /// to the best available SIMD level.
236
+ FAISS_API void pq_code_distance_8bit_four(
237
+ size_t M,
238
+ const float* sim_table,
239
+ const uint8_t* __restrict code0,
240
+ const uint8_t* __restrict code1,
241
+ const uint8_t* __restrict code2,
242
+ const uint8_t* __restrict code3,
243
+ float& result0,
244
+ float& result1,
245
+ float& result2,
246
+ float& result3);
247
+
248
+ /*********************************************************************
249
+ * Standalone PQ scan — SIMD-dispatched full-index scan.
250
+ *
251
+ * Scans all ncodes PQ codes against a precomputed distance table,
252
+ * maintaining a k-nearest-neighbor heap. Uses the SIMD PQ distance
253
+ * kernels (AVX2 gathers, etc.) for the inner loop, with the SIMD
254
+ * gathers inlined into the scan loop in each per-SIMD TU.
255
+ *
256
+ * Definitions are in pq_scan_impl.h (per-SIMD TUs) and
257
+ * pq_code_distance-generic.cpp (dispatch wrapper).
258
+ *********************************************************************/
259
+
260
+ template <SIMDLevel SL>
261
+ void pq_scan_8bit_impl(
262
+ size_t M,
263
+ const float* dis_table,
264
+ const uint8_t* codes,
265
+ size_t ncodes,
266
+ size_t k,
267
+ float* heap_dis,
268
+ int64_t* heap_ids,
269
+ bool max_heap);
270
+
271
+ /// Scan all ncodes 8-bit PQ codes, dispatching to the best SIMD level.
272
+ /// max_heap=true for L2 (CMax), false for IP (CMin).
273
+ FAISS_API void pq_scan_8bit(
274
+ size_t M,
275
+ const float* dis_table,
276
+ const uint8_t* codes,
277
+ size_t ncodes,
278
+ size_t k,
279
+ float* heap_dis,
280
+ int64_t* heap_ids,
281
+ bool max_heap);
282
+
283
+ } // namespace pq_code_distance
284
+
285
+ // Re-export public API into namespace faiss for convenience
286
+ using pq_code_distance::pq_code_distance_8bit_four;
287
+ using pq_code_distance::pq_code_distance_8bit_single;
288
+ using pq_code_distance::pq_scan_8bit;
289
+ using pq_code_distance::PQCodeDistance;
290
+ using pq_code_distance::PQCodeDistanceScalar;
291
+
292
+ } // namespace faiss
@@ -83,16 +83,15 @@ namespace pq_code_distance {
83
83
 
84
84
  // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
85
85
  template <>
86
- float pq_code_distance_single_impl<SIMDLevel::ARM_SVE>(
86
+ float pq_code_distance_8bit_single_impl<SIMDLevel::ARM_SVE>(
87
87
  size_t M,
88
- size_t nbits,
89
88
  const float* sim_table,
90
89
  const uint8_t* code) {
91
90
  if (M <= svcntw())
92
91
  return distance_single_code_sve_for_small_m(M, sim_table, code);
93
92
 
94
93
  const float* tab = sim_table;
95
- const size_t ksub = 1 << nbits;
94
+ constexpr size_t ksub = 1 << 8;
96
95
 
97
96
  const auto offsets_0 = svindex_u32(0, static_cast<uint32_t>(ksub));
98
97
  auto partialSum = svdup_n_f32(0.f);
@@ -159,12 +158,11 @@ float pq_code_distance_single_impl<SIMDLevel::ARM_SVE>(
159
158
  return svaddv_f32(svptrue_b32(), partialSum);
160
159
  }
161
160
 
162
- // Combines 4 operations of pq_code_distance_single_impl().
161
+ // Combines 4 operations of pq_code_distance_8bit_single_impl().
163
162
  // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
164
163
  template <>
165
- void pq_code_distance_four_impl<SIMDLevel::ARM_SVE>(
164
+ void pq_code_distance_8bit_four_impl<SIMDLevel::ARM_SVE>(
166
165
  size_t M,
167
- size_t nbits,
168
166
  const float* sim_table,
169
167
  const uint8_t* __restrict code0,
170
168
  const uint8_t* __restrict code1,
@@ -190,7 +188,7 @@ void pq_code_distance_four_impl<SIMDLevel::ARM_SVE>(
190
188
  }
191
189
 
192
190
  const float* tab = sim_table;
193
- const size_t ksub = 1 << nbits;
191
+ constexpr size_t ksub = 1 << 8;
194
192
 
195
193
  const auto offsets_0 = svindex_u32(0, static_cast<uint32_t>(ksub));
196
194
 
@@ -349,4 +347,9 @@ void pq_code_distance_four_impl<SIMDLevel::ARM_SVE>(
349
347
  } // namespace pq_code_distance
350
348
  } // namespace faiss
351
349
 
350
+ #define THE_SIMD_LEVEL SIMDLevel::ARM_SVE
351
+ // NOLINTNEXTLINE(facebook-hte-InlineHeader)
352
+ #include <faiss/impl/pq_code_distance/pq_scan_impl.h>
353
+ #undef THE_SIMD_LEVEL
354
+
352
355
  #endif // COMPILE_SIMD_ARM_SVE