faiss 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (361)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +2 -1
  4. data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
  5. data/ext/faiss/index_binary.cpp +1 -1
  6. data/ext/faiss/kmeans.cpp +1 -1
  7. data/ext/faiss/pca_matrix.cpp +1 -1
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
  10. data/lib/faiss/version.rb +1 -1
  11. data/vendor/faiss/faiss/AutoTune.cpp +93 -80
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -240
  13. data/vendor/faiss/faiss/Clustering.h +6 -0
  14. data/vendor/faiss/faiss/IVFlib.cpp +41 -21
  15. data/vendor/faiss/faiss/Index.cpp +6 -5
  16. data/vendor/faiss/faiss/Index.h +5 -5
  17. data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
  21. data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
  22. data/vendor/faiss/faiss/IndexBinary.h +4 -4
  23. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
  26. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
  27. data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
  28. data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
  29. data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
  30. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
  31. data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
  32. data/vendor/faiss/faiss/IndexFastScan.h +25 -23
  33. data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
  34. data/vendor/faiss/faiss/IndexFlat.h +21 -18
  35. data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
  36. data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
  37. data/vendor/faiss/faiss/IndexHNSW.h +16 -2
  38. data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
  39. data/vendor/faiss/faiss/IndexIDMap.h +9 -7
  40. data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
  41. data/vendor/faiss/faiss/IndexIVF.h +33 -12
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
  44. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
  45. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
  46. data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
  47. data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
  48. data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
  49. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
  50. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
  51. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
  52. data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
  53. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
  54. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
  55. data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
  56. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
  57. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
  58. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
  59. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
  60. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
  61. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  62. data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
  63. data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
  64. data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
  65. data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
  66. data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
  67. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
  68. data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
  69. data/vendor/faiss/faiss/IndexPQ.h +3 -2
  70. data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
  71. data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
  72. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
  73. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  74. data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
  75. data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
  76. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
  77. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
  78. data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
  79. data/vendor/faiss/faiss/IndexRefine.h +4 -4
  80. data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
  81. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
  82. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
  83. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
  84. data/vendor/faiss/faiss/IndexShards.cpp +10 -9
  85. data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
  86. data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
  87. data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
  88. data/vendor/faiss/faiss/MetaIndexes.h +1 -1
  89. data/vendor/faiss/faiss/MetricType.h +14 -7
  90. data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
  91. data/vendor/faiss/faiss/SuperKMeans.h +97 -0
  92. data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
  93. data/vendor/faiss/faiss/VectorTransform.h +16 -16
  94. data/vendor/faiss/faiss/build.cpp +23 -0
  95. data/vendor/faiss/faiss/build.h +15 -0
  96. data/vendor/faiss/faiss/clone_index.cpp +48 -47
  97. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
  98. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
  99. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
  100. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
  101. data/vendor/faiss/faiss/factory_tools.cpp +5 -0
  102. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
  106. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
  107. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
  108. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
  109. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
  110. data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
  111. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
  112. data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
  113. data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
  114. data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
  115. data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
  116. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
  117. data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
  118. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
  119. data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
  120. data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
  121. data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
  122. data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
  123. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
  124. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
  125. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
  126. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
  127. data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
  128. data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
  129. data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
  130. data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
  131. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
  132. data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
  133. data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
  134. data/vendor/faiss/faiss/impl/FaissException.h +50 -3
  135. data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
  136. data/vendor/faiss/faiss/impl/HNSW.h +13 -34
  137. data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
  138. data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
  139. data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
  140. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
  141. data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
  142. data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
  143. data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
  144. data/vendor/faiss/faiss/impl/NSG.h +4 -4
  145. data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
  146. data/vendor/faiss/faiss/impl/Panorama.h +258 -87
  147. data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
  148. data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
  149. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
  150. data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
  151. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
  152. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
  153. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
  154. data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
  155. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
  156. data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
  157. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
  158. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
  159. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  160. data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
  161. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
  162. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
  163. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
  164. data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
  165. data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
  166. data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
  167. data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
  168. data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
  169. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
  170. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
  171. data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
  172. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
  173. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
  174. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
  175. data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
  176. data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
  177. data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
  178. data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
  179. data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
  180. data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
  181. data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
  182. data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
  183. data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
  184. data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
  185. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
  186. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
  187. data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
  188. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
  189. data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
  190. data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
  191. data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
  192. data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
  193. data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
  194. data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
  195. data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
  196. data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
  197. data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
  198. data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
  199. data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
  200. data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
  201. data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
  202. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
  203. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
  204. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
  205. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
  206. data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
  207. data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
  208. data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
  209. data/vendor/faiss/faiss/impl/io.cpp +6 -6
  210. data/vendor/faiss/faiss/impl/io_macros.h +33 -16
  211. data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
  212. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
  213. data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
  214. data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
  215. data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
  216. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
  217. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
  218. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
  219. data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
  220. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
  221. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
  222. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
  223. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
  224. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
  225. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
  226. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
  227. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
  228. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
  229. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
  230. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
  231. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
  232. data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
  233. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
  234. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
  235. data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
  236. data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
  237. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
  238. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
  239. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
  240. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
  241. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
  242. data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
  243. data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
  244. data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
  245. data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
  246. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
  247. data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
  248. data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
  249. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
  250. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
  251. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
  252. data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
  253. data/vendor/faiss/faiss/impl/svs_io.h +8 -2
  254. data/vendor/faiss/faiss/index_factory.cpp +86 -18
  255. data/vendor/faiss/faiss/index_io.h +24 -0
  256. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
  257. data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
  258. data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
  259. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
  260. data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
  261. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
  262. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
  263. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  264. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
  265. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
  266. data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
  267. data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
  268. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
  269. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
  270. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
  271. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
  272. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
  273. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
  274. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
  275. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
  276. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
  277. data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
  278. data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
  279. data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
  280. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
  281. data/vendor/faiss/faiss/utils/distances.cpp +390 -560
  282. data/vendor/faiss/faiss/utils/distances.h +20 -1
  283. data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
  284. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
  285. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
  286. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
  287. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
  288. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
  289. data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
  290. data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
  291. data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
  292. data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
  293. data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
  294. data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
  295. data/vendor/faiss/faiss/utils/hamming.h +92 -2
  296. data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
  297. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
  298. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
  299. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
  300. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
  301. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
  302. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
  303. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
  304. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
  305. data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
  306. data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
  307. data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
  308. data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
  309. data/vendor/faiss/faiss/utils/partitioning.h +31 -0
  310. data/vendor/faiss/faiss/utils/popcount.h +29 -0
  311. data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
  312. data/vendor/faiss/faiss/utils/prefetch.h +2 -2
  313. data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
  314. data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
  315. data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
  316. data/vendor/faiss/faiss/utils/random.cpp +6 -6
  317. data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
  318. data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
  319. data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
  320. data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
  321. data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
  322. data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
  323. data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
  324. data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
  325. data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
  326. data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
  327. data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
  328. data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
  329. data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
  330. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
  331. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
  332. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
  333. data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
  334. data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
  335. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
  336. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
  337. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
  338. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
  339. data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
  340. data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
  341. data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
  342. data/vendor/faiss/faiss/utils/utils.cpp +5 -5
  343. data/vendor/faiss/faiss/utils/utils.h +3 -3
  344. metadata +119 -34
  345. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
  346. data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
  347. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
  348. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
  349. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
  350. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
  351. data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
  352. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
  353. data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
  354. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
  355. data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
  356. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
  357. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
  358. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
  359. data/vendor/faiss/faiss/utils/simdlib.h +0 -42
  360. data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
  361. data/ext/faiss/{utils_rb.h → utils.h} +0 -0
@@ -5,114 +5,32 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
- // This TU provides:
9
- // 1. _impl specializations for NONE (and ARM_NEON), using scalar code.
10
- // 2. Non-templated PQ code distance dispatch wrappers
11
- // (pq_code_distance_single, pq_code_distance_four) declared in
12
- // pq_code_distance.h. These use DISPATCH_SIMDLevel to route to the
13
- // best available SIMD implementation via pq_code_distance_*_impl
14
- // function template specializations defined in the per-SIMD .cpp files.
15
-
16
- #include <faiss/impl/pq_code_distance/pq_code_distance-inl.h>
8
+ // This TU provides non-templated PQ code distance dispatch wrappers
9
+ // (pq_code_distance_8bit_single, pq_code_distance_8bit_four) declared
10
+ // in pq_code_distance-inl.h. These use with_simd_level to route to the
11
+ // best available SIMD implementation via pq_code_distance_8bit_*_impl
12
+ // function template specializations.
13
+ //
14
+ // The NONE and ARM_NEON _impl specializations are defined inline in
15
+ // pq_code_distance-generic.h (included transitively). The AVX2, AVX512,
16
+ // and ARM_SVE specializations are in their respective per-SIMD files.
17
+
18
+ #include <faiss/impl/pq_code_distance/pq_code_distance-generic.h>
17
19
 
18
20
  namespace faiss {
19
21
  namespace pq_code_distance {
20
22
 
21
- // NONE: use scalar directly.
22
-
23
- // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
24
- template <>
25
- float pq_code_distance_single_impl<SIMDLevel::NONE>(
26
- size_t M,
27
- size_t nbits,
28
- const float* sim_table,
29
- const uint8_t* code) {
30
- return PQCodeDistanceScalar<PQDecoder8>::distance_single_code(
31
- M, nbits, sim_table, code);
32
- }
33
-
34
- // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
35
- template <>
36
- void pq_code_distance_four_impl<SIMDLevel::NONE>(
37
- size_t M,
38
- size_t nbits,
39
- const float* sim_table,
40
- const uint8_t* __restrict code0,
41
- const uint8_t* __restrict code1,
42
- const uint8_t* __restrict code2,
43
- const uint8_t* __restrict code3,
44
- float& result0,
45
- float& result1,
46
- float& result2,
47
- float& result3) {
48
- PQCodeDistanceScalar<PQDecoder8>::distance_four_codes(
49
- M,
50
- nbits,
51
- sim_table,
52
- code0,
53
- code1,
54
- code2,
55
- code3,
56
- result0,
57
- result1,
58
- result2,
59
- result3);
60
- }
61
-
62
- #ifdef COMPILE_SIMD_ARM_NEON
63
- // ARM_NEON: No NEON-optimized PQ code distance exists. Use scalar.
64
-
65
- // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
66
- template <>
67
- float pq_code_distance_single_impl<SIMDLevel::ARM_NEON>(
68
- size_t M,
69
- size_t nbits,
70
- const float* sim_table,
71
- const uint8_t* code) {
72
- return PQCodeDistanceScalar<PQDecoder8>::distance_single_code(
73
- M, nbits, sim_table, code);
74
- }
75
-
76
- // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
77
- template <>
78
- void pq_code_distance_four_impl<SIMDLevel::ARM_NEON>(
79
- size_t M,
80
- size_t nbits,
81
- const float* sim_table,
82
- const uint8_t* __restrict code0,
83
- const uint8_t* __restrict code1,
84
- const uint8_t* __restrict code2,
85
- const uint8_t* __restrict code3,
86
- float& result0,
87
- float& result1,
88
- float& result2,
89
- float& result3) {
90
- PQCodeDistanceScalar<PQDecoder8>::distance_four_codes(
91
- M,
92
- nbits,
93
- sim_table,
94
- code0,
95
- code1,
96
- code2,
97
- code3,
98
- result0,
99
- result1,
100
- result2,
101
- result3);
102
- }
103
- #endif // COMPILE_SIMD_ARM_NEON
104
-
105
- float pq_code_distance_single(
23
+ float pq_code_distance_8bit_single(
106
24
  size_t M,
107
- size_t nbits,
108
25
  const float* sim_table,
109
26
  const uint8_t* code) {
110
- DISPATCH_SIMDLevel(pq_code_distance_single_impl, M, nbits, sim_table, code);
27
+ return with_simd_level([&]<SIMDLevel SL>() {
28
+ return pq_code_distance_8bit_single_impl<SL>(M, sim_table, code);
29
+ });
111
30
  }
112
31
 
113
- void pq_code_distance_four(
32
+ void pq_code_distance_8bit_four(
114
33
  size_t M,
115
- size_t nbits,
116
34
  const float* sim_table,
117
35
  const uint8_t* __restrict code0,
118
36
  const uint8_t* __restrict code1,
@@ -122,19 +40,19 @@ void pq_code_distance_four(
122
40
  float& result1,
123
41
  float& result2,
124
42
  float& result3) {
125
- DISPATCH_SIMDLevel(
126
- pq_code_distance_four_impl,
127
- M,
128
- nbits,
129
- sim_table,
130
- code0,
131
- code1,
132
- code2,
133
- code3,
134
- result0,
135
- result1,
136
- result2,
137
- result3);
43
+ with_simd_level([&]<SIMDLevel SL>() {
44
+ pq_code_distance_8bit_four_impl<SL>(
45
+ M,
46
+ sim_table,
47
+ code0,
48
+ code1,
49
+ code2,
50
+ code3,
51
+ result0,
52
+ result1,
53
+ result2,
54
+ result3);
55
+ });
138
56
  }
139
57
 
140
58
  } // namespace pq_code_distance
@@ -0,0 +1,96 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <faiss/impl/pq_code_distance/pq_code_distance-inl.h>
11
+
12
+ namespace faiss {
13
+ namespace pq_code_distance {
14
+
15
+ // NONE: use scalar directly.
16
+
17
+ // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
18
+ template <>
19
+ inline float pq_code_distance_8bit_single_impl<SIMDLevel::NONE>(
20
+ size_t M,
21
+ const float* sim_table,
22
+ const uint8_t* code) {
23
+ return PQCodeDistanceScalar<PQDecoder8>::distance_single_code(
24
+ M, 8, sim_table, code);
25
+ }
26
+
27
+ // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
28
+ template <>
29
+ inline void pq_code_distance_8bit_four_impl<SIMDLevel::NONE>(
30
+ size_t M,
31
+ const float* sim_table,
32
+ const uint8_t* __restrict code0,
33
+ const uint8_t* __restrict code1,
34
+ const uint8_t* __restrict code2,
35
+ const uint8_t* __restrict code3,
36
+ float& result0,
37
+ float& result1,
38
+ float& result2,
39
+ float& result3) {
40
+ PQCodeDistanceScalar<PQDecoder8>::distance_four_codes(
41
+ M,
42
+ 8,
43
+ sim_table,
44
+ code0,
45
+ code1,
46
+ code2,
47
+ code3,
48
+ result0,
49
+ result1,
50
+ result2,
51
+ result3);
52
+ }
53
+
54
+ #ifdef COMPILE_SIMD_ARM_NEON
55
+ // ARM_NEON: No NEON-optimized PQ code distance exists. Use scalar.
56
+
57
+ // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
58
+ template <>
59
+ inline float pq_code_distance_8bit_single_impl<SIMDLevel::ARM_NEON>(
60
+ size_t M,
61
+ const float* sim_table,
62
+ const uint8_t* code) {
63
+ return PQCodeDistanceScalar<PQDecoder8>::distance_single_code(
64
+ M, 8, sim_table, code);
65
+ }
66
+
67
+ // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
68
+ template <>
69
+ inline void pq_code_distance_8bit_four_impl<SIMDLevel::ARM_NEON>(
70
+ size_t M,
71
+ const float* sim_table,
72
+ const uint8_t* __restrict code0,
73
+ const uint8_t* __restrict code1,
74
+ const uint8_t* __restrict code2,
75
+ const uint8_t* __restrict code3,
76
+ float& result0,
77
+ float& result1,
78
+ float& result2,
79
+ float& result3) {
80
+ PQCodeDistanceScalar<PQDecoder8>::distance_four_codes(
81
+ M,
82
+ 8,
83
+ sim_table,
84
+ code0,
85
+ code1,
86
+ code2,
87
+ code3,
88
+ result0,
89
+ result1,
90
+ result2,
91
+ result3);
92
+ }
93
+ #endif // COMPILE_SIMD_ARM_NEON
94
+
95
+ } // namespace pq_code_distance
96
+ } // namespace faiss
@@ -9,15 +9,248 @@
9
9
 
10
10
  /**
11
11
  * @file pq_code_distance-inl.h
12
- * @brief Private header for PQ code distance SIMD implementations.
12
+ * @brief PQ code distance SIMD-dispatched implementations.
13
13
  *
14
14
  * This is a PRIVATE header — do not include in public APIs or user code.
15
15
  * Only faiss internal .cpp files (the per-SIMD implementation files and
16
16
  * pq_code_distance-generic.cpp) should include this header.
17
- *
18
- * This header re-exports the public API (pq_code_distance.h) plus the
19
- * simd_dispatch.h machinery needed by the implementation files.
20
17
  */
21
18
 
19
+ #include <cstddef>
20
+ #include <cstdint>
21
+ #include <type_traits>
22
+
23
+ #include <faiss/impl/ProductQuantizer.h>
24
+ #include <faiss/impl/platform_macros.h>
22
25
  #include <faiss/impl/simd_dispatch.h>
23
- #include <faiss/utils/pq_code_distance.h>
26
+
27
+ namespace faiss {
28
+ namespace pq_code_distance {
29
+
30
+ /*********************************************************************
31
+ * PQCodeDistance — SIMD-dispatched PQ code distance
32
+ *
33
+ * Computes the distance from a PQ-encoded vector to a query vector,
34
+ * given a precomputed table of sub-distances (one per subquantizer
35
+ * per centroid). Originally extracted from IndexIVFPQ.cpp.
36
+ *
37
+ * DESIGN:
38
+ *
39
+ * PQCodeDistance<PQDecoderT, SL> computes PQ code distances at a given
40
+ * SIMD level. The dispatch site (IndexIVFPQ.cpp, IndexPQ.cpp) uses
41
+ * with_simd_level to select SL at runtime, which instantiates
42
+ * PQCodeDistance for ALL decoder types (PQDecoder8, PQDecoder16,
43
+ * PQDecoderGeneric) at the chosen level.
44
+ *
45
+ * Only PQDecoder8 has SIMD-optimized implementations (AVX2, AVX512,
46
+ * ARM_SVE). The other decoders always use scalar code — their decode()
47
+ * method is inherently sequential, so SIMD doesn't help.
48
+ *
49
+ * The primary template is always complete (no forward declarations
50
+ * needed). For PQDecoder8, it delegates to _impl dispatch bridge
51
+ * functions whose specializations are defined in per-SIMD .cpp files
52
+ * and resolved at link time. For other decoders, it uses scalar.
53
+ *
54
+ * ADDING A NEW SIMD LEVEL:
55
+ *
56
+ * 1. Add the level to SIMDLevel enum (simd_levels.h)
57
+ * 2. Add dispatch_config entry (simd_dispatch.bzl)
58
+ * 3. Define pq_code_distance_8bit_single_impl<NEW_LEVEL> and
59
+ * pq_code_distance_8bit_four_impl<NEW_LEVEL> specializations in a
60
+ * new .cpp file compiled with appropriate SIMD flags
61
+ * 4. Add the .cpp to the build (CMakeLists.txt, xplat.bzl)
62
+ *********************************************************************/
63
+
64
/// Scalar PQ code distance implementation.
///
/// Templated only on decoder type, independent of SIMD level.
/// Used directly by non-PQDecoder8 decoders (PQDecoder16,
/// PQDecoderGeneric) and as fallback for PQDecoder8 at the SIMD levels
/// that have no dedicated kernel (NONE, ARM_NEON, RISCV_RVV).
///
/// PQDecoderT must be constructible from (const uint8_t* code, nbits) and
/// provide decode(), whose return value is used as an index into the
/// current row of the lookup table.
template <typename PQDecoderT>
struct PQCodeDistanceScalar {
    using PQDecoder = PQDecoderT;

    /// Sum the table entries selected by one PQ code.
    ///
    /// @param M          number of subquantizers
    /// @param nbits      bits per sub-code; each table row holds
    ///                   (1 << nbits) floats
    /// @param sim_table  precomputed distances, layout (M, ksub)
    /// @param code       encoded vector handed to the decoder
    /// @return           sum over m of sim_table[m][decoded sub-code m]
    static float distance_single_code(
            size_t M,
            size_t nbits,
            const float* sim_table,
            const uint8_t* code) {
        PQDecoderT decoder(code, nbits);
        // Shift a size_t rather than an int literal: `1 << nbits` is
        // evaluated in int and overflows once nbits reaches 31.
        const size_t ksub = size_t{1} << nbits;

        const float* tab = sim_table;
        float result = 0;

        for (size_t m = 0; m < M; m++) {
            result += tab[decoder.decode()];
            // advance to the row of the next subquantizer
            tab += ksub;
        }

        return result;
    }

    /// Same computation as distance_single_code() for four codes in one
    /// pass over the table; each result is overwritten (not accumulated
    /// into) with the sum for the matching code.
    ///
    /// @param M          number of subquantizers
    /// @param nbits      bits per sub-code; each table row holds
    ///                   (1 << nbits) floats
    /// @param sim_table  precomputed distances, layout (M, ksub)
    /// @param code0..3   the four encoded vectors
    /// @param result0..3 outputs, one per code
    static void distance_four_codes(
            size_t M,
            size_t nbits,
            const float* sim_table,
            const uint8_t* __restrict code0,
            const uint8_t* __restrict code1,
            const uint8_t* __restrict code2,
            const uint8_t* __restrict code3,
            float& result0,
            float& result1,
            float& result2,
            float& result3) {
        PQDecoderT decoder0(code0, nbits);
        PQDecoderT decoder1(code1, nbits);
        PQDecoderT decoder2(code2, nbits);
        PQDecoderT decoder3(code3, nbits);
        // See distance_single_code(): the shift must happen in size_t.
        const size_t ksub = size_t{1} << nbits;

        const float* tab = sim_table;
        result0 = 0;
        result1 = 0;
        result2 = 0;
        result3 = 0;

        for (size_t m = 0; m < M; m++) {
            result0 += tab[decoder0.decode()];
            result1 += tab[decoder1.decode()];
            result2 += tab[decoder2.decode()];
            result3 += tab[decoder3.decode()];
            // advance to the row of the next subquantizer
            tab += ksub;
        }
    }
};
126
+
127
+ /*********************************************************************
128
+ * Dispatch bridge — function templates for PQDecoder8 SIMD dispatch.
129
+ *
130
+ * Primary declarations only; specializations are defined in per-SIMD
131
+ * .cpp files (AVX2, AVX512, ARM_SVE, RISCV_RVV) and pq_code_distance-generic.cpp
132
+ * (NONE, ARM_NEON). Same pattern as fvec_L2sqr et al. in distances.h.
133
+ *********************************************************************/
134
+
135
+ template <SIMDLevel SL>
136
+ float pq_code_distance_8bit_single_impl(
137
+ size_t M,
138
+ const float* sim_table,
139
+ const uint8_t* code);
140
+
141
+ template <SIMDLevel SL>
142
+ void pq_code_distance_8bit_four_impl(
143
+ size_t M,
144
+ const float* sim_table,
145
+ const uint8_t* __restrict code0,
146
+ const uint8_t* __restrict code1,
147
+ const uint8_t* __restrict code2,
148
+ const uint8_t* __restrict code3,
149
+ float& result0,
150
+ float& result1,
151
+ float& result2,
152
+ float& result3);
153
+
154
+ /// Primary template — always complete.
155
+ /// For PQDecoder8, delegates to _impl dispatch bridges (resolved at
156
+ /// link time to per-SIMD implementations). For other decoders, uses
157
+ /// scalar — their sequential decode() methods don't benefit from SIMD.
158
+ template <typename PQDecoderT, SIMDLevel SL>
159
+ struct PQCodeDistance {
160
+ using PQDecoder = PQDecoderT;
161
+ static constexpr SIMDLevel simd_level = SL;
162
+
163
+ static float distance_single_code(
164
+ size_t M,
165
+ size_t nbits,
166
+ const float* sim_table,
167
+ const uint8_t* code) {
168
+ if constexpr (std::is_same_v<PQDecoderT, PQDecoder8>) {
169
+ return pq_code_distance_8bit_single_impl<SL>(M, sim_table, code);
170
+ } else {
171
+ return PQCodeDistanceScalar<PQDecoderT>::distance_single_code(
172
+ M, nbits, sim_table, code);
173
+ }
174
+ }
175
+
176
+ static void distance_four_codes(
177
+ size_t M,
178
+ size_t nbits,
179
+ const float* sim_table,
180
+ const uint8_t* __restrict code0,
181
+ const uint8_t* __restrict code1,
182
+ const uint8_t* __restrict code2,
183
+ const uint8_t* __restrict code3,
184
+ float& result0,
185
+ float& result1,
186
+ float& result2,
187
+ float& result3) {
188
+ if constexpr (std::is_same_v<PQDecoderT, PQDecoder8>) {
189
+ pq_code_distance_8bit_four_impl<SL>(
190
+ M,
191
+ sim_table,
192
+ code0,
193
+ code1,
194
+ code2,
195
+ code3,
196
+ result0,
197
+ result1,
198
+ result2,
199
+ result3);
200
+ } else {
201
+ PQCodeDistanceScalar<PQDecoderT>::distance_four_codes(
202
+ M,
203
+ nbits,
204
+ sim_table,
205
+ code0,
206
+ code1,
207
+ code2,
208
+ code3,
209
+ result0,
210
+ result1,
211
+ result2,
212
+ result3);
213
+ }
214
+ }
215
+ };
216
+
217
+ /*********************************************************************
218
+ * Non-templated PQ code distance dispatch (PQDecoder8 only).
219
+ *
220
+ * These follow the same pattern as distances.h: the caller does not
221
+ * name a SIMDLevel. Internally they dispatch via with_simd_level
222
+ * to the best available SIMD implementation (DD: runtime detection,
223
+ * static: compile-time selection). Definitions are in
224
+ * pq_code_distance-generic.cpp.
225
+ *********************************************************************/
226
+
227
+ /// Compute PQ distance for a single code, dispatching to the best
228
+ /// available SIMD level.
229
+ FAISS_API float pq_code_distance_8bit_single(
230
+ size_t M,
231
+ const float* sim_table,
232
+ const uint8_t* code);
233
+
234
+ /// Compute PQ distances for four codes simultaneously, dispatching
235
+ /// to the best available SIMD level.
236
+ FAISS_API void pq_code_distance_8bit_four(
237
+ size_t M,
238
+ const float* sim_table,
239
+ const uint8_t* __restrict code0,
240
+ const uint8_t* __restrict code1,
241
+ const uint8_t* __restrict code2,
242
+ const uint8_t* __restrict code3,
243
+ float& result0,
244
+ float& result1,
245
+ float& result2,
246
+ float& result3);
247
+
248
+ } // namespace pq_code_distance
249
+
250
+ // Re-export public API into namespace faiss for convenience
251
+ using pq_code_distance::pq_code_distance_8bit_four;
252
+ using pq_code_distance::pq_code_distance_8bit_single;
253
+ using pq_code_distance::PQCodeDistance;
254
+ using pq_code_distance::PQCodeDistanceScalar;
255
+
256
+ } // namespace faiss
@@ -83,16 +83,15 @@ namespace pq_code_distance {
83
83
 
84
84
  // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
85
85
  template <>
86
- float pq_code_distance_single_impl<SIMDLevel::ARM_SVE>(
86
+ float pq_code_distance_8bit_single_impl<SIMDLevel::ARM_SVE>(
87
87
  size_t M,
88
- size_t nbits,
89
88
  const float* sim_table,
90
89
  const uint8_t* code) {
91
90
  if (M <= svcntw())
92
91
  return distance_single_code_sve_for_small_m(M, sim_table, code);
93
92
 
94
93
  const float* tab = sim_table;
95
- const size_t ksub = 1 << nbits;
94
+ constexpr size_t ksub = 1 << 8;
96
95
 
97
96
  const auto offsets_0 = svindex_u32(0, static_cast<uint32_t>(ksub));
98
97
  auto partialSum = svdup_n_f32(0.f);
@@ -159,12 +158,11 @@ float pq_code_distance_single_impl<SIMDLevel::ARM_SVE>(
159
158
  return svaddv_f32(svptrue_b32(), partialSum);
160
159
  }
161
160
 
162
- // Combines 4 operations of pq_code_distance_single_impl().
161
+ // Combines 4 operations of pq_code_distance_8bit_single_impl().
163
162
  // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
164
163
  template <>
165
- void pq_code_distance_four_impl<SIMDLevel::ARM_SVE>(
164
+ void pq_code_distance_8bit_four_impl<SIMDLevel::ARM_SVE>(
166
165
  size_t M,
167
- size_t nbits,
168
166
  const float* sim_table,
169
167
  const uint8_t* __restrict code0,
170
168
  const uint8_t* __restrict code1,
@@ -190,7 +188,7 @@ void pq_code_distance_four_impl<SIMDLevel::ARM_SVE>(
190
188
  }
191
189
 
192
190
  const float* tab = sim_table;
193
- const size_t ksub = 1 << nbits;
191
+ constexpr size_t ksub = 1 << 8;
194
192
 
195
193
  const auto offsets_0 = svindex_u32(0, static_cast<uint32_t>(ksub));
196
194
 
@@ -0,0 +1,68 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #ifdef COMPILE_SIMD_RISCV_RVV
9
+
10
+ #include <faiss/impl/pq_code_distance/pq_code_distance-inl.h>
11
+
12
+ namespace faiss {
13
+ namespace pq_code_distance {
14
+
15
+ // RISCV_RVV: no RVV-optimized PQ code distance exists yet. Use scalar.
16
+
17
+ // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
18
+ template <>
19
+ float pq_code_distance_8bit_single_impl<SIMDLevel::RISCV_RVV>(
20
+ size_t M,
21
+ const float* sim_table,
22
+ const uint8_t* code) {
23
+ return PQCodeDistanceScalar<PQDecoder8>::distance_single_code(
24
+ M, 8, sim_table, code);
25
+ }
26
+
27
+ // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
28
+ template <>
29
+ void pq_code_distance_8bit_four_impl<SIMDLevel::RISCV_RVV>(
30
+ size_t M,
31
+ const float* sim_table,
32
+ const uint8_t* __restrict code0,
33
+ const uint8_t* __restrict code1,
34
+ const uint8_t* __restrict code2,
35
+ const uint8_t* __restrict code3,
36
+ float& result0,
37
+ float& result1,
38
+ float& result2,
39
+ float& result3) {
40
+ PQCodeDistanceScalar<PQDecoder8>::distance_four_codes(
41
+ M,
42
+ 8,
43
+ sim_table,
44
+ code0,
45
+ code1,
46
+ code2,
47
+ code3,
48
+ result0,
49
+ result1,
50
+ result2,
51
+ result3);
52
+ }
53
+
54
+ } // namespace pq_code_distance
55
+ } // namespace faiss
56
+
57
+ #define THE_SIMD_LEVEL SIMDLevel::RISCV_RVV
58
+
59
+ // NOLINTNEXTLINE(facebook-hte-InlineHeader)
60
+ #include <faiss/utils/hamming_distance/hamming_computer-rvv.h>
61
+ // NOLINTNEXTLINE(facebook-hte-InlineHeader)
62
+ #include <faiss/impl/pq_code_distance/PQDistanceComputer_impl.h>
63
+ // NOLINTNEXTLINE(facebook-hte-InlineHeader)
64
+ #include <faiss/impl/pq_code_distance/IVFPQScanner_impl.h>
65
+
66
+ #undef THE_SIMD_LEVEL
67
+
68
+ #endif // COMPILE_SIMD_RISCV_RVV