faiss 0.5.3 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (379)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/ext/faiss/ext.cpp +1 -1
  4. data/ext/faiss/extconf.rb +4 -4
  5. data/ext/faiss/index.cpp +63 -45
  6. data/ext/faiss/index_binary.cpp +37 -27
  7. data/ext/faiss/kmeans.cpp +9 -8
  8. data/ext/faiss/pca_matrix.cpp +9 -7
  9. data/ext/faiss/product_quantizer.cpp +13 -11
  10. data/ext/faiss/utils.cpp +4 -2
  11. data/ext/faiss/utils.h +4 -0
  12. data/lib/faiss/version.rb +1 -1
  13. data/lib/faiss.rb +1 -1
  14. data/vendor/faiss/faiss/AutoTune.cpp +214 -82
  15. data/vendor/faiss/faiss/AutoTune.h +14 -1
  16. data/vendor/faiss/faiss/Clustering.cpp +97 -249
  17. data/vendor/faiss/faiss/Clustering.h +18 -0
  18. data/vendor/faiss/faiss/IVFlib.cpp +67 -44
  19. data/vendor/faiss/faiss/Index.cpp +25 -12
  20. data/vendor/faiss/faiss/Index.h +26 -4
  21. data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
  22. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +68 -61
  23. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
  24. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
  25. data/vendor/faiss/faiss/IndexBinary.cpp +6 -3
  26. data/vendor/faiss/faiss/IndexBinary.h +4 -4
  27. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
  28. data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
  29. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
  30. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +92 -95
  31. data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
  32. data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
  33. data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
  34. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +120 -414
  35. data/vendor/faiss/faiss/IndexFastScan.cpp +105 -129
  36. data/vendor/faiss/faiss/IndexFastScan.h +35 -24
  37. data/vendor/faiss/faiss/IndexFlat.cpp +216 -152
  38. data/vendor/faiss/faiss/IndexFlat.h +32 -14
  39. data/vendor/faiss/faiss/IndexFlatCodes.cpp +88 -41
  40. data/vendor/faiss/faiss/IndexFlatCodes.h +7 -1
  41. data/vendor/faiss/faiss/IndexHNSW.cpp +299 -187
  42. data/vendor/faiss/faiss/IndexHNSW.h +30 -14
  43. data/vendor/faiss/faiss/IndexIDMap.cpp +26 -22
  44. data/vendor/faiss/faiss/IndexIDMap.h +9 -7
  45. data/vendor/faiss/faiss/IndexIVF.cpp +535 -405
  46. data/vendor/faiss/faiss/IndexIVF.h +47 -16
  47. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
  48. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +105 -99
  49. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +6 -3
  50. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +379 -249
  51. data/vendor/faiss/faiss/IndexIVFFastScan.h +65 -60
  52. data/vendor/faiss/faiss/IndexIVFFlat.cpp +41 -124
  53. data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
  54. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +89 -138
  55. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
  56. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
  57. data/vendor/faiss/faiss/IndexIVFPQ.cpp +77 -907
  58. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +184 -122
  59. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
  60. data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -18
  61. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +59 -60
  62. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +4 -3
  63. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +564 -416
  64. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +269 -111
  65. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
  66. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  67. data/vendor/faiss/faiss/IndexLSH.cpp +44 -25
  68. data/vendor/faiss/faiss/IndexLattice.cpp +41 -36
  69. data/vendor/faiss/faiss/IndexNNDescent.cpp +37 -21
  70. data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
  71. data/vendor/faiss/faiss/IndexNSG.cpp +40 -23
  72. data/vendor/faiss/faiss/IndexNSG.h +0 -2
  73. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +32 -12
  74. data/vendor/faiss/faiss/IndexPQ.cpp +129 -213
  75. data/vendor/faiss/faiss/IndexPQ.h +3 -2
  76. data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
  77. data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
  78. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
  79. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  80. data/vendor/faiss/faiss/IndexRaBitQ.cpp +31 -43
  81. data/vendor/faiss/faiss/IndexRaBitQ.h +4 -3
  82. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +135 -317
  83. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +192 -34
  84. data/vendor/faiss/faiss/IndexRefine.cpp +30 -55
  85. data/vendor/faiss/faiss/IndexRefine.h +4 -4
  86. data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
  87. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
  88. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
  89. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
  90. data/vendor/faiss/faiss/IndexShards.cpp +13 -13
  91. data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
  92. data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
  93. data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
  94. data/vendor/faiss/faiss/MetaIndexes.h +1 -1
  95. data/vendor/faiss/faiss/MetricType.h +29 -6
  96. data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
  97. data/vendor/faiss/faiss/SuperKMeans.h +97 -0
  98. data/vendor/faiss/faiss/VectorTransform.cpp +349 -141
  99. data/vendor/faiss/faiss/VectorTransform.h +39 -16
  100. data/vendor/faiss/faiss/build.cpp +23 -0
  101. data/vendor/faiss/faiss/build.h +15 -0
  102. data/vendor/faiss/faiss/clone_index.cpp +55 -51
  103. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
  104. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
  105. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
  106. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
  107. data/vendor/faiss/faiss/{cppcontrib/factory_tools.cpp → factory_tools.cpp} +6 -1
  108. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
  109. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
  110. data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
  111. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
  112. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
  113. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
  114. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
  115. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
  116. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
  117. data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
  118. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
  119. data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
  120. data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
  121. data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
  122. data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
  123. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
  124. data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
  125. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
  126. data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
  127. data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
  128. data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
  129. data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
  130. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +64 -34
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
  132. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -28
  134. data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
  135. data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
  136. data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +367 -0
  137. data/vendor/faiss/faiss/impl/ClusteringInitialization.h +107 -0
  138. data/vendor/faiss/faiss/impl/CodePacker.cpp +7 -3
  139. data/vendor/faiss/faiss/impl/CodePacker.h +11 -3
  140. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +83 -0
  141. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.h +47 -0
  142. data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
  143. data/vendor/faiss/faiss/impl/FaissAssert.h +64 -3
  144. data/vendor/faiss/faiss/impl/FaissException.h +50 -3
  145. data/vendor/faiss/faiss/impl/HNSW.cpp +117 -351
  146. data/vendor/faiss/faiss/impl/HNSW.h +21 -40
  147. data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
  148. data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
  149. data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
  150. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +114 -102
  151. data/vendor/faiss/faiss/impl/NNDescent.cpp +63 -26
  152. data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
  153. data/vendor/faiss/faiss/impl/NSG.cpp +44 -26
  154. data/vendor/faiss/faiss/impl/NSG.h +20 -10
  155. data/vendor/faiss/faiss/impl/Panorama.cpp +76 -52
  156. data/vendor/faiss/faiss/impl/Panorama.h +265 -78
  157. data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
  158. data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
  159. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +62 -37
  160. data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
  161. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
  162. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
  163. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +99 -80
  164. data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
  165. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +135 -37
  166. data/vendor/faiss/faiss/impl/RaBitQUtils.h +148 -21
  167. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +298 -301
  168. data/vendor/faiss/faiss/impl/RaBitQuantizer.h +3 -10
  169. data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +15 -41
  170. data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +0 -4
  171. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +40 -32
  172. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  173. data/vendor/faiss/faiss/impl/ResultHandler.h +218 -113
  174. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +119 -2362
  175. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -3
  176. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
  177. data/vendor/faiss/faiss/impl/VisitedTable.cpp +42 -0
  178. data/vendor/faiss/faiss/impl/VisitedTable.h +76 -0
  179. data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
  180. data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
  181. data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
  182. data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
  183. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
  184. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
  185. data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
  186. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
  187. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
  188. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
  189. data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
  190. data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
  191. data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
  192. data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
  193. data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
  194. data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
  195. data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
  196. data/vendor/faiss/faiss/impl/expanded_scanners.h +163 -0
  197. data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
  198. data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
  199. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
  200. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
  201. data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
  202. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
  203. data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +176 -4
  204. data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
  205. data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
  206. data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
  207. data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
  208. data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
  209. data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
  210. data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -348
  211. data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
  212. data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
  213. data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +290 -142
  214. data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
  215. data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
  216. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
  217. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
  218. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
  219. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
  220. data/vendor/faiss/faiss/impl/index_read.cpp +1950 -505
  221. data/vendor/faiss/faiss/impl/index_read_utils.h +1 -2
  222. data/vendor/faiss/faiss/impl/index_write.cpp +112 -21
  223. data/vendor/faiss/faiss/impl/io.cpp +6 -6
  224. data/vendor/faiss/faiss/impl/io_macros.h +33 -16
  225. data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
  226. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +81 -40
  227. data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
  228. data/vendor/faiss/faiss/impl/mapped_io.cpp +15 -8
  229. data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
  230. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
  231. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
  232. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
  233. data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
  234. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
  235. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
  236. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
  237. data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx2.h → pq_code_distance/pq_code_distance-avx2.h} +43 -220
  238. data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx512.h → pq_code_distance/pq_code_distance-avx512.h} +25 -112
  239. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +59 -0
  240. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
  241. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +256 -0
  242. data/vendor/faiss/faiss/impl/{code_distance/code_distance-sve.h → pq_code_distance/pq_code_distance-sve.cpp} +57 -146
  243. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
  244. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +320 -483
  245. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
  246. data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +121 -0
  247. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +137 -0
  248. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +371 -0
  249. data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +190 -0
  250. data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +94 -0
  251. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +603 -0
  252. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +597 -0
  253. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +388 -0
  254. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +630 -0
  255. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
  256. data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +387 -0
  257. data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +54 -0
  258. data/vendor/faiss/faiss/impl/simd_dispatch.h +173 -0
  259. data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
  260. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +274 -171
  261. data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
  262. data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
  263. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
  264. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +275 -217
  265. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
  266. data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
  267. data/vendor/faiss/faiss/impl/svs_io.h +8 -2
  268. data/vendor/faiss/faiss/index_factory.cpp +115 -28
  269. data/vendor/faiss/faiss/index_io.h +53 -3
  270. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +73 -20
  271. data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
  272. data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
  273. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
  274. data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
  275. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
  276. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +14 -14
  277. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  278. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +9 -19
  279. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
  280. data/vendor/faiss/faiss/svs/IndexSVSFlat.h +2 -0
  281. data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
  282. data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
  283. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
  284. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
  285. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
  286. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
  287. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
  288. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +19 -2
  289. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
  290. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +19 -2
  291. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +14 -0
  292. data/vendor/faiss/faiss/utils/Heap.cpp +56 -10
  293. data/vendor/faiss/faiss/utils/Heap.h +21 -0
  294. data/vendor/faiss/faiss/utils/NeuralNet.cpp +54 -40
  295. data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
  296. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
  297. data/vendor/faiss/faiss/utils/distances.cpp +507 -559
  298. data/vendor/faiss/faiss/utils/distances.h +118 -1
  299. data/vendor/faiss/faiss/utils/distances_dispatch.h +250 -0
  300. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
  301. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
  302. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
  303. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
  304. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
  305. data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
  306. data/vendor/faiss/faiss/utils/distances_simd.cpp +72 -3681
  307. data/vendor/faiss/faiss/utils/extra_distances.cpp +60 -102
  308. data/vendor/faiss/faiss/utils/extra_distances.h +79 -7
  309. data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
  310. data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
  311. data/vendor/faiss/faiss/utils/hamming.h +92 -2
  312. data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
  313. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
  314. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
  315. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
  316. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
  317. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
  318. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
  319. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
  320. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
  321. data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
  322. data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
  323. data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
  324. data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
  325. data/vendor/faiss/faiss/utils/partitioning.h +31 -0
  326. data/vendor/faiss/faiss/utils/popcount.h +29 -0
  327. data/vendor/faiss/faiss/utils/pq_code_distance.h +251 -0
  328. data/vendor/faiss/faiss/utils/prefetch.h +2 -2
  329. data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
  330. data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
  331. data/vendor/faiss/faiss/utils/rabitq_simd.h +124 -343
  332. data/vendor/faiss/faiss/utils/random.cpp +6 -6
  333. data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
  334. data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +154 -0
  335. data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +777 -0
  336. data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +306 -0
  337. data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +1431 -0
  338. data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +1095 -0
  339. data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
  340. data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
  341. data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +392 -0
  342. data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
  343. data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
  344. data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
  345. data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
  346. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
  347. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
  348. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
  349. data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
  350. data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
  351. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
  352. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
  353. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
  354. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
  355. data/vendor/faiss/faiss/utils/simd_levels.cpp +334 -0
  356. data/vendor/faiss/faiss/utils/simd_levels.h +183 -0
  357. data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
  358. data/vendor/faiss/faiss/utils/utils.cpp +21 -14
  359. data/vendor/faiss/faiss/utils/utils.h +3 -3
  360. metadata +156 -42
  361. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
  362. data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
  363. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +0 -81
  364. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +0 -186
  365. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -216
  366. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -224
  367. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
  368. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
  369. data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
  370. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
  371. data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -228
  372. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
  373. data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
  374. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -450
  375. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
  376. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
  377. data/vendor/faiss/faiss/utils/simdlib.h +0 -42
  378. data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -296
  379. data/vendor/faiss/faiss/{cppcontrib/factory_tools.h → factory_tools.h} +0 -0
@@ -7,47 +7,25 @@
7
7
 
8
8
  #pragma once
9
9
 
10
- #ifdef __AVX512F__
11
-
12
10
  #include <immintrin.h>
13
11
 
14
- #include <type_traits>
15
-
16
- #include <faiss/impl/ProductQuantizer.h>
17
- #include <faiss/impl/code_distance/code_distance-generic.h>
12
+ #include <faiss/impl/pq_code_distance/pq_code_distance-inl.h>
18
13
 
19
14
  namespace faiss {
15
+ namespace pq_code_distance {
20
16
 
21
17
  // According to experiments, the AVX-512 version may be SLOWER than
22
- // the AVX2 version, which is somewhat unexpected.
23
- // This version is not used for now, but it may be used later.
18
+ // the AVX2 version, which is somewhat unexpected.
19
+ // This version is kept for completeness.
24
20
  //
25
21
  // TODO: test for AMD CPUs.
26
22
 
27
- template <typename PQDecoderT>
28
- typename std::enable_if<!std::is_same<PQDecoderT, PQDecoder8>::value, float>::
29
- type inline distance_single_code_avx512(
30
- // number of subquantizers
31
- const size_t M,
32
- // number of bits per quantization index
33
- const size_t nbits,
34
- // precomputed distances, layout (M, ksub)
35
- const float* sim_table,
36
- const uint8_t* code) {
37
- // default implementation
38
- return distance_single_code_generic<PQDecoderT>(M, nbits, sim_table, code);
39
- }
40
-
41
- template <typename PQDecoderT>
42
- typename std::enable_if<std::is_same<PQDecoderT, PQDecoder8>::value, float>::
43
- type inline distance_single_code_avx512(
44
- // number of subquantizers
45
- const size_t M,
46
- // number of bits per quantization index
47
- const size_t nbits,
48
- // precomputed distances, layout (M, ksub)
49
- const float* sim_table,
50
- const uint8_t* code0) {
23
+ // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
24
+ template <>
25
+ float pq_code_distance_8bit_single_impl<SIMDLevel::AVX512>(
26
+ size_t M,
27
+ const float* sim_table,
28
+ const uint8_t* code0) {
51
29
  float result0 = 0;
52
30
  constexpr size_t ksub = 1 << 8;
53
31
 
@@ -59,50 +37,38 @@ typename std::enable_if<std::is_same<PQDecoderT, PQDecoder8>::value, float>::
59
37
  const float* tab = sim_table;
60
38
 
61
39
  if (pqM16 > 0) {
62
- // process 16 values per loop
63
40
  const __m512i vksub = _mm512_set1_epi32(ksub);
64
41
  __m512i offsets_0 = _mm512_setr_epi32(
65
42
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
66
43
  offsets_0 = _mm512_mullo_epi32(offsets_0, vksub);
67
44
 
68
- // accumulators of partial sums
69
45
  __m512 partialSums[N];
70
46
  for (intptr_t j = 0; j < N; j++) {
71
47
  partialSums[j] = _mm512_setzero_ps();
72
48
  }
73
49
 
74
- // loop
50
+ // Process 16 values per loop iteration.
75
51
  for (m = 0; m < pqM16 * 16; m += 16) {
76
- // load 16 uint8 values
77
52
  __m128i mm1[N];
78
53
  mm1[0] = _mm_loadu_si128((const __m128i_u*)(code0 + m));
79
54
 
80
- // process first 8 codes
81
55
  for (intptr_t j = 0; j < N; j++) {
82
56
  const __m512i idx1 = _mm512_cvtepu8_epi32(mm1[j]);
83
-
84
- // add offsets
85
57
  const __m512i indices_to_read_from =
86
58
  _mm512_add_epi32(idx1, offsets_0);
87
-
88
- // gather 16 values, similar to 16 operations of tab[idx]
89
59
  __m512 collected = _mm512_i32gather_ps(
90
60
  indices_to_read_from, tab, sizeof(float));
91
-
92
- // collect partial sums
93
61
  partialSums[j] = _mm512_add_ps(partialSums[j], collected);
94
62
  }
95
63
  tab += ksub * 16;
96
64
  }
97
65
 
98
- // horizontal sum for partialSum
99
66
  result0 += _mm512_reduce_add_ps(partialSums[0]);
100
67
  }
101
68
 
102
- //
69
+ // Process leftovers.
103
70
  if (m < M) {
104
- // process leftovers
105
- PQDecoder8 decoder0(code0 + m, nbits);
71
+ PQDecoder8 decoder0(code0 + m, 8);
106
72
  for (; m < M; m++) {
107
73
  result0 += tab[decoder0.decode()];
108
74
  tab += ksub;
@@ -112,56 +78,16 @@ typename std::enable_if<std::is_same<PQDecoderT, PQDecoder8>::value, float>::
112
78
  return result0;
113
79
  }
114
80
 
115
- template <typename PQDecoderT>
116
- typename std::enable_if<!std::is_same<PQDecoderT, PQDecoder8>::value, void>::
117
- type
118
- distance_four_codes_avx512(
119
- // number of subquantizers
120
- const size_t M,
121
- // number of bits per quantization index
122
- const size_t nbits,
123
- // precomputed distances, layout (M, ksub)
124
- const float* sim_table,
125
- // codes
126
- const uint8_t* __restrict code0,
127
- const uint8_t* __restrict code1,
128
- const uint8_t* __restrict code2,
129
- const uint8_t* __restrict code3,
130
- // computed distances
131
- float& result0,
132
- float& result1,
133
- float& result2,
134
- float& result3) {
135
- distance_four_codes_generic<PQDecoderT>(
136
- M,
137
- nbits,
138
- sim_table,
139
- code0,
140
- code1,
141
- code2,
142
- code3,
143
- result0,
144
- result1,
145
- result2,
146
- result3);
147
- }
148
-
149
- // Combines 4 operations of distance_single_code()
150
- template <typename PQDecoderT>
151
- typename std::enable_if<std::is_same<PQDecoderT, PQDecoder8>::value, void>::type
152
- distance_four_codes_avx512(
153
- // number of subquantizers
154
- const size_t M,
155
- // number of bits per quantization index
156
- const size_t nbits,
157
- // precomputed distances, layout (M, ksub)
81
+ // Combines 4 operations of pq_code_distance_8bit_single_impl().
82
+ // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
83
+ template <>
84
+ void pq_code_distance_8bit_four_impl<SIMDLevel::AVX512>(
85
+ size_t M,
158
86
  const float* sim_table,
159
- // codes
160
87
  const uint8_t* __restrict code0,
161
88
  const uint8_t* __restrict code1,
162
89
  const uint8_t* __restrict code2,
163
90
  const uint8_t* __restrict code3,
164
- // computed distances
165
91
  float& result0,
166
92
  float& result1,
167
93
  float& result2,
@@ -180,59 +106,47 @@ distance_four_codes_avx512(
180
106
  const float* tab = sim_table;
181
107
 
182
108
  if (pqM16 > 0) {
183
- // process 16 values per loop
184
109
  const __m512i vksub = _mm512_set1_epi32(ksub);
185
110
  __m512i offsets_0 = _mm512_setr_epi32(
186
111
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
187
112
  offsets_0 = _mm512_mullo_epi32(offsets_0, vksub);
188
113
 
189
- // accumulators of partial sums
190
114
  __m512 partialSums[N];
191
115
  for (intptr_t j = 0; j < N; j++) {
192
116
  partialSums[j] = _mm512_setzero_ps();
193
117
  }
194
118
 
195
- // loop
119
+ // Process 16 values per loop iteration.
196
120
  for (m = 0; m < pqM16 * 16; m += 16) {
197
- // load 16 uint8 values
198
121
  __m128i mm1[N];
199
122
  mm1[0] = _mm_loadu_si128((const __m128i_u*)(code0 + m));
200
123
  mm1[1] = _mm_loadu_si128((const __m128i_u*)(code1 + m));
201
124
  mm1[2] = _mm_loadu_si128((const __m128i_u*)(code2 + m));
202
125
  mm1[3] = _mm_loadu_si128((const __m128i_u*)(code3 + m));
203
126
 
204
- // process first 8 codes
205
127
  for (intptr_t j = 0; j < N; j++) {
206
128
  const __m512i idx1 = _mm512_cvtepu8_epi32(mm1[j]);
207
-
208
- // add offsets
209
129
  const __m512i indices_to_read_from =
210
130
  _mm512_add_epi32(idx1, offsets_0);
211
-
212
- // gather 16 values, similar to 16 operations of tab[idx]
213
131
  __m512 collected = _mm512_i32gather_ps(
214
132
  indices_to_read_from, tab, sizeof(float));
215
-
216
- // collect partial sums
217
133
  partialSums[j] = _mm512_add_ps(partialSums[j], collected);
218
134
  }
219
135
  tab += ksub * 16;
220
136
  }
221
137
 
222
- // horizontal sum for partialSum
223
138
  result0 += _mm512_reduce_add_ps(partialSums[0]);
224
139
  result1 += _mm512_reduce_add_ps(partialSums[1]);
225
140
  result2 += _mm512_reduce_add_ps(partialSums[2]);
226
141
  result3 += _mm512_reduce_add_ps(partialSums[3]);
227
142
  }
228
143
 
229
- //
144
+ // Process leftovers.
230
145
  if (m < M) {
231
- // process leftovers
232
- PQDecoder8 decoder0(code0 + m, nbits);
233
- PQDecoder8 decoder1(code1 + m, nbits);
234
- PQDecoder8 decoder2(code2 + m, nbits);
235
- PQDecoder8 decoder3(code3 + m, nbits);
146
+ PQDecoder8 decoder0(code0 + m, 8);
147
+ PQDecoder8 decoder1(code1 + m, 8);
148
+ PQDecoder8 decoder2(code2 + m, 8);
149
+ PQDecoder8 decoder3(code3 + m, 8);
236
150
  for (; m < M; m++) {
237
151
  result0 += tab[decoder0.decode()];
238
152
  result1 += tab[decoder1.decode()];
@@ -243,6 +157,5 @@ distance_four_codes_avx512(
243
157
  }
244
158
  }
245
159
 
160
+ } // namespace pq_code_distance
246
161
  } // namespace faiss
247
-
248
- #endif
@@ -0,0 +1,59 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // This TU provides non-templated PQ code distance dispatch wrappers
9
+ // (pq_code_distance_8bit_single, pq_code_distance_8bit_four) declared
10
+ // in pq_code_distance-inl.h. These use with_simd_level to route to the
11
+ // best available SIMD implementation via pq_code_distance_8bit_*_impl
12
+ // function template specializations.
13
+ //
14
+ // The NONE and ARM_NEON _impl specializations are defined inline in
15
+ // pq_code_distance-generic.h (included transitively). The AVX2, AVX512,
16
+ // and ARM_SVE specializations are in their respective per-SIMD files.
17
+
18
+ #include <faiss/impl/pq_code_distance/pq_code_distance-generic.h>
19
+
20
+ namespace faiss {
21
+ namespace pq_code_distance {
22
+
23
+ float pq_code_distance_8bit_single(
24
+ size_t M,
25
+ const float* sim_table,
26
+ const uint8_t* code) {
27
+ return with_simd_level([&]<SIMDLevel SL>() {
28
+ return pq_code_distance_8bit_single_impl<SL>(M, sim_table, code);
29
+ });
30
+ }
31
+
32
+ void pq_code_distance_8bit_four(
33
+ size_t M,
34
+ const float* sim_table,
35
+ const uint8_t* __restrict code0,
36
+ const uint8_t* __restrict code1,
37
+ const uint8_t* __restrict code2,
38
+ const uint8_t* __restrict code3,
39
+ float& result0,
40
+ float& result1,
41
+ float& result2,
42
+ float& result3) {
43
+ with_simd_level([&]<SIMDLevel SL>() {
44
+ pq_code_distance_8bit_four_impl<SL>(
45
+ M,
46
+ sim_table,
47
+ code0,
48
+ code1,
49
+ code2,
50
+ code3,
51
+ result0,
52
+ result1,
53
+ result2,
54
+ result3);
55
+ });
56
+ }
57
+
58
+ } // namespace pq_code_distance
59
+ } // namespace faiss
@@ -0,0 +1,96 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <faiss/impl/pq_code_distance/pq_code_distance-inl.h>
11
+
12
+ namespace faiss {
13
+ namespace pq_code_distance {
14
+
15
+ // NONE: use scalar directly.
16
+
17
+ // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
18
+ template <>
19
+ inline float pq_code_distance_8bit_single_impl<SIMDLevel::NONE>(
20
+ size_t M,
21
+ const float* sim_table,
22
+ const uint8_t* code) {
23
+ return PQCodeDistanceScalar<PQDecoder8>::distance_single_code(
24
+ M, 8, sim_table, code);
25
+ }
26
+
27
+ // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
28
+ template <>
29
+ inline void pq_code_distance_8bit_four_impl<SIMDLevel::NONE>(
30
+ size_t M,
31
+ const float* sim_table,
32
+ const uint8_t* __restrict code0,
33
+ const uint8_t* __restrict code1,
34
+ const uint8_t* __restrict code2,
35
+ const uint8_t* __restrict code3,
36
+ float& result0,
37
+ float& result1,
38
+ float& result2,
39
+ float& result3) {
40
+ PQCodeDistanceScalar<PQDecoder8>::distance_four_codes(
41
+ M,
42
+ 8,
43
+ sim_table,
44
+ code0,
45
+ code1,
46
+ code2,
47
+ code3,
48
+ result0,
49
+ result1,
50
+ result2,
51
+ result3);
52
+ }
53
+
54
+ #ifdef COMPILE_SIMD_ARM_NEON
55
+ // ARM_NEON: No NEON-optimized PQ code distance exists. Use scalar.
56
+
57
+ // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
58
+ template <>
59
+ inline float pq_code_distance_8bit_single_impl<SIMDLevel::ARM_NEON>(
60
+ size_t M,
61
+ const float* sim_table,
62
+ const uint8_t* code) {
63
+ return PQCodeDistanceScalar<PQDecoder8>::distance_single_code(
64
+ M, 8, sim_table, code);
65
+ }
66
+
67
+ // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
68
+ template <>
69
+ inline void pq_code_distance_8bit_four_impl<SIMDLevel::ARM_NEON>(
70
+ size_t M,
71
+ const float* sim_table,
72
+ const uint8_t* __restrict code0,
73
+ const uint8_t* __restrict code1,
74
+ const uint8_t* __restrict code2,
75
+ const uint8_t* __restrict code3,
76
+ float& result0,
77
+ float& result1,
78
+ float& result2,
79
+ float& result3) {
80
+ PQCodeDistanceScalar<PQDecoder8>::distance_four_codes(
81
+ M,
82
+ 8,
83
+ sim_table,
84
+ code0,
85
+ code1,
86
+ code2,
87
+ code3,
88
+ result0,
89
+ result1,
90
+ result2,
91
+ result3);
92
+ }
93
+ #endif // COMPILE_SIMD_ARM_NEON
94
+
95
+ } // namespace pq_code_distance
96
+ } // namespace faiss
@@ -0,0 +1,256 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ /**
11
+ * @file pq_code_distance-inl.h
12
+ * @brief PQ code distance SIMD-dispatched implementations.
13
+ *
14
+ * This is a PRIVATE header — do not include in public APIs or user code.
15
+ * Only faiss internal sources (the per-SIMD implementation files and
16
+ * pq_code_distance-generic.h/.cpp) should include this header.
17
+ */
18
+
19
+ #include <cstddef>
20
+ #include <cstdint>
21
+ #include <type_traits>
22
+
23
+ #include <faiss/impl/ProductQuantizer.h>
24
+ #include <faiss/impl/platform_macros.h>
25
+ #include <faiss/impl/simd_dispatch.h>
26
+
27
+ namespace faiss {
28
+ namespace pq_code_distance {
29
+
30
+ /*********************************************************************
31
+ * PQCodeDistance — SIMD-dispatched PQ code distance
32
+ *
33
+ * Computes the distance from a PQ-encoded vector to a query vector,
34
+ * given a precomputed table of sub-distances (one per subquantizer
35
+ * per centroid). Originally extracted from IndexIVFPQ.cpp.
36
+ *
37
+ * DESIGN:
38
+ *
39
+ * PQCodeDistance<PQDecoderT, SL> computes PQ code distances at a given
40
+ * SIMD level. The dispatch site (IndexIVFPQ.cpp, IndexPQ.cpp) uses
41
+ * with_simd_level to select SL at runtime, which instantiates
42
+ * PQCodeDistance for ALL decoder types (PQDecoder8, PQDecoder16,
43
+ * PQDecoderGeneric) at the chosen level.
44
+ *
45
+ * Only PQDecoder8 has SIMD-optimized implementations (AVX2, AVX512,
46
+ * ARM_SVE). The other decoders always use scalar code — their decode()
47
+ * method is inherently sequential, so SIMD doesn't help.
48
+ *
49
+ * The primary template is always complete (no forward declarations
50
+ * needed). For PQDecoder8, it delegates to _impl dispatch bridge
51
+ * functions whose specializations are defined in per-SIMD .cpp files
52
+ * and resolved at link time. For other decoders, it uses scalar.
53
+ *
54
+ * ADDING A NEW SIMD LEVEL:
55
+ *
56
+ * 1. Add the level to SIMDLevel enum (simd_levels.h)
57
+ * 2. Add dispatch_config entry (simd_dispatch.bzl)
58
+ * 3. Define pq_code_distance_8bit_single_impl<NEW_LEVEL> and
59
+ * pq_code_distance_8bit_four_impl<NEW_LEVEL> specializations in a
60
+ * new .cpp file compiled with appropriate SIMD flags
61
+ * 4. Add the .cpp to the build (CMakeLists.txt, xplat.bzl)
62
+ *********************************************************************/
63
+
64
+ /// Scalar PQ code distance implementation.
65
+ /// Templated only on decoder type, independent of SIMD level.
66
+ /// Used directly by non-PQDecoder8 decoders (PQDecoder16,
67
+ /// PQDecoderGeneric) and as fallback for PQDecoder8 at NONE/NEON.
68
+ template <typename PQDecoderT>
69
+ struct PQCodeDistanceScalar {
70
+ using PQDecoder = PQDecoderT;
71
+
72
+ static float distance_single_code(
73
+ // number of subquantizers
74
+ size_t M,
75
+ size_t nbits,
76
+ // precomputed distances, layout (M, ksub)
77
+ const float* sim_table,
78
+ const uint8_t* code) {
79
+ PQDecoderT decoder(code, nbits);
80
+ const size_t ksub = 1 << nbits;
81
+
82
+ const float* tab = sim_table;
83
+ float result = 0;
84
+
85
+ for (size_t m = 0; m < M; m++) {
86
+ result += tab[decoder.decode()];
87
+ tab += ksub;
88
+ }
89
+
90
+ return result;
91
+ }
92
+
93
+ static void distance_four_codes(
94
+ size_t M,
95
+ size_t nbits,
96
+ const float* sim_table,
97
+ const uint8_t* __restrict code0,
98
+ const uint8_t* __restrict code1,
99
+ const uint8_t* __restrict code2,
100
+ const uint8_t* __restrict code3,
101
+ float& result0,
102
+ float& result1,
103
+ float& result2,
104
+ float& result3) {
105
+ PQDecoderT decoder0(code0, nbits);
106
+ PQDecoderT decoder1(code1, nbits);
107
+ PQDecoderT decoder2(code2, nbits);
108
+ PQDecoderT decoder3(code3, nbits);
109
+ const size_t ksub = 1 << nbits;
110
+
111
+ const float* tab = sim_table;
112
+ result0 = 0;
113
+ result1 = 0;
114
+ result2 = 0;
115
+ result3 = 0;
116
+
117
+ for (size_t m = 0; m < M; m++) {
118
+ result0 += tab[decoder0.decode()];
119
+ result1 += tab[decoder1.decode()];
120
+ result2 += tab[decoder2.decode()];
121
+ result3 += tab[decoder3.decode()];
122
+ tab += ksub;
123
+ }
124
+ }
125
+ };
126
+
127
+ /*********************************************************************
128
+ * Dispatch bridge — function templates for PQDecoder8 SIMD dispatch.
129
+ *
130
+ * Primary declarations only; specializations are defined in per-SIMD
131
+ * .cpp files (AVX2, AVX512, ARM_SVE) and inline in pq_code_distance-generic.h
132
+ * (NONE, ARM_NEON). Same pattern as fvec_L2sqr et al. in distances.h.
133
+ *********************************************************************/
134
+
135
+ template <SIMDLevel SL>
136
+ float pq_code_distance_8bit_single_impl(
137
+ size_t M,
138
+ const float* sim_table,
139
+ const uint8_t* code);
140
+
141
+ template <SIMDLevel SL>
142
+ void pq_code_distance_8bit_four_impl(
143
+ size_t M,
144
+ const float* sim_table,
145
+ const uint8_t* __restrict code0,
146
+ const uint8_t* __restrict code1,
147
+ const uint8_t* __restrict code2,
148
+ const uint8_t* __restrict code3,
149
+ float& result0,
150
+ float& result1,
151
+ float& result2,
152
+ float& result3);
153
+
154
+ /// Primary template — always complete.
155
+ /// For PQDecoder8, delegates to _impl dispatch bridges (resolved at
156
+ /// link time to per-SIMD implementations). For other decoders, uses
157
+ /// scalar — their sequential decode() methods don't benefit from SIMD.
158
+ template <typename PQDecoderT, SIMDLevel SL>
159
+ struct PQCodeDistance {
160
+ using PQDecoder = PQDecoderT;
161
+ static constexpr SIMDLevel simd_level = SL;
162
+
163
+ static float distance_single_code(
164
+ size_t M,
165
+ size_t nbits,
166
+ const float* sim_table,
167
+ const uint8_t* code) {
168
+ if constexpr (std::is_same_v<PQDecoderT, PQDecoder8>) {
169
+ return pq_code_distance_8bit_single_impl<SL>(M, sim_table, code);
170
+ } else {
171
+ return PQCodeDistanceScalar<PQDecoderT>::distance_single_code(
172
+ M, nbits, sim_table, code);
173
+ }
174
+ }
175
+
176
+ static void distance_four_codes(
177
+ size_t M,
178
+ size_t nbits,
179
+ const float* sim_table,
180
+ const uint8_t* __restrict code0,
181
+ const uint8_t* __restrict code1,
182
+ const uint8_t* __restrict code2,
183
+ const uint8_t* __restrict code3,
184
+ float& result0,
185
+ float& result1,
186
+ float& result2,
187
+ float& result3) {
188
+ if constexpr (std::is_same_v<PQDecoderT, PQDecoder8>) {
189
+ pq_code_distance_8bit_four_impl<SL>(
190
+ M,
191
+ sim_table,
192
+ code0,
193
+ code1,
194
+ code2,
195
+ code3,
196
+ result0,
197
+ result1,
198
+ result2,
199
+ result3);
200
+ } else {
201
+ PQCodeDistanceScalar<PQDecoderT>::distance_four_codes(
202
+ M,
203
+ nbits,
204
+ sim_table,
205
+ code0,
206
+ code1,
207
+ code2,
208
+ code3,
209
+ result0,
210
+ result1,
211
+ result2,
212
+ result3);
213
+ }
214
+ }
215
+ };
216
+
217
+ /*********************************************************************
218
+ * Non-templated PQ code distance dispatch (PQDecoder8 only).
219
+ *
220
+ * These follow the same pattern as distances.h: the caller does not
221
+ * name a SIMDLevel. Internally they dispatch via with_simd_level
222
+ * to the best available SIMD implementation (DD: runtime detection,
223
+ * static: compile-time selection). Definitions are in
224
+ * pq_code_distance-generic.cpp.
225
+ *********************************************************************/
226
+
227
+ /// Compute PQ distance for a single code, dispatching to the best
228
+ /// available SIMD level.
229
+ FAISS_API float pq_code_distance_8bit_single(
230
+ size_t M,
231
+ const float* sim_table,
232
+ const uint8_t* code);
233
+
234
+ /// Compute PQ distances for four codes simultaneously, dispatching
235
+ /// to the best available SIMD level.
236
+ FAISS_API void pq_code_distance_8bit_four(
237
+ size_t M,
238
+ const float* sim_table,
239
+ const uint8_t* __restrict code0,
240
+ const uint8_t* __restrict code1,
241
+ const uint8_t* __restrict code2,
242
+ const uint8_t* __restrict code3,
243
+ float& result0,
244
+ float& result1,
245
+ float& result2,
246
+ float& result3);
247
+
248
+ } // namespace pq_code_distance
249
+
250
+ // Re-export public API into namespace faiss for convenience
251
+ using pq_code_distance::pq_code_distance_8bit_four;
252
+ using pq_code_distance::pq_code_distance_8bit_single;
253
+ using pq_code_distance::PQCodeDistance;
254
+ using pq_code_distance::PQCodeDistanceScalar;
255
+
256
+ } // namespace faiss