faiss 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (378) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/ext/faiss/extconf.rb +2 -1
  4. data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
  5. data/ext/faiss/index_binary.cpp +1 -1
  6. data/ext/faiss/kmeans.cpp +1 -1
  7. data/ext/faiss/pca_matrix.cpp +1 -1
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
  10. data/lib/faiss/version.rb +1 -1
  11. data/vendor/faiss/faiss/AutoTune.cpp +93 -80
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -240
  13. data/vendor/faiss/faiss/Clustering.h +6 -0
  14. data/vendor/faiss/faiss/IVFlib.cpp +41 -21
  15. data/vendor/faiss/faiss/Index.cpp +6 -5
  16. data/vendor/faiss/faiss/Index.h +5 -5
  17. data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
  21. data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
  22. data/vendor/faiss/faiss/IndexBinary.h +4 -4
  23. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
  26. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +88 -97
  27. data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
  28. data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
  29. data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
  30. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +89 -417
  31. data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
  32. data/vendor/faiss/faiss/IndexFastScan.h +25 -23
  33. data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
  34. data/vendor/faiss/faiss/IndexFlat.h +21 -18
  35. data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
  36. data/vendor/faiss/faiss/IndexHNSW.cpp +374 -206
  37. data/vendor/faiss/faiss/IndexHNSW.h +16 -2
  38. data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
  39. data/vendor/faiss/faiss/IndexIDMap.h +9 -7
  40. data/vendor/faiss/faiss/IndexIVF.cpp +467 -364
  41. data/vendor/faiss/faiss/IndexIVF.h +33 -12
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +79 -76
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
  44. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
  45. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
  46. data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
  47. data/vendor/faiss/faiss/IndexIVFFlat.cpp +39 -69
  48. data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
  49. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +56 -33
  50. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
  51. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
  52. data/vendor/faiss/faiss/IndexIVFPQ.cpp +73 -846
  53. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
  54. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
  55. data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -20
  56. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +30 -52
  57. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
  58. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
  59. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
  60. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
  61. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  62. data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
  63. data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
  64. data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
  65. data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
  66. data/vendor/faiss/faiss/IndexNSG.cpp +38 -23
  67. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
  68. data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
  69. data/vendor/faiss/faiss/IndexPQ.h +3 -2
  70. data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
  71. data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
  72. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
  73. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  74. data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
  75. data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
  76. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
  77. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
  78. data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
  79. data/vendor/faiss/faiss/IndexRefine.h +4 -4
  80. data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
  81. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
  82. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
  83. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +150 -20
  84. data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -0
  85. data/vendor/faiss/faiss/IndexShards.cpp +10 -9
  86. data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
  87. data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
  88. data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
  89. data/vendor/faiss/faiss/MetaIndexes.h +1 -1
  90. data/vendor/faiss/faiss/MetricType.h +14 -7
  91. data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
  92. data/vendor/faiss/faiss/SuperKMeans.h +97 -0
  93. data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
  94. data/vendor/faiss/faiss/VectorTransform.h +16 -16
  95. data/vendor/faiss/faiss/build.cpp +23 -0
  96. data/vendor/faiss/faiss/build.h +15 -0
  97. data/vendor/faiss/faiss/clone_index.cpp +48 -47
  98. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +1 -1
  99. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
  100. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
  101. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +902 -12
  102. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
  103. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
  104. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +702 -10
  105. data/vendor/faiss/faiss/factory_tools.cpp +9 -0
  106. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
  107. data/vendor/faiss/faiss/gpu/GpuResources.h +3 -2
  108. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +15 -16
  109. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +5 -4
  110. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
  111. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
  112. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
  113. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
  114. data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
  115. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
  116. data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
  117. data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
  118. data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
  119. data/vendor/faiss/faiss/gpu_metal/MetalDistance.h +87 -0
  120. data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
  121. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +58 -0
  122. data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
  123. data/vendor/faiss/faiss/gpu_metal/MetalIndexIVFFlat.h +181 -0
  124. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +111 -0
  125. data/vendor/faiss/faiss/gpu_metal/MetalPythonBridge.h +45 -0
  126. data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
  127. data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
  128. data/vendor/faiss/faiss/gpu_metal/impl/MetalIVFFlat.h +193 -0
  129. data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
  130. data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
  135. data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
  136. data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
  137. data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
  138. data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
  139. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
  140. data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
  141. data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
  142. data/vendor/faiss/faiss/impl/FaissException.h +50 -3
  143. data/vendor/faiss/faiss/impl/HNSW.cpp +639 -507
  144. data/vendor/faiss/faiss/impl/HNSW.h +61 -44
  145. data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
  146. data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
  147. data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
  148. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
  149. data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
  150. data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
  151. data/vendor/faiss/faiss/impl/NSG.cpp +53 -32
  152. data/vendor/faiss/faiss/impl/NSG.h +4 -4
  153. data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
  154. data/vendor/faiss/faiss/impl/Panorama.h +269 -87
  155. data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
  156. data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
  157. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
  158. data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
  159. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
  160. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
  161. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +55 -25
  162. data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
  163. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
  164. data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
  165. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +302 -283
  166. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
  167. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  168. data/vendor/faiss/faiss/impl/ResultHandler.h +100 -75
  169. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +318 -7
  170. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +77 -1
  171. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
  172. data/vendor/faiss/faiss/impl/VisitedTable.cpp +10 -10
  173. data/vendor/faiss/faiss/impl/VisitedTable.h +70 -28
  174. data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
  175. data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
  176. data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
  177. data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
  178. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
  179. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
  180. data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
  181. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
  182. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
  183. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
  184. data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
  185. data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
  186. data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
  187. data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
  188. data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
  189. data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
  190. data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
  191. data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
  192. data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
  193. data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
  194. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
  195. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
  196. data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
  197. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +270 -0
  198. data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
  199. data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
  200. data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
  201. data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
  202. data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
  203. data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
  204. data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
  205. data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
  206. data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
  207. data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
  208. data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
  209. data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
  210. data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
  211. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +83 -0
  212. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +113 -0
  213. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +150 -0
  214. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +142 -0
  215. data/vendor/faiss/faiss/impl/index_read.cpp +1227 -79
  216. data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
  217. data/vendor/faiss/faiss/impl/index_write.cpp +96 -13
  218. data/vendor/faiss/faiss/impl/io.cpp +6 -6
  219. data/vendor/faiss/faiss/impl/io_macros.h +58 -16
  220. data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
  221. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
  222. data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
  223. data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
  224. data/vendor/faiss/faiss/impl/platform_macros.h +15 -4
  225. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
  226. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
  227. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
  228. data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
  229. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +23 -0
  230. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +23 -0
  231. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +23 -0
  232. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
  233. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
  234. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +45 -107
  235. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
  236. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +274 -5
  237. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +10 -7
  238. data/vendor/faiss/faiss/impl/pq_code_distance/pq_scan_impl.h +105 -0
  239. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +70 -0
  240. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
  241. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
  242. data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
  243. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +9 -2
  244. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +419 -19
  245. data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
  246. data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
  247. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +387 -2
  248. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-impl.h +553 -0
  249. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-spr.cpp +559 -0
  250. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +341 -2
  251. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +425 -3
  252. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +290 -2
  253. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +337 -0
  254. data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
  255. data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
  256. data/vendor/faiss/faiss/impl/simd_dispatch.h +157 -66
  257. data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
  258. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
  259. data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
  260. data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
  261. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
  262. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
  263. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
  264. data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
  265. data/vendor/faiss/faiss/impl/svs_io.h +8 -2
  266. data/vendor/faiss/faiss/index_factory.cpp +90 -18
  267. data/vendor/faiss/faiss/index_io.h +40 -0
  268. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
  269. data/vendor/faiss/faiss/invlists/DirectMap.cpp +28 -15
  270. data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
  271. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +170 -86
  272. data/vendor/faiss/faiss/invlists/InvertedLists.h +88 -25
  273. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
  274. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
  275. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  276. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
  277. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
  278. data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
  279. data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
  280. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
  281. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
  282. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
  283. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
  284. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +142 -21
  285. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +33 -7
  286. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.cpp +3 -2
  287. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +2 -1
  288. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +77 -27
  289. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +10 -4
  290. data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
  291. data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
  292. data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
  293. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
  294. data/vendor/faiss/faiss/utils/bf16.h +34 -0
  295. data/vendor/faiss/faiss/utils/distances.cpp +390 -560
  296. data/vendor/faiss/faiss/utils/distances.h +20 -1
  297. data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
  298. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
  299. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
  300. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
  301. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
  302. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
  303. data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
  304. data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -178
  305. data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
  306. data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
  307. data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
  308. data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
  309. data/vendor/faiss/faiss/utils/hamming.h +92 -2
  310. data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
  311. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +16 -0
  312. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
  313. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512_spr.cpp +15 -0
  314. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
  315. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +210 -0
  316. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512_spr.h +171 -0
  317. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
  318. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
  319. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
  320. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
  321. data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
  322. data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
  323. data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
  324. data/vendor/faiss/faiss/utils/partitioning.cpp +66 -989
  325. data/vendor/faiss/faiss/utils/partitioning.h +31 -0
  326. data/vendor/faiss/faiss/utils/popcount.h +29 -0
  327. data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
  328. data/vendor/faiss/faiss/utils/prefetch.h +2 -2
  329. data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
  330. data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
  331. data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
  332. data/vendor/faiss/faiss/utils/random.cpp +6 -6
  333. data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
  334. data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
  335. data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
  336. data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
  337. data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
  338. data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
  339. data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
  340. data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
  341. data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
  342. data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
  343. data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
  344. data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
  345. data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
  346. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1031 -0
  347. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
  348. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
  349. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512_spr.cpp +343 -0
  350. data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
  351. data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
  352. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
  353. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
  354. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
  355. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
  356. data/vendor/faiss/faiss/utils/simd_levels.cpp +29 -7
  357. data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
  358. data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
  359. data/vendor/faiss/faiss/utils/utils.cpp +5 -5
  360. data/vendor/faiss/faiss/utils/utils.h +3 -3
  361. metadata +129 -34
  362. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
  363. data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
  364. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
  365. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
  366. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
  367. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
  368. data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
  369. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
  370. data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
  371. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
  372. data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
  373. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
  374. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
  375. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
  376. data/vendor/faiss/faiss/utils/simdlib.h +0 -42
  377. data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
  378. /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
@@ -18,10 +18,201 @@
18
18
  #include <algorithm>
19
19
  #include <cstddef>
20
20
  #include <cstdint>
21
+ #include <cstring>
21
22
  #include <vector>
22
23
 
24
+ #if defined(COMPILE_SIMD_AVX2) && defined(__AVX2__) && defined(__BMI2__)
25
+ #include <immintrin.h>
26
+ #endif
27
+
23
28
  namespace faiss {
24
29
 
30
+ #ifndef SWIG
31
+
32
+ /// Compute dot products between query_level and active vectors; the result for active-list position i is written to dot_products[i].
33
+ /// Each vector's row starts at level_storage + position * width, where width is LevelWidth if non-zero, else level_width_dims.
34
+ /// @tparam AllActive If true, vectors are at sequential positions 0..N-1
35
+ /// (first level, full batch). If false, positions come
36
+ /// from active_indices (subsequent levels after pruning).
37
+ /// @tparam LevelWidth Compile-time level width in floats (0 = use runtime
38
+ /// level_width_dims). Enables full loop unrolling.
39
+ // Skip pragmas under nvcc: its EDG frontend warns on `#pragma GCC optimize`
40
+ // (#1675-D) for every `.cu` that transitively includes this header. These
41
+ // templates are CPU-only, so the hint is irrelevant during nvcc parse.
42
+ #if !defined(__NVCC__)
43
+ FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
44
+ #endif
45
+ template <bool AllActive = false, size_t LevelWidth = 0>
46
+ static inline void compute_level_dot_kernel(
47
+ const float* FAISS_RESTRICT query_level,
48
+ const float* FAISS_RESTRICT level_storage,
49
+ const uint32_t* active_indices, // not read when AllActive is true
50
+ const size_t num_active,
51
+ const size_t level_width_dims, // only used when LevelWidth == 0
52
+ float* FAISS_RESTRICT dot_products) {
53
+ const size_t width = LevelWidth > 0 ? LevelWidth : level_width_dims; // the template width takes precedence over the runtime argument
54
+ size_t i = 0;
55
+ for (; i + 4 <= num_active; i += 4) { // main loop: four vectors per iteration
56
+ const float* y0 = level_storage +
57
+ (AllActive ? (i + 0) : active_indices[i + 0]) * width; // row offset: loop position when AllActive, otherwise the stored index
58
+ const float* y1 = level_storage +
59
+ (AllActive ? (i + 1) : active_indices[i + 1]) * width;
60
+ const float* y2 = level_storage +
61
+ (AllActive ? (i + 2) : active_indices[i + 2]) * width;
62
+ const float* y3 = level_storage +
63
+ (AllActive ? (i + 3) : active_indices[i + 3]) * width;
64
+
65
+ float dp0 = 0, dp1 = 0, dp2 = 0, dp3 = 0; // one independent accumulator per vector
66
+ FAISS_PRAGMA_IMPRECISE_LOOP // macro defined outside this view; presumably relaxes FP ordering so the reduction can vectorize -- TODO confirm
67
+ for (size_t j = 0; j < width; j++) {
68
+ float q = query_level[j]; // read once, reused by the four products below
69
+ dp0 += q * y0[j];
70
+ dp1 += q * y1[j];
71
+ dp2 += q * y2[j];
72
+ dp3 += q * y3[j];
73
+ }
74
+
75
+ dot_products[i + 0] = dp0; // outputs are indexed by active-list position, not by vector position
76
+ dot_products[i + 1] = dp1;
77
+ dot_products[i + 2] = dp2;
78
+ dot_products[i + 3] = dp3;
79
+ }
80
+ for (; i < num_active; i++) { // remainder: fewer than four vectors left, handled one at a time
81
+ const float* yj =
82
+ level_storage + (AllActive ? i : active_indices[i]) * width;
83
+ float dp = 0;
84
+ FAISS_PRAGMA_IMPRECISE_LOOP
85
+ for (size_t j = 0; j < width; j++) {
86
+ dp += query_level[j] * yj[j];
87
+ }
88
+ dot_products[i] = dp;
89
+ }
90
+ }
91
+ #if !defined(__NVCC__)
92
+ FAISS_PRAGMA_IMPRECISE_FUNCTION_END
93
+ #endif
94
+
95
+ /// Update exact distances with the current level's dot products, then apply
95
+ /// Panorama pruning: for each active vector, compute a lower bound on
96
+ /// the final distance and mark it for removal if it cannot beat the current
97
+ /// threshold. Writes 1 (keep) or 0 (remove) into active_byteset[i] for subsequent compaction.
98
+ /// exact_distances and level_cum_sums are indexed by vector position (idx); dot_buffer and active_byteset by active-list position (i).
99
+ /// Uses `if constexpr` on C::is_max rather than C::cmp() to ensure the
100
+ /// comparison autovectorizes (C::cmp generates scalar function calls).
101
+ #if !defined(__NVCC__)
102
+ FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
103
+ #endif
104
+ template <bool AllActive, typename C, MetricType M>
105
+ static inline void prune_kernel(
106
+ float* FAISS_RESTRICT exact_distances, // updated in place
107
+ const float* FAISS_RESTRICT dot_buffer,
108
+ const float* FAISS_RESTRICT level_cum_sums,
109
+ uint8_t* FAISS_RESTRICT active_byteset, // output flags, one per active-list position
110
+ const uint32_t* FAISS_RESTRICT active_indices, // not read when AllActive is true
111
+ const uint32_t num_active,
112
+ const float query_cum_norm,
113
+ const float threshold) {
114
+ FAISS_PRAGMA_IMPRECISE_LOOP
115
+ for (uint32_t i = 0; i < num_active; i++) {
116
+ uint32_t idx = AllActive ? i : active_indices[i]; // vector position for this active-list slot
117
+ if constexpr (M == METRIC_INNER_PRODUCT) {
118
+ exact_distances[idx] += dot_buffer[i]; // inner product: accumulate the partial dot product
119
+ } else {
120
+ exact_distances[idx] -= 2.0f * dot_buffer[i]; // other metrics: subtract twice the partial dot product (presumably the L2 cross term -- TODO confirm)
121
+ }
122
+
123
+ float cum_sum = level_cum_sums[idx];
124
+ float cauchy_schwarz_bound;
125
+ if constexpr (M == METRIC_INNER_PRODUCT) {
126
+ cauchy_schwarz_bound = -cum_sum * query_cum_norm; // negated, so the subtraction below adds cum_sum * query_cum_norm
127
+ } else {
128
+ cauchy_schwarz_bound = 2.0f * cum_sum * query_cum_norm;
129
+ }
130
+
131
+ float lower_bound = exact_distances[idx] - cauchy_schwarz_bound; // value compared against threshold
132
+ if constexpr (C::is_max) {
133
+ active_byteset[i] = (threshold > lower_bound) ? 1 : 0; // keep only while the bound is strictly below the threshold
134
+ } else {
135
+ active_byteset[i] = (threshold < lower_bound) ? 1 : 0; // keep only while the bound is strictly above the threshold
136
+ }
137
+ }
138
+ }
139
+ #if !defined(__NVCC__)
140
+ FAISS_PRAGMA_IMPRECISE_FUNCTION_END
141
+ #endif
143
+
144
+ /// Compact active_indices in-place, removing entries where active_byteset[i]
144
+ /// is zero. Returns the new count of active elements. Uses a branchless BMI2 +
145
+ /// AVX2 fast path (8 elements/iteration via _pext_u64 permutation) with a
146
+ /// scalar fallback for the tail and non-x86 platforms. The fast path relies on every flag byte being exactly 0 or 1; entries at and beyond the returned count hold leftover values.
147
+ inline size_t compact_active_kernel(
148
+ uint32_t* active_indices,
149
+ const uint8_t* FAISS_RESTRICT active_byteset,
150
+ const size_t num_active) {
151
+ size_t next_active = 0; // write cursor; never ahead of the read cursor i
152
+ size_t i = 0;
153
+
154
+ #if defined(COMPILE_SIMD_AVX2) && defined(__AVX2__) && defined(__BMI2__)
155
+ for (; i + 8 <= num_active; i += 8) {
156
+ uint64_t bytes;
157
+ memcpy(&bytes, &active_byteset[i], 8); // load 8 flag bytes without alignment requirements
158
+
159
+ uint64_t expanded = bytes * 0xFFULL; // turns each 0x01 flag into 0xFF; no carries as long as every flag is 0 or 1
160
+ uint64_t packed = _pext_u64(0x0706050403020100ULL, expanded); // packs the lane numbers (0..7) of the set flags into the low bytes
161
+
162
+ __m256i perm = _mm256_cvtepu8_epi32(_mm_cvtsi64_si128((int64_t)packed)); // widen the 8 byte-sized lane numbers to 32-bit permutation indices
163
+ __m256i data = _mm256_loadu_si256((const __m256i*)&active_indices[i]);
164
+ __m256i compacted = _mm256_permutevar8x32_epi32(data, perm); // kept entries move to the front; the remaining lanes are filler
165
+ _mm256_storeu_si256((__m256i*)&active_indices[next_active], compacted); // always stores 8 lanes; next_active <= i keeps the store within entries already loaded
166
+
167
+ next_active += __builtin_popcountll(bytes); // number of set flags, valid because each flag byte is 0 or 1
168
+ }
169
+ #endif
170
+
171
+ for (; i < num_active; i++) { // scalar tail, or the whole range when the fast path is compiled out
172
+ active_indices[next_active] = active_indices[i]; // branchless: write unconditionally...
173
+ next_active += active_byteset[i] ? 1 : 0; // ...and advance the cursor only when the entry is kept
174
+ }
175
+
176
+ return next_active;
177
+ }
179
+
180
+ /// Compile-time dispatch: converts a runtime `width` value into a template
180
+ /// parameter by generating an if-else chain over [Lo, Hi] in steps of Step.
181
+ /// Falls through to LevelWidth=0 (runtime path) if no specialization matches.
182
+ /// Allows for specialization of common level widths. `fn` must have a call operator template that accepts the width as a non-type template argument and returns the same type for every width.
183
+ namespace detail {
184
+ template <size_t Lo, size_t Hi, size_t Step, typename Lambda> // Step must be non-zero, otherwise Lo never passes Hi and instantiation does not terminate
185
+ inline auto dispatch_width(size_t width, Lambda&& fn) {
186
+ if constexpr (Lo > Hi) {
187
+ return fn.template operator()<0>(); // candidates exhausted: invoke with 0
188
+ } else {
189
+ if (width == Lo) {
190
+ return fn.template operator()<Lo>(); // runtime width matches this compile-time candidate
191
+ }
192
+ return dispatch_width<Lo + Step, Hi, Step>( // otherwise try the next candidate
193
+ width, std::forward<Lambda>(fn)); // NOTE(review): std::forward needs <utility>, which is not among the includes visible here -- confirm it is included
194
+ }
195
+ }
196
+ } // namespace detail
198
+
199
+ /// Specialize for common float level widths (multiples of 8 up to 128). Any other width invokes `action` with template argument 0; returns whatever `action` returns.
199
+ template <typename LambdaType>
200
+ inline auto with_level_width(size_t width, LambdaType&& action) {
201
+ return detail::dispatch_width<8, 128, 8>( // candidates 8, 16, ..., 128
202
+ width, std::forward<LambdaType>(action));
203
+ }
205
+
206
+ template <typename Lambda> // Turns a runtime bool into a template argument: calls fn.operator()<true>() or fn.operator()<false>() and returns the result.
207
+ inline auto with_bool(bool value, Lambda&& fn) { // both instantiations must return the same type, since the return type is deduced from both branches
208
+ if (value) {
209
+ return fn.template operator()<true>();
210
+ } else {
211
+ return fn.template operator()<false>();
212
+ }
213
+ }
214
+ #endif // SWIG
215
+
25
216
  /**
26
217
  * Implements the core logic of Panorama-based refinement.
27
218
  * arXiv: https://arxiv.org/abs/2510.00566
@@ -42,6 +233,8 @@ namespace faiss {
42
233
  * accelerating the refinement stage.
43
234
  */
44
235
  struct Panorama {
236
+ static constexpr size_t kDefaultBatchSize = 128;
237
+
45
238
  size_t d = 0;
46
239
  size_t code_size = 0;
47
240
  size_t n_levels = 0;
@@ -98,6 +291,7 @@ struct Panorama {
98
291
  /// 4. After all levels, survivors are exact distances; update heap.
99
292
  /// This achieves early termination while maintaining SIMD-friendly
100
293
  /// sequential access patterns in the level-oriented storage layout.
294
+ #ifndef SWIG
101
295
  template <typename C, MetricType M>
102
296
  size_t progressive_filter_batch(
103
297
  const uint8_t* codes_base,
@@ -110,111 +304,99 @@ struct Panorama {
110
304
  const idx_t* ids,
111
305
  bool use_sel,
112
306
  std::vector<uint32_t>& active_indices,
307
+ std::vector<uint8_t>& active_byteset,
113
308
  std::vector<float>& exact_distances,
309
+ std::vector<float>& dot_buffer,
114
310
  float threshold,
115
- PanoramaStats& local_stats) const;
116
-
117
- void reconstruct(idx_t key, float* recons, const uint8_t* codes_base) const;
118
- };
311
+ PanoramaStats& local_stats) const {
312
+ size_t batch_start = batch_no * batch_size;
313
+ size_t curr_batch_size = std::min(list_size - batch_start, batch_size);
119
314
 
120
- template <typename C, MetricType M>
121
- size_t Panorama::progressive_filter_batch(
122
- const uint8_t* codes_base,
123
- const float* cum_sums,
124
- const float* query,
125
- const float* query_cum_sums,
126
- size_t batch_no,
127
- size_t list_size,
128
- const IDSelector* sel,
129
- const idx_t* ids,
130
- bool use_sel,
131
- std::vector<uint32_t>& active_indices,
132
- std::vector<float>& exact_distances,
133
- float threshold,
134
- PanoramaStats& local_stats) const {
135
- size_t batch_start = batch_no * batch_size;
136
- size_t curr_batch_size = std::min(list_size - batch_start, batch_size);
137
-
138
- size_t cumsum_batch_offset = batch_no * batch_size * (n_levels + 1);
139
- const float* batch_cum_sums = cum_sums + cumsum_batch_offset;
140
- const float* level_cum_sums = batch_cum_sums + batch_size;
141
- float q_norm = query_cum_sums[0] * query_cum_sums[0];
142
-
143
- size_t batch_offset = batch_no * batch_size * code_size;
144
- const uint8_t* storage_base = codes_base + batch_offset;
145
-
146
- // Initialize active set with ID-filtered vectors.
147
- size_t num_active = 0;
148
- for (size_t i = 0; i < curr_batch_size; i++) {
149
- size_t global_idx = batch_start + i;
150
- idx_t id = (ids == nullptr) ? global_idx : ids[global_idx];
151
- bool include = !use_sel || sel->is_member(id);
152
-
153
- active_indices[num_active] = i;
154
- float cum_sum = batch_cum_sums[i];
315
+ size_t cumsum_batch_offset = batch_no * batch_size * (n_levels + 1);
316
+ const float* batch_cum_sums = cum_sums + cumsum_batch_offset;
317
+ const float* level_cum_sums = batch_cum_sums + batch_size;
318
+ float q_norm = query_cum_sums[0] * query_cum_sums[0];
155
319
 
156
- if constexpr (M == METRIC_INNER_PRODUCT) {
157
- exact_distances[i] = 0.0f;
158
- } else {
159
- exact_distances[i] = cum_sum * cum_sum + q_norm;
160
- }
320
+ size_t batch_offset = batch_no * batch_size * code_size;
321
+ const uint8_t* storage_base = codes_base + batch_offset;
161
322
 
162
- num_active += include;
163
- }
323
+ // Initialize active set with ID-filtered vectors.
324
+ size_t num_active = 0;
325
+ for (size_t i = 0; i < curr_batch_size; i++) {
326
+ size_t global_idx = batch_start + i;
327
+ idx_t id = (ids == nullptr) ? global_idx : ids[global_idx];
328
+ bool include = !use_sel || sel->is_member(id);
164
329
 
165
- if (num_active == 0) {
166
- return 0;
167
- }
168
-
169
- size_t total_active = num_active;
170
- for (size_t level = 0; level < n_levels; level++) {
171
- local_stats.total_dims_scanned += num_active;
172
- local_stats.total_dims += total_active;
330
+ active_indices[num_active] = i;
331
+ float cum_sum = batch_cum_sums[i];
173
332
 
174
- float query_cum_norm = query_cum_sums[level + 1];
175
-
176
- size_t level_offset = level * level_width * batch_size;
177
- const float* level_storage =
178
- (const float*)(storage_base + level_offset);
333
+ if constexpr (M == METRIC_INNER_PRODUCT) {
334
+ exact_distances[i] = 0.0f;
335
+ } else {
336
+ exact_distances[i] = cum_sum * cum_sum + q_norm;
337
+ }
179
338
 
180
- size_t next_active = 0;
181
- for (size_t i = 0; i < num_active; i++) {
182
- uint32_t idx = active_indices[i];
183
- size_t actual_level_width = std::min(
184
- level_width_floats, d - level * level_width_floats);
339
+ num_active += include;
340
+ }
185
341
 
186
- const float* yj = level_storage + idx * actual_level_width;
187
- const float* query_level = query + level * level_width_floats;
342
+ size_t total_active = num_active;
343
+ const bool first_level_full = (num_active == curr_batch_size);
188
344
 
189
- float dot_product =
190
- fvec_inner_product(query_level, yj, actual_level_width);
345
+ local_stats.total_dims += total_active * n_levels;
191
346
 
192
- if constexpr (M == METRIC_INNER_PRODUCT) {
193
- exact_distances[idx] += dot_product;
194
- } else {
195
- exact_distances[idx] -= 2.0f * dot_product;
196
- }
347
+ for (size_t level = 0; (level < n_levels) && (num_active > 0);
348
+ level++) {
349
+ local_stats.total_dims_scanned += num_active;
197
350
 
198
- float cum_sum = level_cum_sums[idx];
199
- float cauchy_schwarz_bound;
200
- if constexpr (M == METRIC_INNER_PRODUCT) {
201
- cauchy_schwarz_bound = -cum_sum * query_cum_norm;
202
- } else {
203
- cauchy_schwarz_bound = 2.0f * cum_sum * query_cum_norm;
204
- }
351
+ float query_cum_norm = query_cum_sums[level + 1];
205
352
 
206
- float lower_bound = exact_distances[idx] - cauchy_schwarz_bound;
353
+ size_t level_offset = level * level_width * batch_size;
354
+ const float* level_storage =
355
+ (const float*)(storage_base + level_offset);
356
+ const float* query_level = query + level * level_width_floats;
357
+ size_t actual_level_width = std::min(
358
+ level_width_floats, d - level * level_width_floats);
207
359
 
208
- active_indices[next_active] = idx;
209
- next_active += C::cmp(threshold, lower_bound) ? 1 : 0;
360
+ num_active = with_bool(
361
+ level == 0 && first_level_full, [&]<bool AllActive>() {
362
+ with_level_width(
363
+ actual_level_width, [&]<size_t LevelWidth>() {
364
+ compute_level_dot_kernel<
365
+ AllActive,
366
+ LevelWidth>(
367
+ query_level,
368
+ level_storage,
369
+ active_indices.data(),
370
+ num_active,
371
+ actual_level_width,
372
+ dot_buffer.data());
373
+ });
374
+
375
+ prune_kernel<AllActive, C, M>(
376
+ exact_distances.data(),
377
+ dot_buffer.data(),
378
+ level_cum_sums,
379
+ active_byteset.data(),
380
+ active_indices.data(),
381
+ (uint32_t)num_active,
382
+ query_cum_norm,
383
+ threshold);
384
+
385
+ return compact_active_kernel(
386
+ active_indices.data(),
387
+ active_byteset.data(),
388
+ num_active);
389
+ });
390
+
391
+ level_cum_sums += batch_size;
210
392
  }
211
393
 
212
- num_active = next_active;
213
- level_cum_sums += batch_size;
394
+ return num_active;
214
395
  }
396
+ #endif // SWIG
215
397
 
216
- return num_active;
217
- }
398
+ void reconstruct(idx_t key, float* recons, const uint8_t* codes_base) const;
399
+ };
218
400
  } // namespace faiss
219
401
 
220
402
  #endif
@@ -0,0 +1,93 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include <faiss/impl/PdxLayout.h>
9
+
10
+ #include <cstddef>
11
+ #include <cstring>
12
+
13
+ namespace faiss {
14
+ namespace detail {
15
+
16
/**
 * Reorder a row-major (k, d_trail) matrix into the blocked PDX layout.
 *
 * The d_trail columns are cut into consecutive blocks of `pdx_block_size`
 * columns; the last block is shorter when d_trail is not a multiple of the
 * block size. Blocks are written one after another, and inside a block the
 * slice of row 0 comes first, then the slice of row 1, and so on.
 *
 * @param Y               source matrix, row-major, k rows of d_trail floats
 * @param k               number of rows
 * @param d_trail         number of columns per row
 * @param pdx_block_size  number of columns per block
 * @param Y_pdx           destination; k * d_trail floats are written
 *
 * If k, d_trail or pdx_block_size is not positive, nothing is written. In
 * particular a zero block size no longer triggers a division by zero.
 */
void pdxify(
        const float* Y,
        int k,
        int d_trail,
        int pdx_block_size,
        float* Y_pdx) {
    if (k <= 0 || d_trail <= 0 || pdx_block_size <= 0) {
        return;
    }

    const size_t n_cols = static_cast<size_t>(d_trail);
    const size_t block = static_cast<size_t>(pdx_block_size);

    size_t offset = 0;
    // One pass handles both the full blocks and the shorter trailing block.
    for (size_t col_start = 0; col_start < n_cols; col_start += block) {
        const size_t remaining = n_cols - col_start;
        const size_t width = remaining < block ? remaining : block;
        for (int j = 0; j < k; ++j) {
            std::memcpy(
                    Y_pdx + offset,
                    Y + static_cast<size_t>(j) * n_cols + col_start,
                    width * sizeof(float));
            offset += width;
        }
    }
}
47
+
48
/**
 * Inverse of pdxify: rebuild the row-major (k, d_trail) matrix from its
 * blocked PDX form.
 *
 * The input is read sequentially. It holds, block after block of
 * `pdx_block_size` columns (the last block may be shorter), the slice of
 * row 0, then row 1, ... for that block. Each slice is copied back to its
 * position in the row-major output.
 *
 * @param Y_pdx           source in blocked layout, k * d_trail floats
 * @param k               number of rows
 * @param d_trail         number of columns per row
 * @param pdx_block_size  number of columns per block
 * @param Y               destination, row-major, k rows of d_trail floats
 *
 * If k, d_trail or pdx_block_size is not positive, nothing is written. In
 * particular a zero block size no longer triggers a division by zero.
 */
void de_pdxify(
        const float* Y_pdx,
        int k,
        int d_trail,
        int pdx_block_size,
        float* Y) {
    if (k <= 0 || d_trail <= 0 || pdx_block_size <= 0) {
        return;
    }

    const size_t n_cols = static_cast<size_t>(d_trail);
    const size_t block = static_cast<size_t>(pdx_block_size);

    size_t offset = 0;
    // One pass handles both the full blocks and the shorter trailing block.
    for (size_t col_start = 0; col_start < n_cols; col_start += block) {
        const size_t remaining = n_cols - col_start;
        const size_t width = remaining < block ? remaining : block;
        for (int j = 0; j < k; ++j) {
            std::memcpy(
                    Y + static_cast<size_t>(j) * n_cols + col_start,
                    Y_pdx + offset,
                    width * sizeof(float));
            offset += width;
        }
    }
}
79
+
80
/**
 * For every row of the row-major matrix X (n rows, stride d), store the sum
 * of squares of its first p entries: norms[i] = sum_{m < p} X[i, m]^2.
 * Rows are independent and are distributed over OpenMP threads when OpenMP
 * is enabled.
 */
void compute_partial_norms(const float* X, int n, int d, int p, float* norms) {
#pragma omp parallel for
    for (int row_idx = 0; row_idx < n; ++row_idx) {
        const float* const row_ptr = X + static_cast<size_t>(row_idx) * d;
        float acc = 0.0f;
        // Accumulate left to right over the leading p coordinates.
        for (int col = 0; col < p; ++col) {
            acc += row_ptr[col] * row_ptr[col];
        }
        norms[row_idx] = acc;
    }
}
91
+
92
+ } // namespace detail
93
+ } // namespace faiss
@@ -0,0 +1,41 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ namespace faiss {
11
+ namespace detail {
12
+
13
+ /** Reorder a row-major (k, d_trail) matrix into PDX block-column-major
14
+ * layout. Inside each block of `pdx_block_size` dims the centroids' slices
15
+ * are stored one after another, so all k centroids' values for the same
16
+ * block of dims are contiguous — the access pattern that makes progressive pruning
17
+ * cache-friendly. Trailing block (size `d_trail % pdx_block_size`) uses
18
+ * the same convention. `Y_pdx` must already be sized to `k * d_trail`. */
19
+ void pdxify(
20
+ const float* Y,
21
+ int k,
22
+ int d_trail,
23
+ int pdx_block_size,
24
+ float* Y_pdx);
25
+
26
+ /** Inverse of pdxify (used in tests for the bit-identical round-trip
27
+ * check). */
28
+ void de_pdxify(
29
+ const float* Y_pdx,
30
+ int k,
31
+ int d_trail,
32
+ int pdx_block_size,
33
+ float* Y);
34
+
35
+ /** norms[i] = sum_{m<p} X[i, m]^2 for row-major X of shape (n, d).
36
+ * Parallel over rows. Used by SuperKMeans to keep partial-norm caches
37
+ * in sync with the current d_prime. */
38
+ void compute_partial_norms(const float* X, int n, int d, int p, float* norms);
39
+
40
+ } // namespace detail
41
+ } // namespace faiss