faiss 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (361)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +2 -1
  4. data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
  5. data/ext/faiss/index_binary.cpp +1 -1
  6. data/ext/faiss/kmeans.cpp +1 -1
  7. data/ext/faiss/pca_matrix.cpp +1 -1
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
  10. data/lib/faiss/version.rb +1 -1
  11. data/vendor/faiss/faiss/AutoTune.cpp +93 -80
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -240
  13. data/vendor/faiss/faiss/Clustering.h +6 -0
  14. data/vendor/faiss/faiss/IVFlib.cpp +41 -21
  15. data/vendor/faiss/faiss/Index.cpp +6 -5
  16. data/vendor/faiss/faiss/Index.h +5 -5
  17. data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
  21. data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
  22. data/vendor/faiss/faiss/IndexBinary.h +4 -4
  23. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
  26. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
  27. data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
  28. data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
  29. data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
  30. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
  31. data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
  32. data/vendor/faiss/faiss/IndexFastScan.h +25 -23
  33. data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
  34. data/vendor/faiss/faiss/IndexFlat.h +21 -18
  35. data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
  36. data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
  37. data/vendor/faiss/faiss/IndexHNSW.h +16 -2
  38. data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
  39. data/vendor/faiss/faiss/IndexIDMap.h +9 -7
  40. data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
  41. data/vendor/faiss/faiss/IndexIVF.h +33 -12
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
  44. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
  45. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
  46. data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
  47. data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
  48. data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
  49. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
  50. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
  51. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
  52. data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
  53. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
  54. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
  55. data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
  56. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
  57. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
  58. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
  59. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
  60. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
  61. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  62. data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
  63. data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
  64. data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
  65. data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
  66. data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
  67. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
  68. data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
  69. data/vendor/faiss/faiss/IndexPQ.h +3 -2
  70. data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
  71. data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
  72. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
  73. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  74. data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
  75. data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
  76. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
  77. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
  78. data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
  79. data/vendor/faiss/faiss/IndexRefine.h +4 -4
  80. data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
  81. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
  82. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
  83. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
  84. data/vendor/faiss/faiss/IndexShards.cpp +10 -9
  85. data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
  86. data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
  87. data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
  88. data/vendor/faiss/faiss/MetaIndexes.h +1 -1
  89. data/vendor/faiss/faiss/MetricType.h +14 -7
  90. data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
  91. data/vendor/faiss/faiss/SuperKMeans.h +97 -0
  92. data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
  93. data/vendor/faiss/faiss/VectorTransform.h +16 -16
  94. data/vendor/faiss/faiss/build.cpp +23 -0
  95. data/vendor/faiss/faiss/build.h +15 -0
  96. data/vendor/faiss/faiss/clone_index.cpp +48 -47
  97. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
  98. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
  99. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
  100. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
  101. data/vendor/faiss/faiss/factory_tools.cpp +5 -0
  102. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
  106. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
  107. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
  108. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
  109. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
  110. data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
  111. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
  112. data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
  113. data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
  114. data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
  115. data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
  116. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
  117. data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
  118. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
  119. data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
  120. data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
  121. data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
  122. data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
  123. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
  124. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
  125. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
  126. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
  127. data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
  128. data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
  129. data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
  130. data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
  131. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
  132. data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
  133. data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
  134. data/vendor/faiss/faiss/impl/FaissException.h +50 -3
  135. data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
  136. data/vendor/faiss/faiss/impl/HNSW.h +13 -34
  137. data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
  138. data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
  139. data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
  140. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
  141. data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
  142. data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
  143. data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
  144. data/vendor/faiss/faiss/impl/NSG.h +4 -4
  145. data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
  146. data/vendor/faiss/faiss/impl/Panorama.h +258 -87
  147. data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
  148. data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
  149. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
  150. data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
  151. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
  152. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
  153. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
  154. data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
  155. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
  156. data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
  157. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
  158. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
  159. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  160. data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
  161. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
  162. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
  163. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
  164. data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
  165. data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
  166. data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
  167. data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
  168. data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
  169. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
  170. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
  171. data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
  172. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
  173. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
  174. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
  175. data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
  176. data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
  177. data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
  178. data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
  179. data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
  180. data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
  181. data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
  182. data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
  183. data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
  184. data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
  185. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
  186. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
  187. data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
  188. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
  189. data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
  190. data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
  191. data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
  192. data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
  193. data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
  194. data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
  195. data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
  196. data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
  197. data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
  198. data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
  199. data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
  200. data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
  201. data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
  202. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
  203. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
  204. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
  205. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
  206. data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
  207. data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
  208. data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
  209. data/vendor/faiss/faiss/impl/io.cpp +6 -6
  210. data/vendor/faiss/faiss/impl/io_macros.h +33 -16
  211. data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
  212. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
  213. data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
  214. data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
  215. data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
  216. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
  217. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
  218. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
  219. data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
  220. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
  221. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
  222. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
  223. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
  224. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
  225. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
  226. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
  227. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
  228. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
  229. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
  230. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
  231. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
  232. data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
  233. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
  234. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
  235. data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
  236. data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
  237. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
  238. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
  239. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
  240. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
  241. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
  242. data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
  243. data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
  244. data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
  245. data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
  246. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
  247. data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
  248. data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
  249. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
  250. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
  251. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
  252. data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
  253. data/vendor/faiss/faiss/impl/svs_io.h +8 -2
  254. data/vendor/faiss/faiss/index_factory.cpp +86 -18
  255. data/vendor/faiss/faiss/index_io.h +24 -0
  256. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
  257. data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
  258. data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
  259. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
  260. data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
  261. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
  262. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
  263. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  264. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
  265. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
  266. data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
  267. data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
  268. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
  269. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
  270. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
  271. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
  272. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
  273. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
  274. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
  275. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
  276. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
  277. data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
  278. data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
  279. data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
  280. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
  281. data/vendor/faiss/faiss/utils/distances.cpp +390 -560
  282. data/vendor/faiss/faiss/utils/distances.h +20 -1
  283. data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
  284. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
  285. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
  286. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
  287. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
  288. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
  289. data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
  290. data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
  291. data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
  292. data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
  293. data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
  294. data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
  295. data/vendor/faiss/faiss/utils/hamming.h +92 -2
  296. data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
  297. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
  298. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
  299. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
  300. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
  301. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
  302. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
  303. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
  304. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
  305. data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
  306. data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
  307. data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
  308. data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
  309. data/vendor/faiss/faiss/utils/partitioning.h +31 -0
  310. data/vendor/faiss/faiss/utils/popcount.h +29 -0
  311. data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
  312. data/vendor/faiss/faiss/utils/prefetch.h +2 -2
  313. data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
  314. data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
  315. data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
  316. data/vendor/faiss/faiss/utils/random.cpp +6 -6
  317. data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
  318. data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
  319. data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
  320. data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
  321. data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
  322. data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
  323. data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
  324. data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
  325. data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
  326. data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
  327. data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
  328. data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
  329. data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
  330. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
  331. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
  332. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
  333. data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
  334. data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
  335. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
  336. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
  337. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
  338. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
  339. data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
  340. data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
  341. data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
  342. data/vendor/faiss/faiss/utils/utils.cpp +5 -5
  343. data/vendor/faiss/faiss/utils/utils.h +3 -3
  344. metadata +119 -34
  345. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
  346. data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
  347. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
  348. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
  349. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
  350. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
  351. data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
  352. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
  353. data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
  354. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
  355. data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
  356. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
  357. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
  358. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
  359. data/vendor/faiss/faiss/utils/simdlib.h +0 -42
  360. data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
  361. /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
@@ -18,10 +18,190 @@
18
18
  #include <algorithm>
19
19
  #include <cstddef>
20
20
  #include <cstdint>
21
+ #include <cstring>
21
22
  #include <vector>
22
23
 
24
+ #if defined(COMPILE_SIMD_AVX2) && defined(__AVX2__) && defined(__BMI2__)
25
+ #include <immintrin.h>
26
+ #endif
27
+
23
28
  namespace faiss {
24
29
 
30
+ #ifndef SWIG
31
+
32
+ /// Compute dot products between query_level and active vectors.
33
+ ///
34
+ /// @tparam AllActive If true, vectors are at sequential positions 0..N-1
35
+ /// (first level, full batch). If false, positions come
36
+ /// from active_indices (subsequent levels after pruning).
37
+ /// @tparam LevelWidth Compile-time level width in floats (0 = use runtime
38
+ /// level_width_dims). Enables full loop unrolling.
39
+ FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
40
+ template <bool AllActive = false, size_t LevelWidth = 0>
41
+ static inline void compute_level_dot_kernel(
42
+ const float* FAISS_RESTRICT query_level,
43
+ const float* FAISS_RESTRICT level_storage,
44
+ const uint32_t* active_indices,
45
+ const size_t num_active,
46
+ const size_t level_width_dims,
47
+ float* FAISS_RESTRICT dot_products) {
48
+ const size_t width = LevelWidth > 0 ? LevelWidth : level_width_dims;
49
+ size_t i = 0;
50
+ for (; i + 4 <= num_active; i += 4) {
51
+ const float* y0 = level_storage +
52
+ (AllActive ? (i + 0) : active_indices[i + 0]) * width;
53
+ const float* y1 = level_storage +
54
+ (AllActive ? (i + 1) : active_indices[i + 1]) * width;
55
+ const float* y2 = level_storage +
56
+ (AllActive ? (i + 2) : active_indices[i + 2]) * width;
57
+ const float* y3 = level_storage +
58
+ (AllActive ? (i + 3) : active_indices[i + 3]) * width;
59
+
60
+ float dp0 = 0, dp1 = 0, dp2 = 0, dp3 = 0;
61
+ FAISS_PRAGMA_IMPRECISE_LOOP
62
+ for (size_t j = 0; j < width; j++) {
63
+ float q = query_level[j];
64
+ dp0 += q * y0[j];
65
+ dp1 += q * y1[j];
66
+ dp2 += q * y2[j];
67
+ dp3 += q * y3[j];
68
+ }
69
+
70
+ dot_products[i + 0] = dp0;
71
+ dot_products[i + 1] = dp1;
72
+ dot_products[i + 2] = dp2;
73
+ dot_products[i + 3] = dp3;
74
+ }
75
+ for (; i < num_active; i++) {
76
+ const float* yj =
77
+ level_storage + (AllActive ? i : active_indices[i]) * width;
78
+ float dp = 0;
79
+ FAISS_PRAGMA_IMPRECISE_LOOP
80
+ for (size_t j = 0; j < width; j++) {
81
+ dp += query_level[j] * yj[j];
82
+ }
83
+ dot_products[i] = dp;
84
+ }
85
+ }
86
+ FAISS_PRAGMA_IMPRECISE_FUNCTION_END
87
+
88
+ /// Update exact distances with the current level's dot products, then apply
89
+ /// Panorama pruning: for each active vector, compute a lower bound on
90
+ /// the final distance and mark it for removal if it cannot beat the current
91
+ /// threshold. Writes 0/1 into active_byteset for subsequent compaction.
92
+ ///
93
+ /// Uses `if constexpr` on C::is_max rather than C::cmp() to ensure the
94
+ /// comparison autovectorizes (C::cmp generates scalar function calls).
95
+ FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
96
+ template <bool AllActive, typename C, MetricType M>
97
+ static inline void prune_kernel(
98
+ float* FAISS_RESTRICT exact_distances,
99
+ const float* FAISS_RESTRICT dot_buffer,
100
+ const float* FAISS_RESTRICT level_cum_sums,
101
+ uint8_t* FAISS_RESTRICT active_byteset,
102
+ const uint32_t* FAISS_RESTRICT active_indices,
103
+ const uint32_t num_active,
104
+ const float query_cum_norm,
105
+ const float threshold) {
106
+ FAISS_PRAGMA_IMPRECISE_LOOP
107
+ for (uint32_t i = 0; i < num_active; i++) {
108
+ uint32_t idx = AllActive ? i : active_indices[i];
109
+ if constexpr (M == METRIC_INNER_PRODUCT) {
110
+ exact_distances[idx] += dot_buffer[i];
111
+ } else {
112
+ exact_distances[idx] -= 2.0f * dot_buffer[i];
113
+ }
114
+
115
+ float cum_sum = level_cum_sums[idx];
116
+ float cauchy_schwarz_bound;
117
+ if constexpr (M == METRIC_INNER_PRODUCT) {
118
+ cauchy_schwarz_bound = -cum_sum * query_cum_norm;
119
+ } else {
120
+ cauchy_schwarz_bound = 2.0f * cum_sum * query_cum_norm;
121
+ }
122
+
123
+ float lower_bound = exact_distances[idx] - cauchy_schwarz_bound;
124
+ if constexpr (C::is_max) {
125
+ active_byteset[i] = (threshold > lower_bound) ? 1 : 0;
126
+ } else {
127
+ active_byteset[i] = (threshold < lower_bound) ? 1 : 0;
128
+ }
129
+ }
130
+ }
131
+ FAISS_PRAGMA_IMPRECISE_FUNCTION_END
132
+
133
+ /// Compact active_indices in-place, removing entries where active_byteset[i]
134
+ /// is zero. Returns the new count of active elements. Uses a branchless BMI2 +
135
+ /// AVX2 fast path (8 elements/iteration via _pext_u64 permutation) with a
136
+ /// scalar fallback for the tail and non-x86 platforms.
137
+ inline size_t compact_active_kernel(
138
+ uint32_t* active_indices,
139
+ const uint8_t* FAISS_RESTRICT active_byteset,
140
+ const size_t num_active) {
141
+ size_t next_active = 0;
142
+ size_t i = 0;
143
+
144
+ #if defined(COMPILE_SIMD_AVX2) && defined(__AVX2__) && defined(__BMI2__)
145
+ for (; i + 8 <= num_active; i += 8) {
146
+ uint64_t bytes;
147
+ memcpy(&bytes, &active_byteset[i], 8);
148
+
149
+ uint64_t expanded = bytes * 0xFFULL;
150
+ uint64_t packed = _pext_u64(0x0706050403020100ULL, expanded);
151
+
152
+ __m256i perm = _mm256_cvtepu8_epi32(_mm_cvtsi64_si128((int64_t)packed));
153
+ __m256i data = _mm256_loadu_si256((const __m256i*)&active_indices[i]);
154
+ __m256i compacted = _mm256_permutevar8x32_epi32(data, perm);
155
+ _mm256_storeu_si256((__m256i*)&active_indices[next_active], compacted);
156
+
157
+ next_active += __builtin_popcountll(bytes);
158
+ }
159
+ #endif
160
+
161
+ for (; i < num_active; i++) {
162
+ active_indices[next_active] = active_indices[i];
163
+ next_active += active_byteset[i] ? 1 : 0;
164
+ }
165
+
166
+ return next_active;
167
+ }
168
+
169
/// Runtime-to-compile-time dispatch on a width value.
///
/// Expands to an if-else chain over the candidates Lo, Lo + Step, ... up to
/// and including Hi. If `width` equals a candidate W, the callable is invoked
/// as `fn.template operator()<W>()`; if none matches it is invoked with the
/// template argument 0, which callers use as the "runtime width" path. This
/// lets common level widths get their own specialization.
///
/// All instantiations of `fn` must return the same type, since the return
/// type is deduced.
///
/// @tparam Lo      first candidate width
/// @tparam Hi      last candidate width (inclusive)
/// @tparam Step    distance between candidates; must be positive
/// @tparam Lambda  callable with a call operator templated on a size_t
/// @param width    runtime value to match against the candidates
/// @param fn       callable to invoke
/// @return whatever the selected instantiation of `fn` returns
namespace detail {
template <size_t Lo, size_t Hi, size_t Step, typename Lambda>
inline auto dispatch_width(size_t width, Lambda&& fn) {
    if constexpr (Lo > Hi) {
        // Candidates exhausted: fall back to the runtime-width instantiation.
        return fn.template operator()<0>();
    } else {
        // A zero step would re-instantiate the same candidate forever and
        // surface only as an opaque template-depth error.
        static_assert(
                Step > 0,
                "dispatch_width: Step must be positive or the candidate "
                "chain never terminates");
        if (width == Lo) {
            return fn.template operator()<Lo>();
        }
        return dispatch_width<Lo + Step, Hi, Step>(
                width, std::forward<Lambda>(fn));
    }
}
} // namespace detail
187
+
188
+ /// Specialize for common float level widths (multiples of 8 up to 128).
189
+ template <typename LambdaType>
190
+ inline auto with_level_width(size_t width, LambdaType&& action) {
191
+ return detail::dispatch_width<8, 128, 8>(
192
+ width, std::forward<LambdaType>(action));
193
+ }
194
+
195
/// Lift a runtime bool to a template argument: calls
/// `fn.template operator()<true>()` when `value` is true and
/// `fn.template operator()<false>()` otherwise, returning its result.
/// Both instantiations must return the same type.
template <typename Lambda>
inline auto with_bool(bool value, Lambda&& fn) {
    if (!value) {
        return fn.template operator()<false>();
    }
    return fn.template operator()<true>();
}
203
+ #endif // SWIG
204
+
25
205
  /**
26
206
  * Implements the core logic of Panorama-based refinement.
27
207
  * arXiv: https://arxiv.org/abs/2510.00566
@@ -42,6 +222,8 @@ namespace faiss {
42
222
  * accelerating the refinement stage.
43
223
  */
44
224
  struct Panorama {
225
+ static constexpr size_t kDefaultBatchSize = 128;
226
+
45
227
  size_t d = 0;
46
228
  size_t code_size = 0;
47
229
  size_t n_levels = 0;
@@ -98,6 +280,7 @@ struct Panorama {
98
280
  /// 4. After all levels, survivors are exact distances; update heap.
99
281
  /// This achieves early termination while maintaining SIMD-friendly
100
282
  /// sequential access patterns in the level-oriented storage layout.
283
+ #ifndef SWIG
101
284
  template <typename C, MetricType M>
102
285
  size_t progressive_filter_batch(
103
286
  const uint8_t* codes_base,
@@ -110,111 +293,99 @@ struct Panorama {
110
293
  const idx_t* ids,
111
294
  bool use_sel,
112
295
  std::vector<uint32_t>& active_indices,
296
+ std::vector<uint8_t>& active_byteset,
113
297
  std::vector<float>& exact_distances,
298
+ std::vector<float>& dot_buffer,
114
299
  float threshold,
115
- PanoramaStats& local_stats) const;
300
+ PanoramaStats& local_stats) const {
301
+ size_t batch_start = batch_no * batch_size;
302
+ size_t curr_batch_size = std::min(list_size - batch_start, batch_size);
116
303
 
117
- void reconstruct(idx_t key, float* recons, const uint8_t* codes_base) const;
118
- };
304
+ size_t cumsum_batch_offset = batch_no * batch_size * (n_levels + 1);
305
+ const float* batch_cum_sums = cum_sums + cumsum_batch_offset;
306
+ const float* level_cum_sums = batch_cum_sums + batch_size;
307
+ float q_norm = query_cum_sums[0] * query_cum_sums[0];
119
308
 
120
- template <typename C, MetricType M>
121
- size_t Panorama::progressive_filter_batch(
122
- const uint8_t* codes_base,
123
- const float* cum_sums,
124
- const float* query,
125
- const float* query_cum_sums,
126
- size_t batch_no,
127
- size_t list_size,
128
- const IDSelector* sel,
129
- const idx_t* ids,
130
- bool use_sel,
131
- std::vector<uint32_t>& active_indices,
132
- std::vector<float>& exact_distances,
133
- float threshold,
134
- PanoramaStats& local_stats) const {
135
- size_t batch_start = batch_no * batch_size;
136
- size_t curr_batch_size = std::min(list_size - batch_start, batch_size);
137
-
138
- size_t cumsum_batch_offset = batch_no * batch_size * (n_levels + 1);
139
- const float* batch_cum_sums = cum_sums + cumsum_batch_offset;
140
- const float* level_cum_sums = batch_cum_sums + batch_size;
141
- float q_norm = query_cum_sums[0] * query_cum_sums[0];
142
-
143
- size_t batch_offset = batch_no * batch_size * code_size;
144
- const uint8_t* storage_base = codes_base + batch_offset;
145
-
146
- // Initialize active set with ID-filtered vectors.
147
- size_t num_active = 0;
148
- for (size_t i = 0; i < curr_batch_size; i++) {
149
- size_t global_idx = batch_start + i;
150
- idx_t id = (ids == nullptr) ? global_idx : ids[global_idx];
151
- bool include = !use_sel || sel->is_member(id);
152
-
153
- active_indices[num_active] = i;
154
- float cum_sum = batch_cum_sums[i];
309
+ size_t batch_offset = batch_no * batch_size * code_size;
310
+ const uint8_t* storage_base = codes_base + batch_offset;
155
311
 
156
- if constexpr (M == METRIC_INNER_PRODUCT) {
157
- exact_distances[i] = 0.0f;
158
- } else {
159
- exact_distances[i] = cum_sum * cum_sum + q_norm;
160
- }
312
+ // Initialize active set with ID-filtered vectors.
313
+ size_t num_active = 0;
314
+ for (size_t i = 0; i < curr_batch_size; i++) {
315
+ size_t global_idx = batch_start + i;
316
+ idx_t id = (ids == nullptr) ? global_idx : ids[global_idx];
317
+ bool include = !use_sel || sel->is_member(id);
161
318
 
162
- num_active += include;
163
- }
164
-
165
- if (num_active == 0) {
166
- return 0;
167
- }
168
-
169
- size_t total_active = num_active;
170
- for (size_t level = 0; level < n_levels; level++) {
171
- local_stats.total_dims_scanned += num_active;
172
- local_stats.total_dims += total_active;
319
+ active_indices[num_active] = i;
320
+ float cum_sum = batch_cum_sums[i];
173
321
 
174
- float query_cum_norm = query_cum_sums[level + 1];
175
-
176
- size_t level_offset = level * level_width * batch_size;
177
- const float* level_storage =
178
- (const float*)(storage_base + level_offset);
322
+ if constexpr (M == METRIC_INNER_PRODUCT) {
323
+ exact_distances[i] = 0.0f;
324
+ } else {
325
+ exact_distances[i] = cum_sum * cum_sum + q_norm;
326
+ }
179
327
 
180
- size_t next_active = 0;
181
- for (size_t i = 0; i < num_active; i++) {
182
- uint32_t idx = active_indices[i];
183
- size_t actual_level_width = std::min(
184
- level_width_floats, d - level * level_width_floats);
328
+ num_active += include;
329
+ }
185
330
 
186
- const float* yj = level_storage + idx * actual_level_width;
187
- const float* query_level = query + level * level_width_floats;
331
+ size_t total_active = num_active;
332
+ const bool first_level_full = (num_active == curr_batch_size);
188
333
 
189
- float dot_product =
190
- fvec_inner_product(query_level, yj, actual_level_width);
334
+ local_stats.total_dims += total_active * n_levels;
191
335
 
192
- if constexpr (M == METRIC_INNER_PRODUCT) {
193
- exact_distances[idx] += dot_product;
194
- } else {
195
- exact_distances[idx] -= 2.0f * dot_product;
196
- }
336
+ for (size_t level = 0; (level < n_levels) && (num_active > 0);
337
+ level++) {
338
+ local_stats.total_dims_scanned += num_active;
197
339
 
198
- float cum_sum = level_cum_sums[idx];
199
- float cauchy_schwarz_bound;
200
- if constexpr (M == METRIC_INNER_PRODUCT) {
201
- cauchy_schwarz_bound = -cum_sum * query_cum_norm;
202
- } else {
203
- cauchy_schwarz_bound = 2.0f * cum_sum * query_cum_norm;
204
- }
340
+ float query_cum_norm = query_cum_sums[level + 1];
205
341
 
206
- float lower_bound = exact_distances[idx] - cauchy_schwarz_bound;
342
+ size_t level_offset = level * level_width * batch_size;
343
+ const float* level_storage =
344
+ (const float*)(storage_base + level_offset);
345
+ const float* query_level = query + level * level_width_floats;
346
+ size_t actual_level_width = std::min(
347
+ level_width_floats, d - level * level_width_floats);
207
348
 
208
- active_indices[next_active] = idx;
209
- next_active += C::cmp(threshold, lower_bound) ? 1 : 0;
349
+ num_active = with_bool(
350
+ level == 0 && first_level_full, [&]<bool AllActive>() {
351
+ with_level_width(
352
+ actual_level_width, [&]<size_t LevelWidth>() {
353
+ compute_level_dot_kernel<
354
+ AllActive,
355
+ LevelWidth>(
356
+ query_level,
357
+ level_storage,
358
+ active_indices.data(),
359
+ num_active,
360
+ actual_level_width,
361
+ dot_buffer.data());
362
+ });
363
+
364
+ prune_kernel<AllActive, C, M>(
365
+ exact_distances.data(),
366
+ dot_buffer.data(),
367
+ level_cum_sums,
368
+ active_byteset.data(),
369
+ active_indices.data(),
370
+ (uint32_t)num_active,
371
+ query_cum_norm,
372
+ threshold);
373
+
374
+ return compact_active_kernel(
375
+ active_indices.data(),
376
+ active_byteset.data(),
377
+ num_active);
378
+ });
379
+
380
+ level_cum_sums += batch_size;
210
381
  }
211
382
 
212
- num_active = next_active;
213
- level_cum_sums += batch_size;
383
+ return num_active;
214
384
  }
385
+ #endif // SWIG
215
386
 
216
- return num_active;
217
- }
387
+ void reconstruct(idx_t key, float* recons, const uint8_t* codes_base) const;
388
+ };
218
389
  } // namespace faiss
219
390
 
220
391
  #endif
@@ -0,0 +1,93 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
#include <faiss/impl/PdxLayout.h>

#include <cstddef>
#include <cstring>
#include <stdexcept>
12
+
13
+ namespace faiss {
14
+ namespace detail {
15
+
16
/**
 * Reorders the row-major (k, d_trail) matrix `Y` into `Y_pdx`.
 *
 * The d_trail dims are cut into consecutive blocks of `pdx_block_size`
 * dims; the last block is shorter when d_trail is not a multiple of the
 * block size. Blocks are written one after another, and within a block the
 * rows are written in order j = 0..k-1, each contributing its slice of that
 * block's dims.
 *
 * @param Y               source matrix, k rows of d_trail floats
 * @param k               number of rows; nothing is written when k <= 0
 * @param d_trail         dims per row; nothing is written when d_trail <= 0
 * @param pdx_block_size  dims per block, must be positive
 * @param Y_pdx           destination; k * d_trail floats are written
 * @throws std::invalid_argument if pdx_block_size <= 0
 */
void pdxify(
        const float* Y,
        int k,
        int d_trail,
        int pdx_block_size,
        float* Y_pdx) {
    // A zero block size would divide by zero and a negative one yields
    // meaningless offsets, so reject both up front.
    if (pdx_block_size <= 0) {
        throw std::invalid_argument("pdxify: pdx_block_size must be positive");
    }
    if (k <= 0 || d_trail <= 0) {
        return;
    }

    const size_t n_rows = static_cast<size_t>(k);
    const size_t row_len = static_cast<size_t>(d_trail);
    const size_t block = static_cast<size_t>(pdx_block_size);

    float* out = Y_pdx;
    // A single loop handles both the full blocks and the shorter trailing one.
    for (size_t start = 0; start < row_len; start += block) {
        const size_t remaining = row_len - start;
        const size_t width = remaining < block ? remaining : block;
        for (size_t j = 0; j < n_rows; ++j) {
            std::memcpy(out, Y + j * row_len + start, width * sizeof(float));
            out += width;
        }
    }
}
47
+
48
/**
 * Restores the row-major (k, d_trail) matrix `Y` from the block layout in
 * `Y_pdx`.
 *
 * `Y_pdx` is read sequentially: for each consecutive block of
 * `pdx_block_size` dims (the last block is shorter when d_trail is not a
 * multiple of the block size) it holds, for rows j = 0..k-1 in order, that
 * row's slice of the block's dims. Each slice is copied back to its
 * position in row j of `Y`.
 *
 * @param Y_pdx           source in block layout, k * d_trail floats
 * @param k               number of rows; nothing is written when k <= 0
 * @param d_trail         dims per row; nothing is written when d_trail <= 0
 * @param pdx_block_size  dims per block, must be positive
 * @param Y               destination, k rows of d_trail floats
 * @throws std::invalid_argument if pdx_block_size <= 0
 */
void de_pdxify(
        const float* Y_pdx,
        int k,
        int d_trail,
        int pdx_block_size,
        float* Y) {
    // A zero block size would divide by zero and a negative one yields
    // meaningless offsets, so reject both up front.
    if (pdx_block_size <= 0) {
        throw std::invalid_argument(
                "de_pdxify: pdx_block_size must be positive");
    }
    if (k <= 0 || d_trail <= 0) {
        return;
    }

    const size_t n_rows = static_cast<size_t>(k);
    const size_t row_len = static_cast<size_t>(d_trail);
    const size_t block = static_cast<size_t>(pdx_block_size);

    const float* in = Y_pdx;
    // A single loop handles both the full blocks and the shorter trailing one.
    for (size_t start = 0; start < row_len; start += block) {
        const size_t remaining = row_len - start;
        const size_t width = remaining < block ? remaining : block;
        for (size_t j = 0; j < n_rows; ++j) {
            std::memcpy(Y + j * row_len + start, in, width * sizeof(float));
            in += width;
        }
    }
}
79
+
80
/**
 * Writes the squared L2 norm of the leading `p` components of each row.
 *
 * @param X      row-major input, n rows of d floats
 * @param n      number of rows
 * @param d      row stride in floats
 * @param p      prefix length; values above d are capped at d so that a row
 *               is never read past its end
 * @param norms  output, n floats; norms[i] = sum of X[i, m]^2 over the prefix
 *
 * Rows are independent and the loop is annotated for OpenMP.
 */
void compute_partial_norms(const float* X, int n, int d, int p, float* norms) {
    // Cap the prefix at the row length: with p > d the inner loop would run
    // into the next row, and out of bounds on the last one.
    const int prefix_len = p < d ? p : d;
#pragma omp parallel for
    for (int i = 0; i < n; ++i) {
        const float* row = X + static_cast<size_t>(i) * d;
        float sum_sq = 0.0f;
        for (int m = 0; m < prefix_len; ++m) {
            sum_sq += row[m] * row[m];
        }
        norms[i] = sum_sq;
    }
}
91
+
92
+ } // namespace detail
93
+ } // namespace faiss
@@ -0,0 +1,41 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ namespace faiss {
11
+ namespace detail {
12
+
13
/** Reorder a row-major (k, d_trail) matrix into PDX block layout.
 * The dims are split into blocks of `pdx_block_size`; inside each block
 * the k centroids' slices are stored one after another, so all k
 * centroids' values for the same block of dims are contiguous — the access
 * pattern that makes progressive pruning cache-friendly. Trailing block
 * (size `d_trail % pdx_block_size`) uses the same convention. `Y_pdx` must
 * already be sized to `k * d_trail`. */
19
+ void pdxify(
20
+ const float* Y,
21
+ int k,
22
+ int d_trail,
23
+ int pdx_block_size,
24
+ float* Y_pdx);
25
+
26
+ /** Inverse of pdxify (used in tests for the bit-identical round-trip
27
+ * check). */
28
+ void de_pdxify(
29
+ const float* Y_pdx,
30
+ int k,
31
+ int d_trail,
32
+ int pdx_block_size,
33
+ float* Y);
34
+
35
+ /** norms[i] = sum_{m<p} X[i, m]^2 for row-major X of shape (n, d).
36
+ * Parallel over rows. Used by SuperKMeans to keep partial-norm caches
37
+ * in sync with the current d_prime. */
38
+ void compute_partial_norms(const float* X, int n, int d, int p, float* norms);
39
+
40
+ } // namespace detail
41
+ } // namespace faiss