faiss 0.5.3 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (379) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/ext/faiss/ext.cpp +1 -1
  4. data/ext/faiss/extconf.rb +4 -4
  5. data/ext/faiss/index.cpp +63 -45
  6. data/ext/faiss/index_binary.cpp +37 -27
  7. data/ext/faiss/kmeans.cpp +9 -8
  8. data/ext/faiss/pca_matrix.cpp +9 -7
  9. data/ext/faiss/product_quantizer.cpp +13 -11
  10. data/ext/faiss/utils.cpp +4 -2
  11. data/ext/faiss/utils.h +4 -0
  12. data/lib/faiss/version.rb +1 -1
  13. data/lib/faiss.rb +1 -1
  14. data/vendor/faiss/faiss/AutoTune.cpp +214 -82
  15. data/vendor/faiss/faiss/AutoTune.h +14 -1
  16. data/vendor/faiss/faiss/Clustering.cpp +97 -249
  17. data/vendor/faiss/faiss/Clustering.h +18 -0
  18. data/vendor/faiss/faiss/IVFlib.cpp +67 -44
  19. data/vendor/faiss/faiss/Index.cpp +25 -12
  20. data/vendor/faiss/faiss/Index.h +26 -4
  21. data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
  22. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +68 -61
  23. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
  24. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
  25. data/vendor/faiss/faiss/IndexBinary.cpp +6 -3
  26. data/vendor/faiss/faiss/IndexBinary.h +4 -4
  27. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
  28. data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
  29. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
  30. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +92 -95
  31. data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
  32. data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
  33. data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
  34. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +120 -414
  35. data/vendor/faiss/faiss/IndexFastScan.cpp +105 -129
  36. data/vendor/faiss/faiss/IndexFastScan.h +35 -24
  37. data/vendor/faiss/faiss/IndexFlat.cpp +216 -152
  38. data/vendor/faiss/faiss/IndexFlat.h +32 -14
  39. data/vendor/faiss/faiss/IndexFlatCodes.cpp +88 -41
  40. data/vendor/faiss/faiss/IndexFlatCodes.h +7 -1
  41. data/vendor/faiss/faiss/IndexHNSW.cpp +299 -187
  42. data/vendor/faiss/faiss/IndexHNSW.h +30 -14
  43. data/vendor/faiss/faiss/IndexIDMap.cpp +26 -22
  44. data/vendor/faiss/faiss/IndexIDMap.h +9 -7
  45. data/vendor/faiss/faiss/IndexIVF.cpp +535 -405
  46. data/vendor/faiss/faiss/IndexIVF.h +47 -16
  47. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
  48. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +105 -99
  49. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +6 -3
  50. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +379 -249
  51. data/vendor/faiss/faiss/IndexIVFFastScan.h +65 -60
  52. data/vendor/faiss/faiss/IndexIVFFlat.cpp +41 -124
  53. data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
  54. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +89 -138
  55. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
  56. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
  57. data/vendor/faiss/faiss/IndexIVFPQ.cpp +77 -907
  58. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +184 -122
  59. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
  60. data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -18
  61. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +59 -60
  62. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +4 -3
  63. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +564 -416
  64. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +269 -111
  65. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
  66. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  67. data/vendor/faiss/faiss/IndexLSH.cpp +44 -25
  68. data/vendor/faiss/faiss/IndexLattice.cpp +41 -36
  69. data/vendor/faiss/faiss/IndexNNDescent.cpp +37 -21
  70. data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
  71. data/vendor/faiss/faiss/IndexNSG.cpp +40 -23
  72. data/vendor/faiss/faiss/IndexNSG.h +0 -2
  73. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +32 -12
  74. data/vendor/faiss/faiss/IndexPQ.cpp +129 -213
  75. data/vendor/faiss/faiss/IndexPQ.h +3 -2
  76. data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
  77. data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
  78. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
  79. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  80. data/vendor/faiss/faiss/IndexRaBitQ.cpp +31 -43
  81. data/vendor/faiss/faiss/IndexRaBitQ.h +4 -3
  82. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +135 -317
  83. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +192 -34
  84. data/vendor/faiss/faiss/IndexRefine.cpp +30 -55
  85. data/vendor/faiss/faiss/IndexRefine.h +4 -4
  86. data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
  87. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
  88. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
  89. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
  90. data/vendor/faiss/faiss/IndexShards.cpp +13 -13
  91. data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
  92. data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
  93. data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
  94. data/vendor/faiss/faiss/MetaIndexes.h +1 -1
  95. data/vendor/faiss/faiss/MetricType.h +29 -6
  96. data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
  97. data/vendor/faiss/faiss/SuperKMeans.h +97 -0
  98. data/vendor/faiss/faiss/VectorTransform.cpp +349 -141
  99. data/vendor/faiss/faiss/VectorTransform.h +39 -16
  100. data/vendor/faiss/faiss/build.cpp +23 -0
  101. data/vendor/faiss/faiss/build.h +15 -0
  102. data/vendor/faiss/faiss/clone_index.cpp +55 -51
  103. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
  104. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
  105. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
  106. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
  107. data/vendor/faiss/faiss/{cppcontrib/factory_tools.cpp → factory_tools.cpp} +6 -1
  108. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
  109. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
  110. data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
  111. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
  112. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
  113. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
  114. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
  115. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
  116. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
  117. data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
  118. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
  119. data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
  120. data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
  121. data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
  122. data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
  123. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
  124. data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
  125. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
  126. data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
  127. data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
  128. data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
  129. data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
  130. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +64 -34
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
  132. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -28
  134. data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
  135. data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
  136. data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +367 -0
  137. data/vendor/faiss/faiss/impl/ClusteringInitialization.h +107 -0
  138. data/vendor/faiss/faiss/impl/CodePacker.cpp +7 -3
  139. data/vendor/faiss/faiss/impl/CodePacker.h +11 -3
  140. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +83 -0
  141. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.h +47 -0
  142. data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
  143. data/vendor/faiss/faiss/impl/FaissAssert.h +64 -3
  144. data/vendor/faiss/faiss/impl/FaissException.h +50 -3
  145. data/vendor/faiss/faiss/impl/HNSW.cpp +117 -351
  146. data/vendor/faiss/faiss/impl/HNSW.h +21 -40
  147. data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
  148. data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
  149. data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
  150. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +114 -102
  151. data/vendor/faiss/faiss/impl/NNDescent.cpp +63 -26
  152. data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
  153. data/vendor/faiss/faiss/impl/NSG.cpp +44 -26
  154. data/vendor/faiss/faiss/impl/NSG.h +20 -10
  155. data/vendor/faiss/faiss/impl/Panorama.cpp +76 -52
  156. data/vendor/faiss/faiss/impl/Panorama.h +265 -78
  157. data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
  158. data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
  159. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +62 -37
  160. data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
  161. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
  162. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
  163. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +99 -80
  164. data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
  165. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +135 -37
  166. data/vendor/faiss/faiss/impl/RaBitQUtils.h +148 -21
  167. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +298 -301
  168. data/vendor/faiss/faiss/impl/RaBitQuantizer.h +3 -10
  169. data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +15 -41
  170. data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +0 -4
  171. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +40 -32
  172. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  173. data/vendor/faiss/faiss/impl/ResultHandler.h +218 -113
  174. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +119 -2362
  175. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -3
  176. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
  177. data/vendor/faiss/faiss/impl/VisitedTable.cpp +42 -0
  178. data/vendor/faiss/faiss/impl/VisitedTable.h +76 -0
  179. data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
  180. data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
  181. data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
  182. data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
  183. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
  184. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
  185. data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
  186. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
  187. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
  188. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
  189. data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
  190. data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
  191. data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
  192. data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
  193. data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
  194. data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
  195. data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
  196. data/vendor/faiss/faiss/impl/expanded_scanners.h +163 -0
  197. data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
  198. data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
  199. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
  200. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
  201. data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
  202. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
  203. data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +176 -4
  204. data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
  205. data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
  206. data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
  207. data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
  208. data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
  209. data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
  210. data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -348
  211. data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
  212. data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
  213. data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +290 -142
  214. data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
  215. data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
  216. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
  217. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
  218. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
  219. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
  220. data/vendor/faiss/faiss/impl/index_read.cpp +1950 -505
  221. data/vendor/faiss/faiss/impl/index_read_utils.h +1 -2
  222. data/vendor/faiss/faiss/impl/index_write.cpp +112 -21
  223. data/vendor/faiss/faiss/impl/io.cpp +6 -6
  224. data/vendor/faiss/faiss/impl/io_macros.h +33 -16
  225. data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
  226. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +81 -40
  227. data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
  228. data/vendor/faiss/faiss/impl/mapped_io.cpp +15 -8
  229. data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
  230. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
  231. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
  232. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
  233. data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
  234. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
  235. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
  236. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
  237. data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx2.h → pq_code_distance/pq_code_distance-avx2.h} +43 -220
  238. data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx512.h → pq_code_distance/pq_code_distance-avx512.h} +25 -112
  239. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +59 -0
  240. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
  241. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +256 -0
  242. data/vendor/faiss/faiss/impl/{code_distance/code_distance-sve.h → pq_code_distance/pq_code_distance-sve.cpp} +57 -146
  243. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
  244. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +320 -483
  245. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
  246. data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +121 -0
  247. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +137 -0
  248. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +371 -0
  249. data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +190 -0
  250. data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +94 -0
  251. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +603 -0
  252. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +597 -0
  253. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +388 -0
  254. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +630 -0
  255. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
  256. data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +387 -0
  257. data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +54 -0
  258. data/vendor/faiss/faiss/impl/simd_dispatch.h +173 -0
  259. data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
  260. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +274 -171
  261. data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
  262. data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
  263. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
  264. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +275 -217
  265. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
  266. data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
  267. data/vendor/faiss/faiss/impl/svs_io.h +8 -2
  268. data/vendor/faiss/faiss/index_factory.cpp +115 -28
  269. data/vendor/faiss/faiss/index_io.h +53 -3
  270. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +73 -20
  271. data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
  272. data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
  273. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
  274. data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
  275. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
  276. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +14 -14
  277. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  278. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +9 -19
  279. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
  280. data/vendor/faiss/faiss/svs/IndexSVSFlat.h +2 -0
  281. data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
  282. data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
  283. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
  284. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
  285. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
  286. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
  287. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
  288. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +19 -2
  289. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
  290. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +19 -2
  291. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +14 -0
  292. data/vendor/faiss/faiss/utils/Heap.cpp +56 -10
  293. data/vendor/faiss/faiss/utils/Heap.h +21 -0
  294. data/vendor/faiss/faiss/utils/NeuralNet.cpp +54 -40
  295. data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
  296. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
  297. data/vendor/faiss/faiss/utils/distances.cpp +507 -559
  298. data/vendor/faiss/faiss/utils/distances.h +118 -1
  299. data/vendor/faiss/faiss/utils/distances_dispatch.h +250 -0
  300. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
  301. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
  302. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
  303. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
  304. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
  305. data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
  306. data/vendor/faiss/faiss/utils/distances_simd.cpp +72 -3681
  307. data/vendor/faiss/faiss/utils/extra_distances.cpp +60 -102
  308. data/vendor/faiss/faiss/utils/extra_distances.h +79 -7
  309. data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
  310. data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
  311. data/vendor/faiss/faiss/utils/hamming.h +92 -2
  312. data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
  313. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
  314. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
  315. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
  316. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
  317. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
  318. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
  319. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
  320. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
  321. data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
  322. data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
  323. data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
  324. data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
  325. data/vendor/faiss/faiss/utils/partitioning.h +31 -0
  326. data/vendor/faiss/faiss/utils/popcount.h +29 -0
  327. data/vendor/faiss/faiss/utils/pq_code_distance.h +251 -0
  328. data/vendor/faiss/faiss/utils/prefetch.h +2 -2
  329. data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
  330. data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
  331. data/vendor/faiss/faiss/utils/rabitq_simd.h +124 -343
  332. data/vendor/faiss/faiss/utils/random.cpp +6 -6
  333. data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
  334. data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +154 -0
  335. data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +777 -0
  336. data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +306 -0
  337. data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +1431 -0
  338. data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +1095 -0
  339. data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
  340. data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
  341. data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +392 -0
  342. data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
  343. data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
  344. data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
  345. data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
  346. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
  347. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
  348. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
  349. data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
  350. data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
  351. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
  352. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
  353. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
  354. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
  355. data/vendor/faiss/faiss/utils/simd_levels.cpp +334 -0
  356. data/vendor/faiss/faiss/utils/simd_levels.h +183 -0
  357. data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
  358. data/vendor/faiss/faiss/utils/utils.cpp +21 -14
  359. data/vendor/faiss/faiss/utils/utils.h +3 -3
  360. metadata +156 -42
  361. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
  362. data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
  363. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +0 -81
  364. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +0 -186
  365. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -216
  366. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -224
  367. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
  368. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
  369. data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
  370. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
  371. data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -228
  372. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
  373. data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
  374. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -450
  375. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
  376. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
  377. data/vendor/faiss/faiss/utils/simdlib.h +0 -42
  378. data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -296
  379. /data/vendor/faiss/faiss/{cppcontrib/factory_tools.h → factory_tools.h} +0 -0
@@ -10,6 +10,7 @@
10
10
  #ifndef FAISS_PANORAMA_H
11
11
  #define FAISS_PANORAMA_H
12
12
 
13
+ #include <faiss/MetricType.h>
13
14
  #include <faiss/impl/IDSelector.h>
14
15
  #include <faiss/impl/PanoramaStats.h>
15
16
  #include <faiss/utils/distances.h>
@@ -17,10 +18,190 @@
17
18
  #include <algorithm>
18
19
  #include <cstddef>
19
20
  #include <cstdint>
21
+ #include <cstring>
20
22
  #include <vector>
21
23
 
24
+ #if defined(COMPILE_SIMD_AVX2) && defined(__AVX2__) && defined(__BMI2__)
25
+ #include <immintrin.h>
26
+ #endif
27
+
22
28
  namespace faiss {
23
29
 
30
+ #ifndef SWIG
31
+
32
+ /// Compute dot products between query_level and active vectors.
33
+ ///
34
+ /// @tparam AllActive If true, vectors are at sequential positions 0..N-1
35
+ /// (first level, full batch). If false, positions come
36
+ /// from active_indices (subsequent levels after pruning).
37
+ /// @tparam LevelWidth Compile-time level width in floats (0 = use runtime
38
+ /// level_width_dims). Enables full loop unrolling.
39
+ FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
40
+ template <bool AllActive = false, size_t LevelWidth = 0>
41
+ static inline void compute_level_dot_kernel(
42
+ const float* FAISS_RESTRICT query_level,
43
+ const float* FAISS_RESTRICT level_storage,
44
+ const uint32_t* active_indices,
45
+ const size_t num_active,
46
+ const size_t level_width_dims,
47
+ float* FAISS_RESTRICT dot_products) {
48
+ const size_t width = LevelWidth > 0 ? LevelWidth : level_width_dims;
49
+ size_t i = 0;
50
+ for (; i + 4 <= num_active; i += 4) {
51
+ const float* y0 = level_storage +
52
+ (AllActive ? (i + 0) : active_indices[i + 0]) * width;
53
+ const float* y1 = level_storage +
54
+ (AllActive ? (i + 1) : active_indices[i + 1]) * width;
55
+ const float* y2 = level_storage +
56
+ (AllActive ? (i + 2) : active_indices[i + 2]) * width;
57
+ const float* y3 = level_storage +
58
+ (AllActive ? (i + 3) : active_indices[i + 3]) * width;
59
+
60
+ float dp0 = 0, dp1 = 0, dp2 = 0, dp3 = 0;
61
+ FAISS_PRAGMA_IMPRECISE_LOOP
62
+ for (size_t j = 0; j < width; j++) {
63
+ float q = query_level[j];
64
+ dp0 += q * y0[j];
65
+ dp1 += q * y1[j];
66
+ dp2 += q * y2[j];
67
+ dp3 += q * y3[j];
68
+ }
69
+
70
+ dot_products[i + 0] = dp0;
71
+ dot_products[i + 1] = dp1;
72
+ dot_products[i + 2] = dp2;
73
+ dot_products[i + 3] = dp3;
74
+ }
75
+ for (; i < num_active; i++) {
76
+ const float* yj =
77
+ level_storage + (AllActive ? i : active_indices[i]) * width;
78
+ float dp = 0;
79
+ FAISS_PRAGMA_IMPRECISE_LOOP
80
+ for (size_t j = 0; j < width; j++) {
81
+ dp += query_level[j] * yj[j];
82
+ }
83
+ dot_products[i] = dp;
84
+ }
85
+ }
86
+ FAISS_PRAGMA_IMPRECISE_FUNCTION_END
87
+
88
+ /// Update exact distances with the current level's dot products, then apply
89
+ /// Panorama pruning: for each active vector, compute a lower bound on
90
+ /// the final distance and mark it for removal if it cannot beat the current
91
+ /// threshold. Writes 0/1 into active_byteset for subsequent compaction.
92
+ ///
93
+ /// Uses `if constexpr` on C::is_max rather than C::cmp() to ensure the
94
+ /// comparison autovectorizes (C::cmp generates scalar function calls).
95
+ FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
96
+ template <bool AllActive, typename C, MetricType M>
97
+ static inline void prune_kernel(
98
+ float* FAISS_RESTRICT exact_distances,
99
+ const float* FAISS_RESTRICT dot_buffer,
100
+ const float* FAISS_RESTRICT level_cum_sums,
101
+ uint8_t* FAISS_RESTRICT active_byteset,
102
+ const uint32_t* FAISS_RESTRICT active_indices,
103
+ const uint32_t num_active,
104
+ const float query_cum_norm,
105
+ const float threshold) {
106
+ FAISS_PRAGMA_IMPRECISE_LOOP
107
+ for (uint32_t i = 0; i < num_active; i++) {
108
+ uint32_t idx = AllActive ? i : active_indices[i];
109
+ if constexpr (M == METRIC_INNER_PRODUCT) {
110
+ exact_distances[idx] += dot_buffer[i];
111
+ } else {
112
+ exact_distances[idx] -= 2.0f * dot_buffer[i];
113
+ }
114
+
115
+ float cum_sum = level_cum_sums[idx];
116
+ float cauchy_schwarz_bound;
117
+ if constexpr (M == METRIC_INNER_PRODUCT) {
118
+ cauchy_schwarz_bound = -cum_sum * query_cum_norm;
119
+ } else {
120
+ cauchy_schwarz_bound = 2.0f * cum_sum * query_cum_norm;
121
+ }
122
+
123
+ float lower_bound = exact_distances[idx] - cauchy_schwarz_bound;
124
+ if constexpr (C::is_max) {
125
+ active_byteset[i] = (threshold > lower_bound) ? 1 : 0;
126
+ } else {
127
+ active_byteset[i] = (threshold < lower_bound) ? 1 : 0;
128
+ }
129
+ }
130
+ }
131
+ FAISS_PRAGMA_IMPRECISE_FUNCTION_END
132
+
133
+ /// Compact active_indices in-place, removing entries where active_byteset[i]
134
+ /// is zero. Returns the new count of active elements. Uses a branchless BMI2 +
135
+ /// AVX2 fast path (8 elements/iteration via _pext_u64 permutation) with a
136
+ /// scalar fallback for the tail and non-x86 platforms.
137
+ inline size_t compact_active_kernel(
138
+ uint32_t* active_indices,
139
+ const uint8_t* FAISS_RESTRICT active_byteset,
140
+ const size_t num_active) {
141
+ size_t next_active = 0;
142
+ size_t i = 0;
143
+
144
+ #if defined(COMPILE_SIMD_AVX2) && defined(__AVX2__) && defined(__BMI2__)
145
+ for (; i + 8 <= num_active; i += 8) {
146
+ uint64_t bytes;
147
+ memcpy(&bytes, &active_byteset[i], 8);
148
+
149
+ uint64_t expanded = bytes * 0xFFULL;
150
+ uint64_t packed = _pext_u64(0x0706050403020100ULL, expanded);
151
+
152
+ __m256i perm = _mm256_cvtepu8_epi32(_mm_cvtsi64_si128((int64_t)packed));
153
+ __m256i data = _mm256_loadu_si256((const __m256i*)&active_indices[i]);
154
+ __m256i compacted = _mm256_permutevar8x32_epi32(data, perm);
155
+ _mm256_storeu_si256((__m256i*)&active_indices[next_active], compacted);
156
+
157
+ next_active += __builtin_popcountll(bytes);
158
+ }
159
+ #endif
160
+
161
+ for (; i < num_active; i++) {
162
+ active_indices[next_active] = active_indices[i];
163
+ next_active += active_byteset[i] ? 1 : 0;
164
+ }
165
+
166
+ return next_active;
167
+ }
168
+
169
+ /// Compile-time dispatch: converts a runtime `width` value into a template
170
+ /// parameter by generating an if-else chain over [Lo, Hi] in steps of Step.
171
+ /// Falls through to LevelWidth=0 (runtime path) if no specialization matches.
172
+ /// Allows for specialization of common level widths.
173
+ namespace detail {
174
+ template <size_t Lo, size_t Hi, size_t Step, typename Lambda>
175
+ inline auto dispatch_width(size_t width, Lambda&& fn) {
176
+ if constexpr (Lo > Hi) {
177
+ return fn.template operator()<0>();
178
+ } else {
179
+ if (width == Lo) {
180
+ return fn.template operator()<Lo>();
181
+ }
182
+ return dispatch_width<Lo + Step, Hi, Step>(
183
+ width, std::forward<Lambda>(fn));
184
+ }
185
+ }
186
+ } // namespace detail
187
+
188
+ /// Specialize for common float level widths (multiples of 8 up to 128).
189
+ template <typename LambdaType>
190
+ inline auto with_level_width(size_t width, LambdaType&& action) {
191
+ return detail::dispatch_width<8, 128, 8>(
192
+ width, std::forward<LambdaType>(action));
193
+ }
194
+
195
+ template <typename Lambda>
196
+ inline auto with_bool(bool value, Lambda&& fn) {
197
+ if (value) {
198
+ return fn.template operator()<true>();
199
+ } else {
200
+ return fn.template operator()<false>();
201
+ }
202
+ }
203
+ #endif // SWIG
204
+
24
205
  /**
25
206
  * Implements the core logic of Panorama-based refinement.
26
207
  * arXiv: https://arxiv.org/abs/2510.00566
@@ -41,6 +222,8 @@ namespace faiss {
41
222
  * accelerating the refinement stage.
42
223
  */
43
224
  struct Panorama {
225
+ static constexpr size_t kDefaultBatchSize = 128;
226
+
44
227
  size_t d = 0;
45
228
  size_t code_size = 0;
46
229
  size_t n_levels = 0;
@@ -67,7 +250,7 @@ struct Panorama {
67
250
  float* cumsum_base,
68
251
  size_t offset,
69
252
  size_t n_entry,
70
- const float* vectors);
253
+ const float* vectors) const;
71
254
 
72
255
  /// Compute the cumulative sums of the query vector.
73
256
  void compute_query_cum_sums(const float* query, float* query_cum_sums)
@@ -97,7 +280,8 @@ struct Panorama {
97
280
  /// 4. After all levels, survivors are exact distances; update heap.
98
281
  /// This achieves early termination while maintaining SIMD-friendly
99
282
  /// sequential access patterns in the level-oriented storage layout.
100
- template <typename C>
283
+ #ifndef SWIG
284
+ template <typename C, MetricType M>
101
285
  size_t progressive_filter_batch(
102
286
  const uint8_t* codes_base,
103
287
  const float* cum_sums,
@@ -109,96 +293,99 @@ struct Panorama {
109
293
  const idx_t* ids,
110
294
  bool use_sel,
111
295
  std::vector<uint32_t>& active_indices,
296
+ std::vector<uint8_t>& active_byteset,
112
297
  std::vector<float>& exact_distances,
298
+ std::vector<float>& dot_buffer,
113
299
  float threshold,
114
- PanoramaStats& local_stats) const;
115
-
116
- void reconstruct(idx_t key, float* recons, const uint8_t* codes_base) const;
117
- };
118
-
119
- template <typename C>
120
- size_t Panorama::progressive_filter_batch(
121
- const uint8_t* codes_base,
122
- const float* cum_sums,
123
- const float* query,
124
- const float* query_cum_sums,
125
- size_t batch_no,
126
- size_t list_size,
127
- const IDSelector* sel,
128
- const idx_t* ids,
129
- bool use_sel,
130
- std::vector<uint32_t>& active_indices,
131
- std::vector<float>& exact_distances,
132
- float threshold,
133
- PanoramaStats& local_stats) const {
134
- size_t batch_start = batch_no * batch_size;
135
- size_t curr_batch_size = std::min(list_size - batch_start, batch_size);
136
-
137
- size_t cumsum_batch_offset = batch_no * batch_size * (n_levels + 1);
138
- const float* batch_cum_sums = cum_sums + cumsum_batch_offset;
139
- const float* level_cum_sums = batch_cum_sums + batch_size;
140
- float q_norm = query_cum_sums[0] * query_cum_sums[0];
141
-
142
- size_t batch_offset = batch_no * batch_size * code_size;
143
- const uint8_t* storage_base = codes_base + batch_offset;
144
-
145
- // Initialize active set with ID-filtered vectors.
146
- size_t num_active = 0;
147
- for (size_t i = 0; i < curr_batch_size; i++) {
148
- size_t global_idx = batch_start + i;
149
- idx_t id = (ids == nullptr) ? global_idx : ids[global_idx];
150
- bool include = !use_sel || sel->is_member(id);
151
-
152
- active_indices[num_active] = i;
153
- float cum_sum = batch_cum_sums[i];
154
- exact_distances[i] = cum_sum * cum_sum + q_norm;
155
-
156
- num_active += include;
157
- }
300
+ PanoramaStats& local_stats) const {
301
+ size_t batch_start = batch_no * batch_size;
302
+ size_t curr_batch_size = std::min(list_size - batch_start, batch_size);
303
+
304
+ size_t cumsum_batch_offset = batch_no * batch_size * (n_levels + 1);
305
+ const float* batch_cum_sums = cum_sums + cumsum_batch_offset;
306
+ const float* level_cum_sums = batch_cum_sums + batch_size;
307
+ float q_norm = query_cum_sums[0] * query_cum_sums[0];
308
+
309
+ size_t batch_offset = batch_no * batch_size * code_size;
310
+ const uint8_t* storage_base = codes_base + batch_offset;
311
+
312
+ // Initialize active set with ID-filtered vectors.
313
+ size_t num_active = 0;
314
+ for (size_t i = 0; i < curr_batch_size; i++) {
315
+ size_t global_idx = batch_start + i;
316
+ idx_t id = (ids == nullptr) ? global_idx : ids[global_idx];
317
+ bool include = !use_sel || sel->is_member(id);
318
+
319
+ active_indices[num_active] = i;
320
+ float cum_sum = batch_cum_sums[i];
321
+
322
+ if constexpr (M == METRIC_INNER_PRODUCT) {
323
+ exact_distances[i] = 0.0f;
324
+ } else {
325
+ exact_distances[i] = cum_sum * cum_sum + q_norm;
326
+ }
327
+
328
+ num_active += include;
329
+ }
158
330
 
159
- if (num_active == 0) {
160
- return 0;
161
- }
331
+ size_t total_active = num_active;
332
+ const bool first_level_full = (num_active == curr_batch_size);
162
333
 
163
- size_t total_active = num_active;
164
- for (size_t level = 0; level < n_levels; level++) {
165
- local_stats.total_dims_scanned += num_active;
166
- local_stats.total_dims += total_active;
334
+ local_stats.total_dims += total_active * n_levels;
167
335
 
168
- float query_cum_norm = query_cum_sums[level + 1];
336
+ for (size_t level = 0; (level < n_levels) && (num_active > 0);
337
+ level++) {
338
+ local_stats.total_dims_scanned += num_active;
169
339
 
170
- size_t level_offset = level * level_width * batch_size;
171
- const float* level_storage =
172
- (const float*)(storage_base + level_offset);
340
+ float query_cum_norm = query_cum_sums[level + 1];
173
341
 
174
- size_t next_active = 0;
175
- for (size_t i = 0; i < num_active; i++) {
176
- uint32_t idx = active_indices[i];
342
+ size_t level_offset = level * level_width * batch_size;
343
+ const float* level_storage =
344
+ (const float*)(storage_base + level_offset);
345
+ const float* query_level = query + level * level_width_floats;
177
346
  size_t actual_level_width = std::min(
178
347
  level_width_floats, d - level * level_width_floats);
179
348
 
180
- const float* yj = level_storage + idx * actual_level_width;
181
- const float* query_level = query + level * level_width_floats;
182
-
183
- float dot_product =
184
- fvec_inner_product(query_level, yj, actual_level_width);
185
-
186
- exact_distances[idx] -= 2.0f * dot_product;
187
-
188
- float cum_sum = level_cum_sums[idx];
189
- float cauchy_schwarz_bound = 2.0f * cum_sum * query_cum_norm;
190
- float lower_bound = exact_distances[idx] - cauchy_schwarz_bound;
191
-
192
- active_indices[next_active] = idx;
193
- next_active += C::cmp(threshold, lower_bound) ? 1 : 0;
349
+ num_active = with_bool(
350
+ level == 0 && first_level_full, [&]<bool AllActive>() {
351
+ with_level_width(
352
+ actual_level_width, [&]<size_t LevelWidth>() {
353
+ compute_level_dot_kernel<
354
+ AllActive,
355
+ LevelWidth>(
356
+ query_level,
357
+ level_storage,
358
+ active_indices.data(),
359
+ num_active,
360
+ actual_level_width,
361
+ dot_buffer.data());
362
+ });
363
+
364
+ prune_kernel<AllActive, C, M>(
365
+ exact_distances.data(),
366
+ dot_buffer.data(),
367
+ level_cum_sums,
368
+ active_byteset.data(),
369
+ active_indices.data(),
370
+ (uint32_t)num_active,
371
+ query_cum_norm,
372
+ threshold);
373
+
374
+ return compact_active_kernel(
375
+ active_indices.data(),
376
+ active_byteset.data(),
377
+ num_active);
378
+ });
379
+
380
+ level_cum_sums += batch_size;
194
381
  }
195
382
 
196
- num_active = next_active;
197
- level_cum_sums += batch_size;
383
+ return num_active;
198
384
  }
385
+ #endif // SWIG
199
386
 
200
- return num_active;
201
- }
387
+ void reconstruct(idx_t key, float* recons, const uint8_t* codes_base) const;
388
+ };
202
389
  } // namespace faiss
203
390
 
204
391
  #endif
@@ -0,0 +1,93 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include <faiss/impl/PdxLayout.h>
9
+
10
+ #include <cstddef>
11
+ #include <cstring>
12
+
13
+ namespace faiss {
14
+ namespace detail {
15
+
16
// Packs the row-major (k, d_trail) matrix `Y` into `Y_pdx`, block by block.
//
// The d_trail columns are cut into consecutive blocks of `pdx_block_size`
// columns, followed by one shorter trailing block when d_trail is not a
// multiple of the block size. For every block, the matching slice of row 0 is
// written first, then the slice of row 1, and so on up to row k - 1.
//
// `pdx_block_size` is used as a divisor, so it must be non-zero.
void pdxify(
        const float* Y,
        int k,
        int d_trail,
        int pdx_block_size,
        float* Y_pdx) {
    const int full_blocks = d_trail / pdx_block_size;
    const int remainder = d_trail % pdx_block_size;

    // Next free position in the packed output.
    size_t cursor = 0;

    // Appends the `width`-column slice starting at `first_col` of every row.
    const auto append_block = [&](size_t first_col, int width) {
        for (int row = 0; row < k; ++row) {
            std::memcpy(
                    Y_pdx + cursor,
                    Y + static_cast<size_t>(row) * d_trail + first_col,
                    width * sizeof(float));
            cursor += width;
        }
    };

    for (int blk = 0; blk < full_blocks; ++blk) {
        append_block(static_cast<size_t>(blk) * pdx_block_size, pdx_block_size);
    }
    if (remainder > 0) {
        append_block(
                static_cast<size_t>(full_blocks) * pdx_block_size, remainder);
    }
}
47
+
48
// Unpacks the blocked buffer `Y_pdx` back into the row-major (k, d_trail)
// matrix `Y`; it walks the blocks in the same order as pdxify and copies in
// the opposite direction.
//
// Blocks of `pdx_block_size` columns are consumed one after another, followed
// by the shorter trailing block when d_trail is not a multiple of the block
// size. Within a block the slices are read row 0 first, then row 1, etc.
//
// `pdx_block_size` is used as a divisor, so it must be non-zero.
void de_pdxify(
        const float* Y_pdx,
        int k,
        int d_trail,
        int pdx_block_size,
        float* Y) {
    const int full_blocks = d_trail / pdx_block_size;
    const int remainder = d_trail % pdx_block_size;

    // Next unread position in the packed input.
    size_t cursor = 0;

    // Restores the `width`-column slice starting at `first_col` of every row.
    const auto restore_block = [&](size_t first_col, int width) {
        for (int row = 0; row < k; ++row) {
            std::memcpy(
                    Y + static_cast<size_t>(row) * d_trail + first_col,
                    Y_pdx + cursor,
                    width * sizeof(float));
            cursor += width;
        }
    };

    for (int blk = 0; blk < full_blocks; ++blk) {
        restore_block(
                static_cast<size_t>(blk) * pdx_block_size, pdx_block_size);
    }
    if (remainder > 0) {
        restore_block(
                static_cast<size_t>(full_blocks) * pdx_block_size, remainder);
    }
}
79
+
80
// For each of the n rows of the row-major matrix X (row stride d), stores the
// sum of squares of the row's first p entries in norms[row]. Rows are
// independent and are distributed with an OpenMP parallel-for.
void compute_partial_norms(const float* X, int n, int d, int p, float* norms) {
#pragma omp parallel for
    for (int row_idx = 0; row_idx < n; ++row_idx) {
        const float* base = X + static_cast<size_t>(row_idx) * d;
        float acc = 0.0f;
        // Accumulate left to right over the leading p coordinates.
        for (int col = 0; col < p; ++col) {
            const float value = base[col];
            acc += value * value;
        }
        norms[row_idx] = acc;
    }
}
91
+
92
+ } // namespace detail
93
+ } // namespace faiss
@@ -0,0 +1,41 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ namespace faiss {
11
+ namespace detail {
12
+
13
+ /** Reorder a row-major (k, d_trail) matrix into PDX blocked layout: dims are
14
+ * split into blocks of `pdx_block_size`, blocks are stored back to back, and
15
+ * within a block the k rows' slices follow each other (row 0's slice, then
16
+ * row 1's, ...). This grouping is meant to keep progressive pruning
17
+ * cache-friendly. The trailing block (size `d_trail % pdx_block_size`) uses
18
+ * the same convention. `Y_pdx` must already be sized to `k * d_trail`. */
19
+ void pdxify(
20
+ const float* Y,
21
+ int k,
22
+ int d_trail,
23
+ int pdx_block_size,
24
+ float* Y_pdx);
25
+
26
+ /** Inverse of pdxify: copies the blocked buffer `Y_pdx` back into the row-major
27
+ * (k, d_trail) matrix `Y` (used in tests for the bit-identical round-trip check). */
28
+ void de_pdxify(
29
+ const float* Y_pdx,
30
+ int k,
31
+ int d_trail,
32
+ int pdx_block_size,
33
+ float* Y);
34
+
35
+ /** norms[i] = sum_{m<p} X[i, m]^2 for row-major X of shape (n, d): only the
36
+ * first p entries of each row are read. Parallel over rows. Used by SuperKMeans
37
+ * to keep partial-norm caches in sync with the current d_prime. */
38
+ void compute_partial_norms(const float* X, int n, int d, int p, float* norms);
39
+
40
+ } // namespace detail
41
+ } // namespace faiss