faiss 0.5.3 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (379) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/ext/faiss/ext.cpp +1 -1
  4. data/ext/faiss/extconf.rb +4 -4
  5. data/ext/faiss/index.cpp +63 -45
  6. data/ext/faiss/index_binary.cpp +37 -27
  7. data/ext/faiss/kmeans.cpp +9 -8
  8. data/ext/faiss/pca_matrix.cpp +9 -7
  9. data/ext/faiss/product_quantizer.cpp +13 -11
  10. data/ext/faiss/utils.cpp +4 -2
  11. data/ext/faiss/utils.h +4 -0
  12. data/lib/faiss/version.rb +1 -1
  13. data/lib/faiss.rb +1 -1
  14. data/vendor/faiss/faiss/AutoTune.cpp +214 -82
  15. data/vendor/faiss/faiss/AutoTune.h +14 -1
  16. data/vendor/faiss/faiss/Clustering.cpp +97 -249
  17. data/vendor/faiss/faiss/Clustering.h +18 -0
  18. data/vendor/faiss/faiss/IVFlib.cpp +67 -44
  19. data/vendor/faiss/faiss/Index.cpp +25 -12
  20. data/vendor/faiss/faiss/Index.h +26 -4
  21. data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
  22. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +68 -61
  23. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
  24. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
  25. data/vendor/faiss/faiss/IndexBinary.cpp +6 -3
  26. data/vendor/faiss/faiss/IndexBinary.h +4 -4
  27. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
  28. data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
  29. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
  30. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +92 -95
  31. data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
  32. data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
  33. data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
  34. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +120 -414
  35. data/vendor/faiss/faiss/IndexFastScan.cpp +105 -129
  36. data/vendor/faiss/faiss/IndexFastScan.h +35 -24
  37. data/vendor/faiss/faiss/IndexFlat.cpp +216 -152
  38. data/vendor/faiss/faiss/IndexFlat.h +32 -14
  39. data/vendor/faiss/faiss/IndexFlatCodes.cpp +88 -41
  40. data/vendor/faiss/faiss/IndexFlatCodes.h +7 -1
  41. data/vendor/faiss/faiss/IndexHNSW.cpp +299 -187
  42. data/vendor/faiss/faiss/IndexHNSW.h +30 -14
  43. data/vendor/faiss/faiss/IndexIDMap.cpp +26 -22
  44. data/vendor/faiss/faiss/IndexIDMap.h +9 -7
  45. data/vendor/faiss/faiss/IndexIVF.cpp +535 -405
  46. data/vendor/faiss/faiss/IndexIVF.h +47 -16
  47. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
  48. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +105 -99
  49. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +6 -3
  50. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +379 -249
  51. data/vendor/faiss/faiss/IndexIVFFastScan.h +65 -60
  52. data/vendor/faiss/faiss/IndexIVFFlat.cpp +41 -124
  53. data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
  54. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +89 -138
  55. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
  56. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
  57. data/vendor/faiss/faiss/IndexIVFPQ.cpp +77 -907
  58. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +184 -122
  59. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
  60. data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -18
  61. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +59 -60
  62. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +4 -3
  63. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +564 -416
  64. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +269 -111
  65. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
  66. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  67. data/vendor/faiss/faiss/IndexLSH.cpp +44 -25
  68. data/vendor/faiss/faiss/IndexLattice.cpp +41 -36
  69. data/vendor/faiss/faiss/IndexNNDescent.cpp +37 -21
  70. data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
  71. data/vendor/faiss/faiss/IndexNSG.cpp +40 -23
  72. data/vendor/faiss/faiss/IndexNSG.h +0 -2
  73. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +32 -12
  74. data/vendor/faiss/faiss/IndexPQ.cpp +129 -213
  75. data/vendor/faiss/faiss/IndexPQ.h +3 -2
  76. data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
  77. data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
  78. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
  79. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  80. data/vendor/faiss/faiss/IndexRaBitQ.cpp +31 -43
  81. data/vendor/faiss/faiss/IndexRaBitQ.h +4 -3
  82. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +135 -317
  83. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +192 -34
  84. data/vendor/faiss/faiss/IndexRefine.cpp +30 -55
  85. data/vendor/faiss/faiss/IndexRefine.h +4 -4
  86. data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
  87. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
  88. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
  89. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
  90. data/vendor/faiss/faiss/IndexShards.cpp +13 -13
  91. data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
  92. data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
  93. data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
  94. data/vendor/faiss/faiss/MetaIndexes.h +1 -1
  95. data/vendor/faiss/faiss/MetricType.h +29 -6
  96. data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
  97. data/vendor/faiss/faiss/SuperKMeans.h +97 -0
  98. data/vendor/faiss/faiss/VectorTransform.cpp +349 -141
  99. data/vendor/faiss/faiss/VectorTransform.h +39 -16
  100. data/vendor/faiss/faiss/build.cpp +23 -0
  101. data/vendor/faiss/faiss/build.h +15 -0
  102. data/vendor/faiss/faiss/clone_index.cpp +55 -51
  103. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
  104. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
  105. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
  106. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
  107. data/vendor/faiss/faiss/{cppcontrib/factory_tools.cpp → factory_tools.cpp} +6 -1
  108. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
  109. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
  110. data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
  111. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
  112. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
  113. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
  114. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
  115. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
  116. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
  117. data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
  118. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
  119. data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
  120. data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
  121. data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
  122. data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
  123. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
  124. data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
  125. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
  126. data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
  127. data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
  128. data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
  129. data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
  130. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +64 -34
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
  132. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -28
  134. data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
  135. data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
  136. data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +367 -0
  137. data/vendor/faiss/faiss/impl/ClusteringInitialization.h +107 -0
  138. data/vendor/faiss/faiss/impl/CodePacker.cpp +7 -3
  139. data/vendor/faiss/faiss/impl/CodePacker.h +11 -3
  140. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +83 -0
  141. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.h +47 -0
  142. data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
  143. data/vendor/faiss/faiss/impl/FaissAssert.h +64 -3
  144. data/vendor/faiss/faiss/impl/FaissException.h +50 -3
  145. data/vendor/faiss/faiss/impl/HNSW.cpp +117 -351
  146. data/vendor/faiss/faiss/impl/HNSW.h +21 -40
  147. data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
  148. data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
  149. data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
  150. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +114 -102
  151. data/vendor/faiss/faiss/impl/NNDescent.cpp +63 -26
  152. data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
  153. data/vendor/faiss/faiss/impl/NSG.cpp +44 -26
  154. data/vendor/faiss/faiss/impl/NSG.h +20 -10
  155. data/vendor/faiss/faiss/impl/Panorama.cpp +76 -52
  156. data/vendor/faiss/faiss/impl/Panorama.h +265 -78
  157. data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
  158. data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
  159. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +62 -37
  160. data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
  161. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
  162. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
  163. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +99 -80
  164. data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
  165. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +135 -37
  166. data/vendor/faiss/faiss/impl/RaBitQUtils.h +148 -21
  167. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +298 -301
  168. data/vendor/faiss/faiss/impl/RaBitQuantizer.h +3 -10
  169. data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +15 -41
  170. data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +0 -4
  171. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +40 -32
  172. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  173. data/vendor/faiss/faiss/impl/ResultHandler.h +218 -113
  174. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +119 -2362
  175. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -3
  176. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
  177. data/vendor/faiss/faiss/impl/VisitedTable.cpp +42 -0
  178. data/vendor/faiss/faiss/impl/VisitedTable.h +76 -0
  179. data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
  180. data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
  181. data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
  182. data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
  183. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
  184. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
  185. data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
  186. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
  187. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
  188. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
  189. data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
  190. data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
  191. data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
  192. data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
  193. data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
  194. data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
  195. data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
  196. data/vendor/faiss/faiss/impl/expanded_scanners.h +163 -0
  197. data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
  198. data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
  199. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
  200. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
  201. data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
  202. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
  203. data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +176 -4
  204. data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
  205. data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
  206. data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
  207. data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
  208. data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
  209. data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
  210. data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -348
  211. data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
  212. data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
  213. data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +290 -142
  214. data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
  215. data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
  216. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
  217. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
  218. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
  219. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
  220. data/vendor/faiss/faiss/impl/index_read.cpp +1950 -505
  221. data/vendor/faiss/faiss/impl/index_read_utils.h +1 -2
  222. data/vendor/faiss/faiss/impl/index_write.cpp +112 -21
  223. data/vendor/faiss/faiss/impl/io.cpp +6 -6
  224. data/vendor/faiss/faiss/impl/io_macros.h +33 -16
  225. data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
  226. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +81 -40
  227. data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
  228. data/vendor/faiss/faiss/impl/mapped_io.cpp +15 -8
  229. data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
  230. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
  231. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
  232. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
  233. data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
  234. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
  235. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
  236. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
  237. data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx2.h → pq_code_distance/pq_code_distance-avx2.h} +43 -220
  238. data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx512.h → pq_code_distance/pq_code_distance-avx512.h} +25 -112
  239. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +59 -0
  240. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
  241. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +256 -0
  242. data/vendor/faiss/faiss/impl/{code_distance/code_distance-sve.h → pq_code_distance/pq_code_distance-sve.cpp} +57 -146
  243. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
  244. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +320 -483
  245. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
  246. data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +121 -0
  247. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +137 -0
  248. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +371 -0
  249. data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +190 -0
  250. data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +94 -0
  251. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +603 -0
  252. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +597 -0
  253. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +388 -0
  254. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +630 -0
  255. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
  256. data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +387 -0
  257. data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +54 -0
  258. data/vendor/faiss/faiss/impl/simd_dispatch.h +173 -0
  259. data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
  260. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +274 -171
  261. data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
  262. data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
  263. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
  264. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +275 -217
  265. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
  266. data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
  267. data/vendor/faiss/faiss/impl/svs_io.h +8 -2
  268. data/vendor/faiss/faiss/index_factory.cpp +115 -28
  269. data/vendor/faiss/faiss/index_io.h +53 -3
  270. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +73 -20
  271. data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
  272. data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
  273. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
  274. data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
  275. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
  276. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +14 -14
  277. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  278. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +9 -19
  279. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
  280. data/vendor/faiss/faiss/svs/IndexSVSFlat.h +2 -0
  281. data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
  282. data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
  283. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
  284. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
  285. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
  286. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
  287. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
  288. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +19 -2
  289. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
  290. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +19 -2
  291. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +14 -0
  292. data/vendor/faiss/faiss/utils/Heap.cpp +56 -10
  293. data/vendor/faiss/faiss/utils/Heap.h +21 -0
  294. data/vendor/faiss/faiss/utils/NeuralNet.cpp +54 -40
  295. data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
  296. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
  297. data/vendor/faiss/faiss/utils/distances.cpp +507 -559
  298. data/vendor/faiss/faiss/utils/distances.h +118 -1
  299. data/vendor/faiss/faiss/utils/distances_dispatch.h +250 -0
  300. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
  301. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
  302. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
  303. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
  304. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
  305. data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
  306. data/vendor/faiss/faiss/utils/distances_simd.cpp +72 -3681
  307. data/vendor/faiss/faiss/utils/extra_distances.cpp +60 -102
  308. data/vendor/faiss/faiss/utils/extra_distances.h +79 -7
  309. data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
  310. data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
  311. data/vendor/faiss/faiss/utils/hamming.h +92 -2
  312. data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
  313. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
  314. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
  315. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
  316. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
  317. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
  318. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
  319. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
  320. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
  321. data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
  322. data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
  323. data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
  324. data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
  325. data/vendor/faiss/faiss/utils/partitioning.h +31 -0
  326. data/vendor/faiss/faiss/utils/popcount.h +29 -0
  327. data/vendor/faiss/faiss/utils/pq_code_distance.h +251 -0
  328. data/vendor/faiss/faiss/utils/prefetch.h +2 -2
  329. data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
  330. data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
  331. data/vendor/faiss/faiss/utils/rabitq_simd.h +124 -343
  332. data/vendor/faiss/faiss/utils/random.cpp +6 -6
  333. data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
  334. data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +154 -0
  335. data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +777 -0
  336. data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +306 -0
  337. data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +1431 -0
  338. data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +1095 -0
  339. data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
  340. data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
  341. data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +392 -0
  342. data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
  343. data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
  344. data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
  345. data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
  346. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
  347. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
  348. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
  349. data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
  350. data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
  351. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
  352. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
  353. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
  354. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
  355. data/vendor/faiss/faiss/utils/simd_levels.cpp +334 -0
  356. data/vendor/faiss/faiss/utils/simd_levels.h +183 -0
  357. data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
  358. data/vendor/faiss/faiss/utils/utils.cpp +21 -14
  359. data/vendor/faiss/faiss/utils/utils.h +3 -3
  360. metadata +156 -42
  361. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
  362. data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
  363. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +0 -81
  364. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +0 -186
  365. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -216
  366. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -224
  367. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
  368. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
  369. data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
  370. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
  371. data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -228
  372. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
  373. data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
  374. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -450
  375. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
  376. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
  377. data/vendor/faiss/faiss/utils/simdlib.h +0 -42
  378. data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -296
  379. /data/vendor/faiss/faiss/{cppcontrib/factory_tools.h → factory_tools.h} +0 -0
@@ -7,15 +7,15 @@
7
7
 
8
8
  #pragma once
9
9
 
10
+ #include <memory>
10
11
  #include <vector>
11
12
 
12
13
  #include <faiss/IndexIVFFastScan.h>
13
14
  #include <faiss/IndexIVFRaBitQ.h>
14
15
  #include <faiss/IndexRaBitQFastScan.h>
15
- #include <faiss/impl/RaBitQStats.h>
16
16
  #include <faiss/impl/RaBitQUtils.h>
17
17
  #include <faiss/impl/RaBitQuantizer.h>
18
- #include <faiss/impl/simd_result_handlers.h>
18
+ #include <faiss/impl/fast_scan/rabitq_result_handler.h>
19
19
  #include <faiss/utils/AlignedTable.h>
20
20
  #include <faiss/utils/Heap.h>
21
21
 
@@ -55,17 +55,6 @@ struct IndexIVFRaBitQFastScan : IndexIVFFastScan {
55
55
  /// Use zero-centered scalar quantizer for queries
56
56
  bool centered = false;
57
57
 
58
- /// Per-vector auxiliary data (1-bit codes stored separately in `codes`)
59
- ///
60
- /// 1-bit codes (sign bits) are stored in the inherited `codes` array from
61
- /// IndexFastScan in packed FastScan format for SIMD processing.
62
- ///
63
- /// This flat_storage holds per-vector factors and refinement-bit codes:
64
- /// Layout for 1-bit: [SignBitFactors (8 bytes)]
65
- /// Layout for multi-bit: [SignBitFactorsWithError
66
- /// (12B)][ref_codes][ExtraBitsFactors (8B)]
67
- std::vector<uint8_t> flat_storage;
68
-
69
58
  // Constructors
70
59
 
71
60
  IndexIVFRaBitQFastScan();
@@ -93,17 +82,25 @@ struct IndexIVFRaBitQFastScan : IndexIVFFastScan {
93
82
  uint8_t* codes,
94
83
  bool include_listnos = false) const override;
95
84
 
96
- protected:
97
- /// Extract and store RaBitQ factors from encoded vectors
98
- void preprocess_code_metadata(
99
- idx_t n,
100
- const uint8_t* flat_codes,
101
- idx_t start_global_idx) override;
85
+ /// Packed code size: (d + 7) / 8 bytes (1-bit-per-dimension sign bits,
86
+ /// excluding factors)
87
+ size_t fast_scan_code_size() const override;
102
88
 
89
+ protected:
103
90
  /// Return code_size as stride to skip embedded factor data during packing
104
91
  size_t code_packing_stride() const override;
105
92
 
106
93
  public:
94
+ /// Return CodePackerRaBitQ with enlarged block size
95
+ CodePacker* get_CodePacker() const override;
96
+
97
+ /// Write per-vector auxiliary data into block auxiliary region
98
+ void postprocess_packed_codes(
99
+ idx_t list_no,
100
+ size_t list_offset,
101
+ size_t n_added,
102
+ const uint8_t* flat_codes) override;
103
+
107
104
  /// Reconstruct a single vector from an inverted list
108
105
  void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons)
109
106
  const override;
@@ -111,18 +108,32 @@ struct IndexIVFRaBitQFastScan : IndexIVFFastScan {
111
108
  /// Override sa_decode to handle RaBitQ reconstruction
112
109
  void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
113
110
 
114
- /// Compute storage size per vector in flat_storage based on nb_bits
111
+ /// Compute per-vector auxiliary storage size based on nb_bits
115
112
  size_t compute_per_vector_storage_size() const;
116
113
 
117
- private:
118
- /// Compute query factors and lookup table for a residual vector
119
- /// (similar to IndexRaBitQFastScan::compute_float_LUT)
114
+ /// Override: compute and quantize LUT per-query to avoid O(n*nprobe*M*16)
115
+ /// float table allocation.
116
+ void compute_LUT_uint8(
117
+ size_t n,
118
+ const float* x,
119
+ const CoarseQuantized& cq,
120
+ AlignedTable<uint8_t>& dis_tables,
121
+ AlignedTable<uint16_t>& biases,
122
+ float* normalizers,
123
+ const FastScanDistancePostProcessing& context) const override;
124
+
125
+ /// Compute residual, query factors, and float LUT in two passes over d.
120
126
  void compute_residual_LUT(
121
- const float* residual,
127
+ const float* query,
128
+ idx_t centroid_id,
122
129
  QueryFactorsData& query_factors,
123
130
  float* lut_out,
124
- const float* original_query = nullptr) const;
131
+ uint8_t qb_param,
132
+ bool centered_param,
133
+ std::vector<float>& rotated_q,
134
+ std::vector<float>& centroid_buf) const;
125
135
 
136
+ private:
126
137
  /// Decode FastScan code to RaBitQ residual vector with explicit
127
138
  /// dp_multiplier
128
139
  void decode_fastscan_to_residual(
@@ -154,99 +165,246 @@ struct IndexIVFRaBitQFastScan : IndexIVFFastScan {
154
165
  const IVFSearchParameters* params = nullptr,
155
166
  IndexIVFStats* stats = nullptr) const override;
156
167
 
157
- /// Override to create RaBitQ-specific handlers
158
- SIMDResultHandlerToFloat* make_knn_handler(
168
+ /// RaBitQ scanner via rabitq_ivf_make_knn_scanner
169
+ std::unique_ptr<FastScanCodeScanner> make_knn_scanner(
159
170
  bool is_max,
160
- int /* impl */,
161
171
  idx_t n,
162
172
  idx_t k,
163
173
  float* distances,
164
174
  idx_t* labels,
165
175
  const IDSelector* sel,
166
- const FastScanDistancePostProcessing& context,
167
- const float* normalizers = nullptr) const override;
168
-
169
- /** SIMD result handler for IndexIVFRaBitQFastScan that applies
170
- * RaBitQ-specific distance corrections during batch processing.
171
- *
172
- * This handler processes batches of 32 distance computations from SIMD
173
- * kernels, applies RaBitQ distance formula adjustments (factors and
174
- * normalizers), and immediately updates result heaps. This eliminates the
175
- * need for post-processing and provides significant performance benefits.
176
- *
177
- * Key optimizations:
178
- * - Direct heap integration with no intermediate result storage
179
- * - Batch-level computation of normalizers and query factors
180
- * - Specialized handling for both centered and non-centered quantization
181
- * modes
182
- * - Efficient inner product metric corrections
183
- * - Uses runtime boolean for multi-bit mode
184
- *
185
- * @tparam C Comparator type (CMin/CMax) for heap operations
186
- */
176
+ int impl = 0,
177
+ const FastScanDistancePostProcessing& context = {}) const override;
178
+
179
+ /// Get an InvertedListScanner for single-query scanning.
180
+ /// This provides compatibility with the standard IVF search interface
181
+ InvertedListScanner* get_InvertedListScanner(
182
+ bool store_pairs = false,
183
+ const IDSelector* sel = nullptr,
184
+ const IVFSearchParameters* params = nullptr) const override;
185
+
186
+ /// RaBitQ-specific result handler (defined in impl/fast_scan/)
187
187
  template <class C>
188
- struct IVFRaBitQHeapHandler
189
- : simd_result_handlers::ResultHandlerCompare<C, true> {
190
- const IndexIVFRaBitQFastScan* index;
191
- float* heap_distances; // [nq * k]
192
- int64_t* heap_labels; // [nq * k]
193
- const size_t nq, k;
194
- size_t current_list_no = 0;
195
- std::vector<int>
196
- probe_indices; // probe index for each query in current batch
197
- const FastScanDistancePostProcessing*
198
- context; // Processing context with query factors
199
- const bool is_multibit; // Whether to use multi-bit two-stage search
200
-
201
- // Use float-based comparator for heap operations
202
- using Cfloat = typename std::conditional<
203
- C::is_max,
204
- CMax<float, int64_t>,
205
- CMin<float, int64_t>>::type;
206
-
207
- IVFRaBitQHeapHandler(
208
- const IndexIVFRaBitQFastScan* idx,
209
- size_t nq_val,
210
- size_t k_val,
211
- float* distances,
212
- int64_t* labels,
213
- const FastScanDistancePostProcessing* ctx = nullptr,
214
- bool multibit = false);
215
-
216
- void handle(size_t q, size_t b, simd16uint16 d0, simd16uint16 d1)
217
- override;
218
-
219
- /// Override base class virtual method to receive context information
220
- void set_list_context(size_t list_no, const std::vector<int>& probe_map)
221
- override;
222
-
223
- void begin(const float* norms) override;
224
-
225
- void end() override;
226
-
227
- private:
228
- /// Compute full multi-bit distance for a candidate vector (multi-bit
229
- /// only)
230
- /// @param db_idx Global database vector index
231
- /// @param local_q Batch-local query index (for probe_indices access)
232
- /// @param global_q Global query index (for storage indexing)
233
- /// @param local_offset Offset within the current inverted list
234
- float compute_full_multibit_distance(
235
- size_t db_idx,
236
- size_t local_q,
237
- size_t global_q,
238
- size_t local_offset) const;
239
-
240
- /// Compute lower bound using 1-bit distance and error bound (multi-bit
241
- /// only)
242
- /// @param local_q Batch-local query index (for probe_indices access)
243
- /// @param global_q Global query index (for storage indexing)
244
- float compute_lower_bound(
245
- float dist_1bit,
246
- size_t db_idx,
247
- size_t local_q,
248
- size_t global_q) const;
249
- };
188
+ using IVFRaBitQHeapHandler = simd_result_handlers::IVFRaBitQHeapHandler<C>;
250
189
  };
251
190
 
191
+ /*********************************************************
192
+ * IVFRaBitQHeapHandler method bodies — defined here (after
193
+ * IndexIVFRaBitQFastScan is complete) to break the circular
194
+ * dependency between rabitq_result_handler.h and this header.
195
+ *********************************************************/
196
+
197
+ namespace simd_result_handlers {
198
+
199
+ template <class C, SIMDLevel SL>
200
+ IVFRaBitQHeapHandler<C, SL>::IVFRaBitQHeapHandler(
201
+ const IndexIVFRaBitQFastScan* idx,
202
+ size_t nq_val,
203
+ size_t k_val,
204
+ float* distances,
205
+ int64_t* labels,
206
+ const IDSelector* sel,
207
+ const FastScanDistancePostProcessing* ctx,
208
+ bool multibit)
209
+ : ResultHandlerCompare<C, true, SL>(nq_val, 0, sel),
210
+ index(idx),
211
+ heap_distances(distances),
212
+ heap_labels(labels),
213
+ nq(nq_val),
214
+ k(k_val),
215
+ context(ctx),
216
+ is_multibit(multibit),
217
+ storage_size(idx->compute_per_vector_storage_size()),
218
+ packed_block_size(((idx->M2 + 1) / 2) * idx->bbs),
219
+ full_block_size(idx->get_block_stride()),
220
+ unpack_buf((idx->d + 7) / 8) {
221
+ current_list_no = 0;
222
+ probe_indices.clear();
223
+ for (int64_t q = 0; q < static_cast<int64_t>(nq); q++) {
224
+ heap_heapify<Cfloat>(k, heap_distances + q * k, heap_labels + q * k);
225
+ }
226
+ }
227
+
228
+ template <class C, SIMDLevel SL>
229
+ void IVFRaBitQHeapHandler<C, SL>::handle(
230
+ size_t q,
231
+ size_t b,
232
+ simd16uint16 d0,
233
+ simd16uint16 d1) {
234
+ size_t local_q = q;
235
+ this->adjust_with_origin(q, d0, d1);
236
+
237
+ ALIGNED(32) uint16_t d32tab[32];
238
+ d0.store(d32tab);
239
+ d1.store(d32tab + 16);
240
+
241
+ float* const heap_dis = heap_distances + q * k;
242
+ int64_t* const heap_ids = heap_labels + q * k;
243
+
244
+ FAISS_THROW_IF_NOT_FMT(
245
+ !probe_indices.empty() && local_q < probe_indices.size(),
246
+ "set_list_context() must be called before handle() - probe_indices size: %zu, local_q: %zu, global_q: %zu",
247
+ probe_indices.size(),
248
+ local_q,
249
+ q);
250
+
251
+ if (!context || !context->query_factors) {
252
+ FAISS_THROW_MSG(
253
+ "Query factors not available: FastScanDistancePostProcessing with query_factors required");
254
+ }
255
+
256
+ const size_t probe_rank = probe_indices[local_q];
257
+ const size_t storage_idx = q * cached_nprobe + probe_rank;
258
+ const auto& query_factors = context->query_factors[storage_idx];
259
+
260
+ const float one_a =
261
+ this->normalizers ? (1.0f / this->normalizers[2 * q]) : 1.0f;
262
+ const float bias = this->normalizers ? this->normalizers[2 * q + 1] : 0.0f;
263
+
264
+ const uint64_t idx_base = this->j0 + b * 32;
265
+ if (idx_base >= this->ntotal) {
266
+ return;
267
+ }
268
+ const size_t max_positions = std::min<size_t>(32, this->ntotal - idx_base);
269
+
270
+ // Hoist aux pointer base out of loop: all 32 elements in this block share
271
+ // the same block base. Only the per-element offset (j * storage_size)
272
+ // varies.
273
+ const uint8_t* aux_base = this->list_codes_ptr +
274
+ (idx_base / index->bbs) * full_block_size + packed_block_size;
275
+
276
+ // Cache index fields used in the inner loop.
277
+ // Use overridden qb/centered from context if provided, else index defaults.
278
+ const bool centered = context->qb > 0 ? context->centered : index->centered;
279
+ const size_t qb = context->qb > 0 ? context->qb : index->qb;
280
+ const size_t d = index->d;
281
+
282
+ for (size_t j = 0; j < max_positions; j++) {
283
+ const int64_t result_id = this->adjust_id(b, j);
284
+ if (result_id < 0) {
285
+ continue;
286
+ }
287
+ if (this->sel != nullptr && !this->sel->is_member(result_id)) {
288
+ continue;
289
+ }
290
+
291
+ this->scan_cnt++;
292
+
293
+ const float normalized_distance = d32tab[j] * one_a + bias;
294
+ const uint8_t* base_ptr = aux_base + j * storage_size;
295
+
296
+ if (is_multibit) {
297
+ const SignBitFactorsWithError& full_factors =
298
+ *reinterpret_cast<const SignBitFactorsWithError*>(base_ptr);
299
+
300
+ float dist_1bit = rabitq_utils::compute_1bit_adjusted_distance(
301
+ normalized_distance,
302
+ full_factors,
303
+ query_factors,
304
+ centered,
305
+ qb,
306
+ d);
307
+
308
+ bool should_refine = rabitq_utils::should_refine_candidate(
309
+ dist_1bit,
310
+ full_factors.f_error,
311
+ query_factors.g_error,
312
+ heap_dis[0],
313
+ is_similarity);
314
+ if (should_refine) {
315
+ size_t local_offset = idx_base + j;
316
+ float dist_full = compute_full_multibit_distance(
317
+ local_q, q, local_offset, base_ptr);
318
+ if (Cfloat::cmp(heap_dis[0], dist_full)) {
319
+ heap_replace_top<Cfloat>(
320
+ k, heap_dis, heap_ids, dist_full, result_id);
321
+ nup++;
322
+ }
323
+ }
324
+ } else {
325
+ const auto& db_factors =
326
+ *reinterpret_cast<const SignBitFactors*>(base_ptr);
327
+ float adjusted_distance =
328
+ rabitq_utils::compute_1bit_adjusted_distance(
329
+ normalized_distance,
330
+ db_factors,
331
+ query_factors,
332
+ centered,
333
+ qb,
334
+ d);
335
+ if (Cfloat::cmp(heap_dis[0], adjusted_distance)) {
336
+ heap_replace_top<Cfloat>(
337
+ k, heap_dis, heap_ids, adjusted_distance, result_id);
338
+ nup++;
339
+ }
340
+ }
341
+ }
342
+ }
343
+
344
+ template <class C, SIMDLevel SL>
345
+ void IVFRaBitQHeapHandler<C, SL>::set_list_context(
346
+ size_t list_no,
347
+ const std::vector<int>& probe_map) {
348
+ current_list_no = list_no;
349
+ probe_indices = probe_map;
350
+ cached_nprobe =
351
+ context && context->nprobe > 0 ? context->nprobe : index->nprobe;
352
+ is_similarity = index->metric_type == MetricType::METRIC_INNER_PRODUCT;
353
+ if (index->invlists) {
354
+ this->list_codes_ptr = index->invlists->get_codes(list_no);
355
+ }
356
+ }
357
+
358
+ template <class C, SIMDLevel SL>
359
+ void IVFRaBitQHeapHandler<C, SL>::begin(const float* norms) {
360
+ this->normalizers = norms;
361
+ }
362
+
363
+ template <class C, SIMDLevel SL>
364
+ void IVFRaBitQHeapHandler<C, SL>::end() {
365
+ #pragma omp parallel for
366
+ for (int64_t q = 0; q < static_cast<int64_t>(nq); q++) {
367
+ heap_reorder<Cfloat>(k, heap_distances + q * k, heap_labels + q * k);
368
+ }
369
+ }
370
+
371
+ template <class C, SIMDLevel SL>
372
+ float IVFRaBitQHeapHandler<C, SL>::compute_full_multibit_distance(
373
+ size_t local_q,
374
+ size_t global_q,
375
+ size_t local_offset,
376
+ const uint8_t* aux_ptr) {
377
+ const size_t ex_bits = index->rabitq.nb_bits - 1;
378
+ const size_t dim = index->d;
379
+
380
+ const size_t ex_code_size = (dim * ex_bits + 7) / 8;
381
+ const uint8_t* ex_code = aux_ptr + sizeof(SignBitFactorsWithError);
382
+ const ExtraBitsFactors& ex_fac = *reinterpret_cast<const ExtraBitsFactors*>(
383
+ aux_ptr + sizeof(SignBitFactorsWithError) + ex_code_size);
384
+
385
+ const size_t probe_rank = probe_indices[local_q];
386
+ const size_t storage_idx_val = global_q * cached_nprobe + probe_rank;
387
+ const auto& query_factors = context->query_factors[storage_idx_val];
388
+
389
+ rabitq_utils::unpack_sign_bits_from_packed(
390
+ this->list_codes_ptr,
391
+ index->bbs,
392
+ index->M2,
393
+ local_offset,
394
+ full_block_size,
395
+ unpack_buf.data());
396
+
397
+ return rabitq_utils::compute_full_multibit_distance(
398
+ unpack_buf.data(),
399
+ ex_code,
400
+ ex_fac,
401
+ query_factors.rotated_q.data(),
402
+ is_similarity ? query_factors.q_dot_c : query_factors.qr_to_c_L2sqr,
403
+ dim,
404
+ ex_bits,
405
+ index->metric_type);
406
+ }
407
+
408
+ } // namespace simd_result_handlers
409
+
252
410
  } // namespace faiss
@@ -10,6 +10,7 @@
10
10
  #include <faiss/IndexIVFSpectralHash.h>
11
11
 
12
12
  #include <algorithm>
13
+ #include <cmath>
13
14
  #include <cstdint>
14
15
  #include <memory>
15
16
 
@@ -20,27 +21,39 @@
20
21
  #include <faiss/impl/FaissAssert.h>
21
22
  #include <faiss/utils/hamming.h>
22
23
 
24
+ #include <faiss/impl/simd_dispatch.h>
25
+
26
+ // Scalar (NONE) fallback for dynamic dispatch
27
+ #define THE_SIMD_LEVEL SIMDLevel::NONE
28
+ // NOLINTNEXTLINE(facebook-hte-InlineHeader)
29
+ // NOLINTNEXTLINE(facebook-hte-InlineHeader)
30
+ #include <faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h>
31
+ #include <faiss/utils/hamming_distance/hamming_computer-generic.h>
32
+ #undef THE_SIMD_LEVEL
33
+
23
34
  namespace faiss {
24
35
 
25
36
  IndexIVFSpectralHash::IndexIVFSpectralHash(
26
- Index* quantizer,
27
- size_t d,
28
- size_t nlist,
29
- int nbit,
30
- float period,
31
- bool own_invlists)
37
+ Index* quantizer_in,
38
+ size_t d_in,
39
+ size_t nlist_in,
40
+ int nbit_in,
41
+ float period_in,
42
+ bool own_invlists_in)
32
43
  : IndexIVF(
33
- quantizer,
34
- d,
35
- nlist,
36
- (nbit + 7) / 8,
44
+ quantizer_in,
45
+ d_in,
46
+ nlist_in,
47
+ (nbit_in + 7) / 8,
37
48
  METRIC_L2,
38
- own_invlists),
39
- nbit(nbit),
40
- period(period) {
41
- RandomRotationMatrix* rr = new RandomRotationMatrix(d, nbit);
49
+ own_invlists_in),
50
+ nbit(nbit_in),
51
+ period(period_in) {
52
+ auto rr = std::make_unique<RandomRotationMatrix>(
53
+ static_cast<int>(d_in), nbit_in);
42
54
  rr->init(1234);
43
- vt = rr;
55
+ vt = rr.release();
56
+ own_fields = true;
44
57
  is_trained = false;
45
58
  by_residual = false;
46
59
  }
@@ -71,7 +84,7 @@ float median(size_t n, float* x) {
71
84
  void IndexIVFSpectralHash::train_encoder(
72
85
  idx_t n,
73
86
  const float* x,
74
- const idx_t* assign) {
87
+ const idx_t* /*assign*/) {
75
88
  if (!vt->is_trained) {
76
89
  vt->train(n, x);
77
90
  }
@@ -102,13 +115,13 @@ void IndexIVFSpectralHash::train_encoder(
102
115
  quantizer->assign(n, x, idx.get());
103
116
 
104
117
  std::vector<size_t> sizes(nlist + 1);
105
- for (size_t i = 0; i < n; i++) {
118
+ for (idx_t i = 0; i < n; i++) {
106
119
  FAISS_THROW_IF_NOT(idx[i] >= 0);
107
120
  sizes[idx[i]]++;
108
121
  }
109
122
 
110
123
  size_t ofs = 0;
111
- for (int j = 0; j < nlist; j++) {
124
+ for (size_t j = 0; j < nlist; j++) {
112
125
  size_t o0 = ofs;
113
126
  ofs += sizes[j];
114
127
  sizes[j] = o0;
@@ -120,9 +133,9 @@ void IndexIVFSpectralHash::train_encoder(
120
133
  // transpose + reorder
121
134
  std::unique_ptr<float[]> xo(new float[n * nbit]);
122
135
 
123
- for (size_t i = 0; i < n; i++) {
136
+ for (idx_t i = 0; i < n; i++) {
124
137
  size_t idest = sizes[idx[i]]++;
125
- for (size_t j = 0; j < nbit; j++) {
138
+ for (size_t j = 0; j < static_cast<size_t>(nbit); j++) {
126
139
  xo[idest + n * j] = xt[i * nbit + j];
127
140
  }
128
141
  }
@@ -130,7 +143,7 @@ void IndexIVFSpectralHash::train_encoder(
130
143
  trained.resize(n * nbit);
131
144
  // compute medians
132
145
  #pragma omp for
133
- for (int i = 0; i < nlist; i++) {
146
+ for (idx_t i = 0; i < static_cast<idx_t>(nlist); i++) {
134
147
  size_t i0 = i == 0 ? 0 : sizes[i - 1];
135
148
  size_t i1 = sizes[i];
136
149
  for (int j = 0; j < nbit; j++) {
@@ -157,7 +170,7 @@ void binarize_with_freq(
157
170
  memset(codes, 0, (nbit + 7) / 8);
158
171
  for (size_t i = 0; i < nbit; i++) {
159
172
  float xf = (x[i] - c[i]);
160
- int64_t xi = int64_t(floor(xf * freq));
173
+ int64_t xi = int64_t(std::floor(xf * freq));
161
174
  int64_t bit = xi & 1;
162
175
  codes[i >> 3] |= bit << (i & 7);
163
176
  }
@@ -205,114 +218,15 @@ void IndexIVFSpectralHash::encode_vectors(
205
218
  }
206
219
  }
207
220
 
208
- namespace {
209
-
210
- template <class HammingComputer>
211
- struct IVFScanner : InvertedListScanner {
212
- // copied from index structure
213
- const IndexIVFSpectralHash* index;
214
- size_t nbit;
215
-
216
- float period, freq;
217
- std::vector<float> q;
218
- std::vector<float> zero;
219
- std::vector<uint8_t> qcode;
220
- HammingComputer hc;
221
-
222
- IVFScanner(const IndexIVFSpectralHash* index, bool store_pairs)
223
- : index(index),
224
- nbit(index->nbit),
225
- period(index->period),
226
- freq(2.0 / index->period),
227
- q(nbit),
228
- zero(nbit),
229
- qcode(index->code_size),
230
- hc(qcode.data(), index->code_size) {
231
- this->store_pairs = store_pairs;
232
- this->code_size = index->code_size;
233
- this->keep_max = is_similarity_metric(index->metric_type);
234
- }
235
-
236
- void set_query(const float* query) override {
237
- FAISS_THROW_IF_NOT(query);
238
- FAISS_THROW_IF_NOT(q.size() == nbit);
239
- index->vt->apply_noalloc(1, query, q.data());
240
-
241
- if (index->threshold_type == IndexIVFSpectralHash::Thresh_global) {
242
- binarize_with_freq(nbit, freq, q.data(), zero.data(), qcode.data());
243
- hc.set(qcode.data(), code_size);
244
- }
245
- }
246
-
247
- void set_list(idx_t list_no, float /*coarse_dis*/) override {
248
- this->list_no = list_no;
249
- if (index->threshold_type != IndexIVFSpectralHash::Thresh_global) {
250
- const float* c = index->trained.data() + list_no * nbit;
251
- binarize_with_freq(nbit, freq, q.data(), c, qcode.data());
252
- hc.set(qcode.data(), code_size);
253
- }
254
- }
255
-
256
- float distance_to_code(const uint8_t* code) const final {
257
- return hc.hamming(code);
258
- }
259
-
260
- size_t scan_codes(
261
- size_t list_size,
262
- const uint8_t* codes,
263
- const idx_t* ids,
264
- float* simi,
265
- idx_t* idxi,
266
- size_t k) const override {
267
- size_t nup = 0;
268
- for (size_t j = 0; j < list_size; j++) {
269
- float dis = hc.hamming(codes);
270
-
271
- if (dis < simi[0]) {
272
- int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
273
- maxheap_replace_top(k, simi, idxi, dis, id);
274
- nup++;
275
- }
276
- codes += code_size;
277
- }
278
- return nup;
279
- }
280
-
281
- void scan_codes_range(
282
- size_t list_size,
283
- const uint8_t* codes,
284
- const idx_t* ids,
285
- float radius,
286
- RangeQueryResult& res) const override {
287
- for (size_t j = 0; j < list_size; j++) {
288
- float dis = hc.hamming(codes);
289
- if (dis < radius) {
290
- int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
291
- res.add(dis, id);
292
- }
293
- codes += code_size;
294
- }
295
- }
296
- };
297
-
298
- struct BuildScanner {
299
- using T = InvertedListScanner*;
300
-
301
- template <class HammingComputer>
302
- static T f(const IndexIVFSpectralHash* index, bool store_pairs) {
303
- return new IVFScanner<HammingComputer>(index, store_pairs);
304
- }
305
- };
306
-
307
- } // anonymous namespace
308
-
309
221
  InvertedListScanner* IndexIVFSpectralHash::get_InvertedListScanner(
310
222
  bool store_pairs,
311
223
  const IDSelector* sel,
312
224
  const IVFSearchParameters*) const {
313
225
  FAISS_THROW_IF_NOT(!sel);
314
- BuildScanner bs;
315
- return dispatch_HammingComputer(code_size, bs, this, store_pairs);
226
+ return with_simd_level([&]<SIMDLevel SL>() {
227
+ return make_spectral_hash_scanner_fixSL<SL>(
228
+ code_size, this, store_pairs);
229
+ });
316
230
  }
317
231
 
318
232
  void IndexIVFSpectralHash::replace_vt(VectorTransform* vt_in, bool own) {
@@ -323,8 +237,8 @@ void IndexIVFSpectralHash::replace_vt(VectorTransform* vt_in, bool own) {
323
237
  }
324
238
  vt = vt_in;
325
239
  threshold_type = Thresh_global;
326
- is_trained = quantizer->is_trained && quantizer->ntotal == nlist &&
327
- vt->is_trained;
240
+ is_trained = quantizer->is_trained &&
241
+ quantizer->ntotal == static_cast<idx_t>(nlist) && vt->is_trained;
328
242
  own_fields = own;
329
243
  }
330
244
 
@@ -32,7 +32,7 @@ struct IndexIVFSpectralHash : IndexIVF {
32
32
  /// transformation from d to nbit dim
33
33
  VectorTransform* vt = nullptr;
34
34
  /// own the vt
35
- bool own_fields = true;
35
+ bool own_fields = false;
36
36
 
37
37
  /// nb of bits of the binary signature
38
38
  int nbit = 0;