faiss 0.5.3 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (379)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/ext/faiss/ext.cpp +1 -1
  4. data/ext/faiss/extconf.rb +4 -4
  5. data/ext/faiss/index.cpp +63 -45
  6. data/ext/faiss/index_binary.cpp +37 -27
  7. data/ext/faiss/kmeans.cpp +9 -8
  8. data/ext/faiss/pca_matrix.cpp +9 -7
  9. data/ext/faiss/product_quantizer.cpp +13 -11
  10. data/ext/faiss/utils.cpp +4 -2
  11. data/ext/faiss/utils.h +4 -0
  12. data/lib/faiss/version.rb +1 -1
  13. data/lib/faiss.rb +1 -1
  14. data/vendor/faiss/faiss/AutoTune.cpp +214 -82
  15. data/vendor/faiss/faiss/AutoTune.h +14 -1
  16. data/vendor/faiss/faiss/Clustering.cpp +97 -249
  17. data/vendor/faiss/faiss/Clustering.h +18 -0
  18. data/vendor/faiss/faiss/IVFlib.cpp +67 -44
  19. data/vendor/faiss/faiss/Index.cpp +25 -12
  20. data/vendor/faiss/faiss/Index.h +26 -4
  21. data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
  22. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +68 -61
  23. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
  24. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
  25. data/vendor/faiss/faiss/IndexBinary.cpp +6 -3
  26. data/vendor/faiss/faiss/IndexBinary.h +4 -4
  27. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
  28. data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
  29. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
  30. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +92 -95
  31. data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
  32. data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
  33. data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
  34. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +120 -414
  35. data/vendor/faiss/faiss/IndexFastScan.cpp +105 -129
  36. data/vendor/faiss/faiss/IndexFastScan.h +35 -24
  37. data/vendor/faiss/faiss/IndexFlat.cpp +216 -152
  38. data/vendor/faiss/faiss/IndexFlat.h +32 -14
  39. data/vendor/faiss/faiss/IndexFlatCodes.cpp +88 -41
  40. data/vendor/faiss/faiss/IndexFlatCodes.h +7 -1
  41. data/vendor/faiss/faiss/IndexHNSW.cpp +299 -187
  42. data/vendor/faiss/faiss/IndexHNSW.h +30 -14
  43. data/vendor/faiss/faiss/IndexIDMap.cpp +26 -22
  44. data/vendor/faiss/faiss/IndexIDMap.h +9 -7
  45. data/vendor/faiss/faiss/IndexIVF.cpp +535 -405
  46. data/vendor/faiss/faiss/IndexIVF.h +47 -16
  47. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
  48. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +105 -99
  49. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +6 -3
  50. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +379 -249
  51. data/vendor/faiss/faiss/IndexIVFFastScan.h +65 -60
  52. data/vendor/faiss/faiss/IndexIVFFlat.cpp +41 -124
  53. data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
  54. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +89 -138
  55. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
  56. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
  57. data/vendor/faiss/faiss/IndexIVFPQ.cpp +77 -907
  58. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +184 -122
  59. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
  60. data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -18
  61. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +59 -60
  62. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +4 -3
  63. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +564 -416
  64. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +269 -111
  65. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
  66. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  67. data/vendor/faiss/faiss/IndexLSH.cpp +44 -25
  68. data/vendor/faiss/faiss/IndexLattice.cpp +41 -36
  69. data/vendor/faiss/faiss/IndexNNDescent.cpp +37 -21
  70. data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
  71. data/vendor/faiss/faiss/IndexNSG.cpp +40 -23
  72. data/vendor/faiss/faiss/IndexNSG.h +0 -2
  73. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +32 -12
  74. data/vendor/faiss/faiss/IndexPQ.cpp +129 -213
  75. data/vendor/faiss/faiss/IndexPQ.h +3 -2
  76. data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
  77. data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
  78. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
  79. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  80. data/vendor/faiss/faiss/IndexRaBitQ.cpp +31 -43
  81. data/vendor/faiss/faiss/IndexRaBitQ.h +4 -3
  82. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +135 -317
  83. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +192 -34
  84. data/vendor/faiss/faiss/IndexRefine.cpp +30 -55
  85. data/vendor/faiss/faiss/IndexRefine.h +4 -4
  86. data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
  87. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
  88. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
  89. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
  90. data/vendor/faiss/faiss/IndexShards.cpp +13 -13
  91. data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
  92. data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
  93. data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
  94. data/vendor/faiss/faiss/MetaIndexes.h +1 -1
  95. data/vendor/faiss/faiss/MetricType.h +29 -6
  96. data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
  97. data/vendor/faiss/faiss/SuperKMeans.h +97 -0
  98. data/vendor/faiss/faiss/VectorTransform.cpp +349 -141
  99. data/vendor/faiss/faiss/VectorTransform.h +39 -16
  100. data/vendor/faiss/faiss/build.cpp +23 -0
  101. data/vendor/faiss/faiss/build.h +15 -0
  102. data/vendor/faiss/faiss/clone_index.cpp +55 -51
  103. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
  104. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
  105. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
  106. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
  107. data/vendor/faiss/faiss/{cppcontrib/factory_tools.cpp → factory_tools.cpp} +6 -1
  108. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
  109. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
  110. data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
  111. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
  112. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
  113. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
  114. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
  115. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
  116. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
  117. data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
  118. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
  119. data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
  120. data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
  121. data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
  122. data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
  123. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
  124. data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
  125. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
  126. data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
  127. data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
  128. data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
  129. data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
  130. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +64 -34
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
  132. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -28
  134. data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
  135. data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
  136. data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +367 -0
  137. data/vendor/faiss/faiss/impl/ClusteringInitialization.h +107 -0
  138. data/vendor/faiss/faiss/impl/CodePacker.cpp +7 -3
  139. data/vendor/faiss/faiss/impl/CodePacker.h +11 -3
  140. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +83 -0
  141. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.h +47 -0
  142. data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
  143. data/vendor/faiss/faiss/impl/FaissAssert.h +64 -3
  144. data/vendor/faiss/faiss/impl/FaissException.h +50 -3
  145. data/vendor/faiss/faiss/impl/HNSW.cpp +117 -351
  146. data/vendor/faiss/faiss/impl/HNSW.h +21 -40
  147. data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
  148. data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
  149. data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
  150. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +114 -102
  151. data/vendor/faiss/faiss/impl/NNDescent.cpp +63 -26
  152. data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
  153. data/vendor/faiss/faiss/impl/NSG.cpp +44 -26
  154. data/vendor/faiss/faiss/impl/NSG.h +20 -10
  155. data/vendor/faiss/faiss/impl/Panorama.cpp +76 -52
  156. data/vendor/faiss/faiss/impl/Panorama.h +265 -78
  157. data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
  158. data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
  159. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +62 -37
  160. data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
  161. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
  162. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
  163. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +99 -80
  164. data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
  165. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +135 -37
  166. data/vendor/faiss/faiss/impl/RaBitQUtils.h +148 -21
  167. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +298 -301
  168. data/vendor/faiss/faiss/impl/RaBitQuantizer.h +3 -10
  169. data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +15 -41
  170. data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +0 -4
  171. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +40 -32
  172. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  173. data/vendor/faiss/faiss/impl/ResultHandler.h +218 -113
  174. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +119 -2362
  175. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -3
  176. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
  177. data/vendor/faiss/faiss/impl/VisitedTable.cpp +42 -0
  178. data/vendor/faiss/faiss/impl/VisitedTable.h +76 -0
  179. data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
  180. data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
  181. data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
  182. data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
  183. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
  184. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
  185. data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
  186. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
  187. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
  188. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
  189. data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
  190. data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
  191. data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
  192. data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
  193. data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
  194. data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
  195. data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
  196. data/vendor/faiss/faiss/impl/expanded_scanners.h +163 -0
  197. data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
  198. data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
  199. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
  200. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
  201. data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
  202. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
  203. data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +176 -4
  204. data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
  205. data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
  206. data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
  207. data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
  208. data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
  209. data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
  210. data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -348
  211. data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
  212. data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
  213. data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +290 -142
  214. data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
  215. data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
  216. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
  217. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
  218. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
  219. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
  220. data/vendor/faiss/faiss/impl/index_read.cpp +1950 -505
  221. data/vendor/faiss/faiss/impl/index_read_utils.h +1 -2
  222. data/vendor/faiss/faiss/impl/index_write.cpp +112 -21
  223. data/vendor/faiss/faiss/impl/io.cpp +6 -6
  224. data/vendor/faiss/faiss/impl/io_macros.h +33 -16
  225. data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
  226. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +81 -40
  227. data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
  228. data/vendor/faiss/faiss/impl/mapped_io.cpp +15 -8
  229. data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
  230. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
  231. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
  232. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
  233. data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
  234. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
  235. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
  236. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
  237. data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx2.h → pq_code_distance/pq_code_distance-avx2.h} +43 -220
  238. data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx512.h → pq_code_distance/pq_code_distance-avx512.h} +25 -112
  239. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +59 -0
  240. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
  241. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +256 -0
  242. data/vendor/faiss/faiss/impl/{code_distance/code_distance-sve.h → pq_code_distance/pq_code_distance-sve.cpp} +57 -146
  243. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
  244. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +320 -483
  245. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
  246. data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +121 -0
  247. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +137 -0
  248. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +371 -0
  249. data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +190 -0
  250. data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +94 -0
  251. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +603 -0
  252. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +597 -0
  253. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +388 -0
  254. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +630 -0
  255. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
  256. data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +387 -0
  257. data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +54 -0
  258. data/vendor/faiss/faiss/impl/simd_dispatch.h +173 -0
  259. data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
  260. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +274 -171
  261. data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
  262. data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
  263. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
  264. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +275 -217
  265. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
  266. data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
  267. data/vendor/faiss/faiss/impl/svs_io.h +8 -2
  268. data/vendor/faiss/faiss/index_factory.cpp +115 -28
  269. data/vendor/faiss/faiss/index_io.h +53 -3
  270. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +73 -20
  271. data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
  272. data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
  273. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
  274. data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
  275. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
  276. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +14 -14
  277. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  278. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +9 -19
  279. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
  280. data/vendor/faiss/faiss/svs/IndexSVSFlat.h +2 -0
  281. data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
  282. data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
  283. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
  284. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
  285. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
  286. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
  287. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
  288. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +19 -2
  289. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
  290. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +19 -2
  291. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +14 -0
  292. data/vendor/faiss/faiss/utils/Heap.cpp +56 -10
  293. data/vendor/faiss/faiss/utils/Heap.h +21 -0
  294. data/vendor/faiss/faiss/utils/NeuralNet.cpp +54 -40
  295. data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
  296. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
  297. data/vendor/faiss/faiss/utils/distances.cpp +507 -559
  298. data/vendor/faiss/faiss/utils/distances.h +118 -1
  299. data/vendor/faiss/faiss/utils/distances_dispatch.h +250 -0
  300. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
  301. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
  302. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
  303. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
  304. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
  305. data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
  306. data/vendor/faiss/faiss/utils/distances_simd.cpp +72 -3681
  307. data/vendor/faiss/faiss/utils/extra_distances.cpp +60 -102
  308. data/vendor/faiss/faiss/utils/extra_distances.h +79 -7
  309. data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
  310. data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
  311. data/vendor/faiss/faiss/utils/hamming.h +92 -2
  312. data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
  313. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
  314. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
  315. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
  316. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
  317. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
  318. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
  319. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
  320. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
  321. data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
  322. data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
  323. data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
  324. data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
  325. data/vendor/faiss/faiss/utils/partitioning.h +31 -0
  326. data/vendor/faiss/faiss/utils/popcount.h +29 -0
  327. data/vendor/faiss/faiss/utils/pq_code_distance.h +251 -0
  328. data/vendor/faiss/faiss/utils/prefetch.h +2 -2
  329. data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
  330. data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
  331. data/vendor/faiss/faiss/utils/rabitq_simd.h +124 -343
  332. data/vendor/faiss/faiss/utils/random.cpp +6 -6
  333. data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
  334. data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +154 -0
  335. data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +777 -0
  336. data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +306 -0
  337. data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +1431 -0
  338. data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +1095 -0
  339. data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
  340. data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
  341. data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +392 -0
  342. data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
  343. data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
  344. data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
  345. data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
  346. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
  347. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
  348. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
  349. data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
  350. data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
  351. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
  352. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
  353. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
  354. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
  355. data/vendor/faiss/faiss/utils/simd_levels.cpp +334 -0
  356. data/vendor/faiss/faiss/utils/simd_levels.h +183 -0
  357. data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
  358. data/vendor/faiss/faiss/utils/utils.cpp +21 -14
  359. data/vendor/faiss/faiss/utils/utils.h +3 -3
  360. metadata +156 -42
  361. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
  362. data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
  363. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +0 -81
  364. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +0 -186
  365. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -216
  366. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -224
  367. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
  368. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
  369. data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
  370. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
  371. data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -228
  372. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
  373. data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
  374. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -450
  375. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
  376. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
  377. data/vendor/faiss/faiss/utils/simdlib.h +0 -42
  378. data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -296
  379. data/vendor/faiss/faiss/{cppcontrib/factory_tools.h → factory_tools.h} +0 -0
@@ -1,186 +0,0 @@
1
- /*
2
- * Copyright (c) Meta Platforms, Inc. and affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
- #pragma once
9
-
10
- #include <faiss/impl/platform_macros.h>
11
-
12
- // This directory contains functions to compute a distance
13
- // from a given PQ code to a query vector, given that the
14
- // distances to a query vector for pq.M codebooks are precomputed.
15
- //
16
- // The code was originally the part of IndexIVFPQ.cpp.
17
- // The baseline implementation can be found in
18
- // code_distance-generic.h, distance_single_code_generic().
19
-
20
- // The reason for this somewhat unusual structure is that
21
- // custom implementations may need to fall off to generic
22
- // implementation in certain cases. So, say, avx2 header file
23
- // needs to reference the generic header file. This is
24
- // why the names of the functions for custom implementations
25
- // have this _generic or _avx2 suffix.
26
-
27
- #ifdef __AVX2__
28
-
29
- #include <faiss/impl/code_distance/code_distance-avx2.h>
30
-
31
- namespace faiss {
32
-
33
- template <typename PQDecoderT>
34
- inline float distance_single_code(
35
- // number of subquantizers
36
- const size_t M,
37
- // number of bits per quantization index
38
- const size_t nbits,
39
- // precomputed distances, layout (M, ksub)
40
- const float* sim_table,
41
- // the code
42
- const uint8_t* code) {
43
- return distance_single_code_avx2<PQDecoderT>(M, nbits, sim_table, code);
44
- }
45
-
46
- template <typename PQDecoderT>
47
- inline void distance_four_codes(
48
- // number of subquantizers
49
- const size_t M,
50
- // number of bits per quantization index
51
- const size_t nbits,
52
- // precomputed distances, layout (M, ksub)
53
- const float* sim_table,
54
- // codes
55
- const uint8_t* __restrict code0,
56
- const uint8_t* __restrict code1,
57
- const uint8_t* __restrict code2,
58
- const uint8_t* __restrict code3,
59
- // computed distances
60
- float& result0,
61
- float& result1,
62
- float& result2,
63
- float& result3) {
64
- distance_four_codes_avx2<PQDecoderT>(
65
- M,
66
- nbits,
67
- sim_table,
68
- code0,
69
- code1,
70
- code2,
71
- code3,
72
- result0,
73
- result1,
74
- result2,
75
- result3);
76
- }
77
-
78
- } // namespace faiss
79
-
80
- #elif defined(__ARM_FEATURE_SVE)
81
-
82
- #include <faiss/impl/code_distance/code_distance-sve.h>
83
-
84
- namespace faiss {
85
-
86
- template <typename PQDecoderT>
87
- inline float distance_single_code(
88
- // the product quantizer
89
- const size_t M,
90
- // number of bits per quantization index
91
- const size_t nbits,
92
- // precomputed distances, layout (M, ksub)
93
- const float* sim_table,
94
- // the code
95
- const uint8_t* code) {
96
- return distance_single_code_sve<PQDecoderT>(M, nbits, sim_table, code);
97
- }
98
-
99
- template <typename PQDecoderT>
100
- inline void distance_four_codes(
101
- // the product quantizer
102
- const size_t M,
103
- // number of bits per quantization index
104
- const size_t nbits,
105
- // precomputed distances, layout (M, ksub)
106
- const float* sim_table,
107
- // codes
108
- const uint8_t* __restrict code0,
109
- const uint8_t* __restrict code1,
110
- const uint8_t* __restrict code2,
111
- const uint8_t* __restrict code3,
112
- // computed distances
113
- float& result0,
114
- float& result1,
115
- float& result2,
116
- float& result3) {
117
- distance_four_codes_sve<PQDecoderT>(
118
- M,
119
- nbits,
120
- sim_table,
121
- code0,
122
- code1,
123
- code2,
124
- code3,
125
- result0,
126
- result1,
127
- result2,
128
- result3);
129
- }
130
-
131
- } // namespace faiss
132
-
133
- #else
134
-
135
- #include <faiss/impl/code_distance/code_distance-generic.h>
136
-
137
- namespace faiss {
138
-
139
- template <typename PQDecoderT>
140
- inline float distance_single_code(
141
- // number of subquantizers
142
- const size_t M,
143
- // number of bits per quantization index
144
- const size_t nbits,
145
- // precomputed distances, layout (M, ksub)
146
- const float* sim_table,
147
- // the code
148
- const uint8_t* code) {
149
- return distance_single_code_generic<PQDecoderT>(M, nbits, sim_table, code);
150
- }
151
-
152
- template <typename PQDecoderT>
153
- inline void distance_four_codes(
154
- // number of subquantizers
155
- const size_t M,
156
- // number of bits per quantization index
157
- const size_t nbits,
158
- // precomputed distances, layout (M, ksub)
159
- const float* sim_table,
160
- // codes
161
- const uint8_t* __restrict code0,
162
- const uint8_t* __restrict code1,
163
- const uint8_t* __restrict code2,
164
- const uint8_t* __restrict code3,
165
- // computed distances
166
- float& result0,
167
- float& result1,
168
- float& result2,
169
- float& result3) {
170
- distance_four_codes_generic<PQDecoderT>(
171
- M,
172
- nbits,
173
- sim_table,
174
- code0,
175
- code1,
176
- code2,
177
- code3,
178
- result0,
179
- result1,
180
- result2,
181
- result3);
182
- }
183
-
184
- } // namespace faiss
185
-
186
- #endif
@@ -1,216 +0,0 @@
1
- /*
2
- * Copyright (c) Meta Platforms, Inc. and affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
- #pragma once
9
-
10
- #include <cstdint>
11
- #include <cstdlib>
12
-
13
- #include <faiss/impl/CodePacker.h>
14
-
15
- /** PQ4 SIMD packing and accumulation functions
16
- *
17
- * The basic kernel accumulates nq query vectors with bbs = nb * 2 * 16 vectors
18
- * and produces an output matrix for that. It is interesting for nq * nb <= 4,
19
- * otherwise register spilling becomes too large.
20
- *
21
- * The implementation of these functions is spread over 3 cpp files to reduce
22
- * parallel compile times. Templates are instantiated explicitly.
23
- */
24
-
25
- namespace faiss {
26
-
27
- struct NormTableScaler;
28
- struct SIMDResultHandler;
29
-
30
- /** Pack codes for consumption by the SIMD kernels.
31
- * The unused bytes are set to 0.
32
- *
33
- * @param codes input codes, size (ntotal, ceil(M / 2))
34
- * @param ntotal number of input codes
35
- * @param nb output number of codes (ntotal rounded up to a multiple of
36
- * bbs)
37
- * @param nsq number of sub-quantizers (=M rounded up to a multiple of 2)
38
- * @param bbs size of database blocks (multiple of 32)
39
- * @param blocks output array, size nb * nsq / 2.
40
- * @param code_stride optional stride between consecutive codes (0 = use
41
- default (M + 1) / 2)
42
- */
43
- void pq4_pack_codes(
44
- const uint8_t* codes,
45
- size_t ntotal,
46
- size_t M,
47
- size_t nb,
48
- size_t bbs,
49
- size_t nsq,
50
- uint8_t* blocks,
51
- size_t code_stride = 0);
52
-
53
- /** Same as pack_codes but write in a given range of the output,
54
- * leaving the rest untouched. Assumes allocated entries are 0 on input.
55
- *
56
- * @param codes input codes, size (i1 - i0, ceil(M / 2))
57
- * @param i0 first output code to write
58
- * @param i1 last output code to write
59
- * @param blocks output array, size at least ceil(i1 / bbs) * bbs * nsq / 2
60
- * @param code_stride optional stride between consecutive codes (0 = use
61
- * default (M + 1) / 2)
62
- */
63
- void pq4_pack_codes_range(
64
- const uint8_t* codes,
65
- size_t M,
66
- size_t i0,
67
- size_t i1,
68
- size_t bbs,
69
- size_t nsq,
70
- uint8_t* blocks,
71
- size_t code_stride = 0);
72
-
73
- /** get a single element from a packed codes table
74
- *
75
- * @param vector_id vector id
76
- * @param sq subquantizer (< nsq)
77
- */
78
- uint8_t pq4_get_packed_element(
79
- const uint8_t* data,
80
- size_t bbs,
81
- size_t nsq,
82
- size_t vector_id,
83
- size_t sq);
84
-
85
- /** set a single element "code" into a packed codes table
86
- *
87
- * @param vector_id vector id
88
- * @param sq subquantizer (< nsq)
89
- */
90
- void pq4_set_packed_element(
91
- uint8_t* data,
92
- uint8_t code,
93
- size_t bbs,
94
- size_t nsq,
95
- size_t vector_id,
96
- size_t sq);
97
-
98
- /** CodePacker API for the PQ4 fast-scan */
99
- struct CodePackerPQ4 : CodePacker {
100
- size_t nsq;
101
-
102
- CodePackerPQ4(size_t nsq, size_t bbs);
103
-
104
- void pack_1(const uint8_t* flat_code, size_t offset, uint8_t* block)
105
- const final;
106
- void unpack_1(const uint8_t* block, size_t offset, uint8_t* flat_code)
107
- const final;
108
- };
109
-
110
- /** Pack Look-up table for consumption by the kernel.
111
- *
112
- * @param nq number of queries
113
- * @param nsq number of sub-quantizers (multiple of 2)
114
- * @param src input array, size (nq, 16)
115
- * @param dest output array, size (nq, 16)
116
- */
117
- void pq4_pack_LUT(int nq, int nsq, const uint8_t* src, uint8_t* dest);
118
-
119
- /** Loop over database elements and accumulate results into result handler
120
- *
121
- * @param nq number of queries
122
- * @param nb number of database elements
123
- * @param bbs size of database blocks (multiple of 32)
124
- * @param nsq number of sub-quantizers (multiple of 2)
125
- * @param codes packed codes array
126
- * @param LUT packed look-up table
127
- * @param scaler scaler to scale the encoded norm
128
- */
129
- void pq4_accumulate_loop(
130
- int nq,
131
- size_t nb,
132
- int bbs,
133
- int nsq,
134
- const uint8_t* codes,
135
- const uint8_t* LUT,
136
- SIMDResultHandler& res,
137
- const NormTableScaler* scaler);
138
-
139
- /* qbs versions, supported only for bbs=32.
140
- *
141
- * The kernel function runs the kernel for *several* query blocks
142
- * and bbs database vectors. The sizes of the blocks are encoded in qbs as
143
- * base-16 digits.
144
- *
145
- * For example, qbs = 0x1223 means that the kernel will be run 4 times, the
146
- * first time with 3 query vectors, second time with 2 query vectors, then 2
147
- * vectors again and finally with 1 query vector. The output block will thus be
148
- * nq = 3 + 2 + 2 + 1 = 6 queries. For a given total block size, the optimal
149
- * decomposition into sub-blocks (measured empirically) is given by
150
- * preferred_qbs().
151
- */
152
-
153
- /* compute the number of queries from a base-16 decomposition */
154
- int pq4_qbs_to_nq(int qbs);
155
-
156
- /** return the preferred decomposition in blocks for a nb of queries. */
157
- int pq4_preferred_qbs(int nq);
158
-
159
- /** Pack Look-up table for consumption by the kernel.
160
- *
161
- * @param qbs 4-bit encoded number of query blocks, the total number of
162
- * queries handled (nq) is deduced from it
163
- * @param nsq number of sub-quantizers (multiple of 2)
164
- * @param src input array, size (nq, 16)
165
- * @param dest output array, size (nq, 16)
166
- * @return nq
167
- */
168
- int pq4_pack_LUT_qbs(int fqbs, int nsq, const uint8_t* src, uint8_t* dest);
169
-
170
- /** Same as pq4_pack_LUT_qbs, except the source vectors are remapped with q_map
171
- */
172
- int pq4_pack_LUT_qbs_q_map(
173
- int qbs,
174
- int nsq,
175
- const uint8_t* src,
176
- const int* q_map,
177
- uint8_t* dest);
178
-
179
- /** Run accumulation loop.
180
- *
181
- * @param qbs 4-bit encoded number of queries
182
- * @param nb number of database codes (multiple of bbs)
183
- * @param nsq number of sub-quantizers
184
- * @param codes encoded database vectors (packed)
185
- * @param LUT look-up table (packed)
186
- * @param res call-back for the results
187
- * @param scaler scaler to scale the encoded norm
188
- */
189
- void pq4_accumulate_loop_qbs(
190
- int qbs,
191
- size_t nb,
192
- int nsq,
193
- const uint8_t* codes,
194
- const uint8_t* LUT,
195
- SIMDResultHandler& res,
196
- const NormTableScaler* scaler = nullptr);
197
-
198
- /** Wrapper of pq4_accumulate_loop_qbs using simple StoreResultHandler
199
- * and DummyScaler
200
- *
201
- * @param nq number of queries
202
- * @param ntotal2 number of database elements (multiple of 32)
203
- * @param nsq number of sub-quantizers (muliple of 2)
204
- * @param codes packed codes array
205
- * @param LUT packed look-up table
206
- * @param accu array to store the results
207
- */
208
- void accumulate_to_mem(
209
- int nq,
210
- size_t ntotal2,
211
- int nsq,
212
- const uint8_t* codes,
213
- const uint8_t* LUT,
214
- uint16_t* accu);
215
-
216
- } // namespace faiss
@@ -1,224 +0,0 @@
1
- /*
2
- * Copyright (c) Meta Platforms, Inc. and affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
- #include <faiss/impl/pq4_fast_scan.h>
9
-
10
- #include <faiss/impl/FaissAssert.h>
11
- #include <faiss/impl/LookupTableScaler.h>
12
- #include <faiss/impl/simd_result_handlers.h>
13
-
14
- namespace faiss {
15
-
16
- using namespace simd_result_handlers;
17
-
18
- /***************************************************************
19
- * accumulation functions
20
- ***************************************************************/
21
-
22
- namespace {
23
-
24
- /*
25
- * The computation kernel
26
- * It accumulates results for NQ queries and BB * 32 database elements
27
- * writes results in a ResultHandler
28
- */
29
-
30
- template <int NQ, int BB, class ResultHandler, class Scaler>
31
- void kernel_accumulate_block(
32
- int nsq,
33
- const uint8_t* codes,
34
- const uint8_t* LUT,
35
- ResultHandler& res,
36
- const Scaler& scaler) {
37
- // distance accumulators
38
- simd16uint16 accu[NQ][BB][4];
39
-
40
- for (int q = 0; q < NQ; q++) {
41
- for (int b = 0; b < BB; b++) {
42
- accu[q][b][0].clear();
43
- accu[q][b][1].clear();
44
- accu[q][b][2].clear();
45
- accu[q][b][3].clear();
46
- }
47
- }
48
-
49
- for (int sq = 0; sq < nsq - scaler.nscale; sq += 2) {
50
- simd32uint8 lut_cache[NQ];
51
- for (int q = 0; q < NQ; q++) {
52
- lut_cache[q] = simd32uint8(LUT);
53
- LUT += 32;
54
- }
55
-
56
- for (int b = 0; b < BB; b++) {
57
- simd32uint8 c = simd32uint8(codes);
58
- codes += 32;
59
- simd32uint8 mask(15);
60
- simd32uint8 chi = simd32uint8(simd16uint16(c) >> 4) & mask;
61
- simd32uint8 clo = c & mask;
62
-
63
- for (int q = 0; q < NQ; q++) {
64
- simd32uint8 lut = lut_cache[q];
65
- simd32uint8 res0 = lut.lookup_2_lanes(clo);
66
- simd32uint8 res1 = lut.lookup_2_lanes(chi);
67
-
68
- accu[q][b][0] += simd16uint16(res0);
69
- accu[q][b][1] += simd16uint16(res0) >> 8;
70
-
71
- accu[q][b][2] += simd16uint16(res1);
72
- accu[q][b][3] += simd16uint16(res1) >> 8;
73
- }
74
- }
75
- }
76
-
77
- for (int sq = 0; sq < scaler.nscale; sq += 2) {
78
- simd32uint8 lut_cache[NQ];
79
- for (int q = 0; q < NQ; q++) {
80
- lut_cache[q] = simd32uint8(LUT);
81
- LUT += 32;
82
- }
83
-
84
- for (int b = 0; b < BB; b++) {
85
- simd32uint8 c = simd32uint8(codes);
86
- codes += 32;
87
- simd32uint8 mask(15);
88
- simd32uint8 chi = simd32uint8(simd16uint16(c) >> 4) & mask;
89
- simd32uint8 clo = c & mask;
90
-
91
- for (int q = 0; q < NQ; q++) {
92
- simd32uint8 lut = lut_cache[q];
93
-
94
- simd32uint8 res0 = scaler.lookup(lut, clo);
95
- accu[q][b][0] += scaler.scale_lo(res0); // handle vectors 0..7
96
- accu[q][b][1] += scaler.scale_hi(res0); // handle vectors 8..15
97
-
98
- simd32uint8 res1 = scaler.lookup(lut, chi);
99
- accu[q][b][2] += scaler.scale_lo(res1); // handle vectors 16..23
100
- accu[q][b][3] +=
101
- scaler.scale_hi(res1); // handle vectors 24..31
102
- }
103
- }
104
- }
105
-
106
- for (int q = 0; q < NQ; q++) {
107
- for (int b = 0; b < BB; b++) {
108
- accu[q][b][0] -= accu[q][b][1] << 8;
109
- simd16uint16 dis0 = combine2x2(accu[q][b][0], accu[q][b][1]);
110
-
111
- accu[q][b][2] -= accu[q][b][3] << 8;
112
- simd16uint16 dis1 = combine2x2(accu[q][b][2], accu[q][b][3]);
113
-
114
- res.handle(q, b, dis0, dis1);
115
- }
116
- }
117
- }
118
-
119
- template <int NQ, int BB, class ResultHandler, class Scaler>
120
- void accumulate_fixed_blocks(
121
- size_t nb,
122
- int nsq,
123
- const uint8_t* codes,
124
- const uint8_t* LUT,
125
- ResultHandler& res,
126
- const Scaler& scaler) {
127
- constexpr int bbs = 32 * BB;
128
- for (size_t j0 = 0; j0 < nb; j0 += bbs) {
129
- FixedStorageHandler<NQ, 2 * BB> res2;
130
- kernel_accumulate_block<NQ, BB>(nsq, codes, LUT, res2, scaler);
131
- res.set_block_origin(0, j0);
132
- res2.to_other_handler(res);
133
- codes += bbs * nsq / 2;
134
- }
135
- }
136
-
137
- template <class ResultHandler, class Scaler>
138
- void pq4_accumulate_loop_fixed_scaler(
139
- int nq,
140
- size_t nb,
141
- int bbs,
142
- int nsq,
143
- const uint8_t* codes,
144
- const uint8_t* LUT,
145
- ResultHandler& res,
146
- const Scaler& scaler) {
147
- FAISS_THROW_IF_NOT(is_aligned_pointer(codes));
148
- FAISS_THROW_IF_NOT(is_aligned_pointer(LUT));
149
- FAISS_THROW_IF_NOT(bbs % 32 == 0);
150
- FAISS_THROW_IF_NOT(nb % bbs == 0);
151
-
152
- #define DISPATCH(NQ, BB) \
153
- case NQ * 1000 + BB: \
154
- accumulate_fixed_blocks<NQ, BB>(nb, nsq, codes, LUT, res, scaler); \
155
- break
156
-
157
- switch (nq * 1000 + bbs / 32) {
158
- DISPATCH(1, 1);
159
- DISPATCH(1, 2);
160
- DISPATCH(1, 3);
161
- DISPATCH(1, 4);
162
- DISPATCH(1, 5);
163
- DISPATCH(2, 1);
164
- DISPATCH(2, 2);
165
- DISPATCH(3, 1);
166
- DISPATCH(4, 1);
167
- default:
168
- FAISS_THROW_FMT("nq=%d bbs=%d not instantiated", nq, bbs);
169
- }
170
- #undef DISPATCH
171
- }
172
-
173
- template <class ResultHandler>
174
- void pq4_accumulate_loop_fixed_handler(
175
- int nq,
176
- size_t nb,
177
- int bbs,
178
- int nsq,
179
- const uint8_t* codes,
180
- const uint8_t* LUT,
181
- ResultHandler& res,
182
- const NormTableScaler* scaler) {
183
- if (scaler) {
184
- pq4_accumulate_loop_fixed_scaler(
185
- nq, nb, bbs, nsq, codes, LUT, res, *scaler);
186
- } else {
187
- DummyScaler dscaler;
188
- pq4_accumulate_loop_fixed_scaler(
189
- nq, nb, bbs, nsq, codes, LUT, res, dscaler);
190
- }
191
- }
192
-
193
- struct Run_pq4_accumulate_loop {
194
- template <class ResultHandler>
195
- void f(ResultHandler& res,
196
- int nq,
197
- size_t nb,
198
- int bbs,
199
- int nsq,
200
- const uint8_t* codes,
201
- const uint8_t* LUT,
202
- const NormTableScaler* scaler) {
203
- pq4_accumulate_loop_fixed_handler(
204
- nq, nb, bbs, nsq, codes, LUT, res, scaler);
205
- }
206
- };
207
-
208
- } // anonymous namespace
209
-
210
- void pq4_accumulate_loop(
211
- int nq,
212
- size_t nb,
213
- int bbs,
214
- int nsq,
215
- const uint8_t* codes,
216
- const uint8_t* LUT,
217
- SIMDResultHandler& res,
218
- const NormTableScaler* scaler) {
219
- Run_pq4_accumulate_loop consumer;
220
- dispatch_SIMDResultHandler(
221
- res, consumer, nq, nb, bbs, nsq, codes, LUT, scaler);
222
- }
223
-
224
- } // namespace faiss
@@ -1,84 +0,0 @@
1
- /*
2
- * Copyright (c) Meta Platforms, Inc. and affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
- // This file contains an implementation of approximate top-k search
9
- // using heap. It was initially created for a beam search.
10
- //
11
- // The core idea is the following.
12
- // Say we need to find beam_size indices with the minimal distance
13
- // values. It is done via heap (priority_queue) using the following
14
- // pseudocode:
15
- //
16
- // def baseline():
17
- // distances = np.empty([beam_size * n], dtype=float)
18
- // indices = np.empty([beam_size * n], dtype=int)
19
- //
20
- // heap = Heap(max_heap_size=beam_size)
21
- //
22
- // for i in range(0, beam_size * n):
23
- // heap.push(distances[i], indices[i])
24
- //
25
- // Basically, this is what heap_addn() function from utils/Heap.h does.
26
- //
27
- // The following scheme can be used for approximate beam search.
28
- // Say, we need to find elements with min distance.
29
- // Basically, we split n elements of every beam into NBUCKETS buckets
30
- // and track the index with the minimal distance for every bucket.
31
- // This can be effectively SIMD-ed and significantly lowers the number
32
- // of operations, but yields approximate results for beam_size >= 2.
33
- //
34
- // def approximate_v1():
35
- // distances = np.empty([beam_size * n], dtype=float)
36
- // indices = np.empty([beam_size * n], dtype=int)
37
- //
38
- // heap = Heap(max_heap_size=beam_size)
39
- //
40
- // for beam in range(0, beam_size):
41
- // # The value of 32 is just an example.
42
- // # The value may be varied: the larger the value is,
43
- // # the slower and the more precise vs baseline beam search is
44
- // NBUCKETS = 32
45
- //
46
- // local_min_distances = [HUGE_VALF] * NBUCKETS
47
- // local_min_indices = [0] * NBUCKETS
48
- //
49
- // for i in range(0, n / NBUCKETS):
50
- // for j in range(0, NBUCKETS):
51
- // idx = beam * n + i * NBUCKETS + j
52
- // if distances[idx] < local_min_distances[j]:
53
- // local_min_distances[j] = distances[idx]
54
- // local_min_indices[j] = indices[idx]
55
- //
56
- // for j in range(0, NBUCKETS):
57
- // heap.push(local_min_distances[j], local_min_indices[j])
58
- //
59
- // The accuracy can be improved by tracking min-2 elements for every
60
- // bucket. Such a min-2 implementation with NBUCKETS buckets provides
61
- // better accuracy than top-1 implementation with 2 * NBUCKETS buckets.
62
- // Min-3 is also doable. One can use min-N approach, but I'm not sure
63
- // whether min-4 and above are practical, because of the lack of SIMD
64
- // registers (unless AVX-512 version is used).
65
- //
66
- // C++ template for top-N implementation is provided. The code
67
- // assumes that indices[idx] == idx. One can write a code that lifts
68
- // such an assumption easily.
69
- //
70
- // Currently, the code that tracks elements with min distances is implemented
71
- // (Max Heap). Min Heap option can be added easily.
72
-
73
- #pragma once
74
-
75
- #include <faiss/impl/platform_macros.h>
76
-
77
- // the list of available modes is in the following file
78
- #include <faiss/utils/approx_topk/mode.h>
79
-
80
- #ifdef __AVX2__
81
- #include <faiss/utils/approx_topk/avx2-inl.h>
82
- #else
83
- #include <faiss/utils/approx_topk/generic.h>
84
- #endif