faiss 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (378) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/ext/faiss/extconf.rb +2 -1
  4. data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
  5. data/ext/faiss/index_binary.cpp +1 -1
  6. data/ext/faiss/kmeans.cpp +1 -1
  7. data/ext/faiss/pca_matrix.cpp +1 -1
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
  10. data/lib/faiss/version.rb +1 -1
  11. data/vendor/faiss/faiss/AutoTune.cpp +93 -80
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -240
  13. data/vendor/faiss/faiss/Clustering.h +6 -0
  14. data/vendor/faiss/faiss/IVFlib.cpp +41 -21
  15. data/vendor/faiss/faiss/Index.cpp +6 -5
  16. data/vendor/faiss/faiss/Index.h +5 -5
  17. data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
  21. data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
  22. data/vendor/faiss/faiss/IndexBinary.h +4 -4
  23. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
  26. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +88 -97
  27. data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
  28. data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
  29. data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
  30. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +89 -417
  31. data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
  32. data/vendor/faiss/faiss/IndexFastScan.h +25 -23
  33. data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
  34. data/vendor/faiss/faiss/IndexFlat.h +21 -18
  35. data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
  36. data/vendor/faiss/faiss/IndexHNSW.cpp +374 -206
  37. data/vendor/faiss/faiss/IndexHNSW.h +16 -2
  38. data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
  39. data/vendor/faiss/faiss/IndexIDMap.h +9 -7
  40. data/vendor/faiss/faiss/IndexIVF.cpp +467 -364
  41. data/vendor/faiss/faiss/IndexIVF.h +33 -12
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +79 -76
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
  44. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
  45. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
  46. data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
  47. data/vendor/faiss/faiss/IndexIVFFlat.cpp +39 -69
  48. data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
  49. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +56 -33
  50. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
  51. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
  52. data/vendor/faiss/faiss/IndexIVFPQ.cpp +73 -846
  53. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
  54. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
  55. data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -20
  56. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +30 -52
  57. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
  58. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
  59. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
  60. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
  61. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  62. data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
  63. data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
  64. data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
  65. data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
  66. data/vendor/faiss/faiss/IndexNSG.cpp +38 -23
  67. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
  68. data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
  69. data/vendor/faiss/faiss/IndexPQ.h +3 -2
  70. data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
  71. data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
  72. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
  73. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  74. data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
  75. data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
  76. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
  77. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
  78. data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
  79. data/vendor/faiss/faiss/IndexRefine.h +4 -4
  80. data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
  81. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
  82. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
  83. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +150 -20
  84. data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -0
  85. data/vendor/faiss/faiss/IndexShards.cpp +10 -9
  86. data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
  87. data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
  88. data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
  89. data/vendor/faiss/faiss/MetaIndexes.h +1 -1
  90. data/vendor/faiss/faiss/MetricType.h +14 -7
  91. data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
  92. data/vendor/faiss/faiss/SuperKMeans.h +97 -0
  93. data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
  94. data/vendor/faiss/faiss/VectorTransform.h +16 -16
  95. data/vendor/faiss/faiss/build.cpp +23 -0
  96. data/vendor/faiss/faiss/build.h +15 -0
  97. data/vendor/faiss/faiss/clone_index.cpp +48 -47
  98. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +1 -1
  99. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
  100. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
  101. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +902 -12
  102. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
  103. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
  104. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +702 -10
  105. data/vendor/faiss/faiss/factory_tools.cpp +9 -0
  106. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
  107. data/vendor/faiss/faiss/gpu/GpuResources.h +3 -2
  108. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +15 -16
  109. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +5 -4
  110. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
  111. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
  112. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
  113. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
  114. data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
  115. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
  116. data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
  117. data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
  118. data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
  119. data/vendor/faiss/faiss/gpu_metal/MetalDistance.h +87 -0
  120. data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
  121. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +58 -0
  122. data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
  123. data/vendor/faiss/faiss/gpu_metal/MetalIndexIVFFlat.h +181 -0
  124. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +111 -0
  125. data/vendor/faiss/faiss/gpu_metal/MetalPythonBridge.h +45 -0
  126. data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
  127. data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
  128. data/vendor/faiss/faiss/gpu_metal/impl/MetalIVFFlat.h +193 -0
  129. data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
  130. data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
  135. data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
  136. data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
  137. data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
  138. data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
  139. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
  140. data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
  141. data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
  142. data/vendor/faiss/faiss/impl/FaissException.h +50 -3
  143. data/vendor/faiss/faiss/impl/HNSW.cpp +639 -507
  144. data/vendor/faiss/faiss/impl/HNSW.h +61 -44
  145. data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
  146. data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
  147. data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
  148. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
  149. data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
  150. data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
  151. data/vendor/faiss/faiss/impl/NSG.cpp +53 -32
  152. data/vendor/faiss/faiss/impl/NSG.h +4 -4
  153. data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
  154. data/vendor/faiss/faiss/impl/Panorama.h +269 -87
  155. data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
  156. data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
  157. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
  158. data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
  159. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
  160. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
  161. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +55 -25
  162. data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
  163. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
  164. data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
  165. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +302 -283
  166. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
  167. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  168. data/vendor/faiss/faiss/impl/ResultHandler.h +100 -75
  169. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +318 -7
  170. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +77 -1
  171. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
  172. data/vendor/faiss/faiss/impl/VisitedTable.cpp +10 -10
  173. data/vendor/faiss/faiss/impl/VisitedTable.h +70 -28
  174. data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
  175. data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
  176. data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
  177. data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
  178. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
  179. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
  180. data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
  181. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
  182. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
  183. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
  184. data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
  185. data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
  186. data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
  187. data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
  188. data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
  189. data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
  190. data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
  191. data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
  192. data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
  193. data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
  194. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
  195. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
  196. data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
  197. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +270 -0
  198. data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
  199. data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
  200. data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
  201. data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
  202. data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
  203. data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
  204. data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
  205. data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
  206. data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
  207. data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
  208. data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
  209. data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
  210. data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
  211. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +83 -0
  212. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +113 -0
  213. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +150 -0
  214. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +142 -0
  215. data/vendor/faiss/faiss/impl/index_read.cpp +1227 -79
  216. data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
  217. data/vendor/faiss/faiss/impl/index_write.cpp +96 -13
  218. data/vendor/faiss/faiss/impl/io.cpp +6 -6
  219. data/vendor/faiss/faiss/impl/io_macros.h +58 -16
  220. data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
  221. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
  222. data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
  223. data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
  224. data/vendor/faiss/faiss/impl/platform_macros.h +15 -4
  225. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
  226. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
  227. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
  228. data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
  229. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +23 -0
  230. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +23 -0
  231. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +23 -0
  232. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
  233. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
  234. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +45 -107
  235. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
  236. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +274 -5
  237. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +10 -7
  238. data/vendor/faiss/faiss/impl/pq_code_distance/pq_scan_impl.h +105 -0
  239. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +70 -0
  240. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
  241. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
  242. data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
  243. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +9 -2
  244. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +419 -19
  245. data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
  246. data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
  247. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +387 -2
  248. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-impl.h +553 -0
  249. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-spr.cpp +559 -0
  250. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +341 -2
  251. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +425 -3
  252. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +290 -2
  253. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +337 -0
  254. data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
  255. data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
  256. data/vendor/faiss/faiss/impl/simd_dispatch.h +157 -66
  257. data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
  258. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
  259. data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
  260. data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
  261. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
  262. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
  263. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
  264. data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
  265. data/vendor/faiss/faiss/impl/svs_io.h +8 -2
  266. data/vendor/faiss/faiss/index_factory.cpp +90 -18
  267. data/vendor/faiss/faiss/index_io.h +40 -0
  268. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
  269. data/vendor/faiss/faiss/invlists/DirectMap.cpp +28 -15
  270. data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
  271. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +170 -86
  272. data/vendor/faiss/faiss/invlists/InvertedLists.h +88 -25
  273. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
  274. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
  275. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  276. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
  277. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
  278. data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
  279. data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
  280. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
  281. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
  282. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
  283. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
  284. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +142 -21
  285. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +33 -7
  286. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.cpp +3 -2
  287. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +2 -1
  288. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +77 -27
  289. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +10 -4
  290. data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
  291. data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
  292. data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
  293. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
  294. data/vendor/faiss/faiss/utils/bf16.h +34 -0
  295. data/vendor/faiss/faiss/utils/distances.cpp +390 -560
  296. data/vendor/faiss/faiss/utils/distances.h +20 -1
  297. data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
  298. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
  299. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
  300. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
  301. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
  302. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
  303. data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
  304. data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -178
  305. data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
  306. data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
  307. data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
  308. data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
  309. data/vendor/faiss/faiss/utils/hamming.h +92 -2
  310. data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
  311. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +16 -0
  312. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
  313. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512_spr.cpp +15 -0
  314. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
  315. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +210 -0
  316. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512_spr.h +171 -0
  317. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
  318. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
  319. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
  320. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
  321. data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
  322. data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
  323. data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
  324. data/vendor/faiss/faiss/utils/partitioning.cpp +66 -989
  325. data/vendor/faiss/faiss/utils/partitioning.h +31 -0
  326. data/vendor/faiss/faiss/utils/popcount.h +29 -0
  327. data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
  328. data/vendor/faiss/faiss/utils/prefetch.h +2 -2
  329. data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
  330. data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
  331. data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
  332. data/vendor/faiss/faiss/utils/random.cpp +6 -6
  333. data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
  334. data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
  335. data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
  336. data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
  337. data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
  338. data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
  339. data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
  340. data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
  341. data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
  342. data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
  343. data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
  344. data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
  345. data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
  346. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1031 -0
  347. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
  348. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
  349. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512_spr.cpp +343 -0
  350. data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
  351. data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
  352. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
  353. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
  354. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
  355. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
  356. data/vendor/faiss/faiss/utils/simd_levels.cpp +29 -7
  357. data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
  358. data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
  359. data/vendor/faiss/faiss/utils/utils.cpp +5 -5
  360. data/vendor/faiss/faiss/utils/utils.h +3 -3
  361. metadata +129 -34
  362. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
  363. data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
  364. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
  365. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
  366. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
  367. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
  368. data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
  369. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
  370. data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
  371. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
  372. data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
  373. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
  374. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
  375. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
  376. data/vendor/faiss/faiss/utils/simdlib.h +0 -42
  377. data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
  378. /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
@@ -0,0 +1,39 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #ifndef HAMMING_COMPUTER_RVV_H
9
+ #define HAMMING_COMPUTER_RVV_H
10
+
11
+ // RVV HammingComputer fallbacks. There is no RVV-optimized HammingComputer
12
+ // implementation yet, so provide concrete RISCV_RVV specializations backed by
13
+ // the scalar NONE implementations.
14
+
15
+ #include <faiss/utils/hamming_distance/hamming_computer-generic.h>
16
+
17
+ namespace faiss {
18
+
19
+ #define FAISS_INHERIT_HAMMING_RVV(Class) \
20
+ template <> \
21
+ struct Class##_tpl<SIMDLevel::RISCV_RVV> : Class##_tpl<SIMDLevel::NONE> { \
22
+ using Class##_tpl<SIMDLevel::NONE>::Class##_tpl; \
23
+ }
24
+
25
+ FAISS_INHERIT_HAMMING_RVV(HammingComputer16);
26
+ FAISS_INHERIT_HAMMING_RVV(HammingComputer20);
27
+ FAISS_INHERIT_HAMMING_RVV(HammingComputer32);
28
+ FAISS_INHERIT_HAMMING_RVV(HammingComputer64);
29
+ FAISS_INHERIT_HAMMING_RVV(HammingComputerDefault);
30
+ FAISS_INHERIT_HAMMING_RVV(GenHammingComputer8);
31
+ FAISS_INHERIT_HAMMING_RVV(GenHammingComputer16);
32
+ FAISS_INHERIT_HAMMING_RVV(GenHammingComputer32);
33
+ FAISS_INHERIT_HAMMING_RVV(GenHammingComputerM8);
34
+
35
+ #undef FAISS_INHERIT_HAMMING_RVV
36
+
37
+ } // namespace faiss
38
+
39
+ #endif
@@ -0,0 +1,146 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // This file contains forward declarations, architecture-independent
9
+ // HammingComputer structs (sizes 4 and 8), and the with_HammingComputer
10
+ // dispatch function. SIMDLevel-specific specializations live in:
11
+ // hamming_computer-generic.h (NONE — scalar fallback)
12
+ // hamming_computer-avx2.h (AVX2)
13
+ // hamming_computer-avx512.h (AVX512)
14
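+ // hamming_computer-neon.h (ARM NEON)
+ // hamming_computer-avx512_spr.h (AVX512 SPR)
+ // hamming_computer-rvv.h (RISC-V RVV — inherits the NONE implementations)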
15
+
16
+ #ifndef FAISS_hamming_computer_h
17
+ #define FAISS_hamming_computer_h
18
+
19
#include <cstring>

#include <faiss/utils/hamming_distance/common.h>
20
+
21
+ namespace faiss {
22
+
23
+ /***************************************************************************
24
+ * HammingComputer primary templates.
25
+ *
26
+ * Per-ISA backend files (hamming_computer-avx512.h, hamming_computer-neon.h,
27
+ * etc.) provide explicit specializations that override the scalar (NONE)
28
+ * defaults in hamming_computer-generic.h with ISA-optimized code.
29
+ * Templating on SIMDLevel gives each specialization a distinct mangled
30
+ * name, so DD builds with multiple per-ISA TUs do NOT create ODR-violating
31
+ * struct collisions.
32
+ *
33
+ * Call sites use with_HammingComputer<SL>, which is templatized on
34
+ * SIMDLevel to select the matching specialization.
35
+ ***************************************************************************/
36
+
37
+ // Forward declarations. The struct bodies live in hamming_computer-generic.h
38
+ // (NONE) and per-ISA hamming_computer-*.h files.
39
+ template <SIMDLevel SL>
40
+ struct HammingComputer16_tpl;
41
+ template <SIMDLevel SL>
42
+ struct HammingComputer20_tpl;
43
+ template <SIMDLevel SL>
44
+ struct HammingComputer32_tpl;
45
+ template <SIMDLevel SL>
46
+ struct HammingComputer64_tpl;
47
+ template <SIMDLevel SL>
48
+ struct HammingComputerDefault_tpl;
49
+ template <SIMDLevel SL>
50
+ struct GenHammingComputer8_tpl;
51
+ template <SIMDLevel SL>
52
+ struct GenHammingComputer16_tpl;
53
+ template <SIMDLevel SL>
54
+ struct GenHammingComputer32_tpl;
55
+ template <SIMDLevel SL>
56
+ struct GenHammingComputerM8_tpl;
57
+
58
+ /******************************************************************
59
+ * The HammingComputer series of classes compares a single code of
60
+ * size 4 to 64 to incoming codes. They are intended for use as a
61
+ * template class where it would be inefficient to switch on the code
62
+ * size in the inner loop. Hopefully the compiler will inline the
63
+ * hamming() functions and put the a0, a1, ... in registers.
64
+ * For code_size = 4 and 8 we don't use SIMD implementations, because
65
+ * register widths are too large.
66
+ ******************************************************************/
67
+
68
+ struct HammingComputer4 {
69
+ uint32_t a0;
70
+
71
+ HammingComputer4() {}
72
+
73
+ HammingComputer4(const uint8_t* a, int code_size) {
74
+ set(a, code_size);
75
+ }
76
+
77
+ void set(const uint8_t* a, FAISS_MAYBE_UNUSED int code_size) {
78
+ assert(code_size == 4);
79
+ const uint32_t* a32 = reinterpret_cast<const uint32_t*>(a);
80
+ a0 = *a32;
81
+ }
82
+
83
+ inline int hamming(const uint8_t* b) const {
84
+ const uint32_t* b32 = reinterpret_cast<const uint32_t*>(b);
85
+ return popcount64(*b32 ^ a0);
86
+ }
87
+
88
+ inline static constexpr int get_code_size() {
89
+ return 4;
90
+ }
91
+ };
92
+
93
+ struct HammingComputer8 {
94
+ uint64_t a0;
95
+
96
+ HammingComputer8() {}
97
+
98
+ HammingComputer8(const uint8_t* a, int code_size) {
99
+ set(a, code_size);
100
+ }
101
+
102
+ void set(const uint8_t* a, FAISS_MAYBE_UNUSED int code_size) {
103
+ assert(code_size == 8);
104
+ const uint64_t* a64 = reinterpret_cast<const uint64_t*>(a);
105
+ a0 = *a64;
106
+ }
107
+
108
+ inline int hamming(const uint8_t* b) const {
109
+ const uint64_t* b64 = reinterpret_cast<const uint64_t*>(b);
110
+ return popcount64(*b64 ^ a0);
111
+ }
112
+
113
+ inline static constexpr int get_code_size() {
114
+ return 8;
115
+ }
116
+ };
117
+
118
+ /***************************************************************************
119
+ * Dispatching function that takes a code size and a C++20 template lambda.
120
+ * The lambda is called with the appropriate HammingComputer type:
121
+ * with_HammingComputer<SL>(code_size, [&]<class HammingComputer>() { ... });
122
+ **************************************************************************/
123
+
124
+ template <SIMDLevel SL, class F>
125
+ decltype(auto) with_HammingComputer(int code_size, F&& f) {
126
+ switch (code_size) {
127
+ case 4:
128
+ return f.template operator()<HammingComputer4>();
129
+ case 8:
130
+ return f.template operator()<HammingComputer8>();
131
+ case 16:
132
+ return f.template operator()<HammingComputer16_tpl<SL>>();
133
+ case 20:
134
+ return f.template operator()<HammingComputer20_tpl<SL>>();
135
+ case 32:
136
+ return f.template operator()<HammingComputer32_tpl<SL>>();
137
+ case 64:
138
+ return f.template operator()<HammingComputer64_tpl<SL>>();
139
+ default:
140
+ return f.template operator()<HammingComputerDefault_tpl<SL>>();
141
+ }
142
+ }
143
+
144
+ } // namespace faiss
145
+
146
+ #endif
@@ -0,0 +1,481 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // Shared implementation header for Hamming distance dynamic dispatch.
9
+ // Included by per-ISA TUs (hamming_avx2.cpp, hamming_avx512.cpp,
10
+ // hamming_neon.cpp) and by hamming.cpp (for the NONE fallback TU).
11
+ //
12
+ // THE_SIMD_LEVEL must be defined before including this header.
13
+ // Callers must also include the appropriate hamming_computer-*.h
14
+ // (generic, avx2, avx512, neon, or rvv) before this header so that the
15
+ // HammingComputer struct specializations are available.
16
+ //
17
+ // ODR CONTRACT: each TU that includes this header MUST define a UNIQUE
18
+ // THE_SIMD_LEVEL value across the linked binary. The template
19
+ // specializations at the bottom of this file (hammings_knn_hc_fixSL<SL>,
20
+ // etc.) have external linkage with SL-keyed mangled names; two TUs sharing
21
+ // THE_SIMD_LEVEL would emit conflicting definitions of the same symbol.
22
+ // In DD mode, the per-ISA TUs use distinct values (NONE/AVX2/AVX512/NEON/RVV);
23
+ // in static mode, only hamming.cpp includes this header so uniqueness is
24
+ // trivially satisfied.
25
+
26
+ #pragma once
27
+
28
+ #ifndef THE_SIMD_LEVEL
29
+ #error "Define THE_SIMD_LEVEL before including hamming_impl.h"
30
+ #endif
31
+
32
+ // Forward declarations and dispatch function.
33
+ #include <faiss/utils/hamming_distance/hamming_computer.h>
34
+
35
+ #include <faiss/utils/hamming.h>
36
+
37
+ #include <algorithm>
38
+ #include <cstdio>
39
+ #include <limits>
40
+ #include <memory>
41
+ #include <vector>
42
+
43
+ #include <faiss/impl/AuxIndexStructures.h>
44
+ #include <faiss/impl/FaissAssert.h>
45
+ #include <faiss/impl/IDSelector.h>
46
+ #include <faiss/utils/Heap.h>
47
+ #include <faiss/utils/approx_topk_hamming/approx_topk_hamming.h>
48
+ #include <faiss/utils/utils.h>
49
+
50
+ namespace faiss {
51
+
52
+ // All HammingComputer-dependent implementation templates live in an anonymous
53
+ // namespace for ODR safety. Different TUs compile this header with different
54
+ // HammingComputer struct layouts (generic vs AVX2 vs NEON). The anonymous
55
+ // namespace ensures each TU gets its own copy with internal linkage, preventing
56
+ // the linker from merging incompatible instantiations.
57
+ namespace {
58
+
59
+ /******************************************************************
60
+ * HammingComputer-based search templates
61
+ ******************************************************************/
62
+
63
+ template <class HammingComputer>
64
+ void hammings_knn_hc_impl(
65
+ int bytes_per_code,
66
+ int_maxheap_array_t* __restrict ha,
67
+ const uint8_t* __restrict bs1,
68
+ const uint8_t* __restrict bs2,
69
+ size_t n2,
70
+ bool order = true,
71
+ bool init_heap = true,
72
+ ApproxTopK_mode_t approx_topk_mode = ApproxTopK_mode_t::EXACT_TOPK,
73
+ const faiss::IDSelector* sel = nullptr) {
74
+ size_t k = ha->k;
75
+ if (init_heap) {
76
+ ha->heapify();
77
+ }
78
+
79
+ const size_t block_size = hamming_batch_size;
80
+ for (size_t j0 = 0; j0 < n2; j0 += block_size) {
81
+ const size_t j1 = std::min(j0 + block_size, n2);
82
+ #pragma omp parallel for
83
+ for (int64_t i = 0; i < static_cast<int64_t>(ha->nh); i++) {
84
+ HammingComputer hc(bs1 + i * bytes_per_code, bytes_per_code);
85
+
86
+ const uint8_t* __restrict bs2_ = bs2 + j0 * bytes_per_code;
87
+ hamdis_t dis;
88
+ hamdis_t* __restrict bh_val_ = ha->val + i * k;
89
+ int64_t* __restrict bh_ids_ = ha->ids + i * k;
90
+
91
+ #define HANDLE_APPROX(NB, BD) \
92
+ case ApproxTopK_mode_t::APPROX_TOPK_BUCKETS_B##NB##_D##BD: \
93
+ FAISS_THROW_IF_NOT_FMT( \
94
+ k <= NB * BD, \
95
+ "The chosen mode (%d) of approximate top-k supports " \
96
+ "up to %d values, but %zd is requested.", \
97
+ (int)(ApproxTopK_mode_t::APPROX_TOPK_BUCKETS_B##NB##_D##BD), \
98
+ NB * BD, \
99
+ k); \
100
+ HeapWithBucketsForHamming32< \
101
+ CMax<hamdis_t, int64_t>, \
102
+ NB, \
103
+ BD, \
104
+ HammingComputer>:: \
105
+ addn(j1 - j0, hc, bs2_, k, bh_val_, bh_ids_, sel); \
106
+ break;
107
+
108
+ switch (approx_topk_mode) {
109
+ HANDLE_APPROX(8, 3)
110
+ HANDLE_APPROX(8, 2)
111
+ HANDLE_APPROX(16, 2)
112
+ HANDLE_APPROX(32, 2)
113
+ default: {
114
+ for (size_t j = j0; j < j1; j++, bs2_ += bytes_per_code) {
115
+ if (sel && !sel->is_member(j)) {
116
+ continue;
117
+ }
118
+ dis = hc.hamming(bs2_);
119
+ if (dis < bh_val_[0]) {
120
+ faiss::maxheap_replace_top<hamdis_t>(
121
+ k, bh_val_, bh_ids_, dis, j);
122
+ }
123
+ }
124
+ } break;
125
+ }
126
+ }
127
+ }
128
+ if (order) {
129
+ ha->reorder();
130
+ }
131
+ }
132
+
133
+ #undef HANDLE_APPROX
134
+
135
+ template <class HammingComputer>
136
+ void hammings_knn_mc_impl(
137
+ int bytes_per_code,
138
+ const uint8_t* __restrict a,
139
+ const uint8_t* __restrict b,
140
+ size_t na,
141
+ size_t nb,
142
+ size_t k,
143
+ int32_t* __restrict distances,
144
+ int64_t* __restrict labels,
145
+ const faiss::IDSelector* sel) {
146
+ const int nBuckets = bytes_per_code * 8 + 1;
147
+ std::vector<int> all_counters(na * nBuckets, 0);
148
+ std::unique_ptr<int64_t[]> all_ids_per_dis(new int64_t[na * nBuckets * k]);
149
+
150
+ std::vector<HCounterState<HammingComputer>> cs;
151
+ for (size_t i = 0; i < na; ++i) {
152
+ cs.push_back(
153
+ HCounterState<HammingComputer>(
154
+ all_counters.data() + i * nBuckets,
155
+ all_ids_per_dis.get() + i * nBuckets * k,
156
+ a + i * bytes_per_code,
157
+ 8 * bytes_per_code,
158
+ k));
159
+ }
160
+
161
+ const size_t block_size = hamming_batch_size;
162
+ for (size_t j0 = 0; j0 < nb; j0 += block_size) {
163
+ const size_t j1 = std::min(j0 + block_size, nb);
164
+ #pragma omp parallel for
165
+ for (int64_t i = 0; i < static_cast<int64_t>(na); ++i) {
166
+ for (size_t j = j0; j < j1; ++j) {
167
+ if (!sel || sel->is_member(j)) {
168
+ cs[i].update_counter(b + j * bytes_per_code, j);
169
+ }
170
+ }
171
+ }
172
+ }
173
+
174
+ for (size_t i = 0; i < na; ++i) {
175
+ HCounterState<HammingComputer>& csi = cs[i];
176
+
177
+ size_t nres = 0;
178
+ for (int b_2 = 0; b_2 < nBuckets && nres < k; b_2++) {
179
+ for (int l = 0; l < csi.counters[b_2] && nres < k; l++) {
180
+ labels[i * k + nres] = csi.ids_per_dis[b_2 * k + l];
181
+ distances[i * k + nres] = b_2;
182
+ nres++;
183
+ }
184
+ }
185
+ while (nres < k) {
186
+ labels[i * k + nres] = -1;
187
+ distances[i * k + nres] = std::numeric_limits<int32_t>::max();
188
+ ++nres;
189
+ }
190
+ }
191
+ }
192
+
193
+ template <class HammingComputer>
194
+ void hamming_range_search_impl(
195
+ const uint8_t* a,
196
+ const uint8_t* b,
197
+ size_t na,
198
+ size_t nb,
199
+ int radius,
200
+ size_t code_size,
201
+ RangeSearchResult* res,
202
+ const faiss::IDSelector* sel) {
203
+ #pragma omp parallel
204
+ {
205
+ RangeSearchPartialResult pres(res);
206
+
207
+ #pragma omp for
208
+ for (int64_t i = 0; i < static_cast<int64_t>(na); i++) {
209
+ HammingComputer hc(a + i * code_size, code_size);
210
+ const uint8_t* yi = b;
211
+ RangeQueryResult& qres = pres.new_result(i);
212
+
213
+ for (size_t j = 0; j < nb; j++) {
214
+ if (!sel || sel->is_member(j)) {
215
+ int dis = hc.hamming(yi);
216
+ if (dis < radius) {
217
+ qres.add(dis, j);
218
+ }
219
+ }
220
+ yi += code_size;
221
+ }
222
+ }
223
+ pres.finalize();
224
+ }
225
+ }
226
+
227
+ /******************************************************************
228
+ * Generalized Hamming distances
229
+ ******************************************************************/
230
+
231
+ template <class HammingComputer>
232
+ void hamming_dis_inner_loop(
233
+ const uint8_t* __restrict ca,
234
+ const uint8_t* __restrict cb,
235
+ size_t nb,
236
+ size_t code_size,
237
+ int k,
238
+ hamdis_t* __restrict bh_val_,
239
+ int64_t* __restrict bh_ids_) {
240
+ HammingComputer hc(ca, code_size);
241
+
242
+ for (size_t j = 0; j < nb; j++) {
243
+ int ndiff = hc.hamming(cb);
244
+ cb += code_size;
245
+ if (ndiff < bh_val_[0]) {
246
+ maxheap_replace_top<hamdis_t>(k, bh_val_, bh_ids_, ndiff, j);
247
+ }
248
+ }
249
+ }
250
+
251
+ void generalized_hammings_knn_hc_impl(
252
+ int_maxheap_array_t* __restrict ha,
253
+ const uint8_t* __restrict a,
254
+ const uint8_t* __restrict b,
255
+ size_t nb,
256
+ size_t code_size,
257
+ int ordered) {
258
+ int na = ha->nh;
259
+ int k = ha->k;
260
+
261
+ if (ordered) {
262
+ ha->heapify();
263
+ }
264
+
265
+ #pragma omp parallel for
266
+ for (int i = 0; i < na; i++) {
267
+ const uint8_t* __restrict ca = a + i * code_size;
268
+ const uint8_t* __restrict cb = b;
269
+
270
+ hamdis_t* __restrict bh_val_ = ha->val + i * k;
271
+ int64_t* __restrict bh_ids_ = ha->ids + i * k;
272
+
273
+ switch (code_size) {
274
+ case 8:
275
+ hamming_dis_inner_loop<GenHammingComputer8_tpl<THE_SIMD_LEVEL>>(
276
+ ca, cb, nb, 8, k, bh_val_, bh_ids_);
277
+ break;
278
+ case 16:
279
+ hamming_dis_inner_loop<
280
+ GenHammingComputer16_tpl<THE_SIMD_LEVEL>>(
281
+ ca, cb, nb, 16, k, bh_val_, bh_ids_);
282
+ break;
283
+ case 32:
284
+ hamming_dis_inner_loop<
285
+ GenHammingComputer32_tpl<THE_SIMD_LEVEL>>(
286
+ ca, cb, nb, 32, k, bh_val_, bh_ids_);
287
+ break;
288
+ default:
289
+ hamming_dis_inner_loop<
290
+ GenHammingComputerM8_tpl<THE_SIMD_LEVEL>>(
291
+ ca, cb, nb, code_size, k, bh_val_, bh_ids_);
292
+ break;
293
+ }
294
+ }
295
+
296
+ if (ordered) {
297
+ ha->reorder();
298
+ }
299
+ }
300
+
301
+ } // anonymous namespace
302
+
303
+ /******************************************************************
304
+ * Entry point template specializations at THE_SIMD_LEVEL
305
+ ******************************************************************/
306
+
307
+ #define C64(x) ((uint64_t*)x)
308
+
309
+ template <>
310
+ void hammings_knn_hc_fixSL<THE_SIMD_LEVEL>(
311
+ int_maxheap_array_t* ha,
312
+ const uint8_t* a,
313
+ const uint8_t* b,
314
+ size_t nb,
315
+ size_t ncodes,
316
+ int ordered,
317
+ ApproxTopK_mode_t approx_topk_mode,
318
+ const IDSelector* sel) {
319
+ with_HammingComputer<THE_SIMD_LEVEL>(ncodes, [&]<class HammingComputer>() {
320
+ hammings_knn_hc_impl<HammingComputer>(
321
+ ncodes, ha, a, b, nb, ordered, true, approx_topk_mode, sel);
322
+ });
323
+ }
324
+
325
+ template <>
326
+ void hammings_knn_mc_fixSL<THE_SIMD_LEVEL>(
327
+ const uint8_t* a,
328
+ const uint8_t* b,
329
+ size_t na,
330
+ size_t nb,
331
+ size_t k,
332
+ size_t ncodes,
333
+ int32_t* distances,
334
+ int64_t* labels,
335
+ const IDSelector* sel) {
336
+ with_HammingComputer<THE_SIMD_LEVEL>(ncodes, [&]<class HammingComputer>() {
337
+ hammings_knn_mc_impl<HammingComputer>(
338
+ ncodes, a, b, na, nb, k, distances, labels, sel);
339
+ });
340
+ }
341
+
342
+ template <>
343
+ void hamming_range_search_fixSL<THE_SIMD_LEVEL>(
344
+ const uint8_t* a,
345
+ const uint8_t* b,
346
+ size_t na,
347
+ size_t nb,
348
+ int radius,
349
+ size_t code_size,
350
+ RangeSearchResult* result,
351
+ const IDSelector* sel) {
352
+ with_HammingComputer<THE_SIMD_LEVEL>(
353
+ code_size, [&]<class HammingComputer>() {
354
+ hamming_range_search_impl<HammingComputer>(
355
+ a, b, na, nb, radius, code_size, result, sel);
356
+ });
357
+ }
358
+
359
+ template <>
360
+ void hammings_fixSL<THE_SIMD_LEVEL>(
361
+ const uint8_t* a,
362
+ const uint8_t* b,
363
+ size_t na,
364
+ size_t nb,
365
+ size_t ncodes,
366
+ hamdis_t* dis) {
367
+ FAISS_THROW_IF_NOT(ncodes % 8 == 0);
368
+ switch (ncodes) {
369
+ case 8:
370
+ hammings_impl<64>(C64(a), C64(b), na, nb, dis);
371
+ return;
372
+ case 16:
373
+ hammings_impl<128>(C64(a), C64(b), na, nb, dis);
374
+ return;
375
+ case 32:
376
+ hammings_impl<256>(C64(a), C64(b), na, nb, dis);
377
+ return;
378
+ case 64:
379
+ hammings_impl<512>(C64(a), C64(b), na, nb, dis);
380
+ return;
381
+ default:
382
+ hammings_impl_runtime(C64(a), C64(b), na, nb, ncodes * 8, dis);
383
+ return;
384
+ }
385
+ }
386
+
387
/// Generalized-Hamming k-NN entry point at THE_SIMD_LEVEL.
/// Forwards every argument unchanged to generalized_hammings_knn_hc_impl.
template <>
void generalized_hammings_knn_hc_fixSL<THE_SIMD_LEVEL>(
        int_maxheap_array_t* ha,
        const uint8_t* a,
        const uint8_t* b,
        size_t nb,
        size_t code_size,
        int ordered) {
    generalized_hammings_knn_hc_impl(ha, a, b, nb, code_size, ordered);
}
397
+
398
+ template <>
399
+ void hamming_count_thres_fixSL<THE_SIMD_LEVEL>(
400
+ const uint8_t* bs1,
401
+ const uint8_t* bs2,
402
+ size_t n1,
403
+ size_t n2,
404
+ hamdis_t ht,
405
+ size_t ncodes,
406
+ size_t* nptr) {
407
+ switch (ncodes) {
408
+ case 8:
409
+ hamming_count_thres_impl<64>(C64(bs1), C64(bs2), n1, n2, ht, nptr);
410
+ return;
411
+ case 16:
412
+ hamming_count_thres_impl<128>(C64(bs1), C64(bs2), n1, n2, ht, nptr);
413
+ return;
414
+ case 32:
415
+ hamming_count_thres_impl<256>(C64(bs1), C64(bs2), n1, n2, ht, nptr);
416
+ return;
417
+ case 64:
418
+ hamming_count_thres_impl<512>(C64(bs1), C64(bs2), n1, n2, ht, nptr);
419
+ return;
420
+ default:
421
+ FAISS_THROW_FMT("not implemented for %zu bits", ncodes);
422
+ }
423
+ }
424
+
425
+ template <>
426
+ void crosshamming_count_thres_fixSL<THE_SIMD_LEVEL>(
427
+ const uint8_t* dbs,
428
+ size_t n,
429
+ hamdis_t ht,
430
+ size_t ncodes,
431
+ size_t* nptr) {
432
+ switch (ncodes) {
433
+ case 8:
434
+ crosshamming_count_thres_impl<64>(C64(dbs), n, ht, nptr);
435
+ return;
436
+ case 16:
437
+ crosshamming_count_thres_impl<128>(C64(dbs), n, ht, nptr);
438
+ return;
439
+ case 32:
440
+ crosshamming_count_thres_impl<256>(C64(dbs), n, ht, nptr);
441
+ return;
442
+ case 64:
443
+ crosshamming_count_thres_impl<512>(C64(dbs), n, ht, nptr);
444
+ return;
445
+ default:
446
+ FAISS_THROW_FMT("not implemented for %zu bits", ncodes);
447
+ }
448
+ }
449
+
450
+ template <>
451
+ size_t match_hamming_thres_fixSL<THE_SIMD_LEVEL>(
452
+ const uint8_t* bs1,
453
+ const uint8_t* bs2,
454
+ size_t n1,
455
+ size_t n2,
456
+ hamdis_t ht,
457
+ size_t ncodes,
458
+ int64_t* idx,
459
+ hamdis_t* dis) {
460
+ switch (ncodes) {
461
+ case 8:
462
+ return match_hamming_thres_impl<64>(
463
+ C64(bs1), C64(bs2), n1, n2, ht, idx, dis);
464
+ case 16:
465
+ return match_hamming_thres_impl<128>(
466
+ C64(bs1), C64(bs2), n1, n2, ht, idx, dis);
467
+ case 32:
468
+ return match_hamming_thres_impl<256>(
469
+ C64(bs1), C64(bs2), n1, n2, ht, idx, dis);
470
+ case 64:
471
+ return match_hamming_thres_impl<512>(
472
+ C64(bs1), C64(bs2), n1, n2, ht, idx, dis);
473
+ default:
474
+ FAISS_THROW_FMT("not implemented for %zu bits", ncodes);
475
+ return 0;
476
+ }
477
+ }
478
+
479
+ #undef C64
480
+
481
+ } // namespace faiss
@@ -0,0 +1,15 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #ifdef COMPILE_SIMD_ARM_NEON
9
+
10
+ #define THE_SIMD_LEVEL SIMDLevel::ARM_NEON
11
+ // NOLINTNEXTLINE(facebook-hte-InlineHeader)
12
+ #include <faiss/utils/hamming_distance/hamming_computer-neon.h>
13
+ #include <faiss/utils/hamming_distance/hamming_impl.h>
14
+
15
+ #endif // COMPILE_SIMD_ARM_NEON
@@ -0,0 +1,15 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #ifdef COMPILE_SIMD_RISCV_RVV
9
+
10
+ #define THE_SIMD_LEVEL SIMDLevel::RISCV_RVV
11
+ // NOLINTNEXTLINE(facebook-hte-InlineHeader)
12
+ #include <faiss/utils/hamming_distance/hamming_computer-rvv.h>
13
+ #include <faiss/utils/hamming_distance/hamming_impl.h>
14
+
15
+ #endif // COMPILE_SIMD_RISCV_RVV