faiss 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (361)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +2 -1
  4. data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
  5. data/ext/faiss/index_binary.cpp +1 -1
  6. data/ext/faiss/kmeans.cpp +1 -1
  7. data/ext/faiss/pca_matrix.cpp +1 -1
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
  10. data/lib/faiss/version.rb +1 -1
  11. data/vendor/faiss/faiss/AutoTune.cpp +93 -80
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -240
  13. data/vendor/faiss/faiss/Clustering.h +6 -0
  14. data/vendor/faiss/faiss/IVFlib.cpp +41 -21
  15. data/vendor/faiss/faiss/Index.cpp +6 -5
  16. data/vendor/faiss/faiss/Index.h +5 -5
  17. data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
  21. data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
  22. data/vendor/faiss/faiss/IndexBinary.h +4 -4
  23. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
  26. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
  27. data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
  28. data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
  29. data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
  30. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
  31. data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
  32. data/vendor/faiss/faiss/IndexFastScan.h +25 -23
  33. data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
  34. data/vendor/faiss/faiss/IndexFlat.h +21 -18
  35. data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
  36. data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
  37. data/vendor/faiss/faiss/IndexHNSW.h +16 -2
  38. data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
  39. data/vendor/faiss/faiss/IndexIDMap.h +9 -7
  40. data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
  41. data/vendor/faiss/faiss/IndexIVF.h +33 -12
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
  44. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
  45. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
  46. data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
  47. data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
  48. data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
  49. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
  50. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
  51. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
  52. data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
  53. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
  54. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
  55. data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
  56. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
  57. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
  58. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
  59. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
  60. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
  61. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  62. data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
  63. data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
  64. data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
  65. data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
  66. data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
  67. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
  68. data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
  69. data/vendor/faiss/faiss/IndexPQ.h +3 -2
  70. data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
  71. data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
  72. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
  73. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  74. data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
  75. data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
  76. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
  77. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
  78. data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
  79. data/vendor/faiss/faiss/IndexRefine.h +4 -4
  80. data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
  81. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
  82. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
  83. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
  84. data/vendor/faiss/faiss/IndexShards.cpp +10 -9
  85. data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
  86. data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
  87. data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
  88. data/vendor/faiss/faiss/MetaIndexes.h +1 -1
  89. data/vendor/faiss/faiss/MetricType.h +14 -7
  90. data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
  91. data/vendor/faiss/faiss/SuperKMeans.h +97 -0
  92. data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
  93. data/vendor/faiss/faiss/VectorTransform.h +16 -16
  94. data/vendor/faiss/faiss/build.cpp +23 -0
  95. data/vendor/faiss/faiss/build.h +15 -0
  96. data/vendor/faiss/faiss/clone_index.cpp +48 -47
  97. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
  98. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
  99. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
  100. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
  101. data/vendor/faiss/faiss/factory_tools.cpp +5 -0
  102. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
  106. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
  107. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
  108. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
  109. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
  110. data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
  111. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
  112. data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
  113. data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
  114. data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
  115. data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
  116. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
  117. data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
  118. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
  119. data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
  120. data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
  121. data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
  122. data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
  123. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
  124. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
  125. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
  126. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
  127. data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
  128. data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
  129. data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
  130. data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
  131. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
  132. data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
  133. data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
  134. data/vendor/faiss/faiss/impl/FaissException.h +50 -3
  135. data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
  136. data/vendor/faiss/faiss/impl/HNSW.h +13 -34
  137. data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
  138. data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
  139. data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
  140. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
  141. data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
  142. data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
  143. data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
  144. data/vendor/faiss/faiss/impl/NSG.h +4 -4
  145. data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
  146. data/vendor/faiss/faiss/impl/Panorama.h +258 -87
  147. data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
  148. data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
  149. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
  150. data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
  151. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
  152. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
  153. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
  154. data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
  155. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
  156. data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
  157. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
  158. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
  159. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  160. data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
  161. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
  162. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
  163. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
  164. data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
  165. data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
  166. data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
  167. data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
  168. data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
  169. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
  170. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
  171. data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
  172. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
  173. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
  174. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
  175. data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
  176. data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
  177. data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
  178. data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
  179. data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
  180. data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
  181. data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
  182. data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
  183. data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
  184. data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
  185. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
  186. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
  187. data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
  188. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
  189. data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
  190. data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
  191. data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
  192. data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
  193. data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
  194. data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
  195. data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
  196. data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
  197. data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
  198. data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
  199. data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
  200. data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
  201. data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
  202. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
  203. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
  204. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
  205. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
  206. data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
  207. data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
  208. data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
  209. data/vendor/faiss/faiss/impl/io.cpp +6 -6
  210. data/vendor/faiss/faiss/impl/io_macros.h +33 -16
  211. data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
  212. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
  213. data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
  214. data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
  215. data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
  216. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
  217. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
  218. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
  219. data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
  220. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
  221. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
  222. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
  223. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
  224. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
  225. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
  226. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
  227. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
  228. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
  229. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
  230. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
  231. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
  232. data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
  233. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
  234. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
  235. data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
  236. data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
  237. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
  238. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
  239. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
  240. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
  241. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
  242. data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
  243. data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
  244. data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
  245. data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
  246. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
  247. data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
  248. data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
  249. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
  250. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
  251. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
  252. data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
  253. data/vendor/faiss/faiss/impl/svs_io.h +8 -2
  254. data/vendor/faiss/faiss/index_factory.cpp +86 -18
  255. data/vendor/faiss/faiss/index_io.h +24 -0
  256. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
  257. data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
  258. data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
  259. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
  260. data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
  261. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
  262. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
  263. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  264. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
  265. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
  266. data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
  267. data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
  268. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
  269. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
  270. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
  271. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
  272. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
  273. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
  274. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
  275. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
  276. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
  277. data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
  278. data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
  279. data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
  280. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
  281. data/vendor/faiss/faiss/utils/distances.cpp +390 -560
  282. data/vendor/faiss/faiss/utils/distances.h +20 -1
  283. data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
  284. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
  285. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
  286. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
  287. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
  288. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
  289. data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
  290. data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
  291. data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
  292. data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
  293. data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
  294. data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
  295. data/vendor/faiss/faiss/utils/hamming.h +92 -2
  296. data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
  297. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
  298. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
  299. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
  300. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
  301. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
  302. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
  303. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
  304. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
  305. data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
  306. data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
  307. data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
  308. data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
  309. data/vendor/faiss/faiss/utils/partitioning.h +31 -0
  310. data/vendor/faiss/faiss/utils/popcount.h +29 -0
  311. data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
  312. data/vendor/faiss/faiss/utils/prefetch.h +2 -2
  313. data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
  314. data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
  315. data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
  316. data/vendor/faiss/faiss/utils/random.cpp +6 -6
  317. data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
  318. data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
  319. data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
  320. data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
  321. data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
  322. data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
  323. data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
  324. data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
  325. data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
  326. data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
  327. data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
  328. data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
  329. data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
  330. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
  331. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
  332. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
  333. data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
  334. data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
  335. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
  336. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
  337. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
  338. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
  339. data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
  340. data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
  341. data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
  342. data/vendor/faiss/faiss/utils/utils.cpp +5 -5
  343. data/vendor/faiss/faiss/utils/utils.h +3 -3
  344. metadata +119 -34
  345. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
  346. data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
  347. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
  348. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
  349. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
  350. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
  351. data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
  352. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
  353. data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
  354. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
  355. data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
  356. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
  357. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
  358. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
  359. data/vendor/faiss/faiss/utils/simdlib.h +0 -42
  360. data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
  361. /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
@@ -0,0 +1,57 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #ifdef COMPILE_SIMD_AVX2
9
+
10
+ #include <faiss/utils/simd_impl/super_kmeans_kernels.h>
11
+
12
+ #include <immintrin.h>
13
+
14
+ namespace faiss {
15
+ namespace detail {
16
+
17
+ namespace {
18
+
19
+ // Reduce 8 float lanes of an AVX2 register to a scalar sum.
20
+ // Uses a shuffle+add tree instead of two _mm_hadd_ps. On Skylake-class
21
+ // cores, hadd is 3-cycle latency / 2-uop, while movehdup/movehl/add_ss
22
+ // are single-uop, single-cycle ops.
23
+ inline float horizontal_sum_avx2(__m256 v) {
24
+ __m128 lo = _mm256_castps256_ps128(v);
25
+ __m128 hi = _mm256_extractf128_ps(v, 1);
26
+ __m128 sum128 = _mm_add_ps(lo, hi); // 4 lanes
27
+ __m128 shuf = _mm_movehdup_ps(sum128); // [s1, s1, s3, s3]
28
+ __m128 sums = _mm_add_ps(sum128, shuf); // [s0+s1, _, s2+s3, _]
29
+ shuf = _mm_movehl_ps(shuf, sums); // [s2+s3, _, _, _]
30
+ sums = _mm_add_ss(sums, shuf); // (s0+s1) + (s2+s3)
31
+ return _mm_cvtss_f32(sums);
32
+ }
33
+
34
+ } // namespace
35
+
36
+ template <>
37
+ float block_l2<SIMDLevel::AVX2>(const float* x, const float* y, int n) {
38
+ __m256 acc = _mm256_setzero_ps();
39
+ int m = 0;
40
+ for (; m + 8 <= n; m += 8) {
41
+ __m256 xv = _mm256_loadu_ps(x + m);
42
+ __m256 yv = _mm256_loadu_ps(y + m);
43
+ __m256 diff = _mm256_sub_ps(xv, yv);
44
+ acc = _mm256_fmadd_ps(diff, diff, acc);
45
+ }
46
+ float result = horizontal_sum_avx2(acc);
47
+ for (; m < n; ++m) {
48
+ const float d = x[m] - y[m];
49
+ result += d * d;
50
+ }
51
+ return result;
52
+ }
53
+
54
+ } // namespace detail
55
+ } // namespace faiss
56
+
57
+ #endif // COMPILE_SIMD_AVX2
@@ -0,0 +1,45 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #ifdef COMPILE_SIMD_AVX512
9
+
10
+ #include <faiss/utils/simd_impl/super_kmeans_kernels.h>
11
+
12
+ #include <immintrin.h>
13
+
14
+ namespace faiss {
15
+ namespace detail {
16
+
17
+ template <>
18
+ float block_l2<SIMDLevel::AVX512>(const float* x, const float* y, int n) {
19
+ __m512 acc = _mm512_setzero_ps();
20
+ int m = 0;
21
+ for (; m + 16 <= n; m += 16) {
22
+ __m512 xv = _mm512_loadu_ps(x + m);
23
+ __m512 yv = _mm512_loadu_ps(y + m);
24
+ __m512 diff = _mm512_sub_ps(xv, yv);
25
+ acc = _mm512_fmadd_ps(diff, diff, acc);
26
+ }
27
+ // _mm512_reduce_add_ps: on modern AVX-512 SKUs (Cascade Lake+, Sapphire
28
+ // Rapids) GCC/Clang lower this to a shuffle+add tree, ~5-cycle latency.
29
+ // On older AVX-512 SKUs (Skylake-X, Ice Lake) the cross-lane reduction
30
+ // can be ~20 cycles. Acceptable here because n ~ pdx_block_size = 64
31
+ // (4 iterations of 16-wide accumulation), so per-block work dominates
32
+ // the reduction cost. AVX2 uses a manual shuffle+add tree explicitly
33
+ // to avoid `_mm_hadd_ps` overhead, where the ratio is reversed.
34
+ float result = _mm512_reduce_add_ps(acc);
35
+ for (; m < n; ++m) {
36
+ const float d = x[m] - y[m];
37
+ result += d * d;
38
+ }
39
+ return result;
40
+ }
41
+
42
+ } // namespace detail
43
+ } // namespace faiss
44
+
45
+ #endif // COMPILE_SIMD_AVX512
@@ -10,6 +10,7 @@
10
10
  #include <cstdlib>
11
11
 
12
12
  #include <faiss/impl/FaissAssert.h>
13
+ #include <faiss/impl/simd_dispatch.h>
13
14
 
14
15
  namespace faiss {
15
16
 
@@ -47,7 +48,7 @@ static bool has_sve() {
47
48
  #endif // __linux__ / __APPLE__ / other
48
49
 
49
50
  #else // Not ARM64
50
- static bool has_sve() {
51
+ [[maybe_unused]] static bool has_sve() {
51
52
  return false;
52
53
  }
53
54
  #endif
@@ -189,12 +190,15 @@ SIMDLevel SIMDConfig::auto_detect_simd_level() {
189
190
  }
190
191
  #endif
191
192
 
193
+ #if defined(__riscv) && defined(COMPILE_SIMD_RISCV_RVV)
194
+ // RVV is always available on RISC-V builds compiled with rv64gcv.
195
+ supported_simd_levels |= (1 << static_cast<int>(SIMDLevel::RISCV_RVV));
196
+ detected_level = SIMDLevel::RISCV_RVV;
197
+ #endif
198
+
192
199
  return detected_level;
193
200
  }
194
201
 
195
- // Include private header for DISPATCH_SIMDLevel macro
196
- #include <faiss/impl/simd_dispatch.h>
197
-
198
202
  namespace {
199
203
 
200
204
  template <SIMDLevel Level>
@@ -205,7 +209,8 @@ SIMDLevel get_dispatched_level_impl() {
205
209
  } // namespace
206
210
 
207
211
  SIMDLevel SIMDConfig::get_dispatched_level() {
208
- DISPATCH_SIMDLevel(get_dispatched_level_impl);
212
+ return with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_ALL>(
213
+ [&]<SIMDLevel SL>() { return get_dispatched_level_impl<SL>(); });
209
214
  }
210
215
 
211
216
  #else // Static mode
@@ -260,6 +265,8 @@ SIMDLevel SIMDConfig::auto_detect_simd_level() {
260
265
  return SIMDLevel::ARM_SVE;
261
266
  #elif defined(COMPILE_SIMD_ARM_NEON)
262
267
  return SIMDLevel::ARM_NEON;
268
+ #elif defined(COMPILE_SIMD_RISCV_RVV)
269
+ return SIMDLevel::RISCV_RVV;
263
270
  #else
264
271
  return SIMDLevel::NONE;
265
272
  #endif
@@ -290,6 +297,8 @@ std::string to_string(SIMDLevel level) {
290
297
  return "ARM_NEON";
291
298
  case SIMDLevel::ARM_SVE:
292
299
  return "ARM_SVE";
300
+ case SIMDLevel::RISCV_RVV:
301
+ return "RISCV_RVV";
293
302
  case SIMDLevel::COUNT:
294
303
  default:
295
304
  throw FaissException("Invalid SIMDLevel");
@@ -315,6 +324,9 @@ SIMDLevel to_simd_level(const std::string& level_str) {
315
324
  if (level_str == "ARM_SVE") {
316
325
  return SIMDLevel::ARM_SVE;
317
326
  }
327
+ if (level_str == "RISCV_RVV") {
328
+ return SIMDLevel::RISCV_RVV;
329
+ }
318
330
 
319
331
  throw FaissException("Invalid SIMD level string: " + level_str);
320
332
  }
@@ -25,13 +25,105 @@ enum class SIMDLevel {
25
25
  // arm & aarch64
26
26
  ARM_NEON,
27
27
  ARM_SVE, // Scalable Vector Extension (ARMv8.2+)
28
+ // riscv
29
+ RISCV_RVV, // RISC-V Vector Extension (rv64gcv)
28
30
 
29
31
  COUNT
30
32
  };
31
33
 
34
+ /***************************************************************
35
+ * SINGLE_SIMD_LEVEL: the SIMD level for code without explicit SL context.
36
+ *
37
+ * In static mode: resolves to the compiled-in level (zero overhead).
38
+ * In DD mode: resolves to the baseline level — ARM_NEON when COMPILE_SIMD_ARM_NEON is defined, otherwise NONE (emulated scalar). Code using
39
+ * SINGLE_SIMD_LEVEL is meant to be incrementally migrated to use
40
+ * proper SL dispatch — SINGLE_SIMD_LEVEL is migration scaffolding,
41
+ * not permanent API.
42
+ ***************************************************************/
43
+ #ifdef FAISS_ENABLE_DD
44
+ // DD dispatches to the highest optional SIMD level at runtime.
45
+ // On ARM64, NEON is mandatory (always available via COMPILE_SIMD_ARM_NEON),
46
+ // so the baseline is ARM_NEON. On x86, the baseline is NONE.
47
+ #if defined(COMPILE_SIMD_ARM_NEON)
48
+ inline constexpr SIMDLevel SINGLE_SIMD_LEVEL = SIMDLevel::ARM_NEON;
49
+ #else
50
+ inline constexpr SIMDLevel SINGLE_SIMD_LEVEL = SIMDLevel::NONE;
51
+ #endif
52
+ #else
53
+ #if defined(COMPILE_SIMD_AVX512_SPR)
54
+ inline constexpr SIMDLevel SINGLE_SIMD_LEVEL = SIMDLevel::AVX512_SPR;
55
+ #elif defined(COMPILE_SIMD_AVX512)
56
+ inline constexpr SIMDLevel SINGLE_SIMD_LEVEL = SIMDLevel::AVX512;
57
+ #elif defined(COMPILE_SIMD_AVX2)
58
+ inline constexpr SIMDLevel SINGLE_SIMD_LEVEL = SIMDLevel::AVX2;
59
+ #elif defined(COMPILE_SIMD_ARM_SVE)
60
+ inline constexpr SIMDLevel SINGLE_SIMD_LEVEL = SIMDLevel::ARM_SVE;
61
+ #elif defined(COMPILE_SIMD_ARM_NEON)
62
+ inline constexpr SIMDLevel SINGLE_SIMD_LEVEL = SIMDLevel::ARM_NEON;
63
+ #elif defined(COMPILE_SIMD_RISCV_RVV)
64
+ inline constexpr SIMDLevel SINGLE_SIMD_LEVEL = SIMDLevel::RISCV_RVV;
65
+ #else
66
+ inline constexpr SIMDLevel SINGLE_SIMD_LEVEL = SIMDLevel::NONE;
67
+ #endif
68
+ #endif
69
+
70
+ /***************************************************************
71
+ * Helper to select the appropriate 256-bit SIMD level.
72
+ *
73
+ * For 256-bit SIMD types (simd16uint16, simd32uint8, etc.), maps:
74
+ * AVX512/AVX512_SPR → AVX2 (256-bit ops use AVX2 instructions)
75
+ * AVX2 → AVX2
76
+ * ARM_NEON/ARM_SVE → ARM_NEON
77
+ * RISCV_RVV/NONE → NONE
78
+ ***************************************************************/
79
+ template <SIMDLevel SL>
80
+ struct simd256_level_selector {
81
+ static constexpr SIMDLevel value =
82
+ (SL == SIMDLevel::AVX512 || SL == SIMDLevel::AVX512_SPR)
83
+ ? SIMDLevel::AVX2
84
+ : (SL == SIMDLevel::ARM_SVE ? SIMDLevel::ARM_NEON
85
+ : SL == SIMDLevel::RISCV_RVV ? SIMDLevel::NONE
86
+ : SL);
87
+ };
88
+
89
+ /// SINGLE_SIMD_LEVEL mapped to 256-bit: use this for 256-bit simd types
90
+ /// (simd16uint16, simd32uint8, etc.) which don't have AVX512/SVE
91
+ /// specializations.
92
+ inline constexpr SIMDLevel SINGLE_SIMD_LEVEL_256 =
93
+ simd256_level_selector<SINGLE_SIMD_LEVEL>::value;
94
+
95
+ /***************************************************************
96
+ * Helper to select the appropriate 512-bit SIMD level.
97
+ *
98
+ * For 512-bit SIMD types (simd32uint16, simd64uint8, etc.), maps:
99
+ * AVX512_SPR → AVX512 (512-bit ops share the same instructions)
100
+ * AVX512 → AVX512
101
+ * RISCV_RVV/NONE → NONE
102
+ ***************************************************************/
103
+ template <SIMDLevel SL>
104
+ struct simd512_level_selector {
105
+ static constexpr SIMDLevel value = (SL == SIMDLevel::AVX512_SPR)
106
+ ? SIMDLevel::AVX512
107
+ : (SL == SIMDLevel::RISCV_RVV) ? SIMDLevel::NONE
108
+ : SL;
109
+ };
110
+
111
+ /// SINGLE_SIMD_LEVEL mapped to 512-bit: use this for 512-bit simd types
112
+ /// (simd32uint16, simd64uint8, etc.) which don't have AVX512_SPR
113
+ /// specializations (AVX512_SPR uses the same 512-bit integer ops as AVX512).
114
+ inline constexpr SIMDLevel SINGLE_SIMD_LEVEL_512 =
115
+ simd512_level_selector<SINGLE_SIMD_LEVEL>::value;
116
+
32
117
  /// Number of float32 lanes for a given SIMD level.
118
+ /// ARM_SVE (128–2048 bits) and RISCV_RVV are variable-width; no single constant is correct.
33
119
  template <SIMDLevel SL>
34
120
  constexpr int simd_width() {
121
+ static_assert(
122
+ SL != SIMDLevel::ARM_SVE,
123
+ "simd_width<ARM_SVE> is not supported: SVE is variable-width");
124
+ static_assert(
125
+ SL != SIMDLevel::RISCV_RVV,
126
+ "simd_width<RISCV_RVV> is not supported: RVV is variable-width");
35
127
  if constexpr (SL == SIMDLevel::AVX512 || SL == SIMDLevel::AVX512_SPR)
36
128
  return 16;
37
129
  else if constexpr (SL == SIMDLevel::AVX2 || SL == SIMDLevel::ARM_NEON)
@@ -82,7 +174,7 @@ struct FAISS_API SIMDConfig {
82
174
  static bool is_simd_level_available(SIMDLevel level);
83
175
 
84
176
  /// Returns the SIMD level via the dispatch mechanism.
85
- /// In DD mode, uses DISPATCH_SIMDLevel internally.
177
+ /// In DD mode, uses with_simd_level internally.
86
178
  /// In static mode, returns the compiled-in level.
87
179
  /// Useful for verification: get_level() == get_dispatched_level()
88
180
  static SIMDLevel get_dispatched_level();
@@ -134,9 +134,9 @@ void fvec_argsort(size_t n, const float* vals, size_t* perm) {
134
134
  }
135
135
 
136
136
  void fvec_argsort_parallel(size_t n, const float* vals, size_t* perm) {
137
- size_t* perm2 = new size_t[n];
137
+ std::vector<size_t> perm2(n);
138
138
  // 2 result tables, during merging, flip between them
139
- size_t *permB = perm2, *permA = perm;
139
+ size_t *permB = perm2.data(), *permA = perm;
140
140
 
141
141
  int nt = omp_get_max_threads();
142
142
  { // prepare correct permutation so that the result ends in perm
@@ -148,8 +148,8 @@ void fvec_argsort_parallel(size_t n, const float* vals, size_t* perm) {
148
148
  }
149
149
  }
150
150
 
151
- #pragma omp parallel
152
- for (size_t i = 0; i < n; i++) {
151
+ #pragma omp parallel for
152
+ for (int64_t i = 0; i < static_cast<int64_t>(n); i++) {
153
153
  permA[i] = i;
154
154
  }
155
155
 
@@ -184,7 +184,6 @@ void fvec_argsort_parallel(size_t n, const float* vals, size_t* perm) {
184
184
  } else {
185
185
  int t0 = s * sub_nt / sub_nseg1;
186
186
  int t1 = (s + 1) * sub_nt / sub_nseg1;
187
- printf("merge %d %d, %d threads\n", s, s + 1, t1 - t0);
188
187
  parallel_merge(
189
188
  permA, permB, segs[s], segs[s + 1], t1 - t0, comp);
190
189
  }
@@ -197,7 +196,6 @@ void fvec_argsort_parallel(size_t n, const float* vals, size_t* perm) {
197
196
  }
198
197
  assert(permA == perm);
199
198
  omp_set_nested(prev_nested);
200
- delete[] perm2;
201
199
  }
202
200
 
203
201
  /*****************************************************************************
@@ -226,7 +224,7 @@ void bucket_sort_ref(
226
224
  for (size_t i = 0; i < vmax; i++) {
227
225
  lims[i + 1] += lims[i];
228
226
  }
229
- FAISS_THROW_IF_NOT(lims[vmax] == nval);
227
+ FAISS_THROW_IF_NOT(static_cast<size_t>(lims[vmax]) == nval);
230
228
  double t2 = getmillisecs();
231
229
  // populate buckets
232
230
  for (size_t i = 0; i < nval; i++) {
@@ -286,7 +284,7 @@ void bucket_sort_parallel(
286
284
  for (size_t i = 0; i < vmax; i++) {
287
285
  lims[i + 1] += lims[i];
288
286
  }
289
- FAISS_THROW_IF_NOT(lims[vmax] == nval);
287
+ FAISS_THROW_IF_NOT(static_cast<size_t>(lims[vmax]) == nval);
290
288
  }
291
289
  #pragma omp barrier
292
290
 
@@ -341,7 +339,8 @@ void bucket_sort_inplace_ref(
341
339
  double t0 = getmillisecs();
342
340
  size_t nval = nrow * ncol;
343
341
  FAISS_THROW_IF_NOT(
344
- nbucket < nval); // unclear what would happen in this case...
342
+ static_cast<size_t>(nbucket) <
343
+ nval); // unclear what would happen in this case...
345
344
 
346
345
  memset(lims, 0, sizeof(*lims) * (nbucket + 1));
347
346
  for (size_t i = 0; i < nval; i++) {
@@ -350,14 +349,14 @@ void bucket_sort_inplace_ref(
350
349
  }
351
350
  double t1 = getmillisecs();
352
351
  // compute cumulative sum
353
- for (size_t i = 0; i < nbucket; i++) {
352
+ for (size_t i = 0; i < static_cast<size_t>(nbucket); i++) {
354
353
  lims[i + 1] += lims[i];
355
354
  }
356
- FAISS_THROW_IF_NOT(lims[nbucket] == nval);
355
+ FAISS_THROW_IF_NOT(static_cast<size_t>(lims[nbucket]) == nval);
357
356
  double t2 = getmillisecs();
358
357
 
359
358
  std::vector<size_t> ptrs(nbucket);
360
- for (size_t i = 0; i < nbucket; i++) {
359
+ for (size_t i = 0; i < static_cast<size_t>(nbucket); i++) {
361
360
  ptrs[i] = lims[i];
362
361
  }
363
362
 
@@ -378,7 +377,8 @@ void bucket_sort_inplace_ref(
378
377
  } else {
379
378
  // start new loop
380
379
  for (; init_bucket_no < nbucket; init_bucket_no++) {
381
- if (ptrs[init_bucket_no] < lims[init_bucket_no + 1]) {
380
+ if (ptrs[init_bucket_no] <
381
+ static_cast<size_t>(lims[init_bucket_no + 1])) {
382
382
  break;
383
383
  }
384
384
  }
@@ -390,7 +390,7 @@ void bucket_sort_inplace_ref(
390
390
  }
391
391
  }
392
392
 
393
- for (size_t i = 0; i < nbucket; i++) {
393
+ for (size_t i = 0; i < static_cast<size_t>(nbucket); i++) {
394
394
  assert(ptrs[i] == lims[i + 1]);
395
395
  }
396
396
  double t3 = getmillisecs();
@@ -407,8 +407,8 @@ struct ToWrite {
407
407
  std::vector<TI> rows;
408
408
  std::vector<size_t> lims;
409
409
 
410
- explicit ToWrite(TI nbucket) : nbucket(nbucket) {
411
- lims.resize(nbucket + 1);
410
+ explicit ToWrite(TI nbucket_in) : nbucket(nbucket_in) {
411
+ lims.resize(nbucket_in + 1);
412
412
  }
413
413
 
414
414
  /// add one element (row) to write in bucket b
@@ -428,7 +428,7 @@ struct ToWrite {
428
428
  lims[buckets[i] + 1]++;
429
429
  }
430
430
  // compute cumulative sum
431
- for (size_t i = 0; i < nbucket; i++) {
431
+ for (size_t i = 0; i < static_cast<size_t>(nbucket); i++) {
432
432
  lims[i + 1] += lims[i];
433
433
  }
434
434
  FAISS_THROW_IF_NOT(lims[nbucket] == buckets.size());
@@ -466,7 +466,8 @@ void bucket_sort_inplace_parallel(
466
466
  std::vector<ToWrite<TI>> all_to_write;
467
467
  size_t nval = nrow * ncol;
468
468
  FAISS_THROW_IF_NOT(
469
- nbucket < nval); // unclear what would happen in this case...
469
+ static_cast<size_t>(nbucket) <
470
+ nval); // unclear what would happen in this case...
470
471
 
471
472
  // try to keep size of all_to_write < 5GiB
472
473
  // but we need at least one element per bucket
@@ -498,7 +499,7 @@ void bucket_sort_inplace_parallel(
498
499
  }
499
500
  #pragma omp critical
500
501
  { // accumulate histograms (not shifted indices to prepare cumsum)
501
- for (size_t i = 0; i < nbucket; i++) {
502
+ for (size_t i = 0; i < static_cast<size_t>(nbucket); i++) {
502
503
  lims[i + 1] += local_lims[i];
503
504
  }
504
505
  all_to_write.push_back(ToWrite<TI>(nbucket));
@@ -511,10 +512,10 @@ void bucket_sort_inplace_parallel(
511
512
  #pragma omp master
512
513
  {
513
514
  // compute cumulative sum
514
- for (size_t i = 0; i < nbucket; i++) {
515
+ for (size_t i = 0; i < static_cast<size_t>(nbucket); i++) {
515
516
  lims[i + 1] += lims[i];
516
517
  }
517
- FAISS_THROW_IF_NOT(lims[nbucket] == nval);
518
+ FAISS_THROW_IF_NOT(static_cast<size_t>(lims[nbucket]) == nval);
518
519
  // at this point lims is final (read only!)
519
520
 
520
521
  memcpy(ptrs.data(), lims, sizeof(lims[0]) * nbucket);
@@ -559,19 +560,22 @@ void bucket_sort_inplace_parallel(
559
560
  printf("ROUND %d n_to_write=%zd\n", round, n_to_write);
560
561
  }
561
562
  if (verbose > 2) {
562
- for (size_t b = 0; b < nbucket; b++) {
563
+ for (size_t b = 0; b < static_cast<size_t>(nbucket); b++) {
563
564
  printf(" b=%zd [", b);
564
- for (size_t i = lims[b]; i < lims[b + 1]; i++) {
565
+ for (size_t i = static_cast<size_t>(lims[b]);
566
+ i < static_cast<size_t>(lims[b + 1]);
567
+ i++) {
565
568
  printf(" %s%d",
566
569
  ptrs[b] == i ? ">" : "",
567
570
  int(vals[i]));
568
571
  }
569
572
  printf(" %s] %s\n",
570
- ptrs[b] == lims[b + 1] ? ">" : "",
573
+ ptrs[b] == static_cast<size_t>(lims[b + 1]) ? ">"
574
+ : "",
571
575
  did_wrap[b] ? "w" : "");
572
576
  }
573
577
  printf("To write\n");
574
- for (size_t b = 0; b < nbucket; b++) {
578
+ for (size_t b = 0; b < static_cast<size_t>(nbucket); b++) {
575
579
  printf(" b=%zd ", b);
576
580
  const char* sep = "[";
577
581
  for (const ToWrite<TI>& to_write_2 : all_to_write) {
@@ -609,7 +613,7 @@ void bucket_sort_inplace_parallel(
609
613
  rank,
610
614
  idx);
611
615
  }
612
- if (idx < lims[b + 1]) {
616
+ if (idx < static_cast<size_t>(lims[b + 1])) {
613
617
  ptrs[b]++;
614
618
  } else {
615
619
  // wrapping around
@@ -709,7 +713,7 @@ inline int64_t hash_function(int64_t x) {
709
713
  void hashtable_int64_to_int64_init(int log2_capacity, int64_t* tab) {
710
714
  size_t capacity = (size_t)1 << log2_capacity;
711
715
  #pragma omp parallel for
712
- for (int64_t i = 0; i < capacity; i++) {
716
+ for (int64_t i = 0; i < static_cast<int64_t>(capacity); i++) {
713
717
  tab[2 * i] = -1;
714
718
  tab[2 * i + 1] = -1;
715
719
  }
@@ -729,7 +733,7 @@ void hashtable_int64_to_int64_add(
729
733
  size_t nbucket = (size_t)1 << log2_nbucket;
730
734
 
731
735
  #pragma omp parallel for
732
- for (int64_t i = 0; i < n; i++) {
736
+ for (int64_t i = 0; i < static_cast<int64_t>(n); i++) {
733
737
  hk[i] = hash_function(keys[i]) & mask;
734
738
  bucket_no[i] = hk[i] >> (log2_capacity - log2_nbucket);
735
739
  }
@@ -746,11 +750,13 @@ void hashtable_int64_to_int64_add(
746
750
 
747
751
  int num_errors = 0;
748
752
  #pragma omp parallel for reduction(+ : num_errors)
749
- for (int64_t bucket = 0; bucket < nbucket; bucket++) {
753
+ for (int64_t bucket = 0; bucket < static_cast<int64_t>(nbucket); bucket++) {
750
754
  size_t k0 = bucket << (log2_capacity - log2_nbucket);
751
755
  size_t k1 = (bucket + 1) << (log2_capacity - log2_nbucket);
752
756
 
753
- for (size_t i = lims[bucket]; i < lims[bucket + 1]; i++) {
757
+ for (size_t i = static_cast<size_t>(lims[bucket]);
758
+ i < static_cast<size_t>(lims[bucket + 1]);
759
+ i++) {
754
760
  int64_t j = perm[i];
755
761
  assert(bucket_no[j] == bucket);
756
762
  assert(hk[j] >= k0 && hk[j] < k1);
@@ -768,7 +774,8 @@ void hashtable_int64_to_int64_add(
768
774
  if (slot == k1) {
769
775
  slot = k0;
770
776
  }
771
- if (slot == hk[j]) { // no free slot left in bucket
777
+ if (slot ==
778
+ static_cast<size_t>(hk[j])) { // no free slot left in bucket
772
779
  num_errors++;
773
780
  break;
774
781
  }
@@ -793,20 +800,24 @@ void hashtable_int64_to_int64_lookup(
793
800
  int log2_nbucket = log2_capacity_to_log2_nbucket(log2_capacity);
794
801
 
795
802
  #pragma omp parallel for
796
- for (int64_t i = 0; i < n; i++) {
803
+ for (int64_t i = 0; i < static_cast<int64_t>(n); i++) {
797
804
  int64_t k = keys[i];
798
- int64_t hashValue = hash_function(k) & mask;
799
- size_t slot = hashValue;
805
+ int64_t hk_i = hash_function(k) & mask;
806
+ size_t slot = hk_i;
800
807
 
801
808
  if (tab[2 * slot] == -1) { // not in table
802
809
  vals[i] = -1;
803
810
  } else if (tab[2 * slot] == k) { // found!
804
811
  vals[i] = tab[2 * slot + 1];
805
812
  } else { // need to search in [k0, k1)
806
- size_t bucket = hashValue >> (log2_capacity - log2_nbucket);
813
+ size_t bucket = hk_i >> (log2_capacity - log2_nbucket);
807
814
  size_t k0 = bucket << (log2_capacity - log2_nbucket);
808
815
  size_t k1 = (bucket + 1) << (log2_capacity - log2_nbucket);
809
816
  for (;;) {
817
+ if (tab[slot * 2] == -1) { // empty slot, key not in table
818
+ vals[i] = -1;
819
+ break;
820
+ }
810
821
  if (tab[slot * 2] == k) { // found!
811
822
  vals[i] = tab[2 * slot + 1];
812
823
  break;
@@ -815,7 +826,8 @@ void hashtable_int64_to_int64_lookup(
815
826
  if (slot == k1) {
816
827
  slot = k0;
817
828
  }
818
- if (slot == hashValue) { // bucket is full and not found
829
+ if (slot ==
830
+ static_cast<size_t>(hk_i)) { // bucket is full and not found
819
831
  vals[i] = -1;
820
832
  break;
821
833
  }
@@ -187,7 +187,7 @@ size_t get_mem_usage_kb() {
187
187
  char buf[256];
188
188
  if (!fgets(buf, 256, f))
189
189
  break;
190
- if (sscanf(buf, "VmRSS: %ld kB", &sz) == 1)
190
+ if (sscanf(buf, "VmRSS: %zu kB", &sz) == 1)
191
191
  break;
192
192
  }
193
193
  fclose(f);
@@ -307,7 +307,7 @@ size_t merge_result_table_with(
307
307
  std::vector<float> tmpD(k);
308
308
 
309
309
  #pragma omp for
310
- for (int64_t i = 0; i < n; i++) {
310
+ for (int64_t i = 0; i < static_cast<int64_t>(n); i++) {
311
311
  int64_t* lI0 = I0 + i * k;
312
312
  float* lD0 = D0 + i * k;
313
313
  const int64_t* lI1 = I1 + i * k;
@@ -437,10 +437,10 @@ void bincode_hist(size_t n, size_t nbits, const uint8_t* codes, int* hist) {
437
437
  std::vector<int> accu(d * 256);
438
438
  const uint8_t* c = codes;
439
439
  for (size_t i = 0; i < n; i++)
440
- for (int j = 0; j < d; j++)
440
+ for (size_t j = 0; j < d; j++)
441
441
  accu[j * 256 + *c++]++;
442
442
  memset(hist, 0, sizeof(*hist) * nbits);
443
- for (int i = 0; i < d; i++) {
443
+ for (size_t i = 0; i < d; i++) {
444
444
  const int* ai = accu.data() + i * 256;
445
445
  int* hi = hist + i * 8;
446
446
  for (int j = 0; j < 256; j++)
@@ -500,7 +500,7 @@ const float* fvecs_maybe_subsample(
500
500
  std::vector<int> subset(*n);
501
501
  rand_perm(subset.data(), *n, seed);
502
502
  float* x_subset = new float[n2 * d];
503
- for (int64_t i = 0; i < n2; i++)
503
+ for (int64_t i = 0; i < static_cast<int64_t>(n2); i++)
504
504
  memcpy(&x_subset[i * d], &x[subset[i] * size_t(d)], sizeof(x[0]) * d);
505
505
  *n = n2;
506
506
  return x_subset;
@@ -172,8 +172,8 @@ struct CombinerRangeKNN {
172
172
  T r2; /// range search radius
173
173
  bool keep_max; /// whether to keep max values instead of min.
174
174
 
175
- CombinerRangeKNN(int64_t nq, size_t k, T r2, bool keep_max)
176
- : nq(nq), k(k), r2(r2), keep_max(keep_max) {}
175
+ CombinerRangeKNN(int64_t nq_in, size_t k_in, T r2_in, bool keep_max_in)
176
+ : nq(nq_in), k(k_in), r2(r2_in), keep_max(keep_max_in) {}
177
177
 
178
178
  /// Knn search results
179
179
  const int64_t* I = nullptr; /// size nq * k
@@ -200,7 +200,7 @@ struct CodeSet {
200
200
  size_t d;
201
201
  std::set<std::vector<uint8_t>> s;
202
202
 
203
- explicit CodeSet(size_t d) : d(d) {}
203
+ explicit CodeSet(size_t d_in) : d(d_in) {}
204
204
  void insert(size_t n, const uint8_t* codes, bool* inserted);
205
205
  };
206
206