faiss 0.3.2 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (292) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/vendor/faiss/faiss/AutoTune.cpp +2 -2
  6. data/vendor/faiss/faiss/AutoTune.h +2 -2
  7. data/vendor/faiss/faiss/Clustering.cpp +2 -2
  8. data/vendor/faiss/faiss/Clustering.h +2 -2
  9. data/vendor/faiss/faiss/IVFlib.cpp +2 -2
  10. data/vendor/faiss/faiss/IVFlib.h +2 -2
  11. data/vendor/faiss/faiss/Index.cpp +6 -2
  12. data/vendor/faiss/faiss/Index.h +10 -3
  13. data/vendor/faiss/faiss/Index2Layer.cpp +2 -2
  14. data/vendor/faiss/faiss/Index2Layer.h +2 -2
  15. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +7 -7
  16. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +2 -2
  17. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +14 -16
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +2 -2
  19. data/vendor/faiss/faiss/IndexBinary.cpp +13 -2
  20. data/vendor/faiss/faiss/IndexBinary.h +8 -2
  21. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -3
  22. data/vendor/faiss/faiss/IndexBinaryFlat.h +2 -2
  23. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -2
  24. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +2 -2
  25. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +2 -7
  26. data/vendor/faiss/faiss/IndexBinaryHNSW.h +2 -2
  27. data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -3
  28. data/vendor/faiss/faiss/IndexBinaryHash.h +2 -2
  29. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +2 -2
  30. data/vendor/faiss/faiss/IndexBinaryIVF.h +2 -2
  31. data/vendor/faiss/faiss/IndexFastScan.cpp +10 -14
  32. data/vendor/faiss/faiss/IndexFastScan.h +11 -2
  33. data/vendor/faiss/faiss/IndexFlat.cpp +2 -3
  34. data/vendor/faiss/faiss/IndexFlat.h +2 -2
  35. data/vendor/faiss/faiss/IndexFlatCodes.cpp +11 -2
  36. data/vendor/faiss/faiss/IndexFlatCodes.h +5 -2
  37. data/vendor/faiss/faiss/IndexHNSW.cpp +13 -6
  38. data/vendor/faiss/faiss/IndexHNSW.h +2 -2
  39. data/vendor/faiss/faiss/IndexIDMap.cpp +19 -3
  40. data/vendor/faiss/faiss/IndexIDMap.h +5 -2
  41. data/vendor/faiss/faiss/IndexIVF.cpp +2 -3
  42. data/vendor/faiss/faiss/IndexIVF.h +5 -4
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +6 -7
  44. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +2 -2
  45. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +3 -14
  46. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +2 -4
  47. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +71 -34
  48. data/vendor/faiss/faiss/IndexIVFFastScan.h +19 -2
  49. data/vendor/faiss/faiss/IndexIVFFlat.cpp +2 -2
  50. data/vendor/faiss/faiss/IndexIVFFlat.h +2 -2
  51. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +2 -2
  52. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +2 -2
  53. data/vendor/faiss/faiss/IndexIVFPQ.cpp +2 -3
  54. data/vendor/faiss/faiss/IndexIVFPQ.h +2 -2
  55. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +7 -33
  56. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +2 -4
  57. data/vendor/faiss/faiss/IndexIVFPQR.cpp +2 -2
  58. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -2
  59. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +2 -3
  60. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -2
  61. data/vendor/faiss/faiss/IndexLSH.cpp +2 -3
  62. data/vendor/faiss/faiss/IndexLSH.h +2 -2
  63. data/vendor/faiss/faiss/IndexLattice.cpp +2 -2
  64. data/vendor/faiss/faiss/IndexLattice.h +2 -2
  65. data/vendor/faiss/faiss/IndexNNDescent.cpp +2 -2
  66. data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
  67. data/vendor/faiss/faiss/IndexNSG.cpp +2 -5
  68. data/vendor/faiss/faiss/IndexNSG.h +2 -2
  69. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +2 -2
  70. data/vendor/faiss/faiss/IndexNeuralNetCodec.h +2 -2
  71. data/vendor/faiss/faiss/IndexPQ.cpp +26 -26
  72. data/vendor/faiss/faiss/IndexPQ.h +2 -2
  73. data/vendor/faiss/faiss/IndexPQFastScan.cpp +2 -5
  74. data/vendor/faiss/faiss/IndexPQFastScan.h +2 -11
  75. data/vendor/faiss/faiss/IndexPreTransform.cpp +2 -2
  76. data/vendor/faiss/faiss/IndexPreTransform.h +2 -2
  77. data/vendor/faiss/faiss/IndexRefine.cpp +41 -4
  78. data/vendor/faiss/faiss/IndexRefine.h +9 -2
  79. data/vendor/faiss/faiss/IndexReplicas.cpp +2 -2
  80. data/vendor/faiss/faiss/IndexReplicas.h +2 -2
  81. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +2 -2
  82. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +2 -2
  83. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -3
  84. data/vendor/faiss/faiss/IndexScalarQuantizer.h +2 -2
  85. data/vendor/faiss/faiss/IndexShards.cpp +2 -2
  86. data/vendor/faiss/faiss/IndexShards.h +2 -2
  87. data/vendor/faiss/faiss/IndexShardsIVF.cpp +2 -2
  88. data/vendor/faiss/faiss/IndexShardsIVF.h +2 -2
  89. data/vendor/faiss/faiss/MatrixStats.cpp +2 -2
  90. data/vendor/faiss/faiss/MatrixStats.h +2 -2
  91. data/vendor/faiss/faiss/MetaIndexes.cpp +2 -3
  92. data/vendor/faiss/faiss/MetaIndexes.h +2 -2
  93. data/vendor/faiss/faiss/MetricType.h +2 -2
  94. data/vendor/faiss/faiss/VectorTransform.cpp +2 -2
  95. data/vendor/faiss/faiss/VectorTransform.h +2 -2
  96. data/vendor/faiss/faiss/clone_index.cpp +2 -2
  97. data/vendor/faiss/faiss/clone_index.h +2 -2
  98. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +2 -2
  99. data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +2 -2
  100. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +2 -2
  101. data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +44 -4
  102. data/vendor/faiss/faiss/cppcontrib/factory_tools.h +7 -2
  103. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +2 -2
  104. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +2 -2
  105. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +2 -2
  106. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +2 -2
  107. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +2 -2
  108. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +2 -2
  109. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +2 -2
  110. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +2 -2
  111. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +2 -5
  112. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +2 -2
  113. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +13 -13
  114. data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
  115. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +6 -6
  116. data/vendor/faiss/faiss/gpu/GpuDistance.h +11 -7
  117. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +2 -2
  118. data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +2 -2
  119. data/vendor/faiss/faiss/gpu/GpuIndex.h +8 -7
  120. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +2 -2
  121. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -3
  122. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +2 -2
  123. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +2 -2
  124. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +2 -2
  125. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +3 -3
  126. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +2 -2
  127. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +2 -2
  128. data/vendor/faiss/faiss/gpu/GpuResources.cpp +7 -2
  129. data/vendor/faiss/faiss/gpu/GpuResources.h +11 -4
  130. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +51 -21
  131. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +15 -5
  132. data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -2
  133. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +2 -2
  134. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +2 -2
  135. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +2 -2
  136. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +2 -2
  137. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
  138. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +2 -2
  139. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +2 -2
  140. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +2 -3
  141. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +2 -2
  142. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +2 -2
  143. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +2 -2
  144. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +54 -54
  145. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +80 -78
  146. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +51 -51
  147. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +2 -2
  148. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +3 -3
  149. data/vendor/faiss/faiss/gpu/test/TestGpuResidualQuantizer.cpp +70 -0
  150. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +74 -4
  151. data/vendor/faiss/faiss/gpu/test/TestUtils.h +2 -2
  152. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +2 -2
  153. data/vendor/faiss/faiss/gpu/utils/{RaftUtils.h → CuvsUtils.h} +12 -11
  154. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +2 -2
  155. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +2 -2
  156. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +2 -2
  157. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +2 -2
  158. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +2 -2
  159. data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
  160. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +79 -11
  161. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +17 -5
  162. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +2 -2
  163. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -2
  164. data/vendor/faiss/faiss/impl/CodePacker.cpp +2 -2
  165. data/vendor/faiss/faiss/impl/CodePacker.h +2 -2
  166. data/vendor/faiss/faiss/impl/DistanceComputer.h +2 -2
  167. data/vendor/faiss/faiss/impl/FaissAssert.h +2 -2
  168. data/vendor/faiss/faiss/impl/FaissException.cpp +2 -2
  169. data/vendor/faiss/faiss/impl/FaissException.h +2 -3
  170. data/vendor/faiss/faiss/impl/HNSW.cpp +24 -19
  171. data/vendor/faiss/faiss/impl/HNSW.h +12 -2
  172. data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -2
  173. data/vendor/faiss/faiss/impl/IDSelector.h +2 -2
  174. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +2 -2
  175. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +2 -2
  176. data/vendor/faiss/faiss/impl/LookupTableScaler.h +2 -2
  177. data/vendor/faiss/faiss/impl/NNDescent.cpp +2 -2
  178. data/vendor/faiss/faiss/impl/NNDescent.h +2 -2
  179. data/vendor/faiss/faiss/impl/NSG.cpp +27 -21
  180. data/vendor/faiss/faiss/impl/NSG.h +20 -8
  181. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +2 -2
  182. data/vendor/faiss/faiss/impl/PolysemousTraining.h +2 -2
  183. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +2 -4
  184. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +2 -2
  185. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +2 -2
  186. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +2 -2
  187. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -2
  188. data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
  189. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +2 -36
  190. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +3 -13
  191. data/vendor/faiss/faiss/impl/ResultHandler.h +2 -2
  192. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +2 -2
  193. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +2 -2
  194. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +2 -2
  195. data/vendor/faiss/faiss/impl/ThreadedIndex.h +2 -2
  196. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +2 -2
  197. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx512.h +2 -2
  198. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +2 -2
  199. data/vendor/faiss/faiss/impl/code_distance/code_distance-sve.h +440 -0
  200. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +55 -2
  201. data/vendor/faiss/faiss/impl/index_read.cpp +2 -5
  202. data/vendor/faiss/faiss/impl/index_read_utils.h +2 -2
  203. data/vendor/faiss/faiss/impl/index_write.cpp +2 -6
  204. data/vendor/faiss/faiss/impl/io.cpp +2 -2
  205. data/vendor/faiss/faiss/impl/io.h +2 -2
  206. data/vendor/faiss/faiss/impl/io_macros.h +2 -9
  207. data/vendor/faiss/faiss/impl/kmeans1d.cpp +2 -3
  208. data/vendor/faiss/faiss/impl/kmeans1d.h +2 -2
  209. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +2 -3
  210. data/vendor/faiss/faiss/impl/lattice_Zn.h +2 -2
  211. data/vendor/faiss/faiss/impl/platform_macros.h +12 -2
  212. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +2 -2
  213. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +20 -2
  214. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +2 -2
  215. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
  216. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +3 -3
  217. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +2 -2
  218. data/vendor/faiss/faiss/impl/simd_result_handlers.h +18 -18
  219. data/vendor/faiss/faiss/index_factory.cpp +20 -21
  220. data/vendor/faiss/faiss/index_factory.h +2 -2
  221. data/vendor/faiss/faiss/index_io.h +2 -2
  222. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +2 -2
  223. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +2 -2
  224. data/vendor/faiss/faiss/invlists/DirectMap.cpp +2 -2
  225. data/vendor/faiss/faiss/invlists/DirectMap.h +2 -2
  226. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +2 -2
  227. data/vendor/faiss/faiss/invlists/InvertedLists.h +2 -2
  228. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +2 -2
  229. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +2 -2
  230. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +2 -3
  231. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +2 -2
  232. data/vendor/faiss/faiss/python/python_callbacks.cpp +2 -2
  233. data/vendor/faiss/faiss/python/python_callbacks.h +2 -2
  234. data/vendor/faiss/faiss/utils/AlignedTable.h +5 -3
  235. data/vendor/faiss/faiss/utils/Heap.cpp +2 -2
  236. data/vendor/faiss/faiss/utils/Heap.h +2 -2
  237. data/vendor/faiss/faiss/utils/NeuralNet.cpp +11 -7
  238. data/vendor/faiss/faiss/utils/NeuralNet.h +2 -2
  239. data/vendor/faiss/faiss/utils/WorkerThread.cpp +2 -2
  240. data/vendor/faiss/faiss/utils/WorkerThread.h +2 -2
  241. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +2 -2
  242. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +2 -2
  243. data/vendor/faiss/faiss/utils/approx_topk/generic.h +2 -2
  244. data/vendor/faiss/faiss/utils/approx_topk/mode.h +2 -2
  245. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +2 -2
  246. data/vendor/faiss/faiss/utils/bf16.h +2 -2
  247. data/vendor/faiss/faiss/utils/distances.cpp +191 -2
  248. data/vendor/faiss/faiss/utils/distances.h +3 -3
  249. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +2 -2
  250. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
  251. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  252. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +2 -2
  253. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +2 -2
  254. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +2 -2
  255. data/vendor/faiss/faiss/utils/distances_simd.cpp +502 -3
  256. data/vendor/faiss/faiss/utils/extra_distances-inl.h +2 -2
  257. data/vendor/faiss/faiss/utils/extra_distances.cpp +2 -3
  258. data/vendor/faiss/faiss/utils/extra_distances.h +2 -2
  259. data/vendor/faiss/faiss/utils/fp16-arm.h +2 -2
  260. data/vendor/faiss/faiss/utils/fp16-fp16c.h +2 -2
  261. data/vendor/faiss/faiss/utils/fp16-inl.h +2 -2
  262. data/vendor/faiss/faiss/utils/fp16.h +2 -2
  263. data/vendor/faiss/faiss/utils/hamming-inl.h +2 -2
  264. data/vendor/faiss/faiss/utils/hamming.cpp +2 -3
  265. data/vendor/faiss/faiss/utils/hamming.h +2 -2
  266. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +2 -2
  267. data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +490 -0
  268. data/vendor/faiss/faiss/utils/hamming_distance/common.h +2 -2
  269. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +2 -2
  270. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +5 -2
  271. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +5 -5
  272. data/vendor/faiss/faiss/utils/ordered_key_value.h +2 -2
  273. data/vendor/faiss/faiss/utils/partitioning.cpp +2 -2
  274. data/vendor/faiss/faiss/utils/partitioning.h +2 -2
  275. data/vendor/faiss/faiss/utils/prefetch.h +2 -2
  276. data/vendor/faiss/faiss/utils/quantize_lut.cpp +2 -2
  277. data/vendor/faiss/faiss/utils/quantize_lut.h +2 -2
  278. data/vendor/faiss/faiss/utils/random.cpp +2 -2
  279. data/vendor/faiss/faiss/utils/random.h +2 -2
  280. data/vendor/faiss/faiss/utils/simdlib.h +2 -2
  281. data/vendor/faiss/faiss/utils/simdlib_avx2.h +2 -2
  282. data/vendor/faiss/faiss/utils/simdlib_avx512.h +2 -2
  283. data/vendor/faiss/faiss/utils/simdlib_emulated.h +2 -2
  284. data/vendor/faiss/faiss/utils/simdlib_neon.h +2 -2
  285. data/vendor/faiss/faiss/utils/simdlib_ppc64.h +2 -2
  286. data/vendor/faiss/faiss/utils/sorting.cpp +2 -2
  287. data/vendor/faiss/faiss/utils/sorting.h +2 -2
  288. data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +2 -2
  289. data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +2 -2
  290. data/vendor/faiss/faiss/utils/utils.cpp +7 -7
  291. data/vendor/faiss/faiss/utils/utils.h +4 -3
  292. metadata +9 -10
@@ -1,5 +1,5 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  *
4
4
  * This source code is licensed under the MIT license found in the
5
5
  * LICENSE file in the root directory of this source tree.
@@ -29,6 +29,10 @@
29
29
  #include <faiss/utils/transpose/transpose-avx2-inl.h>
30
30
  #endif
31
31
 
32
+ #ifdef __ARM_FEATURE_SVE
33
+ #include <arm_sve.h>
34
+ #endif
35
+
32
36
  #ifdef __aarch64__
33
37
  #include <arm_neon.h>
34
38
  #endif
@@ -2585,6 +2589,7 @@ size_t fvec_L2sqr_ny_nearest_y_transposed(
2585
2589
 
2586
2590
  float fvec_L1(const float* x, const float* y, size_t d) {
2587
2591
  __m256 msum1 = _mm256_setzero_ps();
2592
+ // signmask used for absolute value
2588
2593
  __m256 signmask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffffUL));
2589
2594
 
2590
2595
  while (d >= 8) {
@@ -2592,7 +2597,9 @@ float fvec_L1(const float* x, const float* y, size_t d) {
2592
2597
  x += 8;
2593
2598
  __m256 my = _mm256_loadu_ps(y);
2594
2599
  y += 8;
2600
+ // subtract
2595
2601
  const __m256 a_m_b = _mm256_sub_ps(mx, my);
2602
+ // find sum of absolute value of distances (manhattan distance)
2596
2603
  msum1 = _mm256_add_ps(msum1, _mm256_and_ps(signmask, a_m_b));
2597
2604
  d -= 8;
2598
2605
  }
@@ -2625,6 +2632,7 @@ float fvec_L1(const float* x, const float* y, size_t d) {
2625
2632
 
2626
2633
  float fvec_Linf(const float* x, const float* y, size_t d) {
2627
2634
  __m256 msum1 = _mm256_setzero_ps();
2635
+ // signmask used for absolute value
2628
2636
  __m256 signmask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffffUL));
2629
2637
 
2630
2638
  while (d >= 8) {
@@ -2632,7 +2640,9 @@ float fvec_Linf(const float* x, const float* y, size_t d) {
2632
2640
  x += 8;
2633
2641
  __m256 my = _mm256_loadu_ps(y);
2634
2642
  y += 8;
2643
+ // subtract
2635
2644
  const __m256 a_m_b = _mm256_sub_ps(mx, my);
2645
+ // find max of absolute value of distances (chebyshev distance)
2636
2646
  msum1 = _mm256_max_ps(msum1, _mm256_and_ps(signmask, a_m_b));
2637
2647
  d -= 8;
2638
2648
  }
@@ -2673,6 +2683,441 @@ float fvec_Linf(const float* x, const float* y, size_t d) {
2673
2683
  return fvec_Linf_ref(x, y, d);
2674
2684
  }
2675
2685
 
2686
+ #elif defined(__ARM_FEATURE_SVE)
2687
+
2688
// Element-wise operation pair for inner products, passed as the ElementOp
// template argument to the fvec_op_ny_sve_* kernels.
//   op(pg, x, y)       -> x * y        (svmul_f32_x)
//   merge(pg, z, x, y) -> z + x * y    (svmla_f32_x)
// Both use the "_x" predication form, so lanes inactive in pg carry
// unspecified values; callers only store or reduce active lanes.
+ struct ElementOpIP {
2689
+ static svfloat32_t op(svbool_t pg, svfloat32_t x, svfloat32_t y) {
2690
+ return svmul_f32_x(pg, x, y);
2691
+ }
2692
+ static svfloat32_t merge(
2693
+ svbool_t pg,
2694
+ svfloat32_t z,
2695
+ svfloat32_t x,
2696
+ svfloat32_t y) {
2697
+ return svmla_f32_x(pg, z, x, y);
2698
+ }
2699
+ };
2700
+
2701
// Kernel for dimension d == 1: writes dis[j] = ElementOp::op(x[0], y[j])
// for j in [0, ny).
//   dis : output, ny floats
//   x   : query, only x[0] is read (broadcast to every lane)
//   y   : ny contiguous floats
// The main loop consumes four full SVE registers per iteration. Its bound is
// strict (i + lanes4 < ny), so the predicated tail below always runs and
// covers the remaining 0..lanes4 elements.
+ template <typename ElementOp>
2702
+ void fvec_op_ny_sve_d1(float* dis, const float* x, const float* y, size_t ny) {
2703
+ const size_t lanes = svcntw();
2704
+ const size_t lanes2 = lanes * 2;
2705
+ const size_t lanes3 = lanes * 3;
2706
+ const size_t lanes4 = lanes * 4;
2707
+ const svbool_t pg = svptrue_b32();
2708
+ const svfloat32_t x0 = svdup_n_f32(x[0]);
2709
+ size_t i = 0;
2710
+ for (; i + lanes4 < ny; i += lanes4) {
2711
+ svfloat32_t y0 = svld1_f32(pg, y);
2712
+ svfloat32_t y1 = svld1_f32(pg, y + lanes);
2713
+ svfloat32_t y2 = svld1_f32(pg, y + lanes2);
2714
+ svfloat32_t y3 = svld1_f32(pg, y + lanes3);
2715
+ y0 = ElementOp::op(pg, x0, y0);
2716
+ y1 = ElementOp::op(pg, x0, y1);
2717
+ y2 = ElementOp::op(pg, x0, y2);
2718
+ y3 = ElementOp::op(pg, x0, y3);
2719
+ svst1_f32(pg, dis, y0);
2720
+ svst1_f32(pg, dis + lanes, y1);
2721
+ svst1_f32(pg, dis + lanes2, y2);
2722
+ svst1_f32(pg, dis + lanes3, y3);
2723
+ y += lanes4;
2724
+ dis += lanes4;
2725
+ }
2726
// Tail: one predicate per register; lanes at index >= ny are inactive, so
// the predicated loads/stores below do not touch elements past ny.
+ const svbool_t pg0 = svwhilelt_b32_u64(i, ny);
2727
+ const svbool_t pg1 = svwhilelt_b32_u64(i + lanes, ny);
2728
+ const svbool_t pg2 = svwhilelt_b32_u64(i + lanes2, ny);
2729
+ const svbool_t pg3 = svwhilelt_b32_u64(i + lanes3, ny);
2730
+ svfloat32_t y0 = svld1_f32(pg0, y);
2731
+ svfloat32_t y1 = svld1_f32(pg1, y + lanes);
2732
+ svfloat32_t y2 = svld1_f32(pg2, y + lanes2);
2733
+ svfloat32_t y3 = svld1_f32(pg3, y + lanes3);
2734
+ y0 = ElementOp::op(pg0, x0, y0);
2735
+ y1 = ElementOp::op(pg1, x0, y1);
2736
+ y2 = ElementOp::op(pg2, x0, y2);
2737
+ y3 = ElementOp::op(pg3, x0, y3);
2738
+ svst1_f32(pg0, dis, y0);
2739
+ svst1_f32(pg1, dis + lanes, y1);
2740
+ svst1_f32(pg2, dis + lanes2, y2);
2741
+ svst1_f32(pg3, dis + lanes3, y3);
2742
+ }
2743
+
2744
// Kernel for dimension d == 2: for each j in [0, ny) writes
//   dis[j] = merge(op(x[0], y[2j]), x[1], y[2j + 1]).
//   dis : output, ny floats
//   x   : query, x[0] and x[1] are read and broadcast
//   y   : ny vectors of 2 floats, stored back to back
// svld2_f32 de-interleaves the (dim0, dim1) pairs into two registers, so one
// load covers `lanes` database vectors. The main loop handles 2 * lanes
// vectors per iteration; the predicated tail handles the rest.
+ template <typename ElementOp>
2745
+ void fvec_op_ny_sve_d2(float* dis, const float* x, const float* y, size_t ny) {
2746
+ const size_t lanes = svcntw();
2747
+ const size_t lanes2 = lanes * 2;
2748
+ const size_t lanes4 = lanes * 4;
2749
+ const svbool_t pg = svptrue_b32();
2750
+ const svfloat32_t x0 = svdup_n_f32(x[0]);
2751
+ const svfloat32_t x1 = svdup_n_f32(x[1]);
2752
+ size_t i = 0;
2753
+ for (; i + lanes2 < ny; i += lanes2) {
2754
+ const svfloat32x2_t y0 = svld2_f32(pg, y);
2755
+ const svfloat32x2_t y1 = svld2_f32(pg, y + lanes2);
2756
+ svfloat32_t y00 = svget2_f32(y0, 0);
2757
+ const svfloat32_t y01 = svget2_f32(y0, 1);
2758
+ svfloat32_t y10 = svget2_f32(y1, 0);
2759
+ const svfloat32_t y11 = svget2_f32(y1, 1);
2760
+ y00 = ElementOp::op(pg, x0, y00);
2761
+ y10 = ElementOp::op(pg, x0, y10);
2762
+ y00 = ElementOp::merge(pg, y00, x1, y01);
2763
+ y10 = ElementOp::merge(pg, y10, x1, y11);
2764
+ svst1_f32(pg, dis, y00);
2765
+ svst1_f32(pg, dis + lanes, y10);
2766
// 2 * lanes vectors of 2 floats were consumed: 4 * lanes input floats,
// 2 * lanes output floats.
+ y += lanes4;
2767
+ dis += lanes2;
2768
+ }
2769
// Tail: predicates are expressed in units of database vectors (one output
// lane each), which is also the unit svld2 predicates on.
+ const svbool_t pg0 = svwhilelt_b32_u64(i, ny);
2770
+ const svbool_t pg1 = svwhilelt_b32_u64(i + lanes, ny);
2771
+ const svfloat32x2_t y0 = svld2_f32(pg0, y);
2772
+ const svfloat32x2_t y1 = svld2_f32(pg1, y + lanes2);
2773
+ svfloat32_t y00 = svget2_f32(y0, 0);
2774
+ const svfloat32_t y01 = svget2_f32(y0, 1);
2775
+ svfloat32_t y10 = svget2_f32(y1, 0);
2776
+ const svfloat32_t y11 = svget2_f32(y1, 1);
2777
+ y00 = ElementOp::op(pg0, x0, y00);
2778
+ y10 = ElementOp::op(pg1, x0, y10);
2779
+ y00 = ElementOp::merge(pg0, y00, x1, y01);
2780
+ y10 = ElementOp::merge(pg1, y10, x1, y11);
2781
+ svst1_f32(pg0, dis, y00);
2782
+ svst1_f32(pg1, dis + lanes, y10);
2783
+ }
2784
+
2785
// Kernel for dimension d == 4: for each j in [0, ny) writes
//   dis[j] = merge(op(x[0], y[4j]),     x[1], y[4j + 1])
//          + merge(op(x[2], y[4j + 2]), x[3], y[4j + 3]).
//   dis : output, ny floats
//   x   : query, x[0..3] are read and broadcast
//   y   : ny vectors of 4 floats, stored back to back
// svld4_f32 de-interleaves the four dimensions into four registers, so one
// load covers `lanes` database vectors. Two independent accumulators
// (y00, y02) are combined with a final add.
+ template <typename ElementOp>
2786
+ void fvec_op_ny_sve_d4(float* dis, const float* x, const float* y, size_t ny) {
2787
+ const size_t lanes = svcntw();
2788
+ const size_t lanes4 = lanes * 4;
2789
+ const svbool_t pg = svptrue_b32();
2790
+ const svfloat32_t x0 = svdup_n_f32(x[0]);
2791
+ const svfloat32_t x1 = svdup_n_f32(x[1]);
2792
+ const svfloat32_t x2 = svdup_n_f32(x[2]);
2793
+ const svfloat32_t x3 = svdup_n_f32(x[3]);
2794
+ size_t i = 0;
2795
+ for (; i + lanes < ny; i += lanes) {
2796
+ const svfloat32x4_t y0 = svld4_f32(pg, y);
2797
+ svfloat32_t y00 = svget4_f32(y0, 0);
2798
+ const svfloat32_t y01 = svget4_f32(y0, 1);
2799
+ svfloat32_t y02 = svget4_f32(y0, 2);
2800
+ const svfloat32_t y03 = svget4_f32(y0, 3);
2801
+ y00 = ElementOp::op(pg, x0, y00);
2802
+ y02 = ElementOp::op(pg, x2, y02);
2803
+ y00 = ElementOp::merge(pg, y00, x1, y01);
2804
+ y02 = ElementOp::merge(pg, y02, x3, y03);
2805
+ y00 = svadd_f32_x(pg, y00, y02);
2806
+ svst1_f32(pg, dis, y00);
2807
+ y += lanes4;
2808
+ dis += lanes;
2809
+ }
2810
// Tail: same computation under a predicate covering the remaining vectors.
+ const svbool_t pg0 = svwhilelt_b32_u64(i, ny);
2811
+ const svfloat32x4_t y0 = svld4_f32(pg0, y);
2812
+ svfloat32_t y00 = svget4_f32(y0, 0);
2813
+ const svfloat32_t y01 = svget4_f32(y0, 1);
2814
+ svfloat32_t y02 = svget4_f32(y0, 2);
2815
+ const svfloat32_t y03 = svget4_f32(y0, 3);
2816
+ y00 = ElementOp::op(pg0, x0, y00);
2817
+ y02 = ElementOp::op(pg0, x2, y02);
2818
+ y00 = ElementOp::merge(pg0, y00, x1, y01);
2819
+ y02 = ElementOp::merge(pg0, y02, x3, y03);
2820
+ y00 = svadd_f32_x(pg0, y00, y02);
2821
+ svst1_f32(pg0, dis, y00);
2822
+ }
2823
+
2824
// Kernel for dimension d == 8: for each j in [0, ny) combines the eight
// (x[k], y[8j + k]) pairs with op/merge into four partial results and sums
// them into dis[j].
//   dis : output, ny floats
//   x   : query, x[0..7] are read and broadcast
//   y   : ny vectors of 8 floats, stored back to back
// There is no 8-way structure load, so each block of `lanes` database vectors
// is read with two svld4 loads (ya, yb). With a stride of 4 floats, tuple
// element k of a load alternates between dimension k (even lanes) and
// dimension k + 4 (odd lanes). svuzp1 gathers the even lanes of (ya_k, yb_k)
// to give dimension k for all `lanes` vectors, and svuzp2 gathers the odd
// lanes to give dimension k + 4.
+ template <typename ElementOp>
2825
+ void fvec_op_ny_sve_d8(float* dis, const float* x, const float* y, size_t ny) {
2826
+ const size_t lanes = svcntw();
2827
+ const size_t lanes4 = lanes * 4;
2828
+ const size_t lanes8 = lanes * 8;
2829
+ const svbool_t pg = svptrue_b32();
2830
+ const svfloat32_t x0 = svdup_n_f32(x[0]);
2831
+ const svfloat32_t x1 = svdup_n_f32(x[1]);
2832
+ const svfloat32_t x2 = svdup_n_f32(x[2]);
2833
+ const svfloat32_t x3 = svdup_n_f32(x[3]);
2834
+ const svfloat32_t x4 = svdup_n_f32(x[4]);
2835
+ const svfloat32_t x5 = svdup_n_f32(x[5]);
2836
+ const svfloat32_t x6 = svdup_n_f32(x[6]);
2837
+ const svfloat32_t x7 = svdup_n_f32(x[7]);
2838
+ size_t i = 0;
2839
+ for (; i + lanes < ny; i += lanes) {
2840
+ const svfloat32x4_t ya = svld4_f32(pg, y);
2841
+ const svfloat32x4_t yb = svld4_f32(pg, y + lanes4);
2842
+ const svfloat32_t ya0 = svget4_f32(ya, 0);
2843
+ const svfloat32_t ya1 = svget4_f32(ya, 1);
2844
+ const svfloat32_t ya2 = svget4_f32(ya, 2);
2845
+ const svfloat32_t ya3 = svget4_f32(ya, 3);
2846
+ const svfloat32_t yb0 = svget4_f32(yb, 0);
2847
+ const svfloat32_t yb1 = svget4_f32(yb, 1);
2848
+ const svfloat32_t yb2 = svget4_f32(yb, 2);
2849
+ const svfloat32_t yb3 = svget4_f32(yb, 3);
2850
// y0..y3 = dimensions 0..3, y4..y7 = dimensions 4..7, one lane per vector.
+ svfloat32_t y0 = svuzp1(ya0, yb0);
2851
+ const svfloat32_t y1 = svuzp1(ya1, yb1);
2852
+ svfloat32_t y2 = svuzp1(ya2, yb2);
2853
+ const svfloat32_t y3 = svuzp1(ya3, yb3);
2854
+ svfloat32_t y4 = svuzp2(ya0, yb0);
2855
+ const svfloat32_t y5 = svuzp2(ya1, yb1);
2856
+ svfloat32_t y6 = svuzp2(ya2, yb2);
2857
+ const svfloat32_t y7 = svuzp2(ya3, yb3);
2858
+ y0 = ElementOp::op(pg, x0, y0);
2859
+ y2 = ElementOp::op(pg, x2, y2);
2860
+ y4 = ElementOp::op(pg, x4, y4);
2861
+ y6 = ElementOp::op(pg, x6, y6);
2862
+ y0 = ElementOp::merge(pg, y0, x1, y1);
2863
+ y2 = ElementOp::merge(pg, y2, x3, y3);
2864
+ y4 = ElementOp::merge(pg, y4, x5, y5);
2865
+ y6 = ElementOp::merge(pg, y6, x7, y7);
2866
+ y0 = svadd_f32_x(pg, y0, y2);
2867
+ y4 = svadd_f32_x(pg, y4, y6);
2868
+ y0 = svadd_f32_x(pg, y0, y4);
2869
+ svst1_f32(pg, dis, y0);
2870
+ y += lanes8;
2871
+ dis += lanes;
2872
+ }
2873
// Tail. pg0 counts remaining database vectors (output lanes). The svld4
// predicates pga/pgb count in half-vectors (4 floats per lane), hence the
// doubled index and bound.
+ const svbool_t pg0 = svwhilelt_b32_u64(i, ny);
2874
+ const svbool_t pga = svwhilelt_b32_u64(i * 2, ny * 2);
2875
+ const svbool_t pgb = svwhilelt_b32_u64(i * 2 + lanes, ny * 2);
2876
+ const svfloat32x4_t ya = svld4_f32(pga, y);
2877
+ const svfloat32x4_t yb = svld4_f32(pgb, y + lanes4);
2878
+ const svfloat32_t ya0 = svget4_f32(ya, 0);
2879
+ const svfloat32_t ya1 = svget4_f32(ya, 1);
2880
+ const svfloat32_t ya2 = svget4_f32(ya, 2);
2881
+ const svfloat32_t ya3 = svget4_f32(ya, 3);
2882
+ const svfloat32_t yb0 = svget4_f32(yb, 0);
2883
+ const svfloat32_t yb1 = svget4_f32(yb, 1);
2884
+ const svfloat32_t yb2 = svget4_f32(yb, 2);
2885
+ const svfloat32_t yb3 = svget4_f32(yb, 3);
2886
+ svfloat32_t y0 = svuzp1(ya0, yb0);
2887
+ const svfloat32_t y1 = svuzp1(ya1, yb1);
2888
+ svfloat32_t y2 = svuzp1(ya2, yb2);
2889
+ const svfloat32_t y3 = svuzp1(ya3, yb3);
2890
+ svfloat32_t y4 = svuzp2(ya0, yb0);
2891
+ const svfloat32_t y5 = svuzp2(ya1, yb1);
2892
+ svfloat32_t y6 = svuzp2(ya2, yb2);
2893
+ const svfloat32_t y7 = svuzp2(ya3, yb3);
2894
+ y0 = ElementOp::op(pg0, x0, y0);
2895
+ y2 = ElementOp::op(pg0, x2, y2);
2896
+ y4 = ElementOp::op(pg0, x4, y4);
2897
+ y6 = ElementOp::op(pg0, x6, y6);
2898
+ y0 = ElementOp::merge(pg0, y0, x1, y1);
2899
+ y2 = ElementOp::merge(pg0, y2, x3, y3);
2900
+ y4 = ElementOp::merge(pg0, y4, x5, y5);
2901
+ y6 = ElementOp::merge(pg0, y6, x7, y7);
2902
+ y0 = svadd_f32_x(pg0, y0, y2);
2903
+ y4 = svadd_f32_x(pg0, y4, y6);
2904
+ y0 = svadd_f32_x(pg0, y0, y4);
2905
+ svst1_f32(pg0, dis, y0);
2906
// NOTE(review): y and dis are by-value pointer parameters and nothing reads
// them after this point, so the two increments below have no visible effect.
+ y += lanes8;
2907
+ dis += lanes;
2908
+ }
2909
+
2910
// Kernel for d == svcntw(): each database vector fills exactly one SVE
// register, so no predication or de-interleaving is needed.
//   dis : output, ny floats; dis[i] = horizontal sum of op(x, y_i)
//   x   : query, `lanes` floats, loaded once
//   y   : ny vectors of `lanes` floats, stored back to back
// The first loop processes four database vectors per iteration; the second
// handles the remaining 0..3 vectors one at a time.
+ template <typename ElementOp>
2911
+ void fvec_op_ny_sve_lanes1(
2912
+ float* dis,
2913
+ const float* x,
2914
+ const float* y,
2915
+ size_t ny) {
2916
+ const size_t lanes = svcntw();
2917
+ const size_t lanes2 = lanes * 2;
2918
+ const size_t lanes3 = lanes * 3;
2919
+ const size_t lanes4 = lanes * 4;
2920
+ const svbool_t pg = svptrue_b32();
2921
+ const svfloat32_t x0 = svld1_f32(pg, x);
2922
+ size_t i = 0;
2923
+ for (; i + 3 < ny; i += 4) {
2924
+ svfloat32_t y0 = svld1_f32(pg, y);
2925
+ svfloat32_t y1 = svld1_f32(pg, y + lanes);
2926
+ svfloat32_t y2 = svld1_f32(pg, y + lanes2);
2927
+ svfloat32_t y3 = svld1_f32(pg, y + lanes3);
2928
+ y += lanes4;
2929
+ y0 = ElementOp::op(pg, x0, y0);
2930
+ y1 = ElementOp::op(pg, x0, y1);
2931
+ y2 = ElementOp::op(pg, x0, y2);
2932
+ y3 = ElementOp::op(pg, x0, y3);
2933
// svaddv_f32 reduces all lanes of a register to one scalar.
+ dis[i] = svaddv_f32(pg, y0);
2934
+ dis[i + 1] = svaddv_f32(pg, y1);
2935
+ dis[i + 2] = svaddv_f32(pg, y2);
2936
+ dis[i + 3] = svaddv_f32(pg, y3);
2937
+ }
2938
+ for (; i < ny; ++i) {
2939
+ svfloat32_t y0 = svld1_f32(pg, y);
2940
+ y += lanes;
2941
+ y0 = ElementOp::op(pg, x0, y0);
2942
+ dis[i] = svaddv_f32(pg, y0);
2943
+ }
2944
+ }
2945
+
2946
// Kernel for d == 2 * svcntw(): each database vector spans exactly two SVE
// registers.
//   dis : output, ny floats
//   x   : query, 2 * lanes floats, loaded once into x0 / x1
//   y   : ny vectors of 2 * lanes floats, stored back to back
// Per vector: acc = op(x0, y_lo); acc = merge(acc, x1, y_hi); dis[i] is the
// horizontal sum of acc. The main loop handles two database vectors per
// iteration; a single trailing vector, if any, is handled after it.
+ template <typename ElementOp>
2947
+ void fvec_op_ny_sve_lanes2(
2948
+ float* dis,
2949
+ const float* x,
2950
+ const float* y,
2951
+ size_t ny) {
2952
+ const size_t lanes = svcntw();
2953
+ const size_t lanes2 = lanes * 2;
2954
+ const size_t lanes3 = lanes * 3;
2955
+ const size_t lanes4 = lanes * 4;
2956
+ const svbool_t pg = svptrue_b32();
2957
+ const svfloat32_t x0 = svld1_f32(pg, x);
2958
+ const svfloat32_t x1 = svld1_f32(pg, x + lanes);
2959
+ size_t i = 0;
2960
+ for (; i + 1 < ny; i += 2) {
2961
+ svfloat32_t y00 = svld1_f32(pg, y);
2962
+ const svfloat32_t y01 = svld1_f32(pg, y + lanes);
2963
+ svfloat32_t y10 = svld1_f32(pg, y + lanes2);
2964
+ const svfloat32_t y11 = svld1_f32(pg, y + lanes3);
2965
+ y += lanes4;
2966
+ y00 = ElementOp::op(pg, x0, y00);
2967
+ y10 = ElementOp::op(pg, x0, y10);
2968
+ y00 = ElementOp::merge(pg, y00, x1, y01);
2969
+ y10 = ElementOp::merge(pg, y10, x1, y11);
2970
+ dis[i] = svaddv_f32(pg, y00);
2971
+ dis[i + 1] = svaddv_f32(pg, y10);
2972
+ }
2973
// Odd ny: one database vector is left over.
+ if (i < ny) {
2974
+ svfloat32_t y0 = svld1_f32(pg, y);
2975
+ const svfloat32_t y1 = svld1_f32(pg, y + lanes);
2976
+ y0 = ElementOp::op(pg, x0, y0);
2977
+ y0 = ElementOp::merge(pg, y0, x1, y1);
2978
+ dis[i] = svaddv_f32(pg, y0);
2979
+ }
2980
+ }
2981
+
2982
// Kernel for d == 3 * svcntw(): each database vector spans exactly three SVE
// registers.
//   dis : output, ny floats
//   x   : query, 3 * lanes floats, loaded once into x0 / x1 / x2
//   y   : ny vectors of 3 * lanes floats, stored back to back
// Per vector a single accumulator is chained:
//   acc = op(x0, y0); acc = merge(acc, x1, y1); acc = merge(acc, x2, y2);
// and dis[i] is the horizontal sum of acc. One vector per iteration.
+ template <typename ElementOp>
2983
+ void fvec_op_ny_sve_lanes3(
2984
+ float* dis,
2985
+ const float* x,
2986
+ const float* y,
2987
+ size_t ny) {
2988
+ const size_t lanes = svcntw();
2989
+ const size_t lanes2 = lanes * 2;
2990
+ const size_t lanes3 = lanes * 3;
2991
+ const svbool_t pg = svptrue_b32();
2992
+ const svfloat32_t x0 = svld1_f32(pg, x);
2993
+ const svfloat32_t x1 = svld1_f32(pg, x + lanes);
2994
+ const svfloat32_t x2 = svld1_f32(pg, x + lanes2);
2995
+ for (size_t i = 0; i < ny; ++i) {
2996
+ svfloat32_t y0 = svld1_f32(pg, y);
2997
+ const svfloat32_t y1 = svld1_f32(pg, y + lanes);
2998
+ svfloat32_t y2 = svld1_f32(pg, y + lanes2);
2999
+ y += lanes3;
3000
+ y0 = ElementOp::op(pg, x0, y0);
3001
+ y0 = ElementOp::merge(pg, y0, x1, y1);
3002
+ y0 = ElementOp::merge(pg, y0, x2, y2);
3003
+ dis[i] = svaddv_f32(pg, y0);
3004
+ }
3005
+ }
3006
+
3007
// Kernel for d == 4 * svcntw(): each database vector spans exactly four SVE
// registers.
//   dis : output, ny floats
//   x   : query, 4 * lanes floats, loaded once into x0..x3
//   y   : ny vectors of 4 * lanes floats, stored back to back
// Per vector two independent accumulators are used (registers 0/1 and 2/3),
// added together, then reduced horizontally into dis[i].
+ template <typename ElementOp>
3008
+ void fvec_op_ny_sve_lanes4(
3009
+ float* dis,
3010
+ const float* x,
3011
+ const float* y,
3012
+ size_t ny) {
3013
+ const size_t lanes = svcntw();
3014
+ const size_t lanes2 = lanes * 2;
3015
+ const size_t lanes3 = lanes * 3;
3016
+ const size_t lanes4 = lanes * 4;
3017
+ const svbool_t pg = svptrue_b32();
3018
+ const svfloat32_t x0 = svld1_f32(pg, x);
3019
+ const svfloat32_t x1 = svld1_f32(pg, x + lanes);
3020
+ const svfloat32_t x2 = svld1_f32(pg, x + lanes2);
3021
+ const svfloat32_t x3 = svld1_f32(pg, x + lanes3);
3022
+ for (size_t i = 0; i < ny; ++i) {
3023
+ svfloat32_t y0 = svld1_f32(pg, y);
3024
+ const svfloat32_t y1 = svld1_f32(pg, y + lanes);
3025
+ svfloat32_t y2 = svld1_f32(pg, y + lanes2);
3026
+ const svfloat32_t y3 = svld1_f32(pg, y + lanes3);
3027
+ y += lanes4;
3028
+ y0 = ElementOp::op(pg, x0, y0);
3029
+ y2 = ElementOp::op(pg, x2, y2);
3030
+ y0 = ElementOp::merge(pg, y0, x1, y1);
3031
+ y2 = ElementOp::merge(pg, y2, x3, y3);
3032
+ y0 = svadd_f32_x(pg, y0, y2);
3033
+ dis[i] = svaddv_f32(pg, y0);
3034
+ }
3035
+ }
3036
+
3037
// SVE branch: no specialised kernel here; all arguments are forwarded
// unchanged to fvec_L2sqr_ny_ref.
+ void fvec_L2sqr_ny(
3038
+ float* dis,
3039
+ const float* x,
3040
+ const float* y,
3041
+ size_t d,
3042
+ size_t ny) {
3043
+ fvec_L2sqr_ny_ref(dis, x, y, d, ny);
3044
+ }
3045
+
3046
// SVE branch: no specialised kernel here; all arguments are forwarded
// unchanged to fvec_L2sqr_ny_y_transposed_ref (note the extra "_y" in the
// reference function's name).
+ void fvec_L2sqr_ny_transposed(
3047
+ float* dis,
3048
+ const float* x,
3049
+ const float* y,
3050
+ const float* y_sqlen,
3051
+ size_t d,
3052
+ size_t d_offset,
3053
+ size_t ny) {
3054
+ return fvec_L2sqr_ny_y_transposed_ref(dis, x, y, y_sqlen, d, d_offset, ny);
3055
+ }
3056
+
3057
// SVE branch: no specialised kernel here; forwards to
// fvec_L2sqr_ny_nearest_ref and returns its size_t result unchanged.
+ size_t fvec_L2sqr_ny_nearest(
3058
+ float* distances_tmp_buffer,
3059
+ const float* x,
3060
+ const float* y,
3061
+ size_t d,
3062
+ size_t ny) {
3063
+ return fvec_L2sqr_ny_nearest_ref(distances_tmp_buffer, x, y, d, ny);
3064
+ }
3065
+
3066
// SVE branch: no specialised kernel here; forwards to
// fvec_L2sqr_ny_nearest_y_transposed_ref and returns its size_t result
// unchanged.
+ size_t fvec_L2sqr_ny_nearest_y_transposed(
3067
+ float* distances_tmp_buffer,
3068
+ const float* x,
3069
+ const float* y,
3070
+ const float* y_sqlen,
3071
+ size_t d,
3072
+ size_t d_offset,
3073
+ size_t ny) {
3074
+ return fvec_L2sqr_ny_nearest_y_transposed_ref(
3075
+ distances_tmp_buffer, x, y, y_sqlen, d, d_offset, ny);
3076
+ }
3077
+
3078
// SVE branch: no specialised kernel here; returns fvec_L1_ref(x, y, d).
+ float fvec_L1(const float* x, const float* y, size_t d) {
3079
+ return fvec_L1_ref(x, y, d);
3080
+ }
3081
+
3082
// SVE branch: no specialised kernel here; returns fvec_Linf_ref(x, y, d).
+ float fvec_Linf(const float* x, const float* y, size_t d) {
3083
+ return fvec_Linf_ref(x, y, d);
3084
+ }
3085
+
3086
// SVE dispatcher for batched inner products: picks a kernel from the vector
// dimension d and instantiates it with ElementOpIP.
//   dis : output, one float per database vector (ny entries)
//   x   : query vector, d floats
//   y   : ny database vectors of d floats each, stored back to back
// Selection order:
//   1. d in {1, 2, 4, 8}            -> fixed-dimension kernels (_d1 .. _d8)
//   2. d == k * svcntw(), k in 1..4 -> whole-register kernels (_lanes1 .. 4)
//   3. anything else                -> fvec_inner_products_ny_ref
// The case labels are tested first, so if svcntw() (or a multiple of it) is
// 1, 2, 4 or 8, the fixed-dimension kernel is used for that d.
+ void fvec_inner_products_ny(
3087
+ float* dis,
3088
+ const float* x,
3089
+ const float* y,
3090
+ size_t d,
3091
+ size_t ny) {
3092
+ const size_t lanes = svcntw();
3093
+ switch (d) {
3094
+ case 1:
3095
+ fvec_op_ny_sve_d1<ElementOpIP>(dis, x, y, ny);
3096
+ break;
3097
+ case 2:
3098
+ fvec_op_ny_sve_d2<ElementOpIP>(dis, x, y, ny);
3099
+ break;
3100
+ case 4:
3101
+ fvec_op_ny_sve_d4<ElementOpIP>(dis, x, y, ny);
3102
+ break;
3103
+ case 8:
3104
+ fvec_op_ny_sve_d8<ElementOpIP>(dis, x, y, ny);
3105
+ break;
3106
+ default:
3107
+ if (d == lanes)
3108
+ fvec_op_ny_sve_lanes1<ElementOpIP>(dis, x, y, ny);
3109
+ else if (d == lanes * 2)
3110
+ fvec_op_ny_sve_lanes2<ElementOpIP>(dis, x, y, ny);
3111
+ else if (d == lanes * 3)
3112
+ fvec_op_ny_sve_lanes3<ElementOpIP>(dis, x, y, ny);
3113
+ else if (d == lanes * 4)
3114
+ fvec_op_ny_sve_lanes4<ElementOpIP>(dis, x, y, ny);
3115
+ else
3116
+ fvec_inner_products_ny_ref(dis, x, y, d, ny);
3117
+ break;
3118
+ }
3119
+ }
3120
+
2676
3121
  #elif defined(__aarch64__)
2677
3122
 
2678
3123
  // not optimized for ARM
@@ -2934,6 +3379,60 @@ void fvec_madd(size_t n, const float* a, float bf, const float* b, float* c) {
2934
3379
  #endif
2935
3380
  }
2936
3381
 
3382
+ #elif defined(__ARM_FEATURE_SVE)
3383
+
3384
+ void fvec_madd(
3385
+ const size_t n,
3386
+ const float* __restrict a,
3387
+ const float bf,
3388
+ const float* __restrict b,
3389
+ float* __restrict c) {
3390
+ const size_t lanes = static_cast<size_t>(svcntw());
3391
+ const size_t lanes2 = lanes * 2;
3392
+ const size_t lanes3 = lanes * 3;
3393
+ const size_t lanes4 = lanes * 4;
3394
+ size_t i = 0;
3395
+ for (; i + lanes4 < n; i += lanes4) {
3396
+ const auto mask = svptrue_b32();
3397
+ const auto ai0 = svld1_f32(mask, a + i);
3398
+ const auto ai1 = svld1_f32(mask, a + i + lanes);
3399
+ const auto ai2 = svld1_f32(mask, a + i + lanes2);
3400
+ const auto ai3 = svld1_f32(mask, a + i + lanes3);
3401
+ const auto bi0 = svld1_f32(mask, b + i);
3402
+ const auto bi1 = svld1_f32(mask, b + i + lanes);
3403
+ const auto bi2 = svld1_f32(mask, b + i + lanes2);
3404
+ const auto bi3 = svld1_f32(mask, b + i + lanes3);
3405
+ const auto ci0 = svmla_n_f32_x(mask, ai0, bi0, bf);
3406
+ const auto ci1 = svmla_n_f32_x(mask, ai1, bi1, bf);
3407
+ const auto ci2 = svmla_n_f32_x(mask, ai2, bi2, bf);
3408
+ const auto ci3 = svmla_n_f32_x(mask, ai3, bi3, bf);
3409
+ svst1_f32(mask, c + i, ci0);
3410
+ svst1_f32(mask, c + i + lanes, ci1);
3411
+ svst1_f32(mask, c + i + lanes2, ci2);
3412
+ svst1_f32(mask, c + i + lanes3, ci3);
3413
+ }
3414
+ const auto mask0 = svwhilelt_b32_u64(i, n);
3415
+ const auto mask1 = svwhilelt_b32_u64(i + lanes, n);
3416
+ const auto mask2 = svwhilelt_b32_u64(i + lanes2, n);
3417
+ const auto mask3 = svwhilelt_b32_u64(i + lanes3, n);
3418
+ const auto ai0 = svld1_f32(mask0, a + i);
3419
+ const auto ai1 = svld1_f32(mask1, a + i + lanes);
3420
+ const auto ai2 = svld1_f32(mask2, a + i + lanes2);
3421
+ const auto ai3 = svld1_f32(mask3, a + i + lanes3);
3422
+ const auto bi0 = svld1_f32(mask0, b + i);
3423
+ const auto bi1 = svld1_f32(mask1, b + i + lanes);
3424
+ const auto bi2 = svld1_f32(mask2, b + i + lanes2);
3425
+ const auto bi3 = svld1_f32(mask3, b + i + lanes3);
3426
+ const auto ci0 = svmla_n_f32_x(mask0, ai0, bi0, bf);
3427
+ const auto ci1 = svmla_n_f32_x(mask1, ai1, bi1, bf);
3428
+ const auto ci2 = svmla_n_f32_x(mask2, ai2, bi2, bf);
3429
+ const auto ci3 = svmla_n_f32_x(mask3, ai3, bi3, bf);
3430
+ svst1_f32(mask0, c + i, ci0);
3431
+ svst1_f32(mask1, c + i + lanes, ci1);
3432
+ svst1_f32(mask2, c + i + lanes2, ci2);
3433
+ svst1_f32(mask3, c + i + lanes3, ci3);
3434
+ }
3435
+
2937
3436
  #elif defined(__aarch64__)
2938
3437
 
2939
3438
  void fvec_madd(size_t n, const float* a, float bf, const float* b, float* c) {
@@ -3266,7 +3765,7 @@ void fvec_add(size_t d, const float* a, float b, float* c) {
3266
3765
  size_t i;
3267
3766
  simd8float32 bv(b);
3268
3767
  for (i = 0; i + 7 < d; i += 8) {
3269
- simd8float32 ci, ai, bi;
3768
+ simd8float32 ci, ai;
3270
3769
  ai.loadu(a + i);
3271
3770
  ci = ai + bv;
3272
3771
  ci.storeu(c + i);
@@ -1,5 +1,5 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  *
4
4
  * This source code is licensed under the MIT license found in the
5
5
  * LICENSE file in the root directory of this source tree.
@@ -1,5 +1,5 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  *
4
4
  * This source code is licensed under the MIT license found in the
5
5
  * LICENSE file in the root directory of this source tree.
@@ -15,7 +15,6 @@
15
15
 
16
16
  #include <faiss/impl/AuxIndexStructures.h>
17
17
  #include <faiss/impl/DistanceComputer.h>
18
- #include <faiss/impl/FaissAssert.h>
19
18
  #include <faiss/utils/utils.h>
20
19
 
21
20
  namespace faiss {
@@ -1,5 +1,5 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  *
4
4
  * This source code is licensed under the MIT license found in the
5
5
  * LICENSE file in the root directory of this source tree.
@@ -1,5 +1,5 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  *
4
4
  * This source code is licensed under the MIT license found in the
5
5
  * LICENSE file in the root directory of this source tree.
@@ -1,5 +1,5 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  *
4
4
  * This source code is licensed under the MIT license found in the
5
5
  * LICENSE file in the root directory of this source tree.
@@ -1,5 +1,5 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  *
4
4
  * This source code is licensed under the MIT license found in the
5
5
  * LICENSE file in the root directory of this source tree.
@@ -1,5 +1,5 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  *
4
4
  * This source code is licensed under the MIT license found in the
5
5
  * LICENSE file in the root directory of this source tree.
@@ -1,5 +1,5 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  *
4
4
  * This source code is licensed under the MIT license found in the
5
5
  * LICENSE file in the root directory of this source tree.
@@ -1,5 +1,5 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  *
4
4
  * This source code is licensed under the MIT license found in the
5
5
  * LICENSE file in the root directory of this source tree.
@@ -24,7 +24,6 @@
24
24
  #include <faiss/utils/hamming.h>
25
25
 
26
26
  #include <algorithm>
27
- #include <cmath>
28
27
  #include <cstdio>
29
28
  #include <memory>
30
29
  #include <vector>
@@ -1,5 +1,5 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  *
4
4
  * This source code is licensed under the MIT license found in the
5
5
  * LICENSE file in the root directory of this source tree.
@@ -1,5 +1,5 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  *
4
4
  * This source code is licensed under the MIT license found in the
5
5
  * LICENSE file in the root directory of this source tree.