faiss 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +9 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +9 -2
  6. data/ext/faiss/index.cpp +1 -1
  7. data/ext/faiss/index_binary.cpp +2 -2
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +7 -7
  11. data/vendor/faiss/faiss/AutoTune.h +1 -2
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -22
  13. data/vendor/faiss/faiss/Clustering.h +40 -21
  14. data/vendor/faiss/faiss/IVFlib.cpp +26 -12
  15. data/vendor/faiss/faiss/Index.cpp +1 -1
  16. data/vendor/faiss/faiss/Index.h +40 -10
  17. data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
  20. data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
  21. data/vendor/faiss/faiss/IndexBinary.h +8 -19
  22. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
  23. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
  24. data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
  26. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +107 -188
  27. data/vendor/faiss/faiss/IndexFastScan.cpp +95 -146
  28. data/vendor/faiss/faiss/IndexFastScan.h +9 -8
  29. data/vendor/faiss/faiss/IndexFlat.cpp +206 -10
  30. data/vendor/faiss/faiss/IndexFlat.h +20 -1
  31. data/vendor/faiss/faiss/IndexFlatCodes.cpp +170 -5
  32. data/vendor/faiss/faiss/IndexFlatCodes.h +23 -4
  33. data/vendor/faiss/faiss/IndexHNSW.cpp +231 -382
  34. data/vendor/faiss/faiss/IndexHNSW.h +62 -49
  35. data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
  36. data/vendor/faiss/faiss/IndexIDMap.h +24 -2
  37. data/vendor/faiss/faiss/IndexIVF.cpp +162 -56
  38. data/vendor/faiss/faiss/IndexIVF.h +46 -6
  39. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +33 -26
  40. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +6 -2
  41. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
  43. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +502 -401
  44. data/vendor/faiss/faiss/IndexIVFFastScan.h +63 -26
  45. data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
  46. data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
  47. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
  48. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
  49. data/vendor/faiss/faiss/IndexIVFPQ.cpp +79 -125
  50. data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
  51. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +39 -52
  52. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
  53. data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
  54. data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
  55. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
  56. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
  57. data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
  58. data/vendor/faiss/faiss/IndexLattice.cpp +1 -19
  59. data/vendor/faiss/faiss/IndexLattice.h +3 -22
  60. data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -33
  61. data/vendor/faiss/faiss/IndexNNDescent.h +1 -1
  62. data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
  63. data/vendor/faiss/faiss/IndexNSG.h +11 -11
  64. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +56 -0
  65. data/vendor/faiss/faiss/IndexNeuralNetCodec.h +49 -0
  66. data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
  67. data/vendor/faiss/faiss/IndexPQ.h +1 -4
  68. data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
  69. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
  70. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  71. data/vendor/faiss/faiss/IndexRefine.cpp +54 -24
  72. data/vendor/faiss/faiss/IndexRefine.h +7 -0
  73. data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
  74. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +25 -17
  75. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
  76. data/vendor/faiss/faiss/IndexShards.cpp +21 -29
  77. data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
  78. data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
  79. data/vendor/faiss/faiss/MatrixStats.h +21 -9
  80. data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
  81. data/vendor/faiss/faiss/MetricType.h +7 -2
  82. data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
  83. data/vendor/faiss/faiss/VectorTransform.h +7 -7
  84. data/vendor/faiss/faiss/clone_index.cpp +15 -10
  85. data/vendor/faiss/faiss/clone_index.h +3 -0
  86. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +95 -17
  87. data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +152 -0
  88. data/vendor/faiss/faiss/cppcontrib/factory_tools.h +24 -0
  89. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +83 -30
  90. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +123 -8
  91. data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
  92. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +13 -0
  93. data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
  94. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -1
  95. data/vendor/faiss/faiss/gpu/GpuIndex.h +30 -12
  96. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +282 -0
  97. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +14 -9
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +20 -3
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
  101. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
  102. data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +142 -17
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
  106. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +26 -21
  107. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +7 -1
  108. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +8 -5
  109. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
  110. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
  111. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +332 -40
  112. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
  113. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
  114. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
  115. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
  116. data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
  117. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +4 -1
  118. data/vendor/faiss/faiss/gpu/utils/Timer.h +1 -1
  119. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
  121. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +26 -1
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +10 -3
  123. data/vendor/faiss/faiss/impl/DistanceComputer.h +70 -1
  124. data/vendor/faiss/faiss/impl/FaissAssert.h +4 -2
  125. data/vendor/faiss/faiss/impl/FaissException.h +13 -34
  126. data/vendor/faiss/faiss/impl/HNSW.cpp +605 -186
  127. data/vendor/faiss/faiss/impl/HNSW.h +52 -30
  128. data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +11 -9
  130. data/vendor/faiss/faiss/impl/LookupTableScaler.h +34 -0
  131. data/vendor/faiss/faiss/impl/NNDescent.cpp +42 -27
  132. data/vendor/faiss/faiss/impl/NSG.cpp +0 -29
  133. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  134. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
  135. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  136. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -22
  137. data/vendor/faiss/faiss/impl/ProductQuantizer.h +6 -2
  138. data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
  139. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
  141. data/vendor/faiss/faiss/impl/ResultHandler.h +347 -172
  142. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1104 -147
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +3 -8
  144. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +285 -42
  145. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx512.h +248 -0
  146. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
  147. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
  148. data/vendor/faiss/faiss/impl/index_read.cpp +74 -34
  149. data/vendor/faiss/faiss/impl/index_read_utils.h +37 -0
  150. data/vendor/faiss/faiss/impl/index_write.cpp +88 -51
  151. data/vendor/faiss/faiss/impl/io.cpp +23 -15
  152. data/vendor/faiss/faiss/impl/io.h +4 -4
  153. data/vendor/faiss/faiss/impl/io_macros.h +6 -0
  154. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  155. data/vendor/faiss/faiss/impl/platform_macros.h +40 -1
  156. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +14 -0
  157. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
  158. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
  159. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +487 -49
  160. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
  161. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
  162. data/vendor/faiss/faiss/impl/simd_result_handlers.h +481 -225
  163. data/vendor/faiss/faiss/index_factory.cpp +41 -20
  164. data/vendor/faiss/faiss/index_io.h +12 -5
  165. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +28 -8
  166. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +3 -0
  167. data/vendor/faiss/faiss/invlists/DirectMap.cpp +10 -2
  168. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +73 -17
  169. data/vendor/faiss/faiss/invlists/InvertedLists.h +26 -8
  170. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +24 -9
  171. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +2 -1
  172. data/vendor/faiss/faiss/python/python_callbacks.cpp +4 -4
  173. data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
  174. data/vendor/faiss/faiss/utils/Heap.h +105 -0
  175. data/vendor/faiss/faiss/utils/NeuralNet.cpp +342 -0
  176. data/vendor/faiss/faiss/utils/NeuralNet.h +147 -0
  177. data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
  178. data/vendor/faiss/faiss/utils/bf16.h +36 -0
  179. data/vendor/faiss/faiss/utils/distances.cpp +147 -123
  180. data/vendor/faiss/faiss/utils/distances.h +86 -9
  181. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
  182. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
  183. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  184. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
  185. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
  186. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
  187. data/vendor/faiss/faiss/utils/distances_simd.cpp +1589 -243
  188. data/vendor/faiss/faiss/utils/extra_distances-inl.h +70 -0
  189. data/vendor/faiss/faiss/utils/extra_distances.cpp +85 -137
  190. data/vendor/faiss/faiss/utils/extra_distances.h +3 -2
  191. data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
  192. data/vendor/faiss/faiss/utils/fp16.h +2 -0
  193. data/vendor/faiss/faiss/utils/hamming.cpp +163 -111
  194. data/vendor/faiss/faiss/utils/hamming.h +58 -0
  195. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
  196. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
  197. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +19 -88
  198. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +58 -0
  199. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
  200. data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
  201. data/vendor/faiss/faiss/utils/prefetch.h +77 -0
  202. data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
  203. data/vendor/faiss/faiss/utils/random.cpp +43 -0
  204. data/vendor/faiss/faiss/utils/random.h +25 -0
  205. data/vendor/faiss/faiss/utils/simdlib.h +10 -1
  206. data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
  207. data/vendor/faiss/faiss/utils/simdlib_avx512.h +296 -0
  208. data/vendor/faiss/faiss/utils/simdlib_neon.h +77 -79
  209. data/vendor/faiss/faiss/utils/simdlib_ppc64.h +1084 -0
  210. data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
  211. data/vendor/faiss/faiss/utils/sorting.h +27 -0
  212. data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +176 -0
  213. data/vendor/faiss/faiss/utils/utils.cpp +120 -7
  214. data/vendor/faiss/faiss/utils/utils.h +60 -20
  215. metadata +23 -4
  216. data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +0 -102
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4e9bd037bbb04bb31eb1856073a7d01d9b1871f1005c2f81b2b3d48c72f737a2
4
- data.tar.gz: bfde828c3c7780e2cba3eac0db39ab805c1892e3fb07968870097051ebc7b713
3
+ metadata.gz: bdce4ec4f4169dff5f08ccbed2de2750dfd33738fe60d747645f7aaa43187505
4
+ data.tar.gz: a8ab702eead45525bb4aae8b28b9c20bc0d0d8c774a79ef942a9c8d7a9cabc2f
5
5
  SHA512:
6
- metadata.gz: c7dd39002fa4f463c552b75fcfeed505816784ca986e65a46826d7982f9e8f9761750931b7e472ef3406d851c63e2038ef65370cd1c0d54113eb556190fb8c65
7
- data.tar.gz: 2f1fc38577e089b9a817feabe7a794354a385ca4cf99887253db12a88cfeb39c0ba456a44191b16680158e2c30d1356efa909df57a2823376036f47e25aed0b1
6
+ metadata.gz: 7e8291961c8a8550e745c55eef5011ca23fc6f5ce7452eeb6da45ebfd020f7c07df70a0a5d7c281e2449214d5ec26102f9194f1aa49d0b9be21304dad3a98368
7
+ data.tar.gz: 80b475d06b237902b88025dc2602a7e7c8ad15ec757cd43d63d143423eb7a1bd759b8c30715b9ec30c2ae3cfecd2eea502e9814524219d396c71067f0959b62e
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
1
+ ## 0.3.2 (2024-10-05)
2
+
3
+ - Updated Faiss to 1.9.0
4
+
5
+ ## 0.3.1 (2024-03-13)
6
+
7
+ - Updated Faiss to 1.8.0
8
+ - Fixed memory leak with `load` and `index_binary_factory` methods
9
+
1
10
  ## 0.3.0 (2023-05-11)
2
11
 
3
12
  - Fixed error on Fedora
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  MIT License
2
2
 
3
3
  Copyright (c) Facebook, Inc. and its affiliates.
4
- Copyright (c) 2020-2023 Andrew Kane
4
+ Copyright (c) 2020-2024 Andrew Kane
5
5
 
6
6
  Permission is hereby granted, free of charge, to any person obtaining a copy
7
7
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
 
5
5
  Learn more about [Faiss](https://engineering.fb.com/data-infrastructure/faiss-a-library-for-efficient-similarity-search/)
6
6
 
7
- [![Build Status](https://github.com/ankane/faiss-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/faiss-ruby/actions)
7
+ [![Build Status](https://github.com/ankane/faiss-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/faiss-ruby/actions)
8
8
 
9
9
  ## Installation
10
10
 
data/ext/faiss/extconf.rb CHANGED
@@ -19,9 +19,16 @@ abort "Numo not found" unless find_header("numo/narray.h", numo)
19
19
  # for https://bugs.ruby-lang.org/issues/19005
20
20
  $LDFLAGS += " -Wl,-undefined,dynamic_lookup" if RbConfig::CONFIG["host_os"] =~ /darwin/i
21
21
 
22
+ $CXXFLAGS += " -std=c++17 $(optflags) -DFINTEGER=int"
23
+ $CXXFLAGS += " -Wall -Wno-unused-parameter -Wno-unused-function -Wno-unused-variable -Wno-deprecated-declarations -Wno-sign-compare"
24
+
22
25
  # -march=native not supported with ARM Mac
23
- default_optflags = RbConfig::CONFIG["host_os"] =~ /darwin/i && RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i ? "" : "-march=native"
24
- $CXXFLAGS << " -std=c++17 $(optflags) -DFINTEGER=int " << with_config("optflags", default_optflags)
26
+ default_optflags = RbConfig::CONFIG["host_os"] =~ /darwin/i && RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i ? "" : " -march=native"
27
+ $CXXFLAGS += with_config("optflags", default_optflags)
28
+
29
+ apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
30
+ $CXXFLAGS += " -Xclang" if apple_clang
31
+ $CXXFLAGS += " -fopenmp"
25
32
 
26
33
  ext = File.expand_path(".", __dir__)
27
34
  vendor = File.expand_path("../../vendor/faiss", __dir__)
data/ext/faiss/index.cpp CHANGED
@@ -157,7 +157,7 @@ void init_index(Rice::Module& m) {
157
157
  "load",
158
158
  [](Rice::String fname) {
159
159
  return faiss::read_index(fname.c_str());
160
- });
160
+ }, Rice::Return().takeOwnership());
161
161
 
162
162
  Rice::define_class_under<faiss::IndexFlatL2, faiss::Index>(m, "IndexFlatL2")
163
163
  .define_constructor(Rice::Constructor<faiss::IndexFlatL2, int64_t>());
@@ -59,7 +59,7 @@ void init_index_binary(Rice::Module& m) {
59
59
  "load",
60
60
  [](Rice::String fname) {
61
61
  return faiss::read_index_binary(fname.c_str());
62
- });
62
+ }, Rice::Return().takeOwnership());
63
63
 
64
64
  Rice::define_class_under<faiss::IndexBinaryFlat, faiss::IndexBinary>(m, "IndexBinaryFlat")
65
65
  .define_constructor(Rice::Constructor<faiss::IndexBinaryFlat, int64_t>());
@@ -71,5 +71,5 @@ void init_index_binary(Rice::Module& m) {
71
71
  "index_binary_factory",
72
72
  [](int d, Rice::String description) {
73
73
  return faiss::index_binary_factory(d, description.c_str());
74
- });
74
+ }, Rice::Return().takeOwnership());
75
75
  }
@@ -49,5 +49,5 @@ void init_product_quantizer(Rice::Module& m) {
49
49
  "load",
50
50
  [](Rice::String fname) {
51
51
  return faiss::read_ProductQuantizer(fname.c_str());
52
- });
52
+ }, Rice::Return().takeOwnership());
53
53
  }
data/lib/faiss/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Faiss
2
- VERSION = "0.3.0"
2
+ VERSION = "0.3.2"
3
3
  end
@@ -152,12 +152,10 @@ bool OperatingPoints::add(
152
152
  return false;
153
153
  }
154
154
  }
155
- { // remove non-optimal points from array
156
- int i = a.size() - 1;
157
- while (i > 0) {
158
- if (a[i].t < a[i - 1].t)
159
- a.erase(a.begin() + (i - 1));
160
- i--;
155
+ // remove non-optimal points from array
156
+ for (int i = a.size() - 1; i > 0; --i) {
157
+ if (a[i].t < a[i - 1].t) {
158
+ a.erase(a.begin() + (i - 1));
161
159
  }
162
160
  }
163
161
  return true;
@@ -286,6 +284,8 @@ std::string ParameterSpace::combination_name(size_t cno) const {
286
284
  char buf[1000], *wp = buf;
287
285
  *wp = 0;
288
286
  for (int i = 0; i < parameter_ranges.size(); i++) {
287
+ FAISS_THROW_IF_NOT_MSG(
288
+ buf + 1000 - wp >= 0, "Overflow detected in snprintf");
289
289
  const ParameterRange& pr = parameter_ranges[i];
290
290
  size_t j = cno % pr.values.size();
291
291
  cno /= pr.values.size();
@@ -334,7 +334,7 @@ ParameterRange& ParameterSpace::add_range(const std::string& name) {
334
334
  return pr;
335
335
  }
336
336
  }
337
- parameter_ranges.push_back(ParameterRange());
337
+ parameter_ranges.emplace_back();
338
338
  parameter_ranges.back().name = name;
339
339
  return parameter_ranges.back();
340
340
  }
@@ -11,7 +11,6 @@
11
11
  #define FAISS_AUTO_TUNE_H
12
12
 
13
13
  #include <stdint.h>
14
- #include <unordered_map>
15
14
  #include <vector>
16
15
 
17
16
  #include <faiss/Index.h>
@@ -87,7 +86,7 @@ struct OperatingPoint {
87
86
  double perf; ///< performance measure (output of a Criterion)
88
87
  double t; ///< corresponding execution time (ms)
89
88
  std::string key; ///< key that identifies this op pt
90
- int64_t cno; ///< integer identifer
89
+ int64_t cno; ///< integer identifier
91
90
  };
92
91
 
93
92
  struct OperatingPoints {
@@ -11,6 +11,7 @@
11
11
  #include <faiss/VectorTransform.h>
12
12
  #include <faiss/impl/AuxIndexStructures.h>
13
13
 
14
+ #include <chrono>
14
15
  #include <cinttypes>
15
16
  #include <cmath>
16
17
  #include <cstdio>
@@ -27,20 +28,6 @@
27
28
 
28
29
  namespace faiss {
29
30
 
30
- ClusteringParameters::ClusteringParameters()
31
- : niter(25),
32
- nredo(1),
33
- verbose(false),
34
- spherical(false),
35
- int_centroids(false),
36
- update_index(false),
37
- frozen_centroids(false),
38
- min_points_per_centroid(39),
39
- max_points_per_centroid(256),
40
- seed(1234),
41
- decode_block_size(32768) {}
42
- // 39 corresponds to 10000 / 256 -> to avoid warnings on PQ tests with randu10k
43
-
44
31
  Clustering::Clustering(int d, int k) : d(d), k(k) {}
45
32
 
46
33
  Clustering::Clustering(int d, int k, const ClusteringParameters& cp)
@@ -88,6 +75,14 @@ void Clustering::train(
88
75
 
89
76
  namespace {
90
77
 
78
+ uint64_t get_actual_rng_seed(const int seed) {
79
+ return (seed >= 0)
80
+ ? seed
81
+ : static_cast<uint64_t>(std::chrono::high_resolution_clock::now()
82
+ .time_since_epoch()
83
+ .count());
84
+ }
85
+
91
86
  idx_t subsample_training_set(
92
87
  const Clustering& clus,
93
88
  idx_t nx,
@@ -101,11 +96,30 @@ idx_t subsample_training_set(
101
96
  clus.k * clus.max_points_per_centroid,
102
97
  nx);
103
98
  }
104
- std::vector<int> perm(nx);
105
- rand_perm(perm.data(), nx, clus.seed);
99
+
100
+ const uint64_t actual_seed = get_actual_rng_seed(clus.seed);
101
+
102
+ std::vector<int> perm;
103
+ if (clus.use_faster_subsampling) {
104
+ // use subsampling with splitmix64 rng
105
+ SplitMix64RandomGenerator rng(actual_seed);
106
+
107
+ const idx_t new_nx = clus.k * clus.max_points_per_centroid;
108
+ perm.resize(new_nx);
109
+ for (idx_t i = 0; i < new_nx; i++) {
110
+ perm[i] = rng.rand_int(nx);
111
+ }
112
+ } else {
113
+ // use subsampling with a default std rng
114
+ perm.resize(nx);
115
+ rand_perm(perm.data(), nx, actual_seed);
116
+ }
117
+
106
118
  nx = clus.k * clus.max_points_per_centroid;
107
119
  uint8_t* x_new = new uint8_t[nx * line_size];
108
120
  *x_out = x_new;
121
+
122
+ // might be worth omp-ing as well
109
123
  for (idx_t i = 0; i < nx; i++) {
110
124
  memcpy(x_new + i * line_size, x + perm[i] * line_size, line_size);
111
125
  }
@@ -231,7 +245,7 @@ int split_clusters(
231
245
  for (size_t ci = 0; ci < k; ci++) {
232
246
  if (hassign[ci] == 0) { /* need to redefine a centroid */
233
247
  size_t cj;
234
- for (cj = 0; 1; cj = (cj + 1) % k) {
248
+ for (cj = 0; true; cj = (cj + 1) % k) {
235
249
  /* probability to pick this cluster for split */
236
250
  float p = (hassign[cj] - 1.0) / (float)(n - k);
237
251
  float r = rng.rand_float();
@@ -264,7 +278,7 @@ int split_clusters(
264
278
  return nsplit;
265
279
  }
266
280
 
267
- }; // namespace
281
+ } // namespace
268
282
 
269
283
  void Clustering::train_encoded(
270
284
  idx_t nx,
@@ -294,7 +308,7 @@ void Clustering::train_encoded(
294
308
 
295
309
  double t0 = getmillisecs();
296
310
 
297
- if (!codec) {
311
+ if (!codec && check_input_data_for_NaNs) {
298
312
  // Check for NaNs in input data. Normally it is the user's
299
313
  // responsibility, but it may spare us some hard-to-debug
300
314
  // reports.
@@ -397,6 +411,9 @@ void Clustering::train_encoded(
397
411
  }
398
412
  t0 = getmillisecs();
399
413
 
414
+ // initialize seed
415
+ const uint64_t actual_seed = get_actual_rng_seed(seed);
416
+
400
417
  // temporary buffer to decode vectors during the optimization
401
418
  std::vector<float> decode_buffer(codec ? d * decode_block_size : 0);
402
419
 
@@ -409,7 +426,7 @@ void Clustering::train_encoded(
409
426
  centroids.resize(d * k);
410
427
  std::vector<int> perm(nx);
411
428
 
412
- rand_perm(perm.data(), nx, seed + 1 + redo * 15486557L);
429
+ rand_perm(perm.data(), nx, actual_seed + 1 + redo * 15486557L);
413
430
 
414
431
  if (!codec) {
415
432
  for (int i = n_input_centroids; i < k; i++) {
@@ -590,7 +607,7 @@ float kmeans_clustering(
590
607
  const float* x,
591
608
  float* centroids) {
592
609
  Clustering clus(d, k);
593
- clus.verbose = d * n * k > (1L << 30);
610
+ clus.verbose = d * n * k > (size_t(1) << 30);
594
611
  // display logs if > 1Gflop per iteration
595
612
  IndexFlatL2 index(d);
596
613
  clus.train(n, x, index);
@@ -631,7 +648,7 @@ void copy_columns(idx_t n, idx_t d1, const float* src, idx_t d2, float* dest) {
631
648
  }
632
649
  }
633
650
 
634
- }; // namespace
651
+ } // namespace
635
652
 
636
653
  void ProgressiveDimClustering::train(
637
654
  idx_t n,
@@ -5,7 +5,7 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
- // -*- c++ -*-
8
+ /** Implementation of k-means clustering with many variants. */
9
9
 
10
10
  #ifndef FAISS_CLUSTERING_H
11
11
  #define FAISS_CLUSTERING_H
@@ -19,25 +19,44 @@ namespace faiss {
19
19
  * constructor of the Clustering object.
20
20
  */
21
21
  struct ClusteringParameters {
22
- int niter; ///< clustering iterations
23
- int nredo; ///< redo clustering this many times and keep best
24
-
25
- bool verbose;
26
- bool spherical; ///< do we want normalized centroids?
27
- bool int_centroids; ///< round centroids coordinates to integer
28
- bool update_index; ///< re-train index after each iteration?
29
- bool frozen_centroids; ///< use the centroids provided as input and do not
30
- ///< change them during iterations
31
-
32
- int min_points_per_centroid; ///< otherwise you get a warning
33
- int max_points_per_centroid; ///< to limit size of dataset
34
-
35
- int seed; ///< seed for the random number generator
36
-
37
- size_t decode_block_size; ///< how many vectors at a time to decode
38
-
39
- /// sets reasonable defaults
40
- ClusteringParameters();
22
+ /// number of clustering iterations
23
+ int niter = 25;
24
+ /// redo clustering this many times and keep the clusters with the best
25
+ /// objective
26
+ int nredo = 1;
27
+
28
+ bool verbose = false;
29
+ /// whether to normalize centroids after each iteration (useful for inner
30
+ /// product clustering)
31
+ bool spherical = false;
32
+ /// round centroids coordinates to integer after each iteration?
33
+ bool int_centroids = false;
34
+ /// re-train index after each iteration?
35
+ bool update_index = false;
36
+
37
+ /// Use the subset of centroids provided as input and do not change them
38
+ /// during iterations
39
+ bool frozen_centroids = false;
40
+ /// If fewer than this number of training vectors per centroid are provided,
41
+ /// writes a warning. Note that fewer than 1 point per centroid raises an
42
+ /// exception.
43
+ int min_points_per_centroid = 39;
44
+ /// to limit size of dataset, otherwise the training set is subsampled
45
+ int max_points_per_centroid = 256;
46
+ /// seed for the random number generator.
47
+ /// negative values lead to seeding an internal rng with
48
+ /// std::high_resolution_clock.
49
+ int seed = 1234;
50
+
51
+ /// when the training set is encoded, batch size of the codec decoder
52
+ size_t decode_block_size = 32768;
53
+
54
+ /// whether to check for NaNs in an input data
55
+ bool check_input_data_for_NaNs = true;
56
+
57
+ /// Whether to use splitmix64-based random number generator for subsampling,
58
+ /// which is faster, but may pick duplicate points.
59
+ bool use_faster_subsampling = false;
41
60
  };
42
61
 
43
62
  struct ClusteringIterationStats {
@@ -94,7 +113,7 @@ struct Clustering : ClusteringParameters {
94
113
  * to decode the input vectors.
95
114
  *
96
115
  * @param codec codec used to decode the vectors (nullptr =
97
- * vectors are in fact floats) *
116
+ * vectors are in fact floats)
98
117
  */
99
118
  void train_encoded(
100
119
  idx_t nx,
@@ -12,7 +12,9 @@
12
12
 
13
13
  #include <faiss/IndexAdditiveQuantizer.h>
14
14
  #include <faiss/IndexIVFAdditiveQuantizer.h>
15
+ #include <faiss/IndexIVFIndependentQuantizer.h>
15
16
  #include <faiss/IndexPreTransform.h>
17
+ #include <faiss/IndexRefine.h>
16
18
  #include <faiss/MetaIndexes.h>
17
19
  #include <faiss/impl/FaissAssert.h>
18
20
  #include <faiss/utils/distances.h>
@@ -57,20 +59,29 @@ void check_compatible_for_merge(const Index* index0, const Index* index1) {
57
59
  }
58
60
 
59
61
  const IndexIVF* try_extract_index_ivf(const Index* index) {
60
- if (auto* pt = dynamic_cast<const IndexPreTransform*>(index)) {
61
- index = pt->index;
62
+ auto* ivf = dynamic_cast<const IndexIVF*>(index);
63
+ if (ivf != nullptr) {
64
+ return ivf;
62
65
  }
63
66
 
67
+ if (auto* pt = dynamic_cast<const IndexPreTransform*>(index)) {
68
+ return try_extract_index_ivf(pt->index);
69
+ }
64
70
  if (auto* idmap = dynamic_cast<const IndexIDMap*>(index)) {
65
- index = idmap->index;
71
+ return try_extract_index_ivf(idmap->index);
66
72
  }
67
73
  if (auto* idmap = dynamic_cast<const IndexIDMap2*>(index)) {
68
- index = idmap->index;
74
+ return try_extract_index_ivf(idmap->index);
75
+ }
76
+ if (auto* indep =
77
+ dynamic_cast<const IndexIVFIndependentQuantizer*>(index)) {
78
+ return try_extract_index_ivf(indep->index_ivf);
79
+ }
80
+ if (auto* refine = dynamic_cast<const IndexRefine*>(index)) {
81
+ return try_extract_index_ivf(refine->base_index);
69
82
  }
70
83
 
71
- auto* ivf = dynamic_cast<const IndexIVF*>(index);
72
-
73
- return ivf;
84
+ return nullptr;
74
85
  }
75
86
 
76
87
  IndexIVF* try_extract_index_ivf(Index* index) {
@@ -321,14 +332,14 @@ void search_with_parameters(
321
332
  double* ms_per_stage) {
322
333
  FAISS_THROW_IF_NOT(params);
323
334
  const float* prev_x = x;
324
- ScopeDeleter<float> del;
335
+ std::unique_ptr<const float[]> del;
325
336
 
326
337
  double t0 = getmillisecs();
327
338
 
328
339
  if (auto ip = dynamic_cast<const IndexPreTransform*>(index)) {
329
340
  x = ip->apply_chain(n, x);
330
341
  if (x != prev_x) {
331
- del.set(x);
342
+ del.reset(x);
332
343
  }
333
344
  index = ip->index;
334
345
  }
@@ -341,7 +352,10 @@ void search_with_parameters(
341
352
  const IndexIVF* index_ivf = dynamic_cast<const IndexIVF*>(index);
342
353
  FAISS_THROW_IF_NOT(index_ivf);
343
354
 
344
- index_ivf->quantizer->search(n, x, params->nprobe, Dq.data(), Iq.data());
355
+ SearchParameters* quantizer_params =
356
+ (params) ? params->quantizer_params : nullptr;
357
+ index_ivf->quantizer->search(
358
+ n, x, params->nprobe, Dq.data(), Iq.data(), quantizer_params);
345
359
 
346
360
  if (nb_dis_ptr) {
347
361
  *nb_dis_ptr = count_ndis(index_ivf, n * params->nprobe, Iq.data());
@@ -371,14 +385,14 @@ void range_search_with_parameters(
371
385
  double* ms_per_stage) {
372
386
  FAISS_THROW_IF_NOT(params);
373
387
  const float* prev_x = x;
374
- ScopeDeleter<float> del;
388
+ std::unique_ptr<const float[]> del;
375
389
 
376
390
  double t0 = getmillisecs();
377
391
 
378
392
  if (auto ip = dynamic_cast<const IndexPreTransform*>(index)) {
379
393
  x = ip->apply_chain(n, x);
380
394
  if (x != prev_x) {
381
- del.set(x);
395
+ del.reset(x);
382
396
  }
383
397
  index = ip->index;
384
398
  }
@@ -18,7 +18,7 @@
18
18
 
19
19
  namespace faiss {
20
20
 
21
- Index::~Index() {}
21
+ Index::~Index() = default;
22
22
 
23
23
  void Index::train(idx_t /*n*/, const float* /*x*/) {
24
24
  // does nothing by default
@@ -17,8 +17,20 @@
17
17
  #include <typeinfo>
18
18
 
19
19
  #define FAISS_VERSION_MAJOR 1
20
- #define FAISS_VERSION_MINOR 7
21
- #define FAISS_VERSION_PATCH 4
20
+ #define FAISS_VERSION_MINOR 9
21
+ #define FAISS_VERSION_PATCH 0
22
+
23
+ // Macro to combine the version components into a single string
24
+ #ifndef FAISS_STRINGIFY
25
+ #define FAISS_STRINGIFY(ARG) #ARG
26
+ #endif
27
+ #ifndef FAISS_TOSTRING
28
+ #define FAISS_TOSTRING(ARG) FAISS_STRINGIFY(ARG)
29
+ #endif
30
+ #define VERSION_STRING \
31
+ FAISS_TOSTRING(FAISS_VERSION_MAJOR) \
32
+ "." FAISS_TOSTRING(FAISS_VERSION_MINOR) "." FAISS_TOSTRING( \
33
+ FAISS_VERSION_PATCH)
22
34
 
23
35
  /**
24
36
  * @namespace faiss
@@ -38,8 +50,8 @@
38
50
 
39
51
  namespace faiss {
40
52
 
41
- /// Forward declarations see impl/AuxIndexStructures.h, impl/IDSelector.h and
42
- /// impl/DistanceComputer.h
53
+ /// Forward declarations see impl/AuxIndexStructures.h, impl/IDSelector.h
54
+ /// and impl/DistanceComputer.h
43
55
  struct IDSelector;
44
56
  struct RangeSearchResult;
45
57
  struct DistanceComputer;
@@ -56,7 +68,8 @@ struct SearchParameters {
56
68
  virtual ~SearchParameters() {}
57
69
  };
58
70
 
59
- /** Abstract structure for an index, supports adding vectors and searching them.
71
+ /** Abstract structure for an index, supports adding vectors and searching
72
+ * them.
60
73
  *
61
74
  * All vectors provided at add or search time are 32-bit float arrays,
62
75
  * although the internal representation may vary.
@@ -99,6 +112,7 @@ struct Index {
99
112
  * Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
100
113
  * This function slices the input vectors in chunks smaller than
101
114
  * blocksize_add and calls add_core.
115
+ * @param n number of vectors
102
116
  * @param x input matrix, size n * d
103
117
  */
104
118
  virtual void add(idx_t n, const float* x) = 0;
@@ -108,7 +122,9 @@ struct Index {
108
122
  * The default implementation fails with an assertion, as it is
109
123
  * not supported by all indexes.
110
124
  *
111
- * @param xids if non-null, ids to store for the vectors (size n)
125
+ * @param n number of vectors
126
+ * @param x input vectors, size n * d
127
+ * @param xids if non-null, ids to store for the vectors (size n)
112
128
  */
113
129
  virtual void add_with_ids(idx_t n, const float* x, const idx_t* xids);
114
130
 
@@ -117,9 +133,11 @@ struct Index {
117
133
  * return at most k vectors. If there are not enough results for a
118
134
  * query, the result array is padded with -1s.
119
135
  *
136
+ * @param n number of vectors
120
137
  * @param x input vectors to search, size n * d
121
- * @param labels output labels of the NNs, size n*k
138
+ * @param k number of extracted vectors
122
139
  * @param distances output pairwise distances, size n*k
140
+ * @param labels output labels of the NNs, size n*k
123
141
  */
124
142
  virtual void search(
125
143
  idx_t n,
@@ -135,6 +153,7 @@ struct Index {
135
153
  * indexes do not implement the range_search (only the k-NN search
136
154
  * is mandatory).
137
155
  *
156
+ * @param n number of vectors
138
157
  * @param x input vectors to search, size n * d
139
158
  * @param radius search radius
140
159
  * @param result result table
@@ -148,9 +167,12 @@ struct Index {
148
167
 
149
168
  /** return the indexes of the k vectors closest to the query x.
150
169
  *
151
- * This function is identical as search but only return labels of neighbors.
170
+ * This function is identical to search but only returns labels of
171
+ * neighbors.
172
+ * @param n number of vectors
152
173
  * @param x input vectors to search, size n * d
153
174
  * @param labels output labels of the NNs, size n*k
175
+ * @param k number of nearest neighbours
154
176
  */
155
177
  virtual void assign(idx_t n, const float* x, idx_t* labels, idx_t k = 1)
156
178
  const;
@@ -171,10 +193,11 @@ struct Index {
171
193
  */
172
194
  virtual void reconstruct(idx_t key, float* recons) const;
173
195
 
174
- /** Reconstruct several stored vectors (or an approximation if lossy coding)
196
+ /** Reconstruct several stored vectors (or an approximation if lossy
197
+ * coding)
175
198
  *
176
199
  * this function may not be defined for some indexes
177
- * @param n number of vectors to reconstruct
200
+ * @param n number of vectors to reconstruct
178
201
  * @param keys ids of the vectors to reconstruct (size n)
179
202
  * @param recons reconstructed vector (size n * d)
180
203
  */
@@ -184,6 +207,8 @@ struct Index {
184
207
  /** Reconstruct vectors i0 to i0 + ni - 1
185
208
  *
186
209
  * this function may not be defined for some indexes
210
+ * @param i0 index of the first vector in the sequence
211
+ * @param ni number of vectors in the sequence
187
212
  * @param recons reconstructed vector (size ni * d)
188
213
  */
189
214
  virtual void reconstruct_n(idx_t i0, idx_t ni, float* recons) const;
@@ -194,6 +219,11 @@ struct Index {
194
219
  * If there are not enough results for a query, the resulting arrays
195
220
  * is padded with -1s.
196
221
  *
222
+ * @param n number of vectors
223
+ * @param x input vectors to search, size n * d
224
+ * @param k number of extracted vectors
225
+ * @param distances output pairwise distances, size n*k
226
+ * @param labels output labels of the NNs, size n*k
197
227
  * @param recons reconstructed vectors size (n, k, d)
198
228
  **/
199
229
  virtual void search_and_reconstruct(