faiss 0.3.0 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (216)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +9 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +9 -2
  6. data/ext/faiss/index.cpp +1 -1
  7. data/ext/faiss/index_binary.cpp +2 -2
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +7 -7
  11. data/vendor/faiss/faiss/AutoTune.h +1 -2
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -22
  13. data/vendor/faiss/faiss/Clustering.h +40 -21
  14. data/vendor/faiss/faiss/IVFlib.cpp +26 -12
  15. data/vendor/faiss/faiss/Index.cpp +1 -1
  16. data/vendor/faiss/faiss/Index.h +40 -10
  17. data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
  20. data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
  21. data/vendor/faiss/faiss/IndexBinary.h +8 -19
  22. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
  23. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
  24. data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
  26. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +107 -188
  27. data/vendor/faiss/faiss/IndexFastScan.cpp +95 -146
  28. data/vendor/faiss/faiss/IndexFastScan.h +9 -8
  29. data/vendor/faiss/faiss/IndexFlat.cpp +206 -10
  30. data/vendor/faiss/faiss/IndexFlat.h +20 -1
  31. data/vendor/faiss/faiss/IndexFlatCodes.cpp +170 -5
  32. data/vendor/faiss/faiss/IndexFlatCodes.h +23 -4
  33. data/vendor/faiss/faiss/IndexHNSW.cpp +231 -382
  34. data/vendor/faiss/faiss/IndexHNSW.h +62 -49
  35. data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
  36. data/vendor/faiss/faiss/IndexIDMap.h +24 -2
  37. data/vendor/faiss/faiss/IndexIVF.cpp +162 -56
  38. data/vendor/faiss/faiss/IndexIVF.h +46 -6
  39. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +33 -26
  40. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +6 -2
  41. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
  43. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +502 -401
  44. data/vendor/faiss/faiss/IndexIVFFastScan.h +63 -26
  45. data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
  46. data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
  47. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
  48. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
  49. data/vendor/faiss/faiss/IndexIVFPQ.cpp +79 -125
  50. data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
  51. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +39 -52
  52. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
  53. data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
  54. data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
  55. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
  56. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
  57. data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
  58. data/vendor/faiss/faiss/IndexLattice.cpp +1 -19
  59. data/vendor/faiss/faiss/IndexLattice.h +3 -22
  60. data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -33
  61. data/vendor/faiss/faiss/IndexNNDescent.h +1 -1
  62. data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
  63. data/vendor/faiss/faiss/IndexNSG.h +11 -11
  64. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +56 -0
  65. data/vendor/faiss/faiss/IndexNeuralNetCodec.h +49 -0
  66. data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
  67. data/vendor/faiss/faiss/IndexPQ.h +1 -4
  68. data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
  69. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
  70. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  71. data/vendor/faiss/faiss/IndexRefine.cpp +54 -24
  72. data/vendor/faiss/faiss/IndexRefine.h +7 -0
  73. data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
  74. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +25 -17
  75. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
  76. data/vendor/faiss/faiss/IndexShards.cpp +21 -29
  77. data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
  78. data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
  79. data/vendor/faiss/faiss/MatrixStats.h +21 -9
  80. data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
  81. data/vendor/faiss/faiss/MetricType.h +7 -2
  82. data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
  83. data/vendor/faiss/faiss/VectorTransform.h +7 -7
  84. data/vendor/faiss/faiss/clone_index.cpp +15 -10
  85. data/vendor/faiss/faiss/clone_index.h +3 -0
  86. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +95 -17
  87. data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +152 -0
  88. data/vendor/faiss/faiss/cppcontrib/factory_tools.h +24 -0
  89. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +83 -30
  90. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +123 -8
  91. data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
  92. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +13 -0
  93. data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
  94. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -1
  95. data/vendor/faiss/faiss/gpu/GpuIndex.h +30 -12
  96. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +282 -0
  97. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +14 -9
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +20 -3
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
  101. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
  102. data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +142 -17
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
  106. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +26 -21
  107. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +7 -1
  108. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +8 -5
  109. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
  110. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
  111. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +332 -40
  112. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
  113. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
  114. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
  115. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
  116. data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
  117. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +4 -1
  118. data/vendor/faiss/faiss/gpu/utils/Timer.h +1 -1
  119. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
  121. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +26 -1
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +10 -3
  123. data/vendor/faiss/faiss/impl/DistanceComputer.h +70 -1
  124. data/vendor/faiss/faiss/impl/FaissAssert.h +4 -2
  125. data/vendor/faiss/faiss/impl/FaissException.h +13 -34
  126. data/vendor/faiss/faiss/impl/HNSW.cpp +605 -186
  127. data/vendor/faiss/faiss/impl/HNSW.h +52 -30
  128. data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +11 -9
  130. data/vendor/faiss/faiss/impl/LookupTableScaler.h +34 -0
  131. data/vendor/faiss/faiss/impl/NNDescent.cpp +42 -27
  132. data/vendor/faiss/faiss/impl/NSG.cpp +0 -29
  133. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  134. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
  135. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  136. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -22
  137. data/vendor/faiss/faiss/impl/ProductQuantizer.h +6 -2
  138. data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
  139. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
  141. data/vendor/faiss/faiss/impl/ResultHandler.h +347 -172
  142. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1104 -147
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +3 -8
  144. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +285 -42
  145. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx512.h +248 -0
  146. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
  147. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
  148. data/vendor/faiss/faiss/impl/index_read.cpp +74 -34
  149. data/vendor/faiss/faiss/impl/index_read_utils.h +37 -0
  150. data/vendor/faiss/faiss/impl/index_write.cpp +88 -51
  151. data/vendor/faiss/faiss/impl/io.cpp +23 -15
  152. data/vendor/faiss/faiss/impl/io.h +4 -4
  153. data/vendor/faiss/faiss/impl/io_macros.h +6 -0
  154. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  155. data/vendor/faiss/faiss/impl/platform_macros.h +40 -1
  156. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +14 -0
  157. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
  158. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
  159. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +487 -49
  160. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
  161. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
  162. data/vendor/faiss/faiss/impl/simd_result_handlers.h +481 -225
  163. data/vendor/faiss/faiss/index_factory.cpp +41 -20
  164. data/vendor/faiss/faiss/index_io.h +12 -5
  165. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +28 -8
  166. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +3 -0
  167. data/vendor/faiss/faiss/invlists/DirectMap.cpp +10 -2
  168. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +73 -17
  169. data/vendor/faiss/faiss/invlists/InvertedLists.h +26 -8
  170. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +24 -9
  171. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +2 -1
  172. data/vendor/faiss/faiss/python/python_callbacks.cpp +4 -4
  173. data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
  174. data/vendor/faiss/faiss/utils/Heap.h +105 -0
  175. data/vendor/faiss/faiss/utils/NeuralNet.cpp +342 -0
  176. data/vendor/faiss/faiss/utils/NeuralNet.h +147 -0
  177. data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
  178. data/vendor/faiss/faiss/utils/bf16.h +36 -0
  179. data/vendor/faiss/faiss/utils/distances.cpp +147 -123
  180. data/vendor/faiss/faiss/utils/distances.h +86 -9
  181. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
  182. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
  183. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  184. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
  185. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
  186. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
  187. data/vendor/faiss/faiss/utils/distances_simd.cpp +1589 -243
  188. data/vendor/faiss/faiss/utils/extra_distances-inl.h +70 -0
  189. data/vendor/faiss/faiss/utils/extra_distances.cpp +85 -137
  190. data/vendor/faiss/faiss/utils/extra_distances.h +3 -2
  191. data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
  192. data/vendor/faiss/faiss/utils/fp16.h +2 -0
  193. data/vendor/faiss/faiss/utils/hamming.cpp +163 -111
  194. data/vendor/faiss/faiss/utils/hamming.h +58 -0
  195. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
  196. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
  197. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +19 -88
  198. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +58 -0
  199. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
  200. data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
  201. data/vendor/faiss/faiss/utils/prefetch.h +77 -0
  202. data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
  203. data/vendor/faiss/faiss/utils/random.cpp +43 -0
  204. data/vendor/faiss/faiss/utils/random.h +25 -0
  205. data/vendor/faiss/faiss/utils/simdlib.h +10 -1
  206. data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
  207. data/vendor/faiss/faiss/utils/simdlib_avx512.h +296 -0
  208. data/vendor/faiss/faiss/utils/simdlib_neon.h +77 -79
  209. data/vendor/faiss/faiss/utils/simdlib_ppc64.h +1084 -0
  210. data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
  211. data/vendor/faiss/faiss/utils/sorting.h +27 -0
  212. data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +176 -0
  213. data/vendor/faiss/faiss/utils/utils.cpp +120 -7
  214. data/vendor/faiss/faiss/utils/utils.h +60 -20
  215. metadata +23 -4
  216. data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +0 -102
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4e9bd037bbb04bb31eb1856073a7d01d9b1871f1005c2f81b2b3d48c72f737a2
4
- data.tar.gz: bfde828c3c7780e2cba3eac0db39ab805c1892e3fb07968870097051ebc7b713
3
+ metadata.gz: bdce4ec4f4169dff5f08ccbed2de2750dfd33738fe60d747645f7aaa43187505
4
+ data.tar.gz: a8ab702eead45525bb4aae8b28b9c20bc0d0d8c774a79ef942a9c8d7a9cabc2f
5
5
  SHA512:
6
- metadata.gz: c7dd39002fa4f463c552b75fcfeed505816784ca986e65a46826d7982f9e8f9761750931b7e472ef3406d851c63e2038ef65370cd1c0d54113eb556190fb8c65
7
- data.tar.gz: 2f1fc38577e089b9a817feabe7a794354a385ca4cf99887253db12a88cfeb39c0ba456a44191b16680158e2c30d1356efa909df57a2823376036f47e25aed0b1
6
+ metadata.gz: 7e8291961c8a8550e745c55eef5011ca23fc6f5ce7452eeb6da45ebfd020f7c07df70a0a5d7c281e2449214d5ec26102f9194f1aa49d0b9be21304dad3a98368
7
+ data.tar.gz: 80b475d06b237902b88025dc2602a7e7c8ad15ec757cd43d63d143423eb7a1bd759b8c30715b9ec30c2ae3cfecd2eea502e9814524219d396c71067f0959b62e
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
1
+ ## 0.3.2 (2024-10-05)
2
+
3
+ - Updated Faiss to 1.9.0
4
+
5
+ ## 0.3.1 (2024-03-13)
6
+
7
+ - Updated Faiss to 1.8.0
8
+ - Fixed memory leak with `load` and `index_binary_factory` methods
9
+
1
10
  ## 0.3.0 (2023-05-11)
2
11
 
3
12
  - Fixed error on Fedora
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  MIT License
2
2
 
3
3
  Copyright (c) Facebook, Inc. and its affiliates.
4
- Copyright (c) 2020-2023 Andrew Kane
4
+ Copyright (c) 2020-2024 Andrew Kane
5
5
 
6
6
  Permission is hereby granted, free of charge, to any person obtaining a copy
7
7
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
 
5
5
  Learn more about [Faiss](https://engineering.fb.com/data-infrastructure/faiss-a-library-for-efficient-similarity-search/)
6
6
 
7
- [![Build Status](https://github.com/ankane/faiss-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/faiss-ruby/actions)
7
+ [![Build Status](https://github.com/ankane/faiss-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/faiss-ruby/actions)
8
8
 
9
9
  ## Installation
10
10
 
data/ext/faiss/extconf.rb CHANGED
@@ -19,9 +19,16 @@ abort "Numo not found" unless find_header("numo/narray.h", numo)
19
19
  # for https://bugs.ruby-lang.org/issues/19005
20
20
  $LDFLAGS += " -Wl,-undefined,dynamic_lookup" if RbConfig::CONFIG["host_os"] =~ /darwin/i
21
21
 
22
+ $CXXFLAGS += " -std=c++17 $(optflags) -DFINTEGER=int"
23
+ $CXXFLAGS += " -Wall -Wno-unused-parameter -Wno-unused-function -Wno-unused-variable -Wno-deprecated-declarations -Wno-sign-compare"
24
+
22
25
  # -march=native not supported with ARM Mac
23
- default_optflags = RbConfig::CONFIG["host_os"] =~ /darwin/i && RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i ? "" : "-march=native"
24
- $CXXFLAGS << " -std=c++17 $(optflags) -DFINTEGER=int " << with_config("optflags", default_optflags)
26
+ default_optflags = RbConfig::CONFIG["host_os"] =~ /darwin/i && RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i ? "" : " -march=native"
27
+ $CXXFLAGS += with_config("optflags", default_optflags)
28
+
29
+ apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
30
+ $CXXFLAGS += " -Xclang" if apple_clang
31
+ $CXXFLAGS += " -fopenmp"
25
32
 
26
33
  ext = File.expand_path(".", __dir__)
27
34
  vendor = File.expand_path("../../vendor/faiss", __dir__)
data/ext/faiss/index.cpp CHANGED
@@ -157,7 +157,7 @@ void init_index(Rice::Module& m) {
157
157
  "load",
158
158
  [](Rice::String fname) {
159
159
  return faiss::read_index(fname.c_str());
160
- });
160
+ }, Rice::Return().takeOwnership());
161
161
 
162
162
  Rice::define_class_under<faiss::IndexFlatL2, faiss::Index>(m, "IndexFlatL2")
163
163
  .define_constructor(Rice::Constructor<faiss::IndexFlatL2, int64_t>());
@@ -59,7 +59,7 @@ void init_index_binary(Rice::Module& m) {
59
59
  "load",
60
60
  [](Rice::String fname) {
61
61
  return faiss::read_index_binary(fname.c_str());
62
- });
62
+ }, Rice::Return().takeOwnership());
63
63
 
64
64
  Rice::define_class_under<faiss::IndexBinaryFlat, faiss::IndexBinary>(m, "IndexBinaryFlat")
65
65
  .define_constructor(Rice::Constructor<faiss::IndexBinaryFlat, int64_t>());
@@ -71,5 +71,5 @@ void init_index_binary(Rice::Module& m) {
71
71
  "index_binary_factory",
72
72
  [](int d, Rice::String description) {
73
73
  return faiss::index_binary_factory(d, description.c_str());
74
- });
74
+ }, Rice::Return().takeOwnership());
75
75
  }
@@ -49,5 +49,5 @@ void init_product_quantizer(Rice::Module& m) {
49
49
  "load",
50
50
  [](Rice::String fname) {
51
51
  return faiss::read_ProductQuantizer(fname.c_str());
52
- });
52
+ }, Rice::Return().takeOwnership());
53
53
  }
data/lib/faiss/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Faiss
2
- VERSION = "0.3.0"
2
+ VERSION = "0.3.2"
3
3
  end
@@ -152,12 +152,10 @@ bool OperatingPoints::add(
152
152
  return false;
153
153
  }
154
154
  }
155
- { // remove non-optimal points from array
156
- int i = a.size() - 1;
157
- while (i > 0) {
158
- if (a[i].t < a[i - 1].t)
159
- a.erase(a.begin() + (i - 1));
160
- i--;
155
+ // remove non-optimal points from array
156
+ for (int i = a.size() - 1; i > 0; --i) {
157
+ if (a[i].t < a[i - 1].t) {
158
+ a.erase(a.begin() + (i - 1));
161
159
  }
162
160
  }
163
161
  return true;
@@ -286,6 +284,8 @@ std::string ParameterSpace::combination_name(size_t cno) const {
286
284
  char buf[1000], *wp = buf;
287
285
  *wp = 0;
288
286
  for (int i = 0; i < parameter_ranges.size(); i++) {
287
+ FAISS_THROW_IF_NOT_MSG(
288
+ buf + 1000 - wp >= 0, "Overflow detected in snprintf");
289
289
  const ParameterRange& pr = parameter_ranges[i];
290
290
  size_t j = cno % pr.values.size();
291
291
  cno /= pr.values.size();
@@ -334,7 +334,7 @@ ParameterRange& ParameterSpace::add_range(const std::string& name) {
334
334
  return pr;
335
335
  }
336
336
  }
337
- parameter_ranges.push_back(ParameterRange());
337
+ parameter_ranges.emplace_back();
338
338
  parameter_ranges.back().name = name;
339
339
  return parameter_ranges.back();
340
340
  }
@@ -11,7 +11,6 @@
11
11
  #define FAISS_AUTO_TUNE_H
12
12
 
13
13
  #include <stdint.h>
14
- #include <unordered_map>
15
14
  #include <vector>
16
15
 
17
16
  #include <faiss/Index.h>
@@ -87,7 +86,7 @@ struct OperatingPoint {
87
86
  double perf; ///< performance measure (output of a Criterion)
88
87
  double t; ///< corresponding execution time (ms)
89
88
  std::string key; ///< key that identifies this op pt
90
- int64_t cno; ///< integer identifer
89
+ int64_t cno; ///< integer identifier
91
90
  };
92
91
 
93
92
  struct OperatingPoints {
@@ -11,6 +11,7 @@
11
11
  #include <faiss/VectorTransform.h>
12
12
  #include <faiss/impl/AuxIndexStructures.h>
13
13
 
14
+ #include <chrono>
14
15
  #include <cinttypes>
15
16
  #include <cmath>
16
17
  #include <cstdio>
@@ -27,20 +28,6 @@
27
28
 
28
29
  namespace faiss {
29
30
 
30
- ClusteringParameters::ClusteringParameters()
31
- : niter(25),
32
- nredo(1),
33
- verbose(false),
34
- spherical(false),
35
- int_centroids(false),
36
- update_index(false),
37
- frozen_centroids(false),
38
- min_points_per_centroid(39),
39
- max_points_per_centroid(256),
40
- seed(1234),
41
- decode_block_size(32768) {}
42
- // 39 corresponds to 10000 / 256 -> to avoid warnings on PQ tests with randu10k
43
-
44
31
  Clustering::Clustering(int d, int k) : d(d), k(k) {}
45
32
 
46
33
  Clustering::Clustering(int d, int k, const ClusteringParameters& cp)
@@ -88,6 +75,14 @@ void Clustering::train(
88
75
 
89
76
  namespace {
90
77
 
78
+ uint64_t get_actual_rng_seed(const int seed) {
79
+ return (seed >= 0)
80
+ ? seed
81
+ : static_cast<uint64_t>(std::chrono::high_resolution_clock::now()
82
+ .time_since_epoch()
83
+ .count());
84
+ }
85
+
91
86
  idx_t subsample_training_set(
92
87
  const Clustering& clus,
93
88
  idx_t nx,
@@ -101,11 +96,30 @@ idx_t subsample_training_set(
101
96
  clus.k * clus.max_points_per_centroid,
102
97
  nx);
103
98
  }
104
- std::vector<int> perm(nx);
105
- rand_perm(perm.data(), nx, clus.seed);
99
+
100
+ const uint64_t actual_seed = get_actual_rng_seed(clus.seed);
101
+
102
+ std::vector<int> perm;
103
+ if (clus.use_faster_subsampling) {
104
+ // use subsampling with splitmix64 rng
105
+ SplitMix64RandomGenerator rng(actual_seed);
106
+
107
+ const idx_t new_nx = clus.k * clus.max_points_per_centroid;
108
+ perm.resize(new_nx);
109
+ for (idx_t i = 0; i < new_nx; i++) {
110
+ perm[i] = rng.rand_int(nx);
111
+ }
112
+ } else {
113
+ // use subsampling with a default std rng
114
+ perm.resize(nx);
115
+ rand_perm(perm.data(), nx, actual_seed);
116
+ }
117
+
106
118
  nx = clus.k * clus.max_points_per_centroid;
107
119
  uint8_t* x_new = new uint8_t[nx * line_size];
108
120
  *x_out = x_new;
121
+
122
+ // might be worth omp-ing as well
109
123
  for (idx_t i = 0; i < nx; i++) {
110
124
  memcpy(x_new + i * line_size, x + perm[i] * line_size, line_size);
111
125
  }
@@ -231,7 +245,7 @@ int split_clusters(
231
245
  for (size_t ci = 0; ci < k; ci++) {
232
246
  if (hassign[ci] == 0) { /* need to redefine a centroid */
233
247
  size_t cj;
234
- for (cj = 0; 1; cj = (cj + 1) % k) {
248
+ for (cj = 0; true; cj = (cj + 1) % k) {
235
249
  /* probability to pick this cluster for split */
236
250
  float p = (hassign[cj] - 1.0) / (float)(n - k);
237
251
  float r = rng.rand_float();
@@ -264,7 +278,7 @@ int split_clusters(
264
278
  return nsplit;
265
279
  }
266
280
 
267
- }; // namespace
281
+ } // namespace
268
282
 
269
283
  void Clustering::train_encoded(
270
284
  idx_t nx,
@@ -294,7 +308,7 @@ void Clustering::train_encoded(
294
308
 
295
309
  double t0 = getmillisecs();
296
310
 
297
- if (!codec) {
311
+ if (!codec && check_input_data_for_NaNs) {
298
312
  // Check for NaNs in input data. Normally it is the user's
299
313
  // responsibility, but it may spare us some hard-to-debug
300
314
  // reports.
@@ -397,6 +411,9 @@ void Clustering::train_encoded(
397
411
  }
398
412
  t0 = getmillisecs();
399
413
 
414
+ // initialize seed
415
+ const uint64_t actual_seed = get_actual_rng_seed(seed);
416
+
400
417
  // temporary buffer to decode vectors during the optimization
401
418
  std::vector<float> decode_buffer(codec ? d * decode_block_size : 0);
402
419
 
@@ -409,7 +426,7 @@ void Clustering::train_encoded(
409
426
  centroids.resize(d * k);
410
427
  std::vector<int> perm(nx);
411
428
 
412
- rand_perm(perm.data(), nx, seed + 1 + redo * 15486557L);
429
+ rand_perm(perm.data(), nx, actual_seed + 1 + redo * 15486557L);
413
430
 
414
431
  if (!codec) {
415
432
  for (int i = n_input_centroids; i < k; i++) {
@@ -590,7 +607,7 @@ float kmeans_clustering(
590
607
  const float* x,
591
608
  float* centroids) {
592
609
  Clustering clus(d, k);
593
- clus.verbose = d * n * k > (1L << 30);
610
+ clus.verbose = d * n * k > (size_t(1) << 30);
594
611
  // display logs if > 1Gflop per iteration
595
612
  IndexFlatL2 index(d);
596
613
  clus.train(n, x, index);
@@ -631,7 +648,7 @@ void copy_columns(idx_t n, idx_t d1, const float* src, idx_t d2, float* dest) {
631
648
  }
632
649
  }
633
650
 
634
- }; // namespace
651
+ } // namespace
635
652
 
636
653
  void ProgressiveDimClustering::train(
637
654
  idx_t n,
@@ -5,7 +5,7 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
- // -*- c++ -*-
8
+ /** Implementation of k-means clustering with many variants. */
9
9
 
10
10
  #ifndef FAISS_CLUSTERING_H
11
11
  #define FAISS_CLUSTERING_H
@@ -19,25 +19,44 @@ namespace faiss {
19
19
  * constructor of the Clustering object.
20
20
  */
21
21
  struct ClusteringParameters {
22
- int niter; ///< clustering iterations
23
- int nredo; ///< redo clustering this many times and keep best
24
-
25
- bool verbose;
26
- bool spherical; ///< do we want normalized centroids?
27
- bool int_centroids; ///< round centroids coordinates to integer
28
- bool update_index; ///< re-train index after each iteration?
29
- bool frozen_centroids; ///< use the centroids provided as input and do not
30
- ///< change them during iterations
31
-
32
- int min_points_per_centroid; ///< otherwise you get a warning
33
- int max_points_per_centroid; ///< to limit size of dataset
34
-
35
- int seed; ///< seed for the random number generator
36
-
37
- size_t decode_block_size; ///< how many vectors at a time to decode
38
-
39
- /// sets reasonable defaults
40
- ClusteringParameters();
22
+ /// number of clustering iterations
23
+ int niter = 25;
24
+ /// redo clustering this many times and keep the clusters with the best
25
+ /// objective
26
+ int nredo = 1;
27
+
28
+ bool verbose = false;
29
+ /// whether to normalize centroids after each iteration (useful for inner
30
+ /// product clustering)
31
+ bool spherical = false;
32
+ /// round centroids coordinates to integer after each iteration?
33
+ bool int_centroids = false;
34
+ /// re-train index after each iteration?
35
+ bool update_index = false;
36
+
37
+ /// Use the subset of centroids provided as input and do not change them
38
+ /// during iterations
39
+ bool frozen_centroids = false;
40
+ /// If fewer than this number of training vectors per centroid are provided,
41
+ /// writes a warning. Note that fewer than 1 point per centroid raises an
42
+ /// exception.
43
+ int min_points_per_centroid = 39;
44
+ /// to limit size of dataset, otherwise the training set is subsampled
45
+ int max_points_per_centroid = 256;
46
+ /// seed for the random number generator.
47
+ /// negative values lead to seeding an internal rng with
48
+ /// std::high_resolution_clock.
49
+ int seed = 1234;
50
+
51
+ /// when the training set is encoded, batch size of the codec decoder
52
+ size_t decode_block_size = 32768;
53
+
54
+ /// whether to check for NaNs in an input data
55
+ bool check_input_data_for_NaNs = true;
56
+
57
+ /// Whether to use splitmix64-based random number generator for subsampling,
58
+ /// which is faster, but may pick duplicate points.
59
+ bool use_faster_subsampling = false;
41
60
  };
42
61
 
43
62
  struct ClusteringIterationStats {
@@ -94,7 +113,7 @@ struct Clustering : ClusteringParameters {
94
113
  * to decode the input vectors.
95
114
  *
96
115
  * @param codec codec used to decode the vectors (nullptr =
97
- * vectors are in fact floats) *
116
+ * vectors are in fact floats)
98
117
  */
99
118
  void train_encoded(
100
119
  idx_t nx,
@@ -12,7 +12,9 @@
12
12
 
13
13
  #include <faiss/IndexAdditiveQuantizer.h>
14
14
  #include <faiss/IndexIVFAdditiveQuantizer.h>
15
+ #include <faiss/IndexIVFIndependentQuantizer.h>
15
16
  #include <faiss/IndexPreTransform.h>
17
+ #include <faiss/IndexRefine.h>
16
18
  #include <faiss/MetaIndexes.h>
17
19
  #include <faiss/impl/FaissAssert.h>
18
20
  #include <faiss/utils/distances.h>
@@ -57,20 +59,29 @@ void check_compatible_for_merge(const Index* index0, const Index* index1) {
57
59
  }
58
60
 
59
61
  const IndexIVF* try_extract_index_ivf(const Index* index) {
60
- if (auto* pt = dynamic_cast<const IndexPreTransform*>(index)) {
61
- index = pt->index;
62
+ auto* ivf = dynamic_cast<const IndexIVF*>(index);
63
+ if (ivf != nullptr) {
64
+ return ivf;
62
65
  }
63
66
 
67
+ if (auto* pt = dynamic_cast<const IndexPreTransform*>(index)) {
68
+ return try_extract_index_ivf(pt->index);
69
+ }
64
70
  if (auto* idmap = dynamic_cast<const IndexIDMap*>(index)) {
65
- index = idmap->index;
71
+ return try_extract_index_ivf(idmap->index);
66
72
  }
67
73
  if (auto* idmap = dynamic_cast<const IndexIDMap2*>(index)) {
68
- index = idmap->index;
74
+ return try_extract_index_ivf(idmap->index);
75
+ }
76
+ if (auto* indep =
77
+ dynamic_cast<const IndexIVFIndependentQuantizer*>(index)) {
78
+ return try_extract_index_ivf(indep->index_ivf);
79
+ }
80
+ if (auto* refine = dynamic_cast<const IndexRefine*>(index)) {
81
+ return try_extract_index_ivf(refine->base_index);
69
82
  }
70
83
 
71
- auto* ivf = dynamic_cast<const IndexIVF*>(index);
72
-
73
- return ivf;
84
+ return nullptr;
74
85
  }
75
86
 
76
87
  IndexIVF* try_extract_index_ivf(Index* index) {
@@ -321,14 +332,14 @@ void search_with_parameters(
321
332
  double* ms_per_stage) {
322
333
  FAISS_THROW_IF_NOT(params);
323
334
  const float* prev_x = x;
324
- ScopeDeleter<float> del;
335
+ std::unique_ptr<const float[]> del;
325
336
 
326
337
  double t0 = getmillisecs();
327
338
 
328
339
  if (auto ip = dynamic_cast<const IndexPreTransform*>(index)) {
329
340
  x = ip->apply_chain(n, x);
330
341
  if (x != prev_x) {
331
- del.set(x);
342
+ del.reset(x);
332
343
  }
333
344
  index = ip->index;
334
345
  }
@@ -341,7 +352,10 @@ void search_with_parameters(
341
352
  const IndexIVF* index_ivf = dynamic_cast<const IndexIVF*>(index);
342
353
  FAISS_THROW_IF_NOT(index_ivf);
343
354
 
344
- index_ivf->quantizer->search(n, x, params->nprobe, Dq.data(), Iq.data());
355
+ SearchParameters* quantizer_params =
356
+ (params) ? params->quantizer_params : nullptr;
357
+ index_ivf->quantizer->search(
358
+ n, x, params->nprobe, Dq.data(), Iq.data(), quantizer_params);
345
359
 
346
360
  if (nb_dis_ptr) {
347
361
  *nb_dis_ptr = count_ndis(index_ivf, n * params->nprobe, Iq.data());
@@ -371,14 +385,14 @@ void range_search_with_parameters(
371
385
  double* ms_per_stage) {
372
386
  FAISS_THROW_IF_NOT(params);
373
387
  const float* prev_x = x;
374
- ScopeDeleter<float> del;
388
+ std::unique_ptr<const float[]> del;
375
389
 
376
390
  double t0 = getmillisecs();
377
391
 
378
392
  if (auto ip = dynamic_cast<const IndexPreTransform*>(index)) {
379
393
  x = ip->apply_chain(n, x);
380
394
  if (x != prev_x) {
381
- del.set(x);
395
+ del.reset(x);
382
396
  }
383
397
  index = ip->index;
384
398
  }
@@ -18,7 +18,7 @@
18
18
 
19
19
  namespace faiss {
20
20
 
21
- Index::~Index() {}
21
+ Index::~Index() = default;
22
22
 
23
23
  void Index::train(idx_t /*n*/, const float* /*x*/) {
24
24
  // does nothing by default
@@ -17,8 +17,20 @@
17
17
  #include <typeinfo>
18
18
 
19
19
  #define FAISS_VERSION_MAJOR 1
20
- #define FAISS_VERSION_MINOR 7
21
- #define FAISS_VERSION_PATCH 4
20
+ #define FAISS_VERSION_MINOR 9
21
+ #define FAISS_VERSION_PATCH 0
22
+
23
+ // Macro to combine the version components into a single string
24
+ #ifndef FAISS_STRINGIFY
25
+ #define FAISS_STRINGIFY(ARG) #ARG
26
+ #endif
27
+ #ifndef FAISS_TOSTRING
28
+ #define FAISS_TOSTRING(ARG) FAISS_STRINGIFY(ARG)
29
+ #endif
30
+ #define VERSION_STRING \
31
+ FAISS_TOSTRING(FAISS_VERSION_MAJOR) \
32
+ "." FAISS_TOSTRING(FAISS_VERSION_MINOR) "." FAISS_TOSTRING( \
33
+ FAISS_VERSION_PATCH)
22
34
 
23
35
  /**
24
36
  * @namespace faiss
@@ -38,8 +50,8 @@
38
50
 
39
51
  namespace faiss {
40
52
 
41
- /// Forward declarations see impl/AuxIndexStructures.h, impl/IDSelector.h and
42
- /// impl/DistanceComputer.h
53
+ /// Forward declarations see impl/AuxIndexStructures.h, impl/IDSelector.h
54
+ /// and impl/DistanceComputer.h
43
55
  struct IDSelector;
44
56
  struct RangeSearchResult;
45
57
  struct DistanceComputer;
@@ -56,7 +68,8 @@ struct SearchParameters {
56
68
  virtual ~SearchParameters() {}
57
69
  };
58
70
 
59
- /** Abstract structure for an index, supports adding vectors and searching them.
71
+ /** Abstract structure for an index, supports adding vectors and searching
72
+ * them.
60
73
  *
61
74
  * All vectors provided at add or search time are 32-bit float arrays,
62
75
  * although the internal representation may vary.
@@ -99,6 +112,7 @@ struct Index {
99
112
  * Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
100
113
  * This function slices the input vectors in chunks smaller than
101
114
  * blocksize_add and calls add_core.
115
+ * @param n number of vectors
102
116
  * @param x input matrix, size n * d
103
117
  */
104
118
  virtual void add(idx_t n, const float* x) = 0;
@@ -108,7 +122,9 @@ struct Index {
108
122
  * The default implementation fails with an assertion, as it is
109
123
  * not supported by all indexes.
110
124
  *
111
- * @param xids if non-null, ids to store for the vectors (size n)
125
+ * @param n number of vectors
126
+ * @param x input vectors, size n * d
127
+ * @param xids if non-null, ids to store for the vectors (size n)
112
128
  */
113
129
  virtual void add_with_ids(idx_t n, const float* x, const idx_t* xids);
114
130
 
@@ -117,9 +133,11 @@ struct Index {
117
133
  * return at most k vectors. If there are not enough results for a
118
134
  * query, the result array is padded with -1s.
119
135
  *
136
+ * @param n number of vectors
120
137
  * @param x input vectors to search, size n * d
121
- * @param labels output labels of the NNs, size n*k
138
+ * @param k number of extracted vectors
122
139
  * @param distances output pairwise distances, size n*k
140
+ * @param labels output labels of the NNs, size n*k
123
141
  */
124
142
  virtual void search(
125
143
  idx_t n,
@@ -135,6 +153,7 @@ struct Index {
135
153
  * indexes do not implement the range_search (only the k-NN search
136
154
  * is mandatory).
137
155
  *
156
+ * @param n number of vectors
138
157
  * @param x input vectors to search, size n * d
139
158
  * @param radius search radius
140
159
  * @param result result table
@@ -148,9 +167,12 @@ struct Index {
148
167
 
149
168
  /** return the indexes of the k vectors closest to the query x.
150
169
  *
151
- * This function is identical as search but only return labels of neighbors.
170
+ * This function is identical to search but only returns labels of
171
+ * neighbors.
172
+ * @param n number of vectors
152
173
  * @param x input vectors to search, size n * d
153
174
  * @param labels output labels of the NNs, size n*k
175
+ * @param k number of nearest neighbours
154
176
  */
155
177
  virtual void assign(idx_t n, const float* x, idx_t* labels, idx_t k = 1)
156
178
  const;
@@ -171,10 +193,11 @@ struct Index {
171
193
  */
172
194
  virtual void reconstruct(idx_t key, float* recons) const;
173
195
 
174
- /** Reconstruct several stored vectors (or an approximation if lossy coding)
196
+ /** Reconstruct several stored vectors (or an approximation if lossy
197
+ * coding)
175
198
  *
176
199
  * this function may not be defined for some indexes
177
- * @param n number of vectors to reconstruct
200
+ * @param n number of vectors to reconstruct
178
201
  * @param keys ids of the vectors to reconstruct (size n)
179
202
  * @param recons reconstructed vector (size n * d)
180
203
  */
@@ -184,6 +207,8 @@ struct Index {
184
207
  /** Reconstruct vectors i0 to i0 + ni - 1
185
208
  *
186
209
  * this function may not be defined for some indexes
210
+ * @param i0 index of the first vector in the sequence
211
+ * @param ni number of vectors in the sequence
187
212
  * @param recons reconstructed vector (size ni * d)
188
213
  */
189
214
  virtual void reconstruct_n(idx_t i0, idx_t ni, float* recons) const;
@@ -194,6 +219,11 @@ struct Index {
194
219
  * If there are not enough results for a query, the resulting arrays
195
220
  * are padded with -1s.
196
221
  *
222
+ * @param n number of vectors
223
+ * @param x input vectors to search, size n * d
224
+ * @param k number of extracted vectors
225
+ * @param distances output pairwise distances, size n*k
226
+ * @param labels output labels of the NNs, size n*k
197
227
  * @param recons reconstructed vectors size (n, k, d)
198
228
  **/
199
229
  virtual void search_and_reconstruct(