faiss 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +23 -21
  4. data/ext/faiss/extconf.rb +11 -0
  5. data/ext/faiss/index.cpp +4 -4
  6. data/ext/faiss/index_binary.cpp +6 -6
  7. data/ext/faiss/product_quantizer.cpp +4 -4
  8. data/lib/faiss/version.rb +1 -1
  9. data/vendor/faiss/faiss/AutoTune.cpp +13 -0
  10. data/vendor/faiss/faiss/IVFlib.cpp +101 -2
  11. data/vendor/faiss/faiss/IVFlib.h +26 -2
  12. data/vendor/faiss/faiss/Index.cpp +36 -3
  13. data/vendor/faiss/faiss/Index.h +43 -6
  14. data/vendor/faiss/faiss/Index2Layer.cpp +6 -2
  15. data/vendor/faiss/faiss/Index2Layer.h +6 -1
  16. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +219 -16
  17. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +63 -5
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +299 -0
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +199 -0
  20. data/vendor/faiss/faiss/IndexBinary.cpp +20 -4
  21. data/vendor/faiss/faiss/IndexBinary.h +18 -3
  22. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +9 -2
  23. data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -2
  24. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -1
  25. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +2 -1
  26. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +5 -1
  27. data/vendor/faiss/faiss/IndexBinaryHNSW.h +2 -1
  28. data/vendor/faiss/faiss/IndexBinaryHash.cpp +17 -4
  29. data/vendor/faiss/faiss/IndexBinaryHash.h +8 -4
  30. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +28 -13
  31. data/vendor/faiss/faiss/IndexBinaryIVF.h +10 -7
  32. data/vendor/faiss/faiss/IndexFastScan.cpp +626 -0
  33. data/vendor/faiss/faiss/IndexFastScan.h +145 -0
  34. data/vendor/faiss/faiss/IndexFlat.cpp +34 -21
  35. data/vendor/faiss/faiss/IndexFlat.h +7 -4
  36. data/vendor/faiss/faiss/IndexFlatCodes.cpp +35 -1
  37. data/vendor/faiss/faiss/IndexFlatCodes.h +12 -0
  38. data/vendor/faiss/faiss/IndexHNSW.cpp +66 -138
  39. data/vendor/faiss/faiss/IndexHNSW.h +4 -2
  40. data/vendor/faiss/faiss/IndexIDMap.cpp +247 -0
  41. data/vendor/faiss/faiss/IndexIDMap.h +107 -0
  42. data/vendor/faiss/faiss/IndexIVF.cpp +121 -33
  43. data/vendor/faiss/faiss/IndexIVF.h +35 -16
  44. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -7
  45. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +63 -1
  46. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +590 -0
  47. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +171 -0
  48. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +1290 -0
  49. data/vendor/faiss/faiss/IndexIVFFastScan.h +213 -0
  50. data/vendor/faiss/faiss/IndexIVFFlat.cpp +37 -17
  51. data/vendor/faiss/faiss/IndexIVFFlat.h +4 -2
  52. data/vendor/faiss/faiss/IndexIVFPQ.cpp +234 -50
  53. data/vendor/faiss/faiss/IndexIVFPQ.h +5 -1
  54. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +23 -852
  55. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -112
  56. data/vendor/faiss/faiss/IndexIVFPQR.cpp +3 -3
  57. data/vendor/faiss/faiss/IndexIVFPQR.h +1 -1
  58. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +3 -1
  59. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
  60. data/vendor/faiss/faiss/IndexLSH.cpp +4 -2
  61. data/vendor/faiss/faiss/IndexLSH.h +2 -1
  62. data/vendor/faiss/faiss/IndexLattice.cpp +7 -1
  63. data/vendor/faiss/faiss/IndexLattice.h +3 -1
  64. data/vendor/faiss/faiss/IndexNNDescent.cpp +4 -3
  65. data/vendor/faiss/faiss/IndexNNDescent.h +2 -1
  66. data/vendor/faiss/faiss/IndexNSG.cpp +37 -3
  67. data/vendor/faiss/faiss/IndexNSG.h +25 -1
  68. data/vendor/faiss/faiss/IndexPQ.cpp +106 -69
  69. data/vendor/faiss/faiss/IndexPQ.h +19 -5
  70. data/vendor/faiss/faiss/IndexPQFastScan.cpp +15 -450
  71. data/vendor/faiss/faiss/IndexPQFastScan.h +15 -78
  72. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -8
  73. data/vendor/faiss/faiss/IndexPreTransform.h +15 -3
  74. data/vendor/faiss/faiss/IndexRefine.cpp +8 -4
  75. data/vendor/faiss/faiss/IndexRefine.h +4 -2
  76. data/vendor/faiss/faiss/IndexReplicas.cpp +4 -2
  77. data/vendor/faiss/faiss/IndexReplicas.h +2 -1
  78. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +438 -0
  79. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +92 -0
  80. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +26 -15
  81. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -7
  82. data/vendor/faiss/faiss/IndexShards.cpp +4 -1
  83. data/vendor/faiss/faiss/IndexShards.h +2 -1
  84. data/vendor/faiss/faiss/MetaIndexes.cpp +5 -178
  85. data/vendor/faiss/faiss/MetaIndexes.h +3 -81
  86. data/vendor/faiss/faiss/VectorTransform.cpp +43 -0
  87. data/vendor/faiss/faiss/VectorTransform.h +22 -4
  88. data/vendor/faiss/faiss/clone_index.cpp +23 -1
  89. data/vendor/faiss/faiss/clone_index.h +3 -0
  90. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +300 -0
  91. data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +24 -0
  92. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +195 -0
  93. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +2058 -0
  94. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +408 -0
  95. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +2147 -0
  96. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +460 -0
  97. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +465 -0
  98. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +1618 -0
  99. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +251 -0
  100. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +1452 -0
  101. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +1 -0
  102. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +0 -4
  103. data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
  104. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +2 -1
  105. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +10 -8
  106. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +75 -14
  107. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +19 -32
  108. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -31
  109. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +22 -28
  110. data/vendor/faiss/faiss/gpu/GpuResources.cpp +14 -0
  111. data/vendor/faiss/faiss/gpu/GpuResources.h +16 -3
  112. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +3 -3
  113. data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +32 -0
  114. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -0
  115. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +311 -75
  116. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +10 -0
  117. data/vendor/faiss/faiss/gpu/test/TestUtils.h +3 -0
  118. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +2 -2
  119. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +5 -4
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +116 -47
  121. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +44 -13
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +0 -54
  123. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +0 -76
  124. data/vendor/faiss/faiss/impl/DistanceComputer.h +64 -0
  125. data/vendor/faiss/faiss/impl/HNSW.cpp +123 -27
  126. data/vendor/faiss/faiss/impl/HNSW.h +19 -16
  127. data/vendor/faiss/faiss/impl/IDSelector.cpp +125 -0
  128. data/vendor/faiss/faiss/impl/IDSelector.h +135 -0
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +6 -28
  130. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +6 -1
  131. data/vendor/faiss/faiss/impl/LookupTableScaler.h +77 -0
  132. data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -0
  133. data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
  134. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +383 -0
  135. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +154 -0
  136. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +225 -145
  137. data/vendor/faiss/faiss/impl/ProductQuantizer.h +29 -10
  138. data/vendor/faiss/faiss/impl/Quantizer.h +43 -0
  139. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +192 -36
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +40 -20
  141. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -0
  142. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +97 -173
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +18 -18
  144. data/vendor/faiss/faiss/impl/index_read.cpp +240 -9
  145. data/vendor/faiss/faiss/impl/index_write.cpp +237 -5
  146. data/vendor/faiss/faiss/impl/kmeans1d.cpp +6 -4
  147. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +56 -16
  148. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +25 -8
  149. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +66 -25
  150. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +75 -27
  151. data/vendor/faiss/faiss/index_factory.cpp +196 -7
  152. data/vendor/faiss/faiss/index_io.h +5 -0
  153. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -0
  154. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +4 -1
  155. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +2 -1
  156. data/vendor/faiss/faiss/python/python_callbacks.cpp +27 -0
  157. data/vendor/faiss/faiss/python/python_callbacks.h +15 -0
  158. data/vendor/faiss/faiss/utils/Heap.h +31 -15
  159. data/vendor/faiss/faiss/utils/distances.cpp +380 -56
  160. data/vendor/faiss/faiss/utils/distances.h +113 -15
  161. data/vendor/faiss/faiss/utils/distances_simd.cpp +726 -6
  162. data/vendor/faiss/faiss/utils/extra_distances.cpp +12 -7
  163. data/vendor/faiss/faiss/utils/extra_distances.h +3 -1
  164. data/vendor/faiss/faiss/utils/fp16-fp16c.h +21 -0
  165. data/vendor/faiss/faiss/utils/fp16-inl.h +101 -0
  166. data/vendor/faiss/faiss/utils/fp16.h +11 -0
  167. data/vendor/faiss/faiss/utils/hamming-inl.h +54 -0
  168. data/vendor/faiss/faiss/utils/hamming.cpp +0 -48
  169. data/vendor/faiss/faiss/utils/ordered_key_value.h +10 -0
  170. data/vendor/faiss/faiss/utils/quantize_lut.cpp +62 -0
  171. data/vendor/faiss/faiss/utils/quantize_lut.h +20 -0
  172. data/vendor/faiss/faiss/utils/random.cpp +53 -0
  173. data/vendor/faiss/faiss/utils/random.h +5 -0
  174. data/vendor/faiss/faiss/utils/simdlib_avx2.h +4 -0
  175. data/vendor/faiss/faiss/utils/simdlib_emulated.h +6 -1
  176. data/vendor/faiss/faiss/utils/simdlib_neon.h +7 -2
  177. metadata +37 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 15428eb8dd7d27f8a94e3a7797dd765827e5454def33fa785055adb7ef0d20c5
4
- data.tar.gz: 3e8eafebc49163c928bcab8d0ebd0f7b69e6659e49f36d7aaeb44e8651853ac9
3
+ metadata.gz: bdc2effbe6e2d827ffe473531be0864cf710ae6f3ad34f8324087695c367d140
4
+ data.tar.gz: 6e5b80b1f4281766b17d208af58b44e768576d259de1c6edb25630c700215c10
5
5
  SHA512:
6
- metadata.gz: 598f6e626d5c970e408cff68ec479bf1aa2d6ee18adeeeb2489d1c4fbf627dacbc6e398ce149f0483720080a439df4d2887e5b6c9dc9f465e8ffa1bbeede84a8
7
- data.tar.gz: fefebfbbbbceb58ac6c6b02636630943d30db7af4ee92521586b4fb40e73bf1aa2b401e28e4c27872bf9344f60e3c9fb288ec6420abc1ffccd05ffd3ec7379fd
6
+ metadata.gz: 404d064f14734a23d946ad0fc0576673e5e685d1001884981453e3bec23ecf6ba7b75857e578b47ef5303fde27342dfb25e85bbb7a60883d6ba09a964049bbfa
7
+ data.tar.gz: 536f403109d3773a3cd0ca27b4b46bfa26eaeedd8d5e2f4e6fa116e3b922d21f8a964a2d6aa1cbbff951da9444e9184bf7c60955263ed68c91ba91342f3f9196
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.2.5 (2022-12-27)
2
+
3
+ - Updated Faiss to 1.7.3
4
+ - Fixed installation errors on Mac
5
+
1
6
  ## 0.2.4 (2022-01-10)
2
7
 
3
8
  - Updated Faiss to 1.7.2
data/README.md CHANGED
@@ -8,23 +8,25 @@ Learn more about [Faiss](https://engineering.fb.com/data-infrastructure/faiss-a-
8
8
 
9
9
  ## Installation
10
10
 
11
- First, install BLAS, LAPACK, and OpenMP:
11
+ First, ensure BLAS, LAPACK, and OpenMP are installed. For Mac, use:
12
12
 
13
13
  ```sh
14
- # Mac
15
- brew install openblas lapack libomp
14
+ brew install libomp
15
+ ```
16
+
17
+ For Ubuntu, use:
16
18
 
17
- # Ubuntu
18
- sudo apt install libblas-dev liblapack-dev
19
+ ```sh
20
+ sudo apt-get install libblas-dev liblapack-dev
19
21
  ```
20
22
 
21
- Add this line to your application’s Gemfile:
23
+ Then add this line to your application’s Gemfile:
22
24
 
23
25
  ```ruby
24
- gem 'faiss'
26
+ gem "faiss"
25
27
  ```
26
28
 
27
- It can take a few minutes to compile the gem. Faiss is not available for Windows.
29
+ It can take a few minutes to compile the gem. Windows is not currently supported.
28
30
 
29
31
  ## Getting Started
30
32
 
@@ -69,61 +71,61 @@ index = Faiss::Index.load("index.bin")
69
71
 
70
72
  Exact search for L2
71
73
 
72
- ```rb
74
+ ```ruby
73
75
  Faiss::IndexFlatL2.new(d)
74
76
  ```
75
77
 
76
78
  Exact search for inner product
77
79
 
78
- ```rb
80
+ ```ruby
79
81
  Faiss::IndexFlatIP.new(d)
80
82
  ```
81
83
 
82
84
  Hierarchical navigable small world graph exploration
83
85
 
84
- ```rb
86
+ ```ruby
85
87
  Faiss::IndexHNSWFlat.new(d, m)
86
88
  ```
87
89
 
88
90
  Inverted file with exact post-verification
89
91
 
90
- ```rb
92
+ ```ruby
91
93
  Faiss::IndexIVFFlat.new(quantizer, d, nlists)
92
94
  ```
93
95
 
94
96
  Locality-sensitive hashing
95
97
 
96
- ```rb
98
+ ```ruby
97
99
  Faiss::IndexLSH.new(d, nbits)
98
100
  ```
99
101
 
100
102
  Scalar quantizer (SQ) in flat mode
101
103
 
102
- ```rb
104
+ ```ruby
103
105
  Faiss::IndexScalarQuantizer.new(d, qtype)
104
106
  ```
105
107
 
106
108
  Product quantizer (PQ) in flat mode
107
109
 
108
- ```rb
110
+ ```ruby
109
111
  Faiss::IndexPQ.new(d, m, nbits)
110
112
  ```
111
113
 
112
114
  IVF and scalar quantizer
113
115
 
114
- ```rb
116
+ ```ruby
115
117
  Faiss::IndexIVFScalarQuantizer.new(quantizer, d, nlists, qtype)
116
118
  ```
117
119
 
118
120
  IVFADC (coarse quantizer+PQ on residuals)
119
121
 
120
- ```rb
122
+ ```ruby
121
123
  Faiss::IndexIVFPQ.new(quantizer, d, nlists, m, nbits)
122
124
  ```
123
125
 
124
126
  IVFADC+R (same as IVFADC with re-ranking based on codes)
125
127
 
126
- ```rb
128
+ ```ruby
127
129
  Faiss::IndexIVFPQR.new(quantizer, d, nlists, m, nbits, m_refine, nbits_refine)
128
130
  ```
129
131
 
@@ -131,13 +133,13 @@ Faiss::IndexIVFPQR.new(quantizer, d, nlists, m, nbits, m_refine, nbits_refine)
131
133
 
132
134
  Index binary vectors
133
135
 
134
- ```rb
136
+ ```ruby
135
137
  Faiss::IndexBinaryFlat.new(d)
136
138
  ```
137
139
 
138
140
  Speed up search with an inverse vector file
139
141
 
140
- ```rb
142
+ ```ruby
141
143
  Faiss::IndexBinaryIVF.new(quantizer, d, nlists)
142
144
  ```
143
145
 
@@ -220,7 +222,7 @@ Numo::NArray.cast([[1, 2, 3], [4, 5, 6]])
220
222
 
221
223
  ## History
222
224
 
223
- View the [changelog](https://github.com/ankane/faiss-ruby/blob/master/CHANGELOG.md)
225
+ View the [changelog](CHANGELOG.md)
224
226
 
225
227
  ## Contributing
226
228
 
data/ext/faiss/extconf.rb CHANGED
@@ -1,6 +1,14 @@
1
1
  require "mkmf-rice"
2
2
  require "numo/narray"
3
3
 
4
+ # libomp changed to keg-only
5
+ # https://github.com/Homebrew/homebrew-core/issues/112107
6
+ if RbConfig::CONFIG["host_os"] =~ /darwin/i
7
+ brew_prefix = RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i ? "/opt/homebrew" : "/usr/local"
8
+ find_library("omp", nil, "#{brew_prefix}/opt/libomp/lib")
9
+ find_header("omp.h", "#{brew_prefix}/opt/libomp/include")
10
+ end
11
+
4
12
  abort "BLAS not found" unless have_library("blas")
5
13
  abort "LAPACK not found" unless have_library("lapack")
6
14
  abort "OpenMP not found" unless have_library("omp") || have_library("gomp")
@@ -8,6 +16,9 @@ abort "OpenMP not found" unless have_library("omp") || have_library("gomp")
8
16
  numo = File.join(Gem.loaded_specs["numo-narray"].require_path, "numo")
9
17
  abort "Numo not found" unless find_header("numo/narray.h", numo)
10
18
 
19
+ # for https://bugs.ruby-lang.org/issues/19005
20
+ $LDFLAGS += " -Wl,-undefined,dynamic_lookup" if RbConfig::CONFIG["host_os"] =~ /darwin/i
21
+
11
22
  # -march=native not supported with ARM Mac
12
23
  default_optflags = RbConfig::CONFIG["host_os"] =~ /darwin/i && RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i ? "" : "-march=native"
13
24
  $CXXFLAGS << " -std=c++17 $(optflags) -DFINTEGER=int " << with_config("optflags", default_optflags)
data/ext/faiss/index.cpp CHANGED
@@ -140,13 +140,13 @@ void init_index(Rice::Module& m) {
140
140
  })
141
141
  .define_method(
142
142
  "save",
143
- [](faiss::Index &self, const char *fname) {
144
- faiss::write_index(&self, fname);
143
+ [](faiss::Index &self, Rice::String fname) {
144
+ faiss::write_index(&self, fname.c_str());
145
145
  })
146
146
  .define_singleton_function(
147
147
  "load",
148
- [](const char *fname) {
149
- return faiss::read_index(fname);
148
+ [](Rice::String fname) {
149
+ return faiss::read_index(fname.c_str());
150
150
  });
151
151
 
152
152
  Rice::define_class_under<faiss::IndexFlatL2, faiss::Index>(m, "IndexFlatL2")
@@ -52,13 +52,13 @@ void init_index_binary(Rice::Module& m) {
52
52
  })
53
53
  .define_method(
54
54
  "save",
55
- [](faiss::IndexBinary &self, const char *fname) {
56
- faiss::write_index_binary(&self, fname);
55
+ [](faiss::IndexBinary &self, Rice::String fname) {
56
+ faiss::write_index_binary(&self, fname.c_str());
57
57
  })
58
58
  .define_singleton_function(
59
59
  "load",
60
- [](const char *fname) {
61
- return faiss::read_index_binary(fname);
60
+ [](Rice::String fname) {
61
+ return faiss::read_index_binary(fname.c_str());
62
62
  });
63
63
 
64
64
  Rice::define_class_under<faiss::IndexBinaryFlat, faiss::IndexBinary>(m, "IndexBinaryFlat")
@@ -69,7 +69,7 @@ void init_index_binary(Rice::Module& m) {
69
69
 
70
70
  m.define_singleton_function(
71
71
  "index_binary_factory",
72
- [](int d, const char *description) {
73
- return faiss::index_binary_factory(d, description);
72
+ [](int d, Rice::String description) {
73
+ return faiss::index_binary_factory(d, description.c_str());
74
74
  });
75
75
  }
@@ -42,12 +42,12 @@ void init_product_quantizer(Rice::Module& m) {
42
42
  })
43
43
  .define_method(
44
44
  "save",
45
- [](faiss::ProductQuantizer &self, const char *fname) {
46
- faiss::write_ProductQuantizer(&self, fname);
45
+ [](faiss::ProductQuantizer &self, Rice::String fname) {
46
+ faiss::write_ProductQuantizer(&self, fname.c_str());
47
47
  })
48
48
  .define_singleton_function(
49
49
  "load",
50
- [](const char *fname) {
51
- return faiss::read_ProductQuantizer(fname);
50
+ [](Rice::String fname) {
51
+ return faiss::read_ProductQuantizer(fname.c_str());
52
52
  });
53
53
  }
data/lib/faiss/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Faiss
2
- VERSION = "0.2.4"
2
+ VERSION = "0.2.5"
3
3
  end
@@ -523,6 +523,19 @@ void ParameterSpace::set_index_parameter(
523
523
  }
524
524
  }
525
525
 
526
+ if (name == "efConstruction") {
527
+ if (DC(IndexHNSW)) {
528
+ ix->hnsw.efConstruction = int(val);
529
+ return;
530
+ }
531
+ if (DC(IndexIVF)) {
532
+ if (IndexHNSW* cq = dynamic_cast<IndexHNSW*>(ix->quantizer)) {
533
+ cq->hnsw.efConstruction = int(val);
534
+ return;
535
+ }
536
+ }
537
+ }
538
+
526
539
  if (name == "efSearch") {
527
540
  if (DC(IndexHNSW)) {
528
541
  ix->hnsw.efSearch = int(val);
@@ -5,15 +5,18 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
- // -*- c++ -*-
9
-
10
8
  #include <faiss/IVFlib.h>
9
+ #include <omp.h>
11
10
 
12
11
  #include <memory>
13
12
 
13
+ #include <faiss/IndexAdditiveQuantizer.h>
14
+ #include <faiss/IndexIVFAdditiveQuantizer.h>
14
15
  #include <faiss/IndexPreTransform.h>
15
16
  #include <faiss/MetaIndexes.h>
16
17
  #include <faiss/impl/FaissAssert.h>
18
+ #include <faiss/utils/distances.h>
19
+ #include <faiss/utils/hamming.h>
17
20
  #include <faiss/utils/utils.h>
18
21
 
19
22
  namespace faiss {
@@ -349,6 +352,7 @@ void search_with_parameters(
349
352
  index_ivf->search_preassigned(
350
353
  n, x, k, Iq.data(), Dq.data(), distances, labels, false, params);
351
354
  double t3 = getmillisecs();
355
+
352
356
  if (ms_per_stage) {
353
357
  ms_per_stage[0] = t1 - t0;
354
358
  ms_per_stage[1] = t2 - t1;
@@ -406,5 +410,100 @@ void range_search_with_parameters(
406
410
  }
407
411
  }
408
412
 
413
+ IndexIVFResidualQuantizer* ivf_residual_from_quantizer(
414
+ const ResidualQuantizer& rq,
415
+ int nlevel) {
416
+ FAISS_THROW_IF_NOT(nlevel > 0 && nlevel + 1 < rq.M);
417
+
418
+ std::vector<size_t> nbits(nlevel);
419
+ std::copy(rq.nbits.begin(), rq.nbits.begin() + nlevel, nbits.begin());
420
+ std::unique_ptr<ResidualCoarseQuantizer> rcq(
421
+ new ResidualCoarseQuantizer(rq.d, nbits));
422
+
423
+ // set the coarse quantizer from the first nlevel quantizers
424
+ rcq->rq.initialize_from(rq);
425
+ rcq->is_trained = true;
426
+ rcq->ntotal = (idx_t)1 << rcq->rq.tot_bits;
427
+
428
+ // settings for exhaustive search in RCQ
429
+ rcq->centroid_norms.resize(rcq->ntotal);
430
+ rcq->aq->compute_centroid_norms(rcq->centroid_norms.data());
431
+ rcq->beam_factor = -1.0; // use exact search
432
+ size_t nlist = rcq->ntotal;
433
+
434
+ // build an IVFResidualQuantizer from that
435
+ std::vector<size_t> nbits_refined;
436
+ for (int i = nlevel; i < rq.M; i++) {
437
+ nbits_refined.push_back(rq.nbits[i]);
438
+ }
439
+ std::unique_ptr<IndexIVFResidualQuantizer> index(
440
+ new IndexIVFResidualQuantizer(
441
+ rcq.get(),
442
+ rq.d,
443
+ nlist,
444
+ nbits_refined,
445
+ faiss::METRIC_L2,
446
+ rq.search_type));
447
+ index->own_fields = true;
448
+ rcq.release();
449
+ index->by_residual = true;
450
+ index->rq.initialize_from(rq, nlevel);
451
+ index->is_trained = true;
452
+
453
+ return index.release();
454
+ }
455
+
456
+ void ivf_residual_add_from_flat_codes(
457
+ IndexIVFResidualQuantizer* index,
458
+ size_t nb,
459
+ const uint8_t* raw_codes,
460
+ int64_t code_size) {
461
+ const ResidualCoarseQuantizer* rcq =
462
+ dynamic_cast<const faiss::ResidualCoarseQuantizer*>(
463
+ index->quantizer);
464
+ FAISS_THROW_IF_NOT_MSG(rcq, "the coarse quantizer must be a RCQ");
465
+ if (code_size < 0) {
466
+ code_size = index->code_size;
467
+ }
468
+ InvertedLists& invlists = *index->invlists;
469
+ const ResidualQuantizer& rq = index->rq;
470
+
471
+ // populate inverted lists
472
+ #pragma omp parallel if (nb > 10000)
473
+ {
474
+ std::vector<uint8_t> tmp_code(index->code_size);
475
+ std::vector<float> tmp(rq.d);
476
+ int nt = omp_get_num_threads();
477
+ int rank = omp_get_thread_num();
478
+
479
+ #pragma omp for
480
+ for (idx_t i = 0; i < nb; i++) {
481
+ const uint8_t* code = &raw_codes[i * code_size];
482
+ BitstringReader rd(code, code_size);
483
+ idx_t list_no = rd.read(rcq->rq.tot_bits);
484
+
485
+ if (list_no % nt ==
486
+ rank) { // each thread takes care of 1/nt of the invlists
487
+ // copy AQ indexes one by one
488
+ BitstringWriter wr(tmp_code.data(), tmp_code.size());
489
+ for (int j = 0; j < rq.M; j++) {
490
+ int nbit = rq.nbits[j];
491
+ wr.write(rd.read(nbit), nbit);
492
+ }
493
+ // we need to recompute the norm
494
+ // decode first, does not use the norm component, so that's
495
+ // ok
496
+ index->rq.decode(tmp_code.data(), tmp.data(), 1);
497
+ float norm = fvec_norm_L2sqr(tmp.data(), rq.d);
498
+ wr.write(rq.encode_norm(norm), rq.norm_bits);
499
+
500
+ // add code to the inverted list
501
+ invlists.add_entry(list_no, i, tmp_code.data());
502
+ }
503
+ }
504
+ }
505
+ index->ntotal += nb;
506
+ }
507
+
409
508
  } // namespace ivflib
410
509
  } // namespace faiss
@@ -5,8 +5,6 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
- // -*- c++ -*-
9
-
10
8
  #ifndef FAISS_IVFLIB_H
11
9
  #define FAISS_IVFLIB_H
12
10
 
@@ -20,6 +18,11 @@
20
18
  #include <vector>
21
19
 
22
20
  namespace faiss {
21
+
22
+ struct IndexIVFResidualQuantizer;
23
+ struct IndexResidualQuantizer;
24
+ struct ResidualQuantizer;
25
+
23
26
  namespace ivflib {
24
27
 
25
28
  /** check if two indexes have the same parameters and are trained in
@@ -145,6 +148,27 @@ void range_search_with_parameters(
145
148
  size_t* nb_dis = nullptr,
146
149
  double* ms_per_stage = nullptr);
147
150
 
151
+ /** Build an IndexIVFResidualQuantizer from a ResidualQuantizer, using the
152
+ * nlevel first components as coarse quantizer and the rest as codes in invlists
153
+ */
154
+ IndexIVFResidualQuantizer* ivf_residual_from_quantizer(
155
+ const ResidualQuantizer&,
156
+ int nlevel);
157
+
158
+ /** add from codes. NB that the norm component is not used, so the code_size can
159
+ * be provided.
160
+ *
161
+ * @param ivfrq index to populate with the codes
162
+ * @param codes codes to add, size (ncode, code_size)
163
+ * @param code_size override the ivfrq's code_size, useful if the norm encoding
164
+ * is different
165
+ */
166
+ void ivf_residual_add_from_flat_codes(
167
+ IndexIVFResidualQuantizer* ivfrq,
168
+ size_t ncode,
169
+ const uint8_t* codes,
170
+ int64_t code_size = -1);
171
+
148
172
  } // namespace ivflib
149
173
  } // namespace faiss
150
174
 
@@ -10,6 +10,7 @@
10
10
  #include <faiss/Index.h>
11
11
 
12
12
  #include <faiss/impl/AuxIndexStructures.h>
13
+ #include <faiss/impl/DistanceComputer.h>
13
14
  #include <faiss/impl/FaissAssert.h>
14
15
  #include <faiss/utils/distances.h>
15
16
 
@@ -23,7 +24,12 @@ void Index::train(idx_t /*n*/, const float* /*x*/) {
23
24
  // does nothing by default
24
25
  }
25
26
 
26
- void Index::range_search(idx_t, const float*, float, RangeSearchResult*) const {
27
+ void Index::range_search(
28
+ idx_t,
29
+ const float*,
30
+ float,
31
+ RangeSearchResult*,
32
+ const SearchParameters* params) const {
27
33
  FAISS_THROW_MSG("range search not implemented");
28
34
  }
29
35
 
@@ -48,7 +54,25 @@ void Index::reconstruct(idx_t, float*) const {
48
54
  FAISS_THROW_MSG("reconstruct not implemented for this type of index");
49
55
  }
50
56
 
57
+ void Index::reconstruct_batch(idx_t n, const idx_t* keys, float* recons) const {
58
+ std::mutex exception_mutex;
59
+ std::string exception_string;
60
+ #pragma omp parallel for if (n > 1000)
61
+ for (idx_t i = 0; i < n; i++) {
62
+ try {
63
+ reconstruct(keys[i], &recons[i * d]);
64
+ } catch (const std::exception& e) {
65
+ std::lock_guard<std::mutex> lock(exception_mutex);
66
+ exception_string = e.what();
67
+ }
68
+ }
69
+ if (!exception_string.empty()) {
70
+ FAISS_THROW_MSG(exception_string.c_str());
71
+ }
72
+ }
73
+
51
74
  void Index::reconstruct_n(idx_t i0, idx_t ni, float* recons) const {
75
+ #pragma omp parallel for if (ni > 1000)
52
76
  for (idx_t i = 0; i < ni; i++) {
53
77
  reconstruct(i0 + i, recons + i * d);
54
78
  }
@@ -60,10 +84,11 @@ void Index::search_and_reconstruct(
60
84
  idx_t k,
61
85
  float* distances,
62
86
  idx_t* labels,
63
- float* recons) const {
87
+ float* recons,
88
+ const SearchParameters* params) const {
64
89
  FAISS_THROW_IF_NOT(k > 0);
65
90
 
66
- search(n, x, k, distances, labels);
91
+ search(n, x, k, distances, labels, params);
67
92
  for (idx_t i = 0; i < n; ++i) {
68
93
  for (idx_t j = 0; j < k; ++j) {
69
94
  idx_t ij = i * k + j;
@@ -149,4 +174,12 @@ DistanceComputer* Index::get_distance_computer() const {
149
174
  }
150
175
  }
151
176
 
177
+ void Index::merge_from(Index& /* otherIndex */, idx_t /* add_id */) {
178
+ FAISS_THROW_MSG("merge_from() not implemented");
179
+ }
180
+
181
+ void Index::check_compatible_for_merge(const Index& /* otherIndex */) const {
182
+ FAISS_THROW_MSG("check_compatible_for_merge() not implemented");
183
+ }
184
+
152
185
  } // namespace faiss
@@ -18,7 +18,7 @@
18
18
 
19
19
  #define FAISS_VERSION_MAJOR 1
20
20
  #define FAISS_VERSION_MINOR 7
21
- #define FAISS_VERSION_PATCH 2
21
+ #define FAISS_VERSION_PATCH 3
22
22
 
23
23
  /**
24
24
  * @namespace faiss
@@ -38,11 +38,24 @@
38
38
 
39
39
  namespace faiss {
40
40
 
41
- /// Forward declarations see AuxIndexStructures.h
41
+ /// Forward declarations see impl/AuxIndexStructures.h, impl/IDSelector.h and
42
+ /// impl/DistanceComputer.h
42
43
  struct IDSelector;
43
44
  struct RangeSearchResult;
44
45
  struct DistanceComputer;
45
46
 
47
+ /** Parent class for the optional search parameters.
48
+ *
49
+ * Sub-classes with additional search parameters should inherit this class.
50
+ * Ownership of the object fields is always to the caller.
51
+ */
52
+ struct SearchParameters {
53
+ /// if non-null, only these IDs will be considered during search.
54
+ IDSelector* sel = nullptr;
55
+ /// make sure we can dynamic_cast this
56
+ virtual ~SearchParameters() {}
57
+ };
58
+
46
59
  /** Abstract structure for an index, supports adding vectors and searching them.
47
60
  *
48
61
  * All vectors provided at add or search time are 32-bit float arrays,
@@ -114,7 +127,8 @@ struct Index {
114
127
  const float* x,
115
128
  idx_t k,
116
129
  float* distances,
117
- idx_t* labels) const = 0;
130
+ idx_t* labels,
131
+ const SearchParameters* params = nullptr) const = 0;
118
132
 
119
133
  /** query n vectors of dimension d to the index.
120
134
  *
@@ -130,7 +144,8 @@ struct Index {
130
144
  idx_t n,
131
145
  const float* x,
132
146
  float radius,
133
- RangeSearchResult* result) const;
147
+ RangeSearchResult* result,
148
+ const SearchParameters* params = nullptr) const;
134
149
 
135
150
  /** return the indexes of the k vectors closest to the query x.
136
151
  *
@@ -157,6 +172,16 @@ struct Index {
157
172
  */
158
173
  virtual void reconstruct(idx_t key, float* recons) const;
159
174
 
175
+ /** Reconstruct several stored vectors (or an approximation if lossy coding)
176
+ *
177
+ * this function may not be defined for some indexes
178
+ * @param n number of vectors to reconstruct
179
+ * @param keys ids of the vectors to reconstruct (size n)
180
+ * @param recons reconstucted vector (size n * d)
181
+ */
182
+ virtual void reconstruct_batch(idx_t n, const idx_t* keys, float* recons)
183
+ const;
184
+
160
185
  /** Reconstruct vectors i0 to i0 + ni - 1
161
186
  *
162
187
  * this function may not be defined for some indexes
@@ -178,7 +203,8 @@ struct Index {
178
203
  idx_t k,
179
204
  float* distances,
180
205
  idx_t* labels,
181
- float* recons) const;
206
+ float* recons,
207
+ const SearchParameters* params = nullptr) const;
182
208
 
183
209
  /** Computes a residual vector after indexing encoding.
184
210
  *
@@ -234,13 +260,24 @@ struct Index {
234
260
  */
235
261
  virtual void sa_encode(idx_t n, const float* x, uint8_t* bytes) const;
236
262
 
237
- /** encode a set of vectors
263
+ /** decode a set of vectors
238
264
  *
239
265
  * @param n number of vectors
240
266
  * @param bytes input encoded vectors, size n * sa_code_size()
241
267
  * @param x output vectors, size n * d
242
268
  */
243
269
  virtual void sa_decode(idx_t n, const uint8_t* bytes, float* x) const;
270
+
271
+ /** moves the entries from another dataset to self.
272
+ * On output, other is empty.
273
+ * add_id is added to all moved ids
274
+ * (for sequential ids, this would be this->ntotal) */
275
+ virtual void merge_from(Index& otherIndex, idx_t add_id = 0);
276
+
277
+ /** check that the two indexes are compatible (ie, they are
278
+ * trained in the same way and have the same
279
+ * parameters). Otherwise throw. */
280
+ virtual void check_compatible_for_merge(const Index& otherIndex) const;
244
281
  };
245
282
 
246
283
  } // namespace faiss
@@ -111,7 +111,8 @@ void Index2Layer::search(
111
111
  const float* /*x*/,
112
112
  idx_t /*k*/,
113
113
  float* /*distances*/,
114
- idx_t* /*labels*/) const {
114
+ idx_t* /*labels*/,
115
+ const SearchParameters* /* params */) const {
115
116
  FAISS_THROW_MSG("not implemented");
116
117
  }
117
118
 
@@ -282,10 +283,13 @@ DistanceComputer* Index2Layer::get_distance_computer() const {
282
283
 
283
284
  /* The standalone codec interface */
284
285
 
286
+ // block size used in Index2Layer::sa_encode
287
+ int index2layer_sa_encode_bs = 32768;
288
+
285
289
  void Index2Layer::sa_encode(idx_t n, const float* x, uint8_t* bytes) const {
286
290
  FAISS_THROW_IF_NOT(is_trained);
287
291
 
288
- idx_t bs = 32768;
292
+ idx_t bs = index2layer_sa_encode_bs;
289
293
  if (n > bs) {
290
294
  for (idx_t i0 = 0; i0 < n; i0 += bs) {
291
295
  idx_t i1 = std::min(i0 + bs, n);
@@ -14,6 +14,7 @@
14
14
  #include <faiss/IndexFlatCodes.h>
15
15
  #include <faiss/IndexIVF.h>
16
16
  #include <faiss/IndexPQ.h>
17
+ #include <faiss/impl/platform_macros.h>
17
18
 
18
19
  namespace faiss {
19
20
 
@@ -56,7 +57,8 @@ struct Index2Layer : IndexFlatCodes {
56
57
  const float* x,
57
58
  idx_t k,
58
59
  float* distances,
59
- idx_t* labels) const override;
60
+ idx_t* labels,
61
+ const SearchParameters* params = nullptr) const override;
60
62
 
61
63
  DistanceComputer* get_distance_computer() const override;
62
64
 
@@ -68,4 +70,7 @@ struct Index2Layer : IndexFlatCodes {
68
70
  void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
69
71
  };
70
72
 
73
+ // block size used in Index2Layer::sa_encode
74
+ FAISS_API extern int index2layer_sa_encode_bs;
75
+
71
76
  } // namespace faiss