faiss 0.2.4 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (178) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +23 -21
  5. data/ext/faiss/extconf.rb +11 -0
  6. data/ext/faiss/index.cpp +17 -4
  7. data/ext/faiss/index_binary.cpp +6 -6
  8. data/ext/faiss/product_quantizer.cpp +4 -4
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +13 -0
  11. data/vendor/faiss/faiss/IVFlib.cpp +101 -2
  12. data/vendor/faiss/faiss/IVFlib.h +26 -2
  13. data/vendor/faiss/faiss/Index.cpp +36 -3
  14. data/vendor/faiss/faiss/Index.h +43 -6
  15. data/vendor/faiss/faiss/Index2Layer.cpp +6 -2
  16. data/vendor/faiss/faiss/Index2Layer.h +6 -1
  17. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +219 -16
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +63 -5
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +299 -0
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +199 -0
  21. data/vendor/faiss/faiss/IndexBinary.cpp +20 -4
  22. data/vendor/faiss/faiss/IndexBinary.h +18 -3
  23. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +9 -2
  24. data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -2
  25. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -1
  26. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +2 -1
  27. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +5 -1
  28. data/vendor/faiss/faiss/IndexBinaryHNSW.h +2 -1
  29. data/vendor/faiss/faiss/IndexBinaryHash.cpp +17 -4
  30. data/vendor/faiss/faiss/IndexBinaryHash.h +8 -4
  31. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +28 -13
  32. data/vendor/faiss/faiss/IndexBinaryIVF.h +10 -7
  33. data/vendor/faiss/faiss/IndexFastScan.cpp +626 -0
  34. data/vendor/faiss/faiss/IndexFastScan.h +145 -0
  35. data/vendor/faiss/faiss/IndexFlat.cpp +34 -21
  36. data/vendor/faiss/faiss/IndexFlat.h +7 -4
  37. data/vendor/faiss/faiss/IndexFlatCodes.cpp +35 -1
  38. data/vendor/faiss/faiss/IndexFlatCodes.h +12 -0
  39. data/vendor/faiss/faiss/IndexHNSW.cpp +66 -138
  40. data/vendor/faiss/faiss/IndexHNSW.h +4 -2
  41. data/vendor/faiss/faiss/IndexIDMap.cpp +247 -0
  42. data/vendor/faiss/faiss/IndexIDMap.h +107 -0
  43. data/vendor/faiss/faiss/IndexIVF.cpp +121 -33
  44. data/vendor/faiss/faiss/IndexIVF.h +35 -16
  45. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -7
  46. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +63 -1
  47. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +590 -0
  48. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +171 -0
  49. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +1290 -0
  50. data/vendor/faiss/faiss/IndexIVFFastScan.h +213 -0
  51. data/vendor/faiss/faiss/IndexIVFFlat.cpp +37 -17
  52. data/vendor/faiss/faiss/IndexIVFFlat.h +4 -2
  53. data/vendor/faiss/faiss/IndexIVFPQ.cpp +234 -50
  54. data/vendor/faiss/faiss/IndexIVFPQ.h +5 -1
  55. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +23 -852
  56. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -112
  57. data/vendor/faiss/faiss/IndexIVFPQR.cpp +3 -3
  58. data/vendor/faiss/faiss/IndexIVFPQR.h +1 -1
  59. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +3 -1
  60. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
  61. data/vendor/faiss/faiss/IndexLSH.cpp +4 -2
  62. data/vendor/faiss/faiss/IndexLSH.h +2 -1
  63. data/vendor/faiss/faiss/IndexLattice.cpp +7 -1
  64. data/vendor/faiss/faiss/IndexLattice.h +3 -1
  65. data/vendor/faiss/faiss/IndexNNDescent.cpp +4 -3
  66. data/vendor/faiss/faiss/IndexNNDescent.h +2 -1
  67. data/vendor/faiss/faiss/IndexNSG.cpp +37 -3
  68. data/vendor/faiss/faiss/IndexNSG.h +25 -1
  69. data/vendor/faiss/faiss/IndexPQ.cpp +106 -69
  70. data/vendor/faiss/faiss/IndexPQ.h +19 -5
  71. data/vendor/faiss/faiss/IndexPQFastScan.cpp +15 -450
  72. data/vendor/faiss/faiss/IndexPQFastScan.h +15 -78
  73. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -8
  74. data/vendor/faiss/faiss/IndexPreTransform.h +15 -3
  75. data/vendor/faiss/faiss/IndexRefine.cpp +8 -4
  76. data/vendor/faiss/faiss/IndexRefine.h +4 -2
  77. data/vendor/faiss/faiss/IndexReplicas.cpp +4 -2
  78. data/vendor/faiss/faiss/IndexReplicas.h +2 -1
  79. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +438 -0
  80. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +92 -0
  81. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +26 -15
  82. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -7
  83. data/vendor/faiss/faiss/IndexShards.cpp +4 -1
  84. data/vendor/faiss/faiss/IndexShards.h +2 -1
  85. data/vendor/faiss/faiss/MetaIndexes.cpp +5 -178
  86. data/vendor/faiss/faiss/MetaIndexes.h +3 -81
  87. data/vendor/faiss/faiss/VectorTransform.cpp +43 -0
  88. data/vendor/faiss/faiss/VectorTransform.h +22 -4
  89. data/vendor/faiss/faiss/clone_index.cpp +23 -1
  90. data/vendor/faiss/faiss/clone_index.h +3 -0
  91. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +300 -0
  92. data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +24 -0
  93. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +195 -0
  94. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +2058 -0
  95. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +408 -0
  96. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +2147 -0
  97. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +460 -0
  98. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +465 -0
  99. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +1618 -0
  100. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +251 -0
  101. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +1452 -0
  102. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +1 -0
  103. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +0 -4
  104. data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
  105. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +2 -1
  106. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +10 -8
  107. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +75 -14
  108. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +19 -32
  109. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -31
  110. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +22 -28
  111. data/vendor/faiss/faiss/gpu/GpuResources.cpp +14 -0
  112. data/vendor/faiss/faiss/gpu/GpuResources.h +16 -3
  113. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +3 -3
  114. data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +32 -0
  115. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -0
  116. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +311 -75
  117. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +10 -0
  118. data/vendor/faiss/faiss/gpu/test/TestUtils.h +3 -0
  119. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +2 -2
  120. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +5 -4
  121. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +116 -47
  122. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +44 -13
  123. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +0 -54
  124. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +0 -76
  125. data/vendor/faiss/faiss/impl/DistanceComputer.h +64 -0
  126. data/vendor/faiss/faiss/impl/HNSW.cpp +123 -27
  127. data/vendor/faiss/faiss/impl/HNSW.h +19 -16
  128. data/vendor/faiss/faiss/impl/IDSelector.cpp +125 -0
  129. data/vendor/faiss/faiss/impl/IDSelector.h +135 -0
  130. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +6 -28
  131. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +6 -1
  132. data/vendor/faiss/faiss/impl/LookupTableScaler.h +77 -0
  133. data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -0
  134. data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
  135. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +383 -0
  136. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +154 -0
  137. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +225 -145
  138. data/vendor/faiss/faiss/impl/ProductQuantizer.h +29 -10
  139. data/vendor/faiss/faiss/impl/Quantizer.h +43 -0
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +192 -36
  141. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +40 -20
  142. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -0
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +97 -173
  144. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +18 -18
  145. data/vendor/faiss/faiss/impl/index_read.cpp +240 -9
  146. data/vendor/faiss/faiss/impl/index_write.cpp +237 -5
  147. data/vendor/faiss/faiss/impl/kmeans1d.cpp +6 -4
  148. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +56 -16
  149. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +25 -8
  150. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +66 -25
  151. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +75 -27
  152. data/vendor/faiss/faiss/index_factory.cpp +196 -7
  153. data/vendor/faiss/faiss/index_io.h +5 -0
  154. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -0
  155. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +4 -1
  156. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +2 -1
  157. data/vendor/faiss/faiss/python/python_callbacks.cpp +27 -0
  158. data/vendor/faiss/faiss/python/python_callbacks.h +15 -0
  159. data/vendor/faiss/faiss/utils/Heap.h +31 -15
  160. data/vendor/faiss/faiss/utils/distances.cpp +380 -56
  161. data/vendor/faiss/faiss/utils/distances.h +113 -15
  162. data/vendor/faiss/faiss/utils/distances_simd.cpp +726 -6
  163. data/vendor/faiss/faiss/utils/extra_distances.cpp +12 -7
  164. data/vendor/faiss/faiss/utils/extra_distances.h +3 -1
  165. data/vendor/faiss/faiss/utils/fp16-fp16c.h +21 -0
  166. data/vendor/faiss/faiss/utils/fp16-inl.h +101 -0
  167. data/vendor/faiss/faiss/utils/fp16.h +11 -0
  168. data/vendor/faiss/faiss/utils/hamming-inl.h +54 -0
  169. data/vendor/faiss/faiss/utils/hamming.cpp +0 -48
  170. data/vendor/faiss/faiss/utils/ordered_key_value.h +10 -0
  171. data/vendor/faiss/faiss/utils/quantize_lut.cpp +62 -0
  172. data/vendor/faiss/faiss/utils/quantize_lut.h +20 -0
  173. data/vendor/faiss/faiss/utils/random.cpp +53 -0
  174. data/vendor/faiss/faiss/utils/random.h +5 -0
  175. data/vendor/faiss/faiss/utils/simdlib_avx2.h +4 -0
  176. data/vendor/faiss/faiss/utils/simdlib_emulated.h +6 -1
  177. data/vendor/faiss/faiss/utils/simdlib_neon.h +7 -2
  178. metadata +37 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 15428eb8dd7d27f8a94e3a7797dd765827e5454def33fa785055adb7ef0d20c5
4
- data.tar.gz: 3e8eafebc49163c928bcab8d0ebd0f7b69e6659e49f36d7aaeb44e8651853ac9
3
+ metadata.gz: 5a12656e91180940d60fdd412b6f179cd0faa8a6b473aaadfa0bc4a371131464
4
+ data.tar.gz: 1076b01164ff1702abc8a9184410e0f8417199056698628ff2b0e5bf641cf168
5
5
  SHA512:
6
- metadata.gz: 598f6e626d5c970e408cff68ec479bf1aa2d6ee18adeeeb2489d1c4fbf627dacbc6e398ce149f0483720080a439df4d2887e5b6c9dc9f465e8ffa1bbeede84a8
7
- data.tar.gz: fefebfbbbbceb58ac6c6b02636630943d30db7af4ee92521586b4fb40e73bf1aa2b401e28e4c27872bf9344f60e3c9fb288ec6420abc1ffccd05ffd3ec7379fd
6
+ metadata.gz: 1298e0ed4b4455c478ee94384dbd02b4a029ea96409d66cf0b0e4292e51acde782d6bd875470c093a07198b99e222a366878d7019b6dc499221ee8b126a95cd7
7
+ data.tar.gz: 4b61b0fa059d240b568e176abdffff5b56a06e16598c185795e4d437bb0581a1cafb36794dd799168512b010ba71858837a30f3197be9b781510e50d3d4a268b
data/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
1
+ ## 0.2.6 (2023-04-11)
2
+
3
+ - Added `add_with_ids` method
4
+ - Added `IndexIDMap`
5
+
6
+ ## 0.2.5 (2022-12-27)
7
+
8
+ - Updated Faiss to 1.7.3
9
+ - Fixed installation errors on Mac
10
+
1
11
  ## 0.2.4 (2022-01-10)
2
12
 
3
13
  - Updated Faiss to 1.7.2
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  MIT License
2
2
 
3
3
  Copyright (c) Facebook, Inc. and its affiliates.
4
- Copyright (c) 2020-2022 Andrew Kane
4
+ Copyright (c) 2020-2023 Andrew Kane
5
5
 
6
6
  Permission is hereby granted, free of charge, to any person obtaining a copy
7
7
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -8,23 +8,25 @@ Learn more about [Faiss](https://engineering.fb.com/data-infrastructure/faiss-a-
8
8
 
9
9
  ## Installation
10
10
 
11
- First, install BLAS, LAPACK, and OpenMP:
11
+ First, ensure BLAS, LAPACK, and OpenMP are installed. For Mac, use:
12
12
 
13
13
  ```sh
14
- # Mac
15
- brew install openblas lapack libomp
14
+ brew install libomp
15
+ ```
16
+
17
+ For Ubuntu, use:
16
18
 
17
- # Ubuntu
18
- sudo apt install libblas-dev liblapack-dev
19
+ ```sh
20
+ sudo apt-get install libblas-dev liblapack-dev
19
21
  ```
20
22
 
21
- Add this line to your application’s Gemfile:
23
+ Then add this line to your application’s Gemfile:
22
24
 
23
25
  ```ruby
24
- gem 'faiss'
26
+ gem "faiss"
25
27
  ```
26
28
 
27
- It can take a few minutes to compile the gem. Faiss is not available for Windows.
29
+ It can take a few minutes to compile the gem. Windows is not currently supported.
28
30
 
29
31
  ## Getting Started
30
32
 
@@ -69,61 +71,61 @@ index = Faiss::Index.load("index.bin")
69
71
 
70
72
  Exact search for L2
71
73
 
72
- ```rb
74
+ ```ruby
73
75
  Faiss::IndexFlatL2.new(d)
74
76
  ```
75
77
 
76
78
  Exact search for inner product
77
79
 
78
- ```rb
80
+ ```ruby
79
81
  Faiss::IndexFlatIP.new(d)
80
82
  ```
81
83
 
82
84
  Hierarchical navigable small world graph exploration
83
85
 
84
- ```rb
86
+ ```ruby
85
87
  Faiss::IndexHNSWFlat.new(d, m)
86
88
  ```
87
89
 
88
90
  Inverted file with exact post-verification
89
91
 
90
- ```rb
92
+ ```ruby
91
93
  Faiss::IndexIVFFlat.new(quantizer, d, nlists)
92
94
  ```
93
95
 
94
96
  Locality-sensitive hashing
95
97
 
96
- ```rb
98
+ ```ruby
97
99
  Faiss::IndexLSH.new(d, nbits)
98
100
  ```
99
101
 
100
102
  Scalar quantizer (SQ) in flat mode
101
103
 
102
- ```rb
104
+ ```ruby
103
105
  Faiss::IndexScalarQuantizer.new(d, qtype)
104
106
  ```
105
107
 
106
108
  Product quantizer (PQ) in flat mode
107
109
 
108
- ```rb
110
+ ```ruby
109
111
  Faiss::IndexPQ.new(d, m, nbits)
110
112
  ```
111
113
 
112
114
  IVF and scalar quantizer
113
115
 
114
- ```rb
116
+ ```ruby
115
117
  Faiss::IndexIVFScalarQuantizer.new(quantizer, d, nlists, qtype)
116
118
  ```
117
119
 
118
120
  IVFADC (coarse quantizer+PQ on residuals)
119
121
 
120
- ```rb
122
+ ```ruby
121
123
  Faiss::IndexIVFPQ.new(quantizer, d, nlists, m, nbits)
122
124
  ```
123
125
 
124
126
  IVFADC+R (same as IVFADC with re-ranking based on codes)
125
127
 
126
- ```rb
128
+ ```ruby
127
129
  Faiss::IndexIVFPQR.new(quantizer, d, nlists, m, nbits, m_refine, nbits_refine)
128
130
  ```
129
131
 
@@ -131,13 +133,13 @@ Faiss::IndexIVFPQR.new(quantizer, d, nlists, m, nbits, m_refine, nbits_refine)
131
133
 
132
134
  Index binary vectors
133
135
 
134
- ```rb
136
+ ```ruby
135
137
  Faiss::IndexBinaryFlat.new(d)
136
138
  ```
137
139
 
138
140
  Speed up search with an inverse vector file
139
141
 
140
- ```rb
142
+ ```ruby
141
143
  Faiss::IndexBinaryIVF.new(quantizer, d, nlists)
142
144
  ```
143
145
 
@@ -220,7 +222,7 @@ Numo::NArray.cast([[1, 2, 3], [4, 5, 6]])
220
222
 
221
223
  ## History
222
224
 
223
- View the [changelog](https://github.com/ankane/faiss-ruby/blob/master/CHANGELOG.md)
225
+ View the [changelog](CHANGELOG.md)
224
226
 
225
227
  ## Contributing
226
228
 
data/ext/faiss/extconf.rb CHANGED
@@ -1,6 +1,14 @@
1
1
  require "mkmf-rice"
2
2
  require "numo/narray"
3
3
 
4
+ # libomp changed to keg-only
5
+ # https://github.com/Homebrew/homebrew-core/issues/112107
6
+ if RbConfig::CONFIG["host_os"] =~ /darwin/i
7
+ brew_prefix = RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i ? "/opt/homebrew" : "/usr/local"
8
+ find_library("omp", nil, "#{brew_prefix}/opt/libomp/lib")
9
+ find_header("omp.h", "#{brew_prefix}/opt/libomp/include")
10
+ end
11
+
4
12
  abort "BLAS not found" unless have_library("blas")
5
13
  abort "LAPACK not found" unless have_library("lapack")
6
14
  abort "OpenMP not found" unless have_library("omp") || have_library("gomp")
@@ -8,6 +16,9 @@ abort "OpenMP not found" unless have_library("omp") || have_library("gomp")
8
16
  numo = File.join(Gem.loaded_specs["numo-narray"].require_path, "numo")
9
17
  abort "Numo not found" unless find_header("numo/narray.h", numo)
10
18
 
19
+ # for https://bugs.ruby-lang.org/issues/19005
20
+ $LDFLAGS += " -Wl,-undefined,dynamic_lookup" if RbConfig::CONFIG["host_os"] =~ /darwin/i
21
+
11
22
  # -march=native not supported with ARM Mac
12
23
  default_optflags = RbConfig::CONFIG["host_os"] =~ /darwin/i && RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i ? "" : "-march=native"
13
24
  $CXXFLAGS << " -std=c++17 $(optflags) -DFINTEGER=int " << with_config("optflags", default_optflags)
data/ext/faiss/index.cpp CHANGED
@@ -1,6 +1,7 @@
1
1
  #include <faiss/Index.h>
2
2
  #include <faiss/IndexFlat.h>
3
3
  #include <faiss/IndexHNSW.h>
4
+ #include <faiss/IndexIDMap.h>
4
5
  #include <faiss/IndexIVFFlat.h>
5
6
  #include <faiss/IndexLSH.h>
6
7
  #include <faiss/IndexScalarQuantizer.h>
@@ -118,6 +119,15 @@ void init_index(Rice::Module& m) {
118
119
  auto n = check_shape(objects, self.d);
119
120
  self.add(n, objects.read_ptr());
120
121
  })
122
+ .define_method(
123
+ "add_with_ids",
124
+ [](faiss::Index &self, numo::SFloat objects, numo::Int64 ids) {
125
+ auto n = check_shape(objects, self.d);
126
+ if (ids.ndim() != 1 || ids.shape()[0] != n) {
127
+ throw Rice::Exception(rb_eArgError, "expected ids to be 1d array with size %d", n);
128
+ }
129
+ self.add_with_ids(n, objects.read_ptr(), ids.read_ptr());
130
+ })
121
131
  .define_method(
122
132
  "search",
123
133
  [](faiss::Index &self, numo::SFloat objects, size_t k) {
@@ -140,13 +150,13 @@ void init_index(Rice::Module& m) {
140
150
  })
141
151
  .define_method(
142
152
  "save",
143
- [](faiss::Index &self, const char *fname) {
144
- faiss::write_index(&self, fname);
153
+ [](faiss::Index &self, Rice::String fname) {
154
+ faiss::write_index(&self, fname.c_str());
145
155
  })
146
156
  .define_singleton_function(
147
157
  "load",
148
- [](const char *fname) {
149
- return faiss::read_index(fname);
158
+ [](Rice::String fname) {
159
+ return faiss::read_index(fname.c_str());
150
160
  });
151
161
 
152
162
  Rice::define_class_under<faiss::IndexFlatL2, faiss::Index>(m, "IndexFlatL2")
@@ -186,4 +196,7 @@ void init_index(Rice::Module& m) {
186
196
  [](faiss::ParameterSpace& self, faiss::Index* index, const std::string& name, double val) {
187
197
  self.set_index_parameter(index, name, val);
188
198
  });
199
+
200
+ Rice::define_class_under<faiss::IndexIDMap, faiss::Index>(m, "IndexIDMap")
201
+ .define_constructor(Rice::Constructor<faiss::IndexIDMap, faiss::Index*>());
189
202
  }
@@ -52,13 +52,13 @@ void init_index_binary(Rice::Module& m) {
52
52
  })
53
53
  .define_method(
54
54
  "save",
55
- [](faiss::IndexBinary &self, const char *fname) {
56
- faiss::write_index_binary(&self, fname);
55
+ [](faiss::IndexBinary &self, Rice::String fname) {
56
+ faiss::write_index_binary(&self, fname.c_str());
57
57
  })
58
58
  .define_singleton_function(
59
59
  "load",
60
- [](const char *fname) {
61
- return faiss::read_index_binary(fname);
60
+ [](Rice::String fname) {
61
+ return faiss::read_index_binary(fname.c_str());
62
62
  });
63
63
 
64
64
  Rice::define_class_under<faiss::IndexBinaryFlat, faiss::IndexBinary>(m, "IndexBinaryFlat")
@@ -69,7 +69,7 @@ void init_index_binary(Rice::Module& m) {
69
69
 
70
70
  m.define_singleton_function(
71
71
  "index_binary_factory",
72
- [](int d, const char *description) {
73
- return faiss::index_binary_factory(d, description);
72
+ [](int d, Rice::String description) {
73
+ return faiss::index_binary_factory(d, description.c_str());
74
74
  });
75
75
  }
@@ -42,12 +42,12 @@ void init_product_quantizer(Rice::Module& m) {
42
42
  })
43
43
  .define_method(
44
44
  "save",
45
- [](faiss::ProductQuantizer &self, const char *fname) {
46
- faiss::write_ProductQuantizer(&self, fname);
45
+ [](faiss::ProductQuantizer &self, Rice::String fname) {
46
+ faiss::write_ProductQuantizer(&self, fname.c_str());
47
47
  })
48
48
  .define_singleton_function(
49
49
  "load",
50
- [](const char *fname) {
51
- return faiss::read_ProductQuantizer(fname);
50
+ [](Rice::String fname) {
51
+ return faiss::read_ProductQuantizer(fname.c_str());
52
52
  });
53
53
  }
data/lib/faiss/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Faiss
2
- VERSION = "0.2.4"
2
+ VERSION = "0.2.6"
3
3
  end
@@ -523,6 +523,19 @@ void ParameterSpace::set_index_parameter(
523
523
  }
524
524
  }
525
525
 
526
+ if (name == "efConstruction") {
527
+ if (DC(IndexHNSW)) {
528
+ ix->hnsw.efConstruction = int(val);
529
+ return;
530
+ }
531
+ if (DC(IndexIVF)) {
532
+ if (IndexHNSW* cq = dynamic_cast<IndexHNSW*>(ix->quantizer)) {
533
+ cq->hnsw.efConstruction = int(val);
534
+ return;
535
+ }
536
+ }
537
+ }
538
+
526
539
  if (name == "efSearch") {
527
540
  if (DC(IndexHNSW)) {
528
541
  ix->hnsw.efSearch = int(val);
@@ -5,15 +5,18 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
- // -*- c++ -*-
9
-
10
8
  #include <faiss/IVFlib.h>
9
+ #include <omp.h>
11
10
 
12
11
  #include <memory>
13
12
 
13
+ #include <faiss/IndexAdditiveQuantizer.h>
14
+ #include <faiss/IndexIVFAdditiveQuantizer.h>
14
15
  #include <faiss/IndexPreTransform.h>
15
16
  #include <faiss/MetaIndexes.h>
16
17
  #include <faiss/impl/FaissAssert.h>
18
+ #include <faiss/utils/distances.h>
19
+ #include <faiss/utils/hamming.h>
17
20
  #include <faiss/utils/utils.h>
18
21
 
19
22
  namespace faiss {
@@ -349,6 +352,7 @@ void search_with_parameters(
349
352
  index_ivf->search_preassigned(
350
353
  n, x, k, Iq.data(), Dq.data(), distances, labels, false, params);
351
354
  double t3 = getmillisecs();
355
+
352
356
  if (ms_per_stage) {
353
357
  ms_per_stage[0] = t1 - t0;
354
358
  ms_per_stage[1] = t2 - t1;
@@ -406,5 +410,100 @@ void range_search_with_parameters(
406
410
  }
407
411
  }
408
412
 
413
+ IndexIVFResidualQuantizer* ivf_residual_from_quantizer(
414
+ const ResidualQuantizer& rq,
415
+ int nlevel) {
416
+ FAISS_THROW_IF_NOT(nlevel > 0 && nlevel + 1 < rq.M);
417
+
418
+ std::vector<size_t> nbits(nlevel);
419
+ std::copy(rq.nbits.begin(), rq.nbits.begin() + nlevel, nbits.begin());
420
+ std::unique_ptr<ResidualCoarseQuantizer> rcq(
421
+ new ResidualCoarseQuantizer(rq.d, nbits));
422
+
423
+ // set the coarse quantizer from the first nlevel quantizers
424
+ rcq->rq.initialize_from(rq);
425
+ rcq->is_trained = true;
426
+ rcq->ntotal = (idx_t)1 << rcq->rq.tot_bits;
427
+
428
+ // settings for exhaustive search in RCQ
429
+ rcq->centroid_norms.resize(rcq->ntotal);
430
+ rcq->aq->compute_centroid_norms(rcq->centroid_norms.data());
431
+ rcq->beam_factor = -1.0; // use exact search
432
+ size_t nlist = rcq->ntotal;
433
+
434
+ // build a IVFResidualQuantizer from that
435
+ std::vector<size_t> nbits_refined;
436
+ for (int i = nlevel; i < rq.M; i++) {
437
+ nbits_refined.push_back(rq.nbits[i]);
438
+ }
439
+ std::unique_ptr<IndexIVFResidualQuantizer> index(
440
+ new IndexIVFResidualQuantizer(
441
+ rcq.get(),
442
+ rq.d,
443
+ nlist,
444
+ nbits_refined,
445
+ faiss::METRIC_L2,
446
+ rq.search_type));
447
+ index->own_fields = true;
448
+ rcq.release();
449
+ index->by_residual = true;
450
+ index->rq.initialize_from(rq, nlevel);
451
+ index->is_trained = true;
452
+
453
+ return index.release();
454
+ }
455
+
456
+ void ivf_residual_add_from_flat_codes(
457
+ IndexIVFResidualQuantizer* index,
458
+ size_t nb,
459
+ const uint8_t* raw_codes,
460
+ int64_t code_size) {
461
+ const ResidualCoarseQuantizer* rcq =
462
+ dynamic_cast<const faiss::ResidualCoarseQuantizer*>(
463
+ index->quantizer);
464
+ FAISS_THROW_IF_NOT_MSG(rcq, "the coarse quantizer must be a RCQ");
465
+ if (code_size < 0) {
466
+ code_size = index->code_size;
467
+ }
468
+ InvertedLists& invlists = *index->invlists;
469
+ const ResidualQuantizer& rq = index->rq;
470
+
471
+ // populate inverted lists
472
+ #pragma omp parallel if (nb > 10000)
473
+ {
474
+ std::vector<uint8_t> tmp_code(index->code_size);
475
+ std::vector<float> tmp(rq.d);
476
+ int nt = omp_get_num_threads();
477
+ int rank = omp_get_thread_num();
478
+
479
+ #pragma omp for
480
+ for (idx_t i = 0; i < nb; i++) {
481
+ const uint8_t* code = &raw_codes[i * code_size];
482
+ BitstringReader rd(code, code_size);
483
+ idx_t list_no = rd.read(rcq->rq.tot_bits);
484
+
485
+ if (list_no % nt ==
486
+ rank) { // each thread takes care of 1/nt of the invlists
487
+ // copy AQ indexes one by one
488
+ BitstringWriter wr(tmp_code.data(), tmp_code.size());
489
+ for (int j = 0; j < rq.M; j++) {
490
+ int nbit = rq.nbits[j];
491
+ wr.write(rd.read(nbit), nbit);
492
+ }
493
+ // we need to recompute the norm
494
+ // decode first, does not use the norm component, so that's
495
+ // ok
496
+ index->rq.decode(tmp_code.data(), tmp.data(), 1);
497
+ float norm = fvec_norm_L2sqr(tmp.data(), rq.d);
498
+ wr.write(rq.encode_norm(norm), rq.norm_bits);
499
+
500
+ // add code to the inverted list
501
+ invlists.add_entry(list_no, i, tmp_code.data());
502
+ }
503
+ }
504
+ }
505
+ index->ntotal += nb;
506
+ }
507
+
409
508
  } // namespace ivflib
410
509
  } // namespace faiss
@@ -5,8 +5,6 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
- // -*- c++ -*-
9
-
10
8
  #ifndef FAISS_IVFLIB_H
11
9
  #define FAISS_IVFLIB_H
12
10
 
@@ -20,6 +18,11 @@
20
18
  #include <vector>
21
19
 
22
20
  namespace faiss {
21
+
22
+ struct IndexIVFResidualQuantizer;
23
+ struct IndexResidualQuantizer;
24
+ struct ResidualQuantizer;
25
+
23
26
  namespace ivflib {
24
27
 
25
28
  /** check if two indexes have the same parameters and are trained in
@@ -145,6 +148,27 @@ void range_search_with_parameters(
145
148
  size_t* nb_dis = nullptr,
146
149
  double* ms_per_stage = nullptr);
147
150
 
151
+ /** Build an IndexIVFResidualQuantizer from a ResidualQuantizer, using the
152
+ * nlevel first components as coarse quantizer and the rest as codes in invlists
153
+ */
154
+ IndexIVFResidualQuantizer* ivf_residual_from_quantizer(
155
+ const ResidualQuantizer&,
156
+ int nlevel);
157
+
158
+ /** Add entries from flat codes. Note that the norm component of the input
159
+ * codes is not used (it is recomputed), so a different code_size can be provided.
160
+ *
161
+ * @param ivfrq index to populate with the codes
162
+ * @param codes codes to add, size (ncode, code_size)
163
+ * @param code_size override the ivfrq's code_size, useful if the norm encoding
164
+ * is different
165
+ */
166
+ void ivf_residual_add_from_flat_codes(
167
+ IndexIVFResidualQuantizer* ivfrq,
168
+ size_t ncode,
169
+ const uint8_t* codes,
170
+ int64_t code_size = -1);
171
+
148
172
  } // namespace ivflib
149
173
  } // namespace faiss
150
174
 
@@ -10,6 +10,7 @@
10
10
  #include <faiss/Index.h>
11
11
 
12
12
  #include <faiss/impl/AuxIndexStructures.h>
13
+ #include <faiss/impl/DistanceComputer.h>
13
14
  #include <faiss/impl/FaissAssert.h>
14
15
  #include <faiss/utils/distances.h>
15
16
 
@@ -23,7 +24,12 @@ void Index::train(idx_t /*n*/, const float* /*x*/) {
23
24
  // does nothing by default
24
25
  }
25
26
 
26
- void Index::range_search(idx_t, const float*, float, RangeSearchResult*) const {
27
+ void Index::range_search(
28
+ idx_t,
29
+ const float*,
30
+ float,
31
+ RangeSearchResult*,
32
+ const SearchParameters* params) const {
27
33
  FAISS_THROW_MSG("range search not implemented");
28
34
  }
29
35
 
@@ -48,7 +54,25 @@ void Index::reconstruct(idx_t, float*) const {
48
54
  FAISS_THROW_MSG("reconstruct not implemented for this type of index");
49
55
  }
50
56
 
57
+ void Index::reconstruct_batch(idx_t n, const idx_t* keys, float* recons) const {
58
+ std::mutex exception_mutex;
59
+ std::string exception_string;
60
+ #pragma omp parallel for if (n > 1000)
61
+ for (idx_t i = 0; i < n; i++) {
62
+ try {
63
+ reconstruct(keys[i], &recons[i * d]);
64
+ } catch (const std::exception& e) {
65
+ std::lock_guard<std::mutex> lock(exception_mutex);
66
+ exception_string = e.what();
67
+ }
68
+ }
69
+ if (!exception_string.empty()) {
70
+ FAISS_THROW_MSG(exception_string.c_str());
71
+ }
72
+ }
73
+
51
74
  void Index::reconstruct_n(idx_t i0, idx_t ni, float* recons) const {
75
+ #pragma omp parallel for if (ni > 1000)
52
76
  for (idx_t i = 0; i < ni; i++) {
53
77
  reconstruct(i0 + i, recons + i * d);
54
78
  }
@@ -60,10 +84,11 @@ void Index::search_and_reconstruct(
60
84
  idx_t k,
61
85
  float* distances,
62
86
  idx_t* labels,
63
- float* recons) const {
87
+ float* recons,
88
+ const SearchParameters* params) const {
64
89
  FAISS_THROW_IF_NOT(k > 0);
65
90
 
66
- search(n, x, k, distances, labels);
91
+ search(n, x, k, distances, labels, params);
67
92
  for (idx_t i = 0; i < n; ++i) {
68
93
  for (idx_t j = 0; j < k; ++j) {
69
94
  idx_t ij = i * k + j;
@@ -149,4 +174,12 @@ DistanceComputer* Index::get_distance_computer() const {
149
174
  }
150
175
  }
151
176
 
177
+ void Index::merge_from(Index& /* otherIndex */, idx_t /* add_id */) {
178
+ FAISS_THROW_MSG("merge_from() not implemented");
179
+ }
180
+
181
+ void Index::check_compatible_for_merge(const Index& /* otherIndex */) const {
182
+ FAISS_THROW_MSG("check_compatible_for_merge() not implemented");
183
+ }
184
+
152
185
  } // namespace faiss
@@ -18,7 +18,7 @@
18
18
 
19
19
  #define FAISS_VERSION_MAJOR 1
20
20
  #define FAISS_VERSION_MINOR 7
21
- #define FAISS_VERSION_PATCH 2
21
+ #define FAISS_VERSION_PATCH 3
22
22
 
23
23
  /**
24
24
  * @namespace faiss
@@ -38,11 +38,24 @@
38
38
 
39
39
  namespace faiss {
40
40
 
41
- /// Forward declarations see AuxIndexStructures.h
41
+ /// Forward declarations see impl/AuxIndexStructures.h, impl/IDSelector.h and
42
+ /// impl/DistanceComputer.h
42
43
  struct IDSelector;
43
44
  struct RangeSearchResult;
44
45
  struct DistanceComputer;
45
46
 
47
+ /** Parent class for the optional search parameters.
48
+ *
49
+ * Sub-classes with additional search parameters should inherit this class.
50
+ * Ownership of the object fields is always to the caller.
51
+ */
52
+ struct SearchParameters {
53
+ /// if non-null, only these IDs will be considered during search.
54
+ IDSelector* sel = nullptr;
55
+ /// make sure we can dynamic_cast this
56
+ virtual ~SearchParameters() {}
57
+ };
58
+
46
59
  /** Abstract structure for an index, supports adding vectors and searching them.
47
60
  *
48
61
  * All vectors provided at add or search time are 32-bit float arrays,
@@ -114,7 +127,8 @@ struct Index {
114
127
  const float* x,
115
128
  idx_t k,
116
129
  float* distances,
117
- idx_t* labels) const = 0;
130
+ idx_t* labels,
131
+ const SearchParameters* params = nullptr) const = 0;
118
132
 
119
133
  /** query n vectors of dimension d to the index.
120
134
  *
@@ -130,7 +144,8 @@ struct Index {
130
144
  idx_t n,
131
145
  const float* x,
132
146
  float radius,
133
- RangeSearchResult* result) const;
147
+ RangeSearchResult* result,
148
+ const SearchParameters* params = nullptr) const;
134
149
 
135
150
  /** return the indexes of the k vectors closest to the query x.
136
151
  *
@@ -157,6 +172,16 @@ struct Index {
157
172
  */
158
173
  virtual void reconstruct(idx_t key, float* recons) const;
159
174
 
175
+ /** Reconstruct several stored vectors (or an approximation if lossy coding)
176
+ *
177
+ * this function may not be defined for some indexes
178
+ * @param n number of vectors to reconstruct
179
+ * @param keys ids of the vectors to reconstruct (size n)
180
+ * @param recons reconstucted vector (size n * d)
181
+ */
182
+ virtual void reconstruct_batch(idx_t n, const idx_t* keys, float* recons)
183
+ const;
184
+
160
185
  /** Reconstruct vectors i0 to i0 + ni - 1
161
186
  *
162
187
  * this function may not be defined for some indexes
@@ -178,7 +203,8 @@ struct Index {
178
203
  idx_t k,
179
204
  float* distances,
180
205
  idx_t* labels,
181
- float* recons) const;
206
+ float* recons,
207
+ const SearchParameters* params = nullptr) const;
182
208
 
183
209
  /** Computes a residual vector after indexing encoding.
184
210
  *
@@ -234,13 +260,24 @@ struct Index {
234
260
  */
235
261
  virtual void sa_encode(idx_t n, const float* x, uint8_t* bytes) const;
236
262
 
237
- /** encode a set of vectors
263
+ /** decode a set of vectors
238
264
  *
239
265
  * @param n number of vectors
240
266
  * @param bytes input encoded vectors, size n * sa_code_size()
241
267
  * @param x output vectors, size n * d
242
268
  */
243
269
  virtual void sa_decode(idx_t n, const uint8_t* bytes, float* x) const;
270
+
271
+ /** moves the entries from another dataset to self.
272
+ * On output, other is empty.
273
+ * add_id is added to all moved ids
274
+ * (for sequential ids, this would be this->ntotal) */
275
+ virtual void merge_from(Index& otherIndex, idx_t add_id = 0);
276
+
277
+ /** check that the two indexes are compatible (ie, they are
278
+ * trained in the same way and have the same
279
+ * parameters). Otherwise throw. */
280
+ virtual void check_compatible_for_merge(const Index& otherIndex) const;
244
281
  };
245
282
 
246
283
  } // namespace faiss