faiss 0.1.7 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (219) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -0
  3. data/README.md +7 -7
  4. data/ext/faiss/ext.cpp +1 -1
  5. data/ext/faiss/extconf.rb +8 -2
  6. data/ext/faiss/index.cpp +102 -69
  7. data/ext/faiss/index_binary.cpp +24 -30
  8. data/ext/faiss/kmeans.cpp +20 -16
  9. data/ext/faiss/numo.hpp +867 -0
  10. data/ext/faiss/pca_matrix.cpp +13 -14
  11. data/ext/faiss/product_quantizer.cpp +23 -24
  12. data/ext/faiss/utils.cpp +10 -37
  13. data/ext/faiss/utils.h +2 -13
  14. data/lib/faiss/version.rb +1 -1
  15. data/lib/faiss.rb +0 -5
  16. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  17. data/vendor/faiss/faiss/AutoTune.h +55 -56
  18. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  19. data/vendor/faiss/faiss/Clustering.h +88 -35
  20. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  21. data/vendor/faiss/faiss/IVFlib.h +48 -51
  22. data/vendor/faiss/faiss/Index.cpp +85 -103
  23. data/vendor/faiss/faiss/Index.h +54 -48
  24. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  25. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  26. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  27. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  28. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  29. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  30. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  31. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  32. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  33. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  34. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  35. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  36. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  37. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  38. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  39. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  40. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  41. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  42. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  43. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  54. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  55. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  69. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  73. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  74. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  75. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  76. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  77. data/vendor/faiss/faiss/IndexShards.h +85 -73
  78. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  79. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  80. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  81. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  82. data/vendor/faiss/faiss/MetricType.h +7 -7
  83. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  84. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  85. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  86. data/vendor/faiss/faiss/clone_index.h +4 -9
  87. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  88. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  89. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  90. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  91. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  92. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  93. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  94. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  95. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  96. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  97. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  101. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  102. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  103. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  104. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  106. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  108. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  110. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  112. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  113. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  114. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  115. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  116. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  121. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  122. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  124. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  125. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  126. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  128. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  129. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  130. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  131. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  133. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  135. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  136. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  137. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  138. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  139. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  140. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  142. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  144. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  145. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  146. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  148. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  149. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  151. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  153. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  154. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  156. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  157. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  158. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  159. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  160. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  161. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  162. data/vendor/faiss/faiss/impl/io.h +31 -41
  163. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  164. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  165. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  166. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  167. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  171. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  172. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  173. data/vendor/faiss/faiss/index_factory.h +6 -7
  174. data/vendor/faiss/faiss/index_io.h +23 -26
  175. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  177. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  178. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  179. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  180. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  181. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  183. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  185. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  186. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  187. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  188. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  189. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  190. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  191. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  192. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  193. data/vendor/faiss/faiss/utils/distances.h +133 -118
  194. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  195. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  196. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  197. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  198. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  199. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  200. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  201. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  202. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  203. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  204. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  205. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  206. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  207. data/vendor/faiss/faiss/utils/random.h +13 -16
  208. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  209. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  210. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  211. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  212. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  213. data/vendor/faiss/faiss/utils/utils.h +53 -48
  214. metadata +26 -12
  215. data/lib/faiss/index.rb +0 -20
  216. data/lib/faiss/index_binary.rb +0 -20
  217. data/lib/faiss/kmeans.rb +0 -15
  218. data/lib/faiss/pca_matrix.rb +0 -15
  219. data/lib/faiss/product_quantizer.rb +0 -22
@@ -0,0 +1,448 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #include "faiss/impl/ResidualQuantizer.h"
11
+ #include <faiss/impl/FaissAssert.h>
12
+ #include <faiss/impl/ResidualQuantizer.h>
13
+ #include "faiss/utils/utils.h"
14
+
15
+ #include <cstddef>
16
+ #include <cstdio>
17
+ #include <cstring>
18
+ #include <memory>
19
+
20
+ #include <algorithm>
21
+
22
+ #include <faiss/IndexFlat.h>
23
+ #include <faiss/VectorTransform.h>
24
+ #include <faiss/impl/AuxIndexStructures.h>
25
+ #include <faiss/impl/FaissAssert.h>
26
+ #include <faiss/utils/Heap.h>
27
+ #include <faiss/utils/distances.h>
28
+ #include <faiss/utils/hamming.h>
29
+ #include <faiss/utils/utils.h>
30
+
31
+ namespace faiss {
32
+
33
+ ResidualQuantizer::ResidualQuantizer()
34
+ : train_type(Train_progressive_dim),
35
+ max_beam_size(30),
36
+ max_mem_distances(5 * (size_t(1) << 30)), // 5 GiB
37
+ assign_index_factory(nullptr) {
38
+ d = 0;
39
+ M = 0;
40
+ verbose = false;
41
+ }
42
+
43
+ ResidualQuantizer::ResidualQuantizer(size_t d, const std::vector<size_t>& nbits)
44
+ : ResidualQuantizer() {
45
+ this->d = d;
46
+ M = nbits.size();
47
+ this->nbits = nbits;
48
+ set_derived_values();
49
+ }
50
+
51
+ ResidualQuantizer::ResidualQuantizer(size_t d, size_t M, size_t nbits)
52
+ : ResidualQuantizer(d, std::vector<size_t>(M, nbits)) {}
53
+
54
namespace {

/// Element-wise vector subtraction: c[j] = a[j] - b[j] for j in [0, d).
void fvec_sub(size_t d, const float* a, const float* b, float* c) {
    for (size_t j = 0; j < d; ++j) {
        c[j] = a[j] - b[j];
    }
}

} // anonymous namespace
63
+
64
+ void beam_search_encode_step(
65
+ size_t d,
66
+ size_t K,
67
+ const float* cent, /// size (K, d)
68
+ size_t n,
69
+ size_t beam_size,
70
+ const float* residuals, /// size (n, beam_size, d)
71
+ size_t m,
72
+ const int32_t* codes, /// size (n, beam_size, m)
73
+ size_t new_beam_size,
74
+ int32_t* new_codes, /// size (n, new_beam_size, m + 1)
75
+ float* new_residuals, /// size (n, new_beam_size, d)
76
+ float* new_distances, /// size (n, new_beam_size)
77
+ Index* assign_index) {
78
+ // we have to fill in the whole output matrix
79
+ FAISS_THROW_IF_NOT(new_beam_size <= beam_size * K);
80
+
81
+ using idx_t = Index::idx_t;
82
+
83
+ std::vector<float> cent_distances;
84
+ std::vector<idx_t> cent_ids;
85
+
86
+ if (assign_index) {
87
+ // search beam_size distances per query
88
+ FAISS_THROW_IF_NOT(assign_index->d == d);
89
+ cent_distances.resize(n * beam_size * new_beam_size);
90
+ cent_ids.resize(n * beam_size * new_beam_size);
91
+ if (assign_index->ntotal != 0) {
92
+ // then we assume the codebooks are already added to the index
93
+ FAISS_THROW_IF_NOT(assign_index->ntotal != K);
94
+ } else {
95
+ assign_index->add(K, cent);
96
+ }
97
+
98
+ // printf("beam_search_encode_step -- mem usage %zd\n",
99
+ // get_mem_usage_kb());
100
+ assign_index->search(
101
+ n * beam_size,
102
+ residuals,
103
+ new_beam_size,
104
+ cent_distances.data(),
105
+ cent_ids.data());
106
+ } else {
107
+ // do one big distance computation
108
+ cent_distances.resize(n * beam_size * K);
109
+ pairwise_L2sqr(
110
+ d, n * beam_size, residuals, K, cent, cent_distances.data());
111
+ }
112
+ InterruptCallback::check();
113
+
114
+ #pragma omp parallel for if (n > 100)
115
+ for (int64_t i = 0; i < n; i++) {
116
+ const int32_t* codes_i = codes + i * m * beam_size;
117
+ int32_t* new_codes_i = new_codes + i * (m + 1) * new_beam_size;
118
+ const float* residuals_i = residuals + i * d * beam_size;
119
+ float* new_residuals_i = new_residuals + i * d * new_beam_size;
120
+
121
+ float* new_distances_i = new_distances + i * new_beam_size;
122
+ using C = CMax<float, int>;
123
+
124
+ if (assign_index) {
125
+ const float* cent_distances_i =
126
+ cent_distances.data() + i * beam_size * new_beam_size;
127
+ const idx_t* cent_ids_i =
128
+ cent_ids.data() + i * beam_size * new_beam_size;
129
+
130
+ // here we could be a tad more efficient by merging sorted arrays
131
+ for (int i = 0; i < new_beam_size; i++) {
132
+ new_distances_i[i] = C::neutral();
133
+ }
134
+ std::vector<int> perm(new_beam_size, -1);
135
+ heap_addn<C>(
136
+ new_beam_size,
137
+ new_distances_i,
138
+ perm.data(),
139
+ cent_distances_i,
140
+ nullptr,
141
+ beam_size * new_beam_size);
142
+ heap_reorder<C>(new_beam_size, new_distances_i, perm.data());
143
+
144
+ for (int j = 0; j < new_beam_size; j++) {
145
+ int js = perm[j] / new_beam_size;
146
+ int ls = cent_ids_i[perm[j]];
147
+ if (m > 0) {
148
+ memcpy(new_codes_i, codes_i + js * m, sizeof(*codes) * m);
149
+ }
150
+ new_codes_i[m] = ls;
151
+ new_codes_i += m + 1;
152
+ fvec_sub(
153
+ d,
154
+ residuals_i + js * d,
155
+ cent + ls * d,
156
+ new_residuals_i);
157
+ new_residuals_i += d;
158
+ }
159
+
160
+ } else {
161
+ const float* cent_distances_i =
162
+ cent_distances.data() + i * beam_size * K;
163
+ // then we have to select the best results
164
+ for (int i = 0; i < new_beam_size; i++) {
165
+ new_distances_i[i] = C::neutral();
166
+ }
167
+ std::vector<int> perm(new_beam_size, -1);
168
+ heap_addn<C>(
169
+ new_beam_size,
170
+ new_distances_i,
171
+ perm.data(),
172
+ cent_distances_i,
173
+ nullptr,
174
+ beam_size * K);
175
+ heap_reorder<C>(new_beam_size, new_distances_i, perm.data());
176
+
177
+ for (int j = 0; j < new_beam_size; j++) {
178
+ int js = perm[j] / K;
179
+ int ls = perm[j] % K;
180
+ if (m > 0) {
181
+ memcpy(new_codes_i, codes_i + js * m, sizeof(*codes) * m);
182
+ }
183
+ new_codes_i[m] = ls;
184
+ new_codes_i += m + 1;
185
+ fvec_sub(
186
+ d,
187
+ residuals_i + js * d,
188
+ cent + ls * d,
189
+ new_residuals_i);
190
+ new_residuals_i += d;
191
+ }
192
+ }
193
+ }
194
+ }
195
+
196
+ void ResidualQuantizer::train(size_t n, const float* x) {
197
+ codebooks.resize(d * codebook_offsets.back());
198
+
199
+ if (verbose) {
200
+ printf("Training ResidualQuantizer, with %zd steps on %zd %zdD vectors\n",
201
+ M,
202
+ n,
203
+ size_t(d));
204
+ }
205
+
206
+ int cur_beam_size = 1;
207
+ std::vector<float> residuals(x, x + n * d);
208
+ std::vector<int32_t> codes;
209
+ std::vector<float> distances;
210
+ double t0 = getmillisecs();
211
+
212
+ for (int m = 0; m < M; m++) {
213
+ int K = 1 << nbits[m];
214
+
215
+ // on which residuals to train
216
+ std::vector<float>& train_residuals = residuals;
217
+ std::vector<float> residuals1;
218
+ if (train_type & Train_top_beam) {
219
+ residuals1.resize(n * d);
220
+ for (size_t j = 0; j < n; j++) {
221
+ memcpy(residuals1.data() + j * d,
222
+ residuals.data() + j * d * cur_beam_size,
223
+ sizeof(residuals[0]) * d);
224
+ }
225
+ train_residuals = residuals1;
226
+ }
227
+ train_type_t tt = train_type_t(train_type & ~Train_top_beam);
228
+
229
+ std::vector<float> codebooks;
230
+ float obj = 0;
231
+
232
+ std::unique_ptr<Index> assign_index;
233
+ if (assign_index_factory) {
234
+ assign_index.reset((*assign_index_factory)(d));
235
+ } else {
236
+ assign_index.reset(new IndexFlatL2(d));
237
+ }
238
+ if (tt == Train_default) {
239
+ Clustering clus(d, K, cp);
240
+ clus.train(
241
+ train_residuals.size() / d,
242
+ train_residuals.data(),
243
+ *assign_index.get());
244
+ codebooks.swap(clus.centroids);
245
+ assign_index->reset();
246
+ obj = clus.iteration_stats.back().obj;
247
+ } else if (tt == Train_progressive_dim) {
248
+ ProgressiveDimClustering clus(d, K, cp);
249
+ ProgressiveDimIndexFactory default_fac;
250
+ clus.train(
251
+ train_residuals.size() / d,
252
+ train_residuals.data(),
253
+ assign_index_factory ? *assign_index_factory : default_fac);
254
+ codebooks.swap(clus.centroids);
255
+ obj = clus.iteration_stats.back().obj;
256
+ } else {
257
+ FAISS_THROW_MSG("train type not supported");
258
+ }
259
+
260
+ memcpy(this->codebooks.data() + codebook_offsets[m] * d,
261
+ codebooks.data(),
262
+ codebooks.size() * sizeof(codebooks[0]));
263
+
264
+ // quantize using the new codebooks
265
+
266
+ int new_beam_size = std::min(cur_beam_size * K, max_beam_size);
267
+ std::vector<int32_t> new_codes(n * new_beam_size * (m + 1));
268
+ std::vector<float> new_residuals(n * new_beam_size * d);
269
+ std::vector<float> new_distances(n * new_beam_size);
270
+
271
+ beam_search_encode_step(
272
+ d,
273
+ K,
274
+ codebooks.data(),
275
+ n,
276
+ cur_beam_size,
277
+ residuals.data(),
278
+ m,
279
+ codes.data(),
280
+ new_beam_size,
281
+ new_codes.data(),
282
+ new_residuals.data(),
283
+ new_distances.data(),
284
+ assign_index.get());
285
+
286
+ codes.swap(new_codes);
287
+ residuals.swap(new_residuals);
288
+ distances.swap(new_distances);
289
+
290
+ float sum_distances = 0;
291
+ for (int j = 0; j < distances.size(); j++) {
292
+ sum_distances += distances[j];
293
+ }
294
+
295
+ if (verbose) {
296
+ printf("[%.3f s] train stage %d, %d bits, kmeans objective %g, "
297
+ "total distance %g, beam_size %d->%d\n",
298
+ (getmillisecs() - t0) / 1000,
299
+ m,
300
+ int(nbits[m]),
301
+ obj,
302
+ sum_distances,
303
+ cur_beam_size,
304
+ new_beam_size);
305
+ }
306
+ cur_beam_size = new_beam_size;
307
+ }
308
+
309
+ is_trained = true;
310
+ }
311
+
312
+ size_t ResidualQuantizer::memory_per_point(int beam_size) const {
313
+ if (beam_size < 0) {
314
+ beam_size = max_beam_size;
315
+ }
316
+ size_t mem;
317
+ mem = beam_size * d * 2 * sizeof(float); // size for 2 beams at a time
318
+ mem += beam_size * beam_size *
319
+ (sizeof(float) +
320
+ sizeof(Index::idx_t)); // size for 1 beam search result
321
+ return mem;
322
+ }
323
+
324
+ void ResidualQuantizer::compute_codes(
325
+ const float* x,
326
+ uint8_t* codes_out,
327
+ size_t n) const {
328
+ FAISS_THROW_IF_NOT_MSG(is_trained, "RQ is not trained yet.");
329
+
330
+ size_t mem = memory_per_point();
331
+ if (n > 1 && mem * n > max_mem_distances) {
332
+ // then split queries to reduce temp memory
333
+ size_t bs = max_mem_distances / mem;
334
+ if (bs == 0) {
335
+ bs = 1; // otherwise we can't do much
336
+ }
337
+ for (size_t i0 = 0; i0 < n; i0 += bs) {
338
+ size_t i1 = std::min(n, i0 + bs);
339
+ compute_codes(x + i0 * d, codes_out + i0 * code_size, i1 - i0);
340
+ }
341
+ return;
342
+ }
343
+
344
+ std::vector<float> residuals(max_beam_size * n * d);
345
+ std::vector<int32_t> codes(max_beam_size * M * n);
346
+ std::vector<float> distances(max_beam_size * n);
347
+
348
+ refine_beam(
349
+ n,
350
+ 1,
351
+ x,
352
+ max_beam_size,
353
+ codes.data(),
354
+ residuals.data(),
355
+ distances.data());
356
+
357
+ // pack only the first code of the beam (hence the ld_codes=M *
358
+ // max_beam_size)
359
+ pack_codes(n, codes.data(), codes_out, M * max_beam_size);
360
+ }
361
+
362
+ void ResidualQuantizer::refine_beam(
363
+ size_t n,
364
+ size_t beam_size,
365
+ const float* x,
366
+ int out_beam_size,
367
+ int32_t* out_codes,
368
+ float* out_residuals,
369
+ float* out_distances) const {
370
+ int cur_beam_size = beam_size;
371
+
372
+ std::vector<float> residuals(x, x + n * d * beam_size);
373
+ std::vector<int32_t> codes;
374
+ std::vector<float> distances;
375
+ double t0 = getmillisecs();
376
+
377
+ std::unique_ptr<Index> assign_index;
378
+ if (assign_index_factory) {
379
+ assign_index.reset((*assign_index_factory)(d));
380
+ } else {
381
+ assign_index.reset(new IndexFlatL2(d));
382
+ }
383
+
384
+ for (int m = 0; m < M; m++) {
385
+ int K = 1 << nbits[m];
386
+
387
+ const float* codebooks_m =
388
+ this->codebooks.data() + codebook_offsets[m] * d;
389
+
390
+ int new_beam_size = std::min(cur_beam_size * K, out_beam_size);
391
+
392
+ std::vector<int32_t> new_codes(n * new_beam_size * (m + 1));
393
+ std::vector<float> new_residuals(n * new_beam_size * d);
394
+ distances.resize(n * new_beam_size);
395
+
396
+ beam_search_encode_step(
397
+ d,
398
+ K,
399
+ codebooks_m,
400
+ n,
401
+ cur_beam_size,
402
+ residuals.data(),
403
+ m,
404
+ codes.data(),
405
+ new_beam_size,
406
+ new_codes.data(),
407
+ new_residuals.data(),
408
+ distances.data(),
409
+ assign_index.get());
410
+
411
+ assign_index->reset();
412
+
413
+ codes.swap(new_codes);
414
+ residuals.swap(new_residuals);
415
+
416
+ cur_beam_size = new_beam_size;
417
+
418
+ if (verbose) {
419
+ float sum_distances = 0;
420
+ for (int j = 0; j < distances.size(); j++) {
421
+ sum_distances += distances[j];
422
+ }
423
+ printf("[%.3f s] encode stage %d, %d bits, "
424
+ "total error %g, beam_size %d\n",
425
+ (getmillisecs() - t0) / 1000,
426
+ m,
427
+ int(nbits[m]),
428
+ sum_distances,
429
+ cur_beam_size);
430
+ }
431
+ }
432
+
433
+ if (out_codes) {
434
+ memcpy(out_codes, codes.data(), codes.size() * sizeof(codes[0]));
435
+ }
436
+ if (out_residuals) {
437
+ memcpy(out_residuals,
438
+ residuals.data(),
439
+ residuals.size() * sizeof(residuals[0]));
440
+ }
441
+ if (out_distances) {
442
+ memcpy(out_distances,
443
+ distances.data(),
444
+ distances.size() * sizeof(distances[0]));
445
+ }
446
+ }
447
+
448
+ } // namespace faiss
@@ -0,0 +1,130 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <cstdint>
11
+ #include <vector>
12
+
13
+ #include <faiss/Clustering.h>
14
+ #include <faiss/impl/AdditiveQuantizer.h>
15
+
16
+ namespace faiss {
17
+
18
+ /** Residual quantizer with variable number of bits per sub-quantizer
19
+ *
20
+ * The residual centroids are stored in a big cumulative centroid table.
21
+ * The codes are represented either as a non-compact table of size (n, M) or
22
+ * as the compact output (n, code_size).
23
+ */
24
+
25
+ struct ResidualQuantizer : AdditiveQuantizer {
26
+ /// initialization
27
+ enum train_type_t {
28
+ Train_default, ///< regular k-means
29
+ Train_progressive_dim, ///< progressive dim clustering
30
+ };
31
+
32
+ // set this bit on train_type if beam is to be trained only on the
33
+ // first element of the beam (faster but less accurate)
34
+ static const int Train_top_beam = 1024;
35
+ train_type_t train_type;
36
+
37
+ /// beam size used for training and for encoding
38
+ int max_beam_size;
39
+
40
+ /// distance matrixes with beam search can get large, so use this
41
+ /// to batch computations at encoding time.
42
+ size_t max_mem_distances;
43
+
44
+ /// clustering parameters
45
+ ProgressiveDimClusteringParameters cp;
46
+
47
+ /// if non-NULL, use this index for assignment
48
+ ProgressiveDimIndexFactory* assign_index_factory;
49
+
50
+ ResidualQuantizer(size_t d, const std::vector<size_t>& nbits);
51
+
52
+ ResidualQuantizer(
53
+ size_t d, /* dimensionality of the input vectors */
54
+ size_t M, /* number of subquantizers */
55
+ size_t nbits); /* number of bit per subvector index */
56
+
57
+ ResidualQuantizer();
58
+
59
+ // Train the residual quantizer
60
+ void train(size_t n, const float* x) override;
61
+
62
+ /** Encode a set of vectors
63
+ *
64
+ * @param x vectors to encode, size n * d
65
+ * @param codes output codes, size n * code_size
66
+ */
67
+ void compute_codes(const float* x, uint8_t* codes, size_t n) const override;
68
+
69
+ /** lower-level encode function
70
+ *
71
+ * @param n number of vectors to hanlde
72
+ * @param residuals vectors to encode, size (n, beam_size, d)
73
+ * @param beam_size input beam size
74
+ * @param new_beam_size output beam size (should be <= K * beam_size)
75
+ * @param new_codes output codes, size (n, new_beam_size, m + 1)
76
+ * @param new_residuals output residuals, size (n, new_beam_size, d)
77
+ * @param new_distances output distances, size (n, new_beam_size)
78
+ */
79
+ void refine_beam(
80
+ size_t n,
81
+ size_t beam_size,
82
+ const float* residuals,
83
+ int new_beam_size,
84
+ int32_t* new_codes,
85
+ float* new_residuals = nullptr,
86
+ float* new_distances = nullptr) const;
87
+
88
+ /** Beam search can consume a lot of memory. This function estimates the
89
+ * amount of mem used by refine_beam to adjust the batch size
90
+ *
91
+ * @param beam_size if != -1, override the beam size
92
+ */
93
+ size_t memory_per_point(int beam_size = -1) const;
94
+ };
95
+
96
+ /** Encode a residual by sampling from a centroid table.
97
+ *
98
+ * This is a single encoding step the residual quantizer.
99
+ * It allows low-level access to the encoding function, exposed mainly for unit
100
+ * tests.
101
+ *
102
+ * @param n number of vectors to hanlde
103
+ * @param residuals vectors to encode, size (n, beam_size, d)
104
+ * @param cent centroids, size (K, d)
105
+ * @param beam_size input beam size
106
+ * @param m size of the codes for the previous encoding steps
107
+ * @param codes code array for the previous steps of the beam (n,
108
+ * beam_size, m)
109
+ * @param new_beam_size output beam size (should be <= K * beam_size)
110
+ * @param new_codes output codes, size (n, new_beam_size, m + 1)
111
+ * @param new_residuals output residuals, size (n, new_beam_size, d)
112
+ * @param new_distances output distances, size (n, new_beam_size)
113
+ * @param assign_index if non-NULL, will be used to perform assignment
114
+ */
115
+ void beam_search_encode_step(
116
+ size_t d,
117
+ size_t K,
118
+ const float* cent,
119
+ size_t n,
120
+ size_t beam_size,
121
+ const float* residuals,
122
+ size_t m,
123
+ const int32_t* codes,
124
+ size_t new_beam_size,
125
+ int32_t* new_codes,
126
+ float* new_residuals,
127
+ float* new_distances,
128
+ Index* assign_index = nullptr);
129
+
130
+ }; // namespace faiss