faiss 0.1.7 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -0
  3. data/README.md +7 -7
  4. data/ext/faiss/ext.cpp +1 -1
  5. data/ext/faiss/extconf.rb +8 -2
  6. data/ext/faiss/index.cpp +102 -69
  7. data/ext/faiss/index_binary.cpp +24 -30
  8. data/ext/faiss/kmeans.cpp +20 -16
  9. data/ext/faiss/numo.hpp +867 -0
  10. data/ext/faiss/pca_matrix.cpp +13 -14
  11. data/ext/faiss/product_quantizer.cpp +23 -24
  12. data/ext/faiss/utils.cpp +10 -37
  13. data/ext/faiss/utils.h +2 -13
  14. data/lib/faiss/version.rb +1 -1
  15. data/lib/faiss.rb +0 -5
  16. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  17. data/vendor/faiss/faiss/AutoTune.h +55 -56
  18. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  19. data/vendor/faiss/faiss/Clustering.h +88 -35
  20. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  21. data/vendor/faiss/faiss/IVFlib.h +48 -51
  22. data/vendor/faiss/faiss/Index.cpp +85 -103
  23. data/vendor/faiss/faiss/Index.h +54 -48
  24. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  25. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  26. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  27. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  28. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  29. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  30. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  31. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  32. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  33. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  34. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  35. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  36. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  37. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  38. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  39. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  40. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  41. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  42. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  43. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  54. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  55. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  69. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  73. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  74. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  75. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  76. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  77. data/vendor/faiss/faiss/IndexShards.h +85 -73
  78. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  79. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  80. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  81. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  82. data/vendor/faiss/faiss/MetricType.h +7 -7
  83. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  84. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  85. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  86. data/vendor/faiss/faiss/clone_index.h +4 -9
  87. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  88. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  89. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  90. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  91. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  92. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  93. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  94. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  95. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  96. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  97. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  101. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  102. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  103. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  104. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  106. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  108. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  110. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  112. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  113. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  114. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  115. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  116. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  121. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  122. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  124. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  125. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  126. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  128. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  129. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  130. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  131. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  133. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  135. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  136. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  137. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  138. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  139. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  140. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  142. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  144. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  145. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  146. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  148. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  149. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  151. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  153. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  154. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  156. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  157. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  158. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  159. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  160. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  161. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  162. data/vendor/faiss/faiss/impl/io.h +31 -41
  163. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  164. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  165. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  166. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  167. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  171. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  172. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  173. data/vendor/faiss/faiss/index_factory.h +6 -7
  174. data/vendor/faiss/faiss/index_io.h +23 -26
  175. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  177. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  178. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  179. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  180. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  181. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  183. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  185. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  186. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  187. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  188. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  189. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  190. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  191. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  192. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  193. data/vendor/faiss/faiss/utils/distances.h +133 -118
  194. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  195. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  196. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  197. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  198. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  199. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  200. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  201. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  202. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  203. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  204. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  205. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  206. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  207. data/vendor/faiss/faiss/utils/random.h +13 -16
  208. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  209. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  210. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  211. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  212. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  213. data/vendor/faiss/faiss/utils/utils.h +53 -48
  214. metadata +26 -12
  215. data/lib/faiss/index.rb +0 -20
  216. data/lib/faiss/index_binary.rb +0 -20
  217. data/lib/faiss/kmeans.rb +0 -15
  218. data/lib/faiss/pca_matrix.rb +0 -15
  219. data/lib/faiss/product_quantizer.rb +0 -22
@@ -0,0 +1,448 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #include "faiss/impl/ResidualQuantizer.h"
11
+ #include <faiss/impl/FaissAssert.h>
12
+ #include <faiss/impl/ResidualQuantizer.h>
13
+ #include "faiss/utils/utils.h"
14
+
15
+ #include <cstddef>
16
+ #include <cstdio>
17
+ #include <cstring>
18
+ #include <memory>
19
+
20
+ #include <algorithm>
21
+
22
+ #include <faiss/IndexFlat.h>
23
+ #include <faiss/VectorTransform.h>
24
+ #include <faiss/impl/AuxIndexStructures.h>
25
+ #include <faiss/impl/FaissAssert.h>
26
+ #include <faiss/utils/Heap.h>
27
+ #include <faiss/utils/distances.h>
28
+ #include <faiss/utils/hamming.h>
29
+ #include <faiss/utils/utils.h>
30
+
31
+ namespace faiss {
32
+
33
+ ResidualQuantizer::ResidualQuantizer()
34
+ : train_type(Train_progressive_dim),
35
+ max_beam_size(30),
36
+ max_mem_distances(5 * (size_t(1) << 30)), // 5 GiB
37
+ assign_index_factory(nullptr) {
38
+ d = 0;
39
+ M = 0;
40
+ verbose = false;
41
+ }
42
+
43
+ ResidualQuantizer::ResidualQuantizer(size_t d, const std::vector<size_t>& nbits)
44
+ : ResidualQuantizer() {
45
+ this->d = d;
46
+ M = nbits.size();
47
+ this->nbits = nbits;
48
+ set_derived_values();
49
+ }
50
+
51
// Convenience constructor: M encoding steps that all use the same number
// of bits; delegates to the vector-of-bit-counts constructor.
ResidualQuantizer::ResidualQuantizer(size_t d, size_t M, size_t nbits)
        : ResidualQuantizer(d, std::vector<size_t>(M, nbits)) {}
53
+
54
namespace {

// Element-wise difference over d floats: c[i] = a[i] - b[i].
void fvec_sub(size_t d, const float* a, const float* b, float* c) {
    size_t i = 0;
    while (i < d) {
        c[i] = a[i] - b[i];
        ++i;
    }
}

} // anonymous namespace
63
+
64
+ void beam_search_encode_step(
65
+ size_t d,
66
+ size_t K,
67
+ const float* cent, /// size (K, d)
68
+ size_t n,
69
+ size_t beam_size,
70
+ const float* residuals, /// size (n, beam_size, d)
71
+ size_t m,
72
+ const int32_t* codes, /// size (n, beam_size, m)
73
+ size_t new_beam_size,
74
+ int32_t* new_codes, /// size (n, new_beam_size, m + 1)
75
+ float* new_residuals, /// size (n, new_beam_size, d)
76
+ float* new_distances, /// size (n, new_beam_size)
77
+ Index* assign_index) {
78
+ // we have to fill in the whole output matrix
79
+ FAISS_THROW_IF_NOT(new_beam_size <= beam_size * K);
80
+
81
+ using idx_t = Index::idx_t;
82
+
83
+ std::vector<float> cent_distances;
84
+ std::vector<idx_t> cent_ids;
85
+
86
+ if (assign_index) {
87
+ // search beam_size distances per query
88
+ FAISS_THROW_IF_NOT(assign_index->d == d);
89
+ cent_distances.resize(n * beam_size * new_beam_size);
90
+ cent_ids.resize(n * beam_size * new_beam_size);
91
+ if (assign_index->ntotal != 0) {
92
+ // then we assume the codebooks are already added to the index
93
+ FAISS_THROW_IF_NOT(assign_index->ntotal != K);
94
+ } else {
95
+ assign_index->add(K, cent);
96
+ }
97
+
98
+ // printf("beam_search_encode_step -- mem usage %zd\n",
99
+ // get_mem_usage_kb());
100
+ assign_index->search(
101
+ n * beam_size,
102
+ residuals,
103
+ new_beam_size,
104
+ cent_distances.data(),
105
+ cent_ids.data());
106
+ } else {
107
+ // do one big distance computation
108
+ cent_distances.resize(n * beam_size * K);
109
+ pairwise_L2sqr(
110
+ d, n * beam_size, residuals, K, cent, cent_distances.data());
111
+ }
112
+ InterruptCallback::check();
113
+
114
+ #pragma omp parallel for if (n > 100)
115
+ for (int64_t i = 0; i < n; i++) {
116
+ const int32_t* codes_i = codes + i * m * beam_size;
117
+ int32_t* new_codes_i = new_codes + i * (m + 1) * new_beam_size;
118
+ const float* residuals_i = residuals + i * d * beam_size;
119
+ float* new_residuals_i = new_residuals + i * d * new_beam_size;
120
+
121
+ float* new_distances_i = new_distances + i * new_beam_size;
122
+ using C = CMax<float, int>;
123
+
124
+ if (assign_index) {
125
+ const float* cent_distances_i =
126
+ cent_distances.data() + i * beam_size * new_beam_size;
127
+ const idx_t* cent_ids_i =
128
+ cent_ids.data() + i * beam_size * new_beam_size;
129
+
130
+ // here we could be a tad more efficient by merging sorted arrays
131
+ for (int i = 0; i < new_beam_size; i++) {
132
+ new_distances_i[i] = C::neutral();
133
+ }
134
+ std::vector<int> perm(new_beam_size, -1);
135
+ heap_addn<C>(
136
+ new_beam_size,
137
+ new_distances_i,
138
+ perm.data(),
139
+ cent_distances_i,
140
+ nullptr,
141
+ beam_size * new_beam_size);
142
+ heap_reorder<C>(new_beam_size, new_distances_i, perm.data());
143
+
144
+ for (int j = 0; j < new_beam_size; j++) {
145
+ int js = perm[j] / new_beam_size;
146
+ int ls = cent_ids_i[perm[j]];
147
+ if (m > 0) {
148
+ memcpy(new_codes_i, codes_i + js * m, sizeof(*codes) * m);
149
+ }
150
+ new_codes_i[m] = ls;
151
+ new_codes_i += m + 1;
152
+ fvec_sub(
153
+ d,
154
+ residuals_i + js * d,
155
+ cent + ls * d,
156
+ new_residuals_i);
157
+ new_residuals_i += d;
158
+ }
159
+
160
+ } else {
161
+ const float* cent_distances_i =
162
+ cent_distances.data() + i * beam_size * K;
163
+ // then we have to select the best results
164
+ for (int i = 0; i < new_beam_size; i++) {
165
+ new_distances_i[i] = C::neutral();
166
+ }
167
+ std::vector<int> perm(new_beam_size, -1);
168
+ heap_addn<C>(
169
+ new_beam_size,
170
+ new_distances_i,
171
+ perm.data(),
172
+ cent_distances_i,
173
+ nullptr,
174
+ beam_size * K);
175
+ heap_reorder<C>(new_beam_size, new_distances_i, perm.data());
176
+
177
+ for (int j = 0; j < new_beam_size; j++) {
178
+ int js = perm[j] / K;
179
+ int ls = perm[j] % K;
180
+ if (m > 0) {
181
+ memcpy(new_codes_i, codes_i + js * m, sizeof(*codes) * m);
182
+ }
183
+ new_codes_i[m] = ls;
184
+ new_codes_i += m + 1;
185
+ fvec_sub(
186
+ d,
187
+ residuals_i + js * d,
188
+ cent + ls * d,
189
+ new_residuals_i);
190
+ new_residuals_i += d;
191
+ }
192
+ }
193
+ }
194
+ }
195
+
196
// Train the M codebooks stage by stage: at step m, cluster the current
// residuals into 2^nbits[m] centroids, store them in the cumulative
// codebook table, then re-encode the training set with beam search to
// produce the residuals for step m + 1.
void ResidualQuantizer::train(size_t n, const float* x) {
    // total table size; codebook_offsets is presumably filled by
    // set_derived_values() in AdditiveQuantizer — TODO confirm
    codebooks.resize(d * codebook_offsets.back());

    if (verbose) {
        printf("Training ResidualQuantizer, with %zd steps on %zd %zdD vectors\n",
               M,
               n,
               size_t(d));
    }

    // the beam starts with the raw input vectors (beam of size 1)
    int cur_beam_size = 1;
    std::vector<float> residuals(x, x + n * d);
    std::vector<int32_t> codes;
    std::vector<float> distances;
    double t0 = getmillisecs();

    for (int m = 0; m < M; m++) {
        int K = 1 << nbits[m]; // number of centroids at this step

        // on which residuals to train
        std::vector<float>& train_residuals = residuals;
        std::vector<float> residuals1;
        if (train_type & Train_top_beam) {
            // train only on the best (first) beam entry of each vector
            residuals1.resize(n * d);
            for (size_t j = 0; j < n; j++) {
                memcpy(residuals1.data() + j * d,
                       residuals.data() + j * d * cur_beam_size,
                       sizeof(residuals[0]) * d);
            }
            // NOTE(review): train_residuals is a reference to `residuals`,
            // so this copy-assigns residuals1 INTO residuals and shrinks it
            // to n * d elements; with cur_beam_size > 1 the
            // beam_search_encode_step call below still reads
            // n * cur_beam_size * d floats from it — looks like an
            // out-of-bounds read, verify against upstream fix.
            train_residuals = residuals1;
        }
        // strip the Train_top_beam flag to get the base training mode
        train_type_t tt = train_type_t(train_type & ~Train_top_beam);

        // local codebook for this step (shadows the member deliberately)
        std::vector<float> codebooks;
        float obj = 0;

        // assignment index used both for clustering and for the encode step
        std::unique_ptr<Index> assign_index;
        if (assign_index_factory) {
            assign_index.reset((*assign_index_factory)(d));
        } else {
            assign_index.reset(new IndexFlatL2(d));
        }
        if (tt == Train_default) {
            // plain k-means on the selected residuals
            Clustering clus(d, K, cp);
            clus.train(
                    train_residuals.size() / d,
                    train_residuals.data(),
                    *assign_index.get());
            codebooks.swap(clus.centroids);
            assign_index->reset();
            obj = clus.iteration_stats.back().obj;
        } else if (tt == Train_progressive_dim) {
            // progressive-dimension clustering (default train_type)
            ProgressiveDimClustering clus(d, K, cp);
            ProgressiveDimIndexFactory default_fac;
            clus.train(
                    train_residuals.size() / d,
                    train_residuals.data(),
                    assign_index_factory ? *assign_index_factory : default_fac);
            codebooks.swap(clus.centroids);
            obj = clus.iteration_stats.back().obj;
        } else {
            FAISS_THROW_MSG("train type not supported");
        }

        // copy the step-m centroids into the cumulative member table
        memcpy(this->codebooks.data() + codebook_offsets[m] * d,
               codebooks.data(),
               codebooks.size() * sizeof(codebooks[0]));

        // quantize using the new codebooks

        int new_beam_size = std::min(cur_beam_size * K, max_beam_size);
        std::vector<int32_t> new_codes(n * new_beam_size * (m + 1));
        std::vector<float> new_residuals(n * new_beam_size * d);
        std::vector<float> new_distances(n * new_beam_size);

        beam_search_encode_step(
                d,
                K,
                codebooks.data(),
                n,
                cur_beam_size,
                residuals.data(),
                m,
                codes.data(),
                new_beam_size,
                new_codes.data(),
                new_residuals.data(),
                new_distances.data(),
                assign_index.get());

        // the new beam becomes the input of the next step
        codes.swap(new_codes);
        residuals.swap(new_residuals);
        distances.swap(new_distances);

        // total quantization error over all beam entries (for logging)
        float sum_distances = 0;
        for (int j = 0; j < distances.size(); j++) {
            sum_distances += distances[j];
        }

        if (verbose) {
            printf("[%.3f s] train stage %d, %d bits, kmeans objective %g, "
                   "total distance %g, beam_size %d->%d\n",
                   (getmillisecs() - t0) / 1000,
                   m,
                   int(nbits[m]),
                   obj,
                   sum_distances,
                   cur_beam_size,
                   new_beam_size);
        }
        cur_beam_size = new_beam_size;
    }

    is_trained = true;
}
311
+
312
+ size_t ResidualQuantizer::memory_per_point(int beam_size) const {
313
+ if (beam_size < 0) {
314
+ beam_size = max_beam_size;
315
+ }
316
+ size_t mem;
317
+ mem = beam_size * d * 2 * sizeof(float); // size for 2 beams at a time
318
+ mem += beam_size * beam_size *
319
+ (sizeof(float) +
320
+ sizeof(Index::idx_t)); // size for 1 beam search result
321
+ return mem;
322
+ }
323
+
324
+ void ResidualQuantizer::compute_codes(
325
+ const float* x,
326
+ uint8_t* codes_out,
327
+ size_t n) const {
328
+ FAISS_THROW_IF_NOT_MSG(is_trained, "RQ is not trained yet.");
329
+
330
+ size_t mem = memory_per_point();
331
+ if (n > 1 && mem * n > max_mem_distances) {
332
+ // then split queries to reduce temp memory
333
+ size_t bs = max_mem_distances / mem;
334
+ if (bs == 0) {
335
+ bs = 1; // otherwise we can't do much
336
+ }
337
+ for (size_t i0 = 0; i0 < n; i0 += bs) {
338
+ size_t i1 = std::min(n, i0 + bs);
339
+ compute_codes(x + i0 * d, codes_out + i0 * code_size, i1 - i0);
340
+ }
341
+ return;
342
+ }
343
+
344
+ std::vector<float> residuals(max_beam_size * n * d);
345
+ std::vector<int32_t> codes(max_beam_size * M * n);
346
+ std::vector<float> distances(max_beam_size * n);
347
+
348
+ refine_beam(
349
+ n,
350
+ 1,
351
+ x,
352
+ max_beam_size,
353
+ codes.data(),
354
+ residuals.data(),
355
+ distances.data());
356
+
357
+ // pack only the first code of the beam (hence the ld_codes=M *
358
+ // max_beam_size)
359
+ pack_codes(n, codes.data(), codes_out, M * max_beam_size);
360
+ }
361
+
362
// Run the full M-step beam search over trained codebooks: starting from an
// input beam of beam_size hypotheses per vector, apply one
// beam_search_encode_step per sub-quantizer, capping the beam at
// out_beam_size, and copy the final codes/residuals/distances to the
// optional output buffers.
void ResidualQuantizer::refine_beam(
        size_t n,
        size_t beam_size,
        const float* x,
        int out_beam_size,
        int32_t* out_codes,       // size (n, out_beam_size, M), may be null
        float* out_residuals,     // size (n, out_beam_size, d), may be null
        float* out_distances) const { // size (n, out_beam_size), may be null
    int cur_beam_size = beam_size;

    // working copy of the input beam residuals
    std::vector<float> residuals(x, x + n * d * beam_size);
    std::vector<int32_t> codes;
    std::vector<float> distances;
    double t0 = getmillisecs();

    // assignment index reused (and reset) across the M steps
    std::unique_ptr<Index> assign_index;
    if (assign_index_factory) {
        assign_index.reset((*assign_index_factory)(d));
    } else {
        assign_index.reset(new IndexFlatL2(d));
    }

    for (int m = 0; m < M; m++) {
        int K = 1 << nbits[m]; // centroids for this step

        // codebook slice for step m in the cumulative table
        const float* codebooks_m =
                this->codebooks.data() + codebook_offsets[m] * d;

        int new_beam_size = std::min(cur_beam_size * K, out_beam_size);

        std::vector<int32_t> new_codes(n * new_beam_size * (m + 1));
        std::vector<float> new_residuals(n * new_beam_size * d);
        distances.resize(n * new_beam_size);

        beam_search_encode_step(
                d,
                K,
                codebooks_m,
                n,
                cur_beam_size,
                residuals.data(),
                m,
                codes.data(),
                new_beam_size,
                new_codes.data(),
                new_residuals.data(),
                distances.data(),
                assign_index.get());

        // drop this step's centroids so the next step can re-add its own
        assign_index->reset();

        codes.swap(new_codes);
        residuals.swap(new_residuals);

        cur_beam_size = new_beam_size;

        if (verbose) {
            float sum_distances = 0;
            for (int j = 0; j < distances.size(); j++) {
                sum_distances += distances[j];
            }
            printf("[%.3f s] encode stage %d, %d bits, "
                   "total error %g, beam_size %d\n",
                   (getmillisecs() - t0) / 1000,
                   m,
                   int(nbits[m]),
                   sum_distances,
                   cur_beam_size);
        }
    }

    // copy out whichever outputs the caller asked for
    if (out_codes) {
        memcpy(out_codes, codes.data(), codes.size() * sizeof(codes[0]));
    }
    if (out_residuals) {
        memcpy(out_residuals,
               residuals.data(),
               residuals.size() * sizeof(residuals[0]));
    }
    if (out_distances) {
        memcpy(out_distances,
               distances.data(),
               distances.size() * sizeof(distances[0]));
    }
}
447
+
448
+ } // namespace faiss
@@ -0,0 +1,130 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <cstdint>
11
+ #include <vector>
12
+
13
+ #include <faiss/Clustering.h>
14
+ #include <faiss/impl/AdditiveQuantizer.h>
15
+
16
+ namespace faiss {
17
+
18
+ /** Residual quantizer with variable number of bits per sub-quantizer
19
+ *
20
+ * The residual centroids are stored in a big cumulative centroid table.
21
+ * The codes are represented either as a non-compact table of size (n, M) or
22
+ * as the compact output (n, code_size).
23
+ */
24
+
25
+ struct ResidualQuantizer : AdditiveQuantizer {
26
+ /// initialization
27
+ enum train_type_t {
28
+ Train_default, ///< regular k-means
29
+ Train_progressive_dim, ///< progressive dim clustering
30
+ };
31
+
32
+ // set this bit on train_type if beam is to be trained only on the
33
+ // first element of the beam (faster but less accurate)
34
+ static const int Train_top_beam = 1024;
35
+ train_type_t train_type;
36
+
37
+ /// beam size used for training and for encoding
38
+ int max_beam_size;
39
+
40
+ /// distance matrixes with beam search can get large, so use this
41
+ /// to batch computations at encoding time.
42
+ size_t max_mem_distances;
43
+
44
+ /// clustering parameters
45
+ ProgressiveDimClusteringParameters cp;
46
+
47
+ /// if non-NULL, use this index for assignment
48
+ ProgressiveDimIndexFactory* assign_index_factory;
49
+
50
+ ResidualQuantizer(size_t d, const std::vector<size_t>& nbits);
51
+
52
+ ResidualQuantizer(
53
+ size_t d, /* dimensionality of the input vectors */
54
+ size_t M, /* number of subquantizers */
55
+ size_t nbits); /* number of bit per subvector index */
56
+
57
+ ResidualQuantizer();
58
+
59
+ // Train the residual quantizer
60
+ void train(size_t n, const float* x) override;
61
+
62
+ /** Encode a set of vectors
63
+ *
64
+ * @param x vectors to encode, size n * d
65
+ * @param codes output codes, size n * code_size
66
+ */
67
+ void compute_codes(const float* x, uint8_t* codes, size_t n) const override;
68
+
69
+ /** lower-level encode function
70
+ *
71
+ * @param n number of vectors to hanlde
72
+ * @param residuals vectors to encode, size (n, beam_size, d)
73
+ * @param beam_size input beam size
74
+ * @param new_beam_size output beam size (should be <= K * beam_size)
75
+ * @param new_codes output codes, size (n, new_beam_size, m + 1)
76
+ * @param new_residuals output residuals, size (n, new_beam_size, d)
77
+ * @param new_distances output distances, size (n, new_beam_size)
78
+ */
79
+ void refine_beam(
80
+ size_t n,
81
+ size_t beam_size,
82
+ const float* residuals,
83
+ int new_beam_size,
84
+ int32_t* new_codes,
85
+ float* new_residuals = nullptr,
86
+ float* new_distances = nullptr) const;
87
+
88
+ /** Beam search can consume a lot of memory. This function estimates the
89
+ * amount of mem used by refine_beam to adjust the batch size
90
+ *
91
+ * @param beam_size if != -1, override the beam size
92
+ */
93
+ size_t memory_per_point(int beam_size = -1) const;
94
+ };
95
+
96
/** Encode a residual by sampling from a centroid table.
 *
 * This is a single encoding step of the residual quantizer.
 * It allows low-level access to the encoding function, exposed mainly for unit
 * tests.
 *
 * @param n              number of vectors to handle
 * @param residuals      vectors to encode, size (n, beam_size, d)
 * @param cent           centroids, size (K, d)
 * @param beam_size      input beam size
 * @param m              size of the codes for the previous encoding steps
 * @param codes          code array for the previous steps of the beam (n,
 *                       beam_size, m)
 * @param new_beam_size  output beam size (should be <= K * beam_size)
 * @param new_codes      output codes, size (n, new_beam_size, m + 1)
 * @param new_residuals  output residuals, size (n, new_beam_size, d)
 * @param new_distances  output distances, size (n, new_beam_size)
 * @param assign_index   if non-NULL, will be used to perform assignment
 */
void beam_search_encode_step(
        size_t d,
        size_t K,
        const float* cent,
        size_t n,
        size_t beam_size,
        const float* residuals,
        size_t m,
        const int32_t* codes,
        size_t new_beam_size,
        int32_t* new_codes,
        float* new_residuals,
        float* new_distances,
        Index* assign_index = nullptr);
129
+
130
+ }; // namespace faiss