faiss 0.2.0 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (215) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +7 -7
  5. data/ext/faiss/extconf.rb +6 -3
  6. data/ext/faiss/numo.hpp +4 -4
  7. data/ext/faiss/utils.cpp +1 -1
  8. data/ext/faiss/utils.h +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  11. data/vendor/faiss/faiss/AutoTune.h +55 -56
  12. data/vendor/faiss/faiss/Clustering.cpp +365 -194
  13. data/vendor/faiss/faiss/Clustering.h +102 -35
  14. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  15. data/vendor/faiss/faiss/IVFlib.h +48 -51
  16. data/vendor/faiss/faiss/Index.cpp +85 -103
  17. data/vendor/faiss/faiss/Index.h +54 -48
  18. data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
  19. data/vendor/faiss/faiss/Index2Layer.h +22 -36
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
  21. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
  22. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  23. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  24. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  25. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  26. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  27. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  28. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  29. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  30. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  31. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  32. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  33. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  34. data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
  35. data/vendor/faiss/faiss/IndexFlat.h +42 -59
  36. data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
  37. data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
  38. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  39. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  40. data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
  41. data/vendor/faiss/faiss/IndexIVF.h +169 -118
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
  54. data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
  55. data/vendor/faiss/faiss/IndexLSH.h +20 -38
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -82
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
  69. data/vendor/faiss/faiss/IndexRefine.h +32 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
  73. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
  74. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  75. data/vendor/faiss/faiss/IndexShards.h +85 -73
  76. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  77. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  78. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  79. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  80. data/vendor/faiss/faiss/MetricType.h +7 -7
  81. data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
  82. data/vendor/faiss/faiss/VectorTransform.h +64 -89
  83. data/vendor/faiss/faiss/clone_index.cpp +78 -73
  84. data/vendor/faiss/faiss/clone_index.h +4 -9
  85. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  86. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  87. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
  88. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  89. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  90. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  91. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  92. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  93. data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
  94. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  95. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  96. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  97. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  101. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  102. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  106. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  108. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  110. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  112. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  113. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  114. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  115. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  116. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  121. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  122. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  124. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  125. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  126. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  128. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  129. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  130. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  135. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  136. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  137. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  138. data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
  139. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  140. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
  142. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  144. data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
  145. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  146. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  148. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  149. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  151. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
  153. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  154. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  156. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  157. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  158. data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
  159. data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
  160. data/vendor/faiss/faiss/impl/io.cpp +76 -95
  161. data/vendor/faiss/faiss/impl/io.h +31 -41
  162. data/vendor/faiss/faiss/impl/io_macros.h +60 -29
  163. data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
  164. data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
  165. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  166. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  167. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  171. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  172. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  173. data/vendor/faiss/faiss/index_factory.cpp +619 -397
  174. data/vendor/faiss/faiss/index_factory.h +8 -6
  175. data/vendor/faiss/faiss/index_io.h +23 -26
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  177. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  178. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  179. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  180. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  181. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  183. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  185. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  186. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  187. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  188. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  189. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  190. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  191. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  192. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  193. data/vendor/faiss/faiss/utils/distances.cpp +305 -312
  194. data/vendor/faiss/faiss/utils/distances.h +170 -122
  195. data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
  196. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  197. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  198. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  199. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  200. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  201. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  202. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  203. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  204. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  205. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  206. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  207. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  208. data/vendor/faiss/faiss/utils/random.h +13 -16
  209. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  210. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  211. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  212. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  213. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  214. data/vendor/faiss/faiss/utils/utils.h +54 -49
  215. metadata +29 -4
@@ -0,0 +1,229 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #include <faiss/IndexNNDescent.h>
11
+
12
+ #include <omp.h>
13
+
14
+ #include <cinttypes>
15
+ #include <cstdio>
16
+ #include <cstdlib>
17
+
18
+ #include <queue>
19
+ #include <unordered_set>
20
+
21
+ #ifdef __SSE__
22
+ #endif
23
+
24
+ #include <faiss/IndexFlat.h>
25
+ #include <faiss/impl/AuxIndexStructures.h>
26
+ #include <faiss/impl/FaissAssert.h>
27
+ #include <faiss/utils/Heap.h>
28
+ #include <faiss/utils/distances.h>
29
+ #include <faiss/utils/random.h>
30
+
31
+ extern "C" {
32
+
33
+ /* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */
34
+
35
+ int sgemm_(
36
+ const char* transa,
37
+ const char* transb,
38
+ FINTEGER* m,
39
+ FINTEGER* n,
40
+ FINTEGER* k,
41
+ const float* alpha,
42
+ const float* a,
43
+ FINTEGER* lda,
44
+ const float* b,
45
+ FINTEGER* ldb,
46
+ float* beta,
47
+ float* c,
48
+ FINTEGER* ldc);
49
+ }
50
+
51
+ namespace faiss {
52
+
53
+ using idx_t = Index::idx_t;
54
+ using storage_idx_t = NNDescent::storage_idx_t;
55
+
56
+ /**************************************************************
57
+ * add / search blocks of descriptors
58
+ **************************************************************/
59
+
60
+ namespace {
61
+
62
+ /* Wrap the distance computer into one that negates the
63
+ distances. This makes supporting INNER_PRODUCT search easier */
64
+
65
+ struct NegativeDistanceComputer : DistanceComputer {
66
+ /// owned by this
67
+ DistanceComputer* basedis;
68
+
69
+ explicit NegativeDistanceComputer(DistanceComputer* basedis)
70
+ : basedis(basedis) {}
71
+
72
+ void set_query(const float* x) override {
73
+ basedis->set_query(x);
74
+ }
75
+
76
+ /// compute distance of vector i to current query
77
+ float operator()(idx_t i) override {
78
+ return -(*basedis)(i);
79
+ }
80
+
81
+ /// compute distance between two stored vectors
82
+ float symmetric_dis(idx_t i, idx_t j) override {
83
+ return -basedis->symmetric_dis(i, j);
84
+ }
85
+
86
+ ~NegativeDistanceComputer() override {
87
+ delete basedis;
88
+ }
89
+ };
90
+
91
+ DistanceComputer* storage_distance_computer(const Index* storage) {
92
+ if (storage->metric_type == METRIC_INNER_PRODUCT) {
93
+ return new NegativeDistanceComputer(storage->get_distance_computer());
94
+ } else {
95
+ return storage->get_distance_computer();
96
+ }
97
+ }
98
+
99
+ } // namespace
100
+
101
+ /**************************************************************
102
+ * IndexNNDescent implementation
103
+ **************************************************************/
104
+
105
+ IndexNNDescent::IndexNNDescent(int d, int K, MetricType metric)
106
+ : Index(d, metric),
107
+ nndescent(d, K),
108
+ own_fields(false),
109
+ storage(nullptr) {}
110
+
111
+ IndexNNDescent::IndexNNDescent(Index* storage, int K)
112
+ : Index(storage->d, storage->metric_type),
113
+ nndescent(storage->d, K),
114
+ own_fields(false),
115
+ storage(storage) {}
116
+
117
+ IndexNNDescent::~IndexNNDescent() {
118
+ if (own_fields) {
119
+ delete storage;
120
+ }
121
+ }
122
+
123
+ void IndexNNDescent::train(idx_t n, const float* x) {
124
+ FAISS_THROW_IF_NOT_MSG(
125
+ storage,
126
+ "Please use IndexNNDescentFlat (or variants) "
127
+ "instead of IndexNNDescent directly");
128
+ // nndescent structure does not require training
129
+ storage->train(n, x);
130
+ is_trained = true;
131
+ }
132
+
133
+ void IndexNNDescent::search(
134
+ idx_t n,
135
+ const float* x,
136
+ idx_t k,
137
+ float* distances,
138
+ idx_t* labels) const
139
+
140
+ {
141
+ FAISS_THROW_IF_NOT_MSG(
142
+ storage,
143
+ "Please use IndexNNDescentFlat (or variants) "
144
+ "instead of IndexNNDescent directly");
145
+ if (verbose) {
146
+ printf("Parameters: k=%" PRId64 ", search_L=%d\n",
147
+ k,
148
+ nndescent.search_L);
149
+ }
150
+
151
+ idx_t check_period =
152
+ InterruptCallback::get_period_hint(d * nndescent.search_L);
153
+
154
+ for (idx_t i0 = 0; i0 < n; i0 += check_period) {
155
+ idx_t i1 = std::min(i0 + check_period, n);
156
+
157
+ #pragma omp parallel
158
+ {
159
+ VisitedTable vt(ntotal);
160
+
161
+ DistanceComputer* dis = storage_distance_computer(storage);
162
+ ScopeDeleter1<DistanceComputer> del(dis);
163
+
164
+ #pragma omp for
165
+ for (idx_t i = i0; i < i1; i++) {
166
+ idx_t* idxi = labels + i * k;
167
+ float* simi = distances + i * k;
168
+ dis->set_query(x + i * d);
169
+
170
+ nndescent.search(*dis, k, idxi, simi, vt);
171
+ }
172
+ }
173
+ InterruptCallback::check();
174
+ }
175
+
176
+ if (metric_type == METRIC_INNER_PRODUCT) {
177
+ // we need to revert the negated distances
178
+ for (size_t i = 0; i < k * n; i++) {
179
+ distances[i] = -distances[i];
180
+ }
181
+ }
182
+ }
183
+
184
+ void IndexNNDescent::add(idx_t n, const float* x) {
185
+ FAISS_THROW_IF_NOT_MSG(
186
+ storage,
187
+ "Please use IndexNNDescentFlat (or variants) "
188
+ "instead of IndexNNDescent directly");
189
+ FAISS_THROW_IF_NOT(is_trained);
190
+
191
+ if (ntotal != 0) {
192
+ fprintf(stderr,
193
+ "WARNING NNDescent doest not support dynamic insertions,"
194
+ "multiple insertions would lead to re-building the index");
195
+ }
196
+
197
+ storage->add(n, x);
198
+ ntotal = storage->ntotal;
199
+
200
+ DistanceComputer* dis = storage_distance_computer(storage);
201
+ ScopeDeleter1<DistanceComputer> del(dis);
202
+ nndescent.build(*dis, ntotal, verbose);
203
+ }
204
+
205
+ void IndexNNDescent::reset() {
206
+ nndescent.reset();
207
+ storage->reset();
208
+ ntotal = 0;
209
+ }
210
+
211
+ void IndexNNDescent::reconstruct(idx_t key, float* recons) const {
212
+ storage->reconstruct(key, recons);
213
+ }
214
+
215
+ /**************************************************************
216
+ * IndexNNDescentFlat implementation
217
+ **************************************************************/
218
+
219
+ IndexNNDescentFlat::IndexNNDescentFlat() {
220
+ is_trained = true;
221
+ }
222
+
223
+ IndexNNDescentFlat::IndexNNDescentFlat(int d, int M, MetricType metric)
224
+ : IndexNNDescent(new IndexFlat(d, metric), M) {
225
+ own_fields = true;
226
+ is_trained = true;
227
+ }
228
+
229
+ } // namespace faiss
@@ -0,0 +1,72 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #pragma once
11
+
12
+ #include <vector>
13
+
14
+ #include <faiss/IndexFlat.h>
15
+ #include <faiss/impl/NNDescent.h>
16
+ #include <faiss/utils/utils.h>
17
+
18
+ namespace faiss {
19
+
20
+ /** The NNDescent index is a normal random-access index with an NNDescent
21
+ * link structure built on top; vectors live in `storage`, links in `nndescent` */
22
+
23
+ struct IndexNNDescent : Index {
24
+ // id type used inside the link structure (alias of NNDescent::storage_idx_t)
25
+ using storage_idx_t = NNDescent::storage_idx_t;
26
+
27
+ /// Faiss results are 64-bit
28
+ using idx_t = Index::idx_t;
29
+
30
+ // the link structure
31
+ NNDescent nndescent;
32
+
33
+ // the sequential storage; it is deleted by the destructor only when own_fields is true
34
+ bool own_fields;
35
+ Index* storage;
36
+
37
+ explicit IndexNNDescent(
38
+ int d = 0,
39
+ int K = 32,
40
+ MetricType metric = METRIC_L2);
41
+ explicit IndexNNDescent(Index* storage, int K = 32);
42
+
43
+ ~IndexNNDescent() override;
44
+
45
+ void add(idx_t n, const float* x) override;
46
+
47
+ /// Trains the storage and marks the index as trained
48
+ void train(idx_t n, const float* x) override;
49
+
50
+ /// entry point for search: k results per query are written to distances / labels
51
+ void search(
52
+ idx_t n,
53
+ const float* x,
54
+ idx_t k,
55
+ float* distances,
56
+ idx_t* labels) const override;
57
+
58
+ void reconstruct(idx_t key, float* recons) const override;
59
+
60
+ void reset() override;
61
+ };
62
+
63
+ /** Flat index topped with an NNDescent structure to access elements
64
+ * more efficiently. The (d, K, metric) constructor allocates and owns an IndexFlat storage.
65
+ */
66
+
67
+ struct IndexNNDescentFlat : IndexNNDescent {
68
+ IndexNNDescentFlat();
69
+ IndexNNDescentFlat(int d, int K, MetricType metric = METRIC_L2);
70
+ };
71
+
72
+ } // namespace faiss
@@ -0,0 +1,301 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #include <faiss/IndexNSG.h>
11
+
12
+ #include <omp.h>
13
+
14
+ #include <cinttypes>
15
+ #include <memory>
16
+
17
+ #include <faiss/IndexFlat.h>
18
+ #include <faiss/IndexNNDescent.h>
19
+ #include <faiss/impl/AuxIndexStructures.h>
20
+ #include <faiss/impl/FaissAssert.h>
21
+ #include <faiss/utils/Heap.h>
22
+ #include <faiss/utils/distances.h>
23
+
24
+ namespace faiss {
25
+
26
+ using idx_t = Index::idx_t;
27
+ using namespace nsg;
28
+
29
+ /**************************************************************
30
+ * IndexNSG implementation
31
+ **************************************************************/
32
+
33
+ IndexNSG::IndexNSG(int d, int R, MetricType metric)
34
+ : Index(d, metric),
35
+ nsg(R),
36
+ own_fields(false),
37
+ storage(nullptr),
38
+ is_built(false),
39
+ GK(64),
40
+ build_type(0) {
41
+ nndescent_S = 10;
42
+ nndescent_R = 100;
43
+ nndescent_L = GK + 50;
44
+ nndescent_iter = 10;
45
+ }
46
+
47
+ IndexNSG::IndexNSG(Index* storage, int R)
48
+ : Index(storage->d, storage->metric_type),
49
+ nsg(R),
50
+ own_fields(false),
51
+ storage(storage),
52
+ is_built(false),
53
+ GK(64),
54
+ build_type(1) {
55
+ nndescent_S = 10;
56
+ nndescent_R = 100;
57
+ nndescent_L = GK + 50;
58
+ nndescent_iter = 10;
59
+ }
60
+
61
+ IndexNSG::~IndexNSG() {
62
+ if (own_fields) {
63
+ delete storage;
64
+ }
65
+ }
66
+
67
+ void IndexNSG::train(idx_t n, const float* x) {
68
+ FAISS_THROW_IF_NOT_MSG(
69
+ storage,
70
+ "Please use IndexNSGFlat (or variants) instead of IndexNSG directly");
71
+ // nsg structure does not require training
72
+ storage->train(n, x);
73
+ is_trained = true;
74
+ }
75
+
76
+ void IndexNSG::search(
77
+ idx_t n,
78
+ const float* x,
79
+ idx_t k,
80
+ float* distances,
81
+ idx_t* labels) const
82
+
83
+ {
84
+ FAISS_THROW_IF_NOT_MSG(
85
+ storage,
86
+ "Please use IndexNSGFlat (or variants) instead of IndexNSG directly");
87
+
88
+ int L = std::max(nsg.search_L, (int)k); // in case of search L = -1
89
+ idx_t check_period = InterruptCallback::get_period_hint(d * L);
90
+
91
+ for (idx_t i0 = 0; i0 < n; i0 += check_period) {
92
+ idx_t i1 = std::min(i0 + check_period, n);
93
+
94
+ #pragma omp parallel
95
+ {
96
+ VisitedTable vt(ntotal);
97
+
98
+ DistanceComputer* dis = storage_distance_computer(storage);
99
+ ScopeDeleter1<DistanceComputer> del(dis);
100
+
101
+ #pragma omp for
102
+ for (idx_t i = i0; i < i1; i++) {
103
+ idx_t* idxi = labels + i * k;
104
+ float* simi = distances + i * k;
105
+ dis->set_query(x + i * d);
106
+
107
+ nsg.search(*dis, k, idxi, simi, vt);
108
+
109
+ vt.advance();
110
+ }
111
+ }
112
+ InterruptCallback::check();
113
+ }
114
+
115
+ if (metric_type == METRIC_INNER_PRODUCT) {
116
+ // we need to revert the negated distances
117
+ for (size_t i = 0; i < k * n; i++) {
118
+ distances[i] = -distances[i];
119
+ }
120
+ }
121
+ }
122
+
123
+ void IndexNSG::build(idx_t n, const float* x, idx_t* knn_graph, int GK) {
124
+ FAISS_THROW_IF_NOT_MSG(
125
+ storage,
126
+ "Please use IndexNSGFlat (or variants) instead of IndexNSG directly");
127
+ FAISS_THROW_IF_NOT_MSG(
128
+ !is_built && ntotal == 0, "The IndexNSG is already built");
129
+
130
+ storage->add(n, x);
131
+ ntotal = storage->ntotal;
132
+
133
+ // check the knn graph
134
+ check_knn_graph(knn_graph, n, GK);
135
+
136
+ const nsg::Graph<idx_t> knng(knn_graph, n, GK);
137
+ nsg.build(storage, n, knng, verbose);
138
+ is_built = true;
139
+ }
140
+
141
+ void IndexNSG::add(idx_t n, const float* x) {
142
+ FAISS_THROW_IF_NOT_MSG(
143
+ storage,
144
+ "Please use IndexNSGFlat (or variants) "
145
+ "instead of IndexNSG directly");
146
+ FAISS_THROW_IF_NOT(is_trained);
147
+
148
+ FAISS_THROW_IF_NOT_MSG(
149
+ !is_built && ntotal == 0,
150
+ "NSG does not support incremental addition");
151
+
152
+ std::vector<idx_t> knng;
153
+ if (verbose) {
154
+ printf("IndexNSG::add %zd vectors\n", size_t(n));
155
+ }
156
+
157
+ if (build_type == 0) { // build with brute force search
158
+
159
+ if (verbose) {
160
+ printf(" Build knn graph with brute force search on storage index\n");
161
+ }
162
+
163
+ storage->add(n, x);
164
+ ntotal = storage->ntotal;
165
+ FAISS_THROW_IF_NOT(ntotal == n);
166
+
167
+ knng.resize(ntotal * (GK + 1));
168
+ storage->assign(ntotal, x, knng.data(), GK + 1);
169
+
170
+ // Remove itself
171
+ // - For metric distance, we just need to remove the first neighbor
172
+ // - But for non-metric, e.g. inner product, we need to check
173
+ // - each neighbor
174
+ if (storage->metric_type == METRIC_INNER_PRODUCT) {
175
+ for (idx_t i = 0; i < ntotal; i++) {
176
+ int count = 0;
177
+ for (int j = 0; j < GK + 1; j++) {
178
+ idx_t id = knng[i * (GK + 1) + j];
179
+ if (id != i) {
180
+ knng[i * GK + count] = id;
181
+ count += 1;
182
+ }
183
+ if (count == GK) {
184
+ break;
185
+ }
186
+ }
187
+ }
188
+ } else {
189
+ for (idx_t i = 0; i < ntotal; i++) {
190
+ memmove(knng.data() + i * GK,
191
+ knng.data() + i * (GK + 1) + 1,
192
+ GK * sizeof(idx_t));
193
+ }
194
+ }
195
+
196
+ } else if (build_type == 1) { // build with NNDescent
197
+ IndexNNDescent index(storage, GK);
198
+ index.nndescent.S = nndescent_S;
199
+ index.nndescent.R = nndescent_R;
200
+ index.nndescent.L = std::max(nndescent_L, GK + 50);
201
+ index.nndescent.iter = nndescent_iter;
202
+ index.verbose = verbose;
203
+
204
+ if (verbose) {
205
+ printf(" Build knn graph with NNdescent S=%d R=%d L=%d niter=%d\n",
206
+ index.nndescent.S,
207
+ index.nndescent.R,
208
+ index.nndescent.L,
209
+ index.nndescent.iter);
210
+ }
211
+
212
+ // prevent IndexNSG from deleting the storage
213
+ index.own_fields = false;
214
+
215
+ index.add(n, x);
216
+
217
+ // storage->add is already implicit called in IndexNSG.add
218
+ ntotal = storage->ntotal;
219
+ FAISS_THROW_IF_NOT(ntotal == n);
220
+
221
+ knng.resize(ntotal * GK);
222
+
223
+ // cast from idx_t to int
224
+ const int* knn_graph = index.nndescent.final_graph.data();
225
+ #pragma omp parallel for
226
+ for (idx_t i = 0; i < ntotal * GK; i++) {
227
+ knng[i] = knn_graph[i];
228
+ }
229
+ } else {
230
+ FAISS_THROW_MSG("build_type should be 0 or 1");
231
+ }
232
+
233
+ if (verbose) {
234
+ printf(" Check the knn graph\n");
235
+ }
236
+
237
+ // check the knn graph
238
+ check_knn_graph(knng.data(), n, GK);
239
+
240
+ if (verbose) {
241
+ printf(" nsg building\n");
242
+ }
243
+
244
+ const nsg::Graph<idx_t> knn_graph(knng.data(), n, GK);
245
+ nsg.build(storage, n, knn_graph, verbose);
246
+ is_built = true;
247
+ }
248
+
249
+ void IndexNSG::reset() {
250
+ nsg.reset();
251
+ storage->reset();
252
+ ntotal = 0;
253
+ is_built = false;
254
+ }
255
+
256
+ void IndexNSG::reconstruct(idx_t key, float* recons) const {
257
+ storage->reconstruct(key, recons);
258
+ }
259
+
260
+ void IndexNSG::check_knn_graph(const idx_t* knn_graph, idx_t n, int K) const {
261
+ idx_t total_count = 0;
262
+
263
+ #pragma omp parallel for reduction(+ : total_count)
264
+ for (idx_t i = 0; i < n; i++) {
265
+ int count = 0;
266
+ for (int j = 0; j < K; j++) {
267
+ idx_t id = knn_graph[i * K + j];
268
+ if (id < 0 || id >= n || id == i) {
269
+ count += 1;
270
+ }
271
+ }
272
+ total_count += count;
273
+ }
274
+
275
+ if (total_count > 0) {
276
+ fprintf(stderr,
277
+ "WARNING: the input knn graph "
278
+ "has %" PRId64 " invalid entries\n",
279
+ total_count);
280
+ }
281
+ FAISS_THROW_IF_NOT_MSG(
282
+ total_count < n / 10,
283
+ "There are too much invalid entries in the knn graph. "
284
+ "It may be an invalid knn graph.");
285
+ }
286
+
287
+ /**************************************************************
288
+ * IndexNSGFlat implementation
289
+ **************************************************************/
290
+
291
+ IndexNSGFlat::IndexNSGFlat() {
292
+ is_trained = true;
293
+ }
294
+
295
+ IndexNSGFlat::IndexNSGFlat(int d, int R, MetricType metric)
296
+ : IndexNSG(new IndexFlat(d, metric), R) {
297
+ own_fields = true;
298
+ is_trained = true;
299
+ }
300
+
301
+ } // namespace faiss
@@ -0,0 +1,85 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #pragma once
11
+
12
+ #include <vector>
13
+
14
+ #include <faiss/IndexFlat.h>
15
+ #include <faiss/IndexNNDescent.h>
16
+ #include <faiss/impl/NSG.h>
17
+ #include <faiss/utils/utils.h>
18
+
19
+ namespace faiss {
20
+
21
+ /** The NSG index is a normal random-access index with a NSG
22
+ * link structure built on top; vectors live in `storage`, links in `nsg` */
23
+
24
+ struct IndexNSG : Index {
25
+ /// the link structure
26
+ NSG nsg;
27
+
28
+ /// the sequential storage; it is deleted by the destructor only when own_fields is true
29
+ bool own_fields;
30
+ Index* storage;
31
+
32
+ /// whether the graph has been built; set by build() / add(), cleared by reset()
33
+ bool is_built;
34
+
35
+ /// K of the KNN graph that add() computes before building the NSG
36
+ int GK;
37
+
38
+ /// indicate how add() builds the knn graph
39
+ /// - 0: build NSG with brute force search (set by the (d, R, metric) constructor)
40
+ /// - 1: build NSG with NNDescent (set by the (storage, R) constructor)
41
+ char build_type;
42
+
43
+ /// parameters for nndescent, used by add() when build_type is 1
44
+ int nndescent_S;
45
+ int nndescent_R;
46
+ int nndescent_L;
47
+ int nndescent_iter;
48
+
49
+ explicit IndexNSG(int d = 0, int R = 32, MetricType metric = METRIC_L2);
50
+ explicit IndexNSG(Index* storage, int R = 32);
51
+
52
+ ~IndexNSG() override;
53
+
54
+ void build(idx_t n, const float* x, idx_t* knn_graph, int GK);
55
+
56
+ void add(idx_t n, const float* x) override;
57
+
58
+ /// Trains the storage and marks the index as trained
59
+ void train(idx_t n, const float* x) override;
60
+
61
+ /// entry point for search: k results per query are written to distances / labels
62
+ void search(
63
+ idx_t n,
64
+ const float* x,
65
+ idx_t k,
66
+ float* distances,
67
+ idx_t* labels) const override;
68
+
69
+ void reconstruct(idx_t key, float* recons) const override;
70
+
71
+ void reset() override;
72
+
73
+ void check_knn_graph(const idx_t* knn_graph, idx_t n, int K) const;
74
+ };
75
+
76
+ /** Flat index topped with an NSG structure to access elements
77
+ * more efficiently. The (d, R, metric) constructor allocates and owns an IndexFlat storage.
78
+ */
79
+
80
+ struct IndexNSGFlat : IndexNSG {
81
+ IndexNSGFlat();
82
+ IndexNSGFlat(int d, int R, MetricType metric = METRIC_L2);
83
+ };
84
+
85
+ } // namespace faiss