faiss 0.2.0 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +7 -7
  5. data/ext/faiss/extconf.rb +6 -3
  6. data/ext/faiss/numo.hpp +4 -4
  7. data/ext/faiss/utils.cpp +1 -1
  8. data/ext/faiss/utils.h +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  11. data/vendor/faiss/faiss/AutoTune.h +55 -56
  12. data/vendor/faiss/faiss/Clustering.cpp +365 -194
  13. data/vendor/faiss/faiss/Clustering.h +102 -35
  14. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  15. data/vendor/faiss/faiss/IVFlib.h +48 -51
  16. data/vendor/faiss/faiss/Index.cpp +85 -103
  17. data/vendor/faiss/faiss/Index.h +54 -48
  18. data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
  19. data/vendor/faiss/faiss/Index2Layer.h +22 -36
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
  21. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
  22. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  23. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  24. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  25. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  26. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  27. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  28. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  29. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  30. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  31. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  32. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  33. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  34. data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
  35. data/vendor/faiss/faiss/IndexFlat.h +42 -59
  36. data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
  37. data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
  38. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  39. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  40. data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
  41. data/vendor/faiss/faiss/IndexIVF.h +169 -118
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
  54. data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
  55. data/vendor/faiss/faiss/IndexLSH.h +20 -38
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -82
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
  69. data/vendor/faiss/faiss/IndexRefine.h +32 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
  73. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
  74. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  75. data/vendor/faiss/faiss/IndexShards.h +85 -73
  76. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  77. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  78. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  79. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  80. data/vendor/faiss/faiss/MetricType.h +7 -7
  81. data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
  82. data/vendor/faiss/faiss/VectorTransform.h +64 -89
  83. data/vendor/faiss/faiss/clone_index.cpp +78 -73
  84. data/vendor/faiss/faiss/clone_index.h +4 -9
  85. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  86. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  87. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
  88. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  89. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  90. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  91. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  92. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  93. data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
  94. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  95. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  96. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  97. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  101. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  102. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  106. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  108. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  110. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  112. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  113. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  114. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  115. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  116. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  121. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  122. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  124. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  125. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  126. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  128. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  129. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  130. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  135. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  136. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  137. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  138. data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
  139. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  140. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
  142. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  144. data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
  145. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  146. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  148. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  149. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  151. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
  153. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  154. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  156. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  157. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  158. data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
  159. data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
  160. data/vendor/faiss/faiss/impl/io.cpp +76 -95
  161. data/vendor/faiss/faiss/impl/io.h +31 -41
  162. data/vendor/faiss/faiss/impl/io_macros.h +60 -29
  163. data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
  164. data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
  165. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  166. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  167. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  171. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  172. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  173. data/vendor/faiss/faiss/index_factory.cpp +619 -397
  174. data/vendor/faiss/faiss/index_factory.h +8 -6
  175. data/vendor/faiss/faiss/index_io.h +23 -26
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  177. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  178. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  179. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  180. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  181. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  183. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  185. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  186. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  187. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  188. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  189. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  190. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  191. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  192. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  193. data/vendor/faiss/faiss/utils/distances.cpp +305 -312
  194. data/vendor/faiss/faiss/utils/distances.h +170 -122
  195. data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
  196. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  197. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  198. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  199. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  200. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  201. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  202. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  203. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  204. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  205. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  206. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  207. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  208. data/vendor/faiss/faiss/utils/random.h +13 -16
  209. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  210. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  211. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  212. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  213. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  214. data/vendor/faiss/faiss/utils/utils.h +54 -49
  215. metadata +29 -4
@@ -0,0 +1,229 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #include <faiss/IndexNNDescent.h>
11
+
12
+ #include <omp.h>
13
+
14
+ #include <cinttypes>
15
+ #include <cstdio>
16
+ #include <cstdlib>
17
+
18
+ #include <queue>
19
+ #include <unordered_set>
20
+
21
+ #ifdef __SSE__
22
+ #endif
23
+
24
+ #include <faiss/IndexFlat.h>
25
+ #include <faiss/impl/AuxIndexStructures.h>
26
+ #include <faiss/impl/FaissAssert.h>
27
+ #include <faiss/utils/Heap.h>
28
+ #include <faiss/utils/distances.h>
29
+ #include <faiss/utils/random.h>
30
+
31
+ extern "C" {
32
+
33
+ /* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */
34
+
35
+ int sgemm_(
36
+ const char* transa,
37
+ const char* transb,
38
+ FINTEGER* m,
39
+ FINTEGER* n,
40
+ FINTEGER* k,
41
+ const float* alpha,
42
+ const float* a,
43
+ FINTEGER* lda,
44
+ const float* b,
45
+ FINTEGER* ldb,
46
+ float* beta,
47
+ float* c,
48
+ FINTEGER* ldc);
49
+ }
50
+
51
+ namespace faiss {
52
+
53
+ using idx_t = Index::idx_t;
54
+ using storage_idx_t = NNDescent::storage_idx_t;
55
+
56
+ /**************************************************************
57
+ * add / search blocks of descriptors
58
+ **************************************************************/
59
+
60
+ namespace {
61
+
62
+ /* Wrap the distance computer into one that negates the
63
+ distances. This makes supporting INNER_PRODUCE search easier */
64
+
65
+ struct NegativeDistanceComputer : DistanceComputer {
66
+ /// owned by this
67
+ DistanceComputer* basedis;
68
+
69
+ explicit NegativeDistanceComputer(DistanceComputer* basedis)
70
+ : basedis(basedis) {}
71
+
72
+ void set_query(const float* x) override {
73
+ basedis->set_query(x);
74
+ }
75
+
76
+ /// compute distance of vector i to current query
77
+ float operator()(idx_t i) override {
78
+ return -(*basedis)(i);
79
+ }
80
+
81
+ /// compute distance between two stored vectors
82
+ float symmetric_dis(idx_t i, idx_t j) override {
83
+ return -basedis->symmetric_dis(i, j);
84
+ }
85
+
86
+ ~NegativeDistanceComputer() override {
87
+ delete basedis;
88
+ }
89
+ };
90
+
91
+ DistanceComputer* storage_distance_computer(const Index* storage) {
92
+ if (storage->metric_type == METRIC_INNER_PRODUCT) {
93
+ return new NegativeDistanceComputer(storage->get_distance_computer());
94
+ } else {
95
+ return storage->get_distance_computer();
96
+ }
97
+ }
98
+
99
+ } // namespace
100
+
101
+ /**************************************************************
102
+ * IndexNNDescent implementation
103
+ **************************************************************/
104
+
105
+ IndexNNDescent::IndexNNDescent(int d, int K, MetricType metric)
106
+ : Index(d, metric),
107
+ nndescent(d, K),
108
+ own_fields(false),
109
+ storage(nullptr) {}
110
+
111
+ IndexNNDescent::IndexNNDescent(Index* storage, int K)
112
+ : Index(storage->d, storage->metric_type),
113
+ nndescent(storage->d, K),
114
+ own_fields(false),
115
+ storage(storage) {}
116
+
117
+ IndexNNDescent::~IndexNNDescent() {
118
+ if (own_fields) {
119
+ delete storage;
120
+ }
121
+ }
122
+
123
+ void IndexNNDescent::train(idx_t n, const float* x) {
124
+ FAISS_THROW_IF_NOT_MSG(
125
+ storage,
126
+ "Please use IndexNNDescentFlat (or variants) "
127
+ "instead of IndexNNDescent directly");
128
+ // nndescent structure does not require training
129
+ storage->train(n, x);
130
+ is_trained = true;
131
+ }
132
+
133
+ void IndexNNDescent::search(
134
+ idx_t n,
135
+ const float* x,
136
+ idx_t k,
137
+ float* distances,
138
+ idx_t* labels) const
139
+
140
+ {
141
+ FAISS_THROW_IF_NOT_MSG(
142
+ storage,
143
+ "Please use IndexNNDescentFlat (or variants) "
144
+ "instead of IndexNNDescent directly");
145
+ if (verbose) {
146
+ printf("Parameters: k=%" PRId64 ", search_L=%d\n",
147
+ k,
148
+ nndescent.search_L);
149
+ }
150
+
151
+ idx_t check_period =
152
+ InterruptCallback::get_period_hint(d * nndescent.search_L);
153
+
154
+ for (idx_t i0 = 0; i0 < n; i0 += check_period) {
155
+ idx_t i1 = std::min(i0 + check_period, n);
156
+
157
+ #pragma omp parallel
158
+ {
159
+ VisitedTable vt(ntotal);
160
+
161
+ DistanceComputer* dis = storage_distance_computer(storage);
162
+ ScopeDeleter1<DistanceComputer> del(dis);
163
+
164
+ #pragma omp for
165
+ for (idx_t i = i0; i < i1; i++) {
166
+ idx_t* idxi = labels + i * k;
167
+ float* simi = distances + i * k;
168
+ dis->set_query(x + i * d);
169
+
170
+ nndescent.search(*dis, k, idxi, simi, vt);
171
+ }
172
+ }
173
+ InterruptCallback::check();
174
+ }
175
+
176
+ if (metric_type == METRIC_INNER_PRODUCT) {
177
+ // we need to revert the negated distances
178
+ for (size_t i = 0; i < k * n; i++) {
179
+ distances[i] = -distances[i];
180
+ }
181
+ }
182
+ }
183
+
184
+ void IndexNNDescent::add(idx_t n, const float* x) {
185
+ FAISS_THROW_IF_NOT_MSG(
186
+ storage,
187
+ "Please use IndexNNDescentFlat (or variants) "
188
+ "instead of IndexNNDescent directly");
189
+ FAISS_THROW_IF_NOT(is_trained);
190
+
191
+ if (ntotal != 0) {
192
+ fprintf(stderr,
193
+ "WARNING NNDescent doest not support dynamic insertions,"
194
+ "multiple insertions would lead to re-building the index");
195
+ }
196
+
197
+ storage->add(n, x);
198
+ ntotal = storage->ntotal;
199
+
200
+ DistanceComputer* dis = storage_distance_computer(storage);
201
+ ScopeDeleter1<DistanceComputer> del(dis);
202
+ nndescent.build(*dis, ntotal, verbose);
203
+ }
204
+
205
+ void IndexNNDescent::reset() {
206
+ nndescent.reset();
207
+ storage->reset();
208
+ ntotal = 0;
209
+ }
210
+
211
+ void IndexNNDescent::reconstruct(idx_t key, float* recons) const {
212
+ storage->reconstruct(key, recons);
213
+ }
214
+
215
+ /**************************************************************
216
+ * IndexNNDescentFlat implementation
217
+ **************************************************************/
218
+
219
+ IndexNNDescentFlat::IndexNNDescentFlat() {
220
+ is_trained = true;
221
+ }
222
+
223
+ IndexNNDescentFlat::IndexNNDescentFlat(int d, int M, MetricType metric)
224
+ : IndexNNDescent(new IndexFlat(d, metric), M) {
225
+ own_fields = true;
226
+ is_trained = true;
227
+ }
228
+
229
+ } // namespace faiss
@@ -0,0 +1,72 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #pragma once
11
+
12
+ #include <vector>
13
+
14
+ #include <faiss/IndexFlat.h>
15
+ #include <faiss/impl/NNDescent.h>
16
+ #include <faiss/utils/utils.h>
17
+
18
+ namespace faiss {
19
+
20
+ /** The NNDescent index is a normal random-access index with an NNDescent
21
+ * link structure built on top */
22
+
23
+ struct IndexNNDescent : Index {
24
+ // internal storage of vectors (32 bits)
25
+ using storage_idx_t = NNDescent::storage_idx_t;
26
+
27
+ /// Faiss results are 64-bit
28
+ using idx_t = Index::idx_t;
29
+
30
+ // the link strcuture
31
+ NNDescent nndescent;
32
+
33
+ // the sequential storage
34
+ bool own_fields;
35
+ Index* storage;
36
+
37
+ explicit IndexNNDescent(
38
+ int d = 0,
39
+ int K = 32,
40
+ MetricType metric = METRIC_L2);
41
+ explicit IndexNNDescent(Index* storage, int K = 32);
42
+
43
+ ~IndexNNDescent() override;
44
+
45
+ void add(idx_t n, const float* x) override;
46
+
47
+ /// Trains the storage if needed
48
+ void train(idx_t n, const float* x) override;
49
+
50
+ /// entry point for search
51
+ void search(
52
+ idx_t n,
53
+ const float* x,
54
+ idx_t k,
55
+ float* distances,
56
+ idx_t* labels) const override;
57
+
58
+ void reconstruct(idx_t key, float* recons) const override;
59
+
60
+ void reset() override;
61
+ };
62
+
63
+ /** Flat index topped with with a NNDescent structure to access elements
64
+ * more efficiently.
65
+ */
66
+
67
+ struct IndexNNDescentFlat : IndexNNDescent {
68
+ IndexNNDescentFlat();
69
+ IndexNNDescentFlat(int d, int K, MetricType metric = METRIC_L2);
70
+ };
71
+
72
+ } // namespace faiss
@@ -0,0 +1,301 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #include <faiss/IndexNSG.h>
11
+
12
+ #include <omp.h>
13
+
14
+ #include <cinttypes>
15
+ #include <memory>
16
+
17
+ #include <faiss/IndexFlat.h>
18
+ #include <faiss/IndexNNDescent.h>
19
+ #include <faiss/impl/AuxIndexStructures.h>
20
+ #include <faiss/impl/FaissAssert.h>
21
+ #include <faiss/utils/Heap.h>
22
+ #include <faiss/utils/distances.h>
23
+
24
+ namespace faiss {
25
+
26
+ using idx_t = Index::idx_t;
27
+ using namespace nsg;
28
+
29
+ /**************************************************************
30
+ * IndexNSG implementation
31
+ **************************************************************/
32
+
33
+ IndexNSG::IndexNSG(int d, int R, MetricType metric)
34
+ : Index(d, metric),
35
+ nsg(R),
36
+ own_fields(false),
37
+ storage(nullptr),
38
+ is_built(false),
39
+ GK(64),
40
+ build_type(0) {
41
+ nndescent_S = 10;
42
+ nndescent_R = 100;
43
+ nndescent_L = GK + 50;
44
+ nndescent_iter = 10;
45
+ }
46
+
47
+ IndexNSG::IndexNSG(Index* storage, int R)
48
+ : Index(storage->d, storage->metric_type),
49
+ nsg(R),
50
+ own_fields(false),
51
+ storage(storage),
52
+ is_built(false),
53
+ GK(64),
54
+ build_type(1) {
55
+ nndescent_S = 10;
56
+ nndescent_R = 100;
57
+ nndescent_L = GK + 50;
58
+ nndescent_iter = 10;
59
+ }
60
+
61
+ IndexNSG::~IndexNSG() {
62
+ if (own_fields) {
63
+ delete storage;
64
+ }
65
+ }
66
+
67
+ void IndexNSG::train(idx_t n, const float* x) {
68
+ FAISS_THROW_IF_NOT_MSG(
69
+ storage,
70
+ "Please use IndexNSGFlat (or variants) instead of IndexNSG directly");
71
+ // nsg structure does not require training
72
+ storage->train(n, x);
73
+ is_trained = true;
74
+ }
75
+
76
+ void IndexNSG::search(
77
+ idx_t n,
78
+ const float* x,
79
+ idx_t k,
80
+ float* distances,
81
+ idx_t* labels) const
82
+
83
+ {
84
+ FAISS_THROW_IF_NOT_MSG(
85
+ storage,
86
+ "Please use IndexNSGFlat (or variants) instead of IndexNSG directly");
87
+
88
+ int L = std::max(nsg.search_L, (int)k); // in case of search L = -1
89
+ idx_t check_period = InterruptCallback::get_period_hint(d * L);
90
+
91
+ for (idx_t i0 = 0; i0 < n; i0 += check_period) {
92
+ idx_t i1 = std::min(i0 + check_period, n);
93
+
94
+ #pragma omp parallel
95
+ {
96
+ VisitedTable vt(ntotal);
97
+
98
+ DistanceComputer* dis = storage_distance_computer(storage);
99
+ ScopeDeleter1<DistanceComputer> del(dis);
100
+
101
+ #pragma omp for
102
+ for (idx_t i = i0; i < i1; i++) {
103
+ idx_t* idxi = labels + i * k;
104
+ float* simi = distances + i * k;
105
+ dis->set_query(x + i * d);
106
+
107
+ nsg.search(*dis, k, idxi, simi, vt);
108
+
109
+ vt.advance();
110
+ }
111
+ }
112
+ InterruptCallback::check();
113
+ }
114
+
115
+ if (metric_type == METRIC_INNER_PRODUCT) {
116
+ // we need to revert the negated distances
117
+ for (size_t i = 0; i < k * n; i++) {
118
+ distances[i] = -distances[i];
119
+ }
120
+ }
121
+ }
122
+
123
+ void IndexNSG::build(idx_t n, const float* x, idx_t* knn_graph, int GK) {
124
+ FAISS_THROW_IF_NOT_MSG(
125
+ storage,
126
+ "Please use IndexNSGFlat (or variants) instead of IndexNSG directly");
127
+ FAISS_THROW_IF_NOT_MSG(
128
+ !is_built && ntotal == 0, "The IndexNSG is already built");
129
+
130
+ storage->add(n, x);
131
+ ntotal = storage->ntotal;
132
+
133
+ // check the knn graph
134
+ check_knn_graph(knn_graph, n, GK);
135
+
136
+ const nsg::Graph<idx_t> knng(knn_graph, n, GK);
137
+ nsg.build(storage, n, knng, verbose);
138
+ is_built = true;
139
+ }
140
+
141
+ void IndexNSG::add(idx_t n, const float* x) {
142
+ FAISS_THROW_IF_NOT_MSG(
143
+ storage,
144
+ "Please use IndexNSGFlat (or variants) "
145
+ "instead of IndexNSG directly");
146
+ FAISS_THROW_IF_NOT(is_trained);
147
+
148
+ FAISS_THROW_IF_NOT_MSG(
149
+ !is_built && ntotal == 0,
150
+ "NSG does not support incremental addition");
151
+
152
+ std::vector<idx_t> knng;
153
+ if (verbose) {
154
+ printf("IndexNSG::add %zd vectors\n", size_t(n));
155
+ }
156
+
157
+ if (build_type == 0) { // build with brute force search
158
+
159
+ if (verbose) {
160
+ printf(" Build knn graph with brute force search on storage index\n");
161
+ }
162
+
163
+ storage->add(n, x);
164
+ ntotal = storage->ntotal;
165
+ FAISS_THROW_IF_NOT(ntotal == n);
166
+
167
+ knng.resize(ntotal * (GK + 1));
168
+ storage->assign(ntotal, x, knng.data(), GK + 1);
169
+
170
+ // Remove itself
171
+ // - For metric distance, we just need to remove the first neighbor
172
+ // - But for non-metric, e.g. inner product, we need to check
173
+ // - each neighbor
174
+ if (storage->metric_type == METRIC_INNER_PRODUCT) {
175
+ for (idx_t i = 0; i < ntotal; i++) {
176
+ int count = 0;
177
+ for (int j = 0; j < GK + 1; j++) {
178
+ idx_t id = knng[i * (GK + 1) + j];
179
+ if (id != i) {
180
+ knng[i * GK + count] = id;
181
+ count += 1;
182
+ }
183
+ if (count == GK) {
184
+ break;
185
+ }
186
+ }
187
+ }
188
+ } else {
189
+ for (idx_t i = 0; i < ntotal; i++) {
190
+ memmove(knng.data() + i * GK,
191
+ knng.data() + i * (GK + 1) + 1,
192
+ GK * sizeof(idx_t));
193
+ }
194
+ }
195
+
196
+ } else if (build_type == 1) { // build with NNDescent
197
+ IndexNNDescent index(storage, GK);
198
+ index.nndescent.S = nndescent_S;
199
+ index.nndescent.R = nndescent_R;
200
+ index.nndescent.L = std::max(nndescent_L, GK + 50);
201
+ index.nndescent.iter = nndescent_iter;
202
+ index.verbose = verbose;
203
+
204
+ if (verbose) {
205
+ printf(" Build knn graph with NNdescent S=%d R=%d L=%d niter=%d\n",
206
+ index.nndescent.S,
207
+ index.nndescent.R,
208
+ index.nndescent.L,
209
+ index.nndescent.iter);
210
+ }
211
+
212
+ // prevent IndexNSG from deleting the storage
213
+ index.own_fields = false;
214
+
215
+ index.add(n, x);
216
+
217
+ // storage->add is already implicit called in IndexNSG.add
218
+ ntotal = storage->ntotal;
219
+ FAISS_THROW_IF_NOT(ntotal == n);
220
+
221
+ knng.resize(ntotal * GK);
222
+
223
+ // cast from idx_t to int
224
+ const int* knn_graph = index.nndescent.final_graph.data();
225
+ #pragma omp parallel for
226
+ for (idx_t i = 0; i < ntotal * GK; i++) {
227
+ knng[i] = knn_graph[i];
228
+ }
229
+ } else {
230
+ FAISS_THROW_MSG("build_type should be 0 or 1");
231
+ }
232
+
233
+ if (verbose) {
234
+ printf(" Check the knn graph\n");
235
+ }
236
+
237
+ // check the knn graph
238
+ check_knn_graph(knng.data(), n, GK);
239
+
240
+ if (verbose) {
241
+ printf(" nsg building\n");
242
+ }
243
+
244
+ const nsg::Graph<idx_t> knn_graph(knng.data(), n, GK);
245
+ nsg.build(storage, n, knn_graph, verbose);
246
+ is_built = true;
247
+ }
248
+
249
+ void IndexNSG::reset() {
250
+ nsg.reset();
251
+ storage->reset();
252
+ ntotal = 0;
253
+ is_built = false;
254
+ }
255
+
256
+ void IndexNSG::reconstruct(idx_t key, float* recons) const {
257
+ storage->reconstruct(key, recons);
258
+ }
259
+
260
+ void IndexNSG::check_knn_graph(const idx_t* knn_graph, idx_t n, int K) const {
261
+ idx_t total_count = 0;
262
+
263
+ #pragma omp parallel for reduction(+ : total_count)
264
+ for (idx_t i = 0; i < n; i++) {
265
+ int count = 0;
266
+ for (int j = 0; j < K; j++) {
267
+ idx_t id = knn_graph[i * K + j];
268
+ if (id < 0 || id >= n || id == i) {
269
+ count += 1;
270
+ }
271
+ }
272
+ total_count += count;
273
+ }
274
+
275
+ if (total_count > 0) {
276
+ fprintf(stderr,
277
+ "WARNING: the input knn graph "
278
+ "has %" PRId64 " invalid entries\n",
279
+ total_count);
280
+ }
281
+ FAISS_THROW_IF_NOT_MSG(
282
+ total_count < n / 10,
283
+ "There are too much invalid entries in the knn graph. "
284
+ "It may be an invalid knn graph.");
285
+ }
286
+
287
+ /**************************************************************
288
+ * IndexNSGFlat implementation
289
+ **************************************************************/
290
+
291
+ IndexNSGFlat::IndexNSGFlat() {
292
+ is_trained = true;
293
+ }
294
+
295
+ IndexNSGFlat::IndexNSGFlat(int d, int R, MetricType metric)
296
+ : IndexNSG(new IndexFlat(d, metric), R) {
297
+ own_fields = true;
298
+ is_trained = true;
299
+ }
300
+
301
+ } // namespace faiss
@@ -0,0 +1,85 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #pragma once
11
+
12
+ #include <vector>
13
+
14
+ #include <faiss/IndexFlat.h>
15
+ #include <faiss/IndexNNDescent.h>
16
+ #include <faiss/impl/NSG.h>
17
+ #include <faiss/utils/utils.h>
18
+
19
+ namespace faiss {
20
+
21
+ /** The NSG index is a normal random-access index with a NSG
22
+ * link structure built on top */
23
+
24
+ struct IndexNSG : Index {
25
+ /// the link strcuture
26
+ NSG nsg;
27
+
28
+ /// the sequential storage
29
+ bool own_fields;
30
+ Index* storage;
31
+
32
+ /// the index is built or not
33
+ bool is_built;
34
+
35
+ /// K of KNN graph for building
36
+ int GK;
37
+
38
+ /// indicate how to build a knn graph
39
+ /// - 0: build NSG with brute force search
40
+ /// - 1: build NSG with NNDescent
41
+ char build_type;
42
+
43
+ /// parameters for nndescent
44
+ int nndescent_S;
45
+ int nndescent_R;
46
+ int nndescent_L;
47
+ int nndescent_iter;
48
+
49
+ explicit IndexNSG(int d = 0, int R = 32, MetricType metric = METRIC_L2);
50
+ explicit IndexNSG(Index* storage, int R = 32);
51
+
52
+ ~IndexNSG() override;
53
+
54
+ void build(idx_t n, const float* x, idx_t* knn_graph, int GK);
55
+
56
+ void add(idx_t n, const float* x) override;
57
+
58
+ /// Trains the storage if needed
59
+ void train(idx_t n, const float* x) override;
60
+
61
+ /// entry point for search
62
+ void search(
63
+ idx_t n,
64
+ const float* x,
65
+ idx_t k,
66
+ float* distances,
67
+ idx_t* labels) const override;
68
+
69
+ void reconstruct(idx_t key, float* recons) const override;
70
+
71
+ void reset() override;
72
+
73
+ void check_knn_graph(const idx_t* knn_graph, idx_t n, int K) const;
74
+ };
75
+
76
+ /** Flat index topped with with a NSG structure to access elements
77
+ * more efficiently.
78
+ */
79
+
80
+ struct IndexNSGFlat : IndexNSG {
81
+ IndexNSGFlat();
82
+ IndexNSGFlat(int d, int R, MetricType metric = METRIC_L2);
83
+ };
84
+
85
+ } // namespace faiss