faiss 0.1.5 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -0
  3. data/README.md +12 -0
  4. data/ext/faiss/ext.cpp +1 -1
  5. data/ext/faiss/extconf.rb +6 -2
  6. data/ext/faiss/index.cpp +114 -43
  7. data/ext/faiss/index_binary.cpp +24 -30
  8. data/ext/faiss/kmeans.cpp +20 -16
  9. data/ext/faiss/numo.hpp +867 -0
  10. data/ext/faiss/pca_matrix.cpp +13 -14
  11. data/ext/faiss/product_quantizer.cpp +23 -24
  12. data/ext/faiss/utils.cpp +10 -37
  13. data/ext/faiss/utils.h +2 -13
  14. data/lib/faiss.rb +0 -5
  15. data/lib/faiss/version.rb +1 -1
  16. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  17. data/vendor/faiss/faiss/AutoTune.h +55 -56
  18. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  19. data/vendor/faiss/faiss/Clustering.h +88 -35
  20. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  21. data/vendor/faiss/faiss/IVFlib.h +48 -51
  22. data/vendor/faiss/faiss/Index.cpp +85 -103
  23. data/vendor/faiss/faiss/Index.h +54 -48
  24. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  25. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  26. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  27. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  28. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  29. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  30. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  31. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  32. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  33. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  34. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  35. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  36. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  37. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  38. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  39. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  40. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  41. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  42. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  43. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  54. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  55. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  69. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  73. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  74. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  75. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  76. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  77. data/vendor/faiss/faiss/IndexShards.h +85 -73
  78. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  79. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  80. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  81. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  82. data/vendor/faiss/faiss/MetricType.h +7 -7
  83. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  84. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  85. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  86. data/vendor/faiss/faiss/clone_index.h +4 -9
  87. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  88. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  89. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  90. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  91. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  92. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  93. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  94. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  95. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  96. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  97. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  101. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  102. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  103. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  104. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  106. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  108. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  110. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  112. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  113. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  114. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  115. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  116. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  121. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  122. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  124. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  125. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  126. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  128. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  129. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  130. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  131. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  133. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  135. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  136. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  137. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  138. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  139. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  140. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  142. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  144. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  145. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  146. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  148. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  149. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  151. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  153. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  154. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  156. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  157. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  158. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  159. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  160. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  161. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  162. data/vendor/faiss/faiss/impl/io.h +31 -41
  163. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  164. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  165. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  166. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  167. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  171. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  172. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  173. data/vendor/faiss/faiss/index_factory.h +6 -7
  174. data/vendor/faiss/faiss/index_io.h +23 -26
  175. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  177. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  178. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  179. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  180. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  181. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  183. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  185. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  186. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  187. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  188. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  189. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  190. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  191. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  192. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  193. data/vendor/faiss/faiss/utils/distances.h +133 -118
  194. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  195. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  196. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  197. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  198. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  199. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  200. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  201. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  202. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  203. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  204. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  205. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  206. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  207. data/vendor/faiss/faiss/utils/random.h +13 -16
  208. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  209. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  210. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  211. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  212. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  213. data/vendor/faiss/faiss/utils/utils.h +53 -48
  214. metadata +24 -10
  215. data/lib/faiss/index.rb +0 -20
  216. data/lib/faiss/index_binary.rb +0 -20
  217. data/lib/faiss/kmeans.rb +0 -15
  218. data/lib/faiss/pca_matrix.rb +0 -15
  219. data/lib/faiss/product_quantizer.rb +0 -22
@@ -10,7 +10,6 @@
10
10
  #ifndef FAISS_INDEX_LATTICE_H
11
11
  #define FAISS_INDEX_LATTICE_H
12
12
 
13
-
14
13
  #include <vector>
15
14
 
16
15
  #include <faiss/IndexIVF.h>
@@ -18,14 +17,9 @@
18
17
 
19
18
  namespace faiss {
20
19
 
21
-
22
-
23
-
24
-
25
20
  /** Index that encodes a vector with a series of Zn lattice quantizers
26
21
  */
27
- struct IndexLattice: Index {
28
-
22
+ struct IndexLattice : Index {
29
23
  /// number of sub-vectors
30
24
  int nsq;
31
25
  /// dimension of sub-vectors
@@ -42,25 +36,26 @@ struct IndexLattice: Index {
42
36
  /// mins and maxes of the vector norms, per subquantizer
43
37
  std::vector<float> trained;
44
38
 
45
- IndexLattice (idx_t d, int nsq, int scale_nbit, int r2);
39
+ IndexLattice(idx_t d, int nsq, int scale_nbit, int r2);
46
40
 
47
41
  void train(idx_t n, const float* x) override;
48
42
 
49
43
  /* The standalone codec interface */
50
- size_t sa_code_size () const override;
44
+ size_t sa_code_size() const override;
51
45
 
52
- void sa_encode (idx_t n, const float *x,
53
- uint8_t *bytes) const override;
46
+ void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
54
47
 
55
- void sa_decode (idx_t n, const uint8_t *bytes,
56
- float *x) const override;
48
+ void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
57
49
 
58
50
  /// not implemented
59
51
  void add(idx_t n, const float* x) override;
60
- void search(idx_t n, const float* x, idx_t k,
61
- float* distances, idx_t* labels) const override;
52
+ void search(
53
+ idx_t n,
54
+ const float* x,
55
+ idx_t k,
56
+ float* distances,
57
+ idx_t* labels) const override;
62
58
  void reset() override;
63
-
64
59
  };
65
60
 
66
61
  } // namespace faiss
@@ -0,0 +1,231 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #include <faiss/IndexNNDescent.h>
11
+
12
+ #include <omp.h>
13
+
14
+ #include <cinttypes>
15
+ #include <cstdio>
16
+ #include <cstdlib>
17
+
18
+ #include <queue>
19
+ #include <unordered_set>
20
+
21
+ #ifdef __SSE__
22
+ #endif
23
+
24
+ #include <faiss/IndexFlat.h>
25
+ #include <faiss/impl/AuxIndexStructures.h>
26
+ #include <faiss/impl/FaissAssert.h>
27
+ #include <faiss/utils/Heap.h>
28
+ #include <faiss/utils/distances.h>
29
+ #include <faiss/utils/random.h>
30
+
31
+ extern "C" {
32
+
33
+ /* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */
34
+
35
+ int sgemm_(
36
+ const char* transa,
37
+ const char* transb,
38
+ FINTEGER* m,
39
+ FINTEGER* n,
40
+ FINTEGER* k,
41
+ const float* alpha,
42
+ const float* a,
43
+ FINTEGER* lda,
44
+ const float* b,
45
+ FINTEGER* ldb,
46
+ float* beta,
47
+ float* c,
48
+ FINTEGER* ldc);
49
+ }
50
+
51
+ namespace faiss {
52
+
53
+ using idx_t = Index::idx_t;
54
+ using storage_idx_t = NNDescent::storage_idx_t;
55
+
56
+ /**************************************************************
57
+ * add / search blocks of descriptors
58
+ **************************************************************/
59
+
60
+ namespace {
61
+
62
+ /* Wrap the distance computer into one that negates the
63
+ distances. This makes supporting INNER_PRODUCT search easier */
64
+
65
+ struct NegativeDistanceComputer : DistanceComputer {
66
+ /// owned by this
67
+ DistanceComputer* basedis;
68
+
69
+ explicit NegativeDistanceComputer(DistanceComputer* basedis)
70
+ : basedis(basedis) {}
71
+
72
+ void set_query(const float* x) override {
73
+ basedis->set_query(x);
74
+ }
75
+
76
+ /// compute distance of vector i to current query
77
+ float operator()(idx_t i) override {
78
+ return -(*basedis)(i);
79
+ }
80
+
81
+ /// compute distance between two stored vectors
82
+ float symmetric_dis(idx_t i, idx_t j) override {
83
+ return -basedis->symmetric_dis(i, j);
84
+ }
85
+
86
+ ~NegativeDistanceComputer() override {
87
+ delete basedis;
88
+ }
89
+ };
90
+
91
+ DistanceComputer* storage_distance_computer(const Index* storage) {
92
+ if (storage->metric_type == METRIC_INNER_PRODUCT) {
93
+ return new NegativeDistanceComputer(storage->get_distance_computer());
94
+ } else {
95
+ return storage->get_distance_computer();
96
+ }
97
+ }
98
+
99
+ } // namespace
100
+
101
+ /**************************************************************
102
+ * IndexNNDescent implementation
103
+ **************************************************************/
104
+
105
+ IndexNNDescent::IndexNNDescent(int d, int K, MetricType metric)
106
+ : Index(d, metric),
107
+ nndescent(d, K),
108
+ own_fields(false),
109
+ storage(nullptr) {}
110
+
111
+ IndexNNDescent::IndexNNDescent(Index* storage, int K)
112
+ : Index(storage->d, storage->metric_type),
113
+ nndescent(storage->d, K),
114
+ own_fields(false),
115
+ storage(storage) {}
116
+
117
+ IndexNNDescent::~IndexNNDescent() {
118
+ if (own_fields) {
119
+ delete storage;
120
+ }
121
+ }
122
+
123
+ void IndexNNDescent::train(idx_t n, const float* x) {
124
+ FAISS_THROW_IF_NOT_MSG(
125
+ storage,
126
+ "Please use IndexNNDescentFlat (or variants) "
127
+ "instead of IndexNNDescent directly");
128
+ // nndescent structure does not require training
129
+ storage->train(n, x);
130
+ is_trained = true;
131
+ }
132
+
133
+ void IndexNNDescent::search(
134
+ idx_t n,
135
+ const float* x,
136
+ idx_t k,
137
+ float* distances,
138
+ idx_t* labels) const
139
+
140
+ {
141
+ FAISS_THROW_IF_NOT_MSG(
142
+ storage,
143
+ "Please use IndexNNDescentFlat (or variants) "
144
+ "instead of IndexNNDescent directly");
145
+ if (verbose) {
146
+ printf("Parameters: k=%" PRId64 ", search_L=%d\n",
147
+ k,
148
+ nndescent.search_L);
149
+ }
150
+
151
+ idx_t check_period =
152
+ InterruptCallback::get_period_hint(d * nndescent.search_L);
153
+
154
+ for (idx_t i0 = 0; i0 < n; i0 += check_period) {
155
+ idx_t i1 = std::min(i0 + check_period, n);
156
+
157
+ #pragma omp parallel
158
+ {
159
+ VisitedTable vt(ntotal);
160
+
161
+ DistanceComputer* dis = storage_distance_computer(storage);
162
+ ScopeDeleter1<DistanceComputer> del(dis);
163
+
164
+ #pragma omp for
165
+ for (idx_t i = i0; i < i1; i++) {
166
+ idx_t* idxi = labels + i * k;
167
+ float* simi = distances + i * k;
168
+ dis->set_query(x + i * d);
169
+
170
+ maxheap_heapify(k, simi, idxi);
171
+ nndescent.search(*dis, k, idxi, simi, vt);
172
+ maxheap_reorder(k, simi, idxi);
173
+ }
174
+ }
175
+ InterruptCallback::check();
176
+ }
177
+
178
+ if (metric_type == METRIC_INNER_PRODUCT) {
179
+ // we need to revert the negated distances
180
+ for (size_t i = 0; i < k * n; i++) {
181
+ distances[i] = -distances[i];
182
+ }
183
+ }
184
+ }
185
+
186
+ void IndexNNDescent::add(idx_t n, const float* x) {
187
+ FAISS_THROW_IF_NOT_MSG(
188
+ storage,
189
+ "Please use IndexNNDescentFlat (or variants) "
190
+ "instead of IndexNNDescent directly");
191
+ FAISS_THROW_IF_NOT(is_trained);
192
+
193
+ if (ntotal != 0) {
194
+ fprintf(stderr,
195
+ "WARNING NNDescent doest not support dynamic insertions,"
196
+ "multiple insertions would lead to re-building the index");
197
+ }
198
+
199
+ storage->add(n, x);
200
+ ntotal = storage->ntotal;
201
+
202
+ DistanceComputer* dis = storage_distance_computer(storage);
203
+ ScopeDeleter1<DistanceComputer> del(dis);
204
+ nndescent.build(*dis, ntotal, verbose);
205
+ }
206
+
207
+ void IndexNNDescent::reset() {
208
+ nndescent.reset();
209
+ storage->reset();
210
+ ntotal = 0;
211
+ }
212
+
213
+ void IndexNNDescent::reconstruct(idx_t key, float* recons) const {
214
+ storage->reconstruct(key, recons);
215
+ }
216
+
217
+ /**************************************************************
218
+ * IndexNNDescentFlat implementation
219
+ **************************************************************/
220
+
221
+ IndexNNDescentFlat::IndexNNDescentFlat() {
222
+ is_trained = true;
223
+ }
224
+
225
+ IndexNNDescentFlat::IndexNNDescentFlat(int d, int M, MetricType metric)
226
+ : IndexNNDescent(new IndexFlat(d, metric), M) {
227
+ own_fields = true;
228
+ is_trained = true;
229
+ }
230
+
231
+ } // namespace faiss
@@ -0,0 +1,72 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #pragma once
11
+
12
+ #include <vector>
13
+
14
+ #include <faiss/IndexFlat.h>
15
+ #include <faiss/impl/NNDescent.h>
16
+ #include <faiss/utils/utils.h>
17
+
18
+ namespace faiss {
19
+
20
+ /** The NNDescent index is a normal random-access index with an NNDescent
21
+ * link structure built on top */
22
+
23
+ struct IndexNNDescent : Index {
24
+ // internal storage of vectors (32 bits)
25
+ using storage_idx_t = NNDescent::storage_idx_t;
26
+
27
+ /// Faiss results are 64-bit
28
+ using idx_t = Index::idx_t;
29
+
30
+ // the link structure
31
+ NNDescent nndescent;
32
+
33
+ // the sequential storage
34
+ bool own_fields;
35
+ Index* storage;
36
+
37
+ explicit IndexNNDescent(
38
+ int d = 0,
39
+ int K = 32,
40
+ MetricType metric = METRIC_L2);
41
+ explicit IndexNNDescent(Index* storage, int K = 32);
42
+
43
+ ~IndexNNDescent() override;
44
+
45
+ void add(idx_t n, const float* x) override;
46
+
47
+ /// Trains the storage if needed
48
+ void train(idx_t n, const float* x) override;
49
+
50
+ /// entry point for search
51
+ void search(
52
+ idx_t n,
53
+ const float* x,
54
+ idx_t k,
55
+ float* distances,
56
+ idx_t* labels) const override;
57
+
58
+ void reconstruct(idx_t key, float* recons) const override;
59
+
60
+ void reset() override;
61
+ };
62
+
63
+ /** Flat index topped with an NNDescent structure to access elements
64
+ * more efficiently.
65
+ */
66
+
67
+ struct IndexNNDescentFlat : IndexNNDescent {
68
+ IndexNNDescentFlat();
69
+ IndexNNDescentFlat(int d, int K, MetricType metric = METRIC_L2);
70
+ };
71
+
72
+ } // namespace faiss
@@ -0,0 +1,303 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #include <faiss/IndexNSG.h>
11
+
12
+ #include <omp.h>
13
+
14
+ #include <cinttypes>
15
+ #include <memory>
16
+
17
+ #include <faiss/IndexFlat.h>
18
+ #include <faiss/IndexNNDescent.h>
19
+ #include <faiss/impl/AuxIndexStructures.h>
20
+ #include <faiss/impl/FaissAssert.h>
21
+ #include <faiss/utils/Heap.h>
22
+ #include <faiss/utils/distances.h>
23
+
24
+ namespace faiss {
25
+
26
+ using idx_t = Index::idx_t;
27
+ using namespace nsg;
28
+
29
+ /**************************************************************
30
+ * IndexNSG implementation
31
+ **************************************************************/
32
+
33
+ IndexNSG::IndexNSG(int d, int R, MetricType metric)
34
+ : Index(d, metric),
35
+ nsg(R),
36
+ own_fields(false),
37
+ storage(nullptr),
38
+ is_built(false),
39
+ GK(64),
40
+ build_type(0) {
41
+ nndescent_S = 10;
42
+ nndescent_R = 100;
43
+ nndescent_L = GK + 50;
44
+ nndescent_iter = 10;
45
+ }
46
+
47
+ IndexNSG::IndexNSG(Index* storage, int R)
48
+ : Index(storage->d, storage->metric_type),
49
+ nsg(R),
50
+ own_fields(false),
51
+ storage(storage),
52
+ is_built(false),
53
+ GK(64),
54
+ build_type(1) {
55
+ nndescent_S = 10;
56
+ nndescent_R = 100;
57
+ nndescent_L = GK + 50;
58
+ nndescent_iter = 10;
59
+ }
60
+
61
+ IndexNSG::~IndexNSG() {
62
+ if (own_fields) {
63
+ delete storage;
64
+ }
65
+ }
66
+
67
+ void IndexNSG::train(idx_t n, const float* x) {
68
+ FAISS_THROW_IF_NOT_MSG(
69
+ storage,
70
+ "Please use IndexNSGFlat (or variants) instead of IndexNSG directly");
71
+ // nsg structure does not require training
72
+ storage->train(n, x);
73
+ is_trained = true;
74
+ }
75
+
76
+ void IndexNSG::search(
77
+ idx_t n,
78
+ const float* x,
79
+ idx_t k,
80
+ float* distances,
81
+ idx_t* labels) const
82
+
83
+ {
84
+ FAISS_THROW_IF_NOT_MSG(
85
+ storage,
86
+ "Please use IndexNSGFlat (or variants) instead of IndexNSG directly");
87
+
88
+ int L = std::max(nsg.search_L, (int)k); // in case of search L = -1
89
+ idx_t check_period = InterruptCallback::get_period_hint(d * L);
90
+
91
+ for (idx_t i0 = 0; i0 < n; i0 += check_period) {
92
+ idx_t i1 = std::min(i0 + check_period, n);
93
+
94
+ #pragma omp parallel
95
+ {
96
+ VisitedTable vt(ntotal);
97
+
98
+ DistanceComputer* dis = storage_distance_computer(storage);
99
+ ScopeDeleter1<DistanceComputer> del(dis);
100
+
101
+ #pragma omp for
102
+ for (idx_t i = i0; i < i1; i++) {
103
+ idx_t* idxi = labels + i * k;
104
+ float* simi = distances + i * k;
105
+ dis->set_query(x + i * d);
106
+
107
+ maxheap_heapify(k, simi, idxi);
108
+ nsg.search(*dis, k, idxi, simi, vt);
109
+ maxheap_reorder(k, simi, idxi);
110
+
111
+ vt.advance();
112
+ }
113
+ }
114
+ InterruptCallback::check();
115
+ }
116
+
117
+ if (metric_type == METRIC_INNER_PRODUCT) {
118
+ // we need to revert the negated distances
119
+ for (size_t i = 0; i < k * n; i++) {
120
+ distances[i] = -distances[i];
121
+ }
122
+ }
123
+ }
124
+
125
+ void IndexNSG::build(idx_t n, const float* x, idx_t* knn_graph, int GK) {
126
+ FAISS_THROW_IF_NOT_MSG(
127
+ storage,
128
+ "Please use IndexNSGFlat (or variants) instead of IndexNSG directly");
129
+ FAISS_THROW_IF_NOT_MSG(
130
+ !is_built && ntotal == 0, "The IndexNSG is already built");
131
+
132
+ storage->add(n, x);
133
+ ntotal = storage->ntotal;
134
+
135
+ // check the knn graph
136
+ check_knn_graph(knn_graph, n, GK);
137
+
138
+ const nsg::Graph<idx_t> knng(knn_graph, n, GK);
139
+ nsg.build(storage, n, knng, verbose);
140
+ is_built = true;
141
+ }
142
+
143
+ void IndexNSG::add(idx_t n, const float* x) {
144
+ FAISS_THROW_IF_NOT_MSG(
145
+ storage,
146
+ "Please use IndexNSGFlat (or variants) "
147
+ "instead of IndexNSG directly");
148
+ FAISS_THROW_IF_NOT(is_trained);
149
+
150
+ FAISS_THROW_IF_NOT_MSG(
151
+ !is_built && ntotal == 0,
152
+ "NSG does not support incremental addition");
153
+
154
+ std::vector<idx_t> knng;
155
+ if (verbose) {
156
+ printf("IndexNSG::add %zd vectors\n", size_t(n));
157
+ }
158
+
159
+ if (build_type == 0) { // build with brute force search
160
+
161
+ if (verbose) {
162
+ printf(" Build knn graph with brute force search on storage index\n");
163
+ }
164
+
165
+ storage->add(n, x);
166
+ ntotal = storage->ntotal;
167
+ FAISS_THROW_IF_NOT(ntotal == n);
168
+
169
+ knng.resize(ntotal * (GK + 1));
170
+ storage->assign(ntotal, x, knng.data(), GK + 1);
171
+
172
+ // Remove itself
173
+ // - For metric distance, we just need to remove the first neighbor
174
+ // - But for non-metric, e.g. inner product, we need to check
175
+ // - each neighbor
176
+ if (storage->metric_type == METRIC_INNER_PRODUCT) {
177
+ for (idx_t i = 0; i < ntotal; i++) {
178
+ int count = 0;
179
+ for (int j = 0; j < GK + 1; j++) {
180
+ idx_t id = knng[i * (GK + 1) + j];
181
+ if (id != i) {
182
+ knng[i * GK + count] = id;
183
+ count += 1;
184
+ }
185
+ if (count == GK) {
186
+ break;
187
+ }
188
+ }
189
+ }
190
+ } else {
191
+ for (idx_t i = 0; i < ntotal; i++) {
192
+ memmove(knng.data() + i * GK,
193
+ knng.data() + i * (GK + 1) + 1,
194
+ GK * sizeof(idx_t));
195
+ }
196
+ }
197
+
198
+ } else if (build_type == 1) { // build with NNDescent
199
+ IndexNNDescent index(storage, GK);
200
+ index.nndescent.S = nndescent_S;
201
+ index.nndescent.R = nndescent_R;
202
+ index.nndescent.L = std::max(nndescent_L, GK + 50);
203
+ index.nndescent.iter = nndescent_iter;
204
+ index.verbose = verbose;
205
+
206
+ if (verbose) {
207
+ printf(" Build knn graph with NNdescent S=%d R=%d L=%d niter=%d\n",
208
+ index.nndescent.S,
209
+ index.nndescent.R,
210
+ index.nndescent.L,
211
+ index.nndescent.iter);
212
+ }
213
+
214
+ // prevent the temporary IndexNNDescent from deleting the shared storage
215
+ index.own_fields = false;
216
+
217
+ index.add(n, x);
218
+
219
+ // storage->add is already implicitly called by index.add (IndexNNDescent::add)
220
+ ntotal = storage->ntotal;
221
+ FAISS_THROW_IF_NOT(ntotal == n);
222
+
223
+ knng.resize(ntotal * GK);
224
+
225
+ // convert the NNDescent graph entries from int to idx_t
226
+ const int* knn_graph = index.nndescent.final_graph.data();
227
+ #pragma omp parallel for
228
+ for (idx_t i = 0; i < ntotal * GK; i++) {
229
+ knng[i] = knn_graph[i];
230
+ }
231
+ } else {
232
+ FAISS_THROW_MSG("build_type should be 0 or 1");
233
+ }
234
+
235
+ if (verbose) {
236
+ printf(" Check the knn graph\n");
237
+ }
238
+
239
+ // check the knn graph
240
+ check_knn_graph(knng.data(), n, GK);
241
+
242
+ if (verbose) {
243
+ printf(" nsg building\n");
244
+ }
245
+
246
+ const nsg::Graph<idx_t> knn_graph(knng.data(), n, GK);
247
+ nsg.build(storage, n, knn_graph, verbose);
248
+ is_built = true;
249
+ }
250
+
251
+ void IndexNSG::reset() {
252
+ nsg.reset();
253
+ storage->reset();
254
+ ntotal = 0;
255
+ is_built = false;
256
+ }
257
+
258
+ void IndexNSG::reconstruct(idx_t key, float* recons) const {
259
+ storage->reconstruct(key, recons);
260
+ }
261
+
262
+ void IndexNSG::check_knn_graph(const idx_t* knn_graph, idx_t n, int K) const {
263
+ idx_t total_count = 0;
264
+
265
+ #pragma omp parallel for reduction(+ : total_count)
266
+ for (idx_t i = 0; i < n; i++) {
267
+ int count = 0;
268
+ for (int j = 0; j < K; j++) {
269
+ idx_t id = knn_graph[i * K + j];
270
+ if (id < 0 || id >= n || id == i) {
271
+ count += 1;
272
+ }
273
+ }
274
+ total_count += count;
275
+ }
276
+
277
+ if (total_count > 0) {
278
+ fprintf(stderr,
279
+ "WARNING: the input knn graph "
280
+ "has %" PRId64 " invalid entries\n",
281
+ total_count);
282
+ }
283
+ FAISS_THROW_IF_NOT_MSG(
284
+ total_count < n / 10,
285
+ "There are too much invalid entries in the knn graph. "
286
+ "It may be an invalid knn graph.");
287
+ }
288
+
289
+ /**************************************************************
290
+ * IndexNSGFlat implementation
291
+ **************************************************************/
292
+
293
+ IndexNSGFlat::IndexNSGFlat() {
294
+ is_trained = true;
295
+ }
296
+
297
+ IndexNSGFlat::IndexNSGFlat(int d, int R, MetricType metric)
298
+ : IndexNSG(new IndexFlat(d, metric), R) {
299
+ own_fields = true;
300
+ is_trained = true;
301
+ }
302
+
303
+ } // namespace faiss