faiss 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/faiss/version.rb +1 -1
  4. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  5. data/vendor/faiss/faiss/AutoTune.h +55 -56
  6. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  7. data/vendor/faiss/faiss/Clustering.h +88 -35
  8. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  9. data/vendor/faiss/faiss/IVFlib.h +48 -51
  10. data/vendor/faiss/faiss/Index.cpp +85 -103
  11. data/vendor/faiss/faiss/Index.h +54 -48
  12. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  13. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  14. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  15. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  16. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  17. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  18. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  19. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  20. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  21. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  22. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  23. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  24. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  25. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  26. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  27. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  29. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  30. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  31. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  32. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  33. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  34. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  35. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  36. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  37. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  38. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  39. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  40. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  41. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  42. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  43. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  44. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  45. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  46. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  47. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  48. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  49. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  50. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  51. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  52. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  53. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  54. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  55. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  56. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  57. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  58. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  59. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  60. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  61. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  62. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  63. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  64. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  65. data/vendor/faiss/faiss/IndexShards.h +85 -73
  66. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  67. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  68. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  69. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  70. data/vendor/faiss/faiss/MetricType.h +7 -7
  71. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  72. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  73. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  74. data/vendor/faiss/faiss/clone_index.h +4 -9
  75. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  76. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  77. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  78. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  79. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  80. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  81. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  82. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  83. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  84. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  85. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  86. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  87. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  88. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  89. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  90. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  91. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  92. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  93. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  94. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  95. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  96. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  97. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  98. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  99. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  100. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  101. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  102. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  103. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  104. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  105. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  106. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  107. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  108. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  109. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  110. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  111. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  112. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  113. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  114. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  115. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  116. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  117. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  118. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  119. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  121. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  123. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  124. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  125. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  126. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  127. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  128. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  130. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  131. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  132. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  133. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  134. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  135. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  136. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  137. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  138. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  139. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  141. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  142. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  144. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  145. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  146. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  147. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  148. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  149. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  150. data/vendor/faiss/faiss/impl/io.h +31 -41
  151. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  152. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  153. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  154. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  155. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  156. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  157. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  158. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  159. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  160. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  161. data/vendor/faiss/faiss/index_factory.h +6 -7
  162. data/vendor/faiss/faiss/index_io.h +23 -26
  163. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  164. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  165. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  166. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  167. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  168. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  169. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  170. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  171. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  172. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  173. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  174. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  175. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  176. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  177. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  178. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  179. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  180. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  181. data/vendor/faiss/faiss/utils/distances.h +133 -118
  182. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  183. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  184. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  185. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  186. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  187. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  188. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  189. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  190. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  191. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  192. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  193. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  194. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  195. data/vendor/faiss/faiss/utils/random.h +13 -16
  196. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  197. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  198. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  199. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  200. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  201. data/vendor/faiss/faiss/utils/utils.h +53 -48
  202. metadata +20 -2
@@ -11,20 +11,20 @@
11
11
 
12
12
  #include <vector>
13
13
 
14
- #include <faiss/IndexPQ.h>
15
14
  #include <faiss/IndexIVF.h>
15
+ #include <faiss/IndexPQ.h>
16
16
 
17
17
  namespace faiss {
18
18
 
19
19
  struct IndexIVFPQ;
20
20
 
21
-
22
- /** Same as an IndexIVFPQ without the inverted lists: codes are stored sequentially
21
+ /** Same as an IndexIVFPQ without the inverted lists: codes are stored
22
+ * sequentially
23
23
  *
24
24
  * The class is mainly inteded to store encoded vectors that can be
25
25
  * accessed randomly, the search function is not implemented.
26
26
  */
27
- struct Index2Layer: Index {
27
+ struct Index2Layer : Index {
28
28
  /// first level quantizer
29
29
  Level1Quantizer q1;
30
30
 
@@ -43,12 +43,15 @@ struct Index2Layer: Index {
43
43
  /// code_size_1 + code_size_2
44
44
  size_t code_size;
45
45
 
46
- Index2Layer (Index * quantizer, size_t nlist,
47
- int M, int nbit = 8,
48
- MetricType metric = METRIC_L2);
46
+ Index2Layer(
47
+ Index* quantizer,
48
+ size_t nlist,
49
+ int M,
50
+ int nbit = 8,
51
+ MetricType metric = METRIC_L2);
49
52
 
50
- Index2Layer ();
51
- ~Index2Layer ();
53
+ Index2Layer();
54
+ ~Index2Layer();
52
55
 
53
56
  void train(idx_t n, const float* x) override;
54
57
 
@@ -56,11 +59,11 @@ struct Index2Layer: Index {
56
59
 
57
60
  /// not implemented
58
61
  void search(
59
- idx_t n,
60
- const float* x,
61
- idx_t k,
62
- float* distances,
63
- idx_t* labels) const override;
62
+ idx_t n,
63
+ const float* x,
64
+ idx_t k,
65
+ float* distances,
66
+ idx_t* labels) const override;
64
67
 
65
68
  void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
66
69
 
@@ -68,18 +71,15 @@ struct Index2Layer: Index {
68
71
 
69
72
  void reset() override;
70
73
 
71
- DistanceComputer * get_distance_computer() const override;
74
+ DistanceComputer* get_distance_computer() const override;
72
75
 
73
76
  /// transfer the flat codes to an IVFPQ index
74
- void transfer_to_IVFPQ(IndexIVFPQ & other) const;
75
-
77
+ void transfer_to_IVFPQ(IndexIVFPQ& other) const;
76
78
 
77
79
  /* The standalone codec interface */
78
- size_t sa_code_size () const override;
79
- void sa_encode (idx_t n, const float *x, uint8_t *bytes) const override;
80
- void sa_decode (idx_t n, const uint8_t *bytes, float *x) const override;
81
-
80
+ size_t sa_code_size() const override;
81
+ void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
82
+ void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
82
83
  };
83
84
 
84
-
85
85
  } // namespace faiss
@@ -17,61 +17,69 @@ namespace faiss {
17
17
 
18
18
  IndexBinary::~IndexBinary() {}
19
19
 
20
- void IndexBinary::train(idx_t, const uint8_t *) {
21
- // Does nothing by default.
20
+ void IndexBinary::train(idx_t, const uint8_t*) {
21
+ // Does nothing by default.
22
22
  }
23
23
 
24
- void IndexBinary::range_search(idx_t, const uint8_t *, int,
25
- RangeSearchResult *) const {
26
- FAISS_THROW_MSG("range search not implemented");
24
+ void IndexBinary::range_search(idx_t, const uint8_t*, int, RangeSearchResult*)
25
+ const {
26
+ FAISS_THROW_MSG("range search not implemented");
27
27
  }
28
28
 
29
- void IndexBinary::assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k) const {
30
- std::vector<int> distances(n * k);
31
- search(n, x, k, distances.data(), labels);
29
+ void IndexBinary::assign(idx_t n, const uint8_t* x, idx_t* labels, idx_t k)
30
+ const {
31
+ std::vector<int> distances(n * k);
32
+ search(n, x, k, distances.data(), labels);
32
33
  }
33
34
 
34
- void IndexBinary::add_with_ids(idx_t, const uint8_t *, const idx_t *) {
35
- FAISS_THROW_MSG("add_with_ids not implemented for this type of index");
35
+ void IndexBinary::add_with_ids(idx_t, const uint8_t*, const idx_t*) {
36
+ FAISS_THROW_MSG("add_with_ids not implemented for this type of index");
36
37
  }
37
38
 
38
39
  size_t IndexBinary::remove_ids(const IDSelector&) {
39
- FAISS_THROW_MSG("remove_ids not implemented for this type of index");
40
- return 0;
40
+ FAISS_THROW_MSG("remove_ids not implemented for this type of index");
41
+ return 0;
41
42
  }
42
43
 
43
- void IndexBinary::reconstruct(idx_t, uint8_t *) const {
44
- FAISS_THROW_MSG("reconstruct not implemented for this type of index");
44
+ void IndexBinary::reconstruct(idx_t, uint8_t*) const {
45
+ FAISS_THROW_MSG("reconstruct not implemented for this type of index");
45
46
  }
46
47
 
47
- void IndexBinary::reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const {
48
- for (idx_t i = 0; i < ni; i++) {
49
- reconstruct(i0 + i, recons + i * d);
50
- }
48
+ void IndexBinary::reconstruct_n(idx_t i0, idx_t ni, uint8_t* recons) const {
49
+ for (idx_t i = 0; i < ni; i++) {
50
+ reconstruct(i0 + i, recons + i * d);
51
+ }
51
52
  }
52
53
 
53
- void IndexBinary::search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k,
54
- int32_t *distances, idx_t *labels,
55
- uint8_t *recons) const {
56
- search(n, x, k, distances, labels);
57
- for (idx_t i = 0; i < n; ++i) {
58
- for (idx_t j = 0; j < k; ++j) {
59
- idx_t ij = i * k + j;
60
- idx_t key = labels[ij];
61
- uint8_t *reconstructed = recons + ij * d;
62
- if (key < 0) {
63
- // Fill with NaNs
64
- memset(reconstructed, -1, sizeof(*reconstructed) * d);
65
- } else {
66
- reconstruct(key, reconstructed);
67
- }
54
+ void IndexBinary::search_and_reconstruct(
55
+ idx_t n,
56
+ const uint8_t* x,
57
+ idx_t k,
58
+ int32_t* distances,
59
+ idx_t* labels,
60
+ uint8_t* recons) const {
61
+ FAISS_THROW_IF_NOT(k > 0);
62
+
63
+ search(n, x, k, distances, labels);
64
+ for (idx_t i = 0; i < n; ++i) {
65
+ for (idx_t j = 0; j < k; ++j) {
66
+ idx_t ij = i * k + j;
67
+ idx_t key = labels[ij];
68
+ uint8_t* reconstructed = recons + ij * d;
69
+ if (key < 0) {
70
+ // Fill with NaNs
71
+ memset(reconstructed, -1, sizeof(*reconstructed) * d);
72
+ } else {
73
+ reconstruct(key, reconstructed);
74
+ }
75
+ }
68
76
  }
69
- }
70
77
  }
71
78
 
72
79
  void IndexBinary::display() const {
73
- printf("Index: %s -> %" PRId64 " elements\n", typeid (*this).name(), ntotal);
80
+ printf("Index: %s -> %" PRId64 " elements\n",
81
+ typeid(*this).name(),
82
+ ntotal);
74
83
  }
75
84
 
76
-
77
- } // namespace faiss
85
+ } // namespace faiss
@@ -11,17 +11,15 @@
11
11
  #define FAISS_INDEX_BINARY_H
12
12
 
13
13
  #include <cstdio>
14
- #include <typeinfo>
15
- #include <string>
16
14
  #include <sstream>
15
+ #include <string>
16
+ #include <typeinfo>
17
17
 
18
- #include <faiss/impl/FaissAssert.h>
19
18
  #include <faiss/Index.h>
20
-
19
+ #include <faiss/impl/FaissAssert.h>
21
20
 
22
21
  namespace faiss {
23
22
 
24
-
25
23
  /// Forward declarations see AuxIndexStructures.h
26
24
  struct IDSelector;
27
25
  struct RangeSearchResult;
@@ -34,134 +32,144 @@ struct RangeSearchResult;
34
32
  * vectors.
35
33
  */
36
34
  struct IndexBinary {
37
- using idx_t = Index::idx_t; ///< all indices are this type
38
- using component_t = uint8_t;
39
- using distance_t = int32_t;
40
-
41
- int d; ///< vector dimension
42
- int code_size; ///< number of bytes per vector ( = d / 8 )
43
- idx_t ntotal; ///< total nb of indexed vectors
44
- bool verbose; ///< verbosity level
45
-
46
- /// set if the Index does not require training, or if training is done already
47
- bool is_trained;
48
-
49
- /// type of metric this index uses for search
50
- MetricType metric_type;
51
-
52
- explicit IndexBinary(idx_t d = 0, MetricType metric = METRIC_L2)
53
- : d(d),
54
- code_size(d / 8),
55
- ntotal(0),
56
- verbose(false),
57
- is_trained(true),
58
- metric_type(metric) {
35
+ using idx_t = Index::idx_t; ///< all indices are this type
36
+ using component_t = uint8_t;
37
+ using distance_t = int32_t;
38
+
39
+ int d; ///< vector dimension
40
+ int code_size; ///< number of bytes per vector ( = d / 8 )
41
+ idx_t ntotal; ///< total nb of indexed vectors
42
+ bool verbose; ///< verbosity level
43
+
44
+ /// set if the Index does not require training, or if training is done
45
+ /// already
46
+ bool is_trained;
47
+
48
+ /// type of metric this index uses for search
49
+ MetricType metric_type;
50
+
51
+ explicit IndexBinary(idx_t d = 0, MetricType metric = METRIC_L2)
52
+ : d(d),
53
+ code_size(d / 8),
54
+ ntotal(0),
55
+ verbose(false),
56
+ is_trained(true),
57
+ metric_type(metric) {
59
58
  FAISS_THROW_IF_NOT(d % 8 == 0);
60
- }
61
-
62
- virtual ~IndexBinary();
63
-
64
-
65
- /** Perform training on a representative set of vectors.
66
- *
67
- * @param n nb of training vectors
68
- * @param x training vecors, size n * d / 8
69
- */
70
- virtual void train(idx_t n, const uint8_t *x);
71
-
72
- /** Add n vectors of dimension d to the index.
73
- *
74
- * Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
75
- * @param x input matrix, size n * d / 8
76
- */
77
- virtual void add(idx_t n, const uint8_t *x) = 0;
78
-
79
- /** Same as add, but stores xids instead of sequential ids.
80
- *
81
- * The default implementation fails with an assertion, as it is
82
- * not supported by all indexes.
83
- *
84
- * @param xids if non-null, ids to store for the vectors (size n)
85
- */
86
- virtual void add_with_ids(idx_t n, const uint8_t *x, const idx_t *xids);
87
-
88
- /** Query n vectors of dimension d to the index.
89
- *
90
- * return at most k vectors. If there are not enough results for a
91
- * query, the result array is padded with -1s.
92
- *
93
- * @param x input vectors to search, size n * d / 8
94
- * @param labels output labels of the NNs, size n*k
95
- * @param distances output pairwise distances, size n*k
96
- */
97
- virtual void search(idx_t n, const uint8_t *x, idx_t k,
98
- int32_t *distances, idx_t *labels) const = 0;
99
-
100
- /** Query n vectors of dimension d to the index.
101
- *
102
- * return all vectors with distance < radius. Note that many indexes
103
- * do not implement the range_search (only the k-NN search is
104
- * mandatory). The distances are converted to float to reuse the
105
- * RangeSearchResult structure, but they are integer. By convention,
106
- * only distances < radius (strict comparison) are returned,
107
- * ie. radius = 0 does not return any result and 1 returns only
108
- * exact same vectors.
109
- *
110
- * @param x input vectors to search, size n * d / 8
111
- * @param radius search radius
112
- * @param result result table
113
- */
114
- virtual void range_search(idx_t n, const uint8_t *x, int radius,
115
- RangeSearchResult *result) const;
116
-
117
- /** Return the indexes of the k vectors closest to the query x.
118
- *
119
- * This function is identical to search but only returns labels of neighbors.
120
- * @param x input vectors to search, size n * d / 8
121
- * @param labels output labels of the NNs, size n*k
122
- */
123
- void assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k = 1) const;
124
-
125
- /// Removes all elements from the database.
126
- virtual void reset() = 0;
127
-
128
- /** Removes IDs from the index. Not supported by all indexes.
129
- */
130
- virtual size_t remove_ids(const IDSelector& sel);
131
-
132
- /** Reconstruct a stored vector.
133
- *
134
- * This function may not be defined for some indexes.
135
- * @param key id of the vector to reconstruct
136
- * @param recons reconstucted vector (size d / 8)
137
- */
138
- virtual void reconstruct(idx_t key, uint8_t *recons) const;
139
-
140
-
141
- /** Reconstruct vectors i0 to i0 + ni - 1.
142
- *
143
- * This function may not be defined for some indexes.
144
- * @param recons reconstucted vectors (size ni * d / 8)
145
- */
146
- virtual void reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const;
147
-
148
- /** Similar to search, but also reconstructs the stored vectors (or an
149
- * approximation in the case of lossy coding) for the search results.
150
- *
151
- * If there are not enough results for a query, the resulting array
152
- * is padded with -1s.
153
- *
154
- * @param recons reconstructed vectors size (n, k, d)
155
- **/
156
- virtual void search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k,
157
- int32_t *distances, idx_t *labels,
158
- uint8_t *recons) const;
159
-
160
- /** Display the actual class name and some more info. */
161
- void display() const;
59
+ }
60
+
61
+ virtual ~IndexBinary();
62
+
63
+ /** Perform training on a representative set of vectors.
64
+ *
65
+ * @param n nb of training vectors
66
+ * @param x training vecors, size n * d / 8
67
+ */
68
+ virtual void train(idx_t n, const uint8_t* x);
69
+
70
+ /** Add n vectors of dimension d to the index.
71
+ *
72
+ * Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
73
+ * @param x input matrix, size n * d / 8
74
+ */
75
+ virtual void add(idx_t n, const uint8_t* x) = 0;
76
+
77
+ /** Same as add, but stores xids instead of sequential ids.
78
+ *
79
+ * The default implementation fails with an assertion, as it is
80
+ * not supported by all indexes.
81
+ *
82
+ * @param xids if non-null, ids to store for the vectors (size n)
83
+ */
84
+ virtual void add_with_ids(idx_t n, const uint8_t* x, const idx_t* xids);
85
+
86
+ /** Query n vectors of dimension d to the index.
87
+ *
88
+ * return at most k vectors. If there are not enough results for a
89
+ * query, the result array is padded with -1s.
90
+ *
91
+ * @param x input vectors to search, size n * d / 8
92
+ * @param labels output labels of the NNs, size n*k
93
+ * @param distances output pairwise distances, size n*k
94
+ */
95
+ virtual void search(
96
+ idx_t n,
97
+ const uint8_t* x,
98
+ idx_t k,
99
+ int32_t* distances,
100
+ idx_t* labels) const = 0;
101
+
102
+ /** Query n vectors of dimension d to the index.
103
+ *
104
+ * return all vectors with distance < radius. Note that many indexes
105
+ * do not implement the range_search (only the k-NN search is
106
+ * mandatory). The distances are converted to float to reuse the
107
+ * RangeSearchResult structure, but they are integer. By convention,
108
+ * only distances < radius (strict comparison) are returned,
109
+ * ie. radius = 0 does not return any result and 1 returns only
110
+ * exact same vectors.
111
+ *
112
+ * @param x input vectors to search, size n * d / 8
113
+ * @param radius search radius
114
+ * @param result result table
115
+ */
116
+ virtual void range_search(
117
+ idx_t n,
118
+ const uint8_t* x,
119
+ int radius,
120
+ RangeSearchResult* result) const;
121
+
122
+ /** Return the indexes of the k vectors closest to the query x.
123
+ *
124
+ * This function is identical to search but only returns labels of
125
+ * neighbors.
126
+ * @param x input vectors to search, size n * d / 8
127
+ * @param labels output labels of the NNs, size n*k
128
+ */
129
+ void assign(idx_t n, const uint8_t* x, idx_t* labels, idx_t k = 1) const;
130
+
131
+ /// Removes all elements from the database.
132
+ virtual void reset() = 0;
133
+
134
+ /** Removes IDs from the index. Not supported by all indexes.
135
+ */
136
+ virtual size_t remove_ids(const IDSelector& sel);
137
+
138
+ /** Reconstruct a stored vector.
139
+ *
140
+ * This function may not be defined for some indexes.
141
+ * @param key id of the vector to reconstruct
142
+ * @param recons reconstucted vector (size d / 8)
143
+ */
144
+ virtual void reconstruct(idx_t key, uint8_t* recons) const;
145
+
146
+ /** Reconstruct vectors i0 to i0 + ni - 1.
147
+ *
148
+ * This function may not be defined for some indexes.
149
+ * @param recons reconstucted vectors (size ni * d / 8)
150
+ */
151
+ virtual void reconstruct_n(idx_t i0, idx_t ni, uint8_t* recons) const;
152
+
153
+ /** Similar to search, but also reconstructs the stored vectors (or an
154
+ * approximation in the case of lossy coding) for the search results.
155
+ *
156
+ * If there are not enough results for a query, the resulting array
157
+ * is padded with -1s.
158
+ *
159
+ * @param recons reconstructed vectors size (n, k, d)
160
+ **/
161
+ virtual void search_and_reconstruct(
162
+ idx_t n,
163
+ const uint8_t* x,
164
+ idx_t k,
165
+ int32_t* distances,
166
+ idx_t* labels,
167
+ uint8_t* recons) const;
168
+
169
+ /** Display the actual class name and some more info. */
170
+ void display() const;
162
171
  };
163
172
 
173
+ } // namespace faiss
164
174
 
165
- } // namespace faiss
166
-
167
- #endif // FAISS_INDEX_BINARY_H
175
+ #endif // FAISS_INDEX_BINARY_H