faiss 0.2.0 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +7 -7
  5. data/ext/faiss/extconf.rb +6 -3
  6. data/ext/faiss/numo.hpp +4 -4
  7. data/ext/faiss/utils.cpp +1 -1
  8. data/ext/faiss/utils.h +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  11. data/vendor/faiss/faiss/AutoTune.h +55 -56
  12. data/vendor/faiss/faiss/Clustering.cpp +365 -194
  13. data/vendor/faiss/faiss/Clustering.h +102 -35
  14. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  15. data/vendor/faiss/faiss/IVFlib.h +48 -51
  16. data/vendor/faiss/faiss/Index.cpp +85 -103
  17. data/vendor/faiss/faiss/Index.h +54 -48
  18. data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
  19. data/vendor/faiss/faiss/Index2Layer.h +22 -36
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
  21. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
  22. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  23. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  24. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  25. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  26. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  27. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  28. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  29. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  30. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  31. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  32. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  33. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  34. data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
  35. data/vendor/faiss/faiss/IndexFlat.h +42 -59
  36. data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
  37. data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
  38. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  39. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  40. data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
  41. data/vendor/faiss/faiss/IndexIVF.h +169 -118
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
  54. data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
  55. data/vendor/faiss/faiss/IndexLSH.h +20 -38
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -82
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
  69. data/vendor/faiss/faiss/IndexRefine.h +32 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
  73. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
  74. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  75. data/vendor/faiss/faiss/IndexShards.h +85 -73
  76. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  77. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  78. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  79. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  80. data/vendor/faiss/faiss/MetricType.h +7 -7
  81. data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
  82. data/vendor/faiss/faiss/VectorTransform.h +64 -89
  83. data/vendor/faiss/faiss/clone_index.cpp +78 -73
  84. data/vendor/faiss/faiss/clone_index.h +4 -9
  85. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  86. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  87. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
  88. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  89. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  90. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  91. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  92. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  93. data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
  94. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  95. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  96. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  97. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  101. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  102. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  106. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  108. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  110. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  112. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  113. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  114. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  115. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  116. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  121. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  122. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  124. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  125. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  126. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  128. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  129. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  130. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  135. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  136. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  137. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  138. data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
  139. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  140. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
  142. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  144. data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
  145. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  146. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  148. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  149. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  151. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
  153. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  154. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  156. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  157. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  158. data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
  159. data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
  160. data/vendor/faiss/faiss/impl/io.cpp +76 -95
  161. data/vendor/faiss/faiss/impl/io.h +31 -41
  162. data/vendor/faiss/faiss/impl/io_macros.h +60 -29
  163. data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
  164. data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
  165. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  166. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  167. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  171. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  172. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  173. data/vendor/faiss/faiss/index_factory.cpp +619 -397
  174. data/vendor/faiss/faiss/index_factory.h +8 -6
  175. data/vendor/faiss/faiss/index_io.h +23 -26
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  177. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  178. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  179. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  180. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  181. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  183. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  185. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  186. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  187. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  188. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  189. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  190. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  191. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  192. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  193. data/vendor/faiss/faiss/utils/distances.cpp +305 -312
  194. data/vendor/faiss/faiss/utils/distances.h +170 -122
  195. data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
  196. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  197. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  198. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  199. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  200. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  201. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  202. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  203. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  204. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  205. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  206. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  207. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  208. data/vendor/faiss/faiss/utils/random.h +13 -16
  209. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  210. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  211. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  212. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  213. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  214. data/vendor/faiss/faiss/utils/utils.h +54 -49
  215. metadata +29 -4
@@ -17,61 +17,69 @@ namespace faiss {
17
17
 
18
18
  IndexBinary::~IndexBinary() {}
19
19
 
20
- void IndexBinary::train(idx_t, const uint8_t *) {
21
- // Does nothing by default.
20
+ void IndexBinary::train(idx_t, const uint8_t*) {
21
+ // Does nothing by default.
22
22
  }
23
23
 
24
- void IndexBinary::range_search(idx_t, const uint8_t *, int,
25
- RangeSearchResult *) const {
26
- FAISS_THROW_MSG("range search not implemented");
24
+ void IndexBinary::range_search(idx_t, const uint8_t*, int, RangeSearchResult*)
25
+ const {
26
+ FAISS_THROW_MSG("range search not implemented");
27
27
  }
28
28
 
29
- void IndexBinary::assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k) const {
30
- std::vector<int> distances(n * k);
31
- search(n, x, k, distances.data(), labels);
29
+ void IndexBinary::assign(idx_t n, const uint8_t* x, idx_t* labels, idx_t k)
30
+ const {
31
+ std::vector<int> distances(n * k);
32
+ search(n, x, k, distances.data(), labels);
32
33
  }
33
34
 
34
- void IndexBinary::add_with_ids(idx_t, const uint8_t *, const idx_t *) {
35
- FAISS_THROW_MSG("add_with_ids not implemented for this type of index");
35
+ void IndexBinary::add_with_ids(idx_t, const uint8_t*, const idx_t*) {
36
+ FAISS_THROW_MSG("add_with_ids not implemented for this type of index");
36
37
  }
37
38
 
38
39
  size_t IndexBinary::remove_ids(const IDSelector&) {
39
- FAISS_THROW_MSG("remove_ids not implemented for this type of index");
40
- return 0;
40
+ FAISS_THROW_MSG("remove_ids not implemented for this type of index");
41
+ return 0;
41
42
  }
42
43
 
43
- void IndexBinary::reconstruct(idx_t, uint8_t *) const {
44
- FAISS_THROW_MSG("reconstruct not implemented for this type of index");
44
+ void IndexBinary::reconstruct(idx_t, uint8_t*) const {
45
+ FAISS_THROW_MSG("reconstruct not implemented for this type of index");
45
46
  }
46
47
 
47
- void IndexBinary::reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const {
48
- for (idx_t i = 0; i < ni; i++) {
49
- reconstruct(i0 + i, recons + i * d);
50
- }
48
+ void IndexBinary::reconstruct_n(idx_t i0, idx_t ni, uint8_t* recons) const {
49
+ for (idx_t i = 0; i < ni; i++) {
50
+ reconstruct(i0 + i, recons + i * d);
51
+ }
51
52
  }
52
53
 
53
- void IndexBinary::search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k,
54
- int32_t *distances, idx_t *labels,
55
- uint8_t *recons) const {
56
- search(n, x, k, distances, labels);
57
- for (idx_t i = 0; i < n; ++i) {
58
- for (idx_t j = 0; j < k; ++j) {
59
- idx_t ij = i * k + j;
60
- idx_t key = labels[ij];
61
- uint8_t *reconstructed = recons + ij * d;
62
- if (key < 0) {
63
- // Fill with NaNs
64
- memset(reconstructed, -1, sizeof(*reconstructed) * d);
65
- } else {
66
- reconstruct(key, reconstructed);
67
- }
54
+ void IndexBinary::search_and_reconstruct(
55
+ idx_t n,
56
+ const uint8_t* x,
57
+ idx_t k,
58
+ int32_t* distances,
59
+ idx_t* labels,
60
+ uint8_t* recons) const {
61
+ FAISS_THROW_IF_NOT(k > 0);
62
+
63
+ search(n, x, k, distances, labels);
64
+ for (idx_t i = 0; i < n; ++i) {
65
+ for (idx_t j = 0; j < k; ++j) {
66
+ idx_t ij = i * k + j;
67
+ idx_t key = labels[ij];
68
+ uint8_t* reconstructed = recons + ij * d;
69
+ if (key < 0) {
70
+ // Fill with NaNs
71
+ memset(reconstructed, -1, sizeof(*reconstructed) * d);
72
+ } else {
73
+ reconstruct(key, reconstructed);
74
+ }
75
+ }
68
76
  }
69
- }
70
77
  }
71
78
 
72
79
  void IndexBinary::display() const {
73
- printf("Index: %s -> %" PRId64 " elements\n", typeid (*this).name(), ntotal);
80
+ printf("Index: %s -> %" PRId64 " elements\n",
81
+ typeid(*this).name(),
82
+ ntotal);
74
83
  }
75
84
 
76
-
77
- } // namespace faiss
85
+ } // namespace faiss
@@ -11,17 +11,15 @@
11
11
  #define FAISS_INDEX_BINARY_H
12
12
 
13
13
  #include <cstdio>
14
- #include <typeinfo>
15
- #include <string>
16
14
  #include <sstream>
15
+ #include <string>
16
+ #include <typeinfo>
17
17
 
18
- #include <faiss/impl/FaissAssert.h>
19
18
  #include <faiss/Index.h>
20
-
19
+ #include <faiss/impl/FaissAssert.h>
21
20
 
22
21
  namespace faiss {
23
22
 
24
-
25
23
  /// Forward declarations see AuxIndexStructures.h
26
24
  struct IDSelector;
27
25
  struct RangeSearchResult;
@@ -34,134 +32,144 @@ struct RangeSearchResult;
34
32
  * vectors.
35
33
  */
36
34
  struct IndexBinary {
37
- using idx_t = Index::idx_t; ///< all indices are this type
38
- using component_t = uint8_t;
39
- using distance_t = int32_t;
40
-
41
- int d; ///< vector dimension
42
- int code_size; ///< number of bytes per vector ( = d / 8 )
43
- idx_t ntotal; ///< total nb of indexed vectors
44
- bool verbose; ///< verbosity level
45
-
46
- /// set if the Index does not require training, or if training is done already
47
- bool is_trained;
48
-
49
- /// type of metric this index uses for search
50
- MetricType metric_type;
51
-
52
- explicit IndexBinary(idx_t d = 0, MetricType metric = METRIC_L2)
53
- : d(d),
54
- code_size(d / 8),
55
- ntotal(0),
56
- verbose(false),
57
- is_trained(true),
58
- metric_type(metric) {
35
+ using idx_t = Index::idx_t; ///< all indices are this type
36
+ using component_t = uint8_t;
37
+ using distance_t = int32_t;
38
+
39
+ int d; ///< vector dimension
40
+ int code_size; ///< number of bytes per vector ( = d / 8 )
41
+ idx_t ntotal; ///< total nb of indexed vectors
42
+ bool verbose; ///< verbosity level
43
+
44
+ /// set if the Index does not require training, or if training is done
45
+ /// already
46
+ bool is_trained;
47
+
48
+ /// type of metric this index uses for search
49
+ MetricType metric_type;
50
+
51
+ explicit IndexBinary(idx_t d = 0, MetricType metric = METRIC_L2)
52
+ : d(d),
53
+ code_size(d / 8),
54
+ ntotal(0),
55
+ verbose(false),
56
+ is_trained(true),
57
+ metric_type(metric) {
59
58
  FAISS_THROW_IF_NOT(d % 8 == 0);
60
- }
61
-
62
- virtual ~IndexBinary();
63
-
64
-
65
- /** Perform training on a representative set of vectors.
66
- *
67
- * @param n nb of training vectors
68
- * @param x training vecors, size n * d / 8
69
- */
70
- virtual void train(idx_t n, const uint8_t *x);
71
-
72
- /** Add n vectors of dimension d to the index.
73
- *
74
- * Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
75
- * @param x input matrix, size n * d / 8
76
- */
77
- virtual void add(idx_t n, const uint8_t *x) = 0;
78
-
79
- /** Same as add, but stores xids instead of sequential ids.
80
- *
81
- * The default implementation fails with an assertion, as it is
82
- * not supported by all indexes.
83
- *
84
- * @param xids if non-null, ids to store for the vectors (size n)
85
- */
86
- virtual void add_with_ids(idx_t n, const uint8_t *x, const idx_t *xids);
87
-
88
- /** Query n vectors of dimension d to the index.
89
- *
90
- * return at most k vectors. If there are not enough results for a
91
- * query, the result array is padded with -1s.
92
- *
93
- * @param x input vectors to search, size n * d / 8
94
- * @param labels output labels of the NNs, size n*k
95
- * @param distances output pairwise distances, size n*k
96
- */
97
- virtual void search(idx_t n, const uint8_t *x, idx_t k,
98
- int32_t *distances, idx_t *labels) const = 0;
99
-
100
- /** Query n vectors of dimension d to the index.
101
- *
102
- * return all vectors with distance < radius. Note that many indexes
103
- * do not implement the range_search (only the k-NN search is
104
- * mandatory). The distances are converted to float to reuse the
105
- * RangeSearchResult structure, but they are integer. By convention,
106
- * only distances < radius (strict comparison) are returned,
107
- * ie. radius = 0 does not return any result and 1 returns only
108
- * exact same vectors.
109
- *
110
- * @param x input vectors to search, size n * d / 8
111
- * @param radius search radius
112
- * @param result result table
113
- */
114
- virtual void range_search(idx_t n, const uint8_t *x, int radius,
115
- RangeSearchResult *result) const;
116
-
117
- /** Return the indexes of the k vectors closest to the query x.
118
- *
119
- * This function is identical to search but only returns labels of neighbors.
120
- * @param x input vectors to search, size n * d / 8
121
- * @param labels output labels of the NNs, size n*k
122
- */
123
- void assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k = 1) const;
124
-
125
- /// Removes all elements from the database.
126
- virtual void reset() = 0;
127
-
128
- /** Removes IDs from the index. Not supported by all indexes.
129
- */
130
- virtual size_t remove_ids(const IDSelector& sel);
131
-
132
- /** Reconstruct a stored vector.
133
- *
134
- * This function may not be defined for some indexes.
135
- * @param key id of the vector to reconstruct
136
- * @param recons reconstucted vector (size d / 8)
137
- */
138
- virtual void reconstruct(idx_t key, uint8_t *recons) const;
139
-
140
-
141
- /** Reconstruct vectors i0 to i0 + ni - 1.
142
- *
143
- * This function may not be defined for some indexes.
144
- * @param recons reconstucted vectors (size ni * d / 8)
145
- */
146
- virtual void reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const;
147
-
148
- /** Similar to search, but also reconstructs the stored vectors (or an
149
- * approximation in the case of lossy coding) for the search results.
150
- *
151
- * If there are not enough results for a query, the resulting array
152
- * is padded with -1s.
153
- *
154
- * @param recons reconstructed vectors size (n, k, d)
155
- **/
156
- virtual void search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k,
157
- int32_t *distances, idx_t *labels,
158
- uint8_t *recons) const;
159
-
160
- /** Display the actual class name and some more info. */
161
- void display() const;
59
+ }
60
+
61
+ virtual ~IndexBinary();
62
+
63
+ /** Perform training on a representative set of vectors.
64
+ *
65
+ * @param n nb of training vectors
66
+ * @param x training vecors, size n * d / 8
67
+ */
68
+ virtual void train(idx_t n, const uint8_t* x);
69
+
70
+ /** Add n vectors of dimension d to the index.
71
+ *
72
+ * Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
73
+ * @param x input matrix, size n * d / 8
74
+ */
75
+ virtual void add(idx_t n, const uint8_t* x) = 0;
76
+
77
+ /** Same as add, but stores xids instead of sequential ids.
78
+ *
79
+ * The default implementation fails with an assertion, as it is
80
+ * not supported by all indexes.
81
+ *
82
+ * @param xids if non-null, ids to store for the vectors (size n)
83
+ */
84
+ virtual void add_with_ids(idx_t n, const uint8_t* x, const idx_t* xids);
85
+
86
+ /** Query n vectors of dimension d to the index.
87
+ *
88
+ * return at most k vectors. If there are not enough results for a
89
+ * query, the result array is padded with -1s.
90
+ *
91
+ * @param x input vectors to search, size n * d / 8
92
+ * @param labels output labels of the NNs, size n*k
93
+ * @param distances output pairwise distances, size n*k
94
+ */
95
+ virtual void search(
96
+ idx_t n,
97
+ const uint8_t* x,
98
+ idx_t k,
99
+ int32_t* distances,
100
+ idx_t* labels) const = 0;
101
+
102
+ /** Query n vectors of dimension d to the index.
103
+ *
104
+ * return all vectors with distance < radius. Note that many indexes
105
+ * do not implement the range_search (only the k-NN search is
106
+ * mandatory). The distances are converted to float to reuse the
107
+ * RangeSearchResult structure, but they are integer. By convention,
108
+ * only distances < radius (strict comparison) are returned,
109
+ * ie. radius = 0 does not return any result and 1 returns only
110
+ * exact same vectors.
111
+ *
112
+ * @param x input vectors to search, size n * d / 8
113
+ * @param radius search radius
114
+ * @param result result table
115
+ */
116
+ virtual void range_search(
117
+ idx_t n,
118
+ const uint8_t* x,
119
+ int radius,
120
+ RangeSearchResult* result) const;
121
+
122
+ /** Return the indexes of the k vectors closest to the query x.
123
+ *
124
+ * This function is identical to search but only returns labels of
125
+ * neighbors.
126
+ * @param x input vectors to search, size n * d / 8
127
+ * @param labels output labels of the NNs, size n*k
128
+ */
129
+ void assign(idx_t n, const uint8_t* x, idx_t* labels, idx_t k = 1) const;
130
+
131
+ /// Removes all elements from the database.
132
+ virtual void reset() = 0;
133
+
134
+ /** Removes IDs from the index. Not supported by all indexes.
135
+ */
136
+ virtual size_t remove_ids(const IDSelector& sel);
137
+
138
+ /** Reconstruct a stored vector.
139
+ *
140
+ * This function may not be defined for some indexes.
141
+ * @param key id of the vector to reconstruct
142
+ * @param recons reconstucted vector (size d / 8)
143
+ */
144
+ virtual void reconstruct(idx_t key, uint8_t* recons) const;
145
+
146
+ /** Reconstruct vectors i0 to i0 + ni - 1.
147
+ *
148
+ * This function may not be defined for some indexes.
149
+ * @param recons reconstucted vectors (size ni * d / 8)
150
+ */
151
+ virtual void reconstruct_n(idx_t i0, idx_t ni, uint8_t* recons) const;
152
+
153
+ /** Similar to search, but also reconstructs the stored vectors (or an
154
+ * approximation in the case of lossy coding) for the search results.
155
+ *
156
+ * If there are not enough results for a query, the resulting array
157
+ * is padded with -1s.
158
+ *
159
+ * @param recons reconstructed vectors size (n, k, d)
160
+ **/
161
+ virtual void search_and_reconstruct(
162
+ idx_t n,
163
+ const uint8_t* x,
164
+ idx_t k,
165
+ int32_t* distances,
166
+ idx_t* labels,
167
+ uint8_t* recons) const;
168
+
169
+ /** Display the actual class name and some more info. */
170
+ void display() const;
162
171
  };
163
172
 
173
+ } // namespace faiss
164
174
 
165
- } // namespace faiss
166
-
167
- #endif // FAISS_INDEX_BINARY_H
175
+ #endif // FAISS_INDEX_BINARY_H
@@ -9,80 +9,100 @@
9
9
 
10
10
  #include <faiss/IndexBinaryFlat.h>
11
11
 
12
- #include <cstring>
12
+ #include <faiss/impl/AuxIndexStructures.h>
13
+ #include <faiss/impl/FaissAssert.h>
14
+ #include <faiss/utils/Heap.h>
13
15
  #include <faiss/utils/hamming.h>
14
16
  #include <faiss/utils/utils.h>
15
- #include <faiss/utils/Heap.h>
16
- #include <faiss/impl/FaissAssert.h>
17
- #include <faiss/impl/AuxIndexStructures.h>
17
+ #include <cstring>
18
18
 
19
19
  namespace faiss {
20
20
 
21
- IndexBinaryFlat::IndexBinaryFlat(idx_t d)
22
- : IndexBinary(d) {}
21
+ IndexBinaryFlat::IndexBinaryFlat(idx_t d) : IndexBinary(d) {}
23
22
 
24
- void IndexBinaryFlat::add(idx_t n, const uint8_t *x) {
25
- xb.insert(xb.end(), x, x + n * code_size);
26
- ntotal += n;
23
+ void IndexBinaryFlat::add(idx_t n, const uint8_t* x) {
24
+ xb.insert(xb.end(), x, x + n * code_size);
25
+ ntotal += n;
27
26
  }
28
27
 
29
28
  void IndexBinaryFlat::reset() {
30
- xb.clear();
31
- ntotal = 0;
29
+ xb.clear();
30
+ ntotal = 0;
32
31
  }
33
32
 
34
- void IndexBinaryFlat::search(idx_t n, const uint8_t *x, idx_t k,
35
- int32_t *distances, idx_t *labels) const {
36
- const idx_t block_size = query_batch_size;
37
- for (idx_t s = 0; s < n; s += block_size) {
38
- idx_t nn = block_size;
39
- if (s + block_size > n) {
40
- nn = n - s;
41
- }
33
+ void IndexBinaryFlat::search(
34
+ idx_t n,
35
+ const uint8_t* x,
36
+ idx_t k,
37
+ int32_t* distances,
38
+ idx_t* labels) const {
39
+ FAISS_THROW_IF_NOT(k > 0);
42
40
 
43
- if (use_heap) {
44
- // We see the distances and labels as heaps.
45
- int_maxheap_array_t res = {
46
- size_t(nn), size_t(k), labels + s * k, distances + s * k
47
- };
41
+ const idx_t block_size = query_batch_size;
42
+ for (idx_t s = 0; s < n; s += block_size) {
43
+ idx_t nn = block_size;
44
+ if (s + block_size > n) {
45
+ nn = n - s;
46
+ }
48
47
 
49
- hammings_knn_hc(&res, x + s * code_size, xb.data(), ntotal, code_size,
50
- /* ordered = */ true);
51
- } else {
52
- hammings_knn_mc(x + s * code_size, xb.data(), nn, ntotal, k, code_size,
53
- distances + s * k, labels + s * k);
48
+ if (use_heap) {
49
+ // We see the distances and labels as heaps.
50
+ int_maxheap_array_t res = {
51
+ size_t(nn), size_t(k), labels + s * k, distances + s * k};
52
+
53
+ hammings_knn_hc(
54
+ &res,
55
+ x + s * code_size,
56
+ xb.data(),
57
+ ntotal,
58
+ code_size,
59
+ /* ordered = */ true);
60
+ } else {
61
+ hammings_knn_mc(
62
+ x + s * code_size,
63
+ xb.data(),
64
+ nn,
65
+ ntotal,
66
+ k,
67
+ code_size,
68
+ distances + s * k,
69
+ labels + s * k);
70
+ }
54
71
  }
55
- }
56
72
  }
57
73
 
58
74
  size_t IndexBinaryFlat::remove_ids(const IDSelector& sel) {
59
- idx_t j = 0;
60
- for (idx_t i = 0; i < ntotal; i++) {
61
- if (sel.is_member(i)) {
62
- // should be removed
63
- } else {
64
- if (i > j) {
65
- memmove(&xb[code_size * j], &xb[code_size * i], sizeof(xb[0]) * code_size);
66
- }
67
- j++;
75
+ idx_t j = 0;
76
+ for (idx_t i = 0; i < ntotal; i++) {
77
+ if (sel.is_member(i)) {
78
+ // should be removed
79
+ } else {
80
+ if (i > j) {
81
+ memmove(&xb[code_size * j],
82
+ &xb[code_size * i],
83
+ sizeof(xb[0]) * code_size);
84
+ }
85
+ j++;
86
+ }
87
+ }
88
+ long nremove = ntotal - j;
89
+ if (nremove > 0) {
90
+ ntotal = j;
91
+ xb.resize(ntotal * code_size);
68
92
  }
69
- }
70
- long nremove = ntotal - j;
71
- if (nremove > 0) {
72
- ntotal = j;
73
- xb.resize(ntotal * code_size);
74
- }
75
- return nremove;
93
+ return nremove;
76
94
  }
77
95
 
78
- void IndexBinaryFlat::reconstruct(idx_t key, uint8_t *recons) const {
79
- memcpy(recons, &(xb[code_size * key]), sizeof(*recons) * code_size);
96
+ void IndexBinaryFlat::reconstruct(idx_t key, uint8_t* recons) const {
97
+ memcpy(recons, &(xb[code_size * key]), sizeof(*recons) * code_size);
80
98
  }
81
99
 
82
- void IndexBinaryFlat::range_search(idx_t n, const uint8_t *x, int radius,
83
- RangeSearchResult *result) const
84
- {
85
- hamming_range_search (x, xb.data(), n, ntotal, radius, code_size, result);
100
+ void IndexBinaryFlat::range_search(
101
+ idx_t n,
102
+ const uint8_t* x,
103
+ int radius,
104
+ RangeSearchResult* result) const {
105
+ hamming_range_search(x, xb.data(), n, ntotal, radius, code_size, result);
86
106
  }
87
107
 
88
- } // namespace faiss
108
+ } // namespace faiss
@@ -16,42 +16,47 @@
16
16
 
17
17
  namespace faiss {
18
18
 
19
-
20
19
  /** Index that stores the full vectors and performs exhaustive search. */
21
20
  struct IndexBinaryFlat : IndexBinary {
22
- /// database vectors, size ntotal * d / 8
23
- std::vector<uint8_t> xb;
21
+ /// database vectors, size ntotal * d / 8
22
+ std::vector<uint8_t> xb;
24
23
 
25
- /** Select between using a heap or counting to select the k smallest values
26
- * when scanning inverted lists.
27
- */
28
- bool use_heap = true;
24
+ /** Select between using a heap or counting to select the k smallest values
25
+ * when scanning inverted lists.
26
+ */
27
+ bool use_heap = true;
29
28
 
30
- size_t query_batch_size = 32;
29
+ size_t query_batch_size = 32;
31
30
 
32
- explicit IndexBinaryFlat(idx_t d);
31
+ explicit IndexBinaryFlat(idx_t d);
33
32
 
34
- void add(idx_t n, const uint8_t *x) override;
33
+ void add(idx_t n, const uint8_t* x) override;
35
34
 
36
- void reset() override;
35
+ void reset() override;
37
36
 
38
- void search(idx_t n, const uint8_t *x, idx_t k,
39
- int32_t *distances, idx_t *labels) const override;
37
+ void search(
38
+ idx_t n,
39
+ const uint8_t* x,
40
+ idx_t k,
41
+ int32_t* distances,
42
+ idx_t* labels) const override;
40
43
 
41
- void range_search(idx_t n, const uint8_t *x, int radius,
42
- RangeSearchResult *result) const override;
44
+ void range_search(
45
+ idx_t n,
46
+ const uint8_t* x,
47
+ int radius,
48
+ RangeSearchResult* result) const override;
43
49
 
44
- void reconstruct(idx_t key, uint8_t *recons) const override;
50
+ void reconstruct(idx_t key, uint8_t* recons) const override;
45
51
 
46
- /** Remove some ids. Note that because of the indexing structure,
47
- * the semantics of this operation are different from the usual ones:
48
- * the new ids are shifted. */
49
- size_t remove_ids(const IDSelector& sel) override;
52
+ /** Remove some ids. Note that because of the indexing structure,
53
+ * the semantics of this operation are different from the usual ones:
54
+ * the new ids are shifted. */
55
+ size_t remove_ids(const IDSelector& sel) override;
50
56
 
51
- IndexBinaryFlat() {}
57
+ IndexBinaryFlat() {}
52
58
  };
53
59
 
60
+ } // namespace faiss
54
61
 
55
- } // namespace faiss
56
-
57
- #endif // INDEX_BINARY_FLAT_H
62
+ #endif // INDEX_BINARY_FLAT_H