faiss 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/faiss/version.rb +1 -1
  4. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  5. data/vendor/faiss/faiss/AutoTune.h +55 -56
  6. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  7. data/vendor/faiss/faiss/Clustering.h +88 -35
  8. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  9. data/vendor/faiss/faiss/IVFlib.h +48 -51
  10. data/vendor/faiss/faiss/Index.cpp +85 -103
  11. data/vendor/faiss/faiss/Index.h +54 -48
  12. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  13. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  14. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  15. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  16. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  17. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  18. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  19. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  20. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  21. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  22. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  23. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  24. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  25. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  26. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  27. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  29. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  30. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  31. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  32. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  33. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  34. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  35. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  36. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  37. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  38. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  39. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  40. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  41. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  42. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  43. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  44. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  45. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  46. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  47. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  48. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  49. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  50. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  51. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  52. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  53. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  54. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  55. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  56. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  57. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  58. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  59. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  60. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  61. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  62. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  63. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  64. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  65. data/vendor/faiss/faiss/IndexShards.h +85 -73
  66. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  67. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  68. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  69. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  70. data/vendor/faiss/faiss/MetricType.h +7 -7
  71. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  72. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  73. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  74. data/vendor/faiss/faiss/clone_index.h +4 -9
  75. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  76. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  77. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  78. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  79. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  80. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  81. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  82. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  83. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  84. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  85. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  86. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  87. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  88. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  89. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  90. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  91. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  92. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  93. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  94. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  95. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  96. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  97. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  98. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  99. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  100. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  101. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  102. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  103. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  104. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  105. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  106. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  107. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  108. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  109. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  110. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  111. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  112. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  113. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  114. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  115. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  116. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  117. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  118. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  119. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  121. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  123. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  124. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  125. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  126. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  127. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  128. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  130. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  131. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  132. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  133. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  134. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  135. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  136. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  137. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  138. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  139. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  141. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  142. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  144. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  145. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  146. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  147. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  148. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  149. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  150. data/vendor/faiss/faiss/impl/io.h +31 -41
  151. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  152. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  153. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  154. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  155. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  156. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  157. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  158. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  159. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  160. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  161. data/vendor/faiss/faiss/index_factory.h +6 -7
  162. data/vendor/faiss/faiss/index_io.h +23 -26
  163. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  164. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  165. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  166. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  167. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  168. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  169. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  170. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  171. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  172. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  173. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  174. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  175. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  176. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  177. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  178. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  179. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  180. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  181. data/vendor/faiss/faiss/utils/distances.h +133 -118
  182. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  183. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  184. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  185. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  186. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  187. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  188. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  189. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  190. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  191. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  192. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  193. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  194. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  195. data/vendor/faiss/faiss/utils/random.h +13 -16
  196. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  197. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  198. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  199. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  200. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  201. data/vendor/faiss/faiss/utils/utils.h +53 -48
  202. metadata +20 -2
@@ -10,15 +10,13 @@
10
10
  #ifndef FAISS_INDEX_BINARY_IVF_H
11
11
  #define FAISS_INDEX_BINARY_IVF_H
12
12
 
13
-
14
13
  #include <vector>
15
14
 
15
+ #include <faiss/Clustering.h>
16
16
  #include <faiss/IndexBinary.h>
17
17
  #include <faiss/IndexIVF.h>
18
- #include <faiss/Clustering.h>
19
18
  #include <faiss/utils/Heap.h>
20
19
 
21
-
22
20
  namespace faiss {
23
21
 
24
22
  struct BinaryInvertedListScanner;
@@ -34,11 +32,11 @@ struct BinaryInvertedListScanner;
34
32
  */
35
33
  struct IndexBinaryIVF : IndexBinary {
36
34
  /// Access to the actual data
37
- InvertedLists *invlists;
35
+ InvertedLists* invlists;
38
36
  bool own_invlists;
39
37
 
40
- size_t nprobe; ///< number of probes at query time
41
- size_t max_codes; ///< max nb of codes to visit to do a query
38
+ size_t nprobe; ///< number of probes at query time
39
+ size_t max_codes; ///< max nb of codes to visit to do a query
42
40
 
43
41
  /** Select between using a heap or counting to select the k smallest values
44
42
  * when scanning inverted lists.
@@ -48,20 +46,20 @@ struct IndexBinaryIVF : IndexBinary {
48
46
  /// map for direct access to the elements. Enables reconstruct().
49
47
  DirectMap direct_map;
50
48
 
51
- IndexBinary *quantizer; ///< quantizer that maps vectors to inverted lists
52
- size_t nlist; ///< number of possible key values
49
+ IndexBinary* quantizer; ///< quantizer that maps vectors to inverted lists
50
+ size_t nlist; ///< number of possible key values
53
51
 
54
- bool own_fields; ///< whether object owns the quantizer
52
+ bool own_fields; ///< whether object owns the quantizer
55
53
 
56
54
  ClusteringParameters cp; ///< to override default clustering params
57
- Index *clustering_index; ///< to override index used during clustering
55
+ Index* clustering_index; ///< to override index used during clustering
58
56
 
59
57
  /** The Inverted file takes a quantizer (an IndexBinary) on input,
60
58
  * which implements the function mapping a vector to a list
61
59
  * identifier. The pointer is borrowed: the quantizer should not
62
60
  * be deleted while the IndexBinaryIVF is in use.
63
61
  */
64
- IndexBinaryIVF(IndexBinary *quantizer, size_t d, size_t nlist);
62
+ IndexBinaryIVF(IndexBinary* quantizer, size_t d, size_t nlist);
65
63
 
66
64
  IndexBinaryIVF();
67
65
 
@@ -70,15 +68,23 @@ struct IndexBinaryIVF : IndexBinary {
70
68
  void reset() override;
71
69
 
72
70
  /// Trains the quantizer
73
- void train(idx_t n, const uint8_t *x) override;
71
+ void train(idx_t n, const uint8_t* x) override;
74
72
 
75
- void add(idx_t n, const uint8_t *x) override;
73
+ void add(idx_t n, const uint8_t* x) override;
76
74
 
77
- void add_with_ids(idx_t n, const uint8_t *x, const idx_t *xids) override;
75
+ void add_with_ids(idx_t n, const uint8_t* x, const idx_t* xids) override;
78
76
 
79
- /// same as add_with_ids, with precomputed coarse quantizer
80
- void add_core (idx_t n, const uint8_t * x, const idx_t *xids,
81
- const idx_t *precomputed_idx);
77
+ /** Implementation of vector addition where the vector assignments are
78
+ * predefined.
79
+ *
80
+ * @param precomputed_idx quantization indices for the input vectors
81
+ * (size n)
82
+ */
83
+ void add_core(
84
+ idx_t n,
85
+ const uint8_t* x,
86
+ const idx_t* xids,
87
+ const idx_t* precomputed_idx);
82
88
 
83
89
  /** Search a set of vectors, that are pre-quantized by the IVF
84
90
  * quantizer. Fill in the corresponding heaps with the query
@@ -97,25 +103,43 @@ struct IndexBinaryIVF : IndexBinary {
97
103
  * instead of ids (used for reranking).
98
104
  * @param params used to override the object's search parameters
99
105
  */
100
- void search_preassigned(idx_t n, const uint8_t *x, idx_t k,
101
- const idx_t *assign,
102
- const int32_t *centroid_dis,
103
- int32_t *distances, idx_t *labels,
104
- bool store_pairs,
105
- const IVFSearchParameters *params=nullptr
106
- ) const;
107
-
108
- virtual BinaryInvertedListScanner *get_InvertedListScanner (
109
- bool store_pairs=false) const;
106
+ void search_preassigned(
107
+ idx_t n,
108
+ const uint8_t* x,
109
+ idx_t k,
110
+ const idx_t* assign,
111
+ const int32_t* centroid_dis,
112
+ int32_t* distances,
113
+ idx_t* labels,
114
+ bool store_pairs,
115
+ const IVFSearchParameters* params = nullptr) const;
116
+
117
+ virtual BinaryInvertedListScanner* get_InvertedListScanner(
118
+ bool store_pairs = false) const;
110
119
 
111
120
  /** assign the vectors, then call search_preassign */
112
- void search(idx_t n, const uint8_t *x, idx_t k,
113
- int32_t *distances, idx_t *labels) const override;
114
-
115
- void range_search(idx_t n, const uint8_t *x, int radius,
116
- RangeSearchResult *result) const override;
117
-
118
- void reconstruct(idx_t key, uint8_t *recons) const override;
121
+ void search(
122
+ idx_t n,
123
+ const uint8_t* x,
124
+ idx_t k,
125
+ int32_t* distances,
126
+ idx_t* labels) const override;
127
+
128
+ void range_search(
129
+ idx_t n,
130
+ const uint8_t* x,
131
+ int radius,
132
+ RangeSearchResult* result) const override;
133
+
134
+ void range_search_preassigned(
135
+ idx_t n,
136
+ const uint8_t* x,
137
+ int radius,
138
+ const idx_t* assign,
139
+ const int32_t* centroid_dis,
140
+ RangeSearchResult* result) const;
141
+
142
+ void reconstruct(idx_t key, uint8_t* recons) const override;
119
143
 
120
144
  /** Reconstruct a subset of the indexed vectors.
121
145
  *
@@ -126,7 +150,7 @@ struct IndexBinaryIVF : IndexBinary {
126
150
  * @param ni nb of vectors to reconstruct
127
151
  * @param recons output array of reconstructed vectors, size ni * d / 8
128
152
  */
129
- void reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const override;
153
+ void reconstruct_n(idx_t i0, idx_t ni, uint8_t* recons) const override;
130
154
 
131
155
  /** Similar to search, but also reconstructs the stored vectors (or an
132
156
  * approximation in the case of lossy coding) for the search results.
@@ -137,9 +161,13 @@ struct IndexBinaryIVF : IndexBinary {
137
161
  *
138
162
  * @param recons reconstructed vectors size (n, k, d / 8)
139
163
  */
140
- void search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k,
141
- int32_t *distances, idx_t *labels,
142
- uint8_t *recons) const override;
164
+ void search_and_reconstruct(
165
+ idx_t n,
166
+ const uint8_t* x,
167
+ idx_t k,
168
+ int32_t* distances,
169
+ idx_t* labels,
170
+ uint8_t* recons) const override;
143
171
 
144
172
  /** Reconstruct a vector given the location in terms of (inv list index +
145
173
  * inv list offset) instead of the id.
@@ -148,9 +176,10 @@ struct IndexBinaryIVF : IndexBinary {
148
176
  * the inv list offset is computed by search_preassigned() with
149
177
  * `store_pairs` set.
150
178
  */
151
- virtual void reconstruct_from_offset(idx_t list_no, idx_t offset,
152
- uint8_t* recons) const;
153
-
179
+ virtual void reconstruct_from_offset(
180
+ idx_t list_no,
181
+ idx_t offset,
182
+ uint8_t* recons) const;
154
183
 
155
184
  /// Dataset manipulation functions
156
185
  size_t remove_ids(const IDSelector& sel) override;
@@ -160,34 +189,33 @@ struct IndexBinaryIVF : IndexBinary {
160
189
  * sequential ids, this would be this->ntotal */
161
190
  virtual void merge_from(IndexBinaryIVF& other, idx_t add_id);
162
191
 
163
- size_t get_list_size(size_t list_no) const
164
- { return invlists->list_size(list_no); }
192
+ size_t get_list_size(size_t list_no) const {
193
+ return invlists->list_size(list_no);
194
+ }
165
195
 
166
196
  /** intialize a direct map
167
197
  *
168
198
  * @param new_maintain_direct_map if true, create a direct map,
169
199
  * else clear it
170
200
  */
171
- void make_direct_map(bool new_maintain_direct_map=true);
201
+ void make_direct_map(bool new_maintain_direct_map = true);
172
202
 
173
- void set_direct_map_type (DirectMap::Type type);
203
+ void set_direct_map_type(DirectMap::Type type);
174
204
 
175
- void replace_invlists(InvertedLists *il, bool own=false);
205
+ void replace_invlists(InvertedLists* il, bool own = false);
176
206
  };
177
207
 
178
-
179
208
  struct BinaryInvertedListScanner {
180
-
181
209
  using idx_t = Index::idx_t;
182
210
 
183
211
  /// from now on we handle this query.
184
- virtual void set_query (const uint8_t *query_vector) = 0;
212
+ virtual void set_query(const uint8_t* query_vector) = 0;
185
213
 
186
214
  /// following codes come from this inverted list
187
- virtual void set_list (idx_t list_no, uint8_t coarse_dis) = 0;
215
+ virtual void set_list(idx_t list_no, uint8_t coarse_dis) = 0;
188
216
 
189
217
  /// compute a single query-to-code distance
190
- virtual uint32_t distance_to_code (const uint8_t *code) const = 0;
218
+ virtual uint32_t distance_to_code(const uint8_t* code) const = 0;
191
219
 
192
220
  /** compute the distances to codes. (distances, labels) should be
193
221
  * organized as a min- or max-heap
@@ -199,23 +227,24 @@ struct BinaryInvertedListScanner {
199
227
  * @param labels heap labels (size k)
200
228
  * @param k heap size
201
229
  */
202
- virtual size_t scan_codes (size_t n,
203
- const uint8_t *codes,
204
- const idx_t *ids,
205
- int32_t *distances, idx_t *labels,
206
- size_t k) const = 0;
207
-
208
- virtual void scan_codes_range (size_t n,
209
- const uint8_t *codes,
210
- const idx_t *ids,
211
- int radius,
212
- RangeQueryResult &result) const = 0;
213
-
214
- virtual ~BinaryInvertedListScanner () {}
215
-
230
+ virtual size_t scan_codes(
231
+ size_t n,
232
+ const uint8_t* codes,
233
+ const idx_t* ids,
234
+ int32_t* distances,
235
+ idx_t* labels,
236
+ size_t k) const = 0;
237
+
238
+ virtual void scan_codes_range(
239
+ size_t n,
240
+ const uint8_t* codes,
241
+ const idx_t* ids,
242
+ int radius,
243
+ RangeQueryResult& result) const = 0;
244
+
245
+ virtual ~BinaryInvertedListScanner() {}
216
246
  };
217
247
 
248
+ } // namespace faiss
218
249
 
219
- } // namespace faiss
220
-
221
- #endif // FAISS_INDEX_BINARY_IVF_H
250
+ #endif // FAISS_INDEX_BINARY_IVF_H
@@ -9,108 +9,96 @@
9
9
 
10
10
  #include <faiss/IndexFlat.h>
11
11
 
12
- #include <cstring>
12
+ #include <faiss/impl/AuxIndexStructures.h>
13
+ #include <faiss/impl/FaissAssert.h>
14
+ #include <faiss/utils/Heap.h>
13
15
  #include <faiss/utils/distances.h>
14
16
  #include <faiss/utils/extra_distances.h>
15
17
  #include <faiss/utils/utils.h>
16
- #include <faiss/utils/Heap.h>
17
- #include <faiss/impl/FaissAssert.h>
18
- #include <faiss/impl/AuxIndexStructures.h>
19
-
18
+ #include <cstring>
20
19
 
21
20
  namespace faiss {
22
21
 
23
- IndexFlat::IndexFlat (idx_t d, MetricType metric):
24
- Index(d, metric)
25
- {
26
- }
27
-
22
+ IndexFlat::IndexFlat(idx_t d, MetricType metric) : Index(d, metric) {}
28
23
 
29
-
30
- void IndexFlat::add (idx_t n, const float *x) {
24
+ void IndexFlat::add(idx_t n, const float* x) {
31
25
  xb.insert(xb.end(), x, x + n * d);
32
26
  ntotal += n;
33
27
  }
34
28
 
35
-
36
29
  void IndexFlat::reset() {
37
30
  xb.clear();
38
31
  ntotal = 0;
39
32
  }
40
33
 
34
+ void IndexFlat::search(
35
+ idx_t n,
36
+ const float* x,
37
+ idx_t k,
38
+ float* distances,
39
+ idx_t* labels) const {
40
+ FAISS_THROW_IF_NOT(k > 0);
41
41
 
42
- void IndexFlat::search (idx_t n, const float *x, idx_t k,
43
- float *distances, idx_t *labels) const
44
- {
45
42
  // we see the distances and labels as heaps
46
43
 
47
44
  if (metric_type == METRIC_INNER_PRODUCT) {
48
- float_minheap_array_t res = {
49
- size_t(n), size_t(k), labels, distances};
50
- knn_inner_product (x, xb.data(), d, n, ntotal, &res);
45
+ float_minheap_array_t res = {size_t(n), size_t(k), labels, distances};
46
+ knn_inner_product(x, xb.data(), d, n, ntotal, &res);
51
47
  } else if (metric_type == METRIC_L2) {
52
- float_maxheap_array_t res = {
53
- size_t(n), size_t(k), labels, distances};
54
- knn_L2sqr (x, xb.data(), d, n, ntotal, &res);
48
+ float_maxheap_array_t res = {size_t(n), size_t(k), labels, distances};
49
+ knn_L2sqr(x, xb.data(), d, n, ntotal, &res);
55
50
  } else {
56
- float_maxheap_array_t res = {
57
- size_t(n), size_t(k), labels, distances};
58
- knn_extra_metrics (x, xb.data(), d, n, ntotal,
59
- metric_type, metric_arg,
60
- &res);
51
+ float_maxheap_array_t res = {size_t(n), size_t(k), labels, distances};
52
+ knn_extra_metrics(
53
+ x, xb.data(), d, n, ntotal, metric_type, metric_arg, &res);
61
54
  }
62
55
  }
63
56
 
64
- void IndexFlat::range_search (idx_t n, const float *x, float radius,
65
- RangeSearchResult *result) const
66
- {
57
+ void IndexFlat::range_search(
58
+ idx_t n,
59
+ const float* x,
60
+ float radius,
61
+ RangeSearchResult* result) const {
67
62
  switch (metric_type) {
68
- case METRIC_INNER_PRODUCT:
69
- range_search_inner_product (x, xb.data(), d, n, ntotal,
70
- radius, result);
71
- break;
72
- case METRIC_L2:
73
- range_search_L2sqr (x, xb.data(), d, n, ntotal, radius, result);
74
- break;
75
- default:
76
- FAISS_THROW_MSG("metric type not supported");
63
+ case METRIC_INNER_PRODUCT:
64
+ range_search_inner_product(
65
+ x, xb.data(), d, n, ntotal, radius, result);
66
+ break;
67
+ case METRIC_L2:
68
+ range_search_L2sqr(x, xb.data(), d, n, ntotal, radius, result);
69
+ break;
70
+ default:
71
+ FAISS_THROW_MSG("metric type not supported");
77
72
  }
78
73
  }
79
74
 
80
-
81
- void IndexFlat::compute_distance_subset (
82
- idx_t n,
83
- const float *x,
84
- idx_t k,
85
- float *distances,
86
- const idx_t *labels) const
87
- {
75
+ void IndexFlat::compute_distance_subset(
76
+ idx_t n,
77
+ const float* x,
78
+ idx_t k,
79
+ float* distances,
80
+ const idx_t* labels) const {
88
81
  switch (metric_type) {
89
82
  case METRIC_INNER_PRODUCT:
90
- fvec_inner_products_by_idx (
91
- distances,
92
- x, xb.data(), labels, d, n, k);
83
+ fvec_inner_products_by_idx(
84
+ distances, x, xb.data(), labels, d, n, k);
93
85
  break;
94
86
  case METRIC_L2:
95
- fvec_L2sqr_by_idx (
96
- distances,
97
- x, xb.data(), labels, d, n, k);
87
+ fvec_L2sqr_by_idx(distances, x, xb.data(), labels, d, n, k);
98
88
  break;
99
89
  default:
100
90
  FAISS_THROW_MSG("metric type not supported");
101
91
  }
102
-
103
92
  }
104
93
 
105
- size_t IndexFlat::remove_ids (const IDSelector & sel)
106
- {
94
+ size_t IndexFlat::remove_ids(const IDSelector& sel) {
107
95
  idx_t j = 0;
108
96
  for (idx_t i = 0; i < ntotal; i++) {
109
- if (sel.is_member (i)) {
97
+ if (sel.is_member(i)) {
110
98
  // should be removed
111
99
  } else {
112
100
  if (i > j) {
113
- memmove (&xb[d * j], &xb[d * i], sizeof(xb[0]) * d);
101
+ memmove(&xb[d * j], &xb[d * i], sizeof(xb[0]) * d);
114
102
  }
115
103
  j++;
116
104
  }
@@ -118,23 +106,21 @@ size_t IndexFlat::remove_ids (const IDSelector & sel)
118
106
  size_t nremove = ntotal - j;
119
107
  if (nremove > 0) {
120
108
  ntotal = j;
121
- xb.resize (ntotal * d);
109
+ xb.resize(ntotal * d);
122
110
  }
123
111
  return nremove;
124
112
  }
125
113
 
126
-
127
114
  namespace {
128
115
 
129
-
130
116
  struct FlatL2Dis : DistanceComputer {
131
117
  size_t d;
132
118
  Index::idx_t nb;
133
- const float *q;
134
- const float *b;
119
+ const float* q;
120
+ const float* b;
135
121
  size_t ndis;
136
122
 
137
- float operator () (idx_t i) override {
123
+ float operator()(idx_t i) override {
138
124
  ndis++;
139
125
  return fvec_L2sqr(q, b + i * d, d);
140
126
  }
@@ -143,14 +129,14 @@ struct FlatL2Dis : DistanceComputer {
143
129
  return fvec_L2sqr(b + j * d, b + i * d, d);
144
130
  }
145
131
 
146
- explicit FlatL2Dis(const IndexFlat& storage, const float *q = nullptr)
147
- : d(storage.d),
148
- nb(storage.ntotal),
149
- q(q),
150
- b(storage.xb.data()),
151
- ndis(0) {}
132
+ explicit FlatL2Dis(const IndexFlat& storage, const float* q = nullptr)
133
+ : d(storage.d),
134
+ nb(storage.ntotal),
135
+ q(q),
136
+ b(storage.xb.data()),
137
+ ndis(0) {}
152
138
 
153
- void set_query(const float *x) override {
139
+ void set_query(const float* x) override {
154
140
  q = x;
155
141
  }
156
142
  };
@@ -158,128 +144,106 @@ struct FlatL2Dis : DistanceComputer {
158
144
  struct FlatIPDis : DistanceComputer {
159
145
  size_t d;
160
146
  Index::idx_t nb;
161
- const float *q;
162
- const float *b;
147
+ const float* q;
148
+ const float* b;
163
149
  size_t ndis;
164
150
 
165
- float operator () (idx_t i) override {
151
+ float operator()(idx_t i) override {
166
152
  ndis++;
167
- return fvec_inner_product (q, b + i * d, d);
153
+ return fvec_inner_product(q, b + i * d, d);
168
154
  }
169
155
 
170
156
  float symmetric_dis(idx_t i, idx_t j) override {
171
- return fvec_inner_product (b + j * d, b + i * d, d);
157
+ return fvec_inner_product(b + j * d, b + i * d, d);
172
158
  }
173
159
 
174
- explicit FlatIPDis(const IndexFlat& storage, const float *q = nullptr)
175
- : d(storage.d),
176
- nb(storage.ntotal),
177
- q(q),
178
- b(storage.xb.data()),
179
- ndis(0) {}
160
+ explicit FlatIPDis(const IndexFlat& storage, const float* q = nullptr)
161
+ : d(storage.d),
162
+ nb(storage.ntotal),
163
+ q(q),
164
+ b(storage.xb.data()),
165
+ ndis(0) {}
180
166
 
181
- void set_query(const float *x) override {
167
+ void set_query(const float* x) override {
182
168
  q = x;
183
169
  }
184
170
  };
185
171
 
172
+ } // namespace
186
173
 
187
-
188
-
189
- } // namespace
190
-
191
-
192
- DistanceComputer * IndexFlat::get_distance_computer() const {
174
+ DistanceComputer* IndexFlat::get_distance_computer() const {
193
175
  if (metric_type == METRIC_L2) {
194
176
  return new FlatL2Dis(*this);
195
177
  } else if (metric_type == METRIC_INNER_PRODUCT) {
196
178
  return new FlatIPDis(*this);
197
179
  } else {
198
- return get_extra_distance_computer (d, metric_type, metric_arg,
199
- ntotal, xb.data());
180
+ return get_extra_distance_computer(
181
+ d, metric_type, metric_arg, ntotal, xb.data());
200
182
  }
201
183
  }
202
184
 
203
-
204
- void IndexFlat::reconstruct (idx_t key, float * recons) const
205
- {
206
- memcpy (recons, &(xb[key * d]), sizeof(*recons) * d);
185
+ void IndexFlat::reconstruct(idx_t key, float* recons) const {
186
+ memcpy(recons, &(xb[key * d]), sizeof(*recons) * d);
207
187
  }
208
188
 
209
-
210
189
  /* The standalone codec interface */
211
- size_t IndexFlat::sa_code_size () const
212
- {
190
+ size_t IndexFlat::sa_code_size() const {
213
191
  return sizeof(float) * d;
214
192
  }
215
193
 
216
- void IndexFlat::sa_encode (idx_t n, const float *x, uint8_t *bytes) const
217
- {
218
- memcpy (bytes, x, sizeof(float) * d * n);
194
+ void IndexFlat::sa_encode(idx_t n, const float* x, uint8_t* bytes) const {
195
+ memcpy(bytes, x, sizeof(float) * d * n);
219
196
  }
220
197
 
221
- void IndexFlat::sa_decode (idx_t n, const uint8_t *bytes, float *x) const
222
- {
223
- memcpy (x, bytes, sizeof(float) * d * n);
198
+ void IndexFlat::sa_decode(idx_t n, const uint8_t* bytes, float* x) const {
199
+ memcpy(x, bytes, sizeof(float) * d * n);
224
200
  }
225
201
 
226
-
227
-
228
-
229
-
230
-
231
202
  /***************************************************
232
203
  * IndexFlat1D
233
204
  ***************************************************/
234
205
 
235
-
236
- IndexFlat1D::IndexFlat1D (bool continuous_update):
237
- IndexFlatL2 (1),
238
- continuous_update (continuous_update)
239
- {
240
- }
206
+ IndexFlat1D::IndexFlat1D(bool continuous_update)
207
+ : IndexFlatL2(1), continuous_update(continuous_update) {}
241
208
 
242
209
  /// if not continuous_update, call this between the last add and
243
210
  /// the first search
244
- void IndexFlat1D::update_permutation ()
245
- {
246
- perm.resize (ntotal);
211
+ void IndexFlat1D::update_permutation() {
212
+ perm.resize(ntotal);
247
213
  if (ntotal < 1000000) {
248
- fvec_argsort (ntotal, xb.data(), (size_t*)perm.data());
214
+ fvec_argsort(ntotal, xb.data(), (size_t*)perm.data());
249
215
  } else {
250
- fvec_argsort_parallel (ntotal, xb.data(), (size_t*)perm.data());
216
+ fvec_argsort_parallel(ntotal, xb.data(), (size_t*)perm.data());
251
217
  }
252
218
  }
253
219
 
254
- void IndexFlat1D::add (idx_t n, const float *x)
255
- {
256
- IndexFlatL2::add (n, x);
220
+ void IndexFlat1D::add(idx_t n, const float* x) {
221
+ IndexFlatL2::add(n, x);
257
222
  if (continuous_update)
258
223
  update_permutation();
259
224
  }
260
225
 
261
- void IndexFlat1D::reset()
262
- {
226
+ void IndexFlat1D::reset() {
263
227
  IndexFlatL2::reset();
264
228
  perm.clear();
265
229
  }
266
230
 
267
- void IndexFlat1D::search (
268
- idx_t n,
269
- const float *x,
270
- idx_t k,
271
- float *distances,
272
- idx_t *labels) const
273
- {
274
- FAISS_THROW_IF_NOT_MSG (perm.size() == ntotal,
275
- "Call update_permutation before search");
231
+ void IndexFlat1D::search(
232
+ idx_t n,
233
+ const float* x,
234
+ idx_t k,
235
+ float* distances,
236
+ idx_t* labels) const {
237
+ FAISS_THROW_IF_NOT(k > 0);
238
+
239
+ FAISS_THROW_IF_NOT_MSG(
240
+ perm.size() == ntotal, "Call update_permutation before search");
276
241
 
277
242
  #pragma omp parallel for
278
243
  for (idx_t i = 0; i < n; i++) {
279
-
280
244
  float q = x[i]; // query
281
- float *D = distances + i * k;
282
- idx_t *I = labels + i * k;
245
+ float* D = distances + i * k;
246
+ idx_t* I = labels + i * k;
283
247
 
284
248
  // binary search
285
249
  idx_t i0 = 0, i1 = ntotal;
@@ -297,8 +261,10 @@ void IndexFlat1D::search (
297
261
 
298
262
  while (i0 + 1 < i1) {
299
263
  idx_t imed = (i0 + i1) / 2;
300
- if (xb[perm[imed]] <= q) i0 = imed;
301
- else i1 = imed;
264
+ if (xb[perm[imed]] <= q)
265
+ i0 = imed;
266
+ else
267
+ i1 = imed;
302
268
  }
303
269
 
304
270
  // query is between xb[perm[i0]] and xb[perm[i1]]
@@ -311,13 +277,19 @@ void IndexFlat1D::search (
311
277
  if (q - xleft < xright - q) {
312
278
  D[wp] = q - xleft;
313
279
  I[wp] = perm[i0];
314
- i0--; wp++;
315
- if (i0 < 0) { goto finish_right; }
280
+ i0--;
281
+ wp++;
282
+ if (i0 < 0) {
283
+ goto finish_right;
284
+ }
316
285
  } else {
317
286
  D[wp] = xright - q;
318
287
  I[wp] = perm[i1];
319
- i1++; wp++;
320
- if (i1 >= ntotal) { goto finish_left; }
288
+ i1++;
289
+ wp++;
290
+ if (i1 >= ntotal) {
291
+ goto finish_left;
292
+ }
321
293
  }
322
294
  }
323
295
  goto done;
@@ -350,11 +322,8 @@ void IndexFlat1D::search (
350
322
  }
351
323
  wp++;
352
324
  }
353
- done: ;
325
+ done:;
354
326
  }
355
-
356
327
  }
357
328
 
358
-
359
-
360
329
  } // namespace faiss