faiss 0.2.0 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (215) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +7 -7
  5. data/ext/faiss/extconf.rb +6 -3
  6. data/ext/faiss/numo.hpp +4 -4
  7. data/ext/faiss/utils.cpp +1 -1
  8. data/ext/faiss/utils.h +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  11. data/vendor/faiss/faiss/AutoTune.h +55 -56
  12. data/vendor/faiss/faiss/Clustering.cpp +365 -194
  13. data/vendor/faiss/faiss/Clustering.h +102 -35
  14. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  15. data/vendor/faiss/faiss/IVFlib.h +48 -51
  16. data/vendor/faiss/faiss/Index.cpp +85 -103
  17. data/vendor/faiss/faiss/Index.h +54 -48
  18. data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
  19. data/vendor/faiss/faiss/Index2Layer.h +22 -36
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
  21. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
  22. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  23. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  24. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  25. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  26. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  27. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  28. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  29. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  30. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  31. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  32. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  33. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  34. data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
  35. data/vendor/faiss/faiss/IndexFlat.h +42 -59
  36. data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
  37. data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
  38. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  39. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  40. data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
  41. data/vendor/faiss/faiss/IndexIVF.h +169 -118
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
  54. data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
  55. data/vendor/faiss/faiss/IndexLSH.h +20 -38
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -82
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
  69. data/vendor/faiss/faiss/IndexRefine.h +32 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
  73. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
  74. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  75. data/vendor/faiss/faiss/IndexShards.h +85 -73
  76. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  77. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  78. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  79. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  80. data/vendor/faiss/faiss/MetricType.h +7 -7
  81. data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
  82. data/vendor/faiss/faiss/VectorTransform.h +64 -89
  83. data/vendor/faiss/faiss/clone_index.cpp +78 -73
  84. data/vendor/faiss/faiss/clone_index.h +4 -9
  85. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  86. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  87. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
  88. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  89. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  90. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  91. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  92. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  93. data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
  94. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  95. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  96. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  97. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  101. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  102. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  106. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  108. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  110. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  112. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  113. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  114. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  115. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  116. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  121. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  122. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  124. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  125. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  126. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  128. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  129. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  130. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  135. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  136. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  137. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  138. data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
  139. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  140. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
  142. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  144. data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
  145. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  146. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  148. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  149. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  151. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
  153. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  154. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  156. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  157. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  158. data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
  159. data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
  160. data/vendor/faiss/faiss/impl/io.cpp +76 -95
  161. data/vendor/faiss/faiss/impl/io.h +31 -41
  162. data/vendor/faiss/faiss/impl/io_macros.h +60 -29
  163. data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
  164. data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
  165. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  166. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  167. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  171. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  172. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  173. data/vendor/faiss/faiss/index_factory.cpp +619 -397
  174. data/vendor/faiss/faiss/index_factory.h +8 -6
  175. data/vendor/faiss/faiss/index_io.h +23 -26
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  177. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  178. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  179. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  180. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  181. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  183. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  185. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  186. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  187. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  188. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  189. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  190. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  191. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  192. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  193. data/vendor/faiss/faiss/utils/distances.cpp +305 -312
  194. data/vendor/faiss/faiss/utils/distances.h +170 -122
  195. data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
  196. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  197. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  198. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  199. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  200. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  201. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  202. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  203. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  204. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  205. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  206. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  207. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  208. data/vendor/faiss/faiss/utils/random.h +13 -16
  209. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  210. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  211. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  212. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  213. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  214. data/vendor/faiss/faiss/utils/utils.h +54 -49
  215. metadata +29 -4
@@ -10,30 +10,27 @@
10
10
  #ifndef FAISS_INDEX_IVF_H
11
11
  #define FAISS_INDEX_IVF_H
12
12
 
13
-
14
- #include <vector>
15
- #include <unordered_map>
16
13
  #include <stdint.h>
14
+ #include <unordered_map>
15
+ #include <vector>
17
16
 
18
- #include <faiss/Index.h>
19
- #include <faiss/invlists/InvertedLists.h>
20
- #include <faiss/invlists/DirectMap.h>
21
17
  #include <faiss/Clustering.h>
18
+ #include <faiss/Index.h>
22
19
  #include <faiss/impl/platform_macros.h>
20
+ #include <faiss/invlists/DirectMap.h>
21
+ #include <faiss/invlists/InvertedLists.h>
23
22
  #include <faiss/utils/Heap.h>
24
23
 
25
-
26
24
  namespace faiss {
27
25
 
28
-
29
26
  /** Encapsulates a quantizer object for the IndexIVF
30
27
  *
31
28
  * The class isolates the fields that are independent of the storage
32
29
  * of the lists (especially training)
33
30
  */
34
31
  struct Level1Quantizer {
35
- Index * quantizer; ///< quantizer that maps vectors to inverted lists
36
- size_t nlist; ///< number of possible key values
32
+ Index* quantizer; ///< quantizer that maps vectors to inverted lists
33
+ size_t nlist; ///< number of possible key values
37
34
 
38
35
  /**
39
36
  * = 0: use the quantizer as index in a kmeans training
@@ -41,40 +38,37 @@ struct Level1Quantizer {
41
38
  * = 2: kmeans training on a flat index + add the centroids to the quantizer
42
39
  */
43
40
  char quantizer_trains_alone;
44
- bool own_fields; ///< whether object owns the quantizer
41
+ bool own_fields; ///< whether object owns the quantizer (false by default)
45
42
 
46
43
  ClusteringParameters cp; ///< to override default clustering params
47
- Index *clustering_index; ///< to override index used during clustering
44
+ Index* clustering_index; ///< to override index used during clustering
48
45
 
49
46
  /// Trains the quantizer and calls train_residual to train sub-quantizers
50
- void train_q1 (size_t n, const float *x, bool verbose,
51
- MetricType metric_type);
52
-
47
+ void train_q1(
48
+ size_t n,
49
+ const float* x,
50
+ bool verbose,
51
+ MetricType metric_type);
53
52
 
54
53
  /// compute the number of bytes required to store list ids
55
- size_t coarse_code_size () const;
56
- void encode_listno (Index::idx_t list_no, uint8_t *code) const;
57
- Index::idx_t decode_listno (const uint8_t *code) const;
58
-
59
- Level1Quantizer (Index * quantizer, size_t nlist);
54
+ size_t coarse_code_size() const;
55
+ void encode_listno(Index::idx_t list_no, uint8_t* code) const;
56
+ Index::idx_t decode_listno(const uint8_t* code) const;
60
57
 
61
- Level1Quantizer ();
58
+ Level1Quantizer(Index* quantizer, size_t nlist);
62
59
 
63
- ~Level1Quantizer ();
60
+ Level1Quantizer();
64
61
 
62
+ ~Level1Quantizer();
65
63
  };
66
64
 
67
-
68
-
69
65
  struct IVFSearchParameters {
70
- size_t nprobe; ///< number of probes at query time
71
- size_t max_codes; ///< max nb of codes to visit to do a query
72
- IVFSearchParameters(): nprobe(1), max_codes(0) {}
73
- virtual ~IVFSearchParameters () {}
66
+ size_t nprobe; ///< number of probes at query time
67
+ size_t max_codes; ///< max nb of codes to visit to do a query
68
+ IVFSearchParameters() : nprobe(1), max_codes(0) {}
69
+ virtual ~IVFSearchParameters() {}
74
70
  };
75
71
 
76
-
77
-
78
72
  struct InvertedListScanner;
79
73
  struct IndexIVFStats;
80
74
 
@@ -98,15 +92,15 @@ struct IndexIVFStats;
98
92
  * Sub-classes implement a post-filtering of the index that refines
99
93
  * the distance estimation from the query to database vectors.
100
94
  */
101
- struct IndexIVF: Index, Level1Quantizer {
95
+ struct IndexIVF : Index, Level1Quantizer {
102
96
  /// Access to the actual data
103
- InvertedLists *invlists;
97
+ InvertedLists* invlists;
104
98
  bool own_invlists;
105
99
 
106
- size_t code_size; ///< code size per vector in bytes
100
+ size_t code_size; ///< code size per vector in bytes
107
101
 
108
- size_t nprobe; ///< number of probes at query time
109
- size_t max_codes; ///< max nb of codes to visit to do a query
102
+ size_t nprobe; ///< number of probes at query time
103
+ size_t max_codes; ///< max nb of codes to visit to do a query
110
104
 
111
105
  /** Parallel mode determines how queries are parallelized with OpenMP
112
106
  *
@@ -127,12 +121,14 @@ struct IndexIVF: Index, Level1Quantizer {
127
121
 
128
122
  /** The Inverted file takes a quantizer (an Index) on input,
129
123
  * which implements the function mapping a vector to a list
130
- * identifier. The pointer is borrowed: the quantizer should not
131
- * be deleted while the IndexIVF is in use.
124
+ * identifier.
132
125
  */
133
- IndexIVF (Index * quantizer, size_t d,
134
- size_t nlist, size_t code_size,
135
- MetricType metric = METRIC_L2);
126
+ IndexIVF(
127
+ Index* quantizer,
128
+ size_t d,
129
+ size_t nlist,
130
+ size_t code_size,
131
+ MetricType metric = METRIC_L2);
136
132
 
137
133
  void reset() override;
138
134
 
@@ -145,6 +141,19 @@ struct IndexIVF: Index, Level1Quantizer {
145
141
  /// default implementation that calls encode_vectors
146
142
  void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
147
143
 
144
+ /** Implementation of vector addition where the vector assignments are
145
+ * predefined. The default implementation hands over the code extraction to
146
+ * encode_vectors.
147
+ *
148
+ * @param precomputed_idx quantization indices for the input vectors
149
+ * (size n)
150
+ */
151
+ virtual void add_core(
152
+ idx_t n,
153
+ const float* x,
154
+ const idx_t* xids,
155
+ const idx_t* precomputed_idx);
156
+
148
157
  /** Encodes a set of vectors as they would appear in the inverted lists
149
158
  *
150
159
  * @param list_nos inverted list ids as returned by the
@@ -154,14 +163,23 @@ struct IndexIVF: Index, Level1Quantizer {
154
163
  * include the list ids in the code (in this case add
155
164
  * ceil(log8(nlist)) to the code size)
156
165
  */
157
- virtual void encode_vectors(idx_t n, const float* x,
158
- const idx_t *list_nos,
159
- uint8_t * codes,
160
- bool include_listno = false) const = 0;
166
+ virtual void encode_vectors(
167
+ idx_t n,
168
+ const float* x,
169
+ const idx_t* list_nos,
170
+ uint8_t* codes,
171
+ bool include_listno = false) const = 0;
172
+
173
+ /** Add vectors that are computed with the standalone codec
174
+ *
175
+ * @param codes codes to add size n * sa_code_size()
176
+ * @param xids corresponding ids, size n
177
+ */
178
+ void add_sa_codes(idx_t n, const uint8_t* codes, const idx_t* xids);
161
179
 
162
180
  /// Sub-classes that encode the residuals can train their encoders here
163
181
  /// does nothing by default
164
- virtual void train_residual (idx_t n, const float *x);
182
+ virtual void train_residual(idx_t n, const float* x);
165
183
 
166
184
  /** search a set of vectors, that are pre-quantized by the IVF
167
185
  * quantizer. Fill in the corresponding heaps with the query
@@ -182,36 +200,53 @@ struct IndexIVF: Index, Level1Quantizer {
182
200
  * @param params used to override the object's search parameters
183
201
  * @param stats search stats to be updated (can be null)
184
202
  */
185
- virtual void search_preassigned (
186
- idx_t n, const float *x, idx_t k,
187
- const idx_t *assign, const float *centroid_dis,
188
- float *distances, idx_t *labels,
203
+ virtual void search_preassigned(
204
+ idx_t n,
205
+ const float* x,
206
+ idx_t k,
207
+ const idx_t* assign,
208
+ const float* centroid_dis,
209
+ float* distances,
210
+ idx_t* labels,
189
211
  bool store_pairs,
190
- const IVFSearchParameters *params=nullptr,
191
- IndexIVFStats *stats=nullptr
192
- ) const;
212
+ const IVFSearchParameters* params = nullptr,
213
+ IndexIVFStats* stats = nullptr) const;
193
214
 
194
215
  /** assign the vectors, then call search_preassigned */
195
- void search (idx_t n, const float *x, idx_t k,
196
- float *distances, idx_t *labels) const override;
197
-
198
- void range_search (idx_t n, const float* x, float radius,
199
- RangeSearchResult* result) const override;
216
+ void search(
217
+ idx_t n,
218
+ const float* x,
219
+ idx_t k,
220
+ float* distances,
221
+ idx_t* labels) const override;
222
+
223
+ void range_search(
224
+ idx_t n,
225
+ const float* x,
226
+ float radius,
227
+ RangeSearchResult* result) const override;
200
228
 
201
229
  void range_search_preassigned(
202
- idx_t nx, const float *x, float radius,
203
- const idx_t *keys, const float *coarse_dis,
204
- RangeSearchResult *result,
205
- bool store_pairs=false,
206
- const IVFSearchParameters *params=nullptr,
207
- IndexIVFStats *stats=nullptr) const;
208
-
209
- /// get a scanner for this index (store_pairs means ignore labels)
210
- virtual InvertedListScanner *get_InvertedListScanner (
211
- bool store_pairs=false) const;
230
+ idx_t nx,
231
+ const float* x,
232
+ float radius,
233
+ const idx_t* keys,
234
+ const float* coarse_dis,
235
+ RangeSearchResult* result,
236
+ bool store_pairs = false,
237
+ const IVFSearchParameters* params = nullptr,
238
+ IndexIVFStats* stats = nullptr) const;
239
+
240
+ /** Get a scanner for this index (store_pairs means ignore labels)
241
+ *
242
+ * The default search implementation uses this to compute the distances
243
+ */
244
+ virtual InvertedListScanner* get_InvertedListScanner(
245
+ bool store_pairs = false) const;
212
246
 
213
- /** reconstruct a vector. Works only if maintain_direct_map is set to 1 or 2 */
214
- void reconstruct (idx_t key, float* recons) const override;
247
+ /** reconstruct a vector. Works only if maintain_direct_map is set to 1 or 2
248
+ */
249
+ void reconstruct(idx_t key, float* recons) const override;
215
250
 
216
251
  /** Update a subset of vectors.
217
252
  *
@@ -221,7 +256,7 @@ struct IndexIVF: Index, Level1Quantizer {
221
256
  * @param idx vector indices to update, size nv
222
257
  * @param v vectors of new values, size nv*d
223
258
  */
224
- virtual void update_vectors (int nv, const idx_t *idx, const float *v);
259
+ virtual void update_vectors(int nv, const idx_t* idx, const float* v);
225
260
 
226
261
  /** Reconstruct a subset of the indexed vectors.
227
262
  *
@@ -243,9 +278,13 @@ struct IndexIVF: Index, Level1Quantizer {
243
278
  *
244
279
  * @param recons reconstructed vectors size (n, k, d)
245
280
  */
246
- void search_and_reconstruct (idx_t n, const float *x, idx_t k,
247
- float *distances, idx_t *labels,
248
- float *recons) const override;
281
+ void search_and_reconstruct(
282
+ idx_t n,
283
+ const float* x,
284
+ idx_t k,
285
+ float* distances,
286
+ idx_t* labels,
287
+ float* recons) const override;
249
288
 
250
289
  /** Reconstruct a vector given the location in terms of (inv list index +
251
290
  * inv list offset) instead of the id.
@@ -254,9 +293,10 @@ struct IndexIVF: Index, Level1Quantizer {
254
293
  * the inv list offset is computed by search_preassigned() with
255
294
  * `store_pairs` set.
256
295
  */
257
- virtual void reconstruct_from_offset (int64_t list_no, int64_t offset,
258
- float* recons) const;
259
-
296
+ virtual void reconstruct_from_offset(
297
+ int64_t list_no,
298
+ int64_t offset,
299
+ float* recons) const;
260
300
 
261
301
  /// Dataset manipulation functions
262
302
 
@@ -265,12 +305,12 @@ struct IndexIVF: Index, Level1Quantizer {
265
305
  /** check that the two indexes are compatible (ie, they are
266
306
  * trained in the same way and have the same
267
307
  * parameters). Otherwise throw. */
268
- void check_compatible_for_merge (const IndexIVF &other) const;
308
+ void check_compatible_for_merge(const IndexIVF& other) const;
269
309
 
270
310
  /** moves the entries from another dataset to self. On output,
271
311
  * other is empty. add_id is added to all moved ids (for
272
312
  * sequential ids, this would be this->ntotal) */
273
- virtual void merge_from (IndexIVF &other, idx_t add_id);
313
+ virtual void merge_from(IndexIVF& other, idx_t add_id);
274
314
 
275
315
  /** copy a subset of the entries index to the other index
276
316
  *
@@ -279,34 +319,36 @@ struct IndexIVF: Index, Level1Quantizer {
279
319
  * if subset_type == 2: copies inverted lists such that a1
280
320
  * elements are left before and a2 elements are after
281
321
  */
282
- virtual void copy_subset_to (IndexIVF & other, int subset_type,
283
- idx_t a1, idx_t a2) const;
322
+ virtual void copy_subset_to(
323
+ IndexIVF& other,
324
+ int subset_type,
325
+ idx_t a1,
326
+ idx_t a2) const;
284
327
 
285
328
  ~IndexIVF() override;
286
329
 
287
- size_t get_list_size (size_t list_no) const
288
- { return invlists->list_size(list_no); }
330
+ size_t get_list_size(size_t list_no) const {
331
+ return invlists->list_size(list_no);
332
+ }
289
333
 
290
334
  /** initialize a direct map
291
335
  *
292
336
  * @param new_maintain_direct_map if true, create a direct map,
293
337
  * else clear it
294
338
  */
295
- void make_direct_map (bool new_maintain_direct_map=true);
296
-
297
- void set_direct_map_type (DirectMap::Type type);
339
+ void make_direct_map(bool new_maintain_direct_map = true);
298
340
 
341
+ void set_direct_map_type(DirectMap::Type type);
299
342
 
300
343
  /// replace the inverted lists, old one is deallocated if own_invlists
301
- void replace_invlists (InvertedLists *il, bool own=false);
344
+ void replace_invlists(InvertedLists* il, bool own = false);
302
345
 
303
346
  /* The standalone codec interface (except sa_decode that is specific) */
304
- size_t sa_code_size () const override;
347
+ size_t sa_code_size() const override;
305
348
 
306
- void sa_encode (idx_t n, const float *x,
307
- uint8_t *bytes) const override;
349
+ void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
308
350
 
309
- IndexIVF ();
351
+ IndexIVF();
310
352
  };
311
353
 
312
354
  struct RangeQueryResult;
@@ -316,20 +358,28 @@ struct RangeQueryResult;
316
358
  * distance_to_code and scan_codes can be called in multiple
317
359
  * threads */
318
360
  struct InvertedListScanner {
319
-
320
361
  using idx_t = Index::idx_t;
321
362
 
363
+ idx_t list_no = -1; ///< remember current list
364
+ bool keep_max = false; ///< keep maximum instead of minimum
365
+ /// store positions in invlists rather than labels
366
+ bool store_pairs = false;
367
+
368
+ /// used in default implementation of scan_codes
369
+ size_t code_size = 0;
370
+
322
371
  /// from now on we handle this query.
323
- virtual void set_query (const float *query_vector) = 0;
372
+ virtual void set_query(const float* query_vector) = 0;
324
373
 
325
374
  /// following codes come from this inverted list
326
- virtual void set_list (idx_t list_no, float coarse_dis) = 0;
375
+ virtual void set_list(idx_t list_no, float coarse_dis) = 0;
327
376
 
328
377
  /// compute a single query-to-code distance
329
- virtual float distance_to_code (const uint8_t *code) const = 0;
378
+ virtual float distance_to_code(const uint8_t* code) const = 0;
330
379
 
331
380
  /** scan a set of codes, compute distances to current query and
332
- * update heap of results if necessary.
381
+ * update heap of results if necessary. Default implementation
382
+ * calls distance_to_code.
333
383
  *
334
384
  * @param n number of codes to scan
335
385
  * @param codes codes to scan (n * code_size)
@@ -339,45 +389,46 @@ struct InvertedListScanner {
339
389
  * @param k heap size
340
390
  * @return number of heap updates performed
341
391
  */
342
- virtual size_t scan_codes (size_t n,
343
- const uint8_t *codes,
344
- const idx_t *ids,
345
- float *distances, idx_t *labels,
346
- size_t k) const = 0;
392
+ virtual size_t scan_codes(
393
+ size_t n,
394
+ const uint8_t* codes,
395
+ const idx_t* ids,
396
+ float* distances,
397
+ idx_t* labels,
398
+ size_t k) const;
347
399
 
348
400
  /** scan a set of codes, compute distances to current query and
349
401
  * update results if distances are below radius
350
402
  *
351
403
  * (default implementation fails) */
352
- virtual void scan_codes_range (size_t n,
353
- const uint8_t *codes,
354
- const idx_t *ids,
355
- float radius,
356
- RangeQueryResult &result) const;
357
-
358
- virtual ~InvertedListScanner () {}
359
-
404
+ virtual void scan_codes_range(
405
+ size_t n,
406
+ const uint8_t* codes,
407
+ const idx_t* ids,
408
+ float radius,
409
+ RangeQueryResult& result) const;
410
+
411
+ virtual ~InvertedListScanner() {}
360
412
  };
361
413
 
362
-
363
414
  struct IndexIVFStats {
364
- size_t nq; // nb of queries run
365
- size_t nlist; // nb of inverted lists scanned
366
- size_t ndis; // nb of distancs computed
367
- size_t nheap_updates; // nb of times the heap was updated
415
+ size_t nq; // nb of queries run
416
+ size_t nlist; // nb of inverted lists scanned
417
+ size_t ndis; // nb of distances computed
418
+ size_t nheap_updates; // nb of times the heap was updated
368
419
  double quantization_time; // time spent quantizing vectors (in ms)
369
420
  double search_time; // time spent searching lists (in ms)
370
421
 
371
- IndexIVFStats () {reset (); }
372
- void reset ();
373
- void add (const IndexIVFStats & other);
422
+ IndexIVFStats() {
423
+ reset();
424
+ }
425
+ void reset();
426
+ void add(const IndexIVFStats& other);
374
427
  };
375
428
 
376
429
  // global var that collects them all
377
430
  FAISS_API extern IndexIVFStats indexIVF_stats;
378
431
 
379
-
380
432
  } // namespace faiss
381
433
 
382
-
383
434
  #endif