faiss 0.2.6 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/lib/faiss.rb +2 -2
  6. data/vendor/faiss/faiss/AutoTune.cpp +15 -4
  7. data/vendor/faiss/faiss/AutoTune.h +0 -1
  8. data/vendor/faiss/faiss/Clustering.cpp +1 -5
  9. data/vendor/faiss/faiss/Clustering.h +0 -2
  10. data/vendor/faiss/faiss/IVFlib.h +0 -2
  11. data/vendor/faiss/faiss/Index.h +1 -2
  12. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
  13. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
  14. data/vendor/faiss/faiss/IndexBinary.h +0 -1
  15. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
  16. data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
  17. data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
  18. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
  19. data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
  20. data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
  21. data/vendor/faiss/faiss/IndexFastScan.h +5 -1
  22. data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
  23. data/vendor/faiss/faiss/IndexFlat.h +1 -1
  24. data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
  25. data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
  26. data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
  27. data/vendor/faiss/faiss/IndexHNSW.h +0 -1
  28. data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
  29. data/vendor/faiss/faiss/IndexIDMap.h +0 -2
  30. data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
  31. data/vendor/faiss/faiss/IndexIVF.h +121 -61
  32. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
  33. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
  34. data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
  35. data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
  36. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
  37. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
  38. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
  39. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
  40. data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
  41. data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
  42. data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
  43. data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
  44. data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
  45. data/vendor/faiss/faiss/IndexReplicas.h +0 -1
  46. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
  47. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
  48. data/vendor/faiss/faiss/IndexShards.cpp +26 -109
  49. data/vendor/faiss/faiss/IndexShards.h +2 -3
  50. data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
  51. data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
  52. data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
  53. data/vendor/faiss/faiss/MetaIndexes.h +29 -0
  54. data/vendor/faiss/faiss/MetricType.h +14 -0
  55. data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
  56. data/vendor/faiss/faiss/VectorTransform.h +1 -3
  57. data/vendor/faiss/faiss/clone_index.cpp +232 -18
  58. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
  59. data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
  60. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
  61. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
  62. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
  63. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
  64. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
  65. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
  66. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
  67. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
  68. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
  69. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
  70. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
  71. data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
  72. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
  73. data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
  74. data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
  75. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
  76. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
  77. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
  78. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
  79. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
  80. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
  81. data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
  82. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
  83. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
  84. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
  85. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
  86. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
  87. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
  88. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
  89. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
  90. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
  91. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
  92. data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
  93. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
  94. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
  95. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
  96. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
  97. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
  98. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
  99. data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
  100. data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
  101. data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
  102. data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
  103. data/vendor/faiss/faiss/impl/HNSW.h +6 -9
  104. data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
  105. data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
  106. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
  107. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
  108. data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
  109. data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
  110. data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
  111. data/vendor/faiss/faiss/impl/NSG.h +4 -7
  112. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
  113. data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
  114. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
  115. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
  116. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
  117. data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
  118. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
  119. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
  120. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
  121. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
  122. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
  123. data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
  124. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
  125. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
  126. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
  127. data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
  128. data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
  129. data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
  130. data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
  131. data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
  132. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  133. data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
  134. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
  135. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
  136. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
  137. data/vendor/faiss/faiss/index_factory.cpp +8 -10
  138. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
  139. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
  140. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
  141. data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
  142. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
  143. data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
  144. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
  145. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  146. data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
  147. data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
  148. data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
  149. data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
  150. data/vendor/faiss/faiss/utils/Heap.h +35 -1
  151. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
  152. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
  153. data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
  154. data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
  155. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
  156. data/vendor/faiss/faiss/utils/distances.cpp +61 -7
  157. data/vendor/faiss/faiss/utils/distances.h +11 -0
  158. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
  159. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
  160. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
  161. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
  162. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
  163. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
  164. data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
  165. data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
  166. data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
  167. data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
  168. data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
  169. data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
  170. data/vendor/faiss/faiss/utils/fp16.h +7 -0
  171. data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
  172. data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
  173. data/vendor/faiss/faiss/utils/hamming.h +21 -10
  174. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
  175. data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
  176. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
  177. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
  178. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
  179. data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
  180. data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
  181. data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
  182. data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
  183. data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
  184. data/vendor/faiss/faiss/utils/sorting.h +71 -0
  185. data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
  186. data/vendor/faiss/faiss/utils/utils.cpp +4 -176
  187. data/vendor/faiss/faiss/utils/utils.h +2 -9
  188. metadata +29 -3
  189. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26
@@ -31,19 +31,23 @@ namespace faiss {
31
31
  * of the lists (especially training)
32
32
  */
33
33
  struct Level1Quantizer {
34
- Index* quantizer; ///< quantizer that maps vectors to inverted lists
35
- size_t nlist; ///< number of possible key values
34
+ /// quantizer that maps vectors to inverted lists
35
+ Index* quantizer = nullptr;
36
+
37
+ /// number of inverted lists
38
+ size_t nlist = 0;
36
39
 
37
40
  /**
38
41
  * = 0: use the quantizer as index in a kmeans training
39
42
  * = 1: just pass on the training set to the train() of the quantizer
40
43
  * = 2: kmeans training on a flat index + add the centroids to the quantizer
41
44
  */
42
- char quantizer_trains_alone;
43
- bool own_fields; ///< whether object owns the quantizer (false by default)
45
+ char quantizer_trains_alone = 0;
46
+ bool own_fields = false; ///< whether object owns the quantizer
44
47
 
45
48
  ClusteringParameters cp; ///< to override default clustering params
46
- Index* clustering_index; ///< to override index used during clustering
49
+ /// to override index used during clustering
50
+ Index* clustering_index = nullptr;
47
51
 
48
52
  /// Trains the quantizer and calls train_residual to train sub-quantizers
49
53
  void train_q1(
@@ -54,8 +58,8 @@ struct Level1Quantizer {
54
58
 
55
59
  /// compute the number of bytes required to store list ids
56
60
  size_t coarse_code_size() const;
57
- void encode_listno(Index::idx_t list_no, uint8_t* code) const;
58
- Index::idx_t decode_listno(const uint8_t* code) const;
61
+ void encode_listno(idx_t list_no, uint8_t* code) const;
62
+ idx_t decode_listno(const uint8_t* code) const;
59
63
 
60
64
  Level1Quantizer(Index* quantizer, size_t nlist);
61
65
 
@@ -65,11 +69,10 @@ struct Level1Quantizer {
65
69
  };
66
70
 
67
71
  struct SearchParametersIVF : SearchParameters {
68
- size_t nprobe; ///< number of probes at query time
69
- size_t max_codes; ///< max nb of codes to visit to do a query
72
+ size_t nprobe = 1; ///< number of probes at query time
73
+ size_t max_codes = 0; ///< max nb of codes to visit to do a query
70
74
  SearchParameters* quantizer_params = nullptr;
71
75
 
72
- SearchParametersIVF() : nprobe(1), max_codes(0) {}
73
76
  virtual ~SearchParametersIVF() {}
74
77
  };
75
78
 
@@ -78,6 +81,75 @@ using IVFSearchParameters = SearchParametersIVF;
78
81
 
79
82
  struct InvertedListScanner;
80
83
  struct IndexIVFStats;
84
+ struct CodePacker;
85
+
86
+ struct IndexIVFInterface : Level1Quantizer {
87
+ size_t nprobe = 1; ///< number of probes at query time
88
+ size_t max_codes = 0; ///< max nb of codes to visit to do a query
89
+
90
+ explicit IndexIVFInterface(Index* quantizer = nullptr, size_t nlist = 0)
91
+ : Level1Quantizer(quantizer, nlist) {}
92
+
93
+ /** search a set of vectors, that are pre-quantized by the IVF
94
+ * quantizer. Fill in the corresponding heaps with the query
95
+ * results. The default implementation uses InvertedListScanners
96
+ * to do the search.
97
+ *
98
+ * @param n nb of vectors to query
99
+ * @param x query vectors, size nx * d
100
+ * @param assign coarse quantization indices, size nx * nprobe
101
+ * @param centroid_dis
102
+ * distances to coarse centroids, size nx * nprobe
103
+ * @param distance
104
+ * output distances, size n * k
105
+ * @param labels output labels, size n * k
106
+ * @param store_pairs store inv list index + inv list offset
107
+ * instead in upper/lower 32 bit of result,
108
+ * instead of ids (used for reranking).
109
+ * @param params used to override the object's search parameters
110
+ * @param stats search stats to be updated (can be null)
111
+ */
112
+ virtual void search_preassigned(
113
+ idx_t n,
114
+ const float* x,
115
+ idx_t k,
116
+ const idx_t* assign,
117
+ const float* centroid_dis,
118
+ float* distances,
119
+ idx_t* labels,
120
+ bool store_pairs,
121
+ const IVFSearchParameters* params = nullptr,
122
+ IndexIVFStats* stats = nullptr) const = 0;
123
+
124
+ /** Range search a set of vectors, that are pre-quantized by the IVF
125
+ * quantizer. Fill in the RangeSearchResults results. The default
126
+ * implementation uses InvertedListScanners to do the search.
127
+ *
128
+ * @param n nb of vectors to query
129
+ * @param x query vectors, size nx * d
130
+ * @param assign coarse quantization indices, size nx * nprobe
131
+ * @param centroid_dis
132
+ * distances to coarse centroids, size nx * nprobe
133
+ * @param result Output results
134
+ * @param store_pairs store inv list index + inv list offset
135
+ * instead in upper/lower 32 bit of result,
136
+ * instead of ids (used for reranking).
137
+ * @param params used to override the object's search parameters
138
+ * @param stats search stats to be updated (can be null)
139
+ */
140
+ virtual void range_search_preassigned(
141
+ idx_t nx,
142
+ const float* x,
143
+ float radius,
144
+ const idx_t* keys,
145
+ const float* coarse_dis,
146
+ RangeSearchResult* result,
147
+ bool store_pairs = false,
148
+ const IVFSearchParameters* params = nullptr,
149
+ IndexIVFStats* stats = nullptr) const = 0;
150
+
151
+ virtual ~IndexIVFInterface() {}
152
+ };
81
153
 
82
154
  /** Index based on a inverted file (IVF)
83
155
  *
@@ -99,16 +171,12 @@ struct IndexIVFStats;
99
171
  * Sub-classes implement a post-filtering of the index that refines
100
172
  * the distance estimation from the query to databse vectors.
101
173
  */
102
- struct IndexIVF : Index, Level1Quantizer {
174
+ struct IndexIVF : Index, IndexIVFInterface {
103
175
  /// Access to the actual data
104
- InvertedLists* invlists;
105
- bool own_invlists;
106
-
107
- size_t code_size; ///< code size per vector in bytes
108
-
109
- size_t nprobe; ///< number of probes at query time
110
- size_t max_codes; ///< max nb of codes to visit to do a query
176
+ InvertedLists* invlists = nullptr;
177
+ bool own_invlists = false;
111
178
 
179
+ size_t code_size = 0; ///< code size per vector in bytes
112
180
  /** Parallel mode determines how queries are parallelized with OpenMP
113
181
  *
114
182
  * 0 (default): split over queries
@@ -119,7 +187,7 @@ struct IndexIVF : Index, Level1Quantizer {
119
187
  * PARALLEL_MODE_NO_HEAP_INIT: binary or with the previous to
120
188
  * prevent the heap to be initialized and finalized
121
189
  */
122
- int parallel_mode;
190
+ int parallel_mode = 0;
123
191
  const int PARALLEL_MODE_NO_HEAP_INIT = 1024;
124
192
 
125
193
  /** optional map that maps back ids to invlist entries. This
@@ -188,26 +256,7 @@ struct IndexIVF : Index, Level1Quantizer {
188
256
  /// does nothing by default
189
257
  virtual void train_residual(idx_t n, const float* x);
190
258
 
191
- /** search a set of vectors, that are pre-quantized by the IVF
192
- * quantizer. Fill in the corresponding heaps with the query
193
- * results. The default implementation uses InvertedListScanners
194
- * to do the search.
195
- *
196
- * @param n nb of vectors to query
197
- * @param x query vectors, size nx * d
198
- * @param assign coarse quantization indices, size nx * nprobe
199
- * @param centroid_dis
200
- * distances to coarse centroids, size nx * nprobe
201
- * @param distance
202
- * output distances, size n * k
203
- * @param labels output labels, size n * k
204
- * @param store_pairs store inv list index + inv list offset
205
- * instead in upper/lower 32 bit of result,
206
- * instead of ids (used for reranking).
207
- * @param params used to override the object's search parameters
208
- * @param stats search stats to be updated (can be null)
209
- */
210
- virtual void search_preassigned(
259
+ void search_preassigned(
211
260
  idx_t n,
212
261
  const float* x,
213
262
  idx_t k,
@@ -217,7 +266,18 @@ struct IndexIVF : Index, Level1Quantizer {
217
266
  idx_t* labels,
218
267
  bool store_pairs,
219
268
  const IVFSearchParameters* params = nullptr,
220
- IndexIVFStats* stats = nullptr) const;
269
+ IndexIVFStats* stats = nullptr) const override;
270
+
271
+ void range_search_preassigned(
272
+ idx_t nx,
273
+ const float* x,
274
+ float radius,
275
+ const idx_t* keys,
276
+ const float* coarse_dis,
277
+ RangeSearchResult* result,
278
+ bool store_pairs = false,
279
+ const IVFSearchParameters* params = nullptr,
280
+ IndexIVFStats* stats = nullptr) const override;
221
281
 
222
282
  /** assign the vectors, then call search_preassign */
223
283
  void search(
@@ -235,17 +295,6 @@ struct IndexIVF : Index, Level1Quantizer {
235
295
  RangeSearchResult* result,
236
296
  const SearchParameters* params = nullptr) const override;
237
297
 
238
- void range_search_preassigned(
239
- idx_t nx,
240
- const float* x,
241
- float radius,
242
- const idx_t* keys,
243
- const float* coarse_dis,
244
- RangeSearchResult* result,
245
- bool store_pairs = false,
246
- const IVFSearchParameters* params = nullptr,
247
- IndexIVFStats* stats = nullptr) const;
248
-
249
298
  /** Get a scanner for this index (store_pairs means ignore labels)
250
299
  *
251
300
  * The default search implementation uses this to compute the distances
@@ -317,16 +366,15 @@ struct IndexIVF : Index, Level1Quantizer {
317
366
 
318
367
  virtual void merge_from(Index& otherIndex, idx_t add_id) override;
319
368
 
369
+ // returns a new instance of a CodePacker
370
+ virtual CodePacker* get_CodePacker() const;
371
+
320
372
  /** copy a subset of the entries index to the other index
321
- *
322
- * if subset_type == 0: copies ids in [a1, a2)
323
- * if subset_type == 1: copies ids if id % a1 == a2
324
- * if subset_type == 2: copies inverted lists such that a1
325
- * elements are left before and a2 elements are after
373
+ * see Invlists::copy_subset_to for the meaning of subset_type
326
374
  */
327
375
  virtual void copy_subset_to(
328
376
  IndexIVF& other,
329
- int subset_type,
377
+ InvertedLists::subset_type_t subset_type,
330
378
  idx_t a1,
331
379
  idx_t a2) const;
332
380
 
@@ -339,7 +387,7 @@ struct IndexIVF : Index, Level1Quantizer {
339
387
  /// are the ids sorted?
340
388
  bool check_ids_sorted() const;
341
389
 
342
- /** intialize a direct map
390
+ /** initialize a direct map
343
391
  *
344
392
  * @param new_maintain_direct_map if true, create a direct map,
345
393
  * else clear it
@@ -353,7 +401,6 @@ struct IndexIVF : Index, Level1Quantizer {
353
401
 
354
402
  /* The standalone codec interface (except sa_decode that is specific) */
355
403
  size_t sa_code_size() const override;
356
-
357
404
  void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
358
405
 
359
406
  IndexIVF();
@@ -366,8 +413,6 @@ struct RangeQueryResult;
366
413
  * distance_to_code and scan_codes can be called in multiple
367
414
  * threads */
368
415
  struct InvertedListScanner {
369
- using idx_t = Index::idx_t;
370
-
371
416
  idx_t list_no = -1; ///< remember current list
372
417
  bool keep_max = false; ///< keep maximum instead of minimum
373
418
  /// store positions in invlists rather than labels
@@ -413,6 +458,14 @@ struct InvertedListScanner {
413
458
  idx_t* labels,
414
459
  size_t k) const;
415
460
 
461
+ // same as scan_codes, using an iterator
462
+ virtual size_t iterate_codes(
463
+ InvertedListsIterator* iterator,
464
+ float* distances,
465
+ idx_t* labels,
466
+ size_t k,
467
+ size_t& list_size) const;
468
+
416
469
  /** scan a set of codes, compute distances to current query and
417
470
  * update results if distances are below radius
418
471
  *
@@ -424,6 +477,13 @@ struct InvertedListScanner {
424
477
  float radius,
425
478
  RangeQueryResult& result) const;
426
479
 
480
+ // same as scan_codes_range, using an iterator
481
+ virtual void iterate_codes_range(
482
+ InvertedListsIterator* iterator,
483
+ float radius,
484
+ RangeQueryResult& result,
485
+ size_t& list_size) const;
486
+
427
487
  virtual ~InvertedListScanner() {}
428
488
  };
429
489
 
@@ -51,7 +51,7 @@ void IndexIVFAdditiveQuantizer::train_residual(idx_t n, const float* x) {
51
51
  ScopeDeleter<float> del_x(x_in == x ? nullptr : x);
52
52
 
53
53
  if (by_residual) {
54
- std::vector<Index::idx_t> idx(n);
54
+ std::vector<idx_t> idx(n);
55
55
  quantizer->assign(n, x, idx.data());
56
56
 
57
57
  std::vector<float> residuals(n * d);
@@ -145,7 +145,7 @@ struct AQInvertedListScanner : InvertedListScanner {
145
145
  : ia(ia), aq(*ia.aq) {
146
146
  this->store_pairs = store_pairs;
147
147
  this->code_size = ia.code_size;
148
- keep_max = ia.metric_type == METRIC_INNER_PRODUCT;
148
+ keep_max = is_similarity_metric(ia.metric_type);
149
149
  tmp.resize(ia.d);
150
150
  }
151
151
 
@@ -69,7 +69,14 @@ void IndexIVFFastScan::init_fastscan(
69
69
  code_size = M2 / 2;
70
70
 
71
71
  is_trained = false;
72
- replace_invlists(new BlockInvertedLists(nlist, bbs, bbs * M2 / 2), true);
72
+ replace_invlists(new BlockInvertedLists(nlist, get_CodePacker()), true);
73
+ }
74
+
75
+ void IndexIVFFastScan::init_code_packer() {
76
+ auto bil = dynamic_cast<BlockInvertedLists*>(invlists);
77
+ FAISS_THROW_IF_NOT(bil);
78
+ delete bil->packer; // in case there was one before
79
+ bil->packer = get_CodePacker();
73
80
  }
74
81
 
75
82
  IndexIVFFastScan::~IndexIVFFastScan() {}
@@ -112,17 +119,9 @@ void IndexIVFFastScan::add_with_ids(
112
119
  }
113
120
  InterruptCallback::check();
114
121
 
115
- AlignedTable<uint8_t> codes(n * code_size);
116
122
  direct_map.check_can_add(xids);
117
123
  std::unique_ptr<idx_t[]> idx(new idx_t[n]);
118
124
  quantizer->assign(n, x, idx.get());
119
- size_t nadd = 0, nminus1 = 0;
120
-
121
- for (size_t i = 0; i < n; i++) {
122
- if (idx[i] < 0) {
123
- nminus1++;
124
- }
125
- }
126
125
 
127
126
  AlignedTable<uint8_t> flat_codes(n * code_size);
128
127
  encode_vectors(n, x, idx.get(), flat_codes.get());
@@ -170,7 +169,6 @@ void IndexIVFFastScan::add_with_ids(
170
169
  memcpy(list_codes.data() + (i - i0) * code_size,
171
170
  flat_codes.data() + order[i] * code_size,
172
171
  code_size);
173
- nadd++;
174
172
  }
175
173
  pq4_pack_codes_range(
176
174
  list_codes.data(),
@@ -187,6 +185,10 @@ void IndexIVFFastScan::add_with_ids(
187
185
  ntotal += n;
188
186
  }
189
187
 
188
+ CodePacker* IndexIVFFastScan::get_CodePacker() const {
189
+ return new CodePackerPQ4(M, bbs);
190
+ }
191
+
190
192
  /*********************************************************
191
193
  * search
192
194
  *********************************************************/
@@ -229,7 +231,6 @@ void estimators_from_tables_generic(
229
231
  }
230
232
  }
231
233
 
232
- using idx_t = Index::idx_t;
233
234
  using namespace quantize_lut;
234
235
 
235
236
  } // anonymous namespace
@@ -67,6 +67,9 @@ struct IndexIVFFastScan : IndexIVF {
67
67
  MetricType metric,
68
68
  int bbs);
69
69
 
70
+ // initialize the CodePacker in the InvertedLists
71
+ void init_code_packer();
72
+
70
73
  ~IndexIVFFastScan() override;
71
74
 
72
75
  /// orig's inverted lists (for debugging)
@@ -166,7 +169,7 @@ struct IndexIVFFastScan : IndexIVF {
166
169
  size_t* nlist_out,
167
170
  const Scaler& scaler) const;
168
171
 
169
- // implem 14 is mukltithreaded internally across nprobes and queries
172
+ // implem 14 is multithreaded internally across nprobes and queries
170
173
  template <class C, class Scaler>
171
174
  void search_implem_14(
172
175
  idx_t n,
@@ -181,6 +184,8 @@ struct IndexIVFFastScan : IndexIVF {
181
184
  void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons)
182
185
  const override;
183
186
 
187
+ CodePacker* get_CodePacker() const override;
188
+
184
189
  // reconstruct orig invlists (for debugging)
185
190
  void reconstruct_orig_invlists();
186
191
  };