faiss 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,211 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #ifndef FAISS_INDEX_BINARY_IVF_H
11
+ #define FAISS_INDEX_BINARY_IVF_H
12
+
13
+
14
+ #include <vector>
15
+
16
+ #include <faiss/IndexBinary.h>
17
+ #include <faiss/IndexIVF.h>
18
+ #include <faiss/Clustering.h>
19
+ #include <faiss/utils/Heap.h>
20
+
21
+
22
+ namespace faiss {
23
+
24
+ struct BinaryInvertedListScanner;
25
+
26
+ /** Index based on a inverted file (IVF)
27
+ *
28
+ * In the inverted file, the quantizer (an IndexBinary instance) provides a
29
+ * quantization index for each vector to be added. The quantization
30
+ * index maps to a list (aka inverted list or posting list), where the
31
+ * id of the vector is stored.
32
+ *
33
+ * Otherwise the object is similar to the IndexIVF
34
+ */
35
+ struct IndexBinaryIVF : IndexBinary {
36
+ /// Acess to the actual data
37
+ InvertedLists *invlists;
38
+ bool own_invlists;
39
+
40
+ size_t nprobe; ///< number of probes at query time
41
+ size_t max_codes; ///< max nb of codes to visit to do a query
42
+
43
+ /** Select between using a heap or counting to select the k smallest values
44
+ * when scanning inverted lists.
45
+ */
46
+ bool use_heap = true;
47
+
48
+ /// map for direct access to the elements. Enables reconstruct().
49
+ bool maintain_direct_map;
50
+ std::vector<idx_t> direct_map;
51
+
52
+ IndexBinary *quantizer; ///< quantizer that maps vectors to inverted lists
53
+ size_t nlist; ///< number of possible key values
54
+
55
+ bool own_fields; ///< whether object owns the quantizer
56
+
57
+ ClusteringParameters cp; ///< to override default clustering params
58
+ Index *clustering_index; ///< to override index used during clustering
59
+
60
+ /** The Inverted file takes a quantizer (an IndexBinary) on input,
61
+ * which implements the function mapping a vector to a list
62
+ * identifier. The pointer is borrowed: the quantizer should not
63
+ * be deleted while the IndexBinaryIVF is in use.
64
+ */
65
+ IndexBinaryIVF(IndexBinary *quantizer, size_t d, size_t nlist);
66
+
67
+ IndexBinaryIVF();
68
+
69
+ ~IndexBinaryIVF() override;
70
+
71
+ void reset() override;
72
+
73
+ /// Trains the quantizer
74
+ void train(idx_t n, const uint8_t *x) override;
75
+
76
+ void add(idx_t n, const uint8_t *x) override;
77
+
78
+ void add_with_ids(idx_t n, const uint8_t *x, const idx_t *xids) override;
79
+
80
+ /// same as add_with_ids, with precomputed coarse quantizer
81
+ void add_core (idx_t n, const uint8_t * x, const idx_t *xids,
82
+ const idx_t *precomputed_idx);
83
+
84
+ /** Search a set of vectors, that are pre-quantized by the IVF
85
+ * quantizer. Fill in the corresponding heaps with the query
86
+ * results. search() calls this.
87
+ *
88
+ * @param n nb of vectors to query
89
+ * @param x query vectors, size nx * d
90
+ * @param assign coarse quantization indices, size nx * nprobe
91
+ * @param centroid_dis
92
+ * distances to coarse centroids, size nx * nprobe
93
+ * @param distance
94
+ * output distances, size n * k
95
+ * @param labels output labels, size n * k
96
+ * @param store_pairs store inv list index + inv list offset
97
+ * instead in upper/lower 32 bit of result,
98
+ * instead of ids (used for reranking).
99
+ * @param params used to override the object's search parameters
100
+ */
101
+ void search_preassigned(idx_t n, const uint8_t *x, idx_t k,
102
+ const idx_t *assign,
103
+ const int32_t *centroid_dis,
104
+ int32_t *distances, idx_t *labels,
105
+ bool store_pairs,
106
+ const IVFSearchParameters *params=nullptr
107
+ ) const;
108
+
109
+ virtual BinaryInvertedListScanner *get_InvertedListScanner (
110
+ bool store_pairs=false) const;
111
+
112
+ /** assign the vectors, then call search_preassign */
113
+ virtual void search(idx_t n, const uint8_t *x, idx_t k,
114
+ int32_t *distances, idx_t *labels) const override;
115
+
116
+ void reconstruct(idx_t key, uint8_t *recons) const override;
117
+
118
+ /** Reconstruct a subset of the indexed vectors.
119
+ *
120
+ * Overrides default implementation to bypass reconstruct() which requires
121
+ * direct_map to be maintained.
122
+ *
123
+ * @param i0 first vector to reconstruct
124
+ * @param ni nb of vectors to reconstruct
125
+ * @param recons output array of reconstructed vectors, size ni * d / 8
126
+ */
127
+ void reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const override;
128
+
129
+ /** Similar to search, but also reconstructs the stored vectors (or an
130
+ * approximation in the case of lossy coding) for the search results.
131
+ *
132
+ * Overrides default implementation to avoid having to maintain direct_map
133
+ * and instead fetch the code offsets through the `store_pairs` flag in
134
+ * search_preassigned().
135
+ *
136
+ * @param recons reconstructed vectors size (n, k, d / 8)
137
+ */
138
+ void search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k,
139
+ int32_t *distances, idx_t *labels,
140
+ uint8_t *recons) const override;
141
+
142
+ /** Reconstruct a vector given the location in terms of (inv list index +
143
+ * inv list offset) instead of the id.
144
+ *
145
+ * Useful for reconstructing when the direct_map is not maintained and
146
+ * the inv list offset is computed by search_preassigned() with
147
+ * `store_pairs` set.
148
+ */
149
+ virtual void reconstruct_from_offset(idx_t list_no, idx_t offset,
150
+ uint8_t* recons) const;
151
+
152
+
153
+ /// Dataset manipulation functions
154
+ size_t remove_ids(const IDSelector& sel) override;
155
+
156
+ /** moves the entries from another dataset to self. On output,
157
+ * other is empty. add_id is added to all moved ids (for
158
+ * sequential ids, this would be this->ntotal */
159
+ virtual void merge_from(IndexBinaryIVF& other, idx_t add_id);
160
+
161
+ size_t get_list_size(size_t list_no) const
162
+ { return invlists->list_size(list_no); }
163
+
164
+ /** intialize a direct map
165
+ *
166
+ * @param new_maintain_direct_map if true, create a direct map,
167
+ * else clear it
168
+ */
169
+ void make_direct_map(bool new_maintain_direct_map=true);
170
+
171
+ void replace_invlists(InvertedLists *il, bool own=false);
172
+ };
173
+
174
+
175
+ struct BinaryInvertedListScanner {
176
+
177
+ using idx_t = Index::idx_t;
178
+
179
+ /// from now on we handle this query.
180
+ virtual void set_query (const uint8_t *query_vector) = 0;
181
+
182
+ /// following codes come from this inverted list
183
+ virtual void set_list (idx_t list_no, uint8_t coarse_dis) = 0;
184
+
185
+ /// compute a single query-to-code distance
186
+ virtual uint32_t distance_to_code (const uint8_t *code) const = 0;
187
+
188
+ /** compute the distances to codes. (distances, labels) should be
189
+ * organized as a min- or max-heap
190
+ *
191
+ * @param n number of codes to scan
192
+ * @param codes codes to scan (n * code_size)
193
+ * @param ids corresponding ids (ignored if store_pairs)
194
+ * @param distances heap distances (size k)
195
+ * @param labels heap labels (size k)
196
+ * @param k heap size
197
+ */
198
+ virtual size_t scan_codes (size_t n,
199
+ const uint8_t *codes,
200
+ const idx_t *ids,
201
+ int32_t *distances, idx_t *labels,
202
+ size_t k) const = 0;
203
+
204
+ virtual ~BinaryInvertedListScanner () {}
205
+
206
+ };
207
+
208
+
209
+ } // namespace faiss
210
+
211
+ #endif // FAISS_INDEX_BINARY_IVF_H
@@ -0,0 +1,508 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #include <faiss/IndexFlat.h>
11
+
12
+ #include <cstring>
13
+ #include <faiss/utils/distances.h>
14
+ #include <faiss/utils/extra_distances.h>
15
+ #include <faiss/utils/utils.h>
16
+ #include <faiss/utils/Heap.h>
17
+ #include <faiss/impl/FaissAssert.h>
18
+ #include <faiss/impl/AuxIndexStructures.h>
19
+
20
+
21
+ namespace faiss {
22
+
23
+ IndexFlat::IndexFlat (idx_t d, MetricType metric):
24
+ Index(d, metric)
25
+ {
26
+ }
27
+
28
+
29
+
30
+ void IndexFlat::add (idx_t n, const float *x) {
31
+ xb.insert(xb.end(), x, x + n * d);
32
+ ntotal += n;
33
+ }
34
+
35
+
36
+ void IndexFlat::reset() {
37
+ xb.clear();
38
+ ntotal = 0;
39
+ }
40
+
41
+
42
+ void IndexFlat::search (idx_t n, const float *x, idx_t k,
43
+ float *distances, idx_t *labels) const
44
+ {
45
+ // we see the distances and labels as heaps
46
+
47
+ if (metric_type == METRIC_INNER_PRODUCT) {
48
+ float_minheap_array_t res = {
49
+ size_t(n), size_t(k), labels, distances};
50
+ knn_inner_product (x, xb.data(), d, n, ntotal, &res);
51
+ } else if (metric_type == METRIC_L2) {
52
+ float_maxheap_array_t res = {
53
+ size_t(n), size_t(k), labels, distances};
54
+ knn_L2sqr (x, xb.data(), d, n, ntotal, &res);
55
+ } else {
56
+ float_maxheap_array_t res = {
57
+ size_t(n), size_t(k), labels, distances};
58
+ knn_extra_metrics (x, xb.data(), d, n, ntotal,
59
+ metric_type, metric_arg,
60
+ &res);
61
+ }
62
+ }
63
+
64
+ void IndexFlat::range_search (idx_t n, const float *x, float radius,
65
+ RangeSearchResult *result) const
66
+ {
67
+ switch (metric_type) {
68
+ case METRIC_INNER_PRODUCT:
69
+ range_search_inner_product (x, xb.data(), d, n, ntotal,
70
+ radius, result);
71
+ break;
72
+ case METRIC_L2:
73
+ range_search_L2sqr (x, xb.data(), d, n, ntotal, radius, result);
74
+ break;
75
+ default:
76
+ FAISS_THROW_MSG("metric type not supported");
77
+ }
78
+ }
79
+
80
+
81
+ void IndexFlat::compute_distance_subset (
82
+ idx_t n,
83
+ const float *x,
84
+ idx_t k,
85
+ float *distances,
86
+ const idx_t *labels) const
87
+ {
88
+ switch (metric_type) {
89
+ case METRIC_INNER_PRODUCT:
90
+ fvec_inner_products_by_idx (
91
+ distances,
92
+ x, xb.data(), labels, d, n, k);
93
+ break;
94
+ case METRIC_L2:
95
+ fvec_L2sqr_by_idx (
96
+ distances,
97
+ x, xb.data(), labels, d, n, k);
98
+ break;
99
+ default:
100
+ FAISS_THROW_MSG("metric type not supported");
101
+ }
102
+
103
+ }
104
+
105
+ size_t IndexFlat::remove_ids (const IDSelector & sel)
106
+ {
107
+ idx_t j = 0;
108
+ for (idx_t i = 0; i < ntotal; i++) {
109
+ if (sel.is_member (i)) {
110
+ // should be removed
111
+ } else {
112
+ if (i > j) {
113
+ memmove (&xb[d * j], &xb[d * i], sizeof(xb[0]) * d);
114
+ }
115
+ j++;
116
+ }
117
+ }
118
+ size_t nremove = ntotal - j;
119
+ if (nremove > 0) {
120
+ ntotal = j;
121
+ xb.resize (ntotal * d);
122
+ }
123
+ return nremove;
124
+ }
125
+
126
+
127
+ namespace {
128
+
129
+
130
+ struct FlatL2Dis : DistanceComputer {
131
+ size_t d;
132
+ Index::idx_t nb;
133
+ const float *q;
134
+ const float *b;
135
+ size_t ndis;
136
+
137
+ float operator () (idx_t i) override {
138
+ ndis++;
139
+ return fvec_L2sqr(q, b + i * d, d);
140
+ }
141
+
142
+ float symmetric_dis(idx_t i, idx_t j) override {
143
+ return fvec_L2sqr(b + j * d, b + i * d, d);
144
+ }
145
+
146
+ explicit FlatL2Dis(const IndexFlat& storage, const float *q = nullptr)
147
+ : d(storage.d),
148
+ nb(storage.ntotal),
149
+ q(q),
150
+ b(storage.xb.data()),
151
+ ndis(0) {}
152
+
153
+ void set_query(const float *x) override {
154
+ q = x;
155
+ }
156
+ };
157
+
158
+ struct FlatIPDis : DistanceComputer {
159
+ size_t d;
160
+ Index::idx_t nb;
161
+ const float *q;
162
+ const float *b;
163
+ size_t ndis;
164
+
165
+ float operator () (idx_t i) override {
166
+ ndis++;
167
+ return fvec_inner_product (q, b + i * d, d);
168
+ }
169
+
170
+ float symmetric_dis(idx_t i, idx_t j) override {
171
+ return fvec_inner_product (b + j * d, b + i * d, d);
172
+ }
173
+
174
+ explicit FlatIPDis(const IndexFlat& storage, const float *q = nullptr)
175
+ : d(storage.d),
176
+ nb(storage.ntotal),
177
+ q(q),
178
+ b(storage.xb.data()),
179
+ ndis(0) {}
180
+
181
+ void set_query(const float *x) override {
182
+ q = x;
183
+ }
184
+ };
185
+
186
+
187
+
188
+
189
+ } // namespace
190
+
191
+
192
+ DistanceComputer * IndexFlat::get_distance_computer() const {
193
+ if (metric_type == METRIC_L2) {
194
+ return new FlatL2Dis(*this);
195
+ } else if (metric_type == METRIC_INNER_PRODUCT) {
196
+ return new FlatIPDis(*this);
197
+ } else {
198
+ return get_extra_distance_computer (d, metric_type, metric_arg,
199
+ ntotal, xb.data());
200
+ }
201
+ }
202
+
203
+
204
+ void IndexFlat::reconstruct (idx_t key, float * recons) const
205
+ {
206
+ memcpy (recons, &(xb[key * d]), sizeof(*recons) * d);
207
+ }
208
+
209
+
210
+ /* The standalone codec interface */
211
+ size_t IndexFlat::sa_code_size () const
212
+ {
213
+ return sizeof(float) * d;
214
+ }
215
+
216
+ void IndexFlat::sa_encode (idx_t n, const float *x, uint8_t *bytes) const
217
+ {
218
+ memcpy (bytes, x, sizeof(float) * d * n);
219
+ }
220
+
221
+ void IndexFlat::sa_decode (idx_t n, const uint8_t *bytes, float *x) const
222
+ {
223
+ memcpy (x, bytes, sizeof(float) * d * n);
224
+ }
225
+
226
+
227
+
228
+
229
+ /***************************************************
230
+ * IndexFlatL2BaseShift
231
+ ***************************************************/
232
+
233
+ IndexFlatL2BaseShift::IndexFlatL2BaseShift (idx_t d, size_t nshift, const float *shift):
234
+ IndexFlatL2 (d), shift (nshift)
235
+ {
236
+ memcpy (this->shift.data(), shift, sizeof(float) * nshift);
237
+ }
238
+
239
+ void IndexFlatL2BaseShift::search (
240
+ idx_t n,
241
+ const float *x,
242
+ idx_t k,
243
+ float *distances,
244
+ idx_t *labels) const
245
+ {
246
+ FAISS_THROW_IF_NOT (shift.size() == ntotal);
247
+
248
+ float_maxheap_array_t res = {
249
+ size_t(n), size_t(k), labels, distances};
250
+ knn_L2sqr_base_shift (x, xb.data(), d, n, ntotal, &res, shift.data());
251
+ }
252
+
253
+
254
+
255
+ /***************************************************
256
+ * IndexRefineFlat
257
+ ***************************************************/
258
+
259
+ IndexRefineFlat::IndexRefineFlat (Index *base_index):
260
+ Index (base_index->d, base_index->metric_type),
261
+ refine_index (base_index->d, base_index->metric_type),
262
+ base_index (base_index), own_fields (false),
263
+ k_factor (1)
264
+ {
265
+ is_trained = base_index->is_trained;
266
+ FAISS_THROW_IF_NOT_MSG (base_index->ntotal == 0,
267
+ "base_index should be empty in the beginning");
268
+ }
269
+
270
+ IndexRefineFlat::IndexRefineFlat () {
271
+ base_index = nullptr;
272
+ own_fields = false;
273
+ k_factor = 1;
274
+ }
275
+
276
+
277
+ void IndexRefineFlat::train (idx_t n, const float *x)
278
+ {
279
+ base_index->train (n, x);
280
+ is_trained = true;
281
+ }
282
+
283
+ void IndexRefineFlat::add (idx_t n, const float *x) {
284
+ FAISS_THROW_IF_NOT (is_trained);
285
+ base_index->add (n, x);
286
+ refine_index.add (n, x);
287
+ ntotal = refine_index.ntotal;
288
+ }
289
+
290
+ void IndexRefineFlat::reset ()
291
+ {
292
+ base_index->reset ();
293
+ refine_index.reset ();
294
+ ntotal = 0;
295
+ }
296
+
297
+ namespace {
298
+ typedef faiss::Index::idx_t idx_t;
299
+
300
+ template<class C>
301
+ static void reorder_2_heaps (
302
+ idx_t n,
303
+ idx_t k, idx_t *labels, float *distances,
304
+ idx_t k_base, const idx_t *base_labels, const float *base_distances)
305
+ {
306
+ #pragma omp parallel for
307
+ for (idx_t i = 0; i < n; i++) {
308
+ idx_t *idxo = labels + i * k;
309
+ float *diso = distances + i * k;
310
+ const idx_t *idxi = base_labels + i * k_base;
311
+ const float *disi = base_distances + i * k_base;
312
+
313
+ heap_heapify<C> (k, diso, idxo, disi, idxi, k);
314
+ if (k_base != k) { // add remaining elements
315
+ heap_addn<C> (k, diso, idxo, disi + k, idxi + k, k_base - k);
316
+ }
317
+ heap_reorder<C> (k, diso, idxo);
318
+ }
319
+ }
320
+
321
+
322
+ }
323
+
324
+
325
+ void IndexRefineFlat::search (
326
+ idx_t n, const float *x, idx_t k,
327
+ float *distances, idx_t *labels) const
328
+ {
329
+ FAISS_THROW_IF_NOT (is_trained);
330
+ idx_t k_base = idx_t (k * k_factor);
331
+ idx_t * base_labels = labels;
332
+ float * base_distances = distances;
333
+ ScopeDeleter<idx_t> del1;
334
+ ScopeDeleter<float> del2;
335
+
336
+
337
+ if (k != k_base) {
338
+ base_labels = new idx_t [n * k_base];
339
+ del1.set (base_labels);
340
+ base_distances = new float [n * k_base];
341
+ del2.set (base_distances);
342
+ }
343
+
344
+ base_index->search (n, x, k_base, base_distances, base_labels);
345
+
346
+ for (int i = 0; i < n * k_base; i++)
347
+ assert (base_labels[i] >= -1 &&
348
+ base_labels[i] < ntotal);
349
+
350
+ // compute refined distances
351
+ refine_index.compute_distance_subset (
352
+ n, x, k_base, base_distances, base_labels);
353
+
354
+ // sort and store result
355
+ if (metric_type == METRIC_L2) {
356
+ typedef CMax <float, idx_t> C;
357
+ reorder_2_heaps<C> (
358
+ n, k, labels, distances,
359
+ k_base, base_labels, base_distances);
360
+
361
+ } else if (metric_type == METRIC_INNER_PRODUCT) {
362
+ typedef CMin <float, idx_t> C;
363
+ reorder_2_heaps<C> (
364
+ n, k, labels, distances,
365
+ k_base, base_labels, base_distances);
366
+ } else {
367
+ FAISS_THROW_MSG("Metric type not supported");
368
+ }
369
+
370
+ }
371
+
372
+
373
+
374
+ IndexRefineFlat::~IndexRefineFlat ()
375
+ {
376
+ if (own_fields) delete base_index;
377
+ }
378
+
379
+ /***************************************************
380
+ * IndexFlat1D
381
+ ***************************************************/
382
+
383
+
384
+ IndexFlat1D::IndexFlat1D (bool continuous_update):
385
+ IndexFlatL2 (1),
386
+ continuous_update (continuous_update)
387
+ {
388
+ }
389
+
390
+ /// if not continuous_update, call this between the last add and
391
+ /// the first search
392
+ void IndexFlat1D::update_permutation ()
393
+ {
394
+ perm.resize (ntotal);
395
+ if (ntotal < 1000000) {
396
+ fvec_argsort (ntotal, xb.data(), (size_t*)perm.data());
397
+ } else {
398
+ fvec_argsort_parallel (ntotal, xb.data(), (size_t*)perm.data());
399
+ }
400
+ }
401
+
402
+ void IndexFlat1D::add (idx_t n, const float *x)
403
+ {
404
+ IndexFlatL2::add (n, x);
405
+ if (continuous_update)
406
+ update_permutation();
407
+ }
408
+
409
+ void IndexFlat1D::reset()
410
+ {
411
+ IndexFlatL2::reset();
412
+ perm.clear();
413
+ }
414
+
415
+ void IndexFlat1D::search (
416
+ idx_t n,
417
+ const float *x,
418
+ idx_t k,
419
+ float *distances,
420
+ idx_t *labels) const
421
+ {
422
+ FAISS_THROW_IF_NOT_MSG (perm.size() == ntotal,
423
+ "Call update_permutation before search");
424
+
425
+ #pragma omp parallel for
426
+ for (idx_t i = 0; i < n; i++) {
427
+
428
+ float q = x[i]; // query
429
+ float *D = distances + i * k;
430
+ idx_t *I = labels + i * k;
431
+
432
+ // binary search
433
+ idx_t i0 = 0, i1 = ntotal;
434
+ idx_t wp = 0;
435
+
436
+ if (xb[perm[i0]] > q) {
437
+ i1 = 0;
438
+ goto finish_right;
439
+ }
440
+
441
+ if (xb[perm[i1 - 1]] <= q) {
442
+ i0 = i1 - 1;
443
+ goto finish_left;
444
+ }
445
+
446
+ while (i0 + 1 < i1) {
447
+ idx_t imed = (i0 + i1) / 2;
448
+ if (xb[perm[imed]] <= q) i0 = imed;
449
+ else i1 = imed;
450
+ }
451
+
452
+ // query is between xb[perm[i0]] and xb[perm[i1]]
453
+ // expand to nearest neighs
454
+
455
+ while (wp < k) {
456
+ float xleft = xb[perm[i0]];
457
+ float xright = xb[perm[i1]];
458
+
459
+ if (q - xleft < xright - q) {
460
+ D[wp] = q - xleft;
461
+ I[wp] = perm[i0];
462
+ i0--; wp++;
463
+ if (i0 < 0) { goto finish_right; }
464
+ } else {
465
+ D[wp] = xright - q;
466
+ I[wp] = perm[i1];
467
+ i1++; wp++;
468
+ if (i1 >= ntotal) { goto finish_left; }
469
+ }
470
+ }
471
+ goto done;
472
+
473
+ finish_right:
474
+ // grow to the right from i1
475
+ while (wp < k) {
476
+ if (i1 < ntotal) {
477
+ D[wp] = xb[perm[i1]] - q;
478
+ I[wp] = perm[i1];
479
+ i1++;
480
+ } else {
481
+ D[wp] = std::numeric_limits<float>::infinity();
482
+ I[wp] = -1;
483
+ }
484
+ wp++;
485
+ }
486
+ goto done;
487
+
488
+ finish_left:
489
+ // grow to the left from i0
490
+ while (wp < k) {
491
+ if (i0 >= 0) {
492
+ D[wp] = q - xb[perm[i0]];
493
+ I[wp] = perm[i0];
494
+ i0--;
495
+ } else {
496
+ D[wp] = std::numeric_limits<float>::infinity();
497
+ I[wp] = -1;
498
+ }
499
+ wp++;
500
+ }
501
+ done: ;
502
+ }
503
+
504
+ }
505
+
506
+
507
+
508
+ } // namespace faiss