faiss 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,211 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #ifndef FAISS_INDEX_BINARY_IVF_H
11
+ #define FAISS_INDEX_BINARY_IVF_H
12
+
13
+
14
+ #include <vector>
15
+
16
+ #include <faiss/IndexBinary.h>
17
+ #include <faiss/IndexIVF.h>
18
+ #include <faiss/Clustering.h>
19
+ #include <faiss/utils/Heap.h>
20
+
21
+
22
+ namespace faiss {
23
+
24
+ struct BinaryInvertedListScanner;
25
+
26
+ /** Index based on an inverted file (IVF)
27
+ *
28
+ * In the inverted file, the quantizer (an IndexBinary instance) provides a
29
+ * quantization index for each vector to be added. The quantization
30
+ * index maps to a list (aka inverted list or posting list), where the
31
+ * id of the vector is stored.
32
+ *
33
+ * Otherwise the object is similar to the IndexIVF
34
+ */
35
+ struct IndexBinaryIVF : IndexBinary {
36
+ /// Access to the actual data
37
+ InvertedLists *invlists;
38
+ bool own_invlists;
39
+
40
+ size_t nprobe; ///< number of probes at query time
41
+ size_t max_codes; ///< max nb of codes to visit to do a query
42
+
43
+ /** Select between using a heap or counting to select the k smallest values
44
+ * when scanning inverted lists.
45
+ */
46
+ bool use_heap = true;
47
+
48
+ /// map for direct access to the elements. Enables reconstruct().
49
+ bool maintain_direct_map;
50
+ std::vector<idx_t> direct_map;
51
+
52
+ IndexBinary *quantizer; ///< quantizer that maps vectors to inverted lists
53
+ size_t nlist; ///< number of possible key values
54
+
55
+ bool own_fields; ///< whether object owns the quantizer
56
+
57
+ ClusteringParameters cp; ///< to override default clustering params
58
+ Index *clustering_index; ///< to override index used during clustering
59
+
60
+ /** The Inverted file takes a quantizer (an IndexBinary) on input,
61
+ * which implements the function mapping a vector to a list
62
+ * identifier. The pointer is borrowed: the quantizer should not
63
+ * be deleted while the IndexBinaryIVF is in use.
64
+ */
65
+ IndexBinaryIVF(IndexBinary *quantizer, size_t d, size_t nlist);
66
+
67
+ IndexBinaryIVF();
68
+
69
+ ~IndexBinaryIVF() override;
70
+
71
+ void reset() override;
72
+
73
+ /// Trains the quantizer
74
+ void train(idx_t n, const uint8_t *x) override;
75
+
76
+ void add(idx_t n, const uint8_t *x) override;
77
+
78
+ void add_with_ids(idx_t n, const uint8_t *x, const idx_t *xids) override;
79
+
80
+ /// same as add_with_ids, with precomputed coarse quantizer
81
+ void add_core (idx_t n, const uint8_t * x, const idx_t *xids,
82
+ const idx_t *precomputed_idx);
83
+
84
+ /** Search a set of vectors, that are pre-quantized by the IVF
85
+ * quantizer. Fill in the corresponding heaps with the query
86
+ * results. search() calls this.
87
+ *
88
+ * @param n nb of vectors to query
89
+ * @param x query vectors, size n * d
90
+ * @param assign coarse quantization indices, size n * nprobe
91
+ * @param centroid_dis
92
+ * distances to coarse centroids, size n * nprobe
93
+ * @param distances
94
+ * output distances, size n * k
95
+ * @param labels output labels, size n * k
96
+ * @param store_pairs store inv list index + inv list offset
97
+ * in upper/lower 32 bits of result,
98
+ * instead of ids (used for reranking).
99
+ * @param params used to override the object's search parameters
100
+ */
101
+ void search_preassigned(idx_t n, const uint8_t *x, idx_t k,
102
+ const idx_t *assign,
103
+ const int32_t *centroid_dis,
104
+ int32_t *distances, idx_t *labels,
105
+ bool store_pairs,
106
+ const IVFSearchParameters *params=nullptr
107
+ ) const;
108
+
109
+ virtual BinaryInvertedListScanner *get_InvertedListScanner (
110
+ bool store_pairs=false) const;
111
+
112
+ /** assign the vectors, then call search_preassigned */
113
+ virtual void search(idx_t n, const uint8_t *x, idx_t k,
114
+ int32_t *distances, idx_t *labels) const override;
115
+
116
+ void reconstruct(idx_t key, uint8_t *recons) const override;
117
+
118
+ /** Reconstruct a subset of the indexed vectors.
119
+ *
120
+ * Overrides default implementation to bypass reconstruct() which requires
121
+ * direct_map to be maintained.
122
+ *
123
+ * @param i0 first vector to reconstruct
124
+ * @param ni nb of vectors to reconstruct
125
+ * @param recons output array of reconstructed vectors, size ni * d / 8
126
+ */
127
+ void reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const override;
128
+
129
+ /** Similar to search, but also reconstructs the stored vectors (or an
130
+ * approximation in the case of lossy coding) for the search results.
131
+ *
132
+ * Overrides default implementation to avoid having to maintain direct_map
133
+ * and instead fetch the code offsets through the `store_pairs` flag in
134
+ * search_preassigned().
135
+ *
136
+ * @param recons reconstructed vectors size (n, k, d / 8)
137
+ */
138
+ void search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k,
139
+ int32_t *distances, idx_t *labels,
140
+ uint8_t *recons) const override;
141
+
142
+ /** Reconstruct a vector given the location in terms of (inv list index +
143
+ * inv list offset) instead of the id.
144
+ *
145
+ * Useful for reconstructing when the direct_map is not maintained and
146
+ * the inv list offset is computed by search_preassigned() with
147
+ * `store_pairs` set.
148
+ */
149
+ virtual void reconstruct_from_offset(idx_t list_no, idx_t offset,
150
+ uint8_t* recons) const;
151
+
152
+
153
+ /// Dataset manipulation functions
154
+ size_t remove_ids(const IDSelector& sel) override;
155
+
156
+ /** moves the entries from another dataset to self. On output,
157
+ * other is empty. add_id is added to all moved ids (for
158
+ * sequential ids, this would be this->ntotal) */
159
+ virtual void merge_from(IndexBinaryIVF& other, idx_t add_id);
160
+
161
+ size_t get_list_size(size_t list_no) const
162
+ { return invlists->list_size(list_no); }
163
+
164
+ /** initialize a direct map
165
+ *
166
+ * @param new_maintain_direct_map if true, create a direct map,
167
+ * else clear it
168
+ */
169
+ void make_direct_map(bool new_maintain_direct_map=true);
170
+
171
+ void replace_invlists(InvertedLists *il, bool own=false);
172
+ };
173
+
174
+
175
+ struct BinaryInvertedListScanner {
176
+
177
+ using idx_t = Index::idx_t;
178
+
179
+ /// from now on we handle this query.
180
+ virtual void set_query (const uint8_t *query_vector) = 0;
181
+
182
+ /// following codes come from this inverted list
183
+ virtual void set_list (idx_t list_no, uint8_t coarse_dis) = 0;
184
+
185
+ /// compute a single query-to-code distance
186
+ virtual uint32_t distance_to_code (const uint8_t *code) const = 0;
187
+
188
+ /** compute the distances to codes. (distances, labels) should be
189
+ * organized as a min- or max-heap
190
+ *
191
+ * @param n number of codes to scan
192
+ * @param codes codes to scan (n * code_size)
193
+ * @param ids corresponding ids (ignored if store_pairs)
194
+ * @param distances heap distances (size k)
195
+ * @param labels heap labels (size k)
196
+ * @param k heap size
197
+ */
198
+ virtual size_t scan_codes (size_t n,
199
+ const uint8_t *codes,
200
+ const idx_t *ids,
201
+ int32_t *distances, idx_t *labels,
202
+ size_t k) const = 0;
203
+
204
+ virtual ~BinaryInvertedListScanner () {}
205
+
206
+ };
207
+
208
+
209
+ } // namespace faiss
210
+
211
+ #endif // FAISS_INDEX_BINARY_IVF_H
@@ -0,0 +1,508 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #include <faiss/IndexFlat.h>
11
+
12
+ #include <cstring>
13
+ #include <faiss/utils/distances.h>
14
+ #include <faiss/utils/extra_distances.h>
15
+ #include <faiss/utils/utils.h>
16
+ #include <faiss/utils/Heap.h>
17
+ #include <faiss/impl/FaissAssert.h>
18
+ #include <faiss/impl/AuxIndexStructures.h>
19
+
20
+
21
+ namespace faiss {
22
+
23
+ IndexFlat::IndexFlat (idx_t d, MetricType metric):
24
+ Index(d, metric)
25
+ {
26
+ }
27
+
28
+
29
+
30
+ void IndexFlat::add (idx_t n, const float *x) {
31
+ xb.insert(xb.end(), x, x + n * d);
32
+ ntotal += n;
33
+ }
34
+
35
+
36
+ void IndexFlat::reset() {
37
+ xb.clear();
38
+ ntotal = 0;
39
+ }
40
+
41
+
42
+ void IndexFlat::search (idx_t n, const float *x, idx_t k,
43
+ float *distances, idx_t *labels) const
44
+ {
45
+ // we see the distances and labels as heaps
46
+
47
+ if (metric_type == METRIC_INNER_PRODUCT) {
48
+ float_minheap_array_t res = {
49
+ size_t(n), size_t(k), labels, distances};
50
+ knn_inner_product (x, xb.data(), d, n, ntotal, &res);
51
+ } else if (metric_type == METRIC_L2) {
52
+ float_maxheap_array_t res = {
53
+ size_t(n), size_t(k), labels, distances};
54
+ knn_L2sqr (x, xb.data(), d, n, ntotal, &res);
55
+ } else {
56
+ float_maxheap_array_t res = {
57
+ size_t(n), size_t(k), labels, distances};
58
+ knn_extra_metrics (x, xb.data(), d, n, ntotal,
59
+ metric_type, metric_arg,
60
+ &res);
61
+ }
62
+ }
63
+
64
+ void IndexFlat::range_search (idx_t n, const float *x, float radius,
65
+ RangeSearchResult *result) const
66
+ {
67
+ switch (metric_type) {
68
+ case METRIC_INNER_PRODUCT:
69
+ range_search_inner_product (x, xb.data(), d, n, ntotal,
70
+ radius, result);
71
+ break;
72
+ case METRIC_L2:
73
+ range_search_L2sqr (x, xb.data(), d, n, ntotal, radius, result);
74
+ break;
75
+ default:
76
+ FAISS_THROW_MSG("metric type not supported");
77
+ }
78
+ }
79
+
80
+
81
+ void IndexFlat::compute_distance_subset (
82
+ idx_t n,
83
+ const float *x,
84
+ idx_t k,
85
+ float *distances,
86
+ const idx_t *labels) const
87
+ {
88
+ switch (metric_type) {
89
+ case METRIC_INNER_PRODUCT:
90
+ fvec_inner_products_by_idx (
91
+ distances,
92
+ x, xb.data(), labels, d, n, k);
93
+ break;
94
+ case METRIC_L2:
95
+ fvec_L2sqr_by_idx (
96
+ distances,
97
+ x, xb.data(), labels, d, n, k);
98
+ break;
99
+ default:
100
+ FAISS_THROW_MSG("metric type not supported");
101
+ }
102
+
103
+ }
104
+
105
+ size_t IndexFlat::remove_ids (const IDSelector & sel)
106
+ {
107
+ idx_t j = 0;
108
+ for (idx_t i = 0; i < ntotal; i++) {
109
+ if (sel.is_member (i)) {
110
+ // should be removed
111
+ } else {
112
+ if (i > j) {
113
+ memmove (&xb[d * j], &xb[d * i], sizeof(xb[0]) * d);
114
+ }
115
+ j++;
116
+ }
117
+ }
118
+ size_t nremove = ntotal - j;
119
+ if (nremove > 0) {
120
+ ntotal = j;
121
+ xb.resize (ntotal * d);
122
+ }
123
+ return nremove;
124
+ }
125
+
126
+
127
+ namespace {
128
+
129
+
130
+ struct FlatL2Dis : DistanceComputer {
131
+ size_t d;
132
+ Index::idx_t nb;
133
+ const float *q;
134
+ const float *b;
135
+ size_t ndis;
136
+
137
+ float operator () (idx_t i) override {
138
+ ndis++;
139
+ return fvec_L2sqr(q, b + i * d, d);
140
+ }
141
+
142
+ float symmetric_dis(idx_t i, idx_t j) override {
143
+ return fvec_L2sqr(b + j * d, b + i * d, d);
144
+ }
145
+
146
+ explicit FlatL2Dis(const IndexFlat& storage, const float *q = nullptr)
147
+ : d(storage.d),
148
+ nb(storage.ntotal),
149
+ q(q),
150
+ b(storage.xb.data()),
151
+ ndis(0) {}
152
+
153
+ void set_query(const float *x) override {
154
+ q = x;
155
+ }
156
+ };
157
+
158
+ struct FlatIPDis : DistanceComputer {
159
+ size_t d;
160
+ Index::idx_t nb;
161
+ const float *q;
162
+ const float *b;
163
+ size_t ndis;
164
+
165
+ float operator () (idx_t i) override {
166
+ ndis++;
167
+ return fvec_inner_product (q, b + i * d, d);
168
+ }
169
+
170
+ float symmetric_dis(idx_t i, idx_t j) override {
171
+ return fvec_inner_product (b + j * d, b + i * d, d);
172
+ }
173
+
174
+ explicit FlatIPDis(const IndexFlat& storage, const float *q = nullptr)
175
+ : d(storage.d),
176
+ nb(storage.ntotal),
177
+ q(q),
178
+ b(storage.xb.data()),
179
+ ndis(0) {}
180
+
181
+ void set_query(const float *x) override {
182
+ q = x;
183
+ }
184
+ };
185
+
186
+
187
+
188
+
189
+ } // namespace
190
+
191
+
192
+ DistanceComputer * IndexFlat::get_distance_computer() const {
193
+ if (metric_type == METRIC_L2) {
194
+ return new FlatL2Dis(*this);
195
+ } else if (metric_type == METRIC_INNER_PRODUCT) {
196
+ return new FlatIPDis(*this);
197
+ } else {
198
+ return get_extra_distance_computer (d, metric_type, metric_arg,
199
+ ntotal, xb.data());
200
+ }
201
+ }
202
+
203
+
204
+ void IndexFlat::reconstruct (idx_t key, float * recons) const
205
+ {
206
+ memcpy (recons, &(xb[key * d]), sizeof(*recons) * d);
207
+ }
208
+
209
+
210
+ /* The standalone codec interface */
211
+ size_t IndexFlat::sa_code_size () const
212
+ {
213
+ return sizeof(float) * d;
214
+ }
215
+
216
+ void IndexFlat::sa_encode (idx_t n, const float *x, uint8_t *bytes) const
217
+ {
218
+ memcpy (bytes, x, sizeof(float) * d * n);
219
+ }
220
+
221
+ void IndexFlat::sa_decode (idx_t n, const uint8_t *bytes, float *x) const
222
+ {
223
+ memcpy (x, bytes, sizeof(float) * d * n);
224
+ }
225
+
226
+
227
+
228
+
229
+ /***************************************************
230
+ * IndexFlatL2BaseShift
231
+ ***************************************************/
232
+
233
+ IndexFlatL2BaseShift::IndexFlatL2BaseShift (idx_t d, size_t nshift, const float *shift):
234
+ IndexFlatL2 (d), shift (nshift)
235
+ {
236
+ memcpy (this->shift.data(), shift, sizeof(float) * nshift);
237
+ }
238
+
239
+ void IndexFlatL2BaseShift::search (
240
+ idx_t n,
241
+ const float *x,
242
+ idx_t k,
243
+ float *distances,
244
+ idx_t *labels) const
245
+ {
246
+ FAISS_THROW_IF_NOT (shift.size() == ntotal);
247
+
248
+ float_maxheap_array_t res = {
249
+ size_t(n), size_t(k), labels, distances};
250
+ knn_L2sqr_base_shift (x, xb.data(), d, n, ntotal, &res, shift.data());
251
+ }
252
+
253
+
254
+
255
+ /***************************************************
256
+ * IndexRefineFlat
257
+ ***************************************************/
258
+
259
+ IndexRefineFlat::IndexRefineFlat (Index *base_index):
260
+ Index (base_index->d, base_index->metric_type),
261
+ refine_index (base_index->d, base_index->metric_type),
262
+ base_index (base_index), own_fields (false),
263
+ k_factor (1)
264
+ {
265
+ is_trained = base_index->is_trained;
266
+ FAISS_THROW_IF_NOT_MSG (base_index->ntotal == 0,
267
+ "base_index should be empty in the beginning");
268
+ }
269
+
270
+ IndexRefineFlat::IndexRefineFlat () {
271
+ base_index = nullptr;
272
+ own_fields = false;
273
+ k_factor = 1;
274
+ }
275
+
276
+
277
+ void IndexRefineFlat::train (idx_t n, const float *x)
278
+ {
279
+ base_index->train (n, x);
280
+ is_trained = true;
281
+ }
282
+
283
+ void IndexRefineFlat::add (idx_t n, const float *x) {
284
+ FAISS_THROW_IF_NOT (is_trained);
285
+ base_index->add (n, x);
286
+ refine_index.add (n, x);
287
+ ntotal = refine_index.ntotal;
288
+ }
289
+
290
+ void IndexRefineFlat::reset ()
291
+ {
292
+ base_index->reset ();
293
+ refine_index.reset ();
294
+ ntotal = 0;
295
+ }
296
+
297
+ namespace {
298
+ typedef faiss::Index::idx_t idx_t;
299
+
300
+ template<class C>
301
+ static void reorder_2_heaps (
302
+ idx_t n,
303
+ idx_t k, idx_t *labels, float *distances,
304
+ idx_t k_base, const idx_t *base_labels, const float *base_distances)
305
+ {
306
+ #pragma omp parallel for
307
+ for (idx_t i = 0; i < n; i++) {
308
+ idx_t *idxo = labels + i * k;
309
+ float *diso = distances + i * k;
310
+ const idx_t *idxi = base_labels + i * k_base;
311
+ const float *disi = base_distances + i * k_base;
312
+
313
+ heap_heapify<C> (k, diso, idxo, disi, idxi, k);
314
+ if (k_base != k) { // add remaining elements
315
+ heap_addn<C> (k, diso, idxo, disi + k, idxi + k, k_base - k);
316
+ }
317
+ heap_reorder<C> (k, diso, idxo);
318
+ }
319
+ }
320
+
321
+
322
+ }
323
+
324
+
325
+ void IndexRefineFlat::search (
326
+ idx_t n, const float *x, idx_t k,
327
+ float *distances, idx_t *labels) const
328
+ {
329
+ FAISS_THROW_IF_NOT (is_trained);
330
+ idx_t k_base = idx_t (k * k_factor);
331
+ idx_t * base_labels = labels;
332
+ float * base_distances = distances;
333
+ ScopeDeleter<idx_t> del1;
334
+ ScopeDeleter<float> del2;
335
+
336
+
337
+ if (k != k_base) {
338
+ base_labels = new idx_t [n * k_base];
339
+ del1.set (base_labels);
340
+ base_distances = new float [n * k_base];
341
+ del2.set (base_distances);
342
+ }
343
+
344
+ base_index->search (n, x, k_base, base_distances, base_labels);
345
+
346
+ for (int i = 0; i < n * k_base; i++)
347
+ assert (base_labels[i] >= -1 &&
348
+ base_labels[i] < ntotal);
349
+
350
+ // compute refined distances
351
+ refine_index.compute_distance_subset (
352
+ n, x, k_base, base_distances, base_labels);
353
+
354
+ // sort and store result
355
+ if (metric_type == METRIC_L2) {
356
+ typedef CMax <float, idx_t> C;
357
+ reorder_2_heaps<C> (
358
+ n, k, labels, distances,
359
+ k_base, base_labels, base_distances);
360
+
361
+ } else if (metric_type == METRIC_INNER_PRODUCT) {
362
+ typedef CMin <float, idx_t> C;
363
+ reorder_2_heaps<C> (
364
+ n, k, labels, distances,
365
+ k_base, base_labels, base_distances);
366
+ } else {
367
+ FAISS_THROW_MSG("Metric type not supported");
368
+ }
369
+
370
+ }
371
+
372
+
373
+
374
+ IndexRefineFlat::~IndexRefineFlat ()
375
+ {
376
+ if (own_fields) delete base_index;
377
+ }
378
+
379
+ /***************************************************
380
+ * IndexFlat1D
381
+ ***************************************************/
382
+
383
+
384
+ IndexFlat1D::IndexFlat1D (bool continuous_update):
385
+ IndexFlatL2 (1),
386
+ continuous_update (continuous_update)
387
+ {
388
+ }
389
+
390
+ /// if not continuous_update, call this between the last add and
391
+ /// the first search
392
+ void IndexFlat1D::update_permutation ()
393
+ {
394
+ perm.resize (ntotal);
395
+ if (ntotal < 1000000) {
396
+ fvec_argsort (ntotal, xb.data(), (size_t*)perm.data());
397
+ } else {
398
+ fvec_argsort_parallel (ntotal, xb.data(), (size_t*)perm.data());
399
+ }
400
+ }
401
+
402
+ void IndexFlat1D::add (idx_t n, const float *x)
403
+ {
404
+ IndexFlatL2::add (n, x);
405
+ if (continuous_update)
406
+ update_permutation();
407
+ }
408
+
409
+ void IndexFlat1D::reset()
410
+ {
411
+ IndexFlatL2::reset();
412
+ perm.clear();
413
+ }
414
+
415
+ void IndexFlat1D::search (
416
+ idx_t n,
417
+ const float *x,
418
+ idx_t k,
419
+ float *distances,
420
+ idx_t *labels) const
421
+ {
422
+ FAISS_THROW_IF_NOT_MSG (perm.size() == ntotal,
423
+ "Call update_permutation before search");
424
+
425
+ #pragma omp parallel for
426
+ for (idx_t i = 0; i < n; i++) {
427
+
428
+ float q = x[i]; // query
429
+ float *D = distances + i * k;
430
+ idx_t *I = labels + i * k;
431
+
432
+ // binary search
433
+ idx_t i0 = 0, i1 = ntotal;
434
+ idx_t wp = 0;
435
+
436
+ if (xb[perm[i0]] > q) {
437
+ i1 = 0;
438
+ goto finish_right;
439
+ }
440
+
441
+ if (xb[perm[i1 - 1]] <= q) {
442
+ i0 = i1 - 1;
443
+ goto finish_left;
444
+ }
445
+
446
+ while (i0 + 1 < i1) {
447
+ idx_t imed = (i0 + i1) / 2;
448
+ if (xb[perm[imed]] <= q) i0 = imed;
449
+ else i1 = imed;
450
+ }
451
+
452
+ // query is between xb[perm[i0]] and xb[perm[i1]]
453
+ // expand to nearest neighbors
454
+
455
+ while (wp < k) {
456
+ float xleft = xb[perm[i0]];
457
+ float xright = xb[perm[i1]];
458
+
459
+ if (q - xleft < xright - q) {
460
+ D[wp] = q - xleft;
461
+ I[wp] = perm[i0];
462
+ i0--; wp++;
463
+ if (i0 < 0) { goto finish_right; }
464
+ } else {
465
+ D[wp] = xright - q;
466
+ I[wp] = perm[i1];
467
+ i1++; wp++;
468
+ if (i1 >= ntotal) { goto finish_left; }
469
+ }
470
+ }
471
+ goto done;
472
+
473
+ finish_right:
474
+ // grow to the right from i1
475
+ while (wp < k) {
476
+ if (i1 < ntotal) {
477
+ D[wp] = xb[perm[i1]] - q;
478
+ I[wp] = perm[i1];
479
+ i1++;
480
+ } else {
481
+ D[wp] = std::numeric_limits<float>::infinity();
482
+ I[wp] = -1;
483
+ }
484
+ wp++;
485
+ }
486
+ goto done;
487
+
488
+ finish_left:
489
+ // grow to the left from i0
490
+ while (wp < k) {
491
+ if (i0 >= 0) {
492
+ D[wp] = q - xb[perm[i0]];
493
+ I[wp] = perm[i0];
494
+ i0--;
495
+ } else {
496
+ D[wp] = std::numeric_limits<float>::infinity();
497
+ I[wp] = -1;
498
+ }
499
+ wp++;
500
+ }
501
+ done: ;
502
+ }
503
+
504
+ }
505
+
506
+
507
+
508
+ } // namespace faiss