faiss 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,62 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #pragma once
11
+
12
+ #include <vector>
13
+ #include <string>
14
+ #include <unordered_map>
15
+ #include <stdint.h>
16
+
17
+
18
+ namespace faiss {
19
+
20
+
21
/** Gathers statistics about a dataset and produces remarks about them.
 *
 * Implemented as a struct rather than a free function so that, besides
 * the textual comments, every individual statistic remains accessible
 * to calling code. */
struct MatrixStats {
    /// NOTE(review): x presumably holds n vectors of dimension d stored
    /// contiguously -- confirm against the implementation file
    MatrixStats (size_t n, size_t d, const float *x);

    /// the textual remarks
    std::string comments;

    // raw statistics
    size_t n;
    size_t d;
    size_t n_collision;
    size_t n_valid;
    size_t n0;
    double min_norm2;
    double max_norm2;

    /// statistics kept for one dimension (one entry of per_dim_stats)
    struct PerDimStats {
        size_t n;
        size_t n_nan;
        size_t n_inf;
        size_t n0;

        float min;
        float max;
        double sum;
        double sum2;

        size_t n_valid;
        double mean;
        double stddev;

        PerDimStats ();
        void add (float x);
        void compute_mean_std ();
    };

    std::vector<PerDimStats> per_dim_stats;

    /// value type of the occurrences table
    struct Occurrence {
        size_t first;
        size_t count;
    };
    std::unordered_map<uint64_t, Occurrence> occurrences;

    // output buffer state used by do_comment
    // NOTE(review): ownership of buf is not visible here -- confirm
    char *buf;
    size_t nbuf;

    /// printf-style helper (format string followed by variadic arguments)
    void do_comment (const char *fmt, ...);

};
61
+
62
+ } // namespace faiss
@@ -0,0 +1,351 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #include <faiss/MetaIndexes.h>
11
+
12
+ #include <cstdio>
13
+ #include <stdint.h>
14
+
15
+ #include <faiss/impl/FaissAssert.h>
16
+ #include <faiss/utils/Heap.h>
17
+ #include <faiss/impl/AuxIndexStructures.h>
18
+ #include <faiss/utils/WorkerThread.h>
19
+
20
+
21
+ namespace faiss {
22
+
23
+ namespace {
24
+
25
+ typedef Index::idx_t idx_t;
26
+
27
+ } // namespace
28
+
29
+ /*****************************************************
30
+ * IndexIDMap implementation
31
+ *******************************************************/
32
+
33
+ template <typename IndexT>
34
+ IndexIDMapTemplate<IndexT>::IndexIDMapTemplate (IndexT *index):
35
+ index (index),
36
+ own_fields (false)
37
+ {
38
+ FAISS_THROW_IF_NOT_MSG (index->ntotal == 0, "index must be empty on input");
39
+ this->is_trained = index->is_trained;
40
+ this->metric_type = index->metric_type;
41
+ this->verbose = index->verbose;
42
+ this->d = index->d;
43
+ }
44
+
45
+ template <typename IndexT>
46
+ void IndexIDMapTemplate<IndexT>::add
47
+ (idx_t, const typename IndexT::component_t *)
48
+ {
49
+ FAISS_THROW_MSG ("add does not make sense with IndexIDMap, "
50
+ "use add_with_ids");
51
+ }
52
+
53
+
54
+ template <typename IndexT>
55
+ void IndexIDMapTemplate<IndexT>::train
56
+ (idx_t n, const typename IndexT::component_t *x)
57
+ {
58
+ index->train (n, x);
59
+ this->is_trained = index->is_trained;
60
+ }
61
+
62
+ template <typename IndexT>
63
+ void IndexIDMapTemplate<IndexT>::reset ()
64
+ {
65
+ index->reset ();
66
+ id_map.clear();
67
+ this->ntotal = 0;
68
+ }
69
+
70
+
71
+ template <typename IndexT>
72
+ void IndexIDMapTemplate<IndexT>::add_with_ids
73
+ (idx_t n, const typename IndexT::component_t * x,
74
+ const typename IndexT::idx_t *xids)
75
+ {
76
+ index->add (n, x);
77
+ for (idx_t i = 0; i < n; i++)
78
+ id_map.push_back (xids[i]);
79
+ this->ntotal = index->ntotal;
80
+ }
81
+
82
+
83
+ template <typename IndexT>
84
+ void IndexIDMapTemplate<IndexT>::search
85
+ (idx_t n, const typename IndexT::component_t *x, idx_t k,
86
+ typename IndexT::distance_t *distances, typename IndexT::idx_t *labels) const
87
+ {
88
+ index->search (n, x, k, distances, labels);
89
+ idx_t *li = labels;
90
+ #pragma omp parallel for
91
+ for (idx_t i = 0; i < n * k; i++) {
92
+ li[i] = li[i] < 0 ? li[i] : id_map[li[i]];
93
+ }
94
+ }
95
+
96
+
97
+ template <typename IndexT>
98
+ void IndexIDMapTemplate<IndexT>::range_search
99
+ (typename IndexT::idx_t n, const typename IndexT::component_t *x,
100
+ typename IndexT::distance_t radius, RangeSearchResult *result) const
101
+ {
102
+ index->range_search(n, x, radius, result);
103
+ #pragma omp parallel for
104
+ for (idx_t i = 0; i < result->lims[result->nq]; i++) {
105
+ result->labels[i] = result->labels[i] < 0 ?
106
+ result->labels[i] : id_map[result->labels[i]];
107
+ }
108
+ }
109
+
110
+ namespace {
111
+
112
+ struct IDTranslatedSelector: IDSelector {
113
+ const std::vector <int64_t> & id_map;
114
+ const IDSelector & sel;
115
+ IDTranslatedSelector (const std::vector <int64_t> & id_map,
116
+ const IDSelector & sel):
117
+ id_map (id_map), sel (sel)
118
+ {}
119
+ bool is_member(idx_t id) const override {
120
+ return sel.is_member(id_map[id]);
121
+ }
122
+ };
123
+
124
+ }
125
+
126
+ template <typename IndexT>
127
+ size_t IndexIDMapTemplate<IndexT>::remove_ids (const IDSelector & sel)
128
+ {
129
+ // remove in sub-index first
130
+ IDTranslatedSelector sel2 (id_map, sel);
131
+ size_t nremove = index->remove_ids (sel2);
132
+
133
+ int64_t j = 0;
134
+ for (idx_t i = 0; i < this->ntotal; i++) {
135
+ if (sel.is_member (id_map[i])) {
136
+ // remove
137
+ } else {
138
+ id_map[j] = id_map[i];
139
+ j++;
140
+ }
141
+ }
142
+ FAISS_ASSERT (j == index->ntotal);
143
+ this->ntotal = j;
144
+ id_map.resize(this->ntotal);
145
+ return nremove;
146
+ }
147
+
148
+ template <typename IndexT>
149
+ IndexIDMapTemplate<IndexT>::~IndexIDMapTemplate ()
150
+ {
151
+ if (own_fields) delete index;
152
+ }
153
+
154
+
155
+
156
+ /*****************************************************
157
+ * IndexIDMap2 implementation
158
+ *******************************************************/
159
+
160
+ template <typename IndexT>
161
+ IndexIDMap2Template<IndexT>::IndexIDMap2Template (IndexT *index):
162
+ IndexIDMapTemplate<IndexT> (index)
163
+ {}
164
+
165
+ template <typename IndexT>
166
+ void IndexIDMap2Template<IndexT>::add_with_ids
167
+ (idx_t n, const typename IndexT::component_t* x,
168
+ const typename IndexT::idx_t* xids)
169
+ {
170
+ size_t prev_ntotal = this->ntotal;
171
+ IndexIDMapTemplate<IndexT>::add_with_ids (n, x, xids);
172
+ for (size_t i = prev_ntotal; i < this->ntotal; i++) {
173
+ rev_map [this->id_map [i]] = i;
174
+ }
175
+ }
176
+
177
+ template <typename IndexT>
178
+ void IndexIDMap2Template<IndexT>::construct_rev_map ()
179
+ {
180
+ rev_map.clear ();
181
+ for (size_t i = 0; i < this->ntotal; i++) {
182
+ rev_map [this->id_map [i]] = i;
183
+ }
184
+ }
185
+
186
+
187
+ template <typename IndexT>
188
+ size_t IndexIDMap2Template<IndexT>::remove_ids(const IDSelector& sel)
189
+ {
190
+ // This is quite inefficient
191
+ size_t nremove = IndexIDMapTemplate<IndexT>::remove_ids (sel);
192
+ construct_rev_map ();
193
+ return nremove;
194
+ }
195
+
196
+ template <typename IndexT>
197
+ void IndexIDMap2Template<IndexT>::reconstruct
198
+ (idx_t key, typename IndexT::component_t * recons) const
199
+ {
200
+ try {
201
+ this->index->reconstruct (rev_map.at (key), recons);
202
+ } catch (const std::out_of_range& e) {
203
+ FAISS_THROW_FMT ("key %ld not found", key);
204
+ }
205
+ }
206
+
207
+
208
+ // explicit template instantiations
209
+
210
+ template struct IndexIDMapTemplate<Index>;
211
+ template struct IndexIDMapTemplate<IndexBinary>;
212
+ template struct IndexIDMap2Template<Index>;
213
+ template struct IndexIDMap2Template<IndexBinary>;
214
+
215
+
216
+ /*****************************************************
217
+ * IndexSplitVectors implementation
218
+ *******************************************************/
219
+
220
+
221
+ IndexSplitVectors::IndexSplitVectors (idx_t d, bool threaded):
222
+ Index (d), own_fields (false),
223
+ threaded (threaded), sum_d (0)
224
+ {
225
+
226
+ }
227
+
228
+ void IndexSplitVectors::add_sub_index (Index *index)
229
+ {
230
+ sub_indexes.push_back (index);
231
+ sync_with_sub_indexes ();
232
+ }
233
+
234
+ void IndexSplitVectors::sync_with_sub_indexes ()
235
+ {
236
+ if (sub_indexes.empty()) return;
237
+ Index * index0 = sub_indexes[0];
238
+ sum_d = index0->d;
239
+ metric_type = index0->metric_type;
240
+ is_trained = index0->is_trained;
241
+ ntotal = index0->ntotal;
242
+ for (int i = 1; i < sub_indexes.size(); i++) {
243
+ Index * index = sub_indexes[i];
244
+ FAISS_THROW_IF_NOT (metric_type == index->metric_type);
245
+ FAISS_THROW_IF_NOT (ntotal == index->ntotal);
246
+ sum_d += index->d;
247
+ }
248
+
249
+ }
250
+
251
+ void IndexSplitVectors::add(idx_t /*n*/, const float* /*x*/) {
252
+ FAISS_THROW_MSG("not implemented");
253
+ }
254
+
255
+
256
+
257
+ void IndexSplitVectors::search (
258
+ idx_t n, const float *x, idx_t k,
259
+ float *distances, idx_t *labels) const
260
+ {
261
+ FAISS_THROW_IF_NOT_MSG (k == 1,
262
+ "search implemented only for k=1");
263
+ FAISS_THROW_IF_NOT_MSG (sum_d == d,
264
+ "not enough indexes compared to # dimensions");
265
+
266
+ int64_t nshard = sub_indexes.size();
267
+ float *all_distances = new float [nshard * k * n];
268
+ idx_t *all_labels = new idx_t [nshard * k * n];
269
+ ScopeDeleter<float> del (all_distances);
270
+ ScopeDeleter<idx_t> del2 (all_labels);
271
+
272
+ auto query_func = [n, x, k, distances, labels, all_distances, all_labels, this]
273
+ (int no) {
274
+ const IndexSplitVectors *index = this;
275
+ float *distances1 = no == 0 ? distances : all_distances + no * k * n;
276
+ idx_t *labels1 = no == 0 ? labels : all_labels + no * k * n;
277
+ if (index->verbose)
278
+ printf ("begin query shard %d on %ld points\n", no, n);
279
+ const Index * sub_index = index->sub_indexes[no];
280
+ int64_t sub_d = sub_index->d, d = index->d;
281
+ idx_t ofs = 0;
282
+ for (int i = 0; i < no; i++) ofs += index->sub_indexes[i]->d;
283
+ float *sub_x = new float [sub_d * n];
284
+ ScopeDeleter<float> del1 (sub_x);
285
+ for (idx_t i = 0; i < n; i++)
286
+ memcpy (sub_x + i * sub_d, x + ofs + i * d, sub_d * sizeof (sub_x));
287
+ sub_index->search (n, sub_x, k, distances1, labels1);
288
+ if (index->verbose)
289
+ printf ("end query shard %d\n", no);
290
+ };
291
+
292
+ if (!threaded) {
293
+ for (int i = 0; i < nshard; i++) {
294
+ query_func(i);
295
+ }
296
+ } else {
297
+ std::vector<std::unique_ptr<WorkerThread> > threads;
298
+ std::vector<std::future<bool>> v;
299
+
300
+ for (int i = 0; i < nshard; i++) {
301
+ threads.emplace_back(new WorkerThread());
302
+ WorkerThread *wt = threads.back().get();
303
+ v.emplace_back(wt->add([i, query_func](){query_func(i); }));
304
+ }
305
+
306
+ // Blocking wait for completion
307
+ for (auto& func : v) {
308
+ func.get();
309
+ }
310
+ }
311
+
312
+ int64_t factor = 1;
313
+ for (int i = 0; i < nshard; i++) {
314
+ if (i > 0) { // results of 0 are already in the table
315
+ const float *distances_i = all_distances + i * k * n;
316
+ const idx_t *labels_i = all_labels + i * k * n;
317
+ for (int64_t j = 0; j < n; j++) {
318
+ if (labels[j] >= 0 && labels_i[j] >= 0) {
319
+ labels[j] += labels_i[j] * factor;
320
+ distances[j] += distances_i[j];
321
+ } else {
322
+ labels[j] = -1;
323
+ distances[j] = 0.0 / 0.0;
324
+ }
325
+ }
326
+ }
327
+ factor *= sub_indexes[i]->ntotal;
328
+ }
329
+
330
+ }
331
+
332
+ void IndexSplitVectors::train(idx_t /*n*/, const float* /*x*/) {
333
+ FAISS_THROW_MSG("not implemented");
334
+ }
335
+
336
+ void IndexSplitVectors::reset ()
337
+ {
338
+ FAISS_THROW_MSG ("not implemented");
339
+ }
340
+
341
+
342
+ IndexSplitVectors::~IndexSplitVectors ()
343
+ {
344
+ if (own_fields) {
345
+ for (int s = 0; s < sub_indexes.size(); s++)
346
+ delete sub_indexes [s];
347
+ }
348
+ }
349
+
350
+
351
+ } // namespace faiss
@@ -0,0 +1,126 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #ifndef META_INDEXES_H
11
+ #define META_INDEXES_H
12
+
13
+ #include <vector>
14
+ #include <unordered_map>
15
+ #include <faiss/Index.h>
16
+ #include <faiss/IndexShards.h>
17
+ #include <faiss/IndexReplicas.h>
18
+
19
+ namespace faiss {
20
+
21
+ /** Index that translates search results to ids */
22
+ template <typename IndexT>
23
+ struct IndexIDMapTemplate : IndexT {
24
+ using idx_t = typename IndexT::idx_t;
25
+ using component_t = typename IndexT::component_t;
26
+ using distance_t = typename IndexT::distance_t;
27
+
28
+ IndexT * index; ///! the sub-index
29
+ bool own_fields; ///! whether pointers are deleted in destructo
30
+ std::vector<idx_t> id_map;
31
+
32
+ explicit IndexIDMapTemplate (IndexT *index);
33
+
34
+ /// @param xids if non-null, ids to store for the vectors (size n)
35
+ void add_with_ids(idx_t n, const component_t* x, const idx_t* xids) override;
36
+
37
+ /// this will fail. Use add_with_ids
38
+ void add(idx_t n, const component_t* x) override;
39
+
40
+ void search(
41
+ idx_t n, const component_t* x, idx_t k,
42
+ distance_t* distances,
43
+ idx_t* labels) const override;
44
+
45
+ void train(idx_t n, const component_t* x) override;
46
+
47
+ void reset() override;
48
+
49
+ /// remove ids adapted to IndexFlat
50
+ size_t remove_ids(const IDSelector& sel) override;
51
+
52
+ void range_search (idx_t n, const component_t *x, distance_t radius,
53
+ RangeSearchResult *result) const override;
54
+
55
+ ~IndexIDMapTemplate () override;
56
+ IndexIDMapTemplate () {own_fields=false; index=nullptr; }
57
+ };
58
+
59
+ using IndexIDMap = IndexIDMapTemplate<Index>;
60
+ using IndexBinaryIDMap = IndexIDMapTemplate<IndexBinary>;
61
+
62
+
63
+ /** same as IndexIDMap but also provides an efficient reconstruction
64
+ * implementation via a 2-way index */
65
+ template <typename IndexT>
66
+ struct IndexIDMap2Template : IndexIDMapTemplate<IndexT> {
67
+ using idx_t = typename IndexT::idx_t;
68
+ using component_t = typename IndexT::component_t;
69
+ using distance_t = typename IndexT::distance_t;
70
+
71
+ std::unordered_map<idx_t, idx_t> rev_map;
72
+
73
+ explicit IndexIDMap2Template (IndexT *index);
74
+
75
+ /// make the rev_map from scratch
76
+ void construct_rev_map ();
77
+
78
+ void add_with_ids(idx_t n, const component_t* x, const idx_t* xids) override;
79
+
80
+ size_t remove_ids(const IDSelector& sel) override;
81
+
82
+ void reconstruct (idx_t key, component_t * recons) const override;
83
+
84
+ ~IndexIDMap2Template() override {}
85
+ IndexIDMap2Template () {}
86
+ };
87
+
88
+ using IndexIDMap2 = IndexIDMap2Template<Index>;
89
+ using IndexBinaryIDMap2 = IndexIDMap2Template<IndexBinary>;
90
+
91
+
92
+ /** splits input vectors in segments and assigns each segment to a sub-index
93
+ * used to distribute a MultiIndexQuantizer
94
+ */
95
+ struct IndexSplitVectors: Index {
96
+ bool own_fields;
97
+ bool threaded;
98
+ std::vector<Index*> sub_indexes;
99
+ idx_t sum_d; /// sum of dimensions seen so far
100
+
101
+ explicit IndexSplitVectors (idx_t d, bool threaded = false);
102
+
103
+ void add_sub_index (Index *);
104
+ void sync_with_sub_indexes ();
105
+
106
+ void add(idx_t n, const float* x) override;
107
+
108
+ void search(
109
+ idx_t n,
110
+ const float* x,
111
+ idx_t k,
112
+ float* distances,
113
+ idx_t* labels) const override;
114
+
115
+ void train(idx_t n, const float* x) override;
116
+
117
+ void reset() override;
118
+
119
+ ~IndexSplitVectors() override;
120
+ };
121
+
122
+
123
+ } // namespace faiss
124
+
125
+
126
+ #endif