faiss 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,101 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #ifndef FAISS_CLUSTERING_H
11
+ #define FAISS_CLUSTERING_H
12
+ #include <faiss/Index.h>
13
+
14
+ #include <vector>
15
+
16
+ namespace faiss {
17
+
18
+
19
/**
 * Settings that control a clustering run. An instance can be handed to
 * the constructor of the Clustering object.
 */
struct ClusteringParameters {
    int niter;  ///< number of clustering iterations
    int nredo;  ///< clustering is redone this many times; the best run is kept

    bool verbose;
    bool spherical;         ///< should the centroids be normalized?
    bool int_centroids;     ///< round centroid coordinates to integers
    bool update_index;      ///< should the index be updated after each iteration?
    bool frozen_centroids;  ///< keep the centroids given as input unchanged during iterations

    int min_points_per_centroid;  ///< a warning is issued otherwise
    int max_points_per_centroid;  ///< used to limit the size of the dataset

    int seed;  ///< seed for the random number generator

    /// Fills in reasonable default values.
    ClusteringParameters();
};
40
+
41
+
42
+ /** clustering based on assignment - centroid update iterations
43
+ *
44
+ * The clustering is based on an Index object that assigns training
45
+ * points to the centroids. Therefore, at each iteration the centroids
46
+ * are added to the index.
47
+ *
48
+ * On output, the centoids table is set to the latest version
49
+ * of the centroids and they are also added to the index. If the
50
+ * centroids table it is not empty on input, it is also used for
51
+ * initialization.
52
+ *
53
+ * To do several clusterings, just call train() several times on
54
+ * different training sets, clearing the centroid table in between.
55
+ */
56
+ struct Clustering: ClusteringParameters {
57
+ typedef Index::idx_t idx_t;
58
+ size_t d; ///< dimension of the vectors
59
+ size_t k; ///< nb of centroids
60
+
61
+ /// centroids (k * d)
62
+ std::vector<float> centroids;
63
+
64
+ /// objective values (sum of distances reported by index) over
65
+ /// iterations
66
+ std::vector<float> obj;
67
+
68
+ /// the only mandatory parameters are k and d
69
+ Clustering (int d, int k);
70
+ Clustering (int d, int k, const ClusteringParameters &cp);
71
+
72
+ /// Index is used during the assignment stage
73
+ virtual void train (idx_t n, const float * x, faiss::Index & index);
74
+
75
+ /// Post-process the centroids after each centroid update.
76
+ /// includes optional L2 normalization and nearest integer rounding
77
+ void post_process_centroids ();
78
+
79
+ virtual ~Clustering() {}
80
+ };
81
+
82
+
83
+ /** simplified interface
84
+ *
85
+ * @param d dimension of the data
86
+ * @param n nb of training vectors
87
+ * @param k nb of output centroids
88
+ * @param x training set (size n * d)
89
+ * @param centroids output centroids (size k * d)
90
+ * @return final quantization error
91
+ */
92
+ float kmeans_clustering (size_t d, size_t n, size_t k,
93
+ const float *x,
94
+ float *centroids);
95
+
96
+
97
+
98
+ }
99
+
100
+
101
+ #endif
@@ -0,0 +1,339 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #include <faiss/IVFlib.h>
11
+
12
+ #include <memory>
13
+
14
+ #include <faiss/IndexPreTransform.h>
15
+ #include <faiss/impl/FaissAssert.h>
16
+
17
+
18
+
19
+ namespace faiss { namespace ivflib {
20
+
21
+
22
+ void check_compatible_for_merge (const Index * index0,
23
+ const Index * index1)
24
+ {
25
+
26
+ const faiss::IndexPreTransform *pt0 =
27
+ dynamic_cast<const faiss::IndexPreTransform *>(index0);
28
+
29
+ if (pt0) {
30
+ const faiss::IndexPreTransform *pt1 =
31
+ dynamic_cast<const faiss::IndexPreTransform *>(index1);
32
+ FAISS_THROW_IF_NOT_MSG (pt1, "both indexes should be pretransforms");
33
+
34
+ FAISS_THROW_IF_NOT (pt0->chain.size() == pt1->chain.size());
35
+ for (int i = 0; i < pt0->chain.size(); i++) {
36
+ FAISS_THROW_IF_NOT (typeid(pt0->chain[i]) == typeid(pt1->chain[i]));
37
+ }
38
+
39
+ index0 = pt0->index;
40
+ index1 = pt1->index;
41
+ }
42
+ FAISS_THROW_IF_NOT (typeid(index0) == typeid(index1));
43
+ FAISS_THROW_IF_NOT (index0->d == index1->d &&
44
+ index0->metric_type == index1->metric_type);
45
+
46
+ const faiss::IndexIVF *ivf0 = dynamic_cast<const faiss::IndexIVF *>(index0);
47
+ if (ivf0) {
48
+ const faiss::IndexIVF *ivf1 =
49
+ dynamic_cast<const faiss::IndexIVF *>(index1);
50
+ FAISS_THROW_IF_NOT (ivf1);
51
+
52
+ ivf0->check_compatible_for_merge (*ivf1);
53
+ }
54
+
55
+ // TODO: check as thoroughfully for other index types
56
+
57
+ }
58
+
59
+ const IndexIVF * extract_index_ivf (const Index * index)
60
+ {
61
+ if (auto *pt =
62
+ dynamic_cast<const IndexPreTransform *>(index)) {
63
+ index = pt->index;
64
+ }
65
+
66
+ auto *ivf = dynamic_cast<const IndexIVF *>(index);
67
+
68
+ FAISS_THROW_IF_NOT (ivf);
69
+
70
+ return ivf;
71
+ }
72
+
73
+ IndexIVF * extract_index_ivf (Index * index) {
74
+ return const_cast<IndexIVF*> (extract_index_ivf ((const Index*)(index)));
75
+ }
76
+
77
+ void merge_into(faiss::Index *index0, faiss::Index *index1, bool shift_ids) {
78
+
79
+ check_compatible_for_merge (index0, index1);
80
+ IndexIVF * ivf0 = extract_index_ivf (index0);
81
+ IndexIVF * ivf1 = extract_index_ivf (index1);
82
+
83
+ ivf0->merge_from (*ivf1, shift_ids ? ivf0->ntotal : 0);
84
+
85
+ // useful for IndexPreTransform
86
+ index0->ntotal = ivf0->ntotal;
87
+ index1->ntotal = ivf1->ntotal;
88
+ }
89
+
90
+
91
+
92
+ void search_centroid(faiss::Index *index,
93
+ const float* x, int n,
94
+ idx_t* centroid_ids)
95
+ {
96
+ std::unique_ptr<float[]> del;
97
+ if (auto index_pre = dynamic_cast<faiss::IndexPreTransform*>(index)) {
98
+ x = index_pre->apply_chain(n, x);
99
+ del.reset((float*)x);
100
+ index = index_pre->index;
101
+ }
102
+ faiss::IndexIVF* index_ivf = dynamic_cast<faiss::IndexIVF*>(index);
103
+ assert(index_ivf);
104
+ index_ivf->quantizer->assign(n, x, centroid_ids);
105
+ }
106
+
107
+
108
+
109
+ void search_and_return_centroids(faiss::Index *index,
110
+ size_t n,
111
+ const float* xin,
112
+ long k,
113
+ float *distances,
114
+ idx_t* labels,
115
+ idx_t* query_centroid_ids,
116
+ idx_t* result_centroid_ids)
117
+ {
118
+ const float *x = xin;
119
+ std::unique_ptr<float []> del;
120
+ if (auto index_pre = dynamic_cast<faiss::IndexPreTransform*>(index)) {
121
+ x = index_pre->apply_chain(n, x);
122
+ del.reset((float*)x);
123
+ index = index_pre->index;
124
+ }
125
+ faiss::IndexIVF* index_ivf = dynamic_cast<faiss::IndexIVF*>(index);
126
+ assert(index_ivf);
127
+
128
+ size_t nprobe = index_ivf->nprobe;
129
+ std::vector<idx_t> cent_nos (n * nprobe);
130
+ std::vector<float> cent_dis (n * nprobe);
131
+ index_ivf->quantizer->search(
132
+ n, x, nprobe, cent_dis.data(), cent_nos.data());
133
+
134
+ if (query_centroid_ids) {
135
+ for (size_t i = 0; i < n; i++)
136
+ query_centroid_ids[i] = cent_nos[i * nprobe];
137
+ }
138
+
139
+ index_ivf->search_preassigned (n, x, k,
140
+ cent_nos.data(), cent_dis.data(),
141
+ distances, labels, true);
142
+
143
+ for (size_t i = 0; i < n * k; i++) {
144
+ idx_t label = labels[i];
145
+ if (label < 0) {
146
+ if (result_centroid_ids)
147
+ result_centroid_ids[i] = -1;
148
+ } else {
149
+ long list_no = label >> 32;
150
+ long list_index = label & 0xffffffff;
151
+ if (result_centroid_ids)
152
+ result_centroid_ids[i] = list_no;
153
+ labels[i] = index_ivf->invlists->get_single_id(list_no, list_index);
154
+ }
155
+ }
156
+ }
157
+
158
+
159
+ SlidingIndexWindow::SlidingIndexWindow (Index *index): index (index) {
160
+ n_slice = 0;
161
+ IndexIVF* index_ivf = const_cast<IndexIVF*>(extract_index_ivf (index));
162
+ ils = dynamic_cast<ArrayInvertedLists *> (index_ivf->invlists);
163
+ nlist = ils->nlist;
164
+ FAISS_THROW_IF_NOT_MSG (ils,
165
+ "only supports indexes with ArrayInvertedLists");
166
+ sizes.resize(nlist);
167
+ }
168
+
169
/**
 * Drops the first `remove` elements of `dst`, then appends all of `src`.
 *
 * Uses the vector's own erase/insert instead of memmove/memcpy, so empty
 * vectors (whose data() may be null) never reach the C memory functions
 * and element types need not be trivially copyable.
 *
 * @param dst     vector modified in place
 * @param remove  number of leading elements to drop; must not exceed
 *                dst.size()
 * @param src     elements appended to dst; must not be dst itself
 */
template<class T>
static void shift_and_add (std::vector<T> & dst,
                           size_t remove,
                           const std::vector<T> & src)
{
    if (remove > 0) {
        dst.erase (dst.begin(), dst.begin() + remove);
    }
    dst.insert (dst.end(), src.begin(), src.end());
}
181
+
182
/// Erases the first `remove` elements of `v`; does nothing when `remove`
/// is zero.
template<class T>
static void remove_from_begin (std::vector<T> & v,
                               size_t remove)
{
    if (remove == 0) {
        return;
    }
    const auto first = v.begin();
    v.erase (first, first + remove);
}
189
+
190
+ void SlidingIndexWindow::step(const Index *sub_index, bool remove_oldest) {
191
+
192
+ FAISS_THROW_IF_NOT_MSG (!remove_oldest || n_slice > 0,
193
+ "cannot remove slice: there is none");
194
+
195
+ const ArrayInvertedLists *ils2 = nullptr;
196
+ if(sub_index) {
197
+ check_compatible_for_merge (index, sub_index);
198
+ ils2 = dynamic_cast<const ArrayInvertedLists*>(
199
+ extract_index_ivf (sub_index)->invlists);
200
+ FAISS_THROW_IF_NOT_MSG (ils2, "supports only ArrayInvertedLists");
201
+ }
202
+ IndexIVF *index_ivf = extract_index_ivf (index);
203
+
204
+ if (remove_oldest && ils2) {
205
+ for (int i = 0; i < nlist; i++) {
206
+ std::vector<size_t> & sizesi = sizes[i];
207
+ size_t amount_to_remove = sizesi[0];
208
+ index_ivf->ntotal += ils2->ids[i].size() - amount_to_remove;
209
+
210
+ shift_and_add (ils->ids[i], amount_to_remove, ils2->ids[i]);
211
+ shift_and_add (ils->codes[i], amount_to_remove * ils->code_size,
212
+ ils2->codes[i]);
213
+ for (int j = 0; j + 1 < n_slice; j++) {
214
+ sizesi[j] = sizesi[j + 1] - amount_to_remove;
215
+ }
216
+ sizesi[n_slice - 1] = ils->ids[i].size();
217
+ }
218
+ } else if (ils2) {
219
+ for (int i = 0; i < nlist; i++) {
220
+ index_ivf->ntotal += ils2->ids[i].size();
221
+ shift_and_add (ils->ids[i], 0, ils2->ids[i]);
222
+ shift_and_add (ils->codes[i], 0, ils2->codes[i]);
223
+ sizes[i].push_back(ils->ids[i].size());
224
+ }
225
+ n_slice++;
226
+ } else if (remove_oldest) {
227
+ for (int i = 0; i < nlist; i++) {
228
+ size_t amount_to_remove = sizes[i][0];
229
+ index_ivf->ntotal -= amount_to_remove;
230
+ remove_from_begin (ils->ids[i], amount_to_remove);
231
+ remove_from_begin (ils->codes[i],
232
+ amount_to_remove * ils->code_size);
233
+ for (int j = 0; j + 1 < n_slice; j++) {
234
+ sizes[i][j] = sizes[i][j + 1] - amount_to_remove;
235
+ }
236
+ sizes[i].pop_back ();
237
+ }
238
+ n_slice--;
239
+ } else {
240
+ FAISS_THROW_MSG ("nothing to do???");
241
+ }
242
+ index->ntotal = index_ivf->ntotal;
243
+ }
244
+
245
+
246
+
247
+ // Get a subset of inverted lists [i0, i1). Works on IndexIVF's and
248
+ // IndexIVF's embedded in a IndexPreTransform
249
+
250
+ ArrayInvertedLists *
251
+ get_invlist_range (const Index *index, long i0, long i1)
252
+ {
253
+ const IndexIVF *ivf = extract_index_ivf (index);
254
+
255
+ FAISS_THROW_IF_NOT (0 <= i0 && i0 <= i1 && i1 <= ivf->nlist);
256
+
257
+ const InvertedLists *src = ivf->invlists;
258
+
259
+ ArrayInvertedLists * il = new ArrayInvertedLists(i1 - i0, src->code_size);
260
+
261
+ for (long i = i0; i < i1; i++) {
262
+ il->add_entries(i - i0, src->list_size(i),
263
+ InvertedLists::ScopedIds (src, i).get(),
264
+ InvertedLists::ScopedCodes (src, i).get());
265
+ }
266
+ return il;
267
+ }
268
+
269
+
270
+
271
+ void set_invlist_range (Index *index, long i0, long i1,
272
+ ArrayInvertedLists * src)
273
+ {
274
+ IndexIVF *ivf = extract_index_ivf (index);
275
+
276
+ FAISS_THROW_IF_NOT (0 <= i0 && i0 <= i1 && i1 <= ivf->nlist);
277
+
278
+ ArrayInvertedLists *dst = dynamic_cast<ArrayInvertedLists *>(ivf->invlists);
279
+ FAISS_THROW_IF_NOT_MSG (dst, "only ArrayInvertedLists supported");
280
+ FAISS_THROW_IF_NOT (src->nlist == i1 - i0 &&
281
+ dst->code_size == src->code_size);
282
+
283
+ size_t ntotal = index->ntotal;
284
+ for (long i = i0 ; i < i1; i++) {
285
+ ntotal -= dst->list_size (i);
286
+ ntotal += src->list_size (i - i0);
287
+ std::swap (src->codes[i - i0], dst->codes[i]);
288
+ std::swap (src->ids[i - i0], dst->ids[i]);
289
+ }
290
+ ivf->ntotal = index->ntotal = ntotal;
291
+ }
292
+
293
+
294
+ void search_with_parameters (const Index *index,
295
+ idx_t n, const float *x, idx_t k,
296
+ float *distances, idx_t *labels,
297
+ IVFSearchParameters *params,
298
+ size_t *nb_dis_ptr)
299
+ {
300
+ FAISS_THROW_IF_NOT (params);
301
+ const float *prev_x = x;
302
+ ScopeDeleter<float> del;
303
+
304
+ if (auto ip = dynamic_cast<const IndexPreTransform *> (index)) {
305
+ x = ip->apply_chain (n, x);
306
+ if (x != prev_x) {
307
+ del.set(x);
308
+ }
309
+ index = ip->index;
310
+ }
311
+
312
+ std::vector<idx_t> Iq(params->nprobe * n);
313
+ std::vector<float> Dq(params->nprobe * n);
314
+
315
+ const IndexIVF *index_ivf = dynamic_cast<const IndexIVF *>(index);
316
+ FAISS_THROW_IF_NOT (index_ivf);
317
+
318
+ index_ivf->quantizer->search(n, x, params->nprobe,
319
+ Dq.data(), Iq.data());
320
+
321
+ if (nb_dis_ptr) {
322
+ size_t nb_dis = 0;
323
+ const InvertedLists *il = index_ivf->invlists;
324
+ for (idx_t i = 0; i < n * params->nprobe; i++) {
325
+ if (Iq[i] >= 0) {
326
+ nb_dis += il->list_size(Iq[i]);
327
+ }
328
+ }
329
+ *nb_dis_ptr = nb_dis;
330
+ }
331
+
332
+ index_ivf->search_preassigned(n, x, k, Iq.data(), Dq.data(),
333
+ distances, labels,
334
+ false, params);
335
+ }
336
+
337
+
338
+
339
+ } } // namespace faiss::ivflib
@@ -0,0 +1,132 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #ifndef FAISS_IVFLIB_H
11
+ #define FAISS_IVFLIB_H
12
+
13
+ /** Since IVF (inverted file) indexes are of so much use for
14
+ * large-scale use cases, we group a few functions related to them in
15
+ * this small library. Most functions work both on IndexIVFs and
16
+ * IndexIVFs embedded within an IndexPreTransform.
17
+ */
18
+
19
+ #include <vector>
20
+ #include <faiss/IndexIVF.h>
21
+
22
+ namespace faiss { namespace ivflib {
23
+
24
+
25
+ /** check if two indexes have the same parameters and are trained in
26
+ * the same way, otherwise throw. */
27
+ void check_compatible_for_merge (const Index * index1,
28
+ const Index * index2);
29
+
30
+ /** get an IndexIVF from an index. The index may be an IndexIVF or
31
+ * some wrapper class that encloses an IndexIVF
32
+ *
33
+ * throws an exception if this is not the case.
34
+ */
35
+ const IndexIVF * extract_index_ivf (const Index * index);
36
+ IndexIVF * extract_index_ivf (Index * index);
37
+
38
+ /** Merge index1 into index0. Works on IndexIVF's and IndexIVF's
39
+ * embedded in a IndexPreTransform. On output, the index1 is empty.
40
+ *
41
+ * @param shift_ids: translate the ids from index1 to index0->prev_ntotal
42
+ */
43
+ void merge_into(Index *index0, Index *index1, bool shift_ids);
44
+
45
+ typedef Index::idx_t idx_t;
46
+
47
+ /* Returns the cluster the embeddings belong to.
48
+ *
49
+ * @param index Index, which should be an IVF index
50
+ * (otherwise there are no clusters)
51
+ * @param x object descriptors (embeddings) for which the centroids should be found,
52
+ * size num_objects * d
53
+ * @param centroid_ids
54
+ * cluster id each object belongs to, size num_objects
55
+ */
56
+ void search_centroid(Index *index,
57
+ const float* x, int n,
58
+ idx_t* centroid_ids);
59
+
60
+ /* Searches the index and also returns the centroid ids of the queries and of the results.
61
+ *
62
+ * @param index Index, which should be an IVF index
63
+ * (otherwise there are no clusters)
64
+ * @param query_centroid_ids
65
+ * centroid ids corresponding to the query vectors (size n)
66
+ * @param result_centroid_ids
67
+ * centroid ids corresponding to the results (size n * k)
68
+ * other arguments are the same as the standard search function
69
+ */
70
+ void search_and_return_centroids(Index *index,
71
+ size_t n,
72
+ const float* xin,
73
+ long k,
74
+ float *distances,
75
+ idx_t* labels,
76
+ idx_t* query_centroid_ids,
77
+ idx_t* result_centroid_ids);
78
+
79
+
80
+ /** A set of IndexIVFs concatenated together in a FIFO fashion.
81
+ * at each "step", the oldest index slice is removed and a new index is added.
82
+ */
83
+ struct SlidingIndexWindow {
84
+ /// common index that contains the sliding window
85
+ Index * index;
86
+
87
+ /// InvertedLists of index
88
+ ArrayInvertedLists *ils;
89
+
90
+ /// number of slices currently in index
91
+ int n_slice;
92
+
93
+ /// same as index->nlist
94
+ size_t nlist;
95
+
96
+ /// cumulative list sizes at each slice
97
+ std::vector<std::vector<size_t> > sizes;
98
+
99
+ /// index should be initially empty and trained
100
+ SlidingIndexWindow (Index *index);
101
+
102
+ /** Add one index to the current index and remove the oldest one.
103
+ *
104
+ * @param sub_index slice to swap in (can be NULL)
105
+ * @param remove_oldest if true, remove the oldest slices */
106
+ void step(const Index *sub_index, bool remove_oldest);
107
+
108
+ };
109
+
110
+
111
+ /// Get a subset of inverted lists [i0, i1)
112
+ ArrayInvertedLists * get_invlist_range (const Index *index,
113
+ long i0, long i1);
114
+
115
+ /// Set a subset of inverted lists
116
+ void set_invlist_range (Index *index, long i0, long i1,
117
+ ArrayInvertedLists * src);
118
+
119
+ // search an IndexIVF, possibly embedded in an IndexPreTransform with
120
+ // given parameters. Optionally returns the number of distances
121
+ // computed
122
+ void search_with_parameters (const Index *index,
123
+ idx_t n, const float *x, idx_t k,
124
+ float *distances, idx_t *labels,
125
+ IVFSearchParameters *params,
126
+ size_t *nb_dis = nullptr);
127
+
128
+
129
+
130
+ } } // namespace faiss::ivflib
131
+
132
+ #endif