faiss 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,101 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #ifndef FAISS_CLUSTERING_H
11
+ #define FAISS_CLUSTERING_H
12
+ #include <faiss/Index.h>
13
+
14
+ #include <vector>
15
+
16
+ namespace faiss {
17
+
18
+
19
+ /** Class for the clustering parameters. Can be passed to the
20
+ * constructor of the Clustering object.
21
+ */
22
+ struct ClusteringParameters {
23
+ int niter; ///< number of clustering iterations
24
+ int nredo; ///< redo clustering this many times and keep the best result
25
+
26
+ bool verbose; ///< presumably enables progress output -- TODO confirm in Clustering.cpp
27
+ bool spherical; ///< do we want normalized centroids?
28
+ bool int_centroids; ///< round centroid coordinates to integers
29
+ bool update_index; ///< update index after each iteration?
30
+ bool frozen_centroids; ///< use the centroids provided as input and do not change them during iterations
31
+
32
+ int min_points_per_centroid; ///< with fewer training points per centroid you get a warning
33
+ int max_points_per_centroid; ///< upper bound per centroid, used to limit the size of the dataset
34
+
35
+ int seed; ///< seed for the random number generator
36
+
37
+ /// sets reasonable defaults (the values themselves are defined outside this header)
38
+ ClusteringParameters ();
39
+ };
40
+
41
+
42
+ /** clustering based on assignment - centroid update iterations
43
+ *
44
+ * The clustering is based on an Index object that assigns training
45
+ * points to the centroids. Therefore, at each iteration the centroids
46
+ * are added to the index.
47
+ *
48
+ * On output, the centroids table is set to the latest version
49
+ * of the centroids and they are also added to the index. If the
50
+ * centroids table is not empty on input, it is also used for
51
+ * initialization.
52
+ *
53
+ * To do several clusterings, just call train() several times on
54
+ * different training sets, clearing the centroid table in between.
55
+ */
56
+ struct Clustering: ClusteringParameters {
57
+ typedef Index::idx_t idx_t; ///< same integer type as Index::idx_t
58
+ size_t d; ///< dimension of the vectors
59
+ size_t k; ///< number of centroids
60
+
61
+ /// centroid table, k * d floats
62
+ std::vector<float> centroids;
63
+
64
+ /// objective values (sum of distances reported by index), one entry
65
+ /// per iteration
66
+ std::vector<float> obj;
67
+
68
+ /// the only mandatory parameters are k and d (note: taken as int here, stored as size_t)
69
+ Clustering (int d, int k);
70
+ Clustering (int d, int k, const ClusteringParameters &cp); ///< presumably starts from the settings in cp -- TODO confirm
71
+
72
+ /// Run the clustering on n training vectors x; index is used during the assignment stage
73
+ virtual void train (idx_t n, const float * x, faiss::Index & index);
74
+
75
+ /// Post-process the centroids after each centroid update.
76
+ /// Includes optional L2 normalization and nearest-integer rounding.
77
+ void post_process_centroids ();
78
+
79
+ virtual ~Clustering() {} ///< virtual because train() is virtual and the struct is meant to be derived from
80
+ };
81
+
82
+
83
+ /** simplified interface
84
+ *
85
+ * @param d dimension of the data
86
+ * @param n nb of training vectors
87
+ * @param k nb of output centroids
88
+ * @param x training set (size n * d)
89
+ * @param centroids output centroids (size k * d)
90
+ * @return final quantization error
91
+ */
92
+ float kmeans_clustering (size_t d, size_t n, size_t k,
93
+ const float *x,
94
+ float *centroids);
95
+
96
+
97
+
98
+ }
99
+
100
+
101
+ #endif
@@ -0,0 +1,339 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #include <faiss/IVFlib.h>
11
+
12
+ #include <memory>
13
+
14
+ #include <faiss/IndexPreTransform.h>
15
+ #include <faiss/impl/FaissAssert.h>
16
+
17
+
18
+
19
+ namespace faiss { namespace ivflib {
20
+
21
+
22
+ void check_compatible_for_merge (const Index * index0,
23
+ const Index * index1)
24
+ {
25
+
26
+ const faiss::IndexPreTransform *pt0 =
27
+ dynamic_cast<const faiss::IndexPreTransform *>(index0);
28
+
29
+ if (pt0) {
30
+ const faiss::IndexPreTransform *pt1 =
31
+ dynamic_cast<const faiss::IndexPreTransform *>(index1);
32
+ FAISS_THROW_IF_NOT_MSG (pt1, "both indexes should be pretransforms");
33
+
34
+ FAISS_THROW_IF_NOT (pt0->chain.size() == pt1->chain.size());
35
+ for (int i = 0; i < pt0->chain.size(); i++) {
36
+ FAISS_THROW_IF_NOT (typeid(pt0->chain[i]) == typeid(pt1->chain[i]));
37
+ }
38
+
39
+ index0 = pt0->index;
40
+ index1 = pt1->index;
41
+ }
42
+ FAISS_THROW_IF_NOT (typeid(index0) == typeid(index1));
43
+ FAISS_THROW_IF_NOT (index0->d == index1->d &&
44
+ index0->metric_type == index1->metric_type);
45
+
46
+ const faiss::IndexIVF *ivf0 = dynamic_cast<const faiss::IndexIVF *>(index0);
47
+ if (ivf0) {
48
+ const faiss::IndexIVF *ivf1 =
49
+ dynamic_cast<const faiss::IndexIVF *>(index1);
50
+ FAISS_THROW_IF_NOT (ivf1);
51
+
52
+ ivf0->check_compatible_for_merge (*ivf1);
53
+ }
54
+
55
+ // TODO: check as thoroughfully for other index types
56
+
57
+ }
58
+
59
+ const IndexIVF * extract_index_ivf (const Index * index)
60
+ {
61
+ if (auto *pt =
62
+ dynamic_cast<const IndexPreTransform *>(index)) {
63
+ index = pt->index;
64
+ }
65
+
66
+ auto *ivf = dynamic_cast<const IndexIVF *>(index);
67
+
68
+ FAISS_THROW_IF_NOT (ivf);
69
+
70
+ return ivf;
71
+ }
72
+
73
+ IndexIVF * extract_index_ivf (Index * index) {
74
+ return const_cast<IndexIVF*> (extract_index_ivf ((const Index*)(index)));
75
+ }
76
+
77
+ void merge_into(faiss::Index *index0, faiss::Index *index1, bool shift_ids) {
78
+
79
+ check_compatible_for_merge (index0, index1);
80
+ IndexIVF * ivf0 = extract_index_ivf (index0);
81
+ IndexIVF * ivf1 = extract_index_ivf (index1);
82
+
83
+ ivf0->merge_from (*ivf1, shift_ids ? ivf0->ntotal : 0);
84
+
85
+ // useful for IndexPreTransform
86
+ index0->ntotal = ivf0->ntotal;
87
+ index1->ntotal = ivf1->ntotal;
88
+ }
89
+
90
+
91
+
92
+ void search_centroid(faiss::Index *index,
93
+ const float* x, int n,
94
+ idx_t* centroid_ids)
95
+ {
96
+ std::unique_ptr<float[]> del;
97
+ if (auto index_pre = dynamic_cast<faiss::IndexPreTransform*>(index)) {
98
+ x = index_pre->apply_chain(n, x);
99
+ del.reset((float*)x);
100
+ index = index_pre->index;
101
+ }
102
+ faiss::IndexIVF* index_ivf = dynamic_cast<faiss::IndexIVF*>(index);
103
+ assert(index_ivf);
104
+ index_ivf->quantizer->assign(n, x, centroid_ids);
105
+ }
106
+
107
+
108
+
109
+ void search_and_return_centroids(faiss::Index *index,
110
+ size_t n,
111
+ const float* xin,
112
+ long k,
113
+ float *distances,
114
+ idx_t* labels,
115
+ idx_t* query_centroid_ids,
116
+ idx_t* result_centroid_ids)
117
+ {
118
+ const float *x = xin;
119
+ std::unique_ptr<float []> del;
120
+ if (auto index_pre = dynamic_cast<faiss::IndexPreTransform*>(index)) {
121
+ x = index_pre->apply_chain(n, x);
122
+ del.reset((float*)x);
123
+ index = index_pre->index;
124
+ }
125
+ faiss::IndexIVF* index_ivf = dynamic_cast<faiss::IndexIVF*>(index);
126
+ assert(index_ivf);
127
+
128
+ size_t nprobe = index_ivf->nprobe;
129
+ std::vector<idx_t> cent_nos (n * nprobe);
130
+ std::vector<float> cent_dis (n * nprobe);
131
+ index_ivf->quantizer->search(
132
+ n, x, nprobe, cent_dis.data(), cent_nos.data());
133
+
134
+ if (query_centroid_ids) {
135
+ for (size_t i = 0; i < n; i++)
136
+ query_centroid_ids[i] = cent_nos[i * nprobe];
137
+ }
138
+
139
+ index_ivf->search_preassigned (n, x, k,
140
+ cent_nos.data(), cent_dis.data(),
141
+ distances, labels, true);
142
+
143
+ for (size_t i = 0; i < n * k; i++) {
144
+ idx_t label = labels[i];
145
+ if (label < 0) {
146
+ if (result_centroid_ids)
147
+ result_centroid_ids[i] = -1;
148
+ } else {
149
+ long list_no = label >> 32;
150
+ long list_index = label & 0xffffffff;
151
+ if (result_centroid_ids)
152
+ result_centroid_ids[i] = list_no;
153
+ labels[i] = index_ivf->invlists->get_single_id(list_no, list_index);
154
+ }
155
+ }
156
+ }
157
+
158
+
159
+ SlidingIndexWindow::SlidingIndexWindow (Index *index): index (index) {
160
+ n_slice = 0;
161
+ IndexIVF* index_ivf = const_cast<IndexIVF*>(extract_index_ivf (index));
162
+ ils = dynamic_cast<ArrayInvertedLists *> (index_ivf->invlists);
163
+ nlist = ils->nlist;
164
+ FAISS_THROW_IF_NOT_MSG (ils,
165
+ "only supports indexes with ArrayInvertedLists");
166
+ sizes.resize(nlist);
167
+ }
168
+
169
/** Drop the first `remove` elements of dst, then append all of src.
 *
 * Implemented with vector::erase / vector::insert instead of raw
 * memmove / memcpy, so that empty vectors (whose data() may be null) are
 * handled without handing null pointers to the C memory functions.
 *
 * @param dst     vector modified in place
 * @param remove  number of leading elements to discard; must not exceed
 *                dst.size()
 * @param src     elements appended after the removal
 */
template<class T>
static void shift_and_add (std::vector<T> & dst,
                           size_t remove,
                           const std::vector<T> & src)
{
    dst.erase (dst.begin(), dst.begin() + remove);
    dst.insert (dst.end(), src.begin(), src.end());
}
181
+
182
/// Erase the first `remove` elements of v; a count of zero leaves v untouched.
template<class T>
static void remove_from_begin (std::vector<T> & v,
                               size_t remove)
{
    if (remove == 0) {
        return;
    }
    const auto first = v.begin();
    v.erase (first, first + remove);
}
189
+
190
+ void SlidingIndexWindow::step(const Index *sub_index, bool remove_oldest) {
191
+
192
+ FAISS_THROW_IF_NOT_MSG (!remove_oldest || n_slice > 0,
193
+ "cannot remove slice: there is none");
194
+
195
+ const ArrayInvertedLists *ils2 = nullptr;
196
+ if(sub_index) {
197
+ check_compatible_for_merge (index, sub_index);
198
+ ils2 = dynamic_cast<const ArrayInvertedLists*>(
199
+ extract_index_ivf (sub_index)->invlists);
200
+ FAISS_THROW_IF_NOT_MSG (ils2, "supports only ArrayInvertedLists");
201
+ }
202
+ IndexIVF *index_ivf = extract_index_ivf (index);
203
+
204
+ if (remove_oldest && ils2) {
205
+ for (int i = 0; i < nlist; i++) {
206
+ std::vector<size_t> & sizesi = sizes[i];
207
+ size_t amount_to_remove = sizesi[0];
208
+ index_ivf->ntotal += ils2->ids[i].size() - amount_to_remove;
209
+
210
+ shift_and_add (ils->ids[i], amount_to_remove, ils2->ids[i]);
211
+ shift_and_add (ils->codes[i], amount_to_remove * ils->code_size,
212
+ ils2->codes[i]);
213
+ for (int j = 0; j + 1 < n_slice; j++) {
214
+ sizesi[j] = sizesi[j + 1] - amount_to_remove;
215
+ }
216
+ sizesi[n_slice - 1] = ils->ids[i].size();
217
+ }
218
+ } else if (ils2) {
219
+ for (int i = 0; i < nlist; i++) {
220
+ index_ivf->ntotal += ils2->ids[i].size();
221
+ shift_and_add (ils->ids[i], 0, ils2->ids[i]);
222
+ shift_and_add (ils->codes[i], 0, ils2->codes[i]);
223
+ sizes[i].push_back(ils->ids[i].size());
224
+ }
225
+ n_slice++;
226
+ } else if (remove_oldest) {
227
+ for (int i = 0; i < nlist; i++) {
228
+ size_t amount_to_remove = sizes[i][0];
229
+ index_ivf->ntotal -= amount_to_remove;
230
+ remove_from_begin (ils->ids[i], amount_to_remove);
231
+ remove_from_begin (ils->codes[i],
232
+ amount_to_remove * ils->code_size);
233
+ for (int j = 0; j + 1 < n_slice; j++) {
234
+ sizes[i][j] = sizes[i][j + 1] - amount_to_remove;
235
+ }
236
+ sizes[i].pop_back ();
237
+ }
238
+ n_slice--;
239
+ } else {
240
+ FAISS_THROW_MSG ("nothing to do???");
241
+ }
242
+ index->ntotal = index_ivf->ntotal;
243
+ }
244
+
245
+
246
+
247
+ // Get a subset of inverted lists [i0, i1). Works on IndexIVF's and
248
+ // IndexIVF's embedded in a IndexPreTransform
249
+
250
+ ArrayInvertedLists *
251
+ get_invlist_range (const Index *index, long i0, long i1)
252
+ {
253
+ const IndexIVF *ivf = extract_index_ivf (index);
254
+
255
+ FAISS_THROW_IF_NOT (0 <= i0 && i0 <= i1 && i1 <= ivf->nlist);
256
+
257
+ const InvertedLists *src = ivf->invlists;
258
+
259
+ ArrayInvertedLists * il = new ArrayInvertedLists(i1 - i0, src->code_size);
260
+
261
+ for (long i = i0; i < i1; i++) {
262
+ il->add_entries(i - i0, src->list_size(i),
263
+ InvertedLists::ScopedIds (src, i).get(),
264
+ InvertedLists::ScopedCodes (src, i).get());
265
+ }
266
+ return il;
267
+ }
268
+
269
+
270
+
271
+ void set_invlist_range (Index *index, long i0, long i1,
272
+ ArrayInvertedLists * src)
273
+ {
274
+ IndexIVF *ivf = extract_index_ivf (index);
275
+
276
+ FAISS_THROW_IF_NOT (0 <= i0 && i0 <= i1 && i1 <= ivf->nlist);
277
+
278
+ ArrayInvertedLists *dst = dynamic_cast<ArrayInvertedLists *>(ivf->invlists);
279
+ FAISS_THROW_IF_NOT_MSG (dst, "only ArrayInvertedLists supported");
280
+ FAISS_THROW_IF_NOT (src->nlist == i1 - i0 &&
281
+ dst->code_size == src->code_size);
282
+
283
+ size_t ntotal = index->ntotal;
284
+ for (long i = i0 ; i < i1; i++) {
285
+ ntotal -= dst->list_size (i);
286
+ ntotal += src->list_size (i - i0);
287
+ std::swap (src->codes[i - i0], dst->codes[i]);
288
+ std::swap (src->ids[i - i0], dst->ids[i]);
289
+ }
290
+ ivf->ntotal = index->ntotal = ntotal;
291
+ }
292
+
293
+
294
+ void search_with_parameters (const Index *index,
295
+ idx_t n, const float *x, idx_t k,
296
+ float *distances, idx_t *labels,
297
+ IVFSearchParameters *params,
298
+ size_t *nb_dis_ptr)
299
+ {
300
+ FAISS_THROW_IF_NOT (params);
301
+ const float *prev_x = x;
302
+ ScopeDeleter<float> del;
303
+
304
+ if (auto ip = dynamic_cast<const IndexPreTransform *> (index)) {
305
+ x = ip->apply_chain (n, x);
306
+ if (x != prev_x) {
307
+ del.set(x);
308
+ }
309
+ index = ip->index;
310
+ }
311
+
312
+ std::vector<idx_t> Iq(params->nprobe * n);
313
+ std::vector<float> Dq(params->nprobe * n);
314
+
315
+ const IndexIVF *index_ivf = dynamic_cast<const IndexIVF *>(index);
316
+ FAISS_THROW_IF_NOT (index_ivf);
317
+
318
+ index_ivf->quantizer->search(n, x, params->nprobe,
319
+ Dq.data(), Iq.data());
320
+
321
+ if (nb_dis_ptr) {
322
+ size_t nb_dis = 0;
323
+ const InvertedLists *il = index_ivf->invlists;
324
+ for (idx_t i = 0; i < n * params->nprobe; i++) {
325
+ if (Iq[i] >= 0) {
326
+ nb_dis += il->list_size(Iq[i]);
327
+ }
328
+ }
329
+ *nb_dis_ptr = nb_dis;
330
+ }
331
+
332
+ index_ivf->search_preassigned(n, x, k, Iq.data(), Dq.data(),
333
+ distances, labels,
334
+ false, params);
335
+ }
336
+
337
+
338
+
339
+ } } // namespace faiss::ivflib
@@ -0,0 +1,132 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #ifndef FAISS_IVFLIB_H
11
+ #define FAISS_IVFLIB_H
12
+
13
+ /** Since IVF (inverted file) indexes are of so much use for
14
+ * large-scale use cases, we group a few functions related to them in
15
+ * this small library. Most functions work both on IndexIVFs and
16
+ * IndexIVFs embedded within an IndexPreTransform.
17
+ */
18
+
19
+ #include <vector>
20
+ #include <faiss/IndexIVF.h>
21
+
22
+ namespace faiss { namespace ivflib {
23
+
24
+
25
+ /** check if two indexes have the same parameters and are trained in
26
+ * the same way, otherwise throw. */
27
+ void check_compatible_for_merge (const Index * index1,
28
+ const Index * index2);
29
+
30
+ /** get an IndexIVF from an index. The index may be an IndexIVF or
31
+ * some wrapper class that encloses an IndexIVF
32
+ *
33
+ * throws an exception if this is not the case.
34
+ */
35
+ const IndexIVF * extract_index_ivf (const Index * index);
36
+ IndexIVF * extract_index_ivf (Index * index);
37
+
38
+ /** Merge index1 into index0. Works on IndexIVF's and IndexIVF's
39
+ * embedded in a IndexPreTransform. On output, the index1 is empty.
40
+ *
41
+ * @param shift_ids: translate the ids from index1 to index0->prev_ntotal
42
+ */
43
+ void merge_into(Index *index0, Index *index1, bool shift_ids);
44
+
45
+ typedef Index::idx_t idx_t;
46
+
47
+ /* Returns the cluster the embeddings belong to.
48
+ *
49
+ * @param index Index, which should be an IVF index
50
+ * (otherwise there are no clusters)
51
+ * @param x object descriptors (embeddings) for which the centroids should be found,
52
+ * size n * d
53
+ * @param centroid_ids
54
+ * cluster id each object belongs to, size n
55
+ */
56
+ void search_centroid(Index *index,
57
+ const float* x, int n,
58
+ idx_t* centroid_ids);
59
+
60
+ /* Searches the index and also returns the centroids of the queries and of the results.
61
+ *
62
+ * @param index Index, which should be an IVF index
63
+ * (otherwise there are no clusters)
64
+ * @param query_centroid_ids
65
+ * centroid ids corresponding to the query vectors (size n)
66
+ * @param result_centroid_ids
67
+ * centroid ids corresponding to the results (size n * k)
68
+ * other arguments are the same as the standard search function
69
+ */
70
+ void search_and_return_centroids(Index *index,
71
+ size_t n,
72
+ const float* xin,
73
+ long k,
74
+ float *distances,
75
+ idx_t* labels,
76
+ idx_t* query_centroid_ids,
77
+ idx_t* result_centroid_ids);
78
+
79
+
80
+ /** A set of IndexIVFs concatenated together in a FIFO fashion.
81
+ * at each "step", the oldest index slice is removed and a new index is added.
82
+ */
83
+ struct SlidingIndexWindow {
84
+ /// common index that contains the sliding window
85
+ Index * index;
86
+
87
+ /// inverted lists of index (not owned: points into the index)
88
+ ArrayInvertedLists *ils;
89
+
90
+ /// number of slices currently in index
91
+ int n_slice;
92
+
93
+ /// number of inverted lists, same as index->nlist
94
+ size_t nlist;
95
+
96
+ /// cumulative list sizes at each slice: sizes[list][slice]
97
+ std::vector<std::vector<size_t> > sizes;
98
+
99
+ /// index should be initially empty and trained, and must use ArrayInvertedLists
100
+ SlidingIndexWindow (Index *index);
101
+
102
+ /** Add one index to the current index and/or remove the oldest one.
103
+ *
104
+ * @param sub_index slice to swap in (can be NULL)
105
+ * @param remove_oldest if true, remove the oldest slice (throws if there is none) */
106
+ void step(const Index *sub_index, bool remove_oldest);
107
+
108
+ };
109
+
110
+
111
+ /// Get a subset of inverted lists [i0, i1)
112
+ ArrayInvertedLists * get_invlist_range (const Index *index,
113
+ long i0, long i1);
114
+
115
+ /// Set a subset of inverted lists
116
+ void set_invlist_range (Index *index, long i0, long i1,
117
+ ArrayInvertedLists * src);
118
+
119
+ // search an IndexIVF, possibly embedded in an IndexPreTransform with
120
+ // given parameters. Optionally returns the number of distances
121
+ // computed
122
+ void search_with_parameters (const Index *index,
123
+ idx_t n, const float *x, idx_t k,
124
+ float *distances, idx_t *labels,
125
+ IVFSearchParameters *params,
126
+ size_t *nb_dis = nullptr);
127
+
128
+
129
+
130
+ } } // namespace faiss::ivflib
131
+
132
+ #endif