faiss 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,91 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #pragma once
11
+
12
+
13
+
14
+ #include <faiss/Index.h>
15
+ #include <faiss/VectorTransform.h>
16
+
17
+ namespace faiss {
18
+
19
+ /** Index that applies a LinearTransform transform on vectors before
20
+ * handing them over to a sub-index */
21
+ struct IndexPreTransform: Index {
22
+
23
+ std::vector<VectorTransform *> chain; ///! chain of tranforms
24
+ Index * index; ///! the sub-index
25
+
26
+ bool own_fields; ///! whether pointers are deleted in destructor
27
+
28
+ explicit IndexPreTransform (Index *index);
29
+
30
+ IndexPreTransform ();
31
+
32
+ /// ltrans is the last transform before the index
33
+ IndexPreTransform (VectorTransform * ltrans, Index * index);
34
+
35
+ void prepend_transform (VectorTransform * ltrans);
36
+
37
+ void train(idx_t n, const float* x) override;
38
+
39
+ void add(idx_t n, const float* x) override;
40
+
41
+ void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
42
+
43
+ void reset() override;
44
+
45
+ /** removes IDs from the index. Not supported by all indexes.
46
+ */
47
+ size_t remove_ids(const IDSelector& sel) override;
48
+
49
+ void search(
50
+ idx_t n,
51
+ const float* x,
52
+ idx_t k,
53
+ float* distances,
54
+ idx_t* labels) const override;
55
+
56
+
57
+ /* range search, no attempt is done to change the radius */
58
+ void range_search (idx_t n, const float* x, float radius,
59
+ RangeSearchResult* result) const override;
60
+
61
+
62
+ void reconstruct (idx_t key, float * recons) const override;
63
+
64
+ void reconstruct_n (idx_t i0, idx_t ni, float *recons)
65
+ const override;
66
+
67
+ void search_and_reconstruct (idx_t n, const float *x, idx_t k,
68
+ float *distances, idx_t *labels,
69
+ float *recons) const override;
70
+
71
+ /// apply the transforms in the chain. The returned float * may be
72
+ /// equal to x, otherwise it should be deallocated.
73
+ const float * apply_chain (idx_t n, const float *x) const;
74
+
75
+ /// Reverse the transforms in the chain. May not be implemented for
76
+ /// all transforms in the chain or may return approximate results.
77
+ void reverse_chain (idx_t n, const float* xt, float* x) const;
78
+
79
+
80
+ /* standalone codec interface */
81
+ size_t sa_code_size () const override;
82
+ void sa_encode (idx_t n, const float *x,
83
+ uint8_t *bytes) const override;
84
+ void sa_decode (idx_t n, const uint8_t *bytes,
85
+ float *x) const override;
86
+
87
+ ~IndexPreTransform() override;
88
+ };
89
+
90
+
91
+ } // namespace faiss
@@ -0,0 +1,123 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include <faiss/IndexReplicas.h>
9
+ #include <faiss/impl/FaissAssert.h>
10
+
11
+ namespace faiss {
12
+
13
+ template <typename IndexT>
14
+ IndexReplicasTemplate<IndexT>::IndexReplicasTemplate(bool threaded)
15
+ : ThreadedIndex<IndexT>(threaded) {
16
+ }
17
+
18
+ template <typename IndexT>
19
+ IndexReplicasTemplate<IndexT>::IndexReplicasTemplate(idx_t d, bool threaded)
20
+ : ThreadedIndex<IndexT>(d, threaded) {
21
+ }
22
+
23
+ template <typename IndexT>
24
+ IndexReplicasTemplate<IndexT>::IndexReplicasTemplate(int d, bool threaded)
25
+ : ThreadedIndex<IndexT>(d, threaded) {
26
+ }
27
+
28
+ template <typename IndexT>
29
+ void
30
+ IndexReplicasTemplate<IndexT>::onAfterAddIndex(IndexT* index) {
31
+ // Make sure that the parameters are the same for all prior indices, unless
32
+ // we're the first index to be added
33
+ if (this->count() > 0 && this->at(0) != index) {
34
+ auto existing = this->at(0);
35
+
36
+ FAISS_THROW_IF_NOT_FMT(index->ntotal == existing->ntotal,
37
+ "IndexReplicas: newly added index does "
38
+ "not have same number of vectors as prior index; "
39
+ "prior index has %ld vectors, new index has %ld",
40
+ existing->ntotal, index->ntotal);
41
+
42
+ FAISS_THROW_IF_NOT_MSG(index->is_trained == existing->is_trained,
43
+ "IndexReplicas: newly added index does "
44
+ "not have same train status as prior index");
45
+ } else {
46
+ // Set our parameters based on the first index we're adding
47
+ // (dimension is handled in ThreadedIndex)
48
+ this->ntotal = index->ntotal;
49
+ this->verbose = index->verbose;
50
+ this->is_trained = index->is_trained;
51
+ this->metric_type = index->metric_type;
52
+ }
53
+ }
54
+
55
+ template <typename IndexT>
56
+ void
57
+ IndexReplicasTemplate<IndexT>::train(idx_t n, const component_t* x) {
58
+ this->runOnIndex([n, x](int, IndexT* index){ index->train(n, x); });
59
+ }
60
+
61
+ template <typename IndexT>
62
+ void
63
+ IndexReplicasTemplate<IndexT>::add(idx_t n, const component_t* x) {
64
+ this->runOnIndex([n, x](int, IndexT* index){ index->add(n, x); });
65
+ this->ntotal += n;
66
+ }
67
+
68
+ template <typename IndexT>
69
+ void
70
+ IndexReplicasTemplate<IndexT>::reconstruct(idx_t n, component_t* x) const {
71
+ FAISS_THROW_IF_NOT_MSG(this->count() > 0, "no replicas in index");
72
+
73
+ // Just pass to the first replica
74
+ this->at(0)->reconstruct(n, x);
75
+ }
76
+
77
+ template <typename IndexT>
78
+ void
79
+ IndexReplicasTemplate<IndexT>::search(idx_t n,
80
+ const component_t* x,
81
+ idx_t k,
82
+ distance_t* distances,
83
+ idx_t* labels) const {
84
+ FAISS_THROW_IF_NOT_MSG(this->count() > 0, "no replicas in index");
85
+
86
+ if (n == 0) {
87
+ return;
88
+ }
89
+
90
+ auto dim = this->d;
91
+ size_t componentsPerVec =
92
+ sizeof(component_t) == 1 ? (dim + 7) / 8 : dim;
93
+
94
+ // Partition the query by the number of indices we have
95
+ faiss::Index::idx_t queriesPerIndex =
96
+ (faiss::Index::idx_t) (n + this->count() - 1) /
97
+ (faiss::Index::idx_t) this->count();
98
+ FAISS_ASSERT(n / queriesPerIndex <= this->count());
99
+
100
+ auto fn =
101
+ [queriesPerIndex, componentsPerVec,
102
+ n, x, k, distances, labels](int i, const IndexT* index) {
103
+ faiss::Index::idx_t base = (faiss::Index::idx_t) i * queriesPerIndex;
104
+
105
+ if (base < n) {
106
+ auto numForIndex = std::min(queriesPerIndex, n - base);
107
+
108
+ index->search(numForIndex,
109
+ x + base * componentsPerVec,
110
+ k,
111
+ distances + base * k,
112
+ labels + base * k);
113
+ }
114
+ };
115
+
116
+ this->runOnIndex(fn);
117
+ }
118
+
119
+ // explicit instantiations
120
+ template struct IndexReplicasTemplate<Index>;
121
+ template struct IndexReplicasTemplate<IndexBinary>;
122
+
123
+ } // namespace
@@ -0,0 +1,76 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <faiss/Index.h>
11
+ #include <faiss/IndexBinary.h>
12
+ #include <faiss/impl/ThreadedIndex.h>
13
+
14
+ namespace faiss {
15
+
16
+ /// Takes individual faiss::Index instances, and splits queries for
17
+ /// sending to each Index instance, and joins the results together
18
+ /// when done.
19
+ /// Each index is managed by a separate CPU thread.
20
+ template <typename IndexT>
21
+ class IndexReplicasTemplate : public ThreadedIndex<IndexT> {
22
+ public:
23
+ using idx_t = typename IndexT::idx_t;
24
+ using component_t = typename IndexT::component_t;
25
+ using distance_t = typename IndexT::distance_t;
26
+
27
+ /// The dimension that all sub-indices must share will be the dimension of the
28
+ /// first sub-index added
29
+ /// @param threaded do we use one thread per sub-index or do queries
30
+ /// sequentially?
31
+ explicit IndexReplicasTemplate(bool threaded = true);
32
+
33
+ /// @param d the dimension that all sub-indices must share
34
+ /// @param threaded do we use one thread per sub index or do queries
35
+ /// sequentially?
36
+ explicit IndexReplicasTemplate(idx_t d, bool threaded = true);
37
+
38
+ /// int version due to the implicit bool conversion ambiguity of int as
39
+ /// dimension
40
+ explicit IndexReplicasTemplate(int d, bool threaded = true);
41
+
42
+ /// Alias for addIndex()
43
+ void add_replica(IndexT* index) { this->addIndex(index); }
44
+
45
+ /// Alias for removeIndex()
46
+ void remove_replica(IndexT* index) { this->removeIndex(index); }
47
+
48
+ /// faiss::Index API
49
+ /// All indices receive the same call
50
+ void train(idx_t n, const component_t* x) override;
51
+
52
+ /// faiss::Index API
53
+ /// All indices receive the same call
54
+ void add(idx_t n, const component_t* x) override;
55
+
56
+ /// faiss::Index API
57
+ /// Query is partitioned into a slice for each sub-index
58
+ /// split by ceil(n / #indices) for our sub-indices
59
+ void search(idx_t n,
60
+ const component_t* x,
61
+ idx_t k,
62
+ distance_t* distances,
63
+ idx_t* labels) const override;
64
+
65
+ /// reconstructs from the first index
66
+ void reconstruct(idx_t, component_t *v) const override;
67
+
68
+ protected:
69
+ /// Called just after an index is added
70
+ void onAfterAddIndex(IndexT* index) override;
71
+ };
72
+
73
+ using IndexReplicas = IndexReplicasTemplate<Index>;
74
+ using IndexBinaryReplicas = IndexReplicasTemplate<IndexBinary>;
75
+
76
+ } // namespace
@@ -0,0 +1,317 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #include <faiss/IndexScalarQuantizer.h>
11
+
12
+ #include <cstdio>
13
+ #include <algorithm>
14
+
15
+ #include <omp.h>
16
+
17
+ #include <faiss/utils/utils.h>
18
+ #include <faiss/impl/FaissAssert.h>
19
+ #include <faiss/impl/AuxIndexStructures.h>
20
+ #include <faiss/impl/ScalarQuantizer.h>
21
+
22
+ namespace faiss {
23
+
24
+
25
+
26
+ /*******************************************************************
27
+ * IndexScalarQuantizer implementation
28
+ ********************************************************************/
29
+
30
+ IndexScalarQuantizer::IndexScalarQuantizer
31
+ (int d, ScalarQuantizer::QuantizerType qtype,
32
+ MetricType metric):
33
+ Index(d, metric),
34
+ sq (d, qtype)
35
+ {
36
+ is_trained =
37
+ qtype == ScalarQuantizer::QT_fp16 ||
38
+ qtype == ScalarQuantizer::QT_8bit_direct;
39
+ code_size = sq.code_size;
40
+ }
41
+
42
+
43
+ IndexScalarQuantizer::IndexScalarQuantizer ():
44
+ IndexScalarQuantizer(0, ScalarQuantizer::QT_8bit)
45
+ {}
46
+
47
+ void IndexScalarQuantizer::train(idx_t n, const float* x)
48
+ {
49
+ sq.train(n, x);
50
+ is_trained = true;
51
+ }
52
+
53
+ void IndexScalarQuantizer::add(idx_t n, const float* x)
54
+ {
55
+ FAISS_THROW_IF_NOT (is_trained);
56
+ codes.resize ((n + ntotal) * code_size);
57
+ sq.compute_codes (x, &codes[ntotal * code_size], n);
58
+ ntotal += n;
59
+ }
60
+
61
+
62
+ void IndexScalarQuantizer::search(
63
+ idx_t n,
64
+ const float* x,
65
+ idx_t k,
66
+ float* distances,
67
+ idx_t* labels) const
68
+ {
69
+ FAISS_THROW_IF_NOT (is_trained);
70
+ FAISS_THROW_IF_NOT (metric_type == METRIC_L2 ||
71
+ metric_type == METRIC_INNER_PRODUCT);
72
+
73
+ #pragma omp parallel
74
+ {
75
+ InvertedListScanner* scanner = sq.select_InvertedListScanner
76
+ (metric_type, nullptr, true);
77
+ ScopeDeleter1<InvertedListScanner> del(scanner);
78
+
79
+ #pragma omp for
80
+ for (size_t i = 0; i < n; i++) {
81
+ float * D = distances + k * i;
82
+ idx_t * I = labels + k * i;
83
+ // re-order heap
84
+ if (metric_type == METRIC_L2) {
85
+ maxheap_heapify (k, D, I);
86
+ } else {
87
+ minheap_heapify (k, D, I);
88
+ }
89
+ scanner->set_query (x + i * d);
90
+ scanner->scan_codes (ntotal, codes.data(),
91
+ nullptr, D, I, k);
92
+
93
+ // re-order heap
94
+ if (metric_type == METRIC_L2) {
95
+ maxheap_reorder (k, D, I);
96
+ } else {
97
+ minheap_reorder (k, D, I);
98
+ }
99
+ }
100
+ }
101
+
102
+ }
103
+
104
+
105
+ DistanceComputer *IndexScalarQuantizer::get_distance_computer () const
106
+ {
107
+ ScalarQuantizer::SQDistanceComputer *dc =
108
+ sq.get_distance_computer (metric_type);
109
+ dc->code_size = sq.code_size;
110
+ dc->codes = codes.data();
111
+ return dc;
112
+ }
113
+
114
+
115
+ void IndexScalarQuantizer::reset()
116
+ {
117
+ codes.clear();
118
+ ntotal = 0;
119
+ }
120
+
121
+ void IndexScalarQuantizer::reconstruct_n(
122
+ idx_t i0, idx_t ni, float* recons) const
123
+ {
124
+ std::unique_ptr<ScalarQuantizer::Quantizer> squant(sq.select_quantizer ());
125
+ for (size_t i = 0; i < ni; i++) {
126
+ squant->decode_vector(&codes[(i + i0) * code_size], recons + i * d);
127
+ }
128
+ }
129
+
130
+ void IndexScalarQuantizer::reconstruct(idx_t key, float* recons) const
131
+ {
132
+ reconstruct_n(key, 1, recons);
133
+ }
134
+
135
+ /* Codec interface */
136
+ size_t IndexScalarQuantizer::sa_code_size () const
137
+ {
138
+ return sq.code_size;
139
+ }
140
+
141
+ void IndexScalarQuantizer::sa_encode (idx_t n, const float *x,
142
+ uint8_t *bytes) const
143
+ {
144
+ FAISS_THROW_IF_NOT (is_trained);
145
+ sq.compute_codes (x, bytes, n);
146
+ }
147
+
148
+ void IndexScalarQuantizer::sa_decode (idx_t n, const uint8_t *bytes,
149
+ float *x) const
150
+ {
151
+ FAISS_THROW_IF_NOT (is_trained);
152
+ sq.decode(bytes, x, n);
153
+ }
154
+
155
+
156
+
157
+ /*******************************************************************
158
+ * IndexIVFScalarQuantizer implementation
159
+ ********************************************************************/
160
+
161
+ IndexIVFScalarQuantizer::IndexIVFScalarQuantizer (
162
+ Index *quantizer, size_t d, size_t nlist,
163
+ ScalarQuantizer::QuantizerType qtype,
164
+ MetricType metric, bool encode_residual)
165
+ : IndexIVF(quantizer, d, nlist, 0, metric),
166
+ sq(d, qtype),
167
+ by_residual(encode_residual)
168
+ {
169
+ code_size = sq.code_size;
170
+ // was not known at construction time
171
+ invlists->code_size = code_size;
172
+ is_trained = false;
173
+ }
174
+
175
+ IndexIVFScalarQuantizer::IndexIVFScalarQuantizer ():
176
+ IndexIVF(),
177
+ by_residual(true)
178
+ {
179
+ }
180
+
181
+ void IndexIVFScalarQuantizer::train_residual (idx_t n, const float *x)
182
+ {
183
+ sq.train_residual(n, x, quantizer, by_residual, verbose);
184
+ }
185
+
186
+ void IndexIVFScalarQuantizer::encode_vectors(idx_t n, const float* x,
187
+ const idx_t *list_nos,
188
+ uint8_t * codes,
189
+ bool include_listnos) const
190
+ {
191
+ std::unique_ptr<ScalarQuantizer::Quantizer> squant (sq.select_quantizer ());
192
+ size_t coarse_size = include_listnos ? coarse_code_size () : 0;
193
+ memset(codes, 0, (code_size + coarse_size) * n);
194
+
195
+ #pragma omp parallel if(n > 1)
196
+ {
197
+ std::vector<float> residual (d);
198
+
199
+ #pragma omp for
200
+ for (size_t i = 0; i < n; i++) {
201
+ int64_t list_no = list_nos [i];
202
+ if (list_no >= 0) {
203
+ const float *xi = x + i * d;
204
+ uint8_t *code = codes + i * (code_size + coarse_size);
205
+ if (by_residual) {
206
+ quantizer->compute_residual (
207
+ xi, residual.data(), list_no);
208
+ xi = residual.data ();
209
+ }
210
+ if (coarse_size) {
211
+ encode_listno (list_no, code);
212
+ }
213
+ squant->encode_vector (xi, code + coarse_size);
214
+ }
215
+ }
216
+ }
217
+ }
218
+
219
+ void IndexIVFScalarQuantizer::sa_decode (idx_t n, const uint8_t *codes,
220
+ float *x) const
221
+ {
222
+ std::unique_ptr<ScalarQuantizer::Quantizer> squant (sq.select_quantizer ());
223
+ size_t coarse_size = coarse_code_size ();
224
+
225
+ #pragma omp parallel if(n > 1)
226
+ {
227
+ std::vector<float> residual (d);
228
+
229
+ #pragma omp for
230
+ for (size_t i = 0; i < n; i++) {
231
+ const uint8_t *code = codes + i * (code_size + coarse_size);
232
+ int64_t list_no = decode_listno (code);
233
+ float *xi = x + i * d;
234
+ squant->decode_vector (code + coarse_size, xi);
235
+ if (by_residual) {
236
+ quantizer->reconstruct (list_no, residual.data());
237
+ for (size_t j = 0; j < d; j++) {
238
+ xi[j] += residual[j];
239
+ }
240
+ }
241
+ }
242
+ }
243
+ }
244
+
245
+
246
+
247
+ void IndexIVFScalarQuantizer::add_with_ids
248
+ (idx_t n, const float * x, const idx_t *xids)
249
+ {
250
+ FAISS_THROW_IF_NOT (is_trained);
251
+ std::unique_ptr<int64_t []> idx (new int64_t [n]);
252
+ quantizer->assign (n, x, idx.get());
253
+ size_t nadd = 0;
254
+ std::unique_ptr<ScalarQuantizer::Quantizer> squant(sq.select_quantizer ());
255
+
256
+ #pragma omp parallel reduction(+: nadd)
257
+ {
258
+ std::vector<float> residual (d);
259
+ std::vector<uint8_t> one_code (code_size);
260
+ int nt = omp_get_num_threads();
261
+ int rank = omp_get_thread_num();
262
+
263
+ // each thread takes care of a subset of lists
264
+ for (size_t i = 0; i < n; i++) {
265
+ int64_t list_no = idx [i];
266
+ if (list_no >= 0 && list_no % nt == rank) {
267
+ int64_t id = xids ? xids[i] : ntotal + i;
268
+
269
+ const float * xi = x + i * d;
270
+ if (by_residual) {
271
+ quantizer->compute_residual (xi, residual.data(), list_no);
272
+ xi = residual.data();
273
+ }
274
+
275
+ memset (one_code.data(), 0, code_size);
276
+ squant->encode_vector (xi, one_code.data());
277
+
278
+ invlists->add_entry (list_no, id, one_code.data());
279
+
280
+ nadd++;
281
+
282
+ }
283
+ }
284
+ }
285
+ ntotal += n;
286
+ }
287
+
288
+
289
+
290
+
291
+
292
+ InvertedListScanner* IndexIVFScalarQuantizer::get_InvertedListScanner
293
+ (bool store_pairs) const
294
+ {
295
+ return sq.select_InvertedListScanner (metric_type, quantizer, store_pairs,
296
+ by_residual);
297
+ }
298
+
299
+
300
+ void IndexIVFScalarQuantizer::reconstruct_from_offset (int64_t list_no,
301
+ int64_t offset,
302
+ float* recons) const
303
+ {
304
+ std::vector<float> centroid(d);
305
+ quantizer->reconstruct (list_no, centroid.data());
306
+
307
+ const uint8_t* code = invlists->get_single_code (list_no, offset);
308
+ sq.decode (code, recons, 1);
309
+ for (int i = 0; i < d; ++i) {
310
+ recons[i] += centroid[i];
311
+ }
312
+ }
313
+
314
+
315
+
316
+
317
+ } // namespace faiss