faiss 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,91 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #pragma once
11
+
12
+
13
+
14
+ #include <faiss/Index.h>
15
+ #include <faiss/VectorTransform.h>
16
+
17
+ namespace faiss {
18
+
19
+ /** Index that applies a LinearTransform transform on vectors before
20
+ * handing them over to a sub-index */
21
+ struct IndexPreTransform: Index {
22
+
23
+ std::vector<VectorTransform *> chain; ///! chain of tranforms
24
+ Index * index; ///! the sub-index
25
+
26
+ bool own_fields; ///! whether pointers are deleted in destructor
27
+
28
+ explicit IndexPreTransform (Index *index);
29
+
30
+ IndexPreTransform ();
31
+
32
+ /// ltrans is the last transform before the index
33
+ IndexPreTransform (VectorTransform * ltrans, Index * index);
34
+
35
+ void prepend_transform (VectorTransform * ltrans);
36
+
37
+ void train(idx_t n, const float* x) override;
38
+
39
+ void add(idx_t n, const float* x) override;
40
+
41
+ void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
42
+
43
+ void reset() override;
44
+
45
+ /** removes IDs from the index. Not supported by all indexes.
46
+ */
47
+ size_t remove_ids(const IDSelector& sel) override;
48
+
49
+ void search(
50
+ idx_t n,
51
+ const float* x,
52
+ idx_t k,
53
+ float* distances,
54
+ idx_t* labels) const override;
55
+
56
+
57
+ /* range search, no attempt is done to change the radius */
58
+ void range_search (idx_t n, const float* x, float radius,
59
+ RangeSearchResult* result) const override;
60
+
61
+
62
+ void reconstruct (idx_t key, float * recons) const override;
63
+
64
+ void reconstruct_n (idx_t i0, idx_t ni, float *recons)
65
+ const override;
66
+
67
+ void search_and_reconstruct (idx_t n, const float *x, idx_t k,
68
+ float *distances, idx_t *labels,
69
+ float *recons) const override;
70
+
71
+ /// apply the transforms in the chain. The returned float * may be
72
+ /// equal to x, otherwise it should be deallocated.
73
+ const float * apply_chain (idx_t n, const float *x) const;
74
+
75
+ /// Reverse the transforms in the chain. May not be implemented for
76
+ /// all transforms in the chain or may return approximate results.
77
+ void reverse_chain (idx_t n, const float* xt, float* x) const;
78
+
79
+
80
+ /* standalone codec interface */
81
+ size_t sa_code_size () const override;
82
+ void sa_encode (idx_t n, const float *x,
83
+ uint8_t *bytes) const override;
84
+ void sa_decode (idx_t n, const uint8_t *bytes,
85
+ float *x) const override;
86
+
87
+ ~IndexPreTransform() override;
88
+ };
89
+
90
+
91
+ } // namespace faiss
@@ -0,0 +1,123 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include <faiss/IndexReplicas.h>
9
+ #include <faiss/impl/FaissAssert.h>
10
+
11
+ namespace faiss {
12
+
13
+ template <typename IndexT>
14
+ IndexReplicasTemplate<IndexT>::IndexReplicasTemplate(bool threaded)
15
+ : ThreadedIndex<IndexT>(threaded) {
16
+ }
17
+
18
+ template <typename IndexT>
19
+ IndexReplicasTemplate<IndexT>::IndexReplicasTemplate(idx_t d, bool threaded)
20
+ : ThreadedIndex<IndexT>(d, threaded) {
21
+ }
22
+
23
+ template <typename IndexT>
24
+ IndexReplicasTemplate<IndexT>::IndexReplicasTemplate(int d, bool threaded)
25
+ : ThreadedIndex<IndexT>(d, threaded) {
26
+ }
27
+
28
+ template <typename IndexT>
29
+ void
30
+ IndexReplicasTemplate<IndexT>::onAfterAddIndex(IndexT* index) {
31
+ // Make sure that the parameters are the same for all prior indices, unless
32
+ // we're the first index to be added
33
+ if (this->count() > 0 && this->at(0) != index) {
34
+ auto existing = this->at(0);
35
+
36
+ FAISS_THROW_IF_NOT_FMT(index->ntotal == existing->ntotal,
37
+ "IndexReplicas: newly added index does "
38
+ "not have same number of vectors as prior index; "
39
+ "prior index has %ld vectors, new index has %ld",
40
+ existing->ntotal, index->ntotal);
41
+
42
+ FAISS_THROW_IF_NOT_MSG(index->is_trained == existing->is_trained,
43
+ "IndexReplicas: newly added index does "
44
+ "not have same train status as prior index");
45
+ } else {
46
+ // Set our parameters based on the first index we're adding
47
+ // (dimension is handled in ThreadedIndex)
48
+ this->ntotal = index->ntotal;
49
+ this->verbose = index->verbose;
50
+ this->is_trained = index->is_trained;
51
+ this->metric_type = index->metric_type;
52
+ }
53
+ }
54
+
55
+ template <typename IndexT>
56
+ void
57
+ IndexReplicasTemplate<IndexT>::train(idx_t n, const component_t* x) {
58
+ this->runOnIndex([n, x](int, IndexT* index){ index->train(n, x); });
59
+ }
60
+
61
+ template <typename IndexT>
62
+ void
63
+ IndexReplicasTemplate<IndexT>::add(idx_t n, const component_t* x) {
64
+ this->runOnIndex([n, x](int, IndexT* index){ index->add(n, x); });
65
+ this->ntotal += n;
66
+ }
67
+
68
+ template <typename IndexT>
69
+ void
70
+ IndexReplicasTemplate<IndexT>::reconstruct(idx_t n, component_t* x) const {
71
+ FAISS_THROW_IF_NOT_MSG(this->count() > 0, "no replicas in index");
72
+
73
+ // Just pass to the first replica
74
+ this->at(0)->reconstruct(n, x);
75
+ }
76
+
77
+ template <typename IndexT>
78
+ void
79
+ IndexReplicasTemplate<IndexT>::search(idx_t n,
80
+ const component_t* x,
81
+ idx_t k,
82
+ distance_t* distances,
83
+ idx_t* labels) const {
84
+ FAISS_THROW_IF_NOT_MSG(this->count() > 0, "no replicas in index");
85
+
86
+ if (n == 0) {
87
+ return;
88
+ }
89
+
90
+ auto dim = this->d;
91
+ size_t componentsPerVec =
92
+ sizeof(component_t) == 1 ? (dim + 7) / 8 : dim;
93
+
94
+ // Partition the query by the number of indices we have
95
+ faiss::Index::idx_t queriesPerIndex =
96
+ (faiss::Index::idx_t) (n + this->count() - 1) /
97
+ (faiss::Index::idx_t) this->count();
98
+ FAISS_ASSERT(n / queriesPerIndex <= this->count());
99
+
100
+ auto fn =
101
+ [queriesPerIndex, componentsPerVec,
102
+ n, x, k, distances, labels](int i, const IndexT* index) {
103
+ faiss::Index::idx_t base = (faiss::Index::idx_t) i * queriesPerIndex;
104
+
105
+ if (base < n) {
106
+ auto numForIndex = std::min(queriesPerIndex, n - base);
107
+
108
+ index->search(numForIndex,
109
+ x + base * componentsPerVec,
110
+ k,
111
+ distances + base * k,
112
+ labels + base * k);
113
+ }
114
+ };
115
+
116
+ this->runOnIndex(fn);
117
+ }
118
+
119
+ // explicit instantiations: the member templates above are defined in this .cpp, so the Index and IndexBinary versions are instantiated here
120
+ template struct IndexReplicasTemplate<Index>;
121
+ template struct IndexReplicasTemplate<IndexBinary>;
122
+
123
+ } // namespace
@@ -0,0 +1,76 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <faiss/Index.h>
11
+ #include <faiss/IndexBinary.h>
12
+ #include <faiss/impl/ThreadedIndex.h>
13
+
14
+ namespace faiss {
15
+
16
+ /// Takes individual faiss::Index instances, and splits queries for
17
+ /// sending to each Index instance, and joins the results together
18
+ /// when done.
19
+ /// Each index is managed by a separate CPU thread.
20
+ template <typename IndexT>
21
+ class IndexReplicasTemplate : public ThreadedIndex<IndexT> {
22
+ public:
23
+ using idx_t = typename IndexT::idx_t;
24
+ using component_t = typename IndexT::component_t;
25
+ using distance_t = typename IndexT::distance_t;
26
+
27
+ /// The dimension that all sub-indices must share will be the dimension of the
28
+ /// first sub-index added
29
+ /// @param threaded do we use one thread per sub-index or do queries
30
+ /// sequentially?
31
+ explicit IndexReplicasTemplate(bool threaded = true);
32
+
33
+ /// @param d the dimension that all sub-indices must share
34
+ /// @param threaded do we use one thread per sub index or do queries
35
+ /// sequentially?
36
+ explicit IndexReplicasTemplate(idx_t d, bool threaded = true);
37
+
38
+ /// int version due to the implicit bool conversion ambiguity of int as
39
+ /// dimension
40
+ explicit IndexReplicasTemplate(int d, bool threaded = true);
41
+
42
+ /// Alias for addIndex()
43
+ void add_replica(IndexT* index) { this->addIndex(index); }
44
+
45
+ /// Alias for removeIndex()
46
+ void remove_replica(IndexT* index) { this->removeIndex(index); }
47
+
48
+ /// faiss::Index API
49
+ /// All indices receive the same call
50
+ void train(idx_t n, const component_t* x) override;
51
+
52
+ /// faiss::Index API
53
+ /// All indices receive the same call
54
+ void add(idx_t n, const component_t* x) override;
55
+
56
+ /// faiss::Index API
57
+ /// Query is partitioned into a slice for each sub-index
58
+ /// split by ceil(n / #indices) for our sub-indices
59
+ void search(idx_t n,
60
+ const component_t* x,
61
+ idx_t k,
62
+ distance_t* distances,
63
+ idx_t* labels) const override;
64
+
65
+ /// reconstructs from the first index
66
+ void reconstruct(idx_t, component_t *v) const override;
67
+
68
+ protected:
69
+ /// Called just after an index is added
70
+ void onAfterAddIndex(IndexT* index) override;
71
+ };
72
+
73
+ using IndexReplicas = IndexReplicasTemplate<Index>;
74
+ using IndexBinaryReplicas = IndexReplicasTemplate<IndexBinary>;
75
+
76
+ } // namespace
@@ -0,0 +1,317 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #include <faiss/IndexScalarQuantizer.h>
11
+
12
+ #include <cstdio>
13
+ #include <algorithm>
14
+
15
+ #include <omp.h>
16
+
17
+ #include <faiss/utils/utils.h>
18
+ #include <faiss/impl/FaissAssert.h>
19
+ #include <faiss/impl/AuxIndexStructures.h>
20
+ #include <faiss/impl/ScalarQuantizer.h>
21
+
22
+ namespace faiss {
23
+
24
+
25
+
26
+ /*******************************************************************
27
+ * IndexScalarQuantizer implementation
28
+ ********************************************************************/
29
+
30
+ IndexScalarQuantizer::IndexScalarQuantizer
31
+ (int d, ScalarQuantizer::QuantizerType qtype,
32
+ MetricType metric):
33
+ Index(d, metric),
34
+ sq (d, qtype)
35
+ {
36
+ is_trained =
37
+ qtype == ScalarQuantizer::QT_fp16 ||
38
+ qtype == ScalarQuantizer::QT_8bit_direct;
39
+ code_size = sq.code_size;
40
+ }
41
+
42
+
43
+ IndexScalarQuantizer::IndexScalarQuantizer ():
44
+ IndexScalarQuantizer(0, ScalarQuantizer::QT_8bit)
45
+ {}
46
+
47
+ void IndexScalarQuantizer::train(idx_t n, const float* x)
48
+ {
49
+ sq.train(n, x);
50
+ is_trained = true;
51
+ }
52
+
53
+ void IndexScalarQuantizer::add(idx_t n, const float* x)
54
+ {
55
+ FAISS_THROW_IF_NOT (is_trained);
56
+ codes.resize ((n + ntotal) * code_size);
57
+ sq.compute_codes (x, &codes[ntotal * code_size], n);
58
+ ntotal += n;
59
+ }
60
+
61
+
62
+ void IndexScalarQuantizer::search(
63
+ idx_t n,
64
+ const float* x,
65
+ idx_t k,
66
+ float* distances,
67
+ idx_t* labels) const
68
+ {
69
+ FAISS_THROW_IF_NOT (is_trained);
70
+ FAISS_THROW_IF_NOT (metric_type == METRIC_L2 ||
71
+ metric_type == METRIC_INNER_PRODUCT);
72
+
73
+ #pragma omp parallel
74
+ {
75
+ InvertedListScanner* scanner = sq.select_InvertedListScanner
76
+ (metric_type, nullptr, true);
77
+ ScopeDeleter1<InvertedListScanner> del(scanner);
78
+
79
+ #pragma omp for
80
+ for (size_t i = 0; i < n; i++) {
81
+ float * D = distances + k * i;
82
+ idx_t * I = labels + k * i;
83
+ // re-order heap
84
+ if (metric_type == METRIC_L2) {
85
+ maxheap_heapify (k, D, I);
86
+ } else {
87
+ minheap_heapify (k, D, I);
88
+ }
89
+ scanner->set_query (x + i * d);
90
+ scanner->scan_codes (ntotal, codes.data(),
91
+ nullptr, D, I, k);
92
+
93
+ // re-order heap
94
+ if (metric_type == METRIC_L2) {
95
+ maxheap_reorder (k, D, I);
96
+ } else {
97
+ minheap_reorder (k, D, I);
98
+ }
99
+ }
100
+ }
101
+
102
+ }
103
+
104
+
105
+ DistanceComputer *IndexScalarQuantizer::get_distance_computer () const
106
+ {
107
+ ScalarQuantizer::SQDistanceComputer *dc =
108
+ sq.get_distance_computer (metric_type);
109
+ dc->code_size = sq.code_size;
110
+ dc->codes = codes.data();
111
+ return dc;
112
+ }
113
+
114
+
115
+ void IndexScalarQuantizer::reset()
116
+ {
117
+ codes.clear();
118
+ ntotal = 0;
119
+ }
120
+
121
+ void IndexScalarQuantizer::reconstruct_n(
122
+ idx_t i0, idx_t ni, float* recons) const
123
+ {
124
+ std::unique_ptr<ScalarQuantizer::Quantizer> squant(sq.select_quantizer ());
125
+ for (size_t i = 0; i < ni; i++) {
126
+ squant->decode_vector(&codes[(i + i0) * code_size], recons + i * d);
127
+ }
128
+ }
129
+
130
+ void IndexScalarQuantizer::reconstruct(idx_t key, float* recons) const
131
+ {
132
+ reconstruct_n(key, 1, recons);
133
+ }
134
+
135
+ /* Codec interface */
136
+ size_t IndexScalarQuantizer::sa_code_size () const
137
+ {
138
+ return sq.code_size;
139
+ }
140
+
141
+ void IndexScalarQuantizer::sa_encode (idx_t n, const float *x,
142
+ uint8_t *bytes) const
143
+ {
144
+ FAISS_THROW_IF_NOT (is_trained);
145
+ sq.compute_codes (x, bytes, n);
146
+ }
147
+
148
+ void IndexScalarQuantizer::sa_decode (idx_t n, const uint8_t *bytes,
149
+ float *x) const
150
+ {
151
+ FAISS_THROW_IF_NOT (is_trained);
152
+ sq.decode(bytes, x, n);
153
+ }
154
+
155
+
156
+
157
+ /*******************************************************************
158
+ * IndexIVFScalarQuantizer implementation
159
+ ********************************************************************/
160
+
161
+ IndexIVFScalarQuantizer::IndexIVFScalarQuantizer (
162
+ Index *quantizer, size_t d, size_t nlist,
163
+ ScalarQuantizer::QuantizerType qtype,
164
+ MetricType metric, bool encode_residual)
165
+ : IndexIVF(quantizer, d, nlist, 0, metric),
166
+ sq(d, qtype),
167
+ by_residual(encode_residual)
168
+ {
169
+ code_size = sq.code_size;
170
+ // was not known at construction time
171
+ invlists->code_size = code_size;
172
+ is_trained = false;
173
+ }
174
+
175
+ IndexIVFScalarQuantizer::IndexIVFScalarQuantizer ():
176
+ IndexIVF(),
177
+ by_residual(true)
178
+ {
179
+ }
180
+
181
+ void IndexIVFScalarQuantizer::train_residual (idx_t n, const float *x)
182
+ {
183
+ sq.train_residual(n, x, quantizer, by_residual, verbose);
184
+ }
185
+
186
+ void IndexIVFScalarQuantizer::encode_vectors(idx_t n, const float* x,
187
+ const idx_t *list_nos,
188
+ uint8_t * codes,
189
+ bool include_listnos) const
190
+ {
191
+ std::unique_ptr<ScalarQuantizer::Quantizer> squant (sq.select_quantizer ());
192
+ size_t coarse_size = include_listnos ? coarse_code_size () : 0;
193
+ memset(codes, 0, (code_size + coarse_size) * n);
194
+
195
+ #pragma omp parallel if(n > 1)
196
+ {
197
+ std::vector<float> residual (d);
198
+
199
+ #pragma omp for
200
+ for (size_t i = 0; i < n; i++) {
201
+ int64_t list_no = list_nos [i];
202
+ if (list_no >= 0) {
203
+ const float *xi = x + i * d;
204
+ uint8_t *code = codes + i * (code_size + coarse_size);
205
+ if (by_residual) {
206
+ quantizer->compute_residual (
207
+ xi, residual.data(), list_no);
208
+ xi = residual.data ();
209
+ }
210
+ if (coarse_size) {
211
+ encode_listno (list_no, code);
212
+ }
213
+ squant->encode_vector (xi, code + coarse_size);
214
+ }
215
+ }
216
+ }
217
+ }
218
+
219
+ void IndexIVFScalarQuantizer::sa_decode (idx_t n, const uint8_t *codes,
220
+ float *x) const
221
+ {
222
+ std::unique_ptr<ScalarQuantizer::Quantizer> squant (sq.select_quantizer ());
223
+ size_t coarse_size = coarse_code_size ();
224
+
225
+ #pragma omp parallel if(n > 1)
226
+ {
227
+ std::vector<float> residual (d);
228
+
229
+ #pragma omp for
230
+ for (size_t i = 0; i < n; i++) {
231
+ const uint8_t *code = codes + i * (code_size + coarse_size);
232
+ int64_t list_no = decode_listno (code);
233
+ float *xi = x + i * d;
234
+ squant->decode_vector (code + coarse_size, xi);
235
+ if (by_residual) {
236
+ quantizer->reconstruct (list_no, residual.data());
237
+ for (size_t j = 0; j < d; j++) {
238
+ xi[j] += residual[j];
239
+ }
240
+ }
241
+ }
242
+ }
243
+ }
244
+
245
+
246
+
247
+ void IndexIVFScalarQuantizer::add_with_ids
248
+ (idx_t n, const float * x, const idx_t *xids)
249
+ {
250
+ FAISS_THROW_IF_NOT (is_trained);
251
+ std::unique_ptr<int64_t []> idx (new int64_t [n]);
252
+ quantizer->assign (n, x, idx.get());
253
+ size_t nadd = 0;
254
+ std::unique_ptr<ScalarQuantizer::Quantizer> squant(sq.select_quantizer ());
255
+
256
+ #pragma omp parallel reduction(+: nadd)
257
+ {
258
+ std::vector<float> residual (d);
259
+ std::vector<uint8_t> one_code (code_size);
260
+ int nt = omp_get_num_threads();
261
+ int rank = omp_get_thread_num();
262
+
263
+ // each thread takes care of a subset of lists
264
+ for (size_t i = 0; i < n; i++) {
265
+ int64_t list_no = idx [i];
266
+ if (list_no >= 0 && list_no % nt == rank) {
267
+ int64_t id = xids ? xids[i] : ntotal + i;
268
+
269
+ const float * xi = x + i * d;
270
+ if (by_residual) {
271
+ quantizer->compute_residual (xi, residual.data(), list_no);
272
+ xi = residual.data();
273
+ }
274
+
275
+ memset (one_code.data(), 0, code_size);
276
+ squant->encode_vector (xi, one_code.data());
277
+
278
+ invlists->add_entry (list_no, id, one_code.data());
279
+
280
+ nadd++;
281
+
282
+ }
283
+ }
284
+ }
285
+ ntotal += n;
286
+ }
287
+
288
+
289
+
290
+
291
+
292
+ InvertedListScanner* IndexIVFScalarQuantizer::get_InvertedListScanner
293
+ (bool store_pairs) const
294
+ {
295
+ return sq.select_InvertedListScanner (metric_type, quantizer, store_pairs,
296
+ by_residual);
297
+ }
298
+
299
+
300
+ void IndexIVFScalarQuantizer::reconstruct_from_offset (int64_t list_no,
301
+ int64_t offset,
302
+ float* recons) const
303
+ {
304
+ std::vector<float> centroid(d);
305
+ quantizer->reconstruct (list_no, centroid.data());
306
+
307
+ const uint8_t* code = invlists->get_single_code (list_no, offset);
308
+ sq.decode (code, recons, 1);
309
+ for (int i = 0; i < d; ++i) {
310
+ recons[i] += centroid[i];
311
+ }
312
+ }
313
+
314
+
315
+
316
+
317
+ } // namespace faiss