faiss 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,118 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #ifndef FAISS_INDEX_IVF_FLAT_H
11
+ #define FAISS_INDEX_IVF_FLAT_H
12
+
13
+ #include <unordered_map>
14
+ #include <stdint.h>
15
+
16
+ #include <faiss/IndexIVF.h>
17
+
18
+
19
+ namespace faiss {
20
+
21
+ /** Inverted file with stored vectors. Here the inverted file
22
+ * pre-selects the vectors to be searched, but they are not otherwise
23
+ * encoded, the code array just contains the raw float entries.
24
+ */
25
+ struct IndexIVFFlat: IndexIVF {
26
+
27
+ IndexIVFFlat (
28
+ Index * quantizer, size_t d, size_t nlist_,
29
+ MetricType = METRIC_L2);
30
+
31
+ /// same as add_with_ids, with precomputed coarse quantizer
32
+ virtual void add_core (idx_t n, const float * x, const int64_t *xids,
33
+ const int64_t *precomputed_idx);
34
+
35
+ /// implemented for all IndexIVF* classes
36
+ void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
37
+
38
+ void encode_vectors(idx_t n, const float* x,
39
+ const idx_t *list_nos,
40
+ uint8_t * codes,
41
+ bool include_listnos=false) const override;
42
+
43
+
44
+ InvertedListScanner *get_InvertedListScanner (bool store_pairs)
45
+ const override;
46
+
47
+ /** Update a subset of vectors.
48
+ *
49
+ * The index must have a direct_map
50
+ *
51
+ * @param nv nb of vectors to update
52
+ * @param idx vector indices to update, size nv
53
+ * @param v vectors of new values, size nv*d
54
+ */
55
+ virtual void update_vectors (int nv, idx_t *idx, const float *v);
56
+
57
+ void reconstruct_from_offset (int64_t list_no, int64_t offset,
58
+ float* recons) const override;
59
+
60
+ void sa_decode (idx_t n, const uint8_t *bytes,
61
+ float *x) const override;
62
+
63
+ IndexIVFFlat () {}
64
+ };
65
+
66
+
67
+ struct IndexIVFFlatDedup: IndexIVFFlat {
68
+
69
+ /** Maps ids stored in the index to the ids of vectors that are
70
+ * the same. When a vector is unique, it does not appear in the
71
+ * instances map */
72
+ std::unordered_multimap <idx_t, idx_t> instances;
73
+
74
+ IndexIVFFlatDedup (
75
+ Index * quantizer, size_t d, size_t nlist_,
76
+ MetricType = METRIC_L2);
77
+
78
+ /// also dedups the training set
79
+ void train(idx_t n, const float* x) override;
80
+
81
+ /// implemented for all IndexIVF* classes
82
+ void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
83
+
84
+ void search_preassigned (idx_t n, const float *x, idx_t k,
85
+ const idx_t *assign,
86
+ const float *centroid_dis,
87
+ float *distances, idx_t *labels,
88
+ bool store_pairs,
89
+ const IVFSearchParameters *params=nullptr
90
+ ) const override;
91
+
92
+ size_t remove_ids(const IDSelector& sel) override;
93
+
94
+ /// not implemented
95
+ void range_search(
96
+ idx_t n,
97
+ const float* x,
98
+ float radius,
99
+ RangeSearchResult* result) const override;
100
+
101
+ /// not implemented
102
+ void update_vectors (int nv, idx_t *idx, const float *v) override;
103
+
104
+
105
+ /// not implemented
106
+ void reconstruct_from_offset (int64_t list_no, int64_t offset,
107
+ float* recons) const override;
108
+
109
+ IndexIVFFlatDedup () {}
110
+
111
+
112
+ };
113
+
114
+
115
+
116
+ } // namespace faiss
117
+
118
+ #endif
@@ -0,0 +1,1207 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #include <faiss/IndexIVFPQ.h>
11
+
12
+ #include <cmath>
13
+ #include <cstdio>
14
+ #include <cassert>
15
+ #include <stdint.h>
16
+
17
+ #include <algorithm>
18
+
19
+ #include <faiss/utils/Heap.h>
20
+ #include <faiss/utils/utils.h>
21
+ #include <faiss/utils/distances.h>
22
+
23
+ #include <faiss/Clustering.h>
24
+ #include <faiss/IndexFlat.h>
25
+
26
+ #include <faiss/utils/hamming.h>
27
+
28
+ #include <faiss/impl/FaissAssert.h>
29
+
30
+ #include <faiss/impl/AuxIndexStructures.h>
31
+
32
+ namespace faiss {
33
+
34
+ /*****************************************
35
+ * IndexIVFPQ implementation
36
+ ******************************************/
37
+
38
+ IndexIVFPQ::IndexIVFPQ (Index * quantizer, size_t d, size_t nlist,
39
+ size_t M, size_t nbits_per_idx):
40
+ IndexIVF (quantizer, d, nlist, 0, METRIC_L2),
41
+ pq (d, M, nbits_per_idx)
42
+ {
43
+ FAISS_THROW_IF_NOT (nbits_per_idx <= 8);
44
+ code_size = pq.code_size;
45
+ invlists->code_size = code_size;
46
+ is_trained = false;
47
+ by_residual = true;
48
+ use_precomputed_table = 0;
49
+ scan_table_threshold = 0;
50
+
51
+ polysemous_training = nullptr;
52
+ do_polysemous_training = false;
53
+ polysemous_ht = 0;
54
+
55
+ }
56
+
57
+
58
+ /****************************************************************
59
+ * training */
60
+
61
+ void IndexIVFPQ::train_residual (idx_t n, const float *x)
62
+ {
63
+ train_residual_o (n, x, nullptr);
64
+ }
65
+
66
+
67
+ void IndexIVFPQ::train_residual_o (idx_t n, const float *x, float *residuals_2)
68
+ {
69
+ const float * x_in = x;
70
+
71
+ x = fvecs_maybe_subsample (
72
+ d, (size_t*)&n, pq.cp.max_points_per_centroid * pq.ksub,
73
+ x, verbose, pq.cp.seed);
74
+
75
+ ScopeDeleter<float> del_x (x_in == x ? nullptr : x);
76
+
77
+ const float *trainset;
78
+ ScopeDeleter<float> del_residuals;
79
+ if (by_residual) {
80
+ if(verbose) printf("computing residuals\n");
81
+ idx_t * assign = new idx_t [n]; // assignment to coarse centroids
82
+ ScopeDeleter<idx_t> del (assign);
83
+ quantizer->assign (n, x, assign);
84
+ float *residuals = new float [n * d];
85
+ del_residuals.set (residuals);
86
+ for (idx_t i = 0; i < n; i++)
87
+ quantizer->compute_residual (x + i * d, residuals+i*d, assign[i]);
88
+
89
+ trainset = residuals;
90
+ } else {
91
+ trainset = x;
92
+ }
93
+ if (verbose)
94
+ printf ("training %zdx%zd product quantizer on %ld vectors in %dD\n",
95
+ pq.M, pq.ksub, n, d);
96
+ pq.verbose = verbose;
97
+ pq.train (n, trainset);
98
+
99
+ if (do_polysemous_training) {
100
+ if (verbose)
101
+ printf("doing polysemous training for PQ\n");
102
+ PolysemousTraining default_pt;
103
+ PolysemousTraining *pt = polysemous_training;
104
+ if (!pt) pt = &default_pt;
105
+ pt->optimize_pq_for_hamming (pq, n, trainset);
106
+ }
107
+
108
+ // prepare second-level residuals for refine PQ
109
+ if (residuals_2) {
110
+ uint8_t *train_codes = new uint8_t [pq.code_size * n];
111
+ ScopeDeleter<uint8_t> del (train_codes);
112
+ pq.compute_codes (trainset, train_codes, n);
113
+
114
+ for (idx_t i = 0; i < n; i++) {
115
+ const float *xx = trainset + i * d;
116
+ float * res = residuals_2 + i * d;
117
+ pq.decode (train_codes + i * pq.code_size, res);
118
+ for (int j = 0; j < d; j++)
119
+ res[j] = xx[j] - res[j];
120
+ }
121
+
122
+ }
123
+
124
+ if (by_residual) {
125
+ precompute_table ();
126
+ }
127
+
128
+ }
129
+
130
+
131
+
132
+
133
+
134
+
135
+ /****************************************************************
136
+ * IVFPQ as codec */
137
+
138
+
139
+ /* produce a binary signature based on the residual vector */
140
+ void IndexIVFPQ::encode (idx_t key, const float * x, uint8_t * code) const
141
+ {
142
+ if (by_residual) {
143
+ float residual_vec[d];
144
+ quantizer->compute_residual (x, residual_vec, key);
145
+ pq.compute_code (residual_vec, code);
146
+ }
147
+ else pq.compute_code (x, code);
148
+ }
149
+
150
+ void IndexIVFPQ::encode_multiple (size_t n, idx_t *keys,
151
+ const float * x, uint8_t * xcodes,
152
+ bool compute_keys) const
153
+ {
154
+ if (compute_keys)
155
+ quantizer->assign (n, x, keys);
156
+
157
+ encode_vectors (n, x, keys, xcodes);
158
+ }
159
+
160
+ void IndexIVFPQ::decode_multiple (size_t n, const idx_t *keys,
161
+ const uint8_t * xcodes, float * x) const
162
+ {
163
+ pq.decode (xcodes, x, n);
164
+ if (by_residual) {
165
+ std::vector<float> centroid (d);
166
+ for (size_t i = 0; i < n; i++) {
167
+ quantizer->reconstruct (keys[i], centroid.data());
168
+ float *xi = x + i * d;
169
+ for (size_t j = 0; j < d; j++) {
170
+ xi [j] += centroid [j];
171
+ }
172
+ }
173
+ }
174
+ }
175
+
176
+
177
+
178
+
179
+ /****************************************************************
180
+ * add */
181
+
182
+
183
+ void IndexIVFPQ::add_with_ids (idx_t n, const float * x, const idx_t *xids)
184
+ {
185
+ add_core_o (n, x, xids, nullptr);
186
+ }
187
+
188
+
189
+ static float * compute_residuals (
190
+ const Index *quantizer,
191
+ Index::idx_t n, const float* x,
192
+ const Index::idx_t *list_nos)
193
+ {
194
+ size_t d = quantizer->d;
195
+ float *residuals = new float [n * d];
196
+ // TODO: parallelize?
197
+ for (size_t i = 0; i < n; i++) {
198
+ if (list_nos[i] < 0)
199
+ memset (residuals + i * d, 0, sizeof(*residuals) * d);
200
+ else
201
+ quantizer->compute_residual (
202
+ x + i * d, residuals + i * d, list_nos[i]);
203
+ }
204
+ return residuals;
205
+ }
206
+
207
+ void IndexIVFPQ::encode_vectors(idx_t n, const float* x,
208
+ const idx_t *list_nos,
209
+ uint8_t * codes,
210
+ bool include_listnos) const
211
+ {
212
+ if (by_residual) {
213
+ float *to_encode = compute_residuals (quantizer, n, x, list_nos);
214
+ ScopeDeleter<float> del (to_encode);
215
+ pq.compute_codes (to_encode, codes, n);
216
+ } else {
217
+ pq.compute_codes (x, codes, n);
218
+ }
219
+
220
+ if (include_listnos) {
221
+ size_t coarse_size = coarse_code_size();
222
+ for (idx_t i = n - 1; i >= 0; i--) {
223
+ uint8_t * code = codes + i * (coarse_size + code_size);
224
+ memmove (code + coarse_size,
225
+ codes + i * code_size, code_size);
226
+ encode_listno (list_nos[i], code);
227
+ }
228
+ }
229
+ }
230
+
231
+
232
+
233
+ void IndexIVFPQ::sa_decode (idx_t n, const uint8_t *codes,
234
+ float *x) const
235
+ {
236
+ size_t coarse_size = coarse_code_size ();
237
+
238
+ #pragma omp parallel
239
+ {
240
+ std::vector<float> residual (d);
241
+
242
+ #pragma omp for
243
+ for (size_t i = 0; i < n; i++) {
244
+ const uint8_t *code = codes + i * (code_size + coarse_size);
245
+ int64_t list_no = decode_listno (code);
246
+ float *xi = x + i * d;
247
+ pq.decode (code + coarse_size, xi);
248
+ if (by_residual) {
249
+ quantizer->reconstruct (list_no, residual.data());
250
+ for (size_t j = 0; j < d; j++) {
251
+ xi[j] += residual[j];
252
+ }
253
+ }
254
+ }
255
+ }
256
+ }
257
+
258
+
259
+ void IndexIVFPQ::add_core_o (idx_t n, const float * x, const idx_t *xids,
260
+ float *residuals_2, const idx_t *precomputed_idx)
261
+ {
262
+
263
+ idx_t bs = 32768;
264
+ if (n > bs) {
265
+ for (idx_t i0 = 0; i0 < n; i0 += bs) {
266
+ idx_t i1 = std::min(i0 + bs, n);
267
+ if (verbose) {
268
+ printf("IndexIVFPQ::add_core_o: adding %ld:%ld / %ld\n",
269
+ i0, i1, n);
270
+ }
271
+ add_core_o (i1 - i0, x + i0 * d,
272
+ xids ? xids + i0 : nullptr,
273
+ residuals_2 ? residuals_2 + i0 * d : nullptr,
274
+ precomputed_idx ? precomputed_idx + i0 : nullptr);
275
+ }
276
+ return;
277
+ }
278
+
279
+ InterruptCallback::check();
280
+
281
+ FAISS_THROW_IF_NOT (is_trained);
282
+ double t0 = getmillisecs ();
283
+ const idx_t * idx;
284
+ ScopeDeleter<idx_t> del_idx;
285
+
286
+ if (precomputed_idx) {
287
+ idx = precomputed_idx;
288
+ } else {
289
+ idx_t * idx0 = new idx_t [n];
290
+ del_idx.set (idx0);
291
+ quantizer->assign (n, x, idx0);
292
+ idx = idx0;
293
+ }
294
+
295
+ double t1 = getmillisecs ();
296
+ uint8_t * xcodes = new uint8_t [n * code_size];
297
+ ScopeDeleter<uint8_t> del_xcodes (xcodes);
298
+
299
+ const float *to_encode = nullptr;
300
+ ScopeDeleter<float> del_to_encode;
301
+
302
+ if (by_residual) {
303
+ to_encode = compute_residuals (quantizer, n, x, idx);
304
+ del_to_encode.set (to_encode);
305
+ } else {
306
+ to_encode = x;
307
+ }
308
+ pq.compute_codes (to_encode, xcodes, n);
309
+
310
+ double t2 = getmillisecs ();
311
+ // TODO: parallelize?
312
+ size_t n_ignore = 0;
313
+ for (size_t i = 0; i < n; i++) {
314
+ idx_t key = idx[i];
315
+ if (key < 0) {
316
+ n_ignore ++;
317
+ if (residuals_2)
318
+ memset (residuals_2, 0, sizeof(*residuals_2) * d);
319
+ continue;
320
+ }
321
+ idx_t id = xids ? xids[i] : ntotal + i;
322
+
323
+ uint8_t *code = xcodes + i * code_size;
324
+ size_t offset = invlists->add_entry (key, id, code);
325
+
326
+ if (residuals_2) {
327
+ float *res2 = residuals_2 + i * d;
328
+ const float *xi = to_encode + i * d;
329
+ pq.decode (code, res2);
330
+ for (int j = 0; j < d; j++)
331
+ res2[j] = xi[j] - res2[j];
332
+ }
333
+
334
+ if (maintain_direct_map)
335
+ direct_map.push_back (key << 32 | offset);
336
+ }
337
+
338
+
339
+ double t3 = getmillisecs ();
340
+ if(verbose) {
341
+ char comment[100] = {0};
342
+ if (n_ignore > 0)
343
+ snprintf (comment, 100, "(%ld vectors ignored)", n_ignore);
344
+ printf(" add_core times: %.3f %.3f %.3f %s\n",
345
+ t1 - t0, t2 - t1, t3 - t2, comment);
346
+ }
347
+ ntotal += n;
348
+ }
349
+
350
+
351
+ void IndexIVFPQ::reconstruct_from_offset (int64_t list_no, int64_t offset,
352
+ float* recons) const
353
+ {
354
+ const uint8_t* code = invlists->get_single_code (list_no, offset);
355
+
356
+ if (by_residual) {
357
+ std::vector<float> centroid(d);
358
+ quantizer->reconstruct (list_no, centroid.data());
359
+
360
+ pq.decode (code, recons);
361
+ for (int i = 0; i < d; ++i) {
362
+ recons[i] += centroid[i];
363
+ }
364
+ } else {
365
+ pq.decode (code, recons);
366
+ }
367
+ }
368
+
369
+
370
+
371
+ /// 2G by default, accommodates tables up to PQ32 w/ 65536 centroids
372
+ size_t IndexIVFPQ::precomputed_table_max_bytes = ((size_t)1) << 31;
373
+
374
+ /** Precomputed tables for residuals
375
+ *
376
+ * During IVFPQ search with by_residual, we compute
377
+ *
378
+ * d = || x - y_C - y_R ||^2
379
+ *
380
+ * where x is the query vector, y_C the coarse centroid, y_R the
381
+ * refined PQ centroid. The expression can be decomposed as:
382
+ *
383
+ * d = || x - y_C ||^2 + || y_R ||^2 + 2 * (y_C|y_R) - 2 * (x|y_R)
384
+ * --------------- --------------------------- -------
385
+ * term 1 term 2 term 3
386
+ *
387
+ * When using multiprobe, we use the following decomposition:
388
+ * - term 1 is the distance to the coarse centroid, that is computed
389
+ * during the 1st stage search.
390
+ * - term 2 can be precomputed, as it does not involve x. However,
391
+ * because of the PQ, it needs nlist * M * ksub storage. This is why
392
+ * use_precomputed_table is off by default
393
+ * - term 3 is the classical non-residual distance table.
394
+ *
395
+ * Since y_R defined by a product quantizer, it is split across
396
+ * subvectors and stored separately for each subvector. If the coarse
397
+ * quantizer is a MultiIndexQuantizer then the table can be stored
398
+ * more compactly.
399
+ *
400
+ * At search time, the tables for term 2 and term 3 are added up. This
401
+ * is faster when the length of the lists is > ksub * M.
402
+ */
403
+
404
+ void IndexIVFPQ::precompute_table ()
405
+ {
406
+ if (use_precomputed_table == -1)
407
+ return;
408
+
409
+ if (use_precomputed_table == 0) { // then choose the type of table
410
+ if (quantizer->metric_type == METRIC_INNER_PRODUCT) {
411
+ if (verbose) {
412
+ printf("IndexIVFPQ::precompute_table: precomputed "
413
+ "tables not needed for inner product quantizers\n");
414
+ }
415
+ return;
416
+ }
417
+ const MultiIndexQuantizer *miq =
418
+ dynamic_cast<const MultiIndexQuantizer *> (quantizer);
419
+ if (miq && pq.M % miq->pq.M == 0)
420
+ use_precomputed_table = 2;
421
+ else {
422
+ size_t table_size = pq.M * pq.ksub * nlist * sizeof(float);
423
+ if (table_size > precomputed_table_max_bytes) {
424
+ if (verbose) {
425
+ printf(
426
+ "IndexIVFPQ::precompute_table: not precomputing table, "
427
+ "it would be too big: %ld bytes (max %ld)\n",
428
+ table_size, precomputed_table_max_bytes);
429
+ use_precomputed_table = 0;
430
+ }
431
+ return;
432
+ }
433
+ use_precomputed_table = 1;
434
+ }
435
+ } // otherwise assume user has set appropriate flag on input
436
+
437
+ if (verbose) {
438
+ printf ("precomputing IVFPQ tables type %d\n",
439
+ use_precomputed_table);
440
+ }
441
+
442
+ // squared norms of the PQ centroids
443
+ std::vector<float> r_norms (pq.M * pq.ksub, NAN);
444
+ for (int m = 0; m < pq.M; m++)
445
+ for (int j = 0; j < pq.ksub; j++)
446
+ r_norms [m * pq.ksub + j] =
447
+ fvec_norm_L2sqr (pq.get_centroids (m, j), pq.dsub);
448
+
449
+ if (use_precomputed_table == 1) {
450
+
451
+ precomputed_table.resize (nlist * pq.M * pq.ksub);
452
+ std::vector<float> centroid (d);
453
+
454
+ for (size_t i = 0; i < nlist; i++) {
455
+ quantizer->reconstruct (i, centroid.data());
456
+
457
+ float *tab = &precomputed_table[i * pq.M * pq.ksub];
458
+ pq.compute_inner_prod_table (centroid.data(), tab);
459
+ fvec_madd (pq.M * pq.ksub, r_norms.data(), 2.0, tab, tab);
460
+ }
461
+ } else if (use_precomputed_table == 2) {
462
+ const MultiIndexQuantizer *miq =
463
+ dynamic_cast<const MultiIndexQuantizer *> (quantizer);
464
+ FAISS_THROW_IF_NOT (miq);
465
+ const ProductQuantizer &cpq = miq->pq;
466
+ FAISS_THROW_IF_NOT (pq.M % cpq.M == 0);
467
+
468
+ precomputed_table.resize(cpq.ksub * pq.M * pq.ksub);
469
+
470
+ // reorder PQ centroid table
471
+ std::vector<float> centroids (d * cpq.ksub, NAN);
472
+
473
+ for (int m = 0; m < cpq.M; m++) {
474
+ for (size_t i = 0; i < cpq.ksub; i++) {
475
+ memcpy (centroids.data() + i * d + m * cpq.dsub,
476
+ cpq.get_centroids (m, i),
477
+ sizeof (*centroids.data()) * cpq.dsub);
478
+ }
479
+ }
480
+
481
+ pq.compute_inner_prod_tables (cpq.ksub, centroids.data (),
482
+ precomputed_table.data ());
483
+
484
+ for (size_t i = 0; i < cpq.ksub; i++) {
485
+ float *tab = &precomputed_table[i * pq.M * pq.ksub];
486
+ fvec_madd (pq.M * pq.ksub, r_norms.data(), 2.0, tab, tab);
487
+ }
488
+
489
+ }
490
+
491
+ }
492
+
493
+ namespace {
494
+
495
+ using idx_t = Index::idx_t;
496
+
497
+
498
+ #define TIC t0 = get_cycles()
499
+ #define TOC get_cycles () - t0
500
+
501
+
502
+
503
+ /** QueryTables manages the various ways of searching an
504
+ * IndexIVFPQ. The code contains a lot of branches, depending on:
505
+ * - metric_type: are we computing L2 or Inner product similarity?
506
+ * - by_residual: do we encode raw vectors or residuals?
507
+ * - use_precomputed_table: are x_R|x_C tables precomputed?
508
+ * - polysemous_ht: are we filtering with polysemous codes?
509
+ */
510
+ struct QueryTables {
511
+
512
+ /*****************************************************
513
+ * General data from the IVFPQ
514
+ *****************************************************/
515
+
516
+ const IndexIVFPQ & ivfpq;
517
+ const IVFSearchParameters *params;
518
+
519
+ // copied from IndexIVFPQ for easier access
520
+ int d;
521
+ const ProductQuantizer & pq;
522
+ MetricType metric_type;
523
+ bool by_residual;
524
+ int use_precomputed_table;
525
+ int polysemous_ht;
526
+
527
+ // pre-allocated data buffers
528
+ float * sim_table, * sim_table_2;
529
+ float * residual_vec, *decoded_vec;
530
+
531
+ // single data buffer
532
+ std::vector<float> mem;
533
+
534
+ // for table pointers
535
+ std::vector<const float *> sim_table_ptrs;
536
+
537
+ explicit QueryTables (const IndexIVFPQ & ivfpq,
538
+ const IVFSearchParameters *params):
539
+ ivfpq(ivfpq),
540
+ d(ivfpq.d),
541
+ pq (ivfpq.pq),
542
+ metric_type (ivfpq.metric_type),
543
+ by_residual (ivfpq.by_residual),
544
+ use_precomputed_table (ivfpq.use_precomputed_table)
545
+ {
546
+ mem.resize (pq.ksub * pq.M * 2 + d * 2);
547
+ sim_table = mem.data ();
548
+ sim_table_2 = sim_table + pq.ksub * pq.M;
549
+ residual_vec = sim_table_2 + pq.ksub * pq.M;
550
+ decoded_vec = residual_vec + d;
551
+
552
+ // for polysemous
553
+ polysemous_ht = ivfpq.polysemous_ht;
554
+ if (auto ivfpq_params =
555
+ dynamic_cast<const IVFPQSearchParameters *>(params)) {
556
+ polysemous_ht = ivfpq_params->polysemous_ht;
557
+ }
558
+ if (polysemous_ht != 0) {
559
+ q_code.resize (pq.code_size);
560
+ }
561
+ init_list_cycles = 0;
562
+ sim_table_ptrs.resize (pq.M);
563
+ }
564
+
565
+ /*****************************************************
566
+ * What we do when query is known
567
+ *****************************************************/
568
+
569
+ // field specific to query
570
+ const float * qi;
571
+
572
+ // query-specific intialization
573
+ void init_query (const float * qi) {
574
+ this->qi = qi;
575
+ if (metric_type == METRIC_INNER_PRODUCT)
576
+ init_query_IP ();
577
+ else
578
+ init_query_L2 ();
579
+ if (!by_residual && polysemous_ht != 0)
580
+ pq.compute_code (qi, q_code.data());
581
+ }
582
+
583
+ void init_query_IP () {
584
+ // precompute some tables specific to the query qi
585
+ pq.compute_inner_prod_table (qi, sim_table);
586
+ }
587
+
588
+ void init_query_L2 () {
589
+ if (!by_residual) {
590
+ pq.compute_distance_table (qi, sim_table);
591
+ } else if (use_precomputed_table) {
592
+ pq.compute_inner_prod_table (qi, sim_table_2);
593
+ }
594
+ }
595
+
596
+ /*****************************************************
597
+ * When inverted list is known: prepare computations
598
+ *****************************************************/
599
+
600
+ // fields specific to list
601
+ Index::idx_t key;
602
+ float coarse_dis;
603
+ std::vector<uint8_t> q_code;
604
+
605
+ uint64_t init_list_cycles;
606
+
607
+ /// once we know the query and the centroid, we can prepare the
608
+ /// sim_table that will be used for accumulation
609
+ /// and dis0, the initial value
610
+ float precompute_list_tables () {
611
+ float dis0 = 0;
612
+ uint64_t t0; TIC;
613
+ if (by_residual) {
614
+ if (metric_type == METRIC_INNER_PRODUCT)
615
+ dis0 = precompute_list_tables_IP ();
616
+ else
617
+ dis0 = precompute_list_tables_L2 ();
618
+ }
619
+ init_list_cycles += TOC;
620
+ return dis0;
621
+ }
622
+
623
+ float precompute_list_table_pointers () {
624
+ float dis0 = 0;
625
+ uint64_t t0; TIC;
626
+ if (by_residual) {
627
+ if (metric_type == METRIC_INNER_PRODUCT)
628
+ FAISS_THROW_MSG ("not implemented");
629
+ else
630
+ dis0 = precompute_list_table_pointers_L2 ();
631
+ }
632
+ init_list_cycles += TOC;
633
+ return dis0;
634
+ }
635
+
636
+ /*****************************************************
637
+ * compute tables for inner prod
638
+ *****************************************************/
639
+
640
+ float precompute_list_tables_IP ()
641
+ {
642
+ // prepare the sim_table that will be used for accumulation
643
+ // and dis0, the initial value
644
+ ivfpq.quantizer->reconstruct (key, decoded_vec);
645
+ // decoded_vec = centroid
646
+ float dis0 = fvec_inner_product (qi, decoded_vec, d);
647
+
648
+ if (polysemous_ht) {
649
+ for (int i = 0; i < d; i++) {
650
+ residual_vec [i] = qi[i] - decoded_vec[i];
651
+ }
652
+ pq.compute_code (residual_vec, q_code.data());
653
+ }
654
+ return dis0;
655
+ }
656
+
657
+
658
+ /*****************************************************
659
+ * compute tables for L2 distance
660
+ *****************************************************/
661
+
662
+ float precompute_list_tables_L2 ()
663
+ {
664
+ float dis0 = 0;
665
+
666
+ if (use_precomputed_table == 0 || use_precomputed_table == -1) {
667
+ ivfpq.quantizer->compute_residual (qi, residual_vec, key);
668
+ pq.compute_distance_table (residual_vec, sim_table);
669
+
670
+ if (polysemous_ht != 0) {
671
+ pq.compute_code (residual_vec, q_code.data());
672
+ }
673
+
674
+ } else if (use_precomputed_table == 1) {
675
+ dis0 = coarse_dis;
676
+
677
+ fvec_madd (pq.M * pq.ksub,
678
+ &ivfpq.precomputed_table [key * pq.ksub * pq.M],
679
+ -2.0, sim_table_2,
680
+ sim_table);
681
+
682
+
683
+ if (polysemous_ht != 0) {
684
+ ivfpq.quantizer->compute_residual (qi, residual_vec, key);
685
+ pq.compute_code (residual_vec, q_code.data());
686
+ }
687
+
688
+ } else if (use_precomputed_table == 2) {
689
+ dis0 = coarse_dis;
690
+
691
+ const MultiIndexQuantizer *miq =
692
+ dynamic_cast<const MultiIndexQuantizer *> (ivfpq.quantizer);
693
+ FAISS_THROW_IF_NOT (miq);
694
+ const ProductQuantizer &cpq = miq->pq;
695
+ int Mf = pq.M / cpq.M;
696
+
697
+ const float *qtab = sim_table_2; // query-specific table
698
+ float *ltab = sim_table; // (output) list-specific table
699
+
700
+ long k = key;
701
+ for (int cm = 0; cm < cpq.M; cm++) {
702
+ // compute PQ index
703
+ int ki = k & ((uint64_t(1) << cpq.nbits) - 1);
704
+ k >>= cpq.nbits;
705
+
706
+ // get corresponding table
707
+ const float *pc = &ivfpq.precomputed_table
708
+ [(ki * pq.M + cm * Mf) * pq.ksub];
709
+
710
+ if (polysemous_ht == 0) {
711
+
712
+ // sum up with query-specific table
713
+ fvec_madd (Mf * pq.ksub,
714
+ pc,
715
+ -2.0, qtab,
716
+ ltab);
717
+ ltab += Mf * pq.ksub;
718
+ qtab += Mf * pq.ksub;
719
+ } else {
720
+ for (int m = cm * Mf; m < (cm + 1) * Mf; m++) {
721
+ q_code[m] = fvec_madd_and_argmin
722
+ (pq.ksub, pc, -2, qtab, ltab);
723
+ pc += pq.ksub;
724
+ ltab += pq.ksub;
725
+ qtab += pq.ksub;
726
+ }
727
+ }
728
+
729
+ }
730
+ }
731
+
732
+ return dis0;
733
+ }
734
+
735
+ float precompute_list_table_pointers_L2 ()
736
+ {
737
+ float dis0 = 0;
738
+
739
+ if (use_precomputed_table == 1) {
740
+ dis0 = coarse_dis;
741
+
742
+ const float * s = &ivfpq.precomputed_table [key * pq.ksub * pq.M];
743
+ for (int m = 0; m < pq.M; m++) {
744
+ sim_table_ptrs [m] = s;
745
+ s += pq.ksub;
746
+ }
747
+ } else if (use_precomputed_table == 2) {
748
+ dis0 = coarse_dis;
749
+
750
+ const MultiIndexQuantizer *miq =
751
+ dynamic_cast<const MultiIndexQuantizer *> (ivfpq.quantizer);
752
+ FAISS_THROW_IF_NOT (miq);
753
+ const ProductQuantizer &cpq = miq->pq;
754
+ int Mf = pq.M / cpq.M;
755
+
756
+ long k = key;
757
+ int m0 = 0;
758
+ for (int cm = 0; cm < cpq.M; cm++) {
759
+ int ki = k & ((uint64_t(1) << cpq.nbits) - 1);
760
+ k >>= cpq.nbits;
761
+
762
+ const float *pc = &ivfpq.precomputed_table
763
+ [(ki * pq.M + cm * Mf) * pq.ksub];
764
+
765
+ for (int m = m0; m < m0 + Mf; m++) {
766
+ sim_table_ptrs [m] = pc;
767
+ pc += pq.ksub;
768
+ }
769
+ m0 += Mf;
770
+ }
771
+ } else {
772
+ FAISS_THROW_MSG ("need precomputed tables");
773
+ }
774
+
775
+ if (polysemous_ht) {
776
+ FAISS_THROW_MSG ("not implemented");
777
+ // Not clear that it makes sense to implemente this,
778
+ // because it costs M * ksub, which is what we wanted to
779
+ // avoid with the tables pointers.
780
+ }
781
+
782
+ return dis0;
783
+ }
784
+
785
+
786
+ };
787
+
788
+
789
+
790
+ template<class C>
791
+ struct KnnSearchResults {
792
+ idx_t key;
793
+ const idx_t *ids;
794
+
795
+ // heap params
796
+ size_t k;
797
+ float * heap_sim;
798
+ idx_t * heap_ids;
799
+
800
+ size_t nup;
801
+
802
+ inline void add (idx_t j, float dis) {
803
+ if (C::cmp (heap_sim[0], dis)) {
804
+ heap_pop<C> (k, heap_sim, heap_ids);
805
+ idx_t id = ids ? ids[j] : (key << 32 | j);
806
+ heap_push<C> (k, heap_sim, heap_ids, dis, id);
807
+ nup++;
808
+ }
809
+ }
810
+
811
+ };
812
+
813
+ template<class C>
814
+ struct RangeSearchResults {
815
+ idx_t key;
816
+ const idx_t *ids;
817
+
818
+ // wrapped result structure
819
+ float radius;
820
+ RangeQueryResult & rres;
821
+
822
+ inline void add (idx_t j, float dis) {
823
+ if (C::cmp (radius, dis)) {
824
+ idx_t id = ids ? ids[j] : (key << 32 | j);
825
+ rres.add (dis, id);
826
+ }
827
+ }
828
+ };
829
+
830
+
831
+
832
+ /*****************************************************
833
+ * Scaning the codes.
834
+ * The scanning functions call their favorite precompute_*
835
+ * function to precompute the tables they need.
836
+ *****************************************************/
837
+ template <typename IDType, MetricType METRIC_TYPE>
838
+ struct IVFPQScannerT: QueryTables {
839
+
840
+ const uint8_t * list_codes;
841
+ const IDType * list_ids;
842
+ size_t list_size;
843
+
844
+ IVFPQScannerT (const IndexIVFPQ & ivfpq, const IVFSearchParameters *params):
845
+ QueryTables (ivfpq, params)
846
+ {
847
+ FAISS_THROW_IF_NOT (pq.nbits == 8);
848
+ assert(METRIC_TYPE == metric_type);
849
+ }
850
+
851
+ float dis0;
852
+
853
+ void init_list (idx_t list_no, float coarse_dis,
854
+ int mode) {
855
+ this->key = list_no;
856
+ this->coarse_dis = coarse_dis;
857
+
858
+ if (mode == 2) {
859
+ dis0 = precompute_list_tables ();
860
+ } else if (mode == 1) {
861
+ dis0 = precompute_list_table_pointers ();
862
+ }
863
+ }
864
+
865
+ /*****************************************************
866
+ * Scaning the codes: simple PQ scan.
867
+ *****************************************************/
868
+
869
+ /// version of the scan where we use precomputed tables
870
+ template<class SearchResultType>
871
+ void scan_list_with_table (size_t ncode, const uint8_t *codes,
872
+ SearchResultType & res) const
873
+ {
874
+ for (size_t j = 0; j < ncode; j++) {
875
+
876
+ float dis = dis0;
877
+ const float *tab = sim_table;
878
+
879
+ for (size_t m = 0; m < pq.M; m++) {
880
+ dis += tab[*codes++];
881
+ tab += pq.ksub;
882
+ }
883
+
884
+ res.add(j, dis);
885
+ }
886
+ }
887
+
888
+
889
+ /// tables are not precomputed, but pointers are provided to the
890
+ /// relevant X_c|x_r tables
891
+ template<class SearchResultType>
892
+ void scan_list_with_pointer (size_t ncode, const uint8_t *codes,
893
+ SearchResultType & res) const
894
+ {
895
+ for (size_t j = 0; j < ncode; j++) {
896
+
897
+ float dis = dis0;
898
+ const float *tab = sim_table_2;
899
+
900
+ for (size_t m = 0; m < pq.M; m++) {
901
+ int ci = *codes++;
902
+ dis += sim_table_ptrs [m][ci] - 2 * tab [ci];
903
+ tab += pq.ksub;
904
+ }
905
+ res.add (j, dis);
906
+ }
907
+ }
908
+
909
+
910
+ /// nothing is precomputed: access residuals on-the-fly
911
+ template<class SearchResultType>
912
+ void scan_on_the_fly_dist (size_t ncode, const uint8_t *codes,
913
+ SearchResultType &res) const
914
+ {
915
+ const float *dvec;
916
+ float dis0 = 0;
917
+ if (by_residual) {
918
+ if (METRIC_TYPE == METRIC_INNER_PRODUCT) {
919
+ ivfpq.quantizer->reconstruct (key, residual_vec);
920
+ dis0 = fvec_inner_product (residual_vec, qi, d);
921
+ } else {
922
+ ivfpq.quantizer->compute_residual (qi, residual_vec, key);
923
+ }
924
+ dvec = residual_vec;
925
+ } else {
926
+ dvec = qi;
927
+ dis0 = 0;
928
+ }
929
+
930
+ for (size_t j = 0; j < ncode; j++) {
931
+
932
+ pq.decode (codes, decoded_vec);
933
+ codes += pq.code_size;
934
+
935
+ float dis;
936
+ if (METRIC_TYPE == METRIC_INNER_PRODUCT) {
937
+ dis = dis0 + fvec_inner_product (decoded_vec, qi, d);
938
+ } else {
939
+ dis = fvec_L2sqr (decoded_vec, dvec, d);
940
+ }
941
+ res.add (j, dis);
942
+ }
943
+ }
944
+
945
+ /*****************************************************
946
+ * Scanning codes with polysemous filtering
947
+ *****************************************************/
948
+
949
+ template <class HammingComputer, class SearchResultType>
950
+ void scan_list_polysemous_hc (
951
+ size_t ncode, const uint8_t *codes,
952
+ SearchResultType & res) const
953
+ {
954
+ int ht = ivfpq.polysemous_ht;
955
+ size_t n_hamming_pass = 0, nup = 0;
956
+
957
+ int code_size = pq.code_size;
958
+
959
+ HammingComputer hc (q_code.data(), code_size);
960
+
961
+ for (size_t j = 0; j < ncode; j++) {
962
+ const uint8_t *b_code = codes;
963
+ int hd = hc.hamming (b_code);
964
+ if (hd < ht) {
965
+ n_hamming_pass ++;
966
+
967
+ float dis = dis0;
968
+ const float *tab = sim_table;
969
+
970
+ for (size_t m = 0; m < pq.M; m++) {
971
+ dis += tab[*b_code++];
972
+ tab += pq.ksub;
973
+ }
974
+
975
+ res.add (j, dis);
976
+ }
977
+ codes += code_size;
978
+ }
979
+ #pragma omp critical
980
+ {
981
+ indexIVFPQ_stats.n_hamming_pass += n_hamming_pass;
982
+ }
983
+ }
984
+
985
+ template<class SearchResultType>
986
+ void scan_list_polysemous (
987
+ size_t ncode, const uint8_t *codes,
988
+ SearchResultType &res) const
989
+ {
990
+ switch (pq.code_size) {
991
+ #define HANDLE_CODE_SIZE(cs) \
992
+ case cs: \
993
+ scan_list_polysemous_hc \
994
+ <HammingComputer ## cs, SearchResultType> \
995
+ (ncode, codes, res); \
996
+ break
997
+ HANDLE_CODE_SIZE(4);
998
+ HANDLE_CODE_SIZE(8);
999
+ HANDLE_CODE_SIZE(16);
1000
+ HANDLE_CODE_SIZE(20);
1001
+ HANDLE_CODE_SIZE(32);
1002
+ HANDLE_CODE_SIZE(64);
1003
+ #undef HANDLE_CODE_SIZE
1004
+ default:
1005
+ if (pq.code_size % 8 == 0)
1006
+ scan_list_polysemous_hc
1007
+ <HammingComputerM8, SearchResultType>
1008
+ (ncode, codes, res);
1009
+ else
1010
+ scan_list_polysemous_hc
1011
+ <HammingComputerM4, SearchResultType>
1012
+ (ncode, codes, res);
1013
+ break;
1014
+ }
1015
+ }
1016
+
1017
+ };
1018
+
1019
+
1020
+ /* We put as many parameters as possible in template. Hopefully the
1021
+ * gain in runtime is worth the code bloat. C is the comparator < or
1022
+ * >, it is directly related to METRIC_TYPE. precompute_mode is how
1023
+ * much we precompute (2 = precompute distance tables, 1 = precompute
1024
+ * pointers to distances, 0 = compute distances one by one).
1025
+ * Currently only 2 is supported */
1026
+ template<MetricType METRIC_TYPE, class C, int precompute_mode>
1027
+ struct IVFPQScanner:
1028
+ IVFPQScannerT<Index::idx_t, METRIC_TYPE>,
1029
+ InvertedListScanner
1030
+ {
1031
+ bool store_pairs;
1032
+
1033
+ IVFPQScanner(const IndexIVFPQ & ivfpq, bool store_pairs):
1034
+ IVFPQScannerT<Index::idx_t, METRIC_TYPE>(ivfpq, nullptr),
1035
+ store_pairs(store_pairs)
1036
+ {
1037
+ }
1038
+
1039
+ void set_query (const float *query) override {
1040
+ this->init_query (query);
1041
+ }
1042
+
1043
+ void set_list (idx_t list_no, float coarse_dis) override {
1044
+ this->init_list (list_no, coarse_dis, precompute_mode);
1045
+ }
1046
+
1047
+ float distance_to_code (const uint8_t *code) const override {
1048
+ assert(precompute_mode == 2);
1049
+ float dis = this->dis0;
1050
+ const float *tab = this->sim_table;
1051
+
1052
+ for (size_t m = 0; m < this->pq.M; m++) {
1053
+ dis += tab[*code++];
1054
+ tab += this->pq.ksub;
1055
+ }
1056
+ return dis;
1057
+ }
1058
+
1059
+ size_t scan_codes (size_t ncode,
1060
+ const uint8_t *codes,
1061
+ const idx_t *ids,
1062
+ float *heap_sim, idx_t *heap_ids,
1063
+ size_t k) const override
1064
+ {
1065
+ KnnSearchResults<C> res = {
1066
+ /* key */ this->key,
1067
+ /* ids */ this->store_pairs ? nullptr : ids,
1068
+ /* k */ k,
1069
+ /* heap_sim */ heap_sim,
1070
+ /* heap_ids */ heap_ids,
1071
+ /* nup */ 0
1072
+ };
1073
+
1074
+ if (this->polysemous_ht > 0) {
1075
+ assert(precompute_mode == 2);
1076
+ this->scan_list_polysemous (ncode, codes, res);
1077
+ } else if (precompute_mode == 2) {
1078
+ this->scan_list_with_table (ncode, codes, res);
1079
+ } else if (precompute_mode == 1) {
1080
+ this->scan_list_with_pointer (ncode, codes, res);
1081
+ } else if (precompute_mode == 0) {
1082
+ this->scan_on_the_fly_dist (ncode, codes, res);
1083
+ } else {
1084
+ FAISS_THROW_MSG("bad precomp mode");
1085
+ }
1086
+ return res.nup;
1087
+ }
1088
+
1089
+ void scan_codes_range (size_t ncode,
1090
+ const uint8_t *codes,
1091
+ const idx_t *ids,
1092
+ float radius,
1093
+ RangeQueryResult & rres) const override
1094
+ {
1095
+ RangeSearchResults<C> res = {
1096
+ /* key */ this->key,
1097
+ /* ids */ this->store_pairs ? nullptr : ids,
1098
+ /* radius */ radius,
1099
+ /* rres */ rres
1100
+ };
1101
+
1102
+ if (this->polysemous_ht > 0) {
1103
+ assert(precompute_mode == 2);
1104
+ this->scan_list_polysemous (ncode, codes, res);
1105
+ } else if (precompute_mode == 2) {
1106
+ this->scan_list_with_table (ncode, codes, res);
1107
+ } else if (precompute_mode == 1) {
1108
+ this->scan_list_with_pointer (ncode, codes, res);
1109
+ } else if (precompute_mode == 0) {
1110
+ this->scan_on_the_fly_dist (ncode, codes, res);
1111
+ } else {
1112
+ FAISS_THROW_MSG("bad precomp mode");
1113
+ }
1114
+
1115
+ }
1116
+ };
1117
+
1118
+
1119
+
1120
+
1121
+ } // anonymous namespace
1122
+
1123
+ InvertedListScanner *
1124
+ IndexIVFPQ::get_InvertedListScanner (bool store_pairs) const
1125
+ {
1126
+ if (metric_type == METRIC_INNER_PRODUCT) {
1127
+ return new IVFPQScanner<METRIC_INNER_PRODUCT, CMin<float, idx_t>, 2>
1128
+ (*this, store_pairs);
1129
+ } else if (metric_type == METRIC_L2) {
1130
+ return new IVFPQScanner<METRIC_L2, CMax<float, idx_t>, 2>
1131
+ (*this, store_pairs);
1132
+ }
1133
+ return nullptr;
1134
+
1135
+ }
1136
+
1137
+
1138
+
1139
+ IndexIVFPQStats indexIVFPQ_stats;
1140
+
1141
+ void IndexIVFPQStats::reset () {
1142
+ memset (this, 0, sizeof (*this));
1143
+ }
1144
+
1145
+
1146
+
1147
+ IndexIVFPQ::IndexIVFPQ ()
1148
+ {
1149
+ // initialize some runtime values
1150
+ use_precomputed_table = 0;
1151
+ scan_table_threshold = 0;
1152
+ do_polysemous_training = false;
1153
+ polysemous_ht = 0;
1154
+ polysemous_training = nullptr;
1155
+ }
1156
+
1157
+
1158
/// Lexicographic comparator over fixed-size codes stored back-to-back
/// in `tab`; compares by index into that array.
struct CodeCmp {
    const uint8_t *tab;
    size_t code_size;

    // strict ordering used by std::sort (note: descending memcmp order)
    bool operator () (int a, int b) const {
        return cmp (a, b) > 0;
    }

    // raw 3-way comparison of codes a and b
    int cmp (int a, int b) const {
        return memcmp (tab + a * code_size, tab + b * code_size,
                       code_size);
    }
};
1169
+
1170
+
1171
+ size_t IndexIVFPQ::find_duplicates (idx_t *dup_ids, size_t *lims) const
1172
+ {
1173
+ size_t ngroup = 0;
1174
+ lims[0] = 0;
1175
+ for (size_t list_no = 0; list_no < nlist; list_no++) {
1176
+ size_t n = invlists->list_size (list_no);
1177
+ std::vector<int> ord (n);
1178
+ for (int i = 0; i < n; i++) ord[i] = i;
1179
+ InvertedLists::ScopedCodes codes (invlists, list_no);
1180
+ CodeCmp cs = { codes.get(), code_size };
1181
+ std::sort (ord.begin(), ord.end(), cs);
1182
+
1183
+ InvertedLists::ScopedIds list_ids (invlists, list_no);
1184
+ int prev = -1; // all elements from prev to i-1 are equal
1185
+ for (int i = 0; i < n; i++) {
1186
+ if (prev >= 0 && cs.cmp (ord [prev], ord [i]) == 0) {
1187
+ // same as previous => remember
1188
+ if (prev + 1 == i) { // start new group
1189
+ ngroup++;
1190
+ lims[ngroup] = lims[ngroup - 1];
1191
+ dup_ids [lims [ngroup]++] = list_ids [ord [prev]];
1192
+ }
1193
+ dup_ids [lims [ngroup]++] = list_ids [ord [i]];
1194
+ } else { // not same as previous.
1195
+ prev = i;
1196
+ }
1197
+ }
1198
+ }
1199
+ return ngroup;
1200
+ }
1201
+
1202
+
1203
+
1204
+
1205
+
1206
+
1207
+ } // namespace faiss