faiss 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,98 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <cstdio>
10
+ #include <cstdlib>
11
+
12
+ #include <gtest/gtest.h>
13
+
14
+ #include <faiss/IndexIVFPQ.h>
15
+ #include <faiss/IndexFlat.h>
16
+ #include <faiss/index_io.h>
17
+
18
+ TEST(IVFPQ, accuracy) {
19
+
20
+ // dimension of the vectors to index
21
+ int d = 64;
22
+
23
+ // size of the database we plan to index
24
+ size_t nb = 1000;
25
+
26
+ // make a set of nt training vectors in the unit cube
27
+ // (could be the database)
28
+ size_t nt = 1500;
29
+
30
+ // make the index object and train it
31
+ faiss::IndexFlatL2 coarse_quantizer (d);
32
+
33
+ // a reasonable number of cetroids to index nb vectors
34
+ int ncentroids = 25;
35
+
36
+ faiss::IndexIVFPQ index (&coarse_quantizer, d,
37
+ ncentroids, 16, 8);
38
+
39
+ // index that gives the ground-truth
40
+ faiss::IndexFlatL2 index_gt (d);
41
+
42
+ srand48 (35);
43
+
44
+ { // training
45
+
46
+ std::vector <float> trainvecs (nt * d);
47
+ for (size_t i = 0; i < nt * d; i++) {
48
+ trainvecs[i] = drand48();
49
+ }
50
+ index.verbose = true;
51
+ index.train (nt, trainvecs.data());
52
+ }
53
+
54
+ { // populating the database
55
+
56
+ std::vector <float> database (nb * d);
57
+ for (size_t i = 0; i < nb * d; i++) {
58
+ database[i] = drand48();
59
+ }
60
+
61
+ index.add (nb, database.data());
62
+ index_gt.add (nb, database.data());
63
+ }
64
+
65
+ int nq = 200;
66
+ int n_ok;
67
+
68
+ { // searching the database
69
+
70
+ std::vector <float> queries (nq * d);
71
+ for (size_t i = 0; i < nq * d; i++) {
72
+ queries[i] = drand48();
73
+ }
74
+
75
+ std::vector<faiss::Index::idx_t> gt_nns (nq);
76
+ std::vector<float> gt_dis (nq);
77
+
78
+ index_gt.search (nq, queries.data(), 1,
79
+ gt_dis.data(), gt_nns.data());
80
+
81
+ index.nprobe = 5;
82
+ int k = 5;
83
+ std::vector<faiss::Index::idx_t> nns (k * nq);
84
+ std::vector<float> dis (k * nq);
85
+
86
+ index.search (nq, queries.data(), k, dis.data(), nns.data());
87
+
88
+ n_ok = 0;
89
+ for (int q = 0; q < nq; q++) {
90
+
91
+ for (int i = 0; i < k; i++)
92
+ if (nns[q * k + i] == gt_nns[q])
93
+ n_ok++;
94
+ }
95
+ EXPECT_GT(n_ok, nq * 0.4);
96
+ }
97
+
98
+ }
@@ -0,0 +1,566 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include <cstdio>
9
+ #include <cstdlib>
10
+
11
+ #include <memory>
12
+ #include <vector>
13
+ #include <thread>
14
+
15
+ #include <gtest/gtest.h>
16
+
17
+ #include <faiss/IndexIVF.h>
18
+ #include <faiss/IndexBinaryIVF.h>
19
+ #include <faiss/IndexPreTransform.h>
20
+ #include <faiss/AutoTune.h>
21
+ #include <faiss/index_factory.h>
22
+ #include <faiss/index_io.h>
23
+ #include <faiss/IVFlib.h>
24
+ #include <faiss/VectorTransform.h>
25
+
26
+ using namespace faiss;
27
+
28
+ namespace {
29
+
30
+ typedef Index::idx_t idx_t;
31
+
32
+
33
+ // dimension of the vectors to index
34
+ int d = 32;
35
+
36
+ // nb of training vectors
37
+ size_t nt = 5000;
38
+
39
+ // size of the database points per window step
40
+ size_t nb = 1000;
41
+
42
+ // nb of queries
43
+ size_t nq = 200;
44
+
45
+ int k = 10;
46
+
47
+
48
+ std::vector<float> make_data(size_t n)
49
+ {
50
+ std::vector <float> database (n * d);
51
+ for (size_t i = 0; i < n * d; i++) {
52
+ database[i] = drand48();
53
+ }
54
+ return database;
55
+ }
56
+
57
+ std::unique_ptr<Index> make_trained_index(const char *index_type,
58
+ MetricType metric_type)
59
+ {
60
+ auto index = std::unique_ptr<Index>(index_factory(
61
+ d, index_type, metric_type));
62
+ auto xt = make_data(nt);
63
+ index->train(nt, xt.data());
64
+ ParameterSpace().set_index_parameter (index.get(), "nprobe", 4);
65
+ return index;
66
+ }
67
+
68
+ std::vector<idx_t> search_index(Index *index, const float *xq) {
69
+ std::vector<idx_t> I(k * nq);
70
+ std::vector<float> D(k * nq);
71
+ index->search (nq, xq, k, D.data(), I.data());
72
+ return I;
73
+ }
74
+
75
+
76
+
77
+
78
+ /*************************************************************
79
+ * Test functions for a given index type
80
+ *************************************************************/
81
+
82
+
83
+
84
+ void test_lowlevel_access (const char *index_key, MetricType metric) {
85
+ std::unique_ptr<Index> index = make_trained_index(index_key, metric);
86
+
87
+ auto xb = make_data (nb);
88
+ index->add(nb, xb.data());
89
+
90
+ /** handle the case if we have a preprocessor */
91
+
92
+ const IndexPreTransform *index_pt =
93
+ dynamic_cast<const IndexPreTransform*> (index.get());
94
+
95
+ int dt = index->d;
96
+ const float * xbt = xb.data();
97
+ std::unique_ptr<float []> del_xbt;
98
+
99
+ if (index_pt) {
100
+ dt = index_pt->index->d;
101
+ xbt = index_pt->apply_chain (nb, xb.data());
102
+ if (xbt != xb.data()) {
103
+ del_xbt.reset((float*)xbt);
104
+ }
105
+ }
106
+
107
+ IndexIVF * index_ivf = ivflib::extract_index_ivf (index.get());
108
+
109
+ /** Test independent encoding
110
+ *
111
+ * Makes it possible to do additions on a custom inverted list
112
+ * implementation. From a set of vectors, computes the inverted
113
+ * list ids + the codes corresponding to each vector.
114
+ */
115
+
116
+ std::vector<idx_t> list_nos (nb);
117
+ std::vector<uint8_t> codes (index_ivf->code_size * nb);
118
+ index_ivf->quantizer->assign(nb, xbt, list_nos.data());
119
+ index_ivf->encode_vectors (nb, xbt, list_nos.data(), codes.data());
120
+
121
+ // compare with normal IVF addition
122
+
123
+ const InvertedLists *il = index_ivf->invlists;
124
+
125
+ for (int list_no = 0; list_no < index_ivf->nlist; list_no++) {
126
+ InvertedLists::ScopedCodes ivf_codes (il, list_no);
127
+ InvertedLists::ScopedIds ivf_ids (il, list_no);
128
+ size_t list_size = il->list_size (list_no);
129
+ for (int i = 0; i < list_size; i++) {
130
+ const uint8_t *ref_code = ivf_codes.get() + i * il->code_size;
131
+ const uint8_t *new_code =
132
+ codes.data() + ivf_ids[i] * il->code_size;
133
+ EXPECT_EQ (memcmp(ref_code, new_code, il->code_size), 0);
134
+ }
135
+ }
136
+
137
+ /** Test independent search
138
+ *
139
+ * Manually scans through inverted lists, computing distances and
140
+ * ordering results organized in a heap.
141
+ */
142
+
143
+ // sample some example queries and get reference search results.
144
+ auto xq = make_data (nq);
145
+ auto ref_I = search_index (index.get(), xq.data());
146
+
147
+ // handle preprocessing
148
+ const float * xqt = xq.data();
149
+ std::unique_ptr<float []> del_xqt;
150
+
151
+ if (index_pt) {
152
+ xqt = index_pt->apply_chain (nq, xq.data());
153
+ if (xqt != xq.data()) {
154
+ del_xqt.reset((float*)xqt);
155
+ }
156
+ }
157
+
158
+ // quantize the queries to get the inverted list ids to visit.
159
+ int nprobe = index_ivf->nprobe;
160
+
161
+ std::vector<idx_t> q_lists (nq * nprobe);
162
+ std::vector<float> q_dis (nq * nprobe);
163
+
164
+ index_ivf->quantizer->search (nq, xqt, nprobe,
165
+ q_dis.data(), q_lists.data());
166
+
167
+ // object that does the scanning and distance computations.
168
+ std::unique_ptr<InvertedListScanner> scanner (
169
+ index_ivf->get_InvertedListScanner());
170
+
171
+ for (int i = 0; i < nq; i++) {
172
+ std::vector<idx_t> I (k, -1);
173
+ float default_dis = metric == METRIC_L2 ? HUGE_VAL : -HUGE_VAL;
174
+ std::vector<float> D (k, default_dis);
175
+
176
+ scanner->set_query (xqt + i * dt);
177
+
178
+ for (int j = 0; j < nprobe; j++) {
179
+ int list_no = q_lists[i * nprobe + j];
180
+ if (list_no < 0) continue;
181
+ scanner->set_list (list_no, q_dis[i * nprobe + j]);
182
+
183
+ // here we get the inverted lists from the InvertedLists
184
+ // object but they could come from anywhere
185
+
186
+ scanner->scan_codes (
187
+ il->list_size (list_no),
188
+ InvertedLists::ScopedCodes(il, list_no).get(),
189
+ InvertedLists::ScopedIds(il, list_no).get(),
190
+ D.data(), I.data(), k);
191
+
192
+ if (j == 0) {
193
+ // all results so far come from list_no, so let's check if
194
+ // the distance function works
195
+ for (int jj = 0; jj < k; jj++) {
196
+ int vno = I[jj];
197
+ if (vno < 0) break; // heap is not full yet
198
+
199
+ // we have the codes from the addition test
200
+ float computed_D = scanner->distance_to_code (
201
+ codes.data() + vno * il->code_size);
202
+
203
+ EXPECT_EQ (computed_D, D[jj]);
204
+ }
205
+ }
206
+ }
207
+
208
+ // re-order heap
209
+ if (metric == METRIC_L2) {
210
+ maxheap_reorder (k, D.data(), I.data());
211
+ } else {
212
+ minheap_reorder (k, D.data(), I.data());
213
+ }
214
+
215
+ // check that we have the same results as the reference search
216
+ for (int j = 0; j < k; j++) {
217
+ EXPECT_EQ (I[j], ref_I[i * k + j]);
218
+ }
219
+ }
220
+
221
+
222
+ }
223
+
224
+ } // anonymous namespace
225
+
226
+
227
+
228
+ /*************************************************************
229
+ * Test entry points
230
+ *************************************************************/
231
+
232
+ TEST(TestLowLevelIVF, IVFFlatL2) {
233
+ test_lowlevel_access ("IVF32,Flat", METRIC_L2);
234
+ }
235
+
236
+ TEST(TestLowLevelIVF, PCAIVFFlatL2) {
237
+ test_lowlevel_access ("PCAR16,IVF32,Flat", METRIC_L2);
238
+ }
239
+
240
+ TEST(TestLowLevelIVF, IVFFlatIP) {
241
+ test_lowlevel_access ("IVF32,Flat", METRIC_INNER_PRODUCT);
242
+ }
243
+
244
+ TEST(TestLowLevelIVF, IVFSQL2) {
245
+ test_lowlevel_access ("IVF32,SQ8", METRIC_L2);
246
+ }
247
+
248
+ TEST(TestLowLevelIVF, IVFSQIP) {
249
+ test_lowlevel_access ("IVF32,SQ8", METRIC_INNER_PRODUCT);
250
+ }
251
+
252
+
253
+ TEST(TestLowLevelIVF, IVFPQL2) {
254
+ test_lowlevel_access ("IVF32,PQ4np", METRIC_L2);
255
+ }
256
+
257
+ TEST(TestLowLevelIVF, IVFPQIP) {
258
+ test_lowlevel_access ("IVF32,PQ4np", METRIC_INNER_PRODUCT);
259
+ }
260
+
261
+
262
+ /*************************************************************
263
+ * Same for binary (a bit simpler)
264
+ *************************************************************/
265
+
266
+ namespace {
267
+
268
+ int nbit = 256;
269
+
270
+ // here d is used the number of ints -> d=32 means 128 bits
271
+
272
+ std::vector<uint8_t> make_data_binary(size_t n)
273
+ {
274
+
275
+ std::vector <uint8_t> database (n * nbit / 8);
276
+ for (size_t i = 0; i < n * d; i++) {
277
+ database[i] = lrand48();
278
+ }
279
+ return database;
280
+ }
281
+
282
+ std::unique_ptr<IndexBinary> make_trained_index_binary(const char *index_type)
283
+ {
284
+ auto index = std::unique_ptr<IndexBinary>(index_binary_factory(
285
+ nbit, index_type));
286
+ auto xt = make_data_binary (nt);
287
+ index->train(nt, xt.data());
288
+ return index;
289
+ }
290
+
291
+
292
+ void test_lowlevel_access_binary (const char *index_key) {
293
+ std::unique_ptr<IndexBinary> index =
294
+ make_trained_index_binary (index_key);
295
+
296
+ IndexBinaryIVF * index_ivf = dynamic_cast<IndexBinaryIVF*>
297
+ (index.get());
298
+ assert (index_ivf);
299
+
300
+ index_ivf->nprobe = 4;
301
+
302
+ auto xb = make_data_binary (nb);
303
+ index->add(nb, xb.data());
304
+
305
+ std::vector<idx_t> list_nos (nb);
306
+ index_ivf->quantizer->assign(nb, xb.data(), list_nos.data());
307
+
308
+ /* For binary there is no test for encoding because binary vectors
309
+ * are copied verbatim to the inverted lists */
310
+
311
+ const InvertedLists *il = index_ivf->invlists;
312
+
313
+ /** Test independent search
314
+ *
315
+ * Manually scans through inverted lists, computing distances and
316
+ * ordering results organized in a heap.
317
+ */
318
+
319
+ // sample some example queries and get reference search results.
320
+ auto xq = make_data_binary (nq);
321
+
322
+ std::vector<idx_t> I_ref(k * nq);
323
+ std::vector<int32_t> D_ref(k * nq);
324
+ index->search (nq, xq.data(), k, D_ref.data(), I_ref.data());
325
+
326
+ // quantize the queries to get the inverted list ids to visit.
327
+ int nprobe = index_ivf->nprobe;
328
+
329
+ std::vector<idx_t> q_lists (nq * nprobe);
330
+ std::vector<int32_t> q_dis (nq * nprobe);
331
+
332
+ // quantize queries
333
+ index_ivf->quantizer->search (nq, xq.data(), nprobe,
334
+ q_dis.data(), q_lists.data());
335
+
336
+ // object that does the scanning and distance computations.
337
+ std::unique_ptr<BinaryInvertedListScanner> scanner (
338
+ index_ivf->get_InvertedListScanner());
339
+
340
+ for (int i = 0; i < nq; i++) {
341
+ std::vector<idx_t> I (k, -1);
342
+ uint32_t default_dis = 1 << 30;
343
+ std::vector<int32_t> D (k, default_dis);
344
+
345
+ scanner->set_query (xq.data() + i * index_ivf->code_size);
346
+
347
+ for (int j = 0; j < nprobe; j++) {
348
+ int list_no = q_lists[i * nprobe + j];
349
+ if (list_no < 0) continue;
350
+ scanner->set_list (list_no, q_dis[i * nprobe + j]);
351
+
352
+ // here we get the inverted lists from the InvertedLists
353
+ // object but they could come from anywhere
354
+
355
+ scanner->scan_codes (
356
+ il->list_size (list_no),
357
+ InvertedLists::ScopedCodes(il, list_no).get(),
358
+ InvertedLists::ScopedIds(il, list_no).get(),
359
+ D.data(), I.data(), k);
360
+
361
+ if (j == 0) {
362
+ // all results so far come from list_no, so let's check if
363
+ // the distance function works
364
+ for (int jj = 0; jj < k; jj++) {
365
+ int vno = I[jj];
366
+ if (vno < 0) break; // heap is not full yet
367
+
368
+ // we have the codes from the addition test
369
+ float computed_D = scanner->distance_to_code (
370
+ xb.data() + vno * il->code_size);
371
+
372
+ EXPECT_EQ (computed_D, D[jj]);
373
+ }
374
+ }
375
+ }
376
+
377
+ printf("new before reroder: [");
378
+ for (int j = 0; j < k; j++)
379
+ printf("%ld,%d ", I[j], D[j]);
380
+ printf("]\n");
381
+
382
+ // re-order heap
383
+ heap_reorder<CMax<int32_t, idx_t> > (k, D.data(), I.data());
384
+
385
+ printf("ref: [");
386
+ for (int j = 0; j < k; j++)
387
+ printf("%ld,%d ", I_ref[j], D_ref[j]);
388
+ printf("]\nnew: [");
389
+ for (int j = 0; j < k; j++)
390
+ printf("%ld,%d ", I[j], D[j]);
391
+ printf("]\n");
392
+
393
+ // check that we have the same results as the reference search
394
+ for (int j = 0; j < k; j++) {
395
+ // here the order is not guaranteed to be the same
396
+ // so we scan through ref results
397
+ // EXPECT_EQ (I[j], I_ref[i * k + j]);
398
+ EXPECT_LE (D[j], D_ref[i * k + k - 1]);
399
+ if (D[j] < D_ref[i * k + k - 1]) {
400
+ int j2 = 0;
401
+ while (j2 < k) {
402
+ if (I[j] == I_ref[i * k + j2]) break;
403
+ j2++;
404
+ }
405
+ EXPECT_LT(j2, k); // it was found
406
+ if (j2 < k) {
407
+ EXPECT_EQ(D[j], D_ref[i * k + j2]);
408
+ }
409
+ }
410
+
411
+ }
412
+
413
+ }
414
+
415
+
416
+ }
417
+
418
+ } // anonymous namespace
419
+
420
+
421
+ TEST(TestLowLevelIVF, IVFBinary) {
422
+ test_lowlevel_access_binary ("BIVF32");
423
+ }
424
+
425
+
426
+ namespace {
427
+
428
+ void test_threaded_search (const char *index_key, MetricType metric) {
429
+ std::unique_ptr<Index> index = make_trained_index(index_key, metric);
430
+
431
+ auto xb = make_data (nb);
432
+ index->add(nb, xb.data());
433
+
434
+ /** handle the case if we have a preprocessor */
435
+
436
+ const IndexPreTransform *index_pt =
437
+ dynamic_cast<const IndexPreTransform*> (index.get());
438
+
439
+ int dt = index->d;
440
+ const float * xbt = xb.data();
441
+ std::unique_ptr<float []> del_xbt;
442
+
443
+ if (index_pt) {
444
+ dt = index_pt->index->d;
445
+ xbt = index_pt->apply_chain (nb, xb.data());
446
+ if (xbt != xb.data()) {
447
+ del_xbt.reset((float*)xbt);
448
+ }
449
+ }
450
+
451
+ IndexIVF * index_ivf = ivflib::extract_index_ivf (index.get());
452
+
453
+ /** Test independent search
454
+ *
455
+ * Manually scans through inverted lists, computing distances and
456
+ * ordering results organized in a heap.
457
+ */
458
+
459
+ // sample some example queries and get reference search results.
460
+ auto xq = make_data (nq);
461
+ auto ref_I = search_index (index.get(), xq.data());
462
+
463
+ // handle preprocessing
464
+ const float * xqt = xq.data();
465
+ std::unique_ptr<float []> del_xqt;
466
+
467
+ if (index_pt) {
468
+ xqt = index_pt->apply_chain (nq, xq.data());
469
+ if (xqt != xq.data()) {
470
+ del_xqt.reset((float*)xqt);
471
+ }
472
+ }
473
+
474
+ // quantize the queries to get the inverted list ids to visit.
475
+ int nprobe = index_ivf->nprobe;
476
+
477
+ std::vector<idx_t> q_lists (nq * nprobe);
478
+ std::vector<float> q_dis (nq * nprobe);
479
+
480
+ index_ivf->quantizer->search (nq, xqt, nprobe,
481
+ q_dis.data(), q_lists.data());
482
+
483
+ // now run search in this many threads
484
+ int nproc = 3;
485
+
486
+
487
+ for (int i = 0; i < nq; i++) {
488
+
489
+ // one result table per thread
490
+ std::vector<idx_t> I (k * nproc, -1);
491
+ float default_dis = metric == METRIC_L2 ? HUGE_VAL : -HUGE_VAL;
492
+ std::vector<float> D (k * nproc, default_dis);
493
+
494
+ auto search_function = [index_ivf, &I, &D, dt, i, nproc,
495
+ xqt, nprobe, &q_dis, &q_lists]
496
+ (int rank) {
497
+ const InvertedLists *il = index_ivf->invlists;
498
+
499
+ // object that does the scanning and distance computations.
500
+ std::unique_ptr<InvertedListScanner> scanner (
501
+ index_ivf->get_InvertedListScanner());
502
+
503
+ idx_t *local_I = I.data() + rank * k;
504
+ float *local_D = D.data() + rank * k;
505
+
506
+ scanner->set_query (xqt + i * dt);
507
+
508
+ for (int j = rank; j < nprobe; j += nproc) {
509
+ int list_no = q_lists[i * nprobe + j];
510
+ if (list_no < 0) continue;
511
+ scanner->set_list (list_no, q_dis[i * nprobe + j]);
512
+
513
+ scanner->scan_codes (
514
+ il->list_size (list_no),
515
+ InvertedLists::ScopedCodes(il, list_no).get(),
516
+ InvertedLists::ScopedIds(il, list_no).get(),
517
+ local_D, local_I, k);
518
+ }
519
+ };
520
+
521
+ // start the threads. Threads are numbered rank=0..nproc-1 (a la MPI)
522
+ // thread rank takes care of inverted lists
523
+ // rank, rank+nproc, rank+2*nproc,...
524
+ std::vector<std::thread> threads;
525
+ for (int rank = 0; rank < nproc; rank++) {
526
+ threads.emplace_back(search_function, rank);
527
+ }
528
+
529
+ // join threads, merge heaps
530
+ for (int rank = 0; rank < nproc; rank++) {
531
+ threads[rank].join();
532
+ if (rank == 0) continue; // nothing to merge
533
+ // merge into first result
534
+ if (metric == METRIC_L2) {
535
+ maxheap_addn (k, D.data(), I.data(),
536
+ D.data() + rank * k,
537
+ I.data() + rank * k, k);
538
+ } else {
539
+ minheap_addn (k, D.data(), I.data(),
540
+ D.data() + rank * k,
541
+ I.data() + rank * k, k);
542
+ }
543
+ }
544
+
545
+ // re-order heap
546
+ if (metric == METRIC_L2) {
547
+ maxheap_reorder (k, D.data(), I.data());
548
+ } else {
549
+ minheap_reorder (k, D.data(), I.data());
550
+ }
551
+
552
+ // check that we have the same results as the reference search
553
+ for (int j = 0; j < k; j++) {
554
+ EXPECT_EQ (I[j], ref_I[i * k + j]);
555
+ }
556
+ }
557
+
558
+
559
+ }
560
+
561
+ } // anonymous namepace
562
+
563
+
564
+ TEST(TestLowLevelIVF, ThreadedSearch) {
565
+ test_threaded_search ("IVF32,Flat", METRIC_L2);
566
+ }