faiss 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,98 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <cstdio>
10
+ #include <cstdlib>
11
+
12
+ #include <gtest/gtest.h>
13
+
14
+ #include <faiss/IndexIVFPQ.h>
15
+ #include <faiss/IndexFlat.h>
16
+ #include <faiss/index_io.h>
17
+
18
+ TEST(IVFPQ, accuracy) {
19
+
20
+ // dimension of the vectors to index
21
+ int d = 64;
22
+
23
+ // size of the database we plan to index
24
+ size_t nb = 1000;
25
+
26
+ // make a set of nt training vectors in the unit cube
27
+ // (could be the database)
28
+ size_t nt = 1500;
29
+
30
+ // make the index object and train it
31
+ faiss::IndexFlatL2 coarse_quantizer (d);
32
+
33
+ // a reasonable number of cetroids to index nb vectors
34
+ int ncentroids = 25;
35
+
36
+ faiss::IndexIVFPQ index (&coarse_quantizer, d,
37
+ ncentroids, 16, 8);
38
+
39
+ // index that gives the ground-truth
40
+ faiss::IndexFlatL2 index_gt (d);
41
+
42
+ srand48 (35);
43
+
44
+ { // training
45
+
46
+ std::vector <float> trainvecs (nt * d);
47
+ for (size_t i = 0; i < nt * d; i++) {
48
+ trainvecs[i] = drand48();
49
+ }
50
+ index.verbose = true;
51
+ index.train (nt, trainvecs.data());
52
+ }
53
+
54
+ { // populating the database
55
+
56
+ std::vector <float> database (nb * d);
57
+ for (size_t i = 0; i < nb * d; i++) {
58
+ database[i] = drand48();
59
+ }
60
+
61
+ index.add (nb, database.data());
62
+ index_gt.add (nb, database.data());
63
+ }
64
+
65
+ int nq = 200;
66
+ int n_ok;
67
+
68
+ { // searching the database
69
+
70
+ std::vector <float> queries (nq * d);
71
+ for (size_t i = 0; i < nq * d; i++) {
72
+ queries[i] = drand48();
73
+ }
74
+
75
+ std::vector<faiss::Index::idx_t> gt_nns (nq);
76
+ std::vector<float> gt_dis (nq);
77
+
78
+ index_gt.search (nq, queries.data(), 1,
79
+ gt_dis.data(), gt_nns.data());
80
+
81
+ index.nprobe = 5;
82
+ int k = 5;
83
+ std::vector<faiss::Index::idx_t> nns (k * nq);
84
+ std::vector<float> dis (k * nq);
85
+
86
+ index.search (nq, queries.data(), k, dis.data(), nns.data());
87
+
88
+ n_ok = 0;
89
+ for (int q = 0; q < nq; q++) {
90
+
91
+ for (int i = 0; i < k; i++)
92
+ if (nns[q * k + i] == gt_nns[q])
93
+ n_ok++;
94
+ }
95
+ EXPECT_GT(n_ok, nq * 0.4);
96
+ }
97
+
98
+ }
@@ -0,0 +1,566 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include <cstdio>
9
+ #include <cstdlib>
10
+
11
+ #include <memory>
12
+ #include <vector>
13
+ #include <thread>
14
+
15
+ #include <gtest/gtest.h>
16
+
17
+ #include <faiss/IndexIVF.h>
18
+ #include <faiss/IndexBinaryIVF.h>
19
+ #include <faiss/IndexPreTransform.h>
20
+ #include <faiss/AutoTune.h>
21
+ #include <faiss/index_factory.h>
22
+ #include <faiss/index_io.h>
23
+ #include <faiss/IVFlib.h>
24
+ #include <faiss/VectorTransform.h>
25
+
26
+ using namespace faiss;
27
+
28
+ namespace {
29
+
30
+ typedef Index::idx_t idx_t;
31
+
32
+
33
+ // dimension of the vectors to index
34
+ int d = 32;
35
+
36
+ // nb of training vectors
37
+ size_t nt = 5000;
38
+
39
+ // size of the database points per window step
40
+ size_t nb = 1000;
41
+
42
+ // nb of queries
43
+ size_t nq = 200;
44
+
45
+ int k = 10;
46
+
47
+
48
+ std::vector<float> make_data(size_t n)
49
+ {
50
+ std::vector <float> database (n * d);
51
+ for (size_t i = 0; i < n * d; i++) {
52
+ database[i] = drand48();
53
+ }
54
+ return database;
55
+ }
56
+
57
+ std::unique_ptr<Index> make_trained_index(const char *index_type,
58
+ MetricType metric_type)
59
+ {
60
+ auto index = std::unique_ptr<Index>(index_factory(
61
+ d, index_type, metric_type));
62
+ auto xt = make_data(nt);
63
+ index->train(nt, xt.data());
64
+ ParameterSpace().set_index_parameter (index.get(), "nprobe", 4);
65
+ return index;
66
+ }
67
+
68
+ std::vector<idx_t> search_index(Index *index, const float *xq) {
69
+ std::vector<idx_t> I(k * nq);
70
+ std::vector<float> D(k * nq);
71
+ index->search (nq, xq, k, D.data(), I.data());
72
+ return I;
73
+ }
74
+
75
+
76
+
77
+
78
+ /*************************************************************
79
+ * Test functions for a given index type
80
+ *************************************************************/
81
+
82
+
83
+
84
+ void test_lowlevel_access (const char *index_key, MetricType metric) {
85
+ std::unique_ptr<Index> index = make_trained_index(index_key, metric);
86
+
87
+ auto xb = make_data (nb);
88
+ index->add(nb, xb.data());
89
+
90
+ /** handle the case if we have a preprocessor */
91
+
92
+ const IndexPreTransform *index_pt =
93
+ dynamic_cast<const IndexPreTransform*> (index.get());
94
+
95
+ int dt = index->d;
96
+ const float * xbt = xb.data();
97
+ std::unique_ptr<float []> del_xbt;
98
+
99
+ if (index_pt) {
100
+ dt = index_pt->index->d;
101
+ xbt = index_pt->apply_chain (nb, xb.data());
102
+ if (xbt != xb.data()) {
103
+ del_xbt.reset((float*)xbt);
104
+ }
105
+ }
106
+
107
+ IndexIVF * index_ivf = ivflib::extract_index_ivf (index.get());
108
+
109
+ /** Test independent encoding
110
+ *
111
+ * Makes it possible to do additions on a custom inverted list
112
+ * implementation. From a set of vectors, computes the inverted
113
+ * list ids + the codes corresponding to each vector.
114
+ */
115
+
116
+ std::vector<idx_t> list_nos (nb);
117
+ std::vector<uint8_t> codes (index_ivf->code_size * nb);
118
+ index_ivf->quantizer->assign(nb, xbt, list_nos.data());
119
+ index_ivf->encode_vectors (nb, xbt, list_nos.data(), codes.data());
120
+
121
+ // compare with normal IVF addition
122
+
123
+ const InvertedLists *il = index_ivf->invlists;
124
+
125
+ for (int list_no = 0; list_no < index_ivf->nlist; list_no++) {
126
+ InvertedLists::ScopedCodes ivf_codes (il, list_no);
127
+ InvertedLists::ScopedIds ivf_ids (il, list_no);
128
+ size_t list_size = il->list_size (list_no);
129
+ for (int i = 0; i < list_size; i++) {
130
+ const uint8_t *ref_code = ivf_codes.get() + i * il->code_size;
131
+ const uint8_t *new_code =
132
+ codes.data() + ivf_ids[i] * il->code_size;
133
+ EXPECT_EQ (memcmp(ref_code, new_code, il->code_size), 0);
134
+ }
135
+ }
136
+
137
+ /** Test independent search
138
+ *
139
+ * Manually scans through inverted lists, computing distances and
140
+ * ordering results organized in a heap.
141
+ */
142
+
143
+ // sample some example queries and get reference search results.
144
+ auto xq = make_data (nq);
145
+ auto ref_I = search_index (index.get(), xq.data());
146
+
147
+ // handle preprocessing
148
+ const float * xqt = xq.data();
149
+ std::unique_ptr<float []> del_xqt;
150
+
151
+ if (index_pt) {
152
+ xqt = index_pt->apply_chain (nq, xq.data());
153
+ if (xqt != xq.data()) {
154
+ del_xqt.reset((float*)xqt);
155
+ }
156
+ }
157
+
158
+ // quantize the queries to get the inverted list ids to visit.
159
+ int nprobe = index_ivf->nprobe;
160
+
161
+ std::vector<idx_t> q_lists (nq * nprobe);
162
+ std::vector<float> q_dis (nq * nprobe);
163
+
164
+ index_ivf->quantizer->search (nq, xqt, nprobe,
165
+ q_dis.data(), q_lists.data());
166
+
167
+ // object that does the scanning and distance computations.
168
+ std::unique_ptr<InvertedListScanner> scanner (
169
+ index_ivf->get_InvertedListScanner());
170
+
171
+ for (int i = 0; i < nq; i++) {
172
+ std::vector<idx_t> I (k, -1);
173
+ float default_dis = metric == METRIC_L2 ? HUGE_VAL : -HUGE_VAL;
174
+ std::vector<float> D (k, default_dis);
175
+
176
+ scanner->set_query (xqt + i * dt);
177
+
178
+ for (int j = 0; j < nprobe; j++) {
179
+ int list_no = q_lists[i * nprobe + j];
180
+ if (list_no < 0) continue;
181
+ scanner->set_list (list_no, q_dis[i * nprobe + j]);
182
+
183
+ // here we get the inverted lists from the InvertedLists
184
+ // object but they could come from anywhere
185
+
186
+ scanner->scan_codes (
187
+ il->list_size (list_no),
188
+ InvertedLists::ScopedCodes(il, list_no).get(),
189
+ InvertedLists::ScopedIds(il, list_no).get(),
190
+ D.data(), I.data(), k);
191
+
192
+ if (j == 0) {
193
+ // all results so far come from list_no, so let's check if
194
+ // the distance function works
195
+ for (int jj = 0; jj < k; jj++) {
196
+ int vno = I[jj];
197
+ if (vno < 0) break; // heap is not full yet
198
+
199
+ // we have the codes from the addition test
200
+ float computed_D = scanner->distance_to_code (
201
+ codes.data() + vno * il->code_size);
202
+
203
+ EXPECT_EQ (computed_D, D[jj]);
204
+ }
205
+ }
206
+ }
207
+
208
+ // re-order heap
209
+ if (metric == METRIC_L2) {
210
+ maxheap_reorder (k, D.data(), I.data());
211
+ } else {
212
+ minheap_reorder (k, D.data(), I.data());
213
+ }
214
+
215
+ // check that we have the same results as the reference search
216
+ for (int j = 0; j < k; j++) {
217
+ EXPECT_EQ (I[j], ref_I[i * k + j]);
218
+ }
219
+ }
220
+
221
+
222
+ }
223
+
224
+ } // anonymous namespace
225
+
226
+
227
+
228
+ /*************************************************************
229
+ * Test entry points
230
+ *************************************************************/
231
+
232
+ TEST(TestLowLevelIVF, IVFFlatL2) { // low-level access check for factory key "IVF32,Flat", L2 metric
233
+ test_lowlevel_access ("IVF32,Flat", METRIC_L2);
234
+ }
235
+
236
+ TEST(TestLowLevelIVF, PCAIVFFlatL2) { // "PCAR16," prefix: presumably exercises the IndexPreTransform branch of test_lowlevel_access -- confirm against index_factory
237
+ test_lowlevel_access ("PCAR16,IVF32,Flat", METRIC_L2);
238
+ }
239
+
240
+ TEST(TestLowLevelIVF, IVFFlatIP) { // same key as IVFFlatL2 but with the inner-product metric (min-heap reorder path)
241
+ test_lowlevel_access ("IVF32,Flat", METRIC_INNER_PRODUCT);
242
+ }
243
+
244
+ TEST(TestLowLevelIVF, IVFSQL2) { // factory key "IVF32,SQ8", L2 metric
245
+ test_lowlevel_access ("IVF32,SQ8", METRIC_L2);
246
+ }
247
+
248
+ TEST(TestLowLevelIVF, IVFSQIP) { // factory key "IVF32,SQ8", inner-product metric
249
+ test_lowlevel_access ("IVF32,SQ8", METRIC_INNER_PRODUCT);
250
+ }
251
+
252
+
253
+ TEST(TestLowLevelIVF, IVFPQL2) { // factory key "IVF32,PQ4np", L2 metric
254
+ test_lowlevel_access ("IVF32,PQ4np", METRIC_L2);
255
+ }
256
+
257
+ TEST(TestLowLevelIVF, IVFPQIP) { // factory key "IVF32,PQ4np", inner-product metric
258
+ test_lowlevel_access ("IVF32,PQ4np", METRIC_INNER_PRODUCT);
259
+ }
260
+
261
+
262
+ /*************************************************************
263
+ * Same for binary (a bit simpler)
264
+ *************************************************************/
265
+
266
+ namespace {
267
+
268
// number of bits per binary vector
const int nbit = 256;

// a binary vector is stored packed, so it occupies nbit / 8 = 32 bytes

/// Returns n binary vectors of nbit bits each, packed into n * nbit / 8
/// bytes and filled from lrand48().
std::vector<uint8_t> make_data_binary(size_t n)
{

    std::vector<uint8_t> database (n * nbit / 8);
    // Fill exactly the bytes that were allocated. The bound is the buffer
    // size itself rather than n * d, which matched only while nbit / 8 == d.
    for (uint8_t &byte : database) {
        byte = static_cast<uint8_t>(lrand48());
    }
    return database;
}
281
+
282
+ std::unique_ptr<IndexBinary> make_trained_index_binary(const char *index_type)
283
+ {
284
+ auto index = std::unique_ptr<IndexBinary>(index_binary_factory(
285
+ nbit, index_type));
286
+ auto xt = make_data_binary (nt);
287
+ index->train(nt, xt.data());
288
+ return index;
289
+ }
290
+
291
+
292
+ void test_lowlevel_access_binary (const char *index_key) {
293
+ std::unique_ptr<IndexBinary> index =
294
+ make_trained_index_binary (index_key);
295
+
296
+ IndexBinaryIVF * index_ivf = dynamic_cast<IndexBinaryIVF*>
297
+ (index.get());
298
+ assert (index_ivf);
299
+
300
+ index_ivf->nprobe = 4;
301
+
302
+ auto xb = make_data_binary (nb);
303
+ index->add(nb, xb.data());
304
+
305
+ std::vector<idx_t> list_nos (nb);
306
+ index_ivf->quantizer->assign(nb, xb.data(), list_nos.data());
307
+
308
+ /* For binary there is no test for encoding because binary vectors
309
+ * are copied verbatim to the inverted lists */
310
+
311
+ const InvertedLists *il = index_ivf->invlists;
312
+
313
+ /** Test independent search
314
+ *
315
+ * Manually scans through inverted lists, computing distances and
316
+ * ordering results organized in a heap.
317
+ */
318
+
319
+ // sample some example queries and get reference search results.
320
+ auto xq = make_data_binary (nq);
321
+
322
+ std::vector<idx_t> I_ref(k * nq);
323
+ std::vector<int32_t> D_ref(k * nq);
324
+ index->search (nq, xq.data(), k, D_ref.data(), I_ref.data());
325
+
326
+ // quantize the queries to get the inverted list ids to visit.
327
+ int nprobe = index_ivf->nprobe;
328
+
329
+ std::vector<idx_t> q_lists (nq * nprobe);
330
+ std::vector<int32_t> q_dis (nq * nprobe);
331
+
332
+ // quantize queries
333
+ index_ivf->quantizer->search (nq, xq.data(), nprobe,
334
+ q_dis.data(), q_lists.data());
335
+
336
+ // object that does the scanning and distance computations.
337
+ std::unique_ptr<BinaryInvertedListScanner> scanner (
338
+ index_ivf->get_InvertedListScanner());
339
+
340
+ for (int i = 0; i < nq; i++) {
341
+ std::vector<idx_t> I (k, -1);
342
+ uint32_t default_dis = 1 << 30;
343
+ std::vector<int32_t> D (k, default_dis);
344
+
345
+ scanner->set_query (xq.data() + i * index_ivf->code_size);
346
+
347
+ for (int j = 0; j < nprobe; j++) {
348
+ int list_no = q_lists[i * nprobe + j];
349
+ if (list_no < 0) continue;
350
+ scanner->set_list (list_no, q_dis[i * nprobe + j]);
351
+
352
+ // here we get the inverted lists from the InvertedLists
353
+ // object but they could come from anywhere
354
+
355
+ scanner->scan_codes (
356
+ il->list_size (list_no),
357
+ InvertedLists::ScopedCodes(il, list_no).get(),
358
+ InvertedLists::ScopedIds(il, list_no).get(),
359
+ D.data(), I.data(), k);
360
+
361
+ if (j == 0) {
362
+ // all results so far come from list_no, so let's check if
363
+ // the distance function works
364
+ for (int jj = 0; jj < k; jj++) {
365
+ int vno = I[jj];
366
+ if (vno < 0) break; // heap is not full yet
367
+
368
+ // we have the codes from the addition test
369
+ float computed_D = scanner->distance_to_code (
370
+ xb.data() + vno * il->code_size);
371
+
372
+ EXPECT_EQ (computed_D, D[jj]);
373
+ }
374
+ }
375
+ }
376
+
377
+ printf("new before reroder: [");
378
+ for (int j = 0; j < k; j++)
379
+ printf("%ld,%d ", I[j], D[j]);
380
+ printf("]\n");
381
+
382
+ // re-order heap
383
+ heap_reorder<CMax<int32_t, idx_t> > (k, D.data(), I.data());
384
+
385
+ printf("ref: [");
386
+ for (int j = 0; j < k; j++)
387
+ printf("%ld,%d ", I_ref[j], D_ref[j]);
388
+ printf("]\nnew: [");
389
+ for (int j = 0; j < k; j++)
390
+ printf("%ld,%d ", I[j], D[j]);
391
+ printf("]\n");
392
+
393
+ // check that we have the same results as the reference search
394
+ for (int j = 0; j < k; j++) {
395
+ // here the order is not guaranteed to be the same
396
+ // so we scan through ref results
397
+ // EXPECT_EQ (I[j], I_ref[i * k + j]);
398
+ EXPECT_LE (D[j], D_ref[i * k + k - 1]);
399
+ if (D[j] < D_ref[i * k + k - 1]) {
400
+ int j2 = 0;
401
+ while (j2 < k) {
402
+ if (I[j] == I_ref[i * k + j2]) break;
403
+ j2++;
404
+ }
405
+ EXPECT_LT(j2, k); // it was found
406
+ if (j2 < k) {
407
+ EXPECT_EQ(D[j], D_ref[i * k + j2]);
408
+ }
409
+ }
410
+
411
+ }
412
+
413
+ }
414
+
415
+
416
+ }
417
+
418
+ } // anonymous namespace
419
+
420
+
421
+ TEST(TestLowLevelIVF, IVFBinary) { // binary low-level access check for factory key "BIVF32"
422
+ test_lowlevel_access_binary ("BIVF32");
423
+ }
424
+
425
+
426
+ namespace {
427
+
428
+ void test_threaded_search (const char *index_key, MetricType metric) {
429
+ std::unique_ptr<Index> index = make_trained_index(index_key, metric);
430
+
431
+ auto xb = make_data (nb);
432
+ index->add(nb, xb.data());
433
+
434
+ /** handle the case if we have a preprocessor */
435
+
436
+ const IndexPreTransform *index_pt =
437
+ dynamic_cast<const IndexPreTransform*> (index.get());
438
+
439
+ int dt = index->d;
440
+ const float * xbt = xb.data();
441
+ std::unique_ptr<float []> del_xbt;
442
+
443
+ if (index_pt) {
444
+ dt = index_pt->index->d;
445
+ xbt = index_pt->apply_chain (nb, xb.data());
446
+ if (xbt != xb.data()) {
447
+ del_xbt.reset((float*)xbt);
448
+ }
449
+ }
450
+
451
+ IndexIVF * index_ivf = ivflib::extract_index_ivf (index.get());
452
+
453
+ /** Test independent search
454
+ *
455
+ * Manually scans through inverted lists, computing distances and
456
+ * ordering results organized in a heap.
457
+ */
458
+
459
+ // sample some example queries and get reference search results.
460
+ auto xq = make_data (nq);
461
+ auto ref_I = search_index (index.get(), xq.data());
462
+
463
+ // handle preprocessing
464
+ const float * xqt = xq.data();
465
+ std::unique_ptr<float []> del_xqt;
466
+
467
+ if (index_pt) {
468
+ xqt = index_pt->apply_chain (nq, xq.data());
469
+ if (xqt != xq.data()) {
470
+ del_xqt.reset((float*)xqt);
471
+ }
472
+ }
473
+
474
+ // quantize the queries to get the inverted list ids to visit.
475
+ int nprobe = index_ivf->nprobe;
476
+
477
+ std::vector<idx_t> q_lists (nq * nprobe);
478
+ std::vector<float> q_dis (nq * nprobe);
479
+
480
+ index_ivf->quantizer->search (nq, xqt, nprobe,
481
+ q_dis.data(), q_lists.data());
482
+
483
+ // now run search in this many threads
484
+ int nproc = 3;
485
+
486
+
487
+ for (int i = 0; i < nq; i++) {
488
+
489
+ // one result table per thread
490
+ std::vector<idx_t> I (k * nproc, -1);
491
+ float default_dis = metric == METRIC_L2 ? HUGE_VAL : -HUGE_VAL;
492
+ std::vector<float> D (k * nproc, default_dis);
493
+
494
+ auto search_function = [index_ivf, &I, &D, dt, i, nproc,
495
+ xqt, nprobe, &q_dis, &q_lists]
496
+ (int rank) {
497
+ const InvertedLists *il = index_ivf->invlists;
498
+
499
+ // object that does the scanning and distance computations.
500
+ std::unique_ptr<InvertedListScanner> scanner (
501
+ index_ivf->get_InvertedListScanner());
502
+
503
+ idx_t *local_I = I.data() + rank * k;
504
+ float *local_D = D.data() + rank * k;
505
+
506
+ scanner->set_query (xqt + i * dt);
507
+
508
+ for (int j = rank; j < nprobe; j += nproc) {
509
+ int list_no = q_lists[i * nprobe + j];
510
+ if (list_no < 0) continue;
511
+ scanner->set_list (list_no, q_dis[i * nprobe + j]);
512
+
513
+ scanner->scan_codes (
514
+ il->list_size (list_no),
515
+ InvertedLists::ScopedCodes(il, list_no).get(),
516
+ InvertedLists::ScopedIds(il, list_no).get(),
517
+ local_D, local_I, k);
518
+ }
519
+ };
520
+
521
+ // start the threads. Threads are numbered rank=0..nproc-1 (a la MPI)
522
+ // thread rank takes care of inverted lists
523
+ // rank, rank+nproc, rank+2*nproc,...
524
+ std::vector<std::thread> threads;
525
+ for (int rank = 0; rank < nproc; rank++) {
526
+ threads.emplace_back(search_function, rank);
527
+ }
528
+
529
+ // join threads, merge heaps
530
+ for (int rank = 0; rank < nproc; rank++) {
531
+ threads[rank].join();
532
+ if (rank == 0) continue; // nothing to merge
533
+ // merge into first result
534
+ if (metric == METRIC_L2) {
535
+ maxheap_addn (k, D.data(), I.data(),
536
+ D.data() + rank * k,
537
+ I.data() + rank * k, k);
538
+ } else {
539
+ minheap_addn (k, D.data(), I.data(),
540
+ D.data() + rank * k,
541
+ I.data() + rank * k, k);
542
+ }
543
+ }
544
+
545
+ // re-order heap
546
+ if (metric == METRIC_L2) {
547
+ maxheap_reorder (k, D.data(), I.data());
548
+ } else {
549
+ minheap_reorder (k, D.data(), I.data());
550
+ }
551
+
552
+ // check that we have the same results as the reference search
553
+ for (int j = 0; j < k; j++) {
554
+ EXPECT_EQ (I[j], ref_I[i * k + j]);
555
+ }
556
+ }
557
+
558
+
559
+ }
560
+
561
+ } // anonymous namespace
562
+
563
+
564
+ TEST(TestLowLevelIVF, ThreadedSearch) { // multi-threaded manual scan check for factory key "IVF32,Flat", L2 metric
565
+ test_threaded_search ("IVF32,Flat", METRIC_L2);
566
+ }