faiss 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (199) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +16 -4
  5. data/ext/faiss/ext.cpp +12 -308
  6. data/ext/faiss/extconf.rb +6 -3
  7. data/ext/faiss/index.cpp +189 -0
  8. data/ext/faiss/index_binary.cpp +75 -0
  9. data/ext/faiss/kmeans.cpp +40 -0
  10. data/ext/faiss/numo.hpp +867 -0
  11. data/ext/faiss/pca_matrix.cpp +33 -0
  12. data/ext/faiss/product_quantizer.cpp +53 -0
  13. data/ext/faiss/utils.cpp +13 -0
  14. data/ext/faiss/utils.h +5 -0
  15. data/lib/faiss.rb +0 -5
  16. data/lib/faiss/version.rb +1 -1
  17. data/vendor/faiss/faiss/AutoTune.cpp +36 -33
  18. data/vendor/faiss/faiss/AutoTune.h +6 -3
  19. data/vendor/faiss/faiss/Clustering.cpp +16 -12
  20. data/vendor/faiss/faiss/Index.cpp +3 -4
  21. data/vendor/faiss/faiss/Index.h +3 -3
  22. data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
  23. data/vendor/faiss/faiss/IndexBinary.h +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
  25. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
  26. data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
  27. data/vendor/faiss/faiss/IndexFlat.h +0 -51
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
  29. data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
  30. data/vendor/faiss/faiss/IndexIVF.h +22 -15
  31. data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
  32. data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
  33. data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
  34. data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
  35. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
  36. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
  37. data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
  38. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  39. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
  40. data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
  41. data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
  42. data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
  43. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
  44. data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
  45. data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
  46. data/vendor/faiss/faiss/IndexRefine.h +73 -0
  47. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
  48. data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
  49. data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
  50. data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
  51. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
  52. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
  53. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
  54. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
  55. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
  56. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
  57. data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
  58. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
  59. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
  60. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
  61. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
  62. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
  63. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
  64. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
  65. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
  66. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
  67. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
  68. data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
  69. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
  70. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
  71. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
  72. data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
  73. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
  74. data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
  75. data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
  76. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
  77. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
  78. data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
  79. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
  80. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
  81. data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
  82. data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
  83. data/vendor/faiss/faiss/impl/io.cpp +33 -2
  84. data/vendor/faiss/faiss/impl/io.h +7 -2
  85. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
  86. data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
  87. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
  88. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
  89. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
  90. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
  91. data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
  92. data/vendor/faiss/faiss/index_factory.cpp +112 -7
  93. data/vendor/faiss/faiss/index_io.h +1 -48
  94. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
  95. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
  96. data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
  97. data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
  98. data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
  99. data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
  100. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
  101. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
  102. data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
  103. data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
  104. data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
  105. data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
  106. data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
  107. data/vendor/faiss/faiss/utils/Heap.h +61 -50
  108. data/vendor/faiss/faiss/utils/distances.cpp +164 -319
  109. data/vendor/faiss/faiss/utils/distances.h +28 -20
  110. data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
  111. data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
  112. data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
  113. data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
  114. data/vendor/faiss/faiss/utils/hamming.h +2 -7
  115. data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
  116. data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
  117. data/vendor/faiss/faiss/utils/partitioning.h +69 -0
  118. data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
  119. data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
  120. data/vendor/faiss/faiss/utils/simdlib.h +31 -0
  121. data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
  122. data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
  123. metadata +54 -149
  124. data/lib/faiss/index.rb +0 -20
  125. data/lib/faiss/index_binary.rb +0 -20
  126. data/lib/faiss/kmeans.rb +0 -15
  127. data/lib/faiss/pca_matrix.rb +0 -15
  128. data/lib/faiss/product_quantizer.rb +0 -22
  129. data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
  130. data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
  131. data/vendor/faiss/c_api/AutoTune_c.h +0 -66
  132. data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
  133. data/vendor/faiss/c_api/Clustering_c.h +0 -123
  134. data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
  135. data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
  136. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
  137. data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
  138. data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
  139. data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
  140. data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
  141. data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
  142. data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
  143. data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
  144. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
  145. data/vendor/faiss/c_api/IndexShards_c.h +0 -39
  146. data/vendor/faiss/c_api/Index_c.cpp +0 -105
  147. data/vendor/faiss/c_api/Index_c.h +0 -183
  148. data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
  149. data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
  150. data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
  151. data/vendor/faiss/c_api/clone_index_c.h +0 -32
  152. data/vendor/faiss/c_api/error_c.h +0 -42
  153. data/vendor/faiss/c_api/error_impl.cpp +0 -27
  154. data/vendor/faiss/c_api/error_impl.h +0 -16
  155. data/vendor/faiss/c_api/faiss_c.h +0 -58
  156. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
  157. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
  158. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
  159. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
  160. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
  161. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
  162. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
  163. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
  164. data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
  165. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
  166. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
  167. data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
  168. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
  169. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
  170. data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
  171. data/vendor/faiss/c_api/index_factory_c.h +0 -30
  172. data/vendor/faiss/c_api/index_io_c.cpp +0 -42
  173. data/vendor/faiss/c_api/index_io_c.h +0 -50
  174. data/vendor/faiss/c_api/macros_impl.h +0 -110
  175. data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
  176. data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
  177. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
  178. data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
  179. data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
  180. data/vendor/faiss/misc/test_blas.cpp +0 -87
  181. data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
  182. data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
  183. data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
  184. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
  185. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
  186. data/vendor/faiss/tests/test_merge.cpp +0 -260
  187. data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
  188. data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
  189. data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
  190. data/vendor/faiss/tests/test_params_override.cpp +0 -236
  191. data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
  192. data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
  193. data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
  194. data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
  195. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
  196. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
  197. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
  198. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
  199. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -1,573 +0,0 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
- #include <cinttypes>
9
- #include <cstdio>
10
- #include <cstdlib>
11
-
12
- #include <memory>
13
- #include <vector>
14
- #include <thread>
15
- #include <random>
16
-
17
- #include <gtest/gtest.h>
18
-
19
- #include <faiss/IndexIVF.h>
20
- #include <faiss/IndexBinaryIVF.h>
21
- #include <faiss/IndexPreTransform.h>
22
- #include <faiss/AutoTune.h>
23
- #include <faiss/index_factory.h>
24
- #include <faiss/index_io.h>
25
- #include <faiss/IVFlib.h>
26
- #include <faiss/VectorTransform.h>
27
-
28
-
29
- using namespace faiss;
30
-
31
- namespace {
32
-
33
- typedef Index::idx_t idx_t;
34
-
35
-
36
- // dimension of the vectors to index
37
- int d = 32;
38
-
39
- // nb of training vectors
40
- size_t nt = 5000;
41
-
42
- // size of the database points per window step
43
- size_t nb = 1000;
44
-
45
- // nb of queries
46
- size_t nq = 200;
47
-
48
- int k = 10;
49
-
50
- std::mt19937 rng;
51
-
52
-
53
- std::vector<float> make_data(size_t n)
54
- {
55
- std::vector <float> database (n * d);
56
- std::uniform_real_distribution<> distrib;
57
- for (size_t i = 0; i < n * d; i++) {
58
- database[i] = distrib(rng);
59
- }
60
- return database;
61
- }
62
-
63
- std::unique_ptr<Index> make_trained_index(const char *index_type,
64
- MetricType metric_type)
65
- {
66
- auto index = std::unique_ptr<Index>(index_factory(
67
- d, index_type, metric_type));
68
- auto xt = make_data(nt);
69
- index->train(nt, xt.data());
70
- ParameterSpace().set_index_parameter (index.get(), "nprobe", 4);
71
- return index;
72
- }
73
-
74
- std::vector<idx_t> search_index(Index *index, const float *xq) {
75
- std::vector<idx_t> I(k * nq);
76
- std::vector<float> D(k * nq);
77
- index->search (nq, xq, k, D.data(), I.data());
78
- return I;
79
- }
80
-
81
-
82
-
83
-
84
- /*************************************************************
85
- * Test functions for a given index type
86
- *************************************************************/
87
-
88
-
89
-
90
- void test_lowlevel_access (const char *index_key, MetricType metric) {
91
- std::unique_ptr<Index> index = make_trained_index(index_key, metric);
92
-
93
- auto xb = make_data (nb);
94
- index->add(nb, xb.data());
95
-
96
- /** handle the case if we have a preprocessor */
97
-
98
- const IndexPreTransform *index_pt =
99
- dynamic_cast<const IndexPreTransform*> (index.get());
100
-
101
- int dt = index->d;
102
- const float * xbt = xb.data();
103
- std::unique_ptr<float []> del_xbt;
104
-
105
- if (index_pt) {
106
- dt = index_pt->index->d;
107
- xbt = index_pt->apply_chain (nb, xb.data());
108
- if (xbt != xb.data()) {
109
- del_xbt.reset((float*)xbt);
110
- }
111
- }
112
-
113
- IndexIVF * index_ivf = ivflib::extract_index_ivf (index.get());
114
-
115
- /** Test independent encoding
116
- *
117
- * Makes it possible to do additions on a custom inverted list
118
- * implementation. From a set of vectors, computes the inverted
119
- * list ids + the codes corresponding to each vector.
120
- */
121
-
122
- std::vector<idx_t> list_nos (nb);
123
- std::vector<uint8_t> codes (index_ivf->code_size * nb);
124
- index_ivf->quantizer->assign(nb, xbt, list_nos.data());
125
- index_ivf->encode_vectors (nb, xbt, list_nos.data(), codes.data());
126
-
127
- // compare with normal IVF addition
128
-
129
- const InvertedLists *il = index_ivf->invlists;
130
-
131
- for (int list_no = 0; list_no < index_ivf->nlist; list_no++) {
132
- InvertedLists::ScopedCodes ivf_codes (il, list_no);
133
- InvertedLists::ScopedIds ivf_ids (il, list_no);
134
- size_t list_size = il->list_size (list_no);
135
- for (int i = 0; i < list_size; i++) {
136
- const uint8_t *ref_code = ivf_codes.get() + i * il->code_size;
137
- const uint8_t *new_code =
138
- codes.data() + ivf_ids[i] * il->code_size;
139
- EXPECT_EQ (memcmp(ref_code, new_code, il->code_size), 0);
140
- }
141
- }
142
-
143
- /** Test independent search
144
- *
145
- * Manually scans through inverted lists, computing distances and
146
- * ordering results organized in a heap.
147
- */
148
-
149
- // sample some example queries and get reference search results.
150
- auto xq = make_data (nq);
151
- auto ref_I = search_index (index.get(), xq.data());
152
-
153
- // handle preprocessing
154
- const float * xqt = xq.data();
155
- std::unique_ptr<float []> del_xqt;
156
-
157
- if (index_pt) {
158
- xqt = index_pt->apply_chain (nq, xq.data());
159
- if (xqt != xq.data()) {
160
- del_xqt.reset((float*)xqt);
161
- }
162
- }
163
-
164
- // quantize the queries to get the inverted list ids to visit.
165
- int nprobe = index_ivf->nprobe;
166
-
167
- std::vector<idx_t> q_lists (nq * nprobe);
168
- std::vector<float> q_dis (nq * nprobe);
169
-
170
- index_ivf->quantizer->search (nq, xqt, nprobe,
171
- q_dis.data(), q_lists.data());
172
-
173
- // object that does the scanning and distance computations.
174
- std::unique_ptr<InvertedListScanner> scanner (
175
- index_ivf->get_InvertedListScanner());
176
-
177
- for (int i = 0; i < nq; i++) {
178
- std::vector<idx_t> I (k, -1);
179
- float default_dis = metric == METRIC_L2 ? HUGE_VAL : -HUGE_VAL;
180
- std::vector<float> D (k, default_dis);
181
-
182
- scanner->set_query (xqt + i * dt);
183
-
184
- for (int j = 0; j < nprobe; j++) {
185
- int list_no = q_lists[i * nprobe + j];
186
- if (list_no < 0) continue;
187
- scanner->set_list (list_no, q_dis[i * nprobe + j]);
188
-
189
- // here we get the inverted lists from the InvertedLists
190
- // object but they could come from anywhere
191
-
192
- scanner->scan_codes (
193
- il->list_size (list_no),
194
- InvertedLists::ScopedCodes(il, list_no).get(),
195
- InvertedLists::ScopedIds(il, list_no).get(),
196
- D.data(), I.data(), k);
197
-
198
- if (j == 0) {
199
- // all results so far come from list_no, so let's check if
200
- // the distance function works
201
- for (int jj = 0; jj < k; jj++) {
202
- int vno = I[jj];
203
- if (vno < 0) break; // heap is not full yet
204
-
205
- // we have the codes from the addition test
206
- float computed_D = scanner->distance_to_code (
207
- codes.data() + vno * il->code_size);
208
-
209
- EXPECT_EQ (computed_D, D[jj]);
210
- }
211
- }
212
- }
213
-
214
- // re-order heap
215
- if (metric == METRIC_L2) {
216
- maxheap_reorder (k, D.data(), I.data());
217
- } else {
218
- minheap_reorder (k, D.data(), I.data());
219
- }
220
-
221
- // check that we have the same results as the reference search
222
- for (int j = 0; j < k; j++) {
223
- EXPECT_EQ (I[j], ref_I[i * k + j]);
224
- }
225
- }
226
-
227
-
228
- }
229
-
230
- } // anonymous namespace
231
-
232
-
233
-
234
- /*************************************************************
235
- * Test entry points
236
- *************************************************************/
237
-
238
- TEST(TestLowLevelIVF, IVFFlatL2) {
239
- test_lowlevel_access ("IVF32,Flat", METRIC_L2);
240
- }
241
-
242
- TEST(TestLowLevelIVF, PCAIVFFlatL2) {
243
- test_lowlevel_access ("PCAR16,IVF32,Flat", METRIC_L2);
244
- }
245
-
246
- TEST(TestLowLevelIVF, IVFFlatIP) {
247
- test_lowlevel_access ("IVF32,Flat", METRIC_INNER_PRODUCT);
248
- }
249
-
250
- TEST(TestLowLevelIVF, IVFSQL2) {
251
- test_lowlevel_access ("IVF32,SQ8", METRIC_L2);
252
- }
253
-
254
- TEST(TestLowLevelIVF, IVFSQIP) {
255
- test_lowlevel_access ("IVF32,SQ8", METRIC_INNER_PRODUCT);
256
- }
257
-
258
-
259
- TEST(TestLowLevelIVF, IVFPQL2) {
260
- test_lowlevel_access ("IVF32,PQ4np", METRIC_L2);
261
- }
262
-
263
- TEST(TestLowLevelIVF, IVFPQIP) {
264
- test_lowlevel_access ("IVF32,PQ4np", METRIC_INNER_PRODUCT);
265
- }
266
-
267
-
268
- /*************************************************************
269
- * Same for binary (a bit simpler)
270
- *************************************************************/
271
-
272
- namespace {
273
-
274
- int nbit = 256;
275
-
276
- // here d is used as the number of ints -> d=32 means 128 bits
277
-
278
- std::vector<uint8_t> make_data_binary(size_t n)
279
- {
280
-
281
- std::vector <uint8_t> database (n * nbit / 8);
282
- std::uniform_int_distribution<> distrib;
283
- for (size_t i = 0; i < n * d; i++) {
284
- database[i] = distrib(rng);
285
- }
286
- return database;
287
- }
288
-
289
- std::unique_ptr<IndexBinary> make_trained_index_binary(const char *index_type)
290
- {
291
- auto index = std::unique_ptr<IndexBinary>(index_binary_factory(
292
- nbit, index_type));
293
- auto xt = make_data_binary (nt);
294
- index->train(nt, xt.data());
295
- return index;
296
- }
297
-
298
-
299
- void test_lowlevel_access_binary (const char *index_key) {
300
- std::unique_ptr<IndexBinary> index =
301
- make_trained_index_binary (index_key);
302
-
303
- IndexBinaryIVF * index_ivf = dynamic_cast<IndexBinaryIVF*>
304
- (index.get());
305
- assert (index_ivf);
306
-
307
- index_ivf->nprobe = 4;
308
-
309
- auto xb = make_data_binary (nb);
310
- index->add(nb, xb.data());
311
-
312
- std::vector<idx_t> list_nos (nb);
313
- index_ivf->quantizer->assign(nb, xb.data(), list_nos.data());
314
-
315
- /* For binary there is no test for encoding because binary vectors
316
- * are copied verbatim to the inverted lists */
317
-
318
- const InvertedLists *il = index_ivf->invlists;
319
-
320
- /** Test independent search
321
- *
322
- * Manually scans through inverted lists, computing distances and
323
- * ordering results organized in a heap.
324
- */
325
-
326
- // sample some example queries and get reference search results.
327
- auto xq = make_data_binary (nq);
328
-
329
- std::vector<idx_t> I_ref(k * nq);
330
- std::vector<int32_t> D_ref(k * nq);
331
- index->search (nq, xq.data(), k, D_ref.data(), I_ref.data());
332
-
333
- // quantize the queries to get the inverted list ids to visit.
334
- int nprobe = index_ivf->nprobe;
335
-
336
- std::vector<idx_t> q_lists (nq * nprobe);
337
- std::vector<int32_t> q_dis (nq * nprobe);
338
-
339
- // quantize queries
340
- index_ivf->quantizer->search (nq, xq.data(), nprobe,
341
- q_dis.data(), q_lists.data());
342
-
343
- // object that does the scanning and distance computations.
344
- std::unique_ptr<BinaryInvertedListScanner> scanner (
345
- index_ivf->get_InvertedListScanner());
346
-
347
- for (int i = 0; i < nq; i++) {
348
- std::vector<idx_t> I (k, -1);
349
- uint32_t default_dis = 1 << 30;
350
- std::vector<int32_t> D (k, default_dis);
351
-
352
- scanner->set_query (xq.data() + i * index_ivf->code_size);
353
-
354
- for (int j = 0; j < nprobe; j++) {
355
- int list_no = q_lists[i * nprobe + j];
356
- if (list_no < 0) continue;
357
- scanner->set_list (list_no, q_dis[i * nprobe + j]);
358
-
359
- // here we get the inverted lists from the InvertedLists
360
- // object but they could come from anywhere
361
-
362
- scanner->scan_codes (
363
- il->list_size (list_no),
364
- InvertedLists::ScopedCodes(il, list_no).get(),
365
- InvertedLists::ScopedIds(il, list_no).get(),
366
- D.data(), I.data(), k);
367
-
368
- if (j == 0) {
369
- // all results so far come from list_no, so let's check if
370
- // the distance function works
371
- for (int jj = 0; jj < k; jj++) {
372
- int vno = I[jj];
373
- if (vno < 0) break; // heap is not full yet
374
-
375
- // we have the codes from the addition test
376
- float computed_D = scanner->distance_to_code (
377
- xb.data() + vno * il->code_size);
378
-
379
- EXPECT_EQ (computed_D, D[jj]);
380
- }
381
- }
382
- }
383
-
384
- printf("new before reroder: [");
385
- for (int j = 0; j < k; j++)
386
- printf("%" PRId64 ",%d ", I[j], D[j]);
387
- printf("]\n");
388
-
389
- // re-order heap
390
- heap_reorder<CMax<int32_t, idx_t> > (k, D.data(), I.data());
391
-
392
- printf("ref: [");
393
- for (int j = 0; j < k; j++)
394
- printf("%" PRId64 ",%d ", I_ref[j], D_ref[j]);
395
- printf("]\nnew: [");
396
- for (int j = 0; j < k; j++)
397
- printf("%" PRId64 ",%d ", I[j], D[j]);
398
- printf("]\n");
399
-
400
- // check that we have the same results as the reference search
401
- for (int j = 0; j < k; j++) {
402
- // here the order is not guaranteed to be the same
403
- // so we scan through ref results
404
- // EXPECT_EQ (I[j], I_ref[i * k + j]);
405
- EXPECT_LE (D[j], D_ref[i * k + k - 1]);
406
- if (D[j] < D_ref[i * k + k - 1]) {
407
- int j2 = 0;
408
- while (j2 < k) {
409
- if (I[j] == I_ref[i * k + j2]) break;
410
- j2++;
411
- }
412
- EXPECT_LT(j2, k); // it was found
413
- if (j2 < k) {
414
- EXPECT_EQ(D[j], D_ref[i * k + j2]);
415
- }
416
- }
417
-
418
- }
419
-
420
- }
421
-
422
-
423
- }
424
-
425
- } // anonymous namespace
426
-
427
-
428
- TEST(TestLowLevelIVF, IVFBinary) {
429
- test_lowlevel_access_binary ("BIVF32");
430
- }
431
-
432
-
433
- namespace {
434
-
435
- void test_threaded_search (const char *index_key, MetricType metric) {
436
- std::unique_ptr<Index> index = make_trained_index(index_key, metric);
437
-
438
- auto xb = make_data (nb);
439
- index->add(nb, xb.data());
440
-
441
- /** handle the case if we have a preprocessor */
442
-
443
- const IndexPreTransform *index_pt =
444
- dynamic_cast<const IndexPreTransform*> (index.get());
445
-
446
- int dt = index->d;
447
- const float * xbt = xb.data();
448
- std::unique_ptr<float []> del_xbt;
449
-
450
- if (index_pt) {
451
- dt = index_pt->index->d;
452
- xbt = index_pt->apply_chain (nb, xb.data());
453
- if (xbt != xb.data()) {
454
- del_xbt.reset((float*)xbt);
455
- }
456
- }
457
-
458
- IndexIVF * index_ivf = ivflib::extract_index_ivf (index.get());
459
-
460
- /** Test independent search
461
- *
462
- * Manually scans through inverted lists, computing distances and
463
- * ordering results organized in a heap.
464
- */
465
-
466
- // sample some example queries and get reference search results.
467
- auto xq = make_data (nq);
468
- auto ref_I = search_index (index.get(), xq.data());
469
-
470
- // handle preprocessing
471
- const float * xqt = xq.data();
472
- std::unique_ptr<float []> del_xqt;
473
-
474
- if (index_pt) {
475
- xqt = index_pt->apply_chain (nq, xq.data());
476
- if (xqt != xq.data()) {
477
- del_xqt.reset((float*)xqt);
478
- }
479
- }
480
-
481
- // quantize the queries to get the inverted list ids to visit.
482
- int nprobe = index_ivf->nprobe;
483
-
484
- std::vector<idx_t> q_lists (nq * nprobe);
485
- std::vector<float> q_dis (nq * nprobe);
486
-
487
- index_ivf->quantizer->search (nq, xqt, nprobe,
488
- q_dis.data(), q_lists.data());
489
-
490
- // now run search in this many threads
491
- int nproc = 3;
492
-
493
-
494
- for (int i = 0; i < nq; i++) {
495
-
496
- // one result table per thread
497
- std::vector<idx_t> I (k * nproc, -1);
498
- float default_dis = metric == METRIC_L2 ? HUGE_VAL : -HUGE_VAL;
499
- std::vector<float> D (k * nproc, default_dis);
500
-
501
- auto search_function = [index_ivf, &I, &D, dt, i, nproc,
502
- xqt, nprobe, &q_dis, &q_lists]
503
- (int rank) {
504
- const InvertedLists *il = index_ivf->invlists;
505
-
506
- // object that does the scanning and distance computations.
507
- std::unique_ptr<InvertedListScanner> scanner (
508
- index_ivf->get_InvertedListScanner());
509
-
510
- idx_t *local_I = I.data() + rank * k;
511
- float *local_D = D.data() + rank * k;
512
-
513
- scanner->set_query (xqt + i * dt);
514
-
515
- for (int j = rank; j < nprobe; j += nproc) {
516
- int list_no = q_lists[i * nprobe + j];
517
- if (list_no < 0) continue;
518
- scanner->set_list (list_no, q_dis[i * nprobe + j]);
519
-
520
- scanner->scan_codes (
521
- il->list_size (list_no),
522
- InvertedLists::ScopedCodes(il, list_no).get(),
523
- InvertedLists::ScopedIds(il, list_no).get(),
524
- local_D, local_I, k);
525
- }
526
- };
527
-
528
- // start the threads. Threads are numbered rank=0..nproc-1 (a la MPI)
529
- // thread rank takes care of inverted lists
530
- // rank, rank+nproc, rank+2*nproc,...
531
- std::vector<std::thread> threads;
532
- for (int rank = 0; rank < nproc; rank++) {
533
- threads.emplace_back(search_function, rank);
534
- }
535
-
536
- // join threads, merge heaps
537
- for (int rank = 0; rank < nproc; rank++) {
538
- threads[rank].join();
539
- if (rank == 0) continue; // nothing to merge
540
- // merge into first result
541
- if (metric == METRIC_L2) {
542
- maxheap_addn (k, D.data(), I.data(),
543
- D.data() + rank * k,
544
- I.data() + rank * k, k);
545
- } else {
546
- minheap_addn (k, D.data(), I.data(),
547
- D.data() + rank * k,
548
- I.data() + rank * k, k);
549
- }
550
- }
551
-
552
- // re-order heap
553
- if (metric == METRIC_L2) {
554
- maxheap_reorder (k, D.data(), I.data());
555
- } else {
556
- minheap_reorder (k, D.data(), I.data());
557
- }
558
-
559
- // check that we have the same results as the reference search
560
- for (int j = 0; j < k; j++) {
561
- EXPECT_EQ (I[j], ref_I[i * k + j]);
562
- }
563
- }
564
-
565
-
566
- }
567
-
568
- } // anonymous namespace
569
-
570
-
571
- TEST(TestLowLevelIVF, ThreadedSearch) {
572
- test_threaded_search ("IVF32,Flat", METRIC_L2);
573
- }