faiss 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +1 -1
  6. data/lib/faiss/version.rb +1 -1
  7. data/vendor/faiss/faiss/AutoTune.cpp +36 -33
  8. data/vendor/faiss/faiss/AutoTune.h +6 -3
  9. data/vendor/faiss/faiss/Clustering.cpp +16 -12
  10. data/vendor/faiss/faiss/Index.cpp +3 -4
  11. data/vendor/faiss/faiss/Index.h +3 -3
  12. data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
  13. data/vendor/faiss/faiss/IndexBinary.h +1 -1
  14. data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
  15. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
  16. data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
  17. data/vendor/faiss/faiss/IndexFlat.h +0 -51
  18. data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
  19. data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
  20. data/vendor/faiss/faiss/IndexIVF.h +22 -15
  21. data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
  22. data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
  23. data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
  24. data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
  25. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
  26. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
  27. data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
  28. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  29. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
  30. data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
  31. data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
  32. data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
  33. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
  34. data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
  35. data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
  36. data/vendor/faiss/faiss/IndexRefine.h +73 -0
  37. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
  38. data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
  39. data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
  40. data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
  41. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
  42. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
  43. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
  44. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
  45. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
  46. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
  47. data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
  48. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
  49. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
  50. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
  51. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
  52. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
  53. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
  54. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
  55. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
  56. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
  57. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
  58. data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
  59. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
  60. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
  61. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
  62. data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
  63. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
  64. data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
  65. data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
  66. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
  67. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
  68. data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
  69. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
  70. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
  71. data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
  72. data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
  73. data/vendor/faiss/faiss/impl/io.cpp +33 -2
  74. data/vendor/faiss/faiss/impl/io.h +7 -2
  75. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
  76. data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
  77. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
  78. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
  79. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
  80. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
  81. data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
  82. data/vendor/faiss/faiss/index_factory.cpp +112 -7
  83. data/vendor/faiss/faiss/index_io.h +1 -48
  84. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
  85. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
  86. data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
  87. data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
  88. data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
  89. data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
  90. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
  91. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
  92. data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
  93. data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
  94. data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
  95. data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
  96. data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
  97. data/vendor/faiss/faiss/utils/Heap.h +61 -50
  98. data/vendor/faiss/faiss/utils/distances.cpp +164 -319
  99. data/vendor/faiss/faiss/utils/distances.h +28 -20
  100. data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
  101. data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
  102. data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
  103. data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
  104. data/vendor/faiss/faiss/utils/hamming.h +2 -7
  105. data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
  106. data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
  107. data/vendor/faiss/faiss/utils/partitioning.h +69 -0
  108. data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
  109. data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
  110. data/vendor/faiss/faiss/utils/simdlib.h +31 -0
  111. data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
  112. data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
  113. metadata +43 -141
  114. data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
  115. data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
  116. data/vendor/faiss/c_api/AutoTune_c.h +0 -66
  117. data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
  118. data/vendor/faiss/c_api/Clustering_c.h +0 -123
  119. data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
  120. data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
  121. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
  122. data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
  123. data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
  124. data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
  125. data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
  126. data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
  127. data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
  128. data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
  129. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
  130. data/vendor/faiss/c_api/IndexShards_c.h +0 -39
  131. data/vendor/faiss/c_api/Index_c.cpp +0 -105
  132. data/vendor/faiss/c_api/Index_c.h +0 -183
  133. data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
  134. data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
  135. data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
  136. data/vendor/faiss/c_api/clone_index_c.h +0 -32
  137. data/vendor/faiss/c_api/error_c.h +0 -42
  138. data/vendor/faiss/c_api/error_impl.cpp +0 -27
  139. data/vendor/faiss/c_api/error_impl.h +0 -16
  140. data/vendor/faiss/c_api/faiss_c.h +0 -58
  141. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
  142. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
  143. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
  144. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
  145. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
  146. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
  147. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
  148. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
  149. data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
  150. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
  151. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
  152. data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
  153. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
  154. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
  155. data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
  156. data/vendor/faiss/c_api/index_factory_c.h +0 -30
  157. data/vendor/faiss/c_api/index_io_c.cpp +0 -42
  158. data/vendor/faiss/c_api/index_io_c.h +0 -50
  159. data/vendor/faiss/c_api/macros_impl.h +0 -110
  160. data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
  161. data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
  162. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
  163. data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
  164. data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
  165. data/vendor/faiss/misc/test_blas.cpp +0 -87
  166. data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
  167. data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
  168. data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
  169. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
  170. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
  171. data/vendor/faiss/tests/test_merge.cpp +0 -260
  172. data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
  173. data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
  174. data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
  175. data/vendor/faiss/tests/test_params_override.cpp +0 -236
  176. data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
  177. data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
  178. data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
  179. data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
  180. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
  181. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
  182. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
  183. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
  184. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -1,573 +0,0 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
- #include <cinttypes>
9
- #include <cstdio>
10
- #include <cstdlib>
11
-
12
- #include <memory>
13
- #include <vector>
14
- #include <thread>
15
- #include <random>
16
-
17
- #include <gtest/gtest.h>
18
-
19
- #include <faiss/IndexIVF.h>
20
- #include <faiss/IndexBinaryIVF.h>
21
- #include <faiss/IndexPreTransform.h>
22
- #include <faiss/AutoTune.h>
23
- #include <faiss/index_factory.h>
24
- #include <faiss/index_io.h>
25
- #include <faiss/IVFlib.h>
26
- #include <faiss/VectorTransform.h>
27
-
28
-
29
- using namespace faiss;
30
-
31
- namespace {
32
-
33
- typedef Index::idx_t idx_t;
34
-
35
-
36
// Shared configuration for the tests in this file. None of these values is
// modified after start-up, so they are compile-time constants.

// dimension of the vectors to index
constexpr int d = 32;

// number of training vectors
constexpr size_t nt = 5000;

// number of database vectors added to each index under test
constexpr size_t nb = 1000;

// number of query vectors
constexpr size_t nq = 200;

// number of neighbours requested per query
constexpr int k = 10;

// Generator shared by the data factories. It is default-constructed (no
// explicit seed) and advanced by every draw, so it has to stay mutable.
std::mt19937 rng;
51
-
52
-
53
- std::vector<float> make_data(size_t n)
54
- {
55
- std::vector <float> database (n * d);
56
- std::uniform_real_distribution<> distrib;
57
- for (size_t i = 0; i < n * d; i++) {
58
- database[i] = distrib(rng);
59
- }
60
- return database;
61
- }
62
-
63
- std::unique_ptr<Index> make_trained_index(const char *index_type,
64
- MetricType metric_type)
65
- {
66
- auto index = std::unique_ptr<Index>(index_factory(
67
- d, index_type, metric_type));
68
- auto xt = make_data(nt);
69
- index->train(nt, xt.data());
70
- ParameterSpace().set_index_parameter (index.get(), "nprobe", 4);
71
- return index;
72
- }
73
-
74
- std::vector<idx_t> search_index(Index *index, const float *xq) {
75
- std::vector<idx_t> I(k * nq);
76
- std::vector<float> D(k * nq);
77
- index->search (nq, xq, k, D.data(), I.data());
78
- return I;
79
- }
80
-
81
-
82
-
83
-
84
- /*************************************************************
85
- * Test functions for a given index type
86
- *************************************************************/
87
-
88
-
89
-
90
- void test_lowlevel_access (const char *index_key, MetricType metric) {
91
- std::unique_ptr<Index> index = make_trained_index(index_key, metric);
92
-
93
- auto xb = make_data (nb);
94
- index->add(nb, xb.data());
95
-
96
- /** handle the case if we have a preprocessor */
97
-
98
- const IndexPreTransform *index_pt =
99
- dynamic_cast<const IndexPreTransform*> (index.get());
100
-
101
- int dt = index->d;
102
- const float * xbt = xb.data();
103
- std::unique_ptr<float []> del_xbt;
104
-
105
- if (index_pt) {
106
- dt = index_pt->index->d;
107
- xbt = index_pt->apply_chain (nb, xb.data());
108
- if (xbt != xb.data()) {
109
- del_xbt.reset((float*)xbt);
110
- }
111
- }
112
-
113
- IndexIVF * index_ivf = ivflib::extract_index_ivf (index.get());
114
-
115
- /** Test independent encoding
116
- *
117
- * Makes it possible to do additions on a custom inverted list
118
- * implementation. From a set of vectors, computes the inverted
119
- * list ids + the codes corresponding to each vector.
120
- */
121
-
122
- std::vector<idx_t> list_nos (nb);
123
- std::vector<uint8_t> codes (index_ivf->code_size * nb);
124
- index_ivf->quantizer->assign(nb, xbt, list_nos.data());
125
- index_ivf->encode_vectors (nb, xbt, list_nos.data(), codes.data());
126
-
127
- // compare with normal IVF addition
128
-
129
- const InvertedLists *il = index_ivf->invlists;
130
-
131
- for (int list_no = 0; list_no < index_ivf->nlist; list_no++) {
132
- InvertedLists::ScopedCodes ivf_codes (il, list_no);
133
- InvertedLists::ScopedIds ivf_ids (il, list_no);
134
- size_t list_size = il->list_size (list_no);
135
- for (int i = 0; i < list_size; i++) {
136
- const uint8_t *ref_code = ivf_codes.get() + i * il->code_size;
137
- const uint8_t *new_code =
138
- codes.data() + ivf_ids[i] * il->code_size;
139
- EXPECT_EQ (memcmp(ref_code, new_code, il->code_size), 0);
140
- }
141
- }
142
-
143
- /** Test independent search
144
- *
145
- * Manually scans through inverted lists, computing distances and
146
- * ordering results organized in a heap.
147
- */
148
-
149
- // sample some example queries and get reference search results.
150
- auto xq = make_data (nq);
151
- auto ref_I = search_index (index.get(), xq.data());
152
-
153
- // handle preprocessing
154
- const float * xqt = xq.data();
155
- std::unique_ptr<float []> del_xqt;
156
-
157
- if (index_pt) {
158
- xqt = index_pt->apply_chain (nq, xq.data());
159
- if (xqt != xq.data()) {
160
- del_xqt.reset((float*)xqt);
161
- }
162
- }
163
-
164
- // quantize the queries to get the inverted list ids to visit.
165
- int nprobe = index_ivf->nprobe;
166
-
167
- std::vector<idx_t> q_lists (nq * nprobe);
168
- std::vector<float> q_dis (nq * nprobe);
169
-
170
- index_ivf->quantizer->search (nq, xqt, nprobe,
171
- q_dis.data(), q_lists.data());
172
-
173
- // object that does the scanning and distance computations.
174
- std::unique_ptr<InvertedListScanner> scanner (
175
- index_ivf->get_InvertedListScanner());
176
-
177
- for (int i = 0; i < nq; i++) {
178
- std::vector<idx_t> I (k, -1);
179
- float default_dis = metric == METRIC_L2 ? HUGE_VAL : -HUGE_VAL;
180
- std::vector<float> D (k, default_dis);
181
-
182
- scanner->set_query (xqt + i * dt);
183
-
184
- for (int j = 0; j < nprobe; j++) {
185
- int list_no = q_lists[i * nprobe + j];
186
- if (list_no < 0) continue;
187
- scanner->set_list (list_no, q_dis[i * nprobe + j]);
188
-
189
- // here we get the inverted lists from the InvertedLists
190
- // object but they could come from anywhere
191
-
192
- scanner->scan_codes (
193
- il->list_size (list_no),
194
- InvertedLists::ScopedCodes(il, list_no).get(),
195
- InvertedLists::ScopedIds(il, list_no).get(),
196
- D.data(), I.data(), k);
197
-
198
- if (j == 0) {
199
- // all results so far come from list_no, so let's check if
200
- // the distance function works
201
- for (int jj = 0; jj < k; jj++) {
202
- int vno = I[jj];
203
- if (vno < 0) break; // heap is not full yet
204
-
205
- // we have the codes from the addition test
206
- float computed_D = scanner->distance_to_code (
207
- codes.data() + vno * il->code_size);
208
-
209
- EXPECT_EQ (computed_D, D[jj]);
210
- }
211
- }
212
- }
213
-
214
- // re-order heap
215
- if (metric == METRIC_L2) {
216
- maxheap_reorder (k, D.data(), I.data());
217
- } else {
218
- minheap_reorder (k, D.data(), I.data());
219
- }
220
-
221
- // check that we have the same results as the reference search
222
- for (int j = 0; j < k; j++) {
223
- EXPECT_EQ (I[j], ref_I[i * k + j]);
224
- }
225
- }
226
-
227
-
228
- }
229
-
230
- } // anonymous namespace
231
-
232
-
233
-
234
- /*************************************************************
235
- * Test entry points
236
- *************************************************************/
237
-
238
- TEST(TestLowLevelIVF, IVFFlatL2) {
239
- test_lowlevel_access ("IVF32,Flat", METRIC_L2);
240
- }
241
-
242
- TEST(TestLowLevelIVF, PCAIVFFlatL2) {
243
- test_lowlevel_access ("PCAR16,IVF32,Flat", METRIC_L2);
244
- }
245
-
246
- TEST(TestLowLevelIVF, IVFFlatIP) {
247
- test_lowlevel_access ("IVF32,Flat", METRIC_INNER_PRODUCT);
248
- }
249
-
250
- TEST(TestLowLevelIVF, IVFSQL2) {
251
- test_lowlevel_access ("IVF32,SQ8", METRIC_L2);
252
- }
253
-
254
- TEST(TestLowLevelIVF, IVFSQIP) {
255
- test_lowlevel_access ("IVF32,SQ8", METRIC_INNER_PRODUCT);
256
- }
257
-
258
-
259
- TEST(TestLowLevelIVF, IVFPQL2) {
260
- test_lowlevel_access ("IVF32,PQ4np", METRIC_L2);
261
- }
262
-
263
- TEST(TestLowLevelIVF, IVFPQIP) {
264
- test_lowlevel_access ("IVF32,PQ4np", METRIC_INNER_PRODUCT);
265
- }
266
-
267
-
268
- /*************************************************************
269
- * Same for binary (a bit simpler)
270
- *************************************************************/
271
-
272
- namespace {
273
-
274
- int nbit = 256;
275
-
276
- // here d is used the number of ints -> d=32 means 128 bits
277
-
278
- std::vector<uint8_t> make_data_binary(size_t n)
279
- {
280
-
281
- std::vector <uint8_t> database (n * nbit / 8);
282
- std::uniform_int_distribution<> distrib;
283
- for (size_t i = 0; i < n * d; i++) {
284
- database[i] = distrib(rng);
285
- }
286
- return database;
287
- }
288
-
289
- std::unique_ptr<IndexBinary> make_trained_index_binary(const char *index_type)
290
- {
291
- auto index = std::unique_ptr<IndexBinary>(index_binary_factory(
292
- nbit, index_type));
293
- auto xt = make_data_binary (nt);
294
- index->train(nt, xt.data());
295
- return index;
296
- }
297
-
298
-
299
- void test_lowlevel_access_binary (const char *index_key) {
300
- std::unique_ptr<IndexBinary> index =
301
- make_trained_index_binary (index_key);
302
-
303
- IndexBinaryIVF * index_ivf = dynamic_cast<IndexBinaryIVF*>
304
- (index.get());
305
- assert (index_ivf);
306
-
307
- index_ivf->nprobe = 4;
308
-
309
- auto xb = make_data_binary (nb);
310
- index->add(nb, xb.data());
311
-
312
- std::vector<idx_t> list_nos (nb);
313
- index_ivf->quantizer->assign(nb, xb.data(), list_nos.data());
314
-
315
- /* For binary there is no test for encoding because binary vectors
316
- * are copied verbatim to the inverted lists */
317
-
318
- const InvertedLists *il = index_ivf->invlists;
319
-
320
- /** Test independent search
321
- *
322
- * Manually scans through inverted lists, computing distances and
323
- * ordering results organized in a heap.
324
- */
325
-
326
- // sample some example queries and get reference search results.
327
- auto xq = make_data_binary (nq);
328
-
329
- std::vector<idx_t> I_ref(k * nq);
330
- std::vector<int32_t> D_ref(k * nq);
331
- index->search (nq, xq.data(), k, D_ref.data(), I_ref.data());
332
-
333
- // quantize the queries to get the inverted list ids to visit.
334
- int nprobe = index_ivf->nprobe;
335
-
336
- std::vector<idx_t> q_lists (nq * nprobe);
337
- std::vector<int32_t> q_dis (nq * nprobe);
338
-
339
- // quantize queries
340
- index_ivf->quantizer->search (nq, xq.data(), nprobe,
341
- q_dis.data(), q_lists.data());
342
-
343
- // object that does the scanning and distance computations.
344
- std::unique_ptr<BinaryInvertedListScanner> scanner (
345
- index_ivf->get_InvertedListScanner());
346
-
347
- for (int i = 0; i < nq; i++) {
348
- std::vector<idx_t> I (k, -1);
349
- uint32_t default_dis = 1 << 30;
350
- std::vector<int32_t> D (k, default_dis);
351
-
352
- scanner->set_query (xq.data() + i * index_ivf->code_size);
353
-
354
- for (int j = 0; j < nprobe; j++) {
355
- int list_no = q_lists[i * nprobe + j];
356
- if (list_no < 0) continue;
357
- scanner->set_list (list_no, q_dis[i * nprobe + j]);
358
-
359
- // here we get the inverted lists from the InvertedLists
360
- // object but they could come from anywhere
361
-
362
- scanner->scan_codes (
363
- il->list_size (list_no),
364
- InvertedLists::ScopedCodes(il, list_no).get(),
365
- InvertedLists::ScopedIds(il, list_no).get(),
366
- D.data(), I.data(), k);
367
-
368
- if (j == 0) {
369
- // all results so far come from list_no, so let's check if
370
- // the distance function works
371
- for (int jj = 0; jj < k; jj++) {
372
- int vno = I[jj];
373
- if (vno < 0) break; // heap is not full yet
374
-
375
- // we have the codes from the addition test
376
- float computed_D = scanner->distance_to_code (
377
- xb.data() + vno * il->code_size);
378
-
379
- EXPECT_EQ (computed_D, D[jj]);
380
- }
381
- }
382
- }
383
-
384
- printf("new before reroder: [");
385
- for (int j = 0; j < k; j++)
386
- printf("%" PRId64 ",%d ", I[j], D[j]);
387
- printf("]\n");
388
-
389
- // re-order heap
390
- heap_reorder<CMax<int32_t, idx_t> > (k, D.data(), I.data());
391
-
392
- printf("ref: [");
393
- for (int j = 0; j < k; j++)
394
- printf("%" PRId64 ",%d ", I_ref[j], D_ref[j]);
395
- printf("]\nnew: [");
396
- for (int j = 0; j < k; j++)
397
- printf("%" PRId64 ",%d ", I[j], D[j]);
398
- printf("]\n");
399
-
400
- // check that we have the same results as the reference search
401
- for (int j = 0; j < k; j++) {
402
- // here the order is not guaranteed to be the same
403
- // so we scan through ref results
404
- // EXPECT_EQ (I[j], I_ref[i * k + j]);
405
- EXPECT_LE (D[j], D_ref[i * k + k - 1]);
406
- if (D[j] < D_ref[i * k + k - 1]) {
407
- int j2 = 0;
408
- while (j2 < k) {
409
- if (I[j] == I_ref[i * k + j2]) break;
410
- j2++;
411
- }
412
- EXPECT_LT(j2, k); // it was found
413
- if (j2 < k) {
414
- EXPECT_EQ(D[j], D_ref[i * k + j2]);
415
- }
416
- }
417
-
418
- }
419
-
420
- }
421
-
422
-
423
- }
424
-
425
- } // anonymous namespace
426
-
427
-
428
- TEST(TestLowLevelIVF, IVFBinary) {
429
- test_lowlevel_access_binary ("BIVF32");
430
- }
431
-
432
-
433
- namespace {
434
-
435
- void test_threaded_search (const char *index_key, MetricType metric) {
436
- std::unique_ptr<Index> index = make_trained_index(index_key, metric);
437
-
438
- auto xb = make_data (nb);
439
- index->add(nb, xb.data());
440
-
441
- /** handle the case if we have a preprocessor */
442
-
443
- const IndexPreTransform *index_pt =
444
- dynamic_cast<const IndexPreTransform*> (index.get());
445
-
446
- int dt = index->d;
447
- const float * xbt = xb.data();
448
- std::unique_ptr<float []> del_xbt;
449
-
450
- if (index_pt) {
451
- dt = index_pt->index->d;
452
- xbt = index_pt->apply_chain (nb, xb.data());
453
- if (xbt != xb.data()) {
454
- del_xbt.reset((float*)xbt);
455
- }
456
- }
457
-
458
- IndexIVF * index_ivf = ivflib::extract_index_ivf (index.get());
459
-
460
- /** Test independent search
461
- *
462
- * Manually scans through inverted lists, computing distances and
463
- * ordering results organized in a heap.
464
- */
465
-
466
- // sample some example queries and get reference search results.
467
- auto xq = make_data (nq);
468
- auto ref_I = search_index (index.get(), xq.data());
469
-
470
- // handle preprocessing
471
- const float * xqt = xq.data();
472
- std::unique_ptr<float []> del_xqt;
473
-
474
- if (index_pt) {
475
- xqt = index_pt->apply_chain (nq, xq.data());
476
- if (xqt != xq.data()) {
477
- del_xqt.reset((float*)xqt);
478
- }
479
- }
480
-
481
- // quantize the queries to get the inverted list ids to visit.
482
- int nprobe = index_ivf->nprobe;
483
-
484
- std::vector<idx_t> q_lists (nq * nprobe);
485
- std::vector<float> q_dis (nq * nprobe);
486
-
487
- index_ivf->quantizer->search (nq, xqt, nprobe,
488
- q_dis.data(), q_lists.data());
489
-
490
- // now run search in this many threads
491
- int nproc = 3;
492
-
493
-
494
- for (int i = 0; i < nq; i++) {
495
-
496
- // one result table per thread
497
- std::vector<idx_t> I (k * nproc, -1);
498
- float default_dis = metric == METRIC_L2 ? HUGE_VAL : -HUGE_VAL;
499
- std::vector<float> D (k * nproc, default_dis);
500
-
501
- auto search_function = [index_ivf, &I, &D, dt, i, nproc,
502
- xqt, nprobe, &q_dis, &q_lists]
503
- (int rank) {
504
- const InvertedLists *il = index_ivf->invlists;
505
-
506
- // object that does the scanning and distance computations.
507
- std::unique_ptr<InvertedListScanner> scanner (
508
- index_ivf->get_InvertedListScanner());
509
-
510
- idx_t *local_I = I.data() + rank * k;
511
- float *local_D = D.data() + rank * k;
512
-
513
- scanner->set_query (xqt + i * dt);
514
-
515
- for (int j = rank; j < nprobe; j += nproc) {
516
- int list_no = q_lists[i * nprobe + j];
517
- if (list_no < 0) continue;
518
- scanner->set_list (list_no, q_dis[i * nprobe + j]);
519
-
520
- scanner->scan_codes (
521
- il->list_size (list_no),
522
- InvertedLists::ScopedCodes(il, list_no).get(),
523
- InvertedLists::ScopedIds(il, list_no).get(),
524
- local_D, local_I, k);
525
- }
526
- };
527
-
528
- // start the threads. Threads are numbered rank=0..nproc-1 (a la MPI)
529
- // thread rank takes care of inverted lists
530
- // rank, rank+nproc, rank+2*nproc,...
531
- std::vector<std::thread> threads;
532
- for (int rank = 0; rank < nproc; rank++) {
533
- threads.emplace_back(search_function, rank);
534
- }
535
-
536
- // join threads, merge heaps
537
- for (int rank = 0; rank < nproc; rank++) {
538
- threads[rank].join();
539
- if (rank == 0) continue; // nothing to merge
540
- // merge into first result
541
- if (metric == METRIC_L2) {
542
- maxheap_addn (k, D.data(), I.data(),
543
- D.data() + rank * k,
544
- I.data() + rank * k, k);
545
- } else {
546
- minheap_addn (k, D.data(), I.data(),
547
- D.data() + rank * k,
548
- I.data() + rank * k, k);
549
- }
550
- }
551
-
552
- // re-order heap
553
- if (metric == METRIC_L2) {
554
- maxheap_reorder (k, D.data(), I.data());
555
- } else {
556
- minheap_reorder (k, D.data(), I.data());
557
- }
558
-
559
- // check that we have the same results as the reference search
560
- for (int j = 0; j < k; j++) {
561
- EXPECT_EQ (I[j], ref_I[i * k + j]);
562
- }
563
- }
564
-
565
-
566
- }
567
-
568
- } // anonymous namespace
569
-
570
-
571
- TEST(TestLowLevelIVF, ThreadedSearch) {
572
- test_threaded_search ("IVF32,Flat", METRIC_L2);
573
- }