faiss 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +16 -4
  5. data/ext/faiss/ext.cpp +12 -308
  6. data/ext/faiss/extconf.rb +6 -3
  7. data/ext/faiss/index.cpp +189 -0
  8. data/ext/faiss/index_binary.cpp +75 -0
  9. data/ext/faiss/kmeans.cpp +40 -0
  10. data/ext/faiss/numo.hpp +867 -0
  11. data/ext/faiss/pca_matrix.cpp +33 -0
  12. data/ext/faiss/product_quantizer.cpp +53 -0
  13. data/ext/faiss/utils.cpp +13 -0
  14. data/ext/faiss/utils.h +5 -0
  15. data/lib/faiss.rb +0 -5
  16. data/lib/faiss/version.rb +1 -1
  17. data/vendor/faiss/faiss/AutoTune.cpp +36 -33
  18. data/vendor/faiss/faiss/AutoTune.h +6 -3
  19. data/vendor/faiss/faiss/Clustering.cpp +16 -12
  20. data/vendor/faiss/faiss/Index.cpp +3 -4
  21. data/vendor/faiss/faiss/Index.h +3 -3
  22. data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
  23. data/vendor/faiss/faiss/IndexBinary.h +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
  25. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
  26. data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
  27. data/vendor/faiss/faiss/IndexFlat.h +0 -51
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
  29. data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
  30. data/vendor/faiss/faiss/IndexIVF.h +22 -15
  31. data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
  32. data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
  33. data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
  34. data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
  35. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
  36. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
  37. data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
  38. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  39. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
  40. data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
  41. data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
  42. data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
  43. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
  44. data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
  45. data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
  46. data/vendor/faiss/faiss/IndexRefine.h +73 -0
  47. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
  48. data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
  49. data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
  50. data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
  51. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
  52. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
  53. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
  54. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
  55. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
  56. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
  57. data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
  58. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
  59. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
  60. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
  61. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
  62. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
  63. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
  64. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
  65. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
  66. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
  67. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
  68. data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
  69. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
  70. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
  71. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
  72. data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
  73. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
  74. data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
  75. data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
  76. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
  77. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
  78. data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
  79. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
  80. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
  81. data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
  82. data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
  83. data/vendor/faiss/faiss/impl/io.cpp +33 -2
  84. data/vendor/faiss/faiss/impl/io.h +7 -2
  85. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
  86. data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
  87. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
  88. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
  89. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
  90. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
  91. data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
  92. data/vendor/faiss/faiss/index_factory.cpp +112 -7
  93. data/vendor/faiss/faiss/index_io.h +1 -48
  94. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
  95. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
  96. data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
  97. data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
  98. data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
  99. data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
  100. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
  101. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
  102. data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
  103. data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
  104. data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
  105. data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
  106. data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
  107. data/vendor/faiss/faiss/utils/Heap.h +61 -50
  108. data/vendor/faiss/faiss/utils/distances.cpp +164 -319
  109. data/vendor/faiss/faiss/utils/distances.h +28 -20
  110. data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
  111. data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
  112. data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
  113. data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
  114. data/vendor/faiss/faiss/utils/hamming.h +2 -7
  115. data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
  116. data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
  117. data/vendor/faiss/faiss/utils/partitioning.h +69 -0
  118. data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
  119. data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
  120. data/vendor/faiss/faiss/utils/simdlib.h +31 -0
  121. data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
  122. data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
  123. metadata +54 -149
  124. data/lib/faiss/index.rb +0 -20
  125. data/lib/faiss/index_binary.rb +0 -20
  126. data/lib/faiss/kmeans.rb +0 -15
  127. data/lib/faiss/pca_matrix.rb +0 -15
  128. data/lib/faiss/product_quantizer.rb +0 -22
  129. data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
  130. data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
  131. data/vendor/faiss/c_api/AutoTune_c.h +0 -66
  132. data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
  133. data/vendor/faiss/c_api/Clustering_c.h +0 -123
  134. data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
  135. data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
  136. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
  137. data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
  138. data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
  139. data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
  140. data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
  141. data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
  142. data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
  143. data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
  144. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
  145. data/vendor/faiss/c_api/IndexShards_c.h +0 -39
  146. data/vendor/faiss/c_api/Index_c.cpp +0 -105
  147. data/vendor/faiss/c_api/Index_c.h +0 -183
  148. data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
  149. data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
  150. data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
  151. data/vendor/faiss/c_api/clone_index_c.h +0 -32
  152. data/vendor/faiss/c_api/error_c.h +0 -42
  153. data/vendor/faiss/c_api/error_impl.cpp +0 -27
  154. data/vendor/faiss/c_api/error_impl.h +0 -16
  155. data/vendor/faiss/c_api/faiss_c.h +0 -58
  156. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
  157. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
  158. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
  159. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
  160. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
  161. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
  162. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
  163. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
  164. data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
  165. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
  166. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
  167. data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
  168. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
  169. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
  170. data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
  171. data/vendor/faiss/c_api/index_factory_c.h +0 -30
  172. data/vendor/faiss/c_api/index_io_c.cpp +0 -42
  173. data/vendor/faiss/c_api/index_io_c.h +0 -50
  174. data/vendor/faiss/c_api/macros_impl.h +0 -110
  175. data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
  176. data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
  177. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
  178. data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
  179. data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
  180. data/vendor/faiss/misc/test_blas.cpp +0 -87
  181. data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
  182. data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
  183. data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
  184. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
  185. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
  186. data/vendor/faiss/tests/test_merge.cpp +0 -260
  187. data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
  188. data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
  189. data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
  190. data/vendor/faiss/tests/test_params_override.cpp +0 -236
  191. data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
  192. data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
  193. data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
  194. data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
  195. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
  196. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
  197. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
  198. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
  199. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -1,573 +0,0 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
- #include <cinttypes>
9
- #include <cstdio>
10
- #include <cstdlib>
11
-
12
- #include <memory>
13
- #include <vector>
14
- #include <thread>
15
- #include <random>
16
-
17
- #include <gtest/gtest.h>
18
-
19
- #include <faiss/IndexIVF.h>
20
- #include <faiss/IndexBinaryIVF.h>
21
- #include <faiss/IndexPreTransform.h>
22
- #include <faiss/AutoTune.h>
23
- #include <faiss/index_factory.h>
24
- #include <faiss/index_io.h>
25
- #include <faiss/IVFlib.h>
26
- #include <faiss/VectorTransform.h>
27
-
28
-
29
- using namespace faiss;
30
-
31
- namespace {
32
-
33
- typedef Index::idx_t idx_t;
34
-
35
-
36
- // dimension of the vectors to index
37
- int d = 32;
38
-
39
- // nb of training vectors
40
- size_t nt = 5000;
41
-
42
- // size of the database points per window step
43
- size_t nb = 1000;
44
-
45
- // nb of queries
46
- size_t nq = 200;
47
-
48
- int k = 10;
49
-
50
- std::mt19937 rng;
51
-
52
-
53
- std::vector<float> make_data(size_t n)
54
- {
55
- std::vector <float> database (n * d);
56
- std::uniform_real_distribution<> distrib;
57
- for (size_t i = 0; i < n * d; i++) {
58
- database[i] = distrib(rng);
59
- }
60
- return database;
61
- }
62
-
63
- std::unique_ptr<Index> make_trained_index(const char *index_type,
64
- MetricType metric_type)
65
- {
66
- auto index = std::unique_ptr<Index>(index_factory(
67
- d, index_type, metric_type));
68
- auto xt = make_data(nt);
69
- index->train(nt, xt.data());
70
- ParameterSpace().set_index_parameter (index.get(), "nprobe", 4);
71
- return index;
72
- }
73
-
74
- std::vector<idx_t> search_index(Index *index, const float *xq) {
75
- std::vector<idx_t> I(k * nq);
76
- std::vector<float> D(k * nq);
77
- index->search (nq, xq, k, D.data(), I.data());
78
- return I;
79
- }
80
-
81
-
82
-
83
-
84
- /*************************************************************
85
- * Test functions for a given index type
86
- *************************************************************/
87
-
88
-
89
-
90
- void test_lowlevel_access (const char *index_key, MetricType metric) {
91
- std::unique_ptr<Index> index = make_trained_index(index_key, metric);
92
-
93
- auto xb = make_data (nb);
94
- index->add(nb, xb.data());
95
-
96
- /** handle the case if we have a preprocessor */
97
-
98
- const IndexPreTransform *index_pt =
99
- dynamic_cast<const IndexPreTransform*> (index.get());
100
-
101
- int dt = index->d;
102
- const float * xbt = xb.data();
103
- std::unique_ptr<float []> del_xbt;
104
-
105
- if (index_pt) {
106
- dt = index_pt->index->d;
107
- xbt = index_pt->apply_chain (nb, xb.data());
108
- if (xbt != xb.data()) {
109
- del_xbt.reset((float*)xbt);
110
- }
111
- }
112
-
113
- IndexIVF * index_ivf = ivflib::extract_index_ivf (index.get());
114
-
115
- /** Test independent encoding
116
- *
117
- * Makes it possible to do additions on a custom inverted list
118
- * implementation. From a set of vectors, computes the inverted
119
- * list ids + the codes corresponding to each vector.
120
- */
121
-
122
- std::vector<idx_t> list_nos (nb);
123
- std::vector<uint8_t> codes (index_ivf->code_size * nb);
124
- index_ivf->quantizer->assign(nb, xbt, list_nos.data());
125
- index_ivf->encode_vectors (nb, xbt, list_nos.data(), codes.data());
126
-
127
- // compare with normal IVF addition
128
-
129
- const InvertedLists *il = index_ivf->invlists;
130
-
131
- for (int list_no = 0; list_no < index_ivf->nlist; list_no++) {
132
- InvertedLists::ScopedCodes ivf_codes (il, list_no);
133
- InvertedLists::ScopedIds ivf_ids (il, list_no);
134
- size_t list_size = il->list_size (list_no);
135
- for (int i = 0; i < list_size; i++) {
136
- const uint8_t *ref_code = ivf_codes.get() + i * il->code_size;
137
- const uint8_t *new_code =
138
- codes.data() + ivf_ids[i] * il->code_size;
139
- EXPECT_EQ (memcmp(ref_code, new_code, il->code_size), 0);
140
- }
141
- }
142
-
143
- /** Test independent search
144
- *
145
- * Manually scans through inverted lists, computing distances and
146
- * ordering results organized in a heap.
147
- */
148
-
149
- // sample some example queries and get reference search results.
150
- auto xq = make_data (nq);
151
- auto ref_I = search_index (index.get(), xq.data());
152
-
153
- // handle preprocessing
154
- const float * xqt = xq.data();
155
- std::unique_ptr<float []> del_xqt;
156
-
157
- if (index_pt) {
158
- xqt = index_pt->apply_chain (nq, xq.data());
159
- if (xqt != xq.data()) {
160
- del_xqt.reset((float*)xqt);
161
- }
162
- }
163
-
164
- // quantize the queries to get the inverted list ids to visit.
165
- int nprobe = index_ivf->nprobe;
166
-
167
- std::vector<idx_t> q_lists (nq * nprobe);
168
- std::vector<float> q_dis (nq * nprobe);
169
-
170
- index_ivf->quantizer->search (nq, xqt, nprobe,
171
- q_dis.data(), q_lists.data());
172
-
173
- // object that does the scanning and distance computations.
174
- std::unique_ptr<InvertedListScanner> scanner (
175
- index_ivf->get_InvertedListScanner());
176
-
177
- for (int i = 0; i < nq; i++) {
178
- std::vector<idx_t> I (k, -1);
179
- float default_dis = metric == METRIC_L2 ? HUGE_VAL : -HUGE_VAL;
180
- std::vector<float> D (k, default_dis);
181
-
182
- scanner->set_query (xqt + i * dt);
183
-
184
- for (int j = 0; j < nprobe; j++) {
185
- int list_no = q_lists[i * nprobe + j];
186
- if (list_no < 0) continue;
187
- scanner->set_list (list_no, q_dis[i * nprobe + j]);
188
-
189
- // here we get the inverted lists from the InvertedLists
190
- // object but they could come from anywhere
191
-
192
- scanner->scan_codes (
193
- il->list_size (list_no),
194
- InvertedLists::ScopedCodes(il, list_no).get(),
195
- InvertedLists::ScopedIds(il, list_no).get(),
196
- D.data(), I.data(), k);
197
-
198
- if (j == 0) {
199
- // all results so far come from list_no, so let's check if
200
- // the distance function works
201
- for (int jj = 0; jj < k; jj++) {
202
- int vno = I[jj];
203
- if (vno < 0) break; // heap is not full yet
204
-
205
- // we have the codes from the addition test
206
- float computed_D = scanner->distance_to_code (
207
- codes.data() + vno * il->code_size);
208
-
209
- EXPECT_EQ (computed_D, D[jj]);
210
- }
211
- }
212
- }
213
-
214
- // re-order heap
215
- if (metric == METRIC_L2) {
216
- maxheap_reorder (k, D.data(), I.data());
217
- } else {
218
- minheap_reorder (k, D.data(), I.data());
219
- }
220
-
221
- // check that we have the same results as the reference search
222
- for (int j = 0; j < k; j++) {
223
- EXPECT_EQ (I[j], ref_I[i * k + j]);
224
- }
225
- }
226
-
227
-
228
- }
229
-
230
- } // anonymous namespace
231
-
232
-
233
-
234
- /*************************************************************
235
- * Test entry points
236
- *************************************************************/
237
-
238
- TEST(TestLowLevelIVF, IVFFlatL2) {
239
- test_lowlevel_access ("IVF32,Flat", METRIC_L2);
240
- }
241
-
242
- TEST(TestLowLevelIVF, PCAIVFFlatL2) {
243
- test_lowlevel_access ("PCAR16,IVF32,Flat", METRIC_L2);
244
- }
245
-
246
- TEST(TestLowLevelIVF, IVFFlatIP) {
247
- test_lowlevel_access ("IVF32,Flat", METRIC_INNER_PRODUCT);
248
- }
249
-
250
- TEST(TestLowLevelIVF, IVFSQL2) {
251
- test_lowlevel_access ("IVF32,SQ8", METRIC_L2);
252
- }
253
-
254
- TEST(TestLowLevelIVF, IVFSQIP) {
255
- test_lowlevel_access ("IVF32,SQ8", METRIC_INNER_PRODUCT);
256
- }
257
-
258
-
259
- TEST(TestLowLevelIVF, IVFPQL2) {
260
- test_lowlevel_access ("IVF32,PQ4np", METRIC_L2);
261
- }
262
-
263
- TEST(TestLowLevelIVF, IVFPQIP) {
264
- test_lowlevel_access ("IVF32,PQ4np", METRIC_INNER_PRODUCT);
265
- }
266
-
267
-
268
- /*************************************************************
269
- * Same for binary (a bit simpler)
270
- *************************************************************/
271
-
272
- namespace {
273
-
274
- int nbit = 256;
275
-
276
- // here d is used as the number of bytes per vector -> d=32 means 256 bits (= nbit)
277
-
278
- std::vector<uint8_t> make_data_binary(size_t n)
279
- {
280
-
281
- std::vector <uint8_t> database (n * nbit / 8);
282
- std::uniform_int_distribution<> distrib;
283
- for (size_t i = 0; i < n * d; i++) {
284
- database[i] = distrib(rng);
285
- }
286
- return database;
287
- }
288
-
289
- std::unique_ptr<IndexBinary> make_trained_index_binary(const char *index_type)
290
- {
291
- auto index = std::unique_ptr<IndexBinary>(index_binary_factory(
292
- nbit, index_type));
293
- auto xt = make_data_binary (nt);
294
- index->train(nt, xt.data());
295
- return index;
296
- }
297
-
298
-
299
- void test_lowlevel_access_binary (const char *index_key) {
300
- std::unique_ptr<IndexBinary> index =
301
- make_trained_index_binary (index_key);
302
-
303
- IndexBinaryIVF * index_ivf = dynamic_cast<IndexBinaryIVF*>
304
- (index.get());
305
- assert (index_ivf);
306
-
307
- index_ivf->nprobe = 4;
308
-
309
- auto xb = make_data_binary (nb);
310
- index->add(nb, xb.data());
311
-
312
- std::vector<idx_t> list_nos (nb);
313
- index_ivf->quantizer->assign(nb, xb.data(), list_nos.data());
314
-
315
- /* For binary there is no test for encoding because binary vectors
316
- * are copied verbatim to the inverted lists */
317
-
318
- const InvertedLists *il = index_ivf->invlists;
319
-
320
- /** Test independent search
321
- *
322
- * Manually scans through inverted lists, computing distances and
323
- * ordering results organized in a heap.
324
- */
325
-
326
- // sample some example queries and get reference search results.
327
- auto xq = make_data_binary (nq);
328
-
329
- std::vector<idx_t> I_ref(k * nq);
330
- std::vector<int32_t> D_ref(k * nq);
331
- index->search (nq, xq.data(), k, D_ref.data(), I_ref.data());
332
-
333
- // quantize the queries to get the inverted list ids to visit.
334
- int nprobe = index_ivf->nprobe;
335
-
336
- std::vector<idx_t> q_lists (nq * nprobe);
337
- std::vector<int32_t> q_dis (nq * nprobe);
338
-
339
- // quantize queries
340
- index_ivf->quantizer->search (nq, xq.data(), nprobe,
341
- q_dis.data(), q_lists.data());
342
-
343
- // object that does the scanning and distance computations.
344
- std::unique_ptr<BinaryInvertedListScanner> scanner (
345
- index_ivf->get_InvertedListScanner());
346
-
347
- for (int i = 0; i < nq; i++) {
348
- std::vector<idx_t> I (k, -1);
349
- uint32_t default_dis = 1 << 30;
350
- std::vector<int32_t> D (k, default_dis);
351
-
352
- scanner->set_query (xq.data() + i * index_ivf->code_size);
353
-
354
- for (int j = 0; j < nprobe; j++) {
355
- int list_no = q_lists[i * nprobe + j];
356
- if (list_no < 0) continue;
357
- scanner->set_list (list_no, q_dis[i * nprobe + j]);
358
-
359
- // here we get the inverted lists from the InvertedLists
360
- // object but they could come from anywhere
361
-
362
- scanner->scan_codes (
363
- il->list_size (list_no),
364
- InvertedLists::ScopedCodes(il, list_no).get(),
365
- InvertedLists::ScopedIds(il, list_no).get(),
366
- D.data(), I.data(), k);
367
-
368
- if (j == 0) {
369
- // all results so far come from list_no, so let's check if
370
- // the distance function works
371
- for (int jj = 0; jj < k; jj++) {
372
- int vno = I[jj];
373
- if (vno < 0) break; // heap is not full yet
374
-
375
- // binary vectors are stored verbatim, so the codes are read directly from xb
376
- float computed_D = scanner->distance_to_code (
377
- xb.data() + vno * il->code_size);
378
-
379
- EXPECT_EQ (computed_D, D[jj]);
380
- }
381
- }
382
- }
383
-
384
- printf("new before reroder: [");
385
- for (int j = 0; j < k; j++)
386
- printf("%" PRId64 ",%d ", I[j], D[j]);
387
- printf("]\n");
388
-
389
- // re-order heap
390
- heap_reorder<CMax<int32_t, idx_t> > (k, D.data(), I.data());
391
-
392
- printf("ref: [");
393
- for (int j = 0; j < k; j++)
394
- printf("%" PRId64 ",%d ", I_ref[j], D_ref[j]);
395
- printf("]\nnew: [");
396
- for (int j = 0; j < k; j++)
397
- printf("%" PRId64 ",%d ", I[j], D[j]);
398
- printf("]\n");
399
-
400
- // check that we have the same results as the reference search
401
- for (int j = 0; j < k; j++) {
402
- // here the order is not guaranteed to be the same
403
- // so we scan through ref results
404
- // EXPECT_EQ (I[j], I_ref[i * k + j]);
405
- EXPECT_LE (D[j], D_ref[i * k + k - 1]);
406
- if (D[j] < D_ref[i * k + k - 1]) {
407
- int j2 = 0;
408
- while (j2 < k) {
409
- if (I[j] == I_ref[i * k + j2]) break;
410
- j2++;
411
- }
412
- EXPECT_LT(j2, k); // it was found
413
- if (j2 < k) {
414
- EXPECT_EQ(D[j], D_ref[i * k + j2]);
415
- }
416
- }
417
-
418
- }
419
-
420
- }
421
-
422
-
423
- }
424
-
425
- } // anonymous namespace
426
-
427
-
428
- TEST(TestLowLevelIVF, IVFBinary) {
429
- test_lowlevel_access_binary ("BIVF32");
430
- }
431
-
432
-
433
- namespace {
434
-
435
- void test_threaded_search (const char *index_key, MetricType metric) {
436
- std::unique_ptr<Index> index = make_trained_index(index_key, metric);
437
-
438
- auto xb = make_data (nb);
439
- index->add(nb, xb.data());
440
-
441
- /** handle the case if we have a preprocessor */
442
-
443
- const IndexPreTransform *index_pt =
444
- dynamic_cast<const IndexPreTransform*> (index.get());
445
-
446
- int dt = index->d;
447
- const float * xbt = xb.data();
448
- std::unique_ptr<float []> del_xbt;
449
-
450
- if (index_pt) {
451
- dt = index_pt->index->d;
452
- xbt = index_pt->apply_chain (nb, xb.data());
453
- if (xbt != xb.data()) {
454
- del_xbt.reset((float*)xbt);
455
- }
456
- }
457
-
458
- IndexIVF * index_ivf = ivflib::extract_index_ivf (index.get());
459
-
460
- /** Test independent search
461
- *
462
- * Manually scans through inverted lists, computing distances and
463
- * ordering results organized in a heap.
464
- */
465
-
466
- // sample some example queries and get reference search results.
467
- auto xq = make_data (nq);
468
- auto ref_I = search_index (index.get(), xq.data());
469
-
470
- // handle preprocessing
471
- const float * xqt = xq.data();
472
- std::unique_ptr<float []> del_xqt;
473
-
474
- if (index_pt) {
475
- xqt = index_pt->apply_chain (nq, xq.data());
476
- if (xqt != xq.data()) {
477
- del_xqt.reset((float*)xqt);
478
- }
479
- }
480
-
481
- // quantize the queries to get the inverted list ids to visit.
482
- int nprobe = index_ivf->nprobe;
483
-
484
- std::vector<idx_t> q_lists (nq * nprobe);
485
- std::vector<float> q_dis (nq * nprobe);
486
-
487
- index_ivf->quantizer->search (nq, xqt, nprobe,
488
- q_dis.data(), q_lists.data());
489
-
490
- // now run search in this many threads
491
- int nproc = 3;
492
-
493
-
494
- for (int i = 0; i < nq; i++) {
495
-
496
- // one result table per thread
497
- std::vector<idx_t> I (k * nproc, -1);
498
- float default_dis = metric == METRIC_L2 ? HUGE_VAL : -HUGE_VAL;
499
- std::vector<float> D (k * nproc, default_dis);
500
-
501
- auto search_function = [index_ivf, &I, &D, dt, i, nproc,
502
- xqt, nprobe, &q_dis, &q_lists]
503
- (int rank) {
504
- const InvertedLists *il = index_ivf->invlists;
505
-
506
- // object that does the scanning and distance computations.
507
- std::unique_ptr<InvertedListScanner> scanner (
508
- index_ivf->get_InvertedListScanner());
509
-
510
- idx_t *local_I = I.data() + rank * k;
511
- float *local_D = D.data() + rank * k;
512
-
513
- scanner->set_query (xqt + i * dt);
514
-
515
- for (int j = rank; j < nprobe; j += nproc) {
516
- int list_no = q_lists[i * nprobe + j];
517
- if (list_no < 0) continue;
518
- scanner->set_list (list_no, q_dis[i * nprobe + j]);
519
-
520
- scanner->scan_codes (
521
- il->list_size (list_no),
522
- InvertedLists::ScopedCodes(il, list_no).get(),
523
- InvertedLists::ScopedIds(il, list_no).get(),
524
- local_D, local_I, k);
525
- }
526
- };
527
-
528
- // start the threads. Threads are numbered rank=0..nproc-1 (a la MPI)
529
- // thread rank takes care of inverted lists
530
- // rank, rank+nproc, rank+2*nproc,...
531
- std::vector<std::thread> threads;
532
- for (int rank = 0; rank < nproc; rank++) {
533
- threads.emplace_back(search_function, rank);
534
- }
535
-
536
- // join threads, merge heaps
537
- for (int rank = 0; rank < nproc; rank++) {
538
- threads[rank].join();
539
- if (rank == 0) continue; // nothing to merge
540
- // merge into first result
541
- if (metric == METRIC_L2) {
542
- maxheap_addn (k, D.data(), I.data(),
543
- D.data() + rank * k,
544
- I.data() + rank * k, k);
545
- } else {
546
- minheap_addn (k, D.data(), I.data(),
547
- D.data() + rank * k,
548
- I.data() + rank * k, k);
549
- }
550
- }
551
-
552
- // re-order heap
553
- if (metric == METRIC_L2) {
554
- maxheap_reorder (k, D.data(), I.data());
555
- } else {
556
- minheap_reorder (k, D.data(), I.data());
557
- }
558
-
559
- // check that we have the same results as the reference search
560
- for (int j = 0; j < k; j++) {
561
- EXPECT_EQ (I[j], ref_I[i * k + j]);
562
- }
563
- }
564
-
565
-
566
- }
567
-
568
- } // anonymous namespace
569
-
570
-
571
- TEST(TestLowLevelIVF, ThreadedSearch) {
572
- test_threaded_search ("IVF32,Flat", METRIC_L2);
573
- }