faiss 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +36 -33
- data/vendor/faiss/faiss/AutoTune.h +6 -3
- data/vendor/faiss/faiss/Clustering.cpp +16 -12
- data/vendor/faiss/faiss/Index.cpp +3 -4
- data/vendor/faiss/faiss/Index.h +3 -3
- data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
- data/vendor/faiss/faiss/IndexBinary.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
- data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
- data/vendor/faiss/faiss/IndexFlat.h +0 -51
- data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
- data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
- data/vendor/faiss/faiss/IndexIVF.h +22 -15
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
- data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
- data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
- data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
- data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
- data/vendor/faiss/faiss/IndexRefine.h +73 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
- data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
- data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
- data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
- data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
- data/vendor/faiss/faiss/impl/io.cpp +33 -2
- data/vendor/faiss/faiss/impl/io.h +7 -2
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
- data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
- data/vendor/faiss/faiss/index_factory.cpp +112 -7
- data/vendor/faiss/faiss/index_io.h +1 -48
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
- data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
- data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
- data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
- data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
- data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
- data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
- data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
- data/vendor/faiss/faiss/utils/Heap.h +61 -50
- data/vendor/faiss/faiss/utils/distances.cpp +164 -319
- data/vendor/faiss/faiss/utils/distances.h +28 -20
- data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
- data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
- data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
- data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
- data/vendor/faiss/faiss/utils/hamming.h +2 -7
- data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
- data/vendor/faiss/faiss/utils/partitioning.h +69 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
- data/vendor/faiss/faiss/utils/simdlib.h +31 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
- metadata +43 -141
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
- data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
- data/vendor/faiss/c_api/AutoTune_c.h +0 -66
- data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
- data/vendor/faiss/c_api/Clustering_c.h +0 -123
- data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
- data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
- data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
- data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
- data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
- data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
- data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
- data/vendor/faiss/c_api/IndexShards_c.h +0 -39
- data/vendor/faiss/c_api/Index_c.cpp +0 -105
- data/vendor/faiss/c_api/Index_c.h +0 -183
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
- data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
- data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
- data/vendor/faiss/c_api/clone_index_c.h +0 -32
- data/vendor/faiss/c_api/error_c.h +0 -42
- data/vendor/faiss/c_api/error_impl.cpp +0 -27
- data/vendor/faiss/c_api/error_impl.h +0 -16
- data/vendor/faiss/c_api/faiss_c.h +0 -58
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
- data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
- data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
- data/vendor/faiss/c_api/index_factory_c.h +0 -30
- data/vendor/faiss/c_api/index_io_c.cpp +0 -42
- data/vendor/faiss/c_api/index_io_c.h +0 -50
- data/vendor/faiss/c_api/macros_impl.h +0 -110
- data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
- data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
- data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
- data/vendor/faiss/misc/test_blas.cpp +0 -87
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
- data/vendor/faiss/tests/test_merge.cpp +0 -260
- data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
- data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
- data/vendor/faiss/tests/test_params_override.cpp +0 -236
- data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
- data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
- data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
- data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -1,573 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
#include <cinttypes>
|
9
|
-
#include <cstdio>
|
10
|
-
#include <cstdlib>
|
11
|
-
|
12
|
-
#include <memory>
|
13
|
-
#include <vector>
|
14
|
-
#include <thread>
|
15
|
-
#include <random>
|
16
|
-
|
17
|
-
#include <gtest/gtest.h>
|
18
|
-
|
19
|
-
#include <faiss/IndexIVF.h>
|
20
|
-
#include <faiss/IndexBinaryIVF.h>
|
21
|
-
#include <faiss/IndexPreTransform.h>
|
22
|
-
#include <faiss/AutoTune.h>
|
23
|
-
#include <faiss/index_factory.h>
|
24
|
-
#include <faiss/index_io.h>
|
25
|
-
#include <faiss/IVFlib.h>
|
26
|
-
#include <faiss/VectorTransform.h>
|
27
|
-
|
28
|
-
|
29
|
-
using namespace faiss;
|
30
|
-
|
31
|
-
namespace {
|
32
|
-
|
33
|
-
using idx_t = Index::idx_t;

// dimensionality of the float vectors
int d = 32;

// number of training vectors
size_t nt = 5000;

// number of database vectors added to the index
size_t nb = 1000;

// number of query vectors
size_t nq = 200;

// number of nearest neighbours requested per query
int k = 10;

// default-seeded generator shared by all the synthetic data helpers
std::mt19937 rng;
|
51
|
-
|
52
|
-
|
53
|
-
std::vector<float> make_data(size_t n)
|
54
|
-
{
|
55
|
-
std::vector <float> database (n * d);
|
56
|
-
std::uniform_real_distribution<> distrib;
|
57
|
-
for (size_t i = 0; i < n * d; i++) {
|
58
|
-
database[i] = distrib(rng);
|
59
|
-
}
|
60
|
-
return database;
|
61
|
-
}
|
62
|
-
|
63
|
-
std::unique_ptr<Index> make_trained_index(const char *index_type,
|
64
|
-
MetricType metric_type)
|
65
|
-
{
|
66
|
-
auto index = std::unique_ptr<Index>(index_factory(
|
67
|
-
d, index_type, metric_type));
|
68
|
-
auto xt = make_data(nt);
|
69
|
-
index->train(nt, xt.data());
|
70
|
-
ParameterSpace().set_index_parameter (index.get(), "nprobe", 4);
|
71
|
-
return index;
|
72
|
-
}
|
73
|
-
|
74
|
-
/// Searches the k nearest neighbours of the nq queries stored in xq and
/// returns the flat table of k * nq result labels (distances are dropped).
std::vector<idx_t> search_index(Index *index, const float *xq) {
    const size_t n_results = k * nq;
    std::vector<float> distances(n_results);
    std::vector<idx_t> labels(n_results);
    index->search(nq, xq, k, distances.data(), labels.data());
    return labels;
}
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
/*************************************************************
|
85
|
-
* Test functions for a given index type
|
86
|
-
*************************************************************/
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
void test_lowlevel_access (const char *index_key, MetricType metric) {
|
91
|
-
std::unique_ptr<Index> index = make_trained_index(index_key, metric);
|
92
|
-
|
93
|
-
auto xb = make_data (nb);
|
94
|
-
index->add(nb, xb.data());
|
95
|
-
|
96
|
-
/** handle the case if we have a preprocessor */
|
97
|
-
|
98
|
-
const IndexPreTransform *index_pt =
|
99
|
-
dynamic_cast<const IndexPreTransform*> (index.get());
|
100
|
-
|
101
|
-
int dt = index->d;
|
102
|
-
const float * xbt = xb.data();
|
103
|
-
std::unique_ptr<float []> del_xbt;
|
104
|
-
|
105
|
-
if (index_pt) {
|
106
|
-
dt = index_pt->index->d;
|
107
|
-
xbt = index_pt->apply_chain (nb, xb.data());
|
108
|
-
if (xbt != xb.data()) {
|
109
|
-
del_xbt.reset((float*)xbt);
|
110
|
-
}
|
111
|
-
}
|
112
|
-
|
113
|
-
IndexIVF * index_ivf = ivflib::extract_index_ivf (index.get());
|
114
|
-
|
115
|
-
/** Test independent encoding
|
116
|
-
*
|
117
|
-
* Makes it possible to do additions on a custom inverted list
|
118
|
-
* implementation. From a set of vectors, computes the inverted
|
119
|
-
* list ids + the codes corresponding to each vector.
|
120
|
-
*/
|
121
|
-
|
122
|
-
std::vector<idx_t> list_nos (nb);
|
123
|
-
std::vector<uint8_t> codes (index_ivf->code_size * nb);
|
124
|
-
index_ivf->quantizer->assign(nb, xbt, list_nos.data());
|
125
|
-
index_ivf->encode_vectors (nb, xbt, list_nos.data(), codes.data());
|
126
|
-
|
127
|
-
// compare with normal IVF addition
|
128
|
-
|
129
|
-
const InvertedLists *il = index_ivf->invlists;
|
130
|
-
|
131
|
-
for (int list_no = 0; list_no < index_ivf->nlist; list_no++) {
|
132
|
-
InvertedLists::ScopedCodes ivf_codes (il, list_no);
|
133
|
-
InvertedLists::ScopedIds ivf_ids (il, list_no);
|
134
|
-
size_t list_size = il->list_size (list_no);
|
135
|
-
for (int i = 0; i < list_size; i++) {
|
136
|
-
const uint8_t *ref_code = ivf_codes.get() + i * il->code_size;
|
137
|
-
const uint8_t *new_code =
|
138
|
-
codes.data() + ivf_ids[i] * il->code_size;
|
139
|
-
EXPECT_EQ (memcmp(ref_code, new_code, il->code_size), 0);
|
140
|
-
}
|
141
|
-
}
|
142
|
-
|
143
|
-
/** Test independent search
|
144
|
-
*
|
145
|
-
* Manually scans through inverted lists, computing distances and
|
146
|
-
* ordering results organized in a heap.
|
147
|
-
*/
|
148
|
-
|
149
|
-
// sample some example queries and get reference search results.
|
150
|
-
auto xq = make_data (nq);
|
151
|
-
auto ref_I = search_index (index.get(), xq.data());
|
152
|
-
|
153
|
-
// handle preprocessing
|
154
|
-
const float * xqt = xq.data();
|
155
|
-
std::unique_ptr<float []> del_xqt;
|
156
|
-
|
157
|
-
if (index_pt) {
|
158
|
-
xqt = index_pt->apply_chain (nq, xq.data());
|
159
|
-
if (xqt != xq.data()) {
|
160
|
-
del_xqt.reset((float*)xqt);
|
161
|
-
}
|
162
|
-
}
|
163
|
-
|
164
|
-
// quantize the queries to get the inverted list ids to visit.
|
165
|
-
int nprobe = index_ivf->nprobe;
|
166
|
-
|
167
|
-
std::vector<idx_t> q_lists (nq * nprobe);
|
168
|
-
std::vector<float> q_dis (nq * nprobe);
|
169
|
-
|
170
|
-
index_ivf->quantizer->search (nq, xqt, nprobe,
|
171
|
-
q_dis.data(), q_lists.data());
|
172
|
-
|
173
|
-
// object that does the scanning and distance computations.
|
174
|
-
std::unique_ptr<InvertedListScanner> scanner (
|
175
|
-
index_ivf->get_InvertedListScanner());
|
176
|
-
|
177
|
-
for (int i = 0; i < nq; i++) {
|
178
|
-
std::vector<idx_t> I (k, -1);
|
179
|
-
float default_dis = metric == METRIC_L2 ? HUGE_VAL : -HUGE_VAL;
|
180
|
-
std::vector<float> D (k, default_dis);
|
181
|
-
|
182
|
-
scanner->set_query (xqt + i * dt);
|
183
|
-
|
184
|
-
for (int j = 0; j < nprobe; j++) {
|
185
|
-
int list_no = q_lists[i * nprobe + j];
|
186
|
-
if (list_no < 0) continue;
|
187
|
-
scanner->set_list (list_no, q_dis[i * nprobe + j]);
|
188
|
-
|
189
|
-
// here we get the inverted lists from the InvertedLists
|
190
|
-
// object but they could come from anywhere
|
191
|
-
|
192
|
-
scanner->scan_codes (
|
193
|
-
il->list_size (list_no),
|
194
|
-
InvertedLists::ScopedCodes(il, list_no).get(),
|
195
|
-
InvertedLists::ScopedIds(il, list_no).get(),
|
196
|
-
D.data(), I.data(), k);
|
197
|
-
|
198
|
-
if (j == 0) {
|
199
|
-
// all results so far come from list_no, so let's check if
|
200
|
-
// the distance function works
|
201
|
-
for (int jj = 0; jj < k; jj++) {
|
202
|
-
int vno = I[jj];
|
203
|
-
if (vno < 0) break; // heap is not full yet
|
204
|
-
|
205
|
-
// we have the codes from the addition test
|
206
|
-
float computed_D = scanner->distance_to_code (
|
207
|
-
codes.data() + vno * il->code_size);
|
208
|
-
|
209
|
-
EXPECT_EQ (computed_D, D[jj]);
|
210
|
-
}
|
211
|
-
}
|
212
|
-
}
|
213
|
-
|
214
|
-
// re-order heap
|
215
|
-
if (metric == METRIC_L2) {
|
216
|
-
maxheap_reorder (k, D.data(), I.data());
|
217
|
-
} else {
|
218
|
-
minheap_reorder (k, D.data(), I.data());
|
219
|
-
}
|
220
|
-
|
221
|
-
// check that we have the same results as the reference search
|
222
|
-
for (int j = 0; j < k; j++) {
|
223
|
-
EXPECT_EQ (I[j], ref_I[i * k + j]);
|
224
|
-
}
|
225
|
-
}
|
226
|
-
|
227
|
-
|
228
|
-
}
|
229
|
-
|
230
|
-
} // anonymous namespace
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
/*************************************************************
|
235
|
-
* Test entry points
|
236
|
-
*************************************************************/
|
237
|
-
|
238
|
-
// Low-level access check on an "IVF32,Flat" index with the L2 metric.
TEST(TestLowLevelIVF, IVFFlatL2) {
    test_lowlevel_access("IVF32,Flat", METRIC_L2);
}
|
241
|
-
|
242
|
-
// Same check with a "PCAR16" stage in front of the IVF index (L2 metric);
// presumably this is what exercises the IndexPreTransform code path.
TEST(TestLowLevelIVF, PCAIVFFlatL2) {
    test_lowlevel_access("PCAR16,IVF32,Flat", METRIC_L2);
}
|
245
|
-
|
246
|
-
// Low-level access check on an "IVF32,Flat" index with inner product.
TEST(TestLowLevelIVF, IVFFlatIP) {
    test_lowlevel_access("IVF32,Flat", METRIC_INNER_PRODUCT);
}
|
249
|
-
|
250
|
-
// Low-level access check on an "IVF32,SQ8" index with the L2 metric.
TEST(TestLowLevelIVF, IVFSQL2) {
    test_lowlevel_access("IVF32,SQ8", METRIC_L2);
}
|
253
|
-
|
254
|
-
// Low-level access check on an "IVF32,SQ8" index with inner product.
TEST(TestLowLevelIVF, IVFSQIP) {
    test_lowlevel_access("IVF32,SQ8", METRIC_INNER_PRODUCT);
}
|
257
|
-
|
258
|
-
|
259
|
-
// Low-level access check on an "IVF32,PQ4np" index with the L2 metric.
TEST(TestLowLevelIVF, IVFPQL2) {
    test_lowlevel_access("IVF32,PQ4np", METRIC_L2);
}
|
262
|
-
|
263
|
-
// Low-level access check on an "IVF32,PQ4np" index with inner product.
TEST(TestLowLevelIVF, IVFPQIP) {
    test_lowlevel_access("IVF32,PQ4np", METRIC_INNER_PRODUCT);
}
|
266
|
-
|
267
|
-
|
268
|
-
/*************************************************************
|
269
|
-
* Same for binary (a bit simpler)
|
270
|
-
*************************************************************/
|
271
|
-
|
272
|
-
namespace {
|
273
|
-
|
274
|
-
// dimension of the binary vectors, in bits (passed to index_binary_factory)
int nbit = 256;
|
275
|
-
|
276
|
-
// here d is used as the number of bytes per vector -> d=32 means 256 bits (= nbit)
|
277
|
-
|
278
|
-
std::vector<uint8_t> make_data_binary(size_t n)
|
279
|
-
{
|
280
|
-
|
281
|
-
std::vector <uint8_t> database (n * nbit / 8);
|
282
|
-
std::uniform_int_distribution<> distrib;
|
283
|
-
for (size_t i = 0; i < n * d; i++) {
|
284
|
-
database[i] = distrib(rng);
|
285
|
-
}
|
286
|
-
return database;
|
287
|
-
}
|
288
|
-
|
289
|
-
std::unique_ptr<IndexBinary> make_trained_index_binary(const char *index_type)
|
290
|
-
{
|
291
|
-
auto index = std::unique_ptr<IndexBinary>(index_binary_factory(
|
292
|
-
nbit, index_type));
|
293
|
-
auto xt = make_data_binary (nt);
|
294
|
-
index->train(nt, xt.data());
|
295
|
-
return index;
|
296
|
-
}
|
297
|
-
|
298
|
-
|
299
|
-
void test_lowlevel_access_binary (const char *index_key) {
|
300
|
-
std::unique_ptr<IndexBinary> index =
|
301
|
-
make_trained_index_binary (index_key);
|
302
|
-
|
303
|
-
IndexBinaryIVF * index_ivf = dynamic_cast<IndexBinaryIVF*>
|
304
|
-
(index.get());
|
305
|
-
assert (index_ivf);
|
306
|
-
|
307
|
-
index_ivf->nprobe = 4;
|
308
|
-
|
309
|
-
auto xb = make_data_binary (nb);
|
310
|
-
index->add(nb, xb.data());
|
311
|
-
|
312
|
-
std::vector<idx_t> list_nos (nb);
|
313
|
-
index_ivf->quantizer->assign(nb, xb.data(), list_nos.data());
|
314
|
-
|
315
|
-
/* For binary there is no test for encoding because binary vectors
|
316
|
-
* are copied verbatim to the inverted lists */
|
317
|
-
|
318
|
-
const InvertedLists *il = index_ivf->invlists;
|
319
|
-
|
320
|
-
/** Test independent search
|
321
|
-
*
|
322
|
-
* Manually scans through inverted lists, computing distances and
|
323
|
-
* ordering results organized in a heap.
|
324
|
-
*/
|
325
|
-
|
326
|
-
// sample some example queries and get reference search results.
|
327
|
-
auto xq = make_data_binary (nq);
|
328
|
-
|
329
|
-
std::vector<idx_t> I_ref(k * nq);
|
330
|
-
std::vector<int32_t> D_ref(k * nq);
|
331
|
-
index->search (nq, xq.data(), k, D_ref.data(), I_ref.data());
|
332
|
-
|
333
|
-
// quantize the queries to get the inverted list ids to visit.
|
334
|
-
int nprobe = index_ivf->nprobe;
|
335
|
-
|
336
|
-
std::vector<idx_t> q_lists (nq * nprobe);
|
337
|
-
std::vector<int32_t> q_dis (nq * nprobe);
|
338
|
-
|
339
|
-
// quantize queries
|
340
|
-
index_ivf->quantizer->search (nq, xq.data(), nprobe,
|
341
|
-
q_dis.data(), q_lists.data());
|
342
|
-
|
343
|
-
// object that does the scanning and distance computations.
|
344
|
-
std::unique_ptr<BinaryInvertedListScanner> scanner (
|
345
|
-
index_ivf->get_InvertedListScanner());
|
346
|
-
|
347
|
-
for (int i = 0; i < nq; i++) {
|
348
|
-
std::vector<idx_t> I (k, -1);
|
349
|
-
uint32_t default_dis = 1 << 30;
|
350
|
-
std::vector<int32_t> D (k, default_dis);
|
351
|
-
|
352
|
-
scanner->set_query (xq.data() + i * index_ivf->code_size);
|
353
|
-
|
354
|
-
for (int j = 0; j < nprobe; j++) {
|
355
|
-
int list_no = q_lists[i * nprobe + j];
|
356
|
-
if (list_no < 0) continue;
|
357
|
-
scanner->set_list (list_no, q_dis[i * nprobe + j]);
|
358
|
-
|
359
|
-
// here we get the inverted lists from the InvertedLists
|
360
|
-
// object but they could come from anywhere
|
361
|
-
|
362
|
-
scanner->scan_codes (
|
363
|
-
il->list_size (list_no),
|
364
|
-
InvertedLists::ScopedCodes(il, list_no).get(),
|
365
|
-
InvertedLists::ScopedIds(il, list_no).get(),
|
366
|
-
D.data(), I.data(), k);
|
367
|
-
|
368
|
-
if (j == 0) {
|
369
|
-
// all results so far come from list_no, so let's check if
|
370
|
-
// the distance function works
|
371
|
-
for (int jj = 0; jj < k; jj++) {
|
372
|
-
int vno = I[jj];
|
373
|
-
if (vno < 0) break; // heap is not full yet
|
374
|
-
|
375
|
-
// we have the codes from the addition test
|
376
|
-
float computed_D = scanner->distance_to_code (
|
377
|
-
xb.data() + vno * il->code_size);
|
378
|
-
|
379
|
-
EXPECT_EQ (computed_D, D[jj]);
|
380
|
-
}
|
381
|
-
}
|
382
|
-
}
|
383
|
-
|
384
|
-
printf("new before reroder: [");
|
385
|
-
for (int j = 0; j < k; j++)
|
386
|
-
printf("%" PRId64 ",%d ", I[j], D[j]);
|
387
|
-
printf("]\n");
|
388
|
-
|
389
|
-
// re-order heap
|
390
|
-
heap_reorder<CMax<int32_t, idx_t> > (k, D.data(), I.data());
|
391
|
-
|
392
|
-
printf("ref: [");
|
393
|
-
for (int j = 0; j < k; j++)
|
394
|
-
printf("%" PRId64 ",%d ", I_ref[j], D_ref[j]);
|
395
|
-
printf("]\nnew: [");
|
396
|
-
for (int j = 0; j < k; j++)
|
397
|
-
printf("%" PRId64 ",%d ", I[j], D[j]);
|
398
|
-
printf("]\n");
|
399
|
-
|
400
|
-
// check that we have the same results as the reference search
|
401
|
-
for (int j = 0; j < k; j++) {
|
402
|
-
// here the order is not guaranteed to be the same
|
403
|
-
// so we scan through ref results
|
404
|
-
// EXPECT_EQ (I[j], I_ref[i * k + j]);
|
405
|
-
EXPECT_LE (D[j], D_ref[i * k + k - 1]);
|
406
|
-
if (D[j] < D_ref[i * k + k - 1]) {
|
407
|
-
int j2 = 0;
|
408
|
-
while (j2 < k) {
|
409
|
-
if (I[j] == I_ref[i * k + j2]) break;
|
410
|
-
j2++;
|
411
|
-
}
|
412
|
-
EXPECT_LT(j2, k); // it was found
|
413
|
-
if (j2 < k) {
|
414
|
-
EXPECT_EQ(D[j], D_ref[i * k + j2]);
|
415
|
-
}
|
416
|
-
}
|
417
|
-
|
418
|
-
}
|
419
|
-
|
420
|
-
}
|
421
|
-
|
422
|
-
|
423
|
-
}
|
424
|
-
|
425
|
-
} // anonymous namespace
|
426
|
-
|
427
|
-
|
428
|
-
// Low-level access check on a binary IVF index built from "BIVF32".
TEST(TestLowLevelIVF, IVFBinary) {
    test_lowlevel_access_binary("BIVF32");
}
|
431
|
-
|
432
|
-
|
433
|
-
namespace {
|
434
|
-
|
435
|
-
void test_threaded_search (const char *index_key, MetricType metric) {
|
436
|
-
std::unique_ptr<Index> index = make_trained_index(index_key, metric);
|
437
|
-
|
438
|
-
auto xb = make_data (nb);
|
439
|
-
index->add(nb, xb.data());
|
440
|
-
|
441
|
-
/** handle the case if we have a preprocessor */
|
442
|
-
|
443
|
-
const IndexPreTransform *index_pt =
|
444
|
-
dynamic_cast<const IndexPreTransform*> (index.get());
|
445
|
-
|
446
|
-
int dt = index->d;
|
447
|
-
const float * xbt = xb.data();
|
448
|
-
std::unique_ptr<float []> del_xbt;
|
449
|
-
|
450
|
-
if (index_pt) {
|
451
|
-
dt = index_pt->index->d;
|
452
|
-
xbt = index_pt->apply_chain (nb, xb.data());
|
453
|
-
if (xbt != xb.data()) {
|
454
|
-
del_xbt.reset((float*)xbt);
|
455
|
-
}
|
456
|
-
}
|
457
|
-
|
458
|
-
IndexIVF * index_ivf = ivflib::extract_index_ivf (index.get());
|
459
|
-
|
460
|
-
/** Test independent search
|
461
|
-
*
|
462
|
-
* Manually scans through inverted lists, computing distances and
|
463
|
-
* ordering results organized in a heap.
|
464
|
-
*/
|
465
|
-
|
466
|
-
// sample some example queries and get reference search results.
|
467
|
-
auto xq = make_data (nq);
|
468
|
-
auto ref_I = search_index (index.get(), xq.data());
|
469
|
-
|
470
|
-
// handle preprocessing
|
471
|
-
const float * xqt = xq.data();
|
472
|
-
std::unique_ptr<float []> del_xqt;
|
473
|
-
|
474
|
-
if (index_pt) {
|
475
|
-
xqt = index_pt->apply_chain (nq, xq.data());
|
476
|
-
if (xqt != xq.data()) {
|
477
|
-
del_xqt.reset((float*)xqt);
|
478
|
-
}
|
479
|
-
}
|
480
|
-
|
481
|
-
// quantize the queries to get the inverted list ids to visit.
|
482
|
-
int nprobe = index_ivf->nprobe;
|
483
|
-
|
484
|
-
std::vector<idx_t> q_lists (nq * nprobe);
|
485
|
-
std::vector<float> q_dis (nq * nprobe);
|
486
|
-
|
487
|
-
index_ivf->quantizer->search (nq, xqt, nprobe,
|
488
|
-
q_dis.data(), q_lists.data());
|
489
|
-
|
490
|
-
// now run search in this many threads
|
491
|
-
int nproc = 3;
|
492
|
-
|
493
|
-
|
494
|
-
for (int i = 0; i < nq; i++) {
|
495
|
-
|
496
|
-
// one result table per thread
|
497
|
-
std::vector<idx_t> I (k * nproc, -1);
|
498
|
-
float default_dis = metric == METRIC_L2 ? HUGE_VAL : -HUGE_VAL;
|
499
|
-
std::vector<float> D (k * nproc, default_dis);
|
500
|
-
|
501
|
-
auto search_function = [index_ivf, &I, &D, dt, i, nproc,
|
502
|
-
xqt, nprobe, &q_dis, &q_lists]
|
503
|
-
(int rank) {
|
504
|
-
const InvertedLists *il = index_ivf->invlists;
|
505
|
-
|
506
|
-
// object that does the scanning and distance computations.
|
507
|
-
std::unique_ptr<InvertedListScanner> scanner (
|
508
|
-
index_ivf->get_InvertedListScanner());
|
509
|
-
|
510
|
-
idx_t *local_I = I.data() + rank * k;
|
511
|
-
float *local_D = D.data() + rank * k;
|
512
|
-
|
513
|
-
scanner->set_query (xqt + i * dt);
|
514
|
-
|
515
|
-
for (int j = rank; j < nprobe; j += nproc) {
|
516
|
-
int list_no = q_lists[i * nprobe + j];
|
517
|
-
if (list_no < 0) continue;
|
518
|
-
scanner->set_list (list_no, q_dis[i * nprobe + j]);
|
519
|
-
|
520
|
-
scanner->scan_codes (
|
521
|
-
il->list_size (list_no),
|
522
|
-
InvertedLists::ScopedCodes(il, list_no).get(),
|
523
|
-
InvertedLists::ScopedIds(il, list_no).get(),
|
524
|
-
local_D, local_I, k);
|
525
|
-
}
|
526
|
-
};
|
527
|
-
|
528
|
-
// start the threads. Threads are numbered rank=0..nproc-1 (a la MPI)
|
529
|
-
// thread rank takes care of inverted lists
|
530
|
-
// rank, rank+nproc, rank+2*nproc,...
|
531
|
-
std::vector<std::thread> threads;
|
532
|
-
for (int rank = 0; rank < nproc; rank++) {
|
533
|
-
threads.emplace_back(search_function, rank);
|
534
|
-
}
|
535
|
-
|
536
|
-
// join threads, merge heaps
|
537
|
-
for (int rank = 0; rank < nproc; rank++) {
|
538
|
-
threads[rank].join();
|
539
|
-
if (rank == 0) continue; // nothing to merge
|
540
|
-
// merge into first result
|
541
|
-
if (metric == METRIC_L2) {
|
542
|
-
maxheap_addn (k, D.data(), I.data(),
|
543
|
-
D.data() + rank * k,
|
544
|
-
I.data() + rank * k, k);
|
545
|
-
} else {
|
546
|
-
minheap_addn (k, D.data(), I.data(),
|
547
|
-
D.data() + rank * k,
|
548
|
-
I.data() + rank * k, k);
|
549
|
-
}
|
550
|
-
}
|
551
|
-
|
552
|
-
// re-order heap
|
553
|
-
if (metric == METRIC_L2) {
|
554
|
-
maxheap_reorder (k, D.data(), I.data());
|
555
|
-
} else {
|
556
|
-
minheap_reorder (k, D.data(), I.data());
|
557
|
-
}
|
558
|
-
|
559
|
-
// check that we have the same results as the reference search
|
560
|
-
for (int j = 0; j < k; j++) {
|
561
|
-
EXPECT_EQ (I[j], ref_I[i * k + j]);
|
562
|
-
}
|
563
|
-
}
|
564
|
-
|
565
|
-
|
566
|
-
}
|
567
|
-
|
568
|
-
} // anonymous namespace
|
569
|
-
|
570
|
-
|
571
|
-
// Hand-threaded search check on an "IVF32,Flat" index with the L2 metric.
TEST(TestLowLevelIVF, ThreadedSearch) {
    test_threaded_search("IVF32,Flat", METRIC_L2);
}
|