faiss 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +36 -33
- data/vendor/faiss/faiss/AutoTune.h +6 -3
- data/vendor/faiss/faiss/Clustering.cpp +16 -12
- data/vendor/faiss/faiss/Index.cpp +3 -4
- data/vendor/faiss/faiss/Index.h +3 -3
- data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
- data/vendor/faiss/faiss/IndexBinary.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
- data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
- data/vendor/faiss/faiss/IndexFlat.h +0 -51
- data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
- data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
- data/vendor/faiss/faiss/IndexIVF.h +22 -15
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
- data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
- data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
- data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
- data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
- data/vendor/faiss/faiss/IndexRefine.h +73 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
- data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
- data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
- data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
- data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
- data/vendor/faiss/faiss/impl/io.cpp +33 -2
- data/vendor/faiss/faiss/impl/io.h +7 -2
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
- data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
- data/vendor/faiss/faiss/index_factory.cpp +112 -7
- data/vendor/faiss/faiss/index_io.h +1 -48
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
- data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
- data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
- data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
- data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
- data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
- data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
- data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
- data/vendor/faiss/faiss/utils/Heap.h +61 -50
- data/vendor/faiss/faiss/utils/distances.cpp +164 -319
- data/vendor/faiss/faiss/utils/distances.h +28 -20
- data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
- data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
- data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
- data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
- data/vendor/faiss/faiss/utils/hamming.h +2 -7
- data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
- data/vendor/faiss/faiss/utils/partitioning.h +69 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
- data/vendor/faiss/faiss/utils/simdlib.h +31 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
- metadata +43 -141
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
- data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
- data/vendor/faiss/c_api/AutoTune_c.h +0 -66
- data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
- data/vendor/faiss/c_api/Clustering_c.h +0 -123
- data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
- data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
- data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
- data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
- data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
- data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
- data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
- data/vendor/faiss/c_api/IndexShards_c.h +0 -39
- data/vendor/faiss/c_api/Index_c.cpp +0 -105
- data/vendor/faiss/c_api/Index_c.h +0 -183
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
- data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
- data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
- data/vendor/faiss/c_api/clone_index_c.h +0 -32
- data/vendor/faiss/c_api/error_c.h +0 -42
- data/vendor/faiss/c_api/error_impl.cpp +0 -27
- data/vendor/faiss/c_api/error_impl.h +0 -16
- data/vendor/faiss/c_api/faiss_c.h +0 -58
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
- data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
- data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
- data/vendor/faiss/c_api/index_factory_c.h +0 -30
- data/vendor/faiss/c_api/index_io_c.cpp +0 -42
- data/vendor/faiss/c_api/index_io_c.h +0 -50
- data/vendor/faiss/c_api/macros_impl.h +0 -110
- data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
- data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
- data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
- data/vendor/faiss/misc/test_blas.cpp +0 -87
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
- data/vendor/faiss/tests/test_merge.cpp +0 -260
- data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
- data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
- data/vendor/faiss/tests/test_params_override.cpp +0 -236
- data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
- data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
- data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
- data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -1,573 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
#include <cinttypes>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include <memory>
#include <random>
#include <thread>
#include <vector>

#include <gtest/gtest.h>

#include <faiss/AutoTune.h>
#include <faiss/IVFlib.h>
#include <faiss/IndexBinaryIVF.h>
#include <faiss/IndexIVF.h>
#include <faiss/IndexPreTransform.h>
#include <faiss/VectorTransform.h>
#include <faiss/index_factory.h>
#include <faiss/index_io.h>
|
27
|
-
|
28
|
-
|
29
|
-
using namespace faiss;
|
30
|
-
|
31
|
-
namespace {
|
32
|
-
|
33
|
-
using idx_t = Index::idx_t;

// Sizes shared by every test in this file.
int d = 32;          // dimensionality of the float vectors
size_t nt = 5000;    // number of training vectors
size_t nb = 1000;    // number of database vectors added to the index
size_t nq = 200;     // number of query vectors
int k = 10;          // number of neighbours requested per query

// Default-seeded generator shared by the random data helpers.
std::mt19937 rng;
|
51
|
-
|
52
|
-
|
53
|
-
std::vector<float> make_data(size_t n)
|
54
|
-
{
|
55
|
-
std::vector <float> database (n * d);
|
56
|
-
std::uniform_real_distribution<> distrib;
|
57
|
-
for (size_t i = 0; i < n * d; i++) {
|
58
|
-
database[i] = distrib(rng);
|
59
|
-
}
|
60
|
-
return database;
|
61
|
-
}
|
62
|
-
|
63
|
-
std::unique_ptr<Index> make_trained_index(const char *index_type,
|
64
|
-
MetricType metric_type)
|
65
|
-
{
|
66
|
-
auto index = std::unique_ptr<Index>(index_factory(
|
67
|
-
d, index_type, metric_type));
|
68
|
-
auto xt = make_data(nt);
|
69
|
-
index->train(nt, xt.data());
|
70
|
-
ParameterSpace().set_index_parameter (index.get(), "nprobe", 4);
|
71
|
-
return index;
|
72
|
-
}
|
73
|
-
|
74
|
-
/**
 * Runs a k-nearest-neighbour search for the nq queries in xq.
 *
 * @param index  index to search
 * @param xq     query vectors, passed straight to Index::search
 * @return       the k * nq result labels; distances are discarded
 */
std::vector<idx_t> search_index(Index *index, const float *xq) {
    const size_t n_results = k * nq;
    std::vector<float> distances(n_results);
    std::vector<idx_t> labels(n_results);
    index->search(nq, xq, k, distances.data(), labels.data());
    return labels;
}
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
/*************************************************************
|
85
|
-
* Test functions for a given index type
|
86
|
-
*************************************************************/
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
void test_lowlevel_access (const char *index_key, MetricType metric) {
|
91
|
-
std::unique_ptr<Index> index = make_trained_index(index_key, metric);
|
92
|
-
|
93
|
-
auto xb = make_data (nb);
|
94
|
-
index->add(nb, xb.data());
|
95
|
-
|
96
|
-
/** handle the case if we have a preprocessor */
|
97
|
-
|
98
|
-
const IndexPreTransform *index_pt =
|
99
|
-
dynamic_cast<const IndexPreTransform*> (index.get());
|
100
|
-
|
101
|
-
int dt = index->d;
|
102
|
-
const float * xbt = xb.data();
|
103
|
-
std::unique_ptr<float []> del_xbt;
|
104
|
-
|
105
|
-
if (index_pt) {
|
106
|
-
dt = index_pt->index->d;
|
107
|
-
xbt = index_pt->apply_chain (nb, xb.data());
|
108
|
-
if (xbt != xb.data()) {
|
109
|
-
del_xbt.reset((float*)xbt);
|
110
|
-
}
|
111
|
-
}
|
112
|
-
|
113
|
-
IndexIVF * index_ivf = ivflib::extract_index_ivf (index.get());
|
114
|
-
|
115
|
-
/** Test independent encoding
|
116
|
-
*
|
117
|
-
* Makes it possible to do additions on a custom inverted list
|
118
|
-
* implementation. From a set of vectors, computes the inverted
|
119
|
-
* list ids + the codes corresponding to each vector.
|
120
|
-
*/
|
121
|
-
|
122
|
-
std::vector<idx_t> list_nos (nb);
|
123
|
-
std::vector<uint8_t> codes (index_ivf->code_size * nb);
|
124
|
-
index_ivf->quantizer->assign(nb, xbt, list_nos.data());
|
125
|
-
index_ivf->encode_vectors (nb, xbt, list_nos.data(), codes.data());
|
126
|
-
|
127
|
-
// compare with normal IVF addition
|
128
|
-
|
129
|
-
const InvertedLists *il = index_ivf->invlists;
|
130
|
-
|
131
|
-
for (int list_no = 0; list_no < index_ivf->nlist; list_no++) {
|
132
|
-
InvertedLists::ScopedCodes ivf_codes (il, list_no);
|
133
|
-
InvertedLists::ScopedIds ivf_ids (il, list_no);
|
134
|
-
size_t list_size = il->list_size (list_no);
|
135
|
-
for (int i = 0; i < list_size; i++) {
|
136
|
-
const uint8_t *ref_code = ivf_codes.get() + i * il->code_size;
|
137
|
-
const uint8_t *new_code =
|
138
|
-
codes.data() + ivf_ids[i] * il->code_size;
|
139
|
-
EXPECT_EQ (memcmp(ref_code, new_code, il->code_size), 0);
|
140
|
-
}
|
141
|
-
}
|
142
|
-
|
143
|
-
/** Test independent search
|
144
|
-
*
|
145
|
-
* Manually scans through inverted lists, computing distances and
|
146
|
-
* ordering results organized in a heap.
|
147
|
-
*/
|
148
|
-
|
149
|
-
// sample some example queries and get reference search results.
|
150
|
-
auto xq = make_data (nq);
|
151
|
-
auto ref_I = search_index (index.get(), xq.data());
|
152
|
-
|
153
|
-
// handle preprocessing
|
154
|
-
const float * xqt = xq.data();
|
155
|
-
std::unique_ptr<float []> del_xqt;
|
156
|
-
|
157
|
-
if (index_pt) {
|
158
|
-
xqt = index_pt->apply_chain (nq, xq.data());
|
159
|
-
if (xqt != xq.data()) {
|
160
|
-
del_xqt.reset((float*)xqt);
|
161
|
-
}
|
162
|
-
}
|
163
|
-
|
164
|
-
// quantize the queries to get the inverted list ids to visit.
|
165
|
-
int nprobe = index_ivf->nprobe;
|
166
|
-
|
167
|
-
std::vector<idx_t> q_lists (nq * nprobe);
|
168
|
-
std::vector<float> q_dis (nq * nprobe);
|
169
|
-
|
170
|
-
index_ivf->quantizer->search (nq, xqt, nprobe,
|
171
|
-
q_dis.data(), q_lists.data());
|
172
|
-
|
173
|
-
// object that does the scanning and distance computations.
|
174
|
-
std::unique_ptr<InvertedListScanner> scanner (
|
175
|
-
index_ivf->get_InvertedListScanner());
|
176
|
-
|
177
|
-
for (int i = 0; i < nq; i++) {
|
178
|
-
std::vector<idx_t> I (k, -1);
|
179
|
-
float default_dis = metric == METRIC_L2 ? HUGE_VAL : -HUGE_VAL;
|
180
|
-
std::vector<float> D (k, default_dis);
|
181
|
-
|
182
|
-
scanner->set_query (xqt + i * dt);
|
183
|
-
|
184
|
-
for (int j = 0; j < nprobe; j++) {
|
185
|
-
int list_no = q_lists[i * nprobe + j];
|
186
|
-
if (list_no < 0) continue;
|
187
|
-
scanner->set_list (list_no, q_dis[i * nprobe + j]);
|
188
|
-
|
189
|
-
// here we get the inverted lists from the InvertedLists
|
190
|
-
// object but they could come from anywhere
|
191
|
-
|
192
|
-
scanner->scan_codes (
|
193
|
-
il->list_size (list_no),
|
194
|
-
InvertedLists::ScopedCodes(il, list_no).get(),
|
195
|
-
InvertedLists::ScopedIds(il, list_no).get(),
|
196
|
-
D.data(), I.data(), k);
|
197
|
-
|
198
|
-
if (j == 0) {
|
199
|
-
// all results so far come from list_no, so let's check if
|
200
|
-
// the distance function works
|
201
|
-
for (int jj = 0; jj < k; jj++) {
|
202
|
-
int vno = I[jj];
|
203
|
-
if (vno < 0) break; // heap is not full yet
|
204
|
-
|
205
|
-
// we have the codes from the addition test
|
206
|
-
float computed_D = scanner->distance_to_code (
|
207
|
-
codes.data() + vno * il->code_size);
|
208
|
-
|
209
|
-
EXPECT_EQ (computed_D, D[jj]);
|
210
|
-
}
|
211
|
-
}
|
212
|
-
}
|
213
|
-
|
214
|
-
// re-order heap
|
215
|
-
if (metric == METRIC_L2) {
|
216
|
-
maxheap_reorder (k, D.data(), I.data());
|
217
|
-
} else {
|
218
|
-
minheap_reorder (k, D.data(), I.data());
|
219
|
-
}
|
220
|
-
|
221
|
-
// check that we have the same results as the reference search
|
222
|
-
for (int j = 0; j < k; j++) {
|
223
|
-
EXPECT_EQ (I[j], ref_I[i * k + j]);
|
224
|
-
}
|
225
|
-
}
|
226
|
-
|
227
|
-
|
228
|
-
}
|
229
|
-
|
230
|
-
} // anonymous namespace
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
/*************************************************************
|
235
|
-
* Test entry points
|
236
|
-
*************************************************************/
|
237
|
-
|
238
|
-
// Float index entry points: each case runs test_lowlevel_access on one
// index_factory string and one metric.

TEST(TestLowLevelIVF, IVFFlatL2) {
    test_lowlevel_access("IVF32,Flat", METRIC_L2);
}

TEST(TestLowLevelIVF, PCAIVFFlatL2) {
    test_lowlevel_access("PCAR16,IVF32,Flat", METRIC_L2);
}

TEST(TestLowLevelIVF, IVFFlatIP) {
    test_lowlevel_access("IVF32,Flat", METRIC_INNER_PRODUCT);
}

TEST(TestLowLevelIVF, IVFSQL2) {
    test_lowlevel_access("IVF32,SQ8", METRIC_L2);
}

TEST(TestLowLevelIVF, IVFSQIP) {
    test_lowlevel_access("IVF32,SQ8", METRIC_INNER_PRODUCT);
}

TEST(TestLowLevelIVF, IVFPQL2) {
    test_lowlevel_access("IVF32,PQ4np", METRIC_L2);
}

TEST(TestLowLevelIVF, IVFPQIP) {
    test_lowlevel_access("IVF32,PQ4np", METRIC_INNER_PRODUCT);
}
|
266
|
-
|
267
|
-
|
268
|
-
/*************************************************************
|
269
|
-
* Same for binary (a bit simpler)
|
270
|
-
*************************************************************/
|
271
|
-
|
272
|
-
namespace {
|
273
|
-
|
274
|
-
int nbit = 256;
|
275
|
-
|
276
|
-
// here d is used the number of ints -> d=32 means 128 bits
|
277
|
-
|
278
|
-
std::vector<uint8_t> make_data_binary(size_t n)
|
279
|
-
{
|
280
|
-
|
281
|
-
std::vector <uint8_t> database (n * nbit / 8);
|
282
|
-
std::uniform_int_distribution<> distrib;
|
283
|
-
for (size_t i = 0; i < n * d; i++) {
|
284
|
-
database[i] = distrib(rng);
|
285
|
-
}
|
286
|
-
return database;
|
287
|
-
}
|
288
|
-
|
289
|
-
std::unique_ptr<IndexBinary> make_trained_index_binary(const char *index_type)
|
290
|
-
{
|
291
|
-
auto index = std::unique_ptr<IndexBinary>(index_binary_factory(
|
292
|
-
nbit, index_type));
|
293
|
-
auto xt = make_data_binary (nt);
|
294
|
-
index->train(nt, xt.data());
|
295
|
-
return index;
|
296
|
-
}
|
297
|
-
|
298
|
-
|
299
|
-
void test_lowlevel_access_binary (const char *index_key) {
|
300
|
-
std::unique_ptr<IndexBinary> index =
|
301
|
-
make_trained_index_binary (index_key);
|
302
|
-
|
303
|
-
IndexBinaryIVF * index_ivf = dynamic_cast<IndexBinaryIVF*>
|
304
|
-
(index.get());
|
305
|
-
assert (index_ivf);
|
306
|
-
|
307
|
-
index_ivf->nprobe = 4;
|
308
|
-
|
309
|
-
auto xb = make_data_binary (nb);
|
310
|
-
index->add(nb, xb.data());
|
311
|
-
|
312
|
-
std::vector<idx_t> list_nos (nb);
|
313
|
-
index_ivf->quantizer->assign(nb, xb.data(), list_nos.data());
|
314
|
-
|
315
|
-
/* For binary there is no test for encoding because binary vectors
|
316
|
-
* are copied verbatim to the inverted lists */
|
317
|
-
|
318
|
-
const InvertedLists *il = index_ivf->invlists;
|
319
|
-
|
320
|
-
/** Test independent search
|
321
|
-
*
|
322
|
-
* Manually scans through inverted lists, computing distances and
|
323
|
-
* ordering results organized in a heap.
|
324
|
-
*/
|
325
|
-
|
326
|
-
// sample some example queries and get reference search results.
|
327
|
-
auto xq = make_data_binary (nq);
|
328
|
-
|
329
|
-
std::vector<idx_t> I_ref(k * nq);
|
330
|
-
std::vector<int32_t> D_ref(k * nq);
|
331
|
-
index->search (nq, xq.data(), k, D_ref.data(), I_ref.data());
|
332
|
-
|
333
|
-
// quantize the queries to get the inverted list ids to visit.
|
334
|
-
int nprobe = index_ivf->nprobe;
|
335
|
-
|
336
|
-
std::vector<idx_t> q_lists (nq * nprobe);
|
337
|
-
std::vector<int32_t> q_dis (nq * nprobe);
|
338
|
-
|
339
|
-
// quantize queries
|
340
|
-
index_ivf->quantizer->search (nq, xq.data(), nprobe,
|
341
|
-
q_dis.data(), q_lists.data());
|
342
|
-
|
343
|
-
// object that does the scanning and distance computations.
|
344
|
-
std::unique_ptr<BinaryInvertedListScanner> scanner (
|
345
|
-
index_ivf->get_InvertedListScanner());
|
346
|
-
|
347
|
-
for (int i = 0; i < nq; i++) {
|
348
|
-
std::vector<idx_t> I (k, -1);
|
349
|
-
uint32_t default_dis = 1 << 30;
|
350
|
-
std::vector<int32_t> D (k, default_dis);
|
351
|
-
|
352
|
-
scanner->set_query (xq.data() + i * index_ivf->code_size);
|
353
|
-
|
354
|
-
for (int j = 0; j < nprobe; j++) {
|
355
|
-
int list_no = q_lists[i * nprobe + j];
|
356
|
-
if (list_no < 0) continue;
|
357
|
-
scanner->set_list (list_no, q_dis[i * nprobe + j]);
|
358
|
-
|
359
|
-
// here we get the inverted lists from the InvertedLists
|
360
|
-
// object but they could come from anywhere
|
361
|
-
|
362
|
-
scanner->scan_codes (
|
363
|
-
il->list_size (list_no),
|
364
|
-
InvertedLists::ScopedCodes(il, list_no).get(),
|
365
|
-
InvertedLists::ScopedIds(il, list_no).get(),
|
366
|
-
D.data(), I.data(), k);
|
367
|
-
|
368
|
-
if (j == 0) {
|
369
|
-
// all results so far come from list_no, so let's check if
|
370
|
-
// the distance function works
|
371
|
-
for (int jj = 0; jj < k; jj++) {
|
372
|
-
int vno = I[jj];
|
373
|
-
if (vno < 0) break; // heap is not full yet
|
374
|
-
|
375
|
-
// we have the codes from the addition test
|
376
|
-
float computed_D = scanner->distance_to_code (
|
377
|
-
xb.data() + vno * il->code_size);
|
378
|
-
|
379
|
-
EXPECT_EQ (computed_D, D[jj]);
|
380
|
-
}
|
381
|
-
}
|
382
|
-
}
|
383
|
-
|
384
|
-
printf("new before reroder: [");
|
385
|
-
for (int j = 0; j < k; j++)
|
386
|
-
printf("%" PRId64 ",%d ", I[j], D[j]);
|
387
|
-
printf("]\n");
|
388
|
-
|
389
|
-
// re-order heap
|
390
|
-
heap_reorder<CMax<int32_t, idx_t> > (k, D.data(), I.data());
|
391
|
-
|
392
|
-
printf("ref: [");
|
393
|
-
for (int j = 0; j < k; j++)
|
394
|
-
printf("%" PRId64 ",%d ", I_ref[j], D_ref[j]);
|
395
|
-
printf("]\nnew: [");
|
396
|
-
for (int j = 0; j < k; j++)
|
397
|
-
printf("%" PRId64 ",%d ", I[j], D[j]);
|
398
|
-
printf("]\n");
|
399
|
-
|
400
|
-
// check that we have the same results as the reference search
|
401
|
-
for (int j = 0; j < k; j++) {
|
402
|
-
// here the order is not guaranteed to be the same
|
403
|
-
// so we scan through ref results
|
404
|
-
// EXPECT_EQ (I[j], I_ref[i * k + j]);
|
405
|
-
EXPECT_LE (D[j], D_ref[i * k + k - 1]);
|
406
|
-
if (D[j] < D_ref[i * k + k - 1]) {
|
407
|
-
int j2 = 0;
|
408
|
-
while (j2 < k) {
|
409
|
-
if (I[j] == I_ref[i * k + j2]) break;
|
410
|
-
j2++;
|
411
|
-
}
|
412
|
-
EXPECT_LT(j2, k); // it was found
|
413
|
-
if (j2 < k) {
|
414
|
-
EXPECT_EQ(D[j], D_ref[i * k + j2]);
|
415
|
-
}
|
416
|
-
}
|
417
|
-
|
418
|
-
}
|
419
|
-
|
420
|
-
}
|
421
|
-
|
422
|
-
|
423
|
-
}
|
424
|
-
|
425
|
-
} // anonymous namespace
|
426
|
-
|
427
|
-
|
428
|
-
// Binary index entry point: runs the manual-scan comparison on a binary IVF
// index built from the "BIVF32" factory string.
TEST(TestLowLevelIVF, IVFBinary) {
    test_lowlevel_access_binary("BIVF32");
}
|
431
|
-
|
432
|
-
|
433
|
-
namespace {
|
434
|
-
|
435
|
-
void test_threaded_search (const char *index_key, MetricType metric) {
|
436
|
-
std::unique_ptr<Index> index = make_trained_index(index_key, metric);
|
437
|
-
|
438
|
-
auto xb = make_data (nb);
|
439
|
-
index->add(nb, xb.data());
|
440
|
-
|
441
|
-
/** handle the case if we have a preprocessor */
|
442
|
-
|
443
|
-
const IndexPreTransform *index_pt =
|
444
|
-
dynamic_cast<const IndexPreTransform*> (index.get());
|
445
|
-
|
446
|
-
int dt = index->d;
|
447
|
-
const float * xbt = xb.data();
|
448
|
-
std::unique_ptr<float []> del_xbt;
|
449
|
-
|
450
|
-
if (index_pt) {
|
451
|
-
dt = index_pt->index->d;
|
452
|
-
xbt = index_pt->apply_chain (nb, xb.data());
|
453
|
-
if (xbt != xb.data()) {
|
454
|
-
del_xbt.reset((float*)xbt);
|
455
|
-
}
|
456
|
-
}
|
457
|
-
|
458
|
-
IndexIVF * index_ivf = ivflib::extract_index_ivf (index.get());
|
459
|
-
|
460
|
-
/** Test independent search
|
461
|
-
*
|
462
|
-
* Manually scans through inverted lists, computing distances and
|
463
|
-
* ordering results organized in a heap.
|
464
|
-
*/
|
465
|
-
|
466
|
-
// sample some example queries and get reference search results.
|
467
|
-
auto xq = make_data (nq);
|
468
|
-
auto ref_I = search_index (index.get(), xq.data());
|
469
|
-
|
470
|
-
// handle preprocessing
|
471
|
-
const float * xqt = xq.data();
|
472
|
-
std::unique_ptr<float []> del_xqt;
|
473
|
-
|
474
|
-
if (index_pt) {
|
475
|
-
xqt = index_pt->apply_chain (nq, xq.data());
|
476
|
-
if (xqt != xq.data()) {
|
477
|
-
del_xqt.reset((float*)xqt);
|
478
|
-
}
|
479
|
-
}
|
480
|
-
|
481
|
-
// quantize the queries to get the inverted list ids to visit.
|
482
|
-
int nprobe = index_ivf->nprobe;
|
483
|
-
|
484
|
-
std::vector<idx_t> q_lists (nq * nprobe);
|
485
|
-
std::vector<float> q_dis (nq * nprobe);
|
486
|
-
|
487
|
-
index_ivf->quantizer->search (nq, xqt, nprobe,
|
488
|
-
q_dis.data(), q_lists.data());
|
489
|
-
|
490
|
-
// now run search in this many threads
|
491
|
-
int nproc = 3;
|
492
|
-
|
493
|
-
|
494
|
-
for (int i = 0; i < nq; i++) {
|
495
|
-
|
496
|
-
// one result table per thread
|
497
|
-
std::vector<idx_t> I (k * nproc, -1);
|
498
|
-
float default_dis = metric == METRIC_L2 ? HUGE_VAL : -HUGE_VAL;
|
499
|
-
std::vector<float> D (k * nproc, default_dis);
|
500
|
-
|
501
|
-
auto search_function = [index_ivf, &I, &D, dt, i, nproc,
|
502
|
-
xqt, nprobe, &q_dis, &q_lists]
|
503
|
-
(int rank) {
|
504
|
-
const InvertedLists *il = index_ivf->invlists;
|
505
|
-
|
506
|
-
// object that does the scanning and distance computations.
|
507
|
-
std::unique_ptr<InvertedListScanner> scanner (
|
508
|
-
index_ivf->get_InvertedListScanner());
|
509
|
-
|
510
|
-
idx_t *local_I = I.data() + rank * k;
|
511
|
-
float *local_D = D.data() + rank * k;
|
512
|
-
|
513
|
-
scanner->set_query (xqt + i * dt);
|
514
|
-
|
515
|
-
for (int j = rank; j < nprobe; j += nproc) {
|
516
|
-
int list_no = q_lists[i * nprobe + j];
|
517
|
-
if (list_no < 0) continue;
|
518
|
-
scanner->set_list (list_no, q_dis[i * nprobe + j]);
|
519
|
-
|
520
|
-
scanner->scan_codes (
|
521
|
-
il->list_size (list_no),
|
522
|
-
InvertedLists::ScopedCodes(il, list_no).get(),
|
523
|
-
InvertedLists::ScopedIds(il, list_no).get(),
|
524
|
-
local_D, local_I, k);
|
525
|
-
}
|
526
|
-
};
|
527
|
-
|
528
|
-
// start the threads. Threads are numbered rank=0..nproc-1 (a la MPI)
|
529
|
-
// thread rank takes care of inverted lists
|
530
|
-
// rank, rank+nproc, rank+2*nproc,...
|
531
|
-
std::vector<std::thread> threads;
|
532
|
-
for (int rank = 0; rank < nproc; rank++) {
|
533
|
-
threads.emplace_back(search_function, rank);
|
534
|
-
}
|
535
|
-
|
536
|
-
// join threads, merge heaps
|
537
|
-
for (int rank = 0; rank < nproc; rank++) {
|
538
|
-
threads[rank].join();
|
539
|
-
if (rank == 0) continue; // nothing to merge
|
540
|
-
// merge into first result
|
541
|
-
if (metric == METRIC_L2) {
|
542
|
-
maxheap_addn (k, D.data(), I.data(),
|
543
|
-
D.data() + rank * k,
|
544
|
-
I.data() + rank * k, k);
|
545
|
-
} else {
|
546
|
-
minheap_addn (k, D.data(), I.data(),
|
547
|
-
D.data() + rank * k,
|
548
|
-
I.data() + rank * k, k);
|
549
|
-
}
|
550
|
-
}
|
551
|
-
|
552
|
-
// re-order heap
|
553
|
-
if (metric == METRIC_L2) {
|
554
|
-
maxheap_reorder (k, D.data(), I.data());
|
555
|
-
} else {
|
556
|
-
minheap_reorder (k, D.data(), I.data());
|
557
|
-
}
|
558
|
-
|
559
|
-
// check that we have the same results as the reference search
|
560
|
-
for (int j = 0; j < k; j++) {
|
561
|
-
EXPECT_EQ (I[j], ref_I[i * k + j]);
|
562
|
-
}
|
563
|
-
}
|
564
|
-
|
565
|
-
|
566
|
-
}
|
567
|
-
|
568
|
-
} // anonymous namespace
|
569
|
-
|
570
|
-
|
571
|
-
// Multi-threaded manual scan, run on a flat IVF index with the L2 metric.
TEST(TestLowLevelIVF, ThreadedSearch) {
    test_threaded_search("IVF32,Flat", METRIC_L2);
}
|