faiss 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +36 -33
- data/vendor/faiss/faiss/AutoTune.h +6 -3
- data/vendor/faiss/faiss/Clustering.cpp +16 -12
- data/vendor/faiss/faiss/Index.cpp +3 -4
- data/vendor/faiss/faiss/Index.h +3 -3
- data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
- data/vendor/faiss/faiss/IndexBinary.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
- data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
- data/vendor/faiss/faiss/IndexFlat.h +0 -51
- data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
- data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
- data/vendor/faiss/faiss/IndexIVF.h +22 -15
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
- data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
- data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
- data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
- data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
- data/vendor/faiss/faiss/IndexRefine.h +73 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
- data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
- data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
- data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
- data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
- data/vendor/faiss/faiss/impl/io.cpp +33 -2
- data/vendor/faiss/faiss/impl/io.h +7 -2
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
- data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
- data/vendor/faiss/faiss/index_factory.cpp +112 -7
- data/vendor/faiss/faiss/index_io.h +1 -48
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
- data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
- data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
- data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
- data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
- data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
- data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
- data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
- data/vendor/faiss/faiss/utils/Heap.h +61 -50
- data/vendor/faiss/faiss/utils/distances.cpp +164 -319
- data/vendor/faiss/faiss/utils/distances.h +28 -20
- data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
- data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
- data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
- data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
- data/vendor/faiss/faiss/utils/hamming.h +2 -7
- data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
- data/vendor/faiss/faiss/utils/partitioning.h +69 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
- data/vendor/faiss/faiss/utils/simdlib.h +31 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
- metadata +43 -141
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
- data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
- data/vendor/faiss/c_api/AutoTune_c.h +0 -66
- data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
- data/vendor/faiss/c_api/Clustering_c.h +0 -123
- data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
- data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
- data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
- data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
- data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
- data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
- data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
- data/vendor/faiss/c_api/IndexShards_c.h +0 -39
- data/vendor/faiss/c_api/Index_c.cpp +0 -105
- data/vendor/faiss/c_api/Index_c.h +0 -183
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
- data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
- data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
- data/vendor/faiss/c_api/clone_index_c.h +0 -32
- data/vendor/faiss/c_api/error_c.h +0 -42
- data/vendor/faiss/c_api/error_impl.cpp +0 -27
- data/vendor/faiss/c_api/error_impl.h +0 -16
- data/vendor/faiss/c_api/faiss_c.h +0 -58
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
- data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
- data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
- data/vendor/faiss/c_api/index_factory_c.h +0 -30
- data/vendor/faiss/c_api/index_io_c.cpp +0 -42
- data/vendor/faiss/c_api/index_io_c.h +0 -50
- data/vendor/faiss/c_api/macros_impl.h +0 -110
- data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
- data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
- data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
- data/vendor/faiss/misc/test_blas.cpp +0 -87
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
- data/vendor/faiss/tests/test_merge.cpp +0 -260
- data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
- data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
- data/vendor/faiss/tests/test_params_override.cpp +0 -236
- data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
- data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
- data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
- data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -1,87 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
#include <cstdio>
|
9
|
-
#include <cstdlib>
|
10
|
-
#include <random>
|
11
|
-
|
12
|
-
#undef FINTEGER
|
13
|
-
#define FINTEGER long
|
14
|
-
|
15
|
-
|
16
|
-
extern "C" {
|
17
|
-
|
18
|
-
/* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */
|
19
|
-
|
20
|
-
int sgemm_ (const char *transa, const char *transb, FINTEGER *m, FINTEGER *
|
21
|
-
n, FINTEGER *k, const float *alpha, const float *a,
|
22
|
-
FINTEGER *lda, const float *b, FINTEGER *
|
23
|
-
ldb, float *beta, float *c, FINTEGER *ldc);
|
24
|
-
|
25
|
-
/* Lapack functions, see http://www.netlib.org/clapack/old/single/sgeqrf.c */
|
26
|
-
|
27
|
-
int sgeqrf_ (FINTEGER *m, FINTEGER *n, float *a, FINTEGER *lda,
|
28
|
-
float *tau, float *work, FINTEGER *lwork, FINTEGER *info);
|
29
|
-
|
30
|
-
}
|
31
|
-
|
32
|
-
float *new_random_vec(int size)
|
33
|
-
{
|
34
|
-
float *x = new float[size];
|
35
|
-
std::mt19937 rng;
|
36
|
-
std::uniform_real_distribution<> distrib;
|
37
|
-
for (int i = 0; i < size; i++)
|
38
|
-
x[i] = distrib(rng);
|
39
|
-
return x;
|
40
|
-
}
|
41
|
-
|
42
|
-
|
43
|
-
int main() {
|
44
|
-
|
45
|
-
FINTEGER m = 10, n = 20, k = 30;
|
46
|
-
float *a = new_random_vec(m * k), *b = new_random_vec(n * k), *c = new float[n * m];
|
47
|
-
float one = 1.0, zero = 0.0;
|
48
|
-
|
49
|
-
printf("BLAS test\n");
|
50
|
-
|
51
|
-
sgemm_("Not transposed", "Not transposed",
|
52
|
-
&m, &n, &k, &one, a, &m, b, &k, &zero, c, &m);
|
53
|
-
|
54
|
-
printf("errors=\n");
|
55
|
-
|
56
|
-
for (int i = 0; i < m; i++) {
|
57
|
-
for (int j = 0; j < n; j++) {
|
58
|
-
float accu = 0;
|
59
|
-
for (int l = 0; l < k; l++)
|
60
|
-
accu += a[i + l * m] * b[l + j * k];
|
61
|
-
printf ("%6.3f ", accu - c[i + j * m]);
|
62
|
-
}
|
63
|
-
printf("\n");
|
64
|
-
}
|
65
|
-
|
66
|
-
long info = 0x64bL << 32;
|
67
|
-
long mi = 0x64bL << 32 | m;
|
68
|
-
float *tau = new float[m];
|
69
|
-
FINTEGER lwork = -1;
|
70
|
-
|
71
|
-
float work1;
|
72
|
-
|
73
|
-
printf("Intentional Lapack error (appears only for 64-bit INTEGER):\n");
|
74
|
-
sgeqrf_ (&mi, &n, c, &m, tau, &work1, &lwork, (FINTEGER*)&info);
|
75
|
-
|
76
|
-
// sgeqrf_ (&m, &n, c, &zeroi, tau, &work1, &lwork, (FINTEGER*)&info);
|
77
|
-
printf("info=%016lx\n", info);
|
78
|
-
|
79
|
-
if(info >> 32 == 0x64b) {
|
80
|
-
printf("Lapack uses 32-bit integers\n");
|
81
|
-
} else {
|
82
|
-
printf("Lapack uses 64-bit integers\n");
|
83
|
-
}
|
84
|
-
|
85
|
-
|
86
|
-
return 0;
|
87
|
-
}
|
@@ -1,62 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
#include <cstdio>
|
9
|
-
#include <cstdlib>
|
10
|
-
|
11
|
-
#include <gtest/gtest.h>
|
12
|
-
|
13
|
-
#include <faiss/IndexBinaryFlat.h>
|
14
|
-
#include <faiss/utils/hamming.h>
|
15
|
-
|
16
|
-
TEST(BinaryFlat, accuracy) {
|
17
|
-
// dimension of the vectors to index
|
18
|
-
int d = 64;
|
19
|
-
|
20
|
-
// size of the database we plan to index
|
21
|
-
size_t nb = 1000;
|
22
|
-
|
23
|
-
// make the index object and train it
|
24
|
-
faiss::IndexBinaryFlat index(d);
|
25
|
-
|
26
|
-
std::vector<uint8_t> database(nb * (d / 8));
|
27
|
-
for (size_t i = 0; i < nb * (d / 8); i++) {
|
28
|
-
database[i] = rand() % 0x100;
|
29
|
-
}
|
30
|
-
|
31
|
-
{ // populating the database
|
32
|
-
index.add(nb, database.data());
|
33
|
-
}
|
34
|
-
|
35
|
-
size_t nq = 200;
|
36
|
-
|
37
|
-
{ // searching the database
|
38
|
-
|
39
|
-
std::vector<uint8_t> queries(nq * (d / 8));
|
40
|
-
for (size_t i = 0; i < nq * (d / 8); i++) {
|
41
|
-
queries[i] = rand() % 0x100;
|
42
|
-
}
|
43
|
-
|
44
|
-
int k = 5;
|
45
|
-
std::vector<faiss::IndexBinary::idx_t> nns(k * nq);
|
46
|
-
std::vector<int> dis(k * nq);
|
47
|
-
|
48
|
-
index.search(nq, queries.data(), k, dis.data(), nns.data());
|
49
|
-
|
50
|
-
for (size_t i = 0; i < nq; ++i) {
|
51
|
-
faiss::HammingComputer8 hc(queries.data() + i * (d / 8), d / 8);
|
52
|
-
hamdis_t dist_min = hc.hamming(database.data());
|
53
|
-
for (size_t j = 1; j < nb; ++j) {
|
54
|
-
hamdis_t dist = hc.hamming(database.data() + j * (d / 8));
|
55
|
-
if (dist < dist_min) {
|
56
|
-
dist_min = dist;
|
57
|
-
}
|
58
|
-
}
|
59
|
-
EXPECT_EQ(dist_min, dis[k * i]);
|
60
|
-
}
|
61
|
-
}
|
62
|
-
}
|
@@ -1,188 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
#include <cstdio>
|
9
|
-
#include <cstdlib>
|
10
|
-
|
11
|
-
#include <memory>
|
12
|
-
#include <vector>
|
13
|
-
#include <random>
|
14
|
-
|
15
|
-
#include <gtest/gtest.h>
|
16
|
-
|
17
|
-
#include <faiss/IndexIVF.h>
|
18
|
-
#include <faiss/index_factory.h>
|
19
|
-
#include <faiss/AutoTune.h>
|
20
|
-
#include <faiss/index_io.h>
|
21
|
-
#include <faiss/IVFlib.h>
|
22
|
-
|
23
|
-
|
24
|
-
using namespace faiss;
|
25
|
-
|
26
|
-
namespace {
|
27
|
-
|
28
|
-
typedef Index::idx_t idx_t;
|
29
|
-
|
30
|
-
|
31
|
-
// dimension of the vectors to index
|
32
|
-
int d = 32;
|
33
|
-
|
34
|
-
// nb of training vectors
|
35
|
-
size_t nt = 5000;
|
36
|
-
|
37
|
-
// size of the database points per window step
|
38
|
-
size_t nb = 1000;
|
39
|
-
|
40
|
-
// nb of queries
|
41
|
-
size_t nq = 200;
|
42
|
-
|
43
|
-
std::mt19937 rng;
|
44
|
-
|
45
|
-
std::vector<float> make_data(size_t n)
|
46
|
-
{
|
47
|
-
std::vector <float> database (n * d);
|
48
|
-
std::uniform_real_distribution<> distrib;
|
49
|
-
|
50
|
-
for (size_t i = 0; i < n * d; i++) {
|
51
|
-
database[i] = distrib(rng);
|
52
|
-
}
|
53
|
-
return database;
|
54
|
-
}
|
55
|
-
|
56
|
-
std::unique_ptr<Index> make_trained_index(const char *index_type)
|
57
|
-
{
|
58
|
-
auto index = std::unique_ptr<Index>(index_factory(d, index_type));
|
59
|
-
auto xt = make_data(nt * d);
|
60
|
-
index->train(nt, xt.data());
|
61
|
-
ParameterSpace().set_index_parameter (index.get(), "nprobe", 4);
|
62
|
-
return index;
|
63
|
-
}
|
64
|
-
|
65
|
-
std::vector<idx_t> search_index(Index *index, const float *xq) {
|
66
|
-
int k = 10;
|
67
|
-
std::vector<idx_t> I(k * nq);
|
68
|
-
std::vector<float> D(k * nq);
|
69
|
-
index->search (nq, xq, k, D.data(), I.data());
|
70
|
-
return I;
|
71
|
-
}
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
/*************************************************************
|
78
|
-
* Test functions for a given index type
|
79
|
-
*************************************************************/
|
80
|
-
|
81
|
-
struct EncapsulateInvertedLists: InvertedLists {
|
82
|
-
|
83
|
-
const InvertedLists *il;
|
84
|
-
|
85
|
-
EncapsulateInvertedLists(const InvertedLists *il):
|
86
|
-
InvertedLists(il->nlist, il->code_size),
|
87
|
-
il(il)
|
88
|
-
{}
|
89
|
-
|
90
|
-
static void * memdup (const void *m, size_t size) {
|
91
|
-
if (size == 0) return nullptr;
|
92
|
-
return memcpy (malloc(size), m, size);
|
93
|
-
}
|
94
|
-
|
95
|
-
size_t list_size(size_t list_no) const override {
|
96
|
-
return il->list_size (list_no);
|
97
|
-
}
|
98
|
-
|
99
|
-
const uint8_t * get_codes (size_t list_no) const override {
|
100
|
-
return (uint8_t*)memdup (il->get_codes(list_no),
|
101
|
-
list_size(list_no) * code_size);
|
102
|
-
}
|
103
|
-
|
104
|
-
const idx_t * get_ids (size_t list_no) const override {
|
105
|
-
return (idx_t*)memdup (il->get_ids(list_no),
|
106
|
-
list_size(list_no) * sizeof(idx_t));
|
107
|
-
}
|
108
|
-
|
109
|
-
void release_codes (size_t, const uint8_t *codes) const override {
|
110
|
-
free ((void*)codes);
|
111
|
-
}
|
112
|
-
|
113
|
-
void release_ids (size_t, const idx_t *ids) const override {
|
114
|
-
free ((void*)ids);
|
115
|
-
}
|
116
|
-
|
117
|
-
const uint8_t * get_single_code (size_t list_no, size_t offset)
|
118
|
-
const override {
|
119
|
-
return (uint8_t*)memdup (il->get_single_code(list_no, offset),
|
120
|
-
code_size);
|
121
|
-
}
|
122
|
-
|
123
|
-
size_t add_entries(size_t, size_t, const idx_t*, const uint8_t*) override {
|
124
|
-
assert(!"not implemented");
|
125
|
-
return 0;
|
126
|
-
}
|
127
|
-
|
128
|
-
void update_entries(size_t, size_t, size_t, const idx_t*, const uint8_t*)
|
129
|
-
override {
|
130
|
-
assert(!"not implemented");
|
131
|
-
}
|
132
|
-
|
133
|
-
void resize(size_t, size_t) override {
|
134
|
-
assert(!"not implemented");
|
135
|
-
}
|
136
|
-
|
137
|
-
~EncapsulateInvertedLists() override {}
|
138
|
-
};
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
int test_dealloc_invlists (const char *index_key) {
|
143
|
-
|
144
|
-
std::unique_ptr<Index> index = make_trained_index(index_key);
|
145
|
-
IndexIVF * index_ivf = ivflib::extract_index_ivf (index.get());
|
146
|
-
|
147
|
-
auto xb = make_data (nb * d);
|
148
|
-
index->add(nb, xb.data());
|
149
|
-
|
150
|
-
auto xq = make_data (nq * d);
|
151
|
-
|
152
|
-
auto ref_res = search_index (index.get(), xq.data());
|
153
|
-
|
154
|
-
EncapsulateInvertedLists eil(index_ivf->invlists);
|
155
|
-
|
156
|
-
index_ivf->own_invlists = false;
|
157
|
-
index_ivf->replace_invlists (&eil, false);
|
158
|
-
|
159
|
-
// TEST: this could crash or leak mem
|
160
|
-
auto new_res = search_index (index.get(), xq.data());
|
161
|
-
|
162
|
-
// delete explicitly
|
163
|
-
delete eil.il;
|
164
|
-
|
165
|
-
// just to make sure
|
166
|
-
EXPECT_EQ (ref_res, new_res);
|
167
|
-
return 0;
|
168
|
-
}
|
169
|
-
|
170
|
-
} // anonymous namespace
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
/*************************************************************
|
175
|
-
* Test entry points
|
176
|
-
*************************************************************/
|
177
|
-
|
178
|
-
TEST(TestIvlistDealloc, IVFFlat) {
|
179
|
-
test_dealloc_invlists ("IVF32,Flat");
|
180
|
-
}
|
181
|
-
|
182
|
-
TEST(TestIvlistDealloc, IVFSQ) {
|
183
|
-
test_dealloc_invlists ("IVF32,SQ8");
|
184
|
-
}
|
185
|
-
|
186
|
-
TEST(TestIvlistDealloc, IVFPQ) {
|
187
|
-
test_dealloc_invlists ("IVF32,PQ4np");
|
188
|
-
}
|
@@ -1,70 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
#include <cstdio>
|
9
|
-
#include <cstdlib>
|
10
|
-
#include <random>
|
11
|
-
|
12
|
-
#include <gtest/gtest.h>
|
13
|
-
|
14
|
-
#include <faiss/IndexIVFPQ.h>
|
15
|
-
#include <faiss/IndexFlat.h>
|
16
|
-
#include <faiss/utils/utils.h>
|
17
|
-
#include <faiss/utils/distances.h>
|
18
|
-
|
19
|
-
|
20
|
-
namespace {
|
21
|
-
|
22
|
-
// dimension of the vectors to index
|
23
|
-
int d = 64;
|
24
|
-
|
25
|
-
// size of the database we plan to index
|
26
|
-
size_t nb = 8000;
|
27
|
-
|
28
|
-
|
29
|
-
double eval_codec_error (long ncentroids, long m, const std::vector<float> &v)
|
30
|
-
{
|
31
|
-
faiss::IndexFlatL2 coarse_quantizer (d);
|
32
|
-
faiss::IndexIVFPQ index (&coarse_quantizer, d,
|
33
|
-
ncentroids, m, 8);
|
34
|
-
index.pq.cp.niter = 10; // speed up train
|
35
|
-
index.train (nb, v.data());
|
36
|
-
|
37
|
-
// encode and decode to compute reconstruction error
|
38
|
-
|
39
|
-
std::vector<faiss::Index::idx_t> keys (nb);
|
40
|
-
std::vector<uint8_t> codes (nb * m);
|
41
|
-
index.encode_multiple (nb, keys.data(), v.data(), codes.data(), true);
|
42
|
-
|
43
|
-
std::vector<float> v2 (nb * d);
|
44
|
-
index.decode_multiple (nb, keys.data(), codes.data(), v2.data());
|
45
|
-
|
46
|
-
return faiss::fvec_L2sqr (v.data(), v2.data(), nb * d);
|
47
|
-
}
|
48
|
-
|
49
|
-
} // namespace
|
50
|
-
|
51
|
-
|
52
|
-
TEST(IVFPQ, codec) {
|
53
|
-
|
54
|
-
std::vector <float> database (nb * d);
|
55
|
-
std::mt19937 rng;
|
56
|
-
std::uniform_real_distribution<> distrib;
|
57
|
-
for (size_t i = 0; i < nb * d; i++) {
|
58
|
-
database[i] = distrib(rng);
|
59
|
-
}
|
60
|
-
|
61
|
-
double err0 = eval_codec_error(16, 8, database);
|
62
|
-
|
63
|
-
// should be more accurate as there are more coarse centroids
|
64
|
-
double err1 = eval_codec_error(128, 8, database);
|
65
|
-
EXPECT_GT(err0, err1);
|
66
|
-
|
67
|
-
// should be more accurate as there are more PQ codes
|
68
|
-
double err2 = eval_codec_error(16, 16, database);
|
69
|
-
EXPECT_GT(err0, err2);
|
70
|
-
}
|
@@ -1,100 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
|
9
|
-
#include <cstdio>
|
10
|
-
#include <cstdlib>
|
11
|
-
#include <random>
|
12
|
-
|
13
|
-
#include <gtest/gtest.h>
|
14
|
-
|
15
|
-
#include <faiss/IndexIVFPQ.h>
|
16
|
-
#include <faiss/IndexFlat.h>
|
17
|
-
#include <faiss/index_io.h>
|
18
|
-
|
19
|
-
TEST(IVFPQ, accuracy) {
|
20
|
-
|
21
|
-
// dimension of the vectors to index
|
22
|
-
int d = 64;
|
23
|
-
|
24
|
-
// size of the database we plan to index
|
25
|
-
size_t nb = 1000;
|
26
|
-
|
27
|
-
// make a set of nt training vectors in the unit cube
|
28
|
-
// (could be the database)
|
29
|
-
size_t nt = 1500;
|
30
|
-
|
31
|
-
// make the index object and train it
|
32
|
-
faiss::IndexFlatL2 coarse_quantizer (d);
|
33
|
-
|
34
|
-
// a reasonable number of cetroids to index nb vectors
|
35
|
-
int ncentroids = 25;
|
36
|
-
|
37
|
-
faiss::IndexIVFPQ index (&coarse_quantizer, d,
|
38
|
-
ncentroids, 16, 8);
|
39
|
-
|
40
|
-
// index that gives the ground-truth
|
41
|
-
faiss::IndexFlatL2 index_gt (d);
|
42
|
-
|
43
|
-
std::mt19937 rng;
|
44
|
-
std::uniform_real_distribution<> distrib;
|
45
|
-
|
46
|
-
{ // training
|
47
|
-
|
48
|
-
std::vector <float> trainvecs (nt * d);
|
49
|
-
for (size_t i = 0; i < nt * d; i++) {
|
50
|
-
trainvecs[i] = distrib(rng);
|
51
|
-
}
|
52
|
-
index.verbose = true;
|
53
|
-
index.train (nt, trainvecs.data());
|
54
|
-
}
|
55
|
-
|
56
|
-
{ // populating the database
|
57
|
-
|
58
|
-
std::vector <float> database (nb * d);
|
59
|
-
for (size_t i = 0; i < nb * d; i++) {
|
60
|
-
database[i] = distrib(rng);
|
61
|
-
}
|
62
|
-
|
63
|
-
index.add (nb, database.data());
|
64
|
-
index_gt.add (nb, database.data());
|
65
|
-
}
|
66
|
-
|
67
|
-
int nq = 200;
|
68
|
-
int n_ok;
|
69
|
-
|
70
|
-
{ // searching the database
|
71
|
-
|
72
|
-
std::vector <float> queries (nq * d);
|
73
|
-
for (size_t i = 0; i < nq * d; i++) {
|
74
|
-
queries[i] = distrib(rng);
|
75
|
-
}
|
76
|
-
|
77
|
-
std::vector<faiss::Index::idx_t> gt_nns (nq);
|
78
|
-
std::vector<float> gt_dis (nq);
|
79
|
-
|
80
|
-
index_gt.search (nq, queries.data(), 1,
|
81
|
-
gt_dis.data(), gt_nns.data());
|
82
|
-
|
83
|
-
index.nprobe = 5;
|
84
|
-
int k = 5;
|
85
|
-
std::vector<faiss::Index::idx_t> nns (k * nq);
|
86
|
-
std::vector<float> dis (k * nq);
|
87
|
-
|
88
|
-
index.search (nq, queries.data(), k, dis.data(), nns.data());
|
89
|
-
|
90
|
-
n_ok = 0;
|
91
|
-
for (int q = 0; q < nq; q++) {
|
92
|
-
|
93
|
-
for (int i = 0; i < k; i++)
|
94
|
-
if (nns[q * k + i] == gt_nns[q])
|
95
|
-
n_ok++;
|
96
|
-
}
|
97
|
-
EXPECT_GT(n_ok, nq * 0.4);
|
98
|
-
}
|
99
|
-
|
100
|
-
}
|