faiss 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +36 -33
- data/vendor/faiss/faiss/AutoTune.h +6 -3
- data/vendor/faiss/faiss/Clustering.cpp +16 -12
- data/vendor/faiss/faiss/Index.cpp +3 -4
- data/vendor/faiss/faiss/Index.h +3 -3
- data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
- data/vendor/faiss/faiss/IndexBinary.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
- data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
- data/vendor/faiss/faiss/IndexFlat.h +0 -51
- data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
- data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
- data/vendor/faiss/faiss/IndexIVF.h +22 -15
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
- data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
- data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
- data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
- data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
- data/vendor/faiss/faiss/IndexRefine.h +73 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
- data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
- data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
- data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
- data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
- data/vendor/faiss/faiss/impl/io.cpp +33 -2
- data/vendor/faiss/faiss/impl/io.h +7 -2
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
- data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
- data/vendor/faiss/faiss/index_factory.cpp +112 -7
- data/vendor/faiss/faiss/index_io.h +1 -48
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
- data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
- data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
- data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
- data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
- data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
- data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
- data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
- data/vendor/faiss/faiss/utils/Heap.h +61 -50
- data/vendor/faiss/faiss/utils/distances.cpp +164 -319
- data/vendor/faiss/faiss/utils/distances.h +28 -20
- data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
- data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
- data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
- data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
- data/vendor/faiss/faiss/utils/hamming.h +2 -7
- data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
- data/vendor/faiss/faiss/utils/partitioning.h +69 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
- data/vendor/faiss/faiss/utils/simdlib.h +31 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
- metadata +43 -141
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
- data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
- data/vendor/faiss/c_api/AutoTune_c.h +0 -66
- data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
- data/vendor/faiss/c_api/Clustering_c.h +0 -123
- data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
- data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
- data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
- data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
- data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
- data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
- data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
- data/vendor/faiss/c_api/IndexShards_c.h +0 -39
- data/vendor/faiss/c_api/Index_c.cpp +0 -105
- data/vendor/faiss/c_api/Index_c.h +0 -183
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
- data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
- data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
- data/vendor/faiss/c_api/clone_index_c.h +0 -32
- data/vendor/faiss/c_api/error_c.h +0 -42
- data/vendor/faiss/c_api/error_impl.cpp +0 -27
- data/vendor/faiss/c_api/error_impl.h +0 -16
- data/vendor/faiss/c_api/faiss_c.h +0 -58
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
- data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
- data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
- data/vendor/faiss/c_api/index_factory_c.h +0 -30
- data/vendor/faiss/c_api/index_io_c.cpp +0 -42
- data/vendor/faiss/c_api/index_io_c.h +0 -50
- data/vendor/faiss/c_api/macros_impl.h +0 -110
- data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
- data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
- data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
- data/vendor/faiss/misc/test_blas.cpp +0 -87
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
- data/vendor/faiss/tests/test_merge.cpp +0 -260
- data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
- data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
- data/vendor/faiss/tests/test_params_override.cpp +0 -236
- data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
- data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
- data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
- data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -1,42 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
// Copyright 2004-present Facebook. All Rights Reserved
|
9
|
-
// -*- c++ -*-
|
10
|
-
// I/O code for indexes
|
11
|
-
|
12
|
-
#include "index_io_c.h"
|
13
|
-
#include "index_io.h"
|
14
|
-
#include "macros_impl.h"
|
15
|
-
|
16
|
-
using faiss::Index;
|
17
|
-
|
18
|
-
int faiss_write_index(const FaissIndex *idx, FILE *f) {
|
19
|
-
try {
|
20
|
-
faiss::write_index(reinterpret_cast<const Index*>(idx), f);
|
21
|
-
} CATCH_AND_HANDLE
|
22
|
-
}
|
23
|
-
|
24
|
-
int faiss_write_index_fname(const FaissIndex *idx, const char *fname) {
|
25
|
-
try {
|
26
|
-
faiss::write_index(reinterpret_cast<const Index*>(idx), fname);
|
27
|
-
} CATCH_AND_HANDLE
|
28
|
-
}
|
29
|
-
|
30
|
-
int faiss_read_index(FILE *f, int io_flags, FaissIndex **p_out) {
|
31
|
-
try {
|
32
|
-
auto out = faiss::read_index(f, io_flags);
|
33
|
-
*p_out = reinterpret_cast<FaissIndex*>(out);
|
34
|
-
} CATCH_AND_HANDLE
|
35
|
-
}
|
36
|
-
|
37
|
-
int faiss_read_index_fname(const char *fname, int io_flags, FaissIndex **p_out) {
|
38
|
-
try {
|
39
|
-
auto out = faiss::read_index(fname, io_flags);
|
40
|
-
*p_out = reinterpret_cast<FaissIndex*>(out);
|
41
|
-
} CATCH_AND_HANDLE
|
42
|
-
}
|
@@ -1,50 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
// Copyright 2004-present Facebook. All Rights Reserved
|
9
|
-
// -*- c++ -*-
|
10
|
-
// I/O code for indexes
|
11
|
-
|
12
|
-
|
13
|
-
#ifndef FAISS_INDEX_IO_C_H
|
14
|
-
#define FAISS_INDEX_IO_C_H
|
15
|
-
|
16
|
-
#include <stdio.h>
|
17
|
-
#include "faiss_c.h"
|
18
|
-
#include "Index_c.h"
|
19
|
-
|
20
|
-
#ifdef __cplusplus
|
21
|
-
extern "C" {
|
22
|
-
#endif
|
23
|
-
|
24
|
-
/** Write index to a file.
|
25
|
-
* This is equivalent to `faiss::write_index` when a `FILE*` stream is provided.
|
26
|
-
*/
|
27
|
-
int faiss_write_index(const FaissIndex *idx, FILE *f);
|
28
|
-
|
29
|
-
/** Write index to a file.
|
30
|
-
* This is equivalent to `faiss::write_index` when a file path is provided.
|
31
|
-
*/
|
32
|
-
int faiss_write_index_fname(const FaissIndex *idx, const char *fname);
|
33
|
-
|
34
|
-
#define FAISS_IO_FLAG_MMAP 1
|
35
|
-
#define FAISS_IO_FLAG_READ_ONLY 2
|
36
|
-
|
37
|
-
/** Read index from a file.
|
38
|
-
* This is equivalent to `faiss::read_index` when a `FILE*` stream is given.
|
39
|
-
*/
|
40
|
-
int faiss_read_index(FILE *f, int io_flags, FaissIndex **p_out);
|
41
|
-
|
42
|
-
/** Read index from a file.
|
43
|
-
* This is equivalent to `faiss::read_index` when a file path is given.
|
44
|
-
*/
|
45
|
-
int faiss_read_index_fname(const char *fname, int io_flags, FaissIndex **p_out);
|
46
|
-
|
47
|
-
#ifdef __cplusplus
|
48
|
-
}
|
49
|
-
#endif
|
50
|
-
#endif
|
@@ -1,110 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
// Copyright 2004-present Facebook. All Rights Reserved.
|
9
|
-
// -*- c++ -*-
|
10
|
-
|
11
|
-
/// Utility macros for the C wrapper implementation.
|
12
|
-
|
13
|
-
#ifndef MACROS_IMPL_H
|
14
|
-
#define MACROS_IMPL_H
|
15
|
-
|
16
|
-
#include "faiss_c.h"
|
17
|
-
#include "FaissException.h"
|
18
|
-
#include "error_impl.h"
|
19
|
-
#include <stdexcept>
|
20
|
-
#include <iostream>
|
21
|
-
|
22
|
-
/// CATCH_AND_HANDLE closes a `try { ... }` block inside a C wrapper function.
/// It stores the in-flight exception in `faiss_last_exception` and turns it
/// into an integer status code:
///    -2  faiss::FaissException
///    -4  any other std::exception
///    -1  anything else (recorded as std::runtime_error("Unknown error"))
///     0  no exception; the macro itself supplies the trailing `return 0;`
///
/// The exception is captured with std::current_exception() instead of
/// std::make_exception_ptr(e): the latter copies `e` using its static type,
/// which slices derived exceptions (e.g. a std::runtime_error caught as
/// std::exception& would lose its what() message).
///
/// When NDEBUG is not defined the error is additionally echoed to std::cerr.
#ifdef NDEBUG
#define CATCH_AND_HANDLE                                                 \
    catch (const faiss::FaissException&) {                               \
        faiss_last_exception = std::current_exception();                 \
        return -2;                                                       \
    } catch (const std::exception&) {                                    \
        faiss_last_exception = std::current_exception();                 \
        return -4;                                                       \
    } catch (...) {                                                      \
        faiss_last_exception =                                           \
            std::make_exception_ptr(                                     \
                std::runtime_error("Unknown error"));                    \
        return -1;                                                       \
    } return 0;
#else
#define CATCH_AND_HANDLE                                                 \
    catch (const faiss::FaissException& e) {                             \
        std::cerr << e.what() << '\n';                                   \
        faiss_last_exception = std::current_exception();                 \
        return -2;                                                       \
    } catch (const std::exception& e) {                                  \
        std::cerr << e.what() << '\n';                                   \
        faiss_last_exception = std::current_exception();                 \
        return -4;                                                       \
    } catch (...) {                                                      \
        std::cerr << "Unrecognized exception!\n";                        \
        faiss_last_exception =                                           \
            std::make_exception_ptr(                                     \
                std::runtime_error("Unknown error"));                    \
        return -1;                                                       \
    } return 0;
#endif
|
58
|
-
|
59
|
-
/// Defines `ty faiss_<clazz>_<name>(const Faiss<clazz>*)`, which reads member
/// `name` of the underlying `faiss::<clazz>` and converts it with static_cast.
#define DEFINE_GETTER(clazz, ty, name)                                   \
    ty faiss_ ## clazz ## _ ## name (const Faiss ## clazz *obj) {        \
        const auto* self =                                               \
            reinterpret_cast< const faiss::clazz *>(obj);                \
        return static_cast< ty >(self-> name);                           \
    }
|
65
|
-
|
66
|
-
/// Same as DEFINE_GETTER, for a class nested in `faiss::<parent>`: the handle
/// is viewed as `faiss::<parent>::<clazz>` before member `name` is read.
#define DEFINE_GETTER_SUBCLASS(clazz, parent, ty, name)                  \
    ty faiss_ ## clazz ## _ ## name (const Faiss ## clazz *obj) {        \
        const auto* self =                                               \
            reinterpret_cast<const faiss::parent::clazz *>(obj);         \
        return static_cast< ty >(self-> name);                           \
    }
|
72
|
-
|
73
|
-
/// Variant of DEFINE_GETTER that converts the member with a C-style cast,
/// so conversions that static_cast would reject are accepted.
#define DEFINE_GETTER_PERMISSIVE(clazz, ty, name)                        \
    ty faiss_ ## clazz ## _ ## name (const Faiss ## clazz *obj) {        \
        const auto* self =                                               \
            reinterpret_cast<const faiss::clazz *>(obj);                 \
        return ( ty ) (self-> name);                                     \
    }
|
79
|
-
|
80
|
-
/// Variant of DEFINE_GETTER_SUBCLASS that converts the member with a C-style
/// cast, so conversions that static_cast would reject are accepted.
#define DEFINE_GETTER_SUBCLASS_PERMISSIVE(clazz, parent, ty, name)       \
    ty faiss_ ## clazz ## _ ## name (const Faiss ## clazz *obj) {        \
        const auto* self =                                               \
            reinterpret_cast<const faiss::parent::clazz *>(obj);         \
        return ( ty ) (self-> name);                                     \
    }
|
86
|
-
|
87
|
-
/// Defines `void faiss_<clazz>_set_<name>(Faiss<clazz>*, ty)`, which assigns
/// `val` to member `name` of the underlying `faiss::<clazz>`.
#define DEFINE_SETTER(clazz, ty, name)                                   \
    void faiss_ ## clazz ## _set_ ## name (Faiss ## clazz *obj, ty val) { \
        auto* self = reinterpret_cast< faiss::clazz *>(obj);             \
        self-> name = val;                                               \
    }
|
91
|
-
|
92
|
-
/// Like DEFINE_SETTER, but the setter accepts `ty_from` and converts it to
/// `ty_to` with static_cast before storing it in member `name`.
#define DEFINE_SETTER_STATIC(clazz, ty_to, ty_from, name)                \
    void faiss_ ## clazz ## _set_ ## name (Faiss ## clazz *obj, ty_from val) { \
        auto* self = reinterpret_cast< faiss::clazz *>(obj);             \
        self-> name = static_cast< ty_to >(val);                         \
    }
|
97
|
-
|
98
|
-
/// Defines `void faiss_<clazz>_free(Faiss<clazz>*)`, which deletes the handle
/// as a `faiss::<clazz>*`.
#define DEFINE_DESTRUCTOR(clazz)                                         \
    void faiss_ ## clazz ## _free (Faiss ## clazz *obj) {                \
        auto* self = reinterpret_cast<faiss::clazz *>(obj);              \
        delete self;                                                     \
    }
|
102
|
-
|
103
|
-
/// Defines `Faiss<clazz>* faiss_<clazz>_cast(FaissIndex*)`: the handle is
/// viewed as a `faiss::Index*` and dynamic_cast to `faiss::<clazz>*`, so the
/// result is null when the index is not of that dynamic type.
#define DEFINE_INDEX_DOWNCAST(clazz)                                     \
    Faiss ## clazz * faiss_ ## clazz ## _cast (FaissIndex* index) {      \
        faiss::Index* base = reinterpret_cast<faiss::Index*>(index);     \
        faiss::clazz* derived = dynamic_cast< faiss::clazz *>(base);     \
        return reinterpret_cast<Faiss ## clazz *>(derived);              \
    }
|
109
|
-
|
110
|
-
#endif
|
@@ -1,154 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
#include <cmath>
|
11
|
-
#include <cstdio>
|
12
|
-
#include <cstdlib>
|
13
|
-
#include <random>
|
14
|
-
|
15
|
-
#include <sys/time.h>
|
16
|
-
|
17
|
-
|
18
|
-
#include <faiss/IndexPQ.h>
|
19
|
-
#include <faiss/IndexIVFFlat.h>
|
20
|
-
#include <faiss/IndexFlat.h>
|
21
|
-
#include <faiss/index_io.h>
|
22
|
-
|
23
|
-
/// Current wall-clock time in seconds, as reported by gettimeofday().
/// Callers in this file subtract a start value to obtain durations.
double elapsed ()
{
    struct timeval now;
    gettimeofday (&now, nullptr);
    const double whole_seconds = now.tv_sec;
    const double microseconds = now.tv_usec;
    return whole_seconds + microseconds * 1e-6;
}
|
29
|
-
|
30
|
-
|
31
|
-
int main ()
|
32
|
-
{
|
33
|
-
double t0 = elapsed();
|
34
|
-
|
35
|
-
// dimension of the vectors to index
|
36
|
-
int d = 128;
|
37
|
-
|
38
|
-
// size of the database we plan to index
|
39
|
-
size_t nb = 1000 * 1000;
|
40
|
-
|
41
|
-
// make a set of nt training vectors in the unit cube
|
42
|
-
// (could be the database)
|
43
|
-
size_t nt = 100 * 1000;
|
44
|
-
|
45
|
-
//---------------------------------------------------------------
|
46
|
-
// Define the core quantizer
|
47
|
-
// We choose a multiple inverted index for faster training with less data
|
48
|
-
// and because it usually offers best accuracy/speed trade-offs
|
49
|
-
//
|
50
|
-
// We here assume that its lifespan of this coarse quantizer will cover the
|
51
|
-
// lifespan of the inverted-file quantizer IndexIVFFlat below
|
52
|
-
// With dynamic allocation, one may give the responsability to free the
|
53
|
-
// quantizer to the inverted-file index (with attribute do_delete_quantizer)
|
54
|
-
//
|
55
|
-
// Note: a regular clustering algorithm would be defined as:
|
56
|
-
// faiss::IndexFlatL2 coarse_quantizer (d);
|
57
|
-
//
|
58
|
-
// Use nhash=2 subquantizers used to define the product coarse quantizer
|
59
|
-
// Number of bits: we will have 2^nbits_coarse centroids per subquantizer
|
60
|
-
// meaning (2^12)^nhash distinct inverted lists
|
61
|
-
size_t nhash = 2;
|
62
|
-
size_t nbits_subq = int (log2 (nb+1) / 2); // good choice in general
|
63
|
-
size_t ncentroids = 1 << (nhash * nbits_subq); // total # of centroids
|
64
|
-
|
65
|
-
faiss::MultiIndexQuantizer coarse_quantizer (d, nhash, nbits_subq);
|
66
|
-
|
67
|
-
printf ("IMI (%ld,%ld): %ld virtual centroids (target: %ld base vectors)",
|
68
|
-
nhash, nbits_subq, ncentroids, nb);
|
69
|
-
|
70
|
-
// the coarse quantizer should not be dealloced before the index
|
71
|
-
// 4 = nb of bytes per code (d must be a multiple of this)
|
72
|
-
// 8 = nb of bits per sub-code (almost always 8)
|
73
|
-
faiss::MetricType metric = faiss::METRIC_L2; // can be METRIC_INNER_PRODUCT
|
74
|
-
faiss::IndexIVFFlat index (&coarse_quantizer, d, ncentroids, metric);
|
75
|
-
index.quantizer_trains_alone = true;
|
76
|
-
|
77
|
-
// define the number of probes. 2048 is for high-dim, overkilled in practice
|
78
|
-
// Use 4-1024 depending on the trade-off speed accuracy that you want
|
79
|
-
index.nprobe = 2048;
|
80
|
-
|
81
|
-
std::mt19937 rng;
|
82
|
-
std::uniform_real_distribution<> distrib;
|
83
|
-
|
84
|
-
{ // training
|
85
|
-
printf ("[%.3f s] Generating %ld vectors in %dD for training\n",
|
86
|
-
elapsed() - t0, nt, d);
|
87
|
-
|
88
|
-
std::vector <float> trainvecs (nt * d);
|
89
|
-
for (size_t i = 0; i < nt * d; i++) {
|
90
|
-
trainvecs[i] = distrib(rng);
|
91
|
-
}
|
92
|
-
|
93
|
-
printf ("[%.3f s] Training the index\n", elapsed() - t0);
|
94
|
-
index.verbose = true;
|
95
|
-
index.train (nt, trainvecs.data());
|
96
|
-
}
|
97
|
-
|
98
|
-
size_t nq;
|
99
|
-
std::vector<float> queries;
|
100
|
-
|
101
|
-
{ // populating the database
|
102
|
-
printf ("[%.3f s] Building a dataset of %ld vectors to index\n",
|
103
|
-
elapsed() - t0, nb);
|
104
|
-
|
105
|
-
std::vector <float> database (nb * d);
|
106
|
-
for (size_t i = 0; i < nb * d; i++) {
|
107
|
-
database[i] = distrib(rng);
|
108
|
-
}
|
109
|
-
|
110
|
-
printf ("[%.3f s] Adding the vectors to the index\n", elapsed() - t0);
|
111
|
-
|
112
|
-
index.add (nb, database.data());
|
113
|
-
|
114
|
-
// remember a few elements from the database as queries
|
115
|
-
int i0 = 1234;
|
116
|
-
int i1 = 1244;
|
117
|
-
|
118
|
-
nq = i1 - i0;
|
119
|
-
queries.resize (nq * d);
|
120
|
-
for (int i = i0; i < i1; i++) {
|
121
|
-
for (int j = 0; j < d; j++) {
|
122
|
-
queries [(i - i0) * d + j] = database [i * d + j];
|
123
|
-
}
|
124
|
-
}
|
125
|
-
}
|
126
|
-
|
127
|
-
{ // searching the database
|
128
|
-
int k = 5;
|
129
|
-
printf ("[%.3f s] Searching the %d nearest neighbors "
|
130
|
-
"of %ld vectors in the index\n",
|
131
|
-
elapsed() - t0, k, nq);
|
132
|
-
|
133
|
-
std::vector<faiss::Index::idx_t> nns (k * nq);
|
134
|
-
std::vector<float> dis (k * nq);
|
135
|
-
|
136
|
-
index.search (nq, queries.data(), k, dis.data(), nns.data());
|
137
|
-
|
138
|
-
printf ("[%.3f s] Query results (vector ids, then distances):\n",
|
139
|
-
elapsed() - t0);
|
140
|
-
|
141
|
-
for (int i = 0; i < nq; i++) {
|
142
|
-
printf ("query %2d: ", i);
|
143
|
-
for (int j = 0; j < k; j++) {
|
144
|
-
printf ("%7ld ", nns[j + i * k]);
|
145
|
-
}
|
146
|
-
printf ("\n dis: ");
|
147
|
-
for (int j = 0; j < k; j++) {
|
148
|
-
printf ("%7g ", dis[j + i * k]);
|
149
|
-
}
|
150
|
-
printf ("\n");
|
151
|
-
}
|
152
|
-
}
|
153
|
-
return 0;
|
154
|
-
}
|
@@ -1,203 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
#include <cmath>
|
11
|
-
#include <cstdio>
|
12
|
-
#include <cstdlib>
|
13
|
-
#include <random>
|
14
|
-
|
15
|
-
#include <sys/time.h>
|
16
|
-
|
17
|
-
|
18
|
-
#include <faiss/IndexPQ.h>
|
19
|
-
#include <faiss/IndexIVFPQ.h>
|
20
|
-
#include <faiss/IndexFlat.h>
|
21
|
-
#include <faiss/index_io.h>
|
22
|
-
|
23
|
-
/// Current wall-clock time in seconds, as reported by gettimeofday().
/// Callers in this file subtract a start value to obtain durations.
double elapsed ()
{
    struct timeval now;
    gettimeofday (&now, nullptr);
    const double whole_seconds = now.tv_sec;
    const double microseconds = now.tv_usec;
    return whole_seconds + microseconds * 1e-6;
}
|
29
|
-
|
30
|
-
|
31
|
-
int main ()
|
32
|
-
{
|
33
|
-
double t0 = elapsed();
|
34
|
-
|
35
|
-
// dimension of the vectors to index
|
36
|
-
int d = 64;
|
37
|
-
|
38
|
-
// size of the database we plan to index
|
39
|
-
size_t nb = 1000 * 1000;
|
40
|
-
size_t add_bs = 10000; // # size of the blocks to add
|
41
|
-
|
42
|
-
// make a set of nt training vectors in the unit cube
|
43
|
-
// (could be the database)
|
44
|
-
size_t nt = 100 * 1000;
|
45
|
-
|
46
|
-
//---------------------------------------------------------------
|
47
|
-
// Define the core quantizer
|
48
|
-
// We choose a multiple inverted index for faster training with less data
|
49
|
-
// and because it usually offers best accuracy/speed trade-offs
|
50
|
-
//
|
51
|
-
// We here assume that its lifespan of this coarse quantizer will cover the
|
52
|
-
// lifespan of the inverted-file quantizer IndexIVFFlat below
|
53
|
-
// With dynamic allocation, one may give the responsability to free the
|
54
|
-
// quantizer to the inverted-file index (with attribute do_delete_quantizer)
|
55
|
-
//
|
56
|
-
// Note: a regular clustering algorithm would be defined as:
|
57
|
-
// faiss::IndexFlatL2 coarse_quantizer (d);
|
58
|
-
//
|
59
|
-
// Use nhash=2 subquantizers used to define the product coarse quantizer
|
60
|
-
// Number of bits: we will have 2^nbits_coarse centroids per subquantizer
|
61
|
-
// meaning (2^12)^nhash distinct inverted lists
|
62
|
-
//
|
63
|
-
// The parameter bytes_per_code is determined by the memory
|
64
|
-
// constraint, the dataset will use nb * (bytes_per_code + 8)
|
65
|
-
// bytes.
|
66
|
-
//
|
67
|
-
// The parameter nbits_subq is determined by the size of the dataset to index.
|
68
|
-
//
|
69
|
-
size_t nhash = 2;
|
70
|
-
size_t nbits_subq = 9;
|
71
|
-
size_t ncentroids = 1 << (nhash * nbits_subq); // total # of centroids
|
72
|
-
int bytes_per_code = 16;
|
73
|
-
|
74
|
-
faiss::MultiIndexQuantizer coarse_quantizer (d, nhash, nbits_subq);
|
75
|
-
|
76
|
-
printf ("IMI (%ld,%ld): %ld virtual centroids (target: %ld base vectors)",
|
77
|
-
nhash, nbits_subq, ncentroids, nb);
|
78
|
-
|
79
|
-
// the coarse quantizer should not be dealloced before the index
|
80
|
-
// 4 = nb of bytes per code (d must be a multiple of this)
|
81
|
-
// 8 = nb of bits per sub-code (almost always 8)
|
82
|
-
faiss::MetricType metric = faiss::METRIC_L2; // can be METRIC_INNER_PRODUCT
|
83
|
-
faiss::IndexIVFPQ index (&coarse_quantizer, d, ncentroids, bytes_per_code, 8);
|
84
|
-
index.quantizer_trains_alone = true;
|
85
|
-
|
86
|
-
// define the number of probes. 2048 is for high-dim, overkill in practice
|
87
|
-
// Use 4-1024 depending on the trade-off speed accuracy that you want
|
88
|
-
index.nprobe = 2048;
|
89
|
-
|
90
|
-
|
91
|
-
std::mt19937 rng;
|
92
|
-
std::uniform_real_distribution<> distrib;
|
93
|
-
|
94
|
-
{ // training.
|
95
|
-
|
96
|
-
// The distribution of the training vectors should be the same
|
97
|
-
// as the database vectors. It could be a sub-sample of the
|
98
|
-
// database vectors, if sampling is not biased. Here we just
|
99
|
-
// randomly generate the vectors.
|
100
|
-
|
101
|
-
printf ("[%.3f s] Generating %ld vectors in %dD for training\n",
|
102
|
-
elapsed() - t0, nt, d);
|
103
|
-
|
104
|
-
std::vector <float> trainvecs (nt * d);
|
105
|
-
for (size_t i = 0; i < nt; i++) {
|
106
|
-
for (size_t j = 0; j < d; j++) {
|
107
|
-
trainvecs[i * d + j] = distrib(rng);
|
108
|
-
}
|
109
|
-
}
|
110
|
-
|
111
|
-
printf ("[%.3f s] Training the index\n", elapsed() - t0);
|
112
|
-
index.verbose = true;
|
113
|
-
index.train (nt, trainvecs.data());
|
114
|
-
}
|
115
|
-
|
116
|
-
// the index can be re-loaded later with
|
117
|
-
// faiss::Index * idx = faiss::read_index("/tmp/trained_index.faissindex");
|
118
|
-
faiss::write_index(&index, "/tmp/trained_index.faissindex");
|
119
|
-
|
120
|
-
size_t nq;
|
121
|
-
std::vector<float> queries;
|
122
|
-
|
123
|
-
{ // populating the database
|
124
|
-
printf ("[%.3f s] Building a dataset of %ld vectors to index\n",
|
125
|
-
elapsed() - t0, nb);
|
126
|
-
|
127
|
-
std::vector <float> database (nb * d);
|
128
|
-
std::vector <long> ids (nb);
|
129
|
-
for (size_t i = 0; i < nb; i++) {
|
130
|
-
for (size_t j = 0; j < d; j++) {
|
131
|
-
database[i * d + j] = distrib(rng);
|
132
|
-
}
|
133
|
-
ids[i] = 8760000000L + i;
|
134
|
-
}
|
135
|
-
|
136
|
-
printf ("[%.3f s] Adding the vectors to the index\n", elapsed() - t0);
|
137
|
-
|
138
|
-
for (size_t begin = 0; begin < nb; begin += add_bs) {
|
139
|
-
size_t end = std::min (begin + add_bs, nb);
|
140
|
-
index.add_with_ids (end - begin,
|
141
|
-
database.data() + d * begin,
|
142
|
-
ids.data() + begin);
|
143
|
-
}
|
144
|
-
|
145
|
-
// remember a few elements from the database as queries
|
146
|
-
int i0 = 1234;
|
147
|
-
int i1 = 1244;
|
148
|
-
|
149
|
-
nq = i1 - i0;
|
150
|
-
queries.resize (nq * d);
|
151
|
-
for (int i = i0; i < i1; i++) {
|
152
|
-
for (int j = 0; j < d; j++) {
|
153
|
-
queries [(i - i0) * d + j] = database [i * d + j];
|
154
|
-
}
|
155
|
-
}
|
156
|
-
}
|
157
|
-
|
158
|
-
// A few notes on the internal format of the index:
|
159
|
-
//
|
160
|
-
// - the positing lists for PQ codes are index.codes, which is a
|
161
|
-
// std::vector < std::vector<uint8_t> >
|
162
|
-
// if n is the length of posting list #i, codes[i] has length bytes_per_code * n
|
163
|
-
//
|
164
|
-
// - the corresponding ids are stored in index.ids
|
165
|
-
//
|
166
|
-
// - given a vector float *x, finding which k centroids are
|
167
|
-
// closest to it (ie to find the nearest neighbors) can be done with
|
168
|
-
//
|
169
|
-
// long *centroid_ids = new long[k];
|
170
|
-
// float *distances = new float[k];
|
171
|
-
// index.quantizer->search (1, x, k, dis, centroids_ids);
|
172
|
-
//
|
173
|
-
|
174
|
-
faiss::write_index(&index, "/tmp/populated_index.faissindex");
|
175
|
-
|
176
|
-
{ // searching the database
|
177
|
-
int k = 5;
|
178
|
-
printf ("[%.3f s] Searching the %d nearest neighbors "
|
179
|
-
"of %ld vectors in the index\n",
|
180
|
-
elapsed() - t0, k, nq);
|
181
|
-
|
182
|
-
std::vector<faiss::Index::idx_t> nns (k * nq);
|
183
|
-
std::vector<float> dis (k * nq);
|
184
|
-
|
185
|
-
index.search (nq, queries.data(), k, dis.data(), nns.data());
|
186
|
-
|
187
|
-
printf ("[%.3f s] Query results (vector ids, then distances):\n",
|
188
|
-
elapsed() - t0);
|
189
|
-
|
190
|
-
for (int i = 0; i < nq; i++) {
|
191
|
-
printf ("query %2d: ", i);
|
192
|
-
for (int j = 0; j < k; j++) {
|
193
|
-
printf ("%7ld ", nns[j + i * k]);
|
194
|
-
}
|
195
|
-
printf ("\n dis: ");
|
196
|
-
for (int j = 0; j < k; j++) {
|
197
|
-
printf ("%7g ", dis[j + i * k]);
|
198
|
-
}
|
199
|
-
printf ("\n");
|
200
|
-
}
|
201
|
-
}
|
202
|
-
return 0;
|
203
|
-
}
|