faiss 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +36 -33
- data/vendor/faiss/faiss/AutoTune.h +6 -3
- data/vendor/faiss/faiss/Clustering.cpp +16 -12
- data/vendor/faiss/faiss/Index.cpp +3 -4
- data/vendor/faiss/faiss/Index.h +3 -3
- data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
- data/vendor/faiss/faiss/IndexBinary.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
- data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
- data/vendor/faiss/faiss/IndexFlat.h +0 -51
- data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
- data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
- data/vendor/faiss/faiss/IndexIVF.h +22 -15
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
- data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
- data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
- data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
- data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
- data/vendor/faiss/faiss/IndexRefine.h +73 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
- data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
- data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
- data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
- data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
- data/vendor/faiss/faiss/impl/io.cpp +33 -2
- data/vendor/faiss/faiss/impl/io.h +7 -2
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
- data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
- data/vendor/faiss/faiss/index_factory.cpp +112 -7
- data/vendor/faiss/faiss/index_io.h +1 -48
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
- data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
- data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
- data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
- data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
- data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
- data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
- data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
- data/vendor/faiss/faiss/utils/Heap.h +61 -50
- data/vendor/faiss/faiss/utils/distances.cpp +164 -319
- data/vendor/faiss/faiss/utils/distances.h +28 -20
- data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
- data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
- data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
- data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
- data/vendor/faiss/faiss/utils/hamming.h +2 -7
- data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
- data/vendor/faiss/faiss/utils/partitioning.h +69 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
- data/vendor/faiss/faiss/utils/simdlib.h +31 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
- metadata +43 -141
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
- data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
- data/vendor/faiss/c_api/AutoTune_c.h +0 -66
- data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
- data/vendor/faiss/c_api/Clustering_c.h +0 -123
- data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
- data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
- data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
- data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
- data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
- data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
- data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
- data/vendor/faiss/c_api/IndexShards_c.h +0 -39
- data/vendor/faiss/c_api/Index_c.cpp +0 -105
- data/vendor/faiss/c_api/Index_c.h +0 -183
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
- data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
- data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
- data/vendor/faiss/c_api/clone_index_c.h +0 -32
- data/vendor/faiss/c_api/error_c.h +0 -42
- data/vendor/faiss/c_api/error_impl.cpp +0 -27
- data/vendor/faiss/c_api/error_impl.h +0 -16
- data/vendor/faiss/c_api/faiss_c.h +0 -58
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
- data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
- data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
- data/vendor/faiss/c_api/index_factory_c.h +0 -30
- data/vendor/faiss/c_api/index_io_c.cpp +0 -42
- data/vendor/faiss/c_api/index_io_c.h +0 -50
- data/vendor/faiss/c_api/macros_impl.h +0 -110
- data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
- data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
- data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
- data/vendor/faiss/misc/test_blas.cpp +0 -87
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
- data/vendor/faiss/tests/test_merge.cpp +0 -260
- data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
- data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
- data/vendor/faiss/tests/test_params_override.cpp +0 -236
- data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
- data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
- data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
- data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -1,42 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
// Copyright 2004-present Facebook. All Rights Reserved
|
9
|
-
// -*- c++ -*-
|
10
|
-
// I/O code for indexes
|
11
|
-
|
12
|
-
#include "index_io_c.h"
|
13
|
-
#include "index_io.h"
|
14
|
-
#include "macros_impl.h"
|
15
|
-
|
16
|
-
using faiss::Index;
|
17
|
-
|
18
|
-
int faiss_write_index(const FaissIndex *idx, FILE *f) {
|
19
|
-
try {
|
20
|
-
faiss::write_index(reinterpret_cast<const Index*>(idx), f);
|
21
|
-
} CATCH_AND_HANDLE
|
22
|
-
}
|
23
|
-
|
24
|
-
int faiss_write_index_fname(const FaissIndex *idx, const char *fname) {
|
25
|
-
try {
|
26
|
-
faiss::write_index(reinterpret_cast<const Index*>(idx), fname);
|
27
|
-
} CATCH_AND_HANDLE
|
28
|
-
}
|
29
|
-
|
30
|
-
int faiss_read_index(FILE *f, int io_flags, FaissIndex **p_out) {
|
31
|
-
try {
|
32
|
-
auto out = faiss::read_index(f, io_flags);
|
33
|
-
*p_out = reinterpret_cast<FaissIndex*>(out);
|
34
|
-
} CATCH_AND_HANDLE
|
35
|
-
}
|
36
|
-
|
37
|
-
int faiss_read_index_fname(const char *fname, int io_flags, FaissIndex **p_out) {
|
38
|
-
try {
|
39
|
-
auto out = faiss::read_index(fname, io_flags);
|
40
|
-
*p_out = reinterpret_cast<FaissIndex*>(out);
|
41
|
-
} CATCH_AND_HANDLE
|
42
|
-
}
|
@@ -1,50 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
// Copyright 2004-present Facebook. All Rights Reserved
|
9
|
-
// -*- c++ -*-
|
10
|
-
// I/O code for indexes
|
11
|
-
|
12
|
-
|
13
|
-
#ifndef FAISS_INDEX_IO_C_H
|
14
|
-
#define FAISS_INDEX_IO_C_H
|
15
|
-
|
16
|
-
#include <stdio.h>
|
17
|
-
#include "faiss_c.h"
|
18
|
-
#include "Index_c.h"
|
19
|
-
|
20
|
-
#ifdef __cplusplus
|
21
|
-
extern "C" {
|
22
|
-
#endif
|
23
|
-
|
24
|
-
/** Write index to a file.
|
25
|
-
* This is equivalent to `faiss::write_index` when a `FILE*` stream is provided.
|
26
|
-
*/
|
27
|
-
int faiss_write_index(const FaissIndex *idx, FILE *f);
|
28
|
-
|
29
|
-
/** Write index to a file.
|
30
|
-
* This is equivalent to `faiss::write_index` when a file path is provided.
|
31
|
-
*/
|
32
|
-
int faiss_write_index_fname(const FaissIndex *idx, const char *fname);
|
33
|
-
|
34
|
-
#define FAISS_IO_FLAG_MMAP 1
|
35
|
-
#define FAISS_IO_FLAG_READ_ONLY 2
|
36
|
-
|
37
|
-
/** Read index from a file.
|
38
|
-
* This is equivalent to `faiss::read_index` when a `FILE*` stream is given.
|
39
|
-
*/
|
40
|
-
int faiss_read_index(FILE *f, int io_flags, FaissIndex **p_out);
|
41
|
-
|
42
|
-
/** Read index from a file.
|
43
|
-
* This is equivalent to `faiss::read_index` when a file path is given.
|
44
|
-
*/
|
45
|
-
int faiss_read_index_fname(const char *fname, int io_flags, FaissIndex **p_out);
|
46
|
-
|
47
|
-
#ifdef __cplusplus
|
48
|
-
}
|
49
|
-
#endif
|
50
|
-
#endif
|
@@ -1,110 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
// Copyright 2004-present Facebook. All Rights Reserved.
|
9
|
-
// -*- c++ -*-
|
10
|
-
|
11
|
-
/// Utility macros for the C wrapper implementation.
|
12
|
-
|
13
|
-
#ifndef MACROS_IMPL_H
|
14
|
-
#define MACROS_IMPL_H
|
15
|
-
|
16
|
-
#include "faiss_c.h"
|
17
|
-
#include "FaissException.h"
|
18
|
-
#include "error_impl.h"
|
19
|
-
#include <stdexcept>
|
20
|
-
#include <iostream>
|
21
|
-
|
22
|
-
#ifdef NDEBUG
|
23
|
-
/// Closes a `try` block inside a C API wrapper (release build variant).
/// Stores the in-flight exception in `faiss_last_exception` and turns it into
/// a status code: -2 for faiss::FaissException, -4 for any other
/// std::exception, -1 for anything else. Falls through to `return 0` when
/// nothing was thrown.
///
/// std::current_exception() is used rather than std::make_exception_ptr(e):
/// the latter copies through the static type of the caught reference, which
/// slices derived exceptions and loses their what() message.
#define CATCH_AND_HANDLE                                     \
    catch (const faiss::FaissException&) {                   \
        faiss_last_exception = std::current_exception();     \
        return -2;                                           \
    } catch (const std::exception&) {                        \
        faiss_last_exception = std::current_exception();     \
        return -4;                                           \
    } catch (...) {                                          \
        faiss_last_exception =                               \
            std::make_exception_ptr(                         \
                std::runtime_error("Unknown error"));        \
        return -1;                                           \
    } return 0;
|
38
|
-
#else
|
39
|
-
/// Closes a `try` block inside a C API wrapper (debug build variant).
/// Same contract as the release variant, but additionally prints the error to
/// std::cerr: -2 for faiss::FaissException, -4 for any other std::exception,
/// -1 for anything else, and `return 0` when nothing was thrown.
///
/// std::current_exception() is used rather than std::make_exception_ptr(e):
/// the latter copies through the static type of the caught reference, which
/// slices derived exceptions and loses their what() message.
#define CATCH_AND_HANDLE                                     \
    catch (const faiss::FaissException& e) {                 \
        std::cerr << e.what() << '\n';                       \
        faiss_last_exception = std::current_exception();     \
        return -2;                                           \
    } catch (const std::exception& e) {                      \
        std::cerr << e.what() << '\n';                       \
        faiss_last_exception = std::current_exception();     \
        return -4;                                           \
    } catch (...) {                                          \
        std::cerr << "Unrecognized exception!\n";            \
        faiss_last_exception =                               \
            std::make_exception_ptr(                         \
                std::runtime_error("Unknown error"));        \
        return -1;                                           \
    } return 0;
|
57
|
-
#endif
|
58
|
-
|
59
|
-
/// Defines `ty faiss_<clazz>_<name>(const Faiss<clazz>*)`: reads member `name`
/// of the wrapped `faiss::<clazz>` object and converts it with static_cast.
#define DEFINE_GETTER(clazz, ty, name)                                      \
    ty faiss_ ## clazz ## _ ## name (const Faiss ## clazz *obj) {           \
        const faiss::clazz *wrapped =                                       \
            reinterpret_cast< const faiss::clazz *>(obj);                   \
        return static_cast< ty >(wrapped-> name);                           \
    }
|
65
|
-
|
66
|
-
/// Like DEFINE_GETTER, for a class nested in `faiss::<parent>`: defines
/// `ty faiss_<clazz>_<name>(const Faiss<clazz>*)` reading member `name` of
/// the wrapped `faiss::<parent>::<clazz>` object via static_cast.
#define DEFINE_GETTER_SUBCLASS(clazz, parent, ty, name)                     \
    ty faiss_ ## clazz ## _ ## name (const Faiss ## clazz *obj) {           \
        const faiss::parent::clazz *wrapped =                               \
            reinterpret_cast<const faiss::parent::clazz *>(obj);            \
        return static_cast< ty >(wrapped-> name);                           \
    }
|
72
|
-
|
73
|
-
/// Variant of DEFINE_GETTER that converts with a C-style cast, for member
/// types that static_cast would reject.
#define DEFINE_GETTER_PERMISSIVE(clazz, ty, name)                           \
    ty faiss_ ## clazz ## _ ## name (const Faiss ## clazz *obj) {           \
        const faiss::clazz *wrapped =                                       \
            reinterpret_cast<const faiss::clazz *>(obj);                    \
        return ( ty ) (wrapped-> name);                                     \
    }
|
79
|
-
|
80
|
-
/// Variant of DEFINE_GETTER_SUBCLASS that converts with a C-style cast, for
/// member types that static_cast would reject.
#define DEFINE_GETTER_SUBCLASS_PERMISSIVE(clazz, parent, ty, name)          \
    ty faiss_ ## clazz ## _ ## name (const Faiss ## clazz *obj) {           \
        const faiss::parent::clazz *wrapped =                               \
            reinterpret_cast<const faiss::parent::clazz *>(obj);            \
        return ( ty ) (wrapped-> name);                                     \
    }
|
86
|
-
|
87
|
-
/// Defines `void faiss_<clazz>_set_<name>(Faiss<clazz>*, ty)`: assigns `val`
/// to member `name` of the wrapped `faiss::<clazz>` object.
#define DEFINE_SETTER(clazz, ty, name)                                      \
    void faiss_ ## clazz ## _set_ ## name (Faiss ## clazz *obj, ty val) {   \
        faiss::clazz *wrapped = reinterpret_cast< faiss::clazz *>(obj);     \
        wrapped-> name = val;                                               \
    }
|
91
|
-
|
92
|
-
/// Defines `void faiss_<clazz>_set_<name>(Faiss<clazz>*, ty_from)`: converts
/// `val` to `ty_to` with static_cast and assigns it to member `name` of the
/// wrapped `faiss::<clazz>` object.
#define DEFINE_SETTER_STATIC(clazz, ty_to, ty_from, name)                      \
    void faiss_ ## clazz ## _set_ ## name (Faiss ## clazz *obj, ty_from val) { \
        faiss::clazz *wrapped = reinterpret_cast< faiss::clazz *>(obj);        \
        wrapped-> name = static_cast< ty_to >(val);                            \
    }
|
97
|
-
|
98
|
-
/// Defines `void faiss_<clazz>_free(Faiss<clazz>*)`: deletes the wrapped
/// `faiss::<clazz>` object behind the opaque handle.
#define DEFINE_DESTRUCTOR(clazz)                                            \
    void faiss_ ## clazz ## _free (Faiss ## clazz *obj) {                   \
        faiss::clazz *wrapped = reinterpret_cast<faiss::clazz *>(obj);      \
        delete wrapped;                                                     \
    }
|
102
|
-
|
103
|
-
/// Defines `Faiss<clazz>* faiss_<clazz>_cast(FaissIndex*)`: dynamic_casts the
/// wrapped `faiss::Index` to `faiss::<clazz>` and returns the result as an
/// opaque handle (null when the dynamic type does not match).
#define DEFINE_INDEX_DOWNCAST(clazz)                                        \
    Faiss ## clazz * faiss_ ## clazz ## _cast (FaissIndex* index) {         \
        faiss::Index *base = reinterpret_cast<faiss::Index*>(index);        \
        faiss::clazz *derived = dynamic_cast< faiss::clazz *>(base);        \
        return reinterpret_cast<Faiss ## clazz *>(derived);                 \
    }
|
109
|
-
|
110
|
-
#endif
|
@@ -1,154 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
#include <cmath>
|
11
|
-
#include <cstdio>
|
12
|
-
#include <cstdlib>
|
13
|
-
#include <random>
|
14
|
-
|
15
|
-
#include <sys/time.h>
|
16
|
-
|
17
|
-
|
18
|
-
#include <faiss/IndexPQ.h>
|
19
|
-
#include <faiss/IndexIVFFlat.h>
|
20
|
-
#include <faiss/IndexFlat.h>
|
21
|
-
#include <faiss/index_io.h>
|
22
|
-
|
23
|
-
/// Current wall-clock time in seconds, as reported by gettimeofday()
/// (seconds plus microseconds scaled by 1e-6).
double elapsed ()
{
    timeval now;
    gettimeofday (&now, nullptr);
    const double micro_part = now.tv_usec * 1e-6;
    return now.tv_sec + micro_part;
}
|
29
|
-
|
30
|
-
|
31
|
-
int main ()
|
32
|
-
{
|
33
|
-
double t0 = elapsed();
|
34
|
-
|
35
|
-
// dimension of the vectors to index
|
36
|
-
int d = 128;
|
37
|
-
|
38
|
-
// size of the database we plan to index
|
39
|
-
size_t nb = 1000 * 1000;
|
40
|
-
|
41
|
-
// make a set of nt training vectors in the unit cube
|
42
|
-
// (could be the database)
|
43
|
-
size_t nt = 100 * 1000;
|
44
|
-
|
45
|
-
//---------------------------------------------------------------
|
46
|
-
// Define the core quantizer
|
47
|
-
// We choose a multiple inverted index for faster training with less data
|
48
|
-
// and because it usually offers best accuracy/speed trade-offs
|
49
|
-
//
|
50
|
-
// We here assume that its lifespan of this coarse quantizer will cover the
|
51
|
-
// lifespan of the inverted-file quantizer IndexIVFFlat below
|
52
|
-
// With dynamic allocation, one may give the responsability to free the
|
53
|
-
// quantizer to the inverted-file index (with attribute do_delete_quantizer)
|
54
|
-
//
|
55
|
-
// Note: a regular clustering algorithm would be defined as:
|
56
|
-
// faiss::IndexFlatL2 coarse_quantizer (d);
|
57
|
-
//
|
58
|
-
// Use nhash=2 subquantizers used to define the product coarse quantizer
|
59
|
-
// Number of bits: we will have 2^nbits_coarse centroids per subquantizer
|
60
|
-
// meaning (2^12)^nhash distinct inverted lists
|
61
|
-
size_t nhash = 2;
|
62
|
-
size_t nbits_subq = int (log2 (nb+1) / 2); // good choice in general
|
63
|
-
size_t ncentroids = 1 << (nhash * nbits_subq); // total # of centroids
|
64
|
-
|
65
|
-
faiss::MultiIndexQuantizer coarse_quantizer (d, nhash, nbits_subq);
|
66
|
-
|
67
|
-
printf ("IMI (%ld,%ld): %ld virtual centroids (target: %ld base vectors)",
|
68
|
-
nhash, nbits_subq, ncentroids, nb);
|
69
|
-
|
70
|
-
// the coarse quantizer should not be dealloced before the index
|
71
|
-
// 4 = nb of bytes per code (d must be a multiple of this)
|
72
|
-
// 8 = nb of bits per sub-code (almost always 8)
|
73
|
-
faiss::MetricType metric = faiss::METRIC_L2; // can be METRIC_INNER_PRODUCT
|
74
|
-
faiss::IndexIVFFlat index (&coarse_quantizer, d, ncentroids, metric);
|
75
|
-
index.quantizer_trains_alone = true;
|
76
|
-
|
77
|
-
// define the number of probes. 2048 is for high-dim, overkilled in practice
|
78
|
-
// Use 4-1024 depending on the trade-off speed accuracy that you want
|
79
|
-
index.nprobe = 2048;
|
80
|
-
|
81
|
-
std::mt19937 rng;
|
82
|
-
std::uniform_real_distribution<> distrib;
|
83
|
-
|
84
|
-
{ // training
|
85
|
-
printf ("[%.3f s] Generating %ld vectors in %dD for training\n",
|
86
|
-
elapsed() - t0, nt, d);
|
87
|
-
|
88
|
-
std::vector <float> trainvecs (nt * d);
|
89
|
-
for (size_t i = 0; i < nt * d; i++) {
|
90
|
-
trainvecs[i] = distrib(rng);
|
91
|
-
}
|
92
|
-
|
93
|
-
printf ("[%.3f s] Training the index\n", elapsed() - t0);
|
94
|
-
index.verbose = true;
|
95
|
-
index.train (nt, trainvecs.data());
|
96
|
-
}
|
97
|
-
|
98
|
-
size_t nq;
|
99
|
-
std::vector<float> queries;
|
100
|
-
|
101
|
-
{ // populating the database
|
102
|
-
printf ("[%.3f s] Building a dataset of %ld vectors to index\n",
|
103
|
-
elapsed() - t0, nb);
|
104
|
-
|
105
|
-
std::vector <float> database (nb * d);
|
106
|
-
for (size_t i = 0; i < nb * d; i++) {
|
107
|
-
database[i] = distrib(rng);
|
108
|
-
}
|
109
|
-
|
110
|
-
printf ("[%.3f s] Adding the vectors to the index\n", elapsed() - t0);
|
111
|
-
|
112
|
-
index.add (nb, database.data());
|
113
|
-
|
114
|
-
// remember a few elements from the database as queries
|
115
|
-
int i0 = 1234;
|
116
|
-
int i1 = 1244;
|
117
|
-
|
118
|
-
nq = i1 - i0;
|
119
|
-
queries.resize (nq * d);
|
120
|
-
for (int i = i0; i < i1; i++) {
|
121
|
-
for (int j = 0; j < d; j++) {
|
122
|
-
queries [(i - i0) * d + j] = database [i * d + j];
|
123
|
-
}
|
124
|
-
}
|
125
|
-
}
|
126
|
-
|
127
|
-
{ // searching the database
|
128
|
-
int k = 5;
|
129
|
-
printf ("[%.3f s] Searching the %d nearest neighbors "
|
130
|
-
"of %ld vectors in the index\n",
|
131
|
-
elapsed() - t0, k, nq);
|
132
|
-
|
133
|
-
std::vector<faiss::Index::idx_t> nns (k * nq);
|
134
|
-
std::vector<float> dis (k * nq);
|
135
|
-
|
136
|
-
index.search (nq, queries.data(), k, dis.data(), nns.data());
|
137
|
-
|
138
|
-
printf ("[%.3f s] Query results (vector ids, then distances):\n",
|
139
|
-
elapsed() - t0);
|
140
|
-
|
141
|
-
for (int i = 0; i < nq; i++) {
|
142
|
-
printf ("query %2d: ", i);
|
143
|
-
for (int j = 0; j < k; j++) {
|
144
|
-
printf ("%7ld ", nns[j + i * k]);
|
145
|
-
}
|
146
|
-
printf ("\n dis: ");
|
147
|
-
for (int j = 0; j < k; j++) {
|
148
|
-
printf ("%7g ", dis[j + i * k]);
|
149
|
-
}
|
150
|
-
printf ("\n");
|
151
|
-
}
|
152
|
-
}
|
153
|
-
return 0;
|
154
|
-
}
|
@@ -1,203 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
#include <cmath>
|
11
|
-
#include <cstdio>
|
12
|
-
#include <cstdlib>
|
13
|
-
#include <random>
|
14
|
-
|
15
|
-
#include <sys/time.h>
|
16
|
-
|
17
|
-
|
18
|
-
#include <faiss/IndexPQ.h>
|
19
|
-
#include <faiss/IndexIVFPQ.h>
|
20
|
-
#include <faiss/IndexFlat.h>
|
21
|
-
#include <faiss/index_io.h>
|
22
|
-
|
23
|
-
/// Current wall-clock time in seconds, as reported by gettimeofday()
/// (seconds plus microseconds scaled by 1e-6).
double elapsed ()
{
    timeval now;
    gettimeofday (&now, nullptr);
    const double micro_part = now.tv_usec * 1e-6;
    return now.tv_sec + micro_part;
}
|
29
|
-
|
30
|
-
|
31
|
-
int main ()
|
32
|
-
{
|
33
|
-
double t0 = elapsed();
|
34
|
-
|
35
|
-
// dimension of the vectors to index
|
36
|
-
int d = 64;
|
37
|
-
|
38
|
-
// size of the database we plan to index
|
39
|
-
size_t nb = 1000 * 1000;
|
40
|
-
size_t add_bs = 10000; // # size of the blocks to add
|
41
|
-
|
42
|
-
// make a set of nt training vectors in the unit cube
|
43
|
-
// (could be the database)
|
44
|
-
size_t nt = 100 * 1000;
|
45
|
-
|
46
|
-
//---------------------------------------------------------------
|
47
|
-
// Define the core quantizer
|
48
|
-
// We choose a multiple inverted index for faster training with less data
|
49
|
-
// and because it usually offers best accuracy/speed trade-offs
|
50
|
-
//
|
51
|
-
// We here assume that its lifespan of this coarse quantizer will cover the
|
52
|
-
// lifespan of the inverted-file quantizer IndexIVFFlat below
|
53
|
-
// With dynamic allocation, one may give the responsability to free the
|
54
|
-
// quantizer to the inverted-file index (with attribute do_delete_quantizer)
|
55
|
-
//
|
56
|
-
// Note: a regular clustering algorithm would be defined as:
|
57
|
-
// faiss::IndexFlatL2 coarse_quantizer (d);
|
58
|
-
//
|
59
|
-
// Use nhash=2 subquantizers used to define the product coarse quantizer
|
60
|
-
// Number of bits: we will have 2^nbits_coarse centroids per subquantizer
|
61
|
-
// meaning (2^12)^nhash distinct inverted lists
|
62
|
-
//
|
63
|
-
// The parameter bytes_per_code is determined by the memory
|
64
|
-
// constraint, the dataset will use nb * (bytes_per_code + 8)
|
65
|
-
// bytes.
|
66
|
-
//
|
67
|
-
// The parameter nbits_subq is determined by the size of the dataset to index.
|
68
|
-
//
|
69
|
-
size_t nhash = 2;
|
70
|
-
size_t nbits_subq = 9;
|
71
|
-
size_t ncentroids = 1 << (nhash * nbits_subq); // total # of centroids
|
72
|
-
int bytes_per_code = 16;
|
73
|
-
|
74
|
-
faiss::MultiIndexQuantizer coarse_quantizer (d, nhash, nbits_subq);
|
75
|
-
|
76
|
-
printf ("IMI (%ld,%ld): %ld virtual centroids (target: %ld base vectors)",
|
77
|
-
nhash, nbits_subq, ncentroids, nb);
|
78
|
-
|
79
|
-
// the coarse quantizer should not be dealloced before the index
|
80
|
-
// 4 = nb of bytes per code (d must be a multiple of this)
|
81
|
-
// 8 = nb of bits per sub-code (almost always 8)
|
82
|
-
faiss::MetricType metric = faiss::METRIC_L2; // can be METRIC_INNER_PRODUCT
|
83
|
-
faiss::IndexIVFPQ index (&coarse_quantizer, d, ncentroids, bytes_per_code, 8);
|
84
|
-
index.quantizer_trains_alone = true;
|
85
|
-
|
86
|
-
// define the number of probes. 2048 is for high-dim, overkill in practice
|
87
|
-
// Use 4-1024 depending on the trade-off speed accuracy that you want
|
88
|
-
index.nprobe = 2048;
|
89
|
-
|
90
|
-
|
91
|
-
std::mt19937 rng;
|
92
|
-
std::uniform_real_distribution<> distrib;
|
93
|
-
|
94
|
-
{ // training.
|
95
|
-
|
96
|
-
// The distribution of the training vectors should be the same
|
97
|
-
// as the database vectors. It could be a sub-sample of the
|
98
|
-
// database vectors, if sampling is not biased. Here we just
|
99
|
-
// randomly generate the vectors.
|
100
|
-
|
101
|
-
printf ("[%.3f s] Generating %ld vectors in %dD for training\n",
|
102
|
-
elapsed() - t0, nt, d);
|
103
|
-
|
104
|
-
std::vector <float> trainvecs (nt * d);
|
105
|
-
for (size_t i = 0; i < nt; i++) {
|
106
|
-
for (size_t j = 0; j < d; j++) {
|
107
|
-
trainvecs[i * d + j] = distrib(rng);
|
108
|
-
}
|
109
|
-
}
|
110
|
-
|
111
|
-
printf ("[%.3f s] Training the index\n", elapsed() - t0);
|
112
|
-
index.verbose = true;
|
113
|
-
index.train (nt, trainvecs.data());
|
114
|
-
}
|
115
|
-
|
116
|
-
// the index can be re-loaded later with
|
117
|
-
// faiss::Index * idx = faiss::read_index("/tmp/trained_index.faissindex");
|
118
|
-
faiss::write_index(&index, "/tmp/trained_index.faissindex");
|
119
|
-
|
120
|
-
size_t nq;
|
121
|
-
std::vector<float> queries;
|
122
|
-
|
123
|
-
{ // populating the database
|
124
|
-
printf ("[%.3f s] Building a dataset of %ld vectors to index\n",
|
125
|
-
elapsed() - t0, nb);
|
126
|
-
|
127
|
-
std::vector <float> database (nb * d);
|
128
|
-
std::vector <long> ids (nb);
|
129
|
-
for (size_t i = 0; i < nb; i++) {
|
130
|
-
for (size_t j = 0; j < d; j++) {
|
131
|
-
database[i * d + j] = distrib(rng);
|
132
|
-
}
|
133
|
-
ids[i] = 8760000000L + i;
|
134
|
-
}
|
135
|
-
|
136
|
-
printf ("[%.3f s] Adding the vectors to the index\n", elapsed() - t0);
|
137
|
-
|
138
|
-
for (size_t begin = 0; begin < nb; begin += add_bs) {
|
139
|
-
size_t end = std::min (begin + add_bs, nb);
|
140
|
-
index.add_with_ids (end - begin,
|
141
|
-
database.data() + d * begin,
|
142
|
-
ids.data() + begin);
|
143
|
-
}
|
144
|
-
|
145
|
-
// remember a few elements from the database as queries
|
146
|
-
int i0 = 1234;
|
147
|
-
int i1 = 1244;
|
148
|
-
|
149
|
-
nq = i1 - i0;
|
150
|
-
queries.resize (nq * d);
|
151
|
-
for (int i = i0; i < i1; i++) {
|
152
|
-
for (int j = 0; j < d; j++) {
|
153
|
-
queries [(i - i0) * d + j] = database [i * d + j];
|
154
|
-
}
|
155
|
-
}
|
156
|
-
}
|
157
|
-
|
158
|
-
// A few notes on the internal format of the index:
|
159
|
-
//
|
160
|
-
// - the positing lists for PQ codes are index.codes, which is a
|
161
|
-
// std::vector < std::vector<uint8_t> >
|
162
|
-
// if n is the length of posting list #i, codes[i] has length bytes_per_code * n
|
163
|
-
//
|
164
|
-
// - the corresponding ids are stored in index.ids
|
165
|
-
//
|
166
|
-
// - given a vector float *x, finding which k centroids are
|
167
|
-
// closest to it (ie to find the nearest neighbors) can be done with
|
168
|
-
//
|
169
|
-
// long *centroid_ids = new long[k];
|
170
|
-
// float *distances = new float[k];
|
171
|
-
// index.quantizer->search (1, x, k, dis, centroids_ids);
|
172
|
-
//
|
173
|
-
|
174
|
-
faiss::write_index(&index, "/tmp/populated_index.faissindex");
|
175
|
-
|
176
|
-
{ // searching the database
|
177
|
-
int k = 5;
|
178
|
-
printf ("[%.3f s] Searching the %d nearest neighbors "
|
179
|
-
"of %ld vectors in the index\n",
|
180
|
-
elapsed() - t0, k, nq);
|
181
|
-
|
182
|
-
std::vector<faiss::Index::idx_t> nns (k * nq);
|
183
|
-
std::vector<float> dis (k * nq);
|
184
|
-
|
185
|
-
index.search (nq, queries.data(), k, dis.data(), nns.data());
|
186
|
-
|
187
|
-
printf ("[%.3f s] Query results (vector ids, then distances):\n",
|
188
|
-
elapsed() - t0);
|
189
|
-
|
190
|
-
for (int i = 0; i < nq; i++) {
|
191
|
-
printf ("query %2d: ", i);
|
192
|
-
for (int j = 0; j < k; j++) {
|
193
|
-
printf ("%7ld ", nns[j + i * k]);
|
194
|
-
}
|
195
|
-
printf ("\n dis: ");
|
196
|
-
for (int j = 0; j < k; j++) {
|
197
|
-
printf ("%7g ", dis[j + i * k]);
|
198
|
-
}
|
199
|
-
printf ("\n");
|
200
|
-
}
|
201
|
-
}
|
202
|
-
return 0;
|
203
|
-
}
|