faiss 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +103 -3
- data/ext/faiss/ext.cpp +99 -32
- data/ext/faiss/extconf.rb +12 -2
- data/lib/faiss/ext.bundle +0 -0
- data/lib/faiss/index.rb +3 -3
- data/lib/faiss/index_binary.rb +3 -3
- data/lib/faiss/kmeans.rb +1 -1
- data/lib/faiss/pca_matrix.rb +2 -2
- data/lib/faiss/product_quantizer.rb +3 -3
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/AutoTune.cpp +719 -0
- data/vendor/faiss/AutoTune.h +212 -0
- data/vendor/faiss/Clustering.cpp +261 -0
- data/vendor/faiss/Clustering.h +101 -0
- data/vendor/faiss/IVFlib.cpp +339 -0
- data/vendor/faiss/IVFlib.h +132 -0
- data/vendor/faiss/Index.cpp +171 -0
- data/vendor/faiss/Index.h +261 -0
- data/vendor/faiss/Index2Layer.cpp +437 -0
- data/vendor/faiss/Index2Layer.h +85 -0
- data/vendor/faiss/IndexBinary.cpp +77 -0
- data/vendor/faiss/IndexBinary.h +163 -0
- data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
- data/vendor/faiss/IndexBinaryFlat.h +54 -0
- data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
- data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
- data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
- data/vendor/faiss/IndexBinaryHNSW.h +56 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
- data/vendor/faiss/IndexBinaryIVF.h +211 -0
- data/vendor/faiss/IndexFlat.cpp +508 -0
- data/vendor/faiss/IndexFlat.h +175 -0
- data/vendor/faiss/IndexHNSW.cpp +1090 -0
- data/vendor/faiss/IndexHNSW.h +170 -0
- data/vendor/faiss/IndexIVF.cpp +909 -0
- data/vendor/faiss/IndexIVF.h +353 -0
- data/vendor/faiss/IndexIVFFlat.cpp +502 -0
- data/vendor/faiss/IndexIVFFlat.h +118 -0
- data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
- data/vendor/faiss/IndexIVFPQ.h +161 -0
- data/vendor/faiss/IndexIVFPQR.cpp +219 -0
- data/vendor/faiss/IndexIVFPQR.h +65 -0
- data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
- data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
- data/vendor/faiss/IndexLSH.cpp +225 -0
- data/vendor/faiss/IndexLSH.h +87 -0
- data/vendor/faiss/IndexLattice.cpp +143 -0
- data/vendor/faiss/IndexLattice.h +68 -0
- data/vendor/faiss/IndexPQ.cpp +1188 -0
- data/vendor/faiss/IndexPQ.h +199 -0
- data/vendor/faiss/IndexPreTransform.cpp +288 -0
- data/vendor/faiss/IndexPreTransform.h +91 -0
- data/vendor/faiss/IndexReplicas.cpp +123 -0
- data/vendor/faiss/IndexReplicas.h +76 -0
- data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
- data/vendor/faiss/IndexScalarQuantizer.h +127 -0
- data/vendor/faiss/IndexShards.cpp +317 -0
- data/vendor/faiss/IndexShards.h +100 -0
- data/vendor/faiss/InvertedLists.cpp +623 -0
- data/vendor/faiss/InvertedLists.h +334 -0
- data/vendor/faiss/LICENSE +21 -0
- data/vendor/faiss/MatrixStats.cpp +252 -0
- data/vendor/faiss/MatrixStats.h +62 -0
- data/vendor/faiss/MetaIndexes.cpp +351 -0
- data/vendor/faiss/MetaIndexes.h +126 -0
- data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
- data/vendor/faiss/OnDiskInvertedLists.h +127 -0
- data/vendor/faiss/VectorTransform.cpp +1157 -0
- data/vendor/faiss/VectorTransform.h +322 -0
- data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
- data/vendor/faiss/c_api/AutoTune_c.h +64 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
- data/vendor/faiss/c_api/Clustering_c.h +117 -0
- data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
- data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
- data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
- data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
- data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
- data/vendor/faiss/c_api/IndexShards_c.h +42 -0
- data/vendor/faiss/c_api/Index_c.cpp +105 -0
- data/vendor/faiss/c_api/Index_c.h +183 -0
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
- data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
- data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
- data/vendor/faiss/c_api/clone_index_c.h +32 -0
- data/vendor/faiss/c_api/error_c.h +42 -0
- data/vendor/faiss/c_api/error_impl.cpp +27 -0
- data/vendor/faiss/c_api/error_impl.h +16 -0
- data/vendor/faiss/c_api/faiss_c.h +58 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
- data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
- data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
- data/vendor/faiss/c_api/index_factory_c.h +30 -0
- data/vendor/faiss/c_api/index_io_c.cpp +42 -0
- data/vendor/faiss/c_api/index_io_c.h +50 -0
- data/vendor/faiss/c_api/macros_impl.h +110 -0
- data/vendor/faiss/clone_index.cpp +147 -0
- data/vendor/faiss/clone_index.h +38 -0
- data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
- data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
- data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
- data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
- data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
- data/vendor/faiss/gpu/GpuCloner.h +82 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
- data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
- data/vendor/faiss/gpu/GpuDistance.h +52 -0
- data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
- data/vendor/faiss/gpu/GpuIndex.h +148 -0
- data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
- data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
- data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
- data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
- data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
- data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
- data/vendor/faiss/gpu/GpuResources.cpp +52 -0
- data/vendor/faiss/gpu/GpuResources.h +73 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
- data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
- data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
- data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
- data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
- data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
- data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
- data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
- data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
- data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
- data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
- data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
- data/vendor/faiss/gpu/test/TestUtils.h +93 -0
- data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
- data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
- data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
- data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
- data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
- data/vendor/faiss/gpu/utils/Timer.h +52 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
- data/vendor/faiss/impl/FaissAssert.h +95 -0
- data/vendor/faiss/impl/FaissException.cpp +66 -0
- data/vendor/faiss/impl/FaissException.h +71 -0
- data/vendor/faiss/impl/HNSW.cpp +818 -0
- data/vendor/faiss/impl/HNSW.h +275 -0
- data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
- data/vendor/faiss/impl/PolysemousTraining.h +158 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
- data/vendor/faiss/impl/ProductQuantizer.h +242 -0
- data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
- data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
- data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
- data/vendor/faiss/impl/ThreadedIndex.h +80 -0
- data/vendor/faiss/impl/index_read.cpp +793 -0
- data/vendor/faiss/impl/index_write.cpp +558 -0
- data/vendor/faiss/impl/io.cpp +142 -0
- data/vendor/faiss/impl/io.h +98 -0
- data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
- data/vendor/faiss/impl/lattice_Zn.h +199 -0
- data/vendor/faiss/index_factory.cpp +392 -0
- data/vendor/faiss/index_factory.h +25 -0
- data/vendor/faiss/index_io.h +75 -0
- data/vendor/faiss/misc/test_blas.cpp +84 -0
- data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
- data/vendor/faiss/tests/test_merge.cpp +258 -0
- data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
- data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
- data/vendor/faiss/tests/test_params_override.cpp +231 -0
- data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
- data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
- data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
- data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
- data/vendor/faiss/utils/Heap.cpp +122 -0
- data/vendor/faiss/utils/Heap.h +495 -0
- data/vendor/faiss/utils/WorkerThread.cpp +126 -0
- data/vendor/faiss/utils/WorkerThread.h +61 -0
- data/vendor/faiss/utils/distances.cpp +765 -0
- data/vendor/faiss/utils/distances.h +243 -0
- data/vendor/faiss/utils/distances_simd.cpp +809 -0
- data/vendor/faiss/utils/extra_distances.cpp +336 -0
- data/vendor/faiss/utils/extra_distances.h +54 -0
- data/vendor/faiss/utils/hamming-inl.h +472 -0
- data/vendor/faiss/utils/hamming.cpp +792 -0
- data/vendor/faiss/utils/hamming.h +220 -0
- data/vendor/faiss/utils/random.cpp +192 -0
- data/vendor/faiss/utils/random.h +60 -0
- data/vendor/faiss/utils/utils.cpp +783 -0
- data/vendor/faiss/utils/utils.h +181 -0
- metadata +216 -2
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#ifndef FAISS_CLUSTERING_H
|
|
11
|
+
#define FAISS_CLUSTERING_H
|
|
12
|
+
#include <faiss/Index.h>
|
|
13
|
+
|
|
14
|
+
#include <vector>
|
|
15
|
+
|
|
16
|
+
namespace faiss {
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
/** Settings that drive the clustering procedure. An instance may be
 *  handed to the constructor of the Clustering object.
 */
struct ClusteringParameters {
    int niter;                   ///< number of clustering iterations
    int nredo;                   ///< run the clustering this many times, keep the best

    bool verbose;
    bool spherical;              ///< should the centroids be normalized?
    bool int_centroids;          ///< round the centroid coordinates to integers
    bool update_index;           ///< update the index after every iteration?
    bool frozen_centroids;       ///< keep the centroids given as input unchanged during the iterations

    int min_points_per_centroid; ///< a warning is issued otherwise
    int max_points_per_centroid; ///< bounds the size of the dataset

    int seed;                    ///< seed of the random number generator

    /// fills in reasonable default values
    ClusteringParameters ();
};
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
/** clustering based on assignment - centroid update iterations
|
|
43
|
+
*
|
|
44
|
+
* The clustering is based on an Index object that assigns training
|
|
45
|
+
* points to the centroids. Therefore, at each iteration the centroids
|
|
46
|
+
* are added to the index.
|
|
47
|
+
*
|
|
48
|
+
* On output, the centoids table is set to the latest version
|
|
49
|
+
* of the centroids and they are also added to the index. If the
|
|
50
|
+
* centroids table it is not empty on input, it is also used for
|
|
51
|
+
* initialization.
|
|
52
|
+
*
|
|
53
|
+
* To do several clusterings, just call train() several times on
|
|
54
|
+
* different training sets, clearing the centroid table in between.
|
|
55
|
+
*/
|
|
56
|
+
struct Clustering: ClusteringParameters {
|
|
57
|
+
typedef Index::idx_t idx_t;
|
|
58
|
+
size_t d; ///< dimension of the vectors
|
|
59
|
+
size_t k; ///< nb of centroids
|
|
60
|
+
|
|
61
|
+
/// centroids (k * d)
|
|
62
|
+
std::vector<float> centroids;
|
|
63
|
+
|
|
64
|
+
/// objective values (sum of distances reported by index) over
|
|
65
|
+
/// iterations
|
|
66
|
+
std::vector<float> obj;
|
|
67
|
+
|
|
68
|
+
/// the only mandatory parameters are k and d
|
|
69
|
+
Clustering (int d, int k);
|
|
70
|
+
Clustering (int d, int k, const ClusteringParameters &cp);
|
|
71
|
+
|
|
72
|
+
/// Index is used during the assignment stage
|
|
73
|
+
virtual void train (idx_t n, const float * x, faiss::Index & index);
|
|
74
|
+
|
|
75
|
+
/// Post-process the centroids after each centroid update.
|
|
76
|
+
/// includes optional L2 normalization and nearest integer rounding
|
|
77
|
+
void post_process_centroids ();
|
|
78
|
+
|
|
79
|
+
virtual ~Clustering() {}
|
|
80
|
+
};
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
/** Simplified k-means entry point.
 *
 * @param d          dimension of the data
 * @param n          number of training vectors
 * @param k          number of output centroids
 * @param x          training set (size n * d)
 * @param centroids  output centroids (size k * d)
 * @return           final quantization error
 */
float kmeans_clustering (
    size_t d,
    size_t n,
    size_t k,
    const float *x,
    float *centroids);
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
#endif
|
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#include <faiss/IVFlib.h>
|
|
11
|
+
|
|
12
|
+
#include <memory>
|
|
13
|
+
|
|
14
|
+
#include <faiss/IndexPreTransform.h>
|
|
15
|
+
#include <faiss/impl/FaissAssert.h>
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
namespace faiss { namespace ivflib {
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
void check_compatible_for_merge (const Index * index0,
|
|
23
|
+
const Index * index1)
|
|
24
|
+
{
|
|
25
|
+
|
|
26
|
+
const faiss::IndexPreTransform *pt0 =
|
|
27
|
+
dynamic_cast<const faiss::IndexPreTransform *>(index0);
|
|
28
|
+
|
|
29
|
+
if (pt0) {
|
|
30
|
+
const faiss::IndexPreTransform *pt1 =
|
|
31
|
+
dynamic_cast<const faiss::IndexPreTransform *>(index1);
|
|
32
|
+
FAISS_THROW_IF_NOT_MSG (pt1, "both indexes should be pretransforms");
|
|
33
|
+
|
|
34
|
+
FAISS_THROW_IF_NOT (pt0->chain.size() == pt1->chain.size());
|
|
35
|
+
for (int i = 0; i < pt0->chain.size(); i++) {
|
|
36
|
+
FAISS_THROW_IF_NOT (typeid(pt0->chain[i]) == typeid(pt1->chain[i]));
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
index0 = pt0->index;
|
|
40
|
+
index1 = pt1->index;
|
|
41
|
+
}
|
|
42
|
+
FAISS_THROW_IF_NOT (typeid(index0) == typeid(index1));
|
|
43
|
+
FAISS_THROW_IF_NOT (index0->d == index1->d &&
|
|
44
|
+
index0->metric_type == index1->metric_type);
|
|
45
|
+
|
|
46
|
+
const faiss::IndexIVF *ivf0 = dynamic_cast<const faiss::IndexIVF *>(index0);
|
|
47
|
+
if (ivf0) {
|
|
48
|
+
const faiss::IndexIVF *ivf1 =
|
|
49
|
+
dynamic_cast<const faiss::IndexIVF *>(index1);
|
|
50
|
+
FAISS_THROW_IF_NOT (ivf1);
|
|
51
|
+
|
|
52
|
+
ivf0->check_compatible_for_merge (*ivf1);
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
// TODO: check as thoroughfully for other index types
|
|
56
|
+
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/** Return the IndexIVF behind `index`: either `index` itself or the
 *  sub-index wrapped by an IndexPreTransform. Throws (FAISS_THROW_IF_NOT)
 *  when the resulting index is not an IndexIVF.
 */
const IndexIVF * extract_index_ivf (const Index * index)
{
    // look through an IndexPreTransform wrapper, if there is one
    const auto *pt = dynamic_cast<const IndexPreTransform *>(index);
    const Index *candidate = pt ? pt->index : index;

    const auto *ivf = dynamic_cast<const IndexIVF *>(candidate);
    FAISS_THROW_IF_NOT (ivf);
    return ivf;
}
|
|
72
|
+
|
|
73
|
+
/// Non-const flavour: forwards to the const overload and strips the
/// constness from the result.
IndexIVF * extract_index_ivf (Index * index) {
    const Index *as_const = index;
    const IndexIVF *found = extract_index_ivf (as_const);
    return const_cast<IndexIVF*> (found);
}
|
|
76
|
+
|
|
77
|
+
/** Merge index1 into index0 after checking that they are compatible.
 *
 * @param shift_ids  when true, index0's current ntotal is passed to
 *                   merge_from as the id offset; otherwise 0 is passed
 */
void merge_into(faiss::Index *index0, faiss::Index *index1, bool shift_ids) {

    check_compatible_for_merge (index0, index1);

    IndexIVF * dst_ivf = extract_index_ivf (index0);
    IndexIVF * src_ivf = extract_index_ivf (index1);

    const auto id_offset = shift_ids ? dst_ivf->ntotal : 0;
    dst_ivf->merge_from (*src_ivf, id_offset);

    // propagate the counts to the outer indexes
    // (useful for IndexPreTransform)
    index0->ntotal = dst_ivf->ntotal;
    index1->ntotal = src_ivf->ntotal;
}
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
/** Assign each input vector to a centroid of the IVF quantizer.
 *
 * @param index         an IndexIVF, possibly wrapped in an IndexPreTransform
 * @param x             input vectors
 * @param n             number of input vectors
 * @param centroid_ids  output, filled by the quantizer's assign()
 *
 * Throws (FAISS_THROW_IF_NOT_MSG) when no IndexIVF is found.
 */
void search_centroid(faiss::Index *index,
                     const float* x, int n,
                     idx_t* centroid_ids)
{
    std::unique_ptr<float[]> del;
    if (auto index_pre = dynamic_cast<faiss::IndexPreTransform*>(index)) {
        const float *xt = index_pre->apply_chain(n, x);
        // only take ownership of a buffer produced by the chain; if the
        // input pointer comes back unchanged it belongs to the caller
        // (same guard as in search_with_parameters)
        if (xt != x) {
            del.reset(const_cast<float*>(xt));
        }
        x = xt;
        index = index_pre->index;
    }
    faiss::IndexIVF* index_ivf = dynamic_cast<faiss::IndexIVF*>(index);
    // checked in release builds too, where assert() would be compiled out
    FAISS_THROW_IF_NOT_MSG (index_ivf,
                            "index must be an IndexIVF, possibly wrapped in an IndexPreTransform");
    index_ivf->quantizer->assign(n, x, centroid_ids);
}
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
/** Search an IVF index and also report centroid ids.
 *
 * @param index                an IndexIVF, possibly wrapped in an
 *                             IndexPreTransform
 * @param n                    number of query vectors
 * @param xin                  query vectors
 * @param k                    number of results per query
 * @param distances            output distances (n * k entries are post-processed)
 * @param labels               output labels (n * k entries are post-processed)
 * @param query_centroid_ids   optional output (may be null): first centroid
 *                             returned by the quantizer for each query (size n)
 * @param result_centroid_ids  optional output (may be null): inverted list of
 *                             each result, -1 for missing results (size n * k)
 *
 * Throws (FAISS_THROW_IF_NOT_MSG) when no IndexIVF is found.
 */
void search_and_return_centroids(faiss::Index *index,
                                 size_t n,
                                 const float* xin,
                                 long k,
                                 float *distances,
                                 idx_t* labels,
                                 idx_t* query_centroid_ids,
                                 idx_t* result_centroid_ids)
{
    const float *x = xin;
    std::unique_ptr<float []> del;
    if (auto index_pre = dynamic_cast<faiss::IndexPreTransform*>(index)) {
        x = index_pre->apply_chain(n, xin);
        // only take ownership of a buffer produced by the chain; if the
        // input pointer comes back unchanged it belongs to the caller
        // (same guard as in search_with_parameters)
        if (x != xin) {
            del.reset(const_cast<float*>(x));
        }
        index = index_pre->index;
    }
    faiss::IndexIVF* index_ivf = dynamic_cast<faiss::IndexIVF*>(index);
    // checked in release builds too, where assert() would be compiled out
    FAISS_THROW_IF_NOT_MSG (index_ivf,
                            "index must be an IndexIVF, possibly wrapped in an IndexPreTransform");

    // coarse quantization: nprobe candidate lists per query
    size_t nprobe = index_ivf->nprobe;
    std::vector<idx_t> cent_nos (n * nprobe);
    std::vector<float> cent_dis (n * nprobe);
    index_ivf->quantizer->search(
        n, x, nprobe, cent_dis.data(), cent_nos.data());

    if (query_centroid_ids) {
        for (size_t i = 0; i < n; i++)
            query_centroid_ids[i] = cent_nos[i * nprobe];
    }

    // NOTE(review): the trailing `true` is presumably the store_pairs
    // flag; the decoding below relies on labels being packed as
    // (list number << 32 | offset in list) -- confirm against IndexIVF.
    index_ivf->search_preassigned (n, x, k,
                                   cent_nos.data(), cent_dis.data(),
                                   distances, labels, true);

    for (size_t i = 0; i < n * k; i++) {
        idx_t label = labels[i];
        if (label < 0) {
            if (result_centroid_ids)
                result_centroid_ids[i] = -1;
        } else {
            // idx_t instead of long: long is only 32 bits wide on some
            // platforms and would mangle the two halves of the label
            idx_t list_no = label >> 32;
            idx_t list_index = label & 0xffffffff;
            if (result_centroid_ids)
                result_centroid_ids[i] = list_no;
            // replace the packed pair by the id stored in the list
            labels[i] = index_ivf->invlists->get_single_id(list_no, list_index);
        }
    }
}
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
/** Set up an empty sliding window on top of `index`.
 *
 * @param index  an IndexIVF (possibly wrapped in an IndexPreTransform)
 *               whose inverted lists are ArrayInvertedLists; throws
 *               through the FAISS_THROW_* macros otherwise
 */
SlidingIndexWindow::SlidingIndexWindow (Index *index): index (index) {
    n_slice = 0;
    IndexIVF* index_ivf = extract_index_ivf (index);
    ils = dynamic_cast<ArrayInvertedLists *> (index_ivf->invlists);
    // validate the cast before touching ils: it is null for any other
    // kind of inverted lists
    FAISS_THROW_IF_NOT_MSG (ils,
                            "only supports indexes with ArrayInvertedLists");
    nlist = ils->nlist;
    sizes.resize(nlist);
}
|
|
168
|
+
|
|
169
|
+
/** Drop the first `remove` elements of dst, then append all of src.
 *
 * Relies on the vector's own erase/insert rather than raw
 * memmove/memcpy, so it is valid for any element type and for empty
 * vectors (whose data() may be null).
 *
 * @param dst     vector modified in place
 * @param remove  number of leading elements to drop; must not exceed
 *                dst.size()
 * @param src     elements appended at the end; must not alias dst
 */
template<class T>
static void shift_and_add (std::vector<T> & dst,
                           size_t remove,
                           const std::vector<T> & src)
{
    if (remove > 0) {
        dst.erase (dst.begin(), dst.begin() + remove);
    }
    dst.insert (dst.end(), src.begin(), src.end());
}
|
|
181
|
+
|
|
182
|
+
/// Erase the first `remove` elements of v; a count of 0 leaves v untouched.
template<class T>
static void remove_from_begin (std::vector<T> & v,
                               size_t remove)
{
    if (remove == 0) {
        return;
    }
    auto first = v.begin();
    v.erase (first, first + remove);
}
|
|
189
|
+
|
|
190
|
+
void SlidingIndexWindow::step(const Index *sub_index, bool remove_oldest) {
|
|
191
|
+
|
|
192
|
+
FAISS_THROW_IF_NOT_MSG (!remove_oldest || n_slice > 0,
|
|
193
|
+
"cannot remove slice: there is none");
|
|
194
|
+
|
|
195
|
+
const ArrayInvertedLists *ils2 = nullptr;
|
|
196
|
+
if(sub_index) {
|
|
197
|
+
check_compatible_for_merge (index, sub_index);
|
|
198
|
+
ils2 = dynamic_cast<const ArrayInvertedLists*>(
|
|
199
|
+
extract_index_ivf (sub_index)->invlists);
|
|
200
|
+
FAISS_THROW_IF_NOT_MSG (ils2, "supports only ArrayInvertedLists");
|
|
201
|
+
}
|
|
202
|
+
IndexIVF *index_ivf = extract_index_ivf (index);
|
|
203
|
+
|
|
204
|
+
if (remove_oldest && ils2) {
|
|
205
|
+
for (int i = 0; i < nlist; i++) {
|
|
206
|
+
std::vector<size_t> & sizesi = sizes[i];
|
|
207
|
+
size_t amount_to_remove = sizesi[0];
|
|
208
|
+
index_ivf->ntotal += ils2->ids[i].size() - amount_to_remove;
|
|
209
|
+
|
|
210
|
+
shift_and_add (ils->ids[i], amount_to_remove, ils2->ids[i]);
|
|
211
|
+
shift_and_add (ils->codes[i], amount_to_remove * ils->code_size,
|
|
212
|
+
ils2->codes[i]);
|
|
213
|
+
for (int j = 0; j + 1 < n_slice; j++) {
|
|
214
|
+
sizesi[j] = sizesi[j + 1] - amount_to_remove;
|
|
215
|
+
}
|
|
216
|
+
sizesi[n_slice - 1] = ils->ids[i].size();
|
|
217
|
+
}
|
|
218
|
+
} else if (ils2) {
|
|
219
|
+
for (int i = 0; i < nlist; i++) {
|
|
220
|
+
index_ivf->ntotal += ils2->ids[i].size();
|
|
221
|
+
shift_and_add (ils->ids[i], 0, ils2->ids[i]);
|
|
222
|
+
shift_and_add (ils->codes[i], 0, ils2->codes[i]);
|
|
223
|
+
sizes[i].push_back(ils->ids[i].size());
|
|
224
|
+
}
|
|
225
|
+
n_slice++;
|
|
226
|
+
} else if (remove_oldest) {
|
|
227
|
+
for (int i = 0; i < nlist; i++) {
|
|
228
|
+
size_t amount_to_remove = sizes[i][0];
|
|
229
|
+
index_ivf->ntotal -= amount_to_remove;
|
|
230
|
+
remove_from_begin (ils->ids[i], amount_to_remove);
|
|
231
|
+
remove_from_begin (ils->codes[i],
|
|
232
|
+
amount_to_remove * ils->code_size);
|
|
233
|
+
for (int j = 0; j + 1 < n_slice; j++) {
|
|
234
|
+
sizes[i][j] = sizes[i][j + 1] - amount_to_remove;
|
|
235
|
+
}
|
|
236
|
+
sizes[i].pop_back ();
|
|
237
|
+
}
|
|
238
|
+
n_slice--;
|
|
239
|
+
} else {
|
|
240
|
+
FAISS_THROW_MSG ("nothing to do???");
|
|
241
|
+
}
|
|
242
|
+
index->ntotal = index_ivf->ntotal;
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
// Get a subset of inverted lists [i0, i1). Works on IndexIVF's and
|
|
248
|
+
// IndexIVF's embedded in a IndexPreTransform
|
|
249
|
+
|
|
250
|
+
ArrayInvertedLists *
|
|
251
|
+
get_invlist_range (const Index *index, long i0, long i1)
|
|
252
|
+
{
|
|
253
|
+
const IndexIVF *ivf = extract_index_ivf (index);
|
|
254
|
+
|
|
255
|
+
FAISS_THROW_IF_NOT (0 <= i0 && i0 <= i1 && i1 <= ivf->nlist);
|
|
256
|
+
|
|
257
|
+
const InvertedLists *src = ivf->invlists;
|
|
258
|
+
|
|
259
|
+
ArrayInvertedLists * il = new ArrayInvertedLists(i1 - i0, src->code_size);
|
|
260
|
+
|
|
261
|
+
for (long i = i0; i < i1; i++) {
|
|
262
|
+
il->add_entries(i - i0, src->list_size(i),
|
|
263
|
+
InvertedLists::ScopedIds (src, i).get(),
|
|
264
|
+
InvertedLists::ScopedCodes (src, i).get());
|
|
265
|
+
}
|
|
266
|
+
return il;
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
/** Exchange the inverted lists [i0, i1) of an index with the lists of
 *  src, and update the ntotal counters accordingly.
 *
 * @param index  IndexIVF (possibly wrapped) backed by ArrayInvertedLists
 * @param i0     first list to replace
 * @param i1     one past the last list to replace; requires
 *               0 <= i0 <= i1 <= nlist
 * @param src    lists to swap in; must hold i1 - i0 lists with the same
 *               code size. On return it holds the lists swapped out.
 */
void set_invlist_range (Index *index, long i0, long i1,
                        ArrayInvertedLists * src)
{
    IndexIVF *ivf = extract_index_ivf (index);

    FAISS_THROW_IF_NOT (0 <= i0 && i0 <= i1 && i1 <= ivf->nlist);

    ArrayInvertedLists *dst = dynamic_cast<ArrayInvertedLists *>(ivf->invlists);
    FAISS_THROW_IF_NOT_MSG (dst, "only ArrayInvertedLists supported");
    FAISS_THROW_IF_NOT (src->nlist == i1 - i0 &&
                        dst->code_size == src->code_size);

    size_t ntotal = index->ntotal;
    const long n_lists = i1 - i0;
    for (long j = 0; j < n_lists; j++) {
        const long list_no = i0 + j;
        // account for the outgoing and the incoming list sizes
        ntotal -= dst->list_size (list_no);
        ntotal += src->list_size (j);
        src->codes[j].swap (dst->codes[list_no]);
        src->ids[j].swap (dst->ids[list_no]);
    }
    index->ntotal = ntotal;
    ivf->ntotal = index->ntotal;
}
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
void search_with_parameters (const Index *index,
|
|
295
|
+
idx_t n, const float *x, idx_t k,
|
|
296
|
+
float *distances, idx_t *labels,
|
|
297
|
+
IVFSearchParameters *params,
|
|
298
|
+
size_t *nb_dis_ptr)
|
|
299
|
+
{
|
|
300
|
+
FAISS_THROW_IF_NOT (params);
|
|
301
|
+
const float *prev_x = x;
|
|
302
|
+
ScopeDeleter<float> del;
|
|
303
|
+
|
|
304
|
+
if (auto ip = dynamic_cast<const IndexPreTransform *> (index)) {
|
|
305
|
+
x = ip->apply_chain (n, x);
|
|
306
|
+
if (x != prev_x) {
|
|
307
|
+
del.set(x);
|
|
308
|
+
}
|
|
309
|
+
index = ip->index;
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
std::vector<idx_t> Iq(params->nprobe * n);
|
|
313
|
+
std::vector<float> Dq(params->nprobe * n);
|
|
314
|
+
|
|
315
|
+
const IndexIVF *index_ivf = dynamic_cast<const IndexIVF *>(index);
|
|
316
|
+
FAISS_THROW_IF_NOT (index_ivf);
|
|
317
|
+
|
|
318
|
+
index_ivf->quantizer->search(n, x, params->nprobe,
|
|
319
|
+
Dq.data(), Iq.data());
|
|
320
|
+
|
|
321
|
+
if (nb_dis_ptr) {
|
|
322
|
+
size_t nb_dis = 0;
|
|
323
|
+
const InvertedLists *il = index_ivf->invlists;
|
|
324
|
+
for (idx_t i = 0; i < n * params->nprobe; i++) {
|
|
325
|
+
if (Iq[i] >= 0) {
|
|
326
|
+
nb_dis += il->list_size(Iq[i]);
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
*nb_dis_ptr = nb_dis;
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
index_ivf->search_preassigned(n, x, k, Iq.data(), Dq.data(),
|
|
333
|
+
distances, labels,
|
|
334
|
+
false, params);
|
|
335
|
+
}
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
} } // namespace faiss::ivflib
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#ifndef FAISS_IVFLIB_H
|
|
11
|
+
#define FAISS_IVFLIB_H
|
|
12
|
+
|
|
13
|
+
/** Since IVF (inverted file) indexes are of so much use for
|
|
14
|
+
* large-scale use cases, we group a few functions related to them in
|
|
15
|
+
* this small library. Most functions work both on IndexIVFs and
|
|
16
|
+
* IndexIVFs embedded within an IndexPreTransform.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
#include <vector>
|
|
20
|
+
#include <faiss/IndexIVF.h>
|
|
21
|
+
|
|
22
|
+
namespace faiss { namespace ivflib {
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
/** check if two indexes have the same parameters and are trained in
|
|
26
|
+
* the same way, otherwise throw. */
|
|
27
|
+
void check_compatible_for_merge (const Index * index1,
|
|
28
|
+
const Index * index2);
|
|
29
|
+
|
|
30
|
+
/** get an IndexIVF from an index. The index may be an IndexIVF or
|
|
31
|
+
* some wrapper class that encloses an IndexIVF
|
|
32
|
+
*
|
|
33
|
+
* throws an exception if this is not the case.
|
|
34
|
+
*/
|
|
35
|
+
const IndexIVF * extract_index_ivf (const Index * index);
|
|
36
|
+
IndexIVF * extract_index_ivf (Index * index);
|
|
37
|
+
|
|
38
|
+
/** Merge index1 into index0. Works on IndexIVF's and IndexIVF's
|
|
39
|
+
* embedded in a IndexPreTransform. On output, the index1 is empty.
|
|
40
|
+
*
|
|
41
|
+
* @param shift_ids: translate the ids from index1 to index0->prev_ntotal
|
|
42
|
+
*/
|
|
43
|
+
void merge_into(Index *index0, Index *index1, bool shift_ids);
|
|
44
|
+
|
|
45
|
+
/// shorthand for the id type of Index
using idx_t = Index::idx_t;
|
|
46
|
+
|
|
47
|
+
/* Returns the cluster the embeddings belong to.
|
|
48
|
+
*
|
|
49
|
+
* @param index Index, which should be an IVF index
|
|
50
|
+
* (otherwise there are no clusters)
|
|
51
|
+
* @param embeddings object descriptors for which the centroids should be found,
|
|
52
|
+
* size num_objects * d
|
|
53
|
+
* @param centroid_ids
|
|
54
|
+
* cluster id each object belongs to, size num_objects
|
|
55
|
+
*/
|
|
56
|
+
void search_centroid(Index *index,
|
|
57
|
+
const float* x, int n,
|
|
58
|
+
idx_t* centroid_ids);
|
|
59
|
+
|
|
60
|
+
/* Returns the cluster the embeddings belong to.
|
|
61
|
+
*
|
|
62
|
+
* @param index Index, which should be an IVF index
|
|
63
|
+
* (otherwise there are no clusters)
|
|
64
|
+
* @param query_centroid_ids
|
|
65
|
+
* centroid ids corresponding to the query vectors (size n)
|
|
66
|
+
* @param result_centroid_ids
|
|
67
|
+
* centroid ids corresponding to the results (size n * k)
|
|
68
|
+
* other arguments are the same as the standard search function
|
|
69
|
+
*/
|
|
70
|
+
void search_and_return_centroids(Index *index,
|
|
71
|
+
size_t n,
|
|
72
|
+
const float* xin,
|
|
73
|
+
long k,
|
|
74
|
+
float *distances,
|
|
75
|
+
idx_t* labels,
|
|
76
|
+
idx_t* query_centroid_ids,
|
|
77
|
+
idx_t* result_centroid_ids);
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
/** A set of IndexIVFs concatenated together in a FIFO fashion.
|
|
81
|
+
* at each "step", the oldest index slice is removed and a new index is added.
|
|
82
|
+
*/
|
|
83
|
+
struct SlidingIndexWindow {
|
|
84
|
+
/// common index that contains the sliding window
|
|
85
|
+
Index * index;
|
|
86
|
+
|
|
87
|
+
/// InvertedLists of index
|
|
88
|
+
ArrayInvertedLists *ils;
|
|
89
|
+
|
|
90
|
+
/// number of slices currently in index
|
|
91
|
+
int n_slice;
|
|
92
|
+
|
|
93
|
+
/// same as index->nlist
|
|
94
|
+
size_t nlist;
|
|
95
|
+
|
|
96
|
+
/// cumulative list sizes at each slice
|
|
97
|
+
std::vector<std::vector<size_t> > sizes;
|
|
98
|
+
|
|
99
|
+
/// index should be initially empty and trained
|
|
100
|
+
SlidingIndexWindow (Index *index);
|
|
101
|
+
|
|
102
|
+
/** Add one index to the current index and remove the oldest one.
|
|
103
|
+
*
|
|
104
|
+
* @param sub_index slice to swap in (can be NULL)
|
|
105
|
+
* @param remove_oldest if true, remove the oldest slices */
|
|
106
|
+
void step(const Index *sub_index, bool remove_oldest);
|
|
107
|
+
|
|
108
|
+
};
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
/// Get a subset of inverted lists [i0, i1)
|
|
112
|
+
ArrayInvertedLists * get_invlist_range (const Index *index,
|
|
113
|
+
long i0, long i1);
|
|
114
|
+
|
|
115
|
+
/// Set a subset of inverted lists
|
|
116
|
+
void set_invlist_range (Index *index, long i0, long i1,
|
|
117
|
+
ArrayInvertedLists * src);
|
|
118
|
+
|
|
119
|
+
// search an IndexIVF, possibly embedded in an IndexPreTransform with
|
|
120
|
+
// given parameters. Optionally returns the number of distances
|
|
121
|
+
// computed
|
|
122
|
+
void search_with_parameters (const Index *index,
|
|
123
|
+
idx_t n, const float *x, idx_t k,
|
|
124
|
+
float *distances, idx_t *labels,
|
|
125
|
+
IVFSearchParameters *params,
|
|
126
|
+
size_t *nb_dis = nullptr);
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
} } // namespace faiss::ivflib
|
|
131
|
+
|
|
132
|
+
#endif
|