faiss 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +103 -3
- data/ext/faiss/ext.cpp +99 -32
- data/ext/faiss/extconf.rb +12 -2
- data/lib/faiss/ext.bundle +0 -0
- data/lib/faiss/index.rb +3 -3
- data/lib/faiss/index_binary.rb +3 -3
- data/lib/faiss/kmeans.rb +1 -1
- data/lib/faiss/pca_matrix.rb +2 -2
- data/lib/faiss/product_quantizer.rb +3 -3
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/AutoTune.cpp +719 -0
- data/vendor/faiss/AutoTune.h +212 -0
- data/vendor/faiss/Clustering.cpp +261 -0
- data/vendor/faiss/Clustering.h +101 -0
- data/vendor/faiss/IVFlib.cpp +339 -0
- data/vendor/faiss/IVFlib.h +132 -0
- data/vendor/faiss/Index.cpp +171 -0
- data/vendor/faiss/Index.h +261 -0
- data/vendor/faiss/Index2Layer.cpp +437 -0
- data/vendor/faiss/Index2Layer.h +85 -0
- data/vendor/faiss/IndexBinary.cpp +77 -0
- data/vendor/faiss/IndexBinary.h +163 -0
- data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
- data/vendor/faiss/IndexBinaryFlat.h +54 -0
- data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
- data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
- data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
- data/vendor/faiss/IndexBinaryHNSW.h +56 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
- data/vendor/faiss/IndexBinaryIVF.h +211 -0
- data/vendor/faiss/IndexFlat.cpp +508 -0
- data/vendor/faiss/IndexFlat.h +175 -0
- data/vendor/faiss/IndexHNSW.cpp +1090 -0
- data/vendor/faiss/IndexHNSW.h +170 -0
- data/vendor/faiss/IndexIVF.cpp +909 -0
- data/vendor/faiss/IndexIVF.h +353 -0
- data/vendor/faiss/IndexIVFFlat.cpp +502 -0
- data/vendor/faiss/IndexIVFFlat.h +118 -0
- data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
- data/vendor/faiss/IndexIVFPQ.h +161 -0
- data/vendor/faiss/IndexIVFPQR.cpp +219 -0
- data/vendor/faiss/IndexIVFPQR.h +65 -0
- data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
- data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
- data/vendor/faiss/IndexLSH.cpp +225 -0
- data/vendor/faiss/IndexLSH.h +87 -0
- data/vendor/faiss/IndexLattice.cpp +143 -0
- data/vendor/faiss/IndexLattice.h +68 -0
- data/vendor/faiss/IndexPQ.cpp +1188 -0
- data/vendor/faiss/IndexPQ.h +199 -0
- data/vendor/faiss/IndexPreTransform.cpp +288 -0
- data/vendor/faiss/IndexPreTransform.h +91 -0
- data/vendor/faiss/IndexReplicas.cpp +123 -0
- data/vendor/faiss/IndexReplicas.h +76 -0
- data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
- data/vendor/faiss/IndexScalarQuantizer.h +127 -0
- data/vendor/faiss/IndexShards.cpp +317 -0
- data/vendor/faiss/IndexShards.h +100 -0
- data/vendor/faiss/InvertedLists.cpp +623 -0
- data/vendor/faiss/InvertedLists.h +334 -0
- data/vendor/faiss/LICENSE +21 -0
- data/vendor/faiss/MatrixStats.cpp +252 -0
- data/vendor/faiss/MatrixStats.h +62 -0
- data/vendor/faiss/MetaIndexes.cpp +351 -0
- data/vendor/faiss/MetaIndexes.h +126 -0
- data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
- data/vendor/faiss/OnDiskInvertedLists.h +127 -0
- data/vendor/faiss/VectorTransform.cpp +1157 -0
- data/vendor/faiss/VectorTransform.h +322 -0
- data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
- data/vendor/faiss/c_api/AutoTune_c.h +64 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
- data/vendor/faiss/c_api/Clustering_c.h +117 -0
- data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
- data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
- data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
- data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
- data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
- data/vendor/faiss/c_api/IndexShards_c.h +42 -0
- data/vendor/faiss/c_api/Index_c.cpp +105 -0
- data/vendor/faiss/c_api/Index_c.h +183 -0
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
- data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
- data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
- data/vendor/faiss/c_api/clone_index_c.h +32 -0
- data/vendor/faiss/c_api/error_c.h +42 -0
- data/vendor/faiss/c_api/error_impl.cpp +27 -0
- data/vendor/faiss/c_api/error_impl.h +16 -0
- data/vendor/faiss/c_api/faiss_c.h +58 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
- data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
- data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
- data/vendor/faiss/c_api/index_factory_c.h +30 -0
- data/vendor/faiss/c_api/index_io_c.cpp +42 -0
- data/vendor/faiss/c_api/index_io_c.h +50 -0
- data/vendor/faiss/c_api/macros_impl.h +110 -0
- data/vendor/faiss/clone_index.cpp +147 -0
- data/vendor/faiss/clone_index.h +38 -0
- data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
- data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
- data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
- data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
- data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
- data/vendor/faiss/gpu/GpuCloner.h +82 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
- data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
- data/vendor/faiss/gpu/GpuDistance.h +52 -0
- data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
- data/vendor/faiss/gpu/GpuIndex.h +148 -0
- data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
- data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
- data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
- data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
- data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
- data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
- data/vendor/faiss/gpu/GpuResources.cpp +52 -0
- data/vendor/faiss/gpu/GpuResources.h +73 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
- data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
- data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
- data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
- data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
- data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
- data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
- data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
- data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
- data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
- data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
- data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
- data/vendor/faiss/gpu/test/TestUtils.h +93 -0
- data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
- data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
- data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
- data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
- data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
- data/vendor/faiss/gpu/utils/Timer.h +52 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
- data/vendor/faiss/impl/FaissAssert.h +95 -0
- data/vendor/faiss/impl/FaissException.cpp +66 -0
- data/vendor/faiss/impl/FaissException.h +71 -0
- data/vendor/faiss/impl/HNSW.cpp +818 -0
- data/vendor/faiss/impl/HNSW.h +275 -0
- data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
- data/vendor/faiss/impl/PolysemousTraining.h +158 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
- data/vendor/faiss/impl/ProductQuantizer.h +242 -0
- data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
- data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
- data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
- data/vendor/faiss/impl/ThreadedIndex.h +80 -0
- data/vendor/faiss/impl/index_read.cpp +793 -0
- data/vendor/faiss/impl/index_write.cpp +558 -0
- data/vendor/faiss/impl/io.cpp +142 -0
- data/vendor/faiss/impl/io.h +98 -0
- data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
- data/vendor/faiss/impl/lattice_Zn.h +199 -0
- data/vendor/faiss/index_factory.cpp +392 -0
- data/vendor/faiss/index_factory.h +25 -0
- data/vendor/faiss/index_io.h +75 -0
- data/vendor/faiss/misc/test_blas.cpp +84 -0
- data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
- data/vendor/faiss/tests/test_merge.cpp +258 -0
- data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
- data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
- data/vendor/faiss/tests/test_params_override.cpp +231 -0
- data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
- data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
- data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
- data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
- data/vendor/faiss/utils/Heap.cpp +122 -0
- data/vendor/faiss/utils/Heap.h +495 -0
- data/vendor/faiss/utils/WorkerThread.cpp +126 -0
- data/vendor/faiss/utils/WorkerThread.h +61 -0
- data/vendor/faiss/utils/distances.cpp +765 -0
- data/vendor/faiss/utils/distances.h +243 -0
- data/vendor/faiss/utils/distances_simd.cpp +809 -0
- data/vendor/faiss/utils/extra_distances.cpp +336 -0
- data/vendor/faiss/utils/extra_distances.h +54 -0
- data/vendor/faiss/utils/hamming-inl.h +472 -0
- data/vendor/faiss/utils/hamming.cpp +792 -0
- data/vendor/faiss/utils/hamming.h +220 -0
- data/vendor/faiss/utils/random.cpp +192 -0
- data/vendor/faiss/utils/random.h +60 -0
- data/vendor/faiss/utils/utils.cpp +783 -0
- data/vendor/faiss/utils/utils.h +181 -0
- metadata +216 -2
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#pragma once
|
|
11
|
+
|
|
12
|
+
#include <vector>
|
|
13
|
+
#include <string>
|
|
14
|
+
#include <unordered_map>
|
|
15
|
+
#include <stdint.h>
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
namespace faiss {
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
/** Collects statistics about a dataset and produces textual comments
 *  on them.
 *
 * Implemented as a struct (not a free function) so that every
 * statistic remains accessible to calling code after construction. */

struct MatrixStats {
    /// NOTE(review): presumably x holds n vectors of dimension d,
    /// stored contiguously -- confirm against MatrixStats.cpp
    MatrixStats (size_t n, size_t d, const float *x);

    /// textual remarks about the dataset
    std::string comments;

    // raw statistics
    size_t n;
    size_t d;
    size_t n_collision;
    size_t n_valid;
    size_t n0;
    double min_norm2;
    double max_norm2;

    /// statistics accumulated for one dimension of the dataset
    struct PerDimStats {
        // NOTE(review): names suggest counts of values seen / NaN /
        // infinite / zero -- confirm against the implementation
        size_t n;
        size_t n_nan;
        size_t n_inf;
        size_t n0;

        float min;
        float max;
        double sum;
        double sum2;

        size_t n_valid;
        double mean;
        double stddev;

        PerDimStats();
        /// feed one value into the accumulators
        void add (float x);
        /// derive mean and stddev from the accumulators
        void compute_mean_std ();
    };

    /// one entry per dimension
    std::vector<PerDimStats> per_dim_stats;

    /// value stored in the occurrences table
    struct Occurrence {
        size_t first;
        size_t count;
    };
    /// table keyed by a 64-bit integer
    /// NOTE(review): presumably a hash of each vector -- confirm
    std::unordered_map<uint64_t, Occurrence> occurrences;

    /// scratch text buffer and its size, used by do_comment
    /// NOTE(review): ownership / lifetime not visible here -- confirm
    char *buf;
    size_t nbuf;
    /// printf-style helper
    void do_comment (const char *fmt, ...);

};
|
|
61
|
+
|
|
62
|
+
} // namespace faiss
|
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#include <faiss/MetaIndexes.h>

#include <cinttypes>
#include <cstdio>
#include <cstring>
#include <limits>
#include <stdint.h>

#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/Heap.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/utils/WorkerThread.h>
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
namespace faiss {
|
|
22
|
+
|
|
23
|
+
namespace {
|
|
24
|
+
|
|
25
|
+
typedef Index::idx_t idx_t;
|
|
26
|
+
|
|
27
|
+
} // namespace
|
|
28
|
+
|
|
29
|
+
/*****************************************************
|
|
30
|
+
* IndexIDMap implementation
|
|
31
|
+
*******************************************************/
|
|
32
|
+
|
|
33
|
+
template <typename IndexT>
|
|
34
|
+
IndexIDMapTemplate<IndexT>::IndexIDMapTemplate (IndexT *index):
|
|
35
|
+
index (index),
|
|
36
|
+
own_fields (false)
|
|
37
|
+
{
|
|
38
|
+
FAISS_THROW_IF_NOT_MSG (index->ntotal == 0, "index must be empty on input");
|
|
39
|
+
this->is_trained = index->is_trained;
|
|
40
|
+
this->metric_type = index->metric_type;
|
|
41
|
+
this->verbose = index->verbose;
|
|
42
|
+
this->d = index->d;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
template <typename IndexT>
|
|
46
|
+
void IndexIDMapTemplate<IndexT>::add
|
|
47
|
+
(idx_t, const typename IndexT::component_t *)
|
|
48
|
+
{
|
|
49
|
+
FAISS_THROW_MSG ("add does not make sense with IndexIDMap, "
|
|
50
|
+
"use add_with_ids");
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
template <typename IndexT>
|
|
55
|
+
void IndexIDMapTemplate<IndexT>::train
|
|
56
|
+
(idx_t n, const typename IndexT::component_t *x)
|
|
57
|
+
{
|
|
58
|
+
index->train (n, x);
|
|
59
|
+
this->is_trained = index->is_trained;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
template <typename IndexT>
|
|
63
|
+
void IndexIDMapTemplate<IndexT>::reset ()
|
|
64
|
+
{
|
|
65
|
+
index->reset ();
|
|
66
|
+
id_map.clear();
|
|
67
|
+
this->ntotal = 0;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
template <typename IndexT>
|
|
72
|
+
void IndexIDMapTemplate<IndexT>::add_with_ids
|
|
73
|
+
(idx_t n, const typename IndexT::component_t * x,
|
|
74
|
+
const typename IndexT::idx_t *xids)
|
|
75
|
+
{
|
|
76
|
+
index->add (n, x);
|
|
77
|
+
for (idx_t i = 0; i < n; i++)
|
|
78
|
+
id_map.push_back (xids[i]);
|
|
79
|
+
this->ntotal = index->ntotal;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
template <typename IndexT>
|
|
84
|
+
void IndexIDMapTemplate<IndexT>::search
|
|
85
|
+
(idx_t n, const typename IndexT::component_t *x, idx_t k,
|
|
86
|
+
typename IndexT::distance_t *distances, typename IndexT::idx_t *labels) const
|
|
87
|
+
{
|
|
88
|
+
index->search (n, x, k, distances, labels);
|
|
89
|
+
idx_t *li = labels;
|
|
90
|
+
#pragma omp parallel for
|
|
91
|
+
for (idx_t i = 0; i < n * k; i++) {
|
|
92
|
+
li[i] = li[i] < 0 ? li[i] : id_map[li[i]];
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
template <typename IndexT>
|
|
98
|
+
void IndexIDMapTemplate<IndexT>::range_search
|
|
99
|
+
(typename IndexT::idx_t n, const typename IndexT::component_t *x,
|
|
100
|
+
typename IndexT::distance_t radius, RangeSearchResult *result) const
|
|
101
|
+
{
|
|
102
|
+
index->range_search(n, x, radius, result);
|
|
103
|
+
#pragma omp parallel for
|
|
104
|
+
for (idx_t i = 0; i < result->lims[result->nq]; i++) {
|
|
105
|
+
result->labels[i] = result->labels[i] < 0 ?
|
|
106
|
+
result->labels[i] : id_map[result->labels[i]];
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
namespace {
|
|
111
|
+
|
|
112
|
+
struct IDTranslatedSelector: IDSelector {
|
|
113
|
+
const std::vector <int64_t> & id_map;
|
|
114
|
+
const IDSelector & sel;
|
|
115
|
+
IDTranslatedSelector (const std::vector <int64_t> & id_map,
|
|
116
|
+
const IDSelector & sel):
|
|
117
|
+
id_map (id_map), sel (sel)
|
|
118
|
+
{}
|
|
119
|
+
bool is_member(idx_t id) const override {
|
|
120
|
+
return sel.is_member(id_map[id]);
|
|
121
|
+
}
|
|
122
|
+
};
|
|
123
|
+
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
template <typename IndexT>
|
|
127
|
+
size_t IndexIDMapTemplate<IndexT>::remove_ids (const IDSelector & sel)
|
|
128
|
+
{
|
|
129
|
+
// remove in sub-index first
|
|
130
|
+
IDTranslatedSelector sel2 (id_map, sel);
|
|
131
|
+
size_t nremove = index->remove_ids (sel2);
|
|
132
|
+
|
|
133
|
+
int64_t j = 0;
|
|
134
|
+
for (idx_t i = 0; i < this->ntotal; i++) {
|
|
135
|
+
if (sel.is_member (id_map[i])) {
|
|
136
|
+
// remove
|
|
137
|
+
} else {
|
|
138
|
+
id_map[j] = id_map[i];
|
|
139
|
+
j++;
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
FAISS_ASSERT (j == index->ntotal);
|
|
143
|
+
this->ntotal = j;
|
|
144
|
+
id_map.resize(this->ntotal);
|
|
145
|
+
return nremove;
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
template <typename IndexT>
|
|
149
|
+
IndexIDMapTemplate<IndexT>::~IndexIDMapTemplate ()
|
|
150
|
+
{
|
|
151
|
+
if (own_fields) delete index;
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
/*****************************************************
|
|
157
|
+
* IndexIDMap2 implementation
|
|
158
|
+
*******************************************************/
|
|
159
|
+
|
|
160
|
+
template <typename IndexT>
|
|
161
|
+
IndexIDMap2Template<IndexT>::IndexIDMap2Template (IndexT *index):
|
|
162
|
+
IndexIDMapTemplate<IndexT> (index)
|
|
163
|
+
{}
|
|
164
|
+
|
|
165
|
+
template <typename IndexT>
|
|
166
|
+
void IndexIDMap2Template<IndexT>::add_with_ids
|
|
167
|
+
(idx_t n, const typename IndexT::component_t* x,
|
|
168
|
+
const typename IndexT::idx_t* xids)
|
|
169
|
+
{
|
|
170
|
+
size_t prev_ntotal = this->ntotal;
|
|
171
|
+
IndexIDMapTemplate<IndexT>::add_with_ids (n, x, xids);
|
|
172
|
+
for (size_t i = prev_ntotal; i < this->ntotal; i++) {
|
|
173
|
+
rev_map [this->id_map [i]] = i;
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
template <typename IndexT>
|
|
178
|
+
void IndexIDMap2Template<IndexT>::construct_rev_map ()
|
|
179
|
+
{
|
|
180
|
+
rev_map.clear ();
|
|
181
|
+
for (size_t i = 0; i < this->ntotal; i++) {
|
|
182
|
+
rev_map [this->id_map [i]] = i;
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
template <typename IndexT>
|
|
188
|
+
size_t IndexIDMap2Template<IndexT>::remove_ids(const IDSelector& sel)
|
|
189
|
+
{
|
|
190
|
+
// This is quite inefficient
|
|
191
|
+
size_t nremove = IndexIDMapTemplate<IndexT>::remove_ids (sel);
|
|
192
|
+
construct_rev_map ();
|
|
193
|
+
return nremove;
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
template <typename IndexT>
|
|
197
|
+
void IndexIDMap2Template<IndexT>::reconstruct
|
|
198
|
+
(idx_t key, typename IndexT::component_t * recons) const
|
|
199
|
+
{
|
|
200
|
+
try {
|
|
201
|
+
this->index->reconstruct (rev_map.at (key), recons);
|
|
202
|
+
} catch (const std::out_of_range& e) {
|
|
203
|
+
FAISS_THROW_FMT ("key %ld not found", key);
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
// explicit template instantiations
|
|
209
|
+
|
|
210
|
+
template struct IndexIDMapTemplate<Index>;
|
|
211
|
+
template struct IndexIDMapTemplate<IndexBinary>;
|
|
212
|
+
template struct IndexIDMap2Template<Index>;
|
|
213
|
+
template struct IndexIDMap2Template<IndexBinary>;
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
/*****************************************************
|
|
217
|
+
* IndexSplitVectors implementation
|
|
218
|
+
*******************************************************/
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
/// Create an index of dimension d with no sub-indexes yet: nothing is
/// owned and the accumulated sub-index dimension (sum_d) is 0.
IndexSplitVectors::IndexSplitVectors (idx_t d, bool threaded)
    : Index (d),
      own_fields (false),
      threaded (threaded),
      sum_d (0)
{
}
|
|
227
|
+
|
|
228
|
+
/// Append a sub-index and refresh the fields derived from the sub-indexes.
/// The sub-index stays in the list even if the refresh throws.
void IndexSplitVectors::add_sub_index (Index *index)
{
    sub_indexes.emplace_back (index);
    sync_with_sub_indexes ();
}
|
|
233
|
+
|
|
234
|
+
void IndexSplitVectors::sync_with_sub_indexes ()
|
|
235
|
+
{
|
|
236
|
+
if (sub_indexes.empty()) return;
|
|
237
|
+
Index * index0 = sub_indexes[0];
|
|
238
|
+
sum_d = index0->d;
|
|
239
|
+
metric_type = index0->metric_type;
|
|
240
|
+
is_trained = index0->is_trained;
|
|
241
|
+
ntotal = index0->ntotal;
|
|
242
|
+
for (int i = 1; i < sub_indexes.size(); i++) {
|
|
243
|
+
Index * index = sub_indexes[i];
|
|
244
|
+
FAISS_THROW_IF_NOT (metric_type == index->metric_type);
|
|
245
|
+
FAISS_THROW_IF_NOT (ntotal == index->ntotal);
|
|
246
|
+
sum_d += index->d;
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
/// Not supported by this index: always throws.
void IndexSplitVectors::add (idx_t /*n*/, const float * /*x*/)
{
    FAISS_THROW_MSG ("not implemented");
}
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
/** Search every sub-index on its own slice of the query dimensions and
 *  merge the per-sub-index results.
 *
 * Sub-index `no` receives, for each query, the sub_d consecutive
 * components that start after the dimensions of sub-indexes 0..no-1.
 * The sub-indexes are queried one after the other, or each in its own
 * WorkerThread when `threaded` is set.
 *
 * Merge rule (k == 1): the final label is the sum over sub-indexes of
 * label_i * (product of ntotal of the preceding sub-indexes) and the
 * final distance is the sum of the sub-index distances. If any sub-index
 * reports a negative label for a query, the result is label -1 and a
 * NaN distance.
 *
 * @param n          number of queries
 * @param x          queries, n rows of d floats
 * @param k          must be 1
 * @param distances  output, size n
 * @param labels     output, size n
 * @throws FaissException if k != 1 or if the sub-index dimensions do
 *         not add up to d
 */
void IndexSplitVectors::search (
           idx_t n, const float *x, idx_t k,
           float *distances, idx_t *labels) const
{
    FAISS_THROW_IF_NOT_MSG (k == 1,
                      "search implemented only for k=1");
    FAISS_THROW_IF_NOT_MSG (sum_d == d,
                      "not enough indexes compared to # dimensions");

    int64_t nshard = sub_indexes.size();

    // Each buffer gets its deleter right after allocation, so the first
    // one is not leaked when the second allocation throws.
    float *all_distances = new float [nshard * k * n];
    ScopeDeleter<float> del (all_distances);
    idx_t *all_labels = new idx_t [nshard * k * n];
    ScopeDeleter<idx_t> del2 (all_labels);

    auto query_func = [n, x, k, distances, labels, all_distances, all_labels, this]
        (int no) {
        const IndexSplitVectors *index = this;
        // sub-index 0 writes straight into the caller's output buffers
        float *distances1 = no == 0 ? distances : all_distances + no * k * n;
        idx_t *labels1 = no == 0 ? labels : all_labels + no * k * n;
        if (index->verbose)
            printf ("begin query shard %d on %" PRId64 " points\n",
                    no, static_cast<int64_t> (n));
        const Index * sub_index = index->sub_indexes[no];
        int64_t sub_d = sub_index->d, d = index->d;
        // offset of this sub-index's first dimension inside a full vector
        idx_t ofs = 0;
        for (int i = 0; i < no; i++) ofs += index->sub_indexes[i]->d;
        float *sub_x = new float [sub_d * n];
        ScopeDeleter<float> del1 (sub_x);
        // Copy sub_d floats per query. The size is that of an element,
        // not of the pointer: sizeof (sub_x) would copy too many bytes
        // and run past both buffers.
        for (idx_t i = 0; i < n; i++)
            memcpy (sub_x + i * sub_d, x + ofs + i * d,
                    sub_d * sizeof (*sub_x));
        sub_index->search (n, sub_x, k, distances1, labels1);
        if (index->verbose)
            printf ("end query shard %d\n", no);
    };

    if (!threaded) {
        for (int i = 0; i < nshard; i++) {
            query_func(i);
        }
    } else {
        std::vector<std::unique_ptr<WorkerThread> > threads;
        std::vector<std::future<bool>> v;

        for (int i = 0; i < nshard; i++) {
            threads.emplace_back(new WorkerThread());
            WorkerThread *wt = threads.back().get();
            v.emplace_back(wt->add([i, query_func](){query_func(i); }));
        }

        // Blocking wait for completion
        for (auto& func : v) {
            func.get();
        }
    }

    // factor = product of ntotal over the sub-indexes already merged
    int64_t factor = 1;
    for (int i = 0; i < nshard; i++) {
        if (i > 0) { // results of 0 are already in the table
            const float *distances_i = all_distances + i * k * n;
            const idx_t *labels_i = all_labels + i * k * n;
            for (int64_t j = 0; j < n; j++) {
                if (labels[j] >= 0 && labels_i[j] >= 0) {
                    labels[j] += labels_i[j] * factor;
                    distances[j] += distances_i[j];
                } else {
                    // one missing partial result invalidates the query
                    labels[j] = -1;
                    distances[j] = std::numeric_limits<float>::quiet_NaN();
                }
            }
        }
        factor *= sub_indexes[i]->ntotal;
    }

}
|
|
331
|
+
|
|
332
|
+
/// Not supported by this index: always throws.
void IndexSplitVectors::train (idx_t /*n*/, const float * /*x*/)
{
    FAISS_THROW_MSG ("not implemented");
}
|
|
335
|
+
|
|
336
|
+
void IndexSplitVectors::reset ()
|
|
337
|
+
{
|
|
338
|
+
FAISS_THROW_MSG ("not implemented");
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
/// Deletes every sub-index, but only when own_fields is set.
IndexSplitVectors::~IndexSplitVectors ()
{
    if (!own_fields) {
        return;
    }
    for (Index * sub : sub_indexes) {
        delete sub;
    }
}
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
} // namespace faiss
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#ifndef META_INDEXES_H
|
|
11
|
+
#define META_INDEXES_H
|
|
12
|
+
|
|
13
|
+
#include <vector>
|
|
14
|
+
#include <unordered_map>
|
|
15
|
+
#include <faiss/Index.h>
|
|
16
|
+
#include <faiss/IndexShards.h>
|
|
17
|
+
#include <faiss/IndexReplicas.h>
|
|
18
|
+
|
|
19
|
+
namespace faiss {
|
|
20
|
+
|
|
21
|
+
/** Index that translates search results to ids */
|
|
22
|
+
template <typename IndexT>
|
|
23
|
+
struct IndexIDMapTemplate : IndexT {
|
|
24
|
+
using idx_t = typename IndexT::idx_t;
|
|
25
|
+
using component_t = typename IndexT::component_t;
|
|
26
|
+
using distance_t = typename IndexT::distance_t;
|
|
27
|
+
|
|
28
|
+
IndexT * index; ///! the sub-index
|
|
29
|
+
bool own_fields; ///! whether pointers are deleted in destructo
|
|
30
|
+
std::vector<idx_t> id_map;
|
|
31
|
+
|
|
32
|
+
explicit IndexIDMapTemplate (IndexT *index);
|
|
33
|
+
|
|
34
|
+
/// @param xids if non-null, ids to store for the vectors (size n)
|
|
35
|
+
void add_with_ids(idx_t n, const component_t* x, const idx_t* xids) override;
|
|
36
|
+
|
|
37
|
+
/// this will fail. Use add_with_ids
|
|
38
|
+
void add(idx_t n, const component_t* x) override;
|
|
39
|
+
|
|
40
|
+
void search(
|
|
41
|
+
idx_t n, const component_t* x, idx_t k,
|
|
42
|
+
distance_t* distances,
|
|
43
|
+
idx_t* labels) const override;
|
|
44
|
+
|
|
45
|
+
void train(idx_t n, const component_t* x) override;
|
|
46
|
+
|
|
47
|
+
void reset() override;
|
|
48
|
+
|
|
49
|
+
/// remove ids adapted to IndexFlat
|
|
50
|
+
size_t remove_ids(const IDSelector& sel) override;
|
|
51
|
+
|
|
52
|
+
void range_search (idx_t n, const component_t *x, distance_t radius,
|
|
53
|
+
RangeSearchResult *result) const override;
|
|
54
|
+
|
|
55
|
+
~IndexIDMapTemplate () override;
|
|
56
|
+
IndexIDMapTemplate () {own_fields=false; index=nullptr; }
|
|
57
|
+
};
|
|
58
|
+
|
|
59
|
+
using IndexIDMap = IndexIDMapTemplate<Index>;
|
|
60
|
+
using IndexBinaryIDMap = IndexIDMapTemplate<IndexBinary>;
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
/** same as IndexIDMap but also provides an efficient reconstruction
|
|
64
|
+
* implementation via a 2-way index */
|
|
65
|
+
template <typename IndexT>
|
|
66
|
+
struct IndexIDMap2Template : IndexIDMapTemplate<IndexT> {
|
|
67
|
+
using idx_t = typename IndexT::idx_t;
|
|
68
|
+
using component_t = typename IndexT::component_t;
|
|
69
|
+
using distance_t = typename IndexT::distance_t;
|
|
70
|
+
|
|
71
|
+
std::unordered_map<idx_t, idx_t> rev_map;
|
|
72
|
+
|
|
73
|
+
explicit IndexIDMap2Template (IndexT *index);
|
|
74
|
+
|
|
75
|
+
/// make the rev_map from scratch
|
|
76
|
+
void construct_rev_map ();
|
|
77
|
+
|
|
78
|
+
void add_with_ids(idx_t n, const component_t* x, const idx_t* xids) override;
|
|
79
|
+
|
|
80
|
+
size_t remove_ids(const IDSelector& sel) override;
|
|
81
|
+
|
|
82
|
+
void reconstruct (idx_t key, component_t * recons) const override;
|
|
83
|
+
|
|
84
|
+
~IndexIDMap2Template() override {}
|
|
85
|
+
IndexIDMap2Template () {}
|
|
86
|
+
};
|
|
87
|
+
|
|
88
|
+
using IndexIDMap2 = IndexIDMap2Template<Index>;
|
|
89
|
+
using IndexBinaryIDMap2 = IndexIDMap2Template<IndexBinary>;
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
/** splits input vectors in segments and assigns each segment to a sub-index
|
|
93
|
+
* used to distribute a MultiIndexQuantizer
|
|
94
|
+
*/
|
|
95
|
+
struct IndexSplitVectors: Index {
|
|
96
|
+
bool own_fields;
|
|
97
|
+
bool threaded;
|
|
98
|
+
std::vector<Index*> sub_indexes;
|
|
99
|
+
idx_t sum_d; /// sum of dimensions seen so far
|
|
100
|
+
|
|
101
|
+
explicit IndexSplitVectors (idx_t d, bool threaded = false);
|
|
102
|
+
|
|
103
|
+
void add_sub_index (Index *);
|
|
104
|
+
void sync_with_sub_indexes ();
|
|
105
|
+
|
|
106
|
+
void add(idx_t n, const float* x) override;
|
|
107
|
+
|
|
108
|
+
void search(
|
|
109
|
+
idx_t n,
|
|
110
|
+
const float* x,
|
|
111
|
+
idx_t k,
|
|
112
|
+
float* distances,
|
|
113
|
+
idx_t* labels) const override;
|
|
114
|
+
|
|
115
|
+
void train(idx_t n, const float* x) override;
|
|
116
|
+
|
|
117
|
+
void reset() override;
|
|
118
|
+
|
|
119
|
+
~IndexSplitVectors() override;
|
|
120
|
+
};
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
} // namespace faiss
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
#endif
|