faiss 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +103 -3
- data/ext/faiss/ext.cpp +99 -32
- data/ext/faiss/extconf.rb +12 -2
- data/lib/faiss/ext.bundle +0 -0
- data/lib/faiss/index.rb +3 -3
- data/lib/faiss/index_binary.rb +3 -3
- data/lib/faiss/kmeans.rb +1 -1
- data/lib/faiss/pca_matrix.rb +2 -2
- data/lib/faiss/product_quantizer.rb +3 -3
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/AutoTune.cpp +719 -0
- data/vendor/faiss/AutoTune.h +212 -0
- data/vendor/faiss/Clustering.cpp +261 -0
- data/vendor/faiss/Clustering.h +101 -0
- data/vendor/faiss/IVFlib.cpp +339 -0
- data/vendor/faiss/IVFlib.h +132 -0
- data/vendor/faiss/Index.cpp +171 -0
- data/vendor/faiss/Index.h +261 -0
- data/vendor/faiss/Index2Layer.cpp +437 -0
- data/vendor/faiss/Index2Layer.h +85 -0
- data/vendor/faiss/IndexBinary.cpp +77 -0
- data/vendor/faiss/IndexBinary.h +163 -0
- data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
- data/vendor/faiss/IndexBinaryFlat.h +54 -0
- data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
- data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
- data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
- data/vendor/faiss/IndexBinaryHNSW.h +56 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
- data/vendor/faiss/IndexBinaryIVF.h +211 -0
- data/vendor/faiss/IndexFlat.cpp +508 -0
- data/vendor/faiss/IndexFlat.h +175 -0
- data/vendor/faiss/IndexHNSW.cpp +1090 -0
- data/vendor/faiss/IndexHNSW.h +170 -0
- data/vendor/faiss/IndexIVF.cpp +909 -0
- data/vendor/faiss/IndexIVF.h +353 -0
- data/vendor/faiss/IndexIVFFlat.cpp +502 -0
- data/vendor/faiss/IndexIVFFlat.h +118 -0
- data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
- data/vendor/faiss/IndexIVFPQ.h +161 -0
- data/vendor/faiss/IndexIVFPQR.cpp +219 -0
- data/vendor/faiss/IndexIVFPQR.h +65 -0
- data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
- data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
- data/vendor/faiss/IndexLSH.cpp +225 -0
- data/vendor/faiss/IndexLSH.h +87 -0
- data/vendor/faiss/IndexLattice.cpp +143 -0
- data/vendor/faiss/IndexLattice.h +68 -0
- data/vendor/faiss/IndexPQ.cpp +1188 -0
- data/vendor/faiss/IndexPQ.h +199 -0
- data/vendor/faiss/IndexPreTransform.cpp +288 -0
- data/vendor/faiss/IndexPreTransform.h +91 -0
- data/vendor/faiss/IndexReplicas.cpp +123 -0
- data/vendor/faiss/IndexReplicas.h +76 -0
- data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
- data/vendor/faiss/IndexScalarQuantizer.h +127 -0
- data/vendor/faiss/IndexShards.cpp +317 -0
- data/vendor/faiss/IndexShards.h +100 -0
- data/vendor/faiss/InvertedLists.cpp +623 -0
- data/vendor/faiss/InvertedLists.h +334 -0
- data/vendor/faiss/LICENSE +21 -0
- data/vendor/faiss/MatrixStats.cpp +252 -0
- data/vendor/faiss/MatrixStats.h +62 -0
- data/vendor/faiss/MetaIndexes.cpp +351 -0
- data/vendor/faiss/MetaIndexes.h +126 -0
- data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
- data/vendor/faiss/OnDiskInvertedLists.h +127 -0
- data/vendor/faiss/VectorTransform.cpp +1157 -0
- data/vendor/faiss/VectorTransform.h +322 -0
- data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
- data/vendor/faiss/c_api/AutoTune_c.h +64 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
- data/vendor/faiss/c_api/Clustering_c.h +117 -0
- data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
- data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
- data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
- data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
- data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
- data/vendor/faiss/c_api/IndexShards_c.h +42 -0
- data/vendor/faiss/c_api/Index_c.cpp +105 -0
- data/vendor/faiss/c_api/Index_c.h +183 -0
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
- data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
- data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
- data/vendor/faiss/c_api/clone_index_c.h +32 -0
- data/vendor/faiss/c_api/error_c.h +42 -0
- data/vendor/faiss/c_api/error_impl.cpp +27 -0
- data/vendor/faiss/c_api/error_impl.h +16 -0
- data/vendor/faiss/c_api/faiss_c.h +58 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
- data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
- data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
- data/vendor/faiss/c_api/index_factory_c.h +30 -0
- data/vendor/faiss/c_api/index_io_c.cpp +42 -0
- data/vendor/faiss/c_api/index_io_c.h +50 -0
- data/vendor/faiss/c_api/macros_impl.h +110 -0
- data/vendor/faiss/clone_index.cpp +147 -0
- data/vendor/faiss/clone_index.h +38 -0
- data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
- data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
- data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
- data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
- data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
- data/vendor/faiss/gpu/GpuCloner.h +82 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
- data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
- data/vendor/faiss/gpu/GpuDistance.h +52 -0
- data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
- data/vendor/faiss/gpu/GpuIndex.h +148 -0
- data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
- data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
- data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
- data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
- data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
- data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
- data/vendor/faiss/gpu/GpuResources.cpp +52 -0
- data/vendor/faiss/gpu/GpuResources.h +73 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
- data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
- data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
- data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
- data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
- data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
- data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
- data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
- data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
- data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
- data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
- data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
- data/vendor/faiss/gpu/test/TestUtils.h +93 -0
- data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
- data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
- data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
- data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
- data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
- data/vendor/faiss/gpu/utils/Timer.h +52 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
- data/vendor/faiss/impl/FaissAssert.h +95 -0
- data/vendor/faiss/impl/FaissException.cpp +66 -0
- data/vendor/faiss/impl/FaissException.h +71 -0
- data/vendor/faiss/impl/HNSW.cpp +818 -0
- data/vendor/faiss/impl/HNSW.h +275 -0
- data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
- data/vendor/faiss/impl/PolysemousTraining.h +158 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
- data/vendor/faiss/impl/ProductQuantizer.h +242 -0
- data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
- data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
- data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
- data/vendor/faiss/impl/ThreadedIndex.h +80 -0
- data/vendor/faiss/impl/index_read.cpp +793 -0
- data/vendor/faiss/impl/index_write.cpp +558 -0
- data/vendor/faiss/impl/io.cpp +142 -0
- data/vendor/faiss/impl/io.h +98 -0
- data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
- data/vendor/faiss/impl/lattice_Zn.h +199 -0
- data/vendor/faiss/index_factory.cpp +392 -0
- data/vendor/faiss/index_factory.h +25 -0
- data/vendor/faiss/index_io.h +75 -0
- data/vendor/faiss/misc/test_blas.cpp +84 -0
- data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
- data/vendor/faiss/tests/test_merge.cpp +258 -0
- data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
- data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
- data/vendor/faiss/tests/test_params_override.cpp +231 -0
- data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
- data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
- data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
- data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
- data/vendor/faiss/utils/Heap.cpp +122 -0
- data/vendor/faiss/utils/Heap.h +495 -0
- data/vendor/faiss/utils/WorkerThread.cpp +126 -0
- data/vendor/faiss/utils/WorkerThread.h +61 -0
- data/vendor/faiss/utils/distances.cpp +765 -0
- data/vendor/faiss/utils/distances.h +243 -0
- data/vendor/faiss/utils/distances_simd.cpp +809 -0
- data/vendor/faiss/utils/extra_distances.cpp +336 -0
- data/vendor/faiss/utils/extra_distances.h +54 -0
- data/vendor/faiss/utils/hamming-inl.h +472 -0
- data/vendor/faiss/utils/hamming.cpp +792 -0
- data/vendor/faiss/utils/hamming.h +220 -0
- data/vendor/faiss/utils/random.cpp +192 -0
- data/vendor/faiss/utils/random.h +60 -0
- data/vendor/faiss/utils/utils.cpp +783 -0
- data/vendor/faiss/utils/utils.h +181 -0
- metadata +216 -2
|
@@ -0,0 +1,623 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#include <faiss/InvertedLists.h>
|
|
11
|
+
|
|
12
|
+
#include <cstdio>
|
|
13
|
+
|
|
14
|
+
#include <faiss/utils/utils.h>
|
|
15
|
+
#include <faiss/impl/FaissAssert.h>
|
|
16
|
+
|
|
17
|
+
namespace faiss {
|
|
18
|
+
|
|
19
|
+
using ScopedIds = InvertedLists::ScopedIds;
|
|
20
|
+
using ScopedCodes = InvertedLists::ScopedCodes;
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
/*****************************************
|
|
24
|
+
* InvertedLists implementation
|
|
25
|
+
******************************************/
|
|
26
|
+
|
|
27
|
+
// Records how many inverted lists there are and the size of one code.
InvertedLists::InvertedLists (size_t nlist, size_t code_size)
    : nlist (nlist),
      code_size (code_size)
{}
|
|
31
|
+
|
|
32
|
+
// The base class has nothing to clean up in its destructor body.
InvertedLists::~InvertedLists ()
{
}
|
|
34
|
+
|
|
35
|
+
// Returns the id stored at position `offset` of inverted list `list_no`.
//
// The array obtained from get_ids() is handed back through release_ids()
// before returning, so an implementation whose get_ids() allocates a
// temporary buffer does not leak it on every call.
//
// offset must be smaller than list_size(list_no) (checked by assert only).
InvertedLists::idx_t InvertedLists::get_single_id (
        size_t list_no, size_t offset) const
{
    assert (offset < list_size (list_no));
    const idx_t *list_ids = get_ids (list_no);
    const idx_t id = list_ids [offset];
    release_ids (list_no, list_ids);
    return id;
}
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
void InvertedLists::release_codes (size_t, const uint8_t *) const
|
|
44
|
+
{}
|
|
45
|
+
|
|
46
|
+
void InvertedLists::release_ids (size_t, const idx_t *) const
|
|
47
|
+
{}
|
|
48
|
+
|
|
49
|
+
void InvertedLists::prefetch_lists (const idx_t *, int) const
|
|
50
|
+
{}
|
|
51
|
+
|
|
52
|
+
const uint8_t * InvertedLists::get_single_code (
|
|
53
|
+
size_t list_no, size_t offset) const
|
|
54
|
+
{
|
|
55
|
+
assert (offset < list_size (list_no));
|
|
56
|
+
return get_codes(list_no) + offset * code_size;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
size_t InvertedLists::add_entry (size_t list_no, idx_t theid,
|
|
60
|
+
const uint8_t *code)
|
|
61
|
+
{
|
|
62
|
+
return add_entries (list_no, 1, &theid, code);
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
void InvertedLists::update_entry (size_t list_no, size_t offset,
|
|
66
|
+
idx_t id, const uint8_t *code)
|
|
67
|
+
{
|
|
68
|
+
update_entries (list_no, offset, 1, &id, code);
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
void InvertedLists::reset () {
|
|
72
|
+
for (size_t i = 0; i < nlist; i++) {
|
|
73
|
+
resize (i, 0);
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
// Moves all entries of `oivf` into this object, list by list.
//
// For each list number below this object's nlist, the entries of the
// matching list of oivf are appended here with add_entries(), then that
// list of oivf is resized to 0. When add_id is non-zero it is added to
// every id before insertion. The per-list loop is an OpenMP parallel for.
void InvertedLists::merge_from (InvertedLists *oivf, size_t add_id) {

#pragma omp parallel for
    for (idx_t list_no = 0; list_no < nlist; list_no++) {
        const size_t n_src = oivf->list_size (list_no);
        ScopedIds src_ids (oivf, list_no);

        // ids to append: the source ids as they are, or a shifted copy
        std::vector <idx_t> shifted_ids;
        const idx_t *ids_to_add = src_ids.get ();
        if (add_id != 0) {
            shifted_ids.resize (n_src);
            for (size_t j = 0; j < n_src; j++) {
                shifted_ids [j] = src_ids [j] + add_id;
            }
            ids_to_add = shifted_ids.data ();
        }

        // the codes handle is a temporary so that it is released before
        // the source list gets resized below
        add_entries (list_no, n_src, ids_to_add,
                     ScopedCodes (oivf, list_no).get ());
        oivf->resize (list_no, 0);
    }
}
|
|
98
|
+
|
|
99
|
+
double InvertedLists::imbalance_factor () const {
|
|
100
|
+
std::vector<int> hist(nlist);
|
|
101
|
+
|
|
102
|
+
for (size_t i = 0; i < nlist; i++) {
|
|
103
|
+
hist[i] = list_size(i);
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
return faiss::imbalance_factor(nlist, hist.data());
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
void InvertedLists::print_stats () const {
|
|
110
|
+
std::vector<int> sizes(40);
|
|
111
|
+
for (size_t i = 0; i < nlist; i++) {
|
|
112
|
+
for (size_t j = 0; j < sizes.size(); j++) {
|
|
113
|
+
if ((list_size(i) >> j) == 0) {
|
|
114
|
+
sizes[j]++;
|
|
115
|
+
break;
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
for (size_t i = 0; i < sizes.size(); i++) {
|
|
120
|
+
if (sizes[i]) {
|
|
121
|
+
printf("list size in < %d: %d instances\n", 1 << i, sizes[i]);
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
size_t InvertedLists::compute_ntotal () const {
|
|
127
|
+
size_t tot = 0;
|
|
128
|
+
for (size_t i = 0; i < nlist; i++) {
|
|
129
|
+
tot += list_size(i);
|
|
130
|
+
}
|
|
131
|
+
return tot;
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
/*****************************************
|
|
135
|
+
* ArrayInvertedLists implementation
|
|
136
|
+
******************************************/
|
|
137
|
+
|
|
138
|
+
// Creates nlist empty lists: one ids container and one codes container
// per list.
ArrayInvertedLists::ArrayInvertedLists (size_t nlist, size_t code_size)
    : InvertedLists (nlist, code_size)
{
    codes.resize (nlist);
    ids.resize (nlist);
}
|
|
144
|
+
|
|
145
|
+
size_t ArrayInvertedLists::add_entries (
|
|
146
|
+
size_t list_no, size_t n_entry,
|
|
147
|
+
const idx_t* ids_in, const uint8_t *code)
|
|
148
|
+
{
|
|
149
|
+
if (n_entry == 0) return 0;
|
|
150
|
+
assert (list_no < nlist);
|
|
151
|
+
size_t o = ids [list_no].size();
|
|
152
|
+
ids [list_no].resize (o + n_entry);
|
|
153
|
+
memcpy (&ids[list_no][o], ids_in, sizeof (ids_in[0]) * n_entry);
|
|
154
|
+
codes [list_no].resize ((o + n_entry) * code_size);
|
|
155
|
+
memcpy (&codes[list_no][o * code_size], code, code_size * n_entry);
|
|
156
|
+
return o;
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
size_t ArrayInvertedLists::list_size(size_t list_no) const
|
|
160
|
+
{
|
|
161
|
+
assert (list_no < nlist);
|
|
162
|
+
return ids[list_no].size();
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
const uint8_t * ArrayInvertedLists::get_codes (size_t list_no) const
|
|
166
|
+
{
|
|
167
|
+
assert (list_no < nlist);
|
|
168
|
+
return codes[list_no].data();
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
// Returns a pointer to the internal id storage of list `list_no`.
const InvertedLists::idx_t * ArrayInvertedLists::get_ids (size_t list_no) const
{
    assert (list_no < nlist);
    const auto & list_ids = ids [list_no];
    return list_ids.data ();
}
|
|
177
|
+
|
|
178
|
+
void ArrayInvertedLists::resize (size_t list_no, size_t new_size)
|
|
179
|
+
{
|
|
180
|
+
ids[list_no].resize (new_size);
|
|
181
|
+
codes[list_no].resize (new_size * code_size);
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
void ArrayInvertedLists::update_entries (
|
|
185
|
+
size_t list_no, size_t offset, size_t n_entry,
|
|
186
|
+
const idx_t *ids_in, const uint8_t *codes_in)
|
|
187
|
+
{
|
|
188
|
+
assert (list_no < nlist);
|
|
189
|
+
assert (n_entry + offset <= ids[list_no].size());
|
|
190
|
+
memcpy (&ids[list_no][offset], ids_in, sizeof(ids_in[0]) * n_entry);
|
|
191
|
+
memcpy (&codes[list_no][offset * code_size], codes_in, code_size * n_entry);
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
// No explicit cleanup is performed in the destructor body.
ArrayInvertedLists::~ArrayInvertedLists ()
{
}
|
|
197
|
+
|
|
198
|
+
/*****************************************************************
|
|
199
|
+
* Meta-inverted list implementations
|
|
200
|
+
*****************************************************************/
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
size_t ReadOnlyInvertedLists::add_entries (
|
|
204
|
+
size_t , size_t ,
|
|
205
|
+
const idx_t* , const uint8_t *)
|
|
206
|
+
{
|
|
207
|
+
FAISS_THROW_MSG ("not implemented");
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
void ReadOnlyInvertedLists::update_entries (size_t, size_t , size_t ,
|
|
211
|
+
const idx_t *, const uint8_t *)
|
|
212
|
+
{
|
|
213
|
+
FAISS_THROW_MSG ("not implemented");
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
void ReadOnlyInvertedLists::resize (size_t , size_t )
|
|
217
|
+
{
|
|
218
|
+
FAISS_THROW_MSG ("not implemented");
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
/*****************************************
|
|
224
|
+
* HStackInvertedLists implementation
|
|
225
|
+
******************************************/
|
|
226
|
+
|
|
227
|
+
// Builds a read-only view over the `nil` inverted lists in ils_in.
// nlist and code_size are taken from the first one; every sub-list must
// have the same values, and nil must be positive, otherwise this throws.
HStackInvertedLists::HStackInvertedLists (
        int nil, const InvertedLists **ils_in)
    : ReadOnlyInvertedLists (nil > 0 ? ils_in[0]->nlist : 0,
                             nil > 0 ? ils_in[0]->code_size : 0)
{
    // the base initializers above guard against nil <= 0; reject it here
    FAISS_THROW_IF_NOT (nil > 0);

    int i = 0;
    while (i < nil) {
        ils.push_back (ils_in[i]);
        FAISS_THROW_IF_NOT (ils_in[i]->code_size == code_size &&
                            ils_in[i]->nlist == nlist);
        ++i;
    }
}
|
|
239
|
+
|
|
240
|
+
size_t HStackInvertedLists::list_size(size_t list_no) const
|
|
241
|
+
{
|
|
242
|
+
size_t sz = 0;
|
|
243
|
+
for (int i = 0; i < ils.size(); i++) {
|
|
244
|
+
const InvertedLists *il = ils[i];
|
|
245
|
+
sz += il->list_size (list_no);
|
|
246
|
+
}
|
|
247
|
+
return sz;
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
const uint8_t * HStackInvertedLists::get_codes (size_t list_no) const
|
|
251
|
+
{
|
|
252
|
+
uint8_t *codes = new uint8_t [code_size * list_size(list_no)], *c = codes;
|
|
253
|
+
|
|
254
|
+
for (int i = 0; i < ils.size(); i++) {
|
|
255
|
+
const InvertedLists *il = ils[i];
|
|
256
|
+
size_t sz = il->list_size(list_no) * code_size;
|
|
257
|
+
if (sz > 0) {
|
|
258
|
+
memcpy (c, ScopedCodes (il, list_no).get(), sz);
|
|
259
|
+
c += sz;
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
return codes;
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
const uint8_t * HStackInvertedLists::get_single_code (
|
|
266
|
+
size_t list_no, size_t offset) const
|
|
267
|
+
{
|
|
268
|
+
for (int i = 0; i < ils.size(); i++) {
|
|
269
|
+
const InvertedLists *il = ils[i];
|
|
270
|
+
size_t sz = il->list_size (list_no);
|
|
271
|
+
if (offset < sz) {
|
|
272
|
+
// here we have to copy the code, otherwise it will crash at dealloc
|
|
273
|
+
uint8_t * code = new uint8_t [code_size];
|
|
274
|
+
memcpy (code, ScopedCodes (il, list_no, offset).get(), code_size);
|
|
275
|
+
return code;
|
|
276
|
+
}
|
|
277
|
+
offset -= sz;
|
|
278
|
+
}
|
|
279
|
+
FAISS_THROW_FMT ("offset %ld unknown", offset);
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
// Frees a buffer that was allocated with new[] by get_codes() or
// get_single_code() of this class.
void HStackInvertedLists::release_codes (size_t /* list_no */,
                                         const uint8_t *codes) const
{
    delete [] codes;
}
|
|
286
|
+
|
|
287
|
+
// Returns a freshly allocated array holding the ids of list `list_no`
// from every stacked inverted list, concatenated in stacking order.
// The array is allocated with new[]; release_ids() deletes it.
const Index::idx_t * HStackInvertedLists::get_ids (size_t list_no) const
{
    idx_t * const ids = new idx_t [list_size(list_no)];
    idx_t *write_pos = ids;

    for (const InvertedLists *il : ils) {
        const size_t n_ids = il->list_size(list_no);
        if (n_ids == 0) {
            continue;   // nothing to copy from this sub-list
        }
        memcpy (write_pos, ScopedIds (il, list_no).get(),
                n_ids * sizeof(idx_t));
        write_pos += n_ids;
    }
    return ids;
}
|
|
301
|
+
|
|
302
|
+
// Returns the id at position `offset` of the concatenated list
// `list_no`, obtained from the sub-list that contains that position.
// Throws when offset is not smaller than the concatenated list size.
Index::idx_t HStackInvertedLists::get_single_id (
        size_t list_no, size_t offset) const
{
    // kept for the error message: `offset` is consumed while scanning
    const size_t requested_offset = offset;

    for (const InvertedLists *il : ils) {
        const size_t sz = il->list_size (list_no);
        if (offset < sz) {
            return il->get_single_id (list_no, offset);
        }
        offset -= sz;   // make the offset relative to the next sub-list
    }
    // %zu is the conversion that matches size_t
    FAISS_THROW_FMT ("offset %zu unknown", requested_offset);
}
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
// Frees an array that was allocated with new[] by get_ids() of this
// class.
void HStackInvertedLists::release_ids (size_t /* list_no */,
                                       const idx_t *ids) const
{
    delete [] ids;
}
|
|
321
|
+
|
|
322
|
+
void HStackInvertedLists::prefetch_lists (const idx_t *list_nos, int nlist) const
|
|
323
|
+
{
|
|
324
|
+
for (int i = 0; i < ils.size(); i++) {
|
|
325
|
+
const InvertedLists *il = ils[i];
|
|
326
|
+
il->prefetch_lists (list_nos, nlist);
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
/*****************************************
|
|
331
|
+
* SliceInvertedLists implementation
|
|
332
|
+
******************************************/
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
namespace {
|
|
336
|
+
|
|
337
|
+
using idx_t = InvertedLists::idx_t;
|
|
338
|
+
|
|
339
|
+
idx_t translate_list_no (const SliceInvertedLists *sil,
|
|
340
|
+
idx_t list_no) {
|
|
341
|
+
FAISS_THROW_IF_NOT (list_no >= 0 && list_no < sil->nlist);
|
|
342
|
+
return list_no + sil->i0;
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
};
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
// Read-only view on `il` exposing i1 - i0 lists, with the code size of
// `il`. Slice list numbers are shifted by i0 when accessing `il`.
SliceInvertedLists::SliceInvertedLists (
        const InvertedLists *il, idx_t i0, idx_t i1)
    : ReadOnlyInvertedLists (i1 - i0, il->code_size),
      il (il),
      i0 (i0),
      i1 (i1)
{}
|
|
356
|
+
|
|
357
|
+
size_t SliceInvertedLists::list_size(size_t list_no) const
|
|
358
|
+
{
|
|
359
|
+
return il->list_size (translate_list_no (this, list_no));
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
const uint8_t * SliceInvertedLists::get_codes (size_t list_no) const
|
|
363
|
+
{
|
|
364
|
+
return il->get_codes (translate_list_no (this, list_no));
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
const uint8_t * SliceInvertedLists::get_single_code (
|
|
368
|
+
size_t list_no, size_t offset) const
|
|
369
|
+
{
|
|
370
|
+
return il->get_single_code (translate_list_no (this, list_no), offset);
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
void SliceInvertedLists::release_codes (
|
|
375
|
+
size_t list_no, const uint8_t *codes) const {
|
|
376
|
+
return il->release_codes (translate_list_no (this, list_no), codes);
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
// Ids of slice list `list_no`, obtained from the wrapped inverted lists.
const Index::idx_t * SliceInvertedLists::get_ids (size_t list_no) const
{
    const idx_t inner_no = translate_list_no (this, list_no);
    return il->get_ids (inner_no);
}
|
|
383
|
+
|
|
384
|
+
// Id at position `offset` of slice list `list_no`, obtained from the
// wrapped inverted lists.
Index::idx_t SliceInvertedLists::get_single_id (
        size_t list_no, size_t offset) const
{
    const idx_t inner_no = translate_list_no (this, list_no);
    return il->get_single_id (inner_no, offset);
}
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
void SliceInvertedLists::release_ids (size_t list_no, const idx_t *ids) const {
|
|
392
|
+
return il->release_ids (translate_list_no (this, list_no), ids);
|
|
393
|
+
}
|
|
394
|
+
|
|
395
|
+
void SliceInvertedLists::prefetch_lists (const idx_t *list_nos, int nlist) const
|
|
396
|
+
{
|
|
397
|
+
std::vector<idx_t> translated_list_nos;
|
|
398
|
+
for (int j = 0; j < nlist; j++) {
|
|
399
|
+
idx_t list_no = list_nos[j];
|
|
400
|
+
if (list_no < 0) continue;
|
|
401
|
+
translated_list_nos.push_back (translate_list_no (this, list_no));
|
|
402
|
+
}
|
|
403
|
+
il->prefetch_lists (translated_list_nos.data(),
|
|
404
|
+
translated_list_nos.size());
|
|
405
|
+
}
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
/*****************************************
|
|
409
|
+
* VStackInvertedLists implementation
|
|
410
|
+
******************************************/
|
|
411
|
+
|
|
412
|
+
namespace {
|
|
413
|
+
|
|
414
|
+
using idx_t = InvertedLists::idx_t;
|
|
415
|
+
|
|
416
|
+
// find the invlist this number belongs to
|
|
417
|
+
int translate_list_no (const VStackInvertedLists *vil,
|
|
418
|
+
idx_t list_no) {
|
|
419
|
+
FAISS_THROW_IF_NOT (list_no >= 0 && list_no < vil->nlist);
|
|
420
|
+
int i0 = 0, i1 = vil->ils.size();
|
|
421
|
+
const idx_t *cumsz = vil->cumsz.data();
|
|
422
|
+
while (i0 + 1 < i1) {
|
|
423
|
+
int imed = (i0 + i1) / 2;
|
|
424
|
+
if (list_no >= cumsz[imed]) {
|
|
425
|
+
i0 = imed;
|
|
426
|
+
} else {
|
|
427
|
+
i1 = imed;
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
assert(list_no >= cumsz[i0] && list_no < cumsz[i0 + 1]);
|
|
431
|
+
return i0;
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
idx_t sum_il_sizes (int nil, const InvertedLists **ils_in) {
|
|
435
|
+
idx_t tot = 0;
|
|
436
|
+
for (int i = 0; i < nil; i++) {
|
|
437
|
+
tot += ils_in[i]->nlist;
|
|
438
|
+
}
|
|
439
|
+
return tot;
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
};
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
// Builds a read-only view whose lists are the lists of the `nil`
// inverted lists in ils_in, one after the other. cumsz[i] holds the
// number of lists that come before sub-invlist i.
// Throws when nil is not positive or when code sizes differ.
VStackInvertedLists::VStackInvertedLists (
        int nil, const InvertedLists **ils_in)
    : ReadOnlyInvertedLists (sum_il_sizes(nil, ils_in),
                             nil > 0 ? ils_in[0]->code_size : 0)
{
    FAISS_THROW_IF_NOT (nil > 0);
    cumsz.resize (nil + 1);

    int i = 0;
    while (i < nil) {
        ils.push_back (ils_in[i]);
        FAISS_THROW_IF_NOT (ils_in[i]->code_size == code_size);
        cumsz[i + 1] = cumsz[i] + ils_in[i]->nlist;
        ++i;
    }
}
|
|
459
|
+
|
|
460
|
+
size_t VStackInvertedLists::list_size(size_t list_no) const
|
|
461
|
+
{
|
|
462
|
+
int i = translate_list_no (this, list_no);
|
|
463
|
+
list_no -= cumsz[i];
|
|
464
|
+
return ils[i]->list_size (list_no);
|
|
465
|
+
}
|
|
466
|
+
|
|
467
|
+
const uint8_t * VStackInvertedLists::get_codes (size_t list_no) const
|
|
468
|
+
{
|
|
469
|
+
int i = translate_list_no (this, list_no);
|
|
470
|
+
list_no -= cumsz[i];
|
|
471
|
+
return ils[i]->get_codes (list_no);
|
|
472
|
+
}
|
|
473
|
+
|
|
474
|
+
const uint8_t * VStackInvertedLists::get_single_code (
|
|
475
|
+
size_t list_no, size_t offset) const
|
|
476
|
+
{
|
|
477
|
+
int i = translate_list_no (this, list_no);
|
|
478
|
+
list_no -= cumsz[i];
|
|
479
|
+
return ils[i]->get_single_code (list_no, offset);
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
void VStackInvertedLists::release_codes (
|
|
484
|
+
size_t list_no, const uint8_t *codes) const {
|
|
485
|
+
int i = translate_list_no (this, list_no);
|
|
486
|
+
list_no -= cumsz[i];
|
|
487
|
+
return ils[i]->release_codes (list_no, codes);
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
// Ids of list `list_no`, obtained from the sub-invlist that owns it.
const Index::idx_t * VStackInvertedLists::get_ids (size_t list_no) const
{
    const int il_no = translate_list_no (this, list_no);
    const size_t local_no = list_no - cumsz[il_no];
    return ils[il_no]->get_ids (local_no);
}
|
|
496
|
+
|
|
497
|
+
// Id at position `offset` of list `list_no`, obtained from the
// sub-invlist that owns the list.
Index::idx_t VStackInvertedLists::get_single_id (
        size_t list_no, size_t offset) const
{
    const int il_no = translate_list_no (this, list_no);
    const size_t local_no = list_no - cumsz[il_no];
    return ils[il_no]->get_single_id (local_no, offset);
}
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
void VStackInvertedLists::release_ids (size_t list_no, const idx_t *ids) const {
|
|
507
|
+
int i = translate_list_no (this, list_no);
|
|
508
|
+
list_no -= cumsz[i];
|
|
509
|
+
return ils[i]->release_ids (list_no, ids);
|
|
510
|
+
}
|
|
511
|
+
|
|
512
|
+
void VStackInvertedLists::prefetch_lists (
|
|
513
|
+
const idx_t *list_nos, int nlist) const
|
|
514
|
+
{
|
|
515
|
+
std::vector<int> ilno (nlist, -1);
|
|
516
|
+
std::vector<int> n_per_il (ils.size(), 0);
|
|
517
|
+
for (int j = 0; j < nlist; j++) {
|
|
518
|
+
idx_t list_no = list_nos[j];
|
|
519
|
+
if (list_no < 0) continue;
|
|
520
|
+
int i = ilno[j] = translate_list_no (this, list_no);
|
|
521
|
+
n_per_il[i]++;
|
|
522
|
+
}
|
|
523
|
+
std::vector<int> cum_n_per_il (ils.size() + 1, 0);
|
|
524
|
+
for (int j = 0; j < ils.size(); j++) {
|
|
525
|
+
cum_n_per_il[j + 1] = cum_n_per_il[j] + n_per_il[j];
|
|
526
|
+
}
|
|
527
|
+
std::vector<idx_t> sorted_list_nos (cum_n_per_il.back());
|
|
528
|
+
for (int j = 0; j < nlist; j++) {
|
|
529
|
+
idx_t list_no = list_nos[j];
|
|
530
|
+
if (list_no < 0) continue;
|
|
531
|
+
int i = ilno[j];
|
|
532
|
+
list_no -= cumsz[i];
|
|
533
|
+
sorted_list_nos[cum_n_per_il[i]++] = list_no;
|
|
534
|
+
}
|
|
535
|
+
|
|
536
|
+
int i0 = 0;
|
|
537
|
+
for (int j = 0; j < ils.size(); j++) {
|
|
538
|
+
int i1 = i0 + n_per_il[j];
|
|
539
|
+
if (i1 > i0) {
|
|
540
|
+
ils[j]->prefetch_lists (sorted_list_nos.data() + i0,
|
|
541
|
+
i1 - i0);
|
|
542
|
+
}
|
|
543
|
+
i0 = i1;
|
|
544
|
+
}
|
|
545
|
+
}
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
/*****************************************
|
|
550
|
+
* MaskedInvertedLists implementation
|
|
551
|
+
******************************************/
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
// Read-only combination of il0 and il1 with the nlist and code_size of
// il0. Throws when il1 does not have the same nlist and code_size.
MaskedInvertedLists::MaskedInvertedLists (const InvertedLists *il0,
                                          const InvertedLists *il1)
    : ReadOnlyInvertedLists (il0->nlist, il0->code_size),
      il0 (il0),
      il1 (il1)
{
    FAISS_THROW_IF_NOT (il1->nlist == nlist);
    FAISS_THROW_IF_NOT (il1->code_size == code_size);
}
|
|
562
|
+
|
|
563
|
+
size_t MaskedInvertedLists::list_size(size_t list_no) const
|
|
564
|
+
{
|
|
565
|
+
size_t sz = il0->list_size(list_no);
|
|
566
|
+
return sz ? sz : il1->list_size(list_no);
|
|
567
|
+
}
|
|
568
|
+
|
|
569
|
+
const uint8_t * MaskedInvertedLists::get_codes (size_t list_no) const
|
|
570
|
+
{
|
|
571
|
+
size_t sz = il0->list_size(list_no);
|
|
572
|
+
return (sz ? il0 : il1)->get_codes(list_no);
|
|
573
|
+
}
|
|
574
|
+
|
|
575
|
+
// Ids of list `list_no`: from il0 when its list is non-empty, from il1
// otherwise.
const idx_t * MaskedInvertedLists::get_ids (size_t list_no) const
{
    const InvertedLists *active =
        il0->list_size (list_no) != 0 ? il0 : il1;
    return active->get_ids (list_no);
}
|
|
580
|
+
|
|
581
|
+
void MaskedInvertedLists::release_codes (
|
|
582
|
+
size_t list_no, const uint8_t *codes) const
|
|
583
|
+
{
|
|
584
|
+
size_t sz = il0->list_size (list_no);
|
|
585
|
+
(sz ? il0 : il1)->release_codes (list_no, codes);
|
|
586
|
+
}
|
|
587
|
+
|
|
588
|
+
void MaskedInvertedLists::release_ids (size_t list_no, const idx_t *ids) const
|
|
589
|
+
{
|
|
590
|
+
size_t sz = il0->list_size (list_no);
|
|
591
|
+
(sz ? il0 : il1)->release_ids (list_no, ids);
|
|
592
|
+
}
|
|
593
|
+
|
|
594
|
+
// Id at position `offset` of list `list_no`: from il0 when its list is
// non-empty, from il1 otherwise.
idx_t MaskedInvertedLists::get_single_id (size_t list_no, size_t offset) const
{
    const InvertedLists *active =
        il0->list_size (list_no) != 0 ? il0 : il1;
    return active->get_single_id (list_no, offset);
}
|
|
599
|
+
|
|
600
|
+
const uint8_t * MaskedInvertedLists::get_single_code (
|
|
601
|
+
size_t list_no, size_t offset) const
|
|
602
|
+
{
|
|
603
|
+
size_t sz = il0->list_size (list_no);
|
|
604
|
+
return (sz ? il0 : il1)->get_single_code (list_no, offset);
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
void MaskedInvertedLists::prefetch_lists (
|
|
608
|
+
const idx_t *list_nos, int nlist) const
|
|
609
|
+
{
|
|
610
|
+
std::vector<idx_t> list0, list1;
|
|
611
|
+
for (int i = 0; i < nlist; i++) {
|
|
612
|
+
idx_t list_no = list_nos[i];
|
|
613
|
+
if (list_no < 0) continue;
|
|
614
|
+
size_t sz = il0->list_size(list_no);
|
|
615
|
+
(sz ? list0 : list1).push_back (list_no);
|
|
616
|
+
}
|
|
617
|
+
il0->prefetch_lists (list0.data(), list0.size());
|
|
618
|
+
il1->prefetch_lists (list1.data(), list1.size());
|
|
619
|
+
}
|
|
620
|
+
|
|
621
|
+
|
|
622
|
+
|
|
623
|
+
} // namespace faiss
|