faiss 0.1.7 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/README.md +7 -7
- data/ext/faiss/ext.cpp +1 -1
- data/ext/faiss/extconf.rb +8 -2
- data/ext/faiss/index.cpp +102 -69
- data/ext/faiss/index_binary.cpp +24 -30
- data/ext/faiss/kmeans.cpp +20 -16
- data/ext/faiss/numo.hpp +867 -0
- data/ext/faiss/pca_matrix.cpp +13 -14
- data/ext/faiss/product_quantizer.cpp +23 -24
- data/ext/faiss/utils.cpp +10 -37
- data/ext/faiss/utils.h +2 -13
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +0 -5
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +334 -195
- data/vendor/faiss/faiss/Clustering.h +88 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
- data/vendor/faiss/faiss/Index2Layer.h +22 -22
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
- data/vendor/faiss/faiss/IndexFlat.h +35 -46
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
- data/vendor/faiss/faiss/IndexIVF.h +146 -113
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
- data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
- data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
- data/vendor/faiss/faiss/IndexLSH.h +21 -26
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
- data/vendor/faiss/faiss/IndexPQ.h +64 -67
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
- data/vendor/faiss/faiss/IndexRefine.h +22 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
- data/vendor/faiss/faiss/IndexResidual.h +152 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
- data/vendor/faiss/faiss/VectorTransform.h +61 -89
- data/vendor/faiss/faiss/clone_index.cpp +77 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
- data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
- data/vendor/faiss/faiss/impl/io.cpp +75 -94
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +40 -29
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +269 -218
- data/vendor/faiss/faiss/index_factory.h +6 -7
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +301 -310
- data/vendor/faiss/faiss/utils/distances.h +133 -118
- data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +53 -48
- metadata +26 -12
- data/lib/faiss/index.rb +0 -20
- data/lib/faiss/index_binary.rb +0 -20
- data/lib/faiss/kmeans.rb +0 -15
- data/lib/faiss/pca_matrix.rb +0 -15
- data/lib/faiss/product_quantizer.rb +0 -22
|
@@ -0,0 +1,448 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#include "faiss/impl/ResidualQuantizer.h"
|
|
11
|
+
#include <faiss/impl/FaissAssert.h>
|
|
12
|
+
#include <faiss/impl/ResidualQuantizer.h>
|
|
13
|
+
#include "faiss/utils/utils.h"
|
|
14
|
+
|
|
15
|
+
#include <cstddef>
|
|
16
|
+
#include <cstdio>
|
|
17
|
+
#include <cstring>
|
|
18
|
+
#include <memory>
|
|
19
|
+
|
|
20
|
+
#include <algorithm>
|
|
21
|
+
|
|
22
|
+
#include <faiss/IndexFlat.h>
|
|
23
|
+
#include <faiss/VectorTransform.h>
|
|
24
|
+
#include <faiss/impl/AuxIndexStructures.h>
|
|
25
|
+
#include <faiss/impl/FaissAssert.h>
|
|
26
|
+
#include <faiss/utils/Heap.h>
|
|
27
|
+
#include <faiss/utils/distances.h>
|
|
28
|
+
#include <faiss/utils/hamming.h>
|
|
29
|
+
#include <faiss/utils/utils.h>
|
|
30
|
+
|
|
31
|
+
namespace faiss {
|
|
32
|
+
|
|
33
|
+
/// Default constructor: sets the training/encoding defaults; d and M are
/// zero until a dimensioned constructor or explicit setup fills them in.
ResidualQuantizer::ResidualQuantizer() {
    // progressive-dim clustering is the default training mode
    train_type = Train_progressive_dim;
    // beam width used both at train and at encode time
    max_beam_size = 30;
    // cap on beam-search distance matrices: 5 GiB
    max_mem_distances = size_t(5) << 30;
    // nullptr means: use IndexFlatL2 for assignment
    assign_index_factory = nullptr;
    d = 0;
    M = 0;
    verbose = false;
}
|
|
42
|
+
|
|
43
|
+
/// Construct a quantizer of dimension d with one sub-quantizer per entry of
/// nbits (entry m gives the number of bits of stage m).
ResidualQuantizer::ResidualQuantizer(size_t d, const std::vector<size_t>& nbits)
        : ResidualQuantizer() {
    this->d = d;
    this->nbits = nbits;
    this->M = nbits.size();
    // recompute code_size, codebook_offsets, etc. from d / M / nbits
    set_derived_values();
}
|
|
50
|
+
|
|
51
|
+
/// Convenience constructor: M sub-quantizers with the same number of bits
/// each; delegates to the variable-bits constructor.
ResidualQuantizer::ResidualQuantizer(size_t d, size_t M, size_t nbits)
        : ResidualQuantizer(d, std::vector<size_t>(M, nbits)) {}
|
|
53
|
+
|
|
54
|
+
namespace {
|
|
55
|
+
|
|
56
|
+
/// Element-wise vector difference: c[i] = a[i] - b[i] for i in [0, d).
/// Output c must not overlap a or b.
void fvec_sub(size_t d, const float* a, const float* b, float* c) {
    for (size_t idx = 0; idx != d; ++idx) {
        c[idx] = a[idx] - b[idx];
    }
}
|
|
61
|
+
|
|
62
|
+
} // anonymous namespace
|
|
63
|
+
|
|
64
|
+
void beam_search_encode_step(
|
|
65
|
+
size_t d,
|
|
66
|
+
size_t K,
|
|
67
|
+
const float* cent, /// size (K, d)
|
|
68
|
+
size_t n,
|
|
69
|
+
size_t beam_size,
|
|
70
|
+
const float* residuals, /// size (n, beam_size, d)
|
|
71
|
+
size_t m,
|
|
72
|
+
const int32_t* codes, /// size (n, beam_size, m)
|
|
73
|
+
size_t new_beam_size,
|
|
74
|
+
int32_t* new_codes, /// size (n, new_beam_size, m + 1)
|
|
75
|
+
float* new_residuals, /// size (n, new_beam_size, d)
|
|
76
|
+
float* new_distances, /// size (n, new_beam_size)
|
|
77
|
+
Index* assign_index) {
|
|
78
|
+
// we have to fill in the whole output matrix
|
|
79
|
+
FAISS_THROW_IF_NOT(new_beam_size <= beam_size * K);
|
|
80
|
+
|
|
81
|
+
using idx_t = Index::idx_t;
|
|
82
|
+
|
|
83
|
+
std::vector<float> cent_distances;
|
|
84
|
+
std::vector<idx_t> cent_ids;
|
|
85
|
+
|
|
86
|
+
if (assign_index) {
|
|
87
|
+
// search beam_size distances per query
|
|
88
|
+
FAISS_THROW_IF_NOT(assign_index->d == d);
|
|
89
|
+
cent_distances.resize(n * beam_size * new_beam_size);
|
|
90
|
+
cent_ids.resize(n * beam_size * new_beam_size);
|
|
91
|
+
if (assign_index->ntotal != 0) {
|
|
92
|
+
// then we assume the codebooks are already added to the index
|
|
93
|
+
FAISS_THROW_IF_NOT(assign_index->ntotal != K);
|
|
94
|
+
} else {
|
|
95
|
+
assign_index->add(K, cent);
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
// printf("beam_search_encode_step -- mem usage %zd\n",
|
|
99
|
+
// get_mem_usage_kb());
|
|
100
|
+
assign_index->search(
|
|
101
|
+
n * beam_size,
|
|
102
|
+
residuals,
|
|
103
|
+
new_beam_size,
|
|
104
|
+
cent_distances.data(),
|
|
105
|
+
cent_ids.data());
|
|
106
|
+
} else {
|
|
107
|
+
// do one big distance computation
|
|
108
|
+
cent_distances.resize(n * beam_size * K);
|
|
109
|
+
pairwise_L2sqr(
|
|
110
|
+
d, n * beam_size, residuals, K, cent, cent_distances.data());
|
|
111
|
+
}
|
|
112
|
+
InterruptCallback::check();
|
|
113
|
+
|
|
114
|
+
#pragma omp parallel for if (n > 100)
|
|
115
|
+
for (int64_t i = 0; i < n; i++) {
|
|
116
|
+
const int32_t* codes_i = codes + i * m * beam_size;
|
|
117
|
+
int32_t* new_codes_i = new_codes + i * (m + 1) * new_beam_size;
|
|
118
|
+
const float* residuals_i = residuals + i * d * beam_size;
|
|
119
|
+
float* new_residuals_i = new_residuals + i * d * new_beam_size;
|
|
120
|
+
|
|
121
|
+
float* new_distances_i = new_distances + i * new_beam_size;
|
|
122
|
+
using C = CMax<float, int>;
|
|
123
|
+
|
|
124
|
+
if (assign_index) {
|
|
125
|
+
const float* cent_distances_i =
|
|
126
|
+
cent_distances.data() + i * beam_size * new_beam_size;
|
|
127
|
+
const idx_t* cent_ids_i =
|
|
128
|
+
cent_ids.data() + i * beam_size * new_beam_size;
|
|
129
|
+
|
|
130
|
+
// here we could be a tad more efficient by merging sorted arrays
|
|
131
|
+
for (int i = 0; i < new_beam_size; i++) {
|
|
132
|
+
new_distances_i[i] = C::neutral();
|
|
133
|
+
}
|
|
134
|
+
std::vector<int> perm(new_beam_size, -1);
|
|
135
|
+
heap_addn<C>(
|
|
136
|
+
new_beam_size,
|
|
137
|
+
new_distances_i,
|
|
138
|
+
perm.data(),
|
|
139
|
+
cent_distances_i,
|
|
140
|
+
nullptr,
|
|
141
|
+
beam_size * new_beam_size);
|
|
142
|
+
heap_reorder<C>(new_beam_size, new_distances_i, perm.data());
|
|
143
|
+
|
|
144
|
+
for (int j = 0; j < new_beam_size; j++) {
|
|
145
|
+
int js = perm[j] / new_beam_size;
|
|
146
|
+
int ls = cent_ids_i[perm[j]];
|
|
147
|
+
if (m > 0) {
|
|
148
|
+
memcpy(new_codes_i, codes_i + js * m, sizeof(*codes) * m);
|
|
149
|
+
}
|
|
150
|
+
new_codes_i[m] = ls;
|
|
151
|
+
new_codes_i += m + 1;
|
|
152
|
+
fvec_sub(
|
|
153
|
+
d,
|
|
154
|
+
residuals_i + js * d,
|
|
155
|
+
cent + ls * d,
|
|
156
|
+
new_residuals_i);
|
|
157
|
+
new_residuals_i += d;
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
} else {
|
|
161
|
+
const float* cent_distances_i =
|
|
162
|
+
cent_distances.data() + i * beam_size * K;
|
|
163
|
+
// then we have to select the best results
|
|
164
|
+
for (int i = 0; i < new_beam_size; i++) {
|
|
165
|
+
new_distances_i[i] = C::neutral();
|
|
166
|
+
}
|
|
167
|
+
std::vector<int> perm(new_beam_size, -1);
|
|
168
|
+
heap_addn<C>(
|
|
169
|
+
new_beam_size,
|
|
170
|
+
new_distances_i,
|
|
171
|
+
perm.data(),
|
|
172
|
+
cent_distances_i,
|
|
173
|
+
nullptr,
|
|
174
|
+
beam_size * K);
|
|
175
|
+
heap_reorder<C>(new_beam_size, new_distances_i, perm.data());
|
|
176
|
+
|
|
177
|
+
for (int j = 0; j < new_beam_size; j++) {
|
|
178
|
+
int js = perm[j] / K;
|
|
179
|
+
int ls = perm[j] % K;
|
|
180
|
+
if (m > 0) {
|
|
181
|
+
memcpy(new_codes_i, codes_i + js * m, sizeof(*codes) * m);
|
|
182
|
+
}
|
|
183
|
+
new_codes_i[m] = ls;
|
|
184
|
+
new_codes_i += m + 1;
|
|
185
|
+
fvec_sub(
|
|
186
|
+
d,
|
|
187
|
+
residuals_i + js * d,
|
|
188
|
+
cent + ls * d,
|
|
189
|
+
new_residuals_i);
|
|
190
|
+
new_residuals_i += d;
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
void ResidualQuantizer::train(size_t n, const float* x) {
|
|
197
|
+
codebooks.resize(d * codebook_offsets.back());
|
|
198
|
+
|
|
199
|
+
if (verbose) {
|
|
200
|
+
printf("Training ResidualQuantizer, with %zd steps on %zd %zdD vectors\n",
|
|
201
|
+
M,
|
|
202
|
+
n,
|
|
203
|
+
size_t(d));
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
int cur_beam_size = 1;
|
|
207
|
+
std::vector<float> residuals(x, x + n * d);
|
|
208
|
+
std::vector<int32_t> codes;
|
|
209
|
+
std::vector<float> distances;
|
|
210
|
+
double t0 = getmillisecs();
|
|
211
|
+
|
|
212
|
+
for (int m = 0; m < M; m++) {
|
|
213
|
+
int K = 1 << nbits[m];
|
|
214
|
+
|
|
215
|
+
// on which residuals to train
|
|
216
|
+
std::vector<float>& train_residuals = residuals;
|
|
217
|
+
std::vector<float> residuals1;
|
|
218
|
+
if (train_type & Train_top_beam) {
|
|
219
|
+
residuals1.resize(n * d);
|
|
220
|
+
for (size_t j = 0; j < n; j++) {
|
|
221
|
+
memcpy(residuals1.data() + j * d,
|
|
222
|
+
residuals.data() + j * d * cur_beam_size,
|
|
223
|
+
sizeof(residuals[0]) * d);
|
|
224
|
+
}
|
|
225
|
+
train_residuals = residuals1;
|
|
226
|
+
}
|
|
227
|
+
train_type_t tt = train_type_t(train_type & ~Train_top_beam);
|
|
228
|
+
|
|
229
|
+
std::vector<float> codebooks;
|
|
230
|
+
float obj = 0;
|
|
231
|
+
|
|
232
|
+
std::unique_ptr<Index> assign_index;
|
|
233
|
+
if (assign_index_factory) {
|
|
234
|
+
assign_index.reset((*assign_index_factory)(d));
|
|
235
|
+
} else {
|
|
236
|
+
assign_index.reset(new IndexFlatL2(d));
|
|
237
|
+
}
|
|
238
|
+
if (tt == Train_default) {
|
|
239
|
+
Clustering clus(d, K, cp);
|
|
240
|
+
clus.train(
|
|
241
|
+
train_residuals.size() / d,
|
|
242
|
+
train_residuals.data(),
|
|
243
|
+
*assign_index.get());
|
|
244
|
+
codebooks.swap(clus.centroids);
|
|
245
|
+
assign_index->reset();
|
|
246
|
+
obj = clus.iteration_stats.back().obj;
|
|
247
|
+
} else if (tt == Train_progressive_dim) {
|
|
248
|
+
ProgressiveDimClustering clus(d, K, cp);
|
|
249
|
+
ProgressiveDimIndexFactory default_fac;
|
|
250
|
+
clus.train(
|
|
251
|
+
train_residuals.size() / d,
|
|
252
|
+
train_residuals.data(),
|
|
253
|
+
assign_index_factory ? *assign_index_factory : default_fac);
|
|
254
|
+
codebooks.swap(clus.centroids);
|
|
255
|
+
obj = clus.iteration_stats.back().obj;
|
|
256
|
+
} else {
|
|
257
|
+
FAISS_THROW_MSG("train type not supported");
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
memcpy(this->codebooks.data() + codebook_offsets[m] * d,
|
|
261
|
+
codebooks.data(),
|
|
262
|
+
codebooks.size() * sizeof(codebooks[0]));
|
|
263
|
+
|
|
264
|
+
// quantize using the new codebooks
|
|
265
|
+
|
|
266
|
+
int new_beam_size = std::min(cur_beam_size * K, max_beam_size);
|
|
267
|
+
std::vector<int32_t> new_codes(n * new_beam_size * (m + 1));
|
|
268
|
+
std::vector<float> new_residuals(n * new_beam_size * d);
|
|
269
|
+
std::vector<float> new_distances(n * new_beam_size);
|
|
270
|
+
|
|
271
|
+
beam_search_encode_step(
|
|
272
|
+
d,
|
|
273
|
+
K,
|
|
274
|
+
codebooks.data(),
|
|
275
|
+
n,
|
|
276
|
+
cur_beam_size,
|
|
277
|
+
residuals.data(),
|
|
278
|
+
m,
|
|
279
|
+
codes.data(),
|
|
280
|
+
new_beam_size,
|
|
281
|
+
new_codes.data(),
|
|
282
|
+
new_residuals.data(),
|
|
283
|
+
new_distances.data(),
|
|
284
|
+
assign_index.get());
|
|
285
|
+
|
|
286
|
+
codes.swap(new_codes);
|
|
287
|
+
residuals.swap(new_residuals);
|
|
288
|
+
distances.swap(new_distances);
|
|
289
|
+
|
|
290
|
+
float sum_distances = 0;
|
|
291
|
+
for (int j = 0; j < distances.size(); j++) {
|
|
292
|
+
sum_distances += distances[j];
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
if (verbose) {
|
|
296
|
+
printf("[%.3f s] train stage %d, %d bits, kmeans objective %g, "
|
|
297
|
+
"total distance %g, beam_size %d->%d\n",
|
|
298
|
+
(getmillisecs() - t0) / 1000,
|
|
299
|
+
m,
|
|
300
|
+
int(nbits[m]),
|
|
301
|
+
obj,
|
|
302
|
+
sum_distances,
|
|
303
|
+
cur_beam_size,
|
|
304
|
+
new_beam_size);
|
|
305
|
+
}
|
|
306
|
+
cur_beam_size = new_beam_size;
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
is_trained = true;
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
size_t ResidualQuantizer::memory_per_point(int beam_size) const {
|
|
313
|
+
if (beam_size < 0) {
|
|
314
|
+
beam_size = max_beam_size;
|
|
315
|
+
}
|
|
316
|
+
size_t mem;
|
|
317
|
+
mem = beam_size * d * 2 * sizeof(float); // size for 2 beams at a time
|
|
318
|
+
mem += beam_size * beam_size *
|
|
319
|
+
(sizeof(float) +
|
|
320
|
+
sizeof(Index::idx_t)); // size for 1 beam search result
|
|
321
|
+
return mem;
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
void ResidualQuantizer::compute_codes(
|
|
325
|
+
const float* x,
|
|
326
|
+
uint8_t* codes_out,
|
|
327
|
+
size_t n) const {
|
|
328
|
+
FAISS_THROW_IF_NOT_MSG(is_trained, "RQ is not trained yet.");
|
|
329
|
+
|
|
330
|
+
size_t mem = memory_per_point();
|
|
331
|
+
if (n > 1 && mem * n > max_mem_distances) {
|
|
332
|
+
// then split queries to reduce temp memory
|
|
333
|
+
size_t bs = max_mem_distances / mem;
|
|
334
|
+
if (bs == 0) {
|
|
335
|
+
bs = 1; // otherwise we can't do much
|
|
336
|
+
}
|
|
337
|
+
for (size_t i0 = 0; i0 < n; i0 += bs) {
|
|
338
|
+
size_t i1 = std::min(n, i0 + bs);
|
|
339
|
+
compute_codes(x + i0 * d, codes_out + i0 * code_size, i1 - i0);
|
|
340
|
+
}
|
|
341
|
+
return;
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
std::vector<float> residuals(max_beam_size * n * d);
|
|
345
|
+
std::vector<int32_t> codes(max_beam_size * M * n);
|
|
346
|
+
std::vector<float> distances(max_beam_size * n);
|
|
347
|
+
|
|
348
|
+
refine_beam(
|
|
349
|
+
n,
|
|
350
|
+
1,
|
|
351
|
+
x,
|
|
352
|
+
max_beam_size,
|
|
353
|
+
codes.data(),
|
|
354
|
+
residuals.data(),
|
|
355
|
+
distances.data());
|
|
356
|
+
|
|
357
|
+
// pack only the first code of the beam (hence the ld_codes=M *
|
|
358
|
+
// max_beam_size)
|
|
359
|
+
pack_codes(n, codes.data(), codes_out, M * max_beam_size);
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
// Run the full multi-stage beam search over the trained codebooks.
// Input x holds n * beam_size residual vectors of dimension d; on return
// out_codes / out_residuals / out_distances (each optional, may be nullptr)
// receive the out_beam_size best hypotheses per input point.
void ResidualQuantizer::refine_beam(
        size_t n,
        size_t beam_size,
        const float* x,
        int out_beam_size,
        int32_t* out_codes,
        float* out_residuals,
        float* out_distances) const {
    int cur_beam_size = beam_size;

    // working copy of the residuals; overwritten at every stage
    std::vector<float> residuals(x, x + n * d * beam_size);
    std::vector<int32_t> codes;
    std::vector<float> distances;
    double t0 = getmillisecs();

    // index used to assign residuals to centroids (reset between stages)
    std::unique_ptr<Index> assign_index;
    if (assign_index_factory) {
        assign_index.reset((*assign_index_factory)(d));
    } else {
        assign_index.reset(new IndexFlatL2(d));
    }

    for (int m = 0; m < M; m++) {
        int K = 1 << nbits[m];

        // centroids of stage m in the cumulative codebook table
        const float* codebooks_m =
                this->codebooks.data() + codebook_offsets[m] * d;

        // beam widens by a factor K, capped at out_beam_size
        int new_beam_size = std::min(cur_beam_size * K, out_beam_size);

        std::vector<int32_t> new_codes(n * new_beam_size * (m + 1));
        std::vector<float> new_residuals(n * new_beam_size * d);
        distances.resize(n * new_beam_size);

        beam_search_encode_step(
                d,
                K,
                codebooks_m,
                n,
                cur_beam_size,
                residuals.data(),
                m,
                codes.data(),
                new_beam_size,
                new_codes.data(),
                new_residuals.data(),
                distances.data(),
                assign_index.get());

        // drop this stage's centroids so the next stage can add its own
        assign_index->reset();

        codes.swap(new_codes);
        residuals.swap(new_residuals);

        cur_beam_size = new_beam_size;

        if (verbose) {
            float sum_distances = 0;
            for (int j = 0; j < distances.size(); j++) {
                sum_distances += distances[j];
            }
            printf("[%.3f s] encode stage %d, %d bits, "
                   "total error %g, beam_size %d\n",
                   (getmillisecs() - t0) / 1000,
                   m,
                   int(nbits[m]),
                   sum_distances,
                   cur_beam_size);
        }
    }

    // copy results out only where the caller asked for them
    if (out_codes) {
        memcpy(out_codes, codes.data(), codes.size() * sizeof(codes[0]));
    }
    if (out_residuals) {
        memcpy(out_residuals,
               residuals.data(),
               residuals.size() * sizeof(residuals[0]));
    }
    if (out_distances) {
        memcpy(out_distances,
               distances.data(),
               distances.size() * sizeof(distances[0]));
    }
}
|
|
447
|
+
|
|
448
|
+
} // namespace faiss
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
#include <cstdint>
|
|
11
|
+
#include <vector>
|
|
12
|
+
|
|
13
|
+
#include <faiss/Clustering.h>
|
|
14
|
+
#include <faiss/impl/AdditiveQuantizer.h>
|
|
15
|
+
|
|
16
|
+
namespace faiss {
|
|
17
|
+
|
|
18
|
+
/** Residual quantizer with variable number of bits per sub-quantizer
|
|
19
|
+
*
|
|
20
|
+
* The residual centroids are stored in a big cumulative centroid table.
|
|
21
|
+
* The codes are represented either as a non-compact table of size (n, M) or
|
|
22
|
+
* as the compact output (n, code_size).
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
struct ResidualQuantizer : AdditiveQuantizer {
    /// how the per-stage codebooks are trained
    enum train_type_t {
        Train_default,         ///< regular k-means
        Train_progressive_dim, ///< progressive dim clustering
    };

    // set this bit on train_type if beam is to be trained only on the
    // first element of the beam (faster but less accurate)
    static const int Train_top_beam = 1024;
    train_type_t train_type;

    /// beam size used for training and for encoding
    int max_beam_size;

    /// distance matrixes with beam search can get large, so use this
    /// to batch computations at encoding time.
    size_t max_mem_distances;

    /// clustering parameters
    ProgressiveDimClusteringParameters cp;

    /// if non-NULL, use this index for assignment
    ProgressiveDimIndexFactory* assign_index_factory;

    ResidualQuantizer(size_t d, const std::vector<size_t>& nbits);

    ResidualQuantizer(
            size_t d, /* dimensionality of the input vectors */
            size_t M, /* number of subquantizers */
            size_t nbits); /* number of bit per subvector index */

    ResidualQuantizer();

    // Train the residual quantizer
    void train(size_t n, const float* x) override;

    /** Encode a set of vectors
     *
     * @param x      vectors to encode, size n * d
     * @param codes  output codes, size n * code_size
     */
    void compute_codes(const float* x, uint8_t* codes, size_t n) const override;

    /** lower-level encode function
     *
     * @param n              number of vectors to handle
     * @param residuals      vectors to encode, size (n, beam_size, d)
     * @param beam_size      input beam size
     * @param new_beam_size  output beam size (should be <= K * beam_size)
     * @param new_codes      output codes, size (n, new_beam_size, m + 1)
     * @param new_residuals  output residuals, size (n, new_beam_size, d)
     * @param new_distances  output distances, size (n, new_beam_size)
     */
    void refine_beam(
            size_t n,
            size_t beam_size,
            const float* residuals,
            int new_beam_size,
            int32_t* new_codes,
            float* new_residuals = nullptr,
            float* new_distances = nullptr) const;

    /** Beam search can consume a lot of memory. This function estimates the
     * amount of mem used by refine_beam to adjust the batch size
     *
     * @param beam_size if != -1, override the beam size
     */
    size_t memory_per_point(int beam_size = -1) const;
};
|
|
95
|
+
|
|
96
|
+
/** Encode a residual by sampling from a centroid table.
 *
 * This is a single encoding step of the residual quantizer.
 * It allows low-level access to the encoding function, exposed mainly for unit
 * tests.
 *
 * @param n              number of vectors to handle
 * @param residuals      vectors to encode, size (n, beam_size, d)
 * @param cent           centroids, size (K, d)
 * @param beam_size      input beam size
 * @param m              size of the codes for the previous encoding steps
 * @param codes          code array for the previous steps of the beam (n,
 *                       beam_size, m)
 * @param new_beam_size  output beam size (should be <= K * beam_size)
 * @param new_codes      output codes, size (n, new_beam_size, m + 1)
 * @param new_residuals  output residuals, size (n, new_beam_size, d)
 * @param new_distances  output distances, size (n, new_beam_size)
 * @param assign_index   if non-NULL, will be used to perform assignment
 */
void beam_search_encode_step(
        size_t d,
        size_t K,
        const float* cent,
        size_t n,
        size_t beam_size,
        const float* residuals,
        size_t m,
        const int32_t* codes,
        size_t new_beam_size,
        int32_t* new_codes,
        float* new_residuals,
        float* new_distances,
        Index* assign_index = nullptr);
|
|
129
|
+
|
|
130
|
+
}; // namespace faiss
|