faiss 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +9 -2
- data/ext/faiss/index.cpp +1 -1
- data/ext/faiss/index_binary.cpp +2 -2
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +7 -7
- data/vendor/faiss/faiss/AutoTune.h +0 -1
- data/vendor/faiss/faiss/Clustering.cpp +4 -18
- data/vendor/faiss/faiss/Clustering.h +31 -21
- data/vendor/faiss/faiss/IVFlib.cpp +22 -11
- data/vendor/faiss/faiss/Index.cpp +1 -1
- data/vendor/faiss/faiss/Index.h +20 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
- data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
- data/vendor/faiss/faiss/IndexBinary.h +8 -19
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +106 -187
- data/vendor/faiss/faiss/IndexFastScan.cpp +90 -159
- data/vendor/faiss/faiss/IndexFastScan.h +9 -8
- data/vendor/faiss/faiss/IndexFlat.cpp +195 -3
- data/vendor/faiss/faiss/IndexFlat.h +20 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +11 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +3 -1
- data/vendor/faiss/faiss/IndexHNSW.cpp +112 -316
- data/vendor/faiss/faiss/IndexHNSW.h +12 -48
- data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
- data/vendor/faiss/faiss/IndexIDMap.h +24 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +159 -53
- data/vendor/faiss/faiss/IndexIVF.h +37 -5
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +18 -26
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +3 -2
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +433 -405
- data/vendor/faiss/faiss/IndexIVFFastScan.h +56 -26
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
- data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +78 -122
- data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +18 -50
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
- data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
- data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -4
- data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
- data/vendor/faiss/faiss/IndexNSG.h +10 -10
- data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
- data/vendor/faiss/faiss/IndexPQ.h +1 -4
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
- data/vendor/faiss/faiss/IndexRefine.cpp +49 -19
- data/vendor/faiss/faiss/IndexRefine.h +7 -0
- data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +22 -16
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
- data/vendor/faiss/faiss/IndexShards.cpp +21 -29
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
- data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
- data/vendor/faiss/faiss/MatrixStats.h +21 -9
- data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
- data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
- data/vendor/faiss/faiss/VectorTransform.h +7 -7
- data/vendor/faiss/faiss/clone_index.cpp +15 -10
- data/vendor/faiss/faiss/clone_index.h +3 -0
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +87 -4
- data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +7 -0
- data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
- data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +8 -9
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +18 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
- data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +117 -17
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +267 -40
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -2
- data/vendor/faiss/faiss/impl/DistanceComputer.h +24 -1
- data/vendor/faiss/faiss/impl/FaissException.h +13 -34
- data/vendor/faiss/faiss/impl/HNSW.cpp +321 -70
- data/vendor/faiss/faiss/impl/HNSW.h +9 -8
- data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +3 -1
- data/vendor/faiss/faiss/impl/NNDescent.cpp +29 -19
- data/vendor/faiss/faiss/impl/NSG.h +1 -1
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +24 -22
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
- data/vendor/faiss/faiss/impl/ResultHandler.h +232 -176
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +444 -104
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +0 -8
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +280 -42
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
- data/vendor/faiss/faiss/impl/index_read.cpp +45 -19
- data/vendor/faiss/faiss/impl/index_write.cpp +60 -41
- data/vendor/faiss/faiss/impl/io.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +18 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +3 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +40 -49
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +374 -202
- data/vendor/faiss/faiss/index_factory.cpp +10 -7
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +27 -9
- data/vendor/faiss/faiss/invlists/InvertedLists.h +12 -3
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
- data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
- data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
- data/vendor/faiss/faiss/utils/distances.cpp +128 -74
- data/vendor/faiss/faiss/utils/distances.h +81 -4
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
- data/vendor/faiss/faiss/utils/distances_simd.cpp +428 -70
- data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
- data/vendor/faiss/faiss/utils/fp16.h +2 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +162 -110
- data/vendor/faiss/faiss/utils/hamming.h +58 -0
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +15 -87
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +57 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
- data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
- data/vendor/faiss/faiss/utils/prefetch.h +77 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
- data/vendor/faiss/faiss/utils/simdlib_neon.h +72 -77
- data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
- data/vendor/faiss/faiss/utils/sorting.h +27 -0
- data/vendor/faiss/faiss/utils/utils.cpp +112 -6
- data/vendor/faiss/faiss/utils/utils.h +57 -20
- metadata +10 -3
|
@@ -12,17 +12,34 @@
|
|
|
12
12
|
|
|
13
13
|
namespace faiss {
|
|
14
14
|
|
|
15
|
+
namespace {
|
|
16
|
+
|
|
17
|
+
// IndexBinary needs to update the code_size when d is set...
|
|
18
|
+
|
|
19
|
+
void sync_d(Index* index) {}
|
|
20
|
+
|
|
21
|
+
void sync_d(IndexBinary* index) {
|
|
22
|
+
FAISS_THROW_IF_NOT(index->d % 8 == 0);
|
|
23
|
+
index->code_size = index->d / 8;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
} // anonymous namespace
|
|
27
|
+
|
|
15
28
|
template <typename IndexT>
|
|
16
29
|
IndexReplicasTemplate<IndexT>::IndexReplicasTemplate(bool threaded)
|
|
17
30
|
: ThreadedIndex<IndexT>(threaded) {}
|
|
18
31
|
|
|
19
32
|
template <typename IndexT>
|
|
20
33
|
IndexReplicasTemplate<IndexT>::IndexReplicasTemplate(idx_t d, bool threaded)
|
|
21
|
-
: ThreadedIndex<IndexT>(d, threaded) {
|
|
34
|
+
: ThreadedIndex<IndexT>(d, threaded) {
|
|
35
|
+
sync_d(this);
|
|
36
|
+
}
|
|
22
37
|
|
|
23
38
|
template <typename IndexT>
|
|
24
39
|
IndexReplicasTemplate<IndexT>::IndexReplicasTemplate(int d, bool threaded)
|
|
25
|
-
: ThreadedIndex<IndexT>(d, threaded) {
|
|
40
|
+
: ThreadedIndex<IndexT>(d, threaded) {
|
|
41
|
+
sync_d(this);
|
|
42
|
+
}
|
|
26
43
|
|
|
27
44
|
template <typename IndexT>
|
|
28
45
|
void IndexReplicasTemplate<IndexT>::onAfterAddIndex(IndexT* index) {
|
|
@@ -168,6 +185,8 @@ void IndexReplicasTemplate<IndexT>::syncWithSubIndexes() {
|
|
|
168
185
|
}
|
|
169
186
|
|
|
170
187
|
auto firstIndex = this->at(0);
|
|
188
|
+
this->d = firstIndex->d;
|
|
189
|
+
sync_d(this);
|
|
171
190
|
this->metric_type = firstIndex->metric_type;
|
|
172
191
|
this->is_trained = firstIndex->is_trained;
|
|
173
192
|
this->ntotal = firstIndex->ntotal;
|
|
@@ -181,30 +200,8 @@ void IndexReplicasTemplate<IndexT>::syncWithSubIndexes() {
|
|
|
181
200
|
}
|
|
182
201
|
}
|
|
183
202
|
|
|
184
|
-
// No metric_type for IndexBinary
|
|
185
|
-
template <>
|
|
186
|
-
void IndexReplicasTemplate<IndexBinary>::syncWithSubIndexes() {
|
|
187
|
-
if (!this->count()) {
|
|
188
|
-
this->is_trained = false;
|
|
189
|
-
this->ntotal = 0;
|
|
190
|
-
|
|
191
|
-
return;
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
auto firstIndex = this->at(0);
|
|
195
|
-
this->is_trained = firstIndex->is_trained;
|
|
196
|
-
this->ntotal = firstIndex->ntotal;
|
|
197
|
-
|
|
198
|
-
for (int i = 1; i < this->count(); ++i) {
|
|
199
|
-
auto index = this->at(i);
|
|
200
|
-
FAISS_THROW_IF_NOT(this->d == index->d);
|
|
201
|
-
FAISS_THROW_IF_NOT(this->is_trained == index->is_trained);
|
|
202
|
-
FAISS_THROW_IF_NOT(this->ntotal == index->ntotal);
|
|
203
|
-
}
|
|
204
|
-
}
|
|
205
|
-
|
|
206
203
|
// explicit instantiations
|
|
207
|
-
template
|
|
208
|
-
template
|
|
204
|
+
template class IndexReplicasTemplate<Index>;
|
|
205
|
+
template class IndexReplicasTemplate<IndexBinary>;
|
|
209
206
|
|
|
210
207
|
} // namespace faiss
|
|
@@ -60,10 +60,9 @@ void IndexScalarQuantizer::search(
|
|
|
60
60
|
|
|
61
61
|
#pragma omp parallel
|
|
62
62
|
{
|
|
63
|
-
InvertedListScanner
|
|
64
|
-
sq.select_InvertedListScanner(metric_type, nullptr, true, sel);
|
|
63
|
+
std::unique_ptr<InvertedListScanner> scanner(
|
|
64
|
+
sq.select_InvertedListScanner(metric_type, nullptr, true, sel));
|
|
65
65
|
|
|
66
|
-
ScopeDeleter1<InvertedListScanner> del(scanner);
|
|
67
66
|
scanner->list_no = 0; // directly the list number
|
|
68
67
|
|
|
69
68
|
#pragma omp for
|
|
@@ -122,21 +121,28 @@ IndexIVFScalarQuantizer::IndexIVFScalarQuantizer(
|
|
|
122
121
|
size_t nlist,
|
|
123
122
|
ScalarQuantizer::QuantizerType qtype,
|
|
124
123
|
MetricType metric,
|
|
125
|
-
bool
|
|
126
|
-
: IndexIVF(quantizer, d, nlist, 0, metric),
|
|
127
|
-
sq(d, qtype),
|
|
128
|
-
by_residual(encode_residual) {
|
|
124
|
+
bool by_residual)
|
|
125
|
+
: IndexIVF(quantizer, d, nlist, 0, metric), sq(d, qtype) {
|
|
129
126
|
code_size = sq.code_size;
|
|
127
|
+
this->by_residual = by_residual;
|
|
130
128
|
// was not known at construction time
|
|
131
129
|
invlists->code_size = code_size;
|
|
132
130
|
is_trained = false;
|
|
133
131
|
}
|
|
134
132
|
|
|
135
|
-
IndexIVFScalarQuantizer::IndexIVFScalarQuantizer()
|
|
136
|
-
|
|
133
|
+
IndexIVFScalarQuantizer::IndexIVFScalarQuantizer() : IndexIVF() {
|
|
134
|
+
by_residual = true;
|
|
135
|
+
}
|
|
137
136
|
|
|
138
|
-
void IndexIVFScalarQuantizer::
|
|
139
|
-
|
|
137
|
+
void IndexIVFScalarQuantizer::train_encoder(
|
|
138
|
+
idx_t n,
|
|
139
|
+
const float* x,
|
|
140
|
+
const idx_t* assign) {
|
|
141
|
+
sq.train(n, x);
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
idx_t IndexIVFScalarQuantizer::train_encoder_num_vectors() const {
|
|
145
|
+
return 100000;
|
|
140
146
|
}
|
|
141
147
|
|
|
142
148
|
void IndexIVFScalarQuantizer::encode_vectors(
|
|
@@ -201,15 +207,15 @@ void IndexIVFScalarQuantizer::add_core(
|
|
|
201
207
|
idx_t n,
|
|
202
208
|
const float* x,
|
|
203
209
|
const idx_t* xids,
|
|
204
|
-
const idx_t* coarse_idx
|
|
210
|
+
const idx_t* coarse_idx,
|
|
211
|
+
void* inverted_list_context) {
|
|
205
212
|
FAISS_THROW_IF_NOT(is_trained);
|
|
206
213
|
|
|
207
|
-
size_t nadd = 0;
|
|
208
214
|
std::unique_ptr<ScalarQuantizer::SQuantizer> squant(sq.select_quantizer());
|
|
209
215
|
|
|
210
216
|
DirectMapAdd dm_add(direct_map, n, xids);
|
|
211
217
|
|
|
212
|
-
#pragma omp parallel
|
|
218
|
+
#pragma omp parallel
|
|
213
219
|
{
|
|
214
220
|
std::vector<float> residual(d);
|
|
215
221
|
std::vector<uint8_t> one_code(code_size);
|
|
@@ -231,10 +237,10 @@ void IndexIVFScalarQuantizer::add_core(
|
|
|
231
237
|
memset(one_code.data(), 0, code_size);
|
|
232
238
|
squant->encode_vector(xi, one_code.data());
|
|
233
239
|
|
|
234
|
-
size_t ofs = invlists->add_entry(
|
|
240
|
+
size_t ofs = invlists->add_entry(
|
|
241
|
+
list_no, id, one_code.data(), inverted_list_context);
|
|
235
242
|
|
|
236
243
|
dm_add.add(i, list_no, ofs);
|
|
237
|
-
nadd++;
|
|
238
244
|
|
|
239
245
|
} else if (rank == 0 && list_no == -1) {
|
|
240
246
|
dm_add.add(i, -1, 0);
|
|
@@ -65,7 +65,6 @@ struct IndexScalarQuantizer : IndexFlatCodes {
|
|
|
65
65
|
|
|
66
66
|
struct IndexIVFScalarQuantizer : IndexIVF {
|
|
67
67
|
ScalarQuantizer sq;
|
|
68
|
-
bool by_residual;
|
|
69
68
|
|
|
70
69
|
IndexIVFScalarQuantizer(
|
|
71
70
|
Index* quantizer,
|
|
@@ -73,11 +72,13 @@ struct IndexIVFScalarQuantizer : IndexIVF {
|
|
|
73
72
|
size_t nlist,
|
|
74
73
|
ScalarQuantizer::QuantizerType qtype,
|
|
75
74
|
MetricType metric = METRIC_L2,
|
|
76
|
-
bool
|
|
75
|
+
bool by_residual = true);
|
|
77
76
|
|
|
78
77
|
IndexIVFScalarQuantizer();
|
|
79
78
|
|
|
80
|
-
void
|
|
79
|
+
void train_encoder(idx_t n, const float* x, const idx_t* assign) override;
|
|
80
|
+
|
|
81
|
+
idx_t train_encoder_num_vectors() const override;
|
|
81
82
|
|
|
82
83
|
void encode_vectors(
|
|
83
84
|
idx_t n,
|
|
@@ -90,7 +91,8 @@ struct IndexIVFScalarQuantizer : IndexIVF {
|
|
|
90
91
|
idx_t n,
|
|
91
92
|
const float* x,
|
|
92
93
|
const idx_t* xids,
|
|
93
|
-
const idx_t* precomputed_idx
|
|
94
|
+
const idx_t* precomputed_idx,
|
|
95
|
+
void* inverted_list_context = nullptr) override;
|
|
94
96
|
|
|
95
97
|
InvertedListScanner* get_InvertedListScanner(
|
|
96
98
|
bool store_pairs,
|
|
@@ -5,8 +5,6 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
// -*- c++ -*-
|
|
9
|
-
|
|
10
8
|
#include <faiss/IndexShards.h>
|
|
11
9
|
|
|
12
10
|
#include <cinttypes>
|
|
@@ -22,6 +20,15 @@ namespace faiss {
|
|
|
22
20
|
// subroutines
|
|
23
21
|
namespace {
|
|
24
22
|
|
|
23
|
+
// IndexBinary needs to update the code_size when d is set...
|
|
24
|
+
|
|
25
|
+
void sync_d(Index* index) {}
|
|
26
|
+
|
|
27
|
+
void sync_d(IndexBinary* index) {
|
|
28
|
+
FAISS_THROW_IF_NOT(index->d % 8 == 0);
|
|
29
|
+
index->code_size = index->d / 8;
|
|
30
|
+
}
|
|
31
|
+
|
|
25
32
|
// add translation to all valid labels
|
|
26
33
|
void translate_labels(int64_t n, idx_t* labels, int64_t translation) {
|
|
27
34
|
if (translation == 0)
|
|
@@ -40,20 +47,26 @@ IndexShardsTemplate<IndexT>::IndexShardsTemplate(
|
|
|
40
47
|
idx_t d,
|
|
41
48
|
bool threaded,
|
|
42
49
|
bool successive_ids)
|
|
43
|
-
: ThreadedIndex<IndexT>(d, threaded), successive_ids(successive_ids) {
|
|
50
|
+
: ThreadedIndex<IndexT>(d, threaded), successive_ids(successive_ids) {
|
|
51
|
+
sync_d(this);
|
|
52
|
+
}
|
|
44
53
|
|
|
45
54
|
template <typename IndexT>
|
|
46
55
|
IndexShardsTemplate<IndexT>::IndexShardsTemplate(
|
|
47
56
|
int d,
|
|
48
57
|
bool threaded,
|
|
49
58
|
bool successive_ids)
|
|
50
|
-
: ThreadedIndex<IndexT>(d, threaded), successive_ids(successive_ids) {
|
|
59
|
+
: ThreadedIndex<IndexT>(d, threaded), successive_ids(successive_ids) {
|
|
60
|
+
sync_d(this);
|
|
61
|
+
}
|
|
51
62
|
|
|
52
63
|
template <typename IndexT>
|
|
53
64
|
IndexShardsTemplate<IndexT>::IndexShardsTemplate(
|
|
54
65
|
bool threaded,
|
|
55
66
|
bool successive_ids)
|
|
56
|
-
: ThreadedIndex<IndexT>(threaded), successive_ids(successive_ids) {
|
|
67
|
+
: ThreadedIndex<IndexT>(threaded), successive_ids(successive_ids) {
|
|
68
|
+
sync_d(this);
|
|
69
|
+
}
|
|
57
70
|
|
|
58
71
|
template <typename IndexT>
|
|
59
72
|
void IndexShardsTemplate<IndexT>::onAfterAddIndex(IndexT* index /* unused */) {
|
|
@@ -78,6 +91,8 @@ void IndexShardsTemplate<IndexT>::syncWithSubIndexes() {
|
|
|
78
91
|
}
|
|
79
92
|
|
|
80
93
|
auto firstIndex = this->at(0);
|
|
94
|
+
this->d = firstIndex->d;
|
|
95
|
+
sync_d(this);
|
|
81
96
|
this->metric_type = firstIndex->metric_type;
|
|
82
97
|
this->is_trained = firstIndex->is_trained;
|
|
83
98
|
this->ntotal = firstIndex->ntotal;
|
|
@@ -92,29 +107,6 @@ void IndexShardsTemplate<IndexT>::syncWithSubIndexes() {
|
|
|
92
107
|
}
|
|
93
108
|
}
|
|
94
109
|
|
|
95
|
-
// No metric_type for IndexBinary
|
|
96
|
-
template <>
|
|
97
|
-
void IndexShardsTemplate<IndexBinary>::syncWithSubIndexes() {
|
|
98
|
-
if (!this->count()) {
|
|
99
|
-
this->is_trained = false;
|
|
100
|
-
this->ntotal = 0;
|
|
101
|
-
|
|
102
|
-
return;
|
|
103
|
-
}
|
|
104
|
-
|
|
105
|
-
auto firstIndex = this->at(0);
|
|
106
|
-
this->is_trained = firstIndex->is_trained;
|
|
107
|
-
this->ntotal = firstIndex->ntotal;
|
|
108
|
-
|
|
109
|
-
for (int i = 1; i < this->count(); ++i) {
|
|
110
|
-
auto index = this->at(i);
|
|
111
|
-
FAISS_THROW_IF_NOT(this->d == index->d);
|
|
112
|
-
FAISS_THROW_IF_NOT(this->is_trained == index->is_trained);
|
|
113
|
-
|
|
114
|
-
this->ntotal += index->ntotal;
|
|
115
|
-
}
|
|
116
|
-
}
|
|
117
|
-
|
|
118
110
|
template <typename IndexT>
|
|
119
111
|
void IndexShardsTemplate<IndexT>::train(idx_t n, const component_t* x) {
|
|
120
112
|
auto fn = [n, x](int no, IndexT* index) {
|
|
@@ -155,7 +147,7 @@ void IndexShardsTemplate<IndexT>::add_with_ids(
|
|
|
155
147
|
"request them to be shifted");
|
|
156
148
|
FAISS_THROW_IF_NOT_MSG(
|
|
157
149
|
this->ntotal == 0,
|
|
158
|
-
"when adding to IndexShards with
|
|
150
|
+
"when adding to IndexShards with successive_ids, "
|
|
159
151
|
"only add() in a single pass is supported");
|
|
160
152
|
}
|
|
161
153
|
|
|
@@ -111,7 +111,7 @@ void IndexShardsIVF::add_with_ids(
|
|
|
111
111
|
"request them to be shifted");
|
|
112
112
|
FAISS_THROW_IF_NOT_MSG(
|
|
113
113
|
this->ntotal == 0,
|
|
114
|
-
"when adding to IndexShards with
|
|
114
|
+
"when adding to IndexShards with successive_ids, "
|
|
115
115
|
"only add() in a single pass is supported");
|
|
116
116
|
}
|
|
117
117
|
|
|
@@ -137,7 +137,6 @@ void IndexShardsIVF::add_with_ids(
|
|
|
137
137
|
auto fn = [n, ids, x, nshard, d, Iq](int no, Index* index) {
|
|
138
138
|
idx_t i0 = (idx_t)no * n / nshard;
|
|
139
139
|
idx_t i1 = ((idx_t)no + 1) * n / nshard;
|
|
140
|
-
const float* x0 = x + i0 * d;
|
|
141
140
|
auto index_ivf = dynamic_cast<IndexIVF*>(index);
|
|
142
141
|
|
|
143
142
|
if (index->verbose) {
|
|
@@ -9,9 +9,10 @@
|
|
|
9
9
|
|
|
10
10
|
#include <faiss/MatrixStats.h>
|
|
11
11
|
|
|
12
|
-
#include <
|
|
12
|
+
#include <cstdarg> /* va_list, va_start, va_arg, va_end */
|
|
13
13
|
|
|
14
14
|
#include <faiss/utils/utils.h>
|
|
15
|
+
#include <cinttypes>
|
|
15
16
|
#include <cmath>
|
|
16
17
|
#include <cstdio>
|
|
17
18
|
|
|
@@ -21,18 +22,6 @@ namespace faiss {
|
|
|
21
22
|
* MatrixStats
|
|
22
23
|
*********************************************************************/
|
|
23
24
|
|
|
24
|
-
MatrixStats::PerDimStats::PerDimStats()
|
|
25
|
-
: n(0),
|
|
26
|
-
n_nan(0),
|
|
27
|
-
n_inf(0),
|
|
28
|
-
n0(0),
|
|
29
|
-
min(HUGE_VALF),
|
|
30
|
-
max(-HUGE_VALF),
|
|
31
|
-
sum(0),
|
|
32
|
-
sum2(0),
|
|
33
|
-
mean(NAN),
|
|
34
|
-
stddev(NAN) {}
|
|
35
|
-
|
|
36
25
|
void MatrixStats::PerDimStats::add(float x) {
|
|
37
26
|
n++;
|
|
38
27
|
if (std::isnan(x)) {
|
|
@@ -74,19 +63,12 @@ void MatrixStats::do_comment(const char* fmt, ...) {
|
|
|
74
63
|
buf += size;
|
|
75
64
|
}
|
|
76
65
|
|
|
77
|
-
MatrixStats::MatrixStats(size_t n, size_t d, const float* x)
|
|
78
|
-
: n(n),
|
|
79
|
-
d(d),
|
|
80
|
-
n_collision(0),
|
|
81
|
-
n_valid(0),
|
|
82
|
-
n0(0),
|
|
83
|
-
min_norm2(HUGE_VAL),
|
|
84
|
-
max_norm2(0) {
|
|
66
|
+
MatrixStats::MatrixStats(size_t n, size_t d, const float* x) : n(n), d(d) {
|
|
85
67
|
std::vector<char> comment_buf(10000);
|
|
86
68
|
buf = comment_buf.data();
|
|
87
69
|
nbuf = comment_buf.size();
|
|
88
70
|
|
|
89
|
-
do_comment("analyzing %
|
|
71
|
+
do_comment("analyzing %zd vectors of size %zd\n", n, d);
|
|
90
72
|
|
|
91
73
|
if (d > 1024) {
|
|
92
74
|
do_comment(
|
|
@@ -94,6 +76,9 @@ MatrixStats::MatrixStats(size_t n, size_t d, const float* x)
|
|
|
94
76
|
"please consider dimensionality reducution (with PCAMatrix)\n");
|
|
95
77
|
}
|
|
96
78
|
|
|
79
|
+
hash_value = hash_bytes((const uint8_t*)x, n * d * sizeof(*x));
|
|
80
|
+
do_comment("hash value 0x%016" PRIx64 "\n", hash_value);
|
|
81
|
+
|
|
97
82
|
size_t nbytes = sizeof(x[0]) * d;
|
|
98
83
|
per_dim_stats.resize(d);
|
|
99
84
|
|
|
@@ -156,7 +141,7 @@ MatrixStats::MatrixStats(size_t n, size_t d, const float* x)
|
|
|
156
141
|
|
|
157
142
|
if (n_collision > 0) {
|
|
158
143
|
do_comment(
|
|
159
|
-
"%
|
|
144
|
+
"%zd collisions in hash table, "
|
|
160
145
|
"counts may be invalid\n",
|
|
161
146
|
n_collision);
|
|
162
147
|
}
|
|
@@ -167,14 +152,14 @@ MatrixStats::MatrixStats(size_t n, size_t d, const float* x)
|
|
|
167
152
|
max = it->second;
|
|
168
153
|
}
|
|
169
154
|
}
|
|
170
|
-
do_comment("vector %
|
|
155
|
+
do_comment("vector %zd has %zd copies\n", max.first, max.count);
|
|
171
156
|
}
|
|
172
157
|
|
|
173
158
|
{ // norm stats
|
|
174
159
|
min_norm2 = sqrt(min_norm2);
|
|
175
160
|
max_norm2 = sqrt(max_norm2);
|
|
176
161
|
do_comment(
|
|
177
|
-
"range of L2 norms=[%g, %g] (%
|
|
162
|
+
"range of L2 norms=[%g, %g] (%zd null vectors)\n",
|
|
178
163
|
min_norm2,
|
|
179
164
|
max_norm2,
|
|
180
165
|
n0);
|
|
@@ -182,7 +167,7 @@ MatrixStats::MatrixStats(size_t n, size_t d, const float* x)
|
|
|
182
167
|
if (max_norm2 < min_norm2 * 1.0001) {
|
|
183
168
|
do_comment(
|
|
184
169
|
"vectors are normalized, inner product and "
|
|
185
|
-
"L2
|
|
170
|
+
"L2 search are equivalent\n");
|
|
186
171
|
}
|
|
187
172
|
|
|
188
173
|
if (max_norm2 > min_norm2 * 100) {
|
|
@@ -196,12 +181,12 @@ MatrixStats::MatrixStats(size_t n, size_t d, const float* x)
|
|
|
196
181
|
|
|
197
182
|
double max_std = 0, min_std = HUGE_VAL;
|
|
198
183
|
|
|
199
|
-
size_t n_dangerous_range = 0, n_0_range = 0,
|
|
184
|
+
size_t n_dangerous_range = 0, n_0_range = 0, n0_2 = 0;
|
|
200
185
|
|
|
201
186
|
for (size_t j = 0; j < d; j++) {
|
|
202
187
|
PerDimStats& st = per_dim_stats[j];
|
|
203
188
|
st.compute_mean_std();
|
|
204
|
-
|
|
189
|
+
n0_2 += st.n0;
|
|
205
190
|
|
|
206
191
|
if (st.max == st.min) {
|
|
207
192
|
n_0_range++;
|
|
@@ -215,19 +200,19 @@ MatrixStats::MatrixStats(size_t n, size_t d, const float* x)
|
|
|
215
200
|
min_std = st.stddev;
|
|
216
201
|
}
|
|
217
202
|
|
|
218
|
-
if (
|
|
203
|
+
if (n0_2 == 0) {
|
|
219
204
|
do_comment("matrix contains no 0s\n");
|
|
220
205
|
} else {
|
|
221
206
|
do_comment(
|
|
222
207
|
"matrix contains %.2f %% 0 entries\n",
|
|
223
|
-
|
|
208
|
+
n0_2 * 100.0 / (n * d));
|
|
224
209
|
}
|
|
225
210
|
|
|
226
211
|
if (n_0_range == 0) {
|
|
227
212
|
do_comment("no constant dimensions\n");
|
|
228
213
|
} else {
|
|
229
214
|
do_comment(
|
|
230
|
-
"%
|
|
215
|
+
"%zd dimensions are constant: they can be removed\n",
|
|
231
216
|
n_0_range);
|
|
232
217
|
}
|
|
233
218
|
|
|
@@ -235,7 +220,7 @@ MatrixStats::MatrixStats(size_t n, size_t d, const float* x)
|
|
|
235
220
|
do_comment("no dimension has a too large mean\n");
|
|
236
221
|
} else {
|
|
237
222
|
do_comment(
|
|
238
|
-
"%
|
|
223
|
+
"%zd dimensions are too large "
|
|
239
224
|
"wrt. their variance, may loose precision "
|
|
240
225
|
"in IndexFlatL2 (use CenteringTransform)\n",
|
|
241
226
|
n_dangerous_range);
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
#pragma once
|
|
11
11
|
|
|
12
12
|
#include <stdint.h>
|
|
13
|
+
#include <cmath>
|
|
13
14
|
#include <string>
|
|
14
15
|
#include <unordered_map>
|
|
15
16
|
#include <vector>
|
|
@@ -26,20 +27,31 @@ struct MatrixStats {
|
|
|
26
27
|
std::string comments;
|
|
27
28
|
|
|
28
29
|
// raw statistics
|
|
29
|
-
size_t n, d;
|
|
30
|
-
size_t n_collision
|
|
31
|
-
|
|
30
|
+
size_t n = 0, d = 0;
|
|
31
|
+
size_t n_collision = 0;
|
|
32
|
+
size_t n_valid = 0;
|
|
33
|
+
size_t n0 = 0;
|
|
34
|
+
double min_norm2 = HUGE_VALF;
|
|
35
|
+
double max_norm2 = 0;
|
|
36
|
+
uint64_t hash_value = 0;
|
|
32
37
|
|
|
33
38
|
struct PerDimStats {
|
|
34
|
-
|
|
39
|
+
/// counts of various special entries
|
|
40
|
+
size_t n = 0;
|
|
41
|
+
size_t n_nan = 0;
|
|
42
|
+
size_t n_inf = 0;
|
|
43
|
+
size_t n0 = 0;
|
|
35
44
|
|
|
36
|
-
|
|
37
|
-
|
|
45
|
+
/// to get min/max and stddev values
|
|
46
|
+
float min = HUGE_VALF;
|
|
47
|
+
float max = -HUGE_VALF;
|
|
48
|
+
double sum = 0;
|
|
49
|
+
double sum2 = 0;
|
|
38
50
|
|
|
39
|
-
size_t n_valid;
|
|
40
|
-
double mean
|
|
51
|
+
size_t n_valid = 0;
|
|
52
|
+
double mean = NAN;
|
|
53
|
+
double stddev = NAN;
|
|
41
54
|
|
|
42
|
-
PerDimStats();
|
|
43
55
|
void add(float x);
|
|
44
56
|
void compute_mean_std();
|
|
45
57
|
};
|
|
@@ -9,8 +9,8 @@
|
|
|
9
9
|
|
|
10
10
|
#include <faiss/MetaIndexes.h>
|
|
11
11
|
|
|
12
|
-
#include <stdint.h>
|
|
13
12
|
#include <cinttypes>
|
|
13
|
+
#include <cstdint>
|
|
14
14
|
#include <cstdio>
|
|
15
15
|
#include <limits>
|
|
16
16
|
|
|
@@ -70,37 +70,37 @@ void IndexSplitVectors::search(
|
|
|
70
70
|
sum_d == d, "not enough indexes compared to # dimensions");
|
|
71
71
|
|
|
72
72
|
int64_t nshard = sub_indexes.size();
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
73
|
+
|
|
74
|
+
std::unique_ptr<float[]> all_distances(new float[nshard * k * n]);
|
|
75
|
+
std::unique_ptr<idx_t[]> all_labels(new idx_t[nshard * k * n]);
|
|
76
|
+
|
|
77
|
+
auto query_func =
|
|
78
|
+
[n, x, k, distances, labels, &all_distances, &all_labels, this](
|
|
79
|
+
int no) {
|
|
80
|
+
const IndexSplitVectors* index = this;
|
|
81
|
+
float* distances1 =
|
|
82
|
+
no == 0 ? distances : all_distances.get() + no * k * n;
|
|
83
|
+
idx_t* labels1 =
|
|
84
|
+
no == 0 ? labels : all_labels.get() + no * k * n;
|
|
85
|
+
if (index->verbose)
|
|
86
|
+
printf("begin query shard %d on %" PRId64 " points\n",
|
|
87
|
+
no,
|
|
88
|
+
n);
|
|
89
|
+
const Index* sub_index = index->sub_indexes[no];
|
|
90
|
+
int64_t sub_d = sub_index->d, d = index->d;
|
|
91
|
+
idx_t ofs = 0;
|
|
92
|
+
for (int i = 0; i < no; i++)
|
|
93
|
+
ofs += index->sub_indexes[i]->d;
|
|
94
|
+
|
|
95
|
+
std::unique_ptr<float[]> sub_x(new float[sub_d * n]);
|
|
96
|
+
for (idx_t i = 0; i < n; i++)
|
|
97
|
+
memcpy(sub_x.get() + i * sub_d,
|
|
98
|
+
x + ofs + i * d,
|
|
99
|
+
sub_d * sizeof(float));
|
|
100
|
+
sub_index->search(n, sub_x.get(), k, distances1, labels1);
|
|
101
|
+
if (index->verbose)
|
|
102
|
+
printf("end query shard %d\n", no);
|
|
103
|
+
};
|
|
104
104
|
|
|
105
105
|
if (!threaded) {
|
|
106
106
|
for (int i = 0; i < nshard; i++) {
|
|
@@ -125,8 +125,8 @@ void IndexSplitVectors::search(
|
|
|
125
125
|
int64_t factor = 1;
|
|
126
126
|
for (int i = 0; i < nshard; i++) {
|
|
127
127
|
if (i > 0) { // results of 0 are already in the table
|
|
128
|
-
const float* distances_i = all_distances + i * k * n;
|
|
129
|
-
const idx_t* labels_i = all_labels + i * k * n;
|
|
128
|
+
const float* distances_i = all_distances.get() + i * k * n;
|
|
129
|
+
const idx_t* labels_i = all_labels.get() + i * k * n;
|
|
130
130
|
for (int64_t j = 0; j < n; j++) {
|
|
131
131
|
if (labels[j] >= 0 && labels_i[j] >= 0) {
|
|
132
132
|
labels[j] += labels_i[j] * factor;
|
|
@@ -238,6 +238,6 @@ void IndexRandom::reset() {
|
|
|
238
238
|
ntotal = 0;
|
|
239
239
|
}
|
|
240
240
|
|
|
241
|
-
IndexRandom::~IndexRandom()
|
|
241
|
+
IndexRandom::~IndexRandom() = default;
|
|
242
242
|
|
|
243
243
|
} // namespace faiss
|
|
@@ -441,13 +441,10 @@ void eig(size_t d_in, double* cov, double* eigenvalues, int verbose) {
|
|
|
441
441
|
|
|
442
442
|
} // namespace
|
|
443
443
|
|
|
444
|
-
void PCAMatrix::train(idx_t n, const float*
|
|
445
|
-
const float*
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
d_in, (size_t*)&n, max_points_per_d * d_in, x, verbose);
|
|
449
|
-
|
|
450
|
-
ScopeDeleter<float> del_x(x != x_in ? x : nullptr);
|
|
444
|
+
void PCAMatrix::train(idx_t n, const float* x_in) {
|
|
445
|
+
const float* x = fvecs_maybe_subsample(
|
|
446
|
+
d_in, (size_t*)&n, max_points_per_d * d_in, x_in, verbose);
|
|
447
|
+
TransformedVectors tv(x_in, x);
|
|
451
448
|
|
|
452
449
|
// compute mean
|
|
453
450
|
mean.clear();
|
|
@@ -884,14 +881,13 @@ ITQTransform::ITQTransform(int d_in, int d_out, bool do_pca)
|
|
|
884
881
|
is_trained = false;
|
|
885
882
|
}
|
|
886
883
|
|
|
887
|
-
void ITQTransform::train(idx_t n, const float*
|
|
884
|
+
void ITQTransform::train(idx_t n, const float* x_in) {
|
|
888
885
|
FAISS_THROW_IF_NOT(!is_trained);
|
|
889
886
|
|
|
890
|
-
const float* x_in = x;
|
|
891
887
|
size_t max_train_points = std::max(d_in * max_train_per_dim, 32768);
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
888
|
+
const float* x =
|
|
889
|
+
fvecs_maybe_subsample(d_in, (size_t*)&n, max_train_points, x_in);
|
|
890
|
+
TransformedVectors tv(x_in, x);
|
|
895
891
|
|
|
896
892
|
std::unique_ptr<float[]> x_norm(new float[n * d_in]);
|
|
897
893
|
{ // normalize
|
|
@@ -988,25 +984,16 @@ void ITQTransform::check_identical(const VectorTransform& other_in) const {
|
|
|
988
984
|
*********************************************/
|
|
989
985
|
|
|
990
986
|
OPQMatrix::OPQMatrix(int d, int M, int d2)
|
|
991
|
-
: LinearTransform(d, d2 == -1 ? d : d2, false),
|
|
992
|
-
M(M),
|
|
993
|
-
niter(50),
|
|
994
|
-
niter_pq(4),
|
|
995
|
-
niter_pq_0(40),
|
|
996
|
-
verbose(false),
|
|
997
|
-
pq(nullptr) {
|
|
987
|
+
: LinearTransform(d, d2 == -1 ? d : d2, false), M(M) {
|
|
998
988
|
is_trained = false;
|
|
999
989
|
// OPQ is quite expensive to train, so set this right.
|
|
1000
990
|
max_train_points = 256 * 256;
|
|
1001
|
-
pq = nullptr;
|
|
1002
991
|
}
|
|
1003
992
|
|
|
1004
|
-
void OPQMatrix::train(idx_t n, const float*
|
|
1005
|
-
const float*
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
ScopeDeleter<float> del_x(x != x_in ? x : nullptr);
|
|
993
|
+
void OPQMatrix::train(idx_t n, const float* x_in) {
|
|
994
|
+
const float* x = fvecs_maybe_subsample(
|
|
995
|
+
d_in, (size_t*)&n, max_train_points, x_in, verbose);
|
|
996
|
+
TransformedVectors tv(x_in, x);
|
|
1010
997
|
|
|
1011
998
|
// To support d_out > d_in, we pad input vectors with 0s to d_out
|
|
1012
999
|
size_t d = d_out <= d_in ? d_in : d_out;
|