faiss 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/LICENSE.txt +1 -1
- data/README.md +16 -4
- data/ext/faiss/ext.cpp +12 -308
- data/ext/faiss/extconf.rb +6 -3
- data/ext/faiss/index.cpp +189 -0
- data/ext/faiss/index_binary.cpp +75 -0
- data/ext/faiss/kmeans.cpp +40 -0
- data/ext/faiss/numo.hpp +867 -0
- data/ext/faiss/pca_matrix.cpp +33 -0
- data/ext/faiss/product_quantizer.cpp +53 -0
- data/ext/faiss/utils.cpp +13 -0
- data/ext/faiss/utils.h +5 -0
- data/lib/faiss.rb +0 -5
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +36 -33
- data/vendor/faiss/faiss/AutoTune.h +6 -3
- data/vendor/faiss/faiss/Clustering.cpp +16 -12
- data/vendor/faiss/faiss/Index.cpp +3 -4
- data/vendor/faiss/faiss/Index.h +3 -3
- data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
- data/vendor/faiss/faiss/IndexBinary.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
- data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
- data/vendor/faiss/faiss/IndexFlat.h +0 -51
- data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
- data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
- data/vendor/faiss/faiss/IndexIVF.h +22 -15
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
- data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
- data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
- data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
- data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
- data/vendor/faiss/faiss/IndexRefine.h +73 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
- data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
- data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
- data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
- data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
- data/vendor/faiss/faiss/impl/io.cpp +33 -2
- data/vendor/faiss/faiss/impl/io.h +7 -2
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
- data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
- data/vendor/faiss/faiss/index_factory.cpp +112 -7
- data/vendor/faiss/faiss/index_io.h +1 -48
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
- data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
- data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
- data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
- data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
- data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
- data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
- data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
- data/vendor/faiss/faiss/utils/Heap.h +61 -50
- data/vendor/faiss/faiss/utils/distances.cpp +164 -319
- data/vendor/faiss/faiss/utils/distances.h +28 -20
- data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
- data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
- data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
- data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
- data/vendor/faiss/faiss/utils/hamming.h +2 -7
- data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
- data/vendor/faiss/faiss/utils/partitioning.h +69 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
- data/vendor/faiss/faiss/utils/simdlib.h +31 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
- metadata +54 -149
- data/lib/faiss/index.rb +0 -20
- data/lib/faiss/index_binary.rb +0 -20
- data/lib/faiss/kmeans.rb +0 -15
- data/lib/faiss/pca_matrix.rb +0 -15
- data/lib/faiss/product_quantizer.rb +0 -22
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
- data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
- data/vendor/faiss/c_api/AutoTune_c.h +0 -66
- data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
- data/vendor/faiss/c_api/Clustering_c.h +0 -123
- data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
- data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
- data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
- data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
- data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
- data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
- data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
- data/vendor/faiss/c_api/IndexShards_c.h +0 -39
- data/vendor/faiss/c_api/Index_c.cpp +0 -105
- data/vendor/faiss/c_api/Index_c.h +0 -183
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
- data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
- data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
- data/vendor/faiss/c_api/clone_index_c.h +0 -32
- data/vendor/faiss/c_api/error_c.h +0 -42
- data/vendor/faiss/c_api/error_impl.cpp +0 -27
- data/vendor/faiss/c_api/error_impl.h +0 -16
- data/vendor/faiss/c_api/faiss_c.h +0 -58
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
- data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
- data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
- data/vendor/faiss/c_api/index_factory_c.h +0 -30
- data/vendor/faiss/c_api/index_io_c.cpp +0 -42
- data/vendor/faiss/c_api/index_io_c.h +0 -50
- data/vendor/faiss/c_api/macros_impl.h +0 -110
- data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
- data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
- data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
- data/vendor/faiss/misc/test_blas.cpp +0 -87
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
- data/vendor/faiss/tests/test_merge.cpp +0 -260
- data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
- data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
- data/vendor/faiss/tests/test_params_override.cpp +0 -236
- data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
- data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
- data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
- data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
#include <faiss/VectorTransform.h>
|
|
2
|
+
|
|
3
|
+
#include "utils.h"
|
|
4
|
+
|
|
5
|
+
// Exposes faiss::PCAMatrix to Ruby as the "PCAMatrix" class under module `m`.
//
// Ruby-visible API registered here:
//   - constructor taking two ints (presumably d_in and d_out -- confirm against faiss)
//   - d_in / d_out : read the corresponding fields
//   - train(objects) : validates the input shape, then trains on it
//   - apply(objects) : validates the input shape and returns a new
//                      (n, d_out) single-precision array
void init_pca_matrix(Rice::Module& m) {
  auto pca_class = Rice::define_class_under<faiss::PCAMatrix>(m, "PCAMatrix");

  pca_class.define_constructor(Rice::Constructor<faiss::PCAMatrix, int, int>());

  // Plain field readers.
  pca_class.define_method(
    "d_in",
    [](faiss::PCAMatrix &self) {
      return self.d_in;
    });

  pca_class.define_method(
    "d_out",
    [](faiss::PCAMatrix &self) {
      return self.d_out;
    });

  pca_class.define_method(
    "train",
    [](faiss::PCAMatrix &self, numo::SFloat vectors) {
      // check_shape rejects anything that is not 2-D with d_in columns
      // and hands back the number of rows.
      auto row_count = check_shape(vectors, self.d_in);
      self.train(row_count, vectors.read_ptr());
    });

  pca_class.define_method(
    "apply",
    [](faiss::PCAMatrix &self, numo::SFloat vectors) {
      auto row_count = check_shape(vectors, self.d_in);

      // Output buffer: one row per input row, d_out columns.
      auto projected = numo::SFloat({row_count, static_cast<size_t>(self.d_out)});
      self.apply_noalloc(row_count, vectors.read_ptr(), projected.write_ptr());
      return projected;
    });
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
#include <faiss/impl/ProductQuantizer.h>
|
|
2
|
+
#include <faiss/index_io.h>
|
|
3
|
+
|
|
4
|
+
#include "utils.h"
|
|
5
|
+
|
|
6
|
+
// Exposes faiss::ProductQuantizer to Ruby as the "ProductQuantizer" class
// under module `m`.
//
// Ruby-visible API registered here:
//   - constructor taking three size_t values
//   - d / m : read the `d` and `M` fields
//   - train(objects)         : validates shape (n, d), then trains
//   - compute_codes(objects) : validates shape (n, d), returns a UInt8
//                              array of shape (n, code_size)
//   - decode(codes)          : validates shape (n, code_size), returns an
//                              SFloat array of shape (n, d)
//   - save(fname)            : writes the quantizer to a file
//   - load(fname)            : singleton function reading one from a file
void init_product_quantizer(Rice::Module& m) {
  Rice::define_class_under<faiss::ProductQuantizer>(m, "ProductQuantizer")
    .define_constructor(Rice::Constructor<faiss::ProductQuantizer, size_t, size_t, size_t>())
    .define_method(
      "d",
      [](faiss::ProductQuantizer &self) {
        return self.d;
      })
    .define_method(
      "m",
      [](faiss::ProductQuantizer &self) {
        return self.M;
      })
    .define_method(
      "train",
      [](faiss::ProductQuantizer &self, numo::SFloat objects) {
        auto n = check_shape(objects, self.d);
        self.train(n, objects.read_ptr());
      })
    .define_method(
      "compute_codes",
      [](faiss::ProductQuantizer &self, numo::SFloat objects) {
        auto n = check_shape(objects, self.d);

        // faiss writes code_size bytes per encoded vector. code_size equals M
        // only for 8-bit sub-quantizers, so the buffer is sized by code_size
        // to avoid overrunning it (or misaligning rows) for other bit widths.
        auto codes = numo::UInt8({n, self.code_size});
        self.compute_codes(objects.read_ptr(), codes.write_ptr(), n);
        return codes;
      })
    .define_method(
      "decode",
      [](faiss::ProductQuantizer &self, numo::UInt8 objects) {
        // Each input row must hold exactly one code, i.e. code_size bytes,
        // because decode reads n * code_size bytes from the buffer.
        auto n = check_shape(objects, self.code_size);

        auto x = numo::SFloat({n, self.d});
        self.decode(objects.read_ptr(), x.write_ptr(), n);
        return x;
      })
    .define_method(
      "save",
      [](faiss::ProductQuantizer &self, const char *fname) {
        faiss::write_ProductQuantizer(&self, fname);
      })
    .define_singleton_function(
      "load",
      [](const char *fname) {
        // NOTE(review): the value from read_ProductQuantizer is returned
        // as-is; confirm how Rice manages the lifetime of the returned object.
        return faiss::read_ProductQuantizer(fname);
      });
}
|
data/ext/faiss/utils.cpp
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
#include "utils.h"
|
|
2
|
+
|
|
3
|
+
// Validates that `objects` is a 2-dimensional array whose second dimension
// equals `k`.
//
// Returns the size of the first dimension (shape[0]).
// Throws Rice::Exception with rb_eArgError when the array is not
// 2-dimensional or when its second dimension differs from `k`.
size_t check_shape(numo::NArray objects, size_t k) {
  auto ndim = objects.ndim();
  if (ndim != 2) {
    // Cast so the argument type always matches the %zu conversion.
    throw Rice::Exception(rb_eArgError, "expected 2 dimensions, not %zu", static_cast<size_t>(ndim));
  }
  auto shape = objects.shape();
  if (shape[1] != k) {
    // `k` is a size_t, so %d would be an argument/conversion mismatch here.
    throw Rice::Exception(rb_eArgError, "expected 2nd dimension to be %zu, not %zu", k, static_cast<size_t>(shape[1]));
  }
  return shape[0];
}
|
data/ext/faiss/utils.h
ADDED
data/lib/faiss.rb
CHANGED
data/lib/faiss/version.rb
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
|
|
16
16
|
#include <cinttypes>
|
|
17
17
|
#include <cmath>
|
|
18
|
+
#include <typeinfo>
|
|
18
19
|
|
|
19
20
|
#include <faiss/impl/FaissAssert.h>
|
|
20
21
|
#include <faiss/utils/utils.h>
|
|
@@ -32,6 +33,7 @@
|
|
|
32
33
|
#include <faiss/MetaIndexes.h>
|
|
33
34
|
#include <faiss/IndexScalarQuantizer.h>
|
|
34
35
|
#include <faiss/IndexHNSW.h>
|
|
36
|
+
#include <faiss/IndexRefine.h>
|
|
35
37
|
|
|
36
38
|
#include <faiss/IndexBinaryFlat.h>
|
|
37
39
|
#include <faiss/IndexBinaryHNSW.h>
|
|
@@ -234,7 +236,7 @@ void OperatingPoints::display (bool only_optimal) const
|
|
|
234
236
|
{
|
|
235
237
|
const std::vector<OperatingPoint> &pts =
|
|
236
238
|
only_optimal ? optimal_pts : all_pts;
|
|
237
|
-
printf("Tested %zd operating points, %zd ones are optimal:\n",
|
|
239
|
+
printf("Tested %zd operating points, %zd ones are Pareto-optimal:\n",
|
|
238
240
|
all_pts.size(), optimal_pts.size());
|
|
239
241
|
|
|
240
242
|
for (int i = 0; i < pts.size(); i++) {
|
|
@@ -333,7 +335,7 @@ static void init_pq_ParameterRange (const ProductQuantizer & pq,
|
|
|
333
335
|
pr.values.push_back (pq.code_size * 8);
|
|
334
336
|
}
|
|
335
337
|
|
|
336
|
-
ParameterRange &ParameterSpace::add_range(const
|
|
338
|
+
ParameterRange &ParameterSpace::add_range(const std::string & name)
|
|
337
339
|
{
|
|
338
340
|
for (auto & pr : parameter_ranges) {
|
|
339
341
|
if (pr.name == name) {
|
|
@@ -346,13 +348,13 @@ ParameterRange &ParameterSpace::add_range(const char * name)
|
|
|
346
348
|
}
|
|
347
349
|
|
|
348
350
|
|
|
349
|
-
/// initialize with reasonable parameters for
|
|
351
|
+
/// initialize with reasonable parameters for this type of index
|
|
350
352
|
void ParameterSpace::initialize (const Index * index)
|
|
351
353
|
{
|
|
352
354
|
if (DC (IndexPreTransform)) {
|
|
353
355
|
index = ix->index;
|
|
354
356
|
}
|
|
355
|
-
if (DC (
|
|
357
|
+
if (DC (IndexRefine)) {
|
|
356
358
|
ParameterRange & pr = add_range("k_factor_rf");
|
|
357
359
|
for (int i = 0; i <= 6; i++) {
|
|
358
360
|
pr.values.push_back (1 << i);
|
|
@@ -372,12 +374,14 @@ void ParameterSpace::initialize (const Index * index)
|
|
|
372
374
|
pr.values.push_back (nprobe);
|
|
373
375
|
}
|
|
374
376
|
}
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
377
|
+
ParameterSpace ivf_pspace;
|
|
378
|
+
ivf_pspace.initialize(ix->quantizer);
|
|
379
|
+
|
|
380
|
+
for (const ParameterRange & p: ivf_pspace.parameter_ranges) {
|
|
381
|
+
ParameterRange & pr = add_range("quantizer_" + p.name);
|
|
382
|
+
pr.values = p.values;
|
|
380
383
|
}
|
|
384
|
+
|
|
381
385
|
}
|
|
382
386
|
if (DC (IndexPQ)) {
|
|
383
387
|
ParameterRange & pr = add_range("ht");
|
|
@@ -446,7 +450,7 @@ void ParameterSpace::set_index_parameters (
|
|
|
446
450
|
tok = strtok_r (nullptr, " ,", &ptr)) {
|
|
447
451
|
char name[100];
|
|
448
452
|
double val;
|
|
449
|
-
int ret = sscanf (tok, "%
|
|
453
|
+
int ret = sscanf (tok, "%99[^=]=%lf", name, &val);
|
|
450
454
|
FAISS_THROW_IF_NOT_FMT (
|
|
451
455
|
ret == 2, "could not interpret parameters %s", tok);
|
|
452
456
|
set_index_parameter (index, name, val);
|
|
@@ -457,44 +461,38 @@ void ParameterSpace::set_index_parameters (
|
|
|
457
461
|
void ParameterSpace::set_index_parameter (
|
|
458
462
|
Index * index, const std::string & name, double val) const
|
|
459
463
|
{
|
|
460
|
-
if (verbose > 1)
|
|
461
|
-
printf("
|
|
464
|
+
if (verbose > 1) {
|
|
465
|
+
printf(" set_index_parameter %s=%g\n", name.c_str(), val);
|
|
466
|
+
}
|
|
462
467
|
|
|
463
468
|
if (name == "verbose") {
|
|
464
469
|
index->verbose = int(val);
|
|
465
470
|
// and fall through to also enable it on sub-indexes
|
|
466
471
|
}
|
|
467
|
-
if (DC (
|
|
472
|
+
if (DC (IndexIDMap)) {
|
|
468
473
|
set_index_parameter (ix->index, name, val);
|
|
469
474
|
return;
|
|
470
475
|
}
|
|
471
|
-
if (DC (
|
|
472
|
-
|
|
473
|
-
auto fn =
|
|
474
|
-
[this, name, val](int, Index* subIndex) {
|
|
475
|
-
set_index_parameter(subIndex, name, val);
|
|
476
|
-
};
|
|
477
|
-
|
|
478
|
-
ix->runOnIndex(fn);
|
|
476
|
+
if (DC (IndexPreTransform)) {
|
|
477
|
+
set_index_parameter (ix->index, name, val);
|
|
479
478
|
return;
|
|
480
479
|
}
|
|
481
|
-
if (DC (
|
|
480
|
+
if (DC (ThreadedIndex<Index>)) {
|
|
482
481
|
// call on all sub-indexes
|
|
483
482
|
auto fn =
|
|
484
|
-
|
|
485
|
-
|
|
483
|
+
[this, name, val](int /* no */, Index* subIndex) {
|
|
484
|
+
set_index_parameter(subIndex, name, val);
|
|
486
485
|
};
|
|
487
|
-
|
|
488
486
|
ix->runOnIndex(fn);
|
|
489
487
|
return;
|
|
490
488
|
}
|
|
491
|
-
if (DC (
|
|
489
|
+
if (DC (IndexRefine)) {
|
|
492
490
|
if (name == "k_factor_rf") {
|
|
493
491
|
ix->k_factor = int(val);
|
|
494
492
|
return;
|
|
495
493
|
}
|
|
496
494
|
// otherwise it is for the sub-index
|
|
497
|
-
set_index_parameter (
|
|
495
|
+
set_index_parameter (ix->base_index, name, val);
|
|
498
496
|
return;
|
|
499
497
|
}
|
|
500
498
|
|
|
@@ -504,10 +502,7 @@ void ParameterSpace::set_index_parameter (
|
|
|
504
502
|
}
|
|
505
503
|
|
|
506
504
|
if (name == "nprobe") {
|
|
507
|
-
if (DC (
|
|
508
|
-
set_index_parameter (ix->index, name, val);
|
|
509
|
-
return;
|
|
510
|
-
} else if (DC (IndexIVF)) {
|
|
505
|
+
if (DC (IndexIVF)) {
|
|
511
506
|
ix->nprobe = int(val);
|
|
512
507
|
return;
|
|
513
508
|
}
|
|
@@ -559,6 +554,14 @@ void ParameterSpace::set_index_parameter (
|
|
|
559
554
|
}
|
|
560
555
|
}
|
|
561
556
|
|
|
557
|
+
if (name.find("quantizer_") == 0) {
|
|
558
|
+
if (DC(IndexIVF)) {
|
|
559
|
+
std::string sub_name = name.substr(strlen("quantizer_"));
|
|
560
|
+
set_index_parameter(ix->quantizer, sub_name, val);
|
|
561
|
+
return;
|
|
562
|
+
}
|
|
563
|
+
}
|
|
564
|
+
|
|
562
565
|
FAISS_THROW_FMT ("ParameterSpace::set_index_parameter:"
|
|
563
566
|
"could not set parameter %s",
|
|
564
567
|
name.c_str());
|
|
@@ -707,8 +710,8 @@ void ParameterSpace::explore (Index *index,
|
|
|
707
710
|
bool keep = ops->add (perf, t_search, combination_name (cno), cno);
|
|
708
711
|
|
|
709
712
|
if (verbose)
|
|
710
|
-
printf(" perf %.3f t %.3f (%d
|
|
711
|
-
perf, t_search, nrun,
|
|
713
|
+
printf(" perf %.3f t %.3f (%d %s) %s\n",
|
|
714
|
+
perf, t_search, nrun, nrun >= 2 ? "runs" : "run",
|
|
712
715
|
keep ? "*" : "");
|
|
713
716
|
}
|
|
714
717
|
}
|
|
@@ -81,7 +81,10 @@ struct IntersectionCriterion: AutoTuneCriterion {
|
|
|
81
81
|
/**
|
|
82
82
|
* Maintains a list of experimental results. Each operating point is a
|
|
83
83
|
* (perf, t, key) triplet, where higher perf and lower t is
|
|
84
|
-
* better. The key field is an arbitrary identifier for the operating point
|
|
84
|
+
* better. The key field is an arbitrary identifier for the operating point.
|
|
85
|
+
*
|
|
86
|
+
* Includes primitives to extract the Pareto-optimal operating points in the
|
|
87
|
+
* (perf, t) space.
|
|
85
88
|
*/
|
|
86
89
|
|
|
87
90
|
struct OperatingPoint {
|
|
@@ -168,7 +171,7 @@ struct ParameterSpace {
|
|
|
168
171
|
void display () const;
|
|
169
172
|
|
|
170
173
|
/// add a new parameter (or return it if it exists)
|
|
171
|
-
ParameterRange &add_range(const
|
|
174
|
+
ParameterRange &add_range(const std::string & name);
|
|
172
175
|
|
|
173
176
|
/// initialize with reasonable parameters for the index
|
|
174
177
|
virtual void initialize (const Index * index);
|
|
@@ -179,7 +182,7 @@ struct ParameterSpace {
|
|
|
179
182
|
/// set a combination of parameters described by a string
|
|
180
183
|
void set_index_parameters (Index *index, const char *param_string) const;
|
|
181
184
|
|
|
182
|
-
/// set one of the parameters
|
|
185
|
+
/// set one of the parameters, returns whether setting was successful
|
|
183
186
|
virtual void set_index_parameter (
|
|
184
187
|
Index * index, const std::string & name, double val) const;
|
|
185
188
|
|
|
@@ -269,6 +269,7 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
|
|
|
269
269
|
const Index * codec, Index & index,
|
|
270
270
|
const float *weights) {
|
|
271
271
|
|
|
272
|
+
|
|
272
273
|
FAISS_THROW_IF_NOT_FMT (nx >= k,
|
|
273
274
|
"Number of training points (%" PRId64 ") should be at least "
|
|
274
275
|
"as large as number of clusters (%zd)", nx, k);
|
|
@@ -350,8 +351,9 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
|
|
|
350
351
|
std::unique_ptr<float []> dis(new float[nx]);
|
|
351
352
|
|
|
352
353
|
// remember best iteration for redo
|
|
353
|
-
|
|
354
|
-
|
|
354
|
+
bool lower_is_better = index.metric_type != METRIC_INNER_PRODUCT;
|
|
355
|
+
float best_obj = lower_is_better ? HUGE_VALF : -HUGE_VALF;
|
|
356
|
+
std::vector<ClusteringIterationStats> best_iteration_stats;
|
|
355
357
|
std::vector<float> best_centroids;
|
|
356
358
|
|
|
357
359
|
// support input centroids
|
|
@@ -417,7 +419,7 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
|
|
|
417
419
|
|
|
418
420
|
// k-means iterations
|
|
419
421
|
|
|
420
|
-
float
|
|
422
|
+
float obj = 0;
|
|
421
423
|
for (int i = 0; i < niter; i++) {
|
|
422
424
|
double t0s = getmillisecs();
|
|
423
425
|
|
|
@@ -440,10 +442,10 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
|
|
|
440
442
|
InterruptCallback::check();
|
|
441
443
|
t_search_tot += getmillisecs() - t0s;
|
|
442
444
|
|
|
443
|
-
// accumulate
|
|
444
|
-
|
|
445
|
+
// accumulate objective
|
|
446
|
+
obj = 0;
|
|
445
447
|
for (int j = 0; j < nx; j++) {
|
|
446
|
-
|
|
448
|
+
obj += dis[j];
|
|
447
449
|
}
|
|
448
450
|
|
|
449
451
|
// update the centroids
|
|
@@ -463,8 +465,9 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
|
|
|
463
465
|
|
|
464
466
|
// collect statistics
|
|
465
467
|
ClusteringIterationStats stats =
|
|
466
|
-
{
|
|
467
|
-
t_search_tot / 1000,
|
|
468
|
+
{ obj, (getmillisecs() - t0) / 1000.0,
|
|
469
|
+
t_search_tot / 1000,
|
|
470
|
+
imbalance_factor (nx, k, assign.get()),
|
|
468
471
|
nsplit };
|
|
469
472
|
iteration_stats.push_back(stats);
|
|
470
473
|
|
|
@@ -491,20 +494,21 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
|
|
|
491
494
|
|
|
492
495
|
if (verbose) printf("\n");
|
|
493
496
|
if (nredo > 1) {
|
|
494
|
-
if (
|
|
497
|
+
if ((lower_is_better && obj < best_obj) ||
|
|
498
|
+
(!lower_is_better && obj > best_obj)) {
|
|
495
499
|
if (verbose) {
|
|
496
500
|
printf ("Objective improved: keep new clusters\n");
|
|
497
501
|
}
|
|
498
502
|
best_centroids = centroids;
|
|
499
|
-
|
|
500
|
-
|
|
503
|
+
best_iteration_stats = iteration_stats;
|
|
504
|
+
best_obj = obj;
|
|
501
505
|
}
|
|
502
506
|
index.reset ();
|
|
503
507
|
}
|
|
504
508
|
}
|
|
505
509
|
if (nredo > 1) {
|
|
506
510
|
centroids = best_centroids;
|
|
507
|
-
iteration_stats =
|
|
511
|
+
iteration_stats = best_iteration_stats;
|
|
508
512
|
index.reset();
|
|
509
513
|
index.add(k, best_centroids.data());
|
|
510
514
|
}
|
|
@@ -34,11 +34,10 @@ void Index::range_search (idx_t , const float *, float,
|
|
|
34
34
|
FAISS_THROW_MSG ("range search not implemented");
|
|
35
35
|
}
|
|
36
36
|
|
|
37
|
-
void Index::assign (idx_t n, const float * x, idx_t * labels, idx_t k)
|
|
37
|
+
void Index::assign (idx_t n, const float * x, idx_t * labels, idx_t k) const
|
|
38
38
|
{
|
|
39
|
-
float
|
|
40
|
-
|
|
41
|
-
search (n, x, k, distances, labels);
|
|
39
|
+
std::vector<float> distances(n * k);
|
|
40
|
+
search (n, x, k, distances.data(), labels);
|
|
42
41
|
}
|
|
43
42
|
|
|
44
43
|
void Index::add_with_ids(
|
data/vendor/faiss/faiss/Index.h
CHANGED
|
@@ -17,8 +17,8 @@
|
|
|
17
17
|
#include <sstream>
|
|
18
18
|
|
|
19
19
|
#define FAISS_VERSION_MAJOR 1
|
|
20
|
-
#define FAISS_VERSION_MINOR
|
|
21
|
-
#define FAISS_VERSION_PATCH
|
|
20
|
+
#define FAISS_VERSION_MINOR 7
|
|
21
|
+
#define FAISS_VERSION_PATCH 0
|
|
22
22
|
|
|
23
23
|
/**
|
|
24
24
|
* @namespace faiss
|
|
@@ -133,7 +133,7 @@ struct Index {
|
|
|
133
133
|
* @param x input vectors to search, size n * d
|
|
134
134
|
* @param labels output labels of the NNs, size n*k
|
|
135
135
|
*/
|
|
136
|
-
void assign (idx_t n, const float * x, idx_t * labels, idx_t k = 1);
|
|
136
|
+
virtual void assign (idx_t n, const float * x, idx_t * labels, idx_t k = 1) const;
|
|
137
137
|
|
|
138
138
|
/// removes all elements from the database.
|
|
139
139
|
virtual void reset() = 0;
|
|
@@ -26,10 +26,9 @@ void IndexBinary::range_search(idx_t, const uint8_t *, int,
|
|
|
26
26
|
FAISS_THROW_MSG("range search not implemented");
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
-
void IndexBinary::assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k) {
|
|
30
|
-
int
|
|
31
|
-
|
|
32
|
-
search(n, x, k, distances, labels);
|
|
29
|
+
void IndexBinary::assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k) const {
|
|
30
|
+
std::vector<int> distances(n * k);
|
|
31
|
+
search(n, x, k, distances.data(), labels);
|
|
33
32
|
}
|
|
34
33
|
|
|
35
34
|
void IndexBinary::add_with_ids(idx_t, const uint8_t *, const idx_t *) {
|