faiss 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +36 -33
- data/vendor/faiss/faiss/AutoTune.h +6 -3
- data/vendor/faiss/faiss/Clustering.cpp +16 -12
- data/vendor/faiss/faiss/Index.cpp +3 -4
- data/vendor/faiss/faiss/Index.h +3 -3
- data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
- data/vendor/faiss/faiss/IndexBinary.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
- data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
- data/vendor/faiss/faiss/IndexFlat.h +0 -51
- data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
- data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
- data/vendor/faiss/faiss/IndexIVF.h +22 -15
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
- data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
- data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
- data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
- data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
- data/vendor/faiss/faiss/IndexRefine.h +73 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
- data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
- data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
- data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
- data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
- data/vendor/faiss/faiss/impl/io.cpp +33 -2
- data/vendor/faiss/faiss/impl/io.h +7 -2
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
- data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
- data/vendor/faiss/faiss/index_factory.cpp +112 -7
- data/vendor/faiss/faiss/index_io.h +1 -48
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
- data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
- data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
- data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
- data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
- data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
- data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
- data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
- data/vendor/faiss/faiss/utils/Heap.h +61 -50
- data/vendor/faiss/faiss/utils/distances.cpp +164 -319
- data/vendor/faiss/faiss/utils/distances.h +28 -20
- data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
- data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
- data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
- data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
- data/vendor/faiss/faiss/utils/hamming.h +2 -7
- data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
- data/vendor/faiss/faiss/utils/partitioning.h +69 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
- data/vendor/faiss/faiss/utils/simdlib.h +31 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
- metadata +43 -141
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
- data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
- data/vendor/faiss/c_api/AutoTune_c.h +0 -66
- data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
- data/vendor/faiss/c_api/Clustering_c.h +0 -123
- data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
- data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
- data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
- data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
- data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
- data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
- data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
- data/vendor/faiss/c_api/IndexShards_c.h +0 -39
- data/vendor/faiss/c_api/Index_c.cpp +0 -105
- data/vendor/faiss/c_api/Index_c.h +0 -183
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
- data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
- data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
- data/vendor/faiss/c_api/clone_index_c.h +0 -32
- data/vendor/faiss/c_api/error_c.h +0 -42
- data/vendor/faiss/c_api/error_impl.cpp +0 -27
- data/vendor/faiss/c_api/error_impl.h +0 -16
- data/vendor/faiss/c_api/faiss_c.h +0 -58
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
- data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
- data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
- data/vendor/faiss/c_api/index_factory_c.h +0 -30
- data/vendor/faiss/c_api/index_io_c.cpp +0 -42
- data/vendor/faiss/c_api/index_io_c.h +0 -50
- data/vendor/faiss/c_api/macros_impl.h +0 -110
- data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
- data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
- data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
- data/vendor/faiss/misc/test_blas.cpp +0 -87
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
- data/vendor/faiss/tests/test_merge.cpp +0 -260
- data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
- data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
- data/vendor/faiss/tests/test_params_override.cpp +0 -236
- data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
- data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
- data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
- data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4541b0c40468723a8bed3db80d5778fb156afa999cbbd14b83653285b3ae6267
|
|
4
|
+
data.tar.gz: 2beeacbad82a578e2a410938bc4447d73699ef3998c146b4309e4b44816f5e33
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 92d87492ff627e094ef29a48c8e4579dd976846ec9a95d94c45b816391dcea332d370a5fbde9c3f2bdc11964e061557d0e150b223ab6ac0209c092f1e1cd4a6e
|
|
7
|
+
data.tar.gz: 86fc2aaf3151545f24128429cf1369074809f8449f277a2676e9dacea76c156191b3d6df6df83f0d7ac0f061bfdf537b81ef57f6cbbb861e4ba4b592d06e8ca2
|
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
MIT License
|
|
2
2
|
|
|
3
3
|
Copyright (c) Facebook, Inc. and its affiliates.
|
|
4
|
-
Copyright (c) 2020 Andrew Kane
|
|
4
|
+
Copyright (c) 2020-2021 Andrew Kane
|
|
5
5
|
|
|
6
6
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
7
|
of this software and associated documentation files (the "Software"), to deal
|
data/README.md
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
Learn more about [Faiss](https://engineering.fb.com/data-infrastructure/faiss-a-library-for-efficient-similarity-search/)
|
|
6
6
|
|
|
7
|
-
[](https://github.com/ankane/faiss/actions)
|
|
8
8
|
|
|
9
9
|
## Installation
|
|
10
10
|
|
data/ext/faiss/extconf.rb
CHANGED
|
@@ -9,7 +9,7 @@ $CXXFLAGS << " -std=c++11 -march=native -DFINTEGER=int"
|
|
|
9
9
|
ext = File.expand_path(".", __dir__)
|
|
10
10
|
vendor = File.expand_path("../../vendor/faiss", __dir__)
|
|
11
11
|
|
|
12
|
-
$srcs = Dir["{#{ext},#{vendor}/faiss,#{vendor}/faiss/impl
|
|
12
|
+
$srcs = Dir["{#{ext},#{vendor}/faiss,#{vendor}/faiss/{impl,invlists,utils}}/*.{cpp}"]
|
|
13
13
|
$objs = $srcs.map { |v| v.sub(/cpp\z/, "o") }
|
|
14
14
|
$INCFLAGS << " -I#{vendor}"
|
|
15
15
|
$VPATH << vendor
|
data/lib/faiss/version.rb
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
|
|
16
16
|
#include <cinttypes>
|
|
17
17
|
#include <cmath>
|
|
18
|
+
#include <typeinfo>
|
|
18
19
|
|
|
19
20
|
#include <faiss/impl/FaissAssert.h>
|
|
20
21
|
#include <faiss/utils/utils.h>
|
|
@@ -32,6 +33,7 @@
|
|
|
32
33
|
#include <faiss/MetaIndexes.h>
|
|
33
34
|
#include <faiss/IndexScalarQuantizer.h>
|
|
34
35
|
#include <faiss/IndexHNSW.h>
|
|
36
|
+
#include <faiss/IndexRefine.h>
|
|
35
37
|
|
|
36
38
|
#include <faiss/IndexBinaryFlat.h>
|
|
37
39
|
#include <faiss/IndexBinaryHNSW.h>
|
|
@@ -234,7 +236,7 @@ void OperatingPoints::display (bool only_optimal) const
|
|
|
234
236
|
{
|
|
235
237
|
const std::vector<OperatingPoint> &pts =
|
|
236
238
|
only_optimal ? optimal_pts : all_pts;
|
|
237
|
-
printf("Tested %zd operating points, %zd ones are optimal:\n",
|
|
239
|
+
printf("Tested %zd operating points, %zd ones are Pareto-optimal:\n",
|
|
238
240
|
all_pts.size(), optimal_pts.size());
|
|
239
241
|
|
|
240
242
|
for (int i = 0; i < pts.size(); i++) {
|
|
@@ -333,7 +335,7 @@ static void init_pq_ParameterRange (const ProductQuantizer & pq,
|
|
|
333
335
|
pr.values.push_back (pq.code_size * 8);
|
|
334
336
|
}
|
|
335
337
|
|
|
336
|
-
ParameterRange &ParameterSpace::add_range(const
|
|
338
|
+
ParameterRange &ParameterSpace::add_range(const std::string & name)
|
|
337
339
|
{
|
|
338
340
|
for (auto & pr : parameter_ranges) {
|
|
339
341
|
if (pr.name == name) {
|
|
@@ -346,13 +348,13 @@ ParameterRange &ParameterSpace::add_range(const char * name)
|
|
|
346
348
|
}
|
|
347
349
|
|
|
348
350
|
|
|
349
|
-
/// initialize with reasonable parameters for
|
|
351
|
+
/// initialize with reasonable parameters for this type of index
|
|
350
352
|
void ParameterSpace::initialize (const Index * index)
|
|
351
353
|
{
|
|
352
354
|
if (DC (IndexPreTransform)) {
|
|
353
355
|
index = ix->index;
|
|
354
356
|
}
|
|
355
|
-
if (DC (
|
|
357
|
+
if (DC (IndexRefine)) {
|
|
356
358
|
ParameterRange & pr = add_range("k_factor_rf");
|
|
357
359
|
for (int i = 0; i <= 6; i++) {
|
|
358
360
|
pr.values.push_back (1 << i);
|
|
@@ -372,12 +374,14 @@ void ParameterSpace::initialize (const Index * index)
|
|
|
372
374
|
pr.values.push_back (nprobe);
|
|
373
375
|
}
|
|
374
376
|
}
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
377
|
+
ParameterSpace ivf_pspace;
|
|
378
|
+
ivf_pspace.initialize(ix->quantizer);
|
|
379
|
+
|
|
380
|
+
for (const ParameterRange & p: ivf_pspace.parameter_ranges) {
|
|
381
|
+
ParameterRange & pr = add_range("quantizer_" + p.name);
|
|
382
|
+
pr.values = p.values;
|
|
380
383
|
}
|
|
384
|
+
|
|
381
385
|
}
|
|
382
386
|
if (DC (IndexPQ)) {
|
|
383
387
|
ParameterRange & pr = add_range("ht");
|
|
@@ -446,7 +450,7 @@ void ParameterSpace::set_index_parameters (
|
|
|
446
450
|
tok = strtok_r (nullptr, " ,", &ptr)) {
|
|
447
451
|
char name[100];
|
|
448
452
|
double val;
|
|
449
|
-
int ret = sscanf (tok, "%
|
|
453
|
+
int ret = sscanf (tok, "%99[^=]=%lf", name, &val);
|
|
450
454
|
FAISS_THROW_IF_NOT_FMT (
|
|
451
455
|
ret == 2, "could not interpret parameters %s", tok);
|
|
452
456
|
set_index_parameter (index, name, val);
|
|
@@ -457,44 +461,38 @@ void ParameterSpace::set_index_parameters (
|
|
|
457
461
|
void ParameterSpace::set_index_parameter (
|
|
458
462
|
Index * index, const std::string & name, double val) const
|
|
459
463
|
{
|
|
460
|
-
if (verbose > 1)
|
|
461
|
-
printf("
|
|
464
|
+
if (verbose > 1) {
|
|
465
|
+
printf(" set_index_parameter %s=%g\n", name.c_str(), val);
|
|
466
|
+
}
|
|
462
467
|
|
|
463
468
|
if (name == "verbose") {
|
|
464
469
|
index->verbose = int(val);
|
|
465
470
|
// and fall through to also enable it on sub-indexes
|
|
466
471
|
}
|
|
467
|
-
if (DC (
|
|
472
|
+
if (DC (IndexIDMap)) {
|
|
468
473
|
set_index_parameter (ix->index, name, val);
|
|
469
474
|
return;
|
|
470
475
|
}
|
|
471
|
-
if (DC (
|
|
472
|
-
|
|
473
|
-
auto fn =
|
|
474
|
-
[this, name, val](int, Index* subIndex) {
|
|
475
|
-
set_index_parameter(subIndex, name, val);
|
|
476
|
-
};
|
|
477
|
-
|
|
478
|
-
ix->runOnIndex(fn);
|
|
476
|
+
if (DC (IndexPreTransform)) {
|
|
477
|
+
set_index_parameter (ix->index, name, val);
|
|
479
478
|
return;
|
|
480
479
|
}
|
|
481
|
-
if (DC (
|
|
480
|
+
if (DC (ThreadedIndex<Index>)) {
|
|
482
481
|
// call on all sub-indexes
|
|
483
482
|
auto fn =
|
|
484
|
-
|
|
485
|
-
|
|
483
|
+
[this, name, val](int /* no */, Index* subIndex) {
|
|
484
|
+
set_index_parameter(subIndex, name, val);
|
|
486
485
|
};
|
|
487
|
-
|
|
488
486
|
ix->runOnIndex(fn);
|
|
489
487
|
return;
|
|
490
488
|
}
|
|
491
|
-
if (DC (
|
|
489
|
+
if (DC (IndexRefine)) {
|
|
492
490
|
if (name == "k_factor_rf") {
|
|
493
491
|
ix->k_factor = int(val);
|
|
494
492
|
return;
|
|
495
493
|
}
|
|
496
494
|
// otherwise it is for the sub-index
|
|
497
|
-
set_index_parameter (
|
|
495
|
+
set_index_parameter (ix->base_index, name, val);
|
|
498
496
|
return;
|
|
499
497
|
}
|
|
500
498
|
|
|
@@ -504,10 +502,7 @@ void ParameterSpace::set_index_parameter (
|
|
|
504
502
|
}
|
|
505
503
|
|
|
506
504
|
if (name == "nprobe") {
|
|
507
|
-
if (DC (
|
|
508
|
-
set_index_parameter (ix->index, name, val);
|
|
509
|
-
return;
|
|
510
|
-
} else if (DC (IndexIVF)) {
|
|
505
|
+
if (DC (IndexIVF)) {
|
|
511
506
|
ix->nprobe = int(val);
|
|
512
507
|
return;
|
|
513
508
|
}
|
|
@@ -559,6 +554,14 @@ void ParameterSpace::set_index_parameter (
|
|
|
559
554
|
}
|
|
560
555
|
}
|
|
561
556
|
|
|
557
|
+
if (name.find("quantizer_") == 0) {
|
|
558
|
+
if (DC(IndexIVF)) {
|
|
559
|
+
std::string sub_name = name.substr(strlen("quantizer_"));
|
|
560
|
+
set_index_parameter(ix->quantizer, sub_name, val);
|
|
561
|
+
return;
|
|
562
|
+
}
|
|
563
|
+
}
|
|
564
|
+
|
|
562
565
|
FAISS_THROW_FMT ("ParameterSpace::set_index_parameter:"
|
|
563
566
|
"could not set parameter %s",
|
|
564
567
|
name.c_str());
|
|
@@ -707,8 +710,8 @@ void ParameterSpace::explore (Index *index,
|
|
|
707
710
|
bool keep = ops->add (perf, t_search, combination_name (cno), cno);
|
|
708
711
|
|
|
709
712
|
if (verbose)
|
|
710
|
-
printf(" perf %.3f t %.3f (%d
|
|
711
|
-
perf, t_search, nrun,
|
|
713
|
+
printf(" perf %.3f t %.3f (%d %s) %s\n",
|
|
714
|
+
perf, t_search, nrun, nrun >= 2 ? "runs" : "run",
|
|
712
715
|
keep ? "*" : "");
|
|
713
716
|
}
|
|
714
717
|
}
|
|
@@ -81,7 +81,10 @@ struct IntersectionCriterion: AutoTuneCriterion {
|
|
|
81
81
|
/**
|
|
82
82
|
* Maintains a list of experimental results. Each operating point is a
|
|
83
83
|
* (perf, t, key) triplet, where higher perf and lower t is
|
|
84
|
-
* better. The key field is an arbitrary identifier for the operating point
|
|
84
|
+
* better. The key field is an arbitrary identifier for the operating point.
|
|
85
|
+
*
|
|
86
|
+
* Includes primitives to extract the Pareto-optimal operating points in the
|
|
87
|
+
* (perf, t) space.
|
|
85
88
|
*/
|
|
86
89
|
|
|
87
90
|
struct OperatingPoint {
|
|
@@ -168,7 +171,7 @@ struct ParameterSpace {
|
|
|
168
171
|
void display () const;
|
|
169
172
|
|
|
170
173
|
/// add a new parameter (or return it if it exists)
|
|
171
|
-
ParameterRange &add_range(const
|
|
174
|
+
ParameterRange &add_range(const std::string & name);
|
|
172
175
|
|
|
173
176
|
/// initialize with reasonable parameters for the index
|
|
174
177
|
virtual void initialize (const Index * index);
|
|
@@ -179,7 +182,7 @@ struct ParameterSpace {
|
|
|
179
182
|
/// set a combination of parameters described by a string
|
|
180
183
|
void set_index_parameters (Index *index, const char *param_string) const;
|
|
181
184
|
|
|
182
|
-
/// set one of the parameters
|
|
185
|
+
/// set one of the parameters, returns whether setting was successful
|
|
183
186
|
virtual void set_index_parameter (
|
|
184
187
|
Index * index, const std::string & name, double val) const;
|
|
185
188
|
|
|
@@ -269,6 +269,7 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
|
|
|
269
269
|
const Index * codec, Index & index,
|
|
270
270
|
const float *weights) {
|
|
271
271
|
|
|
272
|
+
|
|
272
273
|
FAISS_THROW_IF_NOT_FMT (nx >= k,
|
|
273
274
|
"Number of training points (%" PRId64 ") should be at least "
|
|
274
275
|
"as large as number of clusters (%zd)", nx, k);
|
|
@@ -350,8 +351,9 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
|
|
|
350
351
|
std::unique_ptr<float []> dis(new float[nx]);
|
|
351
352
|
|
|
352
353
|
// remember best iteration for redo
|
|
353
|
-
|
|
354
|
-
|
|
354
|
+
bool lower_is_better = index.metric_type != METRIC_INNER_PRODUCT;
|
|
355
|
+
float best_obj = lower_is_better ? HUGE_VALF : -HUGE_VALF;
|
|
356
|
+
std::vector<ClusteringIterationStats> best_iteration_stats;
|
|
355
357
|
std::vector<float> best_centroids;
|
|
356
358
|
|
|
357
359
|
// support input centroids
|
|
@@ -417,7 +419,7 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
|
|
|
417
419
|
|
|
418
420
|
// k-means iterations
|
|
419
421
|
|
|
420
|
-
float
|
|
422
|
+
float obj = 0;
|
|
421
423
|
for (int i = 0; i < niter; i++) {
|
|
422
424
|
double t0s = getmillisecs();
|
|
423
425
|
|
|
@@ -440,10 +442,10 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
|
|
|
440
442
|
InterruptCallback::check();
|
|
441
443
|
t_search_tot += getmillisecs() - t0s;
|
|
442
444
|
|
|
443
|
-
// accumulate
|
|
444
|
-
|
|
445
|
+
// accumulate objective
|
|
446
|
+
obj = 0;
|
|
445
447
|
for (int j = 0; j < nx; j++) {
|
|
446
|
-
|
|
448
|
+
obj += dis[j];
|
|
447
449
|
}
|
|
448
450
|
|
|
449
451
|
// update the centroids
|
|
@@ -463,8 +465,9 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
|
|
|
463
465
|
|
|
464
466
|
// collect statistics
|
|
465
467
|
ClusteringIterationStats stats =
|
|
466
|
-
{
|
|
467
|
-
t_search_tot / 1000,
|
|
468
|
+
{ obj, (getmillisecs() - t0) / 1000.0,
|
|
469
|
+
t_search_tot / 1000,
|
|
470
|
+
imbalance_factor (nx, k, assign.get()),
|
|
468
471
|
nsplit };
|
|
469
472
|
iteration_stats.push_back(stats);
|
|
470
473
|
|
|
@@ -491,20 +494,21 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
|
|
|
491
494
|
|
|
492
495
|
if (verbose) printf("\n");
|
|
493
496
|
if (nredo > 1) {
|
|
494
|
-
if (
|
|
497
|
+
if ((lower_is_better && obj < best_obj) ||
|
|
498
|
+
(!lower_is_better && obj > best_obj)) {
|
|
495
499
|
if (verbose) {
|
|
496
500
|
printf ("Objective improved: keep new clusters\n");
|
|
497
501
|
}
|
|
498
502
|
best_centroids = centroids;
|
|
499
|
-
|
|
500
|
-
|
|
503
|
+
best_iteration_stats = iteration_stats;
|
|
504
|
+
best_obj = obj;
|
|
501
505
|
}
|
|
502
506
|
index.reset ();
|
|
503
507
|
}
|
|
504
508
|
}
|
|
505
509
|
if (nredo > 1) {
|
|
506
510
|
centroids = best_centroids;
|
|
507
|
-
iteration_stats =
|
|
511
|
+
iteration_stats = best_iteration_stats;
|
|
508
512
|
index.reset();
|
|
509
513
|
index.add(k, best_centroids.data());
|
|
510
514
|
}
|
|
@@ -34,11 +34,10 @@ void Index::range_search (idx_t , const float *, float,
|
|
|
34
34
|
FAISS_THROW_MSG ("range search not implemented");
|
|
35
35
|
}
|
|
36
36
|
|
|
37
|
-
void Index::assign (idx_t n, const float * x, idx_t * labels, idx_t k)
|
|
37
|
+
void Index::assign (idx_t n, const float * x, idx_t * labels, idx_t k) const
|
|
38
38
|
{
|
|
39
|
-
float
|
|
40
|
-
|
|
41
|
-
search (n, x, k, distances, labels);
|
|
39
|
+
std::vector<float> distances(n * k);
|
|
40
|
+
search (n, x, k, distances.data(), labels);
|
|
42
41
|
}
|
|
43
42
|
|
|
44
43
|
void Index::add_with_ids(
|
data/vendor/faiss/faiss/Index.h
CHANGED
|
@@ -17,8 +17,8 @@
|
|
|
17
17
|
#include <sstream>
|
|
18
18
|
|
|
19
19
|
#define FAISS_VERSION_MAJOR 1
|
|
20
|
-
#define FAISS_VERSION_MINOR
|
|
21
|
-
#define FAISS_VERSION_PATCH
|
|
20
|
+
#define FAISS_VERSION_MINOR 7
|
|
21
|
+
#define FAISS_VERSION_PATCH 0
|
|
22
22
|
|
|
23
23
|
/**
|
|
24
24
|
* @namespace faiss
|
|
@@ -133,7 +133,7 @@ struct Index {
|
|
|
133
133
|
* @param x input vectors to search, size n * d
|
|
134
134
|
* @param labels output labels of the NNs, size n*k
|
|
135
135
|
*/
|
|
136
|
-
void assign (idx_t n, const float * x, idx_t * labels, idx_t k = 1);
|
|
136
|
+
virtual void assign (idx_t n, const float * x, idx_t * labels, idx_t k = 1) const;
|
|
137
137
|
|
|
138
138
|
/// removes all elements from the database.
|
|
139
139
|
virtual void reset() = 0;
|
|
@@ -26,10 +26,9 @@ void IndexBinary::range_search(idx_t, const uint8_t *, int,
|
|
|
26
26
|
FAISS_THROW_MSG("range search not implemented");
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
-
void IndexBinary::assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k) {
|
|
30
|
-
int
|
|
31
|
-
|
|
32
|
-
search(n, x, k, distances, labels);
|
|
29
|
+
void IndexBinary::assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k) const {
|
|
30
|
+
std::vector<int> distances(n * k);
|
|
31
|
+
search(n, x, k, distances.data(), labels);
|
|
33
32
|
}
|
|
34
33
|
|
|
35
34
|
void IndexBinary::add_with_ids(idx_t, const uint8_t *, const idx_t *) {
|
|
@@ -120,7 +120,7 @@ struct IndexBinary {
|
|
|
120
120
|
* @param x input vectors to search, size n * d / 8
|
|
121
121
|
* @param labels output labels of the NNs, size n*k
|
|
122
122
|
*/
|
|
123
|
-
void assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k = 1);
|
|
123
|
+
void assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k = 1) const;
|
|
124
124
|
|
|
125
125
|
/// Removes all elements from the database.
|
|
126
126
|
virtual void reset() = 0;
|
|
@@ -18,16 +18,7 @@
|
|
|
18
18
|
|
|
19
19
|
#include <faiss/impl/AuxIndexStructures.h>
|
|
20
20
|
#include <faiss/impl/FaissAssert.h>
|
|
21
|
-
|
|
22
|
-
#ifdef _MSC_VER
|
|
23
|
-
#include <intrin.h>
|
|
24
|
-
|
|
25
|
-
static inline int __builtin_ctzll(uint64_t x) {
|
|
26
|
-
unsigned long ret;
|
|
27
|
-
_BitScanForward64(&ret, x);
|
|
28
|
-
return (int)ret;
|
|
29
|
-
}
|
|
30
|
-
#endif // _MSC_VER
|
|
21
|
+
#include <faiss/impl/platform_macros.h>
|
|
31
22
|
|
|
32
23
|
namespace faiss {
|
|
33
24
|
|
|
@@ -145,8 +136,7 @@ struct KnnSearchResults {
|
|
|
145
136
|
|
|
146
137
|
inline void add (float dis, idx_t id) {
|
|
147
138
|
if (dis < heap_sim[0]) {
|
|
148
|
-
|
|
149
|
-
heap_push<C> (k, heap_sim, heap_ids, dis, id);
|
|
139
|
+
heap_replace_top<C> (k, heap_sim, heap_ids, dis, id);
|
|
150
140
|
}
|
|
151
141
|
}
|
|
152
142
|
|
|
@@ -319,9 +319,8 @@ struct IVFBinaryScannerL2: BinaryInvertedListScanner {
|
|
|
319
319
|
for (size_t j = 0; j < n; j++) {
|
|
320
320
|
uint32_t dis = hc.hamming (codes);
|
|
321
321
|
if (dis < simi[0]) {
|
|
322
|
-
heap_pop<C> (k, simi, idxi);
|
|
323
322
|
idx_t id = store_pairs ? lo_build(list_no, j) : ids[j];
|
|
324
|
-
|
|
323
|
+
heap_replace_top<C> (k, simi, idxi, dis, id);
|
|
325
324
|
nup++;
|
|
326
325
|
}
|
|
327
326
|
codes += code_size;
|
|
@@ -226,155 +226,7 @@ void IndexFlat::sa_decode (idx_t n, const uint8_t *bytes, float *x) const
|
|
|
226
226
|
|
|
227
227
|
|
|
228
228
|
|
|
229
|
-
/***************************************************
|
|
230
|
-
* IndexFlatL2BaseShift
|
|
231
|
-
***************************************************/
|
|
232
|
-
|
|
233
|
-
IndexFlatL2BaseShift::IndexFlatL2BaseShift (idx_t d, size_t nshift, const float *shift):
|
|
234
|
-
IndexFlatL2 (d), shift (nshift)
|
|
235
|
-
{
|
|
236
|
-
memcpy (this->shift.data(), shift, sizeof(float) * nshift);
|
|
237
|
-
}
|
|
238
229
|
|
|
239
|
-
void IndexFlatL2BaseShift::search (
|
|
240
|
-
idx_t n,
|
|
241
|
-
const float *x,
|
|
242
|
-
idx_t k,
|
|
243
|
-
float *distances,
|
|
244
|
-
idx_t *labels) const
|
|
245
|
-
{
|
|
246
|
-
FAISS_THROW_IF_NOT (shift.size() == ntotal);
|
|
247
|
-
|
|
248
|
-
float_maxheap_array_t res = {
|
|
249
|
-
size_t(n), size_t(k), labels, distances};
|
|
250
|
-
knn_L2sqr_base_shift (x, xb.data(), d, n, ntotal, &res, shift.data());
|
|
251
|
-
}
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
/***************************************************
|
|
256
|
-
* IndexRefineFlat
|
|
257
|
-
***************************************************/
|
|
258
|
-
|
|
259
|
-
IndexRefineFlat::IndexRefineFlat (Index *base_index):
|
|
260
|
-
Index (base_index->d, base_index->metric_type),
|
|
261
|
-
refine_index (base_index->d, base_index->metric_type),
|
|
262
|
-
base_index (base_index), own_fields (false),
|
|
263
|
-
k_factor (1)
|
|
264
|
-
{
|
|
265
|
-
is_trained = base_index->is_trained;
|
|
266
|
-
FAISS_THROW_IF_NOT_MSG (base_index->ntotal == 0,
|
|
267
|
-
"base_index should be empty in the beginning");
|
|
268
|
-
}
|
|
269
|
-
|
|
270
|
-
IndexRefineFlat::IndexRefineFlat () {
|
|
271
|
-
base_index = nullptr;
|
|
272
|
-
own_fields = false;
|
|
273
|
-
k_factor = 1;
|
|
274
|
-
}
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
void IndexRefineFlat::train (idx_t n, const float *x)
|
|
278
|
-
{
|
|
279
|
-
base_index->train (n, x);
|
|
280
|
-
is_trained = true;
|
|
281
|
-
}
|
|
282
|
-
|
|
283
|
-
void IndexRefineFlat::add (idx_t n, const float *x) {
|
|
284
|
-
FAISS_THROW_IF_NOT (is_trained);
|
|
285
|
-
base_index->add (n, x);
|
|
286
|
-
refine_index.add (n, x);
|
|
287
|
-
ntotal = refine_index.ntotal;
|
|
288
|
-
}
|
|
289
|
-
|
|
290
|
-
void IndexRefineFlat::reset ()
|
|
291
|
-
{
|
|
292
|
-
base_index->reset ();
|
|
293
|
-
refine_index.reset ();
|
|
294
|
-
ntotal = 0;
|
|
295
|
-
}
|
|
296
|
-
|
|
297
|
-
namespace {
|
|
298
|
-
typedef faiss::Index::idx_t idx_t;
|
|
299
|
-
|
|
300
|
-
template<class C>
|
|
301
|
-
static void reorder_2_heaps (
|
|
302
|
-
idx_t n,
|
|
303
|
-
idx_t k, idx_t *labels, float *distances,
|
|
304
|
-
idx_t k_base, const idx_t *base_labels, const float *base_distances)
|
|
305
|
-
{
|
|
306
|
-
#pragma omp parallel for
|
|
307
|
-
for (idx_t i = 0; i < n; i++) {
|
|
308
|
-
idx_t *idxo = labels + i * k;
|
|
309
|
-
float *diso = distances + i * k;
|
|
310
|
-
const idx_t *idxi = base_labels + i * k_base;
|
|
311
|
-
const float *disi = base_distances + i * k_base;
|
|
312
|
-
|
|
313
|
-
heap_heapify<C> (k, diso, idxo, disi, idxi, k);
|
|
314
|
-
if (k_base != k) { // add remaining elements
|
|
315
|
-
heap_addn<C> (k, diso, idxo, disi + k, idxi + k, k_base - k);
|
|
316
|
-
}
|
|
317
|
-
heap_reorder<C> (k, diso, idxo);
|
|
318
|
-
}
|
|
319
|
-
}
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
}
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
void IndexRefineFlat::search (
|
|
326
|
-
idx_t n, const float *x, idx_t k,
|
|
327
|
-
float *distances, idx_t *labels) const
|
|
328
|
-
{
|
|
329
|
-
FAISS_THROW_IF_NOT (is_trained);
|
|
330
|
-
idx_t k_base = idx_t (k * k_factor);
|
|
331
|
-
idx_t * base_labels = labels;
|
|
332
|
-
float * base_distances = distances;
|
|
333
|
-
ScopeDeleter<idx_t> del1;
|
|
334
|
-
ScopeDeleter<float> del2;
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
if (k != k_base) {
|
|
338
|
-
base_labels = new idx_t [n * k_base];
|
|
339
|
-
del1.set (base_labels);
|
|
340
|
-
base_distances = new float [n * k_base];
|
|
341
|
-
del2.set (base_distances);
|
|
342
|
-
}
|
|
343
|
-
|
|
344
|
-
base_index->search (n, x, k_base, base_distances, base_labels);
|
|
345
|
-
|
|
346
|
-
for (int i = 0; i < n * k_base; i++)
|
|
347
|
-
assert (base_labels[i] >= -1 &&
|
|
348
|
-
base_labels[i] < ntotal);
|
|
349
|
-
|
|
350
|
-
// compute refined distances
|
|
351
|
-
refine_index.compute_distance_subset (
|
|
352
|
-
n, x, k_base, base_distances, base_labels);
|
|
353
|
-
|
|
354
|
-
// sort and store result
|
|
355
|
-
if (metric_type == METRIC_L2) {
|
|
356
|
-
typedef CMax <float, idx_t> C;
|
|
357
|
-
reorder_2_heaps<C> (
|
|
358
|
-
n, k, labels, distances,
|
|
359
|
-
k_base, base_labels, base_distances);
|
|
360
|
-
|
|
361
|
-
} else if (metric_type == METRIC_INNER_PRODUCT) {
|
|
362
|
-
typedef CMin <float, idx_t> C;
|
|
363
|
-
reorder_2_heaps<C> (
|
|
364
|
-
n, k, labels, distances,
|
|
365
|
-
k_base, base_labels, base_distances);
|
|
366
|
-
} else {
|
|
367
|
-
FAISS_THROW_MSG("Metric type not supported");
|
|
368
|
-
}
|
|
369
|
-
|
|
370
|
-
}
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
IndexRefineFlat::~IndexRefineFlat ()
|
|
375
|
-
{
|
|
376
|
-
if (own_fields) delete base_index;
|
|
377
|
-
}
|
|
378
230
|
|
|
379
231
|
/***************************************************
|
|
380
232
|
* IndexFlat1D
|