faiss 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +36 -33
- data/vendor/faiss/faiss/AutoTune.h +6 -3
- data/vendor/faiss/faiss/Clustering.cpp +16 -12
- data/vendor/faiss/faiss/Index.cpp +3 -4
- data/vendor/faiss/faiss/Index.h +3 -3
- data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
- data/vendor/faiss/faiss/IndexBinary.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
- data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
- data/vendor/faiss/faiss/IndexFlat.h +0 -51
- data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
- data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
- data/vendor/faiss/faiss/IndexIVF.h +22 -15
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
- data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
- data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
- data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
- data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
- data/vendor/faiss/faiss/IndexRefine.h +73 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
- data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
- data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
- data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
- data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
- data/vendor/faiss/faiss/impl/io.cpp +33 -2
- data/vendor/faiss/faiss/impl/io.h +7 -2
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
- data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
- data/vendor/faiss/faiss/index_factory.cpp +112 -7
- data/vendor/faiss/faiss/index_io.h +1 -48
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
- data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
- data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
- data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
- data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
- data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
- data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
- data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
- data/vendor/faiss/faiss/utils/Heap.h +61 -50
- data/vendor/faiss/faiss/utils/distances.cpp +164 -319
- data/vendor/faiss/faiss/utils/distances.h +28 -20
- data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
- data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
- data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
- data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
- data/vendor/faiss/faiss/utils/hamming.h +2 -7
- data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
- data/vendor/faiss/faiss/utils/partitioning.h +69 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
- data/vendor/faiss/faiss/utils/simdlib.h +31 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
- metadata +43 -141
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
- data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
- data/vendor/faiss/c_api/AutoTune_c.h +0 -66
- data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
- data/vendor/faiss/c_api/Clustering_c.h +0 -123
- data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
- data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
- data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
- data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
- data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
- data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
- data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
- data/vendor/faiss/c_api/IndexShards_c.h +0 -39
- data/vendor/faiss/c_api/Index_c.cpp +0 -105
- data/vendor/faiss/c_api/Index_c.h +0 -183
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
- data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
- data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
- data/vendor/faiss/c_api/clone_index_c.h +0 -32
- data/vendor/faiss/c_api/error_c.h +0 -42
- data/vendor/faiss/c_api/error_impl.cpp +0 -27
- data/vendor/faiss/c_api/error_impl.h +0 -16
- data/vendor/faiss/c_api/faiss_c.h +0 -58
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
- data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
- data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
- data/vendor/faiss/c_api/index_factory_c.h +0 -30
- data/vendor/faiss/c_api/index_io_c.cpp +0 -42
- data/vendor/faiss/c_api/index_io_c.h +0 -50
- data/vendor/faiss/c_api/macros_impl.h +0 -110
- data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
- data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
- data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
- data/vendor/faiss/misc/test_blas.cpp +0 -87
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
- data/vendor/faiss/tests/test_merge.cpp +0 -260
- data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
- data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
- data/vendor/faiss/tests/test_params_override.cpp +0 -236
- data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
- data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
- data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
- data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4541b0c40468723a8bed3db80d5778fb156afa999cbbd14b83653285b3ae6267
|
4
|
+
data.tar.gz: 2beeacbad82a578e2a410938bc4447d73699ef3998c146b4309e4b44816f5e33
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 92d87492ff627e094ef29a48c8e4579dd976846ec9a95d94c45b816391dcea332d370a5fbde9c3f2bdc11964e061557d0e150b223ab6ac0209c092f1e1cd4a6e
|
7
|
+
data.tar.gz: 86fc2aaf3151545f24128429cf1369074809f8449f277a2676e9dacea76c156191b3d6df6df83f0d7ac0f061bfdf537b81ef57f6cbbb861e4ba4b592d06e8ca2
|
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
MIT License
|
2
2
|
|
3
3
|
Copyright (c) Facebook, Inc. and its affiliates.
|
4
|
-
Copyright (c) 2020 Andrew Kane
|
4
|
+
Copyright (c) 2020-2021 Andrew Kane
|
5
5
|
|
6
6
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
7
|
of this software and associated documentation files (the "Software"), to deal
|
data/README.md
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
Learn more about [Faiss](https://engineering.fb.com/data-infrastructure/faiss-a-library-for-efficient-similarity-search/)
|
6
6
|
|
7
|
-
[![Build Status](https://
|
7
|
+
[![Build Status](https://github.com/ankane/faiss/workflows/build/badge.svg?branch=master)](https://github.com/ankane/faiss/actions)
|
8
8
|
|
9
9
|
## Installation
|
10
10
|
|
data/ext/faiss/extconf.rb
CHANGED
@@ -9,7 +9,7 @@ $CXXFLAGS << " -std=c++11 -march=native -DFINTEGER=int"
|
|
9
9
|
ext = File.expand_path(".", __dir__)
|
10
10
|
vendor = File.expand_path("../../vendor/faiss", __dir__)
|
11
11
|
|
12
|
-
$srcs = Dir["{#{ext},#{vendor}/faiss,#{vendor}/faiss/impl
|
12
|
+
$srcs = Dir["{#{ext},#{vendor}/faiss,#{vendor}/faiss/{impl,invlists,utils}}/*.{cpp}"]
|
13
13
|
$objs = $srcs.map { |v| v.sub(/cpp\z/, "o") }
|
14
14
|
$INCFLAGS << " -I#{vendor}"
|
15
15
|
$VPATH << vendor
|
data/lib/faiss/version.rb
CHANGED
@@ -15,6 +15,7 @@
|
|
15
15
|
|
16
16
|
#include <cinttypes>
|
17
17
|
#include <cmath>
|
18
|
+
#include <typeinfo>
|
18
19
|
|
19
20
|
#include <faiss/impl/FaissAssert.h>
|
20
21
|
#include <faiss/utils/utils.h>
|
@@ -32,6 +33,7 @@
|
|
32
33
|
#include <faiss/MetaIndexes.h>
|
33
34
|
#include <faiss/IndexScalarQuantizer.h>
|
34
35
|
#include <faiss/IndexHNSW.h>
|
36
|
+
#include <faiss/IndexRefine.h>
|
35
37
|
|
36
38
|
#include <faiss/IndexBinaryFlat.h>
|
37
39
|
#include <faiss/IndexBinaryHNSW.h>
|
@@ -234,7 +236,7 @@ void OperatingPoints::display (bool only_optimal) const
|
|
234
236
|
{
|
235
237
|
const std::vector<OperatingPoint> &pts =
|
236
238
|
only_optimal ? optimal_pts : all_pts;
|
237
|
-
printf("Tested %zd operating points, %zd ones are optimal:\n",
|
239
|
+
printf("Tested %zd operating points, %zd ones are Pareto-optimal:\n",
|
238
240
|
all_pts.size(), optimal_pts.size());
|
239
241
|
|
240
242
|
for (int i = 0; i < pts.size(); i++) {
|
@@ -333,7 +335,7 @@ static void init_pq_ParameterRange (const ProductQuantizer & pq,
|
|
333
335
|
pr.values.push_back (pq.code_size * 8);
|
334
336
|
}
|
335
337
|
|
336
|
-
ParameterRange &ParameterSpace::add_range(const
|
338
|
+
ParameterRange &ParameterSpace::add_range(const std::string & name)
|
337
339
|
{
|
338
340
|
for (auto & pr : parameter_ranges) {
|
339
341
|
if (pr.name == name) {
|
@@ -346,13 +348,13 @@ ParameterRange &ParameterSpace::add_range(const char * name)
|
|
346
348
|
}
|
347
349
|
|
348
350
|
|
349
|
-
/// initialize with reasonable parameters for
|
351
|
+
/// initialize with reasonable parameters for this type of index
|
350
352
|
void ParameterSpace::initialize (const Index * index)
|
351
353
|
{
|
352
354
|
if (DC (IndexPreTransform)) {
|
353
355
|
index = ix->index;
|
354
356
|
}
|
355
|
-
if (DC (
|
357
|
+
if (DC (IndexRefine)) {
|
356
358
|
ParameterRange & pr = add_range("k_factor_rf");
|
357
359
|
for (int i = 0; i <= 6; i++) {
|
358
360
|
pr.values.push_back (1 << i);
|
@@ -372,12 +374,14 @@ void ParameterSpace::initialize (const Index * index)
|
|
372
374
|
pr.values.push_back (nprobe);
|
373
375
|
}
|
374
376
|
}
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
377
|
+
ParameterSpace ivf_pspace;
|
378
|
+
ivf_pspace.initialize(ix->quantizer);
|
379
|
+
|
380
|
+
for (const ParameterRange & p: ivf_pspace.parameter_ranges) {
|
381
|
+
ParameterRange & pr = add_range("quantizer_" + p.name);
|
382
|
+
pr.values = p.values;
|
380
383
|
}
|
384
|
+
|
381
385
|
}
|
382
386
|
if (DC (IndexPQ)) {
|
383
387
|
ParameterRange & pr = add_range("ht");
|
@@ -446,7 +450,7 @@ void ParameterSpace::set_index_parameters (
|
|
446
450
|
tok = strtok_r (nullptr, " ,", &ptr)) {
|
447
451
|
char name[100];
|
448
452
|
double val;
|
449
|
-
int ret = sscanf (tok, "%
|
453
|
+
int ret = sscanf (tok, "%99[^=]=%lf", name, &val);
|
450
454
|
FAISS_THROW_IF_NOT_FMT (
|
451
455
|
ret == 2, "could not interpret parameters %s", tok);
|
452
456
|
set_index_parameter (index, name, val);
|
@@ -457,44 +461,38 @@ void ParameterSpace::set_index_parameters (
|
|
457
461
|
void ParameterSpace::set_index_parameter (
|
458
462
|
Index * index, const std::string & name, double val) const
|
459
463
|
{
|
460
|
-
if (verbose > 1)
|
461
|
-
printf("
|
464
|
+
if (verbose > 1) {
|
465
|
+
printf(" set_index_parameter %s=%g\n", name.c_str(), val);
|
466
|
+
}
|
462
467
|
|
463
468
|
if (name == "verbose") {
|
464
469
|
index->verbose = int(val);
|
465
470
|
// and fall through to also enable it on sub-indexes
|
466
471
|
}
|
467
|
-
if (DC (
|
472
|
+
if (DC (IndexIDMap)) {
|
468
473
|
set_index_parameter (ix->index, name, val);
|
469
474
|
return;
|
470
475
|
}
|
471
|
-
if (DC (
|
472
|
-
|
473
|
-
auto fn =
|
474
|
-
[this, name, val](int, Index* subIndex) {
|
475
|
-
set_index_parameter(subIndex, name, val);
|
476
|
-
};
|
477
|
-
|
478
|
-
ix->runOnIndex(fn);
|
476
|
+
if (DC (IndexPreTransform)) {
|
477
|
+
set_index_parameter (ix->index, name, val);
|
479
478
|
return;
|
480
479
|
}
|
481
|
-
if (DC (
|
480
|
+
if (DC (ThreadedIndex<Index>)) {
|
482
481
|
// call on all sub-indexes
|
483
482
|
auto fn =
|
484
|
-
|
485
|
-
|
483
|
+
[this, name, val](int /* no */, Index* subIndex) {
|
484
|
+
set_index_parameter(subIndex, name, val);
|
486
485
|
};
|
487
|
-
|
488
486
|
ix->runOnIndex(fn);
|
489
487
|
return;
|
490
488
|
}
|
491
|
-
if (DC (
|
489
|
+
if (DC (IndexRefine)) {
|
492
490
|
if (name == "k_factor_rf") {
|
493
491
|
ix->k_factor = int(val);
|
494
492
|
return;
|
495
493
|
}
|
496
494
|
// otherwise it is for the sub-index
|
497
|
-
set_index_parameter (
|
495
|
+
set_index_parameter (ix->base_index, name, val);
|
498
496
|
return;
|
499
497
|
}
|
500
498
|
|
@@ -504,10 +502,7 @@ void ParameterSpace::set_index_parameter (
|
|
504
502
|
}
|
505
503
|
|
506
504
|
if (name == "nprobe") {
|
507
|
-
if (DC (
|
508
|
-
set_index_parameter (ix->index, name, val);
|
509
|
-
return;
|
510
|
-
} else if (DC (IndexIVF)) {
|
505
|
+
if (DC (IndexIVF)) {
|
511
506
|
ix->nprobe = int(val);
|
512
507
|
return;
|
513
508
|
}
|
@@ -559,6 +554,14 @@ void ParameterSpace::set_index_parameter (
|
|
559
554
|
}
|
560
555
|
}
|
561
556
|
|
557
|
+
if (name.find("quantizer_") == 0) {
|
558
|
+
if (DC(IndexIVF)) {
|
559
|
+
std::string sub_name = name.substr(strlen("quantizer_"));
|
560
|
+
set_index_parameter(ix->quantizer, sub_name, val);
|
561
|
+
return;
|
562
|
+
}
|
563
|
+
}
|
564
|
+
|
562
565
|
FAISS_THROW_FMT ("ParameterSpace::set_index_parameter:"
|
563
566
|
"could not set parameter %s",
|
564
567
|
name.c_str());
|
@@ -707,8 +710,8 @@ void ParameterSpace::explore (Index *index,
|
|
707
710
|
bool keep = ops->add (perf, t_search, combination_name (cno), cno);
|
708
711
|
|
709
712
|
if (verbose)
|
710
|
-
printf(" perf %.3f t %.3f (%d
|
711
|
-
perf, t_search, nrun,
|
713
|
+
printf(" perf %.3f t %.3f (%d %s) %s\n",
|
714
|
+
perf, t_search, nrun, nrun >= 2 ? "runs" : "run",
|
712
715
|
keep ? "*" : "");
|
713
716
|
}
|
714
717
|
}
|
@@ -81,7 +81,10 @@ struct IntersectionCriterion: AutoTuneCriterion {
|
|
81
81
|
/**
|
82
82
|
* Maintains a list of experimental results. Each operating point is a
|
83
83
|
* (perf, t, key) triplet, where higher perf and lower t is
|
84
|
-
* better. The key field is an arbitrary identifier for the operating point
|
84
|
+
* better. The key field is an arbitrary identifier for the operating point.
|
85
|
+
*
|
86
|
+
* Includes primitives to extract the Pareto-optimal operating points in the
|
87
|
+
* (perf, t) space.
|
85
88
|
*/
|
86
89
|
|
87
90
|
struct OperatingPoint {
|
@@ -168,7 +171,7 @@ struct ParameterSpace {
|
|
168
171
|
void display () const;
|
169
172
|
|
170
173
|
/// add a new parameter (or return it if it exists)
|
171
|
-
ParameterRange &add_range(const
|
174
|
+
ParameterRange &add_range(const std::string & name);
|
172
175
|
|
173
176
|
/// initialize with reasonable parameters for the index
|
174
177
|
virtual void initialize (const Index * index);
|
@@ -179,7 +182,7 @@ struct ParameterSpace {
|
|
179
182
|
/// set a combination of parameters described by a string
|
180
183
|
void set_index_parameters (Index *index, const char *param_string) const;
|
181
184
|
|
182
|
-
/// set one of the parameters
|
185
|
+
/// set one of the parameters, returns whether setting was successful
|
183
186
|
virtual void set_index_parameter (
|
184
187
|
Index * index, const std::string & name, double val) const;
|
185
188
|
|
@@ -269,6 +269,7 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
|
|
269
269
|
const Index * codec, Index & index,
|
270
270
|
const float *weights) {
|
271
271
|
|
272
|
+
|
272
273
|
FAISS_THROW_IF_NOT_FMT (nx >= k,
|
273
274
|
"Number of training points (%" PRId64 ") should be at least "
|
274
275
|
"as large as number of clusters (%zd)", nx, k);
|
@@ -350,8 +351,9 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
|
|
350
351
|
std::unique_ptr<float []> dis(new float[nx]);
|
351
352
|
|
352
353
|
// remember best iteration for redo
|
353
|
-
|
354
|
-
|
354
|
+
bool lower_is_better = index.metric_type != METRIC_INNER_PRODUCT;
|
355
|
+
float best_obj = lower_is_better ? HUGE_VALF : -HUGE_VALF;
|
356
|
+
std::vector<ClusteringIterationStats> best_iteration_stats;
|
355
357
|
std::vector<float> best_centroids;
|
356
358
|
|
357
359
|
// support input centroids
|
@@ -417,7 +419,7 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
|
|
417
419
|
|
418
420
|
// k-means iterations
|
419
421
|
|
420
|
-
float
|
422
|
+
float obj = 0;
|
421
423
|
for (int i = 0; i < niter; i++) {
|
422
424
|
double t0s = getmillisecs();
|
423
425
|
|
@@ -440,10 +442,10 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
|
|
440
442
|
InterruptCallback::check();
|
441
443
|
t_search_tot += getmillisecs() - t0s;
|
442
444
|
|
443
|
-
// accumulate
|
444
|
-
|
445
|
+
// accumulate objective
|
446
|
+
obj = 0;
|
445
447
|
for (int j = 0; j < nx; j++) {
|
446
|
-
|
448
|
+
obj += dis[j];
|
447
449
|
}
|
448
450
|
|
449
451
|
// update the centroids
|
@@ -463,8 +465,9 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
|
|
463
465
|
|
464
466
|
// collect statistics
|
465
467
|
ClusteringIterationStats stats =
|
466
|
-
{
|
467
|
-
t_search_tot / 1000,
|
468
|
+
{ obj, (getmillisecs() - t0) / 1000.0,
|
469
|
+
t_search_tot / 1000,
|
470
|
+
imbalance_factor (nx, k, assign.get()),
|
468
471
|
nsplit };
|
469
472
|
iteration_stats.push_back(stats);
|
470
473
|
|
@@ -491,20 +494,21 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
|
|
491
494
|
|
492
495
|
if (verbose) printf("\n");
|
493
496
|
if (nredo > 1) {
|
494
|
-
if (
|
497
|
+
if ((lower_is_better && obj < best_obj) ||
|
498
|
+
(!lower_is_better && obj > best_obj)) {
|
495
499
|
if (verbose) {
|
496
500
|
printf ("Objective improved: keep new clusters\n");
|
497
501
|
}
|
498
502
|
best_centroids = centroids;
|
499
|
-
|
500
|
-
|
503
|
+
best_iteration_stats = iteration_stats;
|
504
|
+
best_obj = obj;
|
501
505
|
}
|
502
506
|
index.reset ();
|
503
507
|
}
|
504
508
|
}
|
505
509
|
if (nredo > 1) {
|
506
510
|
centroids = best_centroids;
|
507
|
-
iteration_stats =
|
511
|
+
iteration_stats = best_iteration_stats;
|
508
512
|
index.reset();
|
509
513
|
index.add(k, best_centroids.data());
|
510
514
|
}
|
@@ -34,11 +34,10 @@ void Index::range_search (idx_t , const float *, float,
|
|
34
34
|
FAISS_THROW_MSG ("range search not implemented");
|
35
35
|
}
|
36
36
|
|
37
|
-
void Index::assign (idx_t n, const float * x, idx_t * labels, idx_t k)
|
37
|
+
void Index::assign (idx_t n, const float * x, idx_t * labels, idx_t k) const
|
38
38
|
{
|
39
|
-
float
|
40
|
-
|
41
|
-
search (n, x, k, distances, labels);
|
39
|
+
std::vector<float> distances(n * k);
|
40
|
+
search (n, x, k, distances.data(), labels);
|
42
41
|
}
|
43
42
|
|
44
43
|
void Index::add_with_ids(
|
data/vendor/faiss/faiss/Index.h
CHANGED
@@ -17,8 +17,8 @@
|
|
17
17
|
#include <sstream>
|
18
18
|
|
19
19
|
#define FAISS_VERSION_MAJOR 1
|
20
|
-
#define FAISS_VERSION_MINOR
|
21
|
-
#define FAISS_VERSION_PATCH
|
20
|
+
#define FAISS_VERSION_MINOR 7
|
21
|
+
#define FAISS_VERSION_PATCH 0
|
22
22
|
|
23
23
|
/**
|
24
24
|
* @namespace faiss
|
@@ -133,7 +133,7 @@ struct Index {
|
|
133
133
|
* @param x input vectors to search, size n * d
|
134
134
|
* @param labels output labels of the NNs, size n*k
|
135
135
|
*/
|
136
|
-
void assign (idx_t n, const float * x, idx_t * labels, idx_t k = 1);
|
136
|
+
virtual void assign (idx_t n, const float * x, idx_t * labels, idx_t k = 1) const;
|
137
137
|
|
138
138
|
/// removes all elements from the database.
|
139
139
|
virtual void reset() = 0;
|
@@ -26,10 +26,9 @@ void IndexBinary::range_search(idx_t, const uint8_t *, int,
|
|
26
26
|
FAISS_THROW_MSG("range search not implemented");
|
27
27
|
}
|
28
28
|
|
29
|
-
void IndexBinary::assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k) {
|
30
|
-
int
|
31
|
-
|
32
|
-
search(n, x, k, distances, labels);
|
29
|
+
void IndexBinary::assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k) const {
|
30
|
+
std::vector<int> distances(n * k);
|
31
|
+
search(n, x, k, distances.data(), labels);
|
33
32
|
}
|
34
33
|
|
35
34
|
void IndexBinary::add_with_ids(idx_t, const uint8_t *, const idx_t *) {
|
@@ -120,7 +120,7 @@ struct IndexBinary {
|
|
120
120
|
* @param x input vectors to search, size n * d / 8
|
121
121
|
* @param labels output labels of the NNs, size n*k
|
122
122
|
*/
|
123
|
-
void assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k = 1);
|
123
|
+
void assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k = 1) const;
|
124
124
|
|
125
125
|
/// Removes all elements from the database.
|
126
126
|
virtual void reset() = 0;
|
@@ -18,16 +18,7 @@
|
|
18
18
|
|
19
19
|
#include <faiss/impl/AuxIndexStructures.h>
|
20
20
|
#include <faiss/impl/FaissAssert.h>
|
21
|
-
|
22
|
-
#ifdef _MSC_VER
|
23
|
-
#include <intrin.h>
|
24
|
-
|
25
|
-
static inline int __builtin_ctzll(uint64_t x) {
|
26
|
-
unsigned long ret;
|
27
|
-
_BitScanForward64(&ret, x);
|
28
|
-
return (int)ret;
|
29
|
-
}
|
30
|
-
#endif // _MSC_VER
|
21
|
+
#include <faiss/impl/platform_macros.h>
|
31
22
|
|
32
23
|
namespace faiss {
|
33
24
|
|
@@ -145,8 +136,7 @@ struct KnnSearchResults {
|
|
145
136
|
|
146
137
|
inline void add (float dis, idx_t id) {
|
147
138
|
if (dis < heap_sim[0]) {
|
148
|
-
|
149
|
-
heap_push<C> (k, heap_sim, heap_ids, dis, id);
|
139
|
+
heap_replace_top<C> (k, heap_sim, heap_ids, dis, id);
|
150
140
|
}
|
151
141
|
}
|
152
142
|
|
@@ -319,9 +319,8 @@ struct IVFBinaryScannerL2: BinaryInvertedListScanner {
|
|
319
319
|
for (size_t j = 0; j < n; j++) {
|
320
320
|
uint32_t dis = hc.hamming (codes);
|
321
321
|
if (dis < simi[0]) {
|
322
|
-
heap_pop<C> (k, simi, idxi);
|
323
322
|
idx_t id = store_pairs ? lo_build(list_no, j) : ids[j];
|
324
|
-
|
323
|
+
heap_replace_top<C> (k, simi, idxi, dis, id);
|
325
324
|
nup++;
|
326
325
|
}
|
327
326
|
codes += code_size;
|
@@ -226,155 +226,7 @@ void IndexFlat::sa_decode (idx_t n, const uint8_t *bytes, float *x) const
|
|
226
226
|
|
227
227
|
|
228
228
|
|
229
|
-
/***************************************************
|
230
|
-
* IndexFlatL2BaseShift
|
231
|
-
***************************************************/
|
232
|
-
|
233
|
-
IndexFlatL2BaseShift::IndexFlatL2BaseShift (idx_t d, size_t nshift, const float *shift):
|
234
|
-
IndexFlatL2 (d), shift (nshift)
|
235
|
-
{
|
236
|
-
memcpy (this->shift.data(), shift, sizeof(float) * nshift);
|
237
|
-
}
|
238
229
|
|
239
|
-
void IndexFlatL2BaseShift::search (
|
240
|
-
idx_t n,
|
241
|
-
const float *x,
|
242
|
-
idx_t k,
|
243
|
-
float *distances,
|
244
|
-
idx_t *labels) const
|
245
|
-
{
|
246
|
-
FAISS_THROW_IF_NOT (shift.size() == ntotal);
|
247
|
-
|
248
|
-
float_maxheap_array_t res = {
|
249
|
-
size_t(n), size_t(k), labels, distances};
|
250
|
-
knn_L2sqr_base_shift (x, xb.data(), d, n, ntotal, &res, shift.data());
|
251
|
-
}
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
/***************************************************
|
256
|
-
* IndexRefineFlat
|
257
|
-
***************************************************/
|
258
|
-
|
259
|
-
IndexRefineFlat::IndexRefineFlat (Index *base_index):
|
260
|
-
Index (base_index->d, base_index->metric_type),
|
261
|
-
refine_index (base_index->d, base_index->metric_type),
|
262
|
-
base_index (base_index), own_fields (false),
|
263
|
-
k_factor (1)
|
264
|
-
{
|
265
|
-
is_trained = base_index->is_trained;
|
266
|
-
FAISS_THROW_IF_NOT_MSG (base_index->ntotal == 0,
|
267
|
-
"base_index should be empty in the beginning");
|
268
|
-
}
|
269
|
-
|
270
|
-
IndexRefineFlat::IndexRefineFlat () {
|
271
|
-
base_index = nullptr;
|
272
|
-
own_fields = false;
|
273
|
-
k_factor = 1;
|
274
|
-
}
|
275
|
-
|
276
|
-
|
277
|
-
void IndexRefineFlat::train (idx_t n, const float *x)
|
278
|
-
{
|
279
|
-
base_index->train (n, x);
|
280
|
-
is_trained = true;
|
281
|
-
}
|
282
|
-
|
283
|
-
void IndexRefineFlat::add (idx_t n, const float *x) {
|
284
|
-
FAISS_THROW_IF_NOT (is_trained);
|
285
|
-
base_index->add (n, x);
|
286
|
-
refine_index.add (n, x);
|
287
|
-
ntotal = refine_index.ntotal;
|
288
|
-
}
|
289
|
-
|
290
|
-
void IndexRefineFlat::reset ()
|
291
|
-
{
|
292
|
-
base_index->reset ();
|
293
|
-
refine_index.reset ();
|
294
|
-
ntotal = 0;
|
295
|
-
}
|
296
|
-
|
297
|
-
namespace {
|
298
|
-
typedef faiss::Index::idx_t idx_t;
|
299
|
-
|
300
|
-
template<class C>
|
301
|
-
static void reorder_2_heaps (
|
302
|
-
idx_t n,
|
303
|
-
idx_t k, idx_t *labels, float *distances,
|
304
|
-
idx_t k_base, const idx_t *base_labels, const float *base_distances)
|
305
|
-
{
|
306
|
-
#pragma omp parallel for
|
307
|
-
for (idx_t i = 0; i < n; i++) {
|
308
|
-
idx_t *idxo = labels + i * k;
|
309
|
-
float *diso = distances + i * k;
|
310
|
-
const idx_t *idxi = base_labels + i * k_base;
|
311
|
-
const float *disi = base_distances + i * k_base;
|
312
|
-
|
313
|
-
heap_heapify<C> (k, diso, idxo, disi, idxi, k);
|
314
|
-
if (k_base != k) { // add remaining elements
|
315
|
-
heap_addn<C> (k, diso, idxo, disi + k, idxi + k, k_base - k);
|
316
|
-
}
|
317
|
-
heap_reorder<C> (k, diso, idxo);
|
318
|
-
}
|
319
|
-
}
|
320
|
-
|
321
|
-
|
322
|
-
}
|
323
|
-
|
324
|
-
|
325
|
-
void IndexRefineFlat::search (
|
326
|
-
idx_t n, const float *x, idx_t k,
|
327
|
-
float *distances, idx_t *labels) const
|
328
|
-
{
|
329
|
-
FAISS_THROW_IF_NOT (is_trained);
|
330
|
-
idx_t k_base = idx_t (k * k_factor);
|
331
|
-
idx_t * base_labels = labels;
|
332
|
-
float * base_distances = distances;
|
333
|
-
ScopeDeleter<idx_t> del1;
|
334
|
-
ScopeDeleter<float> del2;
|
335
|
-
|
336
|
-
|
337
|
-
if (k != k_base) {
|
338
|
-
base_labels = new idx_t [n * k_base];
|
339
|
-
del1.set (base_labels);
|
340
|
-
base_distances = new float [n * k_base];
|
341
|
-
del2.set (base_distances);
|
342
|
-
}
|
343
|
-
|
344
|
-
base_index->search (n, x, k_base, base_distances, base_labels);
|
345
|
-
|
346
|
-
for (int i = 0; i < n * k_base; i++)
|
347
|
-
assert (base_labels[i] >= -1 &&
|
348
|
-
base_labels[i] < ntotal);
|
349
|
-
|
350
|
-
// compute refined distances
|
351
|
-
refine_index.compute_distance_subset (
|
352
|
-
n, x, k_base, base_distances, base_labels);
|
353
|
-
|
354
|
-
// sort and store result
|
355
|
-
if (metric_type == METRIC_L2) {
|
356
|
-
typedef CMax <float, idx_t> C;
|
357
|
-
reorder_2_heaps<C> (
|
358
|
-
n, k, labels, distances,
|
359
|
-
k_base, base_labels, base_distances);
|
360
|
-
|
361
|
-
} else if (metric_type == METRIC_INNER_PRODUCT) {
|
362
|
-
typedef CMin <float, idx_t> C;
|
363
|
-
reorder_2_heaps<C> (
|
364
|
-
n, k, labels, distances,
|
365
|
-
k_base, base_labels, base_distances);
|
366
|
-
} else {
|
367
|
-
FAISS_THROW_MSG("Metric type not supported");
|
368
|
-
}
|
369
|
-
|
370
|
-
}
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
IndexRefineFlat::~IndexRefineFlat ()
|
375
|
-
{
|
376
|
-
if (own_fields) delete base_index;
|
377
|
-
}
|
378
230
|
|
379
231
|
/***************************************************
|
380
232
|
* IndexFlat1D
|