faiss 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +103 -3
- data/ext/faiss/ext.cpp +99 -32
- data/ext/faiss/extconf.rb +12 -2
- data/lib/faiss/ext.bundle +0 -0
- data/lib/faiss/index.rb +3 -3
- data/lib/faiss/index_binary.rb +3 -3
- data/lib/faiss/kmeans.rb +1 -1
- data/lib/faiss/pca_matrix.rb +2 -2
- data/lib/faiss/product_quantizer.rb +3 -3
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/AutoTune.cpp +719 -0
- data/vendor/faiss/AutoTune.h +212 -0
- data/vendor/faiss/Clustering.cpp +261 -0
- data/vendor/faiss/Clustering.h +101 -0
- data/vendor/faiss/IVFlib.cpp +339 -0
- data/vendor/faiss/IVFlib.h +132 -0
- data/vendor/faiss/Index.cpp +171 -0
- data/vendor/faiss/Index.h +261 -0
- data/vendor/faiss/Index2Layer.cpp +437 -0
- data/vendor/faiss/Index2Layer.h +85 -0
- data/vendor/faiss/IndexBinary.cpp +77 -0
- data/vendor/faiss/IndexBinary.h +163 -0
- data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
- data/vendor/faiss/IndexBinaryFlat.h +54 -0
- data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
- data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
- data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
- data/vendor/faiss/IndexBinaryHNSW.h +56 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
- data/vendor/faiss/IndexBinaryIVF.h +211 -0
- data/vendor/faiss/IndexFlat.cpp +508 -0
- data/vendor/faiss/IndexFlat.h +175 -0
- data/vendor/faiss/IndexHNSW.cpp +1090 -0
- data/vendor/faiss/IndexHNSW.h +170 -0
- data/vendor/faiss/IndexIVF.cpp +909 -0
- data/vendor/faiss/IndexIVF.h +353 -0
- data/vendor/faiss/IndexIVFFlat.cpp +502 -0
- data/vendor/faiss/IndexIVFFlat.h +118 -0
- data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
- data/vendor/faiss/IndexIVFPQ.h +161 -0
- data/vendor/faiss/IndexIVFPQR.cpp +219 -0
- data/vendor/faiss/IndexIVFPQR.h +65 -0
- data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
- data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
- data/vendor/faiss/IndexLSH.cpp +225 -0
- data/vendor/faiss/IndexLSH.h +87 -0
- data/vendor/faiss/IndexLattice.cpp +143 -0
- data/vendor/faiss/IndexLattice.h +68 -0
- data/vendor/faiss/IndexPQ.cpp +1188 -0
- data/vendor/faiss/IndexPQ.h +199 -0
- data/vendor/faiss/IndexPreTransform.cpp +288 -0
- data/vendor/faiss/IndexPreTransform.h +91 -0
- data/vendor/faiss/IndexReplicas.cpp +123 -0
- data/vendor/faiss/IndexReplicas.h +76 -0
- data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
- data/vendor/faiss/IndexScalarQuantizer.h +127 -0
- data/vendor/faiss/IndexShards.cpp +317 -0
- data/vendor/faiss/IndexShards.h +100 -0
- data/vendor/faiss/InvertedLists.cpp +623 -0
- data/vendor/faiss/InvertedLists.h +334 -0
- data/vendor/faiss/LICENSE +21 -0
- data/vendor/faiss/MatrixStats.cpp +252 -0
- data/vendor/faiss/MatrixStats.h +62 -0
- data/vendor/faiss/MetaIndexes.cpp +351 -0
- data/vendor/faiss/MetaIndexes.h +126 -0
- data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
- data/vendor/faiss/OnDiskInvertedLists.h +127 -0
- data/vendor/faiss/VectorTransform.cpp +1157 -0
- data/vendor/faiss/VectorTransform.h +322 -0
- data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
- data/vendor/faiss/c_api/AutoTune_c.h +64 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
- data/vendor/faiss/c_api/Clustering_c.h +117 -0
- data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
- data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
- data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
- data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
- data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
- data/vendor/faiss/c_api/IndexShards_c.h +42 -0
- data/vendor/faiss/c_api/Index_c.cpp +105 -0
- data/vendor/faiss/c_api/Index_c.h +183 -0
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
- data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
- data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
- data/vendor/faiss/c_api/clone_index_c.h +32 -0
- data/vendor/faiss/c_api/error_c.h +42 -0
- data/vendor/faiss/c_api/error_impl.cpp +27 -0
- data/vendor/faiss/c_api/error_impl.h +16 -0
- data/vendor/faiss/c_api/faiss_c.h +58 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
- data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
- data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
- data/vendor/faiss/c_api/index_factory_c.h +30 -0
- data/vendor/faiss/c_api/index_io_c.cpp +42 -0
- data/vendor/faiss/c_api/index_io_c.h +50 -0
- data/vendor/faiss/c_api/macros_impl.h +110 -0
- data/vendor/faiss/clone_index.cpp +147 -0
- data/vendor/faiss/clone_index.h +38 -0
- data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
- data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
- data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
- data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
- data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
- data/vendor/faiss/gpu/GpuCloner.h +82 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
- data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
- data/vendor/faiss/gpu/GpuDistance.h +52 -0
- data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
- data/vendor/faiss/gpu/GpuIndex.h +148 -0
- data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
- data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
- data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
- data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
- data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
- data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
- data/vendor/faiss/gpu/GpuResources.cpp +52 -0
- data/vendor/faiss/gpu/GpuResources.h +73 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
- data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
- data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
- data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
- data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
- data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
- data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
- data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
- data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
- data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
- data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
- data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
- data/vendor/faiss/gpu/test/TestUtils.h +93 -0
- data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
- data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
- data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
- data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
- data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
- data/vendor/faiss/gpu/utils/Timer.h +52 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
- data/vendor/faiss/impl/FaissAssert.h +95 -0
- data/vendor/faiss/impl/FaissException.cpp +66 -0
- data/vendor/faiss/impl/FaissException.h +71 -0
- data/vendor/faiss/impl/HNSW.cpp +818 -0
- data/vendor/faiss/impl/HNSW.h +275 -0
- data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
- data/vendor/faiss/impl/PolysemousTraining.h +158 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
- data/vendor/faiss/impl/ProductQuantizer.h +242 -0
- data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
- data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
- data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
- data/vendor/faiss/impl/ThreadedIndex.h +80 -0
- data/vendor/faiss/impl/index_read.cpp +793 -0
- data/vendor/faiss/impl/index_write.cpp +558 -0
- data/vendor/faiss/impl/io.cpp +142 -0
- data/vendor/faiss/impl/io.h +98 -0
- data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
- data/vendor/faiss/impl/lattice_Zn.h +199 -0
- data/vendor/faiss/index_factory.cpp +392 -0
- data/vendor/faiss/index_factory.h +25 -0
- data/vendor/faiss/index_io.h +75 -0
- data/vendor/faiss/misc/test_blas.cpp +84 -0
- data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
- data/vendor/faiss/tests/test_merge.cpp +258 -0
- data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
- data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
- data/vendor/faiss/tests/test_params_override.cpp +231 -0
- data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
- data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
- data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
- data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
- data/vendor/faiss/utils/Heap.cpp +122 -0
- data/vendor/faiss/utils/Heap.h +495 -0
- data/vendor/faiss/utils/WorkerThread.cpp +126 -0
- data/vendor/faiss/utils/WorkerThread.h +61 -0
- data/vendor/faiss/utils/distances.cpp +765 -0
- data/vendor/faiss/utils/distances.h +243 -0
- data/vendor/faiss/utils/distances_simd.cpp +809 -0
- data/vendor/faiss/utils/extra_distances.cpp +336 -0
- data/vendor/faiss/utils/extra_distances.h +54 -0
- data/vendor/faiss/utils/hamming-inl.h +472 -0
- data/vendor/faiss/utils/hamming.cpp +792 -0
- data/vendor/faiss/utils/hamming.h +220 -0
- data/vendor/faiss/utils/random.cpp +192 -0
- data/vendor/faiss/utils/random.h +60 -0
- data/vendor/faiss/utils/utils.cpp +783 -0
- data/vendor/faiss/utils/utils.h +181 -0
- metadata +216 -2
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#pragma once
|
|
11
|
+
|
|
12
|
+
#include <faiss/impl/HNSW.h>
|
|
13
|
+
#include <faiss/IndexBinaryFlat.h>
|
|
14
|
+
#include <faiss/utils/utils.h>
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
namespace faiss {
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
/** The HNSW index is a normal random-access index with a HNSW
|
|
21
|
+
* link structure built on top */
|
|
22
|
+
|
|
23
|
+
struct IndexBinaryHNSW : IndexBinary {
|
|
24
|
+
typedef HNSW::storage_idx_t storage_idx_t;
|
|
25
|
+
|
|
26
|
+
// the link strcuture
|
|
27
|
+
HNSW hnsw;
|
|
28
|
+
|
|
29
|
+
// the sequential storage
|
|
30
|
+
bool own_fields;
|
|
31
|
+
IndexBinary *storage;
|
|
32
|
+
|
|
33
|
+
explicit IndexBinaryHNSW();
|
|
34
|
+
explicit IndexBinaryHNSW(int d, int M = 32);
|
|
35
|
+
explicit IndexBinaryHNSW(IndexBinary *storage, int M = 32);
|
|
36
|
+
|
|
37
|
+
~IndexBinaryHNSW() override;
|
|
38
|
+
|
|
39
|
+
DistanceComputer *get_distance_computer() const;
|
|
40
|
+
|
|
41
|
+
void add(idx_t n, const uint8_t *x) override;
|
|
42
|
+
|
|
43
|
+
/// Trains the storage if needed
|
|
44
|
+
void train(idx_t n, const uint8_t* x) override;
|
|
45
|
+
|
|
46
|
+
/// entry point for search
|
|
47
|
+
void search(idx_t n, const uint8_t *x, idx_t k,
|
|
48
|
+
int32_t *distances, idx_t *labels) const override;
|
|
49
|
+
|
|
50
|
+
void reconstruct(idx_t key, uint8_t* recons) const override;
|
|
51
|
+
|
|
52
|
+
void reset() override;
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
} // namespace faiss
|
|
@@ -0,0 +1,671 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// Copyright 2004-present Facebook. All Rights Reserved
|
|
9
|
+
// -*- c++ -*-
|
|
10
|
+
|
|
11
|
+
#include <faiss/IndexBinaryIVF.h>
|
|
12
|
+
|
|
13
|
+
#include <cstdio>
|
|
14
|
+
#include <memory>
|
|
15
|
+
|
|
16
|
+
#include <faiss/utils/hamming.h>
|
|
17
|
+
#include <faiss/utils/utils.h>
|
|
18
|
+
|
|
19
|
+
#include <faiss/impl/AuxIndexStructures.h>
|
|
20
|
+
#include <faiss/impl/FaissAssert.h>
|
|
21
|
+
#include <faiss/IndexFlat.h>
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
namespace faiss {
|
|
25
|
+
|
|
26
|
+
/**
 * Construct an IVF index for binary vectors on top of a coarse quantizer.
 *
 * The quantizer pointer is stored as-is and own_fields is set to false.
 * The index starts out trained only when the quantizer is already trained
 * and holds exactly nlist entries.
 *
 * @param quantizer  coarse quantizer; must be non-null with quantizer->d == d
 * @param d          dimension forwarded to the IndexBinary base
 * @param nlist      number of inverted lists to create
 * @throws FaissException (via FAISS_THROW_IF_NOT*) when the quantizer is
 *         null or its dimension differs from d
 */
IndexBinaryIVF::IndexBinaryIVF(IndexBinary *quantizer, size_t d, size_t nlist)
    : IndexBinary(d),
      invlists(nullptr),
      own_invlists(false),
      nprobe(1),
      max_codes(0),
      maintain_direct_map(false),
      quantizer(quantizer),
      nlist(nlist),
      own_fields(false),
      clustering_index(nullptr)
{
    // Validate before allocating: the destructor does not run when a
    // constructor throws, so inverted lists allocated in the initializer
    // list would be leaked by a failing check.
    FAISS_THROW_IF_NOT_MSG (quantizer, "quantizer must not be null");
    FAISS_THROW_IF_NOT (d == quantizer->d);

    invlists = new ArrayInvertedLists(nlist, code_size);
    own_invlists = true;

    is_trained = quantizer->is_trained && (quantizer->ntotal == nlist);

    cp.niter = 10;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Default constructor: no inverted lists, no quantizer, zero lists and
 * nothing owned. nprobe starts at 1 and max_codes at 0.
 */
IndexBinaryIVF::IndexBinaryIVF()
    : invlists(nullptr), own_invlists(false),
      nprobe(1), max_codes(0),
      maintain_direct_map(false),
      quantizer(nullptr), nlist(0), own_fields(false),
      clustering_index(nullptr)
{
}
|
|
55
|
+
|
|
56
|
+
/// Add n vectors without explicit ids; forwards to add_with_ids() with a
/// null id array.
void IndexBinaryIVF::add(idx_t n, const uint8_t *x) {
    const idx_t *no_explicit_ids = nullptr;
    add_with_ids(n, x, no_explicit_ids);
}
|
|
59
|
+
|
|
60
|
+
/// Add n vectors with the given ids (xids may be null); forwards to
/// add_core() without precomputed list assignments.
void IndexBinaryIVF::add_with_ids(idx_t n, const uint8_t *x, const idx_t *xids) {
    const idx_t *no_precomputed_assignment = nullptr;
    add_core(n, x, xids, no_precomputed_assignment);
}
|
|
63
|
+
|
|
64
|
+
/**
 * Core insertion routine.
 *
 * @param n                number of vectors in x
 * @param x                codes to add, code_size bytes per vector
 * @param xids             optional ids; when null, ids are ntotal + i
 * @param precomputed_idx  optional list assignment per vector; when null,
 *                         the quantizer's assign() is used
 *
 * Vectors whose assigned list number is negative are skipped. ntotal is
 * increased by the number of vectors actually stored.
 */
void IndexBinaryIVF::add_core(idx_t n, const uint8_t *x, const idx_t *xids,
                              const idx_t *precomputed_idx) {
    FAISS_THROW_IF_NOT(is_trained);
    assert(invlists);
    FAISS_THROW_IF_NOT_MSG(!(maintain_direct_map && xids),
                           "cannot have direct map and add with ids");

    // Buffer that holds the assignments only when they are computed here.
    std::unique_ptr<idx_t[]> scoped_idx;
    const idx_t *idx = precomputed_idx;

    if (!idx) {
        scoped_idx.reset(new idx_t[n]);
        quantizer->assign(n, x, scoped_idx.get());
        idx = scoped_idx.get();
    }

    long n_add = 0;
    for (size_t i = 0; i < n; i++) {
        const idx_t list_no = idx[i];
        if (list_no < 0) {
            // no list assigned to this vector: skip it
            continue;
        }

        idx_t id = xids ? xids[i] : ntotal + i;
        const uint8_t *xi = x + i * code_size;
        size_t offset = invlists->add_entry(list_no, id, xi);

        if (maintain_direct_map) {
            // list number in the high 32 bits, offset in the low bits
            direct_map.push_back(list_no << 32 | offset);
        }
        n_add++;
    }

    if (verbose) {
        printf("IndexBinaryIVF::add_with_ids: added %ld / %ld vectors\n",
               n_add, n);
    }
    ntotal += n_add;
}
|
|
103
|
+
|
|
104
|
+
void IndexBinaryIVF::make_direct_map(bool new_maintain_direct_map) {
|
|
105
|
+
// nothing to do
|
|
106
|
+
if (new_maintain_direct_map == maintain_direct_map)
|
|
107
|
+
return;
|
|
108
|
+
|
|
109
|
+
if (new_maintain_direct_map) {
|
|
110
|
+
direct_map.resize(ntotal, -1);
|
|
111
|
+
for (size_t key = 0; key < nlist; key++) {
|
|
112
|
+
size_t list_size = invlists->list_size(key);
|
|
113
|
+
const idx_t *idlist = invlists->get_ids(key);
|
|
114
|
+
|
|
115
|
+
for (size_t ofs = 0; ofs < list_size; ofs++) {
|
|
116
|
+
FAISS_THROW_IF_NOT_MSG(0 <= idlist[ofs] && idlist[ofs] < ntotal,
|
|
117
|
+
"direct map supported only for seuquential ids");
|
|
118
|
+
direct_map[idlist[ofs]] = key << 32 | ofs;
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
} else {
|
|
122
|
+
direct_map.clear();
|
|
123
|
+
}
|
|
124
|
+
maintain_direct_map = new_maintain_direct_map;
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
/**
 * Search entry point: asks the quantizer for nprobe lists per query, then
 * scans them through search_preassigned() (store_pairs = false).
 *
 * The time spent in each of the two phases is accumulated into
 * indexIVF_stats.quantization_time and indexIVF_stats.search_time.
 */
void IndexBinaryIVF::search(idx_t n, const uint8_t *x, idx_t k,
                            int32_t *distances, idx_t *labels) const {
    const auto n_assign = n * nprobe;
    std::unique_ptr<idx_t[]> idx(new idx_t[n_assign]);
    std::unique_ptr<int32_t[]> coarse_dis(new int32_t[n_assign]);

    // phase 1: coarse quantization
    const double quantization_start = getmillisecs();
    quantizer->search(n, x, nprobe, coarse_dis.get(), idx.get());
    indexIVF_stats.quantization_time += getmillisecs() - quantization_start;

    // phase 2: prefetch and scan the selected inverted lists
    const double scan_start = getmillisecs();
    invlists->prefetch_lists(idx.get(), n_assign);

    search_preassigned(n, x, k, idx.get(), coarse_dis.get(),
                       distances, labels, false);
    indexIVF_stats.search_time += getmillisecs() - scan_start;
}
|
|
143
|
+
|
|
144
|
+
/**
 * Copy the stored code of vector `key` into recons, locating it through
 * the direct map.
 *
 * @param key     id of the vector; must lie in [0, ntotal)
 * @param recons  output buffer; reconstruct_from_offset() writes
 *                code_size bytes into it
 * @throws FaissException (via FAISS_THROW_IF_NOT_MSG) when the direct map
 *         does not have ntotal entries, when key is out of range, or when
 *         the map holds no location for key
 */
void IndexBinaryIVF::reconstruct(idx_t key, uint8_t *recons) const {
    FAISS_THROW_IF_NOT_MSG(direct_map.size() == ntotal,
                           "direct map is not initialized");
    // The size check alone also passes for an empty index with no direct
    // map, so the key itself has to be validated before indexing.
    FAISS_THROW_IF_NOT_MSG(key >= 0 && key < ntotal,
                           "reconstruct: key out of range");

    const idx_t entry = direct_map[key];
    // make_direct_map() pre-fills the map with -1; a negative entry means
    // no location was recorded for this id.
    FAISS_THROW_IF_NOT_MSG(entry >= 0,
                           "reconstruct: no direct map entry for key");

    // list number in the high 32 bits, offset in the low 32 bits
    idx_t list_no = entry >> 32;
    idx_t offset = entry & 0xffffffff;
    reconstruct_from_offset(list_no, offset, recons);
}
|
|
151
|
+
|
|
152
|
+
/**
 * Reconstruct the vectors whose ids lie in [i0, i0 + ni) by scanning all
 * inverted lists.
 *
 * @param i0      first id of the range
 * @param ni      number of ids in the range
 * @param recons  output buffer; the vector with id `id` is written at
 *                recons + (id - i0) * d
 * @throws FaissException (via FAISS_THROW_IF_NOT) when ni != 0 and the
 *         range is not inside [0, ntotal]
 */
void IndexBinaryIVF::reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const {
    FAISS_THROW_IF_NOT(ni == 0 || (i0 >= 0 && i0 + ni <= ntotal));

    for (idx_t list_no = 0; list_no < nlist; list_no++) {
        size_t list_size = invlists->list_size(list_no);
        // Scoped guard so the ids obtained from the inverted lists are
        // released again once this list has been processed.
        InvertedLists::ScopedIds scoped_ids(invlists, list_no);
        const Index::idx_t *idlist = scoped_ids.get();

        for (idx_t offset = 0; offset < list_size; offset++) {
            idx_t id = idlist[offset];
            if (!(id >= i0 && id < i0 + ni)) {
                continue;
            }

            // NOTE(review): the output stride is d bytes while
            // reconstruct_from_offset() copies code_size bytes per vector;
            // confirm the intended layout of recons against the
            // IndexBinary::reconstruct_n contract before relying on it.
            uint8_t *reconstructed = recons + (id - i0) * d;
            reconstruct_from_offset(list_no, offset, reconstructed);
        }
    }
}
|
|
170
|
+
|
|
171
|
+
/**
 * Search and, for every result slot, copy the stored code of the match
 * into recons.
 *
 * search_preassigned() is run with store_pairs enabled, so each label
 * first comes back as a packed (list number, offset) pair; it is then
 * replaced by the actual id read from the inverted lists. Slots with a
 * negative label are filled with bytes of value 0xFF.
 *
 * NOTE(review): result slot ij is written at recons + ij * d although
 * reconstruct_from_offset() copies code_size bytes; confirm the intended
 * layout of recons with callers.
 */
void IndexBinaryIVF::search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k,
                                            int32_t *distances, idx_t *labels,
                                            uint8_t *recons) const {
    const auto n_assign = n * nprobe;
    std::unique_ptr<idx_t[]> idx(new idx_t[n_assign]);
    std::unique_ptr<int32_t[]> coarse_dis(new int32_t[n_assign]);

    quantizer->search(n, x, nprobe, coarse_dis.get(), idx.get());

    invlists->prefetch_lists(idx.get(), n_assign);

    // store_pairs gives us the list_no and the offset into `codes`
    // needed for the reconstruction below
    search_preassigned(n, x, k, idx.get(), coarse_dis.get(),
                       distances, labels, /* store_pairs */true);

    for (idx_t i = 0; i < n; ++i) {
        for (idx_t j = 0; j < k; ++j) {
            const idx_t ij = i * k + j;
            const idx_t key = labels[ij];
            uint8_t *reconstructed = recons + ij * d;

            if (key < 0) {
                // no result in this slot: fill it with 0xFF bytes
                memset(reconstructed, -1, sizeof(*reconstructed) * d);
                continue;
            }

            // unpack the (list number, offset) pair
            int list_no = key >> 32;
            int offset = key & 0xffffffff;

            // Update label to the actual id
            labels[ij] = invlists->get_single_id(list_no, offset);

            reconstruct_from_offset(list_no, offset, reconstructed);
        }
    }
}
|
|
205
|
+
|
|
206
|
+
/// Copy the code_size-byte code stored at (list_no, offset) in the
/// inverted lists into recons.
void IndexBinaryIVF::reconstruct_from_offset(idx_t list_no, idx_t offset,
                                             uint8_t *recons) const {
    const uint8_t *stored_code = invlists->get_single_code(list_no, offset);
    memcpy(recons, stored_code, code_size);
}
|
|
210
|
+
|
|
211
|
+
/// Remove all stored vectors: clears the direct map, resets the inverted
/// lists and sets ntotal back to 0.
void IndexBinaryIVF::reset() {
    direct_map.clear();

    invlists->reset();

    ntotal = 0;
}
|
|
216
|
+
|
|
217
|
+
size_t IndexBinaryIVF::remove_ids(const IDSelector& sel) {
|
|
218
|
+
FAISS_THROW_IF_NOT_MSG(!maintain_direct_map,
|
|
219
|
+
"direct map remove not implemented");
|
|
220
|
+
|
|
221
|
+
std::vector<idx_t> toremove(nlist);
|
|
222
|
+
|
|
223
|
+
#pragma omp parallel for
|
|
224
|
+
for (idx_t i = 0; i < nlist; i++) {
|
|
225
|
+
idx_t l0 = invlists->list_size (i), l = l0, j = 0;
|
|
226
|
+
const idx_t *idsi = invlists->get_ids(i);
|
|
227
|
+
while (j < l) {
|
|
228
|
+
if (sel.is_member(idsi[j])) {
|
|
229
|
+
l--;
|
|
230
|
+
invlists->update_entry(
|
|
231
|
+
i, j,
|
|
232
|
+
invlists->get_single_id(i, l),
|
|
233
|
+
invlists->get_single_code(i, l));
|
|
234
|
+
} else {
|
|
235
|
+
j++;
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
toremove[i] = l0 - l;
|
|
239
|
+
}
|
|
240
|
+
// this will not run well in parallel on ondisk because of possible shrinks
|
|
241
|
+
size_t nremove = 0;
|
|
242
|
+
for (idx_t i = 0; i < nlist; i++) {
|
|
243
|
+
if (toremove[i] > 0) {
|
|
244
|
+
nremove += toremove[i];
|
|
245
|
+
invlists->resize(
|
|
246
|
+
i, invlists->list_size(i) - toremove[i]);
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
ntotal -= nremove;
|
|
250
|
+
return nremove;
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
/**
 * Train the coarse quantizer when it needs it, then mark the index as
 * trained.
 *
 * The quantizer is left untouched when it is already trained and holds
 * exactly nlist entries. Otherwise it is reset, the training vectors are
 * converted with binary_to_real(), clustered into nlist centroids (using
 * clustering_index when set, a temporary IndexFlatL2 otherwise), and the
 * centroids are converted back with real_to_binary() and added to the
 * quantizer.
 *
 * @param n  number of training vectors
 * @param x  training vectors
 */
void IndexBinaryIVF::train(idx_t n, const uint8_t *x) {
    if (verbose) {
        printf("Training quantizer\n");
    }

    const bool quantizer_ready =
        quantizer->is_trained && (quantizer->ntotal == nlist);

    if (quantizer_ready) {
        if (verbose) {
            printf("IVF quantizer does not need training.\n");
        }
        is_trained = true;
        return;
    }

    if (verbose) {
        printf("Training quantizer on %ld vectors in %dD\n", n, d);
    }

    Clustering clus(d, nlist, cp);
    quantizer->reset();

    // float version of the training set for the clustering
    std::unique_ptr<float[]> x_f(new float[n * d]);
    binary_to_real(n * d, x, x_f.get());

    IndexFlatL2 index_tmp(d);

    if (clustering_index && verbose) {
        printf("using clustering_index of dimension %d to do the clustering\n",
               clustering_index->d);
    }

    // index used by the clustering: the user-provided one when present
    auto &assignment_index =
        clustering_index ? *clustering_index : index_tmp;
    clus.train(n, x_f.get(), assignment_index);

    // binary version of the centroids, stored in the quantizer
    std::unique_ptr<uint8_t[]> x_b(new uint8_t[clus.k * code_size]);
    real_to_binary(d * clus.k, clus.centroids.data(), x_b.get());

    quantizer->add(clus.k, x_b.get());
    quantizer->is_trained = true;

    is_trained = true;
}
|
|
291
|
+
|
|
292
|
+
/**
 * Move the content of `other` into this index.
 *
 * Both indexes must have the same d, nlist, code_size and dynamic type,
 * and neither may maintain a direct map. The inverted lists of `other`
 * are merged into this index's lists with add_id passed along, ntotal is
 * increased by other.ntotal, and other.ntotal is set to 0.
 *
 * @param other   index to merge from
 * @param add_id  forwarded to InvertedLists::merge_from
 */
void IndexBinaryIVF::merge_from(IndexBinaryIVF &other, idx_t add_id) {
    // compatibility checks
    FAISS_THROW_IF_NOT(other.d == d);
    FAISS_THROW_IF_NOT(other.nlist == nlist);
    FAISS_THROW_IF_NOT(other.code_size == code_size);
    FAISS_THROW_IF_NOT_MSG((!maintain_direct_map && !other.maintain_direct_map),
                           "direct map copy not implemented");
    FAISS_THROW_IF_NOT_MSG(typeid (*this) == typeid (other),
                           "can only merge indexes of the same type");

    invlists->merge_from (other.invlists, add_id);

    // transfer the vector count
    ntotal += other.ntotal;
    other.ntotal = 0;
}
|
|
308
|
+
|
|
309
|
+
/**
 * Replace the inverted lists of this index.
 *
 * The previous lists are deleted when this index owned them.
 *
 * @param il   new inverted lists; nlist and code_size must match the index
 * @param own  whether this index takes ownership of il
 */
void IndexBinaryIVF::replace_invlists(InvertedLists *il, bool own) {
    FAISS_THROW_IF_NOT(il->nlist == nlist && il->code_size == code_size);

    if (own_invlists) {
        delete invlists;
    }

    own_invlists = own;
    invlists = il;
}
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
namespace {
|
|
321
|
+
|
|
322
|
+
using idx_t = Index::idx_t;
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
template<class HammingComputer, bool store_pairs>
|
|
326
|
+
struct IVFBinaryScannerL2: BinaryInvertedListScanner {
|
|
327
|
+
|
|
328
|
+
HammingComputer hc;
|
|
329
|
+
size_t code_size;
|
|
330
|
+
|
|
331
|
+
IVFBinaryScannerL2 (size_t code_size): code_size (code_size)
|
|
332
|
+
{}
|
|
333
|
+
|
|
334
|
+
void set_query (const uint8_t *query_vector) override {
|
|
335
|
+
hc.set (query_vector, code_size);
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
idx_t list_no;
|
|
339
|
+
void set_list (idx_t list_no, uint8_t /* coarse_dis */) override {
|
|
340
|
+
this->list_no = list_no;
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
uint32_t distance_to_code (const uint8_t *code) const override {
|
|
344
|
+
return hc.hamming (code);
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
size_t scan_codes (size_t n,
|
|
348
|
+
const uint8_t *codes,
|
|
349
|
+
const idx_t *ids,
|
|
350
|
+
int32_t *simi, idx_t *idxi,
|
|
351
|
+
size_t k) const override
|
|
352
|
+
{
|
|
353
|
+
using C = CMax<int32_t, idx_t>;
|
|
354
|
+
|
|
355
|
+
size_t nup = 0;
|
|
356
|
+
for (size_t j = 0; j < n; j++) {
|
|
357
|
+
uint32_t dis = hc.hamming (codes);
|
|
358
|
+
if (dis < simi[0]) {
|
|
359
|
+
heap_pop<C> (k, simi, idxi);
|
|
360
|
+
idx_t id = store_pairs ? (list_no << 32 | j) : ids[j];
|
|
361
|
+
heap_push<C> (k, simi, idxi, dis, id);
|
|
362
|
+
nup++;
|
|
363
|
+
}
|
|
364
|
+
codes += code_size;
|
|
365
|
+
}
|
|
366
|
+
return nup;
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
};
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
template <bool store_pairs>
|
|
374
|
+
BinaryInvertedListScanner *select_IVFBinaryScannerL2 (size_t code_size) {
|
|
375
|
+
|
|
376
|
+
switch (code_size) {
|
|
377
|
+
#define HANDLE_CS(cs) \
|
|
378
|
+
case cs: \
|
|
379
|
+
return new IVFBinaryScannerL2<HammingComputer ## cs, store_pairs> (cs);
|
|
380
|
+
HANDLE_CS(4);
|
|
381
|
+
HANDLE_CS(8);
|
|
382
|
+
HANDLE_CS(16);
|
|
383
|
+
HANDLE_CS(20);
|
|
384
|
+
HANDLE_CS(32);
|
|
385
|
+
HANDLE_CS(64);
|
|
386
|
+
#undef HANDLE_CS
|
|
387
|
+
default:
|
|
388
|
+
if (code_size % 8 == 0) {
|
|
389
|
+
return new IVFBinaryScannerL2<HammingComputerM8,
|
|
390
|
+
store_pairs> (code_size);
|
|
391
|
+
} else if (code_size % 4 == 0) {
|
|
392
|
+
return new IVFBinaryScannerL2<HammingComputerM4,
|
|
393
|
+
store_pairs> (code_size);
|
|
394
|
+
} else {
|
|
395
|
+
return new IVFBinaryScannerL2<HammingComputerDefault,
|
|
396
|
+
store_pairs> (code_size);
|
|
397
|
+
}
|
|
398
|
+
}
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
void search_knn_hamming_heap(const IndexBinaryIVF& ivf,
|
|
403
|
+
size_t n,
|
|
404
|
+
const uint8_t *x,
|
|
405
|
+
idx_t k,
|
|
406
|
+
const idx_t *keys,
|
|
407
|
+
const int32_t * coarse_dis,
|
|
408
|
+
int32_t *distances, idx_t *labels,
|
|
409
|
+
bool store_pairs,
|
|
410
|
+
const IVFSearchParameters *params)
|
|
411
|
+
{
|
|
412
|
+
long nprobe = params ? params->nprobe : ivf.nprobe;
|
|
413
|
+
long max_codes = params ? params->max_codes : ivf.max_codes;
|
|
414
|
+
MetricType metric_type = ivf.metric_type;
|
|
415
|
+
|
|
416
|
+
// almost verbatim copy from IndexIVF::search_preassigned
|
|
417
|
+
|
|
418
|
+
size_t nlistv = 0, ndis = 0, nheap = 0;
|
|
419
|
+
using HeapForIP = CMin<int32_t, idx_t>;
|
|
420
|
+
using HeapForL2 = CMax<int32_t, idx_t>;
|
|
421
|
+
|
|
422
|
+
#pragma omp parallel if(n > 1) reduction(+: nlistv, ndis, nheap)
|
|
423
|
+
{
|
|
424
|
+
std::unique_ptr<BinaryInvertedListScanner> scanner
|
|
425
|
+
(ivf.get_InvertedListScanner (store_pairs));
|
|
426
|
+
|
|
427
|
+
#pragma omp for
|
|
428
|
+
for (size_t i = 0; i < n; i++) {
|
|
429
|
+
const uint8_t *xi = x + i * ivf.code_size;
|
|
430
|
+
scanner->set_query(xi);
|
|
431
|
+
|
|
432
|
+
const idx_t * keysi = keys + i * nprobe;
|
|
433
|
+
int32_t * simi = distances + k * i;
|
|
434
|
+
idx_t * idxi = labels + k * i;
|
|
435
|
+
|
|
436
|
+
if (metric_type == METRIC_INNER_PRODUCT) {
|
|
437
|
+
heap_heapify<HeapForIP> (k, simi, idxi);
|
|
438
|
+
} else {
|
|
439
|
+
heap_heapify<HeapForL2> (k, simi, idxi);
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
size_t nscan = 0;
|
|
443
|
+
|
|
444
|
+
for (size_t ik = 0; ik < nprobe; ik++) {
|
|
445
|
+
idx_t key = keysi[ik]; /* select the list */
|
|
446
|
+
if (key < 0) {
|
|
447
|
+
// not enough centroids for multiprobe
|
|
448
|
+
continue;
|
|
449
|
+
}
|
|
450
|
+
FAISS_THROW_IF_NOT_FMT
|
|
451
|
+
(key < (idx_t) ivf.nlist,
|
|
452
|
+
"Invalid key=%ld at ik=%ld nlist=%ld\n",
|
|
453
|
+
key, ik, ivf.nlist);
|
|
454
|
+
|
|
455
|
+
scanner->set_list (key, coarse_dis[i * nprobe + ik]);
|
|
456
|
+
|
|
457
|
+
nlistv++;
|
|
458
|
+
|
|
459
|
+
size_t list_size = ivf.invlists->list_size(key);
|
|
460
|
+
InvertedLists::ScopedCodes scodes (ivf.invlists, key);
|
|
461
|
+
std::unique_ptr<InvertedLists::ScopedIds> sids;
|
|
462
|
+
const Index::idx_t * ids = nullptr;
|
|
463
|
+
|
|
464
|
+
if (!store_pairs) {
|
|
465
|
+
sids.reset (new InvertedLists::ScopedIds (ivf.invlists, key));
|
|
466
|
+
ids = sids->get();
|
|
467
|
+
}
|
|
468
|
+
|
|
469
|
+
nheap += scanner->scan_codes (list_size, scodes.get(),
|
|
470
|
+
ids, simi, idxi, k);
|
|
471
|
+
|
|
472
|
+
nscan += list_size;
|
|
473
|
+
if (max_codes && nscan >= max_codes)
|
|
474
|
+
break;
|
|
475
|
+
}
|
|
476
|
+
|
|
477
|
+
ndis += nscan;
|
|
478
|
+
if (metric_type == METRIC_INNER_PRODUCT) {
|
|
479
|
+
heap_reorder<HeapForIP> (k, simi, idxi);
|
|
480
|
+
} else {
|
|
481
|
+
heap_reorder<HeapForL2> (k, simi, idxi);
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
} // parallel for
|
|
485
|
+
} // parallel
|
|
486
|
+
|
|
487
|
+
indexIVF_stats.nq += n;
|
|
488
|
+
indexIVF_stats.nlist += nlistv;
|
|
489
|
+
indexIVF_stats.ndis += ndis;
|
|
490
|
+
indexIVF_stats.nheap_updates += nheap;
|
|
491
|
+
|
|
492
|
+
}
|
|
493
|
+
|
|
494
|
+
template<class HammingComputer, bool store_pairs>
|
|
495
|
+
void search_knn_hamming_count(const IndexBinaryIVF& ivf,
|
|
496
|
+
size_t nx,
|
|
497
|
+
const uint8_t *x,
|
|
498
|
+
const idx_t *keys,
|
|
499
|
+
int k,
|
|
500
|
+
int32_t *distances,
|
|
501
|
+
idx_t *labels,
|
|
502
|
+
const IVFSearchParameters *params) {
|
|
503
|
+
const int nBuckets = ivf.d + 1;
|
|
504
|
+
std::vector<int> all_counters(nx * nBuckets, 0);
|
|
505
|
+
std::unique_ptr<idx_t[]> all_ids_per_dis(new idx_t[nx * nBuckets * k]);
|
|
506
|
+
|
|
507
|
+
long nprobe = params ? params->nprobe : ivf.nprobe;
|
|
508
|
+
long max_codes = params ? params->max_codes : ivf.max_codes;
|
|
509
|
+
|
|
510
|
+
std::vector<HCounterState<HammingComputer>> cs;
|
|
511
|
+
for (size_t i = 0; i < nx; ++i) {
|
|
512
|
+
cs.push_back(HCounterState<HammingComputer>(
|
|
513
|
+
all_counters.data() + i * nBuckets,
|
|
514
|
+
all_ids_per_dis.get() + i * nBuckets * k,
|
|
515
|
+
x + i * ivf.code_size,
|
|
516
|
+
ivf.d,
|
|
517
|
+
k
|
|
518
|
+
));
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
size_t nlistv = 0, ndis = 0;
|
|
522
|
+
|
|
523
|
+
#pragma omp parallel for reduction(+: nlistv, ndis)
|
|
524
|
+
for (size_t i = 0; i < nx; i++) {
|
|
525
|
+
const idx_t * keysi = keys + i * nprobe;
|
|
526
|
+
HCounterState<HammingComputer>& csi = cs[i];
|
|
527
|
+
|
|
528
|
+
size_t nscan = 0;
|
|
529
|
+
|
|
530
|
+
for (size_t ik = 0; ik < nprobe; ik++) {
|
|
531
|
+
idx_t key = keysi[ik]; /* select the list */
|
|
532
|
+
if (key < 0) {
|
|
533
|
+
// not enough centroids for multiprobe
|
|
534
|
+
continue;
|
|
535
|
+
}
|
|
536
|
+
FAISS_THROW_IF_NOT_FMT (
|
|
537
|
+
key < (idx_t) ivf.nlist,
|
|
538
|
+
"Invalid key=%ld at ik=%ld nlist=%ld\n",
|
|
539
|
+
key, ik, ivf.nlist);
|
|
540
|
+
|
|
541
|
+
nlistv++;
|
|
542
|
+
size_t list_size = ivf.invlists->list_size(key);
|
|
543
|
+
InvertedLists::ScopedCodes scodes (ivf.invlists, key);
|
|
544
|
+
const uint8_t *list_vecs = scodes.get();
|
|
545
|
+
const Index::idx_t *ids = store_pairs
|
|
546
|
+
? nullptr
|
|
547
|
+
: ivf.invlists->get_ids(key);
|
|
548
|
+
|
|
549
|
+
for (size_t j = 0; j < list_size; j++) {
|
|
550
|
+
const uint8_t * yj = list_vecs + ivf.code_size * j;
|
|
551
|
+
|
|
552
|
+
idx_t id = store_pairs ? (key << 32 | j) : ids[j];
|
|
553
|
+
csi.update_counter(yj, id);
|
|
554
|
+
}
|
|
555
|
+
if (ids)
|
|
556
|
+
ivf.invlists->release_ids (key, ids);
|
|
557
|
+
|
|
558
|
+
nscan += list_size;
|
|
559
|
+
if (max_codes && nscan >= max_codes)
|
|
560
|
+
break;
|
|
561
|
+
}
|
|
562
|
+
ndis += nscan;
|
|
563
|
+
|
|
564
|
+
int nres = 0;
|
|
565
|
+
for (int b = 0; b < nBuckets && nres < k; b++) {
|
|
566
|
+
for (int l = 0; l < csi.counters[b] && nres < k; l++) {
|
|
567
|
+
labels[i * k + nres] = csi.ids_per_dis[b * k + l];
|
|
568
|
+
distances[i * k + nres] = b;
|
|
569
|
+
nres++;
|
|
570
|
+
}
|
|
571
|
+
}
|
|
572
|
+
while (nres < k) {
|
|
573
|
+
labels[i * k + nres] = -1;
|
|
574
|
+
distances[i * k + nres] = std::numeric_limits<int32_t>::max();
|
|
575
|
+
++nres;
|
|
576
|
+
}
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
indexIVF_stats.nq += nx;
|
|
580
|
+
indexIVF_stats.nlist += nlistv;
|
|
581
|
+
indexIVF_stats.ndis += ndis;
|
|
582
|
+
}
|
|
583
|
+
|
|
584
|
+
|
|
585
|
+
|
|
586
|
+
template<bool store_pairs>
|
|
587
|
+
void search_knn_hamming_count_1 (
|
|
588
|
+
const IndexBinaryIVF& ivf,
|
|
589
|
+
size_t nx,
|
|
590
|
+
const uint8_t *x,
|
|
591
|
+
const idx_t *keys,
|
|
592
|
+
int k,
|
|
593
|
+
int32_t *distances,
|
|
594
|
+
idx_t *labels,
|
|
595
|
+
const IVFSearchParameters *params) {
|
|
596
|
+
switch (ivf.code_size) {
|
|
597
|
+
#define HANDLE_CS(cs) \
|
|
598
|
+
case cs: \
|
|
599
|
+
search_knn_hamming_count<HammingComputer ## cs, store_pairs>( \
|
|
600
|
+
ivf, nx, x, keys, k, distances, labels, params); \
|
|
601
|
+
break;
|
|
602
|
+
HANDLE_CS(4);
|
|
603
|
+
HANDLE_CS(8);
|
|
604
|
+
HANDLE_CS(16);
|
|
605
|
+
HANDLE_CS(20);
|
|
606
|
+
HANDLE_CS(32);
|
|
607
|
+
HANDLE_CS(64);
|
|
608
|
+
#undef HANDLE_CS
|
|
609
|
+
default:
|
|
610
|
+
if (ivf.code_size % 8 == 0) {
|
|
611
|
+
search_knn_hamming_count<HammingComputerM8, store_pairs>
|
|
612
|
+
(ivf, nx, x, keys, k, distances, labels, params);
|
|
613
|
+
} else if (ivf.code_size % 4 == 0) {
|
|
614
|
+
search_knn_hamming_count<HammingComputerM4, store_pairs>
|
|
615
|
+
(ivf, nx, x, keys, k, distances, labels, params);
|
|
616
|
+
} else {
|
|
617
|
+
search_knn_hamming_count<HammingComputerDefault, store_pairs>
|
|
618
|
+
(ivf, nx, x, keys, k, distances, labels, params);
|
|
619
|
+
}
|
|
620
|
+
break;
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
}
|
|
624
|
+
|
|
625
|
+
} // namespace
|
|
626
|
+
|
|
627
|
+
/// Return the scanner chosen by select_IVFBinaryScannerL2 for this index's
/// code_size; the runtime store_pairs flag selects the template variant.
BinaryInvertedListScanner *IndexBinaryIVF::get_InvertedListScanner
      (bool store_pairs) const
{
    if (!store_pairs) {
        return select_IVFBinaryScannerL2<false> (code_size);
    }
    return select_IVFBinaryScannerL2<true> (code_size);
}
|
|
636
|
+
|
|
637
|
+
/// Search with the lists to probe already chosen by the caller (idx).
/// When use_heap is set the heap-based routine is used and additionally
/// receives coarse_dis; otherwise the counting-based routine is used, with
/// store_pairs turned into a template argument.
void IndexBinaryIVF::search_preassigned(idx_t n, const uint8_t *x, idx_t k,
                                        const idx_t *idx,
                                        const int32_t * coarse_dis,
                                        int32_t *distances, idx_t *labels,
                                        bool store_pairs,
                                        const IVFSearchParameters *params
                                        ) const {
    if (use_heap) {
        search_knn_hamming_heap (*this, n, x, k, idx, coarse_dis,
                                 distances, labels, store_pairs,
                                 params);
        return;
    }

    // Counting-based path: coarse_dis is not used here.
    if (store_pairs) {
        search_knn_hamming_count_1<true>
            (*this, n, x, idx, k, distances, labels, params);
        return;
    }
    search_knn_hamming_count_1<false>
        (*this, n, x, idx, k, distances, labels, params);
}
|
|
659
|
+
|
|
660
|
+
/// Release only what this index owns: invlists when own_invlists is set,
/// then quantizer when own_fields is set.
IndexBinaryIVF::~IndexBinaryIVF() {
    if (own_invlists) delete invlists;
    if (own_fields) delete quantizer;
}
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
} // namespace faiss
|