faiss 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +103 -3
- data/ext/faiss/ext.cpp +99 -32
- data/ext/faiss/extconf.rb +12 -2
- data/lib/faiss/ext.bundle +0 -0
- data/lib/faiss/index.rb +3 -3
- data/lib/faiss/index_binary.rb +3 -3
- data/lib/faiss/kmeans.rb +1 -1
- data/lib/faiss/pca_matrix.rb +2 -2
- data/lib/faiss/product_quantizer.rb +3 -3
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/AutoTune.cpp +719 -0
- data/vendor/faiss/AutoTune.h +212 -0
- data/vendor/faiss/Clustering.cpp +261 -0
- data/vendor/faiss/Clustering.h +101 -0
- data/vendor/faiss/IVFlib.cpp +339 -0
- data/vendor/faiss/IVFlib.h +132 -0
- data/vendor/faiss/Index.cpp +171 -0
- data/vendor/faiss/Index.h +261 -0
- data/vendor/faiss/Index2Layer.cpp +437 -0
- data/vendor/faiss/Index2Layer.h +85 -0
- data/vendor/faiss/IndexBinary.cpp +77 -0
- data/vendor/faiss/IndexBinary.h +163 -0
- data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
- data/vendor/faiss/IndexBinaryFlat.h +54 -0
- data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
- data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
- data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
- data/vendor/faiss/IndexBinaryHNSW.h +56 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
- data/vendor/faiss/IndexBinaryIVF.h +211 -0
- data/vendor/faiss/IndexFlat.cpp +508 -0
- data/vendor/faiss/IndexFlat.h +175 -0
- data/vendor/faiss/IndexHNSW.cpp +1090 -0
- data/vendor/faiss/IndexHNSW.h +170 -0
- data/vendor/faiss/IndexIVF.cpp +909 -0
- data/vendor/faiss/IndexIVF.h +353 -0
- data/vendor/faiss/IndexIVFFlat.cpp +502 -0
- data/vendor/faiss/IndexIVFFlat.h +118 -0
- data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
- data/vendor/faiss/IndexIVFPQ.h +161 -0
- data/vendor/faiss/IndexIVFPQR.cpp +219 -0
- data/vendor/faiss/IndexIVFPQR.h +65 -0
- data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
- data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
- data/vendor/faiss/IndexLSH.cpp +225 -0
- data/vendor/faiss/IndexLSH.h +87 -0
- data/vendor/faiss/IndexLattice.cpp +143 -0
- data/vendor/faiss/IndexLattice.h +68 -0
- data/vendor/faiss/IndexPQ.cpp +1188 -0
- data/vendor/faiss/IndexPQ.h +199 -0
- data/vendor/faiss/IndexPreTransform.cpp +288 -0
- data/vendor/faiss/IndexPreTransform.h +91 -0
- data/vendor/faiss/IndexReplicas.cpp +123 -0
- data/vendor/faiss/IndexReplicas.h +76 -0
- data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
- data/vendor/faiss/IndexScalarQuantizer.h +127 -0
- data/vendor/faiss/IndexShards.cpp +317 -0
- data/vendor/faiss/IndexShards.h +100 -0
- data/vendor/faiss/InvertedLists.cpp +623 -0
- data/vendor/faiss/InvertedLists.h +334 -0
- data/vendor/faiss/LICENSE +21 -0
- data/vendor/faiss/MatrixStats.cpp +252 -0
- data/vendor/faiss/MatrixStats.h +62 -0
- data/vendor/faiss/MetaIndexes.cpp +351 -0
- data/vendor/faiss/MetaIndexes.h +126 -0
- data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
- data/vendor/faiss/OnDiskInvertedLists.h +127 -0
- data/vendor/faiss/VectorTransform.cpp +1157 -0
- data/vendor/faiss/VectorTransform.h +322 -0
- data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
- data/vendor/faiss/c_api/AutoTune_c.h +64 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
- data/vendor/faiss/c_api/Clustering_c.h +117 -0
- data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
- data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
- data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
- data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
- data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
- data/vendor/faiss/c_api/IndexShards_c.h +42 -0
- data/vendor/faiss/c_api/Index_c.cpp +105 -0
- data/vendor/faiss/c_api/Index_c.h +183 -0
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
- data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
- data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
- data/vendor/faiss/c_api/clone_index_c.h +32 -0
- data/vendor/faiss/c_api/error_c.h +42 -0
- data/vendor/faiss/c_api/error_impl.cpp +27 -0
- data/vendor/faiss/c_api/error_impl.h +16 -0
- data/vendor/faiss/c_api/faiss_c.h +58 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
- data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
- data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
- data/vendor/faiss/c_api/index_factory_c.h +30 -0
- data/vendor/faiss/c_api/index_io_c.cpp +42 -0
- data/vendor/faiss/c_api/index_io_c.h +50 -0
- data/vendor/faiss/c_api/macros_impl.h +110 -0
- data/vendor/faiss/clone_index.cpp +147 -0
- data/vendor/faiss/clone_index.h +38 -0
- data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
- data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
- data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
- data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
- data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
- data/vendor/faiss/gpu/GpuCloner.h +82 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
- data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
- data/vendor/faiss/gpu/GpuDistance.h +52 -0
- data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
- data/vendor/faiss/gpu/GpuIndex.h +148 -0
- data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
- data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
- data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
- data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
- data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
- data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
- data/vendor/faiss/gpu/GpuResources.cpp +52 -0
- data/vendor/faiss/gpu/GpuResources.h +73 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
- data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
- data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
- data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
- data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
- data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
- data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
- data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
- data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
- data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
- data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
- data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
- data/vendor/faiss/gpu/test/TestUtils.h +93 -0
- data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
- data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
- data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
- data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
- data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
- data/vendor/faiss/gpu/utils/Timer.h +52 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
- data/vendor/faiss/impl/FaissAssert.h +95 -0
- data/vendor/faiss/impl/FaissException.cpp +66 -0
- data/vendor/faiss/impl/FaissException.h +71 -0
- data/vendor/faiss/impl/HNSW.cpp +818 -0
- data/vendor/faiss/impl/HNSW.h +275 -0
- data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
- data/vendor/faiss/impl/PolysemousTraining.h +158 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
- data/vendor/faiss/impl/ProductQuantizer.h +242 -0
- data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
- data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
- data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
- data/vendor/faiss/impl/ThreadedIndex.h +80 -0
- data/vendor/faiss/impl/index_read.cpp +793 -0
- data/vendor/faiss/impl/index_write.cpp +558 -0
- data/vendor/faiss/impl/io.cpp +142 -0
- data/vendor/faiss/impl/io.h +98 -0
- data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
- data/vendor/faiss/impl/lattice_Zn.h +199 -0
- data/vendor/faiss/index_factory.cpp +392 -0
- data/vendor/faiss/index_factory.h +25 -0
- data/vendor/faiss/index_io.h +75 -0
- data/vendor/faiss/misc/test_blas.cpp +84 -0
- data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
- data/vendor/faiss/tests/test_merge.cpp +258 -0
- data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
- data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
- data/vendor/faiss/tests/test_params_override.cpp +231 -0
- data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
- data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
- data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
- data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
- data/vendor/faiss/utils/Heap.cpp +122 -0
- data/vendor/faiss/utils/Heap.h +495 -0
- data/vendor/faiss/utils/WorkerThread.cpp +126 -0
- data/vendor/faiss/utils/WorkerThread.h +61 -0
- data/vendor/faiss/utils/distances.cpp +765 -0
- data/vendor/faiss/utils/distances.h +243 -0
- data/vendor/faiss/utils/distances_simd.cpp +809 -0
- data/vendor/faiss/utils/extra_distances.cpp +336 -0
- data/vendor/faiss/utils/extra_distances.h +54 -0
- data/vendor/faiss/utils/hamming-inl.h +472 -0
- data/vendor/faiss/utils/hamming.cpp +792 -0
- data/vendor/faiss/utils/hamming.h +220 -0
- data/vendor/faiss/utils/random.cpp +192 -0
- data/vendor/faiss/utils/random.h +60 -0
- data/vendor/faiss/utils/utils.cpp +783 -0
- data/vendor/faiss/utils/utils.h +181 -0
- metadata +216 -2
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#pragma once
|
|
11
|
+
|
|
12
|
+
#include <vector>
|
|
13
|
+
|
|
14
|
+
#include <faiss/IndexPQ.h>
|
|
15
|
+
#include <faiss/IndexIVF.h>
|
|
16
|
+
|
|
17
|
+
namespace faiss {
|
|
18
|
+
|
|
19
|
+
struct IndexIVFPQ;
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
/** Same as an IndexIVFPQ without the inverted lists: codes are stored sequentially
|
|
23
|
+
*
|
|
24
|
+
* The class is mainly inteded to store encoded vectors that can be
|
|
25
|
+
* accessed randomly, the search function is not implemented.
|
|
26
|
+
*/
|
|
27
|
+
struct Index2Layer: Index {
|
|
28
|
+
/// first level quantizer
|
|
29
|
+
Level1Quantizer q1;
|
|
30
|
+
|
|
31
|
+
/// second level quantizer is always a PQ
|
|
32
|
+
ProductQuantizer pq;
|
|
33
|
+
|
|
34
|
+
/// Codes. Size ntotal * code_size.
|
|
35
|
+
std::vector<uint8_t> codes;
|
|
36
|
+
|
|
37
|
+
/// size of the code for the first level (ceil(log8(q1.nlist)))
|
|
38
|
+
size_t code_size_1;
|
|
39
|
+
|
|
40
|
+
/// size of the code for the second level
|
|
41
|
+
size_t code_size_2;
|
|
42
|
+
|
|
43
|
+
/// code_size_1 + code_size_2
|
|
44
|
+
size_t code_size;
|
|
45
|
+
|
|
46
|
+
Index2Layer (Index * quantizer, size_t nlist,
|
|
47
|
+
int M, int nbit = 8,
|
|
48
|
+
MetricType metric = METRIC_L2);
|
|
49
|
+
|
|
50
|
+
Index2Layer ();
|
|
51
|
+
~Index2Layer ();
|
|
52
|
+
|
|
53
|
+
void train(idx_t n, const float* x) override;
|
|
54
|
+
|
|
55
|
+
void add(idx_t n, const float* x) override;
|
|
56
|
+
|
|
57
|
+
/// not implemented
|
|
58
|
+
void search(
|
|
59
|
+
idx_t n,
|
|
60
|
+
const float* x,
|
|
61
|
+
idx_t k,
|
|
62
|
+
float* distances,
|
|
63
|
+
idx_t* labels) const override;
|
|
64
|
+
|
|
65
|
+
void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
|
|
66
|
+
|
|
67
|
+
void reconstruct(idx_t key, float* recons) const override;
|
|
68
|
+
|
|
69
|
+
void reset() override;
|
|
70
|
+
|
|
71
|
+
DistanceComputer * get_distance_computer() const override;
|
|
72
|
+
|
|
73
|
+
/// transfer the flat codes to an IVFPQ index
|
|
74
|
+
void transfer_to_IVFPQ(IndexIVFPQ & other) const;
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
/* The standalone codec interface */
|
|
78
|
+
size_t sa_code_size () const override;
|
|
79
|
+
void sa_encode (idx_t n, const float *x, uint8_t *bytes) const override;
|
|
80
|
+
void sa_decode (idx_t n, const uint8_t *bytes, float *x) const override;
|
|
81
|
+
|
|
82
|
+
};
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
} // namespace faiss
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#include <faiss/IndexBinary.h>
|
|
11
|
+
#include <faiss/impl/FaissAssert.h>
|
|
12
|
+
|
|
13
|
+
#include <cstring>
|
|
14
|
+
|
|
15
|
+
namespace faiss {
|
|
16
|
+
|
|
17
|
+
/// Out-of-line virtual destructor; there is nothing to release at this level.
IndexBinary::~IndexBinary() = default;
|
|
18
|
+
|
|
19
|
+
/// Default training hook. Both arguments are ignored.
void IndexBinary::train(idx_t /*n*/, const uint8_t* /*x*/) {
  // Intentionally empty: the base implementation does nothing.
}
|
|
22
|
+
|
|
23
|
+
/// Default range search: not provided by the base class. All arguments are
/// ignored and the call is rejected through FAISS_THROW_MSG.
void IndexBinary::range_search(idx_t /*n*/,
                               const uint8_t* /*x*/,
                               int /*radius*/,
                               RangeSearchResult* /*result*/) const {
  FAISS_THROW_MSG("range search not implemented");
}
|
|
27
|
+
|
|
28
|
+
/// Label-only variant of search(): runs search() and discards the distances.
///
/// @param n       number of query vectors
/// @param x       query vectors
/// @param labels  output labels, n * k entries
/// @param k       number of results requested per query
void IndexBinary::assign(idx_t n, const uint8_t* x, idx_t* labels, idx_t k) {
  // search() needs somewhere to write distances, so hand it a scratch
  // buffer that the guard frees when this function exits.
  int32_t* scratch = new int32_t[n * k];
  ScopeDeleter<int32_t> scratch_guard(scratch);
  search(n, x, k, scratch, labels);
}
|
|
33
|
+
|
|
34
|
+
/// Default add_with_ids: not provided by the base class. All arguments are
/// ignored and the call is rejected through FAISS_THROW_MSG.
void IndexBinary::add_with_ids(idx_t /*n*/,
                               const uint8_t* /*x*/,
                               const idx_t* /*xids*/) {
  FAISS_THROW_MSG("add_with_ids not implemented for this type of index");
}
|
|
37
|
+
|
|
38
|
+
/// Default remove_ids: not provided by the base class. The selector is
/// ignored and the call is rejected through FAISS_THROW_MSG.
size_t IndexBinary::remove_ids(const IDSelector& /*sel*/) {
  FAISS_THROW_MSG("remove_ids not implemented for this type of index");
  // Kept so that every path of this non-void function has a return statement.
  return 0;
}
|
|
42
|
+
|
|
43
|
+
/// Default reconstruct: not provided by the base class. Both arguments are
/// ignored and the call is rejected through FAISS_THROW_MSG.
void IndexBinary::reconstruct(idx_t /*key*/, uint8_t* /*recons*/) const {
  FAISS_THROW_MSG("reconstruct not implemented for this type of index");
}
|
|
46
|
+
|
|
47
|
+
/// Reconstruct the vectors with ids i0 .. i0 + ni - 1 by calling
/// reconstruct() once per id.
///
/// @param i0      id of the first vector to reconstruct
/// @param ni      number of consecutive vectors to reconstruct
/// @param recons  output buffer; vector i is written at byte offset
///                i * code_size, so ni * code_size bytes are used in total
void IndexBinary::reconstruct_n(idx_t i0, idx_t ni, uint8_t* recons) const {
  for (idx_t i = 0; i < ni; i++) {
    // Advance by code_size (bytes per vector), not d (bits per vector):
    // a d-byte stride would run past a buffer of ni * d / 8 bytes.
    reconstruct(i0 + i, recons + i * code_size);
  }
}
|
|
52
|
+
|
|
53
|
+
/// Run search() and then reconstruct the vector behind every returned label.
///
/// @param n          number of query vectors
/// @param x          query vectors
/// @param k          number of results per query
/// @param distances  output distances, n * k entries
/// @param labels     output labels, n * k entries
/// @param recons     output buffer; result slot (i, j) starts at byte offset
///                   (i * k + j) * d
void IndexBinary::search_and_reconstruct(idx_t n, const uint8_t* x, idx_t k,
                                         int32_t* distances, idx_t* labels,
                                         uint8_t* recons) const {
  search(n, x, k, distances, labels);

  for (idx_t query = 0; query < n; ++query) {
    for (idx_t rank = 0; rank < k; ++rank) {
      const idx_t slot = query * k + rank;
      const idx_t id = labels[slot];
      uint8_t* out = recons + slot * d;

      if (id >= 0) {
        reconstruct(id, out);
        continue;
      }

      // Negative label: there is no result for this slot, so mark it by
      // setting all d bytes of the slot to 0xFF.
      memset(out, -1, sizeof(*out) * d);
    }
  }
}
|
|
71
|
+
|
|
72
|
+
/// Print the dynamic class name (as reported by typeid) and the number of
/// indexed vectors to stdout.
void IndexBinary::display() const {
  // ntotal is an idx_t, which need not be `long` on every platform. Cast to
  // long long and use %lld so the format and argument types always agree.
  printf("Index: %s -> %lld elements\n",
         typeid(*this).name(),
         static_cast<long long>(ntotal));
}
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
} // namespace faiss
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#ifndef FAISS_INDEX_BINARY_H
|
|
11
|
+
#define FAISS_INDEX_BINARY_H
|
|
12
|
+
|
|
13
|
+
#include <cstdio>
|
|
14
|
+
#include <typeinfo>
|
|
15
|
+
#include <string>
|
|
16
|
+
#include <sstream>
|
|
17
|
+
|
|
18
|
+
#include <faiss/impl/FaissAssert.h>
|
|
19
|
+
#include <faiss/Index.h>
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
namespace faiss {
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
/// Forward declarations see AuxIndexStructures.h
|
|
26
|
+
struct IDSelector;
|
|
27
|
+
struct RangeSearchResult;
|
|
28
|
+
|
|
29
|
+
/** Abstract structure for a binary index.
|
|
30
|
+
*
|
|
31
|
+
* Supports adding vertices and searching them.
|
|
32
|
+
*
|
|
33
|
+
* All queries are symmetric because there is no distinction between codes and
|
|
34
|
+
* vectors.
|
|
35
|
+
*/
|
|
36
|
+
struct IndexBinary {
|
|
37
|
+
using idx_t = Index::idx_t; ///< all indices are this type
|
|
38
|
+
using component_t = uint8_t;
|
|
39
|
+
using distance_t = int32_t;
|
|
40
|
+
|
|
41
|
+
int d; ///< vector dimension
|
|
42
|
+
int code_size; ///< number of bytes per vector ( = d / 8 )
|
|
43
|
+
idx_t ntotal; ///< total nb of indexed vectors
|
|
44
|
+
bool verbose; ///< verbosity level
|
|
45
|
+
|
|
46
|
+
/// set if the Index does not require training, or if training is done already
|
|
47
|
+
bool is_trained;
|
|
48
|
+
|
|
49
|
+
/// type of metric this index uses for search
|
|
50
|
+
MetricType metric_type;
|
|
51
|
+
|
|
52
|
+
explicit IndexBinary(idx_t d = 0, MetricType metric = METRIC_L2)
|
|
53
|
+
: d(d),
|
|
54
|
+
code_size(d / 8),
|
|
55
|
+
ntotal(0),
|
|
56
|
+
verbose(false),
|
|
57
|
+
is_trained(true),
|
|
58
|
+
metric_type(metric) {
|
|
59
|
+
FAISS_THROW_IF_NOT(d % 8 == 0);
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
virtual ~IndexBinary();
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
/** Perform training on a representative set of vectors.
|
|
66
|
+
*
|
|
67
|
+
* @param n nb of training vectors
|
|
68
|
+
* @param x training vecors, size n * d / 8
|
|
69
|
+
*/
|
|
70
|
+
virtual void train(idx_t n, const uint8_t *x);
|
|
71
|
+
|
|
72
|
+
/** Add n vectors of dimension d to the index.
|
|
73
|
+
*
|
|
74
|
+
* Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
|
|
75
|
+
* @param x input matrix, size n * d / 8
|
|
76
|
+
*/
|
|
77
|
+
virtual void add(idx_t n, const uint8_t *x) = 0;
|
|
78
|
+
|
|
79
|
+
/** Same as add, but stores xids instead of sequential ids.
|
|
80
|
+
*
|
|
81
|
+
* The default implementation fails with an assertion, as it is
|
|
82
|
+
* not supported by all indexes.
|
|
83
|
+
*
|
|
84
|
+
* @param xids if non-null, ids to store for the vectors (size n)
|
|
85
|
+
*/
|
|
86
|
+
virtual void add_with_ids(idx_t n, const uint8_t *x, const idx_t *xids);
|
|
87
|
+
|
|
88
|
+
/** Query n vectors of dimension d to the index.
|
|
89
|
+
*
|
|
90
|
+
* return at most k vectors. If there are not enough results for a
|
|
91
|
+
* query, the result array is padded with -1s.
|
|
92
|
+
*
|
|
93
|
+
* @param x input vectors to search, size n * d / 8
|
|
94
|
+
* @param labels output labels of the NNs, size n*k
|
|
95
|
+
* @param distances output pairwise distances, size n*k
|
|
96
|
+
*/
|
|
97
|
+
virtual void search(idx_t n, const uint8_t *x, idx_t k,
|
|
98
|
+
int32_t *distances, idx_t *labels) const = 0;
|
|
99
|
+
|
|
100
|
+
/** Query n vectors of dimension d to the index.
|
|
101
|
+
*
|
|
102
|
+
* return all vectors with distance < radius. Note that many
|
|
103
|
+
* indexes do not implement the range_search (only the k-NN search
|
|
104
|
+
* is mandatory).
|
|
105
|
+
*
|
|
106
|
+
* @param x input vectors to search, size n * d / 8
|
|
107
|
+
* @param radius search radius
|
|
108
|
+
* @param result result table
|
|
109
|
+
*/
|
|
110
|
+
virtual void range_search(idx_t n, const uint8_t *x, int radius,
|
|
111
|
+
RangeSearchResult *result) const;
|
|
112
|
+
|
|
113
|
+
/** Return the indexes of the k vectors closest to the query x.
|
|
114
|
+
*
|
|
115
|
+
* This function is identical to search but only returns labels of neighbors.
|
|
116
|
+
* @param x input vectors to search, size n * d / 8
|
|
117
|
+
* @param labels output labels of the NNs, size n*k
|
|
118
|
+
*/
|
|
119
|
+
void assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k = 1);
|
|
120
|
+
|
|
121
|
+
/// Removes all elements from the database.
|
|
122
|
+
virtual void reset() = 0;
|
|
123
|
+
|
|
124
|
+
/** Removes IDs from the index. Not supported by all indexes.
|
|
125
|
+
*/
|
|
126
|
+
virtual size_t remove_ids(const IDSelector& sel);
|
|
127
|
+
|
|
128
|
+
/** Reconstruct a stored vector.
|
|
129
|
+
*
|
|
130
|
+
* This function may not be defined for some indexes.
|
|
131
|
+
* @param key id of the vector to reconstruct
|
|
132
|
+
* @param recons reconstucted vector (size d / 8)
|
|
133
|
+
*/
|
|
134
|
+
virtual void reconstruct(idx_t key, uint8_t *recons) const;
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
/** Reconstruct vectors i0 to i0 + ni - 1.
|
|
138
|
+
*
|
|
139
|
+
* This function may not be defined for some indexes.
|
|
140
|
+
* @param recons reconstucted vectors (size ni * d / 8)
|
|
141
|
+
*/
|
|
142
|
+
virtual void reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const;
|
|
143
|
+
|
|
144
|
+
/** Similar to search, but also reconstructs the stored vectors (or an
|
|
145
|
+
* approximation in the case of lossy coding) for the search results.
|
|
146
|
+
*
|
|
147
|
+
* If there are not enough results for a query, the resulting array
|
|
148
|
+
* is padded with -1s.
|
|
149
|
+
*
|
|
150
|
+
* @param recons reconstructed vectors size (n, k, d)
|
|
151
|
+
**/
|
|
152
|
+
virtual void search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k,
|
|
153
|
+
int32_t *distances, idx_t *labels,
|
|
154
|
+
uint8_t *recons) const;
|
|
155
|
+
|
|
156
|
+
/** Display the actual class name and some more info. */
|
|
157
|
+
void display() const;
|
|
158
|
+
};
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
} // namespace faiss
|
|
162
|
+
|
|
163
|
+
#endif // FAISS_INDEX_BINARY_H
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#include <faiss/IndexBinaryFlat.h>
|
|
11
|
+
|
|
12
|
+
#include <cstring>
|
|
13
|
+
#include <faiss/utils/hamming.h>
|
|
14
|
+
#include <faiss/utils/utils.h>
|
|
15
|
+
#include <faiss/utils/Heap.h>
|
|
16
|
+
#include <faiss/impl/FaissAssert.h>
|
|
17
|
+
#include <faiss/impl/AuxIndexStructures.h>
|
|
18
|
+
|
|
19
|
+
namespace faiss {
|
|
20
|
+
|
|
21
|
+
/// Build an empty flat binary index; the dimension d is forwarded to the
/// IndexBinary base constructor.
IndexBinaryFlat::IndexBinaryFlat(idx_t d) : IndexBinary(d) {}
|
|
23
|
+
|
|
24
|
+
/// Append n vectors to the database.
///
/// @param n  number of vectors to add
/// @param x  input vectors; n * code_size bytes are read
void IndexBinaryFlat::add(idx_t n, const uint8_t* x) {
  const uint8_t* first = x;
  const uint8_t* last = x + n * code_size;
  xb.insert(xb.end(), first, last);
  ntotal += n;
}
|
|
28
|
+
|
|
29
|
+
/// Remove every stored vector and set the vector count back to zero.
void IndexBinaryFlat::reset() {
  ntotal = 0;
  xb.clear();
}
|
|
33
|
+
|
|
34
|
+
/// Search the whole database for the k nearest neighbours of each query.
///
/// Queries are processed in batches of query_batch_size. Each batch is
/// handed to hammings_knn_hc when use_heap is set and to hammings_knn_mc
/// otherwise.
///
/// @param n          number of query vectors
/// @param x          query vectors, code_size bytes each
/// @param k          number of results per query
/// @param distances  output distances, n * k entries
/// @param labels     output labels, n * k entries
void IndexBinaryFlat::search(idx_t n, const uint8_t* x, idx_t k,
                             int32_t* distances, idx_t* labels) const {
  // query_batch_size is a public field; a value of 0 would make the loop
  // below never advance, so reject it up front instead of hanging.
  FAISS_THROW_IF_NOT(query_batch_size > 0);

  const idx_t block_size = query_batch_size;
  for (idx_t s = 0; s < n; s += block_size) {
    // The last batch may hold fewer than block_size queries.
    idx_t nn = block_size;
    if (s + block_size > n) {
      nn = n - s;
    }

    if (use_heap) {
      // We see the distances and labels as heaps.
      int_maxheap_array_t res = {
        size_t(nn), size_t(k), labels + s * k, distances + s * k
      };

      hammings_knn_hc(&res, x + s * code_size, xb.data(), ntotal, code_size,
                      /* ordered = */ true);
    } else {
      hammings_knn_mc(x + s * code_size, xb.data(), nn, ntotal, k, code_size,
                      distances + s * k, labels + s * k);
    }
  }
}
|
|
57
|
+
|
|
58
|
+
size_t IndexBinaryFlat::remove_ids(const IDSelector& sel) {
|
|
59
|
+
idx_t j = 0;
|
|
60
|
+
for (idx_t i = 0; i < ntotal; i++) {
|
|
61
|
+
if (sel.is_member(i)) {
|
|
62
|
+
// should be removed
|
|
63
|
+
} else {
|
|
64
|
+
if (i > j) {
|
|
65
|
+
memmove(&xb[code_size * j], &xb[code_size * i], sizeof(xb[0]) * code_size);
|
|
66
|
+
}
|
|
67
|
+
j++;
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
long nremove = ntotal - j;
|
|
71
|
+
if (nremove > 0) {
|
|
72
|
+
ntotal = j;
|
|
73
|
+
xb.resize(ntotal * code_size);
|
|
74
|
+
}
|
|
75
|
+
return nremove;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
/// Copy the stored vector at position key into recons.
///
/// @param key     position of the vector in xb; not range-checked here
/// @param recons  output buffer; code_size bytes are written
void IndexBinaryFlat::reconstruct(idx_t key, uint8_t* recons) const {
  const uint8_t* stored = &xb[code_size * key];
  memcpy(recons, stored, sizeof(*recons) * code_size);
}
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
} // namespace faiss
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#ifndef INDEX_BINARY_FLAT_H
|
|
11
|
+
#define INDEX_BINARY_FLAT_H
|
|
12
|
+
|
|
13
|
+
#include <vector>
|
|
14
|
+
|
|
15
|
+
#include <faiss/IndexBinary.h>
|
|
16
|
+
|
|
17
|
+
namespace faiss {
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
/** Index that stores the full vectors and performs exhaustive search. */
|
|
21
|
+
struct IndexBinaryFlat : IndexBinary {
|
|
22
|
+
/// database vectors, size ntotal * d / 8
|
|
23
|
+
std::vector<uint8_t> xb;
|
|
24
|
+
|
|
25
|
+
/** Select between using a heap or counting to select the k smallest values
|
|
26
|
+
* when scanning inverted lists.
|
|
27
|
+
*/
|
|
28
|
+
bool use_heap = true;
|
|
29
|
+
|
|
30
|
+
size_t query_batch_size = 32;
|
|
31
|
+
|
|
32
|
+
explicit IndexBinaryFlat(idx_t d);
|
|
33
|
+
|
|
34
|
+
void add(idx_t n, const uint8_t *x) override;
|
|
35
|
+
|
|
36
|
+
void reset() override;
|
|
37
|
+
|
|
38
|
+
void search(idx_t n, const uint8_t *x, idx_t k,
|
|
39
|
+
int32_t *distances, idx_t *labels) const override;
|
|
40
|
+
|
|
41
|
+
void reconstruct(idx_t key, uint8_t *recons) const override;
|
|
42
|
+
|
|
43
|
+
/** Remove some ids. Note that because of the indexing structure,
|
|
44
|
+
* the semantics of this operation are different from the usual ones:
|
|
45
|
+
* the new ids are shifted. */
|
|
46
|
+
size_t remove_ids(const IDSelector& sel) override;
|
|
47
|
+
|
|
48
|
+
IndexBinaryFlat() {}
|
|
49
|
+
};
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
} // namespace faiss
|
|
53
|
+
|
|
54
|
+
#endif // INDEX_BINARY_FLAT_H
|