faiss 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +103 -3
- data/ext/faiss/ext.cpp +99 -32
- data/ext/faiss/extconf.rb +12 -2
- data/lib/faiss/ext.bundle +0 -0
- data/lib/faiss/index.rb +3 -3
- data/lib/faiss/index_binary.rb +3 -3
- data/lib/faiss/kmeans.rb +1 -1
- data/lib/faiss/pca_matrix.rb +2 -2
- data/lib/faiss/product_quantizer.rb +3 -3
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/AutoTune.cpp +719 -0
- data/vendor/faiss/AutoTune.h +212 -0
- data/vendor/faiss/Clustering.cpp +261 -0
- data/vendor/faiss/Clustering.h +101 -0
- data/vendor/faiss/IVFlib.cpp +339 -0
- data/vendor/faiss/IVFlib.h +132 -0
- data/vendor/faiss/Index.cpp +171 -0
- data/vendor/faiss/Index.h +261 -0
- data/vendor/faiss/Index2Layer.cpp +437 -0
- data/vendor/faiss/Index2Layer.h +85 -0
- data/vendor/faiss/IndexBinary.cpp +77 -0
- data/vendor/faiss/IndexBinary.h +163 -0
- data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
- data/vendor/faiss/IndexBinaryFlat.h +54 -0
- data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
- data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
- data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
- data/vendor/faiss/IndexBinaryHNSW.h +56 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
- data/vendor/faiss/IndexBinaryIVF.h +211 -0
- data/vendor/faiss/IndexFlat.cpp +508 -0
- data/vendor/faiss/IndexFlat.h +175 -0
- data/vendor/faiss/IndexHNSW.cpp +1090 -0
- data/vendor/faiss/IndexHNSW.h +170 -0
- data/vendor/faiss/IndexIVF.cpp +909 -0
- data/vendor/faiss/IndexIVF.h +353 -0
- data/vendor/faiss/IndexIVFFlat.cpp +502 -0
- data/vendor/faiss/IndexIVFFlat.h +118 -0
- data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
- data/vendor/faiss/IndexIVFPQ.h +161 -0
- data/vendor/faiss/IndexIVFPQR.cpp +219 -0
- data/vendor/faiss/IndexIVFPQR.h +65 -0
- data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
- data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
- data/vendor/faiss/IndexLSH.cpp +225 -0
- data/vendor/faiss/IndexLSH.h +87 -0
- data/vendor/faiss/IndexLattice.cpp +143 -0
- data/vendor/faiss/IndexLattice.h +68 -0
- data/vendor/faiss/IndexPQ.cpp +1188 -0
- data/vendor/faiss/IndexPQ.h +199 -0
- data/vendor/faiss/IndexPreTransform.cpp +288 -0
- data/vendor/faiss/IndexPreTransform.h +91 -0
- data/vendor/faiss/IndexReplicas.cpp +123 -0
- data/vendor/faiss/IndexReplicas.h +76 -0
- data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
- data/vendor/faiss/IndexScalarQuantizer.h +127 -0
- data/vendor/faiss/IndexShards.cpp +317 -0
- data/vendor/faiss/IndexShards.h +100 -0
- data/vendor/faiss/InvertedLists.cpp +623 -0
- data/vendor/faiss/InvertedLists.h +334 -0
- data/vendor/faiss/LICENSE +21 -0
- data/vendor/faiss/MatrixStats.cpp +252 -0
- data/vendor/faiss/MatrixStats.h +62 -0
- data/vendor/faiss/MetaIndexes.cpp +351 -0
- data/vendor/faiss/MetaIndexes.h +126 -0
- data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
- data/vendor/faiss/OnDiskInvertedLists.h +127 -0
- data/vendor/faiss/VectorTransform.cpp +1157 -0
- data/vendor/faiss/VectorTransform.h +322 -0
- data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
- data/vendor/faiss/c_api/AutoTune_c.h +64 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
- data/vendor/faiss/c_api/Clustering_c.h +117 -0
- data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
- data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
- data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
- data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
- data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
- data/vendor/faiss/c_api/IndexShards_c.h +42 -0
- data/vendor/faiss/c_api/Index_c.cpp +105 -0
- data/vendor/faiss/c_api/Index_c.h +183 -0
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
- data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
- data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
- data/vendor/faiss/c_api/clone_index_c.h +32 -0
- data/vendor/faiss/c_api/error_c.h +42 -0
- data/vendor/faiss/c_api/error_impl.cpp +27 -0
- data/vendor/faiss/c_api/error_impl.h +16 -0
- data/vendor/faiss/c_api/faiss_c.h +58 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
- data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
- data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
- data/vendor/faiss/c_api/index_factory_c.h +30 -0
- data/vendor/faiss/c_api/index_io_c.cpp +42 -0
- data/vendor/faiss/c_api/index_io_c.h +50 -0
- data/vendor/faiss/c_api/macros_impl.h +110 -0
- data/vendor/faiss/clone_index.cpp +147 -0
- data/vendor/faiss/clone_index.h +38 -0
- data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
- data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
- data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
- data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
- data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
- data/vendor/faiss/gpu/GpuCloner.h +82 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
- data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
- data/vendor/faiss/gpu/GpuDistance.h +52 -0
- data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
- data/vendor/faiss/gpu/GpuIndex.h +148 -0
- data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
- data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
- data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
- data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
- data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
- data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
- data/vendor/faiss/gpu/GpuResources.cpp +52 -0
- data/vendor/faiss/gpu/GpuResources.h +73 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
- data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
- data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
- data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
- data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
- data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
- data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
- data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
- data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
- data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
- data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
- data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
- data/vendor/faiss/gpu/test/TestUtils.h +93 -0
- data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
- data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
- data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
- data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
- data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
- data/vendor/faiss/gpu/utils/Timer.h +52 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
- data/vendor/faiss/impl/FaissAssert.h +95 -0
- data/vendor/faiss/impl/FaissException.cpp +66 -0
- data/vendor/faiss/impl/FaissException.h +71 -0
- data/vendor/faiss/impl/HNSW.cpp +818 -0
- data/vendor/faiss/impl/HNSW.h +275 -0
- data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
- data/vendor/faiss/impl/PolysemousTraining.h +158 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
- data/vendor/faiss/impl/ProductQuantizer.h +242 -0
- data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
- data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
- data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
- data/vendor/faiss/impl/ThreadedIndex.h +80 -0
- data/vendor/faiss/impl/index_read.cpp +793 -0
- data/vendor/faiss/impl/index_write.cpp +558 -0
- data/vendor/faiss/impl/io.cpp +142 -0
- data/vendor/faiss/impl/io.h +98 -0
- data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
- data/vendor/faiss/impl/lattice_Zn.h +199 -0
- data/vendor/faiss/index_factory.cpp +392 -0
- data/vendor/faiss/index_factory.h +25 -0
- data/vendor/faiss/index_io.h +75 -0
- data/vendor/faiss/misc/test_blas.cpp +84 -0
- data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
- data/vendor/faiss/tests/test_merge.cpp +258 -0
- data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
- data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
- data/vendor/faiss/tests/test_params_override.cpp +231 -0
- data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
- data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
- data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
- data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
- data/vendor/faiss/utils/Heap.cpp +122 -0
- data/vendor/faiss/utils/Heap.h +495 -0
- data/vendor/faiss/utils/WorkerThread.cpp +126 -0
- data/vendor/faiss/utils/WorkerThread.h +61 -0
- data/vendor/faiss/utils/distances.cpp +765 -0
- data/vendor/faiss/utils/distances.h +243 -0
- data/vendor/faiss/utils/distances_simd.cpp +809 -0
- data/vendor/faiss/utils/extra_distances.cpp +336 -0
- data/vendor/faiss/utils/extra_distances.h +54 -0
- data/vendor/faiss/utils/hamming-inl.h +472 -0
- data/vendor/faiss/utils/hamming.cpp +792 -0
- data/vendor/faiss/utils/hamming.h +220 -0
- data/vendor/faiss/utils/random.cpp +192 -0
- data/vendor/faiss/utils/random.h +60 -0
- data/vendor/faiss/utils/utils.cpp +783 -0
- data/vendor/faiss/utils/utils.h +181 -0
- metadata +216 -2
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#ifndef FAISS_INDEX_BINARY_IVF_H
|
|
11
|
+
#define FAISS_INDEX_BINARY_IVF_H
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
#include <vector>
|
|
15
|
+
|
|
16
|
+
#include <faiss/IndexBinary.h>
|
|
17
|
+
#include <faiss/IndexIVF.h>
|
|
18
|
+
#include <faiss/Clustering.h>
|
|
19
|
+
#include <faiss/utils/Heap.h>
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
namespace faiss {
|
|
23
|
+
|
|
24
|
+
struct BinaryInvertedListScanner;
|
|
25
|
+
|
|
26
|
+
/** Index based on a inverted file (IVF)
|
|
27
|
+
*
|
|
28
|
+
* In the inverted file, the quantizer (an IndexBinary instance) provides a
|
|
29
|
+
* quantization index for each vector to be added. The quantization
|
|
30
|
+
* index maps to a list (aka inverted list or posting list), where the
|
|
31
|
+
* id of the vector is stored.
|
|
32
|
+
*
|
|
33
|
+
* Otherwise the object is similar to the IndexIVF
|
|
34
|
+
*/
|
|
35
|
+
struct IndexBinaryIVF : IndexBinary {
|
|
36
|
+
/// Acess to the actual data
|
|
37
|
+
InvertedLists *invlists;
|
|
38
|
+
bool own_invlists;
|
|
39
|
+
|
|
40
|
+
size_t nprobe; ///< number of probes at query time
|
|
41
|
+
size_t max_codes; ///< max nb of codes to visit to do a query
|
|
42
|
+
|
|
43
|
+
/** Select between using a heap or counting to select the k smallest values
|
|
44
|
+
* when scanning inverted lists.
|
|
45
|
+
*/
|
|
46
|
+
bool use_heap = true;
|
|
47
|
+
|
|
48
|
+
/// map for direct access to the elements. Enables reconstruct().
|
|
49
|
+
bool maintain_direct_map;
|
|
50
|
+
std::vector<idx_t> direct_map;
|
|
51
|
+
|
|
52
|
+
IndexBinary *quantizer; ///< quantizer that maps vectors to inverted lists
|
|
53
|
+
size_t nlist; ///< number of possible key values
|
|
54
|
+
|
|
55
|
+
bool own_fields; ///< whether object owns the quantizer
|
|
56
|
+
|
|
57
|
+
ClusteringParameters cp; ///< to override default clustering params
|
|
58
|
+
Index *clustering_index; ///< to override index used during clustering
|
|
59
|
+
|
|
60
|
+
/** The Inverted file takes a quantizer (an IndexBinary) on input,
|
|
61
|
+
* which implements the function mapping a vector to a list
|
|
62
|
+
* identifier. The pointer is borrowed: the quantizer should not
|
|
63
|
+
* be deleted while the IndexBinaryIVF is in use.
|
|
64
|
+
*/
|
|
65
|
+
IndexBinaryIVF(IndexBinary *quantizer, size_t d, size_t nlist);
|
|
66
|
+
|
|
67
|
+
IndexBinaryIVF();
|
|
68
|
+
|
|
69
|
+
~IndexBinaryIVF() override;
|
|
70
|
+
|
|
71
|
+
void reset() override;
|
|
72
|
+
|
|
73
|
+
/// Trains the quantizer
|
|
74
|
+
void train(idx_t n, const uint8_t *x) override;
|
|
75
|
+
|
|
76
|
+
void add(idx_t n, const uint8_t *x) override;
|
|
77
|
+
|
|
78
|
+
void add_with_ids(idx_t n, const uint8_t *x, const idx_t *xids) override;
|
|
79
|
+
|
|
80
|
+
/// same as add_with_ids, with precomputed coarse quantizer
|
|
81
|
+
void add_core (idx_t n, const uint8_t * x, const idx_t *xids,
|
|
82
|
+
const idx_t *precomputed_idx);
|
|
83
|
+
|
|
84
|
+
/** Search a set of vectors, that are pre-quantized by the IVF
|
|
85
|
+
* quantizer. Fill in the corresponding heaps with the query
|
|
86
|
+
* results. search() calls this.
|
|
87
|
+
*
|
|
88
|
+
* @param n nb of vectors to query
|
|
89
|
+
* @param x query vectors, size nx * d
|
|
90
|
+
* @param assign coarse quantization indices, size nx * nprobe
|
|
91
|
+
* @param centroid_dis
|
|
92
|
+
* distances to coarse centroids, size nx * nprobe
|
|
93
|
+
* @param distance
|
|
94
|
+
* output distances, size n * k
|
|
95
|
+
* @param labels output labels, size n * k
|
|
96
|
+
* @param store_pairs store inv list index + inv list offset
|
|
97
|
+
* instead in upper/lower 32 bit of result,
|
|
98
|
+
* instead of ids (used for reranking).
|
|
99
|
+
* @param params used to override the object's search parameters
|
|
100
|
+
*/
|
|
101
|
+
void search_preassigned(idx_t n, const uint8_t *x, idx_t k,
|
|
102
|
+
const idx_t *assign,
|
|
103
|
+
const int32_t *centroid_dis,
|
|
104
|
+
int32_t *distances, idx_t *labels,
|
|
105
|
+
bool store_pairs,
|
|
106
|
+
const IVFSearchParameters *params=nullptr
|
|
107
|
+
) const;
|
|
108
|
+
|
|
109
|
+
virtual BinaryInvertedListScanner *get_InvertedListScanner (
|
|
110
|
+
bool store_pairs=false) const;
|
|
111
|
+
|
|
112
|
+
/** assign the vectors, then call search_preassign */
|
|
113
|
+
virtual void search(idx_t n, const uint8_t *x, idx_t k,
|
|
114
|
+
int32_t *distances, idx_t *labels) const override;
|
|
115
|
+
|
|
116
|
+
void reconstruct(idx_t key, uint8_t *recons) const override;
|
|
117
|
+
|
|
118
|
+
/** Reconstruct a subset of the indexed vectors.
|
|
119
|
+
*
|
|
120
|
+
* Overrides default implementation to bypass reconstruct() which requires
|
|
121
|
+
* direct_map to be maintained.
|
|
122
|
+
*
|
|
123
|
+
* @param i0 first vector to reconstruct
|
|
124
|
+
* @param ni nb of vectors to reconstruct
|
|
125
|
+
* @param recons output array of reconstructed vectors, size ni * d / 8
|
|
126
|
+
*/
|
|
127
|
+
void reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const override;
|
|
128
|
+
|
|
129
|
+
/** Similar to search, but also reconstructs the stored vectors (or an
|
|
130
|
+
* approximation in the case of lossy coding) for the search results.
|
|
131
|
+
*
|
|
132
|
+
* Overrides default implementation to avoid having to maintain direct_map
|
|
133
|
+
* and instead fetch the code offsets through the `store_pairs` flag in
|
|
134
|
+
* search_preassigned().
|
|
135
|
+
*
|
|
136
|
+
* @param recons reconstructed vectors size (n, k, d / 8)
|
|
137
|
+
*/
|
|
138
|
+
void search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k,
|
|
139
|
+
int32_t *distances, idx_t *labels,
|
|
140
|
+
uint8_t *recons) const override;
|
|
141
|
+
|
|
142
|
+
/** Reconstruct a vector given the location in terms of (inv list index +
|
|
143
|
+
* inv list offset) instead of the id.
|
|
144
|
+
*
|
|
145
|
+
* Useful for reconstructing when the direct_map is not maintained and
|
|
146
|
+
* the inv list offset is computed by search_preassigned() with
|
|
147
|
+
* `store_pairs` set.
|
|
148
|
+
*/
|
|
149
|
+
virtual void reconstruct_from_offset(idx_t list_no, idx_t offset,
|
|
150
|
+
uint8_t* recons) const;
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
/// Dataset manipulation functions
|
|
154
|
+
size_t remove_ids(const IDSelector& sel) override;
|
|
155
|
+
|
|
156
|
+
/** moves the entries from another dataset to self. On output,
|
|
157
|
+
* other is empty. add_id is added to all moved ids (for
|
|
158
|
+
* sequential ids, this would be this->ntotal */
|
|
159
|
+
virtual void merge_from(IndexBinaryIVF& other, idx_t add_id);
|
|
160
|
+
|
|
161
|
+
size_t get_list_size(size_t list_no) const
|
|
162
|
+
{ return invlists->list_size(list_no); }
|
|
163
|
+
|
|
164
|
+
/** intialize a direct map
|
|
165
|
+
*
|
|
166
|
+
* @param new_maintain_direct_map if true, create a direct map,
|
|
167
|
+
* else clear it
|
|
168
|
+
*/
|
|
169
|
+
void make_direct_map(bool new_maintain_direct_map=true);
|
|
170
|
+
|
|
171
|
+
void replace_invlists(InvertedLists *il, bool own=false);
|
|
172
|
+
};
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
struct BinaryInvertedListScanner {
|
|
176
|
+
|
|
177
|
+
using idx_t = Index::idx_t;
|
|
178
|
+
|
|
179
|
+
/// from now on we handle this query.
|
|
180
|
+
virtual void set_query (const uint8_t *query_vector) = 0;
|
|
181
|
+
|
|
182
|
+
/// following codes come from this inverted list
|
|
183
|
+
virtual void set_list (idx_t list_no, uint8_t coarse_dis) = 0;
|
|
184
|
+
|
|
185
|
+
/// compute a single query-to-code distance
|
|
186
|
+
virtual uint32_t distance_to_code (const uint8_t *code) const = 0;
|
|
187
|
+
|
|
188
|
+
/** compute the distances to codes. (distances, labels) should be
|
|
189
|
+
* organized as a min- or max-heap
|
|
190
|
+
*
|
|
191
|
+
* @param n number of codes to scan
|
|
192
|
+
* @param codes codes to scan (n * code_size)
|
|
193
|
+
* @param ids corresponding ids (ignored if store_pairs)
|
|
194
|
+
* @param distances heap distances (size k)
|
|
195
|
+
* @param labels heap labels (size k)
|
|
196
|
+
* @param k heap size
|
|
197
|
+
*/
|
|
198
|
+
virtual size_t scan_codes (size_t n,
|
|
199
|
+
const uint8_t *codes,
|
|
200
|
+
const idx_t *ids,
|
|
201
|
+
int32_t *distances, idx_t *labels,
|
|
202
|
+
size_t k) const = 0;
|
|
203
|
+
|
|
204
|
+
virtual ~BinaryInvertedListScanner () {}
|
|
205
|
+
|
|
206
|
+
};
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
} // namespace faiss
|
|
210
|
+
|
|
211
|
+
#endif // FAISS_INDEX_BINARY_IVF_H
|
|
@@ -0,0 +1,508 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#include <faiss/IndexFlat.h>
|
|
11
|
+
|
|
12
|
+
#include <cstring>
|
|
13
|
+
#include <faiss/utils/distances.h>
|
|
14
|
+
#include <faiss/utils/extra_distances.h>
|
|
15
|
+
#include <faiss/utils/utils.h>
|
|
16
|
+
#include <faiss/utils/Heap.h>
|
|
17
|
+
#include <faiss/impl/FaissAssert.h>
|
|
18
|
+
#include <faiss/impl/AuxIndexStructures.h>
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
namespace faiss {
|
|
22
|
+
|
|
23
|
+
/// Forwards the dimension and the metric to the Index base constructor;
/// nothing else is done here.
IndexFlat::IndexFlat (idx_t d, MetricType metric)
    : Index (d, metric)
{}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
/// Append n vectors (n * d floats read from x) to the end of xb and
/// increase ntotal accordingly.
void IndexFlat::add (idx_t n, const float *x) {
    const float *x_end = x + n * d;
    xb.insert (xb.end(), x, x_end);
    ntotal += n;
}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
/// Drop all stored vectors: ntotal goes back to 0 and xb is emptied.
void IndexFlat::reset() {
    ntotal = 0;
    xb.clear();
}
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
/** k-nearest-neighbor search of n query vectors against the stored vectors.
 *
 * The output arrays (distances, labels) are handed to the knn routines as
 * heap arrays with n heaps of size k: a min-heap array for inner product,
 * a max-heap array for L2 and for every other metric.
 */
void IndexFlat::search (idx_t n, const float *x, idx_t k,
                        float *distances, idx_t *labels) const
{
    // we see the distances and labels as heaps
    const size_t nq = size_t(n);
    const size_t nres = size_t(k);

    switch (metric_type) {
    case METRIC_INNER_PRODUCT: {
        float_minheap_array_t heaps = {nq, nres, labels, distances};
        knn_inner_product (x, xb.data(), d, n, ntotal, &heaps);
        break;
    }
    case METRIC_L2: {
        float_maxheap_array_t heaps = {nq, nres, labels, distances};
        knn_L2sqr (x, xb.data(), d, n, ntotal, &heaps);
        break;
    }
    default: {
        // any other metric goes through the generic implementation
        float_maxheap_array_t heaps = {nq, nres, labels, distances};
        knn_extra_metrics (x, xb.data(), d, n, ntotal,
                           metric_type, metric_arg,
                           &heaps);
        break;
    }
    }
}
|
|
63
|
+
|
|
64
|
+
/** Range search of n query vectors with the given radius; the matches are
 *  delivered through result.
 *
 * Only METRIC_INNER_PRODUCT and METRIC_L2 are handled; any other metric
 * type raises an exception via FAISS_THROW_MSG.
 */
void IndexFlat::range_search (idx_t n, const float *x, float radius,
                              RangeSearchResult *result) const
{
    if (metric_type == METRIC_INNER_PRODUCT) {
        range_search_inner_product (x, xb.data(), d, n, ntotal,
                                    radius, result);
    } else if (metric_type == METRIC_L2) {
        range_search_L2sqr (x, xb.data(), d, n, ntotal, radius, result);
    } else {
        FAISS_THROW_MSG("metric type not supported");
    }
}
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
/** Compute, for each of the n queries, the distances to the k stored
 *  vectors designated by labels, writing them to distances.
 *
 * Only METRIC_INNER_PRODUCT and METRIC_L2 are handled; any other metric
 * type raises an exception via FAISS_THROW_MSG.
 */
void IndexFlat::compute_distance_subset (
            idx_t n,
            const float *x,
            idx_t k,
            float *distances,
            const idx_t *labels) const
{
    if (metric_type == METRIC_INNER_PRODUCT) {
        fvec_inner_products_by_idx (distances,
                                    x, xb.data(), labels, d, n, k);
    } else if (metric_type == METRIC_L2) {
        fvec_L2sqr_by_idx (distances,
                           x, xb.data(), labels, d, n, k);
    } else {
        FAISS_THROW_MSG("metric type not supported");
    }
}
|
|
104
|
+
|
|
105
|
+
size_t IndexFlat::remove_ids (const IDSelector & sel)
|
|
106
|
+
{
|
|
107
|
+
idx_t j = 0;
|
|
108
|
+
for (idx_t i = 0; i < ntotal; i++) {
|
|
109
|
+
if (sel.is_member (i)) {
|
|
110
|
+
// should be removed
|
|
111
|
+
} else {
|
|
112
|
+
if (i > j) {
|
|
113
|
+
memmove (&xb[d * j], &xb[d * i], sizeof(xb[0]) * d);
|
|
114
|
+
}
|
|
115
|
+
j++;
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
size_t nremove = ntotal - j;
|
|
119
|
+
if (nremove > 0) {
|
|
120
|
+
ntotal = j;
|
|
121
|
+
xb.resize (ntotal * d);
|
|
122
|
+
}
|
|
123
|
+
return nremove;
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
namespace {
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
struct FlatL2Dis : DistanceComputer {
|
|
131
|
+
size_t d;
|
|
132
|
+
Index::idx_t nb;
|
|
133
|
+
const float *q;
|
|
134
|
+
const float *b;
|
|
135
|
+
size_t ndis;
|
|
136
|
+
|
|
137
|
+
float operator () (idx_t i) override {
|
|
138
|
+
ndis++;
|
|
139
|
+
return fvec_L2sqr(q, b + i * d, d);
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
float symmetric_dis(idx_t i, idx_t j) override {
|
|
143
|
+
return fvec_L2sqr(b + j * d, b + i * d, d);
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
explicit FlatL2Dis(const IndexFlat& storage, const float *q = nullptr)
|
|
147
|
+
: d(storage.d),
|
|
148
|
+
nb(storage.ntotal),
|
|
149
|
+
q(q),
|
|
150
|
+
b(storage.xb.data()),
|
|
151
|
+
ndis(0) {}
|
|
152
|
+
|
|
153
|
+
void set_query(const float *x) override {
|
|
154
|
+
q = x;
|
|
155
|
+
}
|
|
156
|
+
};
|
|
157
|
+
|
|
158
|
+
struct FlatIPDis : DistanceComputer {
|
|
159
|
+
size_t d;
|
|
160
|
+
Index::idx_t nb;
|
|
161
|
+
const float *q;
|
|
162
|
+
const float *b;
|
|
163
|
+
size_t ndis;
|
|
164
|
+
|
|
165
|
+
float operator () (idx_t i) override {
|
|
166
|
+
ndis++;
|
|
167
|
+
return fvec_inner_product (q, b + i * d, d);
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
float symmetric_dis(idx_t i, idx_t j) override {
|
|
171
|
+
return fvec_inner_product (b + j * d, b + i * d, d);
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
explicit FlatIPDis(const IndexFlat& storage, const float *q = nullptr)
|
|
175
|
+
: d(storage.d),
|
|
176
|
+
nb(storage.ntotal),
|
|
177
|
+
q(q),
|
|
178
|
+
b(storage.xb.data()),
|
|
179
|
+
ndis(0) {}
|
|
180
|
+
|
|
181
|
+
void set_query(const float *x) override {
|
|
182
|
+
q = x;
|
|
183
|
+
}
|
|
184
|
+
};
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
} // namespace
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
/** Allocate (with new) a DistanceComputer matching this index's metric:
 *  FlatL2Dis for L2, FlatIPDis for inner product, and whatever
 *  get_extra_distance_computer() returns for any other metric type.
 */
DistanceComputer * IndexFlat::get_distance_computer() const {
    switch (metric_type) {
    case METRIC_L2:
        return new FlatL2Dis(*this);
    case METRIC_INNER_PRODUCT:
        return new FlatIPDis(*this);
    default:
        return get_extra_distance_computer (d, metric_type, metric_arg,
                                            ntotal, xb.data());
    }
}
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
/** Copy stored vector number key (d floats) into recons.
 *
 * @param key     sequential index of the vector, must be in [0, ntotal)
 * @param recons  output buffer with room for d floats
 *
 * Throws through FAISS_THROW_IF_NOT_MSG when key is outside the stored
 * range, instead of reading outside of xb.
 */
void IndexFlat::reconstruct (idx_t key, float * recons) const
{
    FAISS_THROW_IF_NOT_MSG (key >= 0 && key < ntotal,
                            "reconstruct: key out of range");
    memcpy (recons, &(xb[key * d]), sizeof(*recons) * d);
}
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
/* The standalone codec interface */
|
|
211
|
+
size_t IndexFlat::sa_code_size () const
|
|
212
|
+
{
|
|
213
|
+
return sizeof(float) * d;
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
/// "Encode" n vectors: the n * d floats of x are copied verbatim to bytes.
void IndexFlat::sa_encode (idx_t n, const float *x, uint8_t *bytes) const
{
    const size_t nbytes = sizeof(float) * d * n;
    memcpy (bytes, x, nbytes);
}
|
|
220
|
+
|
|
221
|
+
/// "Decode" n vectors: n * d floats are copied verbatim from bytes to x.
void IndexFlat::sa_decode (idx_t n, const uint8_t *bytes, float *x) const
{
    const size_t nbytes = sizeof(float) * d * n;
    memcpy (x, bytes, nbytes);
}
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
/***************************************************
|
|
230
|
+
* IndexFlatL2BaseShift
|
|
231
|
+
***************************************************/
|
|
232
|
+
|
|
233
|
+
/// Build an IndexFlatL2 of dimension d and keep a private copy of the
/// nshift floats pointed to by shift.
IndexFlatL2BaseShift::IndexFlatL2BaseShift (idx_t d, size_t nshift,
                                            const float *shift)
    : IndexFlatL2 (d),
      shift (nshift)
{
    // the member vector was sized to nshift above; fill it from the input
    const size_t nbytes = sizeof(float) * nshift;
    memcpy (this->shift.data(), shift, nbytes);
}
|
|
238
|
+
|
|
239
|
+
/** k-NN search delegated to knn_L2sqr_base_shift, which additionally
 *  receives the shift array.
 *
 * Throws (FAISS_THROW_IF_NOT) unless there is exactly one shift value per
 * stored vector.
 */
void IndexFlatL2BaseShift::search (
            idx_t n,
            const float *x,
            idx_t k,
            float *distances,
            idx_t *labels) const
{
    FAISS_THROW_IF_NOT (shift.size() == ntotal);

    // the output arrays are viewed as n max-heaps of size k
    const size_t nq = size_t(n);
    const size_t nres = size_t(k);
    float_maxheap_array_t heaps = {nq, nres, labels, distances};

    knn_L2sqr_base_shift (x, xb.data(), d, n, ntotal, &heaps, shift.data());
}
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
/***************************************************
|
|
256
|
+
* IndexRefineFlat
|
|
257
|
+
***************************************************/
|
|
258
|
+
|
|
259
|
+
/** Wrap base_index, which is borrowed (own_fields is set to false).
 *
 * Dimension and metric type are taken from base_index for both this
 * index and the internal refine_index; k_factor starts at 1 and
 * is_trained mirrors the base index. Throws if base_index already
 * contains vectors.
 */
IndexRefineFlat::IndexRefineFlat (Index *base_index)
    : Index (base_index->d, base_index->metric_type),
      refine_index (base_index->d, base_index->metric_type),
      base_index (base_index),
      own_fields (false),
      k_factor (1)
{
    is_trained = base_index->is_trained;
    FAISS_THROW_IF_NOT_MSG (base_index->ntotal == 0,
                            "base_index should be empty in the beginning");
}
|
|
269
|
+
|
|
270
|
+
/// Default construction: no base index attached, nothing owned,
/// k_factor of 1.
IndexRefineFlat::IndexRefineFlat ()
    : base_index (nullptr),
      own_fields (false),
      k_factor (1)
{}
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
/// Train the wrapped base index on the n vectors of x, then flag this
/// index as trained. refine_index is not involved.
void IndexRefineFlat::train (idx_t n, const float *x)
{
    base_index->train (n, x);
    is_trained = true;
}
|
|
282
|
+
|
|
283
|
+
/// Add the n vectors of x to both the base index and the refine index;
/// ntotal is then taken from the refine index. Throws if not trained.
void IndexRefineFlat::add (idx_t n, const float *x)
{
    FAISS_THROW_IF_NOT (is_trained);

    // base index first, then the flat copy used for refinement
    base_index->add (n, x);
    refine_index.add (n, x);

    ntotal = refine_index.ntotal;
}
|
|
289
|
+
|
|
290
|
+
void IndexRefineFlat::reset ()
|
|
291
|
+
{
|
|
292
|
+
base_index->reset ();
|
|
293
|
+
refine_index.reset ();
|
|
294
|
+
ntotal = 0;
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
namespace {
|
|
298
|
+
typedef faiss::Index::idx_t idx_t;
|
|
299
|
+
|
|
300
|
+
template<class C>
|
|
301
|
+
static void reorder_2_heaps (
|
|
302
|
+
idx_t n,
|
|
303
|
+
idx_t k, idx_t *labels, float *distances,
|
|
304
|
+
idx_t k_base, const idx_t *base_labels, const float *base_distances)
|
|
305
|
+
{
|
|
306
|
+
#pragma omp parallel for
|
|
307
|
+
for (idx_t i = 0; i < n; i++) {
|
|
308
|
+
idx_t *idxo = labels + i * k;
|
|
309
|
+
float *diso = distances + i * k;
|
|
310
|
+
const idx_t *idxi = base_labels + i * k_base;
|
|
311
|
+
const float *disi = base_distances + i * k_base;
|
|
312
|
+
|
|
313
|
+
heap_heapify<C> (k, diso, idxo, disi, idxi, k);
|
|
314
|
+
if (k_base != k) { // add remaining elements
|
|
315
|
+
heap_addn<C> (k, diso, idxo, disi + k, idxi + k, k_base - k);
|
|
316
|
+
}
|
|
317
|
+
heap_reorder<C> (k, diso, idxo);
|
|
318
|
+
}
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
/** Search the base index for k * k_factor candidates per query, recompute
 *  their distances with refine_index, and keep the best k.
 *
 * @param n          number of queries
 * @param x          query vectors
 * @param k          number of results wanted per query
 * @param distances  output distances, n * k entries
 * @param labels     output labels, n * k entries
 *
 * Throws if the index is not trained, if k * k_factor is smaller than k,
 * or if the metric is neither L2 nor inner product.
 */
void IndexRefineFlat::search (
              idx_t n, const float *x, idx_t k,
              float *distances, idx_t *labels) const
{
    FAISS_THROW_IF_NOT (is_trained);
    idx_t k_base = idx_t (k * k_factor);

    // reorder_2_heaps reads k entries per query from the base results and
    // then adds k_base - k more: fewer than k candidates would make it
    // read out of bounds and pass a negative count.
    FAISS_THROW_IF_NOT_MSG (k_base >= k,
                            "k * k_factor should be at least k");

    // when k_base == k the output arrays double as candidate buffers
    idx_t * base_labels = labels;
    float * base_distances = distances;
    ScopeDeleter<idx_t> del1;
    ScopeDeleter<float> del2;


    if (k != k_base) {
        base_labels = new idx_t [n * k_base];
        del1.set (base_labels);
        base_distances = new float [n * k_base];
        del2.set (base_distances);
    }

    base_index->search (n, x, k_base, base_distances, base_labels);

    // sanity check (debug builds only). The counter is an idx_t so that
    // it cannot overflow when n * k_base exceeds the range of int.
    for (idx_t i = 0; i < n * k_base; i++)
        assert (base_labels[i] >= -1 &&
                base_labels[i] < ntotal);

    // compute refined distances
    refine_index.compute_distance_subset (
        n, x, k_base, base_distances, base_labels);

    // sort and store result
    if (metric_type == METRIC_L2) {
        typedef CMax <float, idx_t> C;
        reorder_2_heaps<C> (
            n, k, labels, distances,
            k_base, base_labels, base_distances);

    } else if (metric_type == METRIC_INNER_PRODUCT) {
        typedef CMin <float, idx_t> C;
        reorder_2_heaps<C> (
            n, k, labels, distances,
            k_base, base_labels, base_distances);
    } else {
        FAISS_THROW_MSG("Metric type not supported");
    }

}
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
/// The base index is deleted only when this object owns it (own_fields).
IndexRefineFlat::~IndexRefineFlat ()
{
    if (!own_fields) {
        return;
    }
    delete base_index;
}
|
|
378
|
+
|
|
379
|
+
/***************************************************
|
|
380
|
+
* IndexFlat1D
|
|
381
|
+
***************************************************/
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
/// A flat L2 index of dimension 1; continuous_update is stored and
/// consulted by add().
IndexFlat1D::IndexFlat1D (bool continuous_update)
    : IndexFlatL2 (1),
      continuous_update (continuous_update)
{}
|
|
389
|
+
|
|
390
|
+
/// if not continuous_update, call this between the last add and
|
|
391
|
+
/// the first search
|
|
392
|
+
void IndexFlat1D::update_permutation ()
|
|
393
|
+
{
|
|
394
|
+
perm.resize (ntotal);
|
|
395
|
+
if (ntotal < 1000000) {
|
|
396
|
+
fvec_argsort (ntotal, xb.data(), (size_t*)perm.data());
|
|
397
|
+
} else {
|
|
398
|
+
fvec_argsort_parallel (ntotal, xb.data(), (size_t*)perm.data());
|
|
399
|
+
}
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
/// Add through IndexFlatL2::add, then refresh the permutation right away
/// if continuous_update is set.
void IndexFlat1D::add (idx_t n, const float *x)
{
    IndexFlatL2::add (n, x);
    if (!continuous_update) {
        return;
    }
    update_permutation();
}
|
|
408
|
+
|
|
409
|
+
void IndexFlat1D::reset()
|
|
410
|
+
{
|
|
411
|
+
IndexFlatL2::reset();
|
|
412
|
+
perm.clear();
|
|
413
|
+
}
|
|
414
|
+
|
|
415
|
+
/** k-NN search over 1-D values through the permutation perm.
 *
 * For every query a binary search over perm locates the two neighboring
 * stored values, then results are collected by walking outwards to the
 * left and to the right, always taking the closer of the two candidates.
 * The reported distance is the plain difference between query and stored
 * value (it is not squared).
 *
 * When fewer than k stored values exist, the remaining slots receive an
 * infinite distance and label -1; this also covers an empty index.
 *
 * Throws unless perm has one entry per stored vector, i.e. unless
 * update_permutation() was run after the last add.
 *
 * NOTE(review): the walk assumes perm sorts xb in increasing order, as
 * produced by fvec_argsort -- confirm.
 */
void IndexFlat1D::search (
            idx_t n,
            const float *x,
            idx_t k,
            float *distances,
            idx_t *labels) const
{
    FAISS_THROW_IF_NOT_MSG (perm.size() == ntotal,
                    "Call update_permutation before search");

#pragma omp parallel for
    for (idx_t i = 0; i < n; i++) {

        float q = x[i]; // query
        float *D = distances + i * k;
        idx_t *I = labels + i * k;

        // binary search
        idx_t i0 = 0, i1 = ntotal;
        idx_t wp = 0;  // write position in D / I

        // Empty index: perm has no element to look at, so go straight to
        // the padding code (finish_right with i1 == ntotal == 0).
        // Otherwise: query left of every stored value.
        if (ntotal == 0 || xb[perm[i0]] > q) {
            i1 = 0;
            goto finish_right;
        }

        // query right of (or equal to) the largest stored value
        if (xb[perm[i1 - 1]] <= q) {
            i0 = i1 - 1;
            goto finish_left;
        }

        while (i0 + 1 < i1) {
            idx_t imed = (i0 + i1) / 2;
            if (xb[perm[imed]] <= q) i0 = imed;
            else                    i1 = imed;
        }

        // query is between xb[perm[i0]] and xb[perm[i1]]
        // expand to nearest neighs

        while (wp < k) {
            float xleft = xb[perm[i0]];
            float xright = xb[perm[i1]];

            if (q - xleft < xright - q) {
                D[wp] = q - xleft;
                I[wp] = perm[i0];
                i0--; wp++;
                // left side exhausted: only the right side remains
                if (i0 < 0) { goto finish_right; }
            } else {
                D[wp] = xright - q;
                I[wp] = perm[i1];
                i1++; wp++;
                // right side exhausted: only the left side remains
                if (i1 >= ntotal) { goto finish_left; }
            }
        }
        goto done;

    finish_right:
        // grow to the right from i1
        while (wp < k) {
            if (i1 < ntotal) {
                D[wp] = xb[perm[i1]] - q;
                I[wp] = perm[i1];
                i1++;
            } else {
                // nothing left: pad the result
                D[wp] = std::numeric_limits<float>::infinity();
                I[wp] = -1;
            }
            wp++;
        }
        goto done;

    finish_left:
        // grow to the left from i0
        while (wp < k) {
            if (i0 >= 0) {
                D[wp] = q - xb[perm[i0]];
                I[wp] = perm[i0];
                i0--;
            } else {
                // nothing left: pad the result
                D[wp] = std::numeric_limits<float>::infinity();
                I[wp] = -1;
            }
            wp++;
        }
    done:  ;
    }

}
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
} // namespace faiss
|