faiss 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +103 -3
- data/ext/faiss/ext.cpp +99 -32
- data/ext/faiss/extconf.rb +12 -2
- data/lib/faiss/ext.bundle +0 -0
- data/lib/faiss/index.rb +3 -3
- data/lib/faiss/index_binary.rb +3 -3
- data/lib/faiss/kmeans.rb +1 -1
- data/lib/faiss/pca_matrix.rb +2 -2
- data/lib/faiss/product_quantizer.rb +3 -3
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/AutoTune.cpp +719 -0
- data/vendor/faiss/AutoTune.h +212 -0
- data/vendor/faiss/Clustering.cpp +261 -0
- data/vendor/faiss/Clustering.h +101 -0
- data/vendor/faiss/IVFlib.cpp +339 -0
- data/vendor/faiss/IVFlib.h +132 -0
- data/vendor/faiss/Index.cpp +171 -0
- data/vendor/faiss/Index.h +261 -0
- data/vendor/faiss/Index2Layer.cpp +437 -0
- data/vendor/faiss/Index2Layer.h +85 -0
- data/vendor/faiss/IndexBinary.cpp +77 -0
- data/vendor/faiss/IndexBinary.h +163 -0
- data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
- data/vendor/faiss/IndexBinaryFlat.h +54 -0
- data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
- data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
- data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
- data/vendor/faiss/IndexBinaryHNSW.h +56 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
- data/vendor/faiss/IndexBinaryIVF.h +211 -0
- data/vendor/faiss/IndexFlat.cpp +508 -0
- data/vendor/faiss/IndexFlat.h +175 -0
- data/vendor/faiss/IndexHNSW.cpp +1090 -0
- data/vendor/faiss/IndexHNSW.h +170 -0
- data/vendor/faiss/IndexIVF.cpp +909 -0
- data/vendor/faiss/IndexIVF.h +353 -0
- data/vendor/faiss/IndexIVFFlat.cpp +502 -0
- data/vendor/faiss/IndexIVFFlat.h +118 -0
- data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
- data/vendor/faiss/IndexIVFPQ.h +161 -0
- data/vendor/faiss/IndexIVFPQR.cpp +219 -0
- data/vendor/faiss/IndexIVFPQR.h +65 -0
- data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
- data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
- data/vendor/faiss/IndexLSH.cpp +225 -0
- data/vendor/faiss/IndexLSH.h +87 -0
- data/vendor/faiss/IndexLattice.cpp +143 -0
- data/vendor/faiss/IndexLattice.h +68 -0
- data/vendor/faiss/IndexPQ.cpp +1188 -0
- data/vendor/faiss/IndexPQ.h +199 -0
- data/vendor/faiss/IndexPreTransform.cpp +288 -0
- data/vendor/faiss/IndexPreTransform.h +91 -0
- data/vendor/faiss/IndexReplicas.cpp +123 -0
- data/vendor/faiss/IndexReplicas.h +76 -0
- data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
- data/vendor/faiss/IndexScalarQuantizer.h +127 -0
- data/vendor/faiss/IndexShards.cpp +317 -0
- data/vendor/faiss/IndexShards.h +100 -0
- data/vendor/faiss/InvertedLists.cpp +623 -0
- data/vendor/faiss/InvertedLists.h +334 -0
- data/vendor/faiss/LICENSE +21 -0
- data/vendor/faiss/MatrixStats.cpp +252 -0
- data/vendor/faiss/MatrixStats.h +62 -0
- data/vendor/faiss/MetaIndexes.cpp +351 -0
- data/vendor/faiss/MetaIndexes.h +126 -0
- data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
- data/vendor/faiss/OnDiskInvertedLists.h +127 -0
- data/vendor/faiss/VectorTransform.cpp +1157 -0
- data/vendor/faiss/VectorTransform.h +322 -0
- data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
- data/vendor/faiss/c_api/AutoTune_c.h +64 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
- data/vendor/faiss/c_api/Clustering_c.h +117 -0
- data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
- data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
- data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
- data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
- data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
- data/vendor/faiss/c_api/IndexShards_c.h +42 -0
- data/vendor/faiss/c_api/Index_c.cpp +105 -0
- data/vendor/faiss/c_api/Index_c.h +183 -0
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
- data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
- data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
- data/vendor/faiss/c_api/clone_index_c.h +32 -0
- data/vendor/faiss/c_api/error_c.h +42 -0
- data/vendor/faiss/c_api/error_impl.cpp +27 -0
- data/vendor/faiss/c_api/error_impl.h +16 -0
- data/vendor/faiss/c_api/faiss_c.h +58 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
- data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
- data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
- data/vendor/faiss/c_api/index_factory_c.h +30 -0
- data/vendor/faiss/c_api/index_io_c.cpp +42 -0
- data/vendor/faiss/c_api/index_io_c.h +50 -0
- data/vendor/faiss/c_api/macros_impl.h +110 -0
- data/vendor/faiss/clone_index.cpp +147 -0
- data/vendor/faiss/clone_index.h +38 -0
- data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
- data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
- data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
- data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
- data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
- data/vendor/faiss/gpu/GpuCloner.h +82 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
- data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
- data/vendor/faiss/gpu/GpuDistance.h +52 -0
- data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
- data/vendor/faiss/gpu/GpuIndex.h +148 -0
- data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
- data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
- data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
- data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
- data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
- data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
- data/vendor/faiss/gpu/GpuResources.cpp +52 -0
- data/vendor/faiss/gpu/GpuResources.h +73 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
- data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
- data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
- data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
- data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
- data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
- data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
- data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
- data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
- data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
- data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
- data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
- data/vendor/faiss/gpu/test/TestUtils.h +93 -0
- data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
- data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
- data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
- data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
- data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
- data/vendor/faiss/gpu/utils/Timer.h +52 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
- data/vendor/faiss/impl/FaissAssert.h +95 -0
- data/vendor/faiss/impl/FaissException.cpp +66 -0
- data/vendor/faiss/impl/FaissException.h +71 -0
- data/vendor/faiss/impl/HNSW.cpp +818 -0
- data/vendor/faiss/impl/HNSW.h +275 -0
- data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
- data/vendor/faiss/impl/PolysemousTraining.h +158 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
- data/vendor/faiss/impl/ProductQuantizer.h +242 -0
- data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
- data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
- data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
- data/vendor/faiss/impl/ThreadedIndex.h +80 -0
- data/vendor/faiss/impl/index_read.cpp +793 -0
- data/vendor/faiss/impl/index_write.cpp +558 -0
- data/vendor/faiss/impl/io.cpp +142 -0
- data/vendor/faiss/impl/io.h +98 -0
- data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
- data/vendor/faiss/impl/lattice_Zn.h +199 -0
- data/vendor/faiss/index_factory.cpp +392 -0
- data/vendor/faiss/index_factory.h +25 -0
- data/vendor/faiss/index_io.h +75 -0
- data/vendor/faiss/misc/test_blas.cpp +84 -0
- data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
- data/vendor/faiss/tests/test_merge.cpp +258 -0
- data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
- data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
- data/vendor/faiss/tests/test_params_override.cpp +231 -0
- data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
- data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
- data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
- data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
- data/vendor/faiss/utils/Heap.cpp +122 -0
- data/vendor/faiss/utils/Heap.h +495 -0
- data/vendor/faiss/utils/WorkerThread.cpp +126 -0
- data/vendor/faiss/utils/WorkerThread.h +61 -0
- data/vendor/faiss/utils/distances.cpp +765 -0
- data/vendor/faiss/utils/distances.h +243 -0
- data/vendor/faiss/utils/distances_simd.cpp +809 -0
- data/vendor/faiss/utils/extra_distances.cpp +336 -0
- data/vendor/faiss/utils/extra_distances.h +54 -0
- data/vendor/faiss/utils/hamming-inl.h +472 -0
- data/vendor/faiss/utils/hamming.cpp +792 -0
- data/vendor/faiss/utils/hamming.h +220 -0
- data/vendor/faiss/utils/random.cpp +192 -0
- data/vendor/faiss/utils/random.h +60 -0
- data/vendor/faiss/utils/utils.cpp +783 -0
- data/vendor/faiss/utils/utils.h +181 -0
- metadata +216 -2
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#ifndef FAISS_INDEX_IVFPQ_H
|
|
11
|
+
#define FAISS_INDEX_IVFPQ_H
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
#include <vector>
|
|
15
|
+
|
|
16
|
+
#include <faiss/IndexIVF.h>
|
|
17
|
+
#include <faiss/IndexPQ.h>
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
namespace faiss {
|
|
21
|
+
|
|
22
|
+
/// Per-query overrides for an IVFPQ search, extending the generic IVF
/// search parameters with the two IVFPQ-specific knobs below.
struct IVFPQSearchParameters: IVFSearchParameters {
    /// use table computation or on-the-fly?
    size_t scan_table_threshold;

    /// Hamming thresh for polysemous filtering
    int polysemous_ht;

    ~IVFPQSearchParameters () {}
};
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
/** Inverted file with Product Quantizer encoding. Each residual
|
|
30
|
+
* vector is encoded as a product quantizer code.
|
|
31
|
+
*/
|
|
32
|
+
struct IndexIVFPQ: IndexIVF {
|
|
33
|
+
bool by_residual; ///< Encode residual or plain vector?
|
|
34
|
+
|
|
35
|
+
ProductQuantizer pq; ///< produces the codes
|
|
36
|
+
|
|
37
|
+
bool do_polysemous_training; ///< reorder PQ centroids after training?
|
|
38
|
+
PolysemousTraining *polysemous_training; ///< if NULL, use default
|
|
39
|
+
|
|
40
|
+
// search-time parameters
|
|
41
|
+
size_t scan_table_threshold; ///< use table computation or on-the-fly?
|
|
42
|
+
int polysemous_ht; ///< Hamming thresh for polysemous filtering
|
|
43
|
+
|
|
44
|
+
/** Precompute table that speed up query preprocessing at some
|
|
45
|
+
* memory cost
|
|
46
|
+
* =-1: force disable
|
|
47
|
+
* =0: decide heuristically (default: use tables only if they are
|
|
48
|
+
* < precomputed_tables_max_bytes)
|
|
49
|
+
* =1: tables that work for all quantizers (size 256 * nlist * M)
|
|
50
|
+
* =2: specific version for MultiIndexQuantizer (much more compact)
|
|
51
|
+
*/
|
|
52
|
+
int use_precomputed_table; ///< if by_residual, build precompute tables
|
|
53
|
+
static size_t precomputed_table_max_bytes;
|
|
54
|
+
|
|
55
|
+
/// if use_precompute_table
|
|
56
|
+
/// size nlist * pq.M * pq.ksub
|
|
57
|
+
std::vector <float> precomputed_table;
|
|
58
|
+
|
|
59
|
+
IndexIVFPQ (
|
|
60
|
+
Index * quantizer, size_t d, size_t nlist,
|
|
61
|
+
size_t M, size_t nbits_per_idx);
|
|
62
|
+
|
|
63
|
+
void add_with_ids(idx_t n, const float* x, const idx_t* xids = nullptr)
|
|
64
|
+
override;
|
|
65
|
+
|
|
66
|
+
void encode_vectors(idx_t n, const float* x,
|
|
67
|
+
const idx_t *list_nos,
|
|
68
|
+
uint8_t * codes,
|
|
69
|
+
bool include_listnos = false) const override;
|
|
70
|
+
|
|
71
|
+
void sa_decode (idx_t n, const uint8_t *bytes,
|
|
72
|
+
float *x) const override;
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
/// same as add_core, also:
|
|
76
|
+
/// - output 2nd level residuals if residuals_2 != NULL
|
|
77
|
+
/// - use precomputed list numbers if precomputed_idx != NULL
|
|
78
|
+
void add_core_o (idx_t n, const float *x,
|
|
79
|
+
const idx_t *xids, float *residuals_2,
|
|
80
|
+
const idx_t *precomputed_idx = nullptr);
|
|
81
|
+
|
|
82
|
+
/// trains the product quantizer
|
|
83
|
+
void train_residual(idx_t n, const float* x) override;
|
|
84
|
+
|
|
85
|
+
/// same as train_residual, also output 2nd level residuals
|
|
86
|
+
void train_residual_o (idx_t n, const float *x, float *residuals_2);
|
|
87
|
+
|
|
88
|
+
void reconstruct_from_offset (int64_t list_no, int64_t offset,
|
|
89
|
+
float* recons) const override;
|
|
90
|
+
|
|
91
|
+
/** Find exact duplicates in the dataset.
|
|
92
|
+
*
|
|
93
|
+
* the duplicates are returned in pre-allocated arrays (see the
|
|
94
|
+
* max sizes).
|
|
95
|
+
*
|
|
96
|
+
* @params lims limits between groups of duplicates
|
|
97
|
+
* (max size ntotal / 2 + 1)
|
|
98
|
+
* @params ids ids[lims[i]] : ids[lims[i+1]-1] is a group of
|
|
99
|
+
* duplicates (max size ntotal)
|
|
100
|
+
* @return n number of groups found
|
|
101
|
+
*/
|
|
102
|
+
size_t find_duplicates (idx_t *ids, size_t *lims) const;
|
|
103
|
+
|
|
104
|
+
// map a vector to a binary code knowning the index
|
|
105
|
+
void encode (idx_t key, const float * x, uint8_t * code) const;
|
|
106
|
+
|
|
107
|
+
/** Encode multiple vectors
|
|
108
|
+
*
|
|
109
|
+
* @param n nb vectors to encode
|
|
110
|
+
* @param keys posting list ids for those vectors (size n)
|
|
111
|
+
* @param x vectors (size n * d)
|
|
112
|
+
* @param codes output codes (size n * code_size)
|
|
113
|
+
* @param compute_keys if false, assume keys are precomputed,
|
|
114
|
+
* otherwise compute them
|
|
115
|
+
*/
|
|
116
|
+
void encode_multiple (size_t n, idx_t *keys,
|
|
117
|
+
const float * x, uint8_t * codes,
|
|
118
|
+
bool compute_keys = false) const;
|
|
119
|
+
|
|
120
|
+
/// inverse of encode_multiple
|
|
121
|
+
void decode_multiple (size_t n, const idx_t *keys,
|
|
122
|
+
const uint8_t * xcodes, float * x) const;
|
|
123
|
+
|
|
124
|
+
InvertedListScanner *get_InvertedListScanner (bool store_pairs)
|
|
125
|
+
const override;
|
|
126
|
+
|
|
127
|
+
/// build precomputed table
|
|
128
|
+
void precompute_table ();
|
|
129
|
+
|
|
130
|
+
IndexIVFPQ ();
|
|
131
|
+
|
|
132
|
+
};
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
/// Search statistics for IVFPQ-style indexes.
///
/// statistics are robust to internal threading, but not if
/// IndexIVFPQ::search_preassigned is called by multiple threads
struct IndexIVFPQStats {
    /// nb of refines (IVFPQR)
    size_t nrefine;

    /// nb of passed Hamming distance tests (for polysemous)
    size_t n_hamming_pass;

    // timings measured with the CPU RTC on all threads
    size_t search_cycles;
    /// only for IVFPQR
    size_t refine_cycles;

    IndexIVFPQStats () {
        reset ();
    }

    void reset ();
};

/// global var that collects them all
extern IndexIVFPQStats indexIVFPQ_stats;
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
} // namespace faiss
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
#endif
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#include <faiss/IndexIVFPQR.h>
|
|
11
|
+
|
|
12
|
+
#include <faiss/utils/Heap.h>
|
|
13
|
+
#include <faiss/utils/utils.h>
|
|
14
|
+
#include <faiss/utils/distances.h>
|
|
15
|
+
|
|
16
|
+
#include <faiss/impl/FaissAssert.h>
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
namespace faiss {
|
|
20
|
+
|
|
21
|
+
/*****************************************
|
|
22
|
+
* IndexIVFPQR implementation
|
|
23
|
+
******************************************/
|
|
24
|
+
|
|
25
|
+
/// Full constructor: (M, nbits_per_idx) configure the IVFPQ base, while
/// (M_refine, nbits_per_idx_refine) configure the refinement quantizer.
/// k_factor starts at 4 and residual encoding is switched on.
IndexIVFPQR::IndexIVFPQR (
        Index * quantizer, size_t d, size_t nlist,
        size_t M, size_t nbits_per_idx,
        size_t M_refine, size_t nbits_per_idx_refine)
    : IndexIVFPQ (quantizer, d, nlist, M, nbits_per_idx),
      refine_pq (d, M_refine, nbits_per_idx_refine),
      k_factor (4)
{
    by_residual = true;
}
|
|
35
|
+
|
|
36
|
+
/// Default constructor: k_factor is 1 and residual encoding is enabled;
/// everything else is left to the member and base default constructors.
IndexIVFPQR::IndexIVFPQR ()
    : k_factor (1)
{
    by_residual = true;
}
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
void IndexIVFPQR::reset()
|
|
45
|
+
{
|
|
46
|
+
IndexIVFPQ::reset();
|
|
47
|
+
refine_codes.clear();
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
/// Trains the two product quantizers.
///
/// The IVFPQ quantizer is trained through train_residual_o, which also
/// writes the 2nd level residuals into a temporary buffer of n * d floats;
/// the refinement quantizer is then trained on that buffer.
///
/// @param n  number of training vectors
/// @param x  training vectors
void IndexIVFPQR::train_residual (idx_t n, const float *x)
{
    float * residual_2 = new float [n * d];
    ScopeDeleter <float> del(residual_2);

    train_residual_o (n, x, residual_2);

    if (verbose) {
        // %zu matches the unsigned size fields; n is cast explicitly so the
        // conversion is correct whether the 64-bit idx_t is long or long long
        printf ("training %zux%zu 2nd level PQ quantizer on %lld %dD-vectors\n",
                refine_pq.M, refine_pq.ksub,
                static_cast<long long> (n), d);
    }

    refine_pq.cp.max_points_per_centroid = 1000;
    refine_pq.cp.verbose = verbose;

    refine_pq.train (n, residual_2);
}
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
/// Adds n vectors with optional ids; forwards to add_core with no
/// precomputed list assignment.
void IndexIVFPQR::add_with_ids (idx_t n, const float *x, const idx_t *xids)
{
    const idx_t *no_precomputed_lists = nullptr;
    add_core (n, x, xids, no_precomputed_lists);
}
|
|
76
|
+
|
|
77
|
+
/// Same as add_with_ids, but optionally uses precomputed list ids.
///
/// The vectors are added through add_core_o, which fills a temporary
/// buffer with the 2nd level residuals; those residuals are then encoded
/// with refine_pq and stored in refine_codes.
void IndexIVFPQR::add_core (idx_t n, const float *x, const idx_t *xids,
                            const idx_t *precomputed_idx)
{
    float * second_residuals = new float [n * d];
    ScopeDeleter <float> second_residuals_guard (second_residuals);

    // ntotal as it was before this batch: the new refinement codes are
    // written starting at that position
    const idx_t first_new = ntotal;

    add_core_o (n, x, xids, second_residuals, precomputed_idx);

    refine_codes.resize (ntotal * refine_pq.code_size);

    uint8_t * dest = &refine_codes[first_new * refine_pq.code_size];
    refine_pq.compute_codes (second_residuals, dest, n);
}
|
|
94
|
+
#define TIC t0 = get_cycles()
|
|
95
|
+
#define TOC get_cycles () - t0
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
void IndexIVFPQR::search_preassigned (idx_t n, const float *x, idx_t k,
|
|
99
|
+
const idx_t *idx,
|
|
100
|
+
const float *L1_dis,
|
|
101
|
+
float *distances, idx_t *labels,
|
|
102
|
+
bool store_pairs,
|
|
103
|
+
const IVFSearchParameters *params
|
|
104
|
+
) const
|
|
105
|
+
{
|
|
106
|
+
uint64_t t0;
|
|
107
|
+
TIC;
|
|
108
|
+
size_t k_coarse = long(k * k_factor);
|
|
109
|
+
idx_t *coarse_labels = new idx_t [k_coarse * n];
|
|
110
|
+
ScopeDeleter<idx_t> del1 (coarse_labels);
|
|
111
|
+
{ // query with quantizer levels 1 and 2.
|
|
112
|
+
float *coarse_distances = new float [k_coarse * n];
|
|
113
|
+
ScopeDeleter<float> del(coarse_distances);
|
|
114
|
+
|
|
115
|
+
IndexIVFPQ::search_preassigned (
|
|
116
|
+
n, x, k_coarse,
|
|
117
|
+
idx, L1_dis, coarse_distances, coarse_labels,
|
|
118
|
+
true, params);
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
indexIVFPQ_stats.search_cycles += TOC;
|
|
123
|
+
|
|
124
|
+
TIC;
|
|
125
|
+
|
|
126
|
+
// 3rd level refinement
|
|
127
|
+
size_t n_refine = 0;
|
|
128
|
+
#pragma omp parallel reduction(+ : n_refine)
|
|
129
|
+
{
|
|
130
|
+
// tmp buffers
|
|
131
|
+
float *residual_1 = new float [2 * d];
|
|
132
|
+
ScopeDeleter<float> del (residual_1);
|
|
133
|
+
float *residual_2 = residual_1 + d;
|
|
134
|
+
#pragma omp for
|
|
135
|
+
for (idx_t i = 0; i < n; i++) {
|
|
136
|
+
const float *xq = x + i * d;
|
|
137
|
+
const idx_t * shortlist = coarse_labels + k_coarse * i;
|
|
138
|
+
float * heap_sim = distances + k * i;
|
|
139
|
+
idx_t * heap_ids = labels + k * i;
|
|
140
|
+
maxheap_heapify (k, heap_sim, heap_ids);
|
|
141
|
+
|
|
142
|
+
for (int j = 0; j < k_coarse; j++) {
|
|
143
|
+
idx_t sl = shortlist[j];
|
|
144
|
+
|
|
145
|
+
if (sl == -1) continue;
|
|
146
|
+
|
|
147
|
+
int list_no = sl >> 32;
|
|
148
|
+
int ofs = sl & 0xffffffff;
|
|
149
|
+
|
|
150
|
+
assert (list_no >= 0 && list_no < nlist);
|
|
151
|
+
assert (ofs >= 0 && ofs < invlists->list_size (list_no));
|
|
152
|
+
|
|
153
|
+
// 1st level residual
|
|
154
|
+
quantizer->compute_residual (xq, residual_1, list_no);
|
|
155
|
+
|
|
156
|
+
// 2nd level residual
|
|
157
|
+
const uint8_t * l2code =
|
|
158
|
+
invlists->get_single_code (list_no, ofs);
|
|
159
|
+
|
|
160
|
+
pq.decode (l2code, residual_2);
|
|
161
|
+
for (int l = 0; l < d; l++)
|
|
162
|
+
residual_2[l] = residual_1[l] - residual_2[l];
|
|
163
|
+
|
|
164
|
+
// 3rd level residual's approximation
|
|
165
|
+
idx_t id = invlists->get_single_id (list_no, ofs);
|
|
166
|
+
assert (0 <= id && id < ntotal);
|
|
167
|
+
refine_pq.decode (&refine_codes [id * refine_pq.code_size],
|
|
168
|
+
residual_1);
|
|
169
|
+
|
|
170
|
+
float dis = fvec_L2sqr (residual_1, residual_2, d);
|
|
171
|
+
|
|
172
|
+
if (dis < heap_sim[0]) {
|
|
173
|
+
maxheap_pop (k, heap_sim, heap_ids);
|
|
174
|
+
idx_t id_or_pair = store_pairs ? sl : id;
|
|
175
|
+
maxheap_push (k, heap_sim, heap_ids, dis, id_or_pair);
|
|
176
|
+
}
|
|
177
|
+
n_refine ++;
|
|
178
|
+
}
|
|
179
|
+
maxheap_reorder (k, heap_sim, heap_ids);
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
indexIVFPQ_stats.nrefine += n_refine;
|
|
183
|
+
indexIVFPQ_stats.refine_cycles += TOC;
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
void IndexIVFPQR::reconstruct_from_offset (int64_t list_no, int64_t offset,
|
|
187
|
+
float* recons) const
|
|
188
|
+
{
|
|
189
|
+
IndexIVFPQ::reconstruct_from_offset (list_no, offset, recons);
|
|
190
|
+
|
|
191
|
+
idx_t id = invlists->get_single_id (list_no, offset);
|
|
192
|
+
assert (0 <= id && id < ntotal);
|
|
193
|
+
|
|
194
|
+
std::vector<float> r3(d);
|
|
195
|
+
refine_pq.decode (&refine_codes [id * refine_pq.code_size], r3.data());
|
|
196
|
+
for (int i = 0; i < d; ++i) {
|
|
197
|
+
recons[i] += r3[i];
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
/// Merges another IndexIVFPQR into this one.
///
/// Throws if other_in is not an IndexIVFPQR. After the IVF-level merge,
/// the other index's refinement codes are appended to this index's codes
/// and then cleared from the other index.
void IndexIVFPQR::merge_from (IndexIVF &other_in, idx_t add_id)
{
    IndexIVFPQR *other = dynamic_cast<IndexIVFPQR *> (&other_in);
    FAISS_THROW_IF_NOT(other);

    IndexIVF::merge_from (other_in, add_id);

    std::vector<uint8_t> &incoming = other->refine_codes;
    refine_codes.insert (refine_codes.end(),
                         incoming.begin(), incoming.end());
    incoming.clear();
}
|
|
213
|
+
|
|
214
|
+
/// Removal is not supported by this index: the call always raises a
/// "not implemented" error through FAISS_THROW_MSG.
size_t IndexIVFPQR::remove_ids(const IDSelector& /*sel*/)
{
    FAISS_THROW_MSG("not implemented");
    // kept so that the function has a return statement on every path
    return 0;
}
|
|
218
|
+
|
|
219
|
+
} // namespace faiss
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#pragma once
|
|
11
|
+
|
|
12
|
+
#include <vector>
|
|
13
|
+
|
|
14
|
+
#include <faiss/IndexIVFPQ.h>
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
namespace faiss {
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
/** Index with an additional level of PQ refinement */
|
|
22
|
+
struct IndexIVFPQR: IndexIVFPQ {
|
|
23
|
+
ProductQuantizer refine_pq; ///< 3rd level quantizer
|
|
24
|
+
std::vector <uint8_t> refine_codes; ///< corresponding codes
|
|
25
|
+
|
|
26
|
+
/// factor between k requested in search and the k requested from the IVFPQ
|
|
27
|
+
float k_factor;
|
|
28
|
+
|
|
29
|
+
IndexIVFPQR (
|
|
30
|
+
Index * quantizer, size_t d, size_t nlist,
|
|
31
|
+
size_t M, size_t nbits_per_idx,
|
|
32
|
+
size_t M_refine, size_t nbits_per_idx_refine);
|
|
33
|
+
|
|
34
|
+
void reset() override;
|
|
35
|
+
|
|
36
|
+
size_t remove_ids(const IDSelector& sel) override;
|
|
37
|
+
|
|
38
|
+
/// trains the two product quantizers
|
|
39
|
+
void train_residual(idx_t n, const float* x) override;
|
|
40
|
+
|
|
41
|
+
void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
|
|
42
|
+
|
|
43
|
+
/// same as add_with_ids, but optionally use the precomputed list ids
|
|
44
|
+
void add_core (idx_t n, const float *x, const idx_t *xids,
|
|
45
|
+
const idx_t *precomputed_idx = nullptr);
|
|
46
|
+
|
|
47
|
+
void reconstruct_from_offset (int64_t list_no, int64_t offset,
|
|
48
|
+
float* recons) const override;
|
|
49
|
+
|
|
50
|
+
void merge_from (IndexIVF &other, idx_t add_id) override;
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
void search_preassigned (idx_t n, const float *x, idx_t k,
|
|
54
|
+
const idx_t *assign,
|
|
55
|
+
const float *centroid_dis,
|
|
56
|
+
float *distances, idx_t *labels,
|
|
57
|
+
bool store_pairs,
|
|
58
|
+
const IVFSearchParameters *params=nullptr
|
|
59
|
+
) const override;
|
|
60
|
+
|
|
61
|
+
IndexIVFPQR();
|
|
62
|
+
};
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
} // namespace faiss
|
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
#include <faiss/IndexIVFSpectralHash.h>
|
|
12
|
+
|
|
13
|
+
#include <memory>
|
|
14
|
+
#include <algorithm>
|
|
15
|
+
#include <stdint.h>
|
|
16
|
+
|
|
17
|
+
#include <faiss/utils/hamming.h>
|
|
18
|
+
#include <faiss/utils/utils.h>
|
|
19
|
+
#include <faiss/impl/FaissAssert.h>
|
|
20
|
+
#include <faiss/impl/AuxIndexStructures.h>
|
|
21
|
+
#include <faiss/VectorTransform.h>
|
|
22
|
+
|
|
23
|
+
namespace faiss {
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
/// Builds an L2 IVF index whose codes hold nbit bits, i.e. (nbit + 7) / 8
/// bytes per vector. Throws unless that code size is a multiple of 4.
/// The index owns a RandomRotationMatrix (d -> nbit) initialized with
/// seed 1234, uses the global threshold type and starts untrained.
IndexIVFSpectralHash::IndexIVFSpectralHash (
        Index * quantizer, size_t d, size_t nlist,
        int nbit, float period)
    : IndexIVF (quantizer, d, nlist, (nbit + 7) / 8, METRIC_L2),
      nbit (nbit), period (period), threshold_type (Thresh_global)
{
    FAISS_THROW_IF_NOT (code_size % 4 == 0);

    RandomRotationMatrix *rotation = new RandomRotationMatrix (d, nbit);
    rotation->init (1234);
    vt = rotation;

    own_fields = true;
    is_trained = false;
}
|
|
39
|
+
|
|
40
|
+
/// Default constructor: no transform, nothing owned, zero bits and period,
/// global threshold type.
IndexIVFSpectralHash::IndexIVFSpectralHash()
    : IndexIVF(),
      vt(nullptr),
      own_fields(false),
      nbit(0),
      period(0),
      threshold_type(Thresh_global)
{
}
|
|
44
|
+
|
|
45
|
+
/// Releases the vector transform, but only when this index owns it.
IndexIVFSpectralHash::~IndexIVFSpectralHash ()
{
    if (!own_fields) {
        return;
    }
    delete vt;
}
|
|
51
|
+
|
|
52
|
+
namespace {
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
/// Median of the n values in x.
///
/// The array is sorted in place as a side effect. For an even n the result
/// is the mean of the two middle values. An empty input returns 0 instead
/// of reading before the start of the array.
float median (size_t n, float *x) {
    if (n == 0) {
        return 0.0f;
    }
    std::sort(x, x + n);
    const size_t mid = n / 2;
    if (n % 2 == 1) {
        return x [mid];
    }
    return (x [mid - 1] + x [mid]) / 2;
}
|
|
63
|
+
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
/// Trains the vector transform (if it is not trained yet) and, depending
/// on threshold_type, the per-list thresholds stored in `trained`.
///
/// - Thresh_global: nothing beyond the transform is trained.
/// - Thresh_centroid / Thresh_centroid_half: the thresholds are the
///   transformed centroids (shifted by 0.25 * period for the _half variant).
/// - otherwise: the thresholds are, per list and per bit, the median of the
///   transformed training vectors assigned to that list.
///
/// In every trained case `trained` holds nlist * nbit values, laid out as
/// trained[list * nbit + bit].
///
/// @param n  number of training vectors
/// @param x  training vectors
void IndexIVFSpectralHash::train_residual (idx_t n, const float *x)
{
    if (!vt->is_trained) {
        vt->train (n, x);
    }

    if (threshold_type == Thresh_global) {
        // nothing to do
        return;
    } else if (threshold_type == Thresh_centroid ||
               threshold_type == Thresh_centroid_half) {
        // convert all centroids with vt
        std::vector<float> centroids (nlist * d);
        quantizer->reconstruct_n (0, nlist, centroids.data());
        trained.resize(nlist * nbit);
        vt->apply_noalloc (nlist, centroids.data(), trained.data());
        if (threshold_type == Thresh_centroid_half) {
            for (size_t i = 0; i < nlist * nbit; i++) {
                trained[i] -= 0.25 * period;
            }
        }
        return;
    }
    // otherwise train medians

    // assign each training vector to a list
    std::unique_ptr<idx_t []> idx (new idx_t [n]);
    quantizer->assign (n, x, idx.get());

    // histogram of list sizes
    std::vector<size_t> sizes(nlist + 1);
    for (size_t i = 0; i < n; i++) {
        FAISS_THROW_IF_NOT (idx[i] >= 0);
        sizes[idx[i]]++;
    }

    // turn the counts into start offsets (exclusive prefix sum)
    size_t ofs = 0;
    for (int j = 0; j < nlist; j++) {
        size_t o0 = ofs;
        ofs += sizes[j];
        sizes[j] = o0;
    }

    // transform
    std::unique_ptr<float []> xt (vt->apply (n, x));

    // transpose + reorder: xo is bit-major, and within one bit the values
    // are grouped by list
    std::unique_ptr<float []> xo (new float[n * nbit]);

    for (size_t i = 0; i < n; i++) {
        // after this loop sizes[l] is the END offset of list l
        size_t idest = sizes[idx[i]]++;
        for (size_t j = 0; j < nbit; j++) {
            xo[idest + n * j] = xt[i * nbit + j];
        }
    }

    // one threshold per (list, bit): the loop below indexes
    // trained[i * nbit + j] with i < nlist, so the size must be
    // nlist * nbit (n * nbit overflows the buffer whenever n < nlist)
    trained.resize (nlist * nbit);
    // compute medians
    // NOTE(review): this "omp for" has no enclosing parallel region in this
    // function, so it presumably runs on the calling thread only — confirm
    // whether "omp parallel for" was intended.
#pragma omp for
    for (int i = 0; i < nlist; i++) {
        size_t i0 = i == 0 ? 0 : sizes[i - 1];
        size_t i1 = sizes[i];
        for (int j = 0; j < nbit; j++) {
            float *xoi = xo.get() + i0 + n * j;
            if (i0 == i1) { // nothing to train
                trained[i * nbit + j] = 0.0;
            } else if (i1 == i0 + 1) {
                trained[i * nbit + j] = xoi[0];
            } else {
                trained[i * nbit + j] = median(i1 - i0, xoi);
            }
        }
    }
}
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
namespace {
|
|
143
|
+
|
|
144
|
+
/// Turns nbit float values into a packed bit string.
///
/// For each component the offset c[i] is subtracted, the result is scaled
/// by freq and floored; the parity of that integer is the output bit.
/// Bit i is stored in byte i / 8 at position i % 8. The first
/// (nbit + 7) / 8 bytes of codes are overwritten.
void binarize_with_freq(size_t nbit, float freq,
                        const float *x, const float *c,
                        uint8_t *codes)
{
    const size_t nbytes = (nbit + 7) / 8;
    memset (codes, 0, nbytes);

    for (size_t b = 0; b < nbit; b++) {
        const float centered = x[b] - c[b];
        const int cell = int(floor(centered * freq));
        if (cell & 1) {
            codes[b >> 3] |= 1 << (b & 7);
        }
    }
}
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
};
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
/// Encode n vectors into binary codes of code_size bytes each.
///
/// The input is first passed through vt, then each transformed vector is
/// binarized with binarize_with_freq using freq = 2 / period. The values
/// subtracted before binarization are all zero when threshold_type is
/// Thresh_global, and row list_nos[i] of `trained` otherwise.
///
/// @param n                number of vectors
/// @param x_in             input vectors, passed to vt->apply
/// @param list_nos         inverted list number of each vector, size n;
///                         a negative value marks a vector without a list
/// @param codes            output, n * code_size bytes; the slot of a vector
///                         with a negative list number is zero-filled
/// @param include_listnos  must be false (list number encoding is not
///                         supported here)
/// @throws if the index is not trained or include_listnos is true
void IndexIVFSpectralHash::encode_vectors(idx_t n, const float* x_in,
                                          const idx_t *list_nos,
                                          uint8_t * codes,
                                          bool include_listnos) const
{
    FAISS_THROW_IF_NOT (is_trained);
    float freq = 2.0 / period;

    FAISS_THROW_IF_NOT_MSG (!include_listnos, "listnos encoding not supported");

    // transform with vt
    std::unique_ptr<float []> x (vt->apply (n, x_in));

    // all-zero thresholds used for Thresh_global; read-only, so a single
    // instance can be shared by all threads
    const std::vector<float> zero (nbit);

    // each thread takes care of a subset of the vectors; the loop index is
    // signed to match n
#pragma omp parallel for
    for (idx_t i = 0; i < n; i++) {
        int64_t list_no = list_nos [i];
        uint8_t *code = codes + i * code_size;

        if (list_no >= 0) {
            const float *c;
            if (threshold_type == Thresh_global) {
                c = zero.data();
            } else {
                c = trained.data() + list_no * nbit;
            }
            binarize_with_freq (nbit, freq,
                                x.get() + i * nbit, c,
                                code);
        } else {
            // no list for this vector: emit a defined (all-zero) code
            // instead of leaving the caller's buffer uninitialized
            memset (code, 0, code_size);
        }
    }
}
|
|
198
|
+
|
|
199
|
+
namespace {
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
template<class HammingComputer>
|
|
203
|
+
struct IVFScanner: InvertedListScanner {
|
|
204
|
+
|
|
205
|
+
// copied from index structure
|
|
206
|
+
const IndexIVFSpectralHash *index;
|
|
207
|
+
size_t code_size;
|
|
208
|
+
size_t nbit;
|
|
209
|
+
bool store_pairs;
|
|
210
|
+
|
|
211
|
+
float period, freq;
|
|
212
|
+
std::vector<float> q;
|
|
213
|
+
std::vector<float> zero;
|
|
214
|
+
std::vector<uint8_t> qcode;
|
|
215
|
+
HammingComputer hc;
|
|
216
|
+
|
|
217
|
+
using idx_t = Index::idx_t;
|
|
218
|
+
|
|
219
|
+
IVFScanner (const IndexIVFSpectralHash * index,
|
|
220
|
+
bool store_pairs):
|
|
221
|
+
index (index),
|
|
222
|
+
code_size(index->code_size),
|
|
223
|
+
nbit(index->nbit),
|
|
224
|
+
store_pairs(store_pairs),
|
|
225
|
+
period(index->period), freq(2.0 / index->period),
|
|
226
|
+
q(nbit), zero(nbit), qcode(code_size),
|
|
227
|
+
hc(qcode.data(), code_size)
|
|
228
|
+
{
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
void set_query (const float *query) override {
|
|
233
|
+
FAISS_THROW_IF_NOT(query);
|
|
234
|
+
FAISS_THROW_IF_NOT(q.size() == nbit);
|
|
235
|
+
index->vt->apply_noalloc (1, query, q.data());
|
|
236
|
+
|
|
237
|
+
if (index->threshold_type ==
|
|
238
|
+
IndexIVFSpectralHash::Thresh_global) {
|
|
239
|
+
binarize_with_freq
|
|
240
|
+
(nbit, freq, q.data(), zero.data(), qcode.data());
|
|
241
|
+
hc.set (qcode.data(), code_size);
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
idx_t list_no;
|
|
246
|
+
|
|
247
|
+
void set_list (idx_t list_no, float /*coarse_dis*/) override {
|
|
248
|
+
this->list_no = list_no;
|
|
249
|
+
if (index->threshold_type != IndexIVFSpectralHash::Thresh_global) {
|
|
250
|
+
const float *c = index->trained.data() + list_no * nbit;
|
|
251
|
+
binarize_with_freq (nbit, freq, q.data(), c, qcode.data());
|
|
252
|
+
hc.set (qcode.data(), code_size);
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
float distance_to_code (const uint8_t *code) const final {
|
|
257
|
+
return hc.hamming (code);
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
size_t scan_codes (size_t list_size,
|
|
261
|
+
const uint8_t *codes,
|
|
262
|
+
const idx_t *ids,
|
|
263
|
+
float *simi, idx_t *idxi,
|
|
264
|
+
size_t k) const override
|
|
265
|
+
{
|
|
266
|
+
size_t nup = 0;
|
|
267
|
+
for (size_t j = 0; j < list_size; j++) {
|
|
268
|
+
|
|
269
|
+
float dis = hc.hamming (codes);
|
|
270
|
+
|
|
271
|
+
if (dis < simi [0]) {
|
|
272
|
+
maxheap_pop (k, simi, idxi);
|
|
273
|
+
int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
|
|
274
|
+
maxheap_push (k, simi, idxi, dis, id);
|
|
275
|
+
nup++;
|
|
276
|
+
}
|
|
277
|
+
codes += code_size;
|
|
278
|
+
}
|
|
279
|
+
return nup;
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
void scan_codes_range (size_t list_size,
|
|
283
|
+
const uint8_t *codes,
|
|
284
|
+
const idx_t *ids,
|
|
285
|
+
float radius,
|
|
286
|
+
RangeQueryResult & res) const override
|
|
287
|
+
{
|
|
288
|
+
for (size_t j = 0; j < list_size; j++) {
|
|
289
|
+
float dis = hc.hamming (codes);
|
|
290
|
+
if (dis < radius) {
|
|
291
|
+
int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
|
|
292
|
+
res.add (dis, id);
|
|
293
|
+
}
|
|
294
|
+
codes += code_size;
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
};
|
|
300
|
+
|
|
301
|
+
} // anonymous namespace
|
|
302
|
+
|
|
303
|
+
/// Create a scanner for this index, picking the Hamming computer that
/// matches code_size: a dedicated one for 4, 8, 16, 20, 32 and 64 bytes,
/// otherwise the generic multiple-of-8 or multiple-of-4 variant.
///
/// @param store_pairs  forwarded to the scanner
/// @return scanner allocated with new
/// @throws if code_size is not a multiple of 4
InvertedListScanner* IndexIVFSpectralHash::get_InvertedListScanner
    (bool store_pairs) const
{
    // sizes with a specialized Hamming computer
    switch (code_size) {
    case 4:
        return new IVFScanner<HammingComputer4> (this, store_pairs);
    case 8:
        return new IVFScanner<HammingComputer8> (this, store_pairs);
    case 16:
        return new IVFScanner<HammingComputer16> (this, store_pairs);
    case 20:
        return new IVFScanner<HammingComputer20> (this, store_pairs);
    case 32:
        return new IVFScanner<HammingComputer32> (this, store_pairs);
    case 64:
        return new IVFScanner<HammingComputer64> (this, store_pairs);
    default:
        break;
    }

    // generic fallbacks, widest word size first
    if (code_size % 8 == 0) {
        return new IVFScanner<HammingComputerM8>(this, store_pairs);
    }
    if (code_size % 4 == 0) {
        return new IVFScanner<HammingComputerM4>(this, store_pairs);
    }
    FAISS_THROW_MSG("not supported");
}
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
} // namespace faiss
|