faiss 0.2.6 → 0.2.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +2 -2
- data/vendor/faiss/faiss/AutoTune.cpp +15 -4
- data/vendor/faiss/faiss/AutoTune.h +0 -1
- data/vendor/faiss/faiss/Clustering.cpp +1 -5
- data/vendor/faiss/faiss/Clustering.h +0 -2
- data/vendor/faiss/faiss/IVFlib.h +0 -2
- data/vendor/faiss/faiss/Index.h +1 -2
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
- data/vendor/faiss/faiss/IndexBinary.h +0 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
- data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
- data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
- data/vendor/faiss/faiss/IndexFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
- data/vendor/faiss/faiss/IndexFlat.h +1 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
- data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
- data/vendor/faiss/faiss/IndexHNSW.h +0 -1
- data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
- data/vendor/faiss/faiss/IndexIDMap.h +0 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
- data/vendor/faiss/faiss/IndexIVF.h +121 -61
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
- data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
- data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
- data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
- data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
- data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
- data/vendor/faiss/faiss/IndexReplicas.h +0 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
- data/vendor/faiss/faiss/IndexShards.cpp +26 -109
- data/vendor/faiss/faiss/IndexShards.h +2 -3
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
- data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
- data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
- data/vendor/faiss/faiss/MetaIndexes.h +29 -0
- data/vendor/faiss/faiss/MetricType.h +14 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
- data/vendor/faiss/faiss/VectorTransform.h +1 -3
- data/vendor/faiss/faiss/clone_index.cpp +232 -18
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
- data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
- data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
- data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
- data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
- data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
- data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
- data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
- data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
- data/vendor/faiss/faiss/impl/HNSW.h +6 -9
- data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
- data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
- data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
- data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
- data/vendor/faiss/faiss/impl/NSG.h +4 -7
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
- data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
- data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
- data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
- data/vendor/faiss/faiss/index_factory.cpp +8 -10
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
- data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
- data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
- data/vendor/faiss/faiss/utils/Heap.h +35 -1
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
- data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
- data/vendor/faiss/faiss/utils/distances.cpp +61 -7
- data/vendor/faiss/faiss/utils/distances.h +11 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
- data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
- data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
- data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
- data/vendor/faiss/faiss/utils/fp16.h +7 -0
- data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
- data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
- data/vendor/faiss/faiss/utils/hamming.h +21 -10
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
- data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
- data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
- data/vendor/faiss/faiss/utils/sorting.h +71 -0
- data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
- data/vendor/faiss/faiss/utils/utils.cpp +4 -176
- data/vendor/faiss/faiss/utils/utils.h +2 -9
- metadata +29 -3
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26
@@ -24,9 +24,6 @@
|
|
24
24
|
#include <sys/stat.h>
|
25
25
|
#include <sys/types.h>
|
26
26
|
|
27
|
-
#ifdef __SSE__
|
28
|
-
#endif
|
29
|
-
|
30
27
|
#include <faiss/Index2Layer.h>
|
31
28
|
#include <faiss/IndexFlat.h>
|
32
29
|
#include <faiss/IndexIVFPQ.h>
|
@@ -35,6 +32,7 @@
|
|
35
32
|
#include <faiss/utils/Heap.h>
|
36
33
|
#include <faiss/utils/distances.h>
|
37
34
|
#include <faiss/utils/random.h>
|
35
|
+
#include <faiss/utils/sorting.h>
|
38
36
|
|
39
37
|
extern "C" {
|
40
38
|
|
@@ -58,7 +56,6 @@ int sgemm_(
|
|
58
56
|
|
59
57
|
namespace faiss {
|
60
58
|
|
61
|
-
using idx_t = Index::idx_t;
|
62
59
|
using MinimaxHeap = HNSW::MinimaxHeap;
|
63
60
|
using storage_idx_t = HNSW::storage_idx_t;
|
64
61
|
using NodeDistFarther = HNSW::NodeDistFarther;
|
@@ -101,7 +98,7 @@ struct NegativeDistanceComputer : DistanceComputer {
|
|
101
98
|
};
|
102
99
|
|
103
100
|
DistanceComputer* storage_distance_computer(const Index* storage) {
|
104
|
-
if (storage->metric_type == METRIC_INNER_PRODUCT) {
|
101
|
+
if (is_similarity_metric(storage->metric_type)) {
|
105
102
|
return new NegativeDistanceComputer(storage->get_distance_computer());
|
106
103
|
} else {
|
107
104
|
return storage->get_distance_computer();
|
@@ -349,7 +346,7 @@ void IndexHNSW::search(
|
|
349
346
|
InterruptCallback::check();
|
350
347
|
}
|
351
348
|
|
352
|
-
if (metric_type == METRIC_INNER_PRODUCT) {
|
349
|
+
if (is_similarity_metric(metric_type)) {
|
353
350
|
// we need to revert the negated distances
|
354
351
|
for (size_t i = 0; i < k * n; i++) {
|
355
352
|
distances[i] = -distances[i];
|
@@ -64,7 +64,7 @@ template <typename IndexT>
|
|
64
64
|
void IndexIDMapTemplate<IndexT>::add_with_ids(
|
65
65
|
idx_t n,
|
66
66
|
const typename IndexT::component_t* x,
|
67
|
-
const typename IndexT::idx_t* xids) {
|
67
|
+
const idx_t* xids) {
|
68
68
|
index->add(n, x);
|
69
69
|
for (idx_t i = 0; i < n; i++)
|
70
70
|
id_map.push_back(xids[i]);
|
@@ -77,7 +77,7 @@ void IndexIDMapTemplate<IndexT>::search(
|
|
77
77
|
const typename IndexT::component_t* x,
|
78
78
|
idx_t k,
|
79
79
|
typename IndexT::distance_t* distances,
|
80
|
-
|
80
|
+
idx_t* labels,
|
81
81
|
const SearchParameters* params) const {
|
82
82
|
FAISS_THROW_IF_NOT_MSG(
|
83
83
|
!params, "search params not supported for this index");
|
@@ -91,7 +91,7 @@ void IndexIDMapTemplate<IndexT>::search(
|
|
91
91
|
|
92
92
|
template <typename IndexT>
|
93
93
|
void IndexIDMapTemplate<IndexT>::range_search(
|
94
|
-
|
94
|
+
idx_t n,
|
95
95
|
const typename IndexT::component_t* x,
|
96
96
|
typename IndexT::distance_t radius,
|
97
97
|
RangeSearchResult* result,
|
@@ -182,7 +182,7 @@ template <typename IndexT>
|
|
182
182
|
void IndexIDMap2Template<IndexT>::add_with_ids(
|
183
183
|
idx_t n,
|
184
184
|
const typename IndexT::component_t* x,
|
185
|
-
const typename IndexT::idx_t* xids) {
|
185
|
+
const idx_t* xids) {
|
186
186
|
size_t prev_ntotal = this->ntotal;
|
187
187
|
IndexIDMapTemplate<IndexT>::add_with_ids(n, x, xids);
|
188
188
|
for (size_t i = prev_ntotal; i < this->ntotal; i++) {
|
@@ -18,7 +18,6 @@ namespace faiss {
|
|
18
18
|
/** Index that translates search results to ids */
|
19
19
|
template <typename IndexT>
|
20
20
|
struct IndexIDMapTemplate : IndexT {
|
21
|
-
using idx_t = typename IndexT::idx_t;
|
22
21
|
using component_t = typename IndexT::component_t;
|
23
22
|
using distance_t = typename IndexT::distance_t;
|
24
23
|
|
@@ -74,7 +73,6 @@ using IndexBinaryIDMap = IndexIDMapTemplate<IndexBinary>;
|
|
74
73
|
* implementation via a 2-way index */
|
75
74
|
template <typename IndexT>
|
76
75
|
struct IndexIDMap2Template : IndexIDMapTemplate<IndexT> {
|
77
|
-
using idx_t = typename IndexT::idx_t;
|
78
76
|
using component_t = typename IndexT::component_t;
|
79
77
|
using distance_t = typename IndexT::distance_t;
|
80
78
|
|
@@ -10,11 +10,13 @@
|
|
10
10
|
#include <faiss/IndexIVF.h>
|
11
11
|
|
12
12
|
#include <omp.h>
|
13
|
+
#include <cstdint>
|
13
14
|
#include <mutex>
|
14
15
|
|
15
16
|
#include <algorithm>
|
16
17
|
#include <cinttypes>
|
17
18
|
#include <cstdio>
|
19
|
+
#include <limits>
|
18
20
|
#include <memory>
|
19
21
|
|
20
22
|
#include <faiss/utils/hamming.h>
|
@@ -22,6 +24,7 @@
|
|
22
24
|
|
23
25
|
#include <faiss/IndexFlat.h>
|
24
26
|
#include <faiss/impl/AuxIndexStructures.h>
|
27
|
+
#include <faiss/impl/CodePacker.h>
|
25
28
|
#include <faiss/impl/FaissAssert.h>
|
26
29
|
#include <faiss/impl/IDSelector.h>
|
27
30
|
|
@@ -35,27 +38,19 @@ using ScopedCodes = InvertedLists::ScopedCodes;
|
|
35
38
|
******************************************/
|
36
39
|
|
37
40
|
Level1Quantizer::Level1Quantizer(Index* quantizer, size_t nlist)
|
38
|
-
: quantizer(quantizer),
|
39
|
-
nlist(nlist),
|
40
|
-
quantizer_trains_alone(0),
|
41
|
-
own_fields(false),
|
42
|
-
clustering_index(nullptr) {
|
41
|
+
: quantizer(quantizer), nlist(nlist) {
|
43
42
|
// here we set a low # iterations because this is typically used
|
44
43
|
// for large clusterings (nb this is not used for the MultiIndex,
|
45
44
|
// for which quantizer_trains_alone = true)
|
46
45
|
cp.niter = 10;
|
47
46
|
}
|
48
47
|
|
49
|
-
Level1Quantizer::Level1Quantizer()
|
50
|
-
: quantizer(nullptr),
|
51
|
-
nlist(0),
|
52
|
-
quantizer_trains_alone(0),
|
53
|
-
own_fields(false),
|
54
|
-
clustering_index(nullptr) {}
|
48
|
+
Level1Quantizer::Level1Quantizer() {}
|
55
49
|
|
56
50
|
Level1Quantizer::~Level1Quantizer() {
|
57
|
-
if (own_fields)
|
51
|
+
if (own_fields) {
|
58
52
|
delete quantizer;
|
53
|
+
}
|
59
54
|
}
|
60
55
|
|
61
56
|
void Level1Quantizer::train_q1(
|
@@ -131,7 +126,7 @@ size_t Level1Quantizer::coarse_code_size() const {
|
|
131
126
|
return nbyte;
|
132
127
|
}
|
133
128
|
|
134
|
-
void Level1Quantizer::encode_listno(Index::idx_t list_no, uint8_t* code) const {
|
129
|
+
void Level1Quantizer::encode_listno(idx_t list_no, uint8_t* code) const {
|
135
130
|
// little endian
|
136
131
|
size_t nl = nlist - 1;
|
137
132
|
while (nl > 0) {
|
@@ -141,7 +136,7 @@ void Level1Quantizer::encode_listno(Index::idx_t list_no, uint8_t* code) const {
|
|
141
136
|
}
|
142
137
|
}
|
143
138
|
|
144
|
-
|
139
|
+
idx_t Level1Quantizer::decode_listno(const uint8_t* code) const {
|
145
140
|
size_t nl = nlist - 1;
|
146
141
|
int64_t list_no = 0;
|
147
142
|
int nbit = 0;
|
@@ -165,13 +160,10 @@ IndexIVF::IndexIVF(
|
|
165
160
|
size_t code_size,
|
166
161
|
MetricType metric)
|
167
162
|
: Index(d, metric),
|
168
|
-
|
163
|
+
IndexIVFInterface(quantizer, nlist),
|
169
164
|
invlists(new ArrayInvertedLists(nlist, code_size)),
|
170
165
|
own_invlists(true),
|
171
|
-
code_size(code_size)
|
172
|
-
nprobe(1),
|
173
|
-
max_codes(0),
|
174
|
-
parallel_mode(0) {
|
166
|
+
code_size(code_size) {
|
175
167
|
FAISS_THROW_IF_NOT(d == quantizer->d);
|
176
168
|
is_trained = quantizer->is_trained && (quantizer->ntotal == nlist);
|
177
169
|
// Spherical by default if the metric is inner_product
|
@@ -180,13 +172,7 @@ IndexIVF::IndexIVF(
|
|
180
172
|
}
|
181
173
|
}
|
182
174
|
|
183
|
-
IndexIVF::IndexIVF()
|
184
|
-
: invlists(nullptr),
|
185
|
-
own_invlists(false),
|
186
|
-
code_size(0),
|
187
|
-
nprobe(1),
|
188
|
-
max_codes(0),
|
189
|
-
parallel_mode(0) {}
|
175
|
+
IndexIVF::IndexIVF() {}
|
190
176
|
|
191
177
|
void IndexIVF::add(idx_t n, const float* x) {
|
192
178
|
add_with_ids(n, x, nullptr);
|
@@ -412,6 +398,7 @@ void IndexIVF::search_preassigned(
|
|
412
398
|
nprobe = std::min((idx_t)nlist, nprobe);
|
413
399
|
FAISS_THROW_IF_NOT(nprobe > 0);
|
414
400
|
|
401
|
+
const idx_t unlimited_list_size = std::numeric_limits<idx_t>::max();
|
415
402
|
idx_t max_codes = params ? params->max_codes : this->max_codes;
|
416
403
|
IDSelector* sel = params ? params->sel : nullptr;
|
417
404
|
const IDSelectorRange* selr = dynamic_cast<const IDSelectorRange*>(sel);
|
@@ -427,6 +414,10 @@ void IndexIVF::search_preassigned(
|
|
427
414
|
!(sel && store_pairs),
|
428
415
|
"selector and store_pairs cannot be combined");
|
429
416
|
|
417
|
+
FAISS_THROW_IF_NOT_MSG(
|
418
|
+
!invlists->use_iterator || (max_codes == 0 && store_pairs == false),
|
419
|
+
"iterable inverted lists don't support max_codes and store_pairs");
|
420
|
+
|
430
421
|
size_t nlistv = 0, ndis = 0, nheap = 0;
|
431
422
|
|
432
423
|
using HeapForIP = CMin<float, idx_t>;
|
@@ -439,6 +430,14 @@ void IndexIVF::search_preassigned(
|
|
439
430
|
int pmode = this->parallel_mode & ~PARALLEL_MODE_NO_HEAP_INIT;
|
440
431
|
bool do_heap_init = !(this->parallel_mode & PARALLEL_MODE_NO_HEAP_INIT);
|
441
432
|
|
433
|
+
FAISS_THROW_IF_NOT_MSG(
|
434
|
+
max_codes == 0 || pmode == 0 || pmode == 3,
|
435
|
+
"max_codes supported only for parallel_mode = 0 or 3");
|
436
|
+
|
437
|
+
if (max_codes == 0) {
|
438
|
+
max_codes = unlimited_list_size;
|
439
|
+
}
|
440
|
+
|
442
441
|
bool do_parallel = omp_get_max_threads() >= 2 &&
|
443
442
|
(pmode == 0 ? false
|
444
443
|
: pmode == 3 ? n > 1
|
@@ -457,7 +456,7 @@ void IndexIVF::search_preassigned(
|
|
457
456
|
* that are in common between the two
|
458
457
|
******************************************************/
|
459
458
|
|
460
|
-
//
|
459
|
+
// initialize + reorder a result heap
|
461
460
|
|
462
461
|
auto init_result = [&](float* simi, idx_t* idxi) {
|
463
462
|
if (!do_heap_init)
|
@@ -495,7 +494,8 @@ void IndexIVF::search_preassigned(
|
|
495
494
|
auto scan_one_list = [&](idx_t key,
|
496
495
|
float coarse_dis_i,
|
497
496
|
float* simi,
|
498
|
-
idx_t* idxi) {
|
497
|
+
idx_t* idxi,
|
498
|
+
idx_t list_size_max) {
|
499
499
|
if (key < 0) {
|
500
500
|
// not enough centroids for multiprobe
|
501
501
|
return (size_t)0;
|
@@ -506,10 +506,8 @@ void IndexIVF::search_preassigned(
|
|
506
506
|
key,
|
507
507
|
nlist);
|
508
508
|
|
509
|
-
size_t list_size = invlists->list_size(key);
|
510
|
-
|
511
509
|
// don't waste time on empty lists
|
512
|
-
if (list_size == 0) {
|
510
|
+
if (invlists->is_empty(key)) {
|
513
511
|
return (size_t)0;
|
514
512
|
}
|
515
513
|
|
@@ -518,32 +516,51 @@ void IndexIVF::search_preassigned(
|
|
518
516
|
nlistv++;
|
519
517
|
|
520
518
|
try {
|
521
|
-
|
522
|
-
|
519
|
+
if (invlists->use_iterator) {
|
520
|
+
size_t list_size = 0;
|
523
521
|
|
524
|
-
|
525
|
-
|
522
|
+
std::unique_ptr<InvertedListsIterator> it(
|
523
|
+
invlists->get_iterator(key));
|
526
524
|
|
527
|
-
|
528
|
-
|
529
|
-
ids = sids->get();
|
530
|
-
}
|
525
|
+
nheap += scanner->iterate_codes(
|
526
|
+
it.get(), simi, idxi, k, list_size);
|
531
527
|
|
532
|
-
|
533
|
-
|
534
|
-
size_t
|
535
|
-
|
536
|
-
|
537
|
-
if (list_size == 0) {
|
538
|
-
return (size_t)0;
|
528
|
+
return list_size;
|
529
|
+
} else {
|
530
|
+
size_t list_size = invlists->list_size(key);
|
531
|
+
if (list_size > list_size_max) {
|
532
|
+
list_size = list_size_max;
|
539
533
|
}
|
540
|
-
codes += jmin * code_size;
|
541
|
-
ids += jmin;
|
542
|
-
}
|
543
534
|
|
544
|
-
|
545
|
-
|
535
|
+
InvertedLists::ScopedCodes scodes(invlists, key);
|
536
|
+
const uint8_t* codes = scodes.get();
|
537
|
+
|
538
|
+
std::unique_ptr<InvertedLists::ScopedIds> sids;
|
539
|
+
const idx_t* ids = nullptr;
|
546
540
|
|
541
|
+
if (!store_pairs) {
|
542
|
+
sids.reset(new InvertedLists::ScopedIds(invlists, key));
|
543
|
+
ids = sids->get();
|
544
|
+
}
|
545
|
+
|
546
|
+
if (selr) { // IDSelectorRange
|
547
|
+
// restrict search to a section of the inverted list
|
548
|
+
size_t jmin, jmax;
|
549
|
+
selr->find_sorted_ids_bounds(
|
550
|
+
list_size, ids, &jmin, &jmax);
|
551
|
+
list_size = jmax - jmin;
|
552
|
+
if (list_size == 0) {
|
553
|
+
return (size_t)0;
|
554
|
+
}
|
555
|
+
codes += jmin * code_size;
|
556
|
+
ids += jmin;
|
557
|
+
}
|
558
|
+
|
559
|
+
nheap += scanner->scan_codes(
|
560
|
+
list_size, codes, ids, simi, idxi, k);
|
561
|
+
|
562
|
+
return list_size;
|
563
|
+
}
|
547
564
|
} catch (const std::exception& e) {
|
548
565
|
std::lock_guard<std::mutex> lock(exception_mutex);
|
549
566
|
exception_string =
|
@@ -551,8 +568,6 @@ void IndexIVF::search_preassigned(
|
|
551
568
|
interrupt = true;
|
552
569
|
return size_t(0);
|
553
570
|
}
|
554
|
-
|
555
|
-
return list_size;
|
556
571
|
};
|
557
572
|
|
558
573
|
/****************************************************
|
@@ -581,9 +596,9 @@ void IndexIVF::search_preassigned(
|
|
581
596
|
keys[i * nprobe + ik],
|
582
597
|
coarse_dis[i * nprobe + ik],
|
583
598
|
simi,
|
584
|
-
idxi);
|
585
|
-
|
586
|
-
if (max_codes && nscan >= max_codes) {
|
599
|
+
idxi,
|
600
|
+
max_codes - nscan);
|
601
|
+
if (nscan >= max_codes) {
|
587
602
|
break;
|
588
603
|
}
|
589
604
|
}
|
@@ -610,7 +625,8 @@ void IndexIVF::search_preassigned(
|
|
610
625
|
keys[i * nprobe + ik],
|
611
626
|
coarse_dis[i * nprobe + ik],
|
612
627
|
local_dis.data(),
|
613
|
-
local_idx.data());
|
628
|
+
local_idx.data(),
|
629
|
+
unlimited_list_size);
|
614
630
|
|
615
631
|
// can't do the test on max_codes
|
616
632
|
}
|
@@ -651,7 +667,8 @@ void IndexIVF::search_preassigned(
|
|
651
667
|
keys[ij],
|
652
668
|
coarse_dis[ij],
|
653
669
|
local_dis.data(),
|
654
|
-
local_idx.data());
|
670
|
+
local_idx.data(),
|
671
|
+
unlimited_list_size);
|
655
672
|
#pragma omp critical
|
656
673
|
{
|
657
674
|
add_local_results(
|
@@ -744,6 +761,10 @@ void IndexIVF::range_search_preassigned(
|
|
744
761
|
idx_t max_codes = params ? params->max_codes : this->max_codes;
|
745
762
|
IDSelector* sel = params ? params->sel : nullptr;
|
746
763
|
|
764
|
+
FAISS_THROW_IF_NOT_MSG(
|
765
|
+
!invlists->use_iterator || (max_codes == 0 && store_pairs == false),
|
766
|
+
"iterable inverted lists don't support max_codes and store_pairs");
|
767
|
+
|
747
768
|
size_t nlistv = 0, ndis = 0;
|
748
769
|
|
749
770
|
bool interrupt = false;
|
@@ -780,21 +801,30 @@ void IndexIVF::range_search_preassigned(
|
|
780
801
|
key,
|
781
802
|
ik,
|
782
803
|
nlist);
|
783
|
-
const size_t list_size = invlists->list_size(key);
|
784
804
|
|
785
|
-
if (list_size == 0)
|
805
|
+
if (invlists->is_empty(key)) {
|
786
806
|
return;
|
807
|
+
}
|
787
808
|
|
788
809
|
try {
|
789
|
-
|
790
|
-
InvertedLists::ScopedIds ids(invlists, key);
|
791
|
-
|
810
|
+
size_t list_size = 0;
|
792
811
|
scanner->set_list(key, coarse_dis[i * nprobe + ik]);
|
812
|
+
if (invlists->use_iterator) {
|
813
|
+
std::unique_ptr<InvertedListsIterator> it(
|
814
|
+
invlists->get_iterator(key));
|
815
|
+
|
816
|
+
scanner->iterate_codes_range(
|
817
|
+
it.get(), radius, qres, list_size);
|
818
|
+
} else {
|
819
|
+
InvertedLists::ScopedCodes scodes(invlists, key);
|
820
|
+
InvertedLists::ScopedIds ids(invlists, key);
|
821
|
+
list_size = invlists->list_size(key);
|
822
|
+
|
823
|
+
scanner->scan_codes_range(
|
824
|
+
list_size, scodes.get(), ids.get(), radius, qres);
|
825
|
+
}
|
793
826
|
nlistv++;
|
794
827
|
ndis += list_size;
|
795
|
-
scanner->scan_codes_range(
|
796
|
-
list_size, scodes.get(), ids.get(), radius, qres);
|
797
|
-
|
798
828
|
} catch (const std::exception& e) {
|
799
829
|
std::lock_guard<std::mutex> lock(exception_mutex);
|
800
830
|
exception_string =
|
@@ -1086,6 +1116,10 @@ void IndexIVF::merge_from(Index& otherIndex, idx_t add_id) {
|
|
1086
1116
|
other->ntotal = 0;
|
1087
1117
|
}
|
1088
1118
|
|
1119
|
+
CodePacker* IndexIVF::get_CodePacker() const {
|
1120
|
+
return new CodePackerFlat(code_size);
|
1121
|
+
}
|
1122
|
+
|
1089
1123
|
void IndexIVF::replace_invlists(InvertedLists* il, bool own) {
|
1090
1124
|
if (own_invlists) {
|
1091
1125
|
delete invlists;
|
@@ -1104,71 +1138,11 @@ void IndexIVF::replace_invlists(InvertedLists* il, bool own) {
|
|
1104
1138
|
|
1105
1139
|
void IndexIVF::copy_subset_to(
|
1106
1140
|
IndexIVF& other,
|
1107
|
-
|
1141
|
+
InvertedLists::subset_type_t subset_type,
|
1108
1142
|
idx_t a1,
|
1109
1143
|
idx_t a2) const {
|
1110
|
-
|
1111
|
-
|
1112
|
-
FAISS_THROW_IF_NOT(other.direct_map.no());
|
1113
|
-
FAISS_THROW_IF_NOT_FMT(
|
1114
|
-
subset_type == 0 || subset_type == 1 || subset_type == 2,
|
1115
|
-
"subset type %d not implemented",
|
1116
|
-
subset_type);
|
1117
|
-
|
1118
|
-
size_t accu_n = 0;
|
1119
|
-
size_t accu_a1 = 0;
|
1120
|
-
size_t accu_a2 = 0;
|
1121
|
-
|
1122
|
-
InvertedLists* oivf = other.invlists;
|
1123
|
-
|
1124
|
-
for (idx_t list_no = 0; list_no < nlist; list_no++) {
|
1125
|
-
size_t n = invlists->list_size(list_no);
|
1126
|
-
ScopedIds ids_in(invlists, list_no);
|
1127
|
-
|
1128
|
-
if (subset_type == 0) {
|
1129
|
-
for (idx_t i = 0; i < n; i++) {
|
1130
|
-
idx_t id = ids_in[i];
|
1131
|
-
if (a1 <= id && id < a2) {
|
1132
|
-
oivf->add_entry(
|
1133
|
-
list_no,
|
1134
|
-
invlists->get_single_id(list_no, i),
|
1135
|
-
ScopedCodes(invlists, list_no, i).get());
|
1136
|
-
other.ntotal++;
|
1137
|
-
}
|
1138
|
-
}
|
1139
|
-
} else if (subset_type == 1) {
|
1140
|
-
for (idx_t i = 0; i < n; i++) {
|
1141
|
-
idx_t id = ids_in[i];
|
1142
|
-
if (id % a1 == a2) {
|
1143
|
-
oivf->add_entry(
|
1144
|
-
list_no,
|
1145
|
-
invlists->get_single_id(list_no, i),
|
1146
|
-
ScopedCodes(invlists, list_no, i).get());
|
1147
|
-
other.ntotal++;
|
1148
|
-
}
|
1149
|
-
}
|
1150
|
-
} else if (subset_type == 2) {
|
1151
|
-
// see what is allocated to a1 and to a2
|
1152
|
-
size_t next_accu_n = accu_n + n;
|
1153
|
-
size_t next_accu_a1 = next_accu_n * a1 / ntotal;
|
1154
|
-
size_t i1 = next_accu_a1 - accu_a1;
|
1155
|
-
size_t next_accu_a2 = next_accu_n * a2 / ntotal;
|
1156
|
-
size_t i2 = next_accu_a2 - accu_a2;
|
1157
|
-
|
1158
|
-
for (idx_t i = i1; i < i2; i++) {
|
1159
|
-
oivf->add_entry(
|
1160
|
-
list_no,
|
1161
|
-
invlists->get_single_id(list_no, i),
|
1162
|
-
ScopedCodes(invlists, list_no, i).get());
|
1163
|
-
}
|
1164
|
-
|
1165
|
-
other.ntotal += i2 - i1;
|
1166
|
-
accu_a1 = next_accu_a1;
|
1167
|
-
accu_a2 = next_accu_a2;
|
1168
|
-
}
|
1169
|
-
accu_n += n;
|
1170
|
-
}
|
1171
|
-
FAISS_ASSERT(accu_n == ntotal);
|
1144
|
+
other.ntotal +=
|
1145
|
+
invlists->copy_subset_to(*other.invlists, subset_type, a1, a2);
|
1172
1146
|
}
|
1173
1147
|
|
1174
1148
|
IndexIVF::~IndexIVF() {
|
@@ -1233,6 +1207,39 @@ size_t InvertedListScanner::scan_codes(
|
|
1233
1207
|
return nup;
|
1234
1208
|
}
|
1235
1209
|
|
1210
|
+
size_t InvertedListScanner::iterate_codes(
|
1211
|
+
InvertedListsIterator* it,
|
1212
|
+
float* simi,
|
1213
|
+
idx_t* idxi,
|
1214
|
+
size_t k,
|
1215
|
+
size_t& list_size) const {
|
1216
|
+
size_t nup = 0;
|
1217
|
+
list_size = 0;
|
1218
|
+
|
1219
|
+
if (!keep_max) {
|
1220
|
+
for (; it->is_available(); it->next()) {
|
1221
|
+
auto id_and_codes = it->get_id_and_codes();
|
1222
|
+
float dis = distance_to_code(id_and_codes.second);
|
1223
|
+
if (dis < simi[0]) {
|
1224
|
+
maxheap_replace_top(k, simi, idxi, dis, id_and_codes.first);
|
1225
|
+
nup++;
|
1226
|
+
}
|
1227
|
+
list_size++;
|
1228
|
+
}
|
1229
|
+
} else {
|
1230
|
+
for (; it->is_available(); it->next()) {
|
1231
|
+
auto id_and_codes = it->get_id_and_codes();
|
1232
|
+
float dis = distance_to_code(id_and_codes.second);
|
1233
|
+
if (dis > simi[0]) {
|
1234
|
+
minheap_replace_top(k, simi, idxi, dis, id_and_codes.first);
|
1235
|
+
nup++;
|
1236
|
+
}
|
1237
|
+
list_size++;
|
1238
|
+
}
|
1239
|
+
}
|
1240
|
+
return nup;
|
1241
|
+
}
|
1242
|
+
|
1236
1243
|
void InvertedListScanner::scan_codes_range(
|
1237
1244
|
size_t list_size,
|
1238
1245
|
const uint8_t* codes,
|
@@ -1252,4 +1259,23 @@ void InvertedListScanner::scan_codes_range(
|
|
1252
1259
|
}
|
1253
1260
|
}
|
1254
1261
|
|
1262
|
+
void InvertedListScanner::iterate_codes_range(
|
1263
|
+
InvertedListsIterator* it,
|
1264
|
+
float radius,
|
1265
|
+
RangeQueryResult& res,
|
1266
|
+
size_t& list_size) const {
|
1267
|
+
list_size = 0;
|
1268
|
+
for (; it->is_available(); it->next()) {
|
1269
|
+
auto id_and_codes = it->get_id_and_codes();
|
1270
|
+
float dis = distance_to_code(id_and_codes.second);
|
1271
|
+
bool keep = !keep_max
|
1272
|
+
? dis < radius
|
1273
|
+
: dis > radius; // TODO templatize to remove this test
|
1274
|
+
if (keep) {
|
1275
|
+
res.add(dis, id_and_codes.first);
|
1276
|
+
}
|
1277
|
+
list_size++;
|
1278
|
+
}
|
1279
|
+
}
|
1280
|
+
|
1255
1281
|
} // namespace faiss
|