faiss 0.2.6 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +2 -2
- data/vendor/faiss/faiss/AutoTune.cpp +15 -4
- data/vendor/faiss/faiss/AutoTune.h +0 -1
- data/vendor/faiss/faiss/Clustering.cpp +1 -5
- data/vendor/faiss/faiss/Clustering.h +0 -2
- data/vendor/faiss/faiss/IVFlib.h +0 -2
- data/vendor/faiss/faiss/Index.h +1 -2
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
- data/vendor/faiss/faiss/IndexBinary.h +0 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
- data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
- data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
- data/vendor/faiss/faiss/IndexFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
- data/vendor/faiss/faiss/IndexFlat.h +1 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
- data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
- data/vendor/faiss/faiss/IndexHNSW.h +0 -1
- data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
- data/vendor/faiss/faiss/IndexIDMap.h +0 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
- data/vendor/faiss/faiss/IndexIVF.h +121 -61
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
- data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
- data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
- data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
- data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
- data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
- data/vendor/faiss/faiss/IndexReplicas.h +0 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
- data/vendor/faiss/faiss/IndexShards.cpp +26 -109
- data/vendor/faiss/faiss/IndexShards.h +2 -3
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
- data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
- data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
- data/vendor/faiss/faiss/MetaIndexes.h +29 -0
- data/vendor/faiss/faiss/MetricType.h +14 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
- data/vendor/faiss/faiss/VectorTransform.h +1 -3
- data/vendor/faiss/faiss/clone_index.cpp +232 -18
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
- data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
- data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
- data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
- data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
- data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
- data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
- data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
- data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
- data/vendor/faiss/faiss/impl/HNSW.h +6 -9
- data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
- data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
- data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
- data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
- data/vendor/faiss/faiss/impl/NSG.h +4 -7
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
- data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
- data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
- data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
- data/vendor/faiss/faiss/index_factory.cpp +8 -10
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
- data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
- data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
- data/vendor/faiss/faiss/utils/Heap.h +35 -1
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
- data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
- data/vendor/faiss/faiss/utils/distances.cpp +61 -7
- data/vendor/faiss/faiss/utils/distances.h +11 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
- data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
- data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
- data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
- data/vendor/faiss/faiss/utils/fp16.h +7 -0
- data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
- data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
- data/vendor/faiss/faiss/utils/hamming.h +21 -10
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
- data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
- data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
- data/vendor/faiss/faiss/utils/sorting.h +71 -0
- data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
- data/vendor/faiss/faiss/utils/utils.cpp +4 -176
- data/vendor/faiss/faiss/utils/utils.h +2 -9
- metadata +29 -3
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26
|
@@ -24,9 +24,6 @@
|
|
|
24
24
|
#include <sys/stat.h>
|
|
25
25
|
#include <sys/types.h>
|
|
26
26
|
|
|
27
|
-
#ifdef __SSE__
|
|
28
|
-
#endif
|
|
29
|
-
|
|
30
27
|
#include <faiss/Index2Layer.h>
|
|
31
28
|
#include <faiss/IndexFlat.h>
|
|
32
29
|
#include <faiss/IndexIVFPQ.h>
|
|
@@ -35,6 +32,7 @@
|
|
|
35
32
|
#include <faiss/utils/Heap.h>
|
|
36
33
|
#include <faiss/utils/distances.h>
|
|
37
34
|
#include <faiss/utils/random.h>
|
|
35
|
+
#include <faiss/utils/sorting.h>
|
|
38
36
|
|
|
39
37
|
extern "C" {
|
|
40
38
|
|
|
@@ -58,7 +56,6 @@ int sgemm_(
|
|
|
58
56
|
|
|
59
57
|
namespace faiss {
|
|
60
58
|
|
|
61
|
-
using idx_t = Index::idx_t;
|
|
62
59
|
using MinimaxHeap = HNSW::MinimaxHeap;
|
|
63
60
|
using storage_idx_t = HNSW::storage_idx_t;
|
|
64
61
|
using NodeDistFarther = HNSW::NodeDistFarther;
|
|
@@ -101,7 +98,7 @@ struct NegativeDistanceComputer : DistanceComputer {
|
|
|
101
98
|
};
|
|
102
99
|
|
|
103
100
|
DistanceComputer* storage_distance_computer(const Index* storage) {
|
|
104
|
-
if (storage->metric_type == METRIC_INNER_PRODUCT) {
|
|
101
|
+
if (is_similarity_metric(storage->metric_type)) {
|
|
105
102
|
return new NegativeDistanceComputer(storage->get_distance_computer());
|
|
106
103
|
} else {
|
|
107
104
|
return storage->get_distance_computer();
|
|
@@ -349,7 +346,7 @@ void IndexHNSW::search(
|
|
|
349
346
|
InterruptCallback::check();
|
|
350
347
|
}
|
|
351
348
|
|
|
352
|
-
if (metric_type == METRIC_INNER_PRODUCT) {
|
|
349
|
+
if (is_similarity_metric(metric_type)) {
|
|
353
350
|
// we need to revert the negated distances
|
|
354
351
|
for (size_t i = 0; i < k * n; i++) {
|
|
355
352
|
distances[i] = -distances[i];
|
|
@@ -64,7 +64,7 @@ template <typename IndexT>
|
|
|
64
64
|
void IndexIDMapTemplate<IndexT>::add_with_ids(
|
|
65
65
|
idx_t n,
|
|
66
66
|
const typename IndexT::component_t* x,
|
|
67
|
-
const typename IndexT::idx_t* xids) {
|
|
67
|
+
const idx_t* xids) {
|
|
68
68
|
index->add(n, x);
|
|
69
69
|
for (idx_t i = 0; i < n; i++)
|
|
70
70
|
id_map.push_back(xids[i]);
|
|
@@ -77,7 +77,7 @@ void IndexIDMapTemplate<IndexT>::search(
|
|
|
77
77
|
const typename IndexT::component_t* x,
|
|
78
78
|
idx_t k,
|
|
79
79
|
typename IndexT::distance_t* distances,
|
|
80
|
-
|
|
80
|
+
idx_t* labels,
|
|
81
81
|
const SearchParameters* params) const {
|
|
82
82
|
FAISS_THROW_IF_NOT_MSG(
|
|
83
83
|
!params, "search params not supported for this index");
|
|
@@ -91,7 +91,7 @@ void IndexIDMapTemplate<IndexT>::search(
|
|
|
91
91
|
|
|
92
92
|
template <typename IndexT>
|
|
93
93
|
void IndexIDMapTemplate<IndexT>::range_search(
|
|
94
|
-
|
|
94
|
+
idx_t n,
|
|
95
95
|
const typename IndexT::component_t* x,
|
|
96
96
|
typename IndexT::distance_t radius,
|
|
97
97
|
RangeSearchResult* result,
|
|
@@ -182,7 +182,7 @@ template <typename IndexT>
|
|
|
182
182
|
void IndexIDMap2Template<IndexT>::add_with_ids(
|
|
183
183
|
idx_t n,
|
|
184
184
|
const typename IndexT::component_t* x,
|
|
185
|
-
const typename IndexT::idx_t* xids) {
|
|
185
|
+
const idx_t* xids) {
|
|
186
186
|
size_t prev_ntotal = this->ntotal;
|
|
187
187
|
IndexIDMapTemplate<IndexT>::add_with_ids(n, x, xids);
|
|
188
188
|
for (size_t i = prev_ntotal; i < this->ntotal; i++) {
|
|
@@ -18,7 +18,6 @@ namespace faiss {
|
|
|
18
18
|
/** Index that translates search results to ids */
|
|
19
19
|
template <typename IndexT>
|
|
20
20
|
struct IndexIDMapTemplate : IndexT {
|
|
21
|
-
using idx_t = typename IndexT::idx_t;
|
|
22
21
|
using component_t = typename IndexT::component_t;
|
|
23
22
|
using distance_t = typename IndexT::distance_t;
|
|
24
23
|
|
|
@@ -74,7 +73,6 @@ using IndexBinaryIDMap = IndexIDMapTemplate<IndexBinary>;
|
|
|
74
73
|
* implementation via a 2-way index */
|
|
75
74
|
template <typename IndexT>
|
|
76
75
|
struct IndexIDMap2Template : IndexIDMapTemplate<IndexT> {
|
|
77
|
-
using idx_t = typename IndexT::idx_t;
|
|
78
76
|
using component_t = typename IndexT::component_t;
|
|
79
77
|
using distance_t = typename IndexT::distance_t;
|
|
80
78
|
|
|
@@ -10,11 +10,13 @@
|
|
|
10
10
|
#include <faiss/IndexIVF.h>
|
|
11
11
|
|
|
12
12
|
#include <omp.h>
|
|
13
|
+
#include <cstdint>
|
|
13
14
|
#include <mutex>
|
|
14
15
|
|
|
15
16
|
#include <algorithm>
|
|
16
17
|
#include <cinttypes>
|
|
17
18
|
#include <cstdio>
|
|
19
|
+
#include <limits>
|
|
18
20
|
#include <memory>
|
|
19
21
|
|
|
20
22
|
#include <faiss/utils/hamming.h>
|
|
@@ -22,6 +24,7 @@
|
|
|
22
24
|
|
|
23
25
|
#include <faiss/IndexFlat.h>
|
|
24
26
|
#include <faiss/impl/AuxIndexStructures.h>
|
|
27
|
+
#include <faiss/impl/CodePacker.h>
|
|
25
28
|
#include <faiss/impl/FaissAssert.h>
|
|
26
29
|
#include <faiss/impl/IDSelector.h>
|
|
27
30
|
|
|
@@ -35,27 +38,19 @@ using ScopedCodes = InvertedLists::ScopedCodes;
|
|
|
35
38
|
******************************************/
|
|
36
39
|
|
|
37
40
|
Level1Quantizer::Level1Quantizer(Index* quantizer, size_t nlist)
|
|
38
|
-
: quantizer(quantizer),
|
|
39
|
-
nlist(nlist),
|
|
40
|
-
quantizer_trains_alone(0),
|
|
41
|
-
own_fields(false),
|
|
42
|
-
clustering_index(nullptr) {
|
|
41
|
+
: quantizer(quantizer), nlist(nlist) {
|
|
43
42
|
// here we set a low # iterations because this is typically used
|
|
44
43
|
// for large clusterings (nb this is not used for the MultiIndex,
|
|
45
44
|
// for which quantizer_trains_alone = true)
|
|
46
45
|
cp.niter = 10;
|
|
47
46
|
}
|
|
48
47
|
|
|
49
|
-
Level1Quantizer::Level1Quantizer()
|
|
50
|
-
: quantizer(nullptr),
|
|
51
|
-
nlist(0),
|
|
52
|
-
quantizer_trains_alone(0),
|
|
53
|
-
own_fields(false),
|
|
54
|
-
clustering_index(nullptr) {}
|
|
48
|
+
Level1Quantizer::Level1Quantizer() {}
|
|
55
49
|
|
|
56
50
|
Level1Quantizer::~Level1Quantizer() {
|
|
57
|
-
if (own_fields)
|
|
51
|
+
if (own_fields) {
|
|
58
52
|
delete quantizer;
|
|
53
|
+
}
|
|
59
54
|
}
|
|
60
55
|
|
|
61
56
|
void Level1Quantizer::train_q1(
|
|
@@ -131,7 +126,7 @@ size_t Level1Quantizer::coarse_code_size() const {
|
|
|
131
126
|
return nbyte;
|
|
132
127
|
}
|
|
133
128
|
|
|
134
|
-
void Level1Quantizer::encode_listno(Index::idx_t list_no, uint8_t* code) const {
|
|
129
|
+
void Level1Quantizer::encode_listno(idx_t list_no, uint8_t* code) const {
|
|
135
130
|
// little endian
|
|
136
131
|
size_t nl = nlist - 1;
|
|
137
132
|
while (nl > 0) {
|
|
@@ -141,7 +136,7 @@ void Level1Quantizer::encode_listno(Index::idx_t list_no, uint8_t* code) const {
|
|
|
141
136
|
}
|
|
142
137
|
}
|
|
143
138
|
|
|
144
|
-
|
|
139
|
+
idx_t Level1Quantizer::decode_listno(const uint8_t* code) const {
|
|
145
140
|
size_t nl = nlist - 1;
|
|
146
141
|
int64_t list_no = 0;
|
|
147
142
|
int nbit = 0;
|
|
@@ -165,13 +160,10 @@ IndexIVF::IndexIVF(
|
|
|
165
160
|
size_t code_size,
|
|
166
161
|
MetricType metric)
|
|
167
162
|
: Index(d, metric),
|
|
168
|
-
|
|
163
|
+
IndexIVFInterface(quantizer, nlist),
|
|
169
164
|
invlists(new ArrayInvertedLists(nlist, code_size)),
|
|
170
165
|
own_invlists(true),
|
|
171
|
-
code_size(code_size),
|
|
172
|
-
nprobe(1),
|
|
173
|
-
max_codes(0),
|
|
174
|
-
parallel_mode(0) {
|
|
166
|
+
code_size(code_size) {
|
|
175
167
|
FAISS_THROW_IF_NOT(d == quantizer->d);
|
|
176
168
|
is_trained = quantizer->is_trained && (quantizer->ntotal == nlist);
|
|
177
169
|
// Spherical by default if the metric is inner_product
|
|
@@ -180,13 +172,7 @@ IndexIVF::IndexIVF(
|
|
|
180
172
|
}
|
|
181
173
|
}
|
|
182
174
|
|
|
183
|
-
IndexIVF::IndexIVF()
|
|
184
|
-
: invlists(nullptr),
|
|
185
|
-
own_invlists(false),
|
|
186
|
-
code_size(0),
|
|
187
|
-
nprobe(1),
|
|
188
|
-
max_codes(0),
|
|
189
|
-
parallel_mode(0) {}
|
|
175
|
+
IndexIVF::IndexIVF() {}
|
|
190
176
|
|
|
191
177
|
void IndexIVF::add(idx_t n, const float* x) {
|
|
192
178
|
add_with_ids(n, x, nullptr);
|
|
@@ -412,6 +398,7 @@ void IndexIVF::search_preassigned(
|
|
|
412
398
|
nprobe = std::min((idx_t)nlist, nprobe);
|
|
413
399
|
FAISS_THROW_IF_NOT(nprobe > 0);
|
|
414
400
|
|
|
401
|
+
const idx_t unlimited_list_size = std::numeric_limits<idx_t>::max();
|
|
415
402
|
idx_t max_codes = params ? params->max_codes : this->max_codes;
|
|
416
403
|
IDSelector* sel = params ? params->sel : nullptr;
|
|
417
404
|
const IDSelectorRange* selr = dynamic_cast<const IDSelectorRange*>(sel);
|
|
@@ -427,6 +414,10 @@ void IndexIVF::search_preassigned(
|
|
|
427
414
|
!(sel && store_pairs),
|
|
428
415
|
"selector and store_pairs cannot be combined");
|
|
429
416
|
|
|
417
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
418
|
+
!invlists->use_iterator || (max_codes == 0 && store_pairs == false),
|
|
419
|
+
"iterable inverted lists don't support max_codes and store_pairs");
|
|
420
|
+
|
|
430
421
|
size_t nlistv = 0, ndis = 0, nheap = 0;
|
|
431
422
|
|
|
432
423
|
using HeapForIP = CMin<float, idx_t>;
|
|
@@ -439,6 +430,14 @@ void IndexIVF::search_preassigned(
|
|
|
439
430
|
int pmode = this->parallel_mode & ~PARALLEL_MODE_NO_HEAP_INIT;
|
|
440
431
|
bool do_heap_init = !(this->parallel_mode & PARALLEL_MODE_NO_HEAP_INIT);
|
|
441
432
|
|
|
433
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
434
|
+
max_codes == 0 || pmode == 0 || pmode == 3,
|
|
435
|
+
"max_codes supported only for parallel_mode = 0 or 3");
|
|
436
|
+
|
|
437
|
+
if (max_codes == 0) {
|
|
438
|
+
max_codes = unlimited_list_size;
|
|
439
|
+
}
|
|
440
|
+
|
|
442
441
|
bool do_parallel = omp_get_max_threads() >= 2 &&
|
|
443
442
|
(pmode == 0 ? false
|
|
444
443
|
: pmode == 3 ? n > 1
|
|
@@ -457,7 +456,7 @@ void IndexIVF::search_preassigned(
|
|
|
457
456
|
* that are in common between the two
|
|
458
457
|
******************************************************/
|
|
459
458
|
|
|
460
|
-
//
|
|
459
|
+
// initialize + reorder a result heap
|
|
461
460
|
|
|
462
461
|
auto init_result = [&](float* simi, idx_t* idxi) {
|
|
463
462
|
if (!do_heap_init)
|
|
@@ -495,7 +494,8 @@ void IndexIVF::search_preassigned(
|
|
|
495
494
|
auto scan_one_list = [&](idx_t key,
|
|
496
495
|
float coarse_dis_i,
|
|
497
496
|
float* simi,
|
|
498
|
-
idx_t* idxi) {
|
|
497
|
+
idx_t* idxi,
|
|
498
|
+
idx_t list_size_max) {
|
|
499
499
|
if (key < 0) {
|
|
500
500
|
// not enough centroids for multiprobe
|
|
501
501
|
return (size_t)0;
|
|
@@ -506,10 +506,8 @@ void IndexIVF::search_preassigned(
|
|
|
506
506
|
key,
|
|
507
507
|
nlist);
|
|
508
508
|
|
|
509
|
-
size_t list_size = invlists->list_size(key);
|
|
510
|
-
|
|
511
509
|
// don't waste time on empty lists
|
|
512
|
-
if (list_size == 0) {
|
|
510
|
+
if (invlists->is_empty(key)) {
|
|
513
511
|
return (size_t)0;
|
|
514
512
|
}
|
|
515
513
|
|
|
@@ -518,32 +516,51 @@ void IndexIVF::search_preassigned(
|
|
|
518
516
|
nlistv++;
|
|
519
517
|
|
|
520
518
|
try {
|
|
521
|
-
|
|
522
|
-
|
|
519
|
+
if (invlists->use_iterator) {
|
|
520
|
+
size_t list_size = 0;
|
|
523
521
|
|
|
524
|
-
|
|
525
|
-
|
|
522
|
+
std::unique_ptr<InvertedListsIterator> it(
|
|
523
|
+
invlists->get_iterator(key));
|
|
526
524
|
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
ids = sids->get();
|
|
530
|
-
}
|
|
525
|
+
nheap += scanner->iterate_codes(
|
|
526
|
+
it.get(), simi, idxi, k, list_size);
|
|
531
527
|
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
size_t
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
if (list_size == 0) {
|
|
538
|
-
return (size_t)0;
|
|
528
|
+
return list_size;
|
|
529
|
+
} else {
|
|
530
|
+
size_t list_size = invlists->list_size(key);
|
|
531
|
+
if (list_size > list_size_max) {
|
|
532
|
+
list_size = list_size_max;
|
|
539
533
|
}
|
|
540
|
-
codes += jmin * code_size;
|
|
541
|
-
ids += jmin;
|
|
542
|
-
}
|
|
543
534
|
|
|
544
|
-
|
|
545
|
-
|
|
535
|
+
InvertedLists::ScopedCodes scodes(invlists, key);
|
|
536
|
+
const uint8_t* codes = scodes.get();
|
|
537
|
+
|
|
538
|
+
std::unique_ptr<InvertedLists::ScopedIds> sids;
|
|
539
|
+
const idx_t* ids = nullptr;
|
|
546
540
|
|
|
541
|
+
if (!store_pairs) {
|
|
542
|
+
sids.reset(new InvertedLists::ScopedIds(invlists, key));
|
|
543
|
+
ids = sids->get();
|
|
544
|
+
}
|
|
545
|
+
|
|
546
|
+
if (selr) { // IDSelectorRange
|
|
547
|
+
// restrict search to a section of the inverted list
|
|
548
|
+
size_t jmin, jmax;
|
|
549
|
+
selr->find_sorted_ids_bounds(
|
|
550
|
+
list_size, ids, &jmin, &jmax);
|
|
551
|
+
list_size = jmax - jmin;
|
|
552
|
+
if (list_size == 0) {
|
|
553
|
+
return (size_t)0;
|
|
554
|
+
}
|
|
555
|
+
codes += jmin * code_size;
|
|
556
|
+
ids += jmin;
|
|
557
|
+
}
|
|
558
|
+
|
|
559
|
+
nheap += scanner->scan_codes(
|
|
560
|
+
list_size, codes, ids, simi, idxi, k);
|
|
561
|
+
|
|
562
|
+
return list_size;
|
|
563
|
+
}
|
|
547
564
|
} catch (const std::exception& e) {
|
|
548
565
|
std::lock_guard<std::mutex> lock(exception_mutex);
|
|
549
566
|
exception_string =
|
|
@@ -551,8 +568,6 @@ void IndexIVF::search_preassigned(
|
|
|
551
568
|
interrupt = true;
|
|
552
569
|
return size_t(0);
|
|
553
570
|
}
|
|
554
|
-
|
|
555
|
-
return list_size;
|
|
556
571
|
};
|
|
557
572
|
|
|
558
573
|
/****************************************************
|
|
@@ -581,9 +596,9 @@ void IndexIVF::search_preassigned(
|
|
|
581
596
|
keys[i * nprobe + ik],
|
|
582
597
|
coarse_dis[i * nprobe + ik],
|
|
583
598
|
simi,
|
|
584
|
-
idxi);
|
|
585
|
-
|
|
586
|
-
if (max_codes && nscan >= max_codes) {
|
|
599
|
+
idxi,
|
|
600
|
+
max_codes - nscan);
|
|
601
|
+
if (nscan >= max_codes) {
|
|
587
602
|
break;
|
|
588
603
|
}
|
|
589
604
|
}
|
|
@@ -610,7 +625,8 @@ void IndexIVF::search_preassigned(
|
|
|
610
625
|
keys[i * nprobe + ik],
|
|
611
626
|
coarse_dis[i * nprobe + ik],
|
|
612
627
|
local_dis.data(),
|
|
613
|
-
local_idx.data());
|
|
628
|
+
local_idx.data(),
|
|
629
|
+
unlimited_list_size);
|
|
614
630
|
|
|
615
631
|
// can't do the test on max_codes
|
|
616
632
|
}
|
|
@@ -651,7 +667,8 @@ void IndexIVF::search_preassigned(
|
|
|
651
667
|
keys[ij],
|
|
652
668
|
coarse_dis[ij],
|
|
653
669
|
local_dis.data(),
|
|
654
|
-
local_idx.data());
|
|
670
|
+
local_idx.data(),
|
|
671
|
+
unlimited_list_size);
|
|
655
672
|
#pragma omp critical
|
|
656
673
|
{
|
|
657
674
|
add_local_results(
|
|
@@ -744,6 +761,10 @@ void IndexIVF::range_search_preassigned(
|
|
|
744
761
|
idx_t max_codes = params ? params->max_codes : this->max_codes;
|
|
745
762
|
IDSelector* sel = params ? params->sel : nullptr;
|
|
746
763
|
|
|
764
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
765
|
+
!invlists->use_iterator || (max_codes == 0 && store_pairs == false),
|
|
766
|
+
"iterable inverted lists don't support max_codes and store_pairs");
|
|
767
|
+
|
|
747
768
|
size_t nlistv = 0, ndis = 0;
|
|
748
769
|
|
|
749
770
|
bool interrupt = false;
|
|
@@ -780,21 +801,30 @@ void IndexIVF::range_search_preassigned(
|
|
|
780
801
|
key,
|
|
781
802
|
ik,
|
|
782
803
|
nlist);
|
|
783
|
-
const size_t list_size = invlists->list_size(key);
|
|
784
804
|
|
|
785
|
-
if (list_size == 0)
|
|
805
|
+
if (invlists->is_empty(key)) {
|
|
786
806
|
return;
|
|
807
|
+
}
|
|
787
808
|
|
|
788
809
|
try {
|
|
789
|
-
|
|
790
|
-
InvertedLists::ScopedIds ids(invlists, key);
|
|
791
|
-
|
|
810
|
+
size_t list_size = 0;
|
|
792
811
|
scanner->set_list(key, coarse_dis[i * nprobe + ik]);
|
|
812
|
+
if (invlists->use_iterator) {
|
|
813
|
+
std::unique_ptr<InvertedListsIterator> it(
|
|
814
|
+
invlists->get_iterator(key));
|
|
815
|
+
|
|
816
|
+
scanner->iterate_codes_range(
|
|
817
|
+
it.get(), radius, qres, list_size);
|
|
818
|
+
} else {
|
|
819
|
+
InvertedLists::ScopedCodes scodes(invlists, key);
|
|
820
|
+
InvertedLists::ScopedIds ids(invlists, key);
|
|
821
|
+
list_size = invlists->list_size(key);
|
|
822
|
+
|
|
823
|
+
scanner->scan_codes_range(
|
|
824
|
+
list_size, scodes.get(), ids.get(), radius, qres);
|
|
825
|
+
}
|
|
793
826
|
nlistv++;
|
|
794
827
|
ndis += list_size;
|
|
795
|
-
scanner->scan_codes_range(
|
|
796
|
-
list_size, scodes.get(), ids.get(), radius, qres);
|
|
797
|
-
|
|
798
828
|
} catch (const std::exception& e) {
|
|
799
829
|
std::lock_guard<std::mutex> lock(exception_mutex);
|
|
800
830
|
exception_string =
|
|
@@ -1086,6 +1116,10 @@ void IndexIVF::merge_from(Index& otherIndex, idx_t add_id) {
|
|
|
1086
1116
|
other->ntotal = 0;
|
|
1087
1117
|
}
|
|
1088
1118
|
|
|
1119
|
+
CodePacker* IndexIVF::get_CodePacker() const {
|
|
1120
|
+
return new CodePackerFlat(code_size);
|
|
1121
|
+
}
|
|
1122
|
+
|
|
1089
1123
|
void IndexIVF::replace_invlists(InvertedLists* il, bool own) {
|
|
1090
1124
|
if (own_invlists) {
|
|
1091
1125
|
delete invlists;
|
|
@@ -1104,71 +1138,11 @@ void IndexIVF::replace_invlists(InvertedLists* il, bool own) {
|
|
|
1104
1138
|
|
|
1105
1139
|
void IndexIVF::copy_subset_to(
|
|
1106
1140
|
IndexIVF& other,
|
|
1107
|
-
|
|
1141
|
+
InvertedLists::subset_type_t subset_type,
|
|
1108
1142
|
idx_t a1,
|
|
1109
1143
|
idx_t a2) const {
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
FAISS_THROW_IF_NOT(other.direct_map.no());
|
|
1113
|
-
FAISS_THROW_IF_NOT_FMT(
|
|
1114
|
-
subset_type == 0 || subset_type == 1 || subset_type == 2,
|
|
1115
|
-
"subset type %d not implemented",
|
|
1116
|
-
subset_type);
|
|
1117
|
-
|
|
1118
|
-
size_t accu_n = 0;
|
|
1119
|
-
size_t accu_a1 = 0;
|
|
1120
|
-
size_t accu_a2 = 0;
|
|
1121
|
-
|
|
1122
|
-
InvertedLists* oivf = other.invlists;
|
|
1123
|
-
|
|
1124
|
-
for (idx_t list_no = 0; list_no < nlist; list_no++) {
|
|
1125
|
-
size_t n = invlists->list_size(list_no);
|
|
1126
|
-
ScopedIds ids_in(invlists, list_no);
|
|
1127
|
-
|
|
1128
|
-
if (subset_type == 0) {
|
|
1129
|
-
for (idx_t i = 0; i < n; i++) {
|
|
1130
|
-
idx_t id = ids_in[i];
|
|
1131
|
-
if (a1 <= id && id < a2) {
|
|
1132
|
-
oivf->add_entry(
|
|
1133
|
-
list_no,
|
|
1134
|
-
invlists->get_single_id(list_no, i),
|
|
1135
|
-
ScopedCodes(invlists, list_no, i).get());
|
|
1136
|
-
other.ntotal++;
|
|
1137
|
-
}
|
|
1138
|
-
}
|
|
1139
|
-
} else if (subset_type == 1) {
|
|
1140
|
-
for (idx_t i = 0; i < n; i++) {
|
|
1141
|
-
idx_t id = ids_in[i];
|
|
1142
|
-
if (id % a1 == a2) {
|
|
1143
|
-
oivf->add_entry(
|
|
1144
|
-
list_no,
|
|
1145
|
-
invlists->get_single_id(list_no, i),
|
|
1146
|
-
ScopedCodes(invlists, list_no, i).get());
|
|
1147
|
-
other.ntotal++;
|
|
1148
|
-
}
|
|
1149
|
-
}
|
|
1150
|
-
} else if (subset_type == 2) {
|
|
1151
|
-
// see what is allocated to a1 and to a2
|
|
1152
|
-
size_t next_accu_n = accu_n + n;
|
|
1153
|
-
size_t next_accu_a1 = next_accu_n * a1 / ntotal;
|
|
1154
|
-
size_t i1 = next_accu_a1 - accu_a1;
|
|
1155
|
-
size_t next_accu_a2 = next_accu_n * a2 / ntotal;
|
|
1156
|
-
size_t i2 = next_accu_a2 - accu_a2;
|
|
1157
|
-
|
|
1158
|
-
for (idx_t i = i1; i < i2; i++) {
|
|
1159
|
-
oivf->add_entry(
|
|
1160
|
-
list_no,
|
|
1161
|
-
invlists->get_single_id(list_no, i),
|
|
1162
|
-
ScopedCodes(invlists, list_no, i).get());
|
|
1163
|
-
}
|
|
1164
|
-
|
|
1165
|
-
other.ntotal += i2 - i1;
|
|
1166
|
-
accu_a1 = next_accu_a1;
|
|
1167
|
-
accu_a2 = next_accu_a2;
|
|
1168
|
-
}
|
|
1169
|
-
accu_n += n;
|
|
1170
|
-
}
|
|
1171
|
-
FAISS_ASSERT(accu_n == ntotal);
|
|
1144
|
+
other.ntotal +=
|
|
1145
|
+
invlists->copy_subset_to(*other.invlists, subset_type, a1, a2);
|
|
1172
1146
|
}
|
|
1173
1147
|
|
|
1174
1148
|
IndexIVF::~IndexIVF() {
|
|
@@ -1233,6 +1207,39 @@ size_t InvertedListScanner::scan_codes(
|
|
|
1233
1207
|
return nup;
|
|
1234
1208
|
}
|
|
1235
1209
|
|
|
1210
|
+
size_t InvertedListScanner::iterate_codes(
|
|
1211
|
+
InvertedListsIterator* it,
|
|
1212
|
+
float* simi,
|
|
1213
|
+
idx_t* idxi,
|
|
1214
|
+
size_t k,
|
|
1215
|
+
size_t& list_size) const {
|
|
1216
|
+
size_t nup = 0;
|
|
1217
|
+
list_size = 0;
|
|
1218
|
+
|
|
1219
|
+
if (!keep_max) {
|
|
1220
|
+
for (; it->is_available(); it->next()) {
|
|
1221
|
+
auto id_and_codes = it->get_id_and_codes();
|
|
1222
|
+
float dis = distance_to_code(id_and_codes.second);
|
|
1223
|
+
if (dis < simi[0]) {
|
|
1224
|
+
maxheap_replace_top(k, simi, idxi, dis, id_and_codes.first);
|
|
1225
|
+
nup++;
|
|
1226
|
+
}
|
|
1227
|
+
list_size++;
|
|
1228
|
+
}
|
|
1229
|
+
} else {
|
|
1230
|
+
for (; it->is_available(); it->next()) {
|
|
1231
|
+
auto id_and_codes = it->get_id_and_codes();
|
|
1232
|
+
float dis = distance_to_code(id_and_codes.second);
|
|
1233
|
+
if (dis > simi[0]) {
|
|
1234
|
+
minheap_replace_top(k, simi, idxi, dis, id_and_codes.first);
|
|
1235
|
+
nup++;
|
|
1236
|
+
}
|
|
1237
|
+
list_size++;
|
|
1238
|
+
}
|
|
1239
|
+
}
|
|
1240
|
+
return nup;
|
|
1241
|
+
}
|
|
1242
|
+
|
|
1236
1243
|
void InvertedListScanner::scan_codes_range(
|
|
1237
1244
|
size_t list_size,
|
|
1238
1245
|
const uint8_t* codes,
|
|
@@ -1252,4 +1259,23 @@ void InvertedListScanner::scan_codes_range(
|
|
|
1252
1259
|
}
|
|
1253
1260
|
}
|
|
1254
1261
|
|
|
1262
|
+
void InvertedListScanner::iterate_codes_range(
|
|
1263
|
+
InvertedListsIterator* it,
|
|
1264
|
+
float radius,
|
|
1265
|
+
RangeQueryResult& res,
|
|
1266
|
+
size_t& list_size) const {
|
|
1267
|
+
list_size = 0;
|
|
1268
|
+
for (; it->is_available(); it->next()) {
|
|
1269
|
+
auto id_and_codes = it->get_id_and_codes();
|
|
1270
|
+
float dis = distance_to_code(id_and_codes.second);
|
|
1271
|
+
bool keep = !keep_max
|
|
1272
|
+
? dis < radius
|
|
1273
|
+
: dis > radius; // TODO templatize to remove this test
|
|
1274
|
+
if (keep) {
|
|
1275
|
+
res.add(dis, id_and_codes.first);
|
|
1276
|
+
}
|
|
1277
|
+
list_size++;
|
|
1278
|
+
}
|
|
1279
|
+
}
|
|
1280
|
+
|
|
1255
1281
|
} // namespace faiss
|