faiss 0.2.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +1 -1
- data/README.md +7 -7
- data/ext/faiss/extconf.rb +6 -3
- data/ext/faiss/numo.hpp +4 -4
- data/ext/faiss/utils.cpp +1 -1
- data/ext/faiss/utils.h +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +365 -194
- data/vendor/faiss/faiss/Clustering.h +102 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
- data/vendor/faiss/faiss/Index2Layer.h +22 -36
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
- data/vendor/faiss/faiss/IndexFlat.h +42 -59
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
- data/vendor/faiss/faiss/IndexIVF.h +169 -118
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
- data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
- data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
- data/vendor/faiss/faiss/IndexLSH.h +20 -38
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
- data/vendor/faiss/faiss/IndexPQ.h +64 -82
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
- data/vendor/faiss/faiss/IndexRefine.h +32 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
- data/vendor/faiss/faiss/VectorTransform.h +64 -89
- data/vendor/faiss/faiss/clone_index.cpp +78 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
- data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
- data/vendor/faiss/faiss/impl/io.cpp +76 -95
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +60 -29
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +619 -397
- data/vendor/faiss/faiss/index_factory.h +8 -6
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +305 -312
- data/vendor/faiss/faiss/utils/distances.h +170 -122
- data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +54 -49
- metadata +29 -4
|
@@ -5,7 +5,6 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
|
|
9
8
|
/*
|
|
10
9
|
* C++ support for heaps. The set of functions is tailored for efficient
|
|
11
10
|
* similarity search.
|
|
@@ -19,17 +18,16 @@
|
|
|
19
18
|
* Cmin). The C types are defined in ordered_key_value.h
|
|
20
19
|
*/
|
|
21
20
|
|
|
22
|
-
|
|
23
21
|
#ifndef FAISS_Heap_h
|
|
24
22
|
#define FAISS_Heap_h
|
|
25
23
|
|
|
26
24
|
#include <climits>
|
|
27
|
-
#include <cstring>
|
|
28
25
|
#include <cmath>
|
|
26
|
+
#include <cstring>
|
|
29
27
|
|
|
28
|
+
#include <stdint.h>
|
|
30
29
|
#include <cassert>
|
|
31
30
|
#include <cstdio>
|
|
32
|
-
#include <stdint.h>
|
|
33
31
|
|
|
34
32
|
#include <limits>
|
|
35
33
|
|
|
@@ -37,9 +35,6 @@
|
|
|
37
35
|
|
|
38
36
|
namespace faiss {
|
|
39
37
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
38
|
/*******************************************************************
|
|
44
39
|
* Basic heap ops: push and pop
|
|
45
40
|
*******************************************************************/
|
|
@@ -47,9 +42,8 @@ namespace faiss {
|
|
|
47
42
|
/** Pops the top element from the heap defined by bh_val[0..k-1] and
|
|
48
43
|
* bh_ids[0..k-1]. on output the element at k-1 is undefined.
|
|
49
44
|
*/
|
|
50
|
-
template <class C>
|
|
51
|
-
void heap_pop
|
|
52
|
-
{
|
|
45
|
+
template <class C>
|
|
46
|
+
inline void heap_pop(size_t k, typename C::T* bh_val, typename C::TI* bh_ids) {
|
|
53
47
|
bh_val--; /* Use 1-based indexing for easier node->child translation */
|
|
54
48
|
bh_ids--;
|
|
55
49
|
typename C::T val = bh_val[k];
|
|
@@ -65,8 +59,7 @@ void heap_pop (size_t k, typename C::T * bh_val, typename C::TI * bh_ids)
|
|
|
65
59
|
bh_val[i] = bh_val[i1];
|
|
66
60
|
bh_ids[i] = bh_ids[i1];
|
|
67
61
|
i = i1;
|
|
68
|
-
}
|
|
69
|
-
else {
|
|
62
|
+
} else {
|
|
70
63
|
if (C::cmp(val, bh_val[i2]))
|
|
71
64
|
break;
|
|
72
65
|
bh_val[i] = bh_val[i2];
|
|
@@ -78,22 +71,22 @@ void heap_pop (size_t k, typename C::T * bh_val, typename C::TI * bh_ids)
|
|
|
78
71
|
bh_ids[i] = bh_ids[k];
|
|
79
72
|
}
|
|
80
73
|
|
|
81
|
-
|
|
82
|
-
|
|
83
74
|
/** Pushes the element (val, ids) into the heap bh_val[0..k-2] and
|
|
84
75
|
* bh_ids[0..k-2]. on output the element at k-1 is defined.
|
|
85
76
|
*/
|
|
86
|
-
template <class C>
|
|
87
|
-
void heap_push
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
77
|
+
template <class C>
|
|
78
|
+
inline void heap_push(
|
|
79
|
+
size_t k,
|
|
80
|
+
typename C::T* bh_val,
|
|
81
|
+
typename C::TI* bh_ids,
|
|
82
|
+
typename C::T val,
|
|
83
|
+
typename C::TI ids) {
|
|
91
84
|
bh_val--; /* Use 1-based indexing for easier node->child translation */
|
|
92
85
|
bh_ids--;
|
|
93
86
|
size_t i = k, i_father;
|
|
94
87
|
while (i > 1) {
|
|
95
88
|
i_father = i >> 1;
|
|
96
|
-
if (!C::cmp
|
|
89
|
+
if (!C::cmp(val, bh_val[i_father])) /* the heap structure is ok */
|
|
97
90
|
break;
|
|
98
91
|
bh_val[i] = bh_val[i_father];
|
|
99
92
|
bh_ids[i] = bh_ids[i_father];
|
|
@@ -103,16 +96,16 @@ void heap_push (size_t k,
|
|
|
103
96
|
bh_ids[i] = ids;
|
|
104
97
|
}
|
|
105
98
|
|
|
106
|
-
|
|
107
|
-
|
|
108
99
|
/** Replace the top element from the heap defined by bh_val[0..k-1] and
|
|
109
100
|
* bh_ids[0..k-1].
|
|
110
101
|
*/
|
|
111
|
-
template <class C>
|
|
112
|
-
void heap_replace_top
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
102
|
+
template <class C>
|
|
103
|
+
inline void heap_replace_top(
|
|
104
|
+
size_t k,
|
|
105
|
+
typename C::T* bh_val,
|
|
106
|
+
typename C::TI* bh_ids,
|
|
107
|
+
typename C::T val,
|
|
108
|
+
typename C::TI ids) {
|
|
116
109
|
bh_val--; /* Use 1-based indexing for easier node->child translation */
|
|
117
110
|
bh_ids--;
|
|
118
111
|
size_t i = 1, i1, i2;
|
|
@@ -127,8 +120,7 @@ void heap_replace_top (size_t k,
|
|
|
127
120
|
bh_val[i] = bh_val[i1];
|
|
128
121
|
bh_ids[i] = bh_ids[i1];
|
|
129
122
|
i = i1;
|
|
130
|
-
}
|
|
131
|
-
else {
|
|
123
|
+
} else {
|
|
132
124
|
if (C::cmp(val, bh_val[i2]))
|
|
133
125
|
break;
|
|
134
126
|
bh_val[i] = bh_val[i2];
|
|
@@ -140,52 +132,58 @@ void heap_replace_top (size_t k,
|
|
|
140
132
|
bh_ids[i] = ids;
|
|
141
133
|
}
|
|
142
134
|
|
|
143
|
-
|
|
144
|
-
|
|
145
135
|
/* Partial instanciation for heaps with TI = int64_t */
|
|
146
136
|
|
|
147
|
-
template <typename T>
|
|
148
|
-
void minheap_pop
|
|
149
|
-
|
|
150
|
-
heap_pop<CMin<T, int64_t> > (k, bh_val, bh_ids);
|
|
137
|
+
template <typename T>
|
|
138
|
+
inline void minheap_pop(size_t k, T* bh_val, int64_t* bh_ids) {
|
|
139
|
+
heap_pop<CMin<T, int64_t>>(k, bh_val, bh_ids);
|
|
151
140
|
}
|
|
152
141
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
142
|
+
template <typename T>
|
|
143
|
+
inline void minheap_push(
|
|
144
|
+
size_t k,
|
|
145
|
+
T* bh_val,
|
|
146
|
+
int64_t* bh_ids,
|
|
147
|
+
T val,
|
|
148
|
+
int64_t ids) {
|
|
149
|
+
heap_push<CMin<T, int64_t>>(k, bh_val, bh_ids, val, ids);
|
|
158
150
|
}
|
|
159
151
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
152
|
+
template <typename T>
|
|
153
|
+
inline void minheap_replace_top(
|
|
154
|
+
size_t k,
|
|
155
|
+
T* bh_val,
|
|
156
|
+
int64_t* bh_ids,
|
|
157
|
+
T val,
|
|
158
|
+
int64_t ids) {
|
|
159
|
+
heap_replace_top<CMin<T, int64_t>>(k, bh_val, bh_ids, val, ids);
|
|
165
160
|
}
|
|
166
161
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
{
|
|
171
|
-
heap_pop<CMax<T, int64_t> > (k, bh_val, bh_ids);
|
|
162
|
+
template <typename T>
|
|
163
|
+
inline void maxheap_pop(size_t k, T* bh_val, int64_t* bh_ids) {
|
|
164
|
+
heap_pop<CMax<T, int64_t>>(k, bh_val, bh_ids);
|
|
172
165
|
}
|
|
173
166
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
167
|
+
template <typename T>
|
|
168
|
+
inline void maxheap_push(
|
|
169
|
+
size_t k,
|
|
170
|
+
T* bh_val,
|
|
171
|
+
int64_t* bh_ids,
|
|
172
|
+
T val,
|
|
173
|
+
int64_t ids) {
|
|
174
|
+
heap_push<CMax<T, int64_t>>(k, bh_val, bh_ids, val, ids);
|
|
179
175
|
}
|
|
180
176
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
177
|
+
template <typename T>
|
|
178
|
+
inline void maxheap_replace_top(
|
|
179
|
+
size_t k,
|
|
180
|
+
T* bh_val,
|
|
181
|
+
int64_t* bh_ids,
|
|
182
|
+
T val,
|
|
183
|
+
int64_t ids) {
|
|
184
|
+
heap_replace_top<CMax<T, int64_t>>(k, bh_val, bh_ids, val, ids);
|
|
186
185
|
}
|
|
187
186
|
|
|
188
|
-
|
|
189
187
|
/*******************************************************************
|
|
190
188
|
* Heap initialization
|
|
191
189
|
*******************************************************************/
|
|
@@ -193,118 +191,116 @@ void maxheap_replace_top (size_t k, T * bh_val, int64_t * bh_ids, T val, int64_t
|
|
|
193
191
|
/* Initialization phase for the heap (with unconditionnal pushes).
|
|
194
192
|
* Store k0 elements in a heap containing up to k values. Note that
|
|
195
193
|
* (bh_val, bh_ids) can be the same as (x, ids) */
|
|
196
|
-
template <class C>
|
|
197
|
-
void heap_heapify
|
|
194
|
+
template <class C>
|
|
195
|
+
inline void heap_heapify(
|
|
198
196
|
size_t k,
|
|
199
|
-
typename C::T
|
|
200
|
-
typename C::TI
|
|
201
|
-
const typename C::T
|
|
202
|
-
const typename C::TI
|
|
203
|
-
size_t k0 = 0)
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
for (size_t i = k0; i < k; i++) {
|
|
216
|
-
bh_val[i] = C::neutral();
|
|
217
|
-
bh_ids[i] = -1;
|
|
218
|
-
}
|
|
197
|
+
typename C::T* bh_val,
|
|
198
|
+
typename C::TI* bh_ids,
|
|
199
|
+
const typename C::T* x = nullptr,
|
|
200
|
+
const typename C::TI* ids = nullptr,
|
|
201
|
+
size_t k0 = 0) {
|
|
202
|
+
if (k0 > 0)
|
|
203
|
+
assert(x);
|
|
204
|
+
|
|
205
|
+
if (ids) {
|
|
206
|
+
for (size_t i = 0; i < k0; i++)
|
|
207
|
+
heap_push<C>(i + 1, bh_val, bh_ids, x[i], ids[i]);
|
|
208
|
+
} else {
|
|
209
|
+
for (size_t i = 0; i < k0; i++)
|
|
210
|
+
heap_push<C>(i + 1, bh_val, bh_ids, x[i], i);
|
|
211
|
+
}
|
|
219
212
|
|
|
213
|
+
for (size_t i = k0; i < k; i++) {
|
|
214
|
+
bh_val[i] = C::neutral();
|
|
215
|
+
bh_ids[i] = -1;
|
|
216
|
+
}
|
|
220
217
|
}
|
|
221
218
|
|
|
222
|
-
template <typename T>
|
|
223
|
-
void minheap_heapify
|
|
224
|
-
size_t k,
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
const
|
|
228
|
-
|
|
229
|
-
{
|
|
230
|
-
heap_heapify<
|
|
219
|
+
template <typename T>
|
|
220
|
+
inline void minheap_heapify(
|
|
221
|
+
size_t k,
|
|
222
|
+
T* bh_val,
|
|
223
|
+
int64_t* bh_ids,
|
|
224
|
+
const T* x = nullptr,
|
|
225
|
+
const int64_t* ids = nullptr,
|
|
226
|
+
size_t k0 = 0) {
|
|
227
|
+
heap_heapify<CMin<T, int64_t>>(k, bh_val, bh_ids, x, ids, k0);
|
|
231
228
|
}
|
|
232
229
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
void maxheap_heapify (
|
|
230
|
+
template <typename T>
|
|
231
|
+
inline void maxheap_heapify(
|
|
236
232
|
size_t k,
|
|
237
|
-
T
|
|
238
|
-
int64_t
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
heap_heapify< CMax<T, int64_t> > (k, bh_val, bh_ids, x, ids, k0);
|
|
233
|
+
T* bh_val,
|
|
234
|
+
int64_t* bh_ids,
|
|
235
|
+
const T* x = nullptr,
|
|
236
|
+
const int64_t* ids = nullptr,
|
|
237
|
+
size_t k0 = 0) {
|
|
238
|
+
heap_heapify<CMax<T, int64_t>>(k, bh_val, bh_ids, x, ids, k0);
|
|
244
239
|
}
|
|
245
240
|
|
|
246
|
-
|
|
247
|
-
|
|
248
241
|
/*******************************************************************
|
|
249
242
|
* Add n elements to the heap
|
|
250
243
|
*******************************************************************/
|
|
251
244
|
|
|
252
|
-
|
|
253
245
|
/* Add some elements to the heap */
|
|
254
|
-
template <class C>
|
|
255
|
-
void heap_addn
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
246
|
+
template <class C>
|
|
247
|
+
inline void heap_addn(
|
|
248
|
+
size_t k,
|
|
249
|
+
typename C::T* bh_val,
|
|
250
|
+
typename C::TI* bh_ids,
|
|
251
|
+
const typename C::T* x,
|
|
252
|
+
const typename C::TI* ids,
|
|
253
|
+
size_t n) {
|
|
261
254
|
size_t i;
|
|
262
255
|
if (ids)
|
|
263
256
|
for (i = 0; i < n; i++) {
|
|
264
|
-
if (C::cmp
|
|
265
|
-
heap_replace_top<C>
|
|
257
|
+
if (C::cmp(bh_val[0], x[i])) {
|
|
258
|
+
heap_replace_top<C>(k, bh_val, bh_ids, x[i], ids[i]);
|
|
266
259
|
}
|
|
267
260
|
}
|
|
268
261
|
else
|
|
269
262
|
for (i = 0; i < n; i++) {
|
|
270
|
-
if (C::cmp
|
|
271
|
-
heap_replace_top<C>
|
|
263
|
+
if (C::cmp(bh_val[0], x[i])) {
|
|
264
|
+
heap_replace_top<C>(k, bh_val, bh_ids, x[i], i);
|
|
272
265
|
}
|
|
273
266
|
}
|
|
274
267
|
}
|
|
275
268
|
|
|
276
|
-
|
|
277
269
|
/* Partial instanciation for heaps with TI = int64_t */
|
|
278
270
|
|
|
279
|
-
template <typename T>
|
|
280
|
-
void minheap_addn
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
271
|
+
template <typename T>
|
|
272
|
+
inline void minheap_addn(
|
|
273
|
+
size_t k,
|
|
274
|
+
T* bh_val,
|
|
275
|
+
int64_t* bh_ids,
|
|
276
|
+
const T* x,
|
|
277
|
+
const int64_t* ids,
|
|
278
|
+
size_t n) {
|
|
279
|
+
heap_addn<CMin<T, int64_t>>(k, bh_val, bh_ids, x, ids, n);
|
|
284
280
|
}
|
|
285
281
|
|
|
286
|
-
template <typename T>
|
|
287
|
-
void maxheap_addn
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
282
|
+
template <typename T>
|
|
283
|
+
inline void maxheap_addn(
|
|
284
|
+
size_t k,
|
|
285
|
+
T* bh_val,
|
|
286
|
+
int64_t* bh_ids,
|
|
287
|
+
const T* x,
|
|
288
|
+
const int64_t* ids,
|
|
289
|
+
size_t n) {
|
|
290
|
+
heap_addn<CMax<T, int64_t>>(k, bh_val, bh_ids, x, ids, n);
|
|
291
291
|
}
|
|
292
292
|
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
293
|
/*******************************************************************
|
|
299
294
|
* Heap finalization (reorder elements)
|
|
300
295
|
*******************************************************************/
|
|
301
296
|
|
|
302
|
-
|
|
303
297
|
/* This function maps a binary heap into an sorted structure.
|
|
304
298
|
It returns the number */
|
|
305
|
-
template <typename C>
|
|
306
|
-
size_t heap_reorder
|
|
307
|
-
|
|
299
|
+
template <typename C>
|
|
300
|
+
inline size_t heap_reorder(
|
|
301
|
+
size_t k,
|
|
302
|
+
typename C::T* bh_val,
|
|
303
|
+
typename C::TI* bh_ids) {
|
|
308
304
|
size_t i, ii;
|
|
309
305
|
|
|
310
306
|
for (i = 0, ii = 0; i < k; i++) {
|
|
@@ -313,16 +309,17 @@ size_t heap_reorder (size_t k, typename C::T * bh_val, typename C::TI * bh_ids)
|
|
|
313
309
|
typename C::TI id = bh_ids[0];
|
|
314
310
|
|
|
315
311
|
/* boundary case: we will over-ride this value if not a true element */
|
|
316
|
-
heap_pop<C>
|
|
317
|
-
bh_val[k-ii-1] = val;
|
|
318
|
-
bh_ids[k-ii-1] = id;
|
|
319
|
-
if (id != -1)
|
|
312
|
+
heap_pop<C>(k - i, bh_val, bh_ids);
|
|
313
|
+
bh_val[k - ii - 1] = val;
|
|
314
|
+
bh_ids[k - ii - 1] = id;
|
|
315
|
+
if (id != -1)
|
|
316
|
+
ii++;
|
|
320
317
|
}
|
|
321
318
|
/* Count the number of elements which are effectively returned */
|
|
322
319
|
size_t nel = ii;
|
|
323
320
|
|
|
324
|
-
memmove
|
|
325
|
-
memmove
|
|
321
|
+
memmove(bh_val, bh_val + k - ii, ii * sizeof(*bh_val));
|
|
322
|
+
memmove(bh_ids, bh_ids + k - ii, ii * sizeof(*bh_ids));
|
|
326
323
|
|
|
327
324
|
for (; ii < k; ii++) {
|
|
328
325
|
bh_val[ii] = C::neutral();
|
|
@@ -331,22 +328,16 @@ size_t heap_reorder (size_t k, typename C::T * bh_val, typename C::TI * bh_ids)
|
|
|
331
328
|
return nel;
|
|
332
329
|
}
|
|
333
330
|
|
|
334
|
-
template <typename T>
|
|
335
|
-
size_t minheap_reorder
|
|
336
|
-
|
|
337
|
-
return heap_reorder< CMin<T, int64_t> > (k, bh_val, bh_ids);
|
|
331
|
+
template <typename T>
|
|
332
|
+
inline size_t minheap_reorder(size_t k, T* bh_val, int64_t* bh_ids) {
|
|
333
|
+
return heap_reorder<CMin<T, int64_t>>(k, bh_val, bh_ids);
|
|
338
334
|
}
|
|
339
335
|
|
|
340
|
-
template <typename T>
|
|
341
|
-
size_t maxheap_reorder
|
|
342
|
-
|
|
343
|
-
return heap_reorder< CMax<T, int64_t> > (k, bh_val, bh_ids);
|
|
336
|
+
template <typename T>
|
|
337
|
+
inline size_t maxheap_reorder(size_t k, T* bh_val, int64_t* bh_ids) {
|
|
338
|
+
return heap_reorder<CMax<T, int64_t>>(k, bh_val, bh_ids);
|
|
344
339
|
}
|
|
345
340
|
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
341
|
/*******************************************************************
|
|
351
342
|
* Operations on heap arrays
|
|
352
343
|
*******************************************************************/
|
|
@@ -360,19 +351,23 @@ struct HeapArray {
|
|
|
360
351
|
typedef typename C::TI TI;
|
|
361
352
|
typedef typename C::T T;
|
|
362
353
|
|
|
363
|
-
size_t nh;
|
|
364
|
-
size_t k;
|
|
365
|
-
TI
|
|
366
|
-
T
|
|
354
|
+
size_t nh; ///< number of heaps
|
|
355
|
+
size_t k; ///< allocated size per heap
|
|
356
|
+
TI* ids; ///< identifiers (size nh * k)
|
|
357
|
+
T* val; ///< values (distances or similarities), size nh * k
|
|
367
358
|
|
|
368
359
|
/// Return the list of values for a heap
|
|
369
|
-
T
|
|
360
|
+
T* get_val(size_t key) {
|
|
361
|
+
return val + key * k;
|
|
362
|
+
}
|
|
370
363
|
|
|
371
364
|
/// Correspponding identifiers
|
|
372
|
-
TI
|
|
365
|
+
TI* get_ids(size_t key) {
|
|
366
|
+
return ids + key * k;
|
|
367
|
+
}
|
|
373
368
|
|
|
374
369
|
/// prepare all the heaps before adding
|
|
375
|
-
void heapify
|
|
370
|
+
void heapify();
|
|
376
371
|
|
|
377
372
|
/** add nj elements to heaps i0:i0+ni, with sequential ids
|
|
378
373
|
*
|
|
@@ -382,58 +377,46 @@ struct HeapArray {
|
|
|
382
377
|
* @param i0 first heap to update
|
|
383
378
|
* @param ni nb of elements to update (-1 = use nh)
|
|
384
379
|
*/
|
|
385
|
-
void addn
|
|
386
|
-
|
|
380
|
+
void addn(
|
|
381
|
+
size_t nj,
|
|
382
|
+
const T* vin,
|
|
383
|
+
TI j0 = 0,
|
|
384
|
+
size_t i0 = 0,
|
|
385
|
+
int64_t ni = -1);
|
|
387
386
|
|
|
388
387
|
/** same as addn
|
|
389
388
|
*
|
|
390
389
|
* @param id_in ids of the elements to add, size ni * nj
|
|
391
390
|
* @param id_stride stride for id_in
|
|
392
391
|
*/
|
|
393
|
-
void addn_with_ids
|
|
394
|
-
|
|
395
|
-
|
|
392
|
+
void addn_with_ids(
|
|
393
|
+
size_t nj,
|
|
394
|
+
const T* vin,
|
|
395
|
+
const TI* id_in = nullptr,
|
|
396
|
+
int64_t id_stride = 0,
|
|
397
|
+
size_t i0 = 0,
|
|
398
|
+
int64_t ni = -1);
|
|
396
399
|
|
|
397
400
|
/// reorder all the heaps
|
|
398
|
-
void reorder
|
|
401
|
+
void reorder();
|
|
399
402
|
|
|
400
403
|
/** this is not really a heap function. It just finds the per-line
|
|
401
404
|
* extrema of each line of array D
|
|
402
405
|
* @param vals_out extreme value of each line (size nh, or NULL)
|
|
403
406
|
* @param idx_out index of extreme value (size nh or NULL)
|
|
404
407
|
*/
|
|
405
|
-
void per_line_extrema
|
|
406
|
-
|
|
408
|
+
void per_line_extrema(T* vals_out, TI* idx_out) const;
|
|
407
409
|
};
|
|
408
410
|
|
|
409
|
-
|
|
410
411
|
/* Define useful heaps */
|
|
411
|
-
typedef HeapArray<CMin<float, int64_t
|
|
412
|
-
typedef HeapArray<CMin<int, int64_t
|
|
412
|
+
typedef HeapArray<CMin<float, int64_t>> float_minheap_array_t;
|
|
413
|
+
typedef HeapArray<CMin<int, int64_t>> int_minheap_array_t;
|
|
413
414
|
|
|
414
|
-
typedef HeapArray<CMax<float, int64_t
|
|
415
|
-
typedef HeapArray<CMax<int, int64_t
|
|
415
|
+
typedef HeapArray<CMax<float, int64_t>> float_maxheap_array_t;
|
|
416
|
+
typedef HeapArray<CMax<int, int64_t>> int_maxheap_array_t;
|
|
416
417
|
|
|
417
418
|
// The heap templates are instanciated explicitly in Heap.cpp
|
|
418
419
|
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
420
|
/*********************************************************************
|
|
438
421
|
* Indirect heaps: instead of having
|
|
439
422
|
*
|
|
@@ -445,14 +428,11 @@ typedef HeapArray<CMax<int, int64_t> > int_maxheap_array_t;
|
|
|
445
428
|
*
|
|
446
429
|
*********************************************************************/
|
|
447
430
|
|
|
448
|
-
|
|
449
431
|
template <class C>
|
|
450
|
-
inline
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
typename C::TI * bh_ids)
|
|
455
|
-
{
|
|
432
|
+
inline void indirect_heap_pop(
|
|
433
|
+
size_t k,
|
|
434
|
+
const typename C::T* bh_val,
|
|
435
|
+
typename C::TI* bh_ids) {
|
|
456
436
|
bh_ids--; /* Use 1-based indexing for easier node->child translation */
|
|
457
437
|
typename C::T val = bh_val[bh_ids[k]];
|
|
458
438
|
size_t i = 1;
|
|
@@ -477,20 +457,18 @@ void indirect_heap_pop (
|
|
|
477
457
|
bh_ids[i] = bh_ids[k];
|
|
478
458
|
}
|
|
479
459
|
|
|
480
|
-
|
|
481
|
-
|
|
482
460
|
template <class C>
|
|
483
|
-
inline
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
{
|
|
461
|
+
inline void indirect_heap_push(
|
|
462
|
+
size_t k,
|
|
463
|
+
const typename C::T* bh_val,
|
|
464
|
+
typename C::TI* bh_ids,
|
|
465
|
+
typename C::TI id) {
|
|
488
466
|
bh_ids--; /* Use 1-based indexing for easier node->child translation */
|
|
489
467
|
typename C::T val = bh_val[id];
|
|
490
468
|
size_t i = k;
|
|
491
469
|
while (i > 1) {
|
|
492
470
|
size_t i_father = i >> 1;
|
|
493
|
-
if (!C::cmp
|
|
471
|
+
if (!C::cmp(val, bh_val[bh_ids[i_father]]))
|
|
494
472
|
break;
|
|
495
473
|
bh_ids[i] = bh_ids[i_father];
|
|
496
474
|
i = i_father;
|
|
@@ -498,7 +476,6 @@ void indirect_heap_push (size_t k,
|
|
|
498
476
|
bh_ids[i] = id;
|
|
499
477
|
}
|
|
500
478
|
|
|
501
|
-
|
|
502
479
|
} // namespace faiss
|
|
503
480
|
|
|
504
|
-
#endif
|
|
481
|
+
#endif /* FAISS_Heap_h */
|