faiss 0.2.6 → 0.2.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +2 -2
- data/vendor/faiss/faiss/AutoTune.cpp +15 -4
- data/vendor/faiss/faiss/AutoTune.h +0 -1
- data/vendor/faiss/faiss/Clustering.cpp +1 -5
- data/vendor/faiss/faiss/Clustering.h +0 -2
- data/vendor/faiss/faiss/IVFlib.h +0 -2
- data/vendor/faiss/faiss/Index.h +1 -2
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
- data/vendor/faiss/faiss/IndexBinary.h +0 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
- data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
- data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
- data/vendor/faiss/faiss/IndexFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
- data/vendor/faiss/faiss/IndexFlat.h +1 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
- data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
- data/vendor/faiss/faiss/IndexHNSW.h +0 -1
- data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
- data/vendor/faiss/faiss/IndexIDMap.h +0 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
- data/vendor/faiss/faiss/IndexIVF.h +121 -61
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
- data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
- data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
- data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
- data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
- data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
- data/vendor/faiss/faiss/IndexReplicas.h +0 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
- data/vendor/faiss/faiss/IndexShards.cpp +26 -109
- data/vendor/faiss/faiss/IndexShards.h +2 -3
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
- data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
- data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
- data/vendor/faiss/faiss/MetaIndexes.h +29 -0
- data/vendor/faiss/faiss/MetricType.h +14 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
- data/vendor/faiss/faiss/VectorTransform.h +1 -3
- data/vendor/faiss/faiss/clone_index.cpp +232 -18
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
- data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
- data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
- data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
- data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
- data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
- data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
- data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
- data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
- data/vendor/faiss/faiss/impl/HNSW.h +6 -9
- data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
- data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
- data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
- data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
- data/vendor/faiss/faiss/impl/NSG.h +4 -7
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
- data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
- data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
- data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
- data/vendor/faiss/faiss/index_factory.cpp +8 -10
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
- data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
- data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
- data/vendor/faiss/faiss/utils/Heap.h +35 -1
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
- data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
- data/vendor/faiss/faiss/utils/distances.cpp +61 -7
- data/vendor/faiss/faiss/utils/distances.h +11 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
- data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
- data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
- data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
- data/vendor/faiss/faiss/utils/fp16.h +7 -0
- data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
- data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
- data/vendor/faiss/faiss/utils/hamming.h +21 -10
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
- data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
- data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
- data/vendor/faiss/faiss/utils/sorting.h +71 -0
- data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
- data/vendor/faiss/faiss/utils/utils.cpp +4 -176
- data/vendor/faiss/faiss/utils/utils.h +2 -9
- metadata +29 -3
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26
@@ -35,6 +35,7 @@
|
|
35
35
|
#include <faiss/impl/AuxIndexStructures.h>
|
36
36
|
#include <faiss/impl/FaissAssert.h>
|
37
37
|
#include <faiss/utils/Heap.h>
|
38
|
+
#include <faiss/utils/approx_topk_hamming/approx_topk_hamming.h>
|
38
39
|
#include <faiss/utils/utils.h>
|
39
40
|
|
40
41
|
static const size_t BLOCKSIZE_QUERY = 8192;
|
@@ -43,26 +44,13 @@ namespace faiss {
|
|
43
44
|
|
44
45
|
size_t hamming_batch_size = 65536;
|
45
46
|
|
46
|
-
const uint8_t hamdis_tab_ham_bytes[256] = {
|
47
|
-
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
|
48
|
-
2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
49
|
-
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
|
50
|
-
2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
51
|
-
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
|
52
|
-
4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
53
|
-
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5,
|
54
|
-
3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
55
|
-
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
|
56
|
-
4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
57
|
-
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
|
58
|
-
|
59
47
|
template <size_t nbits>
|
60
48
|
void hammings(
|
61
|
-
const uint64_t* bs1,
|
62
|
-
const uint64_t* bs2,
|
49
|
+
const uint64_t* __restrict bs1,
|
50
|
+
const uint64_t* __restrict bs2,
|
63
51
|
size_t n1,
|
64
52
|
size_t n2,
|
65
|
-
hamdis_t* dis)
|
53
|
+
hamdis_t* __restrict dis)
|
66
54
|
|
67
55
|
{
|
68
56
|
size_t i, j;
|
@@ -76,8 +64,8 @@ void hammings(
|
|
76
64
|
}
|
77
65
|
|
78
66
|
void hammings(
|
79
|
-
const uint64_t* bs1,
|
80
|
-
const uint64_t* bs2,
|
67
|
+
const uint64_t* __restrict bs1,
|
68
|
+
const uint64_t* __restrict bs2,
|
81
69
|
size_t n1,
|
82
70
|
size_t n2,
|
83
71
|
size_t nwords,
|
@@ -95,12 +83,12 @@ void hammings(
|
|
95
83
|
/* Count number of matches given a max threshold */
|
96
84
|
template <size_t nbits>
|
97
85
|
void hamming_count_thres(
|
98
|
-
const uint64_t* bs1,
|
99
|
-
const uint64_t* bs2,
|
86
|
+
const uint64_t* __restrict bs1,
|
87
|
+
const uint64_t* __restrict bs2,
|
100
88
|
size_t n1,
|
101
89
|
size_t n2,
|
102
90
|
hamdis_t ht,
|
103
|
-
size_t* nptr) {
|
91
|
+
size_t* __restrict nptr) {
|
104
92
|
const size_t nwords = nbits / 64;
|
105
93
|
size_t i, j, posm = 0;
|
106
94
|
const uint64_t* bs2_ = bs2;
|
@@ -120,10 +108,10 @@ void hamming_count_thres(
|
|
120
108
|
|
121
109
|
template <size_t nbits>
|
122
110
|
void crosshamming_count_thres(
|
123
|
-
const uint64_t* dbs,
|
111
|
+
const uint64_t* __restrict dbs,
|
124
112
|
size_t n,
|
125
113
|
int ht,
|
126
|
-
size_t* nptr) {
|
114
|
+
size_t* __restrict nptr) {
|
127
115
|
const size_t nwords = nbits / 64;
|
128
116
|
size_t i, j, posm = 0;
|
129
117
|
const uint64_t* bs1 = dbs;
|
@@ -142,13 +130,13 @@ void crosshamming_count_thres(
|
|
142
130
|
|
143
131
|
template <size_t nbits>
|
144
132
|
size_t match_hamming_thres(
|
145
|
-
const uint64_t* bs1,
|
146
|
-
const uint64_t* bs2,
|
133
|
+
const uint64_t* __restrict bs1,
|
134
|
+
const uint64_t* __restrict bs2,
|
147
135
|
size_t n1,
|
148
136
|
size_t n2,
|
149
137
|
int ht,
|
150
|
-
int64_t* idx,
|
151
|
-
hamdis_t* hams) {
|
138
|
+
int64_t* __restrict idx,
|
139
|
+
hamdis_t* __restrict hams) {
|
152
140
|
const size_t nwords = nbits / 64;
|
153
141
|
size_t i, j, posm = 0;
|
154
142
|
hamdis_t h;
|
@@ -181,12 +169,13 @@ size_t match_hamming_thres(
|
|
181
169
|
template <class HammingComputer>
|
182
170
|
static void hammings_knn_hc(
|
183
171
|
int bytes_per_code,
|
184
|
-
int_maxheap_array_t* ha,
|
185
|
-
const uint8_t* bs1,
|
186
|
-
const uint8_t* bs2,
|
172
|
+
int_maxheap_array_t* __restrict ha,
|
173
|
+
const uint8_t* __restrict bs1,
|
174
|
+
const uint8_t* __restrict bs2,
|
187
175
|
size_t n2,
|
188
176
|
bool order = true,
|
189
|
-
bool init_heap = true
|
177
|
+
bool init_heap = true,
|
178
|
+
ApproxTopK_mode_t approx_topk_mode = ApproxTopK_mode_t::EXACT_TOPK) {
|
190
179
|
size_t k = ha->k;
|
191
180
|
if (init_heap)
|
192
181
|
ha->heapify();
|
@@ -198,17 +187,44 @@ static void hammings_knn_hc(
|
|
198
187
|
for (int64_t i = 0; i < ha->nh; i++) {
|
199
188
|
HammingComputer hc(bs1 + i * bytes_per_code, bytes_per_code);
|
200
189
|
|
201
|
-
const uint8_t* bs2_ = bs2 + j0 * bytes_per_code;
|
190
|
+
const uint8_t* __restrict bs2_ = bs2 + j0 * bytes_per_code;
|
202
191
|
hamdis_t dis;
|
203
192
|
hamdis_t* __restrict bh_val_ = ha->val + i * k;
|
204
193
|
int64_t* __restrict bh_ids_ = ha->ids + i * k;
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
194
|
+
|
195
|
+
// if larger number of k is required, then ::bs_addn() needs to be
|
196
|
+
// used instead of ::addn()
|
197
|
+
#define HANDLE_APPROX(NB, BD) \
|
198
|
+
case ApproxTopK_mode_t::APPROX_TOPK_BUCKETS_B##NB##_D##BD: \
|
199
|
+
FAISS_THROW_IF_NOT_FMT( \
|
200
|
+
k <= NB * BD, \
|
201
|
+
"The chosen mode (%d) of approximate top-k supports " \
|
202
|
+
"up to %d values, but %zd is requested.", \
|
203
|
+
(int)(ApproxTopK_mode_t::APPROX_TOPK_BUCKETS_B##NB##_D##BD), \
|
204
|
+
NB * BD, \
|
205
|
+
k); \
|
206
|
+
HeapWithBucketsForHamming32< \
|
207
|
+
CMax<hamdis_t, int64_t>, \
|
208
|
+
NB, \
|
209
|
+
BD, \
|
210
|
+
HammingComputer>:: \
|
211
|
+
addn(j1 - j0, hc, bs2_, k, bh_val_, bh_ids_); \
|
212
|
+
break;
|
213
|
+
|
214
|
+
switch (approx_topk_mode) {
|
215
|
+
HANDLE_APPROX(8, 3)
|
216
|
+
HANDLE_APPROX(8, 2)
|
217
|
+
HANDLE_APPROX(16, 2)
|
218
|
+
HANDLE_APPROX(32, 2)
|
219
|
+
default: {
|
220
|
+
for (size_t j = j0; j < j1; j++, bs2_ += bytes_per_code) {
|
221
|
+
dis = hc.hamming(bs2_);
|
222
|
+
if (dis < bh_val_[0]) {
|
223
|
+
faiss::maxheap_replace_top<hamdis_t>(
|
224
|
+
k, bh_val_, bh_ids_, dis, j);
|
225
|
+
}
|
226
|
+
}
|
227
|
+
} break;
|
212
228
|
}
|
213
229
|
}
|
214
230
|
}
|
@@ -220,13 +236,13 @@ static void hammings_knn_hc(
|
|
220
236
|
template <class HammingComputer>
|
221
237
|
static void hammings_knn_mc(
|
222
238
|
int bytes_per_code,
|
223
|
-
const uint8_t* a,
|
224
|
-
const uint8_t* b,
|
239
|
+
const uint8_t* __restrict a,
|
240
|
+
const uint8_t* __restrict b,
|
225
241
|
size_t na,
|
226
242
|
size_t nb,
|
227
243
|
size_t k,
|
228
|
-
int32_t* distances,
|
229
|
-
int64_t* labels) {
|
244
|
+
int32_t* __restrict distances,
|
245
|
+
int64_t* __restrict labels) {
|
230
246
|
const int nBuckets = bytes_per_code * 8 + 1;
|
231
247
|
std::vector<int> all_counters(na * nBuckets, 0);
|
232
248
|
std::unique_ptr<int64_t[]> all_ids_per_dis(new int64_t[na * nBuckets * k]);
|
@@ -271,44 +287,6 @@ static void hammings_knn_mc(
|
|
271
287
|
}
|
272
288
|
}
|
273
289
|
|
274
|
-
// works faster than the template version
|
275
|
-
static void hammings_knn_hc_1(
|
276
|
-
int_maxheap_array_t* ha,
|
277
|
-
const uint64_t* bs1,
|
278
|
-
const uint64_t* bs2,
|
279
|
-
size_t n2,
|
280
|
-
bool order = true,
|
281
|
-
bool init_heap = true) {
|
282
|
-
const size_t nwords = 1;
|
283
|
-
size_t k = ha->k;
|
284
|
-
|
285
|
-
if (init_heap) {
|
286
|
-
ha->heapify();
|
287
|
-
}
|
288
|
-
|
289
|
-
#pragma omp parallel for
|
290
|
-
for (int64_t i = 0; i < ha->nh; i++) {
|
291
|
-
const uint64_t bs1_ = bs1[i];
|
292
|
-
const uint64_t* bs2_ = bs2;
|
293
|
-
hamdis_t dis;
|
294
|
-
hamdis_t* bh_val_ = ha->val + i * k;
|
295
|
-
hamdis_t bh_val_0 = bh_val_[0];
|
296
|
-
int64_t* bh_ids_ = ha->ids + i * k;
|
297
|
-
size_t j;
|
298
|
-
for (j = 0; j < n2; j++, bs2_ += nwords) {
|
299
|
-
dis = popcount64(bs1_ ^ *bs2_);
|
300
|
-
if (dis < bh_val_0) {
|
301
|
-
faiss::maxheap_replace_top<hamdis_t>(
|
302
|
-
k, bh_val_, bh_ids_, dis, j);
|
303
|
-
bh_val_0 = bh_val_[0];
|
304
|
-
}
|
305
|
-
}
|
306
|
-
}
|
307
|
-
if (order) {
|
308
|
-
ha->reorder();
|
309
|
-
}
|
310
|
-
}
|
311
|
-
|
312
290
|
/* Functions to map vectors to bits. Assume proper allocation is done beforehand,
|
313
291
|
meaning that b should be able to receive as many bits as x may produce. */
|
314
292
|
|
@@ -316,7 +294,7 @@ static void hammings_knn_hc_1(
|
|
316
294
|
* dimension 0 corresponds to the least significant bit of b[0], or
|
317
295
|
* equivalently to the lsb of the first byte that is stored.
|
318
296
|
*/
|
319
|
-
void fvec2bitvec(const float* x, uint8_t* b, size_t d) {
|
297
|
+
void fvec2bitvec(const float* __restrict x, uint8_t* __restrict b, size_t d) {
|
320
298
|
for (int i = 0; i < d; i += 8) {
|
321
299
|
uint8_t w = 0;
|
322
300
|
uint8_t mask = 1;
|
@@ -333,14 +311,22 @@ void fvec2bitvec(const float* x, uint8_t* b, size_t d) {
|
|
333
311
|
|
334
312
|
/* Same but for n vectors.
|
335
313
|
Ensure that the output b is byte-aligned (pad with 0s). */
|
336
|
-
void fvecs2bitvecs(
|
314
|
+
void fvecs2bitvecs(
|
315
|
+
const float* __restrict x,
|
316
|
+
uint8_t* __restrict b,
|
317
|
+
size_t d,
|
318
|
+
size_t n) {
|
337
319
|
const int64_t ncodes = ((d + 7) / 8);
|
338
320
|
#pragma omp parallel for if (n > 100000)
|
339
321
|
for (int64_t i = 0; i < n; i++)
|
340
322
|
fvec2bitvec(x + i * d, b + i * ncodes, d);
|
341
323
|
}
|
342
324
|
|
343
|
-
void bitvecs2fvecs(
|
325
|
+
void bitvecs2fvecs(
|
326
|
+
const uint8_t* __restrict b,
|
327
|
+
float* __restrict x,
|
328
|
+
size_t d,
|
329
|
+
size_t n) {
|
344
330
|
const int64_t ncodes = ((d + 7) / 8);
|
345
331
|
#pragma omp parallel for if (n > 100000)
|
346
332
|
for (int64_t i = 0; i < n; i++) {
|
@@ -378,9 +364,9 @@ void bitvec_shuffle(
|
|
378
364
|
size_t n,
|
379
365
|
size_t da,
|
380
366
|
size_t db,
|
381
|
-
const int* order,
|
382
|
-
const uint8_t* a,
|
383
|
-
uint8_t* b) {
|
367
|
+
const int* __restrict order,
|
368
|
+
const uint8_t* __restrict a,
|
369
|
+
uint8_t* __restrict b) {
|
384
370
|
for (size_t i = 0; i < db; i++) {
|
385
371
|
FAISS_THROW_IF_NOT(order[i] >= 0 && order[i] < da);
|
386
372
|
}
|
@@ -407,8 +393,8 @@ void bitvec_shuffle(
|
|
407
393
|
|
408
394
|
/* Compute a set of Hamming distances */
|
409
395
|
void hammings(
|
410
|
-
const uint8_t* a,
|
411
|
-
const uint8_t* b,
|
396
|
+
const uint8_t* __restrict a,
|
397
|
+
const uint8_t* __restrict b,
|
412
398
|
size_t na,
|
413
399
|
size_t nb,
|
414
400
|
size_t ncodes,
|
@@ -434,9 +420,9 @@ void hammings(
|
|
434
420
|
}
|
435
421
|
|
436
422
|
void hammings_knn(
|
437
|
-
int_maxheap_array_t* ha,
|
438
|
-
const uint8_t* a,
|
439
|
-
const uint8_t* b,
|
423
|
+
int_maxheap_array_t* __restrict ha,
|
424
|
+
const uint8_t* __restrict a,
|
425
|
+
const uint8_t* __restrict b,
|
440
426
|
size_t nb,
|
441
427
|
size_t ncodes,
|
442
428
|
int order) {
|
@@ -444,54 +430,52 @@ void hammings_knn(
|
|
444
430
|
}
|
445
431
|
|
446
432
|
void hammings_knn_hc(
|
447
|
-
int_maxheap_array_t* ha,
|
448
|
-
const uint8_t* a,
|
449
|
-
const uint8_t* b,
|
433
|
+
int_maxheap_array_t* __restrict ha,
|
434
|
+
const uint8_t* __restrict a,
|
435
|
+
const uint8_t* __restrict b,
|
450
436
|
size_t nb,
|
451
437
|
size_t ncodes,
|
452
|
-
int order
|
438
|
+
int order,
|
439
|
+
ApproxTopK_mode_t approx_topk_mode) {
|
453
440
|
switch (ncodes) {
|
454
441
|
case 4:
|
455
442
|
hammings_knn_hc<faiss::HammingComputer4>(
|
456
|
-
4, ha, a, b, nb, order, true);
|
443
|
+
4, ha, a, b, nb, order, true, approx_topk_mode);
|
457
444
|
break;
|
458
445
|
case 8:
|
459
|
-
|
460
|
-
|
461
|
-
// (8, ha, a, b, nb, order, true);
|
446
|
+
hammings_knn_hc<faiss::HammingComputer8>(
|
447
|
+
8, ha, a, b, nb, order, true, approx_topk_mode);
|
462
448
|
break;
|
463
449
|
case 16:
|
464
450
|
hammings_knn_hc<faiss::HammingComputer16>(
|
465
|
-
16, ha, a, b, nb, order, true);
|
451
|
+
16, ha, a, b, nb, order, true, approx_topk_mode);
|
466
452
|
break;
|
467
453
|
case 32:
|
468
454
|
hammings_knn_hc<faiss::HammingComputer32>(
|
469
|
-
32, ha, a, b, nb, order, true);
|
455
|
+
32, ha, a, b, nb, order, true, approx_topk_mode);
|
470
456
|
break;
|
471
457
|
default:
|
472
458
|
hammings_knn_hc<faiss::HammingComputerDefault>(
|
473
|
-
ncodes, ha, a, b, nb, order, true);
|
459
|
+
ncodes, ha, a, b, nb, order, true, approx_topk_mode);
|
474
460
|
break;
|
475
461
|
}
|
476
462
|
}
|
477
463
|
|
478
464
|
void hammings_knn_mc(
|
479
|
-
const uint8_t* a,
|
480
|
-
const uint8_t* b,
|
465
|
+
const uint8_t* __restrict a,
|
466
|
+
const uint8_t* __restrict b,
|
481
467
|
size_t na,
|
482
468
|
size_t nb,
|
483
469
|
size_t k,
|
484
470
|
size_t ncodes,
|
485
|
-
int32_t* distances,
|
486
|
-
int64_t* labels) {
|
471
|
+
int32_t* __restrict distances,
|
472
|
+
int64_t* __restrict labels) {
|
487
473
|
switch (ncodes) {
|
488
474
|
case 4:
|
489
475
|
hammings_knn_mc<faiss::HammingComputer4>(
|
490
476
|
4, a, b, na, nb, k, distances, labels);
|
491
477
|
break;
|
492
478
|
case 8:
|
493
|
-
// TODO(hoss): Write analog to hammings_knn_hc_1
|
494
|
-
// hammings_knn_hc_1 (ha, C64(a), C64(b), nb, order, true);
|
495
479
|
hammings_knn_mc<faiss::HammingComputer8>(
|
496
480
|
8, a, b, na, nb, k, distances, labels);
|
497
481
|
break;
|
@@ -664,13 +648,13 @@ size_t match_hamming_thres(
|
|
664
648
|
|
665
649
|
template <class HammingComputer>
|
666
650
|
static void hamming_dis_inner_loop(
|
667
|
-
const uint8_t* ca,
|
668
|
-
const uint8_t* cb,
|
651
|
+
const uint8_t* __restrict ca,
|
652
|
+
const uint8_t* __restrict cb,
|
669
653
|
size_t nb,
|
670
654
|
size_t code_size,
|
671
655
|
int k,
|
672
|
-
hamdis_t* bh_val_,
|
673
|
-
int64_t* bh_ids_) {
|
656
|
+
hamdis_t* __restrict bh_val_,
|
657
|
+
int64_t* __restrict bh_ids_) {
|
674
658
|
HammingComputer hc(ca, code_size);
|
675
659
|
|
676
660
|
for (size_t j = 0; j < nb; j++) {
|
@@ -683,9 +667,9 @@ static void hamming_dis_inner_loop(
|
|
683
667
|
}
|
684
668
|
|
685
669
|
void generalized_hammings_knn_hc(
|
686
|
-
int_maxheap_array_t* ha,
|
687
|
-
const uint8_t* a,
|
688
|
-
const uint8_t* b,
|
670
|
+
int_maxheap_array_t* __restrict ha,
|
671
|
+
const uint8_t* __restrict a,
|
672
|
+
const uint8_t* __restrict b,
|
689
673
|
size_t nb,
|
690
674
|
size_t code_size,
|
691
675
|
int ordered) {
|
@@ -697,11 +681,11 @@ void generalized_hammings_knn_hc(
|
|
697
681
|
|
698
682
|
#pragma omp parallel for
|
699
683
|
for (int i = 0; i < na; i++) {
|
700
|
-
const uint8_t* ca = a + i * code_size;
|
701
|
-
const uint8_t* cb = b;
|
684
|
+
const uint8_t* __restrict ca = a + i * code_size;
|
685
|
+
const uint8_t* __restrict cb = b;
|
702
686
|
|
703
|
-
hamdis_t* bh_val_ = ha->val + i * k;
|
704
|
-
int64_t* bh_ids_ = ha->ids + i * k;
|
687
|
+
hamdis_t* __restrict bh_val_ = ha->val + i * k;
|
688
|
+
int64_t* __restrict bh_ids_ = ha->ids + i * k;
|
705
689
|
|
706
690
|
switch (code_size) {
|
707
691
|
case 8:
|
@@ -19,6 +19,7 @@
|
|
19
19
|
* - memory usage
|
20
20
|
* - cache-misses when dealing with large volumes of data (fewer bits is better)
|
21
21
|
*
|
22
|
+
* hamdis_t is defined in utils/hamming_distance/common.h
|
22
23
|
*/
|
23
24
|
|
24
25
|
#ifndef FAISS_hamming_h
|
@@ -29,8 +30,10 @@
|
|
29
30
|
#include <faiss/impl/platform_macros.h>
|
30
31
|
#include <faiss/utils/Heap.h>
|
31
32
|
|
32
|
-
|
33
|
-
|
33
|
+
// Low-level Hamming distance computations and hamdis_t.
|
34
|
+
#include <faiss/utils/hamming_distance/hamdis-inl.h>
|
35
|
+
|
36
|
+
#include <faiss/utils/approx_topk/mode.h>
|
34
37
|
|
35
38
|
namespace faiss {
|
36
39
|
|
@@ -99,10 +102,6 @@ struct BitstringReader {
|
|
99
102
|
|
100
103
|
FAISS_API extern size_t hamming_batch_size;
|
101
104
|
|
102
|
-
inline int popcount64(uint64_t x) {
|
103
|
-
return __builtin_popcountl(x);
|
104
|
-
}
|
105
|
-
|
106
105
|
/** Compute a set of Hamming distances between na and nb binary vectors
|
107
106
|
*
|
108
107
|
* @param a size na * nbytespercode
|
@@ -125,14 +124,18 @@ void hammings(
|
|
125
124
|
* @param nb number of database vectors
|
126
125
|
* @param ncodes size of the binary codes (bytes)
|
127
126
|
* @param ordered if != 0: order the results by decreasing distance
|
128
|
-
* (may be bottleneck for k/n > 0.01)
|
127
|
+
* (may be bottleneck for k/n > 0.01)
|
128
|
+
* @param approx_topk_mode enables the use of approximate top-k facilities
|
129
|
+
* to speed up the heap
|
130
|
+
*/
|
129
131
|
void hammings_knn_hc(
|
130
132
|
int_maxheap_array_t* ha,
|
131
133
|
const uint8_t* a,
|
132
134
|
const uint8_t* b,
|
133
135
|
size_t nb,
|
134
136
|
size_t ncodes,
|
135
|
-
int ordered
|
137
|
+
int ordered,
|
138
|
+
ApproxTopK_mode_t approx_topk_mode = ApproxTopK_mode_t::EXACT_TOPK);
|
136
139
|
|
137
140
|
/* Legacy alias to hammings_knn_hc. */
|
138
141
|
void hammings_knn(
|
@@ -209,9 +212,17 @@ void crosshamming_count_thres(
|
|
209
212
|
/* compute the Hamming distances between two codewords of nwords*64 bits */
|
210
213
|
hamdis_t hamming(const uint64_t* bs1, const uint64_t* bs2, size_t nwords);
|
211
214
|
|
212
|
-
|
215
|
+
/** generalized Hamming distances (= count number of code bytes that
|
216
|
+
are the same) */
|
217
|
+
void generalized_hammings_knn_hc(
|
218
|
+
int_maxheap_array_t* ha,
|
219
|
+
const uint8_t* a,
|
220
|
+
const uint8_t* b,
|
221
|
+
size_t nb,
|
222
|
+
size_t code_size,
|
223
|
+
int ordered = true);
|
213
224
|
|
214
|
-
//
|
225
|
+
} // namespace faiss
|
215
226
|
|
216
227
|
#include <faiss/utils/hamming-inl.h>
|
217
228
|
|