faiss 0.2.6 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +2 -2
- data/vendor/faiss/faiss/AutoTune.cpp +15 -4
- data/vendor/faiss/faiss/AutoTune.h +0 -1
- data/vendor/faiss/faiss/Clustering.cpp +1 -5
- data/vendor/faiss/faiss/Clustering.h +0 -2
- data/vendor/faiss/faiss/IVFlib.h +0 -2
- data/vendor/faiss/faiss/Index.h +1 -2
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
- data/vendor/faiss/faiss/IndexBinary.h +0 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
- data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
- data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
- data/vendor/faiss/faiss/IndexFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
- data/vendor/faiss/faiss/IndexFlat.h +1 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
- data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
- data/vendor/faiss/faiss/IndexHNSW.h +0 -1
- data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
- data/vendor/faiss/faiss/IndexIDMap.h +0 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
- data/vendor/faiss/faiss/IndexIVF.h +121 -61
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
- data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
- data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
- data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
- data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
- data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
- data/vendor/faiss/faiss/IndexReplicas.h +0 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
- data/vendor/faiss/faiss/IndexShards.cpp +26 -109
- data/vendor/faiss/faiss/IndexShards.h +2 -3
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
- data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
- data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
- data/vendor/faiss/faiss/MetaIndexes.h +29 -0
- data/vendor/faiss/faiss/MetricType.h +14 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
- data/vendor/faiss/faiss/VectorTransform.h +1 -3
- data/vendor/faiss/faiss/clone_index.cpp +232 -18
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
- data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
- data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
- data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
- data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
- data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
- data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
- data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
- data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
- data/vendor/faiss/faiss/impl/HNSW.h +6 -9
- data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
- data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
- data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
- data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
- data/vendor/faiss/faiss/impl/NSG.h +4 -7
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
- data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
- data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
- data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
- data/vendor/faiss/faiss/index_factory.cpp +8 -10
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
- data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
- data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
- data/vendor/faiss/faiss/utils/Heap.h +35 -1
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
- data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
- data/vendor/faiss/faiss/utils/distances.cpp +61 -7
- data/vendor/faiss/faiss/utils/distances.h +11 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
- data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
- data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
- data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
- data/vendor/faiss/faiss/utils/fp16.h +7 -0
- data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
- data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
- data/vendor/faiss/faiss/utils/hamming.h +21 -10
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
- data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
- data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
- data/vendor/faiss/faiss/utils/sorting.h +71 -0
- data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
- data/vendor/faiss/faiss/utils/utils.cpp +4 -176
- data/vendor/faiss/faiss/utils/utils.h +2 -9
- metadata +29 -3
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26
|
@@ -35,6 +35,7 @@
|
|
|
35
35
|
#include <faiss/impl/AuxIndexStructures.h>
|
|
36
36
|
#include <faiss/impl/FaissAssert.h>
|
|
37
37
|
#include <faiss/utils/Heap.h>
|
|
38
|
+
#include <faiss/utils/approx_topk_hamming/approx_topk_hamming.h>
|
|
38
39
|
#include <faiss/utils/utils.h>
|
|
39
40
|
|
|
40
41
|
static const size_t BLOCKSIZE_QUERY = 8192;
|
|
@@ -43,26 +44,13 @@ namespace faiss {
|
|
|
43
44
|
|
|
44
45
|
size_t hamming_batch_size = 65536;
|
|
45
46
|
|
|
46
|
-
const uint8_t hamdis_tab_ham_bytes[256] = {
|
|
47
|
-
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
|
|
48
|
-
2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
|
49
|
-
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
|
|
50
|
-
2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
|
51
|
-
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
|
|
52
|
-
4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
|
53
|
-
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5,
|
|
54
|
-
3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
|
55
|
-
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
|
|
56
|
-
4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
|
57
|
-
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
|
|
58
|
-
|
|
59
47
|
template <size_t nbits>
|
|
60
48
|
void hammings(
|
|
61
|
-
const uint64_t* bs1,
|
|
62
|
-
const uint64_t* bs2,
|
|
49
|
+
const uint64_t* __restrict bs1,
|
|
50
|
+
const uint64_t* __restrict bs2,
|
|
63
51
|
size_t n1,
|
|
64
52
|
size_t n2,
|
|
65
|
-
hamdis_t* dis)
|
|
53
|
+
hamdis_t* __restrict dis)
|
|
66
54
|
|
|
67
55
|
{
|
|
68
56
|
size_t i, j;
|
|
@@ -76,8 +64,8 @@ void hammings(
|
|
|
76
64
|
}
|
|
77
65
|
|
|
78
66
|
void hammings(
|
|
79
|
-
const uint64_t* bs1,
|
|
80
|
-
const uint64_t* bs2,
|
|
67
|
+
const uint64_t* __restrict bs1,
|
|
68
|
+
const uint64_t* __restrict bs2,
|
|
81
69
|
size_t n1,
|
|
82
70
|
size_t n2,
|
|
83
71
|
size_t nwords,
|
|
@@ -95,12 +83,12 @@ void hammings(
|
|
|
95
83
|
/* Count number of matches given a max threshold */
|
|
96
84
|
template <size_t nbits>
|
|
97
85
|
void hamming_count_thres(
|
|
98
|
-
const uint64_t* bs1,
|
|
99
|
-
const uint64_t* bs2,
|
|
86
|
+
const uint64_t* __restrict bs1,
|
|
87
|
+
const uint64_t* __restrict bs2,
|
|
100
88
|
size_t n1,
|
|
101
89
|
size_t n2,
|
|
102
90
|
hamdis_t ht,
|
|
103
|
-
size_t* nptr) {
|
|
91
|
+
size_t* __restrict nptr) {
|
|
104
92
|
const size_t nwords = nbits / 64;
|
|
105
93
|
size_t i, j, posm = 0;
|
|
106
94
|
const uint64_t* bs2_ = bs2;
|
|
@@ -120,10 +108,10 @@ void hamming_count_thres(
|
|
|
120
108
|
|
|
121
109
|
template <size_t nbits>
|
|
122
110
|
void crosshamming_count_thres(
|
|
123
|
-
const uint64_t* dbs,
|
|
111
|
+
const uint64_t* __restrict dbs,
|
|
124
112
|
size_t n,
|
|
125
113
|
int ht,
|
|
126
|
-
size_t* nptr) {
|
|
114
|
+
size_t* __restrict nptr) {
|
|
127
115
|
const size_t nwords = nbits / 64;
|
|
128
116
|
size_t i, j, posm = 0;
|
|
129
117
|
const uint64_t* bs1 = dbs;
|
|
@@ -142,13 +130,13 @@ void crosshamming_count_thres(
|
|
|
142
130
|
|
|
143
131
|
template <size_t nbits>
|
|
144
132
|
size_t match_hamming_thres(
|
|
145
|
-
const uint64_t* bs1,
|
|
146
|
-
const uint64_t* bs2,
|
|
133
|
+
const uint64_t* __restrict bs1,
|
|
134
|
+
const uint64_t* __restrict bs2,
|
|
147
135
|
size_t n1,
|
|
148
136
|
size_t n2,
|
|
149
137
|
int ht,
|
|
150
|
-
int64_t* idx,
|
|
151
|
-
hamdis_t* hams) {
|
|
138
|
+
int64_t* __restrict idx,
|
|
139
|
+
hamdis_t* __restrict hams) {
|
|
152
140
|
const size_t nwords = nbits / 64;
|
|
153
141
|
size_t i, j, posm = 0;
|
|
154
142
|
hamdis_t h;
|
|
@@ -181,12 +169,13 @@ size_t match_hamming_thres(
|
|
|
181
169
|
template <class HammingComputer>
|
|
182
170
|
static void hammings_knn_hc(
|
|
183
171
|
int bytes_per_code,
|
|
184
|
-
int_maxheap_array_t* ha,
|
|
185
|
-
const uint8_t* bs1,
|
|
186
|
-
const uint8_t* bs2,
|
|
172
|
+
int_maxheap_array_t* __restrict ha,
|
|
173
|
+
const uint8_t* __restrict bs1,
|
|
174
|
+
const uint8_t* __restrict bs2,
|
|
187
175
|
size_t n2,
|
|
188
176
|
bool order = true,
|
|
189
|
-
bool init_heap = true
|
|
177
|
+
bool init_heap = true,
|
|
178
|
+
ApproxTopK_mode_t approx_topk_mode = ApproxTopK_mode_t::EXACT_TOPK) {
|
|
190
179
|
size_t k = ha->k;
|
|
191
180
|
if (init_heap)
|
|
192
181
|
ha->heapify();
|
|
@@ -198,17 +187,44 @@ static void hammings_knn_hc(
|
|
|
198
187
|
for (int64_t i = 0; i < ha->nh; i++) {
|
|
199
188
|
HammingComputer hc(bs1 + i * bytes_per_code, bytes_per_code);
|
|
200
189
|
|
|
201
|
-
const uint8_t* bs2_ = bs2 + j0 * bytes_per_code;
|
|
190
|
+
const uint8_t* __restrict bs2_ = bs2 + j0 * bytes_per_code;
|
|
202
191
|
hamdis_t dis;
|
|
203
192
|
hamdis_t* __restrict bh_val_ = ha->val + i * k;
|
|
204
193
|
int64_t* __restrict bh_ids_ = ha->ids + i * k;
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
194
|
+
|
|
195
|
+
// if larger number of k is required, then ::bs_addn() needs to be
|
|
196
|
+
// used instead of ::addn()
|
|
197
|
+
#define HANDLE_APPROX(NB, BD) \
|
|
198
|
+
case ApproxTopK_mode_t::APPROX_TOPK_BUCKETS_B##NB##_D##BD: \
|
|
199
|
+
FAISS_THROW_IF_NOT_FMT( \
|
|
200
|
+
k <= NB * BD, \
|
|
201
|
+
"The chosen mode (%d) of approximate top-k supports " \
|
|
202
|
+
"up to %d values, but %zd is requested.", \
|
|
203
|
+
(int)(ApproxTopK_mode_t::APPROX_TOPK_BUCKETS_B##NB##_D##BD), \
|
|
204
|
+
NB * BD, \
|
|
205
|
+
k); \
|
|
206
|
+
HeapWithBucketsForHamming32< \
|
|
207
|
+
CMax<hamdis_t, int64_t>, \
|
|
208
|
+
NB, \
|
|
209
|
+
BD, \
|
|
210
|
+
HammingComputer>:: \
|
|
211
|
+
addn(j1 - j0, hc, bs2_, k, bh_val_, bh_ids_); \
|
|
212
|
+
break;
|
|
213
|
+
|
|
214
|
+
switch (approx_topk_mode) {
|
|
215
|
+
HANDLE_APPROX(8, 3)
|
|
216
|
+
HANDLE_APPROX(8, 2)
|
|
217
|
+
HANDLE_APPROX(16, 2)
|
|
218
|
+
HANDLE_APPROX(32, 2)
|
|
219
|
+
default: {
|
|
220
|
+
for (size_t j = j0; j < j1; j++, bs2_ += bytes_per_code) {
|
|
221
|
+
dis = hc.hamming(bs2_);
|
|
222
|
+
if (dis < bh_val_[0]) {
|
|
223
|
+
faiss::maxheap_replace_top<hamdis_t>(
|
|
224
|
+
k, bh_val_, bh_ids_, dis, j);
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
} break;
|
|
212
228
|
}
|
|
213
229
|
}
|
|
214
230
|
}
|
|
@@ -220,13 +236,13 @@ static void hammings_knn_hc(
|
|
|
220
236
|
template <class HammingComputer>
|
|
221
237
|
static void hammings_knn_mc(
|
|
222
238
|
int bytes_per_code,
|
|
223
|
-
const uint8_t* a,
|
|
224
|
-
const uint8_t* b,
|
|
239
|
+
const uint8_t* __restrict a,
|
|
240
|
+
const uint8_t* __restrict b,
|
|
225
241
|
size_t na,
|
|
226
242
|
size_t nb,
|
|
227
243
|
size_t k,
|
|
228
|
-
int32_t* distances,
|
|
229
|
-
int64_t* labels) {
|
|
244
|
+
int32_t* __restrict distances,
|
|
245
|
+
int64_t* __restrict labels) {
|
|
230
246
|
const int nBuckets = bytes_per_code * 8 + 1;
|
|
231
247
|
std::vector<int> all_counters(na * nBuckets, 0);
|
|
232
248
|
std::unique_ptr<int64_t[]> all_ids_per_dis(new int64_t[na * nBuckets * k]);
|
|
@@ -271,44 +287,6 @@ static void hammings_knn_mc(
|
|
|
271
287
|
}
|
|
272
288
|
}
|
|
273
289
|
|
|
274
|
-
// works faster than the template version
|
|
275
|
-
static void hammings_knn_hc_1(
|
|
276
|
-
int_maxheap_array_t* ha,
|
|
277
|
-
const uint64_t* bs1,
|
|
278
|
-
const uint64_t* bs2,
|
|
279
|
-
size_t n2,
|
|
280
|
-
bool order = true,
|
|
281
|
-
bool init_heap = true) {
|
|
282
|
-
const size_t nwords = 1;
|
|
283
|
-
size_t k = ha->k;
|
|
284
|
-
|
|
285
|
-
if (init_heap) {
|
|
286
|
-
ha->heapify();
|
|
287
|
-
}
|
|
288
|
-
|
|
289
|
-
#pragma omp parallel for
|
|
290
|
-
for (int64_t i = 0; i < ha->nh; i++) {
|
|
291
|
-
const uint64_t bs1_ = bs1[i];
|
|
292
|
-
const uint64_t* bs2_ = bs2;
|
|
293
|
-
hamdis_t dis;
|
|
294
|
-
hamdis_t* bh_val_ = ha->val + i * k;
|
|
295
|
-
hamdis_t bh_val_0 = bh_val_[0];
|
|
296
|
-
int64_t* bh_ids_ = ha->ids + i * k;
|
|
297
|
-
size_t j;
|
|
298
|
-
for (j = 0; j < n2; j++, bs2_ += nwords) {
|
|
299
|
-
dis = popcount64(bs1_ ^ *bs2_);
|
|
300
|
-
if (dis < bh_val_0) {
|
|
301
|
-
faiss::maxheap_replace_top<hamdis_t>(
|
|
302
|
-
k, bh_val_, bh_ids_, dis, j);
|
|
303
|
-
bh_val_0 = bh_val_[0];
|
|
304
|
-
}
|
|
305
|
-
}
|
|
306
|
-
}
|
|
307
|
-
if (order) {
|
|
308
|
-
ha->reorder();
|
|
309
|
-
}
|
|
310
|
-
}
|
|
311
|
-
|
|
312
290
|
/* Functions to maps vectors to bits. Assume proper allocation done beforehand,
|
|
313
291
|
meaning that b should be be able to receive as many bits as x may produce. */
|
|
314
292
|
|
|
@@ -316,7 +294,7 @@ static void hammings_knn_hc_1(
|
|
|
316
294
|
* dimension 0 corresponds to the least significant bit of b[0], or
|
|
317
295
|
* equivalently to the lsb of the first byte that is stored.
|
|
318
296
|
*/
|
|
319
|
-
void fvec2bitvec(const float* x, uint8_t* b, size_t d) {
|
|
297
|
+
void fvec2bitvec(const float* __restrict x, uint8_t* __restrict b, size_t d) {
|
|
320
298
|
for (int i = 0; i < d; i += 8) {
|
|
321
299
|
uint8_t w = 0;
|
|
322
300
|
uint8_t mask = 1;
|
|
@@ -333,14 +311,22 @@ void fvec2bitvec(const float* x, uint8_t* b, size_t d) {
|
|
|
333
311
|
|
|
334
312
|
/* Same but for n vectors.
|
|
335
313
|
Ensure that the ouptut b is byte-aligned (pad with 0s). */
|
|
336
|
-
void fvecs2bitvecs(
|
|
314
|
+
void fvecs2bitvecs(
|
|
315
|
+
const float* __restrict x,
|
|
316
|
+
uint8_t* __restrict b,
|
|
317
|
+
size_t d,
|
|
318
|
+
size_t n) {
|
|
337
319
|
const int64_t ncodes = ((d + 7) / 8);
|
|
338
320
|
#pragma omp parallel for if (n > 100000)
|
|
339
321
|
for (int64_t i = 0; i < n; i++)
|
|
340
322
|
fvec2bitvec(x + i * d, b + i * ncodes, d);
|
|
341
323
|
}
|
|
342
324
|
|
|
343
|
-
void bitvecs2fvecs(
|
|
325
|
+
void bitvecs2fvecs(
|
|
326
|
+
const uint8_t* __restrict b,
|
|
327
|
+
float* __restrict x,
|
|
328
|
+
size_t d,
|
|
329
|
+
size_t n) {
|
|
344
330
|
const int64_t ncodes = ((d + 7) / 8);
|
|
345
331
|
#pragma omp parallel for if (n > 100000)
|
|
346
332
|
for (int64_t i = 0; i < n; i++) {
|
|
@@ -378,9 +364,9 @@ void bitvec_shuffle(
|
|
|
378
364
|
size_t n,
|
|
379
365
|
size_t da,
|
|
380
366
|
size_t db,
|
|
381
|
-
const int* order,
|
|
382
|
-
const uint8_t* a,
|
|
383
|
-
uint8_t* b) {
|
|
367
|
+
const int* __restrict order,
|
|
368
|
+
const uint8_t* __restrict a,
|
|
369
|
+
uint8_t* __restrict b) {
|
|
384
370
|
for (size_t i = 0; i < db; i++) {
|
|
385
371
|
FAISS_THROW_IF_NOT(order[i] >= 0 && order[i] < da);
|
|
386
372
|
}
|
|
@@ -407,8 +393,8 @@ void bitvec_shuffle(
|
|
|
407
393
|
|
|
408
394
|
/* Compute a set of Hamming distances */
|
|
409
395
|
void hammings(
|
|
410
|
-
const uint8_t* a,
|
|
411
|
-
const uint8_t* b,
|
|
396
|
+
const uint8_t* __restrict a,
|
|
397
|
+
const uint8_t* __restrict b,
|
|
412
398
|
size_t na,
|
|
413
399
|
size_t nb,
|
|
414
400
|
size_t ncodes,
|
|
@@ -434,9 +420,9 @@ void hammings(
|
|
|
434
420
|
}
|
|
435
421
|
|
|
436
422
|
void hammings_knn(
|
|
437
|
-
int_maxheap_array_t* ha,
|
|
438
|
-
const uint8_t* a,
|
|
439
|
-
const uint8_t* b,
|
|
423
|
+
int_maxheap_array_t* __restrict ha,
|
|
424
|
+
const uint8_t* __restrict a,
|
|
425
|
+
const uint8_t* __restrict b,
|
|
440
426
|
size_t nb,
|
|
441
427
|
size_t ncodes,
|
|
442
428
|
int order) {
|
|
@@ -444,54 +430,52 @@ void hammings_knn(
|
|
|
444
430
|
}
|
|
445
431
|
|
|
446
432
|
void hammings_knn_hc(
|
|
447
|
-
int_maxheap_array_t* ha,
|
|
448
|
-
const uint8_t* a,
|
|
449
|
-
const uint8_t* b,
|
|
433
|
+
int_maxheap_array_t* __restrict ha,
|
|
434
|
+
const uint8_t* __restrict a,
|
|
435
|
+
const uint8_t* __restrict b,
|
|
450
436
|
size_t nb,
|
|
451
437
|
size_t ncodes,
|
|
452
|
-
int order
|
|
438
|
+
int order,
|
|
439
|
+
ApproxTopK_mode_t approx_topk_mode) {
|
|
453
440
|
switch (ncodes) {
|
|
454
441
|
case 4:
|
|
455
442
|
hammings_knn_hc<faiss::HammingComputer4>(
|
|
456
|
-
4, ha, a, b, nb, order, true);
|
|
443
|
+
4, ha, a, b, nb, order, true, approx_topk_mode);
|
|
457
444
|
break;
|
|
458
445
|
case 8:
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
// (8, ha, a, b, nb, order, true);
|
|
446
|
+
hammings_knn_hc<faiss::HammingComputer8>(
|
|
447
|
+
8, ha, a, b, nb, order, true, approx_topk_mode);
|
|
462
448
|
break;
|
|
463
449
|
case 16:
|
|
464
450
|
hammings_knn_hc<faiss::HammingComputer16>(
|
|
465
|
-
16, ha, a, b, nb, order, true);
|
|
451
|
+
16, ha, a, b, nb, order, true, approx_topk_mode);
|
|
466
452
|
break;
|
|
467
453
|
case 32:
|
|
468
454
|
hammings_knn_hc<faiss::HammingComputer32>(
|
|
469
|
-
32, ha, a, b, nb, order, true);
|
|
455
|
+
32, ha, a, b, nb, order, true, approx_topk_mode);
|
|
470
456
|
break;
|
|
471
457
|
default:
|
|
472
458
|
hammings_knn_hc<faiss::HammingComputerDefault>(
|
|
473
|
-
ncodes, ha, a, b, nb, order, true);
|
|
459
|
+
ncodes, ha, a, b, nb, order, true, approx_topk_mode);
|
|
474
460
|
break;
|
|
475
461
|
}
|
|
476
462
|
}
|
|
477
463
|
|
|
478
464
|
void hammings_knn_mc(
|
|
479
|
-
const uint8_t* a,
|
|
480
|
-
const uint8_t* b,
|
|
465
|
+
const uint8_t* __restrict a,
|
|
466
|
+
const uint8_t* __restrict b,
|
|
481
467
|
size_t na,
|
|
482
468
|
size_t nb,
|
|
483
469
|
size_t k,
|
|
484
470
|
size_t ncodes,
|
|
485
|
-
int32_t* distances,
|
|
486
|
-
int64_t* labels) {
|
|
471
|
+
int32_t* __restrict distances,
|
|
472
|
+
int64_t* __restrict labels) {
|
|
487
473
|
switch (ncodes) {
|
|
488
474
|
case 4:
|
|
489
475
|
hammings_knn_mc<faiss::HammingComputer4>(
|
|
490
476
|
4, a, b, na, nb, k, distances, labels);
|
|
491
477
|
break;
|
|
492
478
|
case 8:
|
|
493
|
-
// TODO(hoss): Write analog to hammings_knn_hc_1
|
|
494
|
-
// hammings_knn_hc_1 (ha, C64(a), C64(b), nb, order, true);
|
|
495
479
|
hammings_knn_mc<faiss::HammingComputer8>(
|
|
496
480
|
8, a, b, na, nb, k, distances, labels);
|
|
497
481
|
break;
|
|
@@ -664,13 +648,13 @@ size_t match_hamming_thres(
|
|
|
664
648
|
|
|
665
649
|
template <class HammingComputer>
|
|
666
650
|
static void hamming_dis_inner_loop(
|
|
667
|
-
const uint8_t* ca,
|
|
668
|
-
const uint8_t* cb,
|
|
651
|
+
const uint8_t* __restrict ca,
|
|
652
|
+
const uint8_t* __restrict cb,
|
|
669
653
|
size_t nb,
|
|
670
654
|
size_t code_size,
|
|
671
655
|
int k,
|
|
672
|
-
hamdis_t* bh_val_,
|
|
673
|
-
int64_t* bh_ids_) {
|
|
656
|
+
hamdis_t* __restrict bh_val_,
|
|
657
|
+
int64_t* __restrict bh_ids_) {
|
|
674
658
|
HammingComputer hc(ca, code_size);
|
|
675
659
|
|
|
676
660
|
for (size_t j = 0; j < nb; j++) {
|
|
@@ -683,9 +667,9 @@ static void hamming_dis_inner_loop(
|
|
|
683
667
|
}
|
|
684
668
|
|
|
685
669
|
void generalized_hammings_knn_hc(
|
|
686
|
-
int_maxheap_array_t* ha,
|
|
687
|
-
const uint8_t* a,
|
|
688
|
-
const uint8_t* b,
|
|
670
|
+
int_maxheap_array_t* __restrict ha,
|
|
671
|
+
const uint8_t* __restrict a,
|
|
672
|
+
const uint8_t* __restrict b,
|
|
689
673
|
size_t nb,
|
|
690
674
|
size_t code_size,
|
|
691
675
|
int ordered) {
|
|
@@ -697,11 +681,11 @@ void generalized_hammings_knn_hc(
|
|
|
697
681
|
|
|
698
682
|
#pragma omp parallel for
|
|
699
683
|
for (int i = 0; i < na; i++) {
|
|
700
|
-
const uint8_t* ca = a + i * code_size;
|
|
701
|
-
const uint8_t* cb = b;
|
|
684
|
+
const uint8_t* __restrict ca = a + i * code_size;
|
|
685
|
+
const uint8_t* __restrict cb = b;
|
|
702
686
|
|
|
703
|
-
hamdis_t* bh_val_ = ha->val + i * k;
|
|
704
|
-
int64_t* bh_ids_ = ha->ids + i * k;
|
|
687
|
+
hamdis_t* __restrict bh_val_ = ha->val + i * k;
|
|
688
|
+
int64_t* __restrict bh_ids_ = ha->ids + i * k;
|
|
705
689
|
|
|
706
690
|
switch (code_size) {
|
|
707
691
|
case 8:
|
|
@@ -19,6 +19,7 @@
|
|
|
19
19
|
* - memory usage
|
|
20
20
|
* - cache-misses when dealing with large volumes of data (fewer bits is better)
|
|
21
21
|
*
|
|
22
|
+
* hamdis_t is defined in utils/hamming_distance/common.h
|
|
22
23
|
*/
|
|
23
24
|
|
|
24
25
|
#ifndef FAISS_hamming_h
|
|
@@ -29,8 +30,10 @@
|
|
|
29
30
|
#include <faiss/impl/platform_macros.h>
|
|
30
31
|
#include <faiss/utils/Heap.h>
|
|
31
32
|
|
|
32
|
-
|
|
33
|
-
|
|
33
|
+
// Low-level Hamming distance computations and hamdis_t.
|
|
34
|
+
#include <faiss/utils/hamming_distance/hamdis-inl.h>
|
|
35
|
+
|
|
36
|
+
#include <faiss/utils/approx_topk/mode.h>
|
|
34
37
|
|
|
35
38
|
namespace faiss {
|
|
36
39
|
|
|
@@ -99,10 +102,6 @@ struct BitstringReader {
|
|
|
99
102
|
|
|
100
103
|
FAISS_API extern size_t hamming_batch_size;
|
|
101
104
|
|
|
102
|
-
inline int popcount64(uint64_t x) {
|
|
103
|
-
return __builtin_popcountl(x);
|
|
104
|
-
}
|
|
105
|
-
|
|
106
105
|
/** Compute a set of Hamming distances between na and nb binary vectors
|
|
107
106
|
*
|
|
108
107
|
* @param a size na * nbytespercode
|
|
@@ -125,14 +124,18 @@ void hammings(
|
|
|
125
124
|
* @param nb number of database vectors
|
|
126
125
|
* @param ncodes size of the binary codes (bytes)
|
|
127
126
|
* @param ordered if != 0: order the results by decreasing distance
|
|
128
|
-
* (may be bottleneck for k/n > 0.01)
|
|
127
|
+
* (may be bottleneck for k/n > 0.01)
|
|
128
|
+
* @param approx_topk_mode allows to use approximate top-k facilities
|
|
129
|
+
* to speedup heap
|
|
130
|
+
*/
|
|
129
131
|
void hammings_knn_hc(
|
|
130
132
|
int_maxheap_array_t* ha,
|
|
131
133
|
const uint8_t* a,
|
|
132
134
|
const uint8_t* b,
|
|
133
135
|
size_t nb,
|
|
134
136
|
size_t ncodes,
|
|
135
|
-
int ordered
|
|
137
|
+
int ordered,
|
|
138
|
+
ApproxTopK_mode_t approx_topk_mode = ApproxTopK_mode_t::EXACT_TOPK);
|
|
136
139
|
|
|
137
140
|
/* Legacy alias to hammings_knn_hc. */
|
|
138
141
|
void hammings_knn(
|
|
@@ -209,9 +212,17 @@ void crosshamming_count_thres(
|
|
|
209
212
|
/* compute the Hamming distances between two codewords of nwords*64 bits */
|
|
210
213
|
hamdis_t hamming(const uint64_t* bs1, const uint64_t* bs2, size_t nwords);
|
|
211
214
|
|
|
212
|
-
|
|
215
|
+
/** generalized Hamming distances (= count number of code bytes that
|
|
216
|
+
are the same) */
|
|
217
|
+
void generalized_hammings_knn_hc(
|
|
218
|
+
int_maxheap_array_t* ha,
|
|
219
|
+
const uint8_t* a,
|
|
220
|
+
const uint8_t* b,
|
|
221
|
+
size_t nb,
|
|
222
|
+
size_t code_size,
|
|
223
|
+
int ordered = true);
|
|
213
224
|
|
|
214
|
-
//
|
|
225
|
+
} // namespace faiss
|
|
215
226
|
|
|
216
227
|
#include <faiss/utils/hamming-inl.h>
|
|
217
228
|
|