hnswlib 0.6.2 → 0.7.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/ext/hnswlib/hnswlibext.cpp +1 -1
- data/ext/hnswlib/hnswlibext.hpp +194 -62
- data/ext/hnswlib/src/bruteforce.h +142 -131
- data/ext/hnswlib/src/hnswalg.h +1028 -964
- data/ext/hnswlib/src/hnswlib.h +74 -66
- data/ext/hnswlib/src/space_ip.h +299 -299
- data/ext/hnswlib/src/space_l2.h +268 -273
- data/ext/hnswlib/src/visited_list_pool.h +54 -55
- data/lib/hnswlib/version.rb +2 -2
- data/lib/hnswlib.rb +17 -10
- data/sig/hnswlib.rbs +6 -6
- metadata +3 -3
data/ext/hnswlib/src/space_ip.h
CHANGED
@@ -3,375 +3,375 @@
namespace hnswlib {

static float
InnerProduct(const void *pVect1, const void *pVect2, const void *qty_ptr) {
    size_t qty = *((size_t *) qty_ptr);
    float res = 0;
    for (unsigned i = 0; i < qty; i++) {
        res += ((float *) pVect1)[i] * ((float *) pVect2)[i];
    }
    return res;
}

static float
InnerProductDistance(const void *pVect1, const void *pVect2, const void *qty_ptr) {
    return 1.0f - InnerProduct(pVect1, pVect2, qty_ptr);
}

#if defined(USE_AVX)

// Favor using AVX if available.
static float
InnerProductSIMD4ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    float PORTABLE_ALIGN32 TmpRes[8];
    float *pVect1 = (float *) pVect1v;
    float *pVect2 = (float *) pVect2v;
    size_t qty = *((size_t *) qty_ptr);

    size_t qty16 = qty / 16;
    size_t qty4 = qty / 4;

    const float *pEnd1 = pVect1 + 16 * qty16;
    const float *pEnd2 = pVect1 + 4 * qty4;

    __m256 sum256 = _mm256_set1_ps(0);

    while (pVect1 < pEnd1) {
        //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);

        __m256 v1 = _mm256_loadu_ps(pVect1);
        pVect1 += 8;
        __m256 v2 = _mm256_loadu_ps(pVect2);
        pVect2 += 8;
        sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));

        v1 = _mm256_loadu_ps(pVect1);
        pVect1 += 8;
        v2 = _mm256_loadu_ps(pVect2);
        pVect2 += 8;
        sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
    }

    __m128 v1, v2;
    __m128 sum_prod = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), _mm256_extractf128_ps(sum256, 1));

    while (pVect1 < pEnd2) {
        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
    }

    _mm_store_ps(TmpRes, sum_prod);
    float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
    return sum;
}

static float
InnerProductDistanceSIMD4ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    return 1.0f - InnerProductSIMD4ExtAVX(pVect1v, pVect2v, qty_ptr);
}

#endif

#if defined(USE_SSE)

static float
InnerProductSIMD4ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    float PORTABLE_ALIGN32 TmpRes[8];
    float *pVect1 = (float *) pVect1v;
    float *pVect2 = (float *) pVect2v;
    size_t qty = *((size_t *) qty_ptr);

    size_t qty16 = qty / 16;
    size_t qty4 = qty / 4;

    const float *pEnd1 = pVect1 + 16 * qty16;
    const float *pEnd2 = pVect1 + 4 * qty4;

    __m128 v1, v2;
    __m128 sum_prod = _mm_set1_ps(0);

    while (pVect1 < pEnd1) {
        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));

        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));

        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));

        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
    }

    while (pVect1 < pEnd2) {
        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
    }

    _mm_store_ps(TmpRes, sum_prod);
    float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];

    return sum;
}

static float
InnerProductDistanceSIMD4ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    return 1.0f - InnerProductSIMD4ExtSSE(pVect1v, pVect2v, qty_ptr);
}

#endif


#if defined(USE_AVX512)

static float
InnerProductSIMD16ExtAVX512(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    float PORTABLE_ALIGN64 TmpRes[16];
    float *pVect1 = (float *) pVect1v;
    float *pVect2 = (float *) pVect2v;
    size_t qty = *((size_t *) qty_ptr);

    size_t qty16 = qty / 16;

    const float *pEnd1 = pVect1 + 16 * qty16;

    __m512 sum512 = _mm512_set1_ps(0);

    while (pVect1 < pEnd1) {
        //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);

        __m512 v1 = _mm512_loadu_ps(pVect1);
        pVect1 += 16;
        __m512 v2 = _mm512_loadu_ps(pVect2);
        pVect2 += 16;
        sum512 = _mm512_add_ps(sum512, _mm512_mul_ps(v1, v2));
    }

    _mm512_store_ps(TmpRes, sum512);
    float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7] + TmpRes[8] + TmpRes[9] + TmpRes[10] + TmpRes[11] + TmpRes[12] + TmpRes[13] + TmpRes[14] + TmpRes[15];

    return sum;
}

static float
InnerProductDistanceSIMD16ExtAVX512(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    return 1.0f - InnerProductSIMD16ExtAVX512(pVect1v, pVect2v, qty_ptr);
}

#endif

#if defined(USE_AVX)

static float
InnerProductSIMD16ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    float PORTABLE_ALIGN32 TmpRes[8];
    float *pVect1 = (float *) pVect1v;
    float *pVect2 = (float *) pVect2v;
    size_t qty = *((size_t *) qty_ptr);

    size_t qty16 = qty / 16;

    const float *pEnd1 = pVect1 + 16 * qty16;

    __m256 sum256 = _mm256_set1_ps(0);

    while (pVect1 < pEnd1) {
        //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);

        __m256 v1 = _mm256_loadu_ps(pVect1);
        pVect1 += 8;
        __m256 v2 = _mm256_loadu_ps(pVect2);
        pVect2 += 8;
        sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));

        v1 = _mm256_loadu_ps(pVect1);
        pVect1 += 8;
        v2 = _mm256_loadu_ps(pVect2);
        pVect2 += 8;
        sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
    }

    _mm256_store_ps(TmpRes, sum256);
    float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7];

    return sum;
}

static float
InnerProductDistanceSIMD16ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    return 1.0f - InnerProductSIMD16ExtAVX(pVect1v, pVect2v, qty_ptr);
}

#endif

#if defined(USE_SSE)

static float
InnerProductSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    float PORTABLE_ALIGN32 TmpRes[8];
    float *pVect1 = (float *) pVect1v;
    float *pVect2 = (float *) pVect2v;
    size_t qty = *((size_t *) qty_ptr);

    size_t qty16 = qty / 16;

    const float *pEnd1 = pVect1 + 16 * qty16;

    __m128 v1, v2;
    __m128 sum_prod = _mm_set1_ps(0);

    while (pVect1 < pEnd1) {
        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));

        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));

        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));

        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
    }
    _mm_store_ps(TmpRes, sum_prod);
    float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];

    return sum;
}

static float
InnerProductDistanceSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    return 1.0f - InnerProductSIMD16ExtSSE(pVect1v, pVect2v, qty_ptr);
}

#endif

#if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512)
static DISTFUNC<float> InnerProductSIMD16Ext = InnerProductSIMD16ExtSSE;
static DISTFUNC<float> InnerProductSIMD4Ext = InnerProductSIMD4ExtSSE;
static DISTFUNC<float> InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtSSE;
static DISTFUNC<float> InnerProductDistanceSIMD4Ext = InnerProductDistanceSIMD4ExtSSE;

static float
InnerProductDistanceSIMD16ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    size_t qty = *((size_t *) qty_ptr);
    size_t qty16 = qty >> 4 << 4;
    float res = InnerProductSIMD16Ext(pVect1v, pVect2v, &qty16);
    float *pVect1 = (float *) pVect1v + qty16;
    float *pVect2 = (float *) pVect2v + qty16;

    size_t qty_left = qty - qty16;
    float res_tail = InnerProduct(pVect1, pVect2, &qty_left);
    return 1.0f - (res + res_tail);
}

static float
InnerProductDistanceSIMD4ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    size_t qty = *((size_t *) qty_ptr);
    size_t qty4 = qty >> 2 << 2;

    float res = InnerProductSIMD4Ext(pVect1v, pVect2v, &qty4);
    size_t qty_left = qty - qty4;

    float *pVect1 = (float *) pVect1v + qty4;
    float *pVect2 = (float *) pVect2v + qty4;
    float res_tail = InnerProduct(pVect1, pVect2, &qty_left);

    return 1.0f - (res + res_tail);
}
#endif

class InnerProductSpace : public SpaceInterface<float> {
    DISTFUNC<float> fstdistfunc_;
    size_t data_size_;
    size_t dim_;

 public:
    InnerProductSpace() : data_size_(0), dim_(0) { }

    InnerProductSpace(size_t dim) {
        fstdistfunc_ = InnerProductDistance;
#if defined(USE_AVX) || defined(USE_SSE) || defined(USE_AVX512)
    #if defined(USE_AVX512)
        if (AVX512Capable()) {
            InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX512;
            InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX512;
        } else if (AVXCapable()) {
            InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX;
            InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX;
        }
    #elif defined(USE_AVX)
        if (AVXCapable()) {
            InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX;
            InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX;
        }
    #endif
    #if defined(USE_AVX)
        if (AVXCapable()) {
            InnerProductSIMD4Ext = InnerProductSIMD4ExtAVX;
            InnerProductDistanceSIMD4Ext = InnerProductDistanceSIMD4ExtAVX;
        }
    #endif

        if (dim % 16 == 0)
            fstdistfunc_ = InnerProductDistanceSIMD16Ext;
        else if (dim % 4 == 0)
            fstdistfunc_ = InnerProductDistanceSIMD4Ext;
        else if (dim > 16)
            fstdistfunc_ = InnerProductDistanceSIMD16ExtResiduals;
        else if (dim > 4)
            fstdistfunc_ = InnerProductDistanceSIMD4ExtResiduals;
#endif
        dim_ = dim;
        data_size_ = dim * sizeof(float);
    }

    size_t get_data_size() {
        return data_size_;
    }

    DISTFUNC<float> get_dist_func() {
        return fstdistfunc_;
    }

    void *get_dist_func_param() {
        return &dim_;
    }

    ~InnerProductSpace() {}
};

}  // namespace hnswlib
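The rewritten InnerProductSpace exposes the usual SpaceInterface surface: a dimension-taking constructor plus get_data_size, get_dist_func, and get_dist_func_param. Below is a minimal sketch of exercising that surface directly from C++; the standalone main, the test vectors, and the include path are illustrative assumptions, not part of the gem's extension code.

#include <cstddef>
#include <vector>
#include "hnswlib.h"  // vendored header that pulls in space_ip.h

int main() {
    size_t dim = 6;  // not a multiple of 4 or 16, so a *Residuals distance is selected when SIMD is enabled
    hnswlib::InnerProductSpace space(dim);

    std::vector<float> a(dim, 0.5f);
    std::vector<float> b(dim, 0.25f);

    hnswlib::DISTFUNC<float> dist = space.get_dist_func();
    float d = dist(a.data(), b.data(), space.get_dist_func_param());
    // d equals 1.0f minus the dot product of a and b, i.e. the inner-product distance defined above
    return d < 1.0f ? 0 : 1;
}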