hnswlib 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/ext/hnswlib/hnswlibext.cpp +1 -1
- data/ext/hnswlib/hnswlibext.hpp +194 -62
- data/ext/hnswlib/src/bruteforce.h +142 -131
- data/ext/hnswlib/src/hnswalg.h +1028 -964
- data/ext/hnswlib/src/hnswlib.h +74 -66
- data/ext/hnswlib/src/space_ip.h +299 -299
- data/ext/hnswlib/src/space_l2.h +268 -273
- data/ext/hnswlib/src/visited_list_pool.h +54 -55
- data/lib/hnswlib/version.rb +2 -2
- data/lib/hnswlib.rb +17 -10
- data/sig/hnswlib.rbs +6 -6
- metadata +3 -3
data/ext/hnswlib/src/space_ip.h
CHANGED
@@ -3,375 +3,375 @@
[The removed side of this hunk was not captured in this view; the fragments that did survive ("}", "return res;", the SSE tail loop, the AVX dispatch block) match the added lines apart from leading whitespace, consistent with the style-only re-indentation applied across hnswlib 0.7.0.]
 
 namespace hnswlib {
 
+static float
+InnerProduct(const void *pVect1, const void *pVect2, const void *qty_ptr) {
+    size_t qty = *((size_t *) qty_ptr);
+    float res = 0;
+    for (unsigned i = 0; i < qty; i++) {
+        res += ((float *) pVect1)[i] * ((float *) pVect2)[i];
     }
+    return res;
+}
 
+static float
+InnerProductDistance(const void *pVect1, const void *pVect2, const void *qty_ptr) {
+    return 1.0f - InnerProduct(pVect1, pVect2, qty_ptr);
+}
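The 0.7.0 layout keeps InnerProduct as the raw dot product and derives the distance as 1 minus it, so the SIMD kernels and residual wrappers below can reuse the raw product. A standalone sketch of the arithmetic (illustration only, not part of the diff; for unit-length vectors the distance equals 1 - cos(theta)):

    #include <cstdio>
    #include <cstddef>

    // Scalar reference mirroring InnerProduct/InnerProductDistance above.
    static float dot(const float *a, const float *b, size_t n) {
        float res = 0;
        for (size_t i = 0; i < n; i++) res += a[i] * b[i];
        return res;
    }

    int main() {
        const float a[4] = {0.6f, 0.8f, 0.0f, 0.0f};  // unit length
        const float b[4] = {1.0f, 0.0f, 0.0f, 0.0f};  // unit length
        std::printf("%f\n", 1.0f - dot(a, b, 4));     // 0.400000 = 1 - cos(theta)
        return 0;
    }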
 
 #if defined(USE_AVX)
 
 // Favor using AVX if available.
+static float
+InnerProductSIMD4ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+    float PORTABLE_ALIGN32 TmpRes[8];
+    float *pVect1 = (float *) pVect1v;
+    float *pVect2 = (float *) pVect2v;
+    size_t qty = *((size_t *) qty_ptr);
+
+    size_t qty16 = qty / 16;
+    size_t qty4 = qty / 4;
+
+    const float *pEnd1 = pVect1 + 16 * qty16;
+    const float *pEnd2 = pVect1 + 4 * qty4;
+
+    __m256 sum256 = _mm256_set1_ps(0);
+
+    while (pVect1 < pEnd1) {
+        //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
+
+        __m256 v1 = _mm256_loadu_ps(pVect1);
+        pVect1 += 8;
+        __m256 v2 = _mm256_loadu_ps(pVect2);
+        pVect2 += 8;
+        sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
+
+        v1 = _mm256_loadu_ps(pVect1);
+        pVect1 += 8;
+        v2 = _mm256_loadu_ps(pVect2);
+        pVect2 += 8;
+        sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
+    }
 
+    __m128 v1, v2;
+    __m128 sum_prod = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), _mm256_extractf128_ps(sum256, 1));
 
+    while (pVect1 < pEnd2) {
+        v1 = _mm_loadu_ps(pVect1);
+        pVect1 += 4;
+        v2 = _mm_loadu_ps(pVect2);
+        pVect2 += 4;
+        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
     }
 
+    _mm_store_ps(TmpRes, sum_prod);
+    float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
+    return sum;
+}
+
+static float
+InnerProductDistanceSIMD4ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+    return 1.0f - InnerProductSIMD4ExtAVX(pVect1v, pVect2v, qty_ptr);
+}
 
 #endif
 
 #if defined(USE_SSE)
 
+static float
+InnerProductSIMD4ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+    float PORTABLE_ALIGN32 TmpRes[8];
+    float *pVect1 = (float *) pVect1v;
+    float *pVect2 = (float *) pVect2v;
+    size_t qty = *((size_t *) qty_ptr);
+
+    size_t qty16 = qty / 16;
+    size_t qty4 = qty / 4;
+
+    const float *pEnd1 = pVect1 + 16 * qty16;
+    const float *pEnd2 = pVect1 + 4 * qty4;
+
+    __m128 v1, v2;
+    __m128 sum_prod = _mm_set1_ps(0);
+
+    while (pVect1 < pEnd1) {
+        v1 = _mm_loadu_ps(pVect1);
+        pVect1 += 4;
+        v2 = _mm_loadu_ps(pVect2);
+        pVect2 += 4;
+        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
+
+        v1 = _mm_loadu_ps(pVect1);
+        pVect1 += 4;
+        v2 = _mm_loadu_ps(pVect2);
+        pVect2 += 4;
+        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
+
+        v1 = _mm_loadu_ps(pVect1);
+        pVect1 += 4;
+        v2 = _mm_loadu_ps(pVect2);
+        pVect2 += 4;
+        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
+
+        v1 = _mm_loadu_ps(pVect1);
+        pVect1 += 4;
+        v2 = _mm_loadu_ps(pVect2);
+        pVect2 += 4;
+        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
+    }
 
+    while (pVect1 < pEnd2) {
+        v1 = _mm_loadu_ps(pVect1);
+        pVect1 += 4;
+        v2 = _mm_loadu_ps(pVect2);
+        pVect2 += 4;
+        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
+    }
 
+    _mm_store_ps(TmpRes, sum_prod);
+    float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
 
+    return sum;
+}
 
+static float
+InnerProductDistanceSIMD4ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+    return 1.0f - InnerProductSIMD4ExtSSE(pVect1v, pVect2v, qty_ptr);
+}
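The SSE kernel unrolls the 4-lane loop four times, so the first while consumes 16 floats per iteration up to pEnd1 = 16 * (qty / 16), and the second while covers the remaining multiple-of-4 block up to pEnd2 = 4 * (qty / 4); any leftover qty % 4 elements are handled by the scalar residual wrappers further down. A standalone sketch of that split for a hypothetical qty (illustration only, not from the diff):

    #include <cstdio>
    #include <cstddef>

    int main() {
        size_t qty = 100;                    // hypothetical dimension
        size_t head = 16 * (qty / 16);       // 96 floats: unrolled-x4 loop
        size_t tail = 4 * (qty / 4) - head;  // 4 floats: single-step loop
        size_t rest = qty - 4 * (qty / 4);   // 0 here; nonzero only if qty % 4 != 0
        std::printf("head=%zu tail=%zu rest=%zu\n", head, tail, rest);
        return 0;
    }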
 
 #endif
 
 
 #if defined(USE_AVX512)
 
+static float
+InnerProductSIMD16ExtAVX512(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+    float PORTABLE_ALIGN64 TmpRes[16];
+    float *pVect1 = (float *) pVect1v;
+    float *pVect2 = (float *) pVect2v;
+    size_t qty = *((size_t *) qty_ptr);
 
+    size_t qty16 = qty / 16;
 
 
+    const float *pEnd1 = pVect1 + 16 * qty16;
 
+    __m512 sum512 = _mm512_set1_ps(0);
 
+    while (pVect1 < pEnd1) {
+        //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
 
+        __m512 v1 = _mm512_loadu_ps(pVect1);
+        pVect1 += 16;
+        __m512 v2 = _mm512_loadu_ps(pVect2);
+        pVect2 += 16;
+        sum512 = _mm512_add_ps(sum512, _mm512_mul_ps(v1, v2));
+    }
 
+    _mm512_store_ps(TmpRes, sum512);
+    float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7] + TmpRes[8] + TmpRes[9] + TmpRes[10] + TmpRes[11] + TmpRes[12] + TmpRes[13] + TmpRes[14] + TmpRes[15];
 
+    return sum;
+}
 
+static float
+InnerProductDistanceSIMD16ExtAVX512(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+    return 1.0f - InnerProductSIMD16ExtAVX512(pVect1v, pVect2v, qty_ptr);
+}
 
 #endif
 
 #if defined(USE_AVX)
 
+static float
+InnerProductSIMD16ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+    float PORTABLE_ALIGN32 TmpRes[8];
+    float *pVect1 = (float *) pVect1v;
+    float *pVect2 = (float *) pVect2v;
+    size_t qty = *((size_t *) qty_ptr);
 
+    size_t qty16 = qty / 16;
 
 
+    const float *pEnd1 = pVect1 + 16 * qty16;
 
+    __m256 sum256 = _mm256_set1_ps(0);
 
+    while (pVect1 < pEnd1) {
+        //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
 
+        __m256 v1 = _mm256_loadu_ps(pVect1);
+        pVect1 += 8;
+        __m256 v2 = _mm256_loadu_ps(pVect2);
+        pVect2 += 8;
+        sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
 
+        v1 = _mm256_loadu_ps(pVect1);
+        pVect1 += 8;
+        v2 = _mm256_loadu_ps(pVect2);
+        pVect2 += 8;
+        sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
+    }
 
+    _mm256_store_ps(TmpRes, sum256);
+    float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7];
 
+    return sum;
+}
 
+static float
+InnerProductDistanceSIMD16ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+    return 1.0f - InnerProductSIMD16ExtAVX(pVect1v, pVect2v, qty_ptr);
+}
 
 #endif
 
 #if defined(USE_SSE)
 
+static float
+InnerProductSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+    float PORTABLE_ALIGN32 TmpRes[8];
+    float *pVect1 = (float *) pVect1v;
+    float *pVect2 = (float *) pVect2v;
+    size_t qty = *((size_t *) qty_ptr);
+
+    size_t qty16 = qty / 16;
+
+    const float *pEnd1 = pVect1 + 16 * qty16;
+
+    __m128 v1, v2;
+    __m128 sum_prod = _mm_set1_ps(0);
+
+    while (pVect1 < pEnd1) {
+        v1 = _mm_loadu_ps(pVect1);
+        pVect1 += 4;
+        v2 = _mm_loadu_ps(pVect2);
+        pVect2 += 4;
+        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
+
+        v1 = _mm_loadu_ps(pVect1);
+        pVect1 += 4;
+        v2 = _mm_loadu_ps(pVect2);
+        pVect2 += 4;
+        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
+
+        v1 = _mm_loadu_ps(pVect1);
+        pVect1 += 4;
+        v2 = _mm_loadu_ps(pVect2);
+        pVect2 += 4;
+        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
+
+        v1 = _mm_loadu_ps(pVect1);
+        pVect1 += 4;
+        v2 = _mm_loadu_ps(pVect2);
+        pVect2 += 4;
+        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
     }
+    _mm_store_ps(TmpRes, sum_prod);
+    float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
 
+    return sum;
+}
+
+static float
+InnerProductDistanceSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+    return 1.0f - InnerProductSIMD16ExtSSE(pVect1v, pVect2v, qty_ptr);
+}
 
 #endif
 
 #if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512)
+static DISTFUNC<float> InnerProductSIMD16Ext = InnerProductSIMD16ExtSSE;
+static DISTFUNC<float> InnerProductSIMD4Ext = InnerProductSIMD4ExtSSE;
+static DISTFUNC<float> InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtSSE;
+static DISTFUNC<float> InnerProductDistanceSIMD4Ext = InnerProductDistanceSIMD4ExtSSE;
+
+static float
+InnerProductDistanceSIMD16ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+    size_t qty = *((size_t *) qty_ptr);
+    size_t qty16 = qty >> 4 << 4;
+    float res = InnerProductSIMD16Ext(pVect1v, pVect2v, &qty16);
+    float *pVect1 = (float *) pVect1v + qty16;
+    float *pVect2 = (float *) pVect2v + qty16;
+
+    size_t qty_left = qty - qty16;
+    float res_tail = InnerProduct(pVect1, pVect2, &qty_left);
+    return 1.0f - (res + res_tail);
+}
 
+static float
+InnerProductDistanceSIMD4ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+    size_t qty = *((size_t *) qty_ptr);
+    size_t qty4 = qty >> 2 << 2;
 
+    float res = InnerProductSIMD4Ext(pVect1v, pVect2v, &qty4);
+    size_t qty_left = qty - qty4;
 
+    float *pVect1 = (float *) pVect1v + qty4;
+    float *pVect2 = (float *) pVect2v + qty4;
+    float res_tail = InnerProduct(pVect1, pVect2, &qty_left);
 
+    return 1.0f - (res + res_tail);
+}
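In the residual wrappers, qty >> 4 << 4 (and qty >> 2 << 2) rounds qty down to a multiple of 16 (or 4): the SIMD kernel handles the aligned head, the scalar InnerProduct handles the leftover tail, and the result is converted to a distance once at the end. A standalone sketch of the head/tail arithmetic for a hypothetical qty of 70 (illustration only):

    #include <cstdio>
    #include <cstddef>

    int main() {
        size_t qty = 70;                // hypothetical dimension
        size_t qty16 = qty >> 4 << 4;   // 64: largest multiple of 16 <= 70
        size_t qty_left = qty - qty16;  // 6 floats for the scalar tail
        std::printf("SIMD head=%zu scalar tail=%zu\n", qty16, qty_left);
        return 0;
    }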
 #endif
 
+class InnerProductSpace : public SpaceInterface<float> {
+    DISTFUNC<float> fstdistfunc_;
+    size_t data_size_;
+    size_t dim_;
+
+ public:
+    InnerProductSpace() : data_size_(0), dim_(0) { }
+
+    InnerProductSpace(size_t dim) {
+        fstdistfunc_ = InnerProductDistance;
+#if defined(USE_AVX) || defined(USE_SSE) || defined(USE_AVX512)
+    #if defined(USE_AVX512)
+        if (AVX512Capable()) {
+            InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX512;
+            InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX512;
+        } else if (AVXCapable()) {
+            InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX;
+            InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX;
+        }
+    #elif defined(USE_AVX)
+        if (AVXCapable()) {
+            InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX;
+            InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX;
+        }
     #endif
+    #if defined(USE_AVX)
+        if (AVXCapable()) {
+            InnerProductSIMD4Ext = InnerProductSIMD4ExtAVX;
+            InnerProductDistanceSIMD4Ext = InnerProductDistanceSIMD4ExtAVX;
         }
+    #endif
 
+        if (dim % 16 == 0)
+            fstdistfunc_ = InnerProductDistanceSIMD16Ext;
+        else if (dim % 4 == 0)
+            fstdistfunc_ = InnerProductDistanceSIMD4Ext;
+        else if (dim > 16)
+            fstdistfunc_ = InnerProductDistanceSIMD16ExtResiduals;
+        else if (dim > 4)
+            fstdistfunc_ = InnerProductDistanceSIMD4ExtResiduals;
+#endif
+        dim_ = dim;
+        data_size_ = dim * sizeof(float);
+    }
 
+    size_t get_data_size() {
+        return data_size_;
+    }
 
+    DISTFUNC<float> get_dist_func() {
+        return fstdistfunc_;
+    }
 
+    void *get_dist_func_param() {
+        return &dim_;
+    }
 
+    ~InnerProductSpace() {}
+};
+
+} // namespace hnswlib
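For reference, a minimal sketch of how a C++ caller (for instance the gem's hnswlibext.hpp bindings) consumes this space: construct it with the dimension, then call the kernel selected by the constructor through get_dist_func() and get_dist_func_param(). Illustration only, assuming the bundled hnswlib.h is on the include path:

    #include <cstdio>
    #include "hnswlib.h"

    int main() {
        hnswlib::InnerProductSpace space(4);  // dim % 4 == 0, so a SIMD4 kernel may be picked
        hnswlib::DISTFUNC<float> dist = space.get_dist_func();
        float a[4] = {0.6f, 0.8f, 0.0f, 0.0f};
        float b[4] = {1.0f, 0.0f, 0.0f, 0.0f};
        float d = dist(a, b, space.get_dist_func_param());
        std::printf("%f\n", d);  // 0.400000, i.e. 1 - <a, b>
        return 0;
    }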