hnswlib 0.6.2 → 0.7.0

Diff of the inner-product distance implementations (space_ip.h). The 0.7.0 release reformats the file (consistent 4-space indentation), makes the SIMD dispatch function pointers `static` (avoiding multiple-definition issues when the header is included from several translation units), and removes a stray double semicolon in the AVX kernel.
@@ -3,375 +3,375 @@
3
3
 
4
4
  namespace hnswlib {
5
5
 
6
- static float
7
- InnerProduct(const void *pVect1, const void *pVect2, const void *qty_ptr) {
8
- size_t qty = *((size_t *) qty_ptr);
9
- float res = 0;
10
- for (unsigned i = 0; i < qty; i++) {
11
- res += ((float *) pVect1)[i] * ((float *) pVect2)[i];
12
- }
13
- return res;
14
-
6
+ static float
7
+ InnerProduct(const void *pVect1, const void *pVect2, const void *qty_ptr) {
8
+ size_t qty = *((size_t *) qty_ptr);
9
+ float res = 0;
10
+ for (unsigned i = 0; i < qty; i++) {
11
+ res += ((float *) pVect1)[i] * ((float *) pVect2)[i];
15
12
  }
13
+ return res;
14
+ }
16
15
 
17
- static float
18
- InnerProductDistance(const void *pVect1, const void *pVect2, const void *qty_ptr) {
19
- return 1.0f - InnerProduct(pVect1, pVect2, qty_ptr);
20
- }
16
+ static float
17
+ InnerProductDistance(const void *pVect1, const void *pVect2, const void *qty_ptr) {
18
+ return 1.0f - InnerProduct(pVect1, pVect2, qty_ptr);
19
+ }
21
20
 
22
21
#if defined(USE_AVX)

// Favor using AVX if available.
// Inner product over a vector whose length is a multiple of 4: AVX handles
// the multiple-of-16 prefix (two 8-wide steps per iteration), SSE handles the
// remaining groups of 4. Residual elements (qty % 4) are NOT handled here —
// see the *Residuals wrappers.
static float
InnerProductSIMD4ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    float PORTABLE_ALIGN32 TmpRes[8];
    float *pVect1 = (float *) pVect1v;
    float *pVect2 = (float *) pVect2v;
    size_t qty = *((size_t *) qty_ptr);

    size_t qty16 = qty / 16;
    size_t qty4 = qty / 4;

    const float *pEnd1 = pVect1 + 16 * qty16;
    const float *pEnd2 = pVect1 + 4 * qty4;

    __m256 sum256 = _mm256_set1_ps(0);

    // Main loop: 16 floats per iteration via two 8-wide AVX multiply-adds.
    while (pVect1 < pEnd1) {
        //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);

        __m256 v1 = _mm256_loadu_ps(pVect1);
        pVect1 += 8;
        __m256 v2 = _mm256_loadu_ps(pVect2);
        pVect2 += 8;
        sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));

        v1 = _mm256_loadu_ps(pVect1);
        pVect1 += 8;
        v2 = _mm256_loadu_ps(pVect2);
        pVect2 += 8;
        sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
    }

    // Fold the 256-bit accumulator into a 128-bit one before the SSE tail.
    __m128 v1, v2;
    __m128 sum_prod = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), _mm256_extractf128_ps(sum256, 1));

    // Tail loop: remaining multiple-of-4 chunk, 4 floats at a time.
    while (pVect1 < pEnd2) {
        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
    }

    _mm_store_ps(TmpRes, sum_prod);
    float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
    return sum;
}

static float
InnerProductDistanceSIMD4ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    return 1.0f - InnerProductSIMD4ExtAVX(pVect1v, pVect2v, qty_ptr);
}

#endif
78
77
 
79
78
#if defined(USE_SSE)

// SSE-only counterpart of InnerProductSIMD4ExtAVX: the multiple-of-16 prefix
// is processed as four unrolled 4-wide steps, then the remaining multiple-of-4
// chunk one 4-wide step at a time. Residuals (qty % 4) are handled elsewhere.
static float
InnerProductSIMD4ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    float PORTABLE_ALIGN32 TmpRes[8];
    float *pVect1 = (float *) pVect1v;
    float *pVect2 = (float *) pVect2v;
    size_t qty = *((size_t *) qty_ptr);

    size_t qty16 = qty / 16;
    size_t qty4 = qty / 4;

    const float *pEnd1 = pVect1 + 16 * qty16;
    const float *pEnd2 = pVect1 + 4 * qty4;

    __m128 v1, v2;
    __m128 sum_prod = _mm_set1_ps(0);

    // Main loop: 16 floats per iteration (4 unrolled 4-wide multiply-adds).
    while (pVect1 < pEnd1) {
        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));

        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));

        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));

        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
    }

    // Tail loop: remaining multiple-of-4 chunk.
    while (pVect1 < pEnd2) {
        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
    }

    _mm_store_ps(TmpRes, sum_prod);
    float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];

    return sum;
}

static float
InnerProductDistanceSIMD4ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    return 1.0f - InnerProductSIMD4ExtSSE(pVect1v, pVect2v, qty_ptr);
}

#endif
143
142
 
144
143
 
145
144
#if defined(USE_AVX512)

// AVX-512 inner product for vectors whose length is a multiple of 16:
// one 16-wide multiply-add per iteration, then a horizontal sum of the
// 16-lane accumulator. Residuals (qty % 16) are handled elsewhere.
static float
InnerProductSIMD16ExtAVX512(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    float PORTABLE_ALIGN64 TmpRes[16];
    float *pVect1 = (float *) pVect1v;
    float *pVect2 = (float *) pVect2v;
    size_t qty = *((size_t *) qty_ptr);

    size_t qty16 = qty / 16;


    const float *pEnd1 = pVect1 + 16 * qty16;

    __m512 sum512 = _mm512_set1_ps(0);

    while (pVect1 < pEnd1) {
        //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);

        __m512 v1 = _mm512_loadu_ps(pVect1);
        pVect1 += 16;
        __m512 v2 = _mm512_loadu_ps(pVect2);
        pVect2 += 16;
        sum512 = _mm512_add_ps(sum512, _mm512_mul_ps(v1, v2));
    }

    _mm512_store_ps(TmpRes, sum512);
    float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7] + TmpRes[8] + TmpRes[9] + TmpRes[10] + TmpRes[11] + TmpRes[12] + TmpRes[13] + TmpRes[14] + TmpRes[15];

    return sum;
}

static float
InnerProductDistanceSIMD16ExtAVX512(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    return 1.0f - InnerProductSIMD16ExtAVX512(pVect1v, pVect2v, qty_ptr);
}

#endif
183
182
 
184
183
#if defined(USE_AVX)

// AVX inner product for vectors whose length is a multiple of 16:
// two unrolled 8-wide multiply-adds per iteration, then a horizontal sum.
// Residuals (qty % 16) are handled elsewhere.
static float
InnerProductSIMD16ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    float PORTABLE_ALIGN32 TmpRes[8];
    float *pVect1 = (float *) pVect1v;
    float *pVect2 = (float *) pVect2v;
    size_t qty = *((size_t *) qty_ptr);

    size_t qty16 = qty / 16;


    const float *pEnd1 = pVect1 + 16 * qty16;

    __m256 sum256 = _mm256_set1_ps(0);

    while (pVect1 < pEnd1) {
        //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);

        __m256 v1 = _mm256_loadu_ps(pVect1);
        pVect1 += 8;
        __m256 v2 = _mm256_loadu_ps(pVect2);
        pVect2 += 8;
        sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));

        v1 = _mm256_loadu_ps(pVect1);
        pVect1 += 8;
        v2 = _mm256_loadu_ps(pVect2);
        pVect2 += 8;
        sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
    }

    _mm256_store_ps(TmpRes, sum256);
    float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7];

    return sum;
}

static float
InnerProductDistanceSIMD16ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    return 1.0f - InnerProductSIMD16ExtAVX(pVect1v, pVect2v, qty_ptr);
}

#endif
228
227
 
229
228
#if defined(USE_SSE)

// SSE inner product for vectors whose length is a multiple of 16:
// four unrolled 4-wide multiply-adds per iteration, then a horizontal sum.
// Residuals (qty % 16) are handled elsewhere.
static float
InnerProductSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    float PORTABLE_ALIGN32 TmpRes[8];
    float *pVect1 = (float *) pVect1v;
    float *pVect2 = (float *) pVect2v;
    size_t qty = *((size_t *) qty_ptr);

    size_t qty16 = qty / 16;

    const float *pEnd1 = pVect1 + 16 * qty16;

    __m128 v1, v2;
    __m128 sum_prod = _mm_set1_ps(0);

    while (pVect1 < pEnd1) {
        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));

        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));

        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));

        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
    }
    _mm_store_ps(TmpRes, sum_prod);
    float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];

    return sum;
}

static float
InnerProductDistanceSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    return 1.0f - InnerProductSIMD16ExtSSE(pVect1v, pVect2v, qty_ptr);
}

#endif
282
281
 
283
282
#if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512)
// Dispatch function pointers, initially the SSE implementations; upgraded at
// InnerProductSpace construction time if AVX/AVX512 are available.
// `static` (new in 0.7.0) gives each translation unit its own copy, avoiding
// multiple-definition link errors for this header-only library.
static DISTFUNC<float> InnerProductSIMD16Ext = InnerProductSIMD16ExtSSE;
static DISTFUNC<float> InnerProductSIMD4Ext = InnerProductSIMD4ExtSSE;
static DISTFUNC<float> InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtSSE;
static DISTFUNC<float> InnerProductDistanceSIMD4Ext = InnerProductDistanceSIMD4ExtSSE;

// For dim > 16 not a multiple of 16: SIMD over the multiple-of-16 prefix,
// scalar over the remaining (qty % 16) elements.
static float
InnerProductDistanceSIMD16ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    size_t qty = *((size_t *) qty_ptr);
    size_t qty16 = qty >> 4 << 4;  // round down to a multiple of 16
    float res = InnerProductSIMD16Ext(pVect1v, pVect2v, &qty16);
    float *pVect1 = (float *) pVect1v + qty16;
    float *pVect2 = (float *) pVect2v + qty16;

    size_t qty_left = qty - qty16;
    float res_tail = InnerProduct(pVect1, pVect2, &qty_left);
    return 1.0f - (res + res_tail);
}

// For dim > 4 not a multiple of 4: SIMD over the multiple-of-4 prefix,
// scalar over the remaining (qty % 4) elements.
static float
InnerProductDistanceSIMD4ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    size_t qty = *((size_t *) qty_ptr);
    size_t qty4 = qty >> 2 << 2;  // round down to a multiple of 4

    float res = InnerProductSIMD4Ext(pVect1v, pVect2v, &qty4);
    size_t qty_left = qty - qty4;

    float *pVect1 = (float *) pVect1v + qty4;
    float *pVect2 = (float *) pVect2v + qty4;
    float res_tail = InnerProduct(pVect1, pVect2, &qty_left);

    return 1.0f - (res + res_tail);
}
#endif
317
316
 
318
- class InnerProductSpace : public SpaceInterface<float> {
319
-
320
- DISTFUNC<float> fstdistfunc_;
321
- size_t data_size_;
322
- size_t dim_;
323
- public:
324
- InnerProductSpace() : data_size_(0), dim_(0) { }
325
- InnerProductSpace(size_t dim) {
326
- fstdistfunc_ = InnerProductDistance;
327
- #if defined(USE_AVX) || defined(USE_SSE) || defined(USE_AVX512)
328
- #if defined(USE_AVX512)
329
- if (AVX512Capable()) {
330
- InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX512;
331
- InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX512;
332
- } else if (AVXCapable()) {
333
- InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX;
334
- InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX;
335
- }
336
- #elif defined(USE_AVX)
337
- if (AVXCapable()) {
338
- InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX;
339
- InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX;
340
- }
341
- #endif
342
- #if defined(USE_AVX)
343
- if (AVXCapable()) {
344
- InnerProductSIMD4Ext = InnerProductSIMD4ExtAVX;
345
- InnerProductDistanceSIMD4Ext = InnerProductDistanceSIMD4ExtAVX;
346
- }
347
- #endif
348
-
349
- if (dim % 16 == 0)
350
- fstdistfunc_ = InnerProductDistanceSIMD16Ext;
351
- else if (dim % 4 == 0)
352
- fstdistfunc_ = InnerProductDistanceSIMD4Ext;
353
- else if (dim > 16)
354
- fstdistfunc_ = InnerProductDistanceSIMD16ExtResiduals;
355
- else if (dim > 4)
356
- fstdistfunc_ = InnerProductDistanceSIMD4ExtResiduals;
317
+ class InnerProductSpace : public SpaceInterface<float> {
318
+ DISTFUNC<float> fstdistfunc_;
319
+ size_t data_size_;
320
+ size_t dim_;
321
+
322
+ public:
323
+ InnerProductSpace() : data_size_(0), dim_(0) { }
324
+
325
+ InnerProductSpace(size_t dim) {
326
+ fstdistfunc_ = InnerProductDistance;
327
+ #if defined(USE_AVX) || defined(USE_SSE) || defined(USE_AVX512)
328
+ #if defined(USE_AVX512)
329
+ if (AVX512Capable()) {
330
+ InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX512;
331
+ InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX512;
332
+ } else if (AVXCapable()) {
333
+ InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX;
334
+ InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX;
335
+ }
336
+ #elif defined(USE_AVX)
337
+ if (AVXCapable()) {
338
+ InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX;
339
+ InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX;
340
+ }
357
341
  #endif
358
- dim_ = dim;
359
- data_size_ = dim * sizeof(float);
342
+ #if defined(USE_AVX)
343
+ if (AVXCapable()) {
344
+ InnerProductSIMD4Ext = InnerProductSIMD4ExtAVX;
345
+ InnerProductDistanceSIMD4Ext = InnerProductDistanceSIMD4ExtAVX;
360
346
  }
347
+ #endif
361
348
 
362
- size_t get_data_size() {
363
- return data_size_;
364
- }
349
+ if (dim % 16 == 0)
350
+ fstdistfunc_ = InnerProductDistanceSIMD16Ext;
351
+ else if (dim % 4 == 0)
352
+ fstdistfunc_ = InnerProductDistanceSIMD4Ext;
353
+ else if (dim > 16)
354
+ fstdistfunc_ = InnerProductDistanceSIMD16ExtResiduals;
355
+ else if (dim > 4)
356
+ fstdistfunc_ = InnerProductDistanceSIMD4ExtResiduals;
357
+ #endif
358
+ dim_ = dim;
359
+ data_size_ = dim * sizeof(float);
360
+ }
365
361
 
366
- DISTFUNC<float> get_dist_func() {
367
- return fstdistfunc_;
368
- }
362
+ size_t get_data_size() {
363
+ return data_size_;
364
+ }
369
365
 
370
- void *get_dist_func_param() {
371
- return &dim_;
372
- }
366
+ DISTFUNC<float> get_dist_func() {
367
+ return fstdistfunc_;
368
+ }
373
369
 
374
- ~InnerProductSpace() {}
375
- };
370
+ void *get_dist_func_param() {
371
+ return &dim_;
372
+ }
376
373
 
377
- }
374
+ ~InnerProductSpace() {}
375
+ };
376
+
377
+ } // namespace hnswlib