hnswlib 0.6.2 → 0.7.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -3,375 +3,375 @@
3
3
 
4
4
  namespace hnswlib {
5
5
 
6
- static float
7
- InnerProduct(const void *pVect1, const void *pVect2, const void *qty_ptr) {
8
- size_t qty = *((size_t *) qty_ptr);
9
- float res = 0;
10
- for (unsigned i = 0; i < qty; i++) {
11
- res += ((float *) pVect1)[i] * ((float *) pVect2)[i];
12
- }
13
- return res;
14
-
6
+ static float
7
+ InnerProduct(const void *pVect1, const void *pVect2, const void *qty_ptr) {
8
+ size_t qty = *((size_t *) qty_ptr);
9
+ float res = 0;
10
+ for (unsigned i = 0; i < qty; i++) {
11
+ res += ((float *) pVect1)[i] * ((float *) pVect2)[i];
15
12
  }
13
+ return res;
14
+ }
16
15
 
17
- static float
18
- InnerProductDistance(const void *pVect1, const void *pVect2, const void *qty_ptr) {
19
- return 1.0f - InnerProduct(pVect1, pVect2, qty_ptr);
20
- }
16
+ static float
17
+ InnerProductDistance(const void *pVect1, const void *pVect2, const void *qty_ptr) {
18
+ return 1.0f - InnerProduct(pVect1, pVect2, qty_ptr);
19
+ }
21
20
 
22
21
  #if defined(USE_AVX)
23
22
 
24
23
  // Favor using AVX if available.
25
- static float
26
- InnerProductSIMD4ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
27
- float PORTABLE_ALIGN32 TmpRes[8];
28
- float *pVect1 = (float *) pVect1v;
29
- float *pVect2 = (float *) pVect2v;
30
- size_t qty = *((size_t *) qty_ptr);
31
-
32
- size_t qty16 = qty / 16;
33
- size_t qty4 = qty / 4;
34
-
35
- const float *pEnd1 = pVect1 + 16 * qty16;
36
- const float *pEnd2 = pVect1 + 4 * qty4;
37
-
38
- __m256 sum256 = _mm256_set1_ps(0);
39
-
40
- while (pVect1 < pEnd1) {
41
- //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
42
-
43
- __m256 v1 = _mm256_loadu_ps(pVect1);
44
- pVect1 += 8;
45
- __m256 v2 = _mm256_loadu_ps(pVect2);
46
- pVect2 += 8;
47
- sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
48
-
49
- v1 = _mm256_loadu_ps(pVect1);
50
- pVect1 += 8;
51
- v2 = _mm256_loadu_ps(pVect2);
52
- pVect2 += 8;
53
- sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
54
- }
55
-
56
- __m128 v1, v2;
57
- __m128 sum_prod = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), _mm256_extractf128_ps(sum256, 1));
24
+ static float
25
+ InnerProductSIMD4ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
26
+ float PORTABLE_ALIGN32 TmpRes[8];
27
+ float *pVect1 = (float *) pVect1v;
28
+ float *pVect2 = (float *) pVect2v;
29
+ size_t qty = *((size_t *) qty_ptr);
30
+
31
+ size_t qty16 = qty / 16;
32
+ size_t qty4 = qty / 4;
33
+
34
+ const float *pEnd1 = pVect1 + 16 * qty16;
35
+ const float *pEnd2 = pVect1 + 4 * qty4;
36
+
37
+ __m256 sum256 = _mm256_set1_ps(0);
38
+
39
+ while (pVect1 < pEnd1) {
40
+ //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
41
+
42
+ __m256 v1 = _mm256_loadu_ps(pVect1);
43
+ pVect1 += 8;
44
+ __m256 v2 = _mm256_loadu_ps(pVect2);
45
+ pVect2 += 8;
46
+ sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
47
+
48
+ v1 = _mm256_loadu_ps(pVect1);
49
+ pVect1 += 8;
50
+ v2 = _mm256_loadu_ps(pVect2);
51
+ pVect2 += 8;
52
+ sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
53
+ }
58
54
 
59
- while (pVect1 < pEnd2) {
60
- v1 = _mm_loadu_ps(pVect1);
61
- pVect1 += 4;
62
- v2 = _mm_loadu_ps(pVect2);
63
- pVect2 += 4;
64
- sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
65
- }
55
+ __m128 v1, v2;
56
+ __m128 sum_prod = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), _mm256_extractf128_ps(sum256, 1));
66
57
 
67
- _mm_store_ps(TmpRes, sum_prod);
68
- float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];;
69
- return sum;
58
+ while (pVect1 < pEnd2) {
59
+ v1 = _mm_loadu_ps(pVect1);
60
+ pVect1 += 4;
61
+ v2 = _mm_loadu_ps(pVect2);
62
+ pVect2 += 4;
63
+ sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
70
64
  }
71
65
 
72
- static float
73
- InnerProductDistanceSIMD4ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
74
- return 1.0f - InnerProductSIMD4ExtAVX(pVect1v, pVect2v, qty_ptr);
75
- }
66
+ _mm_store_ps(TmpRes, sum_prod);
67
+ float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
68
+ return sum;
69
+ }
70
+
71
+ static float
72
+ InnerProductDistanceSIMD4ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
73
+ return 1.0f - InnerProductSIMD4ExtAVX(pVect1v, pVect2v, qty_ptr);
74
+ }
76
75
 
77
76
  #endif
78
77
 
79
78
  #if defined(USE_SSE)
80
79
 
81
- static float
82
- InnerProductSIMD4ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
83
- float PORTABLE_ALIGN32 TmpRes[8];
84
- float *pVect1 = (float *) pVect1v;
85
- float *pVect2 = (float *) pVect2v;
86
- size_t qty = *((size_t *) qty_ptr);
87
-
88
- size_t qty16 = qty / 16;
89
- size_t qty4 = qty / 4;
90
-
91
- const float *pEnd1 = pVect1 + 16 * qty16;
92
- const float *pEnd2 = pVect1 + 4 * qty4;
93
-
94
- __m128 v1, v2;
95
- __m128 sum_prod = _mm_set1_ps(0);
96
-
97
- while (pVect1 < pEnd1) {
98
- v1 = _mm_loadu_ps(pVect1);
99
- pVect1 += 4;
100
- v2 = _mm_loadu_ps(pVect2);
101
- pVect2 += 4;
102
- sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
103
-
104
- v1 = _mm_loadu_ps(pVect1);
105
- pVect1 += 4;
106
- v2 = _mm_loadu_ps(pVect2);
107
- pVect2 += 4;
108
- sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
109
-
110
- v1 = _mm_loadu_ps(pVect1);
111
- pVect1 += 4;
112
- v2 = _mm_loadu_ps(pVect2);
113
- pVect2 += 4;
114
- sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
115
-
116
- v1 = _mm_loadu_ps(pVect1);
117
- pVect1 += 4;
118
- v2 = _mm_loadu_ps(pVect2);
119
- pVect2 += 4;
120
- sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
121
- }
80
+ static float
81
+ InnerProductSIMD4ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
82
+ float PORTABLE_ALIGN32 TmpRes[8];
83
+ float *pVect1 = (float *) pVect1v;
84
+ float *pVect2 = (float *) pVect2v;
85
+ size_t qty = *((size_t *) qty_ptr);
86
+
87
+ size_t qty16 = qty / 16;
88
+ size_t qty4 = qty / 4;
89
+
90
+ const float *pEnd1 = pVect1 + 16 * qty16;
91
+ const float *pEnd2 = pVect1 + 4 * qty4;
92
+
93
+ __m128 v1, v2;
94
+ __m128 sum_prod = _mm_set1_ps(0);
95
+
96
+ while (pVect1 < pEnd1) {
97
+ v1 = _mm_loadu_ps(pVect1);
98
+ pVect1 += 4;
99
+ v2 = _mm_loadu_ps(pVect2);
100
+ pVect2 += 4;
101
+ sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
102
+
103
+ v1 = _mm_loadu_ps(pVect1);
104
+ pVect1 += 4;
105
+ v2 = _mm_loadu_ps(pVect2);
106
+ pVect2 += 4;
107
+ sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
108
+
109
+ v1 = _mm_loadu_ps(pVect1);
110
+ pVect1 += 4;
111
+ v2 = _mm_loadu_ps(pVect2);
112
+ pVect2 += 4;
113
+ sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
114
+
115
+ v1 = _mm_loadu_ps(pVect1);
116
+ pVect1 += 4;
117
+ v2 = _mm_loadu_ps(pVect2);
118
+ pVect2 += 4;
119
+ sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
120
+ }
122
121
 
123
- while (pVect1 < pEnd2) {
124
- v1 = _mm_loadu_ps(pVect1);
125
- pVect1 += 4;
126
- v2 = _mm_loadu_ps(pVect2);
127
- pVect2 += 4;
128
- sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
129
- }
122
+ while (pVect1 < pEnd2) {
123
+ v1 = _mm_loadu_ps(pVect1);
124
+ pVect1 += 4;
125
+ v2 = _mm_loadu_ps(pVect2);
126
+ pVect2 += 4;
127
+ sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
128
+ }
130
129
 
131
- _mm_store_ps(TmpRes, sum_prod);
132
- float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
130
+ _mm_store_ps(TmpRes, sum_prod);
131
+ float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
133
132
 
134
- return sum;
135
- }
133
+ return sum;
134
+ }
136
135
 
137
- static float
138
- InnerProductDistanceSIMD4ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
139
- return 1.0f - InnerProductSIMD4ExtSSE(pVect1v, pVect2v, qty_ptr);
140
- }
136
+ static float
137
+ InnerProductDistanceSIMD4ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
138
+ return 1.0f - InnerProductSIMD4ExtSSE(pVect1v, pVect2v, qty_ptr);
139
+ }
141
140
 
142
141
  #endif
143
142
 
144
143
 
145
144
  #if defined(USE_AVX512)
146
145
 
147
- static float
148
- InnerProductSIMD16ExtAVX512(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
149
- float PORTABLE_ALIGN64 TmpRes[16];
150
- float *pVect1 = (float *) pVect1v;
151
- float *pVect2 = (float *) pVect2v;
152
- size_t qty = *((size_t *) qty_ptr);
146
+ static float
147
+ InnerProductSIMD16ExtAVX512(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
148
+ float PORTABLE_ALIGN64 TmpRes[16];
149
+ float *pVect1 = (float *) pVect1v;
150
+ float *pVect2 = (float *) pVect2v;
151
+ size_t qty = *((size_t *) qty_ptr);
153
152
 
154
- size_t qty16 = qty / 16;
153
+ size_t qty16 = qty / 16;
155
154
 
156
155
 
157
- const float *pEnd1 = pVect1 + 16 * qty16;
156
+ const float *pEnd1 = pVect1 + 16 * qty16;
158
157
 
159
- __m512 sum512 = _mm512_set1_ps(0);
158
+ __m512 sum512 = _mm512_set1_ps(0);
160
159
 
161
- while (pVect1 < pEnd1) {
162
- //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
160
+ while (pVect1 < pEnd1) {
161
+ //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
163
162
 
164
- __m512 v1 = _mm512_loadu_ps(pVect1);
165
- pVect1 += 16;
166
- __m512 v2 = _mm512_loadu_ps(pVect2);
167
- pVect2 += 16;
168
- sum512 = _mm512_add_ps(sum512, _mm512_mul_ps(v1, v2));
169
- }
163
+ __m512 v1 = _mm512_loadu_ps(pVect1);
164
+ pVect1 += 16;
165
+ __m512 v2 = _mm512_loadu_ps(pVect2);
166
+ pVect2 += 16;
167
+ sum512 = _mm512_add_ps(sum512, _mm512_mul_ps(v1, v2));
168
+ }
170
169
 
171
- _mm512_store_ps(TmpRes, sum512);
172
- float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7] + TmpRes[8] + TmpRes[9] + TmpRes[10] + TmpRes[11] + TmpRes[12] + TmpRes[13] + TmpRes[14] + TmpRes[15];
170
+ _mm512_store_ps(TmpRes, sum512);
171
+ float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7] + TmpRes[8] + TmpRes[9] + TmpRes[10] + TmpRes[11] + TmpRes[12] + TmpRes[13] + TmpRes[14] + TmpRes[15];
173
172
 
174
- return sum;
175
- }
173
+ return sum;
174
+ }
176
175
 
177
- static float
178
- InnerProductDistanceSIMD16ExtAVX512(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
179
- return 1.0f - InnerProductSIMD16ExtAVX512(pVect1v, pVect2v, qty_ptr);
180
- }
176
+ static float
177
+ InnerProductDistanceSIMD16ExtAVX512(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
178
+ return 1.0f - InnerProductSIMD16ExtAVX512(pVect1v, pVect2v, qty_ptr);
179
+ }
181
180
 
182
181
  #endif
183
182
 
184
183
  #if defined(USE_AVX)
185
184
 
186
- static float
187
- InnerProductSIMD16ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
188
- float PORTABLE_ALIGN32 TmpRes[8];
189
- float *pVect1 = (float *) pVect1v;
190
- float *pVect2 = (float *) pVect2v;
191
- size_t qty = *((size_t *) qty_ptr);
185
+ static float
186
+ InnerProductSIMD16ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
187
+ float PORTABLE_ALIGN32 TmpRes[8];
188
+ float *pVect1 = (float *) pVect1v;
189
+ float *pVect2 = (float *) pVect2v;
190
+ size_t qty = *((size_t *) qty_ptr);
192
191
 
193
- size_t qty16 = qty / 16;
192
+ size_t qty16 = qty / 16;
194
193
 
195
194
 
196
- const float *pEnd1 = pVect1 + 16 * qty16;
195
+ const float *pEnd1 = pVect1 + 16 * qty16;
197
196
 
198
- __m256 sum256 = _mm256_set1_ps(0);
197
+ __m256 sum256 = _mm256_set1_ps(0);
199
198
 
200
- while (pVect1 < pEnd1) {
201
- //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
199
+ while (pVect1 < pEnd1) {
200
+ //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
202
201
 
203
- __m256 v1 = _mm256_loadu_ps(pVect1);
204
- pVect1 += 8;
205
- __m256 v2 = _mm256_loadu_ps(pVect2);
206
- pVect2 += 8;
207
- sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
202
+ __m256 v1 = _mm256_loadu_ps(pVect1);
203
+ pVect1 += 8;
204
+ __m256 v2 = _mm256_loadu_ps(pVect2);
205
+ pVect2 += 8;
206
+ sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
208
207
 
209
- v1 = _mm256_loadu_ps(pVect1);
210
- pVect1 += 8;
211
- v2 = _mm256_loadu_ps(pVect2);
212
- pVect2 += 8;
213
- sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
214
- }
208
+ v1 = _mm256_loadu_ps(pVect1);
209
+ pVect1 += 8;
210
+ v2 = _mm256_loadu_ps(pVect2);
211
+ pVect2 += 8;
212
+ sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
213
+ }
215
214
 
216
- _mm256_store_ps(TmpRes, sum256);
217
- float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7];
215
+ _mm256_store_ps(TmpRes, sum256);
216
+ float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7];
218
217
 
219
- return sum;
220
- }
218
+ return sum;
219
+ }
221
220
 
222
- static float
223
- InnerProductDistanceSIMD16ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
224
- return 1.0f - InnerProductSIMD16ExtAVX(pVect1v, pVect2v, qty_ptr);
225
- }
221
+ static float
222
+ InnerProductDistanceSIMD16ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
223
+ return 1.0f - InnerProductSIMD16ExtAVX(pVect1v, pVect2v, qty_ptr);
224
+ }
226
225
 
227
226
  #endif
228
227
 
229
228
  #if defined(USE_SSE)
230
229
 
231
- static float
232
- InnerProductSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
233
- float PORTABLE_ALIGN32 TmpRes[8];
234
- float *pVect1 = (float *) pVect1v;
235
- float *pVect2 = (float *) pVect2v;
236
- size_t qty = *((size_t *) qty_ptr);
237
-
238
- size_t qty16 = qty / 16;
239
-
240
- const float *pEnd1 = pVect1 + 16 * qty16;
241
-
242
- __m128 v1, v2;
243
- __m128 sum_prod = _mm_set1_ps(0);
244
-
245
- while (pVect1 < pEnd1) {
246
- v1 = _mm_loadu_ps(pVect1);
247
- pVect1 += 4;
248
- v2 = _mm_loadu_ps(pVect2);
249
- pVect2 += 4;
250
- sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
251
-
252
- v1 = _mm_loadu_ps(pVect1);
253
- pVect1 += 4;
254
- v2 = _mm_loadu_ps(pVect2);
255
- pVect2 += 4;
256
- sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
257
-
258
- v1 = _mm_loadu_ps(pVect1);
259
- pVect1 += 4;
260
- v2 = _mm_loadu_ps(pVect2);
261
- pVect2 += 4;
262
- sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
263
-
264
- v1 = _mm_loadu_ps(pVect1);
265
- pVect1 += 4;
266
- v2 = _mm_loadu_ps(pVect2);
267
- pVect2 += 4;
268
- sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
269
- }
270
- _mm_store_ps(TmpRes, sum_prod);
271
- float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
272
-
273
- return sum;
230
+ static float
231
+ InnerProductSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
232
+ float PORTABLE_ALIGN32 TmpRes[8];
233
+ float *pVect1 = (float *) pVect1v;
234
+ float *pVect2 = (float *) pVect2v;
235
+ size_t qty = *((size_t *) qty_ptr);
236
+
237
+ size_t qty16 = qty / 16;
238
+
239
+ const float *pEnd1 = pVect1 + 16 * qty16;
240
+
241
+ __m128 v1, v2;
242
+ __m128 sum_prod = _mm_set1_ps(0);
243
+
244
+ while (pVect1 < pEnd1) {
245
+ v1 = _mm_loadu_ps(pVect1);
246
+ pVect1 += 4;
247
+ v2 = _mm_loadu_ps(pVect2);
248
+ pVect2 += 4;
249
+ sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
250
+
251
+ v1 = _mm_loadu_ps(pVect1);
252
+ pVect1 += 4;
253
+ v2 = _mm_loadu_ps(pVect2);
254
+ pVect2 += 4;
255
+ sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
256
+
257
+ v1 = _mm_loadu_ps(pVect1);
258
+ pVect1 += 4;
259
+ v2 = _mm_loadu_ps(pVect2);
260
+ pVect2 += 4;
261
+ sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
262
+
263
+ v1 = _mm_loadu_ps(pVect1);
264
+ pVect1 += 4;
265
+ v2 = _mm_loadu_ps(pVect2);
266
+ pVect2 += 4;
267
+ sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
274
268
  }
269
+ _mm_store_ps(TmpRes, sum_prod);
270
+ float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
275
271
 
276
- static float
277
- InnerProductDistanceSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
278
- return 1.0f - InnerProductSIMD16ExtSSE(pVect1v, pVect2v, qty_ptr);
279
- }
272
+ return sum;
273
+ }
274
+
275
+ static float
276
+ InnerProductDistanceSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
277
+ return 1.0f - InnerProductSIMD16ExtSSE(pVect1v, pVect2v, qty_ptr);
278
+ }
280
279
 
281
280
  #endif
282
281
 
283
282
  #if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512)
284
- DISTFUNC<float> InnerProductSIMD16Ext = InnerProductSIMD16ExtSSE;
285
- DISTFUNC<float> InnerProductSIMD4Ext = InnerProductSIMD4ExtSSE;
286
- DISTFUNC<float> InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtSSE;
287
- DISTFUNC<float> InnerProductDistanceSIMD4Ext = InnerProductDistanceSIMD4ExtSSE;
288
-
289
- static float
290
- InnerProductDistanceSIMD16ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
291
- size_t qty = *((size_t *) qty_ptr);
292
- size_t qty16 = qty >> 4 << 4;
293
- float res = InnerProductSIMD16Ext(pVect1v, pVect2v, &qty16);
294
- float *pVect1 = (float *) pVect1v + qty16;
295
- float *pVect2 = (float *) pVect2v + qty16;
296
-
297
- size_t qty_left = qty - qty16;
298
- float res_tail = InnerProduct(pVect1, pVect2, &qty_left);
299
- return 1.0f - (res + res_tail);
300
- }
283
+ static DISTFUNC<float> InnerProductSIMD16Ext = InnerProductSIMD16ExtSSE;
284
+ static DISTFUNC<float> InnerProductSIMD4Ext = InnerProductSIMD4ExtSSE;
285
+ static DISTFUNC<float> InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtSSE;
286
+ static DISTFUNC<float> InnerProductDistanceSIMD4Ext = InnerProductDistanceSIMD4ExtSSE;
287
+
288
+ static float
289
+ InnerProductDistanceSIMD16ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
290
+ size_t qty = *((size_t *) qty_ptr);
291
+ size_t qty16 = qty >> 4 << 4;
292
+ float res = InnerProductSIMD16Ext(pVect1v, pVect2v, &qty16);
293
+ float *pVect1 = (float *) pVect1v + qty16;
294
+ float *pVect2 = (float *) pVect2v + qty16;
295
+
296
+ size_t qty_left = qty - qty16;
297
+ float res_tail = InnerProduct(pVect1, pVect2, &qty_left);
298
+ return 1.0f - (res + res_tail);
299
+ }
301
300
 
302
- static float
303
- InnerProductDistanceSIMD4ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
304
- size_t qty = *((size_t *) qty_ptr);
305
- size_t qty4 = qty >> 2 << 2;
301
+ static float
302
+ InnerProductDistanceSIMD4ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
303
+ size_t qty = *((size_t *) qty_ptr);
304
+ size_t qty4 = qty >> 2 << 2;
306
305
 
307
- float res = InnerProductSIMD4Ext(pVect1v, pVect2v, &qty4);
308
- size_t qty_left = qty - qty4;
306
+ float res = InnerProductSIMD4Ext(pVect1v, pVect2v, &qty4);
307
+ size_t qty_left = qty - qty4;
309
308
 
310
- float *pVect1 = (float *) pVect1v + qty4;
311
- float *pVect2 = (float *) pVect2v + qty4;
312
- float res_tail = InnerProduct(pVect1, pVect2, &qty_left);
309
+ float *pVect1 = (float *) pVect1v + qty4;
310
+ float *pVect2 = (float *) pVect2v + qty4;
311
+ float res_tail = InnerProduct(pVect1, pVect2, &qty_left);
313
312
 
314
- return 1.0f - (res + res_tail);
315
- }
313
+ return 1.0f - (res + res_tail);
314
+ }
316
315
  #endif
317
316
 
318
- class InnerProductSpace : public SpaceInterface<float> {
319
-
320
- DISTFUNC<float> fstdistfunc_;
321
- size_t data_size_;
322
- size_t dim_;
323
- public:
324
- InnerProductSpace() : data_size_(0), dim_(0) { }
325
- InnerProductSpace(size_t dim) {
326
- fstdistfunc_ = InnerProductDistance;
327
- #if defined(USE_AVX) || defined(USE_SSE) || defined(USE_AVX512)
328
- #if defined(USE_AVX512)
329
- if (AVX512Capable()) {
330
- InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX512;
331
- InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX512;
332
- } else if (AVXCapable()) {
333
- InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX;
334
- InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX;
335
- }
336
- #elif defined(USE_AVX)
337
- if (AVXCapable()) {
338
- InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX;
339
- InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX;
340
- }
341
- #endif
342
- #if defined(USE_AVX)
343
- if (AVXCapable()) {
344
- InnerProductSIMD4Ext = InnerProductSIMD4ExtAVX;
345
- InnerProductDistanceSIMD4Ext = InnerProductDistanceSIMD4ExtAVX;
346
- }
347
- #endif
348
-
349
- if (dim % 16 == 0)
350
- fstdistfunc_ = InnerProductDistanceSIMD16Ext;
351
- else if (dim % 4 == 0)
352
- fstdistfunc_ = InnerProductDistanceSIMD4Ext;
353
- else if (dim > 16)
354
- fstdistfunc_ = InnerProductDistanceSIMD16ExtResiduals;
355
- else if (dim > 4)
356
- fstdistfunc_ = InnerProductDistanceSIMD4ExtResiduals;
317
+ class InnerProductSpace : public SpaceInterface<float> {
318
+ DISTFUNC<float> fstdistfunc_;
319
+ size_t data_size_;
320
+ size_t dim_;
321
+
322
+ public:
323
+ InnerProductSpace() : data_size_(0), dim_(0) { }
324
+
325
+ InnerProductSpace(size_t dim) {
326
+ fstdistfunc_ = InnerProductDistance;
327
+ #if defined(USE_AVX) || defined(USE_SSE) || defined(USE_AVX512)
328
+ #if defined(USE_AVX512)
329
+ if (AVX512Capable()) {
330
+ InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX512;
331
+ InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX512;
332
+ } else if (AVXCapable()) {
333
+ InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX;
334
+ InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX;
335
+ }
336
+ #elif defined(USE_AVX)
337
+ if (AVXCapable()) {
338
+ InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX;
339
+ InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX;
340
+ }
357
341
  #endif
358
- dim_ = dim;
359
- data_size_ = dim * sizeof(float);
342
+ #if defined(USE_AVX)
343
+ if (AVXCapable()) {
344
+ InnerProductSIMD4Ext = InnerProductSIMD4ExtAVX;
345
+ InnerProductDistanceSIMD4Ext = InnerProductDistanceSIMD4ExtAVX;
360
346
  }
347
+ #endif
361
348
 
362
- size_t get_data_size() {
363
- return data_size_;
364
- }
349
+ if (dim % 16 == 0)
350
+ fstdistfunc_ = InnerProductDistanceSIMD16Ext;
351
+ else if (dim % 4 == 0)
352
+ fstdistfunc_ = InnerProductDistanceSIMD4Ext;
353
+ else if (dim > 16)
354
+ fstdistfunc_ = InnerProductDistanceSIMD16ExtResiduals;
355
+ else if (dim > 4)
356
+ fstdistfunc_ = InnerProductDistanceSIMD4ExtResiduals;
357
+ #endif
358
+ dim_ = dim;
359
+ data_size_ = dim * sizeof(float);
360
+ }
365
361
 
366
- DISTFUNC<float> get_dist_func() {
367
- return fstdistfunc_;
368
- }
362
+ size_t get_data_size() {
363
+ return data_size_;
364
+ }
369
365
 
370
- void *get_dist_func_param() {
371
- return &dim_;
372
- }
366
+ DISTFUNC<float> get_dist_func() {
367
+ return fstdistfunc_;
368
+ }
373
369
 
374
- ~InnerProductSpace() {}
375
- };
370
+ void *get_dist_func_param() {
371
+ return &dim_;
372
+ }
376
373
 
377
- }
374
+ ~InnerProductSpace() {}
375
+ };
376
+
377
+ } // namespace hnswlib