hnswlib 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
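Three header files are added in the hunks below. Judging by their contents and mutual includes, they are hnswlib.h (the core interfaces, which includes the other headers), space_ip.h (inner-product space), and space_l2.h (L2 space); the bruteforce.h and hnswalg.h headers referenced at the end of the first hunk are not shown in this section.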
@@ -0,0 +1,108 @@
+ #pragma once
+ #ifndef NO_MANUAL_VECTORIZATION
+ #ifdef __SSE__
+ #define USE_SSE
+ #ifdef __AVX__
+ #define USE_AVX
+ #endif
+ #endif
+ #endif
+
+ #if defined(USE_AVX) || defined(USE_SSE)
+ #ifdef _MSC_VER
+ #include <intrin.h>
+ #include <stdexcept>
+ #else
+ #include <x86intrin.h>
+ #endif
+
+ #if defined(__GNUC__)
+ #define PORTABLE_ALIGN32 __attribute__((aligned(32)))
+ #else
+ #define PORTABLE_ALIGN32 __declspec(align(32))
+ #endif
+ #endif
+
+ #include <queue>
+ #include <vector>
+ #include <iostream>
+ #include <string.h>
+
+ namespace hnswlib {
+     typedef size_t labeltype;
+
+     template <typename T>
+     class pairGreater {
+     public:
+         bool operator()(const T& p1, const T& p2) {
+             return p1.first > p2.first;
+         }
+     };
+
+     template<typename T>
+     static void writeBinaryPOD(std::ostream &out, const T &podRef) {
+         out.write((char *) &podRef, sizeof(T));
+     }
+
+     template<typename T>
+     static void readBinaryPOD(std::istream &in, T &podRef) {
+         in.read((char *) &podRef, sizeof(T));
+     }
+
+     template<typename MTYPE>
+     using DISTFUNC = MTYPE(*)(const void *, const void *, const void *);
+
+
+     template<typename MTYPE>
+     class SpaceInterface {
+     public:
+         //virtual void search(void *);
+         virtual size_t get_data_size() = 0;
+
+         virtual DISTFUNC<MTYPE> get_dist_func() = 0;
+
+         virtual void *get_dist_func_param() = 0;
+
+         virtual ~SpaceInterface() {}
+     };
+
+     template<typename dist_t>
+     class AlgorithmInterface {
+     public:
+         virtual void addPoint(const void *datapoint, labeltype label) = 0;
+         virtual std::priority_queue<std::pair<dist_t, labeltype>> searchKnn(const void *, size_t) const = 0;
+
+         // Returns the k nearest neighbours, closest first.
+         virtual std::vector<std::pair<dist_t, labeltype>>
+             searchKnnCloserFirst(const void* query_data, size_t k) const;
+
+         virtual void saveIndex(const std::string &location) = 0;
+         virtual ~AlgorithmInterface() {
+         }
+     };
+
+     template<typename dist_t>
+     std::vector<std::pair<dist_t, labeltype>>
+     AlgorithmInterface<dist_t>::searchKnnCloserFirst(const void* query_data, size_t k) const {
+         std::vector<std::pair<dist_t, labeltype>> result;
+
+         // searchKnn returns the result farthest first, so fill the vector from the back to reverse the order
+         auto ret = searchKnn(query_data, k);
+         {
+             size_t sz = ret.size();
+             result.resize(sz);
+             while (!ret.empty()) {
+                 result[--sz] = ret.top();
+                 ret.pop();
+             }
+         }
+
+         return result;
+     }
+
+ }
+
+ #include "space_l2.h"
+ #include "space_ip.h"
+ #include "bruteforce.h"
+ #include "hnswalg.h"
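The interfaces above are consumed through the concrete spaces defined in the following hunks. As a minimal sketch of the plumbing, assuming only the members shown in this diff (the include path is hypothetical; adjust to your layout), a distance function can be pulled out of a space and called directly:

```cpp
// Sketch only: include path is an assumption, adjust to where the headers live.
#include "hnswlib.h"

#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
    const size_t dim = 16;
    hnswlib::L2Space space(dim);                     // a SpaceInterface<float>
    hnswlib::DISTFUNC<float> dist = space.get_dist_func();
    void *param = space.get_dist_func_param();       // here: pointer to the dimension

    std::vector<float> a(dim, 1.0f), b(dim, 2.0f);
    // Squared L2 distance: 16 components, each differing by 1.0 -> 16.0.
    float d = dist(a.data(), b.data(), param);
    std::printf("L2^2(a, b) = %.1f\n", d);
    return 0;
}
```

The same DISTFUNC signature is what AlgorithmInterface implementations receive, which is why every kernel below takes two opaque vector pointers plus a third pointer carrying the dimension.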
@@ -0,0 +1,282 @@
+ #pragma once
+ #include "hnswlib.h"
+
+ namespace hnswlib {
+
+     static float
+     InnerProduct(const void *pVect1, const void *pVect2, const void *qty_ptr) {
+         size_t qty = *((size_t *) qty_ptr);
+         float res = 0;
+         for (unsigned i = 0; i < qty; i++) {
+             res += ((float *) pVect1)[i] * ((float *) pVect2)[i];
+         }
+         return (1.0f - res);
+
+     }
+
+ #if defined(USE_AVX)
+
+     // Favor using AVX if available.
+     static float
+     InnerProductSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+         float PORTABLE_ALIGN32 TmpRes[8];
+         float *pVect1 = (float *) pVect1v;
+         float *pVect2 = (float *) pVect2v;
+         size_t qty = *((size_t *) qty_ptr);
+
+         size_t qty16 = qty / 16;
+         size_t qty4 = qty / 4;
+
+         const float *pEnd1 = pVect1 + 16 * qty16;
+         const float *pEnd2 = pVect1 + 4 * qty4;
+
+         __m256 sum256 = _mm256_set1_ps(0);
+
+         while (pVect1 < pEnd1) {
+             //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
+
+             __m256 v1 = _mm256_loadu_ps(pVect1);
+             pVect1 += 8;
+             __m256 v2 = _mm256_loadu_ps(pVect2);
+             pVect2 += 8;
+             sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
+
+             v1 = _mm256_loadu_ps(pVect1);
+             pVect1 += 8;
+             v2 = _mm256_loadu_ps(pVect2);
+             pVect2 += 8;
+             sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
+         }
+
+         __m128 v1, v2;
+         __m128 sum_prod = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), _mm256_extractf128_ps(sum256, 1));
+
+         while (pVect1 < pEnd2) {
+             v1 = _mm_loadu_ps(pVect1);
+             pVect1 += 4;
+             v2 = _mm_loadu_ps(pVect2);
+             pVect2 += 4;
+             sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
+         }
+
+         _mm_store_ps(TmpRes, sum_prod);
+         float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
+         return 1.0f - sum;
+     }
+
+ #elif defined(USE_SSE)
+
+     static float
+     InnerProductSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+         float PORTABLE_ALIGN32 TmpRes[8];
+         float *pVect1 = (float *) pVect1v;
+         float *pVect2 = (float *) pVect2v;
+         size_t qty = *((size_t *) qty_ptr);
+
+         size_t qty16 = qty / 16;
+         size_t qty4 = qty / 4;
+
+         const float *pEnd1 = pVect1 + 16 * qty16;
+         const float *pEnd2 = pVect1 + 4 * qty4;
+
+         __m128 v1, v2;
+         __m128 sum_prod = _mm_set1_ps(0);
+
+         while (pVect1 < pEnd1) {
+             v1 = _mm_loadu_ps(pVect1);
+             pVect1 += 4;
+             v2 = _mm_loadu_ps(pVect2);
+             pVect2 += 4;
+             sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
+
+             v1 = _mm_loadu_ps(pVect1);
+             pVect1 += 4;
+             v2 = _mm_loadu_ps(pVect2);
+             pVect2 += 4;
+             sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
+
+             v1 = _mm_loadu_ps(pVect1);
+             pVect1 += 4;
+             v2 = _mm_loadu_ps(pVect2);
+             pVect2 += 4;
+             sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
+
+             v1 = _mm_loadu_ps(pVect1);
+             pVect1 += 4;
+             v2 = _mm_loadu_ps(pVect2);
+             pVect2 += 4;
+             sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
+         }
+
+         while (pVect1 < pEnd2) {
+             v1 = _mm_loadu_ps(pVect1);
+             pVect1 += 4;
+             v2 = _mm_loadu_ps(pVect2);
+             pVect2 += 4;
+             sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
+         }
+
+         _mm_store_ps(TmpRes, sum_prod);
+         float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
+
+         return 1.0f - sum;
+     }
+
+ #endif
+
+ #if defined(USE_AVX)
+
+     static float
+     InnerProductSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+         float PORTABLE_ALIGN32 TmpRes[8];
+         float *pVect1 = (float *) pVect1v;
+         float *pVect2 = (float *) pVect2v;
+         size_t qty = *((size_t *) qty_ptr);
+
+         size_t qty16 = qty / 16;
+
+
+         const float *pEnd1 = pVect1 + 16 * qty16;
+
+         __m256 sum256 = _mm256_set1_ps(0);
+
+         while (pVect1 < pEnd1) {
+             //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
+
+             __m256 v1 = _mm256_loadu_ps(pVect1);
+             pVect1 += 8;
+             __m256 v2 = _mm256_loadu_ps(pVect2);
+             pVect2 += 8;
+             sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
+
+             v1 = _mm256_loadu_ps(pVect1);
+             pVect1 += 8;
+             v2 = _mm256_loadu_ps(pVect2);
+             pVect2 += 8;
+             sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
+         }
+
+         _mm256_store_ps(TmpRes, sum256);
+         float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7];
+
+         return 1.0f - sum;
+     }
+
+ #elif defined(USE_SSE)
+
+     static float
+     InnerProductSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+         float PORTABLE_ALIGN32 TmpRes[8];
+         float *pVect1 = (float *) pVect1v;
+         float *pVect2 = (float *) pVect2v;
+         size_t qty = *((size_t *) qty_ptr);
+
+         size_t qty16 = qty / 16;
+
+         const float *pEnd1 = pVect1 + 16 * qty16;
+
+         __m128 v1, v2;
+         __m128 sum_prod = _mm_set1_ps(0);
+
+         while (pVect1 < pEnd1) {
+             v1 = _mm_loadu_ps(pVect1);
+             pVect1 += 4;
+             v2 = _mm_loadu_ps(pVect2);
+             pVect2 += 4;
+             sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
+
+             v1 = _mm_loadu_ps(pVect1);
+             pVect1 += 4;
+             v2 = _mm_loadu_ps(pVect2);
+             pVect2 += 4;
+             sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
+
+             v1 = _mm_loadu_ps(pVect1);
+             pVect1 += 4;
+             v2 = _mm_loadu_ps(pVect2);
+             pVect2 += 4;
+             sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
+
+             v1 = _mm_loadu_ps(pVect1);
+             pVect1 += 4;
+             v2 = _mm_loadu_ps(pVect2);
+             pVect2 += 4;
+             sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
+         }
+         _mm_store_ps(TmpRes, sum_prod);
+         float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
+
+         return 1.0f - sum;
+     }
+
+ #endif
+
+ #if defined(USE_SSE) || defined(USE_AVX)
+     static float
+     InnerProductSIMD16ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+         size_t qty = *((size_t *) qty_ptr);
+         size_t qty16 = qty >> 4 << 4;
+         float res = InnerProductSIMD16Ext(pVect1v, pVect2v, &qty16);
+         float *pVect1 = (float *) pVect1v + qty16;
+         float *pVect2 = (float *) pVect2v + qty16;
+
+         size_t qty_left = qty - qty16;
+         float res_tail = InnerProduct(pVect1, pVect2, &qty_left);
+         return res + res_tail - 1.0f;
+     }
+
+     static float
+     InnerProductSIMD4ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+         size_t qty = *((size_t *) qty_ptr);
+         size_t qty4 = qty >> 2 << 2;
+
+         float res = InnerProductSIMD4Ext(pVect1v, pVect2v, &qty4);
+         size_t qty_left = qty - qty4;
+
+         float *pVect1 = (float *) pVect1v + qty4;
+         float *pVect2 = (float *) pVect2v + qty4;
+         float res_tail = InnerProduct(pVect1, pVect2, &qty_left);
+
+         return res + res_tail - 1.0f;
+     }
+ #endif
+
+     class InnerProductSpace : public SpaceInterface<float> {
+
+         DISTFUNC<float> fstdistfunc_;
+         size_t data_size_;
+         size_t dim_;
+     public:
+         InnerProductSpace(size_t dim) {
+             fstdistfunc_ = InnerProduct;
+ #if defined(USE_AVX) || defined(USE_SSE)
+             if (dim % 16 == 0)
+                 fstdistfunc_ = InnerProductSIMD16Ext;
+             else if (dim % 4 == 0)
+                 fstdistfunc_ = InnerProductSIMD4Ext;
+             else if (dim > 16)
+                 fstdistfunc_ = InnerProductSIMD16ExtResiduals;
+             else if (dim > 4)
+                 fstdistfunc_ = InnerProductSIMD4ExtResiduals;
+ #endif
+             dim_ = dim;
+             data_size_ = dim * sizeof(float);
+         }
+
+         size_t get_data_size() {
+             return data_size_;
+         }
+
+         DISTFUNC<float> get_dist_func() {
+             return fstdistfunc_;
+         }
+
+         void *get_dist_func_param() {
+             return &dim_;
+         }
+
+         ~InnerProductSpace() {}
+     };
+
+
+ }
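Two conventions in this header deserve a note. First, the returned "distance" is 1 - dot(a, b), so smaller means more similar, and for unit-normalized vectors it equals the cosine distance. Second, the *Residuals variants subtract 1.0f because both the SIMD head and the scalar tail return 1 minus their partial dot product: (1 - d_head) + (1 - d_tail) - 1.0f = 1 - (d_head + d_tail). A minimal sketch that exercises the residual path, assuming only the classes shown in this diff:

```cpp
// Sketch only: include path is an assumption.
#include "hnswlib.h"

#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
    const size_t dim = 10;  // not a multiple of 4 or 16, and > 4 -> residual kernel
    hnswlib::InnerProductSpace space(dim);
    hnswlib::DISTFUNC<float> dist = space.get_dist_func();

    // Two unit vectors; for normalized data, 1 - <a,b> is the cosine distance.
    std::vector<float> a(dim, 0.0f), b(dim, 0.0f);
    a[0] = 1.0f;
    b[0] = std::sqrt(0.5f);
    b[1] = std::sqrt(0.5f);

    float d = dist(a.data(), b.data(), space.get_dist_func_param());
    std::printf("1 - <a,b> = %.4f\n", d);  // ~0.2929 = 1 - cos(45 deg)
    return 0;
}
```

On a build without SSE/AVX the plain InnerProduct kernel is selected instead and produces the same value.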
@@ -0,0 +1,281 @@
+ #pragma once
+ #include "hnswlib.h"
+
+ namespace hnswlib {
+
+     static float
+     L2Sqr(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+         float *pVect1 = (float *) pVect1v;
+         float *pVect2 = (float *) pVect2v;
+         size_t qty = *((size_t *) qty_ptr);
+
+         float res = 0;
+         for (size_t i = 0; i < qty; i++) {
+             float t = *pVect1 - *pVect2;
+             pVect1++;
+             pVect2++;
+             res += t * t;
+         }
+         return (res);
+     }
+
+ #if defined(USE_AVX)
+
+     // Favor using AVX if available.
+     static float
+     L2SqrSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+         float *pVect1 = (float *) pVect1v;
+         float *pVect2 = (float *) pVect2v;
+         size_t qty = *((size_t *) qty_ptr);
+         float PORTABLE_ALIGN32 TmpRes[8];
+         size_t qty16 = qty >> 4;
+
+         const float *pEnd1 = pVect1 + (qty16 << 4);
+
+         __m256 diff, v1, v2;
+         __m256 sum = _mm256_set1_ps(0);
+
+         while (pVect1 < pEnd1) {
+             v1 = _mm256_loadu_ps(pVect1);
+             pVect1 += 8;
+             v2 = _mm256_loadu_ps(pVect2);
+             pVect2 += 8;
+             diff = _mm256_sub_ps(v1, v2);
+             sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
+
+             v1 = _mm256_loadu_ps(pVect1);
+             pVect1 += 8;
+             v2 = _mm256_loadu_ps(pVect2);
+             pVect2 += 8;
+             diff = _mm256_sub_ps(v1, v2);
+             sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
+         }
+
+         _mm256_store_ps(TmpRes, sum);
+         return TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7];
+     }
+
+ #elif defined(USE_SSE)
+
+     static float
+     L2SqrSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+         float *pVect1 = (float *) pVect1v;
+         float *pVect2 = (float *) pVect2v;
+         size_t qty = *((size_t *) qty_ptr);
+         float PORTABLE_ALIGN32 TmpRes[8];
+         size_t qty16 = qty >> 4;
+
+         const float *pEnd1 = pVect1 + (qty16 << 4);
+
+         __m128 diff, v1, v2;
+         __m128 sum = _mm_set1_ps(0);
+
+         while (pVect1 < pEnd1) {
+             //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
+             v1 = _mm_loadu_ps(pVect1);
+             pVect1 += 4;
+             v2 = _mm_loadu_ps(pVect2);
+             pVect2 += 4;
+             diff = _mm_sub_ps(v1, v2);
+             sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
+
+             v1 = _mm_loadu_ps(pVect1);
+             pVect1 += 4;
+             v2 = _mm_loadu_ps(pVect2);
+             pVect2 += 4;
+             diff = _mm_sub_ps(v1, v2);
+             sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
+
+             v1 = _mm_loadu_ps(pVect1);
+             pVect1 += 4;
+             v2 = _mm_loadu_ps(pVect2);
+             pVect2 += 4;
+             diff = _mm_sub_ps(v1, v2);
+             sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
+
+             v1 = _mm_loadu_ps(pVect1);
+             pVect1 += 4;
+             v2 = _mm_loadu_ps(pVect2);
+             pVect2 += 4;
+             diff = _mm_sub_ps(v1, v2);
+             sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
+         }
+
+         _mm_store_ps(TmpRes, sum);
+         return TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
+     }
+ #endif
+
+ #if defined(USE_SSE) || defined(USE_AVX)
+     static float
+     L2SqrSIMD16ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+         size_t qty = *((size_t *) qty_ptr);
+         size_t qty16 = qty >> 4 << 4;
+         float res = L2SqrSIMD16Ext(pVect1v, pVect2v, &qty16);
+         float *pVect1 = (float *) pVect1v + qty16;
+         float *pVect2 = (float *) pVect2v + qty16;
+
+         size_t qty_left = qty - qty16;
+         float res_tail = L2Sqr(pVect1, pVect2, &qty_left);
+         return (res + res_tail);
+     }
+ #endif
+
+
+ #ifdef USE_SSE
+     static float
+     L2SqrSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+         float PORTABLE_ALIGN32 TmpRes[8];
+         float *pVect1 = (float *) pVect1v;
+         float *pVect2 = (float *) pVect2v;
+         size_t qty = *((size_t *) qty_ptr);
+
+
+         size_t qty4 = qty >> 2;
+
+         const float *pEnd1 = pVect1 + (qty4 << 2);
+
+         __m128 diff, v1, v2;
+         __m128 sum = _mm_set1_ps(0);
+
+         while (pVect1 < pEnd1) {
+             v1 = _mm_loadu_ps(pVect1);
+             pVect1 += 4;
+             v2 = _mm_loadu_ps(pVect2);
+             pVect2 += 4;
+             diff = _mm_sub_ps(v1, v2);
+             sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
+         }
+         _mm_store_ps(TmpRes, sum);
+         return TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
+     }
+
+     static float
+     L2SqrSIMD4ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+         size_t qty = *((size_t *) qty_ptr);
+         size_t qty4 = qty >> 2 << 2;
+
+         float res = L2SqrSIMD4Ext(pVect1v, pVect2v, &qty4);
+         size_t qty_left = qty - qty4;
+
+         float *pVect1 = (float *) pVect1v + qty4;
+         float *pVect2 = (float *) pVect2v + qty4;
+         float res_tail = L2Sqr(pVect1, pVect2, &qty_left);
+
+         return (res + res_tail);
+     }
+ #endif
+
+     class L2Space : public SpaceInterface<float> {
+
+         DISTFUNC<float> fstdistfunc_;
+         size_t data_size_;
+         size_t dim_;
+     public:
+         L2Space(size_t dim) {
+             fstdistfunc_ = L2Sqr;
+ #if defined(USE_SSE) || defined(USE_AVX)
+             if (dim % 16 == 0)
+                 fstdistfunc_ = L2SqrSIMD16Ext;
+             else if (dim % 4 == 0)
+                 fstdistfunc_ = L2SqrSIMD4Ext;
+             else if (dim > 16)
+                 fstdistfunc_ = L2SqrSIMD16ExtResiduals;
+             else if (dim > 4)
+                 fstdistfunc_ = L2SqrSIMD4ExtResiduals;
+ #endif
+             dim_ = dim;
+             data_size_ = dim * sizeof(float);
+         }
+
+         size_t get_data_size() {
+             return data_size_;
+         }
+
+         DISTFUNC<float> get_dist_func() {
+             return fstdistfunc_;
+         }
+
+         void *get_dist_func_param() {
+             return &dim_;
+         }
+
+         ~L2Space() {}
+     };
+
+     static int
+     L2SqrI4x(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) {
+
+         size_t qty = *((size_t *) qty_ptr);
+         int res = 0;
+         unsigned char *a = (unsigned char *) pVect1;
+         unsigned char *b = (unsigned char *) pVect2;
+
+         qty = qty >> 2;
+         for (size_t i = 0; i < qty; i++) {
+
+             res += ((*a) - (*b)) * ((*a) - (*b));
+             a++;
+             b++;
+             res += ((*a) - (*b)) * ((*a) - (*b));
+             a++;
+             b++;
+             res += ((*a) - (*b)) * ((*a) - (*b));
+             a++;
+             b++;
+             res += ((*a) - (*b)) * ((*a) - (*b));
+             a++;
+             b++;
+         }
+         return (res);
+     }
+
+     static int L2SqrI(const void* __restrict pVect1, const void* __restrict pVect2, const void* __restrict qty_ptr) {
+         size_t qty = *((size_t*)qty_ptr);
+         int res = 0;
+         unsigned char* a = (unsigned char*)pVect1;
+         unsigned char* b = (unsigned char*)pVect2;
+
+         for (size_t i = 0; i < qty; i++)
+         {
+             res += ((*a) - (*b)) * ((*a) - (*b));
+             a++;
+             b++;
+         }
+         return (res);
+     }
+
+     class L2SpaceI : public SpaceInterface<int> {
+
+         DISTFUNC<int> fstdistfunc_;
+         size_t data_size_;
+         size_t dim_;
+     public:
+         L2SpaceI(size_t dim) {
+             if (dim % 4 == 0) {
+                 fstdistfunc_ = L2SqrI4x;
+             }
+             else {
+                 fstdistfunc_ = L2SqrI;
+             }
+             dim_ = dim;
+             data_size_ = dim * sizeof(unsigned char);
+         }
+
+         size_t get_data_size() {
+             return data_size_;
+         }
+
+         DISTFUNC<int> get_dist_func() {
+             return fstdistfunc_;
+         }
+
+         void *get_dist_func_param() {
+             return &dim_;
+         }
+
+         ~L2SpaceI() {}
+     };
+
+
+ }
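L2SpaceI is the byte-vector counterpart: each point is dim unsigned chars, the distance type is int, and dimensions divisible by 4 get the unrolled L2SqrI4x kernel. A hypothetical usage sketch under the same assumptions as above:

```cpp
// Sketch only: include path is an assumption.
#include "hnswlib.h"

#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
    const size_t dim = 8;                  // divisible by 4 -> unrolled L2SqrI4x kernel
    hnswlib::L2SpaceI space(dim);          // a SpaceInterface<int> over bytes
    hnswlib::DISTFUNC<int> dist = space.get_dist_func();

    std::vector<unsigned char> a(dim, 10), b(dim, 13);
    // Squared L2 over bytes: 8 components, each differing by 3 -> 8 * 9 = 72.
    int d = dist(a.data(), b.data(), space.get_dist_func_param());
    std::printf("L2^2(a, b) = %d\n", d);
    return 0;
}
```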