umappp 0.1.5 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +11 -4
- data/ext/umappp/umappp.cpp +41 -43
- data/lib/umappp/version.rb +1 -1
- data/lib/umappp.rb +5 -4
- data/vendor/aarand/aarand.hpp +141 -28
- data/vendor/annoy/annoylib.h +1 -1
- data/vendor/hnswlib/bruteforce.h +142 -127
- data/vendor/hnswlib/hnswalg.h +1018 -939
- data/vendor/hnswlib/hnswlib.h +149 -58
- data/vendor/hnswlib/space_ip.h +322 -229
- data/vendor/hnswlib/space_l2.h +283 -240
- data/vendor/hnswlib/visited_list_pool.h +54 -55
- data/vendor/irlba/irlba.hpp +12 -27
- data/vendor/irlba/lanczos.hpp +30 -31
- data/vendor/irlba/parallel.hpp +37 -38
- data/vendor/irlba/utils.hpp +12 -23
- data/vendor/irlba/wrappers.hpp +239 -70
- data/vendor/kmeans/Details.hpp +1 -1
- data/vendor/kmeans/HartiganWong.hpp +28 -2
- data/vendor/kmeans/InitializeKmeansPP.hpp +29 -1
- data/vendor/kmeans/Kmeans.hpp +25 -2
- data/vendor/kmeans/Lloyd.hpp +29 -2
- data/vendor/kmeans/MiniBatch.hpp +48 -8
- data/vendor/knncolle/Annoy/Annoy.hpp +3 -0
- data/vendor/knncolle/Hnsw/Hnsw.hpp +3 -0
- data/vendor/knncolle/Kmknn/Kmknn.hpp +11 -1
- data/vendor/knncolle/utils/find_nearest_neighbors.hpp +8 -6
- data/vendor/umappp/Umap.hpp +85 -43
- data/vendor/umappp/optimize_layout.hpp +410 -133
- data/vendor/umappp/spectral_init.hpp +4 -1
- metadata +6 -6
data/vendor/hnswlib/space_l2.h
CHANGED
@@ -3,279 +3,322 @@
|
|
3
3
|
|
4
4
|
namespace hnswlib {
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
6
|
+
/**
 * Scalar squared Euclidean (L2) distance between two float vectors.
 *
 * @param pVect1v  Pointer to the first vector; `*qty_ptr` floats are read from it.
 * @param pVect2v  Pointer to the second vector; `*qty_ptr` floats are read from it.
 * @param qty_ptr  Pointer to a size_t holding the number of elements to compare.
 * @return Sum over all elements of (a[i] - b[i])^2, accumulated in index order;
 *         0 when the element count is 0.
 */
static float
L2Sqr(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    // The inputs are only read, so keep them const instead of casting it away.
    const float *pVect1 = static_cast<const float *>(pVect1v);
    const float *pVect2 = static_cast<const float *>(pVect2v);
    const size_t qty = *static_cast<const size_t *>(qty_ptr);

    float res = 0;
    for (size_t i = 0; i < qty; i++) {
        const float t = pVect1[i] - pVect2[i];
        res += t * t;
    }
    return res;
}
|
21
|
+
|
22
|
+
#if defined(USE_AVX512)
|
23
|
+
|
24
|
+
// Favor using AVX512 if available.
|
25
|
+
static float
|
26
|
+
L2SqrSIMD16ExtAVX512(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
|
27
|
+
float *pVect1 = (float *) pVect1v;
|
28
|
+
float *pVect2 = (float *) pVect2v;
|
29
|
+
size_t qty = *((size_t *) qty_ptr);
|
30
|
+
float PORTABLE_ALIGN64 TmpRes[16];
|
31
|
+
size_t qty16 = qty >> 4;
|
32
|
+
|
33
|
+
const float *pEnd1 = pVect1 + (qty16 << 4);
|
34
|
+
|
35
|
+
__m512 diff, v1, v2;
|
36
|
+
__m512 sum = _mm512_set1_ps(0);
|
37
|
+
|
38
|
+
while (pVect1 < pEnd1) {
|
39
|
+
v1 = _mm512_loadu_ps(pVect1);
|
40
|
+
pVect1 += 16;
|
41
|
+
v2 = _mm512_loadu_ps(pVect2);
|
42
|
+
pVect2 += 16;
|
43
|
+
diff = _mm512_sub_ps(v1, v2);
|
44
|
+
// sum = _mm512_fmadd_ps(diff, diff, sum);
|
45
|
+
sum = _mm512_add_ps(sum, _mm512_mul_ps(diff, diff));
|
20
46
|
}
|
21
47
|
|
22
|
-
|
48
|
+
_mm512_store_ps(TmpRes, sum);
|
49
|
+
float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] +
|
50
|
+
TmpRes[7] + TmpRes[8] + TmpRes[9] + TmpRes[10] + TmpRes[11] + TmpRes[12] +
|
51
|
+
TmpRes[13] + TmpRes[14] + TmpRes[15];
|
23
52
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
float *pVect1 = (float *) pVect1v;
|
28
|
-
float *pVect2 = (float *) pVect2v;
|
29
|
-
size_t qty = *((size_t *) qty_ptr);
|
30
|
-
float PORTABLE_ALIGN32 TmpRes[8];
|
31
|
-
size_t qty16 = qty >> 4;
|
32
|
-
|
33
|
-
const float *pEnd1 = pVect1 + (qty16 << 4);
|
34
|
-
|
35
|
-
__m256 diff, v1, v2;
|
36
|
-
__m256 sum = _mm256_set1_ps(0);
|
37
|
-
|
38
|
-
while (pVect1 < pEnd1) {
|
39
|
-
v1 = _mm256_loadu_ps(pVect1);
|
40
|
-
pVect1 += 8;
|
41
|
-
v2 = _mm256_loadu_ps(pVect2);
|
42
|
-
pVect2 += 8;
|
43
|
-
diff = _mm256_sub_ps(v1, v2);
|
44
|
-
sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
|
45
|
-
|
46
|
-
v1 = _mm256_loadu_ps(pVect1);
|
47
|
-
pVect1 += 8;
|
48
|
-
v2 = _mm256_loadu_ps(pVect2);
|
49
|
-
pVect2 += 8;
|
50
|
-
diff = _mm256_sub_ps(v1, v2);
|
51
|
-
sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
|
52
|
-
}
|
53
|
+
return (res);
|
54
|
+
}
|
55
|
+
#endif
|
53
56
|
|
54
|
-
|
55
|
-
|
57
|
+
#if defined(USE_AVX)
|
58
|
+
|
59
|
+
// Favor using AVX if available.
|
60
|
+
static float
|
61
|
+
L2SqrSIMD16ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
|
62
|
+
float *pVect1 = (float *) pVect1v;
|
63
|
+
float *pVect2 = (float *) pVect2v;
|
64
|
+
size_t qty = *((size_t *) qty_ptr);
|
65
|
+
float PORTABLE_ALIGN32 TmpRes[8];
|
66
|
+
size_t qty16 = qty >> 4;
|
67
|
+
|
68
|
+
const float *pEnd1 = pVect1 + (qty16 << 4);
|
69
|
+
|
70
|
+
__m256 diff, v1, v2;
|
71
|
+
__m256 sum = _mm256_set1_ps(0);
|
72
|
+
|
73
|
+
while (pVect1 < pEnd1) {
|
74
|
+
v1 = _mm256_loadu_ps(pVect1);
|
75
|
+
pVect1 += 8;
|
76
|
+
v2 = _mm256_loadu_ps(pVect2);
|
77
|
+
pVect2 += 8;
|
78
|
+
diff = _mm256_sub_ps(v1, v2);
|
79
|
+
sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
|
80
|
+
|
81
|
+
v1 = _mm256_loadu_ps(pVect1);
|
82
|
+
pVect1 += 8;
|
83
|
+
v2 = _mm256_loadu_ps(pVect2);
|
84
|
+
pVect2 += 8;
|
85
|
+
diff = _mm256_sub_ps(v1, v2);
|
86
|
+
sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
|
56
87
|
}
|
57
88
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
L2SqrSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
|
62
|
-
float *pVect1 = (float *) pVect1v;
|
63
|
-
float *pVect2 = (float *) pVect2v;
|
64
|
-
size_t qty = *((size_t *) qty_ptr);
|
65
|
-
float PORTABLE_ALIGN32 TmpRes[8];
|
66
|
-
size_t qty16 = qty >> 4;
|
67
|
-
|
68
|
-
const float *pEnd1 = pVect1 + (qty16 << 4);
|
69
|
-
|
70
|
-
__m128 diff, v1, v2;
|
71
|
-
__m128 sum = _mm_set1_ps(0);
|
72
|
-
|
73
|
-
while (pVect1 < pEnd1) {
|
74
|
-
//_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
|
75
|
-
v1 = _mm_loadu_ps(pVect1);
|
76
|
-
pVect1 += 4;
|
77
|
-
v2 = _mm_loadu_ps(pVect2);
|
78
|
-
pVect2 += 4;
|
79
|
-
diff = _mm_sub_ps(v1, v2);
|
80
|
-
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
|
81
|
-
|
82
|
-
v1 = _mm_loadu_ps(pVect1);
|
83
|
-
pVect1 += 4;
|
84
|
-
v2 = _mm_loadu_ps(pVect2);
|
85
|
-
pVect2 += 4;
|
86
|
-
diff = _mm_sub_ps(v1, v2);
|
87
|
-
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
|
88
|
-
|
89
|
-
v1 = _mm_loadu_ps(pVect1);
|
90
|
-
pVect1 += 4;
|
91
|
-
v2 = _mm_loadu_ps(pVect2);
|
92
|
-
pVect2 += 4;
|
93
|
-
diff = _mm_sub_ps(v1, v2);
|
94
|
-
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
|
95
|
-
|
96
|
-
v1 = _mm_loadu_ps(pVect1);
|
97
|
-
pVect1 += 4;
|
98
|
-
v2 = _mm_loadu_ps(pVect2);
|
99
|
-
pVect2 += 4;
|
100
|
-
diff = _mm_sub_ps(v1, v2);
|
101
|
-
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
|
102
|
-
}
|
89
|
+
_mm256_store_ps(TmpRes, sum);
|
90
|
+
return TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7];
|
91
|
+
}
|
103
92
|
|
104
|
-
_mm_store_ps(TmpRes, sum);
|
105
|
-
return TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
|
106
|
-
}
|
107
93
|
#endif
|
108
94
|
|
109
|
-
#if defined(USE_SSE)
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
95
|
+
#if defined(USE_SSE)
|
96
|
+
|
97
|
+
static float
|
98
|
+
L2SqrSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
|
99
|
+
float *pVect1 = (float *) pVect1v;
|
100
|
+
float *pVect2 = (float *) pVect2v;
|
101
|
+
size_t qty = *((size_t *) qty_ptr);
|
102
|
+
float PORTABLE_ALIGN32 TmpRes[8];
|
103
|
+
size_t qty16 = qty >> 4;
|
104
|
+
|
105
|
+
const float *pEnd1 = pVect1 + (qty16 << 4);
|
106
|
+
|
107
|
+
__m128 diff, v1, v2;
|
108
|
+
__m128 sum = _mm_set1_ps(0);
|
109
|
+
|
110
|
+
while (pVect1 < pEnd1) {
|
111
|
+
//_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
|
112
|
+
v1 = _mm_loadu_ps(pVect1);
|
113
|
+
pVect1 += 4;
|
114
|
+
v2 = _mm_loadu_ps(pVect2);
|
115
|
+
pVect2 += 4;
|
116
|
+
diff = _mm_sub_ps(v1, v2);
|
117
|
+
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
|
118
|
+
|
119
|
+
v1 = _mm_loadu_ps(pVect1);
|
120
|
+
pVect1 += 4;
|
121
|
+
v2 = _mm_loadu_ps(pVect2);
|
122
|
+
pVect2 += 4;
|
123
|
+
diff = _mm_sub_ps(v1, v2);
|
124
|
+
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
|
125
|
+
|
126
|
+
v1 = _mm_loadu_ps(pVect1);
|
127
|
+
pVect1 += 4;
|
128
|
+
v2 = _mm_loadu_ps(pVect2);
|
129
|
+
pVect2 += 4;
|
130
|
+
diff = _mm_sub_ps(v1, v2);
|
131
|
+
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
|
132
|
+
|
133
|
+
v1 = _mm_loadu_ps(pVect1);
|
134
|
+
pVect1 += 4;
|
135
|
+
v2 = _mm_loadu_ps(pVect2);
|
136
|
+
pVect2 += 4;
|
137
|
+
diff = _mm_sub_ps(v1, v2);
|
138
|
+
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
|
121
139
|
}
|
122
|
-
#endif
|
123
140
|
|
141
|
+
_mm_store_ps(TmpRes, sum);
|
142
|
+
return TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
|
143
|
+
}
|
144
|
+
#endif
|
124
145
|
|
125
|
-
#
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
146
|
+
#if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512)
|
147
|
+
// Function pointer to the 16-floats-per-iteration kernel currently in use.
// It starts out as the SSE implementation; the L2Space constructor reassigns it
// to the AVX-512 or AVX variant when AVX512Capable()/AVXCapable() return true.
static DISTFUNC<float> L2SqrSIMD16Ext = L2SqrSIMD16ExtSSE;
|
148
|
+
|
149
|
+
static float
|
150
|
+
L2SqrSIMD16ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
|
151
|
+
size_t qty = *((size_t *) qty_ptr);
|
152
|
+
size_t qty16 = qty >> 4 << 4;
|
153
|
+
float res = L2SqrSIMD16Ext(pVect1v, pVect2v, &qty16);
|
154
|
+
float *pVect1 = (float *) pVect1v + qty16;
|
155
|
+
float *pVect2 = (float *) pVect2v + qty16;
|
156
|
+
|
157
|
+
size_t qty_left = qty - qty16;
|
158
|
+
float res_tail = L2Sqr(pVect1, pVect2, &qty_left);
|
159
|
+
return (res + res_tail);
|
160
|
+
}
|
161
|
+
#endif
|
132
162
|
|
133
163
|
|
134
|
-
|
164
|
+
#if defined(USE_SSE)
|
165
|
+
static float
|
166
|
+
L2SqrSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
|
167
|
+
float PORTABLE_ALIGN32 TmpRes[8];
|
168
|
+
float *pVect1 = (float *) pVect1v;
|
169
|
+
float *pVect2 = (float *) pVect2v;
|
170
|
+
size_t qty = *((size_t *) qty_ptr);
|
135
171
|
|
136
|
-
const float *pEnd1 = pVect1 + (qty4 << 2);
|
137
172
|
|
138
|
-
|
139
|
-
__m128 sum = _mm_set1_ps(0);
|
173
|
+
size_t qty4 = qty >> 2;
|
140
174
|
|
141
|
-
|
142
|
-
v1 = _mm_loadu_ps(pVect1);
|
143
|
-
pVect1 += 4;
|
144
|
-
v2 = _mm_loadu_ps(pVect2);
|
145
|
-
pVect2 += 4;
|
146
|
-
diff = _mm_sub_ps(v1, v2);
|
147
|
-
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
|
148
|
-
}
|
149
|
-
_mm_store_ps(TmpRes, sum);
|
150
|
-
return TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
|
151
|
-
}
|
175
|
+
const float *pEnd1 = pVect1 + (qty4 << 2);
|
152
176
|
|
153
|
-
|
154
|
-
|
155
|
-
size_t qty = *((size_t *) qty_ptr);
|
156
|
-
size_t qty4 = qty >> 2 << 2;
|
177
|
+
__m128 diff, v1, v2;
|
178
|
+
__m128 sum = _mm_set1_ps(0);
|
157
179
|
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
return (res + res_tail);
|
180
|
+
while (pVect1 < pEnd1) {
|
181
|
+
v1 = _mm_loadu_ps(pVect1);
|
182
|
+
pVect1 += 4;
|
183
|
+
v2 = _mm_loadu_ps(pVect2);
|
184
|
+
pVect2 += 4;
|
185
|
+
diff = _mm_sub_ps(v1, v2);
|
186
|
+
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
|
166
187
|
}
|
167
|
-
|
188
|
+
_mm_store_ps(TmpRes, sum);
|
189
|
+
return TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
|
190
|
+
}
|
168
191
|
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
size_t dim_;
|
174
|
-
public:
|
175
|
-
L2Space(size_t dim) {
|
176
|
-
fstdistfunc_ = L2Sqr;
|
177
|
-
#if defined(USE_SSE) || defined(USE_AVX)
|
178
|
-
if (dim % 16 == 0)
|
179
|
-
fstdistfunc_ = L2SqrSIMD16Ext;
|
180
|
-
else if (dim % 4 == 0)
|
181
|
-
fstdistfunc_ = L2SqrSIMD4Ext;
|
182
|
-
else if (dim > 16)
|
183
|
-
fstdistfunc_ = L2SqrSIMD16ExtResiduals;
|
184
|
-
else if (dim > 4)
|
185
|
-
fstdistfunc_ = L2SqrSIMD4ExtResiduals;
|
186
|
-
#endif
|
187
|
-
dim_ = dim;
|
188
|
-
data_size_ = dim * sizeof(float);
|
189
|
-
}
|
192
|
+
static float
|
193
|
+
L2SqrSIMD4ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
|
194
|
+
size_t qty = *((size_t *) qty_ptr);
|
195
|
+
size_t qty4 = qty >> 2 << 2;
|
190
196
|
|
191
|
-
|
192
|
-
|
193
|
-
}
|
197
|
+
float res = L2SqrSIMD4Ext(pVect1v, pVect2v, &qty4);
|
198
|
+
size_t qty_left = qty - qty4;
|
194
199
|
|
195
|
-
|
196
|
-
|
197
|
-
|
200
|
+
float *pVect1 = (float *) pVect1v + qty4;
|
201
|
+
float *pVect2 = (float *) pVect2v + qty4;
|
202
|
+
float res_tail = L2Sqr(pVect1, pVect2, &qty_left);
|
198
203
|
|
199
|
-
|
200
|
-
|
201
|
-
|
204
|
+
return (res + res_tail);
|
205
|
+
}
|
206
|
+
#endif
|
202
207
|
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
208
|
+
// Squared-L2 space over float vectors. The constructor picks a distance
// function for the given dimension: the scalar L2Sqr by default, or one of the
// SIMD kernels when the corresponding USE_* macros are defined.
class L2Space : public SpaceInterface<float> {
    DISTFUNC<float> fstdistfunc_;
    size_t data_size_;
    size_t dim_;

 public:
    L2Space(size_t dim)
        : fstdistfunc_(L2Sqr), data_size_(dim * sizeof(float)), dim_(dim) {
#if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512)
        // Point the shared 16-wide kernel at the widest variant reported as usable.
#if defined(USE_AVX512)
        if (AVX512Capable()) {
            L2SqrSIMD16Ext = L2SqrSIMD16ExtAVX512;
        } else if (AVXCapable()) {
            L2SqrSIMD16Ext = L2SqrSIMD16ExtAVX;
        }
#elif defined(USE_AVX)
        if (AVXCapable()) {
            L2SqrSIMD16Ext = L2SqrSIMD16ExtAVX;
        }
#endif

        // Exact multiples use the plain kernels; other sizes above the block
        // width use the variants that add a scalar remainder.
        if (dim % 16 == 0) {
            fstdistfunc_ = L2SqrSIMD16Ext;
        } else if (dim % 4 == 0) {
            fstdistfunc_ = L2SqrSIMD4Ext;
        } else if (dim > 16) {
            fstdistfunc_ = L2SqrSIMD16ExtResiduals;
        } else if (dim > 4) {
            fstdistfunc_ = L2SqrSIMD4ExtResiduals;
        }
#endif
    }

    // Size in bytes of one stored vector (dim * sizeof(float)).
    size_t get_data_size() {
        return data_size_;
    }

    // Distance function chosen in the constructor.
    DISTFUNC<float> get_dist_func() {
        return fstdistfunc_;
    }

    // Pointer to the stored dimension, in the form the distance functions read it.
    void *get_dist_func_param() {
        return &dim_;
    }

    ~L2Space() {}
};
|
254
|
+
|
255
|
+
/**
 * Squared L2 distance between two unsigned-char vectors, unrolled four
 * elements per iteration.
 *
 * @param pVect1   Pointer to the first vector of `*qty_ptr` bytes.
 * @param pVect2   Pointer to the second vector of `*qty_ptr` bytes.
 * @param qty_ptr  Pointer to a size_t holding the number of elements.
 * @return Sum over all elements of (a[i] - b[i])^2 as an int.
 *
 * Results are unchanged when the count is a multiple of 4. The trailing
 * `qty % 4` elements, which the unrolled loop alone would silently skip,
 * are handled by a scalar tail loop.
 */
static int
L2SqrI4x(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) {
    const size_t qty = *static_cast<const size_t *>(qty_ptr);
    // Inputs are only read, so keep them const.
    const unsigned char *a = static_cast<const unsigned char *>(pVect1);
    const unsigned char *b = static_cast<const unsigned char *>(pVect2);
    int res = 0;

    const size_t blocks = qty >> 2;
    for (size_t i = 0; i < blocks; i++) {
        // Operands promote to int, so the differences may be negative.
        const int d0 = a[0] - b[0];
        const int d1 = a[1] - b[1];
        const int d2 = a[2] - b[2];
        const int d3 = a[3] - b[3];
        res += d0 * d0;
        res += d1 * d1;
        res += d2 * d2;
        res += d3 * d3;
        a += 4;
        b += 4;
    }

    // Remaining 0-3 elements when qty is not a multiple of 4.
    for (size_t i = blocks << 2; i < qty; i++) {
        const int d = *a - *b;
        res += d * d;
        a++;
        b++;
    }
    return res;
}
|
279
|
+
|
280
|
+
/**
 * Squared L2 distance between two unsigned-char vectors (plain scalar loop).
 *
 * @param pVect1   Pointer to the first vector of `*qty_ptr` bytes.
 * @param pVect2   Pointer to the second vector of `*qty_ptr` bytes.
 * @param qty_ptr  Pointer to a size_t holding the number of elements.
 * @return Sum over all elements of (a[i] - b[i])^2 as an int; 0 for an empty input.
 */
static int L2SqrI(const void* __restrict pVect1, const void* __restrict pVect2, const void* __restrict qty_ptr) {
    const size_t qty = *static_cast<const size_t*>(qty_ptr);
    // Inputs are only read, so keep them const.
    const unsigned char* a = static_cast<const unsigned char*>(pVect1);
    const unsigned char* b = static_cast<const unsigned char*>(pVect2);
    int res = 0;

    for (size_t i = 0; i < qty; i++) {
        // Operands promote to int, so the difference may be negative.
        const int d = a[i] - b[i];
        res += d * d;
    }
    return res;
}
|
293
|
+
|
294
|
+
// Squared-L2 space over unsigned-char vectors with int distances. Uses the
// 4x-unrolled kernel when the dimension is a multiple of 4, otherwise the
// plain scalar loop.
class L2SpaceI : public SpaceInterface<int> {
    DISTFUNC<int> fstdistfunc_;
    size_t data_size_;
    size_t dim_;

 public:
    L2SpaceI(size_t dim)
        : fstdistfunc_((dim % 4 == 0) ? L2SqrI4x : L2SqrI),
          data_size_(dim * sizeof(unsigned char)),
          dim_(dim) {
    }

    // Size in bytes of one stored vector (dim * sizeof(unsigned char)).
    size_t get_data_size() {
        return data_size_;
    }

    // Distance function chosen in the constructor.
    DISTFUNC<int> get_dist_func() {
        return fstdistfunc_;
    }

    // Pointer to the stored dimension, in the form the distance functions read it.
    void *get_dist_func_param() {
        return &dim_;
    }

    ~L2SpaceI() {}
};
|
324
|
+
} // namespace hnswlib
|
@@ -5,75 +5,74 @@
|
|
5
5
|
#include <deque>
|
6
6
|
|
7
7
|
namespace hnswlib {
|
8
|
-
|
8
|
+
typedef unsigned short int vl_type;

/**
 * Array of per-element tags plus a current tag value (`curV`).
 *
 * reset() advances `curV`; when the counter wraps around to 0 the whole array
 * is zeroed and `curV` moves on to 1. The constructor starts `curV` at the
 * maximum vl_type value, so the first reset() zeroes the freshly allocated
 * (otherwise uninitialized) array.
 *
 * The object owns `mass` (allocated with new[], freed in the destructor).
 * Copying is disabled because an implicit copy would share the pointer and
 * free it twice.
 */
class VisitedList {
 public:
    vl_type curV;              // current tag value
    vl_type *mass;             // tag array with numelements entries (owned)
    unsigned int numelements;  // number of entries in mass

    // Allocates the tag array; its contents stay uninitialized until the first reset().
    VisitedList(int numelements1) {
        curV = -1;  // wraps to the largest vl_type so the first reset() lands on 0
        numelements = numelements1;
        mass = new vl_type[numelements];
    }

    // Owning raw pointer: forbid copies to avoid a double delete[].
    VisitedList(const VisitedList &) = delete;
    VisitedList &operator=(const VisitedList &) = delete;

    // Advances the tag; on wrap-around clears all entries and skips tag 0.
    void reset() {
        curV++;
        if (curV == 0) {
            memset(mass, 0, sizeof(vl_type) * numelements);
            curV++;
        }
    }

    ~VisitedList() { delete[] mass; }
};
|
32
32
|
///////////////////////////////////////////////////////////
|
33
33
|
//
|
34
34
|
// Class for multi-threaded pool-management of VisitedLists
|
35
35
|
//
|
36
36
|
/////////////////////////////////////////////////////////
|
37
37
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
public:
|
44
|
-
VisitedListPool(int initmaxpools, int numelements1) {
|
45
|
-
numelements = numelements1;
|
46
|
-
for (int i = 0; i < initmaxpools; i++)
|
47
|
-
pool.push_front(new VisitedList(numelements));
|
48
|
-
}
|
38
|
+
class VisitedListPool {
|
39
|
+
std::deque<VisitedList *> pool;
|
40
|
+
std::mutex poolguard;
|
41
|
+
int numelements;
|
49
42
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
pool.pop_front();
|
57
|
-
} else {
|
58
|
-
rez = new VisitedList(numelements);
|
59
|
-
}
|
60
|
-
}
|
61
|
-
rez->reset();
|
62
|
-
return rez;
|
63
|
-
};
|
43
|
+
public:
|
44
|
+
VisitedListPool(int initmaxpools, int numelements1) {
|
45
|
+
numelements = numelements1;
|
46
|
+
for (int i = 0; i < initmaxpools; i++)
|
47
|
+
pool.push_front(new VisitedList(numelements));
|
48
|
+
}
|
64
49
|
|
65
|
-
|
50
|
+
VisitedList *getFreeVisitedList() {
|
51
|
+
VisitedList *rez;
|
52
|
+
{
|
66
53
|
std::unique_lock <std::mutex> lock(poolguard);
|
67
|
-
pool.
|
68
|
-
|
69
|
-
|
70
|
-
~VisitedListPool() {
|
71
|
-
while (pool.size()) {
|
72
|
-
VisitedList *rez = pool.front();
|
54
|
+
if (pool.size() > 0) {
|
55
|
+
rez = pool.front();
|
73
56
|
pool.pop_front();
|
74
|
-
|
57
|
+
} else {
|
58
|
+
rez = new VisitedList(numelements);
|
75
59
|
}
|
76
|
-
}
|
77
|
-
|
78
|
-
|
60
|
+
}
|
61
|
+
rez->reset();
|
62
|
+
return rez;
|
63
|
+
}
|
79
64
|
|
65
|
+
void releaseVisitedList(VisitedList *vl) {
|
66
|
+
std::unique_lock <std::mutex> lock(poolguard);
|
67
|
+
pool.push_front(vl);
|
68
|
+
}
|
69
|
+
|
70
|
+
~VisitedListPool() {
|
71
|
+
while (pool.size()) {
|
72
|
+
VisitedList *rez = pool.front();
|
73
|
+
pool.pop_front();
|
74
|
+
delete rez;
|
75
|
+
}
|
76
|
+
}
|
77
|
+
};
|
78
|
+
} // namespace hnswlib
|