umappp 0.1.6 → 0.2.1
This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/README.md +22 -16
- data/ext/umappp/numo.hpp +957 -833
- data/ext/umappp/umappp.cpp +39 -45
- data/lib/umappp/version.rb +1 -1
- data/lib/umappp.rb +5 -4
- data/vendor/aarand/aarand.hpp +141 -28
- data/vendor/annoy/annoylib.h +1 -1
- data/vendor/hnswlib/bruteforce.h +142 -127
- data/vendor/hnswlib/hnswalg.h +1018 -939
- data/vendor/hnswlib/hnswlib.h +149 -58
- data/vendor/hnswlib/space_ip.h +322 -229
- data/vendor/hnswlib/space_l2.h +283 -240
- data/vendor/hnswlib/visited_list_pool.h +54 -55
- data/vendor/irlba/irlba.hpp +12 -27
- data/vendor/irlba/lanczos.hpp +30 -31
- data/vendor/irlba/parallel.hpp +37 -38
- data/vendor/irlba/utils.hpp +12 -23
- data/vendor/irlba/wrappers.hpp +239 -70
- data/vendor/kmeans/Details.hpp +1 -1
- data/vendor/kmeans/HartiganWong.hpp +28 -2
- data/vendor/kmeans/InitializeKmeansPP.hpp +29 -1
- data/vendor/kmeans/Kmeans.hpp +25 -2
- data/vendor/kmeans/Lloyd.hpp +29 -2
- data/vendor/kmeans/MiniBatch.hpp +48 -8
- data/vendor/knncolle/Annoy/Annoy.hpp +3 -0
- data/vendor/knncolle/Hnsw/Hnsw.hpp +3 -0
- data/vendor/knncolle/Kmknn/Kmknn.hpp +11 -1
- data/vendor/knncolle/utils/find_nearest_neighbors.hpp +8 -6
- data/vendor/umappp/Umap.hpp +85 -43
- data/vendor/umappp/optimize_layout.hpp +410 -133
- data/vendor/umappp/spectral_init.hpp +4 -1
- metadata +7 -10
data/vendor/hnswlib/space_l2.h
CHANGED
@@ -3,279 +3,322 @@

The file is essentially rewritten in 0.2.1: the compile-time SSE/AVX kernels of 0.1.6 are replaced by separate SSE, AVX and AVX-512 implementations selected at runtime through an L2SqrSIMD16Ext function pointer, and the scalar, residual and integer (L2SpaceI) variants are carried over with reformatting. The updated hunk reads:

namespace hnswlib {

static float
L2Sqr(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    float *pVect1 = (float *) pVect1v;
    float *pVect2 = (float *) pVect2v;
    size_t qty = *((size_t *) qty_ptr);

    float res = 0;
    for (size_t i = 0; i < qty; i++) {
        float t = *pVect1 - *pVect2;
        pVect1++;
        pVect2++;
        res += t * t;
    }
    return (res);
}

#if defined(USE_AVX512)

// Favor using AVX512 if available.
static float
L2SqrSIMD16ExtAVX512(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    float *pVect1 = (float *) pVect1v;
    float *pVect2 = (float *) pVect2v;
    size_t qty = *((size_t *) qty_ptr);
    float PORTABLE_ALIGN64 TmpRes[16];
    size_t qty16 = qty >> 4;

    const float *pEnd1 = pVect1 + (qty16 << 4);

    __m512 diff, v1, v2;
    __m512 sum = _mm512_set1_ps(0);

    while (pVect1 < pEnd1) {
        v1 = _mm512_loadu_ps(pVect1);
        pVect1 += 16;
        v2 = _mm512_loadu_ps(pVect2);
        pVect2 += 16;
        diff = _mm512_sub_ps(v1, v2);
        // sum = _mm512_fmadd_ps(diff, diff, sum);
        sum = _mm512_add_ps(sum, _mm512_mul_ps(diff, diff));
    }

    _mm512_store_ps(TmpRes, sum);
    float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] +
            TmpRes[7] + TmpRes[8] + TmpRes[9] + TmpRes[10] + TmpRes[11] + TmpRes[12] +
            TmpRes[13] + TmpRes[14] + TmpRes[15];

    return (res);
}
#endif

#if defined(USE_AVX)

// Favor using AVX if available.
static float
L2SqrSIMD16ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    float *pVect1 = (float *) pVect1v;
    float *pVect2 = (float *) pVect2v;
    size_t qty = *((size_t *) qty_ptr);
    float PORTABLE_ALIGN32 TmpRes[8];
    size_t qty16 = qty >> 4;

    const float *pEnd1 = pVect1 + (qty16 << 4);

    __m256 diff, v1, v2;
    __m256 sum = _mm256_set1_ps(0);

    while (pVect1 < pEnd1) {
        v1 = _mm256_loadu_ps(pVect1);
        pVect1 += 8;
        v2 = _mm256_loadu_ps(pVect2);
        pVect2 += 8;
        diff = _mm256_sub_ps(v1, v2);
        sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));

        v1 = _mm256_loadu_ps(pVect1);
        pVect1 += 8;
        v2 = _mm256_loadu_ps(pVect2);
        pVect2 += 8;
        diff = _mm256_sub_ps(v1, v2);
        sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
    }

    _mm256_store_ps(TmpRes, sum);
    return TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7];
}

#endif

#if defined(USE_SSE)

static float
L2SqrSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    float *pVect1 = (float *) pVect1v;
    float *pVect2 = (float *) pVect2v;
    size_t qty = *((size_t *) qty_ptr);
    float PORTABLE_ALIGN32 TmpRes[8];
    size_t qty16 = qty >> 4;

    const float *pEnd1 = pVect1 + (qty16 << 4);

    __m128 diff, v1, v2;
    __m128 sum = _mm_set1_ps(0);

    while (pVect1 < pEnd1) {
        //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        diff = _mm_sub_ps(v1, v2);
        sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));

        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        diff = _mm_sub_ps(v1, v2);
        sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));

        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        diff = _mm_sub_ps(v1, v2);
        sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));

        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        diff = _mm_sub_ps(v1, v2);
        sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
    }

    _mm_store_ps(TmpRes, sum);
    return TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
}
#endif

#if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512)
static DISTFUNC<float> L2SqrSIMD16Ext = L2SqrSIMD16ExtSSE;

static float
L2SqrSIMD16ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    size_t qty = *((size_t *) qty_ptr);
    size_t qty16 = qty >> 4 << 4;
    float res = L2SqrSIMD16Ext(pVect1v, pVect2v, &qty16);
    float *pVect1 = (float *) pVect1v + qty16;
    float *pVect2 = (float *) pVect2v + qty16;

    size_t qty_left = qty - qty16;
    float res_tail = L2Sqr(pVect1, pVect2, &qty_left);
    return (res + res_tail);
}
#endif

#if defined(USE_SSE)
static float
L2SqrSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    float PORTABLE_ALIGN32 TmpRes[8];
    float *pVect1 = (float *) pVect1v;
    float *pVect2 = (float *) pVect2v;
    size_t qty = *((size_t *) qty_ptr);

    size_t qty4 = qty >> 2;

    const float *pEnd1 = pVect1 + (qty4 << 2);

    __m128 diff, v1, v2;
    __m128 sum = _mm_set1_ps(0);

    while (pVect1 < pEnd1) {
        v1 = _mm_loadu_ps(pVect1);
        pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2);
        pVect2 += 4;
        diff = _mm_sub_ps(v1, v2);
        sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
    }
    _mm_store_ps(TmpRes, sum);
    return TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
}

static float
L2SqrSIMD4ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    size_t qty = *((size_t *) qty_ptr);
    size_t qty4 = qty >> 2 << 2;

    float res = L2SqrSIMD4Ext(pVect1v, pVect2v, &qty4);
    size_t qty_left = qty - qty4;

    float *pVect1 = (float *) pVect1v + qty4;
    float *pVect2 = (float *) pVect2v + qty4;
    float res_tail = L2Sqr(pVect1, pVect2, &qty_left);

    return (res + res_tail);
}
#endif

class L2Space : public SpaceInterface<float> {
    DISTFUNC<float> fstdistfunc_;
    size_t data_size_;
    size_t dim_;

 public:
    L2Space(size_t dim) {
        fstdistfunc_ = L2Sqr;
#if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512)
    #if defined(USE_AVX512)
        if (AVX512Capable())
            L2SqrSIMD16Ext = L2SqrSIMD16ExtAVX512;
        else if (AVXCapable())
            L2SqrSIMD16Ext = L2SqrSIMD16ExtAVX;
    #elif defined(USE_AVX)
        if (AVXCapable())
            L2SqrSIMD16Ext = L2SqrSIMD16ExtAVX;
    #endif

        if (dim % 16 == 0)
            fstdistfunc_ = L2SqrSIMD16Ext;
        else if (dim % 4 == 0)
            fstdistfunc_ = L2SqrSIMD4Ext;
        else if (dim > 16)
            fstdistfunc_ = L2SqrSIMD16ExtResiduals;
        else if (dim > 4)
            fstdistfunc_ = L2SqrSIMD4ExtResiduals;
#endif
        dim_ = dim;
        data_size_ = dim * sizeof(float);
    }

    size_t get_data_size() {
        return data_size_;
    }

    DISTFUNC<float> get_dist_func() {
        return fstdistfunc_;
    }

    void *get_dist_func_param() {
        return &dim_;
    }

    ~L2Space() {}
};

static int
L2SqrI4x(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) {
    size_t qty = *((size_t *) qty_ptr);
    int res = 0;
    unsigned char *a = (unsigned char *) pVect1;
    unsigned char *b = (unsigned char *) pVect2;

    qty = qty >> 2;
    for (size_t i = 0; i < qty; i++) {
        res += ((*a) - (*b)) * ((*a) - (*b));
        a++;
        b++;
        res += ((*a) - (*b)) * ((*a) - (*b));
        a++;
        b++;
        res += ((*a) - (*b)) * ((*a) - (*b));
        a++;
        b++;
        res += ((*a) - (*b)) * ((*a) - (*b));
        a++;
        b++;
    }
    return (res);
}

static int L2SqrI(const void* __restrict pVect1, const void* __restrict pVect2, const void* __restrict qty_ptr) {
    size_t qty = *((size_t*)qty_ptr);
    int res = 0;
    unsigned char* a = (unsigned char*)pVect1;
    unsigned char* b = (unsigned char*)pVect2;

    for (size_t i = 0; i < qty; i++) {
        res += ((*a) - (*b)) * ((*a) - (*b));
        a++;
        b++;
    }
    return (res);
}

class L2SpaceI : public SpaceInterface<int> {
    DISTFUNC<int> fstdistfunc_;
    size_t data_size_;
    size_t dim_;

 public:
    L2SpaceI(size_t dim) {
        if (dim % 4 == 0) {
            fstdistfunc_ = L2SqrI4x;
        } else {
            fstdistfunc_ = L2SqrI;
        }
        dim_ = dim;
        data_size_ = dim * sizeof(unsigned char);
    }

    size_t get_data_size() {
        return data_size_;
    }

    DISTFUNC<int> get_dist_func() {
        return fstdistfunc_;
    }

    void *get_dist_func_param() {
        return &dim_;
    }

    ~L2SpaceI() {}
};
}  // namespace hnswlib
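For orientation, these kernels are not called by name from the index code; the graph search obtains whichever function L2Space selected through the SpaceInterface accessors shown above. A minimal sketch of that dispatch, assuming the vendored headers are reachable as "hnswlib/hnswlib.h" and leaving the USE_SSE/USE_AVX/USE_AVX512 macros to the build flags:

```cpp
#include <cstddef>
#include <vector>
#include "hnswlib/hnswlib.h"  // assumed include path to the vendored headers; this pulls in space_l2.h

int main() {
    const size_t dim = 20;   // 20 % 4 == 0, so the constructor picks L2SqrSIMD4Ext when SSE is
                             // enabled at build time, and the scalar L2Sqr otherwise
    std::vector<float> a(dim, 0.0f), b(dim, 1.0f);

    hnswlib::L2Space space(dim);
    hnswlib::DISTFUNC<float> dist = space.get_dist_func();
    void *dim_ptr = space.get_dist_func_param();  // pointer to the dimension; every kernel reads it via qty_ptr

    float d = dist(a.data(), b.data(), dim_ptr);  // squared Euclidean distance: 20 * (1 - 0)^2 = 20
    return d == 20.0f ? 0 : 1;                    // exit code 0 when the kernel agrees with the expected value
}
```

Making L2SqrSIMD16Ext a DISTFUNC<float> pointer, rather than a fixed function as in 0.1.6, is what lets the constructor swap in the AVX-512 or AVX kernel at runtime via AVX512Capable()/AVXCapable() while keeping the SSE version as the default.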
data/vendor/hnswlib/visited_list_pool.h
CHANGED
@@ -5,75 +5,74 @@

The hunk reflows the VisitedList and VisitedListPool classes. The updated hunk reads:

#include <deque>

namespace hnswlib {
typedef unsigned short int vl_type;

class VisitedList {
 public:
    vl_type curV;
    vl_type *mass;
    unsigned int numelements;

    VisitedList(int numelements1) {
        curV = -1;
        numelements = numelements1;
        mass = new vl_type[numelements];
    }

    void reset() {
        curV++;
        if (curV == 0) {
            memset(mass, 0, sizeof(vl_type) * numelements);
            curV++;
        }
    }

    ~VisitedList() { delete[] mass; }
};
///////////////////////////////////////////////////////////
//
// Class for multi-threaded pool-management of VisitedLists
//
/////////////////////////////////////////////////////////

class VisitedListPool {
    std::deque<VisitedList *> pool;
    std::mutex poolguard;
    int numelements;

 public:
    VisitedListPool(int initmaxpools, int numelements1) {
        numelements = numelements1;
        for (int i = 0; i < initmaxpools; i++)
            pool.push_front(new VisitedList(numelements));
    }

    VisitedList *getFreeVisitedList() {
        VisitedList *rez;
        {
            std::unique_lock <std::mutex> lock(poolguard);
            if (pool.size() > 0) {
                rez = pool.front();
                pool.pop_front();
            } else {
                rez = new VisitedList(numelements);
            }
        }
        rez->reset();
        return rez;
    }

    void releaseVisitedList(VisitedList *vl) {
        std::unique_lock <std::mutex> lock(poolguard);
        pool.push_front(vl);
    }

    ~VisitedListPool() {
        while (pool.size()) {
            VisitedList *rez = pool.front();
            pool.pop_front();
            delete rez;
        }
    }
};
}  // namespace hnswlib
|