umappp 0.1.5 → 0.2.0
- checksums.yaml +4 -4
- data/README.md +11 -4
- data/ext/umappp/umappp.cpp +41 -43
- data/lib/umappp/version.rb +1 -1
- data/lib/umappp.rb +5 -4
- data/vendor/aarand/aarand.hpp +141 -28
- data/vendor/annoy/annoylib.h +1 -1
- data/vendor/hnswlib/bruteforce.h +142 -127
- data/vendor/hnswlib/hnswalg.h +1018 -939
- data/vendor/hnswlib/hnswlib.h +149 -58
- data/vendor/hnswlib/space_ip.h +322 -229
- data/vendor/hnswlib/space_l2.h +283 -240
- data/vendor/hnswlib/visited_list_pool.h +54 -55
- data/vendor/irlba/irlba.hpp +12 -27
- data/vendor/irlba/lanczos.hpp +30 -31
- data/vendor/irlba/parallel.hpp +37 -38
- data/vendor/irlba/utils.hpp +12 -23
- data/vendor/irlba/wrappers.hpp +239 -70
- data/vendor/kmeans/Details.hpp +1 -1
- data/vendor/kmeans/HartiganWong.hpp +28 -2
- data/vendor/kmeans/InitializeKmeansPP.hpp +29 -1
- data/vendor/kmeans/Kmeans.hpp +25 -2
- data/vendor/kmeans/Lloyd.hpp +29 -2
- data/vendor/kmeans/MiniBatch.hpp +48 -8
- data/vendor/knncolle/Annoy/Annoy.hpp +3 -0
- data/vendor/knncolle/Hnsw/Hnsw.hpp +3 -0
- data/vendor/knncolle/Kmknn/Kmknn.hpp +11 -1
- data/vendor/knncolle/utils/find_nearest_neighbors.hpp +8 -6
- data/vendor/umappp/Umap.hpp +85 -43
- data/vendor/umappp/optimize_layout.hpp +410 -133
- data/vendor/umappp/spectral_init.hpp +4 -1
- metadata +6 -6
data/vendor/umappp/optimize_layout.hpp CHANGED
@@ -5,6 +5,10 @@
 #include <limits>
 #include <algorithm>
 #include <cmath>
+#ifndef UMAPPP_NO_PARALLEL_OPTIMIZATION
+#include <thread>
+#include <atomic>
+#endif

 #include "NeighborList.hpp"
 #include "aarand/aarand.hpp"
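The new guard makes the <thread>/<atomic> dependency optional; as a later hunk in this file shows, defining UMAPPP_NO_PARALLEL_OPTIMIZATION removes the threaded code entirely and turns the parallel entry point into a std::runtime_error. A minimal sketch of how a consumer might opt out; the include path and header name are assumptions, not part of this diff:

// sketch.cpp -- hypothetical consumer translation unit.
#define UMAPPP_NO_PARALLEL_OPTIMIZATION   // drop the <thread>/<atomic> dependency
#include "umappp/Umap.hpp"                // assumed to pull in optimize_layout.hpp

int main() {
    // UMAP can still be run as usual in serial mode; only a request for
    // parallel optimization now throws (see the #else branch added to
    // optimize_layout_parallel further down in this diff).
    return 0;
}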
@@ -85,94 +89,9 @@ Float clamp(Float input) {
     return std::min(std::max(input, min_gradient), max_gradient);
 }

-
-
-
-    int ndim,
-    Float* embedding,
-    Float* buffer,
-    Setup& setup,
-    Float a,
-    Float b,
-    Float gamma,
-    Float alpha,
-    Rng& rng,
-    Float epoch
-) {
-    const auto& head = setup.head;
-    const auto& tail = setup.tail;
-    const auto& epochs_per_sample = setup.epochs_per_sample;
-    auto& epoch_of_next_sample = setup.epoch_of_next_sample;
-    auto& epoch_of_next_negative_sample = setup.epoch_of_next_negative_sample;
-
-    const size_t num_obs = head.size();
-    const Float negative_sample_rate = setup.negative_sample_rate;
-
-    size_t start = (i == 0 ? 0 : setup.head[i-1]), end = setup.head[i];
-    Float* left = embedding + i * ndim;
-
-    for (size_t j = start; j < end; ++j) {
-        if (epoch_of_next_sample[j] > epoch) {
-            continue;
-        }
-
-        Float* right = embedding + tail[j] * ndim;
-        Float dist2 = quick_squared_distance(left, right, ndim);
-        const Float pd2b = std::pow(dist2, b);
-        const Float grad_coef = (-2 * a * b * pd2b) / (dist2 * (a * pd2b + 1.0));
-        {
-            Float* lcopy = left;
-            Float* rcopy = right;
-
-            for (int d = 0; d < ndim; ++d, ++lcopy, ++rcopy) {
-                Float gradient = alpha * clamp(grad_coef * (*lcopy - *rcopy));
-                if constexpr(!batch) {
-                    *lcopy += gradient;
-                    *rcopy -= gradient;
-                } else {
-                    // Doubling as we'll assume symmetry from the same
-                    // force applied by the right node. This allows us to
-                    // avoid worrying about accounting for modifications to
-                    // the right node.
-                    buffer[d] += 2 * gradient;
-                }
-            }
-        }
-
-        // Here, we divide by epochs_per_negative_sample, defined as epochs_per_sample[j] / negative_sample_rate.
-        const size_t num_neg_samples = (epoch - epoch_of_next_negative_sample[j]) * negative_sample_rate / epochs_per_sample[j];
-
-        for (size_t p = 0; p < num_neg_samples; ++p) {
-            size_t sampled = aarand::discrete_uniform(rng, num_obs);
-            if (sampled == i) {
-                continue;
-            }
-
-            Float* right = embedding + sampled * ndim;
-            Float dist2 = quick_squared_distance(left, right, ndim);
-            const Float grad_coef = 2 * gamma * b / ((0.001 + dist2) * (a * std::pow(dist2, b) + 1.0));
-            {
-                Float* lcopy = left;
-                const Float* rcopy = right;
-                for (int d = 0; d < ndim; ++d, ++lcopy, ++rcopy) {
-                    Float gradient = alpha * clamp(grad_coef * (*lcopy - *rcopy));
-                    if constexpr(!batch) {
-                        *lcopy += gradient;
-                    } else {
-                        buffer[d] += gradient;
-                    }
-                }
-            }
-        }
-
-        epoch_of_next_sample[j] += epochs_per_sample[j];
-
-        // The update to epoch_of_next_negative_sample involves adding
-        // num_neg_samples * epochs_per_negative_sample, which eventually boils
-        // down to setting epoch_of_next_negative_sample to 'n'.
-        epoch_of_next_negative_sample[j] = epoch;
-    }
-}
+/*****************************************************
+ ***************** Serial code ***********************
+ *****************************************************/

 template<typename Float, class Setup, class Rng>
 void optimize_layout(
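The arithmetic itself is unchanged between versions: writing d² for the squared distance between two embedded points, the attractive and repulsive coefficients computed as grad_coef in the removed code above (and re-used in the serial and parallel paths below) are

$$
\text{grad\_coef}_{\text{attr}} = \frac{-2ab\,(d^2)^{b}}{d^2\bigl(a\,(d^2)^{b}+1\bigr)},
\qquad
\text{grad\_coef}_{\text{rep}} = \frac{2\gamma b}{(0.001+d^2)\bigl(a\,(d^2)^{b}+1\bigr)},
$$

with each per-dimension update clamped and scaled by the learning rate alpha. The negative-sample count for edge j is (epoch − epoch_of_next_negative_sample[j]) · negative_sample_rate / epochs_per_sample[j], i.e. the elapsed epochs divided by epochs_per_negative_sample, exactly as the removed comment describes.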
@@ -192,20 +111,254 @@ void optimize_layout(
     if (epoch_limit> 0) {
         limit_epochs = std::min(epoch_limit, num_epochs);
     }
-
+
+    const size_t num_obs = setup.head.size();
     for (; n < limit_epochs; ++n) {
         const Float epoch = n;
         const Float alpha = initial_alpha * (1.0 - epoch / num_epochs);
-
-
+
+        for (size_t i = 0; i < num_obs; ++i) {
+            size_t start = (i == 0 ? 0 : setup.head[i-1]), end = setup.head[i];
+            Float* left = embedding + i * ndim;
+
+            for (size_t j = start; j < end; ++j) {
+                if (setup.epoch_of_next_sample[j] > epoch) {
+                    continue;
+                }
+
+                {
+                    Float* right = embedding + setup.tail[j] * ndim;
+                    Float dist2 = quick_squared_distance(left, right, ndim);
+                    const Float pd2b = std::pow(dist2, b);
+                    const Float grad_coef = (-2 * a * b * pd2b) / (dist2 * (a * pd2b + 1.0));
+
+                    Float* lcopy = left;
+                    for (int d = 0; d < ndim; ++d, ++lcopy, ++right) {
+                        Float gradient = alpha * clamp(grad_coef * (*lcopy - *right));
+                        *lcopy += gradient;
+                        *right -= gradient;
+                    }
+                }
+
+                // Remember that 'epochs_per_negative_sample' is defined as 'epochs_per_sample[j] / negative_sample_rate'.
+                // We just use it inline below rather than defining a new variable and suffering floating-point round-off.
+                const size_t num_neg_samples = (epoch - setup.epoch_of_next_negative_sample[j]) *
+                    setup.negative_sample_rate / setup.epochs_per_sample[j]; // i.e., 1/epochs_per_negative_sample.
+
+                for (size_t p = 0; p < num_neg_samples; ++p) {
+                    size_t sampled = aarand::discrete_uniform(rng, num_obs);
+                    if (sampled == i) {
+                        continue;
+                    }
+
+                    const Float* right = embedding + sampled * ndim;
+                    Float dist2 = quick_squared_distance(left, right, ndim);
+                    const Float grad_coef = 2 * gamma * b / ((0.001 + dist2) * (a * std::pow(dist2, b) + 1.0));
+
+                    Float* lcopy = left;
+                    for (int d = 0; d < ndim; ++d, ++lcopy, ++right) {
+                        *lcopy += alpha * clamp(grad_coef * (*lcopy - *right));
+                    }
+                }
+
+                setup.epoch_of_next_sample[j] += setup.epochs_per_sample[j];
+
+                // The update to 'epoch_of_next_negative_sample' involves adding
+                // 'num_neg_samples * epochs_per_negative_sample', which eventually boils
+                // down to setting epoch_of_next_negative_sample to 'epoch'.
+                setup.epoch_of_next_negative_sample[j] = epoch;
+            }
         }
     }

     return;
 }

-
-
+/*****************************************************
+ **************** Parallel code **********************
+ *****************************************************/
+
+#ifndef UMAPPP_NO_PARALLEL_OPTIMIZATION
+template<class Float, class Setup>
+struct BusyWaiterThread {
+public:
+    std::vector<size_t> selections;
+    std::vector<unsigned char> skips;
+    size_t observation;
+    Float alpha;
+
+private:
+    int ndim;
+    Float* embedding;
+    const Setup* setup;
+    Float a;
+    Float b;
+    Float gamma;
+
+    std::vector<Float> self_modified;
+
+private:
+    std::thread pool;
+    std::atomic<bool> ready = false;
+    bool finished = false;
+    bool active = false;
+
+public:
+    void run() {
+        ready.store(true, std::memory_order_release);
+    }
+
+    void wait() {
+        while (ready.load(std::memory_order_acquire)) {
+            ;
+        }
+    }
+
+    void migrate_parameters(BusyWaiterThread& src) {
+        selections.swap(src.selections);
+        skips.swap(src.skips);
+        alpha = src.alpha;
+        observation = src.observation;
+    }
+
+    void transfer_coordinates() {
+        std::copy(self_modified.begin(), self_modified.end(), embedding + observation * ndim);
+    }
+
+public:
+    void run_direct() {
+        auto seIt = selections.begin();
+        auto skIt = skips.begin();
+        const size_t i = observation;
+        const size_t start = (i == 0 ? 0 : setup->head[i-1]), end = setup->head[i];
+
+        // Copying it over into a thread-local buffer to avoid false sharing.
+        // We don't bother doing this for the neighbors, though, as it's
+        // tedious to make sure that the modified values are available during negative sampling.
+        // (This isn't a problem for the self, as the self cannot be its own negative sample.)
+        {
+            const Float* left = embedding + i * ndim;
+            std::copy(left, left + ndim, self_modified.data());
+        }
+
+        for (size_t j = start; j < end; ++j) {
+            if (*(skIt++)) {
+                continue;
+            }
+
+            {
+                Float* left = self_modified.data();
+                Float* right = embedding + setup->tail[j] * ndim;
+
+                Float dist2 = quick_squared_distance(left, right, ndim);
+                const Float pd2b = std::pow(dist2, b);
+                const Float grad_coef = (-2 * a * b * pd2b) / (dist2 * (a * pd2b + 1.0));
+
+                for (int d = 0; d < ndim; ++d, ++left, ++right) {
+                    Float gradient = alpha * clamp(grad_coef * (*left - *right));
+                    *left += gradient;
+                    *right -= gradient;
+                }
+            }
+
+            while (seIt != selections.end() && *seIt != -1) {
+                Float* left = self_modified.data();
+                const Float* right = embedding + (*seIt) * ndim;
+
+                Float dist2 = quick_squared_distance(left, right, ndim);
+                const Float grad_coef = 2 * gamma * b / ((0.001 + dist2) * (a * std::pow(dist2, b) + 1.0));
+
+                for (int d = 0; d < ndim; ++d, ++left, ++right) {
+                    *left += alpha * clamp(grad_coef * (*left - *right));
+                }
+                ++seIt;
+            }
+            ++seIt; // get past the -1.
+        }
+    }
+
+private:
+    void loop() {
+        while (true) {
+            while (!ready.load(std::memory_order_acquire)) {
+                ;
+            }
+            if (finished) {
+                break;
+            }
+            run_direct();
+            ready.store(false, std::memory_order_release);
+        }
+    }
+
+public:
+    BusyWaiterThread() {}
+
+    BusyWaiterThread(int ndim_, Float* embedding_, Setup& setup_, Float a_, Float b_, Float gamma_) :
+        ndim(ndim_),
+        embedding(embedding_),
+        setup(&setup_),
+        a(a_),
+        b(b_),
+        gamma(gamma_),
+        self_modified(ndim)
+    {}
+
+    void start() {
+        active = true;
+        pool = std::thread(&BusyWaiterThread::loop, this);
+    }
+
+public:
+    ~BusyWaiterThread() {
+        if (active) {
+            finished = true;
+            ready.store(true, std::memory_order_release);
+            pool.join();
+        }
+    }
+
+    BusyWaiterThread(BusyWaiterThread&&) = default;
+    BusyWaiterThread& operator=(BusyWaiterThread&&) = default;
+
+    BusyWaiterThread(const BusyWaiterThread& src) :
+        selections(src.selections),
+        skips(src.skips),
+        observation(src.observation),
+
+        ndim(src.ndim),
+        embedding(src.embedding),
+        setup(src.setup),
+        a(src.a),
+        b(src.b),
+        gamma(src.gamma),
+        alpha(src.alpha),
+
+        self_modified(src.self_modified)
+    {}
+
+    BusyWaiterThread& operator=(const BusyWaiterThread& src) {
+        selections = src.selections;
+        skips = src.skips;
+        observation = src.observation;
+
+        ndim = src.ndim;
+        embedding = src.embedding;
+        setup = src.setup;
+        a = src.a;
+        b = src.b;
+        gamma = src.gamma;
+        alpha = src.alpha;
+
+        self_modified = src.self_modified;
+    }
+};
+#endif
+
+//#define PRINT false
+
+template<typename Float, class Setup, class Rng>
+void optimize_layout_parallel(
     int ndim,
     Float* embedding,
     Setup& setup,
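The BusyWaiterThread introduced above hands work to a persistent thread through a single atomic flag rather than a mutex and condition variable: run() publishes a job with a release store, loop() spins on acquire loads, calls run_direct(), and clears the flag, and wait() spins until it is cleared again. A self-contained sketch of that handshake, with illustrative names that are not part of the umappp API:

// busy_wait_sketch.cpp -- standalone illustration of the acquire/release handshake.
#include <atomic>
#include <thread>
#include <iostream>

int main() {
    std::atomic<bool> ready(false);
    std::atomic<bool> finished(false);
    int payload = 0, result = 0;

    std::thread worker([&]() {
        while (true) {
            while (!ready.load(std::memory_order_acquire)) {
                ; // spin until work is published ("loop()")
            }
            if (finished.load(std::memory_order_relaxed)) {
                break;
            }
            result = payload * 2;                           // stand-in for "run_direct()"
            ready.store(false, std::memory_order_release);  // signal completion
        }
    });

    payload = 21;
    ready.store(true, std::memory_order_release);           // "run()"
    while (ready.load(std::memory_order_acquire)) {         // "wait()"
        ;
    }
    std::cout << result << std::endl;                        // prints 42

    finished.store(true, std::memory_order_relaxed);         // shut down, as in the destructor
    ready.store(true, std::memory_order_release);
    worker.join();
    return 0;
}

The trade-off, as the comments in the diff note, is that an idle worker burns a CPU while spinning, which is why the main thread runs the last job of each batch directly instead of submitting it.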
@@ -213,71 +366,195 @@ inline void optimize_layout_batched(
     Float b,
     Float gamma,
     Float initial_alpha,
-
-    EngineFunction creator,
+    Rng& rng,
     int epoch_limit,
     int nthreads
 ) {
+#ifndef UMAPPP_NO_PARALLEL_OPTIMIZATION
     auto& n = setup.current_epoch;
     auto num_epochs = setup.total_epochs;
     auto limit_epochs = num_epochs;
-    if (epoch_limit
+    if (epoch_limit> 0) {
         limit_epochs = std::min(epoch_limit, num_epochs);
     }

     const size_t num_obs = setup.head.size();
-    std::vector<
-    std::vector<
-
-
+    std::vector<int> last_touched(num_obs);
+    std::vector<unsigned char> touch_type(num_obs);
+
+    // We run some things directly in this main thread to avoid excessive busy-waiting.
+    BusyWaiterThread<Float, Setup> staging(ndim, embedding, setup, a, b, gamma);
+
+    int nthreadsm1 = nthreads - 1;
+    std::vector<BusyWaiterThread<Float, Setup> > pool;
+    pool.reserve(nthreadsm1);
+    for (int t = 0; t < nthreadsm1; ++t) {
+        pool.emplace_back(ndim, embedding, setup, a, b, gamma);
+        pool.back().start();
+    }
+
+    std::vector<int> jobs_in_progress;

     for (; n < limit_epochs; ++n) {
         const Float epoch = n;
         const Float alpha = initial_alpha * (1.0 - epoch / num_epochs);

-
-
-
-
+        int base_iteration = 0;
+        std::fill(last_touched.begin(), last_touched.end(), -1);
+
+        size_t i = 0;
+        while (i < num_obs) {
+            bool is_clear = true;
+            // if (PRINT) { std::cout << "size is " << jobs_in_progress.size() << std::endl; }
+
+            for (int t = jobs_in_progress.size(); t < nthreads && i < num_obs; ++t) {
+                staging.alpha = alpha;
+                staging.observation = i;
+
+                // Tapping the RNG here in the serial section.
+                auto& selections = staging.selections;
+                selections.clear();
+                auto& skips = staging.skips;
+                skips.clear();
+
+                const int self_iteration = i;
+                constexpr unsigned char READONLY = 0;
+                constexpr unsigned char WRITE = 1;
+
+                {
+                    auto& touched = last_touched[i];
+                    auto& ttype = touch_type[i];
+                    // if (PRINT) { std::cout << "SELF: " << i << ": " << touched << " (" << ttype << ")" << std::endl; }
+                    if (touched >= base_iteration) {
+                        is_clear = false;
+                        // if (PRINT) { std::cout << "=== FAILED! ===" << std::endl; }
+                    }
+                    touched = self_iteration;
+                    ttype = WRITE;
+                }

-
-
-
-
-
+                const size_t start = (i == 0 ? 0 : setup.head[i-1]), end = setup.head[i];
+                for (size_t j = start; j < end; ++j) {
+                    bool skip = setup.epoch_of_next_sample[j] > epoch;
+                    skips.push_back(skip);
+                    if (skip) {
+                        continue;
+                    }

-
-
-
-
-
-
-
-
-
-
-
+                    {
+                        auto neighbor = setup.tail[j];
+                        auto& touched = last_touched[neighbor];
+                        auto& ttype = touch_type[neighbor];
+                        // if (PRINT) { std::cout << "\tNEIGHBOR: " << neighbor << ": " << touched << " (" << ttype << ")" << std::endl; }
+                        if (touched >= base_iteration) {
+                            if (touched != self_iteration) {
+                                is_clear = false;
+                                // if (PRINT) { std::cout << "=== FAILED! ===" << std::endl; }
+                            }
+                        }
+                        touched = self_iteration;
+                        ttype = WRITE;
+                    }
+
+                    const size_t num_neg_samples = (epoch - setup.epoch_of_next_negative_sample[j]) *
+                        setup.negative_sample_rate / setup.epochs_per_sample[j];
+
+                    for (size_t p = 0; p < num_neg_samples; ++p) {
+                        size_t sampled = aarand::discrete_uniform(rng, num_obs);
+                        if (sampled == i) {
+                            continue;
+                        }
+                        selections.push_back(sampled);
+
+                        auto& touched = last_touched[sampled];
+                        auto& ttype = touch_type[sampled];
+                        // if (PRINT) { std::cout << "\t\tSAMPLED: " << sampled << ": " << touched << " (" << ttype << ")" << std::endl; }
+                        if (touched >= base_iteration) {
+                            if (touched != self_iteration) {
+                                if (ttype == WRITE) {
+                                    is_clear = false;
+                                    // if (PRINT) { std::cout << "=== FAILED! ===" << std::endl; }
+                                }
+                            }
+                        } else {
+                            // Only updating if it wasn't touched by a previous thread in this
+                            // round of thread iterations.
+                            ttype = READONLY;
+                            touched = self_iteration;
+                        }
+                    }
+
+                    selections.push_back(-1);

-
-
-
-            optimize_sample<true>(i, ndim, reference, buffer.data(), setup, a, b, gamma, alpha, rng, epoch);
-            std::copy(buffer.begin(), buffer.end(), output + shift);
+                    setup.epoch_of_next_sample[j] += setup.epochs_per_sample[j];
+                    setup.epoch_of_next_negative_sample[j] = epoch;
+                }

-
+                if (!is_clear) {
+                    // As we only updated the access for 'sampled' to READONLY
+                    // if they weren't touched by another thread, we need to go
+                    // through and manually update them now that the next round
+                    // of thread_iterations will use 'self_iteration' as the
+                    // 'base_iteration'. This ensures that the flags are properly
+                    // set for the next round, under the expectation that the
+                    // pending thread becomes the first thread.
+                    for (auto s : selections) {
+                        if (s != -1) {
+                            auto& touched = last_touched[s];
+                            if (touched != self_iteration) {
+                                touched = self_iteration;
+                                touch_type[s] = READONLY;
+                            }
+                        }
+                    }
+                    break;
+                }
+
+                // Submitting if it's not the final job, otherwise just running it directly.
+                // This avoids a busy-wait on the main thread that uses up an extra CPU.
+                if (t < nthreadsm1) {
+                    const int thread_index = i % nthreadsm1;
+                    pool[thread_index].migrate_parameters(staging);
+                    pool[thread_index].run();
+                    jobs_in_progress.push_back(thread_index);
+                } else {
+                    staging.run_direct();
+                    staging.transfer_coordinates();
+                }
+
+                ++i;
             }
-
-
+
+            // Waiting for all the jobs that were submitted.
+            for (auto job : jobs_in_progress) {
+                pool[job].wait();
+                pool[job].transfer_coordinates();
             }
-
-#endif
-}
+            jobs_in_progress.clear();

-
-
+            // if (PRINT) { std::cout << "###################### OK ##########################" << std::endl; }
+
+            base_iteration = i;
+            if (!is_clear) {
+                const int thread_index = i % nthreadsm1;
+                pool[thread_index].migrate_parameters(staging);
+                pool[thread_index].run();
+                jobs_in_progress.push_back(thread_index);
+                ++i;
+            }
+        }
+
+        for (auto job : jobs_in_progress) {
+            pool[job].wait();
+            pool[job].transfer_coordinates();
+        }
+        jobs_in_progress.clear();
     }

     return;
+#else
+    throw std::runtime_error("umappp was not compiled with support for parallel optimization");
+#endif
 }

 }
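The scheduler above never locks the embedding; instead it records, per observation, the iteration that last touched it (last_touched) and whether that touch was a write (touch_type), and it only dispatches a job while is_clear holds. A simplified, hypothetical distillation of that check (not umappp API; the real code additionally special-cases the job's own observation and defers some flag updates when a conflict is found):

#include <cstddef>
#include <vector>

constexpr unsigned char READONLY = 0;
constexpr unsigned char WRITE = 1;

// Decide whether a job that writes 'writes' and only reads 'reads' can run
// concurrently with the jobs dispatched since 'base_iteration', and record
// its own touches for the jobs that follow it.
bool is_clear_job(const std::vector<std::size_t>& writes,
                  const std::vector<std::size_t>& reads,
                  std::vector<int>& last_touched,
                  std::vector<unsigned char>& touch_type,
                  int base_iteration,
                  int self_iteration) {
    bool is_clear = true;
    for (auto w : writes) {
        // Writing a vertex that another pending job has touched is a conflict.
        if (last_touched[w] >= base_iteration && last_touched[w] != self_iteration) {
            is_clear = false;
        }
        last_touched[w] = self_iteration;
        touch_type[w] = WRITE;
    }
    for (auto r : reads) {
        if (last_touched[r] >= base_iteration) {
            // Reading is only a conflict if another pending job wrote to it.
            if (last_touched[r] != self_iteration && touch_type[r] == WRITE) {
                is_clear = false;
            }
        } else {
            last_touched[r] = self_iteration;
            touch_type[r] = READONLY;
        }
    }
    return is_clear;
}

In the diff, the "writes" are the observation and its neighbors (whose coordinates both move) and the "reads" are the negative samples; when the check fails, the batch is closed, the pending job is carried over, and base_iteration advances so the bookkeeping starts fresh.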
data/vendor/umappp/spectral_init.hpp CHANGED
@@ -101,7 +101,10 @@ bool normalized_laplacian(const NeighborList<Float>& edges, int ndim, Float* Y,
      * correspond to the smallest 'ndim + 1' eigenvalues from the original
      * matrix. This is obvious when we realize that the eigenvectors of A are
      * the same as the eigenvectors of (xI - A), but the order of eigenvalues
-     * is reversed because of the negation.
+     * is reversed because of the negation. Then, out of the 'ndim + 1' largest
+     * eigenvalues, we remove the largest one, because that corresponds to the
+     * smallest eigenvalue of zero in the original matrix - leaving us with
+     * eigenvectors for the 'ndim' smallest positive eigenvalues.
      *
      * Initially motivated by comments at yixuan/spectra#126 but I misread the
      * equations so this approach (while correct) is not what is described in
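The expanded comment is the standard spectral-shift argument. In symbols, if $A v = \lambda v$, then

$$(xI - A)\,v = (x - \lambda)\,v,$$

so the eigenvectors are shared and the eigenvalue order is reversed: the 'ndim + 1' smallest eigenvalues of A become the 'ndim + 1' largest of $xI - A$, and the single largest of those, $x - 0$, corresponds to the trivial zero eigenvalue of the original normalized Laplacian and is discarded, leaving the 'ndim' eigenvectors used for initialization.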
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: umappp
 version: !ruby/object:Gem::Version
-  version: 0.1.5
+  version: 0.2.0
 platform: ruby
 authors:
 - kojix2
-autorequire:
+autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2024-01-29 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -443,7 +443,7 @@ homepage: https://github.com/kojix2/ruby-umappp
 licenses:
 - BSD-2-Clause
 metadata: {}
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -458,8 +458,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: '0'
 requirements: []
-rubygems_version: 3.
-signing_key:
+rubygems_version: 3.5.4
+signing_key:
 specification_version: 4
 summary: Umap for Ruby
 test_files: []