umappp 0.1.6 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +22 -16
- data/ext/umappp/numo.hpp +957 -833
- data/ext/umappp/umappp.cpp +39 -45
- data/lib/umappp/version.rb +1 -1
- data/lib/umappp.rb +5 -4
- data/vendor/aarand/aarand.hpp +141 -28
- data/vendor/annoy/annoylib.h +1 -1
- data/vendor/hnswlib/bruteforce.h +142 -127
- data/vendor/hnswlib/hnswalg.h +1018 -939
- data/vendor/hnswlib/hnswlib.h +149 -58
- data/vendor/hnswlib/space_ip.h +322 -229
- data/vendor/hnswlib/space_l2.h +283 -240
- data/vendor/hnswlib/visited_list_pool.h +54 -55
- data/vendor/irlba/irlba.hpp +12 -27
- data/vendor/irlba/lanczos.hpp +30 -31
- data/vendor/irlba/parallel.hpp +37 -38
- data/vendor/irlba/utils.hpp +12 -23
- data/vendor/irlba/wrappers.hpp +239 -70
- data/vendor/kmeans/Details.hpp +1 -1
- data/vendor/kmeans/HartiganWong.hpp +28 -2
- data/vendor/kmeans/InitializeKmeansPP.hpp +29 -1
- data/vendor/kmeans/Kmeans.hpp +25 -2
- data/vendor/kmeans/Lloyd.hpp +29 -2
- data/vendor/kmeans/MiniBatch.hpp +48 -8
- data/vendor/knncolle/Annoy/Annoy.hpp +3 -0
- data/vendor/knncolle/Hnsw/Hnsw.hpp +3 -0
- data/vendor/knncolle/Kmknn/Kmknn.hpp +11 -1
- data/vendor/knncolle/utils/find_nearest_neighbors.hpp +8 -6
- data/vendor/umappp/Umap.hpp +85 -43
- data/vendor/umappp/optimize_layout.hpp +410 -133
- data/vendor/umappp/spectral_init.hpp +4 -1
- metadata +7 -10
data/vendor/umappp/optimize_layout.hpp
CHANGED

```diff
@@ -5,6 +5,10 @@
 #include <limits>
 #include <algorithm>
 #include <cmath>
+#ifndef UMAPPP_NO_PARALLEL_OPTIMIZATION
+#include <thread>
+#include <atomic>
+#endif
 
 #include "NeighborList.hpp"
 #include "aarand/aarand.hpp"
```
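The new includes are fenced behind `UMAPPP_NO_PARALLEL_OPTIMIZATION`, so a consumer can compile out the `<thread>`/`<atomic>` dependency entirely; the parallel entry point then throws instead (see the final hunk of this file). A minimal sketch of the same opt-out pattern — the file name, function, and message below are illustrative, not from the package:

```cpp
// demo.cpp -- hypothetical consumer of the flag; compile with
//   g++ -std=c++17 -DUMAPPP_NO_PARALLEL_OPTIMIZATION demo.cpp
// to strip the threaded code paths at build time.
#include <iostream>
#include <stdexcept>

void run_parallel_path() {
#ifndef UMAPPP_NO_PARALLEL_OPTIMIZATION
    std::cout << "parallel optimization compiled in\n";
#else
    // Mirrors the guard added in this release: fail loudly rather than
    // silently falling back to the serial path.
    throw std::runtime_error("compiled without parallel optimization");
#endif
}

int main() {
    try {
        run_parallel_path();
    } catch (const std::exception& e) {
        std::cerr << e.what() << "\n";
    }
}
```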
```diff
@@ -85,94 +89,9 @@ Float clamp(Float input) {
     return std::min(std::max(input, min_gradient), max_gradient);
 }
 
-template<bool batch, typename Float, class Setup, class Rng>
-void optimize_sample(
-    size_t i,
-    int ndim,
-    Float* embedding,
-    Float* buffer,
-    Setup& setup,
-    Float a,
-    Float b,
-    Float gamma,
-    Float alpha,
-    Rng& rng,
-    Float epoch
-) {
-    const auto& head = setup.head;
-    const auto& tail = setup.tail;
-    const auto& epochs_per_sample = setup.epochs_per_sample;
-    auto& epoch_of_next_sample = setup.epoch_of_next_sample;
-    auto& epoch_of_next_negative_sample = setup.epoch_of_next_negative_sample;
-
-    const size_t num_obs = head.size();
-    const Float negative_sample_rate = setup.negative_sample_rate;
-
-    size_t start = (i == 0 ? 0 : setup.head[i-1]), end = setup.head[i];
-    Float* left = embedding + i * ndim;
-
-    for (size_t j = start; j < end; ++j) {
-        if (epoch_of_next_sample[j] > epoch) {
-            continue;
-        }
-
-        Float* right = embedding + tail[j] * ndim;
-        Float dist2 = quick_squared_distance(left, right, ndim);
-        const Float pd2b = std::pow(dist2, b);
-        const Float grad_coef = (-2 * a * b * pd2b) / (dist2 * (a * pd2b + 1.0));
-        {
-            Float* lcopy = left;
-            Float* rcopy = right;
-
-            for (int d = 0; d < ndim; ++d, ++lcopy, ++rcopy) {
-                Float gradient = alpha * clamp(grad_coef * (*lcopy - *rcopy));
-                if constexpr(!batch) {
-                    *lcopy += gradient;
-                    *rcopy -= gradient;
-                } else {
-                    // Doubling as we'll assume symmetry from the same
-                    // force applied by the right node. This allows us to
-                    // avoid worrying about accounting for modifications to
-                    // the right node.
-                    buffer[d] += 2 * gradient;
-                }
-            }
-        }
-
-        // Here, we divide by epochs_per_negative_sample, defined as epochs_per_sample[j] / negative_sample_rate.
-        const size_t num_neg_samples = (epoch - epoch_of_next_negative_sample[j]) * negative_sample_rate / epochs_per_sample[j];
-
-        for (size_t p = 0; p < num_neg_samples; ++p) {
-            size_t sampled = aarand::discrete_uniform(rng, num_obs);
-            if (sampled == i) {
-                continue;
-            }
-
-            Float* right = embedding + sampled * ndim;
-            Float dist2 = quick_squared_distance(left, right, ndim);
-            const Float grad_coef = 2 * gamma * b / ((0.001 + dist2) * (a * std::pow(dist2, b) + 1.0));
-            {
-                Float* lcopy = left;
-                const Float* rcopy = right;
-                for (int d = 0; d < ndim; ++d, ++lcopy, ++rcopy) {
-                    Float gradient = alpha * clamp(grad_coef * (*lcopy - *rcopy));
-                    if constexpr(!batch) {
-                        *lcopy += gradient;
-                    } else {
-                        buffer[d] += gradient;
-                    }
-                }
-            }
-        }
-
-        epoch_of_next_sample[j] += epochs_per_sample[j];
-
-        // The update to epoch_of_next_negative_sample involves adding
-        // num_neg_samples * epochs_per_negative_sample, which eventually boils
-        // down to setting epoch_of_next_negative_sample to 'n'.
-        epoch_of_next_negative_sample[j] = epoch;
-    }
-}
+/*****************************************************
+ ***************** Serial code ***********************
+ *****************************************************/
 
 template<typename Float, class Setup, class Rng>
 void optimize_layout(
```
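The serial path now performs the attractive and repulsive updates inline rather than through the removed `optimize_sample` helper, as the next hunk shows. As a standalone illustration, here is one clamped attractive update on a pair of points, written against plain `double` buffers; the `a`/`b` curve parameters and the ±4 clipping bound are stand-ins for the values umappp derives at runtime:

```cpp
#include <algorithm>
#include <cmath>
#include <iostream>

// One clamped attractive update between 'left' and 'right', mirroring the
// per-edge step in the hunk below. The curve parameters a/b and the +/-4
// clipping bound are illustrative placeholders.
void attract(double* left, double* right, int ndim,
             double a, double b, double alpha) {
    double dist2 = 0;
    for (int d = 0; d < ndim; ++d) {
        const double delta = left[d] - right[d];
        dist2 += delta * delta;
    }
    const double pd2b = std::pow(dist2, b);
    const double grad_coef = (-2 * a * b * pd2b) / (dist2 * (a * pd2b + 1.0));
    for (int d = 0; d < ndim; ++d) {
        const double raw = grad_coef * (left[d] - right[d]);
        const double gradient = alpha * std::min(std::max(raw, -4.0), 4.0);
        left[d] += gradient;   // pull the two points together...
        right[d] -= gradient;  // ...from both ends, as in the diff.
    }
}

int main() {
    double p[2] = {0.0, 0.0}, q[2] = {1.0, 1.0};
    attract(p, q, 2, 1.58, 0.9, 1.0); // a/b roughly in UMAP's usual range
    std::cout << p[0] << " " << q[0] << "\n";
}
```

The negative-sampling bookkeeping in the hunk follows the arithmetic stated in its comment: with `epochs_per_sample[j] = 2` and `negative_sample_rate = 5`, an edge whose `epoch_of_next_negative_sample[j]` is 10 draws `(14 - 10) * 5 / 2 = 10` negative samples at epoch 14.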
```diff
@@ -192,20 +111,254 @@ void optimize_layout(
     if (epoch_limit> 0) {
         limit_epochs = std::min(epoch_limit, num_epochs);
     }
-
+
+    const size_t num_obs = setup.head.size();
     for (; n < limit_epochs; ++n) {
         const Float epoch = n;
         const Float alpha = initial_alpha * (1.0 - epoch / num_epochs);
-
-
+
+        for (size_t i = 0; i < num_obs; ++i) {
+            size_t start = (i == 0 ? 0 : setup.head[i-1]), end = setup.head[i];
+            Float* left = embedding + i * ndim;
+
+            for (size_t j = start; j < end; ++j) {
+                if (setup.epoch_of_next_sample[j] > epoch) {
+                    continue;
+                }
+
+                {
+                    Float* right = embedding + setup.tail[j] * ndim;
+                    Float dist2 = quick_squared_distance(left, right, ndim);
+                    const Float pd2b = std::pow(dist2, b);
+                    const Float grad_coef = (-2 * a * b * pd2b) / (dist2 * (a * pd2b + 1.0));
+
+                    Float* lcopy = left;
+                    for (int d = 0; d < ndim; ++d, ++lcopy, ++right) {
+                        Float gradient = alpha * clamp(grad_coef * (*lcopy - *right));
+                        *lcopy += gradient;
+                        *right -= gradient;
+                    }
+                }
+
+                // Remember that 'epochs_per_negative_sample' is defined as 'epochs_per_sample[j] / negative_sample_rate'.
+                // We just use it inline below rather than defining a new variable and suffering floating-point round-off.
+                const size_t num_neg_samples = (epoch - setup.epoch_of_next_negative_sample[j]) *
+                    setup.negative_sample_rate / setup.epochs_per_sample[j]; // i.e., 1/epochs_per_negative_sample.
+
+                for (size_t p = 0; p < num_neg_samples; ++p) {
+                    size_t sampled = aarand::discrete_uniform(rng, num_obs);
+                    if (sampled == i) {
+                        continue;
+                    }
+
+                    const Float* right = embedding + sampled * ndim;
+                    Float dist2 = quick_squared_distance(left, right, ndim);
+                    const Float grad_coef = 2 * gamma * b / ((0.001 + dist2) * (a * std::pow(dist2, b) + 1.0));
+
+                    Float* lcopy = left;
+                    for (int d = 0; d < ndim; ++d, ++lcopy, ++right) {
+                        *lcopy += alpha * clamp(grad_coef * (*lcopy - *right));
+                    }
+                }
+
+                setup.epoch_of_next_sample[j] += setup.epochs_per_sample[j];
+
+                // The update to 'epoch_of_next_negative_sample' involves adding
+                // 'num_neg_samples * epochs_per_negative_sample', which eventually boils
+                // down to setting epoch_of_next_negative_sample to 'epoch'.
+                setup.epoch_of_next_negative_sample[j] = epoch;
+            }
         }
     }
 
     return;
 }
 
-
-
+/*****************************************************
+ **************** Parallel code **********************
+ *****************************************************/
+
+#ifndef UMAPPP_NO_PARALLEL_OPTIMIZATION
+template<class Float, class Setup>
+struct BusyWaiterThread {
+public:
+    std::vector<size_t> selections;
+    std::vector<unsigned char> skips;
+    size_t observation;
+    Float alpha;
+
+private:
+    int ndim;
+    Float* embedding;
+    const Setup* setup;
+    Float a;
+    Float b;
+    Float gamma;
+
+    std::vector<Float> self_modified;
+
+private:
+    std::thread pool;
+    std::atomic<bool> ready = false;
+    bool finished = false;
+    bool active = false;
+
+public:
+    void run() {
+        ready.store(true, std::memory_order_release);
+    }
+
+    void wait() {
+        while (ready.load(std::memory_order_acquire)) {
+            ;
+        }
+    }
+
+    void migrate_parameters(BusyWaiterThread& src) {
+        selections.swap(src.selections);
+        skips.swap(src.skips);
+        alpha = src.alpha;
+        observation = src.observation;
+    }
+
+    void transfer_coordinates() {
+        std::copy(self_modified.begin(), self_modified.end(), embedding + observation * ndim);
+    }
+
+public:
+    void run_direct() {
+        auto seIt = selections.begin();
+        auto skIt = skips.begin();
+        const size_t i = observation;
+        const size_t start = (i == 0 ? 0 : setup->head[i-1]), end = setup->head[i];
+
+        // Copying it over into a thread-local buffer to avoid false sharing.
+        // We don't bother doing this for the neighbors, though, as it's
+        // tedious to make sure that the modified values are available during negative sampling.
+        // (This isn't a problem for the self, as the self cannot be its own negative sample.)
+        {
+            const Float* left = embedding + i * ndim;
+            std::copy(left, left + ndim, self_modified.data());
+        }
+
+        for (size_t j = start; j < end; ++j) {
+            if (*(skIt++)) {
+                continue;
+            }
+
+            {
+                Float* left = self_modified.data();
+                Float* right = embedding + setup->tail[j] * ndim;
+
+                Float dist2 = quick_squared_distance(left, right, ndim);
+                const Float pd2b = std::pow(dist2, b);
+                const Float grad_coef = (-2 * a * b * pd2b) / (dist2 * (a * pd2b + 1.0));
+
+                for (int d = 0; d < ndim; ++d, ++left, ++right) {
+                    Float gradient = alpha * clamp(grad_coef * (*left - *right));
+                    *left += gradient;
+                    *right -= gradient;
+                }
+            }
+
+            while (seIt != selections.end() && *seIt != -1) {
+                Float* left = self_modified.data();
+                const Float* right = embedding + (*seIt) * ndim;
+
+                Float dist2 = quick_squared_distance(left, right, ndim);
+                const Float grad_coef = 2 * gamma * b / ((0.001 + dist2) * (a * std::pow(dist2, b) + 1.0));
+
+                for (int d = 0; d < ndim; ++d, ++left, ++right) {
+                    *left += alpha * clamp(grad_coef * (*left - *right));
+                }
+                ++seIt;
+            }
+            ++seIt; // get past the -1.
+        }
+    }
+
+private:
+    void loop() {
+        while (true) {
+            while (!ready.load(std::memory_order_acquire)) {
+                ;
+            }
+            if (finished) {
+                break;
+            }
+            run_direct();
+            ready.store(false, std::memory_order_release);
+        }
+    }
+
+public:
+    BusyWaiterThread() {}
+
+    BusyWaiterThread(int ndim_, Float* embedding_, Setup& setup_, Float a_, Float b_, Float gamma_) :
+        ndim(ndim_),
+        embedding(embedding_),
+        setup(&setup_),
+        a(a_),
+        b(b_),
+        gamma(gamma_),
+        self_modified(ndim)
+    {}
+
+    void start() {
+        active = true;
+        pool = std::thread(&BusyWaiterThread::loop, this);
+    }
+
+public:
+    ~BusyWaiterThread() {
+        if (active) {
+            finished = true;
+            ready.store(true, std::memory_order_release);
+            pool.join();
+        }
+    }
+
+    BusyWaiterThread(BusyWaiterThread&&) = default;
+    BusyWaiterThread& operator=(BusyWaiterThread&&) = default;
+
+    BusyWaiterThread(const BusyWaiterThread& src) :
+        selections(src.selections),
+        skips(src.skips),
+        observation(src.observation),
+
+        ndim(src.ndim),
+        embedding(src.embedding),
+        setup(src.setup),
+        a(src.a),
+        b(src.b),
+        gamma(src.gamma),
+        alpha(src.alpha),
+
+        self_modified(src.self_modified)
+    {}
+
+    BusyWaiterThread& operator=(const BusyWaiterThread& src) {
+        selections = src.selections;
+        skips = src.skips;
+        observation = src.observation;
+
+        ndim = src.ndim;
+        embedding = src.embedding;
+        setup = src.setup;
+        a = src.a;
+        b = src.b;
+        gamma = src.gamma;
+        alpha = src.alpha;
+
+        self_modified = src.self_modified;
+    }
+};
+#endif
+
+//#define PRINT false
+
+template<typename Float, class Setup, class Rng>
+void optimize_layout_parallel(
     int ndim,
     Float* embedding,
     Setup& setup,
```
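`BusyWaiterThread` hands work to a spinning worker through a single `std::atomic<bool>` with release/acquire ordering: `run()` raises the flag, the worker's `loop()` spins until it sees it, executes the job, and lowers it again, which is what `wait()` spins on. A minimal, self-contained sketch of that handshake — the payload here is a placeholder counter, not the UMAP update:

```cpp
#include <atomic>
#include <iostream>
#include <thread>

int main() {
    std::atomic<bool> ready{false};
    bool finished = false;
    long payload = 0;

    // Worker spins until 'ready' is raised, runs the job, then lowers it.
    std::thread worker([&] {
        while (true) {
            while (!ready.load(std::memory_order_acquire)) { /* spin */ }
            if (finished) break;
            ++payload;                                  // stand-in for run_direct()
            ready.store(false, std::memory_order_release);
        }
    });

    for (int job = 0; job < 3; ++job) {
        ready.store(true, std::memory_order_release);   // run()
        while (ready.load(std::memory_order_acquire)) { /* wait() */ }
    }

    finished = true;                                    // same shutdown order as the destructor
    ready.store(true, std::memory_order_release);
    worker.join();
    std::cout << payload << "\n";                       // prints 3
}
```

The release store on `ready` paired with the acquire load is what makes the plain `finished` flag and the job fields safe to read on the other side, which is why the class gets away without any mutex.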
```diff
@@ -213,71 +366,195 @@ inline void optimize_layout_batched(
     Float b,
     Float gamma,
     Float initial_alpha,
-
-    EngineFunction creator,
+    Rng& rng,
     int epoch_limit,
     int nthreads
 ) {
+#ifndef UMAPPP_NO_PARALLEL_OPTIMIZATION
     auto& n = setup.current_epoch;
     auto num_epochs = setup.total_epochs;
     auto limit_epochs = num_epochs;
-    if (epoch_limit
+    if (epoch_limit> 0) {
         limit_epochs = std::min(epoch_limit, num_epochs);
     }
 
     const size_t num_obs = setup.head.size();
-    std::vector<
-    std::vector<
-
-
+    std::vector<int> last_touched(num_obs);
+    std::vector<unsigned char> touch_type(num_obs);
+
+    // We run some things directly in this main thread to avoid excessive busy-waiting.
+    BusyWaiterThread<Float, Setup> staging(ndim, embedding, setup, a, b, gamma);
+
+    int nthreadsm1 = nthreads - 1;
+    std::vector<BusyWaiterThread<Float, Setup> > pool;
+    pool.reserve(nthreadsm1);
+    for (int t = 0; t < nthreadsm1; ++t) {
+        pool.emplace_back(ndim, embedding, setup, a, b, gamma);
+        pool.back().start();
+    }
+
+    std::vector<int> jobs_in_progress;
 
     for (; n < limit_epochs; ++n) {
         const Float epoch = n;
         const Float alpha = initial_alpha * (1.0 - epoch / num_epochs);
 
-
-
-
-
+        int base_iteration = 0;
+        std::fill(last_touched.begin(), last_touched.end(), -1);
+
+        size_t i = 0;
+        while (i < num_obs) {
+            bool is_clear = true;
+            // if (PRINT) { std::cout << "size is " << jobs_in_progress.size() << std::endl; }
+
+            for (int t = jobs_in_progress.size(); t < nthreads && i < num_obs; ++t) {
+                staging.alpha = alpha;
+                staging.observation = i;
+
+                // Tapping the RNG here in the serial section.
+                auto& selections = staging.selections;
+                selections.clear();
+                auto& skips = staging.skips;
+                skips.clear();
+
+                const int self_iteration = i;
+                constexpr unsigned char READONLY = 0;
+                constexpr unsigned char WRITE = 1;
+
+                {
+                    auto& touched = last_touched[i];
+                    auto& ttype = touch_type[i];
+                    // if (PRINT) { std::cout << "SELF: " << i << ": " << touched << " (" << ttype << ")" << std::endl; }
+                    if (touched >= base_iteration) {
+                        is_clear = false;
+                        // if (PRINT) { std::cout << "=== FAILED! ===" << std::endl; }
+                    }
+                    touched = self_iteration;
+                    ttype = WRITE;
+                }
 
-
-
-
-
-
+                const size_t start = (i == 0 ? 0 : setup.head[i-1]), end = setup.head[i];
+                for (size_t j = start; j < end; ++j) {
+                    bool skip = setup.epoch_of_next_sample[j] > epoch;
+                    skips.push_back(skip);
+                    if (skip) {
+                        continue;
+                    }
 
-
-
-
-
-
-
-
-
-
-
-
+                    {
+                        auto neighbor = setup.tail[j];
+                        auto& touched = last_touched[neighbor];
+                        auto& ttype = touch_type[neighbor];
+                        // if (PRINT) { std::cout << "\tNEIGHBOR: " << neighbor << ": " << touched << " (" << ttype << ")" << std::endl; }
+                        if (touched >= base_iteration) {
+                            if (touched != self_iteration) {
+                                is_clear = false;
+                                // if (PRINT) { std::cout << "=== FAILED! ===" << std::endl; }
+                            }
+                        }
+                        touched = self_iteration;
+                        ttype = WRITE;
+                    }
+
+                    const size_t num_neg_samples = (epoch - setup.epoch_of_next_negative_sample[j]) *
+                        setup.negative_sample_rate / setup.epochs_per_sample[j];
+
+                    for (size_t p = 0; p < num_neg_samples; ++p) {
+                        size_t sampled = aarand::discrete_uniform(rng, num_obs);
+                        if (sampled == i) {
+                            continue;
+                        }
+                        selections.push_back(sampled);
+
+                        auto& touched = last_touched[sampled];
+                        auto& ttype = touch_type[sampled];
+                        // if (PRINT) { std::cout << "\t\tSAMPLED: " << sampled << ": " << touched << " (" << ttype << ")" << std::endl; }
+                        if (touched >= base_iteration) {
+                            if (touched != self_iteration) {
+                                if (ttype == WRITE) {
+                                    is_clear = false;
+                                    // if (PRINT) { std::cout << "=== FAILED! ===" << std::endl; }
+                                }
+                            }
+                        } else {
+                            // Only updating if it wasn't touched by a previous thread in this
+                            // round of thread iterations.
+                            ttype = READONLY;
+                            touched = self_iteration;
+                        }
+                    }
+
+                    selections.push_back(-1);
 
-
-
-
-                optimize_sample<true>(i, ndim, reference, buffer.data(), setup, a, b, gamma, alpha, rng, epoch);
-                std::copy(buffer.begin(), buffer.end(), output + shift);
+                    setup.epoch_of_next_sample[j] += setup.epochs_per_sample[j];
+                    setup.epoch_of_next_negative_sample[j] = epoch;
+                }
 
-
+                if (!is_clear) {
+                    // As we only updated the access for 'sampled' to READONLY
+                    // if they weren't touched by another thread, we need to go
+                    // through and manually update them now that the next round
+                    // of thread_iterations will use 'self_iteration' as the
+                    // 'base_iteration'. This ensures that the flags are properly
+                    // set for the next round, under the expectation that the
+                    // pending thread becomes the first thread.
+                    for (auto s : selections) {
+                        if (s != -1) {
+                            auto& touched = last_touched[s];
+                            if (touched != self_iteration) {
+                                touched = self_iteration;
+                                touch_type[s] = READONLY;
+                            }
+                        }
+                    }
+                    break;
+                }
+
+                // Submitting if it's not the final job, otherwise just running it directly.
+                // This avoids a busy-wait on the main thread that uses up an extra CPU.
+                if (t < nthreadsm1) {
+                    const int thread_index = i % nthreadsm1;
+                    pool[thread_index].migrate_parameters(staging);
+                    pool[thread_index].run();
+                    jobs_in_progress.push_back(thread_index);
+                } else {
+                    staging.run_direct();
+                    staging.transfer_coordinates();
+                }
+
+                ++i;
             }
-
-
+
+            // Waiting for all the jobs that were submitted.
+            for (auto job : jobs_in_progress) {
+                pool[job].wait();
+                pool[job].transfer_coordinates();
             }
-
-#endif
-}
+            jobs_in_progress.clear();
 
-
-
+            // if (PRINT) { std::cout << "###################### OK ##########################" << std::endl; }
+
+            base_iteration = i;
+            if (!is_clear) {
+                const int thread_index = i % nthreadsm1;
+                pool[thread_index].migrate_parameters(staging);
+                pool[thread_index].run();
+                jobs_in_progress.push_back(thread_index);
+                ++i;
+            }
+        }
+
+        for (auto job : jobs_in_progress) {
+            pool[job].wait();
+            pool[job].transfer_coordinates();
+        }
+        jobs_in_progress.clear();
     }
 
     return;
+#else
+    throw std::runtime_error("umappp was not compiled with support for parallel optimization");
+#endif
 }
 
 }
```
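The scheduler in `optimize_layout_parallel` packs observations into a round of concurrent jobs until one of them would touch a coordinate that an in-flight job has already claimed: `last_touched`/`touch_type` record who last touched each observation and whether the touch was a write (the self and its neighbors) or a read (negative samples). A toy sketch of that admission check over jobs with explicit read/write sets — `Job`, `writes`, and `reads` are invented for illustration, not from the package:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

// Toy version of the last_touched/touch_type bookkeeping: jobs are packed
// into one parallel round until a write overlaps with any prior touch, or
// a read overlaps with a prior write.
struct Job { std::vector<size_t> writes, reads; };

int main() {
    constexpr unsigned char READONLY = 0, WRITE = 1;
    const size_t num_obs = 6;
    std::vector<int> last_touched(num_obs, -1);
    std::vector<unsigned char> touch_type(num_obs);

    std::vector<Job> jobs{ {{0}, {3}}, {{1}, {3}}, {{2}, {1}} };
    int base_iteration = 0;

    for (int self = 0; self < (int) jobs.size(); ++self) {
        bool is_clear = true;
        for (auto w : jobs[self].writes) {
            // Any prior touch in this round conflicts with a write.
            if (last_touched[w] >= base_iteration) is_clear = false;
            last_touched[w] = self;
            touch_type[w] = WRITE;
        }
        for (auto r : jobs[self].reads) {
            // Reads only conflict with another job's writes.
            if (last_touched[r] >= base_iteration && last_touched[r] != self
                && touch_type[r] == WRITE) is_clear = false;
            if (last_touched[r] < base_iteration) {
                last_touched[r] = self;
                touch_type[r] = READONLY;
            }
        }
        std::cout << "job " << self
                  << (is_clear ? " runs in round\n" : " starts new round\n");
        if (!is_clear) base_iteration = self; // next round starts here
    }
}
```

Here jobs 0 and 1 share only a read of observation 3, so they run together; job 2 reads observation 1, which job 1 writes, so it is pushed into the next round — the same reason the real code breaks out of the submission loop and re-queues the staged job.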
data/vendor/umappp/spectral_init.hpp
CHANGED

```diff
@@ -101,7 +101,10 @@ bool normalized_laplacian(const NeighborList<Float>& edges, int ndim, Float* Y,
      * correspond to the smallest 'ndim + 1' eigenvalues from the original
      * matrix. This is obvious when we realize that the eigenvectors of A are
      * the same as the eigenvectors of (xI - A), but the order of eigenvalues
-     * is reversed because of the negation.
+     * is reversed because of the negation. Then, out of the 'ndim + 1' largest
+     * eigenvalues, we remove the largest one, because that corresponds to the
+     * smallest eigenvalue of zero in the original matrix - leaving us with
+     * eigenvectors for the 'ndim' smallest positive eigenvalues.
      *
      * Initially motivated by comments at yixuan/spectra#126 but I misread the
      * equations so this approach (while correct) is not what is described in
```
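Written out, the shift trick the expanded comment relies on is: if $Av = \lambda v$, then

$$(xI - A)v = xv - Av = (x - \lambda)v,$$

so $A$ and $xI - A$ share eigenvectors while the eigenvalue order is reversed ($\lambda \mapsto x - \lambda$). The `ndim + 1` largest eigenvalues of the shifted matrix therefore correspond to the `ndim + 1` smallest of the original; discarding the largest of them, which maps to the normalized Laplacian's trivial zero eigenvalue, leaves exactly the `ndim` eigenvectors used for the spectral initialization.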
metadata
CHANGED
```diff
@@ -1,14 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: umappp
 version: !ruby/object:Gem::Version
-  version: 0.1.6
+  version: 0.2.1
 platform: ruby
 authors:
 - kojix2
-autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 1980-01-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -28,16 +27,16 @@ dependencies:
   name: rice
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - ">="
       - !ruby/object:Gem::Version
-        version:
+        version: 4.5.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - ">="
       - !ruby/object:Gem::Version
-        version:
+        version: 4.5.0
 description: Umappp wrapper for Ruby
 email:
 - 2xijok@gmail.com
@@ -443,7 +442,6 @@ homepage: https://github.com/kojix2/ruby-umappp
 licenses:
 - BSD-2-Clause
 metadata: {}
-post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -458,8 +456,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: '0'
 requirements: []
-rubygems_version: 3.
-signing_key:
+rubygems_version: 3.6.7
 specification_version: 4
 summary: Umap for Ruby
 test_files: []
```