umappp 0.1.6 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +11 -3
- data/ext/umappp/umappp.cpp +39 -45
- data/lib/umappp/version.rb +1 -1
- data/lib/umappp.rb +5 -4
- data/vendor/aarand/aarand.hpp +141 -28
- data/vendor/annoy/annoylib.h +1 -1
- data/vendor/hnswlib/bruteforce.h +142 -127
- data/vendor/hnswlib/hnswalg.h +1018 -939
- data/vendor/hnswlib/hnswlib.h +149 -58
- data/vendor/hnswlib/space_ip.h +322 -229
- data/vendor/hnswlib/space_l2.h +283 -240
- data/vendor/hnswlib/visited_list_pool.h +54 -55
- data/vendor/irlba/irlba.hpp +12 -27
- data/vendor/irlba/lanczos.hpp +30 -31
- data/vendor/irlba/parallel.hpp +37 -38
- data/vendor/irlba/utils.hpp +12 -23
- data/vendor/irlba/wrappers.hpp +239 -70
- data/vendor/kmeans/Details.hpp +1 -1
- data/vendor/kmeans/HartiganWong.hpp +28 -2
- data/vendor/kmeans/InitializeKmeansPP.hpp +29 -1
- data/vendor/kmeans/Kmeans.hpp +25 -2
- data/vendor/kmeans/Lloyd.hpp +29 -2
- data/vendor/kmeans/MiniBatch.hpp +48 -8
- data/vendor/knncolle/Annoy/Annoy.hpp +3 -0
- data/vendor/knncolle/Hnsw/Hnsw.hpp +3 -0
- data/vendor/knncolle/Kmknn/Kmknn.hpp +11 -1
- data/vendor/knncolle/utils/find_nearest_neighbors.hpp +8 -6
- data/vendor/umappp/Umap.hpp +85 -43
- data/vendor/umappp/optimize_layout.hpp +410 -133
- data/vendor/umappp/spectral_init.hpp +4 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4e329dda5fe3d577f175b6c55059f165c4e9569ed14208785cd0a9184b5d14df
|
4
|
+
data.tar.gz: 3e0a7ed9a3a7a08109019adef9dc5c1f9a88c82e77d28307875a719c4bb0551e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1838cffb49dcac3e8429d7d112bff2dca7d6a72608d1ca19889533c1d394d332da24457cade7179845901712176d3ae9af626ce372e1f6e444ab490203180b65
|
7
|
+
data.tar.gz: bff7628b13e053fe337d9cf3b0ba37e68012440b943147303d0208c16491f82f45c7069e83e48084999f24b0b4880a0c510cac551059ff45d7302f4167428c28
|
data/README.md
CHANGED
@@ -41,7 +41,6 @@ Available parameters and their default values
|
|
41
41
|
|----------------------|------------------------------------|
|
42
42
|
| method | :annoy (another option is :vptree) |
|
43
43
|
| ndim | 2 |
|
44
|
-
| tick | 0 (Not yet implemented) |
|
45
44
|
| local_connectivity | 1.0 |
|
46
45
|
| bandwidth | 1 |
|
47
46
|
| mix_ratio | 1 |
|
@@ -50,12 +49,12 @@ Available parameters and their default values
|
|
50
49
|
| a | 0 |
|
51
50
|
| b | 0 |
|
52
51
|
| repulsion_strength | 1 |
|
52
|
+
| initialize | Umappp::InitMethod::SPECTRAL |
|
53
53
|
| num_epochs | 500 |
|
54
54
|
| learning_rate | 1 |
|
55
55
|
| negative_sample_rate | 5 |
|
56
56
|
| num_neighbors | 15 |
|
57
57
|
| seed | 1234567890 |
|
58
|
-
| batch | false |
|
59
58
|
| num_threads | 1 (OpenMP required) |
|
60
59
|
|
61
60
|
## Development
|
@@ -63,10 +62,19 @@ Available parameters and their default values
|
|
63
62
|
```
|
64
63
|
git clone https://github.com/kojix2/ruby-umappp
|
65
64
|
cd ruby-umappp
|
66
|
-
|
65
|
+
bundle exec rake compile
|
67
66
|
bundle exec rake test
|
68
67
|
```
|
69
68
|
|
69
|
+
Update LTLA/umappp
|
70
|
+
|
71
|
+
Requires cmake to run
|
72
|
+
|
73
|
+
```
|
74
|
+
cd script
|
75
|
+
./vendor.sh
|
76
|
+
```
|
77
|
+
|
70
78
|
### Ruby dependencies
|
71
79
|
|
72
80
|
* [rice](https://github.com/jasonroelofs/rice) - Ruby Interface for C++ Extensions
|
data/ext/umappp/umappp.cpp
CHANGED
@@ -9,10 +9,6 @@
|
|
9
9
|
typedef float Float;
|
10
10
|
typedef typename umappp::Umap<Float> Umap;
|
11
11
|
|
12
|
-
#ifdef _OPENMP
|
13
|
-
#include <omp.h>
|
14
|
-
#endif
|
15
|
-
|
16
12
|
using namespace Rice;
|
17
13
|
|
18
14
|
// This function is used to view default parameters from Ruby.
|
@@ -28,13 +24,14 @@ Hash umappp_default_parameters(Object self)
|
|
28
24
|
d[Symbol("a")] = Umap::Defaults::a;
|
29
25
|
d[Symbol("b")] = Umap::Defaults::b;
|
30
26
|
d[Symbol("repulsion_strength")] = Umap::Defaults::repulsion_strength;
|
27
|
+
d[Symbol("initialize")] = Umap::Defaults::initialize;
|
31
28
|
d[Symbol("num_epochs")] = Umap::Defaults::num_epochs;
|
32
29
|
d[Symbol("learning_rate")] = Umap::Defaults::learning_rate;
|
33
30
|
d[Symbol("negative_sample_rate")] = Umap::Defaults::negative_sample_rate;
|
34
31
|
d[Symbol("num_neighbors")] = Umap::Defaults::num_neighbors;
|
35
32
|
d[Symbol("seed")] = Umap::Defaults::seed;
|
36
|
-
d[Symbol("batch")] = Umap::Defaults::batch;
|
37
33
|
d[Symbol("num_threads")] = Umap::Defaults::num_threads;
|
34
|
+
d[Symbol("parallel_optimization")] = Umap::Defaults::parallel_optimization;
|
38
35
|
|
39
36
|
return d;
|
40
37
|
}
|
@@ -46,13 +43,16 @@ Object umappp_run(
|
|
46
43
|
Hash params,
|
47
44
|
numo::SFloat data,
|
48
45
|
int ndim,
|
49
|
-
int nn_method
|
50
|
-
int tick = 0)
|
46
|
+
int nn_method)
|
51
47
|
{
|
52
48
|
// Parameters are taken from a Ruby Hash object.
|
53
49
|
// If a key is present, set the corresponding value.
|
50
|
+
if (ndim < 1)
|
51
|
+
{
|
52
|
+
throw std::runtime_error("ndim is less than 1");
|
53
|
+
}
|
54
54
|
|
55
|
-
|
55
|
+
std::unique_ptr<Umap> umap_ptr(new Umap);
|
56
56
|
|
57
57
|
double local_connectivity = Umap::Defaults::local_connectivity;
|
58
58
|
if (RTEST(params.call("has_key?", Symbol("local_connectivity"))))
|
@@ -110,6 +110,13 @@ Object umappp_run(
|
|
110
110
|
umap_ptr->set_repulsion_strength(repulsion_strength);
|
111
111
|
}
|
112
112
|
|
113
|
+
umappp::InitMethod initialize = Umap::Defaults::initialize;
|
114
|
+
if (RTEST(params.call("has_key?", Symbol("initialize"))))
|
115
|
+
{
|
116
|
+
initialize = params.get<umappp::InitMethod>(Symbol("initialize"));
|
117
|
+
umap_ptr->set_initialize(initialize);
|
118
|
+
}
|
119
|
+
|
113
120
|
int num_epochs = Umap::Defaults::num_epochs;
|
114
121
|
if (RTEST(params.call("has_key?", Symbol("num_epochs"))))
|
115
122
|
{
|
@@ -145,13 +152,6 @@ Object umappp_run(
|
|
145
152
|
umap_ptr->set_seed(seed);
|
146
153
|
}
|
147
154
|
|
148
|
-
bool batch = Umap::Defaults::batch;
|
149
|
-
if (RTEST(params.call("has_key?", Symbol("batch"))))
|
150
|
-
{
|
151
|
-
batch = params.get<bool>(Symbol("batch"));
|
152
|
-
umap_ptr->set_batch(batch);
|
153
|
-
}
|
154
|
-
|
155
155
|
int num_threads = Umap::Defaults::num_threads;
|
156
156
|
if (RTEST(params.call("has_key?", Symbol("num_threads"))))
|
157
157
|
{
|
@@ -159,6 +159,13 @@ Object umappp_run(
|
|
159
159
|
umap_ptr->set_num_threads(num_threads);
|
160
160
|
}
|
161
161
|
|
162
|
+
bool parallel_optimization = Umap::Defaults::parallel_optimization;
|
163
|
+
if (RTEST(params.call("has_key?", Symbol("parallel_optimization"))))
|
164
|
+
{
|
165
|
+
parallel_optimization = params.get<bool>(Symbol("parallel_optimization"));
|
166
|
+
umap_ptr->set_parallel_optimization(parallel_optimization);
|
167
|
+
}
|
168
|
+
|
162
169
|
// initialize_from_matrix
|
163
170
|
|
164
171
|
const float *y = data.read_ptr();
|
@@ -166,6 +173,10 @@ Object umappp_run(
|
|
166
173
|
|
167
174
|
int nd = shape[1];
|
168
175
|
int nobs = shape[0];
|
176
|
+
if (nobs < 0)
|
177
|
+
{
|
178
|
+
throw std::runtime_error("nobs is negative");
|
179
|
+
}
|
169
180
|
|
170
181
|
std::unique_ptr<knncolle::Base<int, Float>> knncolle_ptr;
|
171
182
|
if (nn_method == 0)
|
@@ -180,39 +191,16 @@ Object umappp_run(
|
|
180
191
|
std::vector<Float> embedding(ndim * nobs);
|
181
192
|
|
182
193
|
auto status = umap_ptr->initialize(knncolle_ptr.get(), ndim, embedding.data());
|
183
|
-
if (nobs < 0 || ndim < 0)
|
184
|
-
{
|
185
|
-
throw std::runtime_error("nobs or ndim is negative");
|
186
|
-
}
|
187
|
-
if (tick == 0)
|
188
|
-
{
|
189
|
-
status.run(ndim, embedding.data(), 0);
|
190
|
-
|
191
|
-
// it is safe to cast to unsigned int
|
192
|
-
auto na = numo::SFloat({(unsigned int)nobs, (unsigned int)ndim});
|
193
|
-
std::copy(embedding.begin(), embedding.end(), na.write_ptr());
|
194
194
|
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
{
|
199
|
-
VALUE ret = rb_ary_new();
|
200
|
-
|
201
|
-
while (status.epoch() < status.num_epochs())
|
202
|
-
{
|
203
|
-
int epoch_limit = status.epoch() + tick;
|
195
|
+
int epoch_limit = 0;
|
196
|
+
// tick is not implemented yet
|
197
|
+
status.run(epoch_limit);
|
204
198
|
|
205
|
-
|
199
|
+
// it is safe to cast to unsigned int
|
200
|
+
auto na = numo::SFloat({(unsigned int)nobs, (unsigned int)ndim});
|
201
|
+
std::copy(embedding.begin(), embedding.end(), na.write_ptr());
|
206
202
|
|
207
|
-
|
208
|
-
auto na = numo::SFloat({(unsigned int)nobs, (unsigned int)ndim});
|
209
|
-
std::copy(embedding.begin(), embedding.end(), na.write_ptr());
|
210
|
-
|
211
|
-
rb_ary_push(ret, na.value());
|
212
|
-
}
|
213
|
-
|
214
|
-
return ret;
|
215
|
-
}
|
203
|
+
return na;
|
216
204
|
}
|
217
205
|
|
218
206
|
extern "C" void Init_umappp()
|
@@ -221,4 +209,10 @@ extern "C" void Init_umappp()
|
|
221
209
|
define_module("Umappp")
|
222
210
|
.define_singleton_method("umappp_run", &umappp_run)
|
223
211
|
.define_singleton_method("umappp_default_parameters", &umappp_default_parameters);
|
212
|
+
Enum<umappp::InitMethod> init_method =
|
213
|
+
define_enum<umappp::InitMethod>("InitMethod", rb_mUmappp)
|
214
|
+
.define_value("SPECTRAL", umappp::InitMethod::SPECTRAL)
|
215
|
+
.define_value("SPECTRAL_ONLY", umappp::InitMethod::SPECTRAL_ONLY)
|
216
|
+
.define_value("RANDOM", umappp::InitMethod::RANDOM)
|
217
|
+
.define_value("NONE", umappp::InitMethod::NONE);
|
224
218
|
}
|
data/lib/umappp/version.rb
CHANGED
data/lib/umappp.rb
CHANGED
@@ -12,7 +12,7 @@ module Umappp
|
|
12
12
|
|
13
13
|
# View the default parameters defined within the Umappp C++ library structure.
|
14
14
|
def self.default_parameters
|
15
|
-
# {method: :annoy, ndim: 2
|
15
|
+
# {method: :annoy, ndim: 2}.merge
|
16
16
|
umappp_default_parameters
|
17
17
|
end
|
18
18
|
|
@@ -30,16 +30,17 @@ module Umappp
|
|
30
30
|
# @param a [Numeric]
|
31
31
|
# @param b [Numeric]
|
32
32
|
# @param repulsion_strength [Numeric]
|
33
|
+
# @param initialize [Umappp::InitMethod]
|
33
34
|
# @param num_epochs [Integer]
|
34
35
|
# @param learning_rate [Numeric]
|
35
36
|
# @param negative_sample_rate [Numeric]
|
36
37
|
# @param num_neighbors [Integer]
|
37
38
|
# @param seed [Integer]
|
38
|
-
# @param batch [Boolean]
|
39
39
|
# @param num_threads [Integer]
|
40
|
+
# @param parallel_optimization [Boolean]
|
40
41
|
# @return [Numo::SFloat] the final embedding
|
41
42
|
|
42
|
-
def self.run(embedding, method: :annoy, ndim: 2,
|
43
|
+
def self.run(embedding, method: :annoy, ndim: 2, **params)
|
43
44
|
unless (u = (params.keys - default_parameters.keys)).empty?
|
44
45
|
raise ArgumentError, "[umappp.rb] unknown option : #{u.inspect}"
|
45
46
|
end
|
@@ -50,6 +51,6 @@ module Umappp
|
|
50
51
|
embedding2 = Numo::SFloat.cast(embedding)
|
51
52
|
raise ArgumentError, "embedding must be a 2D array" if embedding2.ndim <= 1
|
52
53
|
|
53
|
-
umappp_run(params, embedding2, ndim, nnmethod
|
54
|
+
umappp_run(params, embedding2, ndim, nnmethod)
|
54
55
|
end
|
55
56
|
end
|
data/vendor/aarand/aarand.hpp
CHANGED
@@ -5,10 +5,17 @@
|
|
5
5
|
#include <limits>
|
6
6
|
#include <stdexcept>
|
7
7
|
|
8
|
+
/**
|
9
|
+
* @file aarand.hpp
|
10
|
+
*
|
11
|
+
* @brief Collection of random distribution functions.
|
12
|
+
*/
|
13
|
+
|
8
14
|
namespace aarand {
|
9
15
|
|
10
16
|
/**
|
11
|
-
* @tparam T Floating point type.
|
17
|
+
* @tparam T Floating point type to return.
|
18
|
+
* This is also used for intermediate calculations, so it is usually safest to provide a type that is at least as precise as a `double`.
|
12
19
|
* @tparam Engine A random number generator class with `operator()`, `min()` (static) and `max()` (static) methods,
|
13
20
|
* where the `result_type` is an unsigned integer value.
|
14
21
|
*
|
@@ -18,22 +25,34 @@ namespace aarand {
|
|
18
25
|
*/
|
19
26
|
template<typename T = double, class Engine>
|
20
27
|
T standard_uniform(Engine& eng) {
|
21
|
-
|
22
|
-
static_assert(std::numeric_limits<
|
28
|
+
typedef typename Engine::result_type R;
|
29
|
+
static_assert(std::numeric_limits<R>::is_integer, "RNG engine must yield integer results");
|
30
|
+
|
31
|
+
// Can't be bothered to figure out whether the range fits into 'R' for signed values.
|
32
|
+
// So instead, we just require unsigned integers, where the range will always fit.
|
33
|
+
static_assert(!std::numeric_limits<R>::is_signed, "RNG engine must yield unsigned integers");
|
34
|
+
|
35
|
+
// Make sure we get the right type to avoid inadvertent promotions.
|
36
|
+
constexpr T ONE_ = 1;
|
23
37
|
|
24
38
|
// Stolen from Boost, see https://www.boost.org/doc/libs/1_67_0/boost/random/uniform_01.hpp
|
25
39
|
// The +1 probably doesn't matter for 64-bit generators, but is helpful for engines with
|
26
40
|
// fewer output bits, to reduce the (small) probability of sampling 1's.
|
27
|
-
constexpr
|
28
|
-
|
41
|
+
constexpr T factor = ONE_ / (static_cast<T>(Engine::max() - Engine::min()) + ONE_);
|
42
|
+
|
43
|
+
// Note that it still might be possible to get a result = 1, depending on
|
44
|
+
// the numerical precision used to compute the product; hence the loop.
|
45
|
+
T result;
|
29
46
|
do {
|
30
47
|
result = static_cast<T>(eng() - Engine::min()) * factor;
|
31
|
-
} while (result ==
|
48
|
+
} while (result == ONE_);
|
49
|
+
|
32
50
|
return result;
|
33
51
|
}
|
34
52
|
|
35
53
|
/**
|
36
|
-
* @tparam T Floating point type.
|
54
|
+
* @tparam T Floating point type to return.
|
55
|
+
* This is also used for intermediate calculations, so it is usually safest to provide a type that is at least as precise as a `double`.
|
37
56
|
* @tparam Engine A random number generator class with `operator()`, `min()` (static) and `max()` (static) methods,
|
38
57
|
* where the `result_type` is an unsigned integer value.
|
39
58
|
*
|
@@ -43,16 +62,18 @@ T standard_uniform(Engine& eng) {
|
|
43
62
|
*/
|
44
63
|
template<typename T = double, class Engine>
|
45
64
|
std::pair<T, T> standard_normal(Engine& eng) {
|
46
|
-
constexpr
|
65
|
+
constexpr T PI_ = 3.14159265358979323846;
|
66
|
+
constexpr T TWO_ = 2;
|
47
67
|
|
48
68
|
// Box-Muller gives us two random values at a time.
|
49
|
-
|
50
|
-
|
69
|
+
T constant = std::sqrt(-TWO_ * std::log(standard_uniform<T>(eng)));
|
70
|
+
T angle = TWO_ * PI_ * standard_uniform<T>(eng);
|
51
71
|
return std::make_pair(constant * std::sin(angle), constant * std::cos(angle));
|
52
72
|
}
|
53
73
|
|
54
74
|
/**
|
55
|
-
* @tparam T Floating point type.
|
75
|
+
* @tparam T Floating point type to return.
|
76
|
+
* This is also used for intermediate calculations, so it is usually safest to provide a type that is at least as precise as a `double`.
|
56
77
|
* @tparam Engine A random number generator class with `operator()`, `min()` (static) and `max()` (static) methods,
|
57
78
|
* where the `result_type` is an unsigned integer value.
|
58
79
|
*
|
@@ -62,7 +83,11 @@ std::pair<T, T> standard_normal(Engine& eng) {
|
|
62
83
|
*/
|
63
84
|
template<typename T = double, class Engine>
|
64
85
|
T standard_exponential(Engine& eng) {
|
65
|
-
|
86
|
+
T val;
|
87
|
+
do {
|
88
|
+
val = standard_uniform<T>(eng);
|
89
|
+
} while (val == static_cast<T>(0));
|
90
|
+
return -std::log(val);
|
66
91
|
}
|
67
92
|
|
68
93
|
/**
|
@@ -79,7 +104,7 @@ template<typename T = int, class Engine>
|
|
79
104
|
T discrete_uniform(Engine& eng, T bound) {
|
80
105
|
typedef typename Engine::result_type R;
|
81
106
|
static_assert(std::numeric_limits<R>::is_integer);
|
82
|
-
static_assert(!std::numeric_limits<R>::is_signed);
|
107
|
+
static_assert(!std::numeric_limits<R>::is_signed); // don't want to figure out how to store the range.
|
83
108
|
|
84
109
|
constexpr R range = Engine::max() - Engine::min();
|
85
110
|
if (bound > range) {
|
@@ -91,22 +116,110 @@ T discrete_uniform(Engine& eng, T bound) {
|
|
91
116
|
throw std::runtime_error("'bound' should be a positive integer");
|
92
117
|
}
|
93
118
|
|
94
|
-
|
95
|
-
|
96
|
-
//
|
97
|
-
//
|
98
|
-
//
|
99
|
-
//
|
100
|
-
|
101
|
-
|
102
|
-
// In addition, we don't have to deal with the crap about combining draws
|
103
|
-
// to get enough entropy, which is 90% of the Boost implementation.
|
104
|
-
T draw;
|
105
|
-
do {
|
106
|
-
draw = (eng() - Engine::min()) % bound;
|
107
|
-
} while (draw > limit);
|
119
|
+
R draw = eng() - Engine::min();
|
120
|
+
|
121
|
+
// Conservative shortcut to avoid an extra modulo operation in computing
|
122
|
+
// 'limit' if 'draw' is below 'limit'. This is based on the observation
|
123
|
+
// that 'range - bound <= limit', so any condition that triggers the loop
|
124
|
+
// will also pass this check. Allows early return when 'range >> bound'.
|
125
|
+
if (draw > range - bound) {
|
108
126
|
|
109
|
-
|
127
|
+
// The limit is necessary to provide uniformity in the presence of the
|
128
|
+
// modulus. The idea is to re-sample if we get a draw above the limit.
|
129
|
+
// Technically this can have problems as bound approaches range, in which
|
130
|
+
// case we might end up discarding a lot of the sample space... but this
|
131
|
+
// is unlikely to happen in practice, and even if it does, it's a rejection
|
132
|
+
// rate that's guaranteed to be less than 50%, so whatever.
|
133
|
+
//
|
134
|
+
// Note that the +1 is necessary because range is inclusive but bound is not.
|
135
|
+
const R limit = range - ((range % bound) + 1);
|
136
|
+
|
137
|
+
// In addition, we don't have to deal with the crap about combining draws
|
138
|
+
// to get enough entropy, which is 90% of the Boost implementation.
|
139
|
+
while (draw > limit) {
|
140
|
+
draw = eng() - Engine::min();
|
141
|
+
}
|
142
|
+
}
|
143
|
+
|
144
|
+
return draw % bound;
|
145
|
+
}
|
146
|
+
|
147
|
+
/**
|
148
|
+
* @tparam In Random-access iterator or pointer.
|
149
|
+
* @tparam Engine A random number generator class with `operator()`, `min()` (static) and `max()` (static) methods,
|
150
|
+
* where the `result_type` is an unsigned integer value.
|
151
|
+
*
|
152
|
+
* @param values Iterator or pointer to an array of values to shuffle.
|
153
|
+
* @param n Number of values in the array pointed to by `values`.
|
154
|
+
* @param eng Instance of an RNG class like `std::mt19937_64`.
|
155
|
+
*
|
156
|
+
* @return Contents of `values` are randomly permuted in place using the Fisher-Yates algorithm.
|
157
|
+
*/
|
158
|
+
template<class In, class Engine>
|
159
|
+
void shuffle(In values, size_t n, Engine& eng) {
|
160
|
+
if (n) {
|
161
|
+
using std::swap;
|
162
|
+
for (size_t i = 0; i < n - 1; ++i) {
|
163
|
+
auto chosen = discrete_uniform(eng, n - i);
|
164
|
+
swap(*(values + i), *(values + i + chosen));
|
165
|
+
}
|
166
|
+
}
|
167
|
+
return;
|
168
|
+
}
|
169
|
+
|
170
|
+
/**
|
171
|
+
* @tparam In Random-access iterator or pointer for the inputs.
|
172
|
+
* @tparam Out Random-access iterator or pointer for the outputs.
|
173
|
+
* @tparam Engine A random number generator class with `operator()`, `min()` (static) and `max()` (static) methods,
|
174
|
+
* where the `result_type` is an unsigned integer value.
|
175
|
+
*
|
176
|
+
* @param values Iterator or pointer to an array of values to sample from.
|
177
|
+
* @param n Number of values in the array pointed to by `values`.
|
178
|
+
* @param s Number of values to sample.
|
179
|
+
* @param output Iterator or pointer to an array of length `s`, to store the sampled values.
|
180
|
+
* @param eng Instance of an RNG class like `std::mt19937_64`.
|
181
|
+
*
|
182
|
+
* @return `output` is filled with `s` sampled values from `values`.
|
183
|
+
*
|
184
|
+
* If `s > n`, `values` is copied into the first `n` elements of `output` and the remaining values of `output` are undefined.
|
185
|
+
*/
|
186
|
+
template<class In, class Out, class Engine>
|
187
|
+
void sample(In values, size_t n, size_t s, Out output, Engine& eng) {
|
188
|
+
for (size_t i = 0; i < n && s; ++i, ++values) {
|
189
|
+
const double threshold = static_cast<double>(s)/(n - i);
|
190
|
+
if (threshold >= 1 || standard_uniform(eng) <= threshold) {
|
191
|
+
*output = *values;
|
192
|
+
++output;
|
193
|
+
--s;
|
194
|
+
}
|
195
|
+
}
|
196
|
+
}
|
197
|
+
|
198
|
+
/**
|
199
|
+
* @tparam Out Random-access iterator or pointer for the outputs.
|
200
|
+
* @tparam Engine A random number generator class with `operator()`, `min()` (static) and `max()` (static) methods,
|
201
|
+
* where the `result_type` is an unsigned integer value.
|
202
|
+
*
|
203
|
+
* @param bound Upper bound of the indices to sample from.
|
204
|
+
* @param s Number of values to sample.
|
205
|
+
* @param output Iterator or pointer to an array of length `s`, to store the sampled values.
|
206
|
+
* @param eng Instance of an RNG class like `std::mt19937_64`.
|
207
|
+
*
|
208
|
+
* @return `output` is filled with `s` sampled values from the sequence of integers in `{0, 1, ..., bound - 1}`.
|
209
|
+
*
|
210
|
+
* If `s > bound`, the first `bound` elements of `output` will contain the sequence of integers from `0` to `bound - 1`.
|
211
|
+
* The remaining values of `output` are undefined.
|
212
|
+
*/
|
213
|
+
template<class Out, class Engine>
|
214
|
+
void sample(size_t bound, size_t s, Out output, Engine& eng) {
|
215
|
+
for (size_t i = 0; i < bound && s; ++i) {
|
216
|
+
const double threshold = static_cast<double>(s)/(bound - i);
|
217
|
+
if (threshold >= 1 || standard_uniform(eng) <= threshold) {
|
218
|
+
*output = i;
|
219
|
+
++output;
|
220
|
+
--s;
|
221
|
+
}
|
222
|
+
}
|
110
223
|
}
|
111
224
|
|
112
225
|
}
|
data/vendor/annoy/annoylib.h
CHANGED
@@ -128,7 +128,7 @@ inline void set_error_from_errno(char **error, const char* msg) {
|
|
128
128
|
annoylib_showUpdate("%s: %s (%d)\n", msg, strerror(errno), errno);
|
129
129
|
if (error) {
|
130
130
|
*error = (char *)malloc(256); // TODO: win doesn't support snprintf
|
131
|
-
|
131
|
+
snprintf(*error, 255, "%s: %s (%d)", msg, strerror(errno), errno);
|
132
132
|
}
|
133
133
|
}
|
134
134
|
|