kdtree 0.1 → 0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/.travis.yml +9 -0
- data/Gemfile +2 -0
- data/LICENSE +1 -1
- data/README.md +70 -0
- data/Rakefile +41 -0
- data/ext/{extconf.rb → kdtree/extconf.rb} +1 -1
- data/ext/{kdtree.c → kdtree/kdtree.c} +157 -138
- data/kdtree.gemspec +22 -0
- data/lib/kdtree.rb +1 -0
- data/test/test_kdtree.rb +150 -0
- metadata +69 -45
- data/test/test.rb +0 -138
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE
CHANGED
data/README.md
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
## Kdtree
|
2
|
+
|
3
|
+
A kd tree is a data structure that recursively partitions the world in order to rapidly answer nearest neighbor queries. A generic kd tree can support any number of dimensions, and can return either the nearest neighbor or a set of N nearest neighbors.
|
4
|
+
|
5
|
+
This gem is a blazingly fast, native, 2d kdtree. It's specifically built to find the nearest neighbor when searching millions of points. It's used in production at Urbanspoon and several other companies.
|
6
|
+
|
7
|
+
The first version of this gem was released back in 2009. See the original [blog post](http://gurge.com/2009/10/22/ruby-nearest-neighbor-fast-kdtree-gem/) for the full story. Wikipedia has a great [article on kdtrees](http://en.wikipedia.org/wiki/K-d_tree).
|
8
|
+
|
9
|
+
### Usage
|
10
|
+
|
11
|
+
Usage is very simple:
|
12
|
+
|
13
|
+
* **Kdtree.new(points)** - construct a new tree. Each point should be of the form `[x, y, id]`, where `x/y` are floats and `id` is an int. Not a string, not an object, just an int.
|
14
|
+
* **kd.nearest(x, y)** - find the nearest point. Returns an id.
|
15
|
+
* **kd.nearestk(x, y, k)** - find the nearest `k` points. Returns an array of ids.
|
16
|
+
|
17
|
+
Also, I made it possible to **persist** the tree to disk and load it later. That way you can calculate the tree offline and load it quickly at some future point. Loading a persisted tree with 1 million points takes less than a second, as opposed to the 3.5 second build time shown in the Performance section below. For example:
|
18
|
+
|
19
|
+
```ruby
|
20
|
+
File.open("treefile", "w") { |f| kd.persist(f) }
|
21
|
+
... later ...
|
22
|
+
kd2 = File.open("treefile") { |f| Kdtree.new(f) }
|
23
|
+
```
|
24
|
+
|
25
|
+
### Performance
|
26
|
+
|
27
|
+
Kdtree is fast. How fast? Using a tree with 1 million points on my i5 2.8ghz:
|
28
|
+
|
29
|
+
```
|
30
|
+
build 3.5s
|
31
|
+
nearest point 0.000003s
|
32
|
+
nearest 5 points 0.000004s
|
33
|
+
nearest 50 points 0.000014s
|
34
|
+
nearest 255 points 0.000063s
|
35
|
+
```
|
36
|
+
|
37
|
+
### Limitations
|
38
|
+
|
39
|
+
* No **editing** allowed! Once you construct a tree you’re stuck with it.
|
40
|
+
* The tree is stored in **one big memory block**, 20 bytes per point. A tree with one million points will allocate a single 19mb block to store its nodes.
|
41
|
+
* Persisted trees are **architecture dependent**, and may not work across different machines due to endian issues.
|
42
|
+
* nearestk is limited to **255 results**
|
43
|
+
|
44
|
+
### Contributors
|
45
|
+
|
46
|
+
Since this gem was originally released, several folks have contributed important patches:
|
47
|
+
|
48
|
+
* @antifuchs (thread safety)
|
49
|
+
* @evanphx (native cleanups, perf)
|
50
|
+
* @ghazel (C89 compliance)
|
51
|
+
* @mcerna (1.9 compat)
|
52
|
+
|
53
|
+
### Changelog
|
54
|
+
|
55
|
+
#### 0.3 (in progress, unreleased)
|
56
|
+
|
57
|
+
* Ruby 1.9.x compatibility (@mcerna and others)
|
58
|
+
* renamed KDTree to the more idiomatic Kdtree
|
59
|
+
* use IO methods directly instead of rooting around in rb_io
|
60
|
+
* thread safe, no more statics (@antifuchs)
|
61
|
+
* C90 compliance, no warnings (@ghazel)
|
62
|
+
* native cleanups (@evanphx)
|
63
|
+
|
64
|
+
#### 0.2
|
65
|
+
|
66
|
+
skipped this version to prevent confusion with other flavors of the gem
|
67
|
+
|
68
|
+
#### 0.1
|
69
|
+
|
70
|
+
* Original release
|
data/Rakefile
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
require "bundler/setup"
|
2
|
+
require "rake/extensiontask"
|
3
|
+
require "rake/testtask"
|
4
|
+
|
5
|
+
# load the spec, we use it below
|
6
|
+
spec = Gem::Specification.load("kdtree.gemspec")
|
7
|
+
|
8
|
+
#
|
9
|
+
# gem
|
10
|
+
#
|
11
|
+
|
12
|
+
task :build do
|
13
|
+
system "gem build --quiet kdtree.gemspec"
|
14
|
+
end
|
15
|
+
|
16
|
+
task :install => :build do
|
17
|
+
system "sudo gem install --quiet kdtree-#{spec.version}.gem"
|
18
|
+
end
|
19
|
+
|
20
|
+
task :release => :build do
|
21
|
+
system "git tag -a #{spec.version} -m 'Tagging #{spec.version}'"
|
22
|
+
system "git push --tags"
|
23
|
+
system "gem push kdtree-#{spec.version}.gem"
|
24
|
+
end
|
25
|
+
|
26
|
+
#
|
27
|
+
# rake-compiler
|
28
|
+
#
|
29
|
+
|
30
|
+
Rake::ExtensionTask.new("kdtree", spec)
|
31
|
+
|
32
|
+
|
33
|
+
#
|
34
|
+
# testing
|
35
|
+
#
|
36
|
+
|
37
|
+
Rake::TestTask.new(:test) do |test|
|
38
|
+
test.libs << "test"
|
39
|
+
end
|
40
|
+
task :test => :compile
|
41
|
+
task :default => :test
|
@@ -1,15 +1,10 @@
|
|
1
1
|
#include "ruby.h"
|
2
|
-
#include "rubyio.h"
|
3
|
-
#include "version.h"
|
4
|
-
|
5
|
-
#ifndef HAVE_RB_IO_T
|
6
|
-
#define rb_io_t OpenFile
|
7
|
-
#endif
|
8
2
|
|
9
3
|
//
|
10
4
|
// interface
|
11
5
|
//
|
12
6
|
|
7
|
+
// the tree itself
|
13
8
|
typedef struct kdtree_data
|
14
9
|
{
|
15
10
|
int root;
|
@@ -17,6 +12,7 @@ typedef struct kdtree_data
|
|
17
12
|
struct kdtree_node *nodes;
|
18
13
|
} kdtree_data;
|
19
14
|
|
15
|
+
// a node in the tree
|
20
16
|
typedef struct kdtree_node
|
21
17
|
{
|
22
18
|
float x, y;
|
@@ -25,10 +21,18 @@ typedef struct kdtree_node
|
|
25
21
|
int right;
|
26
22
|
} kdtree_node;
|
27
23
|
|
24
|
+
// a result node from kdtree_nearestk0
|
25
|
+
typedef struct kresult {
|
26
|
+
int index;
|
27
|
+
float distance;
|
28
|
+
} kresult;
|
29
|
+
|
30
|
+
// helper macro for digging out our struct
|
28
31
|
#define KDTREEP \
|
29
32
|
struct kdtree_data *kdtreep; \
|
30
33
|
Data_Get_Struct(kdtree, struct kdtree_data, kdtreep);
|
31
34
|
|
35
|
+
// kdtree public methods
|
32
36
|
static VALUE kdtree_alloc(VALUE klass);
|
33
37
|
static void kdtree_free(struct kdtree_data *kdtreep);
|
34
38
|
static VALUE kdtree_initialize(VALUE kdtree, VALUE points);
|
@@ -37,13 +41,20 @@ static VALUE kdtree_nearestk(VALUE kdtree, VALUE x, VALUE y, VALUE k);
|
|
37
41
|
static VALUE kdtree_persist(VALUE kdtree, VALUE io);
|
38
42
|
static VALUE kdtree_to_s(VALUE kdtree);
|
39
43
|
|
40
|
-
// helpers
|
44
|
+
// kdtree helpers
|
41
45
|
static int kdtree_build(struct kdtree_data *kdtreep, int min, int max, int depth);
|
42
|
-
static void kdtree_nearest0(struct kdtree_data *kdtreep, int i, float x, float y, int depth);
|
43
|
-
static void kdtree_nearestk0(struct kdtree_data *kdtreep, int i, float x, float y, int k, int depth);
|
46
|
+
static void kdtree_nearest0(struct kdtree_data *kdtreep, int i, float x, float y, int depth, int *n_index, float *n_dist);
|
47
|
+
static void kdtree_nearestk0(struct kdtree_data *kdtreep, int i, float x, float y, int k, int depth, kresult *k_list, int *k_len, float *k_dist);
|
48
|
+
|
49
|
+
// io helpers
|
50
|
+
static void read_all(VALUE io, void *buf, int len);
|
51
|
+
static void write_all(VALUE io, const void *buf, int len);
|
44
52
|
|
45
53
|
#define KDTREE_MAGIC "KdTr"
|
46
54
|
|
55
|
+
// ids
|
56
|
+
static ID id_read, id_write, id_binmode;
|
57
|
+
|
47
58
|
//
|
48
59
|
// implementation
|
49
60
|
//
|
@@ -63,24 +74,12 @@ static void kdtree_free(struct kdtree_data *kdtreep)
|
|
63
74
|
}
|
64
75
|
}
|
65
76
|
|
66
|
-
static void read_all(struct rb_io_t *fptr, char *buf, int len)
|
67
|
-
{
|
68
|
-
while (len > 0) {
|
69
|
-
int n = rb_io_fread(buf, len, fptr->f);
|
70
|
-
if (n == 0) {
|
71
|
-
rb_eof_error();
|
72
|
-
}
|
73
|
-
buf += n;
|
74
|
-
len -= n;
|
75
|
-
}
|
76
|
-
}
|
77
|
-
|
78
77
|
/*
|
79
78
|
* call-seq:
|
80
|
-
*
|
81
|
-
*
|
79
|
+
* Kdtree.new(points) => kdtree
|
80
|
+
* Kdtree.new(io) => kdtree
|
82
81
|
*
|
83
|
-
* Returns a new <code>
|
82
|
+
* Returns a new <code>Kdtree</code>. To construct a tree, pass an array of
|
84
83
|
* <i>points</i>. Each point should be an array of the form <code>[x, y,
|
85
84
|
* id]</code>, where <i>x</i> and <i>y</i> are floats and <i>id</i> is an
|
86
85
|
* integer. The <i>id</i> is arbitrary and will be returned to you whenever you
|
@@ -90,7 +89,7 @@ static void read_all(struct rb_io_t *fptr, char *buf, int len)
|
|
90
89
|
* points = []
|
91
90
|
* points << [47.6, -122.3, 1] # Seattle
|
92
91
|
* points << [40.7, -74.0, 2] # New York
|
93
|
-
* kd =
|
92
|
+
* kd = Kdtree.new(points)
|
94
93
|
*
|
95
94
|
* Alternately, you can pass in an <i>IO</i> object containing a persisted
|
96
95
|
* kdtree. This makes it possible to build the tree in advance, persist it, and
|
@@ -103,104 +102,97 @@ static VALUE kdtree_initialize(VALUE kdtree, VALUE arg)
|
|
103
102
|
if (TYPE(arg) == T_ARRAY) {
|
104
103
|
// init from array of points
|
105
104
|
VALUE points = arg;
|
105
|
+
int i;
|
106
106
|
kdtreep->len = RARRAY_LEN(points);
|
107
107
|
kdtreep->nodes = ALLOC_N(struct kdtree_node, kdtreep->len);
|
108
108
|
|
109
|
-
int i;
|
110
109
|
for (i = 0; i < RARRAY_LEN(points); ++i) {
|
111
110
|
struct kdtree_node *n = kdtreep->nodes + i;
|
112
|
-
|
113
|
-
VALUE ptr =
|
111
|
+
|
112
|
+
VALUE ptr = rb_ary_entry(points, i);
|
114
113
|
VALUE v = rb_check_array_type(ptr);
|
115
114
|
if (NIL_P(v) || RARRAY_LEN(v) != 3) {
|
116
115
|
continue;
|
117
116
|
}
|
118
|
-
|
119
|
-
n->
|
120
|
-
n->
|
121
|
-
n->id = NUM2INT(a[2]);
|
117
|
+
n->x = NUM2DBL(rb_ary_entry(v, 0));
|
118
|
+
n->y = NUM2DBL(rb_ary_entry(v, 1));
|
119
|
+
n->id = NUM2INT(rb_ary_entry(v, 2));
|
122
120
|
}
|
123
121
|
|
124
122
|
// now build the tree
|
125
123
|
kdtreep->root = kdtree_build(kdtreep, 0, kdtreep->len, 0);
|
126
124
|
} else if (rb_respond_to(arg, rb_intern("read"))) {
|
127
125
|
VALUE io = arg;
|
128
|
-
|
129
|
-
|
126
|
+
char buf[4];
|
127
|
+
if (rb_respond_to(io, id_binmode)) {
|
128
|
+
rb_funcall(io, id_binmode, 0);
|
130
129
|
}
|
131
130
|
|
132
|
-
struct rb_io_t *fptr = RFILE(rb_io_taint_check(io))->fptr;
|
133
|
-
rb_io_check_readable(fptr);
|
134
|
-
|
135
131
|
// check magic
|
136
|
-
|
137
|
-
read_all(fptr, buf, 4);
|
132
|
+
read_all(io, buf, 4);
|
138
133
|
if (memcmp(KDTREE_MAGIC, buf, 4) != 0) {
|
139
134
|
rb_raise(rb_eRuntimeError, "wrong magic number in kdtree file");
|
140
135
|
}
|
141
|
-
|
136
|
+
|
142
137
|
// read start of the struct
|
143
|
-
read_all(
|
138
|
+
read_all(io, kdtreep, sizeof(struct kdtree_data) - sizeof(struct kdtree_node *));
|
139
|
+
|
144
140
|
// read the nodes
|
145
141
|
kdtreep->nodes = ALLOC_N(struct kdtree_node, kdtreep->len);
|
146
|
-
read_all(
|
142
|
+
read_all(io, kdtreep->nodes, sizeof(struct kdtree_node) * kdtreep->len);
|
147
143
|
} else {
|
148
|
-
rb_raise(rb_eTypeError, "array or IO required to init
|
144
|
+
rb_raise(rb_eTypeError, "array or IO required to init Kdtree");
|
149
145
|
}
|
150
|
-
|
146
|
+
|
151
147
|
return kdtree;
|
152
148
|
}
|
153
149
|
|
154
150
|
static int comparex(const void *pa, const void *pb)
|
155
151
|
{
|
156
152
|
float a = ((const struct kdtree_node*)pa)->x;
|
157
|
-
float b = ((const struct kdtree_node*)pb)->x;
|
153
|
+
float b = ((const struct kdtree_node*)pb)->x;
|
158
154
|
return (a < b) ? -1 : ((a > b) ? 1 : 0);
|
159
155
|
}
|
160
156
|
|
161
157
|
static int comparey(const void *pa, const void *pb)
|
162
158
|
{
|
163
159
|
float a = ((const struct kdtree_node*)pa)->y;
|
164
|
-
float b = ((const struct kdtree_node*)pb)->y;
|
160
|
+
float b = ((const struct kdtree_node*)pb)->y;
|
165
161
|
return (a < b) ? -1 : ((a > b) ? 1 : 0);
|
166
162
|
}
|
167
163
|
|
168
164
|
static int kdtree_build(struct kdtree_data *kdtreep, int min, int max, int depth)
|
169
165
|
{
|
166
|
+
int(*compar)(const void *, const void *);
|
167
|
+
struct kdtree_node *m;
|
168
|
+
int median;
|
170
169
|
if (max <= min) {
|
171
170
|
return -1;
|
172
171
|
}
|
173
172
|
|
174
173
|
// sort nodes from min to max
|
175
|
-
|
174
|
+
compar = (depth % 2) ? comparex : comparey;
|
176
175
|
qsort(kdtreep->nodes + min, max - min, sizeof(struct kdtree_node), compar);
|
177
176
|
|
178
|
-
|
179
|
-
|
177
|
+
median = (min + max) / 2;
|
178
|
+
m = kdtreep->nodes + median;
|
180
179
|
m->left = kdtree_build(kdtreep, min, median, depth + 1);
|
181
180
|
m->right = kdtree_build(kdtreep, median + 1, max, depth + 1);
|
182
181
|
return median;
|
183
182
|
}
|
184
183
|
|
185
|
-
//
|
186
|
-
// nearest
|
187
|
-
//
|
188
|
-
|
189
|
-
static int n_index;
|
190
|
-
static float n_dist;
|
191
|
-
|
192
184
|
/*
|
193
185
|
* call-seq:
|
194
186
|
* kd.nearest(x, y) => id
|
195
187
|
*
|
196
188
|
* Finds the point closest to <i>x</i>, <i>y</i> and returns the id for that
|
197
189
|
* point. Returns -1 if the tree is empty.
|
198
|
-
*
|
190
|
+
*
|
199
191
|
* points = []
|
200
192
|
* points << [47.6, -122.3, 1] # Seattle
|
201
193
|
* points << [40.7, -74.0, 2] # New York
|
202
|
-
* kd =
|
203
|
-
*
|
194
|
+
* kd = Kdtree.new(points)
|
195
|
+
*
|
204
196
|
* # which city is closest to Portland?
|
205
197
|
* kd.nearest(45.5, -122.8) #=> 1
|
206
198
|
* # which city is closest to Boston?
|
@@ -208,52 +200,59 @@ static float n_dist;
|
|
208
200
|
*/
|
209
201
|
static VALUE kdtree_nearest(VALUE kdtree, VALUE x, VALUE y)
|
210
202
|
{
|
203
|
+
int n_index;
|
204
|
+
float n_dist;
|
211
205
|
KDTREEP;
|
212
206
|
|
213
207
|
n_index = -1;
|
214
208
|
n_dist = INT_MAX;
|
215
|
-
|
209
|
+
|
210
|
+
kdtree_nearest0(kdtreep, kdtreep->root, NUM2DBL(x), NUM2DBL(y), 0, &n_index, &n_dist);
|
216
211
|
if (n_index == -1) {
|
217
212
|
return -1;
|
218
213
|
}
|
219
214
|
return INT2NUM((kdtreep->nodes + n_index)->id);
|
220
215
|
}
|
221
216
|
|
222
|
-
static void kdtree_nearest0(struct kdtree_data *kdtreep, int i, float x, float y, int depth)
|
217
|
+
static void kdtree_nearest0(struct kdtree_data *kdtreep, int i, float x, float y, int depth, int *n_index, float *n_dist)
|
223
218
|
{
|
219
|
+
struct kdtree_node *n;
|
220
|
+
float ad;
|
221
|
+
int near, far;
|
222
|
+
float dx;
|
223
|
+
|
224
224
|
if (i == -1) {
|
225
225
|
return;
|
226
226
|
}
|
227
|
-
|
228
|
-
struct kdtree_node *n = kdtreep->nodes + i;
|
229
227
|
|
230
|
-
|
228
|
+
n = kdtreep->nodes + i;
|
229
|
+
|
230
|
+
ad = (depth % 2) ? (x - n->x) : (y - n->y);
|
231
231
|
|
232
232
|
//
|
233
233
|
// recurse near, and perhaps far as well
|
234
234
|
//
|
235
|
-
|
236
|
-
int near, far;
|
235
|
+
|
237
236
|
if (ad <= 0) {
|
238
237
|
near = n->left; far = n->right;
|
239
238
|
} else {
|
240
239
|
near = n->right; far = n->left;
|
241
240
|
}
|
242
|
-
kdtree_nearest0(kdtreep, near, x, y, depth + 1);
|
243
|
-
if (ad * ad < n_dist) {
|
244
|
-
kdtree_nearest0(kdtreep, far, x, y, depth + 1);
|
241
|
+
kdtree_nearest0(kdtreep, near, x, y, depth + 1, n_index, n_dist);
|
242
|
+
if (ad * ad < *n_dist) {
|
243
|
+
kdtree_nearest0(kdtreep, far, x, y, depth + 1, n_index, n_dist);
|
245
244
|
}
|
246
245
|
|
247
246
|
//
|
248
247
|
// do we beat the old distance?
|
249
248
|
//
|
250
|
-
|
251
|
-
|
252
|
-
if (dx < n_dist) {
|
249
|
+
|
250
|
+
dx = (x - n->x) * (x - n->x);
|
251
|
+
if (dx < *n_dist) {
|
253
252
|
float d = dx + ((y - n->y) * (y - n->y));
|
254
|
-
if (d < n_dist) {
|
255
|
-
n_index = i;
|
256
|
-
n_dist = d;
|
253
|
+
if (d < *n_dist) {
|
254
|
+
*n_index = i;
|
255
|
+
*n_dist = d;
|
257
256
|
}
|
258
257
|
}
|
259
258
|
}
|
@@ -264,15 +263,6 @@ static void kdtree_nearest0(struct kdtree_data *kdtreep, int i, float x, float y
|
|
264
263
|
|
265
264
|
#define MAX_K 255
|
266
265
|
|
267
|
-
typedef struct kresult {
|
268
|
-
int index;
|
269
|
-
float distance;
|
270
|
-
} kresult;
|
271
|
-
// note I leave an extra slot here at the end because of the way our binary insert works
|
272
|
-
static struct kresult k_list[MAX_K + 1];
|
273
|
-
static int k_len;
|
274
|
-
static float k_dist;
|
275
|
-
|
276
266
|
/*
|
277
267
|
* call-seq:
|
278
268
|
* kd.nearestk(x, y, k) => array
|
@@ -280,77 +270,84 @@ static float k_dist;
|
|
280
270
|
* Finds the <i>k</i> points closest to <i>x</i>, <i>y</i>. Returns an array of
|
281
271
|
* ids, sorted by distance. Returns an empty array if the tree is empty. Note
|
282
272
|
* that <i>k</i> is capped at 255.
|
283
|
-
*
|
273
|
+
*
|
284
274
|
* points = []
|
285
275
|
* points << [47.6, -122.3, 1] # Seattle
|
286
276
|
* points << [45.5, -122.8, 2] # Portland
|
287
277
|
* points << [40.7, -74.0, 3] # New York
|
288
|
-
* kd =
|
289
|
-
*
|
278
|
+
* kd = Kdtree.new(points)
|
279
|
+
*
|
290
280
|
* # which two cities are closest to San Francisco?
|
291
281
|
* kd.nearestk(34.1, -118.2, 2) #=> [2, 1]
|
292
282
|
*/
|
293
283
|
static VALUE kdtree_nearestk(VALUE kdtree, VALUE x, VALUE y, VALUE k)
|
294
284
|
{
|
285
|
+
// note I leave an extra slot here at the end because of the way our binary insert works
|
286
|
+
kresult k_list[MAX_K + 1];
|
287
|
+
int k_len = 0;
|
288
|
+
float k_dist = INT_MAX;
|
289
|
+
int ki = NUM2INT(k);
|
290
|
+
VALUE ary;
|
291
|
+
int i;
|
295
292
|
KDTREEP;
|
296
293
|
|
297
|
-
k_len = 0;
|
298
|
-
k_dist = INT_MAX;
|
299
|
-
|
300
|
-
int ki = NUM2INT(k);
|
301
294
|
if (ki < 1) {
|
302
295
|
ki = 1;
|
303
296
|
} else if (ki > MAX_K) {
|
304
297
|
ki = MAX_K;
|
305
298
|
}
|
306
|
-
kdtree_nearestk0(kdtreep, kdtreep->root, NUM2DBL(x), NUM2DBL(y), ki, 0);
|
299
|
+
kdtree_nearestk0(kdtreep, kdtreep->root, NUM2DBL(x), NUM2DBL(y), ki, 0, k_list, &k_len, &k_dist);
|
307
300
|
|
308
301
|
// convert result to ruby array
|
309
|
-
|
310
|
-
int i;
|
302
|
+
ary = rb_ary_new();
|
311
303
|
for (i = 0; i < k_len; ++i) {
|
312
304
|
rb_ary_push(ary, INT2NUM(kdtreep->nodes[k_list[i].index].id));
|
313
305
|
}
|
314
306
|
return ary;
|
315
307
|
}
|
316
308
|
|
317
|
-
static void kdtree_nearestk0(struct kdtree_data *kdtreep, int i, float x, float y, int k, int depth)
|
309
|
+
static void kdtree_nearestk0(struct kdtree_data *kdtreep, int i, float x, float y, int k, int depth, kresult *k_list, int *k_len, float *k_dist)
|
318
310
|
{
|
311
|
+
struct kdtree_node *n;
|
312
|
+
float ad;
|
313
|
+
int near, far;
|
314
|
+
float dx;
|
315
|
+
int lo, hi;
|
316
|
+
|
319
317
|
if (i == -1) {
|
320
318
|
return;
|
321
319
|
}
|
322
|
-
|
323
|
-
struct kdtree_node *n = kdtreep->nodes + i;
|
324
320
|
|
325
|
-
|
321
|
+
n = kdtreep->nodes + i;
|
322
|
+
|
323
|
+
ad = (depth % 2) ? (x - n->x) : (y - n->y);
|
326
324
|
|
327
325
|
//
|
328
326
|
// recurse near, and then perhaps far as well
|
329
327
|
//
|
330
|
-
|
331
|
-
int near, far;
|
328
|
+
|
332
329
|
if (ad <= 0) {
|
333
330
|
near = n->left; far = n->right;
|
334
331
|
} else {
|
335
332
|
near = n->right; far = n->left;
|
336
333
|
}
|
337
|
-
kdtree_nearestk0(kdtreep, near, x, y, k, depth + 1);
|
338
|
-
if (ad * ad < k_dist) {
|
339
|
-
kdtree_nearestk0(kdtreep, far, x, y, k, depth + 1);
|
334
|
+
kdtree_nearestk0(kdtreep, near, x, y, k, depth + 1, k_list, k_len, k_dist);
|
335
|
+
if (ad * ad < *k_dist) {
|
336
|
+
kdtree_nearestk0(kdtreep, far, x, y, k, depth + 1, k_list, k_len, k_dist);
|
340
337
|
}
|
341
338
|
|
342
339
|
//
|
343
340
|
// do we beat the old distance?
|
344
341
|
//
|
345
|
-
|
346
|
-
|
347
|
-
if (dx < k_dist) {
|
342
|
+
|
343
|
+
dx = (x - n->x) * (x - n->x);
|
344
|
+
if (dx < *k_dist) {
|
348
345
|
float d = dx + ((y - n->y) * (y - n->y));
|
349
|
-
if (d < k_dist) {
|
346
|
+
if (d < *k_dist) {
|
350
347
|
//
|
351
348
|
// find spot to insert
|
352
349
|
//
|
353
|
-
|
350
|
+
lo = 0, hi = *k_len;
|
354
351
|
while (lo < hi) {
|
355
352
|
int mid = (lo + hi) / 2;
|
356
353
|
if (k_list[mid].distance < d) {
|
@@ -363,21 +360,21 @@ static void kdtree_nearestk0(struct kdtree_data *kdtreep, int i, float x, float
|
|
363
360
|
//
|
364
361
|
// insert
|
365
362
|
//
|
366
|
-
|
367
|
-
memmove(k_list + lo + 1, k_list + lo, (k_len - lo) * sizeof(struct kresult));
|
363
|
+
|
364
|
+
memmove(k_list + lo + 1, k_list + lo, (*k_len - lo) * sizeof(struct kresult));
|
368
365
|
k_list[lo].index = i;
|
369
366
|
k_list[lo].distance = d;
|
370
367
|
|
371
368
|
//
|
372
369
|
// adjust len/dist if necessary
|
373
370
|
//
|
374
|
-
|
375
|
-
if (k_len < k) {
|
376
|
-
++k_len;
|
371
|
+
|
372
|
+
if (*k_len < k) {
|
373
|
+
++(*k_len);
|
377
374
|
} else {
|
378
|
-
k_dist = k_list[k - 1].distance;
|
375
|
+
*k_dist = k_list[k - 1].distance;
|
379
376
|
}
|
380
|
-
|
377
|
+
}
|
381
378
|
}
|
382
379
|
}
|
383
380
|
|
@@ -386,43 +383,42 @@ static void kdtree_nearestk0(struct kdtree_data *kdtreep, int i, float x, float
|
|
386
383
|
* kd.persist(io)
|
387
384
|
*
|
388
385
|
* Writes the tree out to <i>io</i> so you can quickly load it later with
|
389
|
-
*
|
386
|
+
* Kdtree.new. This avoids the startup cost of initializing a tree. Apart from a
|
390
387
|
* small header, the size of the file is proportional to the number of points,
|
391
388
|
* requiring 20 bytes per point.
|
392
389
|
*
|
393
390
|
* This file is <b>NOT PORTABLE</b> across different architectures due to endian
|
394
391
|
* issues.
|
395
|
-
*
|
392
|
+
*
|
396
393
|
* points = []
|
397
394
|
* points << [47.6, -122.3, 1] # Seattle
|
398
395
|
* points << [45.5, -122.8, 2] # Portland
|
399
396
|
* points << [40.7, -74.0, 3] # New York
|
400
|
-
* kd =
|
397
|
+
* kd = Kdtree.new(points)
|
401
398
|
*
|
402
399
|
* # persist the tree to disk
|
403
400
|
* File.open("treefile", "w") { |f| kd.persist(f) }
|
404
401
|
*
|
405
402
|
* ...
|
406
|
-
*
|
403
|
+
*
|
407
404
|
* # later, read the tree from disk
|
408
|
-
* kd2 = File.open("treefile") { |f|
|
405
|
+
* kd2 = File.open("treefile") { |f| Kdtree.new(f) }
|
409
406
|
*/
|
410
407
|
static VALUE kdtree_persist(VALUE kdtree, VALUE io)
|
411
408
|
{
|
409
|
+
VALUE str;
|
412
410
|
KDTREEP;
|
413
|
-
|
411
|
+
|
414
412
|
if (!rb_respond_to(io, rb_intern("write"))) {
|
415
413
|
rb_raise(rb_eTypeError, "instance of IO needed");
|
416
414
|
}
|
417
|
-
if (rb_respond_to(io,
|
418
|
-
|
415
|
+
if (rb_respond_to(io, id_binmode)) {
|
416
|
+
rb_funcall(io, id_binmode, 0);
|
419
417
|
}
|
420
418
|
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
rb_str_buf_cat(str, (char*)kdtreep->nodes, sizeof(struct kdtree_node) * kdtreep->len);
|
425
|
-
rb_io_write(io, str);
|
419
|
+
write_all(io, KDTREE_MAGIC, 4);
|
420
|
+
write_all(io, kdtreep, sizeof(struct kdtree_data) - sizeof(struct kdtree_node *));
|
421
|
+
write_all(io, kdtreep->nodes, sizeof(struct kdtree_node) * kdtreep->len);
|
426
422
|
return io;
|
427
423
|
}
|
428
424
|
|
@@ -434,22 +430,40 @@ static VALUE kdtree_persist(VALUE kdtree, VALUE io)
|
|
434
430
|
*/
|
435
431
|
static VALUE kdtree_to_s(VALUE kdtree)
|
436
432
|
{
|
433
|
+
char buf[256];
|
437
434
|
KDTREEP;
|
438
435
|
|
439
|
-
char buf[256];
|
440
436
|
sprintf(buf, "#<%s:%p nodes=%d>", rb_obj_classname(kdtree), (void*)kdtree, kdtreep->len);
|
441
437
|
return rb_str_new(buf, strlen(buf));
|
442
438
|
}
|
443
439
|
|
440
|
+
//
|
441
|
+
// io helpers
|
442
|
+
//
|
443
|
+
|
444
|
+
static void read_all(VALUE io, void *buf, int len)
|
445
|
+
{
|
446
|
+
VALUE string = rb_funcall(io, id_read, 1, INT2NUM(len));
|
447
|
+
if (NIL_P(string) || RSTRING_LEN(string) != len) {
|
448
|
+
rb_raise(rb_eEOFError, "end of file reached");
|
449
|
+
}
|
450
|
+
memcpy(buf, RSTRING_PTR(string), len);
|
451
|
+
}
|
452
|
+
|
453
|
+
static void write_all(VALUE io, const void *buf, int len)
|
454
|
+
{
|
455
|
+
rb_funcall(io, id_write, 1, rb_str_new(buf, len));
|
456
|
+
}
|
457
|
+
|
444
458
|
//
|
445
459
|
// entry point
|
446
460
|
//
|
447
461
|
|
448
462
|
/*
|
449
|
-
*
|
463
|
+
* Kdtree is an insanely fast data structure for finding the nearest
|
450
464
|
* neighbor(s) to a given point. This implementation only supports 2d
|
451
465
|
* points. Also, it only supports static points - there is no way to edit the
|
452
|
-
* tree after it has been initialized.
|
466
|
+
* tree after it has been initialized. Kdtree should scale to millions of
|
453
467
|
* points, though it's only been tested with around 1 million.
|
454
468
|
*
|
455
469
|
* Once the tree is constructed, it can be searched with nearest and nearestk.
|
@@ -462,8 +476,8 @@ static VALUE kdtree_to_s(VALUE kdtree)
|
|
462
476
|
* points << [47.6, -122.3, 1] # Seattle
|
463
477
|
* points << [45.5, -122.8, 2] # Portland
|
464
478
|
* points << [40.7, -74.0, 3] # New York
|
465
|
-
* kd =
|
466
|
-
*
|
479
|
+
* kd = Kdtree.new(points)
|
480
|
+
*
|
467
481
|
* # which city is closest to San Francisco?
|
468
482
|
* kd.nearest(34.1, -118.2) #=> 2
|
469
483
|
* # which two cities are closest to San Francisco?
|
@@ -477,12 +491,17 @@ void Init_kdtree()
|
|
477
491
|
{
|
478
492
|
static VALUE clazz;
|
479
493
|
|
480
|
-
clazz = rb_define_class("
|
481
|
-
|
482
|
-
rb_define_alloc_func(clazz, kdtree_alloc);
|
494
|
+
clazz = rb_define_class("Kdtree", rb_cObject);
|
495
|
+
|
496
|
+
rb_define_alloc_func(clazz, kdtree_alloc);
|
483
497
|
rb_define_method(clazz, "initialize", kdtree_initialize, 1);
|
484
498
|
rb_define_method(clazz, "nearest", kdtree_nearest, 2);
|
485
499
|
rb_define_method(clazz, "nearestk", kdtree_nearestk, 3);
|
486
|
-
rb_define_method(clazz, "persist", kdtree_persist, 1);
|
487
|
-
rb_define_method(clazz, "to_s", kdtree_to_s, 0);
|
500
|
+
rb_define_method(clazz, "persist", kdtree_persist, 1);
|
501
|
+
rb_define_method(clazz, "to_s", kdtree_to_s, 0);
|
502
|
+
|
503
|
+
// function ids
|
504
|
+
id_binmode = rb_intern("binmode");
|
505
|
+
id_read = rb_intern("read");
|
506
|
+
id_write = rb_intern("write");
|
488
507
|
}
|
data/kdtree.gemspec
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Gem::Specification.new do |s|
|
2
|
+
s.name = "kdtree"
|
3
|
+
s.version = "0.3"
|
4
|
+
|
5
|
+
s.authors = ["Adam Doppelt"]
|
6
|
+
s.email = ["amd@gurge.com"]
|
7
|
+
s.homepage = "http://github.com/gurgeous/kdtree"
|
8
|
+
s.summary = "Blazingly fast, native 2d kdtree."
|
9
|
+
s.description = <<EOF
|
10
|
+
A kdtree is a data structure that makes it possible to quickly solve
|
11
|
+
the nearest neighbor problem. This is a native 2d kdtree suitable for
|
12
|
+
production use with millions of points.
|
13
|
+
EOF
|
14
|
+
|
15
|
+
s.rubyforge_project = "kdtree"
|
16
|
+
s.add_development_dependency "rake-compiler"
|
17
|
+
|
18
|
+
s.files = `git ls-files`.split("\n")
|
19
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
20
|
+
s.extensions = ["ext/kdtree/extconf.rb"]
|
21
|
+
s.require_paths = ["lib"]
|
22
|
+
end
|
data/lib/kdtree.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "kdtree.so"
|
data/test/test_kdtree.rb
ADDED
@@ -0,0 +1,150 @@
|
|
1
|
+
require "benchmark"
|
2
|
+
require "kdtree"
|
3
|
+
require "tempfile"
|
4
|
+
require "test/unit"
|
5
|
+
|
6
|
+
#
|
7
|
+
# create a tree
|
8
|
+
#
|
9
|
+
|
10
|
+
class KdtreeTest < Test::Unit::TestCase
|
11
|
+
TMP = "#{Dir.tmpdir}/kdtree_test"
|
12
|
+
|
13
|
+
def setup
|
14
|
+
@points = (0...1000).map { |i| [rand_coord, rand_coord, i] }
|
15
|
+
@kdtree = Kdtree.new(@points)
|
16
|
+
end
|
17
|
+
|
18
|
+
def teardown
|
19
|
+
File.unlink(TMP) if File.exists?(TMP)
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_nearest
|
23
|
+
100.times do
|
24
|
+
pt = [rand_coord, rand_coord]
|
25
|
+
|
26
|
+
# kdtree search
|
27
|
+
id = @kdtree.nearest(pt[0], pt[1])
|
28
|
+
kdpt = @points[id]
|
29
|
+
|
30
|
+
# slow search
|
31
|
+
sortpt = @points.sort_by { |i| distance(i, pt) }.first
|
32
|
+
|
33
|
+
# assert
|
34
|
+
kdd = distance(kdpt, pt)
|
35
|
+
sortd = distance(sortpt, pt)
|
36
|
+
assert((kdd - sortd).abs < 0.0000001, "kdtree didn't return the closest result")
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def test_nearestk
|
41
|
+
100.times do
|
42
|
+
pt = [rand_coord, rand_coord]
|
43
|
+
|
44
|
+
# kdtree search
|
45
|
+
list = @kdtree.nearestk(pt[0], pt[1], 5)
|
46
|
+
kdpt = @points[list.last]
|
47
|
+
|
48
|
+
# slow search
|
49
|
+
sortpt = @points.sort_by { |i| distance(i, pt) }[list.length - 1]
|
50
|
+
|
51
|
+
# assert
|
52
|
+
kdd = distance(kdpt, pt)
|
53
|
+
sortd = distance(sortpt, pt)
|
54
|
+
assert((kdd - sortd).abs < 0.0000001, "kdtree didn't return the closest result")
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
def test_persist
|
59
|
+
# write
|
60
|
+
File.open(TMP, "w") { |f| @kdtree.persist(f) }
|
61
|
+
# read
|
62
|
+
kdtree2 = File.open(TMP, "r") { |f| Kdtree.new(f) }
|
63
|
+
|
64
|
+
# now test some random points
|
65
|
+
100.times do
|
66
|
+
pt = [rand_coord, rand_coord]
|
67
|
+
id1 = @kdtree.nearest(*pt)
|
68
|
+
id2 = kdtree2.nearest(*pt)
|
69
|
+
assert(id1 == id2, "kdtree2 differed from kdtree")
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
def test_bad_magic
|
74
|
+
File.open(TMP, "w") { |f| f.puts "That ain't right" }
|
75
|
+
assert_raise RuntimeError do
|
76
|
+
File.open(TMP, "r") { |f| Kdtree.new(f) }
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
def test_eof
|
81
|
+
File.open(TMP, "w") { |f| @kdtree.persist(f) }
|
82
|
+
bytes = File.read(TMP)
|
83
|
+
|
84
|
+
[2, 10, 100].each do |len|
|
85
|
+
File.open(TMP, "w") { |f| f.write(bytes[0, len]) }
|
86
|
+
assert_raise EOFError do
|
87
|
+
File.open(TMP, "r") { |f| Kdtree.new(f) }
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
def dont_test_speed
|
93
|
+
sizes = [1, 100, 1000, 10000, 100000, 1000000]
|
94
|
+
ks = [1, 5, 50, 255]
|
95
|
+
sizes.each do |s|
|
96
|
+
points = (0...s).map { |i| [rand_coord, rand_coord, i] }
|
97
|
+
|
98
|
+
# build
|
99
|
+
Benchmark.bm(17) do |bm|
|
100
|
+
kdtree = nil
|
101
|
+
bm.report "build" do
|
102
|
+
kdtree = Kdtree.new(points)
|
103
|
+
end
|
104
|
+
bm.report "persist" do
|
105
|
+
File.open(TMP, "w") { |f| kdtree.persist(f) }
|
106
|
+
end
|
107
|
+
bm.report "read" do
|
108
|
+
File.open(TMP, "r") { |f| Kdtree.new(f) }
|
109
|
+
end
|
110
|
+
|
111
|
+
ks.each do |k|
|
112
|
+
bm.report "100 queries (#{k})" do
|
113
|
+
total = count = 0
|
114
|
+
100.times do
|
115
|
+
tm = Time.now
|
116
|
+
if k == 1
|
117
|
+
kdtree.nearest(rand_coord, rand_coord)
|
118
|
+
else
|
119
|
+
kdtree.nearestk(rand_coord, rand_coord, k)
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
125
|
+
puts
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
protected
|
130
|
+
|
131
|
+
def distance(a, b)
|
132
|
+
x, y = a[0] - b[0], a[1] - b[1]
|
133
|
+
x * x + y * y
|
134
|
+
end
|
135
|
+
|
136
|
+
def rand_coord
|
137
|
+
rand(0) * 10 - 5
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
# running dont_test_speed on my i5 2.8ghz:
|
142
|
+
#
|
143
|
+
# user system total real
|
144
|
+
# build 3.350000 0.020000 3.370000 ( 3.520528)
|
145
|
+
# persist 0.150000 0.020000 0.170000 ( 0.301963)
|
146
|
+
# read 0.280000 0.000000 0.280000 ( 0.432676)
|
147
|
+
# 100 queries (1) 0.000000 0.000000 0.000000 ( 0.000319)
|
148
|
+
# 100 queries (5) 0.000000 0.000000 0.000000 ( 0.000412)
|
149
|
+
# 100 queries (50) 0.000000 0.000000 0.000000 ( 0.001417)
|
150
|
+
# 100 queries (255) 0.000000 0.000000 0.000000 ( 0.006268)
|
metadata
CHANGED
@@ -1,61 +1,85 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: kdtree
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.3'
|
5
|
+
prerelease:
|
5
6
|
platform: ruby
|
6
|
-
authors:
|
7
|
+
authors:
|
7
8
|
- Adam Doppelt
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
12
|
+
date: 2012-10-17 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rake-compiler
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
description: ! 'A kdtree is a data structure that makes it possible to quickly solve
|
11
31
|
|
12
|
-
|
13
|
-
default_executable:
|
14
|
-
dependencies: []
|
32
|
+
the nearest neighbor problem. This is a native 2d kdtree suitable for
|
15
33
|
|
16
|
-
|
17
|
-
email: amd@gurge.com
|
18
|
-
executables: []
|
34
|
+
production use with millions of points.
|
19
35
|
|
20
|
-
|
21
|
-
|
36
|
+
'
|
37
|
+
email:
|
38
|
+
- amd@gurge.com
|
39
|
+
executables: []
|
40
|
+
extensions:
|
41
|
+
- ext/kdtree/extconf.rb
|
22
42
|
extra_rdoc_files: []
|
23
|
-
|
24
|
-
|
25
|
-
-
|
26
|
-
-
|
43
|
+
files:
|
44
|
+
- .gitignore
|
45
|
+
- .travis.yml
|
46
|
+
- Gemfile
|
27
47
|
- LICENSE
|
28
|
-
-
|
29
|
-
|
30
|
-
|
48
|
+
- README.md
|
49
|
+
- Rakefile
|
50
|
+
- ext/kdtree/extconf.rb
|
51
|
+
- ext/kdtree/kdtree.c
|
52
|
+
- kdtree.gemspec
|
53
|
+
- lib/kdtree.rb
|
54
|
+
- test/test_kdtree.rb
|
55
|
+
homepage: http://github.com/gurgeous/kdtree
|
31
56
|
licenses: []
|
32
|
-
|
33
57
|
post_install_message:
|
34
|
-
rdoc_options:
|
35
|
-
|
36
|
-
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
requirements:
|
49
|
-
- -
|
50
|
-
- !ruby/object:Gem::Version
|
51
|
-
version:
|
52
|
-
|
58
|
+
rdoc_options: []
|
59
|
+
require_paths:
|
60
|
+
- lib
|
61
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ! '>='
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
segments:
|
68
|
+
- 0
|
69
|
+
hash: -3094601017742930682
|
70
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
72
|
+
requirements:
|
73
|
+
- - ! '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
segments:
|
77
|
+
- 0
|
78
|
+
hash: -3094601017742930682
|
53
79
|
requirements: []
|
54
|
-
|
55
|
-
|
56
|
-
rubygems_version: 1.3.5
|
80
|
+
rubyforge_project: kdtree
|
81
|
+
rubygems_version: 1.8.21
|
57
82
|
signing_key:
|
58
83
|
specification_version: 3
|
59
|
-
summary: Blazingly fast 2d kdtree.
|
60
|
-
test_files:
|
61
|
-
- test/test.rb
|
84
|
+
summary: Blazingly fast, native 2d kdtree.
|
85
|
+
test_files: []
|
data/test/test.rb
DELETED
@@ -1,138 +0,0 @@
|
|
1
|
-
require "#{File.expand_path(File.dirname(__FILE__))}/../ext/kdtree.o"
|
2
|
-
require "test/unit"
|
3
|
-
require "tempfile"
|
4
|
-
|
5
|
-
#
|
6
|
-
# create a tree
|
7
|
-
#
|
8
|
-
|
9
|
-
class KDTreeTest < Test::Unit::TestCase
|
10
|
-
TMP = "#{Dir.tmpdir}/kdtree_test"
|
11
|
-
|
12
|
-
def test_nearest
|
13
|
-
setup_tree(1000)
|
14
|
-
100.times do
|
15
|
-
pt = [rand_coord, rand_coord]
|
16
|
-
|
17
|
-
# kdtree search
|
18
|
-
id = @kdtree.nearest(pt[0], pt[1])
|
19
|
-
kdpt = @points[id]
|
20
|
-
|
21
|
-
# slow search
|
22
|
-
sortpt = @points.sort_by { |i| distance(i, pt) }.first
|
23
|
-
|
24
|
-
# assert
|
25
|
-
kdd = distance(kdpt, pt)
|
26
|
-
sortd = distance(sortpt, pt)
|
27
|
-
assert((kdd - sortd).abs < 0.0000001, "kdtree didn't return the closest result")
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
def test_nearestk
|
32
|
-
setup_tree(1000)
|
33
|
-
100.times do
|
34
|
-
pt = [rand_coord, rand_coord]
|
35
|
-
|
36
|
-
# kdtree search
|
37
|
-
list = @kdtree.nearestk(pt[0], pt[1], 5)
|
38
|
-
kdpt = @points[list.last]
|
39
|
-
|
40
|
-
# slow search
|
41
|
-
sortpt = @points.sort_by { |i| distance(i, pt) }[list.length - 1]
|
42
|
-
|
43
|
-
# assert
|
44
|
-
kdd = distance(kdpt, pt)
|
45
|
-
sortd = distance(sortpt, pt)
|
46
|
-
assert((kdd - sortd).abs < 0.0000001, "kdtree didn't return the closest result")
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
def test_persist
|
51
|
-
setup_tree(1000)
|
52
|
-
|
53
|
-
begin
|
54
|
-
# write
|
55
|
-
File.open(TMP, "w") { |f| @kdtree.persist(f) }
|
56
|
-
# read
|
57
|
-
kdtree2 = File.open(TMP, "r") { |f| KDTree.new(f) }
|
58
|
-
|
59
|
-
# now test some random points
|
60
|
-
100.times do
|
61
|
-
pt = [rand_coord, rand_coord]
|
62
|
-
id1 = @kdtree.nearest(*pt)
|
63
|
-
id2 = kdtree2.nearest(*pt)
|
64
|
-
assert(id1 == id2, "kdtree2 differed from kdtree")
|
65
|
-
end
|
66
|
-
ensure
|
67
|
-
File.unlink(TMP)
|
68
|
-
end
|
69
|
-
|
70
|
-
# now test magic problems
|
71
|
-
begin
|
72
|
-
File.open(TMP, "w") { |f| f.puts "That ain't right" }
|
73
|
-
assert_raise RuntimeError do
|
74
|
-
File.open(TMP, "r") { |f| KDTree.new(f) }
|
75
|
-
end
|
76
|
-
ensure
|
77
|
-
File.unlink(TMP)
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
def dont_test_speed
|
82
|
-
printf("\n")
|
83
|
-
sizes = [1, 100, 1000, 10000, 100000, 1000000]
|
84
|
-
ks = [1, 5, 50, 255]
|
85
|
-
sizes.each do |s|
|
86
|
-
points = (0...s).map { |i| [rand_coord, rand_coord, i] }
|
87
|
-
|
88
|
-
# build
|
89
|
-
tm = Time.now
|
90
|
-
kdtree = KDTree.new(points)
|
91
|
-
printf "build %d took %.6fs\n", s, Time.now - tm
|
92
|
-
|
93
|
-
begin
|
94
|
-
# write
|
95
|
-
tm = Time.now
|
96
|
-
File.open(TMP, "w") { |f| kdtree.persist(f) }
|
97
|
-
printf "write %d took %.6fs\n", s, Time.now - tm
|
98
|
-
# read
|
99
|
-
tm = Time.now
|
100
|
-
File.open(TMP, "r") { |f| KDTree.new(f) }
|
101
|
-
printf "read %d took %.6fs\n", s, Time.now - tm
|
102
|
-
ensure
|
103
|
-
File.unlink(TMP)
|
104
|
-
end
|
105
|
-
|
106
|
-
ks.each do |k|
|
107
|
-
total = count = 0
|
108
|
-
100.times do
|
109
|
-
tm = Time.now
|
110
|
-
if k == 1
|
111
|
-
kdtree.nearest(rand_coord, rand_coord)
|
112
|
-
else
|
113
|
-
kdtree.nearestk(rand_coord, rand_coord, k)
|
114
|
-
end
|
115
|
-
total += Time.now - tm
|
116
|
-
count += 1
|
117
|
-
end
|
118
|
-
printf "avg query time = %.6fs [%d/%d]\n", total / count, s, k
|
119
|
-
end
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
123
|
-
protected
|
124
|
-
|
125
|
-
def setup_tree(len)
|
126
|
-
@points = (0...len).map { |i| [rand_coord, rand_coord, i] }
|
127
|
-
@kdtree = KDTree.new(@points)
|
128
|
-
end
|
129
|
-
|
130
|
-
def distance(a, b)
|
131
|
-
x, y = a[0] - b[0], a[1] - b[1]
|
132
|
-
x * x + y * y
|
133
|
-
end
|
134
|
-
|
135
|
-
def rand_coord
|
136
|
-
rand(0) * 10 - 5
|
137
|
-
end
|
138
|
-
end
|