kdtree 0.1 → 0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/.travis.yml +9 -0
- data/Gemfile +2 -0
- data/LICENSE +1 -1
- data/README.md +70 -0
- data/Rakefile +41 -0
- data/ext/{extconf.rb → kdtree/extconf.rb} +1 -1
- data/ext/{kdtree.c → kdtree/kdtree.c} +157 -138
- data/kdtree.gemspec +22 -0
- data/lib/kdtree.rb +1 -0
- data/test/test_kdtree.rb +150 -0
- metadata +69 -45
- data/test/test.rb +0 -138
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE
CHANGED
data/README.md
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
## Kdtree
|
2
|
+
|
3
|
+
A kd tree is a data structure that recursively partitions the world in order to rapidly answer nearest neighbor queries. A generic kd tree can support any number of dimensions, and can return either the nearest neighbor or a set of N nearest neighbors.
|
4
|
+
|
5
|
+
This gem is a blazingly fast, native, 2d kdtree. It's specifically built to find the nearest neighbor when searching millions of points. It's used in production at Urbanspoon and several other companies.
|
6
|
+
|
7
|
+
The first version of this gem was released back in 2009. See the original [blog post](http://gurge.com/2009/10/22/ruby-nearest-neighbor-fast-kdtree-gem/) for the full story. Wikipedia has a great [article on kdtrees](http://en.wikipedia.org/wiki/K-d_tree).
|
8
|
+
|
9
|
+
### Usage
|
10
|
+
|
11
|
+
Usage is very simple:
|
12
|
+
|
13
|
+
* **Kdtree.new(points)** - construct a new tree. Each point should be of the form `[x, y, id]`, where `x/y` are floats and `id` is an int. Not a string, not an object, just an int.
|
14
|
+
* **kd.nearest(x, y)** - find the nearest point. Returns an id.
|
15
|
+
* **kd.nearestk(x, y, k)** - find the nearest `k` points. Returns an array of ids.
|
16
|
+
|
17
|
+
Also, I made it possible to **persist** the tree to disk and load it later. That way you can calculate the tree offline and load it quickly at some future point. Loading a persisted tree with 1 million points takes less than a second, as opposed to the 3.5 second build time shown in the Performance section below. For example:
|
18
|
+
|
19
|
+
```ruby
|
20
|
+
File.open("treefile", "w") { |f| kd.persist(f) }
|
21
|
+
... later ...
|
22
|
+
kd2 = File.open("treefile") { |f| Kdtree.new(f) }
|
23
|
+
```
|
24
|
+
|
25
|
+
### Performance
|
26
|
+
|
27
|
+
Kdtree is fast. How fast? Using a tree with 1 million points on my i5 2.8ghz:
|
28
|
+
|
29
|
+
```
|
30
|
+
build 3.5s
|
31
|
+
nearest point 0.000003s
|
32
|
+
nearest 5 points 0.000004s
|
33
|
+
nearest 50 points 0.000014s
|
34
|
+
nearest 255 points 0.000063s
|
35
|
+
```
|
36
|
+
|
37
|
+
### Limitations
|
38
|
+
|
39
|
+
* No **editing** allowed! Once you construct a tree you’re stuck with it.
|
40
|
+
* The tree is stored in **one big memory block**, 20 bytes per point. A tree with one million points will allocate a single 19 MB block to store its nodes.
|
41
|
+
* Persisted trees are **architecture dependent**, and may not work across different machines due to endian issues.
|
42
|
+
* nearestk is limited to **255 results**
|
43
|
+
|
44
|
+
### Contributors
|
45
|
+
|
46
|
+
Since this gem was originally released, several folks have contributed important patches:
|
47
|
+
|
48
|
+
* @antifuchs (thread safety)
|
49
|
+
* @evanphx (native cleanups, perf)
|
50
|
+
* @ghazel (C89 compliance)
|
51
|
+
* @mcerna (1.9 compat)
|
52
|
+
|
53
|
+
### Changelog
|
54
|
+
|
55
|
+
#### 0.3 (in progress, unreleased)
|
56
|
+
|
57
|
+
* Ruby 1.9.x compatibility (@mcerna and others)
|
58
|
+
* renamed KDTree to the more idiomatic Kdtree
|
59
|
+
* use IO methods directly instead of rooting around in rb_io
|
60
|
+
* thread safe, no more statics (@antifuchs)
|
61
|
+
* C90 compliance, no warnings (@ghazel)
|
62
|
+
* native cleanups (@evanphx)
|
63
|
+
|
64
|
+
#### 0.2
|
65
|
+
|
66
|
+
skipped this version to prevent confusion with other flavors of the gem
|
67
|
+
|
68
|
+
#### 0.1
|
69
|
+
|
70
|
+
* Original release
|
data/Rakefile
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
require "bundler/setup"
|
2
|
+
require "rake/extensiontask"
|
3
|
+
require "rake/testtask"
|
4
|
+
|
5
|
+
# load the spec, we use it below
|
6
|
+
spec = Gem::Specification.load("kdtree.gemspec")
|
7
|
+
|
8
|
+
#
|
9
|
+
# gem
|
10
|
+
#
|
11
|
+
|
12
|
+
task :build do
|
13
|
+
system "gem build --quiet kdtree.gemspec"
|
14
|
+
end
|
15
|
+
|
16
|
+
task :install => :build do
|
17
|
+
system "sudo gem install --quiet kdtree-#{spec.version}.gem"
|
18
|
+
end
|
19
|
+
|
20
|
+
task :release => :build do
|
21
|
+
system "git tag -a #{spec.version} -m 'Tagging #{spec.version}'"
|
22
|
+
system "git push --tags"
|
23
|
+
system "gem push kdtree-#{spec.version}.gem"
|
24
|
+
end
|
25
|
+
|
26
|
+
#
|
27
|
+
# rake-compiler
|
28
|
+
#
|
29
|
+
|
30
|
+
Rake::ExtensionTask.new("kdtree", spec)
|
31
|
+
|
32
|
+
|
33
|
+
#
|
34
|
+
# testing
|
35
|
+
#
|
36
|
+
|
37
|
+
Rake::TestTask.new(:test) do |test|
|
38
|
+
test.libs << "test"
|
39
|
+
end
|
40
|
+
task :test => :compile
|
41
|
+
task :default => :test
|
@@ -1,15 +1,10 @@
|
|
1
1
|
#include "ruby.h"
|
2
|
-
#include "rubyio.h"
|
3
|
-
#include "version.h"
|
4
|
-
|
5
|
-
#ifndef HAVE_RB_IO_T
|
6
|
-
#define rb_io_t OpenFile
|
7
|
-
#endif
|
8
2
|
|
9
3
|
//
|
10
4
|
// interface
|
11
5
|
//
|
12
6
|
|
7
|
+
// the tree itself
|
13
8
|
typedef struct kdtree_data
|
14
9
|
{
|
15
10
|
int root;
|
@@ -17,6 +12,7 @@ typedef struct kdtree_data
|
|
17
12
|
struct kdtree_node *nodes;
|
18
13
|
} kdtree_data;
|
19
14
|
|
15
|
+
// a node in the tree
|
20
16
|
typedef struct kdtree_node
|
21
17
|
{
|
22
18
|
float x, y;
|
@@ -25,10 +21,18 @@ typedef struct kdtree_node
|
|
25
21
|
int right;
|
26
22
|
} kdtree_node;
|
27
23
|
|
24
|
+
// a result node from kdtree_nearestk0
|
25
|
+
typedef struct kresult {
|
26
|
+
int index;
|
27
|
+
float distance;
|
28
|
+
} kresult;
|
29
|
+
|
30
|
+
// helper macro for digging out our struct
|
28
31
|
#define KDTREEP \
|
29
32
|
struct kdtree_data *kdtreep; \
|
30
33
|
Data_Get_Struct(kdtree, struct kdtree_data, kdtreep);
|
31
34
|
|
35
|
+
// kdtree public methods
|
32
36
|
static VALUE kdtree_alloc(VALUE klass);
|
33
37
|
static void kdtree_free(struct kdtree_data *kdtreep);
|
34
38
|
static VALUE kdtree_initialize(VALUE kdtree, VALUE points);
|
@@ -37,13 +41,20 @@ static VALUE kdtree_nearestk(VALUE kdtree, VALUE x, VALUE y, VALUE k);
|
|
37
41
|
static VALUE kdtree_persist(VALUE kdtree, VALUE io);
|
38
42
|
static VALUE kdtree_to_s(VALUE kdtree);
|
39
43
|
|
40
|
-
// helpers
|
44
|
+
// kdtree helpers
|
41
45
|
static int kdtree_build(struct kdtree_data *kdtreep, int min, int max, int depth);
|
42
|
-
static void kdtree_nearest0(struct kdtree_data *kdtreep, int i, float x, float y, int depth);
|
43
|
-
static void kdtree_nearestk0(struct kdtree_data *kdtreep, int i, float x, float y, int k, int depth);
|
46
|
+
static void kdtree_nearest0(struct kdtree_data *kdtreep, int i, float x, float y, int depth, int *n_index, float *n_dist);
|
47
|
+
static void kdtree_nearestk0(struct kdtree_data *kdtreep, int i, float x, float y, int k, int depth, kresult *k_list, int *k_len, float *k_dist);
|
48
|
+
|
49
|
+
// io helpers
|
50
|
+
static void read_all(VALUE io, void *buf, int len);
|
51
|
+
static void write_all(VALUE io, const void *buf, int len);
|
44
52
|
|
45
53
|
#define KDTREE_MAGIC "KdTr"
|
46
54
|
|
55
|
+
// ids
|
56
|
+
static ID id_read, id_write, id_binmode;
|
57
|
+
|
47
58
|
//
|
48
59
|
// implementation
|
49
60
|
//
|
@@ -63,24 +74,12 @@ static void kdtree_free(struct kdtree_data *kdtreep)
|
|
63
74
|
}
|
64
75
|
}
|
65
76
|
|
66
|
-
static void read_all(struct rb_io_t *fptr, char *buf, int len)
|
67
|
-
{
|
68
|
-
while (len > 0) {
|
69
|
-
int n = rb_io_fread(buf, len, fptr->f);
|
70
|
-
if (n == 0) {
|
71
|
-
rb_eof_error();
|
72
|
-
}
|
73
|
-
buf += n;
|
74
|
-
len -= n;
|
75
|
-
}
|
76
|
-
}
|
77
|
-
|
78
77
|
/*
|
79
78
|
* call-seq:
|
80
|
-
*
|
81
|
-
*
|
79
|
+
* Kdtree.new(points) => kdtree
|
80
|
+
* Kdtree.new(io) => kdtree
|
82
81
|
*
|
83
|
-
* Returns a new <code>
|
82
|
+
* Returns a new <code>Kdtree</code>. To construct a tree, pass an array of
|
84
83
|
* <i>points</i>. Each point should be an array of the form <code>[x, y,
|
85
84
|
* id]</code>, where <i>x</i> and <i>y</i> are floats and <i>id</i> is an
|
86
85
|
* integer. The <i>id</i> is arbitrary and will be returned to you whenever you
|
@@ -90,7 +89,7 @@ static void read_all(struct rb_io_t *fptr, char *buf, int len)
|
|
90
89
|
* points = []
|
91
90
|
* points << [47.6, -122.3, 1] # Seattle
|
92
91
|
* points << [40.7, -74.0, 2] # New York
|
93
|
-
* kd =
|
92
|
+
* kd = Kdtree.new(points)
|
94
93
|
*
|
95
94
|
* Alternately, you can pass in an <i>IO</i> object containing a persisted
|
96
95
|
* kdtree. This makes it possible to build the tree in advance, persist it, and
|
@@ -103,104 +102,97 @@ static VALUE kdtree_initialize(VALUE kdtree, VALUE arg)
|
|
103
102
|
if (TYPE(arg) == T_ARRAY) {
|
104
103
|
// init from array of points
|
105
104
|
VALUE points = arg;
|
105
|
+
int i;
|
106
106
|
kdtreep->len = RARRAY_LEN(points);
|
107
107
|
kdtreep->nodes = ALLOC_N(struct kdtree_node, kdtreep->len);
|
108
108
|
|
109
|
-
int i;
|
110
109
|
for (i = 0; i < RARRAY_LEN(points); ++i) {
|
111
110
|
struct kdtree_node *n = kdtreep->nodes + i;
|
112
|
-
|
113
|
-
VALUE ptr =
|
111
|
+
|
112
|
+
VALUE ptr = rb_ary_entry(points, i);
|
114
113
|
VALUE v = rb_check_array_type(ptr);
|
115
114
|
if (NIL_P(v) || RARRAY_LEN(v) != 3) {
|
116
115
|
continue;
|
117
116
|
}
|
118
|
-
|
119
|
-
n->
|
120
|
-
n->
|
121
|
-
n->id = NUM2INT(a[2]);
|
117
|
+
n->x = NUM2DBL(rb_ary_entry(v, 0));
|
118
|
+
n->y = NUM2DBL(rb_ary_entry(v, 1));
|
119
|
+
n->id = NUM2INT(rb_ary_entry(v, 2));
|
122
120
|
}
|
123
121
|
|
124
122
|
// now build the tree
|
125
123
|
kdtreep->root = kdtree_build(kdtreep, 0, kdtreep->len, 0);
|
126
124
|
} else if (rb_respond_to(arg, rb_intern("read"))) {
|
127
125
|
VALUE io = arg;
|
128
|
-
|
129
|
-
|
126
|
+
char buf[4];
|
127
|
+
if (rb_respond_to(io, id_binmode)) {
|
128
|
+
rb_funcall(io, id_binmode, 0);
|
130
129
|
}
|
131
130
|
|
132
|
-
struct rb_io_t *fptr = RFILE(rb_io_taint_check(io))->fptr;
|
133
|
-
rb_io_check_readable(fptr);
|
134
|
-
|
135
131
|
// check magic
|
136
|
-
|
137
|
-
read_all(fptr, buf, 4);
|
132
|
+
read_all(io, buf, 4);
|
138
133
|
if (memcmp(KDTREE_MAGIC, buf, 4) != 0) {
|
139
134
|
rb_raise(rb_eRuntimeError, "wrong magic number in kdtree file");
|
140
135
|
}
|
141
|
-
|
136
|
+
|
142
137
|
// read start of the struct
|
143
|
-
read_all(
|
138
|
+
read_all(io, kdtreep, sizeof(struct kdtree_data) - sizeof(struct kdtree_node *));
|
139
|
+
|
144
140
|
// read the nodes
|
145
141
|
kdtreep->nodes = ALLOC_N(struct kdtree_node, kdtreep->len);
|
146
|
-
read_all(
|
142
|
+
read_all(io, kdtreep->nodes, sizeof(struct kdtree_node) * kdtreep->len);
|
147
143
|
} else {
|
148
|
-
rb_raise(rb_eTypeError, "array or IO required to init
|
144
|
+
rb_raise(rb_eTypeError, "array or IO required to init Kdtree");
|
149
145
|
}
|
150
|
-
|
146
|
+
|
151
147
|
return kdtree;
|
152
148
|
}
|
153
149
|
|
154
150
|
static int comparex(const void *pa, const void *pb)
|
155
151
|
{
|
156
152
|
float a = ((const struct kdtree_node*)pa)->x;
|
157
|
-
float b = ((const struct kdtree_node*)pb)->x;
|
153
|
+
float b = ((const struct kdtree_node*)pb)->x;
|
158
154
|
return (a < b) ? -1 : ((a > b) ? 1 : 0);
|
159
155
|
}
|
160
156
|
|
161
157
|
static int comparey(const void *pa, const void *pb)
|
162
158
|
{
|
163
159
|
float a = ((const struct kdtree_node*)pa)->y;
|
164
|
-
float b = ((const struct kdtree_node*)pb)->y;
|
160
|
+
float b = ((const struct kdtree_node*)pb)->y;
|
165
161
|
return (a < b) ? -1 : ((a > b) ? 1 : 0);
|
166
162
|
}
|
167
163
|
|
168
164
|
static int kdtree_build(struct kdtree_data *kdtreep, int min, int max, int depth)
|
169
165
|
{
|
166
|
+
int(*compar)(const void *, const void *);
|
167
|
+
struct kdtree_node *m;
|
168
|
+
int median;
|
170
169
|
if (max <= min) {
|
171
170
|
return -1;
|
172
171
|
}
|
173
172
|
|
174
173
|
// sort nodes from min to max
|
175
|
-
|
174
|
+
compar = (depth % 2) ? comparex : comparey;
|
176
175
|
qsort(kdtreep->nodes + min, max - min, sizeof(struct kdtree_node), compar);
|
177
176
|
|
178
|
-
|
179
|
-
|
177
|
+
median = (min + max) / 2;
|
178
|
+
m = kdtreep->nodes + median;
|
180
179
|
m->left = kdtree_build(kdtreep, min, median, depth + 1);
|
181
180
|
m->right = kdtree_build(kdtreep, median + 1, max, depth + 1);
|
182
181
|
return median;
|
183
182
|
}
|
184
183
|
|
185
|
-
//
|
186
|
-
// nearest
|
187
|
-
//
|
188
|
-
|
189
|
-
static int n_index;
|
190
|
-
static float n_dist;
|
191
|
-
|
192
184
|
/*
|
193
185
|
* call-seq:
|
194
186
|
* kd.nearest(x, y) => id
|
195
187
|
*
|
196
188
|
* Finds the point closest to <i>x</i>, <i>y</i> and returns the id for that
|
197
189
|
* point. Returns -1 if the tree is empty.
|
198
|
-
*
|
190
|
+
*
|
199
191
|
* points = []
|
200
192
|
* points << [47.6, -122.3, 1] # Seattle
|
201
193
|
* points << [40.7, -74.0, 2] # New York
|
202
|
-
* kd =
|
203
|
-
*
|
194
|
+
* kd = Kdtree.new(points)
|
195
|
+
*
|
204
196
|
* # which city is closest to Portland?
|
205
197
|
* kd.nearest(45.5, -122.8) #=> 1
|
206
198
|
* # which city is closest to Boston?
|
@@ -208,52 +200,59 @@ static float n_dist;
|
|
208
200
|
*/
|
209
201
|
static VALUE kdtree_nearest(VALUE kdtree, VALUE x, VALUE y)
|
210
202
|
{
|
203
|
+
int n_index;
|
204
|
+
float n_dist;
|
211
205
|
KDTREEP;
|
212
206
|
|
213
207
|
n_index = -1;
|
214
208
|
n_dist = INT_MAX;
|
215
|
-
|
209
|
+
|
210
|
+
kdtree_nearest0(kdtreep, kdtreep->root, NUM2DBL(x), NUM2DBL(y), 0, &n_index, &n_dist);
|
216
211
|
if (n_index == -1) {
|
217
212
|
return -1;
|
218
213
|
}
|
219
214
|
return INT2NUM((kdtreep->nodes + n_index)->id);
|
220
215
|
}
|
221
216
|
|
222
|
-
static void kdtree_nearest0(struct kdtree_data *kdtreep, int i, float x, float y, int depth)
|
217
|
+
static void kdtree_nearest0(struct kdtree_data *kdtreep, int i, float x, float y, int depth, int *n_index, float *n_dist)
|
223
218
|
{
|
219
|
+
struct kdtree_node *n;
|
220
|
+
float ad;
|
221
|
+
int near, far;
|
222
|
+
float dx;
|
223
|
+
|
224
224
|
if (i == -1) {
|
225
225
|
return;
|
226
226
|
}
|
227
|
-
|
228
|
-
struct kdtree_node *n = kdtreep->nodes + i;
|
229
227
|
|
230
|
-
|
228
|
+
n = kdtreep->nodes + i;
|
229
|
+
|
230
|
+
ad = (depth % 2) ? (x - n->x) : (y - n->y);
|
231
231
|
|
232
232
|
//
|
233
233
|
// recurse near, and perhaps far as well
|
234
234
|
//
|
235
|
-
|
236
|
-
int near, far;
|
235
|
+
|
237
236
|
if (ad <= 0) {
|
238
237
|
near = n->left; far = n->right;
|
239
238
|
} else {
|
240
239
|
near = n->right; far = n->left;
|
241
240
|
}
|
242
|
-
kdtree_nearest0(kdtreep, near, x, y, depth + 1);
|
243
|
-
if (ad * ad < n_dist) {
|
244
|
-
kdtree_nearest0(kdtreep, far, x, y, depth + 1);
|
241
|
+
kdtree_nearest0(kdtreep, near, x, y, depth + 1, n_index, n_dist);
|
242
|
+
if (ad * ad < *n_dist) {
|
243
|
+
kdtree_nearest0(kdtreep, far, x, y, depth + 1, n_index, n_dist);
|
245
244
|
}
|
246
245
|
|
247
246
|
//
|
248
247
|
// do we beat the old distance?
|
249
248
|
//
|
250
|
-
|
251
|
-
|
252
|
-
if (dx < n_dist) {
|
249
|
+
|
250
|
+
dx = (x - n->x) * (x - n->x);
|
251
|
+
if (dx < *n_dist) {
|
253
252
|
float d = dx + ((y - n->y) * (y - n->y));
|
254
|
-
if (d < n_dist) {
|
255
|
-
n_index = i;
|
256
|
-
n_dist = d;
|
253
|
+
if (d < *n_dist) {
|
254
|
+
*n_index = i;
|
255
|
+
*n_dist = d;
|
257
256
|
}
|
258
257
|
}
|
259
258
|
}
|
@@ -264,15 +263,6 @@ static void kdtree_nearest0(struct kdtree_data *kdtreep, int i, float x, float y
|
|
264
263
|
|
265
264
|
#define MAX_K 255
|
266
265
|
|
267
|
-
typedef struct kresult {
|
268
|
-
int index;
|
269
|
-
float distance;
|
270
|
-
} kresult;
|
271
|
-
// note I leave an extra slot here at the end because of the way our binary insert works
|
272
|
-
static struct kresult k_list[MAX_K + 1];
|
273
|
-
static int k_len;
|
274
|
-
static float k_dist;
|
275
|
-
|
276
266
|
/*
|
277
267
|
* call-seq:
|
278
268
|
* kd.nearestk(x, y, k) => array
|
@@ -280,77 +270,84 @@ static float k_dist;
|
|
280
270
|
* Finds the <i>k</i> points closest to <i>x</i>, <i>y</i>. Returns an array of
|
281
271
|
* ids, sorted by distance. Returns an empty array if the tree is empty. Note
|
282
272
|
* that <i>k</i> is capped at 255.
|
283
|
-
*
|
273
|
+
*
|
284
274
|
* points = []
|
285
275
|
* points << [47.6, -122.3, 1] # Seattle
|
286
276
|
* points << [45.5, -122.8, 2] # Portland
|
287
277
|
* points << [40.7, -74.0, 3] # New York
|
288
|
-
* kd =
|
289
|
-
*
|
278
|
+
* kd = Kdtree.new(points)
|
279
|
+
*
|
290
280
|
* # which two cities are closest to San Francisco?
|
291
281
|
* kd.nearestk(34.1, -118.2, 2) #=> [2, 1]
|
292
282
|
*/
|
293
283
|
static VALUE kdtree_nearestk(VALUE kdtree, VALUE x, VALUE y, VALUE k)
|
294
284
|
{
|
285
|
+
// note I leave an extra slot here at the end because of the way our binary insert works
|
286
|
+
kresult k_list[MAX_K + 1];
|
287
|
+
int k_len = 0;
|
288
|
+
float k_dist = INT_MAX;
|
289
|
+
int ki = NUM2INT(k);
|
290
|
+
VALUE ary;
|
291
|
+
int i;
|
295
292
|
KDTREEP;
|
296
293
|
|
297
|
-
k_len = 0;
|
298
|
-
k_dist = INT_MAX;
|
299
|
-
|
300
|
-
int ki = NUM2INT(k);
|
301
294
|
if (ki < 1) {
|
302
295
|
ki = 1;
|
303
296
|
} else if (ki > MAX_K) {
|
304
297
|
ki = MAX_K;
|
305
298
|
}
|
306
|
-
kdtree_nearestk0(kdtreep, kdtreep->root, NUM2DBL(x), NUM2DBL(y), ki, 0);
|
299
|
+
kdtree_nearestk0(kdtreep, kdtreep->root, NUM2DBL(x), NUM2DBL(y), ki, 0, k_list, &k_len, &k_dist);
|
307
300
|
|
308
301
|
// convert result to ruby array
|
309
|
-
|
310
|
-
int i;
|
302
|
+
ary = rb_ary_new();
|
311
303
|
for (i = 0; i < k_len; ++i) {
|
312
304
|
rb_ary_push(ary, INT2NUM(kdtreep->nodes[k_list[i].index].id));
|
313
305
|
}
|
314
306
|
return ary;
|
315
307
|
}
|
316
308
|
|
317
|
-
static void kdtree_nearestk0(struct kdtree_data *kdtreep, int i, float x, float y, int k, int depth)
|
309
|
+
static void kdtree_nearestk0(struct kdtree_data *kdtreep, int i, float x, float y, int k, int depth, kresult *k_list, int *k_len, float *k_dist)
|
318
310
|
{
|
311
|
+
struct kdtree_node *n;
|
312
|
+
float ad;
|
313
|
+
int near, far;
|
314
|
+
float dx;
|
315
|
+
int lo, hi;
|
316
|
+
|
319
317
|
if (i == -1) {
|
320
318
|
return;
|
321
319
|
}
|
322
|
-
|
323
|
-
struct kdtree_node *n = kdtreep->nodes + i;
|
324
320
|
|
325
|
-
|
321
|
+
n = kdtreep->nodes + i;
|
322
|
+
|
323
|
+
ad = (depth % 2) ? (x - n->x) : (y - n->y);
|
326
324
|
|
327
325
|
//
|
328
326
|
// recurse near, and then perhaps far as well
|
329
327
|
//
|
330
|
-
|
331
|
-
int near, far;
|
328
|
+
|
332
329
|
if (ad <= 0) {
|
333
330
|
near = n->left; far = n->right;
|
334
331
|
} else {
|
335
332
|
near = n->right; far = n->left;
|
336
333
|
}
|
337
|
-
kdtree_nearestk0(kdtreep, near, x, y, k, depth + 1);
|
338
|
-
if (ad * ad < k_dist) {
|
339
|
-
kdtree_nearestk0(kdtreep, far, x, y, k, depth + 1);
|
334
|
+
kdtree_nearestk0(kdtreep, near, x, y, k, depth + 1, k_list, k_len, k_dist);
|
335
|
+
if (ad * ad < *k_dist) {
|
336
|
+
kdtree_nearestk0(kdtreep, far, x, y, k, depth + 1, k_list, k_len, k_dist);
|
340
337
|
}
|
341
338
|
|
342
339
|
//
|
343
340
|
// do we beat the old distance?
|
344
341
|
//
|
345
|
-
|
346
|
-
|
347
|
-
if (dx < k_dist) {
|
342
|
+
|
343
|
+
dx = (x - n->x) * (x - n->x);
|
344
|
+
if (dx < *k_dist) {
|
348
345
|
float d = dx + ((y - n->y) * (y - n->y));
|
349
|
-
if (d < k_dist) {
|
346
|
+
if (d < *k_dist) {
|
350
347
|
//
|
351
348
|
// find spot to insert
|
352
349
|
//
|
353
|
-
|
350
|
+
lo = 0, hi = *k_len;
|
354
351
|
while (lo < hi) {
|
355
352
|
int mid = (lo + hi) / 2;
|
356
353
|
if (k_list[mid].distance < d) {
|
@@ -363,21 +360,21 @@ static void kdtree_nearestk0(struct kdtree_data *kdtreep, int i, float x, float
|
|
363
360
|
//
|
364
361
|
// insert
|
365
362
|
//
|
366
|
-
|
367
|
-
memmove(k_list + lo + 1, k_list + lo, (k_len - lo) * sizeof(struct kresult));
|
363
|
+
|
364
|
+
memmove(k_list + lo + 1, k_list + lo, (*k_len - lo) * sizeof(struct kresult));
|
368
365
|
k_list[lo].index = i;
|
369
366
|
k_list[lo].distance = d;
|
370
367
|
|
371
368
|
//
|
372
369
|
// adjust len/dist if necessary
|
373
370
|
//
|
374
|
-
|
375
|
-
if (k_len < k) {
|
376
|
-
++k_len;
|
371
|
+
|
372
|
+
if (*k_len < k) {
|
373
|
+
++(*k_len);
|
377
374
|
} else {
|
378
|
-
k_dist = k_list[k - 1].distance;
|
375
|
+
*k_dist = k_list[k - 1].distance;
|
379
376
|
}
|
380
|
-
|
377
|
+
}
|
381
378
|
}
|
382
379
|
}
|
383
380
|
|
@@ -386,43 +383,42 @@ static void kdtree_nearestk0(struct kdtree_data *kdtreep, int i, float x, float
|
|
386
383
|
* kd.persist(io)
|
387
384
|
*
|
388
385
|
* Writes the tree out to <i>io</i> so you can quickly load it later with
|
389
|
-
*
|
386
|
+
* Kdtree.new. This avoids the startup cost of initializing a tree. Apart from a
|
390
387
|
* small header, the size of the file is proportional to the number of points,
|
391
388
|
* requiring 20 bytes per point.
|
392
389
|
*
|
393
390
|
* This file is <b>NOT PORTABLE</b> across different architectures due to endian
|
394
391
|
* issues.
|
395
|
-
*
|
392
|
+
*
|
396
393
|
* points = []
|
397
394
|
* points << [47.6, -122.3, 1] # Seattle
|
398
395
|
* points << [45.5, -122.8, 2] # Portland
|
399
396
|
* points << [40.7, -74.0, 3] # New York
|
400
|
-
* kd =
|
397
|
+
* kd = Kdtree.new(points)
|
401
398
|
*
|
402
399
|
* # persist the tree to disk
|
403
400
|
* File.open("treefile", "w") { |f| kd.persist(f) }
|
404
401
|
*
|
405
402
|
* ...
|
406
|
-
*
|
403
|
+
*
|
407
404
|
* # later, read the tree from disk
|
408
|
-
* kd2 = File.open("treefile") { |f|
|
405
|
+
* kd2 = File.open("treefile") { |f| Kdtree.new(f) }
|
409
406
|
*/
|
410
407
|
static VALUE kdtree_persist(VALUE kdtree, VALUE io)
|
411
408
|
{
|
409
|
+
VALUE str;
|
412
410
|
KDTREEP;
|
413
|
-
|
411
|
+
|
414
412
|
if (!rb_respond_to(io, rb_intern("write"))) {
|
415
413
|
rb_raise(rb_eTypeError, "instance of IO needed");
|
416
414
|
}
|
417
|
-
if (rb_respond_to(io,
|
418
|
-
|
415
|
+
if (rb_respond_to(io, id_binmode)) {
|
416
|
+
rb_funcall(io, id_binmode, 0);
|
419
417
|
}
|
420
418
|
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
rb_str_buf_cat(str, (char*)kdtreep->nodes, sizeof(struct kdtree_node) * kdtreep->len);
|
425
|
-
rb_io_write(io, str);
|
419
|
+
write_all(io, KDTREE_MAGIC, 4);
|
420
|
+
write_all(io, kdtreep, sizeof(struct kdtree_data) - sizeof(struct kdtree_node *));
|
421
|
+
write_all(io, kdtreep->nodes, sizeof(struct kdtree_node) * kdtreep->len);
|
426
422
|
return io;
|
427
423
|
}
|
428
424
|
|
@@ -434,22 +430,40 @@ static VALUE kdtree_persist(VALUE kdtree, VALUE io)
|
|
434
430
|
*/
|
435
431
|
static VALUE kdtree_to_s(VALUE kdtree)
|
436
432
|
{
|
433
|
+
char buf[256];
|
437
434
|
KDTREEP;
|
438
435
|
|
439
|
-
char buf[256];
|
440
436
|
sprintf(buf, "#<%s:%p nodes=%d>", rb_obj_classname(kdtree), (void*)kdtree, kdtreep->len);
|
441
437
|
return rb_str_new(buf, strlen(buf));
|
442
438
|
}
|
443
439
|
|
440
|
+
//
|
441
|
+
// io helpers
|
442
|
+
//
|
443
|
+
|
444
|
+
static void read_all(VALUE io, void *buf, int len)
|
445
|
+
{
|
446
|
+
VALUE string = rb_funcall(io, id_read, 1, INT2NUM(len));
|
447
|
+
if (NIL_P(string) || RSTRING_LEN(string) != len) {
|
448
|
+
rb_raise(rb_eEOFError, "end of file reached");
|
449
|
+
}
|
450
|
+
memcpy(buf, RSTRING_PTR(string), len);
|
451
|
+
}
|
452
|
+
|
453
|
+
static void write_all(VALUE io, const void *buf, int len)
|
454
|
+
{
|
455
|
+
rb_funcall(io, id_write, 1, rb_str_new(buf, len));
|
456
|
+
}
|
457
|
+
|
444
458
|
//
|
445
459
|
// entry point
|
446
460
|
//
|
447
461
|
|
448
462
|
/*
|
449
|
-
*
|
463
|
+
* Kdtree is an insanely fast data structure for finding the nearest
|
450
464
|
* neighbor(s) to a given point. This implementation only supports 2d
|
451
465
|
* points. Also, it only supports static points - there is no way to edit the
|
452
|
-
* tree after it has been initialized.
|
466
|
+
* tree after it has been initialized. Kdtree should scale to millions of
|
453
467
|
* points, though it's only been tested with around 1 million.
|
454
468
|
*
|
455
469
|
* Once the tree is constructed, it can be searched with nearest and nearestk.
|
@@ -462,8 +476,8 @@ static VALUE kdtree_to_s(VALUE kdtree)
|
|
462
476
|
* points << [47.6, -122.3, 1] # Seattle
|
463
477
|
* points << [45.5, -122.8, 2] # Portland
|
464
478
|
* points << [40.7, -74.0, 3] # New York
|
465
|
-
* kd =
|
466
|
-
*
|
479
|
+
* kd = Kdtree.new(points)
|
480
|
+
*
|
467
481
|
* # which city is closest to San Francisco?
|
468
482
|
* kd.nearest(34.1, -118.2) #=> 2
|
469
483
|
* # which two cities are closest to San Francisco?
|
@@ -477,12 +491,17 @@ void Init_kdtree()
|
|
477
491
|
{
|
478
492
|
static VALUE clazz;
|
479
493
|
|
480
|
-
clazz = rb_define_class("
|
481
|
-
|
482
|
-
rb_define_alloc_func(clazz, kdtree_alloc);
|
494
|
+
clazz = rb_define_class("Kdtree", rb_cObject);
|
495
|
+
|
496
|
+
rb_define_alloc_func(clazz, kdtree_alloc);
|
483
497
|
rb_define_method(clazz, "initialize", kdtree_initialize, 1);
|
484
498
|
rb_define_method(clazz, "nearest", kdtree_nearest, 2);
|
485
499
|
rb_define_method(clazz, "nearestk", kdtree_nearestk, 3);
|
486
|
-
rb_define_method(clazz, "persist", kdtree_persist, 1);
|
487
|
-
rb_define_method(clazz, "to_s", kdtree_to_s, 0);
|
500
|
+
rb_define_method(clazz, "persist", kdtree_persist, 1);
|
501
|
+
rb_define_method(clazz, "to_s", kdtree_to_s, 0);
|
502
|
+
|
503
|
+
// function ids
|
504
|
+
id_binmode = rb_intern("binmode");
|
505
|
+
id_read = rb_intern("read");
|
506
|
+
id_write = rb_intern("write");
|
488
507
|
}
|
data/kdtree.gemspec
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Gem::Specification.new do |s|
|
2
|
+
s.name = "kdtree"
|
3
|
+
s.version = "0.3"
|
4
|
+
|
5
|
+
s.authors = ["Adam Doppelt"]
|
6
|
+
s.email = ["amd@gurge.com"]
|
7
|
+
s.homepage = "http://github.com/gurgeous/kdtree"
|
8
|
+
s.summary = "Blazingly fast, native 2d kdtree."
|
9
|
+
s.description = <<EOF
|
10
|
+
A kdtree is a data structure that makes it possible to quickly solve
|
11
|
+
the nearest neighbor problem. This is a native 2d kdtree suitable for
|
12
|
+
production use with millions of points.
|
13
|
+
EOF
|
14
|
+
|
15
|
+
s.rubyforge_project = "kdtree"
|
16
|
+
s.add_development_dependency "rake-compiler"
|
17
|
+
|
18
|
+
s.files = `git ls-files`.split("\n")
|
19
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
20
|
+
s.extensions = ["ext/kdtree/extconf.rb"]
|
21
|
+
s.require_paths = ["lib"]
|
22
|
+
end
|
data/lib/kdtree.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "kdtree.so"
|
data/test/test_kdtree.rb
ADDED
@@ -0,0 +1,150 @@
|
|
1
|
+
require "benchmark"
|
2
|
+
require "kdtree"
|
3
|
+
require "tempfile"
|
4
|
+
require "test/unit"
|
5
|
+
|
6
|
+
#
|
7
|
+
# create a tree
|
8
|
+
#
|
9
|
+
|
10
|
+
class KdtreeTest < Test::Unit::TestCase
|
11
|
+
TMP = "#{Dir.tmpdir}/kdtree_test"
|
12
|
+
|
13
|
+
def setup
|
14
|
+
@points = (0...1000).map { |i| [rand_coord, rand_coord, i] }
|
15
|
+
@kdtree = Kdtree.new(@points)
|
16
|
+
end
|
17
|
+
|
18
|
+
def teardown
|
19
|
+
File.unlink(TMP) if File.exists?(TMP)
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_nearest
|
23
|
+
100.times do
|
24
|
+
pt = [rand_coord, rand_coord]
|
25
|
+
|
26
|
+
# kdtree search
|
27
|
+
id = @kdtree.nearest(pt[0], pt[1])
|
28
|
+
kdpt = @points[id]
|
29
|
+
|
30
|
+
# slow search
|
31
|
+
sortpt = @points.sort_by { |i| distance(i, pt) }.first
|
32
|
+
|
33
|
+
# assert
|
34
|
+
kdd = distance(kdpt, pt)
|
35
|
+
sortd = distance(sortpt, pt)
|
36
|
+
assert((kdd - sortd).abs < 0.0000001, "kdtree didn't return the closest result")
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def test_nearestk
|
41
|
+
100.times do
|
42
|
+
pt = [rand_coord, rand_coord]
|
43
|
+
|
44
|
+
# kdtree search
|
45
|
+
list = @kdtree.nearestk(pt[0], pt[1], 5)
|
46
|
+
kdpt = @points[list.last]
|
47
|
+
|
48
|
+
# slow search
|
49
|
+
sortpt = @points.sort_by { |i| distance(i, pt) }[list.length - 1]
|
50
|
+
|
51
|
+
# assert
|
52
|
+
kdd = distance(kdpt, pt)
|
53
|
+
sortd = distance(sortpt, pt)
|
54
|
+
assert((kdd - sortd).abs < 0.0000001, "kdtree didn't return the closest result")
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
def test_persist
|
59
|
+
# write
|
60
|
+
File.open(TMP, "w") { |f| @kdtree.persist(f) }
|
61
|
+
# read
|
62
|
+
kdtree2 = File.open(TMP, "r") { |f| Kdtree.new(f) }
|
63
|
+
|
64
|
+
# now test some random points
|
65
|
+
100.times do
|
66
|
+
pt = [rand_coord, rand_coord]
|
67
|
+
id1 = @kdtree.nearest(*pt)
|
68
|
+
id2 = kdtree2.nearest(*pt)
|
69
|
+
assert(id1 == id2, "kdtree2 differed from kdtree")
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
def test_bad_magic
|
74
|
+
File.open(TMP, "w") { |f| f.puts "That ain't right" }
|
75
|
+
assert_raise RuntimeError do
|
76
|
+
File.open(TMP, "r") { |f| Kdtree.new(f) }
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
def test_eof
|
81
|
+
File.open(TMP, "w") { |f| @kdtree.persist(f) }
|
82
|
+
bytes = File.read(TMP)
|
83
|
+
|
84
|
+
[2, 10, 100].each do |len|
|
85
|
+
File.open(TMP, "w") { |f| f.write(bytes[0, len]) }
|
86
|
+
assert_raise EOFError do
|
87
|
+
File.open(TMP, "r") { |f| Kdtree.new(f) }
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
def dont_test_speed
|
93
|
+
sizes = [1, 100, 1000, 10000, 100000, 1000000]
|
94
|
+
ks = [1, 5, 50, 255]
|
95
|
+
sizes.each do |s|
|
96
|
+
points = (0...s).map { |i| [rand_coord, rand_coord, i] }
|
97
|
+
|
98
|
+
# build
|
99
|
+
Benchmark.bm(17) do |bm|
|
100
|
+
kdtree = nil
|
101
|
+
bm.report "build" do
|
102
|
+
kdtree = Kdtree.new(points)
|
103
|
+
end
|
104
|
+
bm.report "persist" do
|
105
|
+
File.open(TMP, "w") { |f| kdtree.persist(f) }
|
106
|
+
end
|
107
|
+
bm.report "read" do
|
108
|
+
File.open(TMP, "r") { |f| Kdtree.new(f) }
|
109
|
+
end
|
110
|
+
|
111
|
+
ks.each do |k|
|
112
|
+
bm.report "100 queries (#{k})" do
|
113
|
+
total = count = 0
|
114
|
+
100.times do
|
115
|
+
tm = Time.now
|
116
|
+
if k == 1
|
117
|
+
kdtree.nearest(rand_coord, rand_coord)
|
118
|
+
else
|
119
|
+
kdtree.nearestk(rand_coord, rand_coord, k)
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
125
|
+
puts
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
protected
|
130
|
+
|
131
|
+
def distance(a, b)
|
132
|
+
x, y = a[0] - b[0], a[1] - b[1]
|
133
|
+
x * x + y * y
|
134
|
+
end
|
135
|
+
|
136
|
+
def rand_coord
|
137
|
+
rand(0) * 10 - 5
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
# running dont_test_speed on my i5 2.8ghz:
|
142
|
+
#
|
143
|
+
# user system total real
|
144
|
+
# build 3.350000 0.020000 3.370000 ( 3.520528)
|
145
|
+
# persist 0.150000 0.020000 0.170000 ( 0.301963)
|
146
|
+
# read 0.280000 0.000000 0.280000 ( 0.432676)
|
147
|
+
# 100 queries (1) 0.000000 0.000000 0.000000 ( 0.000319)
|
148
|
+
# 100 queries (5) 0.000000 0.000000 0.000000 ( 0.000412)
|
149
|
+
# 100 queries (50) 0.000000 0.000000 0.000000 ( 0.001417)
|
150
|
+
# 100 queries (255) 0.000000 0.000000 0.000000 ( 0.006268)
|
metadata
CHANGED
@@ -1,61 +1,85 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: kdtree
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.3'
|
5
|
+
prerelease:
|
5
6
|
platform: ruby
|
6
|
-
authors:
|
7
|
+
authors:
|
7
8
|
- Adam Doppelt
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
12
|
+
date: 2012-10-17 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rake-compiler
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
description: ! 'A kdtree is a data structure that makes it possible to quickly solve
|
11
31
|
|
12
|
-
|
13
|
-
default_executable:
|
14
|
-
dependencies: []
|
32
|
+
the nearest neighbor problem. This is a native 2d kdtree suitable for
|
15
33
|
|
16
|
-
|
17
|
-
email: amd@gurge.com
|
18
|
-
executables: []
|
34
|
+
production use with millions of points.
|
19
35
|
|
20
|
-
|
21
|
-
|
36
|
+
'
|
37
|
+
email:
|
38
|
+
- amd@gurge.com
|
39
|
+
executables: []
|
40
|
+
extensions:
|
41
|
+
- ext/kdtree/extconf.rb
|
22
42
|
extra_rdoc_files: []
|
23
|
-
|
24
|
-
|
25
|
-
-
|
26
|
-
-
|
43
|
+
files:
|
44
|
+
- .gitignore
|
45
|
+
- .travis.yml
|
46
|
+
- Gemfile
|
27
47
|
- LICENSE
|
28
|
-
-
|
29
|
-
|
30
|
-
|
48
|
+
- README.md
|
49
|
+
- Rakefile
|
50
|
+
- ext/kdtree/extconf.rb
|
51
|
+
- ext/kdtree/kdtree.c
|
52
|
+
- kdtree.gemspec
|
53
|
+
- lib/kdtree.rb
|
54
|
+
- test/test_kdtree.rb
|
55
|
+
homepage: http://github.com/gurgeous/kdtree
|
31
56
|
licenses: []
|
32
|
-
|
33
57
|
post_install_message:
|
34
|
-
rdoc_options:
|
35
|
-
|
36
|
-
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
requirements:
|
49
|
-
- -
|
50
|
-
- !ruby/object:Gem::Version
|
51
|
-
version:
|
52
|
-
|
58
|
+
rdoc_options: []
|
59
|
+
require_paths:
|
60
|
+
- lib
|
61
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ! '>='
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
segments:
|
68
|
+
- 0
|
69
|
+
hash: -3094601017742930682
|
70
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
72
|
+
requirements:
|
73
|
+
- - ! '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
segments:
|
77
|
+
- 0
|
78
|
+
hash: -3094601017742930682
|
53
79
|
requirements: []
|
54
|
-
|
55
|
-
|
56
|
-
rubygems_version: 1.3.5
|
80
|
+
rubyforge_project: kdtree
|
81
|
+
rubygems_version: 1.8.21
|
57
82
|
signing_key:
|
58
83
|
specification_version: 3
|
59
|
-
summary: Blazingly fast 2d kdtree.
|
60
|
-
test_files:
|
61
|
-
- test/test.rb
|
84
|
+
summary: Blazingly fast, native 2d kdtree.
|
85
|
+
test_files: []
|
data/test/test.rb
DELETED
@@ -1,138 +0,0 @@
|
|
1
|
-
require "#{File.expand_path(File.dirname(__FILE__))}/../ext/kdtree.o"
|
2
|
-
require "test/unit"
|
3
|
-
require "tempfile"
|
4
|
-
|
5
|
-
#
|
6
|
-
# create a tree
|
7
|
-
#
|
8
|
-
|
9
|
-
class KDTreeTest < Test::Unit::TestCase
|
10
|
-
TMP = "#{Dir.tmpdir}/kdtree_test"
|
11
|
-
|
12
|
-
def test_nearest
|
13
|
-
setup_tree(1000)
|
14
|
-
100.times do
|
15
|
-
pt = [rand_coord, rand_coord]
|
16
|
-
|
17
|
-
# kdtree search
|
18
|
-
id = @kdtree.nearest(pt[0], pt[1])
|
19
|
-
kdpt = @points[id]
|
20
|
-
|
21
|
-
# slow search
|
22
|
-
sortpt = @points.sort_by { |i| distance(i, pt) }.first
|
23
|
-
|
24
|
-
# assert
|
25
|
-
kdd = distance(kdpt, pt)
|
26
|
-
sortd = distance(sortpt, pt)
|
27
|
-
assert((kdd - sortd).abs < 0.0000001, "kdtree didn't return the closest result")
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
def test_nearestk
|
32
|
-
setup_tree(1000)
|
33
|
-
100.times do
|
34
|
-
pt = [rand_coord, rand_coord]
|
35
|
-
|
36
|
-
# kdtree search
|
37
|
-
list = @kdtree.nearestk(pt[0], pt[1], 5)
|
38
|
-
kdpt = @points[list.last]
|
39
|
-
|
40
|
-
# slow search
|
41
|
-
sortpt = @points.sort_by { |i| distance(i, pt) }[list.length - 1]
|
42
|
-
|
43
|
-
# assert
|
44
|
-
kdd = distance(kdpt, pt)
|
45
|
-
sortd = distance(sortpt, pt)
|
46
|
-
assert((kdd - sortd).abs < 0.0000001, "kdtree didn't return the closest result")
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
def test_persist
|
51
|
-
setup_tree(1000)
|
52
|
-
|
53
|
-
begin
|
54
|
-
# write
|
55
|
-
File.open(TMP, "w") { |f| @kdtree.persist(f) }
|
56
|
-
# read
|
57
|
-
kdtree2 = File.open(TMP, "r") { |f| KDTree.new(f) }
|
58
|
-
|
59
|
-
# now test some random points
|
60
|
-
100.times do
|
61
|
-
pt = [rand_coord, rand_coord]
|
62
|
-
id1 = @kdtree.nearest(*pt)
|
63
|
-
id2 = kdtree2.nearest(*pt)
|
64
|
-
assert(id1 == id2, "kdtree2 differed from kdtree")
|
65
|
-
end
|
66
|
-
ensure
|
67
|
-
File.unlink(TMP)
|
68
|
-
end
|
69
|
-
|
70
|
-
# now test magic problems
|
71
|
-
begin
|
72
|
-
File.open(TMP, "w") { |f| f.puts "That ain't right" }
|
73
|
-
assert_raise RuntimeError do
|
74
|
-
File.open(TMP, "r") { |f| KDTree.new(f) }
|
75
|
-
end
|
76
|
-
ensure
|
77
|
-
File.unlink(TMP)
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
def dont_test_speed
|
82
|
-
printf("\n")
|
83
|
-
sizes = [1, 100, 1000, 10000, 100000, 1000000]
|
84
|
-
ks = [1, 5, 50, 255]
|
85
|
-
sizes.each do |s|
|
86
|
-
points = (0...s).map { |i| [rand_coord, rand_coord, i] }
|
87
|
-
|
88
|
-
# build
|
89
|
-
tm = Time.now
|
90
|
-
kdtree = KDTree.new(points)
|
91
|
-
printf "build %d took %.6fs\n", s, Time.now - tm
|
92
|
-
|
93
|
-
begin
|
94
|
-
# write
|
95
|
-
tm = Time.now
|
96
|
-
File.open(TMP, "w") { |f| kdtree.persist(f) }
|
97
|
-
printf "write %d took %.6fs\n", s, Time.now - tm
|
98
|
-
# read
|
99
|
-
tm = Time.now
|
100
|
-
File.open(TMP, "r") { |f| KDTree.new(f) }
|
101
|
-
printf "read %d took %.6fs\n", s, Time.now - tm
|
102
|
-
ensure
|
103
|
-
File.unlink(TMP)
|
104
|
-
end
|
105
|
-
|
106
|
-
ks.each do |k|
|
107
|
-
total = count = 0
|
108
|
-
100.times do
|
109
|
-
tm = Time.now
|
110
|
-
if k == 1
|
111
|
-
kdtree.nearest(rand_coord, rand_coord)
|
112
|
-
else
|
113
|
-
kdtree.nearestk(rand_coord, rand_coord, k)
|
114
|
-
end
|
115
|
-
total += Time.now - tm
|
116
|
-
count += 1
|
117
|
-
end
|
118
|
-
printf "avg query time = %.6fs [%d/%d]\n", total / count, s, k
|
119
|
-
end
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
123
|
-
protected
|
124
|
-
|
125
|
-
def setup_tree(len)
|
126
|
-
@points = (0...len).map { |i| [rand_coord, rand_coord, i] }
|
127
|
-
@kdtree = KDTree.new(@points)
|
128
|
-
end
|
129
|
-
|
130
|
-
def distance(a, b)
|
131
|
-
x, y = a[0] - b[0], a[1] - b[1]
|
132
|
-
x * x + y * y
|
133
|
-
end
|
134
|
-
|
135
|
-
def rand_coord
|
136
|
-
rand(0) * 10 - 5
|
137
|
-
end
|
138
|
-
end
|