ghazel-kdtree 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +20 -0
- data/ext/extconf.rb +3 -0
- data/ext/kdtree.c +503 -0
- data/test/test.rb +138 -0
- metadata +61 -0
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Adam Doppelt
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/ext/extconf.rb
ADDED
data/ext/kdtree.c
ADDED
@@ -0,0 +1,503 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include "rubyio.h"
|
3
|
+
#include "version.h"
|
4
|
+
|
5
|
+
#ifndef HAVE_RB_IO_T
|
6
|
+
#define rb_io_t OpenFile
|
7
|
+
#endif
|
8
|
+
|
9
|
+
//
|
10
|
+
// interface
|
11
|
+
//
|
12
|
+
|
13
|
+
typedef struct kdtree_data // per-object state wrapped by each KDTree instance
|
14
|
+
{
|
15
|
+
int root; // index into nodes of the tree root; -1 while the tree is empty
|
16
|
+
int len; // number of entries in nodes
|
17
|
+
struct kdtree_node *nodes; // node array allocated with ALLOC_N; must stay the last member, persist/load copy everything before it verbatim
|
18
|
+
} kdtree_data;
|
19
|
+
|
20
|
+
typedef struct kdtree_node // one 2d point plus its links inside the tree
|
21
|
+
{
|
22
|
+
float x, y; // point coordinates (converted from Ruby numbers with NUM2DBL, stored as float)
|
23
|
+
int id; // caller-supplied identifier, handed back by nearest/nearestk
|
24
|
+
int left; // index in the nodes array of the left child, -1 if there is none
|
25
|
+
int right; // index in the nodes array of the right child, -1 if there is none
|
26
|
+
} kdtree_node;
|
27
|
+
|
28
|
+
#define KDTREEP \
|
29
|
+
struct kdtree_data *kdtreep; \
|
30
|
+
Data_Get_Struct(kdtree, struct kdtree_data, kdtreep); /* declares kdtreep and unwraps it from the VALUE named kdtree, which must be in scope at the point of use; because it declares a variable, use it after the other local declarations */
|
31
|
+
|
32
|
+
static VALUE kdtree_alloc(VALUE klass);
|
33
|
+
static void kdtree_free(struct kdtree_data *kdtreep);
|
34
|
+
static VALUE kdtree_initialize(VALUE kdtree, VALUE points);
|
35
|
+
static VALUE kdtree_nearest(VALUE kdtree, VALUE x, VALUE y);
|
36
|
+
static VALUE kdtree_nearestk(VALUE kdtree, VALUE x, VALUE y, VALUE k);
|
37
|
+
static VALUE kdtree_persist(VALUE kdtree, VALUE io);
|
38
|
+
static VALUE kdtree_to_s(VALUE kdtree);
|
39
|
+
|
40
|
+
// helpers
|
41
|
+
static int kdtree_build(struct kdtree_data *kdtreep, int min, int max, int depth);
|
42
|
+
static void kdtree_nearest0(struct kdtree_data *kdtreep, int i, float x, float y, int depth);
|
43
|
+
static void kdtree_nearestk0(struct kdtree_data *kdtreep, int i, float x, float y, int k, int depth);
|
44
|
+
|
45
|
+
#define KDTREE_MAGIC "KdTr"
|
46
|
+
|
47
|
+
//
|
48
|
+
// implementation
|
49
|
+
//
|
50
|
+
|
51
|
+
//
// Allocation function for KDTree: wraps a new kdtree_data in a Ruby object
// of class klass, registers kdtree_free as its free callback and marks the
// tree as empty by setting root to -1.
//
static VALUE kdtree_alloc(VALUE klass)
{
    struct kdtree_data *tree;
    VALUE self = Data_Make_Struct(klass, struct kdtree_data, 0, kdtree_free, tree);

    tree->root = -1;
    return self;
}
|
58
|
+
|
59
|
+
static void kdtree_free(struct kdtree_data *kdtreep)
|
60
|
+
{
|
61
|
+
if (kdtreep) {
|
62
|
+
free(kdtreep->nodes);
|
63
|
+
}
|
64
|
+
}
|
65
|
+
|
66
|
+
static void read_all(struct rb_io_t *fptr, char *buf, int len)
|
67
|
+
{
|
68
|
+
while (len > 0) {
|
69
|
+
int n = rb_io_fread(buf, len, fptr->f);
|
70
|
+
if (n == 0) {
|
71
|
+
rb_eof_error();
|
72
|
+
}
|
73
|
+
buf += n;
|
74
|
+
len -= n;
|
75
|
+
}
|
76
|
+
}
|
77
|
+
|
78
|
+
/*
|
79
|
+
* call-seq:
|
80
|
+
* KDTree.new(points) => kdtree
|
81
|
+
* KDTree.new(io) => kdtree
|
82
|
+
*
|
83
|
+
* Returns a new <code>KDTree</code>. To construct a tree, pass an array of
|
84
|
+
* <i>points</i>. Each point should be an array of the form <code>[x, y,
|
85
|
+
* id]</code>, where <i>x</i> and <i>y</i> are floats and <i>id</i> is an
|
86
|
+
* integer. The <i>id</i> is arbitrary and will be returned to you whenever you
|
87
|
+
* search with nearest or nearestk.
|
88
|
+
*
|
89
|
+
* # create a new tree
|
90
|
+
* points = []
|
91
|
+
* points << [47.6, -122.3, 1] # Seattle
|
92
|
+
* points << [40.7, -74.0, 2] # New York
|
93
|
+
* kd = KDTree.new(points)
|
94
|
+
*
|
95
|
+
* Alternately, you can pass in an <i>IO</i> object containing a persisted
|
96
|
+
* kdtree. This makes it possible to build the tree in advance, persist it, and
|
97
|
+
* start it up quickly later. See persist for more information.
|
98
|
+
*/
|
99
|
+
static VALUE kdtree_initialize(VALUE kdtree, VALUE arg)
{
    KDTREEP;

    //
    // Builds the tree either from an array of [x, y, id] points or from an
    // IO holding a tree written by persist. In both paths the object is kept
    // in a consistent (empty) state until the new tree is complete, so an
    // exception raised half way through never leaves dangling indexes behind.
    //

    if (TYPE(arg) == T_ARRAY) {
        // init from array of points
        VALUE points = arg;
        long count = RARRAY_LEN(points);
        long i;
        int used = 0;

        // len and all node indexes are ints
        if (count > INT_MAX) {
            rb_raise(rb_eArgError, "too many points for KDTree");
        }

        // release a tree left behind by an earlier initialize call
        xfree(kdtreep->nodes);
        kdtreep->nodes = NULL;
        kdtreep->len = 0;
        kdtreep->root = -1;
        kdtreep->nodes = ALLOC_N(struct kdtree_node, count);

        // the conversions below can call back into Ruby, so re-check the
        // array length on every pass and never go beyond what was allocated
        for (i = 0; i < count && i < RARRAY_LEN(points); ++i) {
            struct kdtree_node *n;
            VALUE v = rb_check_array_type(RARRAY_PTR(points)[i]);
            VALUE *a;
            if (NIL_P(v) || RARRAY_LEN(v) != 3) {
                // malformed entries are skipped; they do not take up a node
                // slot, so no uninitialized node ends up in the tree
                continue;
            }
            // read from the converted array, not from the original element
            a = RARRAY_PTR(v);
            n = kdtreep->nodes + used;
            n->x = NUM2DBL(a[0]);
            n->y = NUM2DBL(a[1]);
            n->id = NUM2INT(a[2]);
            ++used;
        }

        // now build the tree over the nodes that were actually filled in
        kdtreep->len = used;
        kdtreep->root = kdtree_build(kdtreep, 0, kdtreep->len, 0);
    } else if (rb_respond_to(arg, rb_intern("read"))) {
        VALUE io = arg;
        struct rb_io_t *fptr;
        struct kdtree_data header;
        struct kdtree_node *nodes;
        char magic[4];
        int i;

        // RFILE() below is only meaningful for real IO objects, not for any
        // object that merely responds to read
        Check_Type(io, T_FILE);

        if (rb_respond_to(io, rb_intern("binmode"))) {
            rb_funcall2(io, rb_intern("binmode"), 0, 0);
        }

        fptr = RFILE(rb_io_taint_check(io))->fptr;
        rb_io_check_readable(fptr);

        // check magic
        read_all(fptr, magic, 4);
        if (memcmp(KDTREE_MAGIC, magic, 4) != 0) {
            rb_raise(rb_eRuntimeError, "wrong magic number in kdtree file");
        }

        // read start of the struct into a scratch copy and validate it before
        // anything is allocated or stored in the object
        read_all(fptr, (char *)&header, sizeof(struct kdtree_data) - sizeof(struct kdtree_node *));
        if (header.len < 0 ||
            header.len > INT_MAX / (int)sizeof(struct kdtree_node) ||
            header.root < -1 || header.root >= header.len) {
            rb_raise(rb_eRuntimeError, "corrupt header in kdtree file");
        }

        // release a tree left behind by an earlier initialize call
        xfree(kdtreep->nodes);
        kdtreep->nodes = NULL;
        kdtreep->len = 0;
        kdtreep->root = -1;

        // read the nodes; the array is owned by the object right away so it
        // is released by the free callback if the read raises
        nodes = ALLOC_N(struct kdtree_node, header.len);
        kdtreep->nodes = nodes;
        read_all(fptr, (char *)nodes, sizeof(struct kdtree_node) * header.len);

        // child links are used as array indexes by the searches, so reject
        // anything outside the array (this does not detect cycles)
        for (i = 0; i < header.len; ++i) {
            if (nodes[i].left < -1 || nodes[i].left >= header.len ||
                nodes[i].right < -1 || nodes[i].right >= header.len) {
                rb_raise(rb_eRuntimeError, "corrupt node in kdtree file");
            }
        }

        kdtreep->len = header.len;
        kdtreep->root = header.root;
    } else {
        rb_raise(rb_eTypeError, "array or IO required to init KDTree");
    }

    return kdtree;
}
|
155
|
+
|
156
|
+
static int comparex(const void *pa, const void *pb)
|
157
|
+
{
|
158
|
+
float a = ((const struct kdtree_node*)pa)->x;
|
159
|
+
float b = ((const struct kdtree_node*)pb)->x;
|
160
|
+
return (a < b) ? -1 : ((a > b) ? 1 : 0);
|
161
|
+
}
|
162
|
+
|
163
|
+
static int comparey(const void *pa, const void *pb)
|
164
|
+
{
|
165
|
+
float a = ((const struct kdtree_node*)pa)->y;
|
166
|
+
float b = ((const struct kdtree_node*)pb)->y;
|
167
|
+
return (a < b) ? -1 : ((a > b) ? 1 : 0);
|
168
|
+
}
|
169
|
+
|
170
|
+
static int kdtree_build(struct kdtree_data *kdtreep, int min, int max, int depth)
|
171
|
+
{
|
172
|
+
int(*compar)(const void *, const void *);
|
173
|
+
struct kdtree_node *m;
|
174
|
+
int median;
|
175
|
+
if (max <= min) {
|
176
|
+
return -1;
|
177
|
+
}
|
178
|
+
|
179
|
+
// sort nodes from min to max
|
180
|
+
compar = (depth % 2) ? comparex : comparey;
|
181
|
+
qsort(kdtreep->nodes + min, max - min, sizeof(struct kdtree_node), compar);
|
182
|
+
|
183
|
+
median = (min + max) / 2;
|
184
|
+
m = kdtreep->nodes + median;
|
185
|
+
m->left = kdtree_build(kdtreep, min, median, depth + 1);
|
186
|
+
m->right = kdtree_build(kdtreep, median + 1, max, depth + 1);
|
187
|
+
return median;
|
188
|
+
}
|
189
|
+
|
190
|
+
//
|
191
|
+
// nearest
|
192
|
+
//
|
193
|
+
|
194
|
+
static int n_index;
|
195
|
+
static float n_dist;
|
196
|
+
|
197
|
+
/*
|
198
|
+
* call-seq:
|
199
|
+
* kd.nearest(x, y) => id
|
200
|
+
*
|
201
|
+
* Finds the point closest to <i>x</i>, <i>y</i> and returns the id for that
|
202
|
+
* point. Returns -1 if the tree is empty.
|
203
|
+
*
|
204
|
+
* points = []
|
205
|
+
* points << [47.6, -122.3, 1] # Seattle
|
206
|
+
* points << [40.7, -74.0, 2] # New York
|
207
|
+
* kd = KDTree.new(points)
|
208
|
+
*
|
209
|
+
* # which city is closest to Portland?
|
210
|
+
* kd.nearest(45.5, -122.8) #=> 1
|
211
|
+
* # which city is closest to Boston?
|
212
|
+
* kd.nearest(42.4, -71.1) #=> 2
|
213
|
+
*/
|
214
|
+
static VALUE kdtree_nearest(VALUE kdtree, VALUE x, VALUE y)
{
    float qx, qy;
    KDTREEP;

    // convert the arguments before touching the shared search state; the
    // conversions may raise or call back into Ruby
    qx = NUM2DBL(x);
    qy = NUM2DBL(y);

    // reset the file-level search state that kdtree_nearest0 updates
    n_index = -1;
    n_dist = INT_MAX;
    kdtree_nearest0(kdtreep, kdtreep->root, qx, qy, 0);
    if (n_index == -1) {
        // box the "not found" sentinel like every other result instead of
        // returning a raw C int where a VALUE is expected
        return INT2NUM(-1);
    }
    return INT2NUM((kdtreep->nodes + n_index)->id);
}
|
226
|
+
|
227
|
+
static void kdtree_nearest0(struct kdtree_data *kdtreep, int i, float x, float y, int depth)
|
228
|
+
{
|
229
|
+
struct kdtree_node *n;
|
230
|
+
float ad;
|
231
|
+
int near_v, far_v;
|
232
|
+
float dx;
|
233
|
+
if (i == -1) {
|
234
|
+
return;
|
235
|
+
}
|
236
|
+
|
237
|
+
n = kdtreep->nodes + i;
|
238
|
+
|
239
|
+
ad = (depth % 2) ? (x - n->x) : (y - n->y);
|
240
|
+
|
241
|
+
//
|
242
|
+
// recurse near, and perhaps far as well
|
243
|
+
//
|
244
|
+
|
245
|
+
if (ad <= 0) {
|
246
|
+
near_v = n->left; far_v = n->right;
|
247
|
+
} else {
|
248
|
+
near_v = n->right; far_v = n->left;
|
249
|
+
}
|
250
|
+
kdtree_nearest0(kdtreep, near_v, x, y, depth + 1);
|
251
|
+
if (ad * ad < n_dist) {
|
252
|
+
kdtree_nearest0(kdtreep, far_v, x, y, depth + 1);
|
253
|
+
}
|
254
|
+
|
255
|
+
//
|
256
|
+
// do we beat the old distance?
|
257
|
+
//
|
258
|
+
|
259
|
+
dx = (x - n->x) * (x - n->x);
|
260
|
+
if (dx < n_dist) {
|
261
|
+
float d = dx + ((y - n->y) * (y - n->y));
|
262
|
+
if (d < n_dist) {
|
263
|
+
n_index = i;
|
264
|
+
n_dist = d;
|
265
|
+
}
|
266
|
+
}
|
267
|
+
}
|
268
|
+
|
269
|
+
//
|
270
|
+
// nearestK
|
271
|
+
//
|
272
|
+
|
273
|
+
#define MAX_K 255
|
274
|
+
|
275
|
+
typedef struct kresult { // one entry of the nearestk result list (k_list)
|
276
|
+
int index; // index of the matching node in the tree's nodes array
|
277
|
+
float distance; // squared distance from the query point to that node
|
278
|
+
} kresult;
|
279
|
+
// note I leave an extra slot here at the end because of the way our binary insert works
|
280
|
+
static struct kresult k_list[MAX_K + 1];
|
281
|
+
static int k_len;
|
282
|
+
static float k_dist;
|
283
|
+
|
284
|
+
/*
|
285
|
+
* call-seq:
|
286
|
+
* kd.nearestk(x, y, k) => array
|
287
|
+
*
|
288
|
+
* Finds the <i>k</i> points closest to <i>x</i>, <i>y</i>. Returns an array of
|
289
|
+
* ids, sorted by distance. Returns an empty array if the tree is empty. Note
|
290
|
+
* that <i>k</i> is capped at 255.
|
291
|
+
*
|
292
|
+
* points = []
|
293
|
+
* points << [47.6, -122.3, 1] # Seattle
|
294
|
+
* points << [45.5, -122.8, 2] # Portland
|
295
|
+
* points << [40.7, -74.0, 3] # New York
|
296
|
+
* kd = KDTree.new(points)
|
297
|
+
*
|
298
|
+
* # which two cities are closest to San Francisco?
|
299
|
+
* kd.nearestk(34.1, -118.2, 2) #=> [2, 1]
|
300
|
+
*/
|
301
|
+
static VALUE kdtree_nearestk(VALUE kdtree, VALUE x, VALUE y, VALUE k)
{
    VALUE ids;
    int want;
    int pos;
    KDTREEP;

    // reset the file-level result list that kdtree_nearestk0 fills in
    k_len = 0;
    k_dist = INT_MAX;

    // clamp the requested count to 1..MAX_K
    want = NUM2INT(k);
    if (want > MAX_K) {
        want = MAX_K;
    }
    if (want < 1) {
        want = 1;
    }
    kdtree_nearestk0(kdtreep, kdtreep->root, NUM2DBL(x), NUM2DBL(y), want, 0);

    // convert result to ruby array, keeping the order of k_list
    ids = rb_ary_new();
    pos = 0;
    while (pos < k_len) {
        rb_ary_push(ids, INT2NUM(kdtreep->nodes[k_list[pos].index].id));
        ++pos;
    }
    return ids;
}
|
326
|
+
|
327
|
+
static void kdtree_nearestk0(struct kdtree_data *kdtreep, int i, float x, float y, int k, int depth)
|
328
|
+
{
|
329
|
+
struct kdtree_node *n;
|
330
|
+
int near_v, far_v;
|
331
|
+
float ad;
|
332
|
+
float dx;
|
333
|
+
if (i == -1) {
|
334
|
+
return;
|
335
|
+
}
|
336
|
+
|
337
|
+
n = kdtreep->nodes + i;
|
338
|
+
|
339
|
+
ad = (depth % 2) ? (x - n->x) : (y - n->y);
|
340
|
+
|
341
|
+
//
|
342
|
+
// recurse near, and then perhaps far as well
|
343
|
+
//
|
344
|
+
|
345
|
+
|
346
|
+
if (ad <= 0) {
|
347
|
+
near_v = n->left; far_v = n->right;
|
348
|
+
} else {
|
349
|
+
near_v = n->right; far_v = n->left;
|
350
|
+
}
|
351
|
+
kdtree_nearestk0(kdtreep, near_v, x, y, k, depth + 1);
|
352
|
+
if (ad * ad < k_dist) {
|
353
|
+
kdtree_nearestk0(kdtreep, far_v, x, y, k, depth + 1);
|
354
|
+
}
|
355
|
+
|
356
|
+
//
|
357
|
+
// do we beat the old distance?
|
358
|
+
//
|
359
|
+
|
360
|
+
dx = (x - n->x) * (x - n->x);
|
361
|
+
if (dx < k_dist) {
|
362
|
+
float d = dx + ((y - n->y) * (y - n->y));
|
363
|
+
if (d < k_dist) {
|
364
|
+
//
|
365
|
+
// find spot to insert
|
366
|
+
//
|
367
|
+
int lo = 0, hi = k_len;
|
368
|
+
while (lo < hi) {
|
369
|
+
int mid = (lo + hi) / 2;
|
370
|
+
if (k_list[mid].distance < d) {
|
371
|
+
lo = mid + 1;
|
372
|
+
} else {
|
373
|
+
hi = mid;
|
374
|
+
}
|
375
|
+
}
|
376
|
+
|
377
|
+
//
|
378
|
+
// insert
|
379
|
+
//
|
380
|
+
|
381
|
+
memmove(k_list + lo + 1, k_list + lo, (k_len - lo) * sizeof(struct kresult));
|
382
|
+
k_list[lo].index = i;
|
383
|
+
k_list[lo].distance = d;
|
384
|
+
|
385
|
+
//
|
386
|
+
// adjust len/dist if necessary
|
387
|
+
//
|
388
|
+
|
389
|
+
if (k_len < k) {
|
390
|
+
++k_len;
|
391
|
+
} else {
|
392
|
+
k_dist = k_list[k - 1].distance;
|
393
|
+
}
|
394
|
+
}
|
395
|
+
}
|
396
|
+
}
|
397
|
+
|
398
|
+
/*
|
399
|
+
* call-seq:
|
400
|
+
* kd.persist(io)
|
401
|
+
*
|
402
|
+
* Writes the tree out to <i>io</i> so you can quickly load it later with
|
403
|
+
* KDTree.new. This avoids the startup cost of initializing a tree. Apart from a
|
404
|
+
* small header, the size of the file is proportional to the number of points,
|
405
|
+
* requiring 20 bytes per point.
|
406
|
+
*
|
407
|
+
* This file is <b>NOT PORTABLE</b> across different architectures due to endian
|
408
|
+
* issues.
|
409
|
+
*
|
410
|
+
* points = []
|
411
|
+
* points << [47.6, -122.3, 1] # Seattle
|
412
|
+
* points << [45.5, -122.8, 2] # Portland
|
413
|
+
* points << [40.7, -74.0, 3] # New York
|
414
|
+
* kd = KDTree.new(points)
|
415
|
+
*
|
416
|
+
* # persist the tree to disk
|
417
|
+
* File.open("treefile", "w") { |f| kd.persist(f) }
|
418
|
+
*
|
419
|
+
* ...
|
420
|
+
*
|
421
|
+
* # later, read the tree from disk
|
422
|
+
* kd2 = File.open("treefile") { |f| KDTree.new(f) }
|
423
|
+
*/
|
424
|
+
static VALUE kdtree_persist(VALUE kdtree, VALUE io)
{
    VALUE payload;
    size_t header_bytes;
    size_t node_bytes;
    KDTREEP;

    if (!rb_respond_to(io, rb_intern("write"))) {
        rb_raise(rb_eTypeError, "instance of IO needed");
    }
    if (rb_respond_to(io, rb_intern("binmode"))) {
        rb_funcall2(io, rb_intern("binmode"), 0, 0);
    }

    // the header is every member of kdtree_data in front of the nodes pointer
    header_bytes = sizeof(struct kdtree_data) - sizeof(struct kdtree_node *);
    node_bytes = sizeof(struct kdtree_node) * kdtreep->len;

    // assemble magic + header + raw node array, then write it in one call
    payload = rb_str_buf_new(0);
    rb_str_buf_cat(payload, KDTREE_MAGIC, 4);
    rb_str_buf_cat(payload, (char*)kdtreep, header_bytes);
    rb_str_buf_cat(payload, (char*)kdtreep->nodes, node_bytes);
    rb_io_write(io, payload);
    return io;
}
|
443
|
+
|
444
|
+
/*
|
445
|
+
* call-seq:
|
446
|
+
* kd.to_s => string
|
447
|
+
*
|
448
|
+
* A string that tells you a bit about the tree.
|
449
|
+
*/
|
450
|
+
static VALUE kdtree_to_s(VALUE kdtree)
{
    char buf[256];
    KDTREEP;

    // the class name comes from Ruby (a subclass may have a long name), so
    // the write into the fixed-size buffer has to be bounded; an over-long
    // description is truncated
    snprintf(buf, sizeof(buf), "#<%s:%p nodes=%d>", rb_obj_classname(kdtree), (void*)kdtree, kdtreep->len);
    // some snprintf variants do not terminate the buffer on truncation
    buf[sizeof(buf) - 1] = '\0';
    return rb_str_new(buf, strlen(buf));
}
|
458
|
+
|
459
|
+
//
|
460
|
+
// entry point
|
461
|
+
//
|
462
|
+
|
463
|
+
/*
|
464
|
+
* KDTree is an insanely fast data structure for finding the nearest
|
465
|
+
* neighbor(s) to a given point. This implementation only supports 2d
|
466
|
+
* points. Also, it only supports static points - there is no way to edit the
|
467
|
+
* tree after it has been initialized. KDTree should scale to millions of
|
468
|
+
* points, though it's only been tested with around 1 million.
|
469
|
+
*
|
470
|
+
* Once the tree is constructed, it can be searched with nearest and nearestk.
|
471
|
+
*
|
472
|
+
* To avoid the startup costs associated with creating a new tree, use persist
|
473
|
+
* to write the tree to disk. You can then construct the tree later from that
|
474
|
+
* file.
|
475
|
+
*
|
476
|
+
* points = []
|
477
|
+
* points << [47.6, -122.3, 1] # Seattle
|
478
|
+
* points << [45.5, -122.8, 2] # Portland
|
479
|
+
* points << [40.7, -74.0, 3] # New York
|
480
|
+
* kd = KDTree.new(points)
|
481
|
+
*
|
482
|
+
* # which city is closest to San Francisco?
|
483
|
+
* kd.nearest(34.1, -118.2) #=> 2
|
484
|
+
* # which two cities are closest to San Francisco?
|
485
|
+
* kd.nearestk(34.1, -118.2, 2) #=> [2, 1]
|
486
|
+
*
|
487
|
+
* For more information on kd trees, see:
|
488
|
+
*
|
489
|
+
* http://en.wikipedia.org/wiki/Kd-tree
|
490
|
+
*/
|
491
|
+
void Init_kdtree()
|
492
|
+
{
|
493
|
+
static VALUE clazz;
|
494
|
+
|
495
|
+
clazz = rb_define_class("KDTree", rb_cObject);
|
496
|
+
|
497
|
+
rb_define_alloc_func(clazz, kdtree_alloc);
|
498
|
+
rb_define_method(clazz, "initialize", kdtree_initialize, 1);
|
499
|
+
rb_define_method(clazz, "nearest", kdtree_nearest, 2);
|
500
|
+
rb_define_method(clazz, "nearestk", kdtree_nearestk, 3);
|
501
|
+
rb_define_method(clazz, "persist", kdtree_persist, 1);
|
502
|
+
rb_define_method(clazz, "to_s", kdtree_to_s, 0);
|
503
|
+
}
|
data/test/test.rb
ADDED
@@ -0,0 +1,138 @@
|
|
1
|
+
require "#{File.expand_path(File.dirname(__FILE__))}/../ext/kdtree.o"
|
2
|
+
require "test/unit"
|
3
|
+
require "tempfile"
|
4
|
+
|
5
|
+
#
|
6
|
+
# create a tree
|
7
|
+
#
|
8
|
+
|
9
|
+
class KDTreeTest < Test::Unit::TestCase
  # scratch file used by the persistence and speed tests
  TMP = "#{Dir.tmpdir}/kdtree_test"

  # nearest must find a point that is as close as the brute-force winner
  def test_nearest
    setup_tree(1000)
    100.times do
      query = [rand_coord, rand_coord]

      # kdtree search; ids are indexes into @points (see setup_tree)
      found = @points[@kdtree.nearest(*query)]

      # slow search
      expected = @points.sort_by { |candidate| distance(candidate, query) }.first

      # assert
      delta = (distance(found, query) - distance(expected, query)).abs
      assert(delta < 0.0000001, "kdtree didn't return the closest result")
    end
  end

  # the last of the k results must be as close as the k-th brute-force result
  def test_nearestk
    setup_tree(1000)
    100.times do
      query = [rand_coord, rand_coord]

      # kdtree search
      ids = @kdtree.nearestk(query[0], query[1], 5)
      found = @points[ids.last]

      # slow search
      ranked = @points.sort_by { |candidate| distance(candidate, query) }
      expected = ranked[ids.length - 1]

      # assert
      delta = (distance(found, query) - distance(expected, query)).abs
      assert(delta < 0.0000001, "kdtree didn't return the closest result")
    end
  end

  # a persisted tree must answer like the original, and a file without the
  # magic header must be rejected
  def test_persist
    setup_tree(1000)

    begin
      # write
      File.open(TMP, "w") { |out| @kdtree.persist(out) }
      # read
      reloaded = File.open(TMP, "r") { |input| KDTree.new(input) }

      # now test some random points
      100.times do
        query = [rand_coord, rand_coord]
        original_id = @kdtree.nearest(*query)
        reloaded_id = reloaded.nearest(*query)
        assert(original_id == reloaded_id, "kdtree2 differed from kdtree")
      end
    ensure
      File.unlink(TMP)
    end

    # now test magic problems
    begin
      File.open(TMP, "w") { |out| out.puts "That ain't right" }
      assert_raise RuntimeError do
        File.open(TMP, "r") { |input| KDTree.new(input) }
      end
    ensure
      File.unlink(TMP)
    end
  end

  # manual benchmark; the name keeps Test::Unit from running it
  def dont_test_speed
    printf("\n")
    sizes = [1, 100, 1000, 10000, 100000, 1000000]
    ks = [1, 5, 50, 255]
    sizes.each do |size|
      points = (0...size).map { |i| [rand_coord, rand_coord, i] }

      # build
      started = Time.now
      kdtree = KDTree.new(points)
      printf "build %d took %.6fs\n", size, Time.now - started

      begin
        # write
        started = Time.now
        File.open(TMP, "w") { |out| kdtree.persist(out) }
        printf "write %d took %.6fs\n", size, Time.now - started
        # read
        started = Time.now
        File.open(TMP, "r") { |input| KDTree.new(input) }
        printf "read %d took %.6fs\n", size, Time.now - started
      ensure
        File.unlink(TMP)
      end

      ks.each do |k|
        total = 0
        count = 0
        100.times do
          started = Time.now
          if k == 1
            kdtree.nearest(rand_coord, rand_coord)
          else
            kdtree.nearestk(rand_coord, rand_coord, k)
          end
          total += Time.now - started
          count += 1
        end
        printf "avg query time = %.6fs [%d/%d]\n", total / count, size, k
      end
    end
  end

  protected

  # builds @points as [x, y, id] triples whose id is the index in @points,
  # and @kdtree from them
  def setup_tree(len)
    @points = (0...len).map { |i| [rand_coord, rand_coord, i] }
    @kdtree = KDTree.new(@points)
  end

  # squared distance between the first two coordinates of a and b
  def distance(a, b)
    dx = a[0] - b[0]
    dy = a[1] - b[1]
    dx * dx + dy * dy
  end

  # random float coordinate: rand(0) scaled by 10 and shifted by -5
  def rand_coord
    rand(0) * 10 - 5
  end
end
|
metadata
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ghazel-kdtree
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Adam Doppelt
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2010-01-22 00:00:00 -08:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description:
|
17
|
+
email: amd@gurge.com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions:
|
21
|
+
- ext/extconf.rb
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- ext/extconf.rb
|
26
|
+
- ext/kdtree.c
|
27
|
+
- LICENSE
|
28
|
+
- test/test.rb
|
29
|
+
has_rdoc: true
|
30
|
+
homepage:
|
31
|
+
licenses: []
|
32
|
+
|
33
|
+
post_install_message:
|
34
|
+
rdoc_options:
|
35
|
+
- --exclude
|
36
|
+
- test
|
37
|
+
- --exclude
|
38
|
+
- extconf
|
39
|
+
require_paths:
|
40
|
+
- .
|
41
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 1.8.5
|
46
|
+
version:
|
47
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
48
|
+
requirements:
|
49
|
+
- - ">="
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: "0"
|
52
|
+
version:
|
53
|
+
requirements: []
|
54
|
+
|
55
|
+
rubyforge_project:
|
56
|
+
rubygems_version: 1.3.5
|
57
|
+
signing_key:
|
58
|
+
specification_version: 3
|
59
|
+
summary: Blazingly fast 2d kdtree.
|
60
|
+
test_files:
|
61
|
+
- test/test.rb
|