kdtree 0.1 → 0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ Gemfile.lock
3
+ lib/*.so
4
+ tmp
@@ -0,0 +1,9 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.8.7
4
+ - 1.9.2
5
+ - 1.9.3
6
+ - rbx-18mode
7
+ - rbx-19mode
8
+ # - ruby-head
9
+ - ree
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source "http://rubygems.org"
2
+ gemspec
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2009 Adam Doppelt
1
+ Copyright (c) 2012 Adam Doppelt
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
@@ -0,0 +1,70 @@
1
+ ## Kdtree
2
+
3
+ A kd tree is a data structure that recursively partitions the world in order to rapidly answer nearest neighbor queries. A generic kd tree can support any number of dimensions, and can return either the nearest neighbor or a set of N nearest neighbors.
4
+
5
+ This gem is a blazingly fast, native, 2d kdtree. It's specifically built to find the nearest neighbor when searching millions of points. It's used in production at Urbanspoon and several other companies.
6
+
7
+ The first version of this gem was released back in 2009. See the original [blog post](http://gurge.com/2009/10/22/ruby-nearest-neighbor-fast-kdtree-gem/) for the full story. Wikipedia has a great [article on kdtrees](http://en.wikipedia.org/wiki/K-d_tree).
8
+
9
+ ### Usage
10
+
11
+ Usage is very simple:
12
+
13
+ * **Kdtree.new(points)** - construct a new tree. Each point should be of the form `[x, y, id]`, where `x/y` are floats and `id` is an int. Not a string, not an object, just an int.
14
+ * **kd.nearest(x, y)** - find the nearest point. Returns an id.
15
+ * **kd.nearestk(x, y, k)** - find the nearest `k` points. Returns an array of ids.
16
+
17
+ Also, I made it possible to **persist** the tree to disk and load it later. That way you can calculate the tree offline and load it quickly at some future point. Loading a persisted tree with 1 million points takes less than a second, as opposed to the 3.5 second build time shown in the Performance section below. For example:
18
+
19
+ ```ruby
20
+ File.open("treefile", "w") { |f| kd.persist(f) }
21
+ ... later ...
22
+ kd2 = File.open("treefile") { |f| Kdtree.new(f) }
23
+ ```
24
+
25
+ ### Performance
26
+
27
+ Kdtree is fast. How fast? Using a tree with 1 million points on my i5 2.8ghz:
28
+
29
+ ```
30
+ build 3.5s
31
+ nearest point 0.000003s
32
+ nearest 5 points 0.000004s
33
+ nearest 50 points 0.000014s
34
+ nearest 255 points 0.000063s
35
+ ```
36
+
37
+ ### Limitations
38
+
39
+ * No **editing** allowed! Once you construct a tree you’re stuck with it.
40
+ * The tree is stored in **one big memory block**, 20 bytes per point. A tree with one million points will allocate a single 19mb block to store its nodes.
41
+ * Persisted trees are **architecture dependent**, and may not work across different machines due to endian issues.
42
+ * nearestk is limited to **255 results**
43
+
44
+ ### Contributors
45
+
46
+ Since this gem was originally released, several folks have contributed important patches:
47
+
48
+ * @antifuchs (thread safety)
49
+ * @evanphx (native cleanups, perf)
50
+ * @ghazel (C89 compliance)
51
+ * @mcerna (1.9 compat)
52
+
53
+ ### Changelog
54
+
55
+ #### 0.3 (in progress, unreleased)
56
+
57
+ * Ruby 1.9.x compatibility (@mcerna and others)
58
+ * renamed KDTree to the more idiomatic Kdtree
59
+ * use IO methods directly instead of rooting around in rb_io
60
+ * thread safe, no more statics (@antifuchs)
61
+ * C90 compliance, no warnings (@ghazel)
62
+ * native cleanups (@evanphx)
63
+
64
+ #### 0.2
65
+
66
+ skipped this version to prevent confusion with other flavors of the gem
67
+
68
+ #### 0.1
69
+
70
+ * Original release
@@ -0,0 +1,41 @@
1
+ require "bundler/setup"
2
+ require "rake/extensiontask"
3
+ require "rake/testtask"
4
+
5
+ # load the spec, we use it below
6
+ spec = Gem::Specification.load("kdtree.gemspec")
7
+
8
+ #
9
+ # gem
10
+ #
11
+
12
+ task :build do
13
+ system "gem build --quiet kdtree.gemspec"
14
+ end
15
+
16
+ task :install => :build do
17
+ system "sudo gem install --quiet kdtree-#{spec.version}.gem"
18
+ end
19
+
20
+ task :release => :build do
21
+ system "git tag -a #{spec.version} -m 'Tagging #{spec.version}'"
22
+ system "git push --tags"
23
+ system "gem push kdtree-#{spec.version}.gem"
24
+ end
25
+
26
+ #
27
+ # rake-compiler
28
+ #
29
+
30
+ Rake::ExtensionTask.new("kdtree", spec)
31
+
32
+
33
+ #
34
+ # testing
35
+ #
36
+
37
+ Rake::TestTask.new(:test) do |test|
38
+ test.libs << "test"
39
+ end
40
+ task :test => :compile
41
+ task :default => :test
@@ -1,3 +1,3 @@
1
- require 'mkmf'
1
+ require "mkmf"
2
2
 
3
3
  create_makefile("kdtree")
@@ -1,15 +1,10 @@
1
1
  #include "ruby.h"
2
- #include "rubyio.h"
3
- #include "version.h"
4
-
5
- #ifndef HAVE_RB_IO_T
6
- #define rb_io_t OpenFile
7
- #endif
8
2
 
9
3
  //
10
4
  // interface
11
5
  //
12
6
 
7
+ // the tree itself
13
8
  typedef struct kdtree_data
14
9
  {
15
10
  int root;
@@ -17,6 +12,7 @@ typedef struct kdtree_data
17
12
  struct kdtree_node *nodes;
18
13
  } kdtree_data;
19
14
 
15
+ // a node in the tree
20
16
  typedef struct kdtree_node
21
17
  {
22
18
  float x, y;
@@ -25,10 +21,18 @@ typedef struct kdtree_node
25
21
  int right;
26
22
  } kdtree_node;
27
23
 
24
+ // a result node from kdtree_nearestk0
25
+ typedef struct kresult {
26
+ int index;
27
+ float distance;
28
+ } kresult;
29
+
30
+ // helper macro for digging out our struct
28
31
  #define KDTREEP \
29
32
  struct kdtree_data *kdtreep; \
30
33
  Data_Get_Struct(kdtree, struct kdtree_data, kdtreep);
31
34
 
35
+ // kdtree public methods
32
36
  static VALUE kdtree_alloc(VALUE klass);
33
37
  static void kdtree_free(struct kdtree_data *kdtreep);
34
38
  static VALUE kdtree_initialize(VALUE kdtree, VALUE points);
@@ -37,13 +41,20 @@ static VALUE kdtree_nearestk(VALUE kdtree, VALUE x, VALUE y, VALUE k);
37
41
  static VALUE kdtree_persist(VALUE kdtree, VALUE io);
38
42
  static VALUE kdtree_to_s(VALUE kdtree);
39
43
 
40
- // helpers
44
+ // kdtree helpers
41
45
  static int kdtree_build(struct kdtree_data *kdtreep, int min, int max, int depth);
42
- static void kdtree_nearest0(struct kdtree_data *kdtreep, int i, float x, float y, int depth);
43
- static void kdtree_nearestk0(struct kdtree_data *kdtreep, int i, float x, float y, int k, int depth);
46
+ static void kdtree_nearest0(struct kdtree_data *kdtreep, int i, float x, float y, int depth, int *n_index, float *n_dist);
47
+ static void kdtree_nearestk0(struct kdtree_data *kdtreep, int i, float x, float y, int k, int depth, kresult *k_list, int *k_len, float *k_dist);
48
+
49
+ // io helpers
50
+ static void read_all(VALUE io, void *buf, int len);
51
+ static void write_all(VALUE io, const void *buf, int len);
44
52
 
45
53
  #define KDTREE_MAGIC "KdTr"
46
54
 
55
+ // ids
56
+ static ID id_read, id_write, id_binmode;
57
+
47
58
  //
48
59
  // implementation
49
60
  //
@@ -63,24 +74,12 @@ static void kdtree_free(struct kdtree_data *kdtreep)
63
74
  }
64
75
  }
65
76
 
66
- static void read_all(struct rb_io_t *fptr, char *buf, int len)
67
- {
68
- while (len > 0) {
69
- int n = rb_io_fread(buf, len, fptr->f);
70
- if (n == 0) {
71
- rb_eof_error();
72
- }
73
- buf += n;
74
- len -= n;
75
- }
76
- }
77
-
78
77
  /*
79
78
  * call-seq:
80
- * KDTree.new(points) => kdtree
81
- * KDTree.new(io) => kdtree
79
+ * Kdtree.new(points) => kdtree
80
+ * Kdtree.new(io) => kdtree
82
81
  *
83
- * Returns a new <code>KDTree</code>. To construct a tree, pass an array of
82
+ * Returns a new <code>Kdtree</code>. To construct a tree, pass an array of
84
83
  * <i>points</i>. Each point should be an array of the form <code>[x, y,
85
84
  * id]</code>, where <i>x</i> and <i>y</i> are floats and <i>id</i> is an
86
85
  * integer. The <i>id</i> is arbitrary and will be returned to you whenever you
@@ -90,7 +89,7 @@ static void read_all(struct rb_io_t *fptr, char *buf, int len)
90
89
  * points = []
91
90
  * points << [47.6, -122.3, 1] # Seattle
92
91
  * points << [40.7, -74.0, 2] # New York
93
- * kd = KDTree.new(points)
92
+ * kd = Kdtree.new(points)
94
93
  *
95
94
  * Alternately, you can pass in an <i>IO</i> object containing a persisted
96
95
  * kdtree. This makes it possible to build the tree in advance, persist it, and
@@ -103,104 +102,97 @@ static VALUE kdtree_initialize(VALUE kdtree, VALUE arg)
103
102
  if (TYPE(arg) == T_ARRAY) {
104
103
  // init from array of points
105
104
  VALUE points = arg;
105
+ int i;
106
106
  kdtreep->len = RARRAY_LEN(points);
107
107
  kdtreep->nodes = ALLOC_N(struct kdtree_node, kdtreep->len);
108
108
 
109
- int i;
110
109
  for (i = 0; i < RARRAY_LEN(points); ++i) {
111
110
  struct kdtree_node *n = kdtreep->nodes + i;
112
-
113
- VALUE ptr = RARRAY_PTR(points)[i];
111
+
112
+ VALUE ptr = rb_ary_entry(points, i);
114
113
  VALUE v = rb_check_array_type(ptr);
115
114
  if (NIL_P(v) || RARRAY_LEN(v) != 3) {
116
115
  continue;
117
116
  }
118
- VALUE *a = RARRAY_PTR(ptr);
119
- n->x = NUM2DBL(a[0]);
120
- n->y = NUM2DBL(a[1]);
121
- n->id = NUM2INT(a[2]);
117
+ n->x = NUM2DBL(rb_ary_entry(v, 0));
118
+ n->y = NUM2DBL(rb_ary_entry(v, 1));
119
+ n->id = NUM2INT(rb_ary_entry(v, 2));
122
120
  }
123
121
 
124
122
  // now build the tree
125
123
  kdtreep->root = kdtree_build(kdtreep, 0, kdtreep->len, 0);
126
124
  } else if (rb_respond_to(arg, rb_intern("read"))) {
127
125
  VALUE io = arg;
128
- if (rb_respond_to(io, rb_intern("binmode"))) {
129
- rb_funcall2(io, rb_intern("binmode"), 0, 0);
126
+ char buf[4];
127
+ if (rb_respond_to(io, id_binmode)) {
128
+ rb_funcall(io, id_binmode, 0);
130
129
  }
131
130
 
132
- struct rb_io_t *fptr = RFILE(rb_io_taint_check(io))->fptr;
133
- rb_io_check_readable(fptr);
134
-
135
131
  // check magic
136
- char buf[4];
137
- read_all(fptr, buf, 4);
132
+ read_all(io, buf, 4);
138
133
  if (memcmp(KDTREE_MAGIC, buf, 4) != 0) {
139
134
  rb_raise(rb_eRuntimeError, "wrong magic number in kdtree file");
140
135
  }
141
-
136
+
142
137
  // read start of the struct
143
- read_all(fptr, (char *)kdtreep, sizeof(struct kdtree_data) - sizeof(struct kdtree_node *));
138
+ read_all(io, kdtreep, sizeof(struct kdtree_data) - sizeof(struct kdtree_node *));
139
+
144
140
  // read the nodes
145
141
  kdtreep->nodes = ALLOC_N(struct kdtree_node, kdtreep->len);
146
- read_all(fptr, (char *)kdtreep->nodes, sizeof(struct kdtree_node) * kdtreep->len);
142
+ read_all(io, kdtreep->nodes, sizeof(struct kdtree_node) * kdtreep->len);
147
143
  } else {
148
- rb_raise(rb_eTypeError, "array or IO required to init KDTree");
144
+ rb_raise(rb_eTypeError, "array or IO required to init Kdtree");
149
145
  }
150
-
146
+
151
147
  return kdtree;
152
148
  }
153
149
 
154
150
  static int comparex(const void *pa, const void *pb)
155
151
  {
156
152
  float a = ((const struct kdtree_node*)pa)->x;
157
- float b = ((const struct kdtree_node*)pb)->x;
153
+ float b = ((const struct kdtree_node*)pb)->x;
158
154
  return (a < b) ? -1 : ((a > b) ? 1 : 0);
159
155
  }
160
156
 
161
157
  static int comparey(const void *pa, const void *pb)
162
158
  {
163
159
  float a = ((const struct kdtree_node*)pa)->y;
164
- float b = ((const struct kdtree_node*)pb)->y;
160
+ float b = ((const struct kdtree_node*)pb)->y;
165
161
  return (a < b) ? -1 : ((a > b) ? 1 : 0);
166
162
  }
167
163
 
168
164
  static int kdtree_build(struct kdtree_data *kdtreep, int min, int max, int depth)
169
165
  {
166
+ int(*compar)(const void *, const void *);
167
+ struct kdtree_node *m;
168
+ int median;
170
169
  if (max <= min) {
171
170
  return -1;
172
171
  }
173
172
 
174
173
  // sort nodes from min to max
175
- int(*compar)(const void *, const void *) = (depth % 2) ? comparex : comparey;
174
+ compar = (depth % 2) ? comparex : comparey;
176
175
  qsort(kdtreep->nodes + min, max - min, sizeof(struct kdtree_node), compar);
177
176
 
178
- int median = (min + max) / 2;
179
- struct kdtree_node *m = kdtreep->nodes + median;
177
+ median = (min + max) / 2;
178
+ m = kdtreep->nodes + median;
180
179
  m->left = kdtree_build(kdtreep, min, median, depth + 1);
181
180
  m->right = kdtree_build(kdtreep, median + 1, max, depth + 1);
182
181
  return median;
183
182
  }
184
183
 
185
- //
186
- // nearest
187
- //
188
-
189
- static int n_index;
190
- static float n_dist;
191
-
192
184
  /*
193
185
  * call-seq:
194
186
  * kd.nearest(x, y) => id
195
187
  *
196
188
  * Finds the point closest to <i>x</i>, <i>y</i> and returns the id for that
197
189
  * point. Returns -1 if the tree is empty.
198
- *
190
+ *
199
191
  * points = []
200
192
  * points << [47.6, -122.3, 1] # Seattle
201
193
  * points << [40.7, -74.0, 2] # New York
202
- * kd = KDTree.new(points)
203
- *
194
+ * kd = Kdtree.new(points)
195
+ *
204
196
  * # which city is closest to Portland?
205
197
  * kd.nearest(45.5, -122.8) #=> 1
206
198
  * # which city is closest to Boston?
@@ -208,52 +200,59 @@ static float n_dist;
208
200
  */
209
201
  static VALUE kdtree_nearest(VALUE kdtree, VALUE x, VALUE y)
210
202
  {
203
+ int n_index;
204
+ float n_dist;
211
205
  KDTREEP;
212
206
 
213
207
  n_index = -1;
214
208
  n_dist = INT_MAX;
215
- kdtree_nearest0(kdtreep, kdtreep->root, NUM2DBL(x), NUM2DBL(y), 0);
209
+
210
+ kdtree_nearest0(kdtreep, kdtreep->root, NUM2DBL(x), NUM2DBL(y), 0, &n_index, &n_dist);
216
211
  if (n_index == -1) {
217
212
  return -1;
218
213
  }
219
214
  return INT2NUM((kdtreep->nodes + n_index)->id);
220
215
  }
221
216
 
222
- static void kdtree_nearest0(struct kdtree_data *kdtreep, int i, float x, float y, int depth)
217
+ static void kdtree_nearest0(struct kdtree_data *kdtreep, int i, float x, float y, int depth, int *n_index, float *n_dist)
223
218
  {
219
+ struct kdtree_node *n;
220
+ float ad;
221
+ int near, far;
222
+ float dx;
223
+
224
224
  if (i == -1) {
225
225
  return;
226
226
  }
227
-
228
- struct kdtree_node *n = kdtreep->nodes + i;
229
227
 
230
- float ad = (depth % 2) ? (x - n->x) : (y - n->y);
228
+ n = kdtreep->nodes + i;
229
+
230
+ ad = (depth % 2) ? (x - n->x) : (y - n->y);
231
231
 
232
232
  //
233
233
  // recurse near, and perhaps far as well
234
234
  //
235
-
236
- int near, far;
235
+
237
236
  if (ad <= 0) {
238
237
  near = n->left; far = n->right;
239
238
  } else {
240
239
  near = n->right; far = n->left;
241
240
  }
242
- kdtree_nearest0(kdtreep, near, x, y, depth + 1);
243
- if (ad * ad < n_dist) {
244
- kdtree_nearest0(kdtreep, far, x, y, depth + 1);
241
+ kdtree_nearest0(kdtreep, near, x, y, depth + 1, n_index, n_dist);
242
+ if (ad * ad < *n_dist) {
243
+ kdtree_nearest0(kdtreep, far, x, y, depth + 1, n_index, n_dist);
245
244
  }
246
245
 
247
246
  //
248
247
  // do we beat the old distance?
249
248
  //
250
-
251
- float dx = (x - n->x) * (x - n->x);
252
- if (dx < n_dist) {
249
+
250
+ dx = (x - n->x) * (x - n->x);
251
+ if (dx < *n_dist) {
253
252
  float d = dx + ((y - n->y) * (y - n->y));
254
- if (d < n_dist) {
255
- n_index = i;
256
- n_dist = d;
253
+ if (d < *n_dist) {
254
+ *n_index = i;
255
+ *n_dist = d;
257
256
  }
258
257
  }
259
258
  }
@@ -264,15 +263,6 @@ static void kdtree_nearest0(struct kdtree_data *kdtreep, int i, float x, float y
264
263
 
265
264
  #define MAX_K 255
266
265
 
267
- typedef struct kresult {
268
- int index;
269
- float distance;
270
- } kresult;
271
- // note I leave an extra slot here at the end because of the way our binary insert works
272
- static struct kresult k_list[MAX_K + 1];
273
- static int k_len;
274
- static float k_dist;
275
-
276
266
  /*
277
267
  * call-seq:
278
268
  * kd.nearestk(x, y, k) => array
@@ -280,77 +270,84 @@ static float k_dist;
280
270
  * Finds the <i>k</i> points closest to <i>x</i>, <i>y</i>. Returns an array of
281
271
  * ids, sorted by distance. Returns an empty array if the tree is empty. Note
282
272
  * that <i>k</i> is capped at 255.
283
- *
273
+ *
284
274
  * points = []
285
275
  * points << [47.6, -122.3, 1] # Seattle
286
276
  * points << [45.5, -122.8, 2] # Portland
287
277
  * points << [40.7, -74.0, 3] # New York
288
- * kd = KDTree.new(points)
289
- *
278
+ * kd = Kdtree.new(points)
279
+ *
290
280
  * # which two cities are closest to San Francisco?
291
281
  * kd.nearestk(34.1, -118.2, 2) #=> [2, 1]
292
282
  */
293
283
  static VALUE kdtree_nearestk(VALUE kdtree, VALUE x, VALUE y, VALUE k)
294
284
  {
285
+ // note I leave an extra slot here at the end because of the way our binary insert works
286
+ kresult k_list[MAX_K + 1];
287
+ int k_len = 0;
288
+ float k_dist = INT_MAX;
289
+ int ki = NUM2INT(k);
290
+ VALUE ary;
291
+ int i;
295
292
  KDTREEP;
296
293
 
297
- k_len = 0;
298
- k_dist = INT_MAX;
299
-
300
- int ki = NUM2INT(k);
301
294
  if (ki < 1) {
302
295
  ki = 1;
303
296
  } else if (ki > MAX_K) {
304
297
  ki = MAX_K;
305
298
  }
306
- kdtree_nearestk0(kdtreep, kdtreep->root, NUM2DBL(x), NUM2DBL(y), ki, 0);
299
+ kdtree_nearestk0(kdtreep, kdtreep->root, NUM2DBL(x), NUM2DBL(y), ki, 0, k_list, &k_len, &k_dist);
307
300
 
308
301
  // convert result to ruby array
309
- VALUE ary = rb_ary_new();
310
- int i;
302
+ ary = rb_ary_new();
311
303
  for (i = 0; i < k_len; ++i) {
312
304
  rb_ary_push(ary, INT2NUM(kdtreep->nodes[k_list[i].index].id));
313
305
  }
314
306
  return ary;
315
307
  }
316
308
 
317
- static void kdtree_nearestk0(struct kdtree_data *kdtreep, int i, float x, float y, int k, int depth)
309
+ static void kdtree_nearestk0(struct kdtree_data *kdtreep, int i, float x, float y, int k, int depth, kresult *k_list, int *k_len, float *k_dist)
318
310
  {
311
+ struct kdtree_node *n;
312
+ float ad;
313
+ int near, far;
314
+ float dx;
315
+ int lo, hi;
316
+
319
317
  if (i == -1) {
320
318
  return;
321
319
  }
322
-
323
- struct kdtree_node *n = kdtreep->nodes + i;
324
320
 
325
- float ad = (depth % 2) ? (x - n->x) : (y - n->y);
321
+ n = kdtreep->nodes + i;
322
+
323
+ ad = (depth % 2) ? (x - n->x) : (y - n->y);
326
324
 
327
325
  //
328
326
  // recurse near, and then perhaps far as well
329
327
  //
330
-
331
- int near, far;
328
+
332
329
  if (ad <= 0) {
333
330
  near = n->left; far = n->right;
334
331
  } else {
335
332
  near = n->right; far = n->left;
336
333
  }
337
- kdtree_nearestk0(kdtreep, near, x, y, k, depth + 1);
338
- if (ad * ad < k_dist) {
339
- kdtree_nearestk0(kdtreep, far, x, y, k, depth + 1);
334
+ kdtree_nearestk0(kdtreep, near, x, y, k, depth + 1, k_list, k_len, k_dist);
335
+ if (ad * ad < *k_dist) {
336
+ kdtree_nearestk0(kdtreep, far, x, y, k, depth + 1, k_list, k_len, k_dist);
340
337
  }
341
338
 
342
339
  //
343
340
  // do we beat the old distance?
344
341
  //
345
-
346
- float dx = (x - n->x) * (x - n->x);
347
- if (dx < k_dist) {
342
+
343
+ dx = (x - n->x) * (x - n->x);
344
+ if (dx < *k_dist) {
348
345
  float d = dx + ((y - n->y) * (y - n->y));
349
- if (d < k_dist) {
346
+ if (d < *k_dist) {
350
347
  //
351
348
  // find spot to insert
352
349
  //
353
- int lo = 0, hi = k_len;
350
+ lo = 0, hi = *k_len;
354
351
  while (lo < hi) {
355
352
  int mid = (lo + hi) / 2;
356
353
  if (k_list[mid].distance < d) {
@@ -363,21 +360,21 @@ static void kdtree_nearestk0(struct kdtree_data *kdtreep, int i, float x, float
363
360
  //
364
361
  // insert
365
362
  //
366
-
367
- memmove(k_list + lo + 1, k_list + lo, (k_len - lo) * sizeof(struct kresult));
363
+
364
+ memmove(k_list + lo + 1, k_list + lo, (*k_len - lo) * sizeof(struct kresult));
368
365
  k_list[lo].index = i;
369
366
  k_list[lo].distance = d;
370
367
 
371
368
  //
372
369
  // adjust len/dist if necessary
373
370
  //
374
-
375
- if (k_len < k) {
376
- ++k_len;
371
+
372
+ if (*k_len < k) {
373
+ ++(*k_len);
377
374
  } else {
378
- k_dist = k_list[k - 1].distance;
375
+ *k_dist = k_list[k - 1].distance;
379
376
  }
380
- }
377
+ }
381
378
  }
382
379
  }
383
380
 
@@ -386,43 +383,42 @@ static void kdtree_nearestk0(struct kdtree_data *kdtreep, int i, float x, float
386
383
  * kd.persist(io)
387
384
  *
388
385
  * Writes the tree out to <i>io</i> so you can quickly load it later with
389
- * KDTree.new. This avoids the startup cost of initializing a tree. Apart from a
386
+ * Kdtree.new. This avoids the startup cost of initializing a tree. Apart from a
390
387
  * small header, the size of the file is proportional to the number of points,
391
388
  * requiring 20 bytes per point.
392
389
  *
393
390
  * This file is <b>NOT PORTABLE</b> across different architectures due to endian
394
391
  * issues.
395
- *
392
+ *
396
393
  * points = []
397
394
  * points << [47.6, -122.3, 1] # Seattle
398
395
  * points << [45.5, -122.8, 2] # Portland
399
396
  * points << [40.7, -74.0, 3] # New York
400
- * kd = KDTree.new(points)
397
+ * kd = Kdtree.new(points)
401
398
  *
402
399
  * # persist the tree to disk
403
400
  * File.open("treefile", "w") { |f| kd.persist(f) }
404
401
  *
405
402
  * ...
406
- *
403
+ *
407
404
  * # later, read the tree from disk
408
- * kd2 = File.open("treefile") { |f| KDTree.new(f) }
405
+ * kd2 = File.open("treefile") { |f| Kdtree.new(f) }
409
406
  */
410
407
  static VALUE kdtree_persist(VALUE kdtree, VALUE io)
411
408
  {
409
+ VALUE str;
412
410
  KDTREEP;
413
-
411
+
414
412
  if (!rb_respond_to(io, rb_intern("write"))) {
415
413
  rb_raise(rb_eTypeError, "instance of IO needed");
416
414
  }
417
- if (rb_respond_to(io, rb_intern("binmode"))) {
418
- rb_funcall2(io, rb_intern("binmode"), 0, 0);
415
+ if (rb_respond_to(io, id_binmode)) {
416
+ rb_funcall(io, id_binmode, 0);
419
417
  }
420
418
 
421
- VALUE str = rb_str_buf_new(0);
422
- rb_str_buf_cat(str, KDTREE_MAGIC, 4);
423
- rb_str_buf_cat(str, (char*)kdtreep, sizeof(struct kdtree_data) - sizeof(struct kdtree_node *));
424
- rb_str_buf_cat(str, (char*)kdtreep->nodes, sizeof(struct kdtree_node) * kdtreep->len);
425
- rb_io_write(io, str);
419
+ write_all(io, KDTREE_MAGIC, 4);
420
+ write_all(io, kdtreep, sizeof(struct kdtree_data) - sizeof(struct kdtree_node *));
421
+ write_all(io, kdtreep->nodes, sizeof(struct kdtree_node) * kdtreep->len);
426
422
  return io;
427
423
  }
428
424
 
@@ -434,22 +430,40 @@ static VALUE kdtree_persist(VALUE kdtree, VALUE io)
434
430
  */
435
431
  static VALUE kdtree_to_s(VALUE kdtree)
436
432
  {
433
+ char buf[256];
437
434
  KDTREEP;
438
435
 
439
- char buf[256];
440
436
  sprintf(buf, "#<%s:%p nodes=%d>", rb_obj_classname(kdtree), (void*)kdtree, kdtreep->len);
441
437
  return rb_str_new(buf, strlen(buf));
442
438
  }
443
439
 
440
+ //
441
+ // io helpers
442
+ //
443
+
444
+ static void read_all(VALUE io, void *buf, int len)
445
+ {
446
+ VALUE string = rb_funcall(io, id_read, 1, INT2NUM(len));
447
+ if (NIL_P(string) || RSTRING_LEN(string) != len) {
448
+ rb_raise(rb_eEOFError, "end of file reached");
449
+ }
450
+ memcpy(buf, RSTRING_PTR(string), len);
451
+ }
452
+
453
+ static void write_all(VALUE io, const void *buf, int len)
454
+ {
455
+ rb_funcall(io, id_write, 1, rb_str_new(buf, len));
456
+ }
457
+
444
458
  //
445
459
  // entry point
446
460
  //
447
461
 
448
462
  /*
449
- * KDTree is an insanely fast data structure for finding the nearest
463
+ * Kdtree is an insanely fast data structure for finding the nearest
450
464
  * neighbor(s) to a given point. This implementation only supports 2d
451
465
  * points. Also, it only supports static points - there is no way to edit the
452
- * tree after it has been initialized. KDTree should scale to millions of
466
+ * tree after it has been initialized. Kdtree should scale to millions of
453
467
  * points, though it's only been tested with around 1 million.
454
468
  *
455
469
  * Once the tree is constructed, it can be searched with nearest and nearestk.
@@ -462,8 +476,8 @@ static VALUE kdtree_to_s(VALUE kdtree)
462
476
  * points << [47.6, -122.3, 1] # Seattle
463
477
  * points << [45.5, -122.8, 2] # Portland
464
478
  * points << [40.7, -74.0, 3] # New York
465
- * kd = KDTree.new(points)
466
- *
479
+ * kd = Kdtree.new(points)
480
+ *
467
481
  * # which city is closest to San Francisco?
468
482
  * kd.nearest(34.1, -118.2) #=> 2
469
483
  * # which two cities are closest to San Francisco?
@@ -477,12 +491,17 @@ void Init_kdtree()
477
491
  {
478
492
  static VALUE clazz;
479
493
 
480
- clazz = rb_define_class("KDTree", rb_cObject);
481
-
482
- rb_define_alloc_func(clazz, kdtree_alloc);
494
+ clazz = rb_define_class("Kdtree", rb_cObject);
495
+
496
+ rb_define_alloc_func(clazz, kdtree_alloc);
483
497
  rb_define_method(clazz, "initialize", kdtree_initialize, 1);
484
498
  rb_define_method(clazz, "nearest", kdtree_nearest, 2);
485
499
  rb_define_method(clazz, "nearestk", kdtree_nearestk, 3);
486
- rb_define_method(clazz, "persist", kdtree_persist, 1);
487
- rb_define_method(clazz, "to_s", kdtree_to_s, 0);
500
+ rb_define_method(clazz, "persist", kdtree_persist, 1);
501
+ rb_define_method(clazz, "to_s", kdtree_to_s, 0);
502
+
503
+ // function ids
504
+ id_binmode = rb_intern("binmode");
505
+ id_read = rb_intern("read");
506
+ id_write = rb_intern("write");
488
507
  }
@@ -0,0 +1,22 @@
1
+ Gem::Specification.new do |s|
2
+ s.name = "kdtree"
3
+ s.version = "0.3"
4
+
5
+ s.authors = ["Adam Doppelt"]
6
+ s.email = ["amd@gurge.com"]
7
+ s.homepage = "http://github.com/gurgeous/kdtree"
8
+ s.summary = "Blazingly fast, native 2d kdtree."
9
+ s.description = <<EOF
10
+ A kdtree is a data structure that makes it possible to quickly solve
11
+ the nearest neighbor problem. This is a native 2d kdtree suitable for
12
+ production use with millions of points.
13
+ EOF
14
+
15
+ s.rubyforge_project = "kdtree"
16
+ s.add_development_dependency "rake-compiler"
17
+
18
+ s.files = `git ls-files`.split("\n")
19
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
20
+ s.extensions = ["ext/kdtree/extconf.rb"]
21
+ s.require_paths = ["lib"]
22
+ end
@@ -0,0 +1 @@
1
+ require "kdtree.so"
@@ -0,0 +1,150 @@
1
+ require "benchmark"
2
+ require "kdtree"
3
+ require "tempfile"
4
+ require "test/unit"
5
+
6
+ #
7
+ # create a tree
8
+ #
9
+
10
+ class KdtreeTest < Test::Unit::TestCase
11
+ TMP = "#{Dir.tmpdir}/kdtree_test"
12
+
13
+ def setup
14
+ @points = (0...1000).map { |i| [rand_coord, rand_coord, i] }
15
+ @kdtree = Kdtree.new(@points)
16
+ end
17
+
18
+ def teardown
19
+ File.unlink(TMP) if File.exists?(TMP)
20
+ end
21
+
22
+ def test_nearest
23
+ 100.times do
24
+ pt = [rand_coord, rand_coord]
25
+
26
+ # kdtree search
27
+ id = @kdtree.nearest(pt[0], pt[1])
28
+ kdpt = @points[id]
29
+
30
+ # slow search
31
+ sortpt = @points.sort_by { |i| distance(i, pt) }.first
32
+
33
+ # assert
34
+ kdd = distance(kdpt, pt)
35
+ sortd = distance(sortpt, pt)
36
+ assert((kdd - sortd).abs < 0.0000001, "kdtree didn't return the closest result")
37
+ end
38
+ end
39
+
40
+ def test_nearestk
41
+ 100.times do
42
+ pt = [rand_coord, rand_coord]
43
+
44
+ # kdtree search
45
+ list = @kdtree.nearestk(pt[0], pt[1], 5)
46
+ kdpt = @points[list.last]
47
+
48
+ # slow search
49
+ sortpt = @points.sort_by { |i| distance(i, pt) }[list.length - 1]
50
+
51
+ # assert
52
+ kdd = distance(kdpt, pt)
53
+ sortd = distance(sortpt, pt)
54
+ assert((kdd - sortd).abs < 0.0000001, "kdtree didn't return the closest result")
55
+ end
56
+ end
57
+
58
+ def test_persist
59
+ # write
60
+ File.open(TMP, "w") { |f| @kdtree.persist(f) }
61
+ # read
62
+ kdtree2 = File.open(TMP, "r") { |f| Kdtree.new(f) }
63
+
64
+ # now test some random points
65
+ 100.times do
66
+ pt = [rand_coord, rand_coord]
67
+ id1 = @kdtree.nearest(*pt)
68
+ id2 = kdtree2.nearest(*pt)
69
+ assert(id1 == id2, "kdtree2 differed from kdtree")
70
+ end
71
+ end
72
+
73
+ def test_bad_magic
74
+ File.open(TMP, "w") { |f| f.puts "That ain't right" }
75
+ assert_raise RuntimeError do
76
+ File.open(TMP, "r") { |f| Kdtree.new(f) }
77
+ end
78
+ end
79
+
80
+ def test_eof
81
+ File.open(TMP, "w") { |f| @kdtree.persist(f) }
82
+ bytes = File.read(TMP)
83
+
84
+ [2, 10, 100].each do |len|
85
+ File.open(TMP, "w") { |f| f.write(bytes[0, len]) }
86
+ assert_raise EOFError do
87
+ File.open(TMP, "r") { |f| Kdtree.new(f) }
88
+ end
89
+ end
90
+ end
91
+
92
+ def dont_test_speed
93
+ sizes = [1, 100, 1000, 10000, 100000, 1000000]
94
+ ks = [1, 5, 50, 255]
95
+ sizes.each do |s|
96
+ points = (0...s).map { |i| [rand_coord, rand_coord, i] }
97
+
98
+ # build
99
+ Benchmark.bm(17) do |bm|
100
+ kdtree = nil
101
+ bm.report "build" do
102
+ kdtree = Kdtree.new(points)
103
+ end
104
+ bm.report "persist" do
105
+ File.open(TMP, "w") { |f| kdtree.persist(f) }
106
+ end
107
+ bm.report "read" do
108
+ File.open(TMP, "r") { |f| Kdtree.new(f) }
109
+ end
110
+
111
+ ks.each do |k|
112
+ bm.report "100 queries (#{k})" do
113
+ total = count = 0
114
+ 100.times do
115
+ tm = Time.now
116
+ if k == 1
117
+ kdtree.nearest(rand_coord, rand_coord)
118
+ else
119
+ kdtree.nearestk(rand_coord, rand_coord, k)
120
+ end
121
+ end
122
+ end
123
+ end
124
+ end
125
+ puts
126
+ end
127
+ end
128
+
129
+ protected
130
+
131
+ def distance(a, b)
132
+ x, y = a[0] - b[0], a[1] - b[1]
133
+ x * x + y * y
134
+ end
135
+
136
+ def rand_coord
137
+ rand(0) * 10 - 5
138
+ end
139
+ end
140
+
141
+ # running dont_test_speed on my i5 2.8ghz:
142
+ #
143
+ # user system total real
144
+ # build 3.350000 0.020000 3.370000 ( 3.520528)
145
+ # persist 0.150000 0.020000 0.170000 ( 0.301963)
146
+ # read 0.280000 0.000000 0.280000 ( 0.432676)
147
+ # 100 queries (1) 0.000000 0.000000 0.000000 ( 0.000319)
148
+ # 100 queries (5) 0.000000 0.000000 0.000000 ( 0.000412)
149
+ # 100 queries (50) 0.000000 0.000000 0.000000 ( 0.001417)
150
+ # 100 queries (255) 0.000000 0.000000 0.000000 ( 0.006268)
metadata CHANGED
@@ -1,61 +1,85 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: kdtree
3
- version: !ruby/object:Gem::Version
4
- version: "0.1"
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.3'
5
+ prerelease:
5
6
  platform: ruby
6
- authors:
7
+ authors:
7
8
  - Adam Doppelt
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
12
+ date: 2012-10-17 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rake-compiler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: ! 'A kdtree is a data structure that makes it possible to quickly solve
11
31
 
12
- date: 2010-01-21 00:00:00 -08:00
13
- default_executable:
14
- dependencies: []
32
+ the nearest neighbor problem. This is a native 2d kdtree suitable for
15
33
 
16
- description:
17
- email: amd@gurge.com
18
- executables: []
34
+ production use with millions of points.
19
35
 
20
- extensions:
21
- - ext/extconf.rb
36
+ '
37
+ email:
38
+ - amd@gurge.com
39
+ executables: []
40
+ extensions:
41
+ - ext/kdtree/extconf.rb
22
42
  extra_rdoc_files: []
23
-
24
- files:
25
- - ext/extconf.rb
26
- - ext/kdtree.c
43
+ files:
44
+ - .gitignore
45
+ - .travis.yml
46
+ - Gemfile
27
47
  - LICENSE
28
- - test/test.rb
29
- has_rdoc: true
30
- homepage:
48
+ - README.md
49
+ - Rakefile
50
+ - ext/kdtree/extconf.rb
51
+ - ext/kdtree/kdtree.c
52
+ - kdtree.gemspec
53
+ - lib/kdtree.rb
54
+ - test/test_kdtree.rb
55
+ homepage: http://github.com/gurgeous/kdtree
31
56
  licenses: []
32
-
33
57
  post_install_message:
34
- rdoc_options:
35
- - --exclude
36
- - test
37
- - --exclude
38
- - extconf
39
- require_paths:
40
- - .
41
- required_ruby_version: !ruby/object:Gem::Requirement
42
- requirements:
43
- - - ">="
44
- - !ruby/object:Gem::Version
45
- version: 1.8.5
46
- version:
47
- required_rubygems_version: !ruby/object:Gem::Requirement
48
- requirements:
49
- - - ">="
50
- - !ruby/object:Gem::Version
51
- version: "0"
52
- version:
58
+ rdoc_options: []
59
+ require_paths:
60
+ - lib
61
+ required_ruby_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ! '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ segments:
68
+ - 0
69
+ hash: -3094601017742930682
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ segments:
77
+ - 0
78
+ hash: -3094601017742930682
53
79
  requirements: []
54
-
55
- rubyforge_project:
56
- rubygems_version: 1.3.5
80
+ rubyforge_project: kdtree
81
+ rubygems_version: 1.8.21
57
82
  signing_key:
58
83
  specification_version: 3
59
- summary: Blazingly fast 2d kdtree.
60
- test_files:
61
- - test/test.rb
84
+ summary: Blazingly fast, native 2d kdtree.
85
+ test_files: []
@@ -1,138 +0,0 @@
1
- require "#{File.expand_path(File.dirname(__FILE__))}/../ext/kdtree.o"
2
- require "test/unit"
3
- require "tempfile"
4
-
5
- #
6
- # create a tree
7
- #
8
-
9
- class KDTreeTest < Test::Unit::TestCase
10
- TMP = "#{Dir.tmpdir}/kdtree_test"
11
-
12
- def test_nearest
13
- setup_tree(1000)
14
- 100.times do
15
- pt = [rand_coord, rand_coord]
16
-
17
- # kdtree search
18
- id = @kdtree.nearest(pt[0], pt[1])
19
- kdpt = @points[id]
20
-
21
- # slow search
22
- sortpt = @points.sort_by { |i| distance(i, pt) }.first
23
-
24
- # assert
25
- kdd = distance(kdpt, pt)
26
- sortd = distance(sortpt, pt)
27
- assert((kdd - sortd).abs < 0.0000001, "kdtree didn't return the closest result")
28
- end
29
- end
30
-
31
- def test_nearestk
32
- setup_tree(1000)
33
- 100.times do
34
- pt = [rand_coord, rand_coord]
35
-
36
- # kdtree search
37
- list = @kdtree.nearestk(pt[0], pt[1], 5)
38
- kdpt = @points[list.last]
39
-
40
- # slow search
41
- sortpt = @points.sort_by { |i| distance(i, pt) }[list.length - 1]
42
-
43
- # assert
44
- kdd = distance(kdpt, pt)
45
- sortd = distance(sortpt, pt)
46
- assert((kdd - sortd).abs < 0.0000001, "kdtree didn't return the closest result")
47
- end
48
- end
49
-
50
- def test_persist
51
- setup_tree(1000)
52
-
53
- begin
54
- # write
55
- File.open(TMP, "w") { |f| @kdtree.persist(f) }
56
- # read
57
- kdtree2 = File.open(TMP, "r") { |f| KDTree.new(f) }
58
-
59
- # now test some random points
60
- 100.times do
61
- pt = [rand_coord, rand_coord]
62
- id1 = @kdtree.nearest(*pt)
63
- id2 = kdtree2.nearest(*pt)
64
- assert(id1 == id2, "kdtree2 differed from kdtree")
65
- end
66
- ensure
67
- File.unlink(TMP)
68
- end
69
-
70
- # now test magic problems
71
- begin
72
- File.open(TMP, "w") { |f| f.puts "That ain't right" }
73
- assert_raise RuntimeError do
74
- File.open(TMP, "r") { |f| KDTree.new(f) }
75
- end
76
- ensure
77
- File.unlink(TMP)
78
- end
79
- end
80
-
81
- def dont_test_speed
82
- printf("\n")
83
- sizes = [1, 100, 1000, 10000, 100000, 1000000]
84
- ks = [1, 5, 50, 255]
85
- sizes.each do |s|
86
- points = (0...s).map { |i| [rand_coord, rand_coord, i] }
87
-
88
- # build
89
- tm = Time.now
90
- kdtree = KDTree.new(points)
91
- printf "build %d took %.6fs\n", s, Time.now - tm
92
-
93
- begin
94
- # write
95
- tm = Time.now
96
- File.open(TMP, "w") { |f| kdtree.persist(f) }
97
- printf "write %d took %.6fs\n", s, Time.now - tm
98
- # read
99
- tm = Time.now
100
- File.open(TMP, "r") { |f| KDTree.new(f) }
101
- printf "read %d took %.6fs\n", s, Time.now - tm
102
- ensure
103
- File.unlink(TMP)
104
- end
105
-
106
- ks.each do |k|
107
- total = count = 0
108
- 100.times do
109
- tm = Time.now
110
- if k == 1
111
- kdtree.nearest(rand_coord, rand_coord)
112
- else
113
- kdtree.nearestk(rand_coord, rand_coord, k)
114
- end
115
- total += Time.now - tm
116
- count += 1
117
- end
118
- printf "avg query time = %.6fs [%d/%d]\n", total / count, s, k
119
- end
120
- end
121
- end
122
-
123
- protected
124
-
125
- def setup_tree(len)
126
- @points = (0...len).map { |i| [rand_coord, rand_coord, i] }
127
- @kdtree = KDTree.new(@points)
128
- end
129
-
130
- def distance(a, b)
131
- x, y = a[0] - b[0], a[1] - b[1]
132
- x * x + y * y
133
- end
134
-
135
- def rand_coord
136
- rand(0) * 10 - 5
137
- end
138
- end