fastcluster 0.9 → 1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,6 +3,18 @@
3
3
  #include <stdlib.h>
4
4
  #include <math.h>
5
5
 
6
+ #ifndef RUBY_19
7
+ #ifndef RFLOAT_VALUE
8
+ #define RFLOAT_VALUE(v) (RFLOAT(v)->value)
9
+ #endif
10
+ #ifndef RARRAY_LEN
11
+ #define RARRAY_LEN(v) (RARRAY(v)->len)
12
+ #endif
13
+ #ifndef RARRAY_PTR
14
+ #define RARRAY_PTR(v) (RARRAY(v)->ptr)
15
+ #endif
16
+ #endif
17
+
6
18
  /*
7
19
  *
8
20
  * Algorithm:
@@ -88,11 +100,11 @@ static void fc_combine_clusters(CLUSTER * dst, CLUSTER * src) {
88
100
  */
89
101
  static long fc_get_max_grid(long resolution, CLUSTER * point_array, long num_points) {
90
102
  int i;
91
- int max_grid = 0;
103
+ long max_grid = 0;
92
104
  for(i = 0; i < num_points; i++) {
93
105
  CLUSTER * point = &point_array[i];
94
- int xg = point->x/resolution;
95
- int yg = point->y/resolution;
106
+ long xg = point->x/resolution;
107
+ long yg = point->y/resolution;
96
108
  if(xg>max_grid)
97
109
  max_grid = xg;
98
110
  if(yg>max_grid)
@@ -148,9 +160,9 @@ static VALUE fc_initialize_clusterer(int argc, VALUE *argv, VALUE self) {
148
160
  static void fc_native_point_array(CLUSTER * arrayPtr, VALUE rubyArray, long num_points) {
149
161
  int i;
150
162
  for(i=0;i<num_points;i++) {
151
- VALUE holdArray = RARRAY(rubyArray)->ptr[i];
152
- double x = NUM2DBL(RARRAY(holdArray)->ptr[0]);
153
- double y = NUM2DBL(RARRAY(holdArray)->ptr[1]);
163
+ VALUE holdArray = RARRAY_PTR(rubyArray)[i];
164
+ double x = NUM2DBL(RARRAY_PTR(holdArray)[0]);
165
+ double y = NUM2DBL(RARRAY_PTR(holdArray)[1]);
154
166
 
155
167
  arrayPtr[i].x = x;
156
168
  arrayPtr[i].y = y;
@@ -158,14 +170,30 @@ static void fc_native_point_array(CLUSTER * arrayPtr, VALUE rubyArray, long num_
158
170
  }
159
171
  }
160
172
 
161
- static CLUSTER *fc_calculate_clusters(long separation, long resolution, CLUSTER * point_array, int num_points, long * cluster_size) {
162
- int max_grid = fc_get_max_grid(resolution, &point_array[0], num_points);
173
+ /*
174
+ * This function does the actual clustering. It takes the following params:
175
+ *
176
+ * <tt>separation</tt> - The minimum distance between clusters. At 0 there will be one cluster with all the points.
177
+ * <tt>resolution</tt> - Any points that fall within resolution distance will be clustered automatically.
178
+ * <tt>point_array</tt> - Array of points to cluster.
179
+ * <tt>num_points</tt> - Size of the point array.
180
+ * <tt>cluster_size</tt> - Pointer for a variable to receive the size of the returned array.
181
+ *
182
+ * This function returns an array of CLUSTER.
183
+ */
184
+ static CLUSTER *fc_calculate_clusters(long separation, long resolution, CLUSTER * point_array, long num_points, long * cluster_size) {
163
185
  int i, j;
164
186
  long preclust_size = 0;
165
187
 
166
188
  CLUSTER * cluster;
167
189
  CLUSTER * clusters;
168
190
 
191
+ // This first section does preclustering. The points are split into a grid where each
192
+ // grid box is of the size resolution x resolution. When more than one point falls
193
+ // in a grid box they are clustered.
194
+ long max_grid = fc_get_max_grid(resolution, &point_array[0], num_points);
195
+
196
+ // Only precluster if a resolution is specified
169
197
  if(resolution > 0) {
170
198
  CLUSTER grid_array[max_grid][max_grid];
171
199
 
@@ -175,20 +203,24 @@ static CLUSTER *fc_calculate_clusters(long separation, long resolution, CLUSTER
175
203
  }
176
204
  }
177
205
 
206
+ // Add clusters to grid
178
207
  for(i = 0; i < num_points; i++) {
179
208
  cluster = &point_array[i];
180
209
 
181
- int gx = floor(cluster->x/resolution);
182
- int gy = floor(cluster->y/resolution);
210
+ long gx = floor(cluster->x/resolution);
211
+ long gy = floor(cluster->y/resolution);
183
212
 
184
213
  fc_add_to_cluster(&grid_array[gx][gy], cluster->x, cluster->y);
185
214
 
215
+ // If the grid array is holding a cluster of size 1 at this point
216
+ // then it's a new cluster, so the preclust_size is incremented.
186
217
  if(grid_array[gx][gy].size == 1) preclust_size++;
187
218
  }
188
219
 
220
+ // Now the grid clusters are copied into an array
189
221
  clusters = malloc(preclust_size * sizeof(CLUSTER));
190
222
 
191
- int max_grid_total = max_grid * max_grid;
223
+ long max_grid_total = max_grid * max_grid;
192
224
  CLUSTER * gridPtr = grid_array[0];
193
225
 
194
226
  int incr = 0;
@@ -199,6 +231,7 @@ static CLUSTER *fc_calculate_clusters(long separation, long resolution, CLUSTER
199
231
  }
200
232
  }
201
233
  } else {
234
+ // As there is no grid just copy the original point array into a new array
202
235
  preclust_size = num_points;
203
236
  clusters = malloc(preclust_size * sizeof(CLUSTER));
204
237
  memcpy(&clusters[0], &point_array[0], preclust_size * sizeof(CLUSTER));
@@ -207,11 +240,10 @@ static CLUSTER *fc_calculate_clusters(long separation, long resolution, CLUSTER
207
240
  double distance_sep = 0;
208
241
  long current_cluster_size = 0;
209
242
  int found;
210
- long nearest_origin;
243
+ long nearest_origin = 0;
211
244
  long nearest_other;
212
245
 
213
246
  do {
214
- // calculate distance sep
215
247
  distance_sep = 0;
216
248
  nearest_other = 0;
217
249
 
@@ -219,8 +251,6 @@ static CLUSTER *fc_calculate_clusters(long separation, long resolution, CLUSTER
219
251
  for(j=i+1;j<preclust_size;j++){
220
252
  double distance = fc_get_distance_between(&clusters[i], &clusters[j]);
221
253
 
222
- // printf("distance between %f, %f and %f, %f is %f\n", clusters[i].x, clusters[i].y, clusters[j].x, clusters[j].y, distance);
223
-
224
254
  if(distance_sep == 0 || distance < distance_sep) {
225
255
  distance_sep = distance;
226
256
 
@@ -232,9 +262,13 @@ static CLUSTER *fc_calculate_clusters(long separation, long resolution, CLUSTER
232
262
  }
233
263
  }
234
264
 
265
+ // If two clusters have been identified for merging, this part merges
266
+ // them into the first cluster and removes the second cluster from the array
235
267
  if(nearest_other > 0) {
268
+ // merge into first cluster
236
269
  fc_combine_clusters(&clusters[nearest_origin], &clusters[nearest_other]);
237
270
 
271
+ // remove second cluster by creating temporary array without it
238
272
  CLUSTER *newarr = malloc(preclust_size * sizeof(CLUSTER));
239
273
  memcpy(&newarr[0], &clusters[0], nearest_other * sizeof(CLUSTER));
240
274
  memcpy(&newarr[nearest_other], &clusters[nearest_other+1], (preclust_size - (nearest_other + 1)) * sizeof(CLUSTER));
@@ -243,18 +277,22 @@ static CLUSTER *fc_calculate_clusters(long separation, long resolution, CLUSTER
243
277
  clusters = (CLUSTER*)_tmp;
244
278
  preclust_size = preclust_size - 1;
245
279
 
246
- for(i=0;i<preclust_size;i++)
247
- clusters[i] = newarr[i];
280
+ // and copying it back
281
+ memcpy(&clusters[0], &newarr[0], preclust_size * sizeof(CLUSTER));
248
282
 
249
283
  free(newarr);
250
284
  }
251
-
285
+ // keep looping until either everything is in one cluster, or all clusters are
286
+ // outside the separation distance.
252
287
  } while((separation == 0 || distance_sep < separation) && preclust_size > 1);
253
288
 
254
289
  *cluster_size = preclust_size;
255
290
  return clusters;
256
291
  }
257
292
 
293
+ /*
294
+ * Get the ruby class Cluster
295
+ */
258
296
  static VALUE fc_get_cluster_class() {
259
297
  ID cluster_module_id = rb_intern("Fastcluster");
260
298
  ID cluster_class_id = rb_intern("Cluster");
@@ -278,7 +316,7 @@ static VALUE fc_get_clusters(VALUE self) {
278
316
 
279
317
  // Create a native array of clusters from the ruby array of points
280
318
  VALUE pointArray = fc_get_points(self);
281
- long num_points = RARRAY(pointArray)->len;
319
+ long num_points = RARRAY_LEN(pointArray);
282
320
  CLUSTER native_point_array[num_points];
283
321
 
284
322
  fc_native_point_array(&native_point_array[0], pointArray, num_points);
@@ -322,4 +360,4 @@ void Init_clusterer() {
322
360
 
323
361
  rb_define_method(clustererClass, "clusters", fc_get_clusters, 0);
324
362
  rb_define_method(clustererClass, "points", fc_get_points, 0);
325
- }
363
+ }
@@ -1,4 +1,4 @@
1
- require File.dirname(__FILE__) + '/../../spec_helper'
1
+ require 'spec_helper'
2
2
  require 'benchmark'
3
3
 
4
4
  describe Fastcluster::Clusterer do
@@ -117,7 +117,7 @@ describe Fastcluster::Clusterer do
117
117
 
118
118
  describe "and separation 1" do
119
119
  before do
120
- require 'lib/fastcluster'
120
+ require 'fastcluster'
121
121
  @clusterer = Fastcluster::Clusterer.new(1, 0, @points)
122
122
  end
123
123
 
@@ -219,4 +219,4 @@ describe Fastcluster::Clusterer do
219
219
  end
220
220
  end
221
221
  end
222
- end
222
+ end
data/test.rb CHANGED
@@ -1,69 +1,15 @@
1
1
  require File.dirname(__FILE__) + '/lib/fastcluster'
2
2
  require 'benchmark'
3
3
 
4
- points = [[815, 183], [860, 176], [793, 176], [847, 176], [813, 176], [865, 183],
5
- [804, 185], [813, 181], [797, 181], [193, 133], [905, 168], [821, 173],
6
- [804, 178], [799, 180], [175, 360], [880, 176], [826, 171], [843, 181],
7
- [857, 171], [443, 495], [857, 174], [382, 132], [97, 1203], [218, 281],
8
- [97, 1778], [814, 181], [474, 133], [797, 179], [844, 180], [812, 178],
9
- [846, 18], [795, 183], [862, 182], [840, 175], [788, 176], [478, 140],
10
- [860, 173], [974, 16], [833, 181], [288, 130], [831, 874], [884, 174],
11
- [834, 179], [836, 182], [181, 354], [346, 46], [855, 171], [855, 180],
12
- [959, 177], [792, 181], [138, 1774], [283, 783], [815, 877], [807, 178],
13
- [430, 497], [819, 181], [801, 176], [836, 182], [825, 176], [873, 179],
14
- [847, 182], [124, 1678], [157, 132], [835, 177], [827, 180], [532, 1191],
15
- [873, 178], [174, 128], [805, 1328], [798, 178], [502, 659], [804, 180],
16
- [960, 178], [886, 182], [867, 183], [875, 179], [854, 176], [849, 185],
17
- [136, 1783], [800, 181], [810, 180], [312, 782], [865, 177], [745, 125],
18
- [833, 178], [882, 181], [834, 177], [821, 184], [899, 182], [26, 124],
19
- [859, 182], [892, 174], [172, 134], [822, 182], [396, 135], [830, 184],
20
- [792, 185], [427, 488], [818, 173], [832, 177], [406, 129], [852, 181],
21
- [805, 177], [820, 175], [19, 14], [839, 183], [264, 127], [800, 172],
22
- [811, 176], [827, 178], [896, 177], [863, 183], [813, 177], [458, 495],
23
- [808, 181], [850, 748], [810, 184], [850, 181], [886, 179], [446, 497],
24
- [851, 180], [592, 132], [300, 538], [794, 180], [815, 180], [57, 1772],
25
- [799, 175], [821, 182], [135, 1681], [830, 483], [796, 173], [887, 40],
26
- [823, 179], [864, 179], [322, 30], [886, 182], [808, 178], [823, 189],
27
- [230, 360], [224, 275], [875, 179], [133, 1773], [844, 175], [433, 495],
28
- [13, 15], [856, 178], [871, 181], [658, 130], [839, 183], [822, 83],
29
- [827, 175], [848, 179], [894, 184], [833, 177], [828, 176], [482, 135],
30
- [841, 177], [817, 184], [470, 140], [800, 180], [857, 15], [807, 187],
31
- [425, 497], [64, 131], [852, 580], [883, 183], [836, 181], [878, 177],
32
- [815, 183], [860, 176], [793, 176], [847, 176], [813, 176], [865, 183],
33
- [804, 185], [813, 181], [797, 181], [193, 133], [905, 168], [821, 173],
34
- [804, 178], [799, 180], [175, 360], [880, 176], [826, 171], [843, 181],
35
- [857, 171], [443, 495], [857, 174], [382, 132], [97, 1203], [218, 281],
36
- [97, 1778], [814, 181], [474, 133], [797, 179], [844, 180], [812, 178],
37
- [846, 18], [795, 183], [862, 182], [840, 175], [788, 176], [478, 140],
38
- [860, 173], [974, 16], [833, 181], [288, 130], [831, 874], [884, 174],
39
- [834, 179], [836, 182], [181, 354], [346, 46], [855, 171], [855, 180],
40
- [959, 177], [792, 181], [138, 1774], [283, 783], [815, 877], [807, 178],
41
- [430, 497], [819, 181], [801, 176], [836, 182], [825, 176], [873, 179],
42
- [847, 182], [124, 1678], [157, 132], [835, 177], [827, 180], [532, 1191],
43
- [873, 178], [174, 128], [805, 1328], [798, 178], [502, 659], [804, 180],
44
- [960, 178], [886, 182], [867, 183], [875, 179], [854, 176], [849, 185],
45
- [136, 1783], [800, 181], [810, 180], [312, 782], [865, 177], [745, 125],
46
- [833, 178], [882, 181], [834, 177], [821, 184], [899, 182], [26, 124],
47
- [859, 182], [892, 174], [172, 134], [822, 182], [396, 135], [830, 184],
48
- [792, 185], [427, 488], [818, 173], [832, 177], [406, 129], [852, 181],
49
- [805, 177], [820, 175], [19, 14], [839, 183], [264, 127], [800, 172],
50
- [811, 176], [827, 178], [896, 177], [863, 183], [813, 177], [458, 495],
51
- [808, 181], [850, 748], [810, 184], [850, 181], [886, 179], [446, 497],
52
- [851, 180], [592, 132], [300, 538], [794, 180], [815, 180], [57, 1772],
53
- [799, 175], [821, 182], [135, 1681], [830, 483], [796, 173], [887, 40],
54
- [823, 179], [864, 179], [322, 30], [886, 182], [808, 178], [823, 189],
55
- [230, 360], [224, 275], [875, 179], [133, 1773], [844, 175], [433, 495],
56
- [13, 15], [856, 178], [871, 181], [658, 130], [839, 183], [822, 83],
57
- [827, 175], [848, 179], [894, 184], [833, 177], [828, 176], [482, 135],
58
- [841, 177], [817, 184], [470, 140], [800, 180], [857, 15], [807, 187],
59
- [425, 497], [64, 131], [852, 580], [883, 183], [836, 181], [878, 177]]
4
+ points = [[237, 434], [282, 435], [281, 429], [241, 427], [259, 434], [499, 218], [254, 431], [222, 433], [253, 441], [212, 440], [252, 432], [279, 433], [248, 428], [249, 202], [249, 202], [252, 202], [252, 202], [562, 402], [728, 23], [227, 424], [267, 428], [247, 438], [290, 452]]
60
5
 
61
6
  puts Benchmark.measure {
62
- clusterer = Fastcluster::Clusterer.new(105, 5, points)
63
- clusters = clusterer.clusters
7
+ clusterer = Fastcluster::Clusterer.new(25, 15, points)
8
+ clusters = clusterer.clusters
9
+ puts clusters.inspect
64
10
 
65
- clusters.sort{|a,b| a.size == b.size ? a.x <=> b.x : a.size <=> b.size }.each do |cluster|
66
- puts cluster
67
- end
11
+ #clusters.sort{|a,b| a.size == b.size ? a.x <=> b.x : a.size <=> b.size }.each do |cluster|
12
+ # puts cluster
13
+ #end
68
14
 
69
15
  }
metadata CHANGED
@@ -1,80 +1,70 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: fastcluster
3
- version: !ruby/object:Gem::Version
4
- version: "0.9"
3
+ version: !ruby/object:Gem::Version
4
+ version: '1.0'
5
+ prerelease:
5
6
  platform: ruby
6
- authors:
7
+ authors:
7
8
  - Jeremy Wells
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
-
12
- date: 2009-10-24 00:00:00 +13:00
12
+ date: 2011-07-09 00:00:00.000000000 +12:00
13
13
  default_executable:
14
14
  dependencies: []
15
-
16
15
  description: A clustering library for 2 dimensional points
17
- email: jeremy@boost.co.nz
16
+ email:
17
+ - jemmyw@gmail.com
18
18
  executables: []
19
-
20
- extensions:
19
+ extensions:
21
20
  - ext/extconf.rb
22
- extra_rdoc_files:
23
- - CHANGELOG
24
- - README.rdoc
25
- - ext/clusterer.c
21
+ extra_rdoc_files: []
22
+ files:
26
23
  - ext/extconf.rb
27
- - lib/fastcluster.rb
24
+ - ext/clusterer.c
28
25
  - lib/fastcluster/cluster.rb
29
- files:
26
+ - lib/fastcluster.rb
30
27
  - CHANGELOG
31
- - Manifest
32
28
  - README.rdoc
33
- - Rakefile
34
- - ext/clusterer.c
35
- - ext/extconf.rb
36
- - lib/fastcluster.rb
37
- - lib/fastcluster/cluster.rb
38
29
  - spec/lib/fastcluster/cluster_spec.rb
39
30
  - spec/lib/fastcluster/clusterer_spec.rb
40
- - spec/spec.opts
41
31
  - spec/spec_helper.rb
42
32
  - spec/test_data.rb
43
33
  - test.rb
44
- - fastcluster.gemspec
45
34
  has_rdoc: true
46
35
  homepage: http://github.com/jemmyw/fastcluster
47
36
  licenses: []
48
-
49
37
  post_install_message:
50
- rdoc_options:
51
- - --line-numbers
52
- - --inline-source
53
- - --title
54
- - Fastcluster
55
- - --main
56
- - README.rdoc
57
- require_paths:
38
+ rdoc_options: []
39
+ require_paths:
58
40
  - lib
59
- - ext
60
- required_ruby_version: !ruby/object:Gem::Requirement
61
- requirements:
62
- - - ">="
63
- - !ruby/object:Gem::Version
64
- version: "0"
65
- version:
66
- required_rubygems_version: !ruby/object:Gem::Requirement
67
- requirements:
68
- - - ">="
69
- - !ruby/object:Gem::Version
70
- version: "1.2"
71
- version:
41
+ required_ruby_version: !ruby/object:Gem::Requirement
42
+ none: false
43
+ requirements:
44
+ - - ! '>='
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ segments:
48
+ - 0
49
+ hash: 716520748893881833
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ! '>='
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ segments:
57
+ - 0
58
+ hash: 716520748893881833
72
59
  requirements: []
73
-
74
- rubyforge_project: fastcluster
75
- rubygems_version: 1.3.5
60
+ rubyforge_project:
61
+ rubygems_version: 1.6.2
76
62
  signing_key:
77
63
  specification_version: 3
78
64
  summary: A clustering library for 2 dimensional points
79
- test_files: []
80
-
65
+ test_files:
66
+ - spec/lib/fastcluster/cluster_spec.rb
67
+ - spec/lib/fastcluster/clusterer_spec.rb
68
+ - spec/spec_helper.rb
69
+ - spec/test_data.rb
70
+ - test.rb
data/Manifest DELETED
@@ -1,14 +0,0 @@
1
- CHANGELOG
2
- Manifest
3
- README.rdoc
4
- Rakefile
5
- ext/clusterer.c
6
- ext/extconf.rb
7
- lib/fastcluster.rb
8
- lib/fastcluster/cluster.rb
9
- spec/lib/fastcluster/cluster_spec.rb
10
- spec/lib/fastcluster/clusterer_spec.rb
11
- spec/spec.opts
12
- spec/spec_helper.rb
13
- spec/test_data.rb
14
- test.rb
data/Rakefile DELETED
@@ -1,12 +0,0 @@
1
- require 'rubygems'
2
- require 'spec'
3
- require 'spec/rake/spectask'
4
- require 'echoe'
5
-
6
- Echoe.new("fastcluster") do |p|
7
- p.author = "Jeremy Wells"
8
- p.email = "jeremy@boost.co.nz"
9
- p.summary = "A clustering library for 2 dimensional points"
10
- p.description = "A clustering library for 2 dimensional points"
11
- p.url = "http://github.com/jemmyw/fastcluster"
12
- end
@@ -1,31 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- Gem::Specification.new do |s|
4
- s.name = %q{fastcluster}
5
- s.version = "0.9"
6
-
7
- s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
- s.authors = ["Jeremy Wells"]
9
- s.date = %q{2009-10-24}
10
- s.description = %q{A clustering library for 2 dimensional points}
11
- s.email = %q{jeremy@boost.co.nz}
12
- s.extensions = ["ext/extconf.rb"]
13
- s.extra_rdoc_files = ["CHANGELOG", "README.rdoc", "ext/clusterer.c", "ext/extconf.rb", "lib/fastcluster.rb", "lib/fastcluster/cluster.rb"]
14
- s.files = ["CHANGELOG", "Manifest", "README.rdoc", "Rakefile", "ext/clusterer.c", "ext/extconf.rb", "lib/fastcluster.rb", "lib/fastcluster/cluster.rb", "spec/lib/fastcluster/cluster_spec.rb", "spec/lib/fastcluster/clusterer_spec.rb", "spec/spec.opts", "spec/spec_helper.rb", "spec/test_data.rb", "test.rb", "fastcluster.gemspec"]
15
- s.homepage = %q{http://github.com/jemmyw/fastcluster}
16
- s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Fastcluster", "--main", "README.rdoc"]
17
- s.require_paths = ["lib", "ext"]
18
- s.rubyforge_project = %q{fastcluster}
19
- s.rubygems_version = %q{1.3.5}
20
- s.summary = %q{A clustering library for 2 dimensional points}
21
-
22
- if s.respond_to? :specification_version then
23
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
24
- s.specification_version = 3
25
-
26
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
27
- else
28
- end
29
- else
30
- end
31
- end
@@ -1,5 +0,0 @@
1
- --colour
2
- --format progress
3
- --loadby mtime
4
- --reverse
5
- --debugger