fastcluster 0.9 → 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,18 @@
3
3
  #include <stdlib.h>
4
4
  #include <math.h>
5
5
 
6
+ #ifndef RUBY_19
7
+ #ifndef RFLOAT_VALUE
8
+ #define RFLOAT_VALUE(v) (RFLOAT(v)->value)
9
+ #endif
10
+ #ifndef RARRAY_LEN
11
+ #define RARRAY_LEN(v) (RARRAY(v)->len)
12
+ #endif
13
+ #ifndef RARRAY_PTR
14
+ #define RARRAY_PTR(v) (RARRAY(v)->ptr)
15
+ #endif
16
+ #endif
17
+
6
18
  /*
7
19
  *
8
20
  * Algorithm:
@@ -88,11 +100,11 @@ static void fc_combine_clusters(CLUSTER * dst, CLUSTER * src) {
88
100
  */
89
101
  static long fc_get_max_grid(long resolution, CLUSTER * point_array, long num_points) {
90
102
  int i;
91
- int max_grid = 0;
103
+ long max_grid = 0;
92
104
  for(i = 0; i < num_points; i++) {
93
105
  CLUSTER * point = &point_array[i];
94
- int xg = point->x/resolution;
95
- int yg = point->y/resolution;
106
+ long xg = point->x/resolution;
107
+ long yg = point->y/resolution;
96
108
  if(xg>max_grid)
97
109
  max_grid = xg;
98
110
  if(yg>max_grid)
@@ -148,9 +160,9 @@ static VALUE fc_initialize_clusterer(int argc, VALUE *argv, VALUE self) {
148
160
  static void fc_native_point_array(CLUSTER * arrayPtr, VALUE rubyArray, long num_points) {
149
161
  int i;
150
162
  for(i=0;i<num_points;i++) {
151
- VALUE holdArray = RARRAY(rubyArray)->ptr[i];
152
- double x = NUM2DBL(RARRAY(holdArray)->ptr[0]);
153
- double y = NUM2DBL(RARRAY(holdArray)->ptr[1]);
163
+ VALUE holdArray = RARRAY_PTR(rubyArray)[i];
164
+ double x = NUM2DBL(RARRAY_PTR(holdArray)[0]);
165
+ double y = NUM2DBL(RARRAY_PTR(holdArray)[1]);
154
166
 
155
167
  arrayPtr[i].x = x;
156
168
  arrayPtr[i].y = y;
@@ -158,14 +170,30 @@ static void fc_native_point_array(CLUSTER * arrayPtr, VALUE rubyArray, long num_
158
170
  }
159
171
  }
160
172
 
161
- static CLUSTER *fc_calculate_clusters(long separation, long resolution, CLUSTER * point_array, int num_points, long * cluster_size) {
162
- int max_grid = fc_get_max_grid(resolution, &point_array[0], num_points);
173
+ /*
174
+ * This function does the actual clustering. It takes the following params:
175
+ *
176
+ * <tt>separation</tt> - The minimum distance between clusters. At 0 there will be one cluster with all the points.
177
+ * <tt>resolution</tt> - Any points that fall within resolution distance will be clustered automatically.
178
+ * <tt>point_array</tt> - Array of points to cluster.
179
+ * <tt>num_points</tt> - Size of the point array.
180
+ * <tt>cluster_size</tt> - Pointer for a variable to receive the size of the returned array.
181
+ *
182
+ * This function returns an array of CLUSTER.
183
+ */
184
+ static CLUSTER *fc_calculate_clusters(long separation, long resolution, CLUSTER * point_array, long num_points, long * cluster_size) {
163
185
  int i, j;
164
186
  long preclust_size = 0;
165
187
 
166
188
  CLUSTER * cluster;
167
189
  CLUSTER * clusters;
168
190
 
191
+ // This first section does preclustering. The points are split into a grid where each
192
+ // grid box is of the size resolution x resolution. When more than one point falls
193
+ // in a grid box they are clustered.
194
+ long max_grid = fc_get_max_grid(resolution, &point_array[0], num_points);
195
+
196
+ // Only precluster if a resolution is specified
169
197
  if(resolution > 0) {
170
198
  CLUSTER grid_array[max_grid][max_grid];
171
199
 
@@ -175,20 +203,24 @@ static CLUSTER *fc_calculate_clusters(long separation, long resolution, CLUSTER
175
203
  }
176
204
  }
177
205
 
206
+ // Add clusters to grid
178
207
  for(i = 0; i < num_points; i++) {
179
208
  cluster = &point_array[i];
180
209
 
181
- int gx = floor(cluster->x/resolution);
182
- int gy = floor(cluster->y/resolution);
210
+ long gx = floor(cluster->x/resolution);
211
+ long gy = floor(cluster->y/resolution);
183
212
 
184
213
  fc_add_to_cluster(&grid_array[gx][gy], cluster->x, cluster->y);
185
214
 
215
+ // If the grid array is holding a cluster of size 1 at this point
216
+ // then it's a new cluster, so the preclust_size is incremented.
186
217
  if(grid_array[gx][gy].size == 1) preclust_size++;
187
218
  }
188
219
 
220
+ // Now the grid clusters are copied into an array
189
221
  clusters = malloc(preclust_size * sizeof(CLUSTER));
190
222
 
191
- int max_grid_total = max_grid * max_grid;
223
+ long max_grid_total = max_grid * max_grid;
192
224
  CLUSTER * gridPtr = grid_array[0];
193
225
 
194
226
  int incr = 0;
@@ -199,6 +231,7 @@ static CLUSTER *fc_calculate_clusters(long separation, long resolution, CLUSTER
199
231
  }
200
232
  }
201
233
  } else {
234
+ // As there is no grid just copy the original point array into a new array
202
235
  preclust_size = num_points;
203
236
  clusters = malloc(preclust_size * sizeof(CLUSTER));
204
237
  memcpy(&clusters[0], &point_array[0], preclust_size * sizeof(CLUSTER));
@@ -207,11 +240,10 @@ static CLUSTER *fc_calculate_clusters(long separation, long resolution, CLUSTER
207
240
  double distance_sep = 0;
208
241
  long current_cluster_size = 0;
209
242
  int found;
210
- long nearest_origin;
243
+ long nearest_origin = 0;
211
244
  long nearest_other;
212
245
 
213
246
  do {
214
- // calculate distance sep
215
247
  distance_sep = 0;
216
248
  nearest_other = 0;
217
249
 
@@ -219,8 +251,6 @@ static CLUSTER *fc_calculate_clusters(long separation, long resolution, CLUSTER
219
251
  for(j=i+1;j<preclust_size;j++){
220
252
  double distance = fc_get_distance_between(&clusters[i], &clusters[j]);
221
253
 
222
- // printf("distance between %f, %f and %f, %f is %f\n", clusters[i].x, clusters[i].y, clusters[j].x, clusters[j].y, distance);
223
-
224
254
  if(distance_sep == 0 || distance < distance_sep) {
225
255
  distance_sep = distance;
226
256
 
@@ -232,9 +262,13 @@ static CLUSTER *fc_calculate_clusters(long separation, long resolution, CLUSTER
232
262
  }
233
263
  }
234
264
 
265
+ // If two clusters have been identified for merging, this part merges
266
+ // them into the first cluster and removes the second cluster from the array
235
267
  if(nearest_other > 0) {
268
+ // merge into first cluster
236
269
  fc_combine_clusters(&clusters[nearest_origin], &clusters[nearest_other]);
237
270
 
271
+ // remove second cluster by creating temporary array without it
238
272
  CLUSTER *newarr = malloc(preclust_size * sizeof(CLUSTER));
239
273
  memcpy(&newarr[0], &clusters[0], nearest_other * sizeof(CLUSTER));
240
274
  memcpy(&newarr[nearest_other], &clusters[nearest_other+1], (preclust_size - (nearest_other + 1)) * sizeof(CLUSTER));
@@ -243,18 +277,22 @@ static CLUSTER *fc_calculate_clusters(long separation, long resolution, CLUSTER
243
277
  clusters = (CLUSTER*)_tmp;
244
278
  preclust_size = preclust_size - 1;
245
279
 
246
- for(i=0;i<preclust_size;i++)
247
- clusters[i] = newarr[i];
280
+ // and copying it back
281
+ memcpy(&clusters[0], &newarr[0], preclust_size * sizeof(CLUSTER));
248
282
 
249
283
  free(newarr);
250
284
  }
251
-
285
+ // keep looping until either everything is in one cluster, or all clusters are
286
+ // outside the separation distance.
252
287
  } while((separation == 0 || distance_sep < separation) && preclust_size > 1);
253
288
 
254
289
  *cluster_size = preclust_size;
255
290
  return clusters;
256
291
  }
257
292
 
293
+ /*
294
+ * Get the ruby class Cluster
295
+ */
258
296
  static VALUE fc_get_cluster_class() {
259
297
  ID cluster_module_id = rb_intern("Fastcluster");
260
298
  ID cluster_class_id = rb_intern("Cluster");
@@ -278,7 +316,7 @@ static VALUE fc_get_clusters(VALUE self) {
278
316
 
279
317
  // Create a native array of clusters from the ruby array of points
280
318
  VALUE pointArray = fc_get_points(self);
281
- long num_points = RARRAY(pointArray)->len;
319
+ long num_points = RARRAY_LEN(pointArray);
282
320
  CLUSTER native_point_array[num_points];
283
321
 
284
322
  fc_native_point_array(&native_point_array[0], pointArray, num_points);
@@ -322,4 +360,4 @@ void Init_clusterer() {
322
360
 
323
361
  rb_define_method(clustererClass, "clusters", fc_get_clusters, 0);
324
362
  rb_define_method(clustererClass, "points", fc_get_points, 0);
325
- }
363
+ }
@@ -1,4 +1,4 @@
1
- require File.dirname(__FILE__) + '/../../spec_helper'
1
+ require 'spec_helper'
2
2
  require 'benchmark'
3
3
 
4
4
  describe Fastcluster::Clusterer do
@@ -117,7 +117,7 @@ describe Fastcluster::Clusterer do
117
117
 
118
118
  describe "and separation 1" do
119
119
  before do
120
- require 'lib/fastcluster'
120
+ require 'fastcluster'
121
121
  @clusterer = Fastcluster::Clusterer.new(1, 0, @points)
122
122
  end
123
123
 
@@ -219,4 +219,4 @@ describe Fastcluster::Clusterer do
219
219
  end
220
220
  end
221
221
  end
222
- end
222
+ end
data/test.rb CHANGED
@@ -1,69 +1,15 @@
1
1
  require File.dirname(__FILE__) + '/lib/fastcluster'
2
2
  require 'benchmark'
3
3
 
4
- points = [[815, 183], [860, 176], [793, 176], [847, 176], [813, 176], [865, 183],
5
- [804, 185], [813, 181], [797, 181], [193, 133], [905, 168], [821, 173],
6
- [804, 178], [799, 180], [175, 360], [880, 176], [826, 171], [843, 181],
7
- [857, 171], [443, 495], [857, 174], [382, 132], [97, 1203], [218, 281],
8
- [97, 1778], [814, 181], [474, 133], [797, 179], [844, 180], [812, 178],
9
- [846, 18], [795, 183], [862, 182], [840, 175], [788, 176], [478, 140],
10
- [860, 173], [974, 16], [833, 181], [288, 130], [831, 874], [884, 174],
11
- [834, 179], [836, 182], [181, 354], [346, 46], [855, 171], [855, 180],
12
- [959, 177], [792, 181], [138, 1774], [283, 783], [815, 877], [807, 178],
13
- [430, 497], [819, 181], [801, 176], [836, 182], [825, 176], [873, 179],
14
- [847, 182], [124, 1678], [157, 132], [835, 177], [827, 180], [532, 1191],
15
- [873, 178], [174, 128], [805, 1328], [798, 178], [502, 659], [804, 180],
16
- [960, 178], [886, 182], [867, 183], [875, 179], [854, 176], [849, 185],
17
- [136, 1783], [800, 181], [810, 180], [312, 782], [865, 177], [745, 125],
18
- [833, 178], [882, 181], [834, 177], [821, 184], [899, 182], [26, 124],
19
- [859, 182], [892, 174], [172, 134], [822, 182], [396, 135], [830, 184],
20
- [792, 185], [427, 488], [818, 173], [832, 177], [406, 129], [852, 181],
21
- [805, 177], [820, 175], [19, 14], [839, 183], [264, 127], [800, 172],
22
- [811, 176], [827, 178], [896, 177], [863, 183], [813, 177], [458, 495],
23
- [808, 181], [850, 748], [810, 184], [850, 181], [886, 179], [446, 497],
24
- [851, 180], [592, 132], [300, 538], [794, 180], [815, 180], [57, 1772],
25
- [799, 175], [821, 182], [135, 1681], [830, 483], [796, 173], [887, 40],
26
- [823, 179], [864, 179], [322, 30], [886, 182], [808, 178], [823, 189],
27
- [230, 360], [224, 275], [875, 179], [133, 1773], [844, 175], [433, 495],
28
- [13, 15], [856, 178], [871, 181], [658, 130], [839, 183], [822, 83],
29
- [827, 175], [848, 179], [894, 184], [833, 177], [828, 176], [482, 135],
30
- [841, 177], [817, 184], [470, 140], [800, 180], [857, 15], [807, 187],
31
- [425, 497], [64, 131], [852, 580], [883, 183], [836, 181], [878, 177],
32
- [815, 183], [860, 176], [793, 176], [847, 176], [813, 176], [865, 183],
33
- [804, 185], [813, 181], [797, 181], [193, 133], [905, 168], [821, 173],
34
- [804, 178], [799, 180], [175, 360], [880, 176], [826, 171], [843, 181],
35
- [857, 171], [443, 495], [857, 174], [382, 132], [97, 1203], [218, 281],
36
- [97, 1778], [814, 181], [474, 133], [797, 179], [844, 180], [812, 178],
37
- [846, 18], [795, 183], [862, 182], [840, 175], [788, 176], [478, 140],
38
- [860, 173], [974, 16], [833, 181], [288, 130], [831, 874], [884, 174],
39
- [834, 179], [836, 182], [181, 354], [346, 46], [855, 171], [855, 180],
40
- [959, 177], [792, 181], [138, 1774], [283, 783], [815, 877], [807, 178],
41
- [430, 497], [819, 181], [801, 176], [836, 182], [825, 176], [873, 179],
42
- [847, 182], [124, 1678], [157, 132], [835, 177], [827, 180], [532, 1191],
43
- [873, 178], [174, 128], [805, 1328], [798, 178], [502, 659], [804, 180],
44
- [960, 178], [886, 182], [867, 183], [875, 179], [854, 176], [849, 185],
45
- [136, 1783], [800, 181], [810, 180], [312, 782], [865, 177], [745, 125],
46
- [833, 178], [882, 181], [834, 177], [821, 184], [899, 182], [26, 124],
47
- [859, 182], [892, 174], [172, 134], [822, 182], [396, 135], [830, 184],
48
- [792, 185], [427, 488], [818, 173], [832, 177], [406, 129], [852, 181],
49
- [805, 177], [820, 175], [19, 14], [839, 183], [264, 127], [800, 172],
50
- [811, 176], [827, 178], [896, 177], [863, 183], [813, 177], [458, 495],
51
- [808, 181], [850, 748], [810, 184], [850, 181], [886, 179], [446, 497],
52
- [851, 180], [592, 132], [300, 538], [794, 180], [815, 180], [57, 1772],
53
- [799, 175], [821, 182], [135, 1681], [830, 483], [796, 173], [887, 40],
54
- [823, 179], [864, 179], [322, 30], [886, 182], [808, 178], [823, 189],
55
- [230, 360], [224, 275], [875, 179], [133, 1773], [844, 175], [433, 495],
56
- [13, 15], [856, 178], [871, 181], [658, 130], [839, 183], [822, 83],
57
- [827, 175], [848, 179], [894, 184], [833, 177], [828, 176], [482, 135],
58
- [841, 177], [817, 184], [470, 140], [800, 180], [857, 15], [807, 187],
59
- [425, 497], [64, 131], [852, 580], [883, 183], [836, 181], [878, 177]]
4
+ points = [[237, 434], [282, 435], [281, 429], [241, 427], [259, 434], [499, 218], [254, 431], [222, 433], [253, 441], [212, 440], [252, 432], [279, 433], [248, 428], [249, 202], [249, 202], [252, 202], [252, 202], [562, 402], [728, 23], [227, 424], [267, 428], [247, 438], [290, 452]]
60
5
 
61
6
  puts Benchmark.measure {
62
- clusterer = Fastcluster::Clusterer.new(105, 5, points)
63
- clusters = clusterer.clusters
7
+ clusterer = Fastcluster::Clusterer.new(25, 15, points)
8
+ clusters = clusterer.clusters
9
+ puts clusters.inspect
64
10
 
65
- clusters.sort{|a,b| a.size == b.size ? a.x <=> b.x : a.size <=> b.size }.each do |cluster|
66
- puts cluster
67
- end
11
+ #clusters.sort{|a,b| a.size == b.size ? a.x <=> b.x : a.size <=> b.size }.each do |cluster|
12
+ # puts cluster
13
+ #end
68
14
 
69
15
  }
metadata CHANGED
@@ -1,80 +1,70 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: fastcluster
3
- version: !ruby/object:Gem::Version
4
- version: "0.9"
3
+ version: !ruby/object:Gem::Version
4
+ version: '1.0'
5
+ prerelease:
5
6
  platform: ruby
6
- authors:
7
+ authors:
7
8
  - Jeremy Wells
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
-
12
- date: 2009-10-24 00:00:00 +13:00
12
+ date: 2011-07-09 00:00:00.000000000 +12:00
13
13
  default_executable:
14
14
  dependencies: []
15
-
16
15
  description: A clustering library for 2 dimensional points
17
- email: jeremy@boost.co.nz
16
+ email:
17
+ - jemmyw@gmail.com
18
18
  executables: []
19
-
20
- extensions:
19
+ extensions:
21
20
  - ext/extconf.rb
22
- extra_rdoc_files:
23
- - CHANGELOG
24
- - README.rdoc
25
- - ext/clusterer.c
21
+ extra_rdoc_files: []
22
+ files:
26
23
  - ext/extconf.rb
27
- - lib/fastcluster.rb
24
+ - ext/clusterer.c
28
25
  - lib/fastcluster/cluster.rb
29
- files:
26
+ - lib/fastcluster.rb
30
27
  - CHANGELOG
31
- - Manifest
32
28
  - README.rdoc
33
- - Rakefile
34
- - ext/clusterer.c
35
- - ext/extconf.rb
36
- - lib/fastcluster.rb
37
- - lib/fastcluster/cluster.rb
38
29
  - spec/lib/fastcluster/cluster_spec.rb
39
30
  - spec/lib/fastcluster/clusterer_spec.rb
40
- - spec/spec.opts
41
31
  - spec/spec_helper.rb
42
32
  - spec/test_data.rb
43
33
  - test.rb
44
- - fastcluster.gemspec
45
34
  has_rdoc: true
46
35
  homepage: http://github.com/jemmyw/fastcluster
47
36
  licenses: []
48
-
49
37
  post_install_message:
50
- rdoc_options:
51
- - --line-numbers
52
- - --inline-source
53
- - --title
54
- - Fastcluster
55
- - --main
56
- - README.rdoc
57
- require_paths:
38
+ rdoc_options: []
39
+ require_paths:
58
40
  - lib
59
- - ext
60
- required_ruby_version: !ruby/object:Gem::Requirement
61
- requirements:
62
- - - ">="
63
- - !ruby/object:Gem::Version
64
- version: "0"
65
- version:
66
- required_rubygems_version: !ruby/object:Gem::Requirement
67
- requirements:
68
- - - ">="
69
- - !ruby/object:Gem::Version
70
- version: "1.2"
71
- version:
41
+ required_ruby_version: !ruby/object:Gem::Requirement
42
+ none: false
43
+ requirements:
44
+ - - ! '>='
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ segments:
48
+ - 0
49
+ hash: 716520748893881833
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ! '>='
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ segments:
57
+ - 0
58
+ hash: 716520748893881833
72
59
  requirements: []
73
-
74
- rubyforge_project: fastcluster
75
- rubygems_version: 1.3.5
60
+ rubyforge_project:
61
+ rubygems_version: 1.6.2
76
62
  signing_key:
77
63
  specification_version: 3
78
64
  summary: A clustering library for 2 dimensional points
79
- test_files: []
80
-
65
+ test_files:
66
+ - spec/lib/fastcluster/cluster_spec.rb
67
+ - spec/lib/fastcluster/clusterer_spec.rb
68
+ - spec/spec_helper.rb
69
+ - spec/test_data.rb
70
+ - test.rb
data/Manifest DELETED
@@ -1,14 +0,0 @@
1
- CHANGELOG
2
- Manifest
3
- README.rdoc
4
- Rakefile
5
- ext/clusterer.c
6
- ext/extconf.rb
7
- lib/fastcluster.rb
8
- lib/fastcluster/cluster.rb
9
- spec/lib/fastcluster/cluster_spec.rb
10
- spec/lib/fastcluster/clusterer_spec.rb
11
- spec/spec.opts
12
- spec/spec_helper.rb
13
- spec/test_data.rb
14
- test.rb
data/Rakefile DELETED
@@ -1,12 +0,0 @@
1
- require 'rubygems'
2
- require 'spec'
3
- require 'spec/rake/spectask'
4
- require 'echoe'
5
-
6
- Echoe.new("fastcluster") do |p|
7
- p.author = "Jeremy Wells"
8
- p.email = "jeremy@boost.co.nz"
9
- p.summary = "A clustering library for 2 dimensional points"
10
- p.description = "A clustering library for 2 dimensional points"
11
- p.url = "http://github.com/jemmyw/fastcluster"
12
- end
@@ -1,31 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- Gem::Specification.new do |s|
4
- s.name = %q{fastcluster}
5
- s.version = "0.9"
6
-
7
- s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
- s.authors = ["Jeremy Wells"]
9
- s.date = %q{2009-10-24}
10
- s.description = %q{A clustering library for 2 dimensional points}
11
- s.email = %q{jeremy@boost.co.nz}
12
- s.extensions = ["ext/extconf.rb"]
13
- s.extra_rdoc_files = ["CHANGELOG", "README.rdoc", "ext/clusterer.c", "ext/extconf.rb", "lib/fastcluster.rb", "lib/fastcluster/cluster.rb"]
14
- s.files = ["CHANGELOG", "Manifest", "README.rdoc", "Rakefile", "ext/clusterer.c", "ext/extconf.rb", "lib/fastcluster.rb", "lib/fastcluster/cluster.rb", "spec/lib/fastcluster/cluster_spec.rb", "spec/lib/fastcluster/clusterer_spec.rb", "spec/spec.opts", "spec/spec_helper.rb", "spec/test_data.rb", "test.rb", "fastcluster.gemspec"]
15
- s.homepage = %q{http://github.com/jemmyw/fastcluster}
16
- s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Fastcluster", "--main", "README.rdoc"]
17
- s.require_paths = ["lib", "ext"]
18
- s.rubyforge_project = %q{fastcluster}
19
- s.rubygems_version = %q{1.3.5}
20
- s.summary = %q{A clustering library for 2 dimensional points}
21
-
22
- if s.respond_to? :specification_version then
23
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
24
- s.specification_version = 3
25
-
26
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
27
- else
28
- end
29
- else
30
- end
31
- end
@@ -1,5 +0,0 @@
1
- --colour
2
- --format progress
3
- --loadby mtime
4
- --reverse
5
- --debugger