fastcluster 0.9

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,2 @@
1
+ v0.9 In testing
2
+ v0.1 Initial version
@@ -0,0 +1,14 @@
1
+ CHANGELOG
2
+ Manifest
3
+ README.rdoc
4
+ Rakefile
5
+ ext/clusterer.c
6
+ ext/extconf.rb
7
+ lib/fastcluster.rb
8
+ lib/fastcluster/cluster.rb
9
+ spec/lib/fastcluster/cluster_spec.rb
10
+ spec/lib/fastcluster/clusterer_spec.rb
11
+ spec/spec.opts
12
+ spec/spec_helper.rb
13
+ spec/test_data.rb
14
+ test.rb
@@ -0,0 +1,59 @@
1
+
2
+ = Simple and fast clustering library
3
+
4
+ * http://github.com/jemmyw/fastcluster
5
+
6
+ == DESCRIPTION
7
+
8
+ This gem provides a really simple way to cluster 2 dimensional points. It is based
9
+ on the Hierclust[http://hierclust.rubyforge.org/] gem by Brandt Kurowski,
10
+ except that it does not cluster hierarchically. The aim of this gem is to provide
11
+ the same clustering algorithm but to be as fast as possible.
12
+
13
+ == INSTALL:
14
+
15
+ * sudo gem install fastcluster
16
+
17
+ == EXAMPLE:
18
+
19
+ require 'fastcluster'
20
+ points = [[1, 1], [1, 2], [5, 9]]
21
+ clusterer = Fastcluster::Clusterer.new(3, 0, points)
22
+ clusterer.clusters.each do |cluster|
23
+ puts cluster.x
24
+ puts cluster.y
25
+ puts cluster.size
26
+ end
27
+
28
+ == ALGORITHM:
29
+
30
+ 1. All points are initially clusters with size 1
31
+ 2. Precluster - create a grid of size [resolution] and cluster the points in each grid space automatically
32
+ 3. Combine the two closest clusters; the new cluster has the summed size and the size-weighted average position
32
+ of the two clusters.
34
+ 4. Loop to 3 until no cluster is less than [separation] apart or only one cluster remains
35
+
36
+ == LICENSE:
37
+
38
+ (The MIT License)
39
+
40
+ Copyright (c) 2009 Jeremy Wells
41
+
42
+ Permission is hereby granted, free of charge, to any person obtaining
43
+ a copy of this software and associated documentation files (the
44
+ 'Software'), to deal in the Software without restriction, including
45
+ without limitation the rights to use, copy, modify, merge, publish,
46
+ distribute, sublicense, and/or sell copies of the Software, and to
47
+ permit persons to whom the Software is furnished to do so, subject to
48
+ the following conditions:
49
+
50
+ The above copyright notice and this permission notice shall be
51
+ included in all copies or substantial portions of the Software.
52
+
53
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
54
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
55
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
56
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
57
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
58
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
59
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,12 @@
1
+ require 'rubygems'
2
+ require 'spec'
3
+ require 'spec/rake/spectask'
4
+ require 'echoe'
5
+
6
+ Echoe.new("fastcluster") do |p|
7
+ p.author = "Jeremy Wells"
8
+ p.email = "jeremy@boost.co.nz"
9
+ p.summary = "A clustering library for 2 dimensional points"
10
+ p.description = "A clustering library for 2 dimensional points"
11
+ p.url = "http://github.com/jemmyw/fastcluster"
12
+ end
@@ -0,0 +1,325 @@
1
+ #include <ruby.h>
2
+ #include <stdio.h>
3
+ #include <stdlib.h>
4
+ #include <math.h>
5
+
6
+ /*
7
+ *
8
+ * Algorithm:
9
+ * all points are initially clusters with size 1
10
+ * precluster - create a grid of size @resolution and cluster the points in each grid space automatically
11
+ * loop until no cluster is less than @separation apart
12
+ * combine the two closest clusters; the new cluster has the summed size and the size-weighted average position
13
+ * of the two clusters.
14
+ **/
15
+ typedef struct {
16
+ double x;
17
+ double y;
18
+ long size;
19
+ } CLUSTER;
20
+
21
+ /*
22
+ * An array of points to be clustered.
23
+ */
24
+ static VALUE fc_get_points(VALUE self) {
25
+ return rb_iv_get(self, "@points");
26
+ }
27
+
28
+ /*
29
+ * call-seq:
30
+ * add(x, y) -> nil
31
+ *
32
+ * Add a point to this clusterer.
33
+ */
34
+ static VALUE fc_add_point(VALUE self, VALUE x, VALUE y) {
35
+ long len = 2;
36
+ VALUE holdArray = rb_ary_new3(2, x, y);
37
+ VALUE pointArray = fc_get_points(self);
38
+ rb_ary_push(pointArray, holdArray);
39
+
40
+ return Qnil;
41
+ }
42
+
43
+ /*
44
+ * call-seq:
45
+ * <<(point) -> nil
46
+ *
47
+ * Add a point to this clusterer. The point must be in the format
48
+ * of an array with two numbers.
49
+ *
50
+ * Example:
51
+ * clusterer << [1, 2]
52
+ */
53
+ static VALUE fc_append_point(VALUE self, VALUE point) {
54
+ VALUE pointArray = fc_get_points(self);
55
+ rb_ary_push(pointArray, point);
56
+ return Qnil;
57
+ }
58
+
59
+ /*
60
+ * Calculate the distance (pythag) between two cluster points
61
+ */
62
+ static double fc_get_distance_between(CLUSTER * one, CLUSTER * two) {
63
+ double rr = pow((long)one->x - (long)two->x, 2) + pow((long)one->y - (long)two->y, 2);
64
+ return sqrt(rr);
65
+ }
66
+
67
+ /*
68
+ * Add a point to a cluster. This increments the size and calculates the average between
69
+ * the current cluster position and the new point.
70
+ */
71
+ static void fc_add_to_cluster(CLUSTER * dst, double x, double y) {
72
+ dst->x = ((dst->x * dst->size) + x) / (dst->size + 1);
73
+ dst->y = ((dst->y * dst->size) + y) / (dst->size + 1);
74
+ dst->size++;
75
+ }
76
+
77
+ /*
78
+ * Combine two clusters into one with an average center point
79
+ */
80
+ static void fc_combine_clusters(CLUSTER * dst, CLUSTER * src) {
81
+ dst->x = (dst->x*dst->size + src->x*src->size) / (dst->size+src->size);
82
+ dst->y = (dst->y*dst->size + src->y*src->size) / (dst->size+src->size);
83
+ dst->size = dst->size + src->size;
84
+ }
85
+
86
+ /*
87
+ * Get the maximum grid size
88
+ */
89
+ static long fc_get_max_grid(long resolution, CLUSTER * point_array, long num_points) {
90
+ int i;
91
+ int max_grid = 0;
92
+ for(i = 0; i < num_points; i++) {
93
+ CLUSTER * point = &point_array[i];
94
+ int xg = point->x/resolution;
95
+ int yg = point->y/resolution;
96
+ if(xg>max_grid)
97
+ max_grid = xg;
98
+ if(yg>max_grid)
99
+ max_grid = yg;
100
+ }
101
+ return max_grid+1;
102
+ }
103
+
104
+ /*
105
+ * call-seq:
106
+ * new(separation = 0, resolution = 0, points = nil)
107
+ *
108
+ * Create a new Clusterer. The new method accepts 3 optional arguments, separation,
109
+ * resolution and points.
110
+ *
111
+ * <tt>separation</tt> - The distance between clusters. The higher this number, the
112
+ * fewer clusters there will be. If this is 0 then no minimum separation applies and all points are merged into a single cluster.
113
+ *
114
+ * <tt>resolution</tt> - If specified then the points are placed on a grid with each grid square
115
+ * being this size. Points falling in the same grid square are automatically clustered.
116
+ * This option should be specified when clustering a large number of points, to reduce processing time.
117
+ *
118
+ * <tt>points</tt> - An array of points. Each array item must be an array with
119
+ * two numbers (x, y). Example: <code>[[1, 2], [3, 4]]</code>.
120
+ */
121
+ static VALUE fc_initialize_clusterer(int argc, VALUE *argv, VALUE self) {
122
+ if(argc > 0)
123
+ rb_iv_set(self, "@separation", argv[0]);
124
+ else
125
+ rb_iv_set(self, "@separation", INT2FIX(0));
126
+
127
+ if(argc > 1)
128
+ rb_iv_set(self, "@resolution", argv[1]);
129
+ else
130
+ rb_iv_set(self, "@resolution", INT2FIX(0));
131
+
132
+ VALUE pointArray = rb_ary_new();
133
+ rb_iv_set(self, "@points", pointArray);
134
+
135
+ if(argc > 2) {
136
+ if(TYPE(argv[2]) == T_ARRAY) {
137
+ rb_iv_set(self, "@points", argv[2]);
138
+ }
139
+ }
140
+
141
+ return Qnil;
142
+ }
143
+
144
+ /*
145
+ * Turn the ruby array of points (format [[x,y], [x,y]]) into an array of
146
+ * CLUSTER
147
+ */
148
+ static void fc_native_point_array(CLUSTER * arrayPtr, VALUE rubyArray, long num_points) {
149
+ int i;
150
+ for(i=0;i<num_points;i++) {
151
+ VALUE holdArray = RARRAY(rubyArray)->ptr[i];
152
+ double x = NUM2DBL(RARRAY(holdArray)->ptr[0]);
153
+ double y = NUM2DBL(RARRAY(holdArray)->ptr[1]);
154
+
155
+ arrayPtr[i].x = x;
156
+ arrayPtr[i].y = y;
157
+ arrayPtr[i].size = 1;
158
+ }
159
+ }
160
+
161
+ static CLUSTER *fc_calculate_clusters(long separation, long resolution, CLUSTER * point_array, int num_points, long * cluster_size) {
162
+ int max_grid = fc_get_max_grid(resolution, &point_array[0], num_points);
163
+ int i, j;
164
+ long preclust_size = 0;
165
+
166
+ CLUSTER * cluster;
167
+ CLUSTER * clusters;
168
+
169
+ if(resolution > 0) {
170
+ CLUSTER grid_array[max_grid][max_grid];
171
+
172
+ for(i=0;i<max_grid;i++) {
173
+ for(j=0;j<max_grid;j++) {
174
+ grid_array[i][j].size = 0;
175
+ }
176
+ }
177
+
178
+ for(i = 0; i < num_points; i++) {
179
+ cluster = &point_array[i];
180
+
181
+ int gx = floor(cluster->x/resolution);
182
+ int gy = floor(cluster->y/resolution);
183
+
184
+ fc_add_to_cluster(&grid_array[gx][gy], cluster->x, cluster->y);
185
+
186
+ if(grid_array[gx][gy].size == 1) preclust_size++;
187
+ }
188
+
189
+ clusters = malloc(preclust_size * sizeof(CLUSTER));
190
+
191
+ int max_grid_total = max_grid * max_grid;
192
+ CLUSTER * gridPtr = grid_array[0];
193
+
194
+ int incr = 0;
195
+ for(i=0;i<max_grid_total;i++) {
196
+ if(gridPtr[i].size > 0) {
197
+ clusters[incr] = gridPtr[i];
198
+ incr++;
199
+ }
200
+ }
201
+ } else {
202
+ preclust_size = num_points;
203
+ clusters = malloc(preclust_size * sizeof(CLUSTER));
204
+ memcpy(&clusters[0], &point_array[0], preclust_size * sizeof(CLUSTER));
205
+ }
206
+
207
+ double distance_sep = 0;
208
+ long current_cluster_size = 0;
209
+ int found;
210
+ long nearest_origin;
211
+ long nearest_other;
212
+
213
+ do {
214
+ // calculate distance sep
215
+ distance_sep = 0;
216
+ nearest_other = 0;
217
+
218
+ for(i=0;i<preclust_size;i++){
219
+ for(j=i+1;j<preclust_size;j++){
220
+ double distance = fc_get_distance_between(&clusters[i], &clusters[j]);
221
+
222
+ // printf("distance between %f, %f and %f, %f is %f\n", clusters[i].x, clusters[i].y, clusters[j].x, clusters[j].y, distance);
223
+
224
+ if(distance_sep == 0 || distance < distance_sep) {
225
+ distance_sep = distance;
226
+
227
+ if(distance < separation || separation == 0) {
228
+ nearest_origin = i;
229
+ nearest_other = j;
230
+ }
231
+ }
232
+ }
233
+ }
234
+
235
+ if(nearest_other > 0) {
236
+ fc_combine_clusters(&clusters[nearest_origin], &clusters[nearest_other]);
237
+
238
+ CLUSTER *newarr = malloc(preclust_size * sizeof(CLUSTER));
239
+ memcpy(&newarr[0], &clusters[0], nearest_other * sizeof(CLUSTER));
240
+ memcpy(&newarr[nearest_other], &clusters[nearest_other+1], (preclust_size - (nearest_other + 1)) * sizeof(CLUSTER));
241
+
242
+ void *_tmp = realloc(clusters, ((preclust_size-1) * sizeof(CLUSTER)));
243
+ clusters = (CLUSTER*)_tmp;
244
+ preclust_size = preclust_size - 1;
245
+
246
+ for(i=0;i<preclust_size;i++)
247
+ clusters[i] = newarr[i];
248
+
249
+ free(newarr);
250
+ }
251
+
252
+ } while((separation == 0 || distance_sep < separation) && preclust_size > 1);
253
+
254
+ *cluster_size = preclust_size;
255
+ return clusters;
256
+ }
257
+
258
+ static VALUE fc_get_cluster_class() {
259
+ ID cluster_module_id = rb_intern("Fastcluster");
260
+ ID cluster_class_id = rb_intern("Cluster");
261
+ VALUE cluster_module = rb_const_get(rb_cObject, cluster_module_id);
262
+ return rb_const_get(cluster_module, cluster_class_id);
263
+ }
264
+
265
+ /*
266
+ * Return the clusters found for the points in this clusterer. This will be an
267
+ * array of Cluster objects.
268
+ *
269
+ * Example:
270
+ * clusterer = Fastcluster::Clusterer.new(3, 0, [[1, 1], [1, 2], [5, 9]])
271
+ * clusterer.clusters -> [(1.00, 1.50): 2, (5.00, 9.00): 1]
272
+ */
273
+ static VALUE fc_get_clusters(VALUE self) {
274
+ // Get the separation and resolution from ruby
275
+ long separation = NUM2INT(rb_iv_get(self, "@separation"));
276
+ long resolution = NUM2INT(rb_iv_get(self, "@resolution"));
277
+ int i;
278
+
279
+ // Create a native array of clusters from the ruby array of points
280
+ VALUE pointArray = fc_get_points(self);
281
+ long num_points = RARRAY(pointArray)->len;
282
+ CLUSTER native_point_array[num_points];
283
+
284
+ fc_native_point_array(&native_point_array[0], pointArray, num_points);
285
+
286
+ // Calculate the clusters
287
+ CLUSTER * clusters = NULL;
288
+ long cluster_size;
289
+
290
+ clusters = fc_calculate_clusters(separation, resolution, &native_point_array[0], num_points, &cluster_size);
291
+
292
+ // Create ruby array of clusters to return
293
+ VALUE cluster_class = fc_get_cluster_class();
294
+ VALUE ruby_cluster_array = rb_ary_new2(cluster_size);
295
+
296
+ for(i=0;i<cluster_size;i++) {
297
+ int arg_count = 3;
298
+ VALUE arg_array[arg_count];
299
+
300
+ arg_array[0] = rb_float_new(clusters[i].x);
301
+ arg_array[1] = rb_float_new(clusters[i].y);
302
+ arg_array[2] = INT2FIX(clusters[i].size);
303
+
304
+ VALUE cluster_obj = rb_class_new_instance(arg_count, arg_array, cluster_class);
305
+ rb_ary_push(ruby_cluster_array, cluster_obj);
306
+ }
307
+
308
+ // Free the clusters array
309
+ free(clusters);
310
+
311
+ return ruby_cluster_array;
312
+ }
313
+
314
+ void Init_clusterer() {
315
+ VALUE clustererModule = rb_define_module("Fastcluster");
316
+ VALUE clustererClass = rb_define_class_under(clustererModule, "Clusterer", rb_cObject);
317
+
318
+ rb_define_method(clustererClass, "initialize", fc_initialize_clusterer, -1);
319
+ rb_define_method(clustererClass, "add", fc_add_point, 2);
320
+
321
+ rb_define_method(clustererClass, "<<", fc_append_point, 1);
322
+
323
+ rb_define_method(clustererClass, "clusters", fc_get_clusters, 0);
324
+ rb_define_method(clustererClass, "points", fc_get_points, 0);
325
+ }
@@ -0,0 +1,4 @@
1
+ require 'mkmf'
2
+ extension_name = 'clusterer'
3
+ dir_config(extension_name)
4
+ create_makefile(extension_name)
@@ -0,0 +1,31 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ Gem::Specification.new do |s|
4
+ s.name = %q{fastcluster}
5
+ s.version = "0.9"
6
+
7
+ s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
+ s.authors = ["Jeremy Wells"]
9
+ s.date = %q{2009-10-24}
10
+ s.description = %q{A clustering library for 2 dimensional points}
11
+ s.email = %q{jeremy@boost.co.nz}
12
+ s.extensions = ["ext/extconf.rb"]
13
+ s.extra_rdoc_files = ["CHANGELOG", "README.rdoc", "ext/clusterer.c", "ext/extconf.rb", "lib/fastcluster.rb", "lib/fastcluster/cluster.rb"]
14
+ s.files = ["CHANGELOG", "Manifest", "README.rdoc", "Rakefile", "ext/clusterer.c", "ext/extconf.rb", "lib/fastcluster.rb", "lib/fastcluster/cluster.rb", "spec/lib/fastcluster/cluster_spec.rb", "spec/lib/fastcluster/clusterer_spec.rb", "spec/spec.opts", "spec/spec_helper.rb", "spec/test_data.rb", "test.rb", "fastcluster.gemspec"]
15
+ s.homepage = %q{http://github.com/jemmyw/fastcluster}
16
+ s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Fastcluster", "--main", "README.rdoc"]
17
+ s.require_paths = ["lib", "ext"]
18
+ s.rubyforge_project = %q{fastcluster}
19
+ s.rubygems_version = %q{1.3.5}
20
+ s.summary = %q{A clustering library for 2 dimensional points}
21
+
22
+ if s.respond_to? :specification_version then
23
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
24
+ s.specification_version = 3
25
+
26
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
27
+ else
28
+ end
29
+ else
30
+ end
31
+ end
@@ -0,0 +1,4 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+
3
+ require 'fastcluster/cluster'
4
+ require File.dirname(__FILE__) + '/clusterer'
@@ -0,0 +1,25 @@
1
+ module Fastcluster
2
+ class Cluster
3
+ include Comparable
4
+
5
+ attr_reader :x, :y, :size
6
+
7
+ def initialize(x, y, size)
8
+ @x = x
9
+ @y = y
10
+ @size = size
11
+ end
12
+
13
+ def <=>(anOther)
14
+ size <=> anOther.size
15
+ end
16
+
17
+ def inspect
18
+ to_s
19
+ end
20
+
21
+ def to_s
22
+ '(%0.2f, %0.2f): %d' % [@x, @y, @size]
23
+ end
24
+ end
25
+ end
File without changes
@@ -0,0 +1,222 @@
1
+ require File.dirname(__FILE__) + '/../../spec_helper'
2
+ require 'benchmark'
3
+
4
+ describe Fastcluster::Clusterer do
5
+ before do
6
+ @points = POINTS
7
+ end
8
+
9
+ it 'should allow setting points in initializer' do
10
+ @clusterer = Fastcluster::Clusterer.new(105, 5, @points)
11
+ @clusterer.points.size.should == 168
12
+ end
13
+
14
+ describe 'instance' do
15
+ before do
16
+ @clusterer = Fastcluster::Clusterer.new(105, 5)
17
+ end
18
+
19
+ describe '#add' do
20
+ it 'should add an x y point to the clusterer' do
21
+ @clusterer.add(5, 10)
22
+ @clusterer.points.size.should == 1
23
+ @clusterer.points.first.should == [5, 10]
24
+ end
25
+ end
26
+
27
+ describe '#<<' do
28
+ it 'should add the value to the clusterer' do
29
+ @clusterer << [5, 10]
30
+ @clusterer.points.size.should == 1
31
+ @clusterer.points.first.should == [5, 10]
32
+ end
33
+ end
34
+ end
35
+
36
+ describe '#clusters' do
37
+ describe 'with large test' do
38
+ before do
39
+ @clusterer = Fastcluster::Clusterer.new(105, 5, @points)
40
+ @clusters = @clusterer.clusters.sort{|a, b| a.size == b.size ? a.x <=> b.x : a.size <=> b.size }
41
+ end
42
+
43
+ it 'should take less than 1 second' do
44
+ time = Benchmark.measure { @clusterer.clusters }
45
+ time.total.should be < 1
46
+ end
47
+
48
+ it 'should have as many cluster points as data points' do
49
+ @clusters.inject(0){|m, n| m + n.size }.should == @points.size
50
+ end
51
+
52
+ it 'should have 23 clusters' do
53
+ @clusters.size.should == 23
54
+ end
55
+
56
+ it 'should have a cluster of 108 points at 836 by 178' do
57
+ @clusters.last.size.should == 108
58
+ @clusters.last.x.should be_close(836, 1)
59
+ @clusters.last.y.should be_close(178, 1)
60
+ end
61
+
62
+ it 'should have a cluster of 1 point at 97 by 1203' do
63
+ @clusters.first.size.should == 1
64
+ @clusters.first.x.should be_close(97, 1)
65
+ @clusters.first.y.should be_close(1203, 1)
66
+ end
67
+ end
68
+
69
+ describe "with two points" do
70
+ before do
71
+ @clusterer = Fastcluster::Clusterer.new(0, 0, [[1, 5], [2, 8]])
72
+ end
73
+
74
+ it "should return one cluster" do
75
+ @clusterer.clusters.size.should == 1
76
+ end
77
+
78
+ it "should have two points in the cluster" do
79
+ @clusterer.clusters.first.size.should == 2
80
+ end
81
+ end
82
+
83
+ describe "with three points" do
84
+ before do
85
+ @clusterer = Fastcluster::Clusterer.new(0, 0, [[1, 2], [5, 6], [2, 3]])
86
+ @clusters = @clusterer.clusters
87
+ end
88
+
89
+ it "should return one cluster" do
90
+ @clusters.size.should == 1
91
+ end
92
+
93
+ it "containing three items" do
94
+ @clusters.first.size.should == 3
95
+ end
96
+ end
97
+
98
+ describe "with four points" do
99
+ before do
100
+ @points = [
101
+ [0, 1],
102
+ [1, 0],
103
+ [3, 4],
104
+ [4, 3],
105
+ ]
106
+ end
107
+
108
+ describe "and no separation" do
109
+ before do
110
+ @clusterer = Fastcluster::Clusterer.new(0, 0, @points)
111
+ end
112
+
113
+ it "should return one cluster" do
114
+ @clusterer.clusters.size.should == 1
115
+ end
116
+ end
117
+
118
+ describe "and separation 1" do
119
+ before do
120
+ require 'lib/fastcluster'
121
+ @clusterer = Fastcluster::Clusterer.new(1, 0, @points)
122
+ end
123
+
124
+ it "should return all four individual points" do
125
+ @clusterer.clusters.size.should == 4
126
+ end
127
+ end
128
+
129
+ describe "and separation 2" do
130
+ before do
131
+ @clusterer = Fastcluster::Clusterer.new(2, 0, @points)
132
+ end
133
+
134
+ it "should return two clusters" do
135
+ @clusterer.clusters.size.should == 2
136
+ end
137
+ end
138
+ end
139
+
140
+ describe "with eight points" do
141
+ before do
142
+ @points = [
143
+ [0, 1],
144
+ [1, 0],
145
+ [3, 4],
146
+ [4, 3],
147
+ [7, 8],
148
+ [8, 7],
149
+ [8, 9],
150
+ [9, 8]
151
+ ]
152
+ end
153
+
154
+ describe "and no separation" do
155
+ before do
156
+ @clusterer = Fastcluster::Clusterer.new(0, 0)
157
+ @points.each do |point|
158
+ @clusterer << point
159
+ end
160
+ end
161
+
162
+ it "should return one cluster when no minimum separation is given" do
163
+ @clusterer.clusters.size.should == 1
164
+ end
165
+ end
166
+
167
+ describe "and separation 1" do
168
+ before do
169
+ @clusterer = Fastcluster::Clusterer.new(1, 0)
170
+ @points.each do |point|
171
+ @clusterer << point
172
+ end
173
+ end
174
+
175
+ it "should have all eight points in individual clusters" do
176
+ @clusterer.clusters.size.should == 8
177
+ end
178
+ end
179
+
180
+ describe "and separation 3" do
181
+ describe "with no resolution limit" do
182
+ before do
183
+ @clusterer = Fastcluster::Clusterer.new(3, 0)
184
+ @points.each do |point|
185
+ @clusterer << point
186
+ end
187
+ @clusters = @clusterer.clusters.sort
188
+ end
189
+
190
+ it "should have three clusters" do
191
+ @clusters.size.should == 3
192
+ end
193
+
194
+ it "should have clusters size 2, 2, and 4 " do
195
+ @clusters[0].size.should == 2
196
+ @clusters[1].size.should == 2
197
+ @clusters[2].size.should == 4
198
+ end
199
+ end
200
+
201
+ describe "with coarse resolution" do
202
+ before do
203
+ @clusterer = Fastcluster::Clusterer.new(3, 5)
204
+ @points.each do |point|
205
+ @clusterer << point
206
+ end
207
+ @clusters = @clusterer.clusters.sort
208
+ end
209
+
210
+ it "should have three clusters" do
211
+ @clusters.size.should == 2
212
+ end
213
+
214
+ it "should have clusters size 2, 2, and 4 " do
215
+ @clusters[0].size.should == 4
216
+ @clusters[1].size.should == 4
217
+ end
218
+ end
219
+ end
220
+ end
221
+ end
222
+ end
@@ -0,0 +1,5 @@
1
+ --colour
2
+ --format progress
3
+ --loadby mtime
4
+ --reverse
5
+ --debugger
@@ -0,0 +1,10 @@
1
+ begin
2
+ require 'spec'
3
+ rescue LoadError
4
+ require 'rubygems'
5
+ gem 'rspec'
6
+ require 'spec'
7
+ end
8
+
9
+ require File.dirname(__FILE__) + '/../lib/fastcluster'
10
+ require File.dirname(__FILE__) + '/test_data.rb'
@@ -0,0 +1,28 @@
1
+ POINTS = [[815, 183], [860, 176], [793, 176], [847, 176], [813, 176], [865, 183],
2
+ [804, 185], [813, 181], [797, 181], [193, 133], [905, 168], [821, 173],
3
+ [804, 178], [799, 180], [175, 360], [880, 176], [826, 171], [843, 181],
4
+ [857, 171], [443, 495], [857, 174], [382, 132], [97, 1203], [218, 281],
5
+ [97, 1778], [814, 181], [474, 133], [797, 179], [844, 180], [812, 178],
6
+ [846, 18], [795, 183], [862, 182], [840, 175], [788, 176], [478, 140],
7
+ [860, 173], [974, 16], [833, 181], [288, 130], [831, 874], [884, 174],
8
+ [834, 179], [836, 182], [181, 354], [346, 46], [855, 171], [855, 180],
9
+ [959, 177], [792, 181], [138, 1774], [283, 783], [815, 877], [807, 178],
10
+ [430, 497], [819, 181], [801, 176], [836, 182], [825, 176], [873, 179],
11
+ [847, 182], [124, 1678], [157, 132], [835, 177], [827, 180], [532, 1191],
12
+ [873, 178], [174, 128], [805, 1328], [798, 178], [502, 659], [804, 180],
13
+ [960, 178], [886, 182], [867, 183], [875, 179], [854, 176], [849, 185],
14
+ [136, 1783], [800, 181], [810, 180], [312, 782], [865, 177], [745, 125],
15
+ [833, 178], [882, 181], [834, 177], [821, 184], [899, 182], [26, 124],
16
+ [859, 182], [892, 174], [172, 134], [822, 182], [396, 135], [830, 184],
17
+ [792, 185], [427, 488], [818, 173], [832, 177], [406, 129], [852, 181],
18
+ [805, 177], [820, 175], [19, 14], [839, 183], [264, 127], [800, 172],
19
+ [811, 176], [827, 178], [896, 177], [863, 183], [813, 177], [458, 495],
20
+ [808, 181], [850, 748], [810, 184], [850, 181], [886, 179], [446, 497],
21
+ [851, 180], [592, 132], [300, 538], [794, 180], [815, 180], [57, 1772],
22
+ [799, 175], [821, 182], [135, 1681], [830, 483], [796, 173], [887, 40],
23
+ [823, 179], [864, 179], [322, 30], [886, 182], [808, 178], [823, 189],
24
+ [230, 360], [224, 275], [875, 179], [133, 1773], [844, 175], [433, 495],
25
+ [13, 15], [856, 178], [871, 181], [658, 130], [839, 183], [822, 83],
26
+ [827, 175], [848, 179], [894, 184], [833, 177], [828, 176], [482, 135],
27
+ [841, 177], [817, 184], [470, 140], [800, 180], [857, 15], [807, 187],
28
+ [425, 497], [64, 131], [852, 580], [883, 183], [836, 181], [878, 177]]
data/test.rb ADDED
@@ -0,0 +1,69 @@
1
+ require File.dirname(__FILE__) + '/lib/fastcluster'
2
+ require 'benchmark'
3
+
4
+ points = [[815, 183], [860, 176], [793, 176], [847, 176], [813, 176], [865, 183],
5
+ [804, 185], [813, 181], [797, 181], [193, 133], [905, 168], [821, 173],
6
+ [804, 178], [799, 180], [175, 360], [880, 176], [826, 171], [843, 181],
7
+ [857, 171], [443, 495], [857, 174], [382, 132], [97, 1203], [218, 281],
8
+ [97, 1778], [814, 181], [474, 133], [797, 179], [844, 180], [812, 178],
9
+ [846, 18], [795, 183], [862, 182], [840, 175], [788, 176], [478, 140],
10
+ [860, 173], [974, 16], [833, 181], [288, 130], [831, 874], [884, 174],
11
+ [834, 179], [836, 182], [181, 354], [346, 46], [855, 171], [855, 180],
12
+ [959, 177], [792, 181], [138, 1774], [283, 783], [815, 877], [807, 178],
13
+ [430, 497], [819, 181], [801, 176], [836, 182], [825, 176], [873, 179],
14
+ [847, 182], [124, 1678], [157, 132], [835, 177], [827, 180], [532, 1191],
15
+ [873, 178], [174, 128], [805, 1328], [798, 178], [502, 659], [804, 180],
16
+ [960, 178], [886, 182], [867, 183], [875, 179], [854, 176], [849, 185],
17
+ [136, 1783], [800, 181], [810, 180], [312, 782], [865, 177], [745, 125],
18
+ [833, 178], [882, 181], [834, 177], [821, 184], [899, 182], [26, 124],
19
+ [859, 182], [892, 174], [172, 134], [822, 182], [396, 135], [830, 184],
20
+ [792, 185], [427, 488], [818, 173], [832, 177], [406, 129], [852, 181],
21
+ [805, 177], [820, 175], [19, 14], [839, 183], [264, 127], [800, 172],
22
+ [811, 176], [827, 178], [896, 177], [863, 183], [813, 177], [458, 495],
23
+ [808, 181], [850, 748], [810, 184], [850, 181], [886, 179], [446, 497],
24
+ [851, 180], [592, 132], [300, 538], [794, 180], [815, 180], [57, 1772],
25
+ [799, 175], [821, 182], [135, 1681], [830, 483], [796, 173], [887, 40],
26
+ [823, 179], [864, 179], [322, 30], [886, 182], [808, 178], [823, 189],
27
+ [230, 360], [224, 275], [875, 179], [133, 1773], [844, 175], [433, 495],
28
+ [13, 15], [856, 178], [871, 181], [658, 130], [839, 183], [822, 83],
29
+ [827, 175], [848, 179], [894, 184], [833, 177], [828, 176], [482, 135],
30
+ [841, 177], [817, 184], [470, 140], [800, 180], [857, 15], [807, 187],
31
+ [425, 497], [64, 131], [852, 580], [883, 183], [836, 181], [878, 177],
32
+ [815, 183], [860, 176], [793, 176], [847, 176], [813, 176], [865, 183],
33
+ [804, 185], [813, 181], [797, 181], [193, 133], [905, 168], [821, 173],
34
+ [804, 178], [799, 180], [175, 360], [880, 176], [826, 171], [843, 181],
35
+ [857, 171], [443, 495], [857, 174], [382, 132], [97, 1203], [218, 281],
36
+ [97, 1778], [814, 181], [474, 133], [797, 179], [844, 180], [812, 178],
37
+ [846, 18], [795, 183], [862, 182], [840, 175], [788, 176], [478, 140],
38
+ [860, 173], [974, 16], [833, 181], [288, 130], [831, 874], [884, 174],
39
+ [834, 179], [836, 182], [181, 354], [346, 46], [855, 171], [855, 180],
40
+ [959, 177], [792, 181], [138, 1774], [283, 783], [815, 877], [807, 178],
41
+ [430, 497], [819, 181], [801, 176], [836, 182], [825, 176], [873, 179],
42
+ [847, 182], [124, 1678], [157, 132], [835, 177], [827, 180], [532, 1191],
43
+ [873, 178], [174, 128], [805, 1328], [798, 178], [502, 659], [804, 180],
44
+ [960, 178], [886, 182], [867, 183], [875, 179], [854, 176], [849, 185],
45
+ [136, 1783], [800, 181], [810, 180], [312, 782], [865, 177], [745, 125],
46
+ [833, 178], [882, 181], [834, 177], [821, 184], [899, 182], [26, 124],
47
+ [859, 182], [892, 174], [172, 134], [822, 182], [396, 135], [830, 184],
48
+ [792, 185], [427, 488], [818, 173], [832, 177], [406, 129], [852, 181],
49
+ [805, 177], [820, 175], [19, 14], [839, 183], [264, 127], [800, 172],
50
+ [811, 176], [827, 178], [896, 177], [863, 183], [813, 177], [458, 495],
51
+ [808, 181], [850, 748], [810, 184], [850, 181], [886, 179], [446, 497],
52
+ [851, 180], [592, 132], [300, 538], [794, 180], [815, 180], [57, 1772],
53
+ [799, 175], [821, 182], [135, 1681], [830, 483], [796, 173], [887, 40],
54
+ [823, 179], [864, 179], [322, 30], [886, 182], [808, 178], [823, 189],
55
+ [230, 360], [224, 275], [875, 179], [133, 1773], [844, 175], [433, 495],
56
+ [13, 15], [856, 178], [871, 181], [658, 130], [839, 183], [822, 83],
57
+ [827, 175], [848, 179], [894, 184], [833, 177], [828, 176], [482, 135],
58
+ [841, 177], [817, 184], [470, 140], [800, 180], [857, 15], [807, 187],
59
+ [425, 497], [64, 131], [852, 580], [883, 183], [836, 181], [878, 177]]
60
+
61
+ puts Benchmark.measure {
62
+ clusterer = Fastcluster::Clusterer.new(105, 5, points)
63
+ clusters = clusterer.clusters
64
+
65
+ clusters.sort{|a,b| a.size == b.size ? a.x <=> b.x : a.size <=> b.size }.each do |cluster|
66
+ puts cluster
67
+ end
68
+
69
+ }
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fastcluster
3
+ version: !ruby/object:Gem::Version
4
+ version: "0.9"
5
+ platform: ruby
6
+ authors:
7
+ - Jeremy Wells
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-10-24 00:00:00 +13:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: A clustering library for 2 dimensional points
17
+ email: jeremy@boost.co.nz
18
+ executables: []
19
+
20
+ extensions:
21
+ - ext/extconf.rb
22
+ extra_rdoc_files:
23
+ - CHANGELOG
24
+ - README.rdoc
25
+ - ext/clusterer.c
26
+ - ext/extconf.rb
27
+ - lib/fastcluster.rb
28
+ - lib/fastcluster/cluster.rb
29
+ files:
30
+ - CHANGELOG
31
+ - Manifest
32
+ - README.rdoc
33
+ - Rakefile
34
+ - ext/clusterer.c
35
+ - ext/extconf.rb
36
+ - lib/fastcluster.rb
37
+ - lib/fastcluster/cluster.rb
38
+ - spec/lib/fastcluster/cluster_spec.rb
39
+ - spec/lib/fastcluster/clusterer_spec.rb
40
+ - spec/spec.opts
41
+ - spec/spec_helper.rb
42
+ - spec/test_data.rb
43
+ - test.rb
44
+ - fastcluster.gemspec
45
+ has_rdoc: true
46
+ homepage: http://github.com/jemmyw/fastcluster
47
+ licenses: []
48
+
49
+ post_install_message:
50
+ rdoc_options:
51
+ - --line-numbers
52
+ - --inline-source
53
+ - --title
54
+ - Fastcluster
55
+ - --main
56
+ - README.rdoc
57
+ require_paths:
58
+ - lib
59
+ - ext
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: "0"
65
+ version:
66
+ required_rubygems_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: "1.2"
71
+ version:
72
+ requirements: []
73
+
74
+ rubyforge_project: fastcluster
75
+ rubygems_version: 1.3.5
76
+ signing_key:
77
+ specification_version: 3
78
+ summary: A clustering library for 2 dimensional points
79
+ test_files: []
80
+