fastcluster 0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2 @@
1
+ v0.9 In testing
2
+ v0.1 Initial version
@@ -0,0 +1,14 @@
1
+ CHANGELOG
2
+ Manifest
3
+ README.rdoc
4
+ Rakefile
5
+ ext/clusterer.c
6
+ ext/extconf.rb
7
+ lib/fastcluster.rb
8
+ lib/fastcluster/cluster.rb
9
+ spec/lib/fastcluster/cluster_spec.rb
10
+ spec/lib/fastcluster/clusterer_spec.rb
11
+ spec/spec.opts
12
+ spec/spec_helper.rb
13
+ spec/test_data.rb
14
+ test.rb
@@ -0,0 +1,59 @@
1
+
2
+ = Simple and fast clustering library
3
+
4
+ * http://github.com/jemmyw/fastcluster
5
+
6
+ == DESCRIPTION
7
+
8
+ This gem provides a really simple way to cluster 2 dimensional points. It is based
9
+ on the Hierclust[http://hierclust.rubyforge.org/] gem by Brandt Kurowski,
10
+ except that it does not cluster hierarchically. The aim of this gem is to provide
11
+ the same clustering algorithm but to be as fast as possible.
12
+
13
+ == INSTALL:
14
+
15
+ * sudo gem install fastcluster
16
+
17
+ == EXAMPLE:
18
+
19
+ require 'fastcluster'
20
+ points = [[1, 1], [1, 2], [5, 9]]
21
+ clusterer = Fastcluster::Clusterer.new(3, 0, points)
22
+ clusterer.clusters.each do |cluster|
23
+ puts cluster.x
24
+ puts cluster.y
25
+ puts cluster.size
26
+ end
27
+
28
+ == ALGORITHM:
29
+
30
+ 1. All points are initially clusters with size 1
31
+ 2. Precluster - create a grid of size [resolution] and cluster the points in each grid space automatically
32
+ 3. Combine two closest clusters, the new cluster has the summed size and the averaged distance (size weighted)
33
+ between the clusters.
34
+ 4. Loop to 3 until no cluster is less than [separation] apart or only one cluster remains
35
+
36
+ == LICENSE:
37
+
38
+ (The MIT License)
39
+
40
+ Copyright (c) 2009 Jeremy Wells
41
+
42
+ Permission is hereby granted, free of charge, to any person obtaining
43
+ a copy of this software and associated documentation files (the
44
+ 'Software'), to deal in the Software without restriction, including
45
+ without limitation the rights to use, copy, modify, merge, publish,
46
+ distribute, sublicense, and/or sell copies of the Software, and to
47
+ permit persons to whom the Software is furnished to do so, subject to
48
+ the following conditions:
49
+
50
+ The above copyright notice and this permission notice shall be
51
+ included in all copies or substantial portions of the Software.
52
+
53
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
54
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
55
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
56
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
57
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
58
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
59
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,12 @@
1
+ require 'rubygems'
2
+ require 'spec'
3
+ require 'spec/rake/spectask'
4
+ require 'echoe'
5
+
6
# Echoe configuration describing the fastcluster gem.
Echoe.new("fastcluster") do |gem|
  # Who maintains the gem and where it lives
  gem.author = "Jeremy Wells"
  gem.email = "jeremy@boost.co.nz"
  gem.url = "http://github.com/jemmyw/fastcluster"

  # Short and long descriptions (identical text)
  gem.summary = "A clustering library for 2 dimensional points"
  gem.description = "A clustering library for 2 dimensional points"
end
@@ -0,0 +1,325 @@
1
#include <ruby.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
5
+
6
+ /*
7
+ *
8
+ * Algorithm:
9
+ * all points are initially clusters with size 1
10
+ * precluster - create a grid of size @resolution and cluster the points in each grid space automatically
11
+ * loop until no cluster is less than @separation apart
12
+ * combine two closest clusters, the new cluster has the summed size and the averaged distance (size weighted)
13
+ * between the clusters.
14
+ **/
15
+ typedef struct { /* One cluster: a centre position plus the number of points merged into it. */
16
+ double x; /* x coordinate of the cluster centre */
17
+ double y; /* y coordinate of the cluster centre */
18
+ long size; /* number of points in the cluster (set to 1 for a raw input point) */
19
+ } CLUSTER;
20
+
21
+ /*
22
+ * An array of points to be clustered.
23
+ */
24
+ static VALUE fc_get_points(VALUE self) {
25
+ return rb_iv_get(self, "@points");
26
+ }
27
+
28
+ /*
29
+ * call-seq:
30
+ * add(x, y) -> nil
31
+ *
32
+ * Add a point to this clusterer.
33
+ */
34
+ static VALUE fc_add_point(VALUE self, VALUE x, VALUE y) {
35
+ long len = 2;
36
+ VALUE holdArray = rb_ary_new3(2, x, y);
37
+ VALUE pointArray = fc_get_points(self);
38
+ rb_ary_push(pointArray, holdArray);
39
+
40
+ return Qnil;
41
+ }
42
+
43
+ /*
44
+ * call-seq:
45
+ * <<(point) -> nil
46
+ *
47
+ * Add a point to this clusterer. The point must be in the format
48
+ * of an array with two number.
49
+ *
50
+ * Example:
51
+ * clusterer << [1, 2]
52
+ */
53
+ static VALUE fc_append_point(VALUE self, VALUE point) {
54
+ VALUE pointArray = fc_get_points(self);
55
+ rb_ary_push(pointArray, point);
56
+ return Qnil;
57
+ }
58
+
59
+ /*
60
+ * Calculate the distance (pythag) between two cluster points
61
+ */
62
+ static double fc_get_distance_between(CLUSTER * one, CLUSTER * two) {
63
+ double rr = pow((long)one->x - (long)two->x, 2) + pow((long)one->y - (long)two->y, 2);
64
+ return sqrt(rr);
65
+ }
66
+
67
+ /*
68
+ * Add a point to a cluster. This increments the size and calcualtes the average between
69
+ * the current cluster position and the new point.
70
+ */
71
+ static void fc_add_to_cluster(CLUSTER * dst, double x, double y) {
72
+ dst->x = ((dst->x * dst->size) + x) / (dst->size + 1);
73
+ dst->y = ((dst->y * dst->size) + y) / (dst->size + 1);
74
+ dst->size++;
75
+ }
76
+
77
+ /*
78
+ * Combine two clusters into one with an average center point
79
+ */
80
+ static void fc_combine_clusters(CLUSTER * dst, CLUSTER * src) {
81
+ dst->x = (dst->x*dst->size + src->x*src->size) / (dst->size+src->size);
82
+ dst->y = (dst->y*dst->size + src->y*src->size) / (dst->size+src->size);
83
+ dst->size = dst->size + src->size;
84
+ }
85
+
86
+ /*
87
+ * Get the maximum grid size
88
+ */
89
+ static long fc_get_max_grid(long resolution, CLUSTER * point_array, long num_points) {
90
+ int i;
91
+ int max_grid = 0;
92
+ for(i = 0; i < num_points; i++) {
93
+ CLUSTER * point = &point_array[i];
94
+ int xg = point->x/resolution;
95
+ int yg = point->y/resolution;
96
+ if(xg>max_grid)
97
+ max_grid = xg;
98
+ if(yg>max_grid)
99
+ max_grid = yg;
100
+ }
101
+ return max_grid+1;
102
+ }
103
+
104
+ /*
105
+ * call-seq:
106
+ * new(separation = 0, resolution = 0, points = nil)
107
+ *
108
+ * Create a new Clusterer. The new method accepts 3 optional arguments, separation,
109
+ * resolution and points.
110
+ *
111
+ * <tt>separation</tt> - The distance between clusters. The higher this number, the
112
+ * less clusters there will be. If this is 0 then no clustering will occur.
113
+ *
114
+ * <tt>resolution</tt> - If specified then the points are placed on a grid with each grid square
115
+ * being this size. Points falling in the same grid square are automatically clustered.
116
+ * This option should be specified clustering larger number of points to reduce processing time.
117
+ *
118
+ * <tt>points</tt> - An array of points. Each array item must be an array with
119
+ * two numbers (x, y). Example: <code>[[1, 2], [3, 4]]</code>.
120
+ */
121
+ static VALUE fc_initialize_clusterer(int argc, VALUE *argv, VALUE self) {
122
+ if(argc > 0)
123
+ rb_iv_set(self, "@separation", argv[0]);
124
+ else
125
+ rb_iv_set(self, "@separation", INT2FIX(0));
126
+
127
+ if(argc > 1)
128
+ rb_iv_set(self, "@resolution", argv[1]);
129
+ else
130
+ rb_iv_set(self, "@resolution", INT2FIX(0));
131
+
132
+ VALUE pointArray = rb_ary_new();
133
+ rb_iv_set(self, "@points", pointArray);
134
+
135
+ if(argc > 2) {
136
+ if(TYPE(argv[2]) == T_ARRAY) {
137
+ rb_iv_set(self, "@points", argv[2]);
138
+ }
139
+ }
140
+
141
+ return Qnil;
142
+ }
143
+
144
+ /*
145
+ * Turn the ruby array of points (format [[x,y], [x,y]]) into an array of
146
+ * CLUSTER
147
+ */
148
+ static void fc_native_point_array(CLUSTER * arrayPtr, VALUE rubyArray, long num_points) {
149
+ int i;
150
+ for(i=0;i<num_points;i++) {
151
+ VALUE holdArray = RARRAY(rubyArray)->ptr[i];
152
+ double x = NUM2DBL(RARRAY(holdArray)->ptr[0]);
153
+ double y = NUM2DBL(RARRAY(holdArray)->ptr[1]);
154
+
155
+ arrayPtr[i].x = x;
156
+ arrayPtr[i].y = y;
157
+ arrayPtr[i].size = 1;
158
+ }
159
+ }
160
+
161
+ static CLUSTER *fc_calculate_clusters(long separation, long resolution, CLUSTER * point_array, int num_points, long * cluster_size) {
162
+ int max_grid = fc_get_max_grid(resolution, &point_array[0], num_points);
163
+ int i, j;
164
+ long preclust_size = 0;
165
+
166
+ CLUSTER * cluster;
167
+ CLUSTER * clusters;
168
+
169
+ if(resolution > 0) {
170
+ CLUSTER grid_array[max_grid][max_grid];
171
+
172
+ for(i=0;i<max_grid;i++) {
173
+ for(j=0;j<max_grid;j++) {
174
+ grid_array[i][j].size = 0;
175
+ }
176
+ }
177
+
178
+ for(i = 0; i < num_points; i++) {
179
+ cluster = &point_array[i];
180
+
181
+ int gx = floor(cluster->x/resolution);
182
+ int gy = floor(cluster->y/resolution);
183
+
184
+ fc_add_to_cluster(&grid_array[gx][gy], cluster->x, cluster->y);
185
+
186
+ if(grid_array[gx][gy].size == 1) preclust_size++;
187
+ }
188
+
189
+ clusters = malloc(preclust_size * sizeof(CLUSTER));
190
+
191
+ int max_grid_total = max_grid * max_grid;
192
+ CLUSTER * gridPtr = grid_array[0];
193
+
194
+ int incr = 0;
195
+ for(i=0;i<max_grid_total;i++) {
196
+ if(gridPtr[i].size > 0) {
197
+ clusters[incr] = gridPtr[i];
198
+ incr++;
199
+ }
200
+ }
201
+ } else {
202
+ preclust_size = num_points;
203
+ clusters = malloc(preclust_size * sizeof(CLUSTER));
204
+ memcpy(&clusters[0], &point_array[0], preclust_size * sizeof(CLUSTER));
205
+ }
206
+
207
+ double distance_sep = 0;
208
+ long current_cluster_size = 0;
209
+ int found;
210
+ long nearest_origin;
211
+ long nearest_other;
212
+
213
+ do {
214
+ // calculate distance sep
215
+ distance_sep = 0;
216
+ nearest_other = 0;
217
+
218
+ for(i=0;i<preclust_size;i++){
219
+ for(j=i+1;j<preclust_size;j++){
220
+ double distance = fc_get_distance_between(&clusters[i], &clusters[j]);
221
+
222
+ // printf("distance between %f, %f and %f, %f is %f\n", clusters[i].x, clusters[i].y, clusters[j].x, clusters[j].y, distance);
223
+
224
+ if(distance_sep == 0 || distance < distance_sep) {
225
+ distance_sep = distance;
226
+
227
+ if(distance < separation || separation == 0) {
228
+ nearest_origin = i;
229
+ nearest_other = j;
230
+ }
231
+ }
232
+ }
233
+ }
234
+
235
+ if(nearest_other > 0) {
236
+ fc_combine_clusters(&clusters[nearest_origin], &clusters[nearest_other]);
237
+
238
+ CLUSTER *newarr = malloc(preclust_size * sizeof(CLUSTER));
239
+ memcpy(&newarr[0], &clusters[0], nearest_other * sizeof(CLUSTER));
240
+ memcpy(&newarr[nearest_other], &clusters[nearest_other+1], (preclust_size - (nearest_other + 1)) * sizeof(CLUSTER));
241
+
242
+ void *_tmp = realloc(clusters, ((preclust_size-1) * sizeof(CLUSTER)));
243
+ clusters = (CLUSTER*)_tmp;
244
+ preclust_size = preclust_size - 1;
245
+
246
+ for(i=0;i<preclust_size;i++)
247
+ clusters[i] = newarr[i];
248
+
249
+ free(newarr);
250
+ }
251
+
252
+ } while((separation == 0 || distance_sep < separation) && preclust_size > 1);
253
+
254
+ *cluster_size = preclust_size;
255
+ return clusters;
256
+ }
257
+
258
+ static VALUE fc_get_cluster_class() {
259
+ ID cluster_module_id = rb_intern("Fastcluster");
260
+ ID cluster_class_id = rb_intern("Cluster");
261
+ VALUE cluster_module = rb_const_get(rb_cObject, cluster_module_id);
262
+ return rb_const_get(cluster_module, cluster_class_id);
263
+ }
264
+
265
+ /*
266
+ * Return the clusters found for the points in this clusterer. This will be an
267
+ * array of Cluster objects.
268
+ *
269
+ * Example:
270
+ * clusterer = Fastcluster::Clusterer.new(3, 0, [[1, 1], [1, 2], [5, 9]])
271
+ * clusterer.clusters -> [(1.00, 1.50): 2, (5.00, 9.00): 1]
272
+ */
273
+ static VALUE fc_get_clusters(VALUE self) {
274
+ // Get the separation adn resolution from ruby
275
+ long separation = NUM2INT(rb_iv_get(self, "@separation"));
276
+ long resolution = NUM2INT(rb_iv_get(self, "@resolution"));
277
+ int i;
278
+
279
+ // Create a native array of clusters from the ruby array of points
280
+ VALUE pointArray = fc_get_points(self);
281
+ long num_points = RARRAY(pointArray)->len;
282
+ CLUSTER native_point_array[num_points];
283
+
284
+ fc_native_point_array(&native_point_array[0], pointArray, num_points);
285
+
286
+ // Calcualte the clusters
287
+ CLUSTER * clusters = NULL;
288
+ long cluster_size;
289
+
290
+ clusters = fc_calculate_clusters(separation, resolution, &native_point_array[0], num_points, &cluster_size);
291
+
292
+ // Create ruby array of clusters to return
293
+ VALUE cluster_class = fc_get_cluster_class();
294
+ VALUE ruby_cluster_array = rb_ary_new2(cluster_size);
295
+
296
+ for(i=0;i<cluster_size;i++) {
297
+ int arg_count = 3;
298
+ VALUE arg_array[arg_count];
299
+
300
+ arg_array[0] = rb_float_new(clusters[i].x);
301
+ arg_array[1] = rb_float_new(clusters[i].y);
302
+ arg_array[2] = INT2FIX(clusters[i].size);
303
+
304
+ VALUE cluster_obj = rb_class_new_instance(arg_count, arg_array, cluster_class);
305
+ rb_ary_push(ruby_cluster_array, cluster_obj);
306
+ }
307
+
308
+ // Free the clusters array
309
+ free(clusters);
310
+
311
+ return ruby_cluster_array;
312
+ }
313
+
314
+ void Init_clusterer() {
315
+ VALUE clustererModule = rb_define_module("Fastcluster");
316
+ VALUE clustererClass = rb_define_class_under(clustererModule, "Clusterer", rb_cObject);
317
+
318
+ rb_define_method(clustererClass, "initialize", fc_initialize_clusterer, -1);
319
+ rb_define_method(clustererClass, "add", fc_add_point, 2);
320
+
321
+ rb_define_method(clustererClass, "<<", fc_append_point, 1);
322
+
323
+ rb_define_method(clustererClass, "clusters", fc_get_clusters, 0);
324
+ rb_define_method(clustererClass, "points", fc_get_points, 0);
325
+ }
@@ -0,0 +1,4 @@
1
+ require 'mkmf'
2
# Name of the native extension to build; passed to both mkmf calls below.
ext = 'clusterer'

dir_config(ext)
create_makefile(ext)
@@ -0,0 +1,31 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for fastcluster 0.9.
#
# Setters that only exist in some RubyGems versions are guarded with
# respond_to? so the file loads on both old and new RubyGems. The empty
# version-dependent branches (which referenced the legacy
# Gem::RubyGemsVersion constant and did nothing) have been removed.
Gem::Specification.new do |s|
  s.name = %q{fastcluster}
  s.version = "0.9"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Jeremy Wells"]
  s.date = %q{2009-10-24}
  s.description = %q{A clustering library for 2 dimensional points}
  s.email = %q{jeremy@boost.co.nz}
  s.extensions = ["ext/extconf.rb"]
  s.extra_rdoc_files = ["CHANGELOG", "README.rdoc", "ext/clusterer.c", "ext/extconf.rb", "lib/fastcluster.rb", "lib/fastcluster/cluster.rb"]
  s.files = ["CHANGELOG", "Manifest", "README.rdoc", "Rakefile", "ext/clusterer.c", "ext/extconf.rb", "lib/fastcluster.rb", "lib/fastcluster/cluster.rb", "spec/lib/fastcluster/cluster_spec.rb", "spec/lib/fastcluster/clusterer_spec.rb", "spec/spec.opts", "spec/spec_helper.rb", "spec/test_data.rb", "test.rb", "fastcluster.gemspec"]
  s.homepage = %q{http://github.com/jemmyw/fastcluster}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Fastcluster", "--main", "README.rdoc"]
  s.require_paths = ["lib", "ext"]
  # rubyforge_project= is deprecated in RubyGems; only set it where it exists.
  s.rubyforge_project = %q{fastcluster} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{A clustering library for 2 dimensional points}

  s.specification_version = 3 if s.respond_to? :specification_version
end
@@ -0,0 +1,4 @@
1
# Library entry point: puts this file's directory at the front of the load
# path, then loads the Cluster class and the 'clusterer' file that sits in
# the same directory as this one.
here = File.dirname(__FILE__)
$LOAD_PATH.unshift(here)

require 'fastcluster/cluster'
require "#{here}/clusterer"
@@ -0,0 +1,25 @@
1
module Fastcluster
  # A cluster of points: a centre position (+x+, +y+) and the number of
  # points (+size+) that were merged into it. Clusters are ordered by size.
  class Cluster
    include Comparable

    attr_reader :x, :y, :size

    # @param x [Numeric] x coordinate of the cluster centre
    # @param y [Numeric] y coordinate of the cluster centre
    # @param size [Integer] number of points in the cluster
    def initialize(x, y, size)
      @x = x
      @y = y
      @size = size
    end

    # Compares two clusters by size.
    #
    # Returns nil for anything that is not a Cluster, so that
    # <tt>cluster == nil</tt> is simply false instead of raising, and
    # unrelated objects that happen to respond to +size+ (Integer, String,
    # Array...) are never treated as equal to a cluster.
    #
    # @param other [Object]
    # @return [Integer, nil] -1, 0 or 1, or nil when not comparable
    def <=>(other)
      return nil unless other.is_a?(Cluster)

      size <=> other.size
    end

    # Same text as #to_s.
    def inspect
      to_s
    end

    # @return [String] e.g. "(1.00, 1.50): 2"
    def to_s
      '(%0.2f, %0.2f): %d' % [@x, @y, @size]
    end
  end
end
File without changes
@@ -0,0 +1,222 @@
1
+ require File.dirname(__FILE__) + '/../../spec_helper'
2
+ require 'benchmark'
3
+
4
+ describe Fastcluster::Clusterer do
5
+ before do
6
+ @points = POINTS
7
+ end
8
+
9
+ it 'should allow setting points in initializer' do
10
+ @clusterer = Fastcluster::Clusterer.new(105, 5, @points)
11
+ @clusterer.points.size.should == 168
12
+ end
13
+
14
+ describe 'instance' do
15
+ before do
16
+ @clusterer = Fastcluster::Clusterer.new(105, 5)
17
+ end
18
+
19
+ describe '#add' do
20
+ it 'should add an x y point to the clusterer' do
21
+ @clusterer.add(5, 10)
22
+ @clusterer.points.size.should == 1
23
+ @clusterer.points.first.should == [5, 10]
24
+ end
25
+ end
26
+
27
+ describe '#<<' do
28
+ it 'should add the value to the clusterer' do
29
+ @clusterer << [5, 10]
30
+ @clusterer.points.size.should == 1
31
+ @clusterer.points.first.should == [5, 10]
32
+ end
33
+ end
34
+ end
35
+
36
+ describe '#clusters' do
37
+ describe 'with large test' do
38
+ before do
39
+ @clusterer = Fastcluster::Clusterer.new(105, 5, @points)
40
+ @clusters = @clusterer.clusters.sort{|a, b| a.size == b.size ? a.x <=> b.x : a.size <=> b.size }
41
+ end
42
+
43
+ it 'should take less than 1 second' do
44
+ time = Benchmark.measure { @clusterer.clusters }
45
+ time.total.should be < 1
46
+ end
47
+
48
+ it 'should have as many cluster points as data points' do
49
+ @clusters.inject(0){|m, n| m + n.size }.should == @points.size
50
+ end
51
+
52
+ it 'should have 23 clusters' do
53
+ @clusters.size.should == 23
54
+ end
55
+
56
+ it 'should have a cluster of 108 points at 836 by 178' do
57
+ @clusters.last.size.should == 108
58
+ @clusters.last.x.should be_close(836, 1)
59
+ @clusters.last.y.should be_close(178, 1)
60
+ end
61
+
62
+ it 'should have a cluster of 1 point at 97 by 1203' do
63
+ @clusters.first.size.should == 1
64
+ @clusters.first.x.should be_close(97, 1)
65
+ @clusters.first.y.should be_close(1203, 1)
66
+ end
67
+ end
68
+
69
+ describe "with two points" do
70
+ before do
71
+ @clusterer = Fastcluster::Clusterer.new(0, 0, [[1, 5], [2, 8]])
72
+ end
73
+
74
+ it "should return one cluster" do
75
+ @clusterer.clusters.size.should == 1
76
+ end
77
+
78
+ it "should have two points in the cluster" do
79
+ @clusterer.clusters.first.size.should == 2
80
+ end
81
+ end
82
+
83
+ describe "with three points" do
84
+ before do
85
+ @clusterer = Fastcluster::Clusterer.new(0, 0, [[1, 2], [5, 6], [2, 3]])
86
+ @clusters = @clusterer.clusters
87
+ end
88
+
89
+ it "should return one cluster" do
90
+ @clusters.size.should == 1
91
+ end
92
+
93
+ it "containing three items" do
94
+ @clusters.first.size.should == 3
95
+ end
96
+ end
97
+
98
+ describe "with four points" do
99
+ before do
100
+ @points = [
101
+ [0, 1],
102
+ [1, 0],
103
+ [3, 4],
104
+ [4, 3],
105
+ ]
106
+ end
107
+
108
+ describe "and no separation" do
109
+ before do
110
+ @clusterer = Fastcluster::Clusterer.new(0, 0, @points)
111
+ end
112
+
113
+ it "should return one cluster" do
114
+ @clusterer.clusters.size.should == 1
115
+ end
116
+ end
117
+
118
describe "and separation 1" do
  # The library is already loaded by spec_helper. The former
  # `require 'lib/fastcluster'` here depended on the current working
  # directory being on the load path and could load the file a second time.
  before do
    @clusterer = Fastcluster::Clusterer.new(1, 0, @points)
  end

  it "should return all four individual points" do
    @clusterer.clusters.size.should == 4
  end
end
128
+
129
+ describe "and separation 2" do
130
+ before do
131
+ @clusterer = Fastcluster::Clusterer.new(2, 0, @points)
132
+ end
133
+
134
+ it "should return two clusters" do
135
+ @clusterer.clusters.size.should == 2
136
+ end
137
+ end
138
+ end
139
+
140
+ describe "with eight points" do
141
+ before do
142
+ @points = [
143
+ [0, 1],
144
+ [1, 0],
145
+ [3, 4],
146
+ [4, 3],
147
+ [7, 8],
148
+ [8, 7],
149
+ [8, 9],
150
+ [9, 8]
151
+ ]
152
+ end
153
+
154
+ describe "and no separation" do
155
+ before do
156
+ @clusterer = Fastcluster::Clusterer.new(0, 0)
157
+ @points.each do |point|
158
+ @clusterer << point
159
+ end
160
+ end
161
+
162
+ it "should return one cluster when no minimum separation is given" do
163
+ @clusterer.clusters.size.should == 1
164
+ end
165
+ end
166
+
167
+ describe "and separation 1" do
168
+ before do
169
+ @clusterer = Fastcluster::Clusterer.new(1, 0)
170
+ @points.each do |point|
171
+ @clusterer << point
172
+ end
173
+ end
174
+
175
+ it "should have all eight points in individual clusters" do
176
+ @clusterer.clusters.size.should == 8
177
+ end
178
+ end
179
+
180
+ describe "and separation 3" do
181
+ describe "with no resolution limit" do
182
+ before do
183
+ @clusterer = Fastcluster::Clusterer.new(3, 0)
184
+ @points.each do |point|
185
+ @clusterer << point
186
+ end
187
+ @clusters = @clusterer.clusters.sort
188
+ end
189
+
190
+ it "should have three clusters" do
191
+ @clusters.size.should == 3
192
+ end
193
+
194
+ it "should have clusters size 2, 2, and 4 " do
195
+ @clusters[0].size.should == 2
196
+ @clusters[1].size.should == 2
197
+ @clusters[2].size.should == 4
198
+ end
199
+ end
200
+
201
describe "with coarse resolution" do
  # Resolution 5 buckets the eight points into two grid squares of four
  # points each, so the examples below expect two clusters of size 4.
  before do
    @clusterer = Fastcluster::Clusterer.new(3, 5)
    @points.each do |point|
      @clusterer << point
    end
    @clusters = @clusterer.clusters.sort
  end

  it "should have two clusters" do
    @clusters.size.should == 2
  end

  it "should have clusters size 4 and 4" do
    @clusters[0].size.should == 4
    @clusters[1].size.should == 4
  end
end
219
+ end
220
+ end
221
+ end
222
+ end
@@ -0,0 +1,5 @@
1
+ --colour
2
+ --format progress
3
+ --loadby mtime
4
+ --reverse
5
+ --debugger
@@ -0,0 +1,10 @@
1
+ begin
2
+ require 'spec'
3
+ rescue LoadError
4
+ require 'rubygems'
5
+ gem 'rspec'
6
+ require 'spec'
7
+ end
8
+
9
+ require File.dirname(__FILE__) + '/../lib/fastcluster'
10
+ require File.dirname(__FILE__) + '/test_data.rb'
@@ -0,0 +1,28 @@
1
+ POINTS = [[815, 183], [860, 176], [793, 176], [847, 176], [813, 176], [865, 183],
2
+ [804, 185], [813, 181], [797, 181], [193, 133], [905, 168], [821, 173],
3
+ [804, 178], [799, 180], [175, 360], [880, 176], [826, 171], [843, 181],
4
+ [857, 171], [443, 495], [857, 174], [382, 132], [97, 1203], [218, 281],
5
+ [97, 1778], [814, 181], [474, 133], [797, 179], [844, 180], [812, 178],
6
+ [846, 18], [795, 183], [862, 182], [840, 175], [788, 176], [478, 140],
7
+ [860, 173], [974, 16], [833, 181], [288, 130], [831, 874], [884, 174],
8
+ [834, 179], [836, 182], [181, 354], [346, 46], [855, 171], [855, 180],
9
+ [959, 177], [792, 181], [138, 1774], [283, 783], [815, 877], [807, 178],
10
+ [430, 497], [819, 181], [801, 176], [836, 182], [825, 176], [873, 179],
11
+ [847, 182], [124, 1678], [157, 132], [835, 177], [827, 180], [532, 1191],
12
+ [873, 178], [174, 128], [805, 1328], [798, 178], [502, 659], [804, 180],
13
+ [960, 178], [886, 182], [867, 183], [875, 179], [854, 176], [849, 185],
14
+ [136, 1783], [800, 181], [810, 180], [312, 782], [865, 177], [745, 125],
15
+ [833, 178], [882, 181], [834, 177], [821, 184], [899, 182], [26, 124],
16
+ [859, 182], [892, 174], [172, 134], [822, 182], [396, 135], [830, 184],
17
+ [792, 185], [427, 488], [818, 173], [832, 177], [406, 129], [852, 181],
18
+ [805, 177], [820, 175], [19, 14], [839, 183], [264, 127], [800, 172],
19
+ [811, 176], [827, 178], [896, 177], [863, 183], [813, 177], [458, 495],
20
+ [808, 181], [850, 748], [810, 184], [850, 181], [886, 179], [446, 497],
21
+ [851, 180], [592, 132], [300, 538], [794, 180], [815, 180], [57, 1772],
22
+ [799, 175], [821, 182], [135, 1681], [830, 483], [796, 173], [887, 40],
23
+ [823, 179], [864, 179], [322, 30], [886, 182], [808, 178], [823, 189],
24
+ [230, 360], [224, 275], [875, 179], [133, 1773], [844, 175], [433, 495],
25
+ [13, 15], [856, 178], [871, 181], [658, 130], [839, 183], [822, 83],
26
+ [827, 175], [848, 179], [894, 184], [833, 177], [828, 176], [482, 135],
27
+ [841, 177], [817, 184], [470, 140], [800, 180], [857, 15], [807, 187],
28
+ [425, 497], [64, 131], [852, 580], [883, 183], [836, 181], [878, 177]]
data/test.rb ADDED
@@ -0,0 +1,69 @@
1
+ require File.dirname(__FILE__) + '/lib/fastcluster'
2
+ require 'benchmark'
3
+
4
+ points = [[815, 183], [860, 176], [793, 176], [847, 176], [813, 176], [865, 183],
5
+ [804, 185], [813, 181], [797, 181], [193, 133], [905, 168], [821, 173],
6
+ [804, 178], [799, 180], [175, 360], [880, 176], [826, 171], [843, 181],
7
+ [857, 171], [443, 495], [857, 174], [382, 132], [97, 1203], [218, 281],
8
+ [97, 1778], [814, 181], [474, 133], [797, 179], [844, 180], [812, 178],
9
+ [846, 18], [795, 183], [862, 182], [840, 175], [788, 176], [478, 140],
10
+ [860, 173], [974, 16], [833, 181], [288, 130], [831, 874], [884, 174],
11
+ [834, 179], [836, 182], [181, 354], [346, 46], [855, 171], [855, 180],
12
+ [959, 177], [792, 181], [138, 1774], [283, 783], [815, 877], [807, 178],
13
+ [430, 497], [819, 181], [801, 176], [836, 182], [825, 176], [873, 179],
14
+ [847, 182], [124, 1678], [157, 132], [835, 177], [827, 180], [532, 1191],
15
+ [873, 178], [174, 128], [805, 1328], [798, 178], [502, 659], [804, 180],
16
+ [960, 178], [886, 182], [867, 183], [875, 179], [854, 176], [849, 185],
17
+ [136, 1783], [800, 181], [810, 180], [312, 782], [865, 177], [745, 125],
18
+ [833, 178], [882, 181], [834, 177], [821, 184], [899, 182], [26, 124],
19
+ [859, 182], [892, 174], [172, 134], [822, 182], [396, 135], [830, 184],
20
+ [792, 185], [427, 488], [818, 173], [832, 177], [406, 129], [852, 181],
21
+ [805, 177], [820, 175], [19, 14], [839, 183], [264, 127], [800, 172],
22
+ [811, 176], [827, 178], [896, 177], [863, 183], [813, 177], [458, 495],
23
+ [808, 181], [850, 748], [810, 184], [850, 181], [886, 179], [446, 497],
24
+ [851, 180], [592, 132], [300, 538], [794, 180], [815, 180], [57, 1772],
25
+ [799, 175], [821, 182], [135, 1681], [830, 483], [796, 173], [887, 40],
26
+ [823, 179], [864, 179], [322, 30], [886, 182], [808, 178], [823, 189],
27
+ [230, 360], [224, 275], [875, 179], [133, 1773], [844, 175], [433, 495],
28
+ [13, 15], [856, 178], [871, 181], [658, 130], [839, 183], [822, 83],
29
+ [827, 175], [848, 179], [894, 184], [833, 177], [828, 176], [482, 135],
30
+ [841, 177], [817, 184], [470, 140], [800, 180], [857, 15], [807, 187],
31
+ [425, 497], [64, 131], [852, 580], [883, 183], [836, 181], [878, 177],
32
+ [815, 183], [860, 176], [793, 176], [847, 176], [813, 176], [865, 183],
33
+ [804, 185], [813, 181], [797, 181], [193, 133], [905, 168], [821, 173],
34
+ [804, 178], [799, 180], [175, 360], [880, 176], [826, 171], [843, 181],
35
+ [857, 171], [443, 495], [857, 174], [382, 132], [97, 1203], [218, 281],
36
+ [97, 1778], [814, 181], [474, 133], [797, 179], [844, 180], [812, 178],
37
+ [846, 18], [795, 183], [862, 182], [840, 175], [788, 176], [478, 140],
38
+ [860, 173], [974, 16], [833, 181], [288, 130], [831, 874], [884, 174],
39
+ [834, 179], [836, 182], [181, 354], [346, 46], [855, 171], [855, 180],
40
+ [959, 177], [792, 181], [138, 1774], [283, 783], [815, 877], [807, 178],
41
+ [430, 497], [819, 181], [801, 176], [836, 182], [825, 176], [873, 179],
42
+ [847, 182], [124, 1678], [157, 132], [835, 177], [827, 180], [532, 1191],
43
+ [873, 178], [174, 128], [805, 1328], [798, 178], [502, 659], [804, 180],
44
+ [960, 178], [886, 182], [867, 183], [875, 179], [854, 176], [849, 185],
45
+ [136, 1783], [800, 181], [810, 180], [312, 782], [865, 177], [745, 125],
46
+ [833, 178], [882, 181], [834, 177], [821, 184], [899, 182], [26, 124],
47
+ [859, 182], [892, 174], [172, 134], [822, 182], [396, 135], [830, 184],
48
+ [792, 185], [427, 488], [818, 173], [832, 177], [406, 129], [852, 181],
49
+ [805, 177], [820, 175], [19, 14], [839, 183], [264, 127], [800, 172],
50
+ [811, 176], [827, 178], [896, 177], [863, 183], [813, 177], [458, 495],
51
+ [808, 181], [850, 748], [810, 184], [850, 181], [886, 179], [446, 497],
52
+ [851, 180], [592, 132], [300, 538], [794, 180], [815, 180], [57, 1772],
53
+ [799, 175], [821, 182], [135, 1681], [830, 483], [796, 173], [887, 40],
54
+ [823, 179], [864, 179], [322, 30], [886, 182], [808, 178], [823, 189],
55
+ [230, 360], [224, 275], [875, 179], [133, 1773], [844, 175], [433, 495],
56
+ [13, 15], [856, 178], [871, 181], [658, 130], [839, 183], [822, 83],
57
+ [827, 175], [848, 179], [894, 184], [833, 177], [828, 176], [482, 135],
58
+ [841, 177], [817, 184], [470, 140], [800, 180], [857, 15], [807, 187],
59
+ [425, 497], [64, 131], [852, 580], [883, 183], [836, 181], [878, 177]]
60
+
61
# Time one clustering run over the sample points, printing every cluster
# (smallest first, ties ordered by x) and then the benchmark timings.
timing = Benchmark.measure do
  clusterer = Fastcluster::Clusterer.new(105, 5, points)
  clusters = clusterer.clusters

  ordered = clusters.sort do |a, b|
    if a.size == b.size
      a.x <=> b.x
    else
      a.size <=> b.size
    end
  end

  ordered.each { |cluster| puts cluster }
end

puts timing
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fastcluster
3
+ version: !ruby/object:Gem::Version
4
+ version: "0.9"
5
+ platform: ruby
6
+ authors:
7
+ - Jeremy Wells
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-10-24 00:00:00 +13:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: A clustering library for 2 dimensional points
17
+ email: jeremy@boost.co.nz
18
+ executables: []
19
+
20
+ extensions:
21
+ - ext/extconf.rb
22
+ extra_rdoc_files:
23
+ - CHANGELOG
24
+ - README.rdoc
25
+ - ext/clusterer.c
26
+ - ext/extconf.rb
27
+ - lib/fastcluster.rb
28
+ - lib/fastcluster/cluster.rb
29
+ files:
30
+ - CHANGELOG
31
+ - Manifest
32
+ - README.rdoc
33
+ - Rakefile
34
+ - ext/clusterer.c
35
+ - ext/extconf.rb
36
+ - lib/fastcluster.rb
37
+ - lib/fastcluster/cluster.rb
38
+ - spec/lib/fastcluster/cluster_spec.rb
39
+ - spec/lib/fastcluster/clusterer_spec.rb
40
+ - spec/spec.opts
41
+ - spec/spec_helper.rb
42
+ - spec/test_data.rb
43
+ - test.rb
44
+ - fastcluster.gemspec
45
+ has_rdoc: true
46
+ homepage: http://github.com/jemmyw/fastcluster
47
+ licenses: []
48
+
49
+ post_install_message:
50
+ rdoc_options:
51
+ - --line-numbers
52
+ - --inline-source
53
+ - --title
54
+ - Fastcluster
55
+ - --main
56
+ - README.rdoc
57
+ require_paths:
58
+ - lib
59
+ - ext
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: "0"
65
+ version:
66
+ required_rubygems_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: "1.2"
71
+ version:
72
+ requirements: []
73
+
74
+ rubyforge_project: fastcluster
75
+ rubygems_version: 1.3.5
76
+ signing_key:
77
+ specification_version: 3
78
+ summary: A clustering library for 2 dimensional points
79
+ test_files: []
80
+