rbcluster 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +6 -3
- data/ext/rbcluster/rbcluster.c +66 -11
- data/lib/rbcluster.rb +1 -0
- data/lib/rbcluster/node.rb +7 -0
- data/lib/rbcluster/tree.rb +31 -4
- data/lib/rbcluster/version.rb +1 -1
- data/rbcluster.gemspec +1 -1
- data/spec/node_spec.rb +1 -0
- data/spec/tree_spec.rb +58 -0
- metadata +9 -8
- data/spec/cuttree_spec.rb +0 -6
data/README.md
CHANGED
@@ -3,6 +3,9 @@ rbcluster
|
|
3
3
|
|
4
4
|
Ruby bindings to the Cluster C library.
|
5
5
|
|
6
|
+
[![Build Status](https://secure.travis-ci.org/jarib/rbcluster.png)](http://travis-ci.org/jarib/rbcluster)
|
7
|
+
|
8
|
+
|
6
9
|
TODO
|
7
10
|
----
|
8
11
|
|
@@ -10,13 +13,12 @@ Functions:
|
|
10
13
|
|
11
14
|
* Cluster.clustercentroids
|
12
15
|
* Cluster.clustermedoids
|
13
|
-
* Cluster::Tree#
|
16
|
+
* Cluster::Tree#slice
|
14
17
|
|
15
18
|
Other:
|
16
19
|
|
17
|
-
* an examples/ folder
|
18
20
|
* make :transpose work
|
19
|
-
* specs for bad inputs
|
21
|
+
* more specs for bad inputs
|
20
22
|
|
21
23
|
DONE
|
22
24
|
----
|
@@ -36,6 +38,7 @@ See also
|
|
36
38
|
|
37
39
|
* http://bonsai.hgc.jp/~mdehoon/software/cluster/software.htm
|
38
40
|
* http://bonsai.hgc.jp/~mdehoon/software/cluster/cluster.pdf
|
41
|
+
* http://github.com/jarib/rbcluster
|
39
42
|
|
40
43
|
Note on Patches/Pull Requests
|
41
44
|
-----------------------------
|
data/ext/rbcluster/rbcluster.c
CHANGED
@@ -10,6 +10,7 @@
|
|
10
10
|
|
11
11
|
VALUE rbcluster_mCluster = Qnil;
|
12
12
|
VALUE rbcluster_cNode = Qnil;
|
13
|
+
VALUE rbcluster_cTree = Qnil;
|
13
14
|
|
14
15
|
VALUE rbcluster_rows2rb(double** data, int nrows, int ncols) {
|
15
16
|
VALUE rows = rb_ary_new2((long)nrows);
|
@@ -42,6 +43,26 @@ double* rbcluster_ary_to_doubles(VALUE data, int len) {
|
|
42
43
|
return result;
|
43
44
|
}
|
44
45
|
|
46
|
+
/*
 * Convert a Ruby Array of node objects into a heap-allocated array of C Node
 * structs.
 *
 * For every element the instance variables @left and @right are converted
 * with NUM2INT and @distance with NUM2DBL.
 *
 * data - Ruby Array of node objects (checked with Check_Type).
 * len  - out parameter; receives the number of elements in `data`.
 *
 * Returns a buffer owned by the caller, to be released with free(). The
 * buffer is never NULL, even for an empty Array.
 *
 * Raises (via the Ruby C API, i.e. does not return):
 *   TypeError   - `data` is not an Array, or an ivar is not numeric.
 *   ArgError    - the Array has more elements than fit in an int.
 *   NoMemError  - the buffer could not be allocated.
 */
Node* rbcluster_ary_to_nodes(VALUE data, int* len) {
  Check_Type(data, T_ARRAY);

  long length = RARRAY_LEN(data);

  /* *len is an int; refuse lengths that would be truncated by the cast. */
  if((long)(int)length != length) {
    rb_raise(rb_eArgError, "too many nodes");
  }

  ID id_left     = rb_intern("@left");
  ID id_right    = rb_intern("@right");
  ID id_distance = rb_intern("@distance");

  /*
   * Validation pass: the NUM2* conversions raise on bad input and never
   * return here, so run them once before anything is allocated. Otherwise a
   * bad element would leak the buffer.
   */
  for(long i = 0; i < length; ++i) {
    VALUE node = rb_ary_entry(data, i);

    (void)NUM2INT(rb_ivar_get(node, id_left));
    (void)NUM2INT(rb_ivar_get(node, id_right));
    (void)NUM2DBL(rb_ivar_get(node, id_distance));
  }

  /*
   * Allocate at least one element so an empty Array does not produce a NULL
   * that is indistinguishable from an allocation failure. calloc also guards
   * the count * size multiplication against overflow.
   */
  size_t count = length > 0 ? (size_t)length : 1;
  Node* result = calloc(count, sizeof *result);

  if(result == NULL) {
    rb_raise(rb_eNoMemError, "could not allocate memory for nodes");
  }

  for(long i = 0; i < length; ++i) {
    VALUE node = rb_ary_entry(data, i);

    result[i].left     = NUM2INT(rb_ivar_get(node, id_left));
    result[i].right    = NUM2INT(rb_ivar_get(node, id_right));
    result[i].distance = NUM2DBL(rb_ivar_get(node, id_distance));
  }

  *len = (int)length;
  return result;
}
|
65
|
+
|
45
66
|
double** rbcluster_ary_to_rows(VALUE data, int* nrows, int* ncols) {
|
46
67
|
Check_Type(data, T_ARRAY);
|
47
68
|
long rows, cols;
|
@@ -265,7 +286,7 @@ VALUE rbcluster_kcluster(int argc, VALUE* argv, VALUE self) {
|
|
265
286
|
free(weight);
|
266
287
|
free(clusterid);
|
267
288
|
|
268
|
-
return rb_ary_new3(3, result,
|
289
|
+
return rb_ary_new3(3, result, DBL2NUM(error), INT2NUM(ifound));
|
269
290
|
}
|
270
291
|
|
271
292
|
VALUE rbcluster_kmedoids(int argc, VALUE* argv, VALUE self) {
|
@@ -329,7 +350,7 @@ VALUE rbcluster_kmedoids(int argc, VALUE* argv, VALUE self) {
|
|
329
350
|
free(clusterid);
|
330
351
|
for(i = 1; i < nitems; ++i) free(distances[i]);
|
331
352
|
|
332
|
-
return rb_ary_new3(3, result,
|
353
|
+
return rb_ary_new3(3, result, DBL2NUM(error), INT2NUM(ifound));
|
333
354
|
}
|
334
355
|
|
335
356
|
VALUE rbcluster_median(VALUE self, VALUE ary) {
|
@@ -345,7 +366,7 @@ VALUE rbcluster_median(VALUE self, VALUE ary) {
|
|
345
366
|
arr[i] = NUM2DBL(num);
|
346
367
|
}
|
347
368
|
|
348
|
-
return
|
369
|
+
return DBL2NUM(median((int)len, arr));
|
349
370
|
}
|
350
371
|
|
351
372
|
VALUE rbcluster_mean(VALUE self, VALUE ary) {
|
@@ -361,7 +382,7 @@ VALUE rbcluster_mean(VALUE self, VALUE ary) {
|
|
361
382
|
arr[i] = NUM2DBL(num);
|
362
383
|
}
|
363
384
|
|
364
|
-
return
|
385
|
+
return DBL2NUM(mean((int)len, arr));
|
365
386
|
}
|
366
387
|
|
367
388
|
VALUE rbcluster_distancematrix(int argc, VALUE* argv, VALUE self) {
|
@@ -403,7 +424,7 @@ VALUE rbcluster_distancematrix(int argc, VALUE* argv, VALUE self) {
|
|
403
424
|
VALUE row = rb_ary_new();
|
404
425
|
|
405
426
|
for(j = 0; j < i; ++j){
|
406
|
-
rb_ary_push(row,
|
427
|
+
rb_ary_push(row, DBL2NUM(distances[i][j]));
|
407
428
|
}
|
408
429
|
|
409
430
|
// first row is NULL
|
@@ -482,7 +503,7 @@ VALUE rbcluster_clusterdistance(int argc, VALUE* argv, VALUE self) {
|
|
482
503
|
rbcluster_free_rows(rows, nrows);
|
483
504
|
rbcluster_free_mask(mask, nrows);
|
484
505
|
|
485
|
-
return
|
506
|
+
return DBL2NUM(result);
|
486
507
|
}
|
487
508
|
|
488
509
|
VALUE rbcluster_create_node(Node* node) {
|
@@ -500,6 +521,10 @@ VALUE rbcluster_node_initialize(int argc, VALUE* argv, VALUE self) {
|
|
500
521
|
|
501
522
|
rb_scan_args(argc, argv, "21", &left, &right, &distance);
|
502
523
|
|
524
|
+
if(NIL_P(distance)) {
|
525
|
+
distance = DBL2NUM(0.0);
|
526
|
+
}
|
527
|
+
|
503
528
|
rb_ivar_set(self, rb_intern("@left"), left);
|
504
529
|
rb_ivar_set(self, rb_intern("@right"), right);
|
505
530
|
rb_ivar_set(self, rb_intern("@distance"), distance);
|
@@ -586,7 +611,8 @@ VALUE rbcluster_treecluster(int argc, VALUE* argv, VALUE self) {
|
|
586
611
|
rbcluster_free_rows(rows, nrows);
|
587
612
|
rbcluster_free_mask(mask, nrows);
|
588
613
|
|
589
|
-
|
614
|
+
VALUE args[] = { result };
|
615
|
+
return rb_class_new_instance(1, args, rbcluster_cTree);
|
590
616
|
}
|
591
617
|
|
592
618
|
VALUE rbcluster_somcluster(int argc, VALUE* argv, VALUE self) {
|
@@ -648,7 +674,7 @@ VALUE rbcluster_somcluster(int argc, VALUE* argv, VALUE self) {
|
|
648
674
|
for(j = 0; j < nygrid; ++j) {
|
649
675
|
jarr = rb_ary_new2(ncols);
|
650
676
|
for(k = 0; k < ncols; ++k) {
|
651
|
-
rb_ary_push(jarr,
|
677
|
+
rb_ary_push(jarr, DBL2NUM(celldata[i][j][k]));
|
652
678
|
}
|
653
679
|
rb_ary_push(iarr, jarr);
|
654
680
|
}
|
@@ -672,7 +698,7 @@ VALUE rbcluster_somcluster(int argc, VALUE* argv, VALUE self) {
|
|
672
698
|
return rb_ary_new3(2, rb_clusterid, rb_celldata);
|
673
699
|
}
|
674
700
|
|
675
|
-
void
|
701
|
+
void rbcluster_print_doubles(double* vals, int len) {
|
676
702
|
puts("[");
|
677
703
|
for(int i = 0; i < len; ++i) {
|
678
704
|
printf("\t%d: %f\n", i, vals[i]);
|
@@ -680,7 +706,7 @@ void print_doubles(double* vals, int len) {
|
|
680
706
|
puts("]");
|
681
707
|
}
|
682
708
|
|
683
|
-
void
|
709
|
+
void rbcluster_print_double_matrix(double** vals, int nrows, int ncols) {
|
684
710
|
puts("[");
|
685
711
|
for(int i = 0; i < nrows; ++i) {
|
686
712
|
printf("\t[ ");
|
@@ -724,7 +750,7 @@ VALUE rbcluster_pca(VALUE self, VALUE data) {
|
|
724
750
|
|
725
751
|
int ok = pca(nrows, ncols, u, v, w);
|
726
752
|
if(ok == -1) {
|
727
|
-
rb_raise(
|
753
|
+
rb_raise(rb_eNoMemError, "could not allocate memory");
|
728
754
|
} else if(ok > 0) {
|
729
755
|
rb_raise(rb_eStandardError, "svd failed to converge");
|
730
756
|
}
|
@@ -751,9 +777,37 @@ VALUE rbcluster_pca(VALUE self, VALUE data) {
|
|
751
777
|
return rb_ary_new3(4, mean, coordinates, pc, eigenvalues);
|
752
778
|
}
|
753
779
|
|
780
|
+
/*
 * Cluster.cuttree(nodes, nclusters)
 *
 * Converts `nodes` to C Node structs, calls cuttree() from the C library and
 * returns the resulting cluster ids as a Ruby Array (one entry per item,
 * i.e. number of nodes + 1).
 *
 * nodes    - Ruby Array of node objects (see rbcluster_ary_to_nodes).
 * clusters - Integer, the number of clusters to cut the tree into.
 *
 * Raises ArgumentError if nclusters is < 1 or larger than the number of
 * items, and NoMemError if a buffer cannot be allocated or cuttree() reports
 * failure by setting clusterid[0] to -1.
 */
VALUE rbcluster_cuttree(VALUE self, VALUE nodes, VALUE clusters) {
  int nelements, nclusters;

  nclusters = NUM2INT(clusters);

  Node* cnodes = rbcluster_ary_to_nodes(nodes, &nelements);
  int n = nelements + 1;

  /* rb_raise does not return, so cnodes must be released before raising. */
  if(nclusters < 1) {
    free(cnodes);
    rb_raise(rb_eArgError, "nclusters must be >= 1");
  }

  if(nclusters > n) {
    free(cnodes);
    rb_raise(rb_eArgError, "more clusters requested than items available");
  }

  /* Heap allocation: n is caller-controlled and may be too big for the stack. */
  int* clusterid = malloc((size_t)n * sizeof *clusterid);
  if(clusterid == NULL) {
    free(cnodes);
    rb_raise(rb_eNoMemError, "could not allocate memory for cuttree()");
  }

  cuttree(n, cnodes, nclusters, clusterid);
  free(cnodes);

  if(clusterid[0] == -1) {
    free(clusterid);
    rb_raise(rb_eNoMemError, "could not allocate memory for cuttree()");
  }

  VALUE result = rbcluster_ints2rb(clusterid, (long)n);
  free(clusterid);

  return result;
}
|
806
|
+
|
754
807
|
void Init_rbcluster() {
|
755
808
|
rbcluster_mCluster = rb_define_module("Cluster");
|
756
809
|
rbcluster_cNode = rb_define_class_under(rbcluster_mCluster, "Node", rb_cObject);
|
810
|
+
rbcluster_cTree = rb_define_class_under(rbcluster_mCluster, "Tree", rb_cObject);
|
757
811
|
|
758
812
|
rb_define_attr(rbcluster_cNode, "left", 1, 1);
|
759
813
|
rb_define_attr(rbcluster_cNode, "right", 1, 1);
|
@@ -770,6 +824,7 @@ void Init_rbcluster() {
|
|
770
824
|
rb_define_singleton_method(rbcluster_mCluster, "treecluster", rbcluster_treecluster, -1);
|
771
825
|
rb_define_singleton_method(rbcluster_mCluster, "somcluster", rbcluster_somcluster, -1);
|
772
826
|
rb_define_singleton_method(rbcluster_mCluster, "pca", rbcluster_pca, 1);
|
827
|
+
rb_define_singleton_method(rbcluster_mCluster, "cuttree", rbcluster_cuttree, 2);
|
773
828
|
|
774
829
|
rb_define_const(rbcluster_mCluster, "C_VERSION", rb_str_new2(CLUSTERVERSION));
|
775
830
|
}
|
data/lib/rbcluster.rb
CHANGED
data/lib/rbcluster/tree.rb
CHANGED
@@ -1,20 +1,47 @@
|
|
1
1
|
module Cluster
  # An ordered collection of Cluster::Node objects.
  class Tree
    # nodes - an Array of Node instances; any other value is coerced with
    #         Kernel#Array before validation.
    #
    # Raises ArgumentError if any element is not a Node.
    def initialize(nodes)
      @nodes = Array(nodes)
      @nodes.each_with_index do |node, idx|
        unless node.kind_of?(Node)
          # Interpolate the expected class itself; Node.class is just "Class".
          raise ArgumentError, "expected #{Node}, got #{node.class} at index #{idx}"
        end
      end
    end

    # Returns the number of nodes in the tree.
    def size
      @nodes.size
    end

    # Returns a shallow copy of the node list, so callers cannot change which
    # nodes the tree holds.
    def to_a
      @nodes.dup
    end

    # Returns one line per node, each produced by the node's own #to_s.
    def to_s
      @nodes.map { |e| "#{e}\n" }.join
    end

    # Returns the node at idx, with Array#[] semantics.
    def [](idx)
      @nodes[idx]
    end

    # Returns the node at idx, with Array#fetch semantics (the optional block
    # is forwarded).
    def fetch(idx, &blk)
      @nodes.fetch(idx, &blk)
    end

    # Divides every node distance by the largest distance in the tree,
    # modifying the nodes in place. Does nothing when the tree is empty or the
    # largest distance is zero.
    #
    # Returns nil.
    def scale
      max = @nodes.map { |e| e.distance }.max

      unless max.nil? || max.zero?
        # to_f keeps the result fractional when distances are Integers.
        divisor = max.to_f
        @nodes.each do |node|
          node.distance = node.distance / divisor
        end
      end

      nil
    end

    # Cuts the tree into nclusters clusters by delegating to Cluster.cuttree.
    def cut(nclusters)
      Cluster.cuttree(@nodes, nclusters)
    end

  end
end
|
data/lib/rbcluster/version.rb
CHANGED
data/rbcluster.gemspec
CHANGED
@@ -8,7 +8,7 @@ Gem::Specification.new do |s|
|
|
8
8
|
s.platform = Gem::Platform::RUBY
|
9
9
|
s.authors = ["Jari Bakken", "Michiel Jan Laurens de Hoon"]
|
10
10
|
s.email = ["jari.bakken@gmail.com"]
|
11
|
-
s.homepage = "http://
|
11
|
+
s.homepage = "http://github.com/jarib/rbcluster"
|
12
12
|
s.summary = %q{Ruby bindings for the Cluster C library}
|
13
13
|
s.description = %q{This gem provides a Ruby extension to the clustering routines in the C Clustering Library (which also backs e.g. Python's pycluster and Perl's Algorithm::Cluster).}
|
14
14
|
|
data/spec/node_spec.rb
CHANGED
data/spec/tree_spec.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Cluster
  # Specs for Cluster::Tree: construction checks plus the accessors, scaling
  # and cutting of a tree built by Cluster.treecluster.
  describe Tree do
    context "creating" do
      it "should raise ArgumentError if not given an array of Nodes" do
        attempts = [
          lambda { Tree.new(1) },
          lambda { Tree.new([Node.new(1,2), Node.new(2,3), nil]) },
          lambda { Tree.new }
        ]

        attempts.each do |attempt|
          attempt.should raise_error(ArgumentError)
        end
      end

      it "returns a Tree instance when given an array of nodes" do
        created = Tree.new([Node.new(1, 2)])
        created.should be_kind_of(Tree)
      end
    end

    context "using" do
      # Four rows, so the clustered tree holds three nodes.
      let(:rows) do
        [
          [  1.1, 2.2, 3.3, 4.4, 5.5],
          [  3.1, 3.2, 1.3, 2.4, 1.5],
          [  4.1, 2.2, 0.3, 5.4, 0.5],
          [ 12.1, 2.0, 0.0, 5.0, 0.0]
        ]
      end

      let(:tree) { Cluster.treecluster(rows) }

      it "fetches a copy of the node array" do
        copy = tree.to_a
        copy.should be_kind_of(Array)
        copy.size.should == 3

        # Emptying the copy must not affect the tree itself.
        copy.clear
        tree.size.should == 3
      end

      it "has a string representation" do
        text = tree.to_s
        text.should include('(2, 1): 2.6')
      end

      it "can scale the tree" do
        tree.scale

        tree.to_a.each do |node|
          node.distance.should be_between(0, 1)
        end
      end

      it "can cut the tree" do
        ids = tree.cut(3)
        ids.should == [1, 2, 2, 0]
      end

      it "gets a node" do
        first = tree.to_a[0]
        tree[0].should == first
      end

      it "fetches a node" do
        first = tree.to_a[0]
        tree.fetch(0).should == first
      end
    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbcluster
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -14,7 +14,7 @@ date: 2012-01-03 00:00:00.000000000Z
|
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rake-compiler
|
17
|
-
requirement: &
|
17
|
+
requirement: &2157667380 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ! '>='
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: '0'
|
23
23
|
type: :development
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *2157667380
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: rspec
|
28
|
-
requirement: &
|
28
|
+
requirement: &2157666880 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ~>
|
@@ -33,7 +33,7 @@ dependencies:
|
|
33
33
|
version: 2.6.0
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *2157666880
|
37
37
|
description: This gem provides a Ruby extension to the clustering routines in the
|
38
38
|
C Clustering Library (which also backs e.g. Python's pycluster and Perl's Algorithm::Cluster).
|
39
39
|
email:
|
@@ -55,13 +55,13 @@ files:
|
|
55
55
|
- ext/rbcluster/extconf.rb
|
56
56
|
- ext/rbcluster/rbcluster.c
|
57
57
|
- lib/rbcluster.rb
|
58
|
+
- lib/rbcluster/node.rb
|
58
59
|
- lib/rbcluster/tree.rb
|
59
60
|
- lib/rbcluster/version.rb
|
60
61
|
- rbcluster.gemspec
|
61
62
|
- spec/clustercentroids_spec.rb
|
62
63
|
- spec/clusterdistance_spec.rb
|
63
64
|
- spec/clustermedoids_spec.rb
|
64
|
-
- spec/cuttree_spec.rb
|
65
65
|
- spec/kcluster_spec.rb
|
66
66
|
- spec/kmedoids_spec.rb
|
67
67
|
- spec/median_mean_spec.rb
|
@@ -69,8 +69,9 @@ files:
|
|
69
69
|
- spec/pca_spec.rb
|
70
70
|
- spec/somcluster_spec.rb
|
71
71
|
- spec/spec_helper.rb
|
72
|
+
- spec/tree_spec.rb
|
72
73
|
- spec/treecluster_spec.rb
|
73
|
-
homepage: http://
|
74
|
+
homepage: http://github.com/jarib/rbcluster
|
74
75
|
licenses: []
|
75
76
|
post_install_message:
|
76
77
|
rdoc_options: []
|
@@ -98,7 +99,6 @@ test_files:
|
|
98
99
|
- spec/clustercentroids_spec.rb
|
99
100
|
- spec/clusterdistance_spec.rb
|
100
101
|
- spec/clustermedoids_spec.rb
|
101
|
-
- spec/cuttree_spec.rb
|
102
102
|
- spec/kcluster_spec.rb
|
103
103
|
- spec/kmedoids_spec.rb
|
104
104
|
- spec/median_mean_spec.rb
|
@@ -106,5 +106,6 @@ test_files:
|
|
106
106
|
- spec/pca_spec.rb
|
107
107
|
- spec/somcluster_spec.rb
|
108
108
|
- spec/spec_helper.rb
|
109
|
+
- spec/tree_spec.rb
|
109
110
|
- spec/treecluster_spec.rb
|
110
111
|
has_rdoc:
|