rbcluster 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +6 -3
- data/ext/rbcluster/rbcluster.c +66 -11
- data/lib/rbcluster.rb +1 -0
- data/lib/rbcluster/node.rb +7 -0
- data/lib/rbcluster/tree.rb +31 -4
- data/lib/rbcluster/version.rb +1 -1
- data/rbcluster.gemspec +1 -1
- data/spec/node_spec.rb +1 -0
- data/spec/tree_spec.rb +58 -0
- metadata +9 -8
- data/spec/cuttree_spec.rb +0 -6
data/README.md
CHANGED
@@ -3,6 +3,9 @@ rbcluster
|
|
3
3
|
|
4
4
|
Ruby bindings to the Cluster C library.
|
5
5
|
|
6
|
+
[Build status on Travis CI](http://travis-ci.org/jarib/rbcluster)
|
7
|
+
|
8
|
+
|
6
9
|
TODO
|
7
10
|
----
|
8
11
|
|
@@ -10,13 +13,12 @@ Functions:
|
|
10
13
|
|
11
14
|
* Cluster.clustercentroids
|
12
15
|
* Cluster.clustermedoids
|
13
|
-
* Cluster::Tree#
|
16
|
+
* Cluster::Tree#slice
|
14
17
|
|
15
18
|
Other:
|
16
19
|
|
17
|
-
* an examples/ folder
|
18
20
|
* make :transpose work
|
19
|
-
* specs for bad inputs
|
21
|
+
* more specs for bad inputs
|
20
22
|
|
21
23
|
DONE
|
22
24
|
----
|
@@ -36,6 +38,7 @@ See also
|
|
36
38
|
|
37
39
|
* http://bonsai.hgc.jp/~mdehoon/software/cluster/software.htm
|
38
40
|
* http://bonsai.hgc.jp/~mdehoon/software/cluster/cluster.pdf
|
41
|
+
* http://github.com/jarib/rbcluster
|
39
42
|
|
40
43
|
Note on Patches/Pull Requests
|
41
44
|
-----------------------------
|
data/ext/rbcluster/rbcluster.c
CHANGED
@@ -10,6 +10,7 @@
|
|
10
10
|
|
11
11
|
VALUE rbcluster_mCluster = Qnil;
|
12
12
|
VALUE rbcluster_cNode = Qnil;
|
13
|
+
VALUE rbcluster_cTree = Qnil;
|
13
14
|
|
14
15
|
VALUE rbcluster_rows2rb(double** data, int nrows, int ncols) {
|
15
16
|
VALUE rows = rb_ary_new2((long)nrows);
|
@@ -42,6 +43,26 @@ double* rbcluster_ary_to_doubles(VALUE data, int len) {
|
|
42
43
|
return result;
|
43
44
|
}
|
44
45
|
|
46
|
+
/* Arguments handed to rbcluster_fill_nodes() through rb_protect(). */
struct rbcluster_fill_nodes_args {
  VALUE data;   /* Ruby Array of node objects */
  Node* nodes;  /* destination buffer, at least `length` entries */
  long length;  /* number of entries to convert */
};

/*
 * Copies the @left, @right and @distance instance variables of every entry
 * of args->data into args->nodes.
 *
 * NUM2INT / NUM2DBL raise when an instance variable is missing or not
 * numeric, so this function is only ever invoked under rb_protect().
 */
static VALUE rbcluster_fill_nodes(VALUE ptr) {
  struct rbcluster_fill_nodes_args* args = (struct rbcluster_fill_nodes_args*)ptr;
  ID id_left     = rb_intern("@left");
  ID id_right    = rb_intern("@right");
  ID id_distance = rb_intern("@distance");

  for(long i = 0; i < args->length; ++i) {
    VALUE node = rb_ary_entry(args->data, i);

    args->nodes[i].left     = NUM2INT(rb_ivar_get(node, id_left));
    args->nodes[i].right    = NUM2INT(rb_ivar_get(node, id_right));
    args->nodes[i].distance = NUM2DBL(rb_ivar_get(node, id_distance));
  }

  return Qnil;
}

/*
 * Converts a Ruby Array of node objects into a newly malloc()ed array of
 * C Node structs.
 *
 * data - Ruby Array; each entry must carry numeric @left, @right and
 *        @distance instance variables.
 * len  - out parameter, receives the number of converted nodes.
 *
 * Returns the node array. The caller owns it and must release it with free().
 * Raises TypeError if data is not an Array or an entry is not convertible,
 * ArgumentError if the array does not fit in an int, and NoMemError if the
 * buffer cannot be allocated. No memory is leaked on any of these paths.
 */
Node* rbcluster_ary_to_nodes(VALUE data, int* len) {
  Check_Type(data, T_ARRAY);

  long length = RARRAY_LEN(data);

  /* *len is an int; refuse lengths that would be truncated by the cast. */
  if((long)(int)length != length) {
    rb_raise(rb_eArgError, "too many nodes");
  }

  /* Always request at least one element: malloc(0) may legitimately return NULL. */
  Node* result = malloc((size_t)(length > 0 ? length : 1) * sizeof *result);
  if(result == NULL) {
    rb_raise(rb_eNoMemError, "could not allocate memory");
  }

  /* Run the conversions under rb_protect so the buffer can be freed before
   * a Ruby exception propagates (rb_raise longjmps past normal cleanup). */
  struct rbcluster_fill_nodes_args args = { data, result, length };
  int state = 0;
  rb_protect(rbcluster_fill_nodes, (VALUE)&args, &state);
  if(state != 0) {
    free(result);
    rb_jump_tag(state);
  }

  *len = (int)length;
  return result;
}
|
65
|
+
|
45
66
|
double** rbcluster_ary_to_rows(VALUE data, int* nrows, int* ncols) {
|
46
67
|
Check_Type(data, T_ARRAY);
|
47
68
|
long rows, cols;
|
@@ -265,7 +286,7 @@ VALUE rbcluster_kcluster(int argc, VALUE* argv, VALUE self) {
|
|
265
286
|
free(weight);
|
266
287
|
free(clusterid);
|
267
288
|
|
268
|
-
return rb_ary_new3(3, result,
|
289
|
+
return rb_ary_new3(3, result, DBL2NUM(error), INT2NUM(ifound));
|
269
290
|
}
|
270
291
|
|
271
292
|
VALUE rbcluster_kmedoids(int argc, VALUE* argv, VALUE self) {
|
@@ -329,7 +350,7 @@ VALUE rbcluster_kmedoids(int argc, VALUE* argv, VALUE self) {
|
|
329
350
|
free(clusterid);
|
330
351
|
for(i = 1; i < nitems; ++i) free(distances[i]);
|
331
352
|
|
332
|
-
return rb_ary_new3(3, result,
|
353
|
+
return rb_ary_new3(3, result, DBL2NUM(error), INT2NUM(ifound));
|
333
354
|
}
|
334
355
|
|
335
356
|
VALUE rbcluster_median(VALUE self, VALUE ary) {
|
@@ -345,7 +366,7 @@ VALUE rbcluster_median(VALUE self, VALUE ary) {
|
|
345
366
|
arr[i] = NUM2DBL(num);
|
346
367
|
}
|
347
368
|
|
348
|
-
return
|
369
|
+
return DBL2NUM(median((int)len, arr));
|
349
370
|
}
|
350
371
|
|
351
372
|
VALUE rbcluster_mean(VALUE self, VALUE ary) {
|
@@ -361,7 +382,7 @@ VALUE rbcluster_mean(VALUE self, VALUE ary) {
|
|
361
382
|
arr[i] = NUM2DBL(num);
|
362
383
|
}
|
363
384
|
|
364
|
-
return
|
385
|
+
return DBL2NUM(mean((int)len, arr));
|
365
386
|
}
|
366
387
|
|
367
388
|
VALUE rbcluster_distancematrix(int argc, VALUE* argv, VALUE self) {
|
@@ -403,7 +424,7 @@ VALUE rbcluster_distancematrix(int argc, VALUE* argv, VALUE self) {
|
|
403
424
|
VALUE row = rb_ary_new();
|
404
425
|
|
405
426
|
for(j = 0; j < i; ++j){
|
406
|
-
rb_ary_push(row,
|
427
|
+
rb_ary_push(row, DBL2NUM(distances[i][j]));
|
407
428
|
}
|
408
429
|
|
409
430
|
// first row is NULL
|
@@ -482,7 +503,7 @@ VALUE rbcluster_clusterdistance(int argc, VALUE* argv, VALUE self) {
|
|
482
503
|
rbcluster_free_rows(rows, nrows);
|
483
504
|
rbcluster_free_mask(mask, nrows);
|
484
505
|
|
485
|
-
return
|
506
|
+
return DBL2NUM(result);
|
486
507
|
}
|
487
508
|
|
488
509
|
VALUE rbcluster_create_node(Node* node) {
|
@@ -500,6 +521,10 @@ VALUE rbcluster_node_initialize(int argc, VALUE* argv, VALUE self) {
|
|
500
521
|
|
501
522
|
rb_scan_args(argc, argv, "21", &left, &right, &distance);
|
502
523
|
|
524
|
+
if(NIL_P(distance)) {
|
525
|
+
distance = DBL2NUM(0.0);
|
526
|
+
}
|
527
|
+
|
503
528
|
rb_ivar_set(self, rb_intern("@left"), left);
|
504
529
|
rb_ivar_set(self, rb_intern("@right"), right);
|
505
530
|
rb_ivar_set(self, rb_intern("@distance"), distance);
|
@@ -586,7 +611,8 @@ VALUE rbcluster_treecluster(int argc, VALUE* argv, VALUE self) {
|
|
586
611
|
rbcluster_free_rows(rows, nrows);
|
587
612
|
rbcluster_free_mask(mask, nrows);
|
588
613
|
|
589
|
-
|
614
|
+
VALUE args[] = { result };
|
615
|
+
return rb_class_new_instance(1, args, rbcluster_cTree);
|
590
616
|
}
|
591
617
|
|
592
618
|
VALUE rbcluster_somcluster(int argc, VALUE* argv, VALUE self) {
|
@@ -648,7 +674,7 @@ VALUE rbcluster_somcluster(int argc, VALUE* argv, VALUE self) {
|
|
648
674
|
for(j = 0; j < nygrid; ++j) {
|
649
675
|
jarr = rb_ary_new2(ncols);
|
650
676
|
for(k = 0; k < ncols; ++k) {
|
651
|
-
rb_ary_push(jarr,
|
677
|
+
rb_ary_push(jarr, DBL2NUM(celldata[i][j][k]));
|
652
678
|
}
|
653
679
|
rb_ary_push(iarr, jarr);
|
654
680
|
}
|
@@ -672,7 +698,7 @@ VALUE rbcluster_somcluster(int argc, VALUE* argv, VALUE self) {
|
|
672
698
|
return rb_ary_new3(2, rb_clusterid, rb_celldata);
|
673
699
|
}
|
674
700
|
|
675
|
-
void
|
701
|
+
void rbcluster_print_doubles(double* vals, int len) {
|
676
702
|
puts("[");
|
677
703
|
for(int i = 0; i < len; ++i) {
|
678
704
|
printf("\t%d: %f\n", i, vals[i]);
|
@@ -680,7 +706,7 @@ void print_doubles(double* vals, int len) {
|
|
680
706
|
puts("]");
|
681
707
|
}
|
682
708
|
|
683
|
-
void
|
709
|
+
void rbcluster_print_double_matrix(double** vals, int nrows, int ncols) {
|
684
710
|
puts("[");
|
685
711
|
for(int i = 0; i < nrows; ++i) {
|
686
712
|
printf("\t[ ");
|
@@ -724,7 +750,7 @@ VALUE rbcluster_pca(VALUE self, VALUE data) {
|
|
724
750
|
|
725
751
|
int ok = pca(nrows, ncols, u, v, w);
|
726
752
|
if(ok == -1) {
|
727
|
-
rb_raise(
|
753
|
+
rb_raise(rb_eNoMemError, "could not allocate memory");
|
728
754
|
} else if(ok > 0) {
|
729
755
|
rb_raise(rb_eStandardError, "svd failed to converge");
|
730
756
|
}
|
@@ -751,9 +777,37 @@ VALUE rbcluster_pca(VALUE self, VALUE data) {
|
|
751
777
|
return rb_ary_new3(4, mean, coordinates, pc, eigenvalues);
|
752
778
|
}
|
753
779
|
|
780
|
+
/*
 * Cluster.cuttree(nodes, nclusters)
 *
 * Converts the given Array of nodes to C Node structs and calls cuttree()
 * from the C library to obtain one cluster id per item.
 *
 * nodes    - Ruby Array of node objects (see rbcluster_ary_to_nodes).
 * clusters - number of clusters requested; must be between 1 and the number
 *            of items (number of nodes + 1).
 *
 * Returns a Ruby Array of cluster ids built by rbcluster_ints2rb().
 * Raises ArgumentError for an out-of-range cluster count and NoMemError when
 * memory cannot be allocated. All C buffers are released before raising.
 *
 * NOTE(review): the left/right indices stored in the nodes are passed to
 * cuttree() unchecked - confirm whether hand-built nodes need range validation.
 */
VALUE rbcluster_cuttree(VALUE self, VALUE nodes, VALUE clusters) {
  int nelements = 0;
  int nclusters = NUM2INT(clusters);

  /* Validate what we can before anything is allocated. */
  if(nclusters < 1) {
    rb_raise(rb_eArgError, "nclusters must be >= 1");
  }

  Node* cnodes = rbcluster_ary_to_nodes(nodes, &nelements);
  int n = nelements + 1;

  if(nclusters > n) {
    free(cnodes);
    rb_raise(rb_eArgError, "more clusters requested than items available");
  }

  /* Heap allocation instead of a VLA: n is user controlled and a large
   * tree must not be able to overflow the stack. */
  int* clusterid = malloc((size_t)n * sizeof *clusterid);
  if(clusterid == NULL) {
    free(cnodes);
    rb_raise(rb_eNoMemError, "could not allocate memory for cuttree()");
  }

  cuttree(n, cnodes, nclusters, clusterid);
  free(cnodes);

  /* A first id of -1 is treated as an allocation failure inside cuttree(). */
  if(clusterid[0] == -1) {
    free(clusterid);
    rb_raise(rb_eNoMemError, "could not allocate memory for cuttree()");
  }

  VALUE result = rbcluster_ints2rb(clusterid, (long)n);
  free(clusterid);

  return result;
}
|
806
|
+
|
754
807
|
void Init_rbcluster() {
|
755
808
|
rbcluster_mCluster = rb_define_module("Cluster");
|
756
809
|
rbcluster_cNode = rb_define_class_under(rbcluster_mCluster, "Node", rb_cObject);
|
810
|
+
rbcluster_cTree = rb_define_class_under(rbcluster_mCluster, "Tree", rb_cObject);
|
757
811
|
|
758
812
|
rb_define_attr(rbcluster_cNode, "left", 1, 1);
|
759
813
|
rb_define_attr(rbcluster_cNode, "right", 1, 1);
|
@@ -770,6 +824,7 @@ void Init_rbcluster() {
|
|
770
824
|
rb_define_singleton_method(rbcluster_mCluster, "treecluster", rbcluster_treecluster, -1);
|
771
825
|
rb_define_singleton_method(rbcluster_mCluster, "somcluster", rbcluster_somcluster, -1);
|
772
826
|
rb_define_singleton_method(rbcluster_mCluster, "pca", rbcluster_pca, 1);
|
827
|
+
rb_define_singleton_method(rbcluster_mCluster, "cuttree", rbcluster_cuttree, 2);
|
773
828
|
|
774
829
|
rb_define_const(rbcluster_mCluster, "C_VERSION", rb_str_new2(CLUSTERVERSION));
|
775
830
|
}
|
data/lib/rbcluster.rb
CHANGED
data/lib/rbcluster/tree.rb
CHANGED
@@ -1,20 +1,47 @@
|
|
1
1
|
module Cluster
  # An ordered collection of Cluster::Node objects.
  class Tree
    # nodes - an Array of Node instances (anything else is wrapped with Array()).
    #
    # Raises ArgumentError if any element is not a Node.
    def initialize(nodes)
      @nodes = Array(nodes)
      @nodes.each_with_index do |node, idx|
        unless node.kind_of?(Node)
          # Interpolate the Node class itself; Node.class would print "Class".
          raise ArgumentError, "expected #{Node}, got #{node.class} at index #{idx}"
        end
      end
    end

    # Returns the number of nodes in the tree.
    def size
      @nodes.size
    end

    # Returns a shallow copy of the node array, so callers cannot add or
    # remove nodes of this tree through it.
    def to_a
      @nodes.dup
    end

    # Returns one line per node, each built from the node's own #to_s.
    def to_s
      @nodes.map { |e| "#{e}\n" }.join
    end

    # Returns the node at idx, or nil when idx is out of range.
    def [](idx)
      @nodes[idx]
    end

    # Like Array#fetch: raises IndexError for an out-of-range idx unless a
    # block is given.
    def fetch(idx, &blk)
      @nodes.fetch(idx, &blk)
    end

    # Divides every node distance by the largest distance in the tree, in
    # place. An empty tree, or one whose largest distance is zero, is left
    # untouched (there is nothing meaningful to divide by).
    #
    # Returns nil.
    def scale
      max = @nodes.map { |e| e.distance }.max
      return nil if max.nil? || max.zero?

      # Float division so Integer distances are not truncated.
      divisor = max.to_f
      @nodes.each do |node|
        node.distance = node.distance / divisor
      end

      nil
    end

    # Cuts the tree into nclusters clusters via Cluster.cuttree and returns
    # its result.
    def cut(nclusters)
      Cluster.cuttree(@nodes, nclusters)
    end

  end
end
|
data/lib/rbcluster/version.rb
CHANGED
data/rbcluster.gemspec
CHANGED
@@ -8,7 +8,7 @@ Gem::Specification.new do |s|
|
|
8
8
|
s.platform = Gem::Platform::RUBY
|
9
9
|
s.authors = ["Jari Bakken", "Michiel Jan Laurens de Hoon"]
|
10
10
|
s.email = ["jari.bakken@gmail.com"]
|
11
|
-
s.homepage = "http://
|
11
|
+
s.homepage = "http://github.com/jarib/rbcluster"
|
12
12
|
s.summary = %q{Ruby bindings for the Cluster C library}
|
13
13
|
s.description = %q{This gem provides a Ruby extension to the clustering routines in the C Clustering Library (which also backs e.g. Python's pycluster and Perl's Algorithm::Cluster).}
|
14
14
|
|
data/spec/node_spec.rb
CHANGED
data/spec/tree_spec.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'spec_helper'

module Cluster
  # Behaviour of Cluster::Tree: construction checks and the public accessors.
  describe Tree do
    context "creating" do
      it "should raise ArgumentError if not given an array of Nodes" do
        # Inputs that are not (entirely) made of Node instances.
        invalid_inputs = [
          1,
          [Node.new(1,2), Node.new(2,3), nil]
        ]

        invalid_inputs.each do |input|
          lambda { Tree.new(input) }.should raise_error(ArgumentError)
        end

        # Calling without the required argument.
        lambda { Tree.new }.should raise_error(ArgumentError)
      end

      it "returns a Tree instance when given an array of nodes" do
        created = Tree.new([Node.new(1, 2)])
        created.should be_kind_of(Tree)
      end
    end

    context "using" do
      # Four rows of data, clustered into a tree of three nodes.
      let(:tree) do
        rows = [
          [  1.1, 2.2, 3.3, 4.4, 5.5],
          [  3.1, 3.2, 1.3, 2.4, 1.5],
          [  4.1, 2.2, 0.3, 5.4, 0.5],
          [ 12.1, 2.0, 0.0, 5.0, 0.0]
        ]

        Cluster.treecluster(rows)
      end

      it "fetches a copy of the node array" do
        copy = tree.to_a
        copy.should be_kind_of(Array)
        copy.size.should == 3

        # Emptying the copy must not affect the tree itself.
        copy.clear
        tree.size.should == 3
      end

      it "has a string representation" do
        tree.to_s.should include('(2, 1): 2.6')
      end

      it "can scale the tree" do
        tree.scale

        tree.to_a.each do |node|
          node.distance.should be_between(0, 1)
        end
      end

      it "can cut the tree" do
        tree.cut(3).should == [1, 2, 2, 0]
      end

      it "gets a node" do
        tree[0].should == tree.to_a[0]
      end

      it "fetches a node" do
        tree.fetch(0).should == tree.to_a[0]
      end
    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbcluster
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -14,7 +14,7 @@ date: 2012-01-03 00:00:00.000000000Z
|
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rake-compiler
|
17
|
-
requirement: &
|
17
|
+
requirement: &2157667380 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ! '>='
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: '0'
|
23
23
|
type: :development
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *2157667380
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: rspec
|
28
|
-
requirement: &
|
28
|
+
requirement: &2157666880 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ~>
|
@@ -33,7 +33,7 @@ dependencies:
|
|
33
33
|
version: 2.6.0
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *2157666880
|
37
37
|
description: This gem provides a Ruby extension to the clustering routines in the
|
38
38
|
C Clustering Library (which also backs e.g. Python's pycluster and Perl's Algorithm::Cluster).
|
39
39
|
email:
|
@@ -55,13 +55,13 @@ files:
|
|
55
55
|
- ext/rbcluster/extconf.rb
|
56
56
|
- ext/rbcluster/rbcluster.c
|
57
57
|
- lib/rbcluster.rb
|
58
|
+
- lib/rbcluster/node.rb
|
58
59
|
- lib/rbcluster/tree.rb
|
59
60
|
- lib/rbcluster/version.rb
|
60
61
|
- rbcluster.gemspec
|
61
62
|
- spec/clustercentroids_spec.rb
|
62
63
|
- spec/clusterdistance_spec.rb
|
63
64
|
- spec/clustermedoids_spec.rb
|
64
|
-
- spec/cuttree_spec.rb
|
65
65
|
- spec/kcluster_spec.rb
|
66
66
|
- spec/kmedoids_spec.rb
|
67
67
|
- spec/median_mean_spec.rb
|
@@ -69,8 +69,9 @@ files:
|
|
69
69
|
- spec/pca_spec.rb
|
70
70
|
- spec/somcluster_spec.rb
|
71
71
|
- spec/spec_helper.rb
|
72
|
+
- spec/tree_spec.rb
|
72
73
|
- spec/treecluster_spec.rb
|
73
|
-
homepage: http://
|
74
|
+
homepage: http://github.com/jarib/rbcluster
|
74
75
|
licenses: []
|
75
76
|
post_install_message:
|
76
77
|
rdoc_options: []
|
@@ -98,7 +99,6 @@ test_files:
|
|
98
99
|
- spec/clustercentroids_spec.rb
|
99
100
|
- spec/clusterdistance_spec.rb
|
100
101
|
- spec/clustermedoids_spec.rb
|
101
|
-
- spec/cuttree_spec.rb
|
102
102
|
- spec/kcluster_spec.rb
|
103
103
|
- spec/kmedoids_spec.rb
|
104
104
|
- spec/median_mean_spec.rb
|
@@ -106,5 +106,6 @@ test_files:
|
|
106
106
|
- spec/pca_spec.rb
|
107
107
|
- spec/somcluster_spec.rb
|
108
108
|
- spec/spec_helper.rb
|
109
|
+
- spec/tree_spec.rb
|
109
110
|
- spec/treecluster_spec.rb
|
110
111
|
has_rdoc:
|