rbcluster 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -3,6 +3,9 @@ rbcluster
3
3
 
4
4
  Ruby bindings to the Cluster C library.
5
5
 
6
+ [![Build Status](https://secure.travis-ci.org/jarib/rbcluster.png)](http://travis-ci.org/jarib/rbcluster)
7
+
8
+
6
9
  TODO
7
10
  ----
8
11
 
@@ -10,13 +13,12 @@ Functions:
10
13
 
11
14
  * Cluster.clustercentroids
12
15
  * Cluster.clustermedoids
13
- * Cluster::Tree#{cut,slice,[],fetch}
16
+ * Cluster::Tree#slice
14
17
 
15
18
  Other:
16
19
 
17
- * an examples/ folder
18
20
  * make :transpose work
19
- * specs for bad inputs
21
+ * more specs for bad inputs
20
22
 
21
23
  DONE
22
24
  ----
@@ -36,6 +38,7 @@ See also
36
38
 
37
39
  * http://bonsai.hgc.jp/~mdehoon/software/cluster/software.htm
38
40
  * http://bonsai.hgc.jp/~mdehoon/software/cluster/cluster.pdf
41
+ * http://github.com/jarib/rbcluster
39
42
 
40
43
  Note on Patches/Pull Requests
41
44
  -----------------------------
@@ -10,6 +10,7 @@
10
10
 
11
11
  VALUE rbcluster_mCluster = Qnil;
12
12
  VALUE rbcluster_cNode = Qnil;
13
+ VALUE rbcluster_cTree = Qnil;
13
14
 
14
15
  VALUE rbcluster_rows2rb(double** data, int nrows, int ncols) {
15
16
  VALUE rows = rb_ary_new2((long)nrows);
@@ -42,6 +43,26 @@ double* rbcluster_ary_to_doubles(VALUE data, int len) {
42
43
  return result;
43
44
  }
44
45
 
46
+ Node* rbcluster_ary_to_nodes(VALUE data, int* len) {
47
+ Check_Type(data, T_ARRAY);
48
+
49
+ long length = RARRAY_LEN(data);
50
+
51
+ Node* result = (Node*)malloc(length*sizeof(Node));
52
+
53
+ for(int i = 0; i < length; ++i)
54
+ {
55
+ VALUE node = rb_ary_entry(data, i);
56
+
57
+ result[i].left = NUM2INT(rb_ivar_get(node, rb_intern("@left")));
58
+ result[i].right = NUM2INT(rb_ivar_get(node, rb_intern("@right")));
59
+ result[i].distance = NUM2DBL(rb_ivar_get(node, rb_intern("@distance")));
60
+ }
61
+
62
+ *len = (int)length;
63
+ return result;
64
+ }
65
+
45
66
  double** rbcluster_ary_to_rows(VALUE data, int* nrows, int* ncols) {
46
67
  Check_Type(data, T_ARRAY);
47
68
  long rows, cols;
@@ -265,7 +286,7 @@ VALUE rbcluster_kcluster(int argc, VALUE* argv, VALUE self) {
265
286
  free(weight);
266
287
  free(clusterid);
267
288
 
268
- return rb_ary_new3(3, result, rb_float_new(error), INT2NUM(ifound));
289
+ return rb_ary_new3(3, result, DBL2NUM(error), INT2NUM(ifound));
269
290
  }
270
291
 
271
292
  VALUE rbcluster_kmedoids(int argc, VALUE* argv, VALUE self) {
@@ -329,7 +350,7 @@ VALUE rbcluster_kmedoids(int argc, VALUE* argv, VALUE self) {
329
350
  free(clusterid);
330
351
  for(i = 1; i < nitems; ++i) free(distances[i]);
331
352
 
332
- return rb_ary_new3(3, result, rb_float_new(error), INT2NUM(ifound));
353
+ return rb_ary_new3(3, result, DBL2NUM(error), INT2NUM(ifound));
333
354
  }
334
355
 
335
356
  VALUE rbcluster_median(VALUE self, VALUE ary) {
@@ -345,7 +366,7 @@ VALUE rbcluster_median(VALUE self, VALUE ary) {
345
366
  arr[i] = NUM2DBL(num);
346
367
  }
347
368
 
348
- return rb_float_new(median((int)len, arr));
369
+ return DBL2NUM(median((int)len, arr));
349
370
  }
350
371
 
351
372
  VALUE rbcluster_mean(VALUE self, VALUE ary) {
@@ -361,7 +382,7 @@ VALUE rbcluster_mean(VALUE self, VALUE ary) {
361
382
  arr[i] = NUM2DBL(num);
362
383
  }
363
384
 
364
- return rb_float_new(mean((int)len, arr));
385
+ return DBL2NUM(mean((int)len, arr));
365
386
  }
366
387
 
367
388
  VALUE rbcluster_distancematrix(int argc, VALUE* argv, VALUE self) {
@@ -403,7 +424,7 @@ VALUE rbcluster_distancematrix(int argc, VALUE* argv, VALUE self) {
403
424
  VALUE row = rb_ary_new();
404
425
 
405
426
  for(j = 0; j < i; ++j){
406
- rb_ary_push(row, rb_float_new(distances[i][j]));
427
+ rb_ary_push(row, DBL2NUM(distances[i][j]));
407
428
  }
408
429
 
409
430
  // first row is NULL
@@ -482,7 +503,7 @@ VALUE rbcluster_clusterdistance(int argc, VALUE* argv, VALUE self) {
482
503
  rbcluster_free_rows(rows, nrows);
483
504
  rbcluster_free_mask(mask, nrows);
484
505
 
485
- return rb_float_new(result);
506
+ return DBL2NUM(result);
486
507
  }
487
508
 
488
509
  VALUE rbcluster_create_node(Node* node) {
@@ -500,6 +521,10 @@ VALUE rbcluster_node_initialize(int argc, VALUE* argv, VALUE self) {
500
521
 
501
522
  rb_scan_args(argc, argv, "21", &left, &right, &distance);
502
523
 
524
+ if(NIL_P(distance)) {
525
+ distance = DBL2NUM(0.0);
526
+ }
527
+
503
528
  rb_ivar_set(self, rb_intern("@left"), left);
504
529
  rb_ivar_set(self, rb_intern("@right"), right);
505
530
  rb_ivar_set(self, rb_intern("@distance"), distance);
@@ -586,7 +611,8 @@ VALUE rbcluster_treecluster(int argc, VALUE* argv, VALUE self) {
586
611
  rbcluster_free_rows(rows, nrows);
587
612
  rbcluster_free_mask(mask, nrows);
588
613
 
589
- return result;
614
+ VALUE args[] = { result };
615
+ return rb_class_new_instance(1, args, rbcluster_cTree);
590
616
  }
591
617
 
592
618
  VALUE rbcluster_somcluster(int argc, VALUE* argv, VALUE self) {
@@ -648,7 +674,7 @@ VALUE rbcluster_somcluster(int argc, VALUE* argv, VALUE self) {
648
674
  for(j = 0; j < nygrid; ++j) {
649
675
  jarr = rb_ary_new2(ncols);
650
676
  for(k = 0; k < ncols; ++k) {
651
- rb_ary_push(jarr, rb_float_new(celldata[i][j][k]));
677
+ rb_ary_push(jarr, DBL2NUM(celldata[i][j][k]));
652
678
  }
653
679
  rb_ary_push(iarr, jarr);
654
680
  }
@@ -672,7 +698,7 @@ VALUE rbcluster_somcluster(int argc, VALUE* argv, VALUE self) {
672
698
  return rb_ary_new3(2, rb_clusterid, rb_celldata);
673
699
  }
674
700
 
675
- void print_doubles(double* vals, int len) {
701
+ void rbcluster_print_doubles(double* vals, int len) {
676
702
  puts("[");
677
703
  for(int i = 0; i < len; ++i) {
678
704
  printf("\t%d: %f\n", i, vals[i]);
@@ -680,7 +706,7 @@ void print_doubles(double* vals, int len) {
680
706
  puts("]");
681
707
  }
682
708
 
683
- void print_double_matrix(double** vals, int nrows, int ncols) {
709
+ void rbcluster_print_double_matrix(double** vals, int nrows, int ncols) {
684
710
  puts("[");
685
711
  for(int i = 0; i < nrows; ++i) {
686
712
  printf("\t[ ");
@@ -724,7 +750,7 @@ VALUE rbcluster_pca(VALUE self, VALUE data) {
724
750
 
725
751
  int ok = pca(nrows, ncols, u, v, w);
726
752
  if(ok == -1) {
727
- rb_raise(rb_eStandardError, "could not allocate memory");
753
+ rb_raise(rb_eNoMemError, "could not allocate memory");
728
754
  } else if(ok > 0) {
729
755
  rb_raise(rb_eStandardError, "svd failed to converge");
730
756
  }
@@ -751,9 +777,37 @@ VALUE rbcluster_pca(VALUE self, VALUE data) {
751
777
  return rb_ary_new3(4, mean, coordinates, pc, eigenvalues);
752
778
  }
753
779
 
780
+ VALUE rbcluster_cuttree(VALUE self, VALUE nodes, VALUE clusters) {
781
+ int nelements, nclusters;
782
+
783
+ nclusters = NUM2INT(clusters);
784
+
785
+ Node* cnodes = rbcluster_ary_to_nodes(nodes, &nelements);
786
+ int n = nelements + 1;
787
+
788
+ if(nclusters < 1) {
789
+ rb_raise(rb_eArgError, "nclusters must be >= 1");
790
+ }
791
+
792
+ if(nclusters > n) {
793
+ rb_raise(rb_eArgError, "more clusters requested than items available");
794
+ }
795
+
796
+ int clusterid[n];
797
+ cuttree(n, cnodes, nclusters, clusterid);
798
+ free(cnodes);
799
+
800
+ if(clusterid[0] == -1) {
801
+ rb_raise(rb_eNoMemError, "could not allocate memory for cuttree()");
802
+ }
803
+
804
+ return rbcluster_ints2rb(clusterid, (long)n);
805
+ }
806
+
754
807
  void Init_rbcluster() {
755
808
  rbcluster_mCluster = rb_define_module("Cluster");
756
809
  rbcluster_cNode = rb_define_class_under(rbcluster_mCluster, "Node", rb_cObject);
810
+ rbcluster_cTree = rb_define_class_under(rbcluster_mCluster, "Tree", rb_cObject);
757
811
 
758
812
  rb_define_attr(rbcluster_cNode, "left", 1, 1);
759
813
  rb_define_attr(rbcluster_cNode, "right", 1, 1);
@@ -770,6 +824,7 @@ void Init_rbcluster() {
770
824
  rb_define_singleton_method(rbcluster_mCluster, "treecluster", rbcluster_treecluster, -1);
771
825
  rb_define_singleton_method(rbcluster_mCluster, "somcluster", rbcluster_somcluster, -1);
772
826
  rb_define_singleton_method(rbcluster_mCluster, "pca", rbcluster_pca, 1);
827
+ rb_define_singleton_method(rbcluster_mCluster, "cuttree", rbcluster_cuttree, 2);
773
828
 
774
829
  rb_define_const(rbcluster_mCluster, "C_VERSION", rb_str_new2(CLUSTERVERSION));
775
830
  }
@@ -1,5 +1,6 @@
1
1
  require 'rbcluster/version'
2
2
  require 'rbcluster/rbcluster'
3
3
  require 'rbcluster/tree'
4
+ require 'rbcluster/node'
4
5
 
5
6
  RbCluster = Cluster
@@ -0,0 +1,7 @@
1
module Cluster
  class Node
    # Human-readable form of the node: "(left, right): distance".
    def to_s
      format("(%s, %s): %s", @left, @right, @distance)
    end
  end
end
@@ -1,20 +1,47 @@
1
1
  module Cluster
2
2
  class Tree
3
3
  def initialize(nodes)
4
- raise NotImplementedError, "patches welcome :)"
5
-
6
- nodes.each_with_index do |node, idx|
4
+ @nodes = Array(nodes)
5
+ @nodes.each_with_index do |node, idx|
7
6
  unless node.kind_of?(Node)
8
7
  raise ArgumentError, "expected #{Node.class}, got #{node.class} at index #{idx}"
9
8
  end
10
9
  end
11
10
 
12
- @nodes = nodes
13
11
  end
14
12
 
15
13
  def size
16
14
  @nodes.size
17
15
  end
18
16
 
17
+ def to_a
18
+ @nodes.dup
19
+ end
20
+
21
+ def to_s
22
+ @nodes.map { |e| "#{e}\n" }.join
23
+ end
24
+
25
+ def [](idx)
26
+ @nodes[idx]
27
+ end
28
+
29
+ def fetch(idx, &blk)
30
+ @nodes.fetch(idx, &blk)
31
+ end
32
+
33
+ def scale
34
+ max = @nodes.map { |e| e.distance }.max
35
+ @nodes.each do |node|
36
+ node.distance = node.distance /= max
37
+ end
38
+
39
+ nil
40
+ end
41
+
42
+ def cut(nclusters)
43
+ Cluster.cuttree(@nodes, nclusters)
44
+ end
45
+
19
46
  end
20
47
  end
@@ -1,3 +1,3 @@
1
1
  module Cluster
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -8,7 +8,7 @@ Gem::Specification.new do |s|
8
8
  s.platform = Gem::Platform::RUBY
9
9
  s.authors = ["Jari Bakken", "Michiel Jan Laurens de Hoon"]
10
10
  s.email = ["jari.bakken@gmail.com"]
11
- s.homepage = "http://bonsai.hgc.jp/~mdehoon/software/cluster/software.htm"
11
+ s.homepage = "http://github.com/jarib/rbcluster"
12
12
  s.summary = %q{Ruby bindings for the Cluster C library}
13
13
  s.description = %q{This gem provides a Ruby extension to the clustering routines in the C Clustering Library (which also backs e.g. Python's pycluster and Perl's Algorithm::Cluster).}
14
14
 
@@ -6,6 +6,7 @@ module Cluster
6
6
  n = Node.new(2, 3)
7
7
  n.left.should == 2
8
8
  n.right.should == 3
9
+ n.distance.should == 0.0
9
10
  end
10
11
 
11
12
  it "takes an optional distance" do
@@ -0,0 +1,58 @@
1
+ require 'spec_helper'
2
+
3
module Cluster
  describe Tree do
    # Construction: only arrays made up entirely of Node objects are accepted.
    context "creating" do
      it "should raise ArgumentError if not given an array of Nodes" do
        lambda { Tree.new(1) }.should raise_error(ArgumentError)
        lambda { Tree.new([Node.new(1,2), Node.new(2,3), nil]) }.should raise_error(ArgumentError)
        lambda { Tree.new }.should raise_error(ArgumentError)
      end

      it "returns a Tree instance when given an array of nodes" do
        built = Tree.new([Node.new(1, 2)])
        built.should be_kind_of(Tree)
      end
    end

    # Behaviour of a tree produced by Cluster.treecluster for four rows.
    context "using" do
      let(:tree) do
        rows = [
          [  1.1, 2.2, 3.3, 4.4, 5.5],
          [  3.1, 3.2, 1.3, 2.4, 1.5],
          [  4.1, 2.2, 0.3, 5.4, 0.5],
          [ 12.1, 2.0, 0.0, 5.0, 0.0]
        ]

        Cluster.treecluster(rows)
      end

      it "fetches a copy of the node array" do
        copy = tree.to_a
        copy.should be_kind_of(Array)
        copy.size.should == 3

        # Emptying the copy must leave the tree itself untouched.
        copy.clear
        tree.size.should == 3
      end

      it "has a string representation" do
        rendered = tree.to_s
        rendered.should include('(2, 1): 2.6')
      end

      it "can scale the tree" do
        tree.scale

        tree.to_a.each do |node|
          node.distance.should be_between(0, 1)
        end
      end

      it "can cut the tree" do
        tree.cut(3).should == [1, 2, 2, 0]
      end

      it "gets a node" do
        first = tree.to_a[0]
        tree[0].should == first
      end

      it "fetches a node" do
        first = tree.to_a[0]
        tree.fetch(0).should == first
      end
    end
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbcluster
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -14,7 +14,7 @@ date: 2012-01-03 00:00:00.000000000Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: rake-compiler
17
- requirement: &2154075120 !ruby/object:Gem::Requirement
17
+ requirement: &2157667380 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ! '>='
@@ -22,10 +22,10 @@ dependencies:
22
22
  version: '0'
23
23
  type: :development
24
24
  prerelease: false
25
- version_requirements: *2154075120
25
+ version_requirements: *2157667380
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: rspec
28
- requirement: &2154074620 !ruby/object:Gem::Requirement
28
+ requirement: &2157666880 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ~>
@@ -33,7 +33,7 @@ dependencies:
33
33
  version: 2.6.0
34
34
  type: :development
35
35
  prerelease: false
36
- version_requirements: *2154074620
36
+ version_requirements: *2157666880
37
37
  description: This gem provides a Ruby extension to the clustering routines in the
38
38
  C Clustering Library (which also backs e.g. Python's pycluster and Perl's Algorithm::Cluster).
39
39
  email:
@@ -55,13 +55,13 @@ files:
55
55
  - ext/rbcluster/extconf.rb
56
56
  - ext/rbcluster/rbcluster.c
57
57
  - lib/rbcluster.rb
58
+ - lib/rbcluster/node.rb
58
59
  - lib/rbcluster/tree.rb
59
60
  - lib/rbcluster/version.rb
60
61
  - rbcluster.gemspec
61
62
  - spec/clustercentroids_spec.rb
62
63
  - spec/clusterdistance_spec.rb
63
64
  - spec/clustermedoids_spec.rb
64
- - spec/cuttree_spec.rb
65
65
  - spec/kcluster_spec.rb
66
66
  - spec/kmedoids_spec.rb
67
67
  - spec/median_mean_spec.rb
@@ -69,8 +69,9 @@ files:
69
69
  - spec/pca_spec.rb
70
70
  - spec/somcluster_spec.rb
71
71
  - spec/spec_helper.rb
72
+ - spec/tree_spec.rb
72
73
  - spec/treecluster_spec.rb
73
- homepage: http://bonsai.hgc.jp/~mdehoon/software/cluster/software.htm
74
+ homepage: http://github.com/jarib/rbcluster
74
75
  licenses: []
75
76
  post_install_message:
76
77
  rdoc_options: []
@@ -98,7 +99,6 @@ test_files:
98
99
  - spec/clustercentroids_spec.rb
99
100
  - spec/clusterdistance_spec.rb
100
101
  - spec/clustermedoids_spec.rb
101
- - spec/cuttree_spec.rb
102
102
  - spec/kcluster_spec.rb
103
103
  - spec/kmedoids_spec.rb
104
104
  - spec/median_mean_spec.rb
@@ -106,5 +106,6 @@ test_files:
106
106
  - spec/pca_spec.rb
107
107
  - spec/somcluster_spec.rb
108
108
  - spec/spec_helper.rb
109
+ - spec/tree_spec.rb
109
110
  - spec/treecluster_spec.rb
110
111
  has_rdoc:
@@ -1,6 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe "Cluster.cuttree" do
4
- pending
5
- end
6
-