rbcluster 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -3,6 +3,9 @@ rbcluster
3
3
 
4
4
  Ruby bindings to the Cluster C library.
5
5
 
6
+ [![Build Status](https://secure.travis-ci.org/jarib/rbcluster.png)](http://travis-ci.org/jarib/rbcluster)
7
+
8
+
6
9
  TODO
7
10
  ----
8
11
 
@@ -10,13 +13,12 @@ Functions:
10
13
 
11
14
  * Cluster.clustercentroids
12
15
  * Cluster.clustermedoids
13
- * Cluster::Tree#{cut,slice,[],fetch}
16
+ * Cluster::Tree#slice
14
17
 
15
18
  Other:
16
19
 
17
- * an examples/ folder
18
20
  * make :transpose work
19
- * specs for bad inputs
21
+ * more specs for bad inputs
20
22
 
21
23
  DONE
22
24
  ----
@@ -36,6 +38,7 @@ See also
36
38
 
37
39
  * http://bonsai.hgc.jp/~mdehoon/software/cluster/software.htm
38
40
  * http://bonsai.hgc.jp/~mdehoon/software/cluster/cluster.pdf
41
+ * http://github.com/jarib/rbcluster
39
42
 
40
43
  Note on Patches/Pull Requests
41
44
  -----------------------------
@@ -10,6 +10,7 @@
10
10
 
11
11
  VALUE rbcluster_mCluster = Qnil;
12
12
  VALUE rbcluster_cNode = Qnil;
13
+ VALUE rbcluster_cTree = Qnil;
13
14
 
14
15
  VALUE rbcluster_rows2rb(double** data, int nrows, int ncols) {
15
16
  VALUE rows = rb_ary_new2((long)nrows);
@@ -42,6 +43,26 @@ double* rbcluster_ary_to_doubles(VALUE data, int len) {
42
43
  return result;
43
44
  }
44
45
 
46
+ Node* rbcluster_ary_to_nodes(VALUE data, int* len) {
47
+ Check_Type(data, T_ARRAY);
48
+
49
+ long length = RARRAY_LEN(data);
50
+
51
+ Node* result = (Node*)malloc(length*sizeof(Node));
52
+
53
+ for(int i = 0; i < length; ++i)
54
+ {
55
+ VALUE node = rb_ary_entry(data, i);
56
+
57
+ result[i].left = NUM2INT(rb_ivar_get(node, rb_intern("@left")));
58
+ result[i].right = NUM2INT(rb_ivar_get(node, rb_intern("@right")));
59
+ result[i].distance = NUM2DBL(rb_ivar_get(node, rb_intern("@distance")));
60
+ }
61
+
62
+ *len = (int)length;
63
+ return result;
64
+ }
65
+
45
66
  double** rbcluster_ary_to_rows(VALUE data, int* nrows, int* ncols) {
46
67
  Check_Type(data, T_ARRAY);
47
68
  long rows, cols;
@@ -265,7 +286,7 @@ VALUE rbcluster_kcluster(int argc, VALUE* argv, VALUE self) {
265
286
  free(weight);
266
287
  free(clusterid);
267
288
 
268
- return rb_ary_new3(3, result, rb_float_new(error), INT2NUM(ifound));
289
+ return rb_ary_new3(3, result, DBL2NUM(error), INT2NUM(ifound));
269
290
  }
270
291
 
271
292
  VALUE rbcluster_kmedoids(int argc, VALUE* argv, VALUE self) {
@@ -329,7 +350,7 @@ VALUE rbcluster_kmedoids(int argc, VALUE* argv, VALUE self) {
329
350
  free(clusterid);
330
351
  for(i = 1; i < nitems; ++i) free(distances[i]);
331
352
 
332
- return rb_ary_new3(3, result, rb_float_new(error), INT2NUM(ifound));
353
+ return rb_ary_new3(3, result, DBL2NUM(error), INT2NUM(ifound));
333
354
  }
334
355
 
335
356
  VALUE rbcluster_median(VALUE self, VALUE ary) {
@@ -345,7 +366,7 @@ VALUE rbcluster_median(VALUE self, VALUE ary) {
345
366
  arr[i] = NUM2DBL(num);
346
367
  }
347
368
 
348
- return rb_float_new(median((int)len, arr));
369
+ return DBL2NUM(median((int)len, arr));
349
370
  }
350
371
 
351
372
  VALUE rbcluster_mean(VALUE self, VALUE ary) {
@@ -361,7 +382,7 @@ VALUE rbcluster_mean(VALUE self, VALUE ary) {
361
382
  arr[i] = NUM2DBL(num);
362
383
  }
363
384
 
364
- return rb_float_new(mean((int)len, arr));
385
+ return DBL2NUM(mean((int)len, arr));
365
386
  }
366
387
 
367
388
  VALUE rbcluster_distancematrix(int argc, VALUE* argv, VALUE self) {
@@ -403,7 +424,7 @@ VALUE rbcluster_distancematrix(int argc, VALUE* argv, VALUE self) {
403
424
  VALUE row = rb_ary_new();
404
425
 
405
426
  for(j = 0; j < i; ++j){
406
- rb_ary_push(row, rb_float_new(distances[i][j]));
427
+ rb_ary_push(row, DBL2NUM(distances[i][j]));
407
428
  }
408
429
 
409
430
  // first row is NULL
@@ -482,7 +503,7 @@ VALUE rbcluster_clusterdistance(int argc, VALUE* argv, VALUE self) {
482
503
  rbcluster_free_rows(rows, nrows);
483
504
  rbcluster_free_mask(mask, nrows);
484
505
 
485
- return rb_float_new(result);
506
+ return DBL2NUM(result);
486
507
  }
487
508
 
488
509
  VALUE rbcluster_create_node(Node* node) {
@@ -500,6 +521,10 @@ VALUE rbcluster_node_initialize(int argc, VALUE* argv, VALUE self) {
500
521
 
501
522
  rb_scan_args(argc, argv, "21", &left, &right, &distance);
502
523
 
524
+ if(NIL_P(distance)) {
525
+ distance = DBL2NUM(0.0);
526
+ }
527
+
503
528
  rb_ivar_set(self, rb_intern("@left"), left);
504
529
  rb_ivar_set(self, rb_intern("@right"), right);
505
530
  rb_ivar_set(self, rb_intern("@distance"), distance);
@@ -586,7 +611,8 @@ VALUE rbcluster_treecluster(int argc, VALUE* argv, VALUE self) {
586
611
  rbcluster_free_rows(rows, nrows);
587
612
  rbcluster_free_mask(mask, nrows);
588
613
 
589
- return result;
614
+ VALUE args[] = { result };
615
+ return rb_class_new_instance(1, args, rbcluster_cTree);
590
616
  }
591
617
 
592
618
  VALUE rbcluster_somcluster(int argc, VALUE* argv, VALUE self) {
@@ -648,7 +674,7 @@ VALUE rbcluster_somcluster(int argc, VALUE* argv, VALUE self) {
648
674
  for(j = 0; j < nygrid; ++j) {
649
675
  jarr = rb_ary_new2(ncols);
650
676
  for(k = 0; k < ncols; ++k) {
651
- rb_ary_push(jarr, rb_float_new(celldata[i][j][k]));
677
+ rb_ary_push(jarr, DBL2NUM(celldata[i][j][k]));
652
678
  }
653
679
  rb_ary_push(iarr, jarr);
654
680
  }
@@ -672,7 +698,7 @@ VALUE rbcluster_somcluster(int argc, VALUE* argv, VALUE self) {
672
698
  return rb_ary_new3(2, rb_clusterid, rb_celldata);
673
699
  }
674
700
 
675
- void print_doubles(double* vals, int len) {
701
+ void rbcluster_print_doubles(double* vals, int len) {
676
702
  puts("[");
677
703
  for(int i = 0; i < len; ++i) {
678
704
  printf("\t%d: %f\n", i, vals[i]);
@@ -680,7 +706,7 @@ void print_doubles(double* vals, int len) {
680
706
  puts("]");
681
707
  }
682
708
 
683
- void print_double_matrix(double** vals, int nrows, int ncols) {
709
+ void rbcluster_print_double_matrix(double** vals, int nrows, int ncols) {
684
710
  puts("[");
685
711
  for(int i = 0; i < nrows; ++i) {
686
712
  printf("\t[ ");
@@ -724,7 +750,7 @@ VALUE rbcluster_pca(VALUE self, VALUE data) {
724
750
 
725
751
  int ok = pca(nrows, ncols, u, v, w);
726
752
  if(ok == -1) {
727
- rb_raise(rb_eStandardError, "could not allocate memory");
753
+ rb_raise(rb_eNoMemError, "could not allocate memory");
728
754
  } else if(ok > 0) {
729
755
  rb_raise(rb_eStandardError, "svd failed to converge");
730
756
  }
@@ -751,9 +777,37 @@ VALUE rbcluster_pca(VALUE self, VALUE data) {
751
777
  return rb_ary_new3(4, mean, coordinates, pc, eigenvalues);
752
778
  }
753
779
 
780
+ VALUE rbcluster_cuttree(VALUE self, VALUE nodes, VALUE clusters) {
781
+ int nelements, nclusters;
782
+
783
+ nclusters = NUM2INT(clusters);
784
+
785
+ Node* cnodes = rbcluster_ary_to_nodes(nodes, &nelements);
786
+ int n = nelements + 1;
787
+
788
+ if(nclusters < 1) {
789
+ rb_raise(rb_eArgError, "nclusters must be >= 1");
790
+ }
791
+
792
+ if(nclusters > n) {
793
+ rb_raise(rb_eArgError, "more clusters requested than items available");
794
+ }
795
+
796
+ int clusterid[n];
797
+ cuttree(n, cnodes, nclusters, clusterid);
798
+ free(cnodes);
799
+
800
+ if(clusterid[0] == -1) {
801
+ rb_raise(rb_eNoMemError, "could not allocate memory for cuttree()");
802
+ }
803
+
804
+ return rbcluster_ints2rb(clusterid, (long)n);
805
+ }
806
+
754
807
  void Init_rbcluster() {
755
808
  rbcluster_mCluster = rb_define_module("Cluster");
756
809
  rbcluster_cNode = rb_define_class_under(rbcluster_mCluster, "Node", rb_cObject);
810
+ rbcluster_cTree = rb_define_class_under(rbcluster_mCluster, "Tree", rb_cObject);
757
811
 
758
812
  rb_define_attr(rbcluster_cNode, "left", 1, 1);
759
813
  rb_define_attr(rbcluster_cNode, "right", 1, 1);
@@ -770,6 +824,7 @@ void Init_rbcluster() {
770
824
  rb_define_singleton_method(rbcluster_mCluster, "treecluster", rbcluster_treecluster, -1);
771
825
  rb_define_singleton_method(rbcluster_mCluster, "somcluster", rbcluster_somcluster, -1);
772
826
  rb_define_singleton_method(rbcluster_mCluster, "pca", rbcluster_pca, 1);
827
+ rb_define_singleton_method(rbcluster_mCluster, "cuttree", rbcluster_cuttree, 2);
773
828
 
774
829
  rb_define_const(rbcluster_mCluster, "C_VERSION", rb_str_new2(CLUSTERVERSION));
775
830
  }
@@ -1,5 +1,6 @@
1
1
  require 'rbcluster/version'
2
2
  require 'rbcluster/rbcluster'
3
3
  require 'rbcluster/tree'
4
+ require 'rbcluster/node'
4
5
 
5
6
  RbCluster = Cluster
@@ -0,0 +1,7 @@
1
module Cluster
  # Ruby-side additions to Cluster::Node.
  class Node
    # Returns the node formatted as "(left, right): distance", built from the
    # @left, @right and @distance instance variables.
    def to_s
      children = "(#{@left}, #{@right})"
      "#{children}: #{@distance}"
    end
  end
end
@@ -1,20 +1,47 @@
1
1
  module Cluster
2
2
  class Tree
3
3
  def initialize(nodes)
4
- raise NotImplementedError, "patches welcome :)"
5
-
6
- nodes.each_with_index do |node, idx|
4
+ @nodes = Array(nodes)
5
+ @nodes.each_with_index do |node, idx|
7
6
  unless node.kind_of?(Node)
8
7
  raise ArgumentError, "expected #{Node.class}, got #{node.class} at index #{idx}"
9
8
  end
10
9
  end
11
10
 
12
- @nodes = nodes
13
11
  end
14
12
 
15
13
  def size
16
14
  @nodes.size
17
15
  end
18
16
 
17
+ def to_a
18
+ @nodes.dup
19
+ end
20
+
21
+ def to_s
22
+ @nodes.map { |e| "#{e}\n" }.join
23
+ end
24
+
25
+ def [](idx)
26
+ @nodes[idx]
27
+ end
28
+
29
+ def fetch(idx, &blk)
30
+ @nodes.fetch(idx, &blk)
31
+ end
32
+
33
+ def scale
34
+ max = @nodes.map { |e| e.distance }.max
35
+ @nodes.each do |node|
36
+ node.distance = node.distance /= max
37
+ end
38
+
39
+ nil
40
+ end
41
+
42
+ def cut(nclusters)
43
+ Cluster.cuttree(@nodes, nclusters)
44
+ end
45
+
19
46
  end
20
47
  end
@@ -1,3 +1,3 @@
1
1
  module Cluster
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -8,7 +8,7 @@ Gem::Specification.new do |s|
8
8
  s.platform = Gem::Platform::RUBY
9
9
  s.authors = ["Jari Bakken", "Michiel Jan Laurens de Hoon"]
10
10
  s.email = ["jari.bakken@gmail.com"]
11
- s.homepage = "http://bonsai.hgc.jp/~mdehoon/software/cluster/software.htm"
11
+ s.homepage = "http://github.com/jarib/rbcluster"
12
12
  s.summary = %q{Ruby bindings for the Cluster C library}
13
13
  s.description = %q{This gem provides a Ruby extension to the clustering routines in the C Clustering Library (which also backs e.g. Python's pycluster and Perl's Algorithm::Cluster).}
14
14
 
@@ -6,6 +6,7 @@ module Cluster
6
6
  n = Node.new(2, 3)
7
7
  n.left.should == 2
8
8
  n.right.should == 3
9
+ n.distance.should == 0.0
9
10
  end
10
11
 
11
12
  it "takes an optional distance" do
@@ -0,0 +1,58 @@
1
require 'spec_helper'

module Cluster
  # Specs for Cluster::Tree: construction checks and the instance helpers.
  describe Tree do
    context "creating" do
      it "should raise ArgumentError if not given an array of Nodes" do
        not_an_array = lambda { Tree.new(1) }
        with_nil     = lambda { Tree.new([Node.new(1,2), Node.new(2,3), nil]) }
        no_arguments = lambda { Tree.new }

        not_an_array.should raise_error(ArgumentError)
        with_nil.should raise_error(ArgumentError)
        no_arguments.should raise_error(ArgumentError)
      end

      it "returns a Tree instance when given an array of nodes" do
        single = Tree.new([Node.new(1, 2)])
        single.should be_kind_of(Tree)
      end
    end

    context "using" do
      # Tree built by clustering four rows, i.e. three nodes.
      let(:tree) do
        rows = [
          [  1.1, 2.2, 3.3, 4.4, 5.5],
          [  3.1, 3.2, 1.3, 2.4, 1.5],
          [  4.1, 2.2, 0.3, 5.4, 0.5],
          [ 12.1, 2.0, 0.0, 5.0, 0.0]
        ]

        Cluster.treecluster(rows)
      end

      it "fetches a copy of the node array" do
        copy = tree.to_a
        copy.should be_kind_of(Array)
        copy.size.should == 3

        # Emptying the copy must leave the tree itself intact.
        copy.clear
        tree.size.should == 3
      end

      it "has a string representation" do
        text = tree.to_s
        text.should include('(2, 1): 2.6')
      end

      it "can scale the tree" do
        tree.scale

        tree.to_a.each do |node|
          node.distance.should be_between(0, 1)
        end
      end

      it "can cut the tree" do
        ids = tree.cut(3)
        ids.should == [1, 2, 2, 0]
      end

      it "gets a node" do
        first = tree.to_a[0]
        tree[0].should == first
      end

      it "fetches a node" do
        first = tree.to_a[0]
        tree.fetch(0).should == first
      end
    end
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbcluster
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -14,7 +14,7 @@ date: 2012-01-03 00:00:00.000000000Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: rake-compiler
17
- requirement: &2154075120 !ruby/object:Gem::Requirement
17
+ requirement: &2157667380 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ! '>='
@@ -22,10 +22,10 @@ dependencies:
22
22
  version: '0'
23
23
  type: :development
24
24
  prerelease: false
25
- version_requirements: *2154075120
25
+ version_requirements: *2157667380
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: rspec
28
- requirement: &2154074620 !ruby/object:Gem::Requirement
28
+ requirement: &2157666880 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ~>
@@ -33,7 +33,7 @@ dependencies:
33
33
  version: 2.6.0
34
34
  type: :development
35
35
  prerelease: false
36
- version_requirements: *2154074620
36
+ version_requirements: *2157666880
37
37
  description: This gem provides a Ruby extension to the clustering routines in the
38
38
  C Clustering Library (which also backs e.g. Python's pycluster and Perl's Algorithm::Cluster).
39
39
  email:
@@ -55,13 +55,13 @@ files:
55
55
  - ext/rbcluster/extconf.rb
56
56
  - ext/rbcluster/rbcluster.c
57
57
  - lib/rbcluster.rb
58
+ - lib/rbcluster/node.rb
58
59
  - lib/rbcluster/tree.rb
59
60
  - lib/rbcluster/version.rb
60
61
  - rbcluster.gemspec
61
62
  - spec/clustercentroids_spec.rb
62
63
  - spec/clusterdistance_spec.rb
63
64
  - spec/clustermedoids_spec.rb
64
- - spec/cuttree_spec.rb
65
65
  - spec/kcluster_spec.rb
66
66
  - spec/kmedoids_spec.rb
67
67
  - spec/median_mean_spec.rb
@@ -69,8 +69,9 @@ files:
69
69
  - spec/pca_spec.rb
70
70
  - spec/somcluster_spec.rb
71
71
  - spec/spec_helper.rb
72
+ - spec/tree_spec.rb
72
73
  - spec/treecluster_spec.rb
73
- homepage: http://bonsai.hgc.jp/~mdehoon/software/cluster/software.htm
74
+ homepage: http://github.com/jarib/rbcluster
74
75
  licenses: []
75
76
  post_install_message:
76
77
  rdoc_options: []
@@ -98,7 +99,6 @@ test_files:
98
99
  - spec/clustercentroids_spec.rb
99
100
  - spec/clusterdistance_spec.rb
100
101
  - spec/clustermedoids_spec.rb
101
- - spec/cuttree_spec.rb
102
102
  - spec/kcluster_spec.rb
103
103
  - spec/kmedoids_spec.rb
104
104
  - spec/median_mean_spec.rb
@@ -106,5 +106,6 @@ test_files:
106
106
  - spec/pca_spec.rb
107
107
  - spec/somcluster_spec.rb
108
108
  - spec/spec_helper.rb
109
+ - spec/tree_spec.rb
109
110
  - spec/treecluster_spec.rb
110
111
  has_rdoc:
@@ -1,6 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe "Cluster.cuttree" do
4
- pending
5
- end
6
-