flock 0.1.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -1,6 +1,6 @@
1
1
  = Flock
2
2
 
3
- Ruby bindings to Cluster 3.0, http://bonsai.hgc.jp/~mdehoon/software/cluster/software.htm
3
+ Ruby bindings to {Cluster 3.0}[http://bonsai.hgc.jp/~mdehoon/software/cluster/software.htm]
4
4
 
5
5
  == Description
6
6
 
@@ -8,12 +8,14 @@ Provides bindings to K-Means clustering in Cluster 3.0
8
8
 
9
9
  == Synopsis
10
10
 
11
+ === Numeric Data
12
+
13
+ require 'pp'
11
14
  require 'flock'
12
15
 
13
16
  data = Array.new(13) {[]}
14
17
  mask = Array.new(13) {[]}
15
18
  weights = Array.new(13) {1.0}
16
- clusters = Array.new(13)
17
19
 
18
20
  data[ 0][ 0]=0.1; data[ 0][ 1]=0.0; data[ 0][ 2]=9.6; data[ 0][ 3] = 5.6;
19
21
  data[ 1][ 0]=1.4; data[ 1][ 1]=1.3; data[ 1][ 2]=0.0; data[ 1][ 3] = 3.8;
@@ -43,7 +45,7 @@ Provides bindings to K-Means clustering in Cluster 3.0
43
45
  mask[11][ 0]=0; mask[11][ 1]=1; mask[11][ 2]=1; mask[11][ 3] = 1;
44
46
  mask[12][ 0]=1; mask[12][ 1]=1; mask[12][ 2]=1; mask[12][ 3] = 1;
45
47
 
46
- p Flock.kmeans(6, data, mask)
48
+ pp Flock.kmeans(6, data, mask)
47
49
 
48
50
  # method:
49
51
  # - Flock::METHOD_AVERAGE (default)
@@ -57,7 +59,7 @@ Provides bindings to K-Means clustering in Cluster 3.0
57
59
  # - Flock::METRIC_ABSOLUTE_UNCENTERED_CORRELATION
58
60
  # - Flock::METRIC_SPEARMAN
59
61
  # - Flock::METRIC_KENDALL
60
- Flock.kmeans(
62
+ pp Flock.kmeans(
61
63
  6,
62
64
  data,
63
65
  mask,
@@ -67,6 +69,32 @@ Provides bindings to K-Means clustering in Cluster 3.0
67
69
  weights: Array.new(13) {1.0},
68
70
  )
69
71
 
72
+
73
+ === Sparse and Non-Numeric Data
74
+
75
+ require 'pp'
76
+ require 'flock'
77
+
78
+ data = []
79
+ data << { apple: 1, orange: 1 }
80
+ data << { black: 1, white: 1 }
81
+ data << { white: 1, cyan: 1 }
82
+ data << { orange: 1 }
83
+ data << { apple: 1 }
84
+
85
+ pp Flock.sparse_kmeans(2, data)
86
+
87
+ # or, even more simply, pass arrays of keys (each listed key's value defaults to 1)
88
+
89
+ data = []
90
+ data << %w(apple orange)
91
+ data << %w(black white)
92
+ data << %w(white cyan)
93
+ data << %w(orange)
94
+ data << %w(apple)
95
+
96
+ pp Flock.sparse_kmeans(2, data)
97
+
70
98
  == TODO
71
99
 
72
100
  Bindings to,
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.2.1
@@ -1,11 +1,11 @@
1
1
  #!/usr/bin/ruby
2
2
 
3
+ require 'pp'
3
4
  require 'flock'
4
5
 
5
6
  data = Array.new(13) {[]}
6
7
  mask = Array.new(13) {[]}
7
8
  weights = Array.new(13) {1.0}
8
- clusters = Array.new(13)
9
9
 
10
10
  data[ 0][ 0]=0.1; data[ 0][ 1]=0.0; data[ 0][ 2]=9.6; data[ 0][ 3] = 5.6;
11
11
  data[ 1][ 0]=1.4; data[ 1][ 1]=1.3; data[ 1][ 2]=0.0; data[ 1][ 3] = 3.8;
@@ -35,5 +35,4 @@ mask[10][ 0]=1; mask[10][ 1]=1; mask[10][ 2]=1; mask[10][ 3] = 1;
35
35
  mask[11][ 0]=0; mask[11][ 1]=1; mask[11][ 2]=1; mask[11][ 3] = 1;
36
36
  mask[12][ 0]=1; mask[12][ 1]=1; mask[12][ 2]=1; mask[12][ 3] = 1;
37
37
 
38
- require 'pp'
39
38
  pp Flock.kmeans(6, data, mask)
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'pp'
4
+ require 'flock'
5
+
6
+ data = []
7
+ data << { apple: 1, orange: 1 }
8
+ data << { black: 1, white: 1 }
9
+ data << { white: 1, cyan: 1 }
10
+ data << { orange: 1 }
11
+ data << { apple: 1 }
12
+
13
+ pp Flock.sparse_kmeans(2, data)
14
+
15
+ data = []
16
+ data << %w(apple orange)
17
+ data << %w(black white)
18
+ data << %w(white cyan)
19
+ data << %w(orange)
20
+ data << %w(apple)
21
+
22
+ pp Flock.sparse_kmeans(2, data)
data/ext/flock.c CHANGED
@@ -15,11 +15,21 @@ int opt_int_value(VALUE option, char *key, int def) {
15
15
 
16
16
  VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
17
17
  VALUE size, data, mask, weights, options;
18
- rb_scan_args(argc, argv, "31", &size, &data, &mask, &options);
18
+ rb_scan_args(argc, argv, "22", &size, &data, &mask, &options);
19
+
20
+ if (TYPE(data) != T_ARRAY)
21
+ rb_raise(rb_eArgError, "data should be an array of arrays");
22
+
23
+ if (!NIL_P(mask) && TYPE(mask) != T_ARRAY)
24
+ rb_raise(rb_eArgError, "mask should be an array of arrays");
25
+
26
+ if (NIL_P(size) || NUM2INT(rb_Integer(size)) > RARRAY_LEN(data))
27
+ rb_raise(rb_eArgError, "size should be > 0 and <= data size");
19
28
 
20
29
  int i,j;
21
30
  int nrows = RARRAY_LEN(data);
22
31
  int ncols = RARRAY_LEN(rb_ary_entry(data, 0));
32
+ int nsets = NUM2INT(rb_Integer(size));
23
33
 
24
34
  double **cdata = (double**)malloc(sizeof(double*)*nrows);
25
35
  int **cmask = (int **)malloc(sizeof(int *)*nrows);
@@ -34,14 +44,14 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
34
44
  ccentroid[i] = (double*)malloc(sizeof(double)*ncols);
35
45
  ccentroid_mask[i] = (int *)malloc(sizeof(int )*ncols);
36
46
  for (j = 0; j < ncols; j++) {
37
- cdata[i][j] = NUM2DBL(rb_ary_entry(rb_ary_entry(data, i), j));
38
- cmask[i][j] = NUM2INT(rb_ary_entry(rb_ary_entry(mask, i), j));
47
+ cdata[i][j] = NUM2DBL(rb_Float(rb_ary_entry(rb_ary_entry(data, i), j)));
48
+ cmask[i][j] = NIL_P(mask) ? 1 : NUM2INT(rb_Integer(rb_ary_entry(rb_ary_entry(mask, i), j)));
39
49
  }
40
50
  }
41
51
 
42
52
  weights = NIL_P(options) ? Qnil : rb_hash_aref(options, ID2SYM(rb_intern("weights")));
43
53
  for (i = 0; i < ncols; i++) {
44
- cweights[i] = NIL_P(weights) ? 1.0 : NUM2DBL(rb_ary_entry(weights, i));
54
+ cweights[i] = NIL_P(weights) ? 1.0 : NUM2DBL(rb_Float(rb_ary_entry(weights, i)));
45
55
  }
46
56
 
47
57
  int transpose = opt_int_value(options, "transpose", 0);
@@ -60,10 +70,10 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
60
70
 
61
71
  int ifound;
62
72
  double error;
63
- kcluster(NUM2INT(size),
73
+ kcluster(nsets,
64
74
  nrows, ncols, cdata, cmask, cweights, transpose, npass, method, dist, ccluster, &error, &ifound);
65
75
 
66
- getclustercentroids(NUM2INT(size),
76
+ getclustercentroids(nsets,
67
77
  nrows, ncols, cdata, cmask, ccluster, ccentroid, ccentroid_mask, transpose, method);
68
78
 
69
79
  VALUE result = rb_hash_new();
@@ -71,7 +81,7 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
71
81
  VALUE centroid = rb_ary_new();
72
82
 
73
83
  for (i = 0; i < nrows; i++) {
74
- rb_ary_push(cluster, INT2NUM(ccluster[i]));
84
+ rb_ary_push(cluster, INT2NUM(ccluster[i]));
75
85
  VALUE point = rb_ary_new();
76
86
  for (j = 0; j < ncols; j++)
77
87
  rb_ary_push(point, DBL2NUM(ccentroid[i][j]));
data/flock.gemspec ADDED
@@ -0,0 +1,50 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{flock}
8
+ s.version = "0.2.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Bharanee Rathna"]
12
+ s.date = %q{2011-02-19}
13
+ s.description = %q{A thin ruby binding to Cluster 3.0}
14
+ s.email = ["deepfryed@gmail.com"]
15
+ s.extensions = ["ext/extconf.rb"]
16
+ s.extra_rdoc_files = [
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ "README.rdoc",
21
+ "Rakefile",
22
+ "VERSION",
23
+ "ext/cluster.c",
24
+ "ext/cluster.h",
25
+ "ext/extconf.rb",
26
+ "ext/flock.c",
27
+ "flock.gemspec",
28
+ "lib/flock.rb"
29
+ ]
30
+ s.homepage = %q{http://github.com/deepfryed/flock}
31
+ s.rdoc_options = ["--charset=UTF-8"]
32
+ s.require_paths = ["lib"]
33
+ s.rubygems_version = %q{1.3.7}
34
+ s.summary = %q{Ruby bindings to Cluster 3.0.}
35
+ s.test_files = [
36
+ "examples/sparse.rb",
37
+ "examples/dense.rb"
38
+ ]
39
+
40
+ if s.respond_to? :specification_version then
41
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
42
+ s.specification_version = 3
43
+
44
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
45
+ else
46
+ end
47
+ else
48
+ end
49
+ end
50
+
data/lib/flock.rb ADDED
@@ -0,0 +1,35 @@
1
+ require_relative '../ext/flock'
2
+ module Flock
3
+
4
+ def self.sparse_hash_to_data sparse_data
5
+ dims = Hash[sparse_data.map(&:keys).flatten.uniq.map.with_index{|k,v| [k,v]}]
6
+ data = sparse_data.map do |sv|
7
+ vector = Array.new(dims.size) {0}
8
+ sv.each {|k,v| vector[dims[k]] = v }
9
+ vector
10
+ end
11
+ [dims,data]
12
+ end
13
+
14
+ def self.sparse_array_to_data sparse_data
15
+ dims = Hash[sparse_data.flatten.uniq.map.with_index{|k,v| [k,v]}]
16
+ data = sparse_data.map do |sv|
17
+ vector = Array.new(dims.size) {0}
18
+ sv.each {|k| vector[dims[k]] = 1 }
19
+ vector
20
+ end
21
+ [dims,data]
22
+ end
23
+
24
+ def self.sparse_kmeans size, sparse_data, options={}
25
+ dims, data = sparse_data[0].kind_of?(Array) ? sparse_array_to_data(sparse_data) : sparse_hash_to_data(sparse_data)
26
+
27
+ if options.key?(:weights)
28
+ weights = Array.new(dims.size) {1}
29
+ options[:weights].each {|k,v| weights[dims[k]] = v }
30
+ options[:weights] = weights
31
+ end
32
+
33
+ kmeans(size, data, nil, options)
34
+ end
35
+ end
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
+ - 2
7
8
  - 1
8
- - 0
9
- version: 0.1.0
9
+ version: 0.2.1
10
10
  platform: ruby
11
11
  authors:
12
12
  - Bharanee Rathna
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-02-18 00:00:00 +11:00
17
+ date: 2011-02-19 00:00:00 +11:00
18
18
  default_executable:
19
19
  dependencies: []
20
20
 
@@ -35,7 +35,10 @@ files:
35
35
  - ext/cluster.h
36
36
  - ext/extconf.rb
37
37
  - ext/flock.c
38
- - examples/example.rb
38
+ - flock.gemspec
39
+ - lib/flock.rb
40
+ - examples/sparse.rb
41
+ - examples/dense.rb
39
42
  has_rdoc: true
40
43
  homepage: http://github.com/deepfryed/flock
41
44
  licenses: []
@@ -69,4 +72,5 @@ signing_key:
69
72
  specification_version: 3
70
73
  summary: Ruby bindings to Cluster 3.0.
71
74
  test_files:
72
- - examples/example.rb
75
+ - examples/sparse.rb
76
+ - examples/dense.rb