flock 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -1,6 +1,6 @@
1
1
  = Flock
2
2
 
3
- Ruby bindings to Cluster 3.0, http://bonsai.hgc.jp/~mdehoon/software/cluster/software.htm
3
+ Ruby bindings to {Cluster 3.0}[http://bonsai.hgc.jp/~mdehoon/software/cluster/software.htm]
4
4
 
5
5
  == Description
6
6
 
@@ -8,12 +8,14 @@ Provides bindings to K-Means clustering in Cluster 3.0
8
8
 
9
9
  == Synopsis
10
10
 
11
+ === Numeric Data
12
+
13
+ require 'pp'
11
14
  require 'flock'
12
15
 
13
16
  data = Array.new(13) {[]}
14
17
  mask = Array.new(13) {[]}
15
18
  weights = Array.new(13) {1.0}
16
- clusters = Array.new(13)
17
19
 
18
20
  data[ 0][ 0]=0.1; data[ 0][ 1]=0.0; data[ 0][ 2]=9.6; data[ 0][ 3] = 5.6;
19
21
  data[ 1][ 0]=1.4; data[ 1][ 1]=1.3; data[ 1][ 2]=0.0; data[ 1][ 3] = 3.8;
@@ -43,7 +45,7 @@ Provides bindings to K-Means clustering in Cluster 3.0
43
45
  mask[11][ 0]=0; mask[11][ 1]=1; mask[11][ 2]=1; mask[11][ 3] = 1;
44
46
  mask[12][ 0]=1; mask[12][ 1]=1; mask[12][ 2]=1; mask[12][ 3] = 1;
45
47
 
46
- p Flock.kmeans(6, data, mask)
48
+ pp Flock.kmeans(6, data, mask)
47
49
 
48
50
  # method:
49
51
  # - Flock::METHOD_AVERAGE (default)
@@ -57,7 +59,7 @@ Provides bindings to K-Means clustering in Cluster 3.0
57
59
  # - Flock::METRIC_ABSOLUTE_UNCENTERED_CORRELATION
58
60
  # - Flock::METRIC_SPEARMAN
59
61
  # - Flock::METRIC_KENDALL
60
- Flock.kmeans(
62
+ pp Flock.kmeans(
61
63
  6,
62
64
  data,
63
65
  mask,
@@ -67,6 +69,32 @@ Provides bindings to K-Means clustering in Cluster 3.0
67
69
  weights: Array.new(13) {1.0},
68
70
  )
69
71
 
72
+
73
+ === Sparse and Non-Numeric data
74
+
75
+ require 'pp'
76
+ require 'flock'
77
+
78
+ data = []
79
+ data << { apple: 1, orange: 1 }
80
+ data << { black: 1, white: 1 }
81
+ data << { white: 1, cyan: 1 }
82
+ data << { orange: 1 }
83
+ data << { apple: 1 }
84
+
85
+ pp Flock.sparse_kmeans(2, data)
86
+
87
+ # or even more simply (defaults to 1)
88
+
89
+ data = []
90
+ data << %w(apple orange)
91
+ data << %w(black white)
92
+ data << %w(white cyan)
93
+ data << %w(orange)
94
+ data << %w(apple)
95
+
96
+ pp Flock.sparse_kmeans(2, data)
97
+
70
98
  == TODO
71
99
 
72
100
  Bindings to,
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.2.1
@@ -1,11 +1,11 @@
1
1
  #!/usr/bin/ruby
2
2
 
3
+ require 'pp'
3
4
  require 'flock'
4
5
 
5
6
  data = Array.new(13) {[]}
6
7
  mask = Array.new(13) {[]}
7
8
  weights = Array.new(13) {1.0}
8
- clusters = Array.new(13)
9
9
 
10
10
  data[ 0][ 0]=0.1; data[ 0][ 1]=0.0; data[ 0][ 2]=9.6; data[ 0][ 3] = 5.6;
11
11
  data[ 1][ 0]=1.4; data[ 1][ 1]=1.3; data[ 1][ 2]=0.0; data[ 1][ 3] = 3.8;
@@ -35,5 +35,4 @@ mask[10][ 0]=1; mask[10][ 1]=1; mask[10][ 2]=1; mask[10][ 3] = 1;
35
35
  mask[11][ 0]=0; mask[11][ 1]=1; mask[11][ 2]=1; mask[11][ 3] = 1;
36
36
  mask[12][ 0]=1; mask[12][ 1]=1; mask[12][ 2]=1; mask[12][ 3] = 1;
37
37
 
38
- require 'pp'
39
38
  pp Flock.kmeans(6, data, mask)
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'pp'
4
+ require 'flock'
5
+
6
+ data = []
7
+ data << { apple: 1, orange: 1 }
8
+ data << { black: 1, white: 1 }
9
+ data << { white: 1, cyan: 1 }
10
+ data << { orange: 1 }
11
+ data << { apple: 1 }
12
+
13
+ pp Flock.sparse_kmeans(2, data)
14
+
15
+ data = []
16
+ data << %w(apple orange)
17
+ data << %w(black white)
18
+ data << %w(white cyan)
19
+ data << %w(orange)
20
+ data << %w(apple)
21
+
22
+ pp Flock.sparse_kmeans(2, data)
data/ext/flock.c CHANGED
@@ -15,11 +15,21 @@ int opt_int_value(VALUE option, char *key, int def) {
15
15
 
16
16
  VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
17
17
  VALUE size, data, mask, weights, options;
18
- rb_scan_args(argc, argv, "31", &size, &data, &mask, &options);
18
+ rb_scan_args(argc, argv, "22", &size, &data, &mask, &options);
19
+
20
+ if (TYPE(data) != T_ARRAY)
21
+ rb_raise(rb_eArgError, "data should be an array of arrays");
22
+
23
+ if (!NIL_P(mask) && TYPE(mask) != T_ARRAY)
24
+ rb_raise(rb_eArgError, "mask should be an array of arrays");
25
+
26
+ if (NIL_P(size) || NUM2INT(rb_Integer(size)) > RARRAY_LEN(data))
27
+ rb_raise(rb_eArgError, "size should be > 0 and <= data size");
19
28
 
20
29
  int i,j;
21
30
  int nrows = RARRAY_LEN(data);
22
31
  int ncols = RARRAY_LEN(rb_ary_entry(data, 0));
32
+ int nsets = NUM2INT(rb_Integer(size));
23
33
 
24
34
  double **cdata = (double**)malloc(sizeof(double*)*nrows);
25
35
  int **cmask = (int **)malloc(sizeof(int *)*nrows);
@@ -34,14 +44,14 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
34
44
  ccentroid[i] = (double*)malloc(sizeof(double)*ncols);
35
45
  ccentroid_mask[i] = (int *)malloc(sizeof(int )*ncols);
36
46
  for (j = 0; j < ncols; j++) {
37
- cdata[i][j] = NUM2DBL(rb_ary_entry(rb_ary_entry(data, i), j));
38
- cmask[i][j] = NUM2INT(rb_ary_entry(rb_ary_entry(mask, i), j));
47
+ cdata[i][j] = NUM2DBL(rb_Float(rb_ary_entry(rb_ary_entry(data, i), j)));
48
+ cmask[i][j] = NIL_P(mask) ? 1 : NUM2INT(rb_Integer(rb_ary_entry(rb_ary_entry(mask, i), j)));
39
49
  }
40
50
  }
41
51
 
42
52
  weights = NIL_P(options) ? Qnil : rb_hash_aref(options, ID2SYM(rb_intern("weights")));
43
53
  for (i = 0; i < ncols; i++) {
44
- cweights[i] = NIL_P(weights) ? 1.0 : NUM2DBL(rb_ary_entry(weights, i));
54
+ cweights[i] = NIL_P(weights) ? 1.0 : NUM2DBL(rb_Float(rb_ary_entry(weights, i)));
45
55
  }
46
56
 
47
57
  int transpose = opt_int_value(options, "transpose", 0);
@@ -60,10 +70,10 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
60
70
 
61
71
  int ifound;
62
72
  double error;
63
- kcluster(NUM2INT(size),
73
+ kcluster(nsets,
64
74
  nrows, ncols, cdata, cmask, cweights, transpose, npass, method, dist, ccluster, &error, &ifound);
65
75
 
66
- getclustercentroids(NUM2INT(size),
76
+ getclustercentroids(nsets,
67
77
  nrows, ncols, cdata, cmask, ccluster, ccentroid, ccentroid_mask, transpose, method);
68
78
 
69
79
  VALUE result = rb_hash_new();
@@ -71,7 +81,7 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
71
81
  VALUE centroid = rb_ary_new();
72
82
 
73
83
  for (i = 0; i < nrows; i++) {
74
- rb_ary_push(cluster, INT2NUM(ccluster[i]));
84
+ rb_ary_push(cluster, INT2NUM(ccluster[i]));
75
85
  VALUE point = rb_ary_new();
76
86
  for (j = 0; j < ncols; j++)
77
87
  rb_ary_push(point, DBL2NUM(ccentroid[i][j]));
data/flock.gemspec ADDED
@@ -0,0 +1,50 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{flock}
8
+ s.version = "0.2.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Bharanee Rathna"]
12
+ s.date = %q{2011-02-19}
13
+ s.description = %q{A thin ruby binding to Cluster 3.0}
14
+ s.email = ["deepfryed@gmail.com"]
15
+ s.extensions = ["ext/extconf.rb"]
16
+ s.extra_rdoc_files = [
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ "README.rdoc",
21
+ "Rakefile",
22
+ "VERSION",
23
+ "ext/cluster.c",
24
+ "ext/cluster.h",
25
+ "ext/extconf.rb",
26
+ "ext/flock.c",
27
+ "flock.gemspec",
28
+ "lib/flock.rb"
29
+ ]
30
+ s.homepage = %q{http://github.com/deepfryed/flock}
31
+ s.rdoc_options = ["--charset=UTF-8"]
32
+ s.require_paths = ["lib"]
33
+ s.rubygems_version = %q{1.3.7}
34
+ s.summary = %q{Ruby bindings to Cluster 3.0.}
35
+ s.test_files = [
36
+ "examples/sparse.rb",
37
+ "examples/dense.rb"
38
+ ]
39
+
40
+ if s.respond_to? :specification_version then
41
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
42
+ s.specification_version = 3
43
+
44
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
45
+ else
46
+ end
47
+ else
48
+ end
49
+ end
50
+
data/lib/flock.rb ADDED
@@ -0,0 +1,35 @@
1
+ require_relative '../ext/flock'
2
+ module Flock
3
+
4
+ def self.sparse_hash_to_data sparse_data
5
+ dims = Hash[sparse_data.map(&:keys).flatten.uniq.map.with_index{|k,v| [k,v]}]
6
+ data = sparse_data.map do |sv|
7
+ vector = Array.new(dims.size) {0}
8
+ sv.each {|k,v| vector[dims[k]] = v }
9
+ vector
10
+ end
11
+ [dims,data]
12
+ end
13
+
14
+ def self.sparse_array_to_data sparse_data
15
+ dims = Hash[sparse_data.flatten.uniq.map.with_index{|k,v| [k,v]}]
16
+ data = sparse_data.map do |sv|
17
+ vector = Array.new(dims.size) {0}
18
+ sv.each {|k| vector[dims[k]] = 1 }
19
+ vector
20
+ end
21
+ [dims,data]
22
+ end
23
+
24
+ def self.sparse_kmeans size, sparse_data, options={}
25
+ dims, data = sparse_data[0].kind_of?(Array) ? sparse_array_to_data(sparse_data) : sparse_hash_to_data(sparse_data)
26
+
27
+ if options.key?(:weights)
28
+ weights = Array.new(dims.size) {1}
29
+ options[:weights].each {|k,v| weights[dims[k]] = v }
30
+ options[:weights] = weights
31
+ end
32
+
33
+ kmeans(size, data, nil, options)
34
+ end
35
+ end
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
+ - 2
7
8
  - 1
8
- - 0
9
- version: 0.1.0
9
+ version: 0.2.1
10
10
  platform: ruby
11
11
  authors:
12
12
  - Bharanee Rathna
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-02-18 00:00:00 +11:00
17
+ date: 2011-02-19 00:00:00 +11:00
18
18
  default_executable:
19
19
  dependencies: []
20
20
 
@@ -35,7 +35,10 @@ files:
35
35
  - ext/cluster.h
36
36
  - ext/extconf.rb
37
37
  - ext/flock.c
38
- - examples/example.rb
38
+ - flock.gemspec
39
+ - lib/flock.rb
40
+ - examples/sparse.rb
41
+ - examples/dense.rb
39
42
  has_rdoc: true
40
43
  homepage: http://github.com/deepfryed/flock
41
44
  licenses: []
@@ -69,4 +72,5 @@ signing_key:
69
72
  specification_version: 3
70
73
  summary: Ruby bindings to Cluster 3.0.
71
74
  test_files:
72
- - examples/example.rb
75
+ - examples/sparse.rb
76
+ - examples/dense.rb