flock 0.1.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +32 -4
- data/VERSION +1 -1
- data/examples/{example.rb → dense.rb} +1 -2
- data/examples/sparse.rb +22 -0
- data/ext/flock.c +17 -7
- data/flock.gemspec +50 -0
- data/lib/flock.rb +35 -0
- metadata +9 -5
data/README.rdoc
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
= Flock
|
2
2
|
|
3
|
-
Ruby bindings to Cluster 3.0
|
3
|
+
Ruby bindings to {Cluster 3.0}[http://bonsai.hgc.jp/~mdehoon/software/cluster/software.htm]
|
4
4
|
|
5
5
|
== Description
|
6
6
|
|
@@ -8,12 +8,14 @@ Provides bindings to K-Means clustering in Cluster 3.0
|
|
8
8
|
|
9
9
|
== Synopsis
|
10
10
|
|
11
|
+
=== Numeric Data
|
12
|
+
|
13
|
+
require 'pp'
|
11
14
|
require 'flock'
|
12
15
|
|
13
16
|
data = Array.new(13) {[]}
|
14
17
|
mask = Array.new(13) {[]}
|
15
18
|
weights = Array.new(13) {1.0}
|
16
|
-
clusters = Array.new(13)
|
17
19
|
|
18
20
|
data[ 0][ 0]=0.1; data[ 0][ 1]=0.0; data[ 0][ 2]=9.6; data[ 0][ 3] = 5.6;
|
19
21
|
data[ 1][ 0]=1.4; data[ 1][ 1]=1.3; data[ 1][ 2]=0.0; data[ 1][ 3] = 3.8;
|
@@ -43,7 +45,7 @@ Provides bindings to K-Means clustering in Cluster 3.0
|
|
43
45
|
mask[11][ 0]=0; mask[11][ 1]=1; mask[11][ 2]=1; mask[11][ 3] = 1;
|
44
46
|
mask[12][ 0]=1; mask[12][ 1]=1; mask[12][ 2]=1; mask[12][ 3] = 1;
|
45
47
|
|
46
|
-
|
48
|
+
pp Flock.kmeans(6, data, mask)
|
47
49
|
|
48
50
|
# method:
|
49
51
|
# - Flock::METHOD_AVERAGE (default)
|
@@ -57,7 +59,7 @@ Provides bindings to K-Means clustering in Cluster 3.0
|
|
57
59
|
# - Flock::METRIC_ABSOLUTE_UNCENTERED_CORRELATION
|
58
60
|
# - Flock::METRIC_SPEARMAN
|
59
61
|
# - Flock::METRIC_KENDALL
|
60
|
-
Flock.kmeans(
|
62
|
+
pp Flock.kmeans(
|
61
63
|
6,
|
62
64
|
data,
|
63
65
|
mask,
|
@@ -67,6 +69,32 @@ Provides bindings to K-Means clustering in Cluster 3.0
|
|
67
69
|
weights: Array.new(13) {1.0},
|
68
70
|
)
|
69
71
|
|
72
|
+
|
73
|
+
=== Sparse and Non-Numeric data
|
74
|
+
|
75
|
+
require 'pp'
|
76
|
+
require 'flock'
|
77
|
+
|
78
|
+
data = []
|
79
|
+
data << { apple: 1, orange: 1 }
|
80
|
+
data << { black: 1, white: 1 }
|
81
|
+
data << { white: 1, cyan: 1 }
|
82
|
+
data << { orange: 1 }
|
83
|
+
data << { apple: 1 }
|
84
|
+
|
85
|
+
pp Flock.sparse_kmeans(2, data)
|
86
|
+
|
87
|
+
# or even more simply (defaults to 1)
|
88
|
+
|
89
|
+
data = []
|
90
|
+
data << %w(apple orange)
|
91
|
+
data << %w(black white)
|
92
|
+
data << %w(white cyan)
|
93
|
+
data << %w(orange)
|
94
|
+
data << %w(apple)
|
95
|
+
|
96
|
+
pp Flock.sparse_kmeans(2, data)
|
97
|
+
|
70
98
|
== TODO
|
71
99
|
|
72
100
|
Bindings to,
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1
|
1
|
+
0.2.1
|
@@ -1,11 +1,11 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
2
|
|
3
|
+
require 'pp'
|
3
4
|
require 'flock'
|
4
5
|
|
5
6
|
data = Array.new(13) {[]}
|
6
7
|
mask = Array.new(13) {[]}
|
7
8
|
weights = Array.new(13) {1.0}
|
8
|
-
clusters = Array.new(13)
|
9
9
|
|
10
10
|
data[ 0][ 0]=0.1; data[ 0][ 1]=0.0; data[ 0][ 2]=9.6; data[ 0][ 3] = 5.6;
|
11
11
|
data[ 1][ 0]=1.4; data[ 1][ 1]=1.3; data[ 1][ 2]=0.0; data[ 1][ 3] = 3.8;
|
@@ -35,5 +35,4 @@ mask[10][ 0]=1; mask[10][ 1]=1; mask[10][ 2]=1; mask[10][ 3] = 1;
|
|
35
35
|
mask[11][ 0]=0; mask[11][ 1]=1; mask[11][ 2]=1; mask[11][ 3] = 1;
|
36
36
|
mask[12][ 0]=1; mask[12][ 1]=1; mask[12][ 2]=1; mask[12][ 3] = 1;
|
37
37
|
|
38
|
-
require 'pp'
|
39
38
|
pp Flock.kmeans(6, data, mask)
|
data/examples/sparse.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'pp'
|
4
|
+
require 'flock'
|
5
|
+
|
6
|
+
data = []
|
7
|
+
data << { apple: 1, orange: 1 }
|
8
|
+
data << { black: 1, white: 1 }
|
9
|
+
data << { white: 1, cyan: 1 }
|
10
|
+
data << { orange: 1 }
|
11
|
+
data << { apple: 1 }
|
12
|
+
|
13
|
+
pp Flock.sparse_kmeans(2, data)
|
14
|
+
|
15
|
+
data = []
|
16
|
+
data << %w(apple orange)
|
17
|
+
data << %w(black white)
|
18
|
+
data << %w(white cyan)
|
19
|
+
data << %w(orange)
|
20
|
+
data << %w(apple)
|
21
|
+
|
22
|
+
pp Flock.sparse_kmeans(2, data)
|
data/ext/flock.c
CHANGED
@@ -15,11 +15,21 @@ int opt_int_value(VALUE option, char *key, int def) {
|
|
15
15
|
|
16
16
|
VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
|
17
17
|
VALUE size, data, mask, weights, options;
|
18
|
-
rb_scan_args(argc, argv, "
|
18
|
+
rb_scan_args(argc, argv, "22", &size, &data, &mask, &options);
|
19
|
+
|
20
|
+
if (TYPE(data) != T_ARRAY)
|
21
|
+
rb_raise(rb_eArgError, "data should be an array of arrays");
|
22
|
+
|
23
|
+
if (!NIL_P(mask) && TYPE(mask) != T_ARRAY)
|
24
|
+
rb_raise(rb_eArgError, "mask should be an array of arrays");
|
25
|
+
|
26
|
+
if (NIL_P(size) || NUM2INT(rb_Integer(size)) > RARRAY_LEN(data))
|
27
|
+
rb_raise(rb_eArgError, "size should be > 0 and <= data size");
|
19
28
|
|
20
29
|
int i,j;
|
21
30
|
int nrows = RARRAY_LEN(data);
|
22
31
|
int ncols = RARRAY_LEN(rb_ary_entry(data, 0));
|
32
|
+
int nsets = NUM2INT(rb_Integer(size));
|
23
33
|
|
24
34
|
double **cdata = (double**)malloc(sizeof(double*)*nrows);
|
25
35
|
int **cmask = (int **)malloc(sizeof(int *)*nrows);
|
@@ -34,14 +44,14 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
|
|
34
44
|
ccentroid[i] = (double*)malloc(sizeof(double)*ncols);
|
35
45
|
ccentroid_mask[i] = (int *)malloc(sizeof(int )*ncols);
|
36
46
|
for (j = 0; j < ncols; j++) {
|
37
|
-
cdata[i][j] = NUM2DBL(rb_ary_entry(rb_ary_entry(data, i), j));
|
38
|
-
cmask[i][j] = NUM2INT(rb_ary_entry(rb_ary_entry(mask, i), j));
|
47
|
+
cdata[i][j] = NUM2DBL(rb_Float(rb_ary_entry(rb_ary_entry(data, i), j)));
|
48
|
+
cmask[i][j] = NIL_P(mask) ? 1 : NUM2INT(rb_Integer(rb_ary_entry(rb_ary_entry(mask, i), j)));
|
39
49
|
}
|
40
50
|
}
|
41
51
|
|
42
52
|
weights = NIL_P(options) ? Qnil : rb_hash_aref(options, ID2SYM(rb_intern("weights")));
|
43
53
|
for (i = 0; i < ncols; i++) {
|
44
|
-
cweights[i] = NIL_P(weights) ? 1.0 : NUM2DBL(rb_ary_entry(weights, i));
|
54
|
+
cweights[i] = NIL_P(weights) ? 1.0 : NUM2DBL(rb_Float(rb_ary_entry(weights, i)));
|
45
55
|
}
|
46
56
|
|
47
57
|
int transpose = opt_int_value(options, "transpose", 0);
|
@@ -60,10 +70,10 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
|
|
60
70
|
|
61
71
|
int ifound;
|
62
72
|
double error;
|
63
|
-
kcluster(
|
73
|
+
kcluster(nsets,
|
64
74
|
nrows, ncols, cdata, cmask, cweights, transpose, npass, method, dist, ccluster, &error, &ifound);
|
65
75
|
|
66
|
-
getclustercentroids(
|
76
|
+
getclustercentroids(nsets,
|
67
77
|
nrows, ncols, cdata, cmask, ccluster, ccentroid, ccentroid_mask, transpose, method);
|
68
78
|
|
69
79
|
VALUE result = rb_hash_new();
|
@@ -71,7 +81,7 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
|
|
71
81
|
VALUE centroid = rb_ary_new();
|
72
82
|
|
73
83
|
for (i = 0; i < nrows; i++) {
|
74
|
-
rb_ary_push(cluster,
|
84
|
+
rb_ary_push(cluster, INT2NUM(ccluster[i]));
|
75
85
|
VALUE point = rb_ary_new();
|
76
86
|
for (j = 0; j < ncols; j++)
|
77
87
|
rb_ary_push(point, DBL2NUM(ccentroid[i][j]));
|
data/flock.gemspec
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{flock}
|
8
|
+
s.version = "0.2.1"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Bharanee Rathna"]
|
12
|
+
s.date = %q{2011-02-19}
|
13
|
+
s.description = %q{A thin ruby binding to Cluster 3.0}
|
14
|
+
s.email = ["deepfryed@gmail.com"]
|
15
|
+
s.extensions = ["ext/extconf.rb"]
|
16
|
+
s.extra_rdoc_files = [
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
"README.rdoc",
|
21
|
+
"Rakefile",
|
22
|
+
"VERSION",
|
23
|
+
"ext/cluster.c",
|
24
|
+
"ext/cluster.h",
|
25
|
+
"ext/extconf.rb",
|
26
|
+
"ext/flock.c",
|
27
|
+
"flock.gemspec",
|
28
|
+
"lib/flock.rb"
|
29
|
+
]
|
30
|
+
s.homepage = %q{http://github.com/deepfryed/flock}
|
31
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
32
|
+
s.require_paths = ["lib"]
|
33
|
+
s.rubygems_version = %q{1.3.7}
|
34
|
+
s.summary = %q{Ruby bindings to Cluster 3.0.}
|
35
|
+
s.test_files = [
|
36
|
+
"examples/sparse.rb",
|
37
|
+
"examples/dense.rb"
|
38
|
+
]
|
39
|
+
|
40
|
+
if s.respond_to? :specification_version then
|
41
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
42
|
+
s.specification_version = 3
|
43
|
+
|
44
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
45
|
+
else
|
46
|
+
end
|
47
|
+
else
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
data/lib/flock.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
require_relative '../ext/flock'
|
2
|
+
module Flock
|
3
|
+
|
4
|
+
def self.sparse_hash_to_data sparse_data
|
5
|
+
dims = Hash[sparse_data.map(&:keys).flatten.uniq.map.with_index{|k,v| [k,v]}]
|
6
|
+
data = sparse_data.map do |sv|
|
7
|
+
vector = Array.new(dims.size) {0}
|
8
|
+
sv.each {|k,v| vector[dims[k]] = v }
|
9
|
+
vector
|
10
|
+
end
|
11
|
+
[dims,data]
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.sparse_array_to_data sparse_data
|
15
|
+
dims = Hash[sparse_data.flatten.uniq.map.with_index{|k,v| [k,v]}]
|
16
|
+
data = sparse_data.map do |sv|
|
17
|
+
vector = Array.new(dims.size) {0}
|
18
|
+
sv.each {|k| vector[dims[k]] = 1 }
|
19
|
+
vector
|
20
|
+
end
|
21
|
+
[dims,data]
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.sparse_kmeans size, sparse_data, options={}
|
25
|
+
dims, data = sparse_data[0].kind_of?(Array) ? sparse_array_to_data(sparse_data) : sparse_hash_to_data(sparse_data)
|
26
|
+
|
27
|
+
if options.key?(:weights)
|
28
|
+
weights = Array.new(dims.size) {1}
|
29
|
+
options[:weights].each {|k,v| weights[dims[k]] = v }
|
30
|
+
options[:weights] = weights
|
31
|
+
end
|
32
|
+
|
33
|
+
kmeans(size, data, nil, options)
|
34
|
+
end
|
35
|
+
end
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
+
- 2
|
7
8
|
- 1
|
8
|
-
|
9
|
-
version: 0.1.0
|
9
|
+
version: 0.2.1
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Bharanee Rathna
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-02-
|
17
|
+
date: 2011-02-19 00:00:00 +11:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|
@@ -35,7 +35,10 @@ files:
|
|
35
35
|
- ext/cluster.h
|
36
36
|
- ext/extconf.rb
|
37
37
|
- ext/flock.c
|
38
|
-
-
|
38
|
+
- flock.gemspec
|
39
|
+
- lib/flock.rb
|
40
|
+
- examples/sparse.rb
|
41
|
+
- examples/dense.rb
|
39
42
|
has_rdoc: true
|
40
43
|
homepage: http://github.com/deepfryed/flock
|
41
44
|
licenses: []
|
@@ -69,4 +72,5 @@ signing_key:
|
|
69
72
|
specification_version: 3
|
70
73
|
summary: Ruby bindings to Cluster 3.0.
|
71
74
|
test_files:
|
72
|
-
- examples/
|
75
|
+
- examples/sparse.rb
|
76
|
+
- examples/dense.rb
|