flock 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +32 -4
- data/VERSION +1 -1
- data/examples/{example.rb → dense.rb} +1 -2
- data/examples/sparse.rb +22 -0
- data/ext/flock.c +17 -7
- data/flock.gemspec +50 -0
- data/lib/flock.rb +35 -0
- metadata +9 -5
data/README.rdoc
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
= Flock
|
2
2
|
|
3
|
-
Ruby bindings to Cluster 3.0
|
3
|
+
Ruby bindings to {Cluster 3.0}[http://bonsai.hgc.jp/~mdehoon/software/cluster/software.htm]
|
4
4
|
|
5
5
|
== Description
|
6
6
|
|
@@ -8,12 +8,14 @@ Provides bindings to K-Means clustering in Cluster 3.0
|
|
8
8
|
|
9
9
|
== Synopsis
|
10
10
|
|
11
|
+
=== Numeric Data
|
12
|
+
|
13
|
+
require 'pp'
|
11
14
|
require 'flock'
|
12
15
|
|
13
16
|
data = Array.new(13) {[]}
|
14
17
|
mask = Array.new(13) {[]}
|
15
18
|
weights = Array.new(13) {1.0}
|
16
|
-
clusters = Array.new(13)
|
17
19
|
|
18
20
|
data[ 0][ 0]=0.1; data[ 0][ 1]=0.0; data[ 0][ 2]=9.6; data[ 0][ 3] = 5.6;
|
19
21
|
data[ 1][ 0]=1.4; data[ 1][ 1]=1.3; data[ 1][ 2]=0.0; data[ 1][ 3] = 3.8;
|
@@ -43,7 +45,7 @@ Provides bindings to K-Means clustering in Cluster 3.0
|
|
43
45
|
mask[11][ 0]=0; mask[11][ 1]=1; mask[11][ 2]=1; mask[11][ 3] = 1;
|
44
46
|
mask[12][ 0]=1; mask[12][ 1]=1; mask[12][ 2]=1; mask[12][ 3] = 1;
|
45
47
|
|
46
|
-
|
48
|
+
pp Flock.kmeans(6, data, mask)
|
47
49
|
|
48
50
|
# method:
|
49
51
|
# - Flock::METHOD_AVERAGE (default)
|
@@ -57,7 +59,7 @@ Provides bindings to K-Means clustering in Cluster 3.0
|
|
57
59
|
# - Flock::METRIC_ABSOLUTE_UNCENTERED_CORRELATION
|
58
60
|
# - Flock::METRIC_SPEARMAN
|
59
61
|
# - Flock::METRIC_KENDALL
|
60
|
-
Flock.kmeans(
|
62
|
+
pp Flock.kmeans(
|
61
63
|
6,
|
62
64
|
data,
|
63
65
|
mask,
|
@@ -67,6 +69,32 @@ Provides bindings to K-Means clustering in Cluster 3.0
|
|
67
69
|
weights: Array.new(13) {1.0},
|
68
70
|
)
|
69
71
|
|
72
|
+
|
73
|
+
=== Sparse and Non-Numeric data
|
74
|
+
|
75
|
+
require 'pp'
|
76
|
+
require 'flock'
|
77
|
+
|
78
|
+
data = []
|
79
|
+
data << { apple: 1, orange: 1 }
|
80
|
+
data << { black: 1, white: 1 }
|
81
|
+
data << { white: 1, cyan: 1 }
|
82
|
+
data << { orange: 1 }
|
83
|
+
data << { apple: 1 }
|
84
|
+
|
85
|
+
pp Flock.sparse_kmeans(2, data)
|
86
|
+
|
87
|
+
# or even more simply (defaults to 1)
|
88
|
+
|
89
|
+
data = []
|
90
|
+
data << %w(apple orange)
|
91
|
+
data << %w(black white)
|
92
|
+
data << %w(white cyan)
|
93
|
+
data << %w(orange)
|
94
|
+
data << %w(apple)
|
95
|
+
|
96
|
+
pp Flock.sparse_kmeans(2, data)
|
97
|
+
|
70
98
|
== TODO
|
71
99
|
|
72
100
|
Bindings to,
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1
|
1
|
+
0.2.1
|
data/examples/{example.rb → dense.rb}
RENAMED
@@ -1,11 +1,11 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
2
|
|
3
|
+
require 'pp'
|
3
4
|
require 'flock'
|
4
5
|
|
5
6
|
data = Array.new(13) {[]}
|
6
7
|
mask = Array.new(13) {[]}
|
7
8
|
weights = Array.new(13) {1.0}
|
8
|
-
clusters = Array.new(13)
|
9
9
|
|
10
10
|
data[ 0][ 0]=0.1; data[ 0][ 1]=0.0; data[ 0][ 2]=9.6; data[ 0][ 3] = 5.6;
|
11
11
|
data[ 1][ 0]=1.4; data[ 1][ 1]=1.3; data[ 1][ 2]=0.0; data[ 1][ 3] = 3.8;
|
@@ -35,5 +35,4 @@ mask[10][ 0]=1; mask[10][ 1]=1; mask[10][ 2]=1; mask[10][ 3] = 1;
|
|
35
35
|
mask[11][ 0]=0; mask[11][ 1]=1; mask[11][ 2]=1; mask[11][ 3] = 1;
|
36
36
|
mask[12][ 0]=1; mask[12][ 1]=1; mask[12][ 2]=1; mask[12][ 3] = 1;
|
37
37
|
|
38
|
-
require 'pp'
|
39
38
|
pp Flock.kmeans(6, data, mask)
|
data/examples/sparse.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'pp'
|
4
|
+
require 'flock'
|
5
|
+
|
6
|
+
data = []
|
7
|
+
data << { apple: 1, orange: 1 }
|
8
|
+
data << { black: 1, white: 1 }
|
9
|
+
data << { white: 1, cyan: 1 }
|
10
|
+
data << { orange: 1 }
|
11
|
+
data << { apple: 1 }
|
12
|
+
|
13
|
+
pp Flock.sparse_kmeans(2, data)
|
14
|
+
|
15
|
+
data = []
|
16
|
+
data << %w(apple orange)
|
17
|
+
data << %w(black white)
|
18
|
+
data << %w(white cyan)
|
19
|
+
data << %w(orange)
|
20
|
+
data << %w(apple)
|
21
|
+
|
22
|
+
pp Flock.sparse_kmeans(2, data)
|
data/ext/flock.c
CHANGED
@@ -15,11 +15,21 @@ int opt_int_value(VALUE option, char *key, int def) {
|
|
15
15
|
|
16
16
|
VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
|
17
17
|
VALUE size, data, mask, weights, options;
|
18
|
-
rb_scan_args(argc, argv, "
|
18
|
+
rb_scan_args(argc, argv, "22", &size, &data, &mask, &options);
|
19
|
+
|
20
|
+
if (TYPE(data) != T_ARRAY)
|
21
|
+
rb_raise(rb_eArgError, "data should be an array of arrays");
|
22
|
+
|
23
|
+
if (!NIL_P(mask) && TYPE(mask) != T_ARRAY)
|
24
|
+
rb_raise(rb_eArgError, "mask should be an array of arrays");
|
25
|
+
|
26
|
+
if (NIL_P(size) || NUM2INT(rb_Integer(size)) > RARRAY_LEN(data))
|
27
|
+
rb_raise(rb_eArgError, "size should be > 0 and <= data size");
|
19
28
|
|
20
29
|
int i,j;
|
21
30
|
int nrows = RARRAY_LEN(data);
|
22
31
|
int ncols = RARRAY_LEN(rb_ary_entry(data, 0));
|
32
|
+
int nsets = NUM2INT(rb_Integer(size));
|
23
33
|
|
24
34
|
double **cdata = (double**)malloc(sizeof(double*)*nrows);
|
25
35
|
int **cmask = (int **)malloc(sizeof(int *)*nrows);
|
@@ -34,14 +44,14 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
|
|
34
44
|
ccentroid[i] = (double*)malloc(sizeof(double)*ncols);
|
35
45
|
ccentroid_mask[i] = (int *)malloc(sizeof(int )*ncols);
|
36
46
|
for (j = 0; j < ncols; j++) {
|
37
|
-
cdata[i][j] = NUM2DBL(rb_ary_entry(rb_ary_entry(data, i), j));
|
38
|
-
cmask[i][j] = NUM2INT(rb_ary_entry(rb_ary_entry(mask, i), j));
|
47
|
+
cdata[i][j] = NUM2DBL(rb_Float(rb_ary_entry(rb_ary_entry(data, i), j)));
|
48
|
+
cmask[i][j] = NIL_P(mask) ? 1 : NUM2INT(rb_Integer(rb_ary_entry(rb_ary_entry(mask, i), j)));
|
39
49
|
}
|
40
50
|
}
|
41
51
|
|
42
52
|
weights = NIL_P(options) ? Qnil : rb_hash_aref(options, ID2SYM(rb_intern("weights")));
|
43
53
|
for (i = 0; i < ncols; i++) {
|
44
|
-
cweights[i] = NIL_P(weights) ? 1.0 : NUM2DBL(rb_ary_entry(weights, i));
|
54
|
+
cweights[i] = NIL_P(weights) ? 1.0 : NUM2DBL(rb_Float(rb_ary_entry(weights, i)));
|
45
55
|
}
|
46
56
|
|
47
57
|
int transpose = opt_int_value(options, "transpose", 0);
|
@@ -60,10 +70,10 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
|
|
60
70
|
|
61
71
|
int ifound;
|
62
72
|
double error;
|
63
|
-
kcluster(
|
73
|
+
kcluster(nsets,
|
64
74
|
nrows, ncols, cdata, cmask, cweights, transpose, npass, method, dist, ccluster, &error, &ifound);
|
65
75
|
|
66
|
-
getclustercentroids(
|
76
|
+
getclustercentroids(nsets,
|
67
77
|
nrows, ncols, cdata, cmask, ccluster, ccentroid, ccentroid_mask, transpose, method);
|
68
78
|
|
69
79
|
VALUE result = rb_hash_new();
|
@@ -71,7 +81,7 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
|
|
71
81
|
VALUE centroid = rb_ary_new();
|
72
82
|
|
73
83
|
for (i = 0; i < nrows; i++) {
|
74
|
-
rb_ary_push(cluster,
|
84
|
+
rb_ary_push(cluster, INT2NUM(ccluster[i]));
|
75
85
|
VALUE point = rb_ary_new();
|
76
86
|
for (j = 0; j < ncols; j++)
|
77
87
|
rb_ary_push(point, DBL2NUM(ccentroid[i][j]));
|
data/flock.gemspec
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{flock}
|
8
|
+
s.version = "0.2.1"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Bharanee Rathna"]
|
12
|
+
s.date = %q{2011-02-19}
|
13
|
+
s.description = %q{A thin ruby binding to Cluster 3.0}
|
14
|
+
s.email = ["deepfryed@gmail.com"]
|
15
|
+
s.extensions = ["ext/extconf.rb"]
|
16
|
+
s.extra_rdoc_files = [
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
"README.rdoc",
|
21
|
+
"Rakefile",
|
22
|
+
"VERSION",
|
23
|
+
"ext/cluster.c",
|
24
|
+
"ext/cluster.h",
|
25
|
+
"ext/extconf.rb",
|
26
|
+
"ext/flock.c",
|
27
|
+
"flock.gemspec",
|
28
|
+
"lib/flock.rb"
|
29
|
+
]
|
30
|
+
s.homepage = %q{http://github.com/deepfryed/flock}
|
31
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
32
|
+
s.require_paths = ["lib"]
|
33
|
+
s.rubygems_version = %q{1.3.7}
|
34
|
+
s.summary = %q{Ruby bindings to Cluster 3.0.}
|
35
|
+
s.test_files = [
|
36
|
+
"examples/sparse.rb",
|
37
|
+
"examples/dense.rb"
|
38
|
+
]
|
39
|
+
|
40
|
+
if s.respond_to? :specification_version then
|
41
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
42
|
+
s.specification_version = 3
|
43
|
+
|
44
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
45
|
+
else
|
46
|
+
end
|
47
|
+
else
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
data/lib/flock.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
require_relative '../ext/flock'
|
2
|
+
module Flock
|
3
|
+
|
4
|
+
def self.sparse_hash_to_data sparse_data
|
5
|
+
dims = Hash[sparse_data.map(&:keys).flatten.uniq.map.with_index{|k,v| [k,v]}]
|
6
|
+
data = sparse_data.map do |sv|
|
7
|
+
vector = Array.new(dims.size) {0}
|
8
|
+
sv.each {|k,v| vector[dims[k]] = v }
|
9
|
+
vector
|
10
|
+
end
|
11
|
+
[dims,data]
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.sparse_array_to_data sparse_data
|
15
|
+
dims = Hash[sparse_data.flatten.uniq.map.with_index{|k,v| [k,v]}]
|
16
|
+
data = sparse_data.map do |sv|
|
17
|
+
vector = Array.new(dims.size) {0}
|
18
|
+
sv.each {|k| vector[dims[k]] = 1 }
|
19
|
+
vector
|
20
|
+
end
|
21
|
+
[dims,data]
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.sparse_kmeans size, sparse_data, options={}
|
25
|
+
dims, data = sparse_data[0].kind_of?(Array) ? sparse_array_to_data(sparse_data) : sparse_hash_to_data(sparse_data)
|
26
|
+
|
27
|
+
if options.key?(:weights)
|
28
|
+
weights = Array.new(dims.size) {1}
|
29
|
+
options[:weights].each {|k,v| weights[dims[k]] = v }
|
30
|
+
options[:weights] = weights
|
31
|
+
end
|
32
|
+
|
33
|
+
kmeans(size, data, nil, options)
|
34
|
+
end
|
35
|
+
end
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
+
- 2
|
7
8
|
- 1
|
8
|
-
|
9
|
-
version: 0.1.0
|
9
|
+
version: 0.2.1
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Bharanee Rathna
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-02-
|
17
|
+
date: 2011-02-19 00:00:00 +11:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|
@@ -35,7 +35,10 @@ files:
|
|
35
35
|
- ext/cluster.h
|
36
36
|
- ext/extconf.rb
|
37
37
|
- ext/flock.c
|
38
|
-
-
|
38
|
+
- flock.gemspec
|
39
|
+
- lib/flock.rb
|
40
|
+
- examples/sparse.rb
|
41
|
+
- examples/dense.rb
|
39
42
|
has_rdoc: true
|
40
43
|
homepage: http://github.com/deepfryed/flock
|
41
44
|
licenses: []
|
@@ -69,4 +72,5 @@ signing_key:
|
|
69
72
|
specification_version: 3
|
70
73
|
summary: Ruby bindings to Cluster 3.0.
|
71
74
|
test_files:
|
72
|
-
- examples/example.rb
|
75
|
+
- examples/sparse.rb
|
76
|
+
- examples/dense.rb
|