kmeans 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +1 -48
- data/VERSION +1 -1
- data/doc/ChangeLog +11 -0
- data/examples/hcluster.rb +12 -0
- data/examples/kmeans.rb +43 -0
- data/kmeans.gemspec +6 -2
- data/lib/kmeans/hcluster.rb +82 -0
- data/lib/kmeans.rb +5 -9
- data/spec/lib/kmeans/hcluster_spec.rb +37 -0
- data/spec/lib/kmeans_spec.rb +1 -1
- metadata +6 -2
data/README.md
CHANGED
@@ -15,54 +15,7 @@ See also.
|
|
15
15
|
Tutorial
|
16
16
|
--------
|
17
17
|
|
18
|
-
|
19
|
-
require 'kmeans/pearson'
|
20
|
-
require 'kmeans/cluster'
|
21
|
-
data = {
|
22
|
-
"test01"=>
|
23
|
-
{"hoge"=>0,
|
24
|
-
"fuga"=>1,
|
25
|
-
"piyo"=>0
|
26
|
-
},
|
27
|
-
"test02"=>
|
28
|
-
{"hoge"=>2,
|
29
|
-
"fuga"=>1,
|
30
|
-
"piyo"=>3
|
31
|
-
},
|
32
|
-
"test03"=>
|
33
|
-
{"hoge"=>3,
|
34
|
-
"fuga"=>0,
|
35
|
-
"piyo"=>1
|
36
|
-
},
|
37
|
-
"test04"=>
|
38
|
-
{"hoge"=>0,
|
39
|
-
"fuga"=>2,
|
40
|
-
"piyo"=>0
|
41
|
-
},
|
42
|
-
"test05"=>
|
43
|
-
{"hoge"=>4,
|
44
|
-
"fuga"=>2,
|
45
|
-
"piyo"=>3
|
46
|
-
},
|
47
|
-
"test06"=>
|
48
|
-
{"hoge"=>3,
|
49
|
-
"fuga"=>1,
|
50
|
-
"piyo"=>1
|
51
|
-
},
|
52
|
-
} # 2-dimensional hash
|
53
|
-
cluster = Kmeans::Cluster.new(data, {
|
54
|
-
:centroids => 4,
|
55
|
-
:loop_max => 10
|
56
|
-
}) # Options can be omitted.
|
57
|
-
cluster.make_cluster
|
58
|
-
puts cluster.cluster # => {3=>["test01", "test04"], 1=>["test02"], 2=>["test03", "test05"], 0=>["test06"]}
|
59
|
-
# Result changes for each execution
|
60
|
-
|
61
|
-
|
62
|
-
Environment
|
63
|
-
-----------
|
64
|
-
|
65
|
-
After ruby 1.8.
|
18
|
+
See examples.
|
66
19
|
|
67
20
|
|
68
21
|
Development
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.2
|
1
|
+
0.0.3
|
data/doc/ChangeLog
CHANGED
data/examples/hcluster.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
require 'kmeans/pearson'
|
5
|
+
require 'kmeans/hcluster'
|
6
|
+
|
7
|
+
blognames = ["The Superficial - Because You're Ugly", "Wonkette", "Publishing 2.0"]
|
8
|
+
data = [[0, 1, 0, 0, 3, 3, 0, 0, 3, 0, 6, 0, 1, 0, 4, 3, 0, 0, 0, 0, 0, 4, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 0, 2, 0, 1, 1, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 2, 0, 0, 0, 1, 3, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 5, 1, 0, 0, 3, 0, 0, 0, 0, 1, 5, 0, 0, 2, 1, 0, 1, 2, 1, 0, 0, 0, 5, 0, 3, 0, 0, 0, 1, 0, 2, 2, 0, 3, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 3, 1, 0, 0, 4, 0, 1, 0, 1, 0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 2, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 2, 1, 2, 0, 0, 0, 0, 4, 4, 0, 0, 0, 0, 0, 21, 2, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 5, 0, 1, 0, 0, 2, 0, 0, 0, 0, 3, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 2, 2, 0, 0, 0, 0, 1, 0, 1, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 24, 1, 0, 1, 0, 0, 0, 1, 9, 0, 1, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 3, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 4, 0, 1, 1, 0, 1, 2, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 3, 0, 0, 1, 1, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 5, 1, 0, 0, 0, 1, 0, 0, 0, 2, 2, 0, 0, 1, 2, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 2, 4, 1, 1, 0, 0, 2, 0, 2, 1, 1, 0, 0, 1, 5, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 4, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 3, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 5, 1, 4, 0, 0, 2, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 
0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 2, 1, 3, 0, 0, 0, 0, 2, 0, 0, 2, 0, 9, 2, 0], [0, 2, 1, 0, 6, 2, 1, 0, 4, 5, 25, 0, 0, 0, 6, 12, 4, 2, 1, 4, 0, 3, 0, 1, 1, 3, 4, 0, 2, 0, 4, 3, 2, 1, 4, 0, 3, 3, 2, 12, 0, 1, 6, 4, 0, 0, 0, 0, 2, 1, 0, 0, 7, 2, 7, 2, 3, 5, 0, 0, 12, 0, 2, 10, 2, 2, 2, 0, 1, 9, 1, 0, 3, 1, 3, 5, 0, 6, 2, 2, 2, 1, 1, 0, 0, 2, 0, 0, 0, 2, 3, 2, 3, 5, 3, 6, 1, 2, 1, 4, 11, 0, 0, 4, 3, 1, 0, 3, 7, 2, 1, 1, 0, 8, 1, 6, 7, 0, 0, 0, 8, 5, 1, 0, 2, 2, 4, 0, 9, 0, 4, 2, 2, 2, 1, 0, 1, 0, 3, 1, 20, 6, 9, 1, 0, 2, 0, 9, 2, 5, 0, 1, 0, 0, 0, 1, 4, 13, 0, 2, 2, 0, 2, 1, 0, 0, 0, 6, 2, 1, 10, 1, 2, 3, 2, 7, 1, 2, 5, 1, 1, 1, 4, 0, 0, 6, 3, 7, 0, 2, 0, 3, 2, 2, 0, 4, 1, 1, 0, 16, 1, 2, 1, 3, 2, 7, 0, 0, 0, 3, 0, 5, 0, 1, 2, 4, 5, 3, 0, 4, 1, 16, 1, 2, 2, 1, 2, 5, 0, 3, 2, 0, 2, 0, 2, 1, 0, 3, 0, 2, 1, 4, 0, 6, 0, 3, 9, 2, 1, 2, 3, 4, 6, 1, 4, 1, 4, 2, 7, 0, 1, 0, 2, 4, 16, 2, 0, 5, 0, 1, 3, 0, 8, 1, 3, 7, 3, 2, 1, 2, 0, 4, 3, 0, 1, 2, 15, 10, 2, 2, 1, 6, 5, 1, 1, 1, 3, 0, 0, 8, 3, 1, 0, 0, 3, 6, 6, 1, 0, 1, 3, 2, 10, 0, 0, 0, 1, 1, 0, 5, 1, 2, 16, 2, 0, 2, 7, 2, 0, 6, 0, 4, 1, 0, 3, 1, 0, 2, 0, 0, 0, 3, 1, 1, 8, 0, 0, 2, 7, 1, 1, 5, 0, 1, 0, 0, 0, 7, 5, 1, 0, 2, 3, 5, 28, 0, 1, 1, 2, 5, 23, 2, 3, 0, 2, 0, 1, 2, 14, 0, 5, 0, 7, 3, 5, 0, 1, 3, 2, 5, 4, 0, 5, 0, 0, 3, 3, 1, 1, 1, 1, 0, 8, 0, 3, 3, 1, 9, 1, 3, 0, 1, 8, 0, 2, 1, 2, 4, 18, 9, 0, 2, 5, 4, 2, 0, 3, 0, 6, 0, 1, 0, 3, 1, 1, 3, 1, 0, 0, 8, 4, 12, 2, 2, 9, 3, 1, 2, 0, 0, 3, 0, 0, 1, 0, 3, 3, 2, 5, 0, 8, 6, 9, 10, 1, 5, 3, 2, 0, 1, 2, 3, 1, 2, 0, 0, 0, 4, 8, 3, 0, 0, 2, 0, 5, 3, 1, 2, 0, 2, 0, 1, 0, 2, 4, 11, 6, 1, 0, 6, 3, 0, 2, 0, 0, 4, 5, 0, 2, 0, 0, 5, 0, 0, 1, 0, 3, 0, 0, 1, 9, 6, 0, 9, 0, 3, 0, 1, 1, 3, 7, 1, 0, 3, 1, 7, 2, 9, 0, 0, 2, 1, 0, 3, 0, 8, 10, 6, 1, 2, 6, 5, 2, 3, 6, 0, 2, 0, 11, 7, 1, 3, 2, 3, 0, 1, 3, 0, 0, 0, 0, 5, 0, 6, 3, 4, 0, 1, 2, 0, 1, 1, 5, 5, 0, 3, 8, 6, 0, 1, 4, 0, 1, 0, 0, 4, 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 6, 4, 0, 0, 1, 2, 3, 0, 
2, 2, 3, 1, 2, 4, 0, 1, 2, 2, 0, 1, 2, 6, 2, 4, 2, 3, 4, 2, 2, 0, 3, 2, 7, 3, 0, 1, 0, 1, 0, 4, 0, 0, 3, 0, 8, 0, 0, 7, 3, 1, 3, 2, 12, 0, 1, 2, 0, 1, 0, 2, 1, 3, 4, 3, 0, 5, 2, 0, 0, 2, 12, 1, 2, 4, 1, 2, 1, 0, 0, 2, 9, 1, 2, 1, 5, 3, 1, 5, 0, 2, 4, 0, 2, 0, 0, 4, 3, 19, 8, 7], [0, 0, 7, 4, 0, 1, 3, 6, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 4, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 5, 0, 1, 1, 1, 3, 0, 0, 13, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 1, 0, 0, 1, 0, 0, 1, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 3, 2, 0, 0, 0, 0, 2, 1, 0, 2, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 1, 0, 6, 0, 0, 3, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 3, 2, 0, 0, 3, 1, 0, 0, 0, 0, 1, 0, 1, 0, 2, 0, 0, 0, 0, 2, 1, 0, 0, 0, 1, 0, 1, 1, 1, 3, 0, 1, 0, 0, 2, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 21, 0, 2, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 1, 0, 0, 2, 1, 0, 0, 1, 9, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 1, 0, 7, 1, 1, 1, 0, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 2, 2, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 2, 0, 0, 1, 0, 0, 0, 0, 2, 1, 0, 0, 0, 1, 0, 1, 2, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 1, 1, 0, 0, 4, 0, 0, 0, 1, 1, 0, 0, 5, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 2, 0, 0, 1, 0, 2, 0, 2, 4, 0, 1, 2, 0, 1, 0, 0, 0, 0, 0, 0, 2, 1, 1, 0, 0, 0, 3, 0, 0, 1, 1, 0, 1, 3, 
0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 1, 3, 0, 6, 1, 0, 0, 1, 6, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 1, 0, 4, 0, 4, 2, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 6, 0, 0, 1, 2, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 11, 0, 0, 0, 0, 1, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 0, 0, 0, 3, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 4, 0, 0, 0, 0, 0, 0]]
|
9
|
+
|
10
|
+
cs = Kmeans::HCluster.new
|
11
|
+
clust = cs.hcluster(data)
|
12
|
+
print cs.printclust(clust, blognames)
|
data/examples/kmeans.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
require 'kmeans/pair'
|
5
|
+
require 'kmeans/pearson'
|
6
|
+
require 'kmeans/cluster'
|
7
|
+
|
8
|
+
testdata = {
|
9
|
+
"test01"=>
|
10
|
+
{"hoge"=>0,
|
11
|
+
"fuga"=>1,
|
12
|
+
"piyo"=>0
|
13
|
+
},
|
14
|
+
"test02"=>
|
15
|
+
{"hoge"=>2,
|
16
|
+
"fuga"=>1,
|
17
|
+
"piyo"=>3
|
18
|
+
},
|
19
|
+
"test03"=>
|
20
|
+
{"hoge"=>3,
|
21
|
+
"fuga"=>0,
|
22
|
+
"piyo"=>1
|
23
|
+
},
|
24
|
+
"test04"=>
|
25
|
+
{"hoge"=>0,
|
26
|
+
"fuga"=>2,
|
27
|
+
"piyo"=>0
|
28
|
+
},
|
29
|
+
"test05"=>
|
30
|
+
{"hoge"=>4,
|
31
|
+
"fuga"=>2,
|
32
|
+
"piyo"=>3
|
33
|
+
},
|
34
|
+
"test06"=>
|
35
|
+
{"hoge"=>3,
|
36
|
+
"fuga"=>1,
|
37
|
+
"piyo"=>1
|
38
|
+
},
|
39
|
+
}
|
40
|
+
|
41
|
+
cluster = Kmeans::Cluster.new(testdata, { :centroids => 3, :loop_max => 10 })
|
42
|
+
cluster.make_cluster
|
43
|
+
p cluster.cluster
|
data/kmeans.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "kmeans"
|
8
|
-
s.version = "0.0.2"
|
8
|
+
s.version = "0.0.3"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["id774"]
|
12
|
-
s.date = "2012-10-
|
12
|
+
s.date = "2012-10-11"
|
13
13
|
s.description = "K-means clustering"
|
14
14
|
s.email = "idnanashi@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -26,13 +26,17 @@ Gem::Specification.new do |s|
|
|
26
26
|
"doc/ChangeLog",
|
27
27
|
"doc/LICENSE",
|
28
28
|
"doc/README",
|
29
|
+
"examples/hcluster.rb",
|
30
|
+
"examples/kmeans.rb",
|
29
31
|
"kmeans.gemspec",
|
30
32
|
"lib/kmeans.rb",
|
31
33
|
"lib/kmeans/cluster.rb",
|
34
|
+
"lib/kmeans/hcluster.rb",
|
32
35
|
"lib/kmeans/pair.rb",
|
33
36
|
"lib/kmeans/pearson.rb",
|
34
37
|
"script/build",
|
35
38
|
"spec/lib/kmeans/cluster_spec.rb",
|
39
|
+
"spec/lib/kmeans/hcluster_spec.rb",
|
36
40
|
"spec/lib/kmeans/pair_spec.rb",
|
37
41
|
"spec/lib/kmeans/pearson_spec.rb",
|
38
42
|
"spec/lib/kmeans_spec.rb",
|
data/lib/kmeans/hcluster.rb
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
module Kmeans
|
4
|
+
class Bicluster
|
5
|
+
def initialize(vec, left=nil, right=nil, distance=0.0, id=nil)
|
6
|
+
@left = left
|
7
|
+
@right = right
|
8
|
+
@vec = vec
|
9
|
+
@id = id
|
10
|
+
@distance = distance
|
11
|
+
end
|
12
|
+
|
13
|
+
attr_accessor :left, :right, :vec, :id, :distance
|
14
|
+
end
|
15
|
+
|
16
|
+
class HCluster
|
17
|
+
def initialize
|
18
|
+
@out = ""
|
19
|
+
end
|
20
|
+
|
21
|
+
def printclust(clust, labels=nil, n=0)
|
22
|
+
n.times do
|
23
|
+
@out << " "
|
24
|
+
end
|
25
|
+
if clust.id < 0
|
26
|
+
@out << "-\n"
|
27
|
+
else
|
28
|
+
if labels == nil
|
29
|
+
@out << clust.id + "\n"
|
30
|
+
else
|
31
|
+
@out << labels[clust.id] + "\n"
|
32
|
+
end
|
33
|
+
end
|
34
|
+
printclust(clust.left, labels, n+1) if clust.left != nil
|
35
|
+
printclust(clust.right, labels, n+1) if clust.right != nil
|
36
|
+
return @out
|
37
|
+
end
|
38
|
+
|
39
|
+
def hcluster(rows)
|
40
|
+
distances = Hash.new
|
41
|
+
currentclustid = -1
|
42
|
+
|
43
|
+
clust = Array.new
|
44
|
+
for i in 0...rows.length
|
45
|
+
c = Bicluster.new(rows[i])
|
46
|
+
c.id = i
|
47
|
+
clust.push(c)
|
48
|
+
end
|
49
|
+
|
50
|
+
while clust.length > 1
|
51
|
+
lowestpair = [0,1]
|
52
|
+
closest = Pearson.calc(clust[0].vec, clust[1].vec)
|
53
|
+
for i in 0...clust.length
|
54
|
+
for j in i+1...clust.length
|
55
|
+
if !distances.key?([clust[i].id, clust[j].id])
|
56
|
+
distances[[clust[i].id, clust[j].id]] = Pearson.calc(clust[i].vec, clust[j].vec)
|
57
|
+
end
|
58
|
+
d = distances[[clust[i].id, clust[j].id]]
|
59
|
+
if d < closest
|
60
|
+
closest = d
|
61
|
+
lowestpair = [i,j]
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
mergevec = Array.new
|
67
|
+
for i in 0...clust[0].vec.length
|
68
|
+
m = (clust[lowestpair[0]].vec[i] + clust[lowestpair[1]].vec[i])/2.0
|
69
|
+
mergevec.push(m)
|
70
|
+
end
|
71
|
+
|
72
|
+
newcluster = Bicluster.new(mergevec, clust[lowestpair[0]], clust[lowestpair[1]], closest, currentclustid)
|
73
|
+
|
74
|
+
currentclustid -= 1
|
75
|
+
clust.delete_at(lowestpair[1])
|
76
|
+
clust.delete_at(lowestpair[0])
|
77
|
+
clust.push(newcluster)
|
78
|
+
end
|
79
|
+
return clust[0]
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
data/lib/kmeans.rb
CHANGED
@@ -2,13 +2,9 @@
|
|
2
2
|
# -*- coding: utf-8 -*-
|
3
3
|
|
4
4
|
module Kmeans
|
5
|
-
VERSION = "0.0.2"
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
require 'pair'
|
12
|
-
require 'pearson'
|
13
|
-
require 'cluster'
|
5
|
+
VERSION = "0.0.3"
|
6
|
+
require File.dirname(__FILE__) + "/kmeans/pair"
|
7
|
+
require File.dirname(__FILE__) + "/kmeans/pearson"
|
8
|
+
require File.dirname(__FILE__) + "/kmeans/cluster"
|
9
|
+
require File.dirname(__FILE__) + "/kmeans/hcluster"
|
14
10
|
end
|
data/spec/lib/kmeans/hcluster_spec.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
require File.dirname(__FILE__) + '/../../spec_helper'
|
4
|
+
|
5
|
+
describe Kmeans::HCluster do
|
6
|
+
before :all do
|
7
|
+
@blognames = ["The Superficial - Because You're Ugly", "Wonkette", "Publishing 2.0"]
|
8
|
+
@data = [[0, 1, 0, 0, 3, 3, 0, 0, 3, 0, 6, 0, 1, 0, 4, 3, 0, 0, 0, 0, 0, 4, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 0, 2, 0, 1, 1, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 2, 0, 0, 0, 1, 3, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 5, 1, 0, 0, 3, 0, 0, 0, 0, 1, 5, 0, 0, 2, 1, 0, 1, 2, 1, 0, 0, 0, 5, 0, 3, 0, 0, 0, 1, 0, 2, 2, 0, 3, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 3, 1, 0, 0, 4, 0, 1, 0, 1, 0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 2, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 2, 1, 2, 0, 0, 0, 0, 4, 4, 0, 0, 0, 0, 0, 21, 2, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 5, 0, 1, 0, 0, 2, 0, 0, 0, 0, 3, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 2, 2, 0, 0, 0, 0, 1, 0, 1, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 24, 1, 0, 1, 0, 0, 0, 1, 9, 0, 1, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 3, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 4, 0, 1, 1, 0, 1, 2, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 3, 0, 0, 1, 1, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 5, 1, 0, 0, 0, 1, 0, 0, 0, 2, 2, 0, 0, 1, 2, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 2, 4, 1, 1, 0, 0, 2, 0, 2, 1, 1, 0, 0, 1, 5, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 4, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 3, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 5, 1, 4, 0, 0, 2, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 
1, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 2, 1, 3, 0, 0, 0, 0, 2, 0, 0, 2, 0, 9, 2, 0], [0, 2, 1, 0, 6, 2, 1, 0, 4, 5, 25, 0, 0, 0, 6, 12, 4, 2, 1, 4, 0, 3, 0, 1, 1, 3, 4, 0, 2, 0, 4, 3, 2, 1, 4, 0, 3, 3, 2, 12, 0, 1, 6, 4, 0, 0, 0, 0, 2, 1, 0, 0, 7, 2, 7, 2, 3, 5, 0, 0, 12, 0, 2, 10, 2, 2, 2, 0, 1, 9, 1, 0, 3, 1, 3, 5, 0, 6, 2, 2, 2, 1, 1, 0, 0, 2, 0, 0, 0, 2, 3, 2, 3, 5, 3, 6, 1, 2, 1, 4, 11, 0, 0, 4, 3, 1, 0, 3, 7, 2, 1, 1, 0, 8, 1, 6, 7, 0, 0, 0, 8, 5, 1, 0, 2, 2, 4, 0, 9, 0, 4, 2, 2, 2, 1, 0, 1, 0, 3, 1, 20, 6, 9, 1, 0, 2, 0, 9, 2, 5, 0, 1, 0, 0, 0, 1, 4, 13, 0, 2, 2, 0, 2, 1, 0, 0, 0, 6, 2, 1, 10, 1, 2, 3, 2, 7, 1, 2, 5, 1, 1, 1, 4, 0, 0, 6, 3, 7, 0, 2, 0, 3, 2, 2, 0, 4, 1, 1, 0, 16, 1, 2, 1, 3, 2, 7, 0, 0, 0, 3, 0, 5, 0, 1, 2, 4, 5, 3, 0, 4, 1, 16, 1, 2, 2, 1, 2, 5, 0, 3, 2, 0, 2, 0, 2, 1, 0, 3, 0, 2, 1, 4, 0, 6, 0, 3, 9, 2, 1, 2, 3, 4, 6, 1, 4, 1, 4, 2, 7, 0, 1, 0, 2, 4, 16, 2, 0, 5, 0, 1, 3, 0, 8, 1, 3, 7, 3, 2, 1, 2, 0, 4, 3, 0, 1, 2, 15, 10, 2, 2, 1, 6, 5, 1, 1, 1, 3, 0, 0, 8, 3, 1, 0, 0, 3, 6, 6, 1, 0, 1, 3, 2, 10, 0, 0, 0, 1, 1, 0, 5, 1, 2, 16, 2, 0, 2, 7, 2, 0, 6, 0, 4, 1, 0, 3, 1, 0, 2, 0, 0, 0, 3, 1, 1, 8, 0, 0, 2, 7, 1, 1, 5, 0, 1, 0, 0, 0, 7, 5, 1, 0, 2, 3, 5, 28, 0, 1, 1, 2, 5, 23, 2, 3, 0, 2, 0, 1, 2, 14, 0, 5, 0, 7, 3, 5, 0, 1, 3, 2, 5, 4, 0, 5, 0, 0, 3, 3, 1, 1, 1, 1, 0, 8, 0, 3, 3, 1, 9, 1, 3, 0, 1, 8, 0, 2, 1, 2, 4, 18, 9, 0, 2, 5, 4, 2, 0, 3, 0, 6, 0, 1, 0, 3, 1, 1, 3, 1, 0, 0, 8, 4, 12, 2, 2, 9, 3, 1, 2, 0, 0, 3, 0, 0, 1, 0, 3, 3, 2, 5, 0, 8, 6, 9, 10, 1, 5, 3, 2, 0, 1, 2, 3, 1, 2, 0, 0, 0, 4, 8, 3, 0, 0, 2, 0, 5, 3, 1, 2, 0, 2, 0, 1, 0, 2, 4, 11, 6, 1, 0, 6, 3, 0, 2, 0, 0, 4, 5, 0, 2, 0, 0, 5, 0, 0, 1, 0, 3, 0, 0, 1, 9, 6, 0, 9, 0, 3, 0, 1, 1, 3, 7, 1, 0, 3, 1, 7, 2, 9, 0, 0, 2, 1, 0, 3, 0, 8, 10, 6, 1, 2, 6, 5, 2, 3, 6, 0, 2, 0, 11, 7, 1, 3, 2, 3, 0, 1, 3, 0, 0, 0, 0, 5, 0, 6, 3, 4, 0, 1, 2, 0, 1, 1, 5, 5, 0, 3, 8, 6, 0, 1, 4, 0, 1, 0, 0, 4, 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 6, 4, 0, 0, 1, 2, 3, 
0, 2, 2, 3, 1, 2, 4, 0, 1, 2, 2, 0, 1, 2, 6, 2, 4, 2, 3, 4, 2, 2, 0, 3, 2, 7, 3, 0, 1, 0, 1, 0, 4, 0, 0, 3, 0, 8, 0, 0, 7, 3, 1, 3, 2, 12, 0, 1, 2, 0, 1, 0, 2, 1, 3, 4, 3, 0, 5, 2, 0, 0, 2, 12, 1, 2, 4, 1, 2, 1, 0, 0, 2, 9, 1, 2, 1, 5, 3, 1, 5, 0, 2, 4, 0, 2, 0, 0, 4, 3, 19, 8, 7], [0, 0, 7, 4, 0, 1, 3, 6, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 4, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 5, 0, 1, 1, 1, 3, 0, 0, 13, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 1, 0, 0, 1, 0, 0, 1, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 3, 2, 0, 0, 0, 0, 2, 1, 0, 2, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 1, 0, 6, 0, 0, 3, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 3, 2, 0, 0, 3, 1, 0, 0, 0, 0, 1, 0, 1, 0, 2, 0, 0, 0, 0, 2, 1, 0, 0, 0, 1, 0, 1, 1, 1, 3, 0, 1, 0, 0, 2, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 21, 0, 2, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 1, 0, 0, 2, 1, 0, 0, 1, 9, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 1, 0, 7, 1, 1, 1, 0, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 2, 2, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 2, 0, 0, 1, 0, 0, 0, 0, 2, 1, 0, 0, 0, 1, 0, 1, 2, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 1, 1, 0, 0, 4, 0, 0, 0, 1, 1, 0, 0, 5, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 2, 0, 0, 1, 0, 2, 0, 2, 4, 0, 1, 2, 0, 1, 0, 0, 0, 0, 0, 0, 2, 1, 1, 0, 0, 0, 3, 0, 0, 1, 1, 0, 1, 
3, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 1, 3, 0, 6, 1, 0, 0, 1, 6, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 1, 0, 4, 0, 4, 2, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 6, 0, 0, 1, 2, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 11, 0, 0, 0, 0, 1, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 0, 0, 0, 3, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 4, 0, 0, 0, 0, 0, 0]]
|
9
|
+
end
|
10
|
+
|
11
|
+
context 'の HCluster クラスにおいて' do
|
12
|
+
describe '二次元配列を渡すと' do
|
13
|
+
it "Kmeans::Bicluster クラスが返却される" do
|
14
|
+
cs = Kmeans::HCluster.new
|
15
|
+
clust = cs.hcluster(@data)
|
16
|
+
clust.class.should == Kmeans::Bicluster
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
context 'の HCluster クラスにおいて' do
|
22
|
+
describe 'printclust メソッドを呼ぶと' do
|
23
|
+
it "クラスタの中身が返却される" do
|
24
|
+
cs = Kmeans::HCluster.new
|
25
|
+
clust = cs.hcluster(@data)
|
26
|
+
out = cs.printclust(clust, @blognames)
|
27
|
+
out.should ==
|
28
|
+
"-
|
29
|
+
Wonkette
|
30
|
+
-
|
31
|
+
The Superficial - Because You're Ugly
|
32
|
+
Publishing 2.0
|
33
|
+
"
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
data/spec/lib/kmeans_spec.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kmeans
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-10-
|
12
|
+
date: 2012-10-11 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: cucumber
|
@@ -76,13 +76,17 @@ files:
|
|
76
76
|
- doc/ChangeLog
|
77
77
|
- doc/LICENSE
|
78
78
|
- doc/README
|
79
|
+
- examples/hcluster.rb
|
80
|
+
- examples/kmeans.rb
|
79
81
|
- kmeans.gemspec
|
80
82
|
- lib/kmeans.rb
|
81
83
|
- lib/kmeans/cluster.rb
|
84
|
+
- lib/kmeans/hcluster.rb
|
82
85
|
- lib/kmeans/pair.rb
|
83
86
|
- lib/kmeans/pearson.rb
|
84
87
|
- script/build
|
85
88
|
- spec/lib/kmeans/cluster_spec.rb
|
89
|
+
- spec/lib/kmeans/hcluster_spec.rb
|
86
90
|
- spec/lib/kmeans/pair_spec.rb
|
87
91
|
- spec/lib/kmeans/pearson_spec.rb
|
88
92
|
- spec/lib/kmeans_spec.rb
|