buncher 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/buncher.rb +8 -8
- data/lib/buncher/buncher.bundle +0 -0
- data/test/test_buncher.rb +18 -8
- metadata +2 -2
data/lib/buncher.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'buncher/buncher'
|
2
2
|
module Buncher
|
3
|
-
VERSION = "1.0.1"
|
3
|
+
VERSION = "1.0.2"
|
4
4
|
# your cluster needs to look like this. Make a bunch of them and pass them in. It's ok to pass in empty elements to start.
|
5
5
|
class Cluster
|
6
6
|
attr_accessor :elements
|
@@ -46,9 +46,9 @@ module Buncher
|
|
46
46
|
end
|
47
47
|
end
|
48
48
|
|
49
|
-
def self.fK(centers,last_sK, last_aK)
|
49
|
+
def self.fK(centers,last_sK, last_aK,weights)
|
50
50
|
# from here - http://www.ee.columbia.edu/~dpwe/papers/PhamDN05-kmeans.pdf
|
51
|
-
sK = centers.inject(0) {|acc, val| acc + val.distortion}
|
51
|
+
sK = centers.inject(0) {|acc, val| acc + val.distortion(weights)}
|
52
52
|
aK = calc_aK(centers, last_aK) if centers.size > 1
|
53
53
|
if centers.size == 1 || (last_sK||0).zero?
|
54
54
|
[1,sK, aK || 0]
|
@@ -60,21 +60,21 @@ module Buncher
|
|
60
60
|
|
61
61
|
# run the clustering algorithm until have calculated the current number of clusters, taken from this paper:
|
62
62
|
# http://papers.nips.cc/paper/2526-learning-the-k-in-k-means.pdf
|
63
|
-
def self.cluster(elements)
|
63
|
+
def self.cluster(elements, weights)
|
64
64
|
changed=true
|
65
65
|
round=0
|
66
66
|
solutions={}
|
67
67
|
# try all the sizes of clusters up to #elements. Ok, sure we could probably do something like 25% .. ok, I did
|
68
68
|
# that.
|
69
69
|
not_clustered = last_sK = last_aK =last_fK=nil
|
70
|
-
max_clusters=[1,elements.size].max
|
70
|
+
max_clusters=[1,(elements.size/2).floor].max
|
71
71
|
(1..max_clusters).each do |number_clusters|
|
72
|
-
initial_centers = choose_centers(elements, number_clusters) # C++ Native code
|
72
|
+
initial_centers = choose_centers(elements, weights, number_clusters) # C++ Native code
|
73
73
|
centers = initial_centers.map(&:dup)
|
74
|
-
centers = kmeans(centers,elements) ## C++ Native code
|
74
|
+
centers = kmeans(centers,elements,weights) ## C++ Native code
|
75
75
|
yield(elements, centers, initial_centers) if block_given?
|
76
76
|
not_clustered ||=centers
|
77
|
-
last_fK, last_sK, last_aK = fK(centers,last_sK, last_aK)
|
77
|
+
last_fK, last_sK, last_aK = fK(centers,last_sK, last_aK,weights)
|
78
78
|
puts "summary #{number_clusters}: fK() = #{last_fK}, last_sK=#{last_sK} last_aK=#{last_aK} "
|
79
79
|
puts
|
80
80
|
solutions[last_fK]=centers
|
data/lib/buncher/buncher.bundle
CHANGED
Binary file
|
data/test/test_buncher.rb
CHANGED
@@ -64,11 +64,17 @@ def plot(file_name,points,centers, initial_centers=nil)
|
|
64
64
|
end
|
65
65
|
|
66
66
|
class TestBuncher < Minitest::Test
|
67
|
-
WORKING=<<-'EOS'
|
67
|
+
# WORKING=<<-'EOS'
|
68
|
+
def test_distance
|
69
|
+
element1= [1.0, 0.5, 0.533333333333333, 1.0, 0.0, 0.0, 0.333333333333333, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.333333333333333, 0.333333333333333, 0.666666666666667, 0.0, 0.0, 1.0, 0.333333333333333, 0.333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333333333333333, 0.0, 0.333333333333333, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333333333333333, 0.333333333333333, 0.0, 0.333333333333333, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.333333333333333, 0.0, 0.666666666666667, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
|
70
|
+
element2= [1.0, 1.0, 0.5, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
|
71
|
+
weights= [0.12158054711246201, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 
0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.0060790273556231, 0.0060790273556231, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155]
|
72
|
+
assert_in_delta(Buncher::distance(element1,element2, weights),0.098,0.001)
|
73
|
+
end
|
68
74
|
def test_should_find_the_one_center
|
69
75
|
elements = [[1,1]]
|
70
76
|
starting_centers = elements.sample(1).map {|aaa| Buncher::Cluster.new(aaa)}
|
71
|
-
new_centers = Buncher::kmeans(starting_centers, elements)
|
77
|
+
new_centers = Buncher::kmeans(starting_centers, elements, [1]*2)
|
72
78
|
dump(new_centers)
|
73
79
|
assert_in_delta(new_centers.first.center[0],1.0,0.01)
|
74
80
|
assert_in_delta(new_centers.first.center[1],1.0,0.01)
|
@@ -76,32 +82,35 @@ WORKING=<<-'EOS'
|
|
76
82
|
|
77
83
|
def test_choose_centers_wrapper
|
78
84
|
elements = [[1,1]]
|
79
|
-
new_centers = Buncher::choose_centers(elements, 1)
|
85
|
+
new_centers = Buncher::choose_centers(elements,[1]*2, 1)
|
80
86
|
dump(new_centers)
|
81
87
|
assert_in_delta(new_centers.first.center[1],1.0,0.01)
|
82
88
|
assert_in_delta(new_centers.first.center[1],1.0,0.01)
|
83
89
|
end
|
84
90
|
def test_choose_centers_wrapper2
|
85
91
|
elements = init_data(100,3)
|
86
|
-
new_centers = Buncher::choose_centers(elements, 3)
|
92
|
+
new_centers = Buncher::choose_centers(elements, [1]*2, 3)
|
87
93
|
dump(new_centers)
|
88
94
|
assert_equal(new_centers.size,3)
|
89
95
|
end
|
96
|
+
# EOS
|
90
97
|
def test_should_find_one_cluster
|
91
98
|
elements = [[1,1]]
|
92
|
-
new_centers = Buncher::cluster(elements)
|
99
|
+
new_centers = Buncher::cluster(elements,[1]*2)
|
93
100
|
dump(new_centers)
|
94
101
|
assert_equal(new_centers.size,1)
|
95
102
|
end
|
103
|
+
|
104
|
+
# WWW=<<-'EOS'
|
105
|
+
|
96
106
|
def test_choose_centers_wrapper2
|
97
107
|
elements = init_data(100,3)
|
98
|
-
new_centers = Buncher::choose_centers(elements, 3)
|
108
|
+
new_centers = Buncher::choose_centers(elements, [1]*2, 3)
|
99
109
|
plot("/tmp/kmeans_initial_#{new_centers.size}.png",elements, new_centers)
|
100
110
|
dump(new_centers)
|
101
111
|
`open /tmp/kmeans_initial_#{new_centers.size}.png`
|
102
112
|
assert_equal(new_centers.size,3)
|
103
113
|
end
|
104
|
-
EOS
|
105
114
|
|
106
115
|
def test_gaussian_distribution_of_100_points_in_3_clusters
|
107
116
|
12.times do |run|
|
@@ -109,7 +118,7 @@ EOS
|
|
109
118
|
elements = init_data(100,3)
|
110
119
|
# elements.each {|eee| puts "#{eee[0]},#{eee[1]}"}
|
111
120
|
# new_centers = Buncher::cluster(elements) {|elements,centers, initial_centers| puts "run #{run} setup";plot("/tmp/#{run}_centers_#{centers.size}.png",elements,initial_centers)}
|
112
|
-
new_centers = Buncher::cluster(elements) {|elements,centers, initial_centers|
|
121
|
+
new_centers = Buncher::cluster(elements,[1]*2) {|elements,centers, initial_centers|
|
113
122
|
plot("/tmp/#{run}_centers_#{centers.size}.png",elements,centers, initial_centers)
|
114
123
|
}
|
115
124
|
puts "run #{run}: k is #{new_centers.size}, seed was #{srand}"
|
@@ -118,4 +127,5 @@ EOS
|
|
118
127
|
assert_equal(3,new_centers.size)
|
119
128
|
end
|
120
129
|
end
|
130
|
+
# EOS
|
121
131
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: buncher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-04-
|
12
|
+
date: 2015-04-25 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: minitest
|