buncher 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/buncher.rb +8 -8
- data/lib/buncher/buncher.bundle +0 -0
- data/test/test_buncher.rb +18 -8
- metadata +2 -2
data/lib/buncher.rb
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
require 'buncher/buncher'
|
|
2
2
|
module Buncher
|
|
3
|
-
VERSION = "1.0.1"
|
|
3
|
+
VERSION = "1.0.2"
|
|
4
4
|
# your cluster needs to look like this. Make a bunch of them and pass them in. It's ok to pass in empty elements to start.
|
|
5
5
|
class Cluster
|
|
6
6
|
attr_accessor :elements
|
|
@@ -46,9 +46,9 @@ module Buncher
|
|
|
46
46
|
end
|
|
47
47
|
end
|
|
48
48
|
|
|
49
|
-
def self.fK(centers,last_sK, last_aK)
|
|
49
|
+
def self.fK(centers,last_sK, last_aK,weights)
|
|
50
50
|
# from here - http://www.ee.columbia.edu/~dpwe/papers/PhamDN05-kmeans.pdf
|
|
51
|
-
sK = centers.inject(0) {|acc, val| acc + val.distortion}
|
|
51
|
+
sK = centers.inject(0) {|acc, val| acc + val.distortion(weights)}
|
|
52
52
|
aK = calc_aK(centers, last_aK) if centers.size > 1
|
|
53
53
|
if centers.size == 1 || (last_sK||0).zero?
|
|
54
54
|
[1,sK, aK || 0]
|
|
@@ -60,21 +60,21 @@ module Buncher
|
|
|
60
60
|
|
|
61
61
|
# run the clustering algorithm until have calculated the current number of clusters, taken from this paper:
|
|
62
62
|
# http://papers.nips.cc/paper/2526-learning-the-k-in-k-means.pdf
|
|
63
|
-
def self.cluster(elements)
|
|
63
|
+
def self.cluster(elements, weights)
|
|
64
64
|
changed=true
|
|
65
65
|
round=0
|
|
66
66
|
solutions={}
|
|
67
67
|
# try all the sizes of clusters up to #elements. Ok, sure we could probably do something like 25% .. ok, I did
|
|
68
68
|
# that.
|
|
69
69
|
not_clustered = last_sK = last_aK =last_fK=nil
|
|
70
|
-
max_clusters=[1,elements.size].max
|
|
70
|
+
max_clusters=[1,(elements.size/2).floor].max
|
|
71
71
|
(1..max_clusters).each do |number_clusters|
|
|
72
|
-
initial_centers = choose_centers(elements, number_clusters) # C++ Native code
|
|
72
|
+
initial_centers = choose_centers(elements, weights, number_clusters) # C++ Native code
|
|
73
73
|
centers = initial_centers.map(&:dup)
|
|
74
|
-
centers = kmeans(centers,elements) ## C++ Native code
|
|
74
|
+
centers = kmeans(centers,elements,weights) ## C++ Native code
|
|
75
75
|
yield(elements, centers, initial_centers) if block_given?
|
|
76
76
|
not_clustered ||=centers
|
|
77
|
-
last_fK, last_sK, last_aK = fK(centers,last_sK, last_aK)
|
|
77
|
+
last_fK, last_sK, last_aK = fK(centers,last_sK, last_aK,weights)
|
|
78
78
|
puts "summary #{number_clusters}: fK() = #{last_fK}, last_sK=#{last_sK} last_aK=#{last_aK} "
|
|
79
79
|
puts
|
|
80
80
|
solutions[last_fK]=centers
|
data/lib/buncher/buncher.bundle
CHANGED
|
Binary file
|
data/test/test_buncher.rb
CHANGED
|
@@ -64,11 +64,17 @@ def plot(file_name,points,centers, initial_centers=nil)
|
|
|
64
64
|
end
|
|
65
65
|
|
|
66
66
|
class TestBuncher < Minitest::Test
|
|
67
|
-
WORKING=<<-'EOS'
|
|
67
|
+
# WORKING=<<-'EOS'
|
|
68
|
+
def test_distance
|
|
69
|
+
element1= [1.0, 0.5, 0.533333333333333, 1.0, 0.0, 0.0, 0.333333333333333, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.333333333333333, 0.333333333333333, 0.666666666666667, 0.0, 0.0, 1.0, 0.333333333333333, 0.333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333333333333333, 0.0, 0.333333333333333, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333333333333333, 0.333333333333333, 0.0, 0.333333333333333, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.333333333333333, 0.0, 0.666666666666667, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
|
|
70
|
+
element2= [1.0, 1.0, 0.5, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
|
|
71
|
+
weights= [0.12158054711246201, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 
0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.0060790273556231, 0.0060790273556231, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155]
|
|
72
|
+
assert_in_delta(Buncher::distance(element1,element2, weights),0.098,0.001)
|
|
73
|
+
end
|
|
68
74
|
def test_should_find_the_one_center
|
|
69
75
|
elements = [[1,1]]
|
|
70
76
|
starting_centers = elements.sample(1).map {|aaa| Buncher::Cluster.new(aaa)}
|
|
71
|
-
new_centers = Buncher::kmeans(starting_centers, elements)
|
|
77
|
+
new_centers = Buncher::kmeans(starting_centers, elements, [1]*2)
|
|
72
78
|
dump(new_centers)
|
|
73
79
|
assert_in_delta(new_centers.first.center[0],1.0,0.01)
|
|
74
80
|
assert_in_delta(new_centers.first.center[1],1.0,0.01)
|
|
@@ -76,32 +82,35 @@ WORKING=<<-'EOS'
|
|
|
76
82
|
|
|
77
83
|
def test_choose_centers_wrapper
|
|
78
84
|
elements = [[1,1]]
|
|
79
|
-
new_centers = Buncher::choose_centers(elements, 1)
|
|
85
|
+
new_centers = Buncher::choose_centers(elements,[1]*2, 1)
|
|
80
86
|
dump(new_centers)
|
|
81
87
|
assert_in_delta(new_centers.first.center[1],1.0,0.01)
|
|
82
88
|
assert_in_delta(new_centers.first.center[1],1.0,0.01)
|
|
83
89
|
end
|
|
84
90
|
def test_choose_centers_wrapper2
|
|
85
91
|
elements = init_data(100,3)
|
|
86
|
-
new_centers = Buncher::choose_centers(elements, 3)
|
|
92
|
+
new_centers = Buncher::choose_centers(elements, [1]*2, 3)
|
|
87
93
|
dump(new_centers)
|
|
88
94
|
assert_equal(new_centers.size,3)
|
|
89
95
|
end
|
|
96
|
+
# EOS
|
|
90
97
|
def test_should_find_one_cluster
|
|
91
98
|
elements = [[1,1]]
|
|
92
|
-
new_centers = Buncher::cluster(elements)
|
|
99
|
+
new_centers = Buncher::cluster(elements,[1]*2)
|
|
93
100
|
dump(new_centers)
|
|
94
101
|
assert_equal(new_centers.size,1)
|
|
95
102
|
end
|
|
103
|
+
|
|
104
|
+
# WWW=<<-'EOS'
|
|
105
|
+
|
|
96
106
|
def test_choose_centers_wrapper2
|
|
97
107
|
elements = init_data(100,3)
|
|
98
|
-
new_centers = Buncher::choose_centers(elements, 3)
|
|
108
|
+
new_centers = Buncher::choose_centers(elements, [1]*2, 3)
|
|
99
109
|
plot("/tmp/kmeans_initial_#{new_centers.size}.png",elements, new_centers)
|
|
100
110
|
dump(new_centers)
|
|
101
111
|
`open /tmp/kmeans_initial_#{new_centers.size}.png`
|
|
102
112
|
assert_equal(new_centers.size,3)
|
|
103
113
|
end
|
|
104
|
-
EOS
|
|
105
114
|
|
|
106
115
|
def test_gaussian_distribution_of_100_points_in_3_clusters
|
|
107
116
|
12.times do |run|
|
|
@@ -109,7 +118,7 @@ EOS
|
|
|
109
118
|
elements = init_data(100,3)
|
|
110
119
|
# elements.each {|eee| puts "#{eee[0]},#{eee[1]}"}
|
|
111
120
|
# new_centers = Buncher::cluster(elements) {|elements,centers, initial_centers| puts "run #{run} setup";plot("/tmp/#{run}_centers_#{centers.size}.png",elements,initial_centers)}
|
|
112
|
-
new_centers = Buncher::cluster(elements) {|elements,centers, initial_centers|
|
|
121
|
+
new_centers = Buncher::cluster(elements,[1]*2) {|elements,centers, initial_centers|
|
|
113
122
|
plot("/tmp/#{run}_centers_#{centers.size}.png",elements,centers, initial_centers)
|
|
114
123
|
}
|
|
115
124
|
puts "run #{run}: k is #{new_centers.size}, seed was #{srand}"
|
|
@@ -118,4 +127,5 @@ EOS
|
|
|
118
127
|
assert_equal(3,new_centers.size)
|
|
119
128
|
end
|
|
120
129
|
end
|
|
130
|
+
# EOS
|
|
121
131
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: buncher
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.0.1
|
|
4
|
+
version: 1.0.2
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2015-04-
|
|
12
|
+
date: 2015-04-25 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: minitest
|