buncher 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/buncher.rb +4 -4
- data/lib/buncher/buncher.bundle +0 -0
- data/test/test_buncher.rb +27 -23
- metadata +2 -2
data/lib/buncher.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'buncher/buncher'
|
2
2
|
module Buncher
|
3
|
-
VERSION = "1.0.3"
|
3
|
+
VERSION = "1.0.4"
|
4
4
|
# your cluster needs to look like this. Make a bunch of them and pass them in. It's ok to pass in empty elements to start.
|
5
5
|
class Cluster
|
6
6
|
attr_accessor :elements
|
@@ -58,7 +58,7 @@ module Buncher
|
|
58
58
|
end
|
59
59
|
end
|
60
60
|
|
61
|
-
# run the clustering algorithm until have calculated the
|
61
|
+
# run the clustering algorithm until we have calculated the best number of clusters, taken from this paper:
|
62
62
|
# http://papers.nips.cc/paper/2526-learning-the-k-in-k-means.pdf
|
63
63
|
def self.cluster(elements, weights,options={})
|
64
64
|
solutions={}
|
@@ -76,11 +76,11 @@ module Buncher
|
|
76
76
|
last_fK, last_sK, last_aK = fK(centers,last_sK, last_aK,weights)
|
77
77
|
puts "summary #{number_clusters}: fK() = #{last_fK}, last_sK=#{last_sK} last_aK=#{last_aK} "
|
78
78
|
solutions[last_fK]=centers if number_clusters >= min_size
|
79
|
-
# break if number_clusters ==
|
79
|
+
# break if number_clusters == 3 ## debugging
|
80
80
|
end
|
81
81
|
min_fK =solutions.keys.sort.first || 1.0
|
82
82
|
if min_fK > 0.85
|
83
|
-
|
83
|
+
elements.map {|ele| Cluster.new(ele,[ele])} # ie, not clustered at all
|
84
84
|
else
|
85
85
|
solutions[min_fK]
|
86
86
|
end
|
data/lib/buncher/buncher.bundle
CHANGED
Binary file
|
data/test/test_buncher.rb
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
require "buncher"
|
2
2
|
require 'rubystats'
|
3
3
|
require 'gnuplot'
|
4
|
+
require "minitest/focus"
|
4
5
|
|
5
6
|
def dump(centers)
|
6
|
-
puts "centers are"
|
7
|
-
centers.each {|ccc| puts "center #{ccc.center.inspect} #{ccc.elements[0]}, #{ccc.elements[1]}"}
|
7
|
+
puts "#{centers.size} centers are"
|
8
|
+
centers.each {|ccc| puts "center #{ccc.center.inspect} elements #{ccc.elements[0]}, #{ccc.elements[1]}"}
|
8
9
|
end
|
9
10
|
|
10
11
|
def init_data(number_points, number_clusters)
|
@@ -64,13 +65,19 @@ def plot(file_name,points,centers, initial_centers=nil)
|
|
64
65
|
end
|
65
66
|
|
66
67
|
class TestBuncher < Minitest::Test
|
67
|
-
|
68
|
+
|
68
69
|
def test_distance
|
69
70
|
element1= [1.0, 0.5, 0.533333333333333, 1.0, 0.0, 0.0, 0.333333333333333, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.333333333333333, 0.333333333333333, 0.666666666666667, 0.0, 0.0, 1.0, 0.333333333333333, 0.333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333333333333333, 0.0, 0.333333333333333, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333333333333333, 0.333333333333333, 0.0, 0.333333333333333, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.333333333333333, 0.0, 0.666666666666667, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
|
70
71
|
element2= [1.0, 1.0, 0.5, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
|
71
72
|
weights= [0.12158054711246201, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 
0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.0060790273556231, 0.0060790273556231, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155]
|
72
73
|
assert_in_delta(Buncher::distance(element1,element2, weights),0.098,0.001)
|
73
74
|
end
|
75
|
+
def test_distance_nan
|
76
|
+
element1 = [nil, 1, 1]
|
77
|
+
element2 = [nil, nil, 1]
|
78
|
+
weights = [1] * 3
|
79
|
+
assert_in_delta(Buncher::distance(element1,element2, weights),0.5,0.01)
|
80
|
+
end
|
74
81
|
def test_should_find_the_one_center
|
75
82
|
elements = [[1,1]]
|
76
83
|
starting_centers = elements.sample(1).map {|aaa| Buncher::Cluster.new(aaa)}
|
@@ -80,9 +87,13 @@ class TestBuncher < Minitest::Test
|
|
80
87
|
assert_in_delta(new_centers.first.center[1],1.0,0.01)
|
81
88
|
end
|
82
89
|
|
90
|
+
|
83
91
|
def test_choose_centers_wrapper
|
84
92
|
elements = [[1,1]]
|
93
|
+
puts "test_choose_centers_wrapper - start"
|
85
94
|
new_centers = Buncher::choose_centers(elements,[1]*2, 1)
|
95
|
+
puts "test_choose_centers_wrapper - well well"
|
96
|
+
puts new_centers.inspect
|
86
97
|
dump(new_centers)
|
87
98
|
assert_in_delta(new_centers.first.center[1],1.0,0.01)
|
88
99
|
assert_in_delta(new_centers.first.center[1],1.0,0.01)
|
@@ -91,41 +102,34 @@ class TestBuncher < Minitest::Test
|
|
91
102
|
elements = init_data(100,3)
|
92
103
|
new_centers = Buncher::choose_centers(elements, [1]*2, 3)
|
93
104
|
dump(new_centers)
|
94
|
-
assert_equal(new_centers.size,3)
|
105
|
+
assert_equal(3, new_centers.size)
|
95
106
|
end
|
96
|
-
|
107
|
+
|
108
|
+
|
97
109
|
def test_min_size_works
|
98
|
-
elements = init_data(100,
|
110
|
+
elements = init_data(100,1)
|
99
111
|
new_centers = Buncher::cluster(elements,[1]*2,:min_size=>2)
|
100
|
-
|
101
|
-
assert_equal(new_centers.size,4)
|
102
|
-
end
|
103
|
-
|
104
|
-
# WWW=<<-'EOS'
|
105
|
-
|
106
|
-
def test_choose_centers_wrapper2
|
107
|
-
elements = init_data(100,3)
|
108
|
-
new_centers = Buncher::choose_centers(elements, [1]*2, 3)
|
109
|
-
plot("/tmp/kmeans_initial_#{new_centers.size}.png",elements, new_centers)
|
110
|
-
dump(new_centers)
|
111
|
-
`open /tmp/kmeans_initial_#{new_centers.size}.png`
|
112
|
-
assert_equal(new_centers.size,3)
|
112
|
+
assert(new_centers.size >=2,"at least 2 centers")
|
113
113
|
end
|
114
|
-
|
114
|
+
focus
|
115
115
|
def test_gaussian_distribution_of_100_points_in_3_clusters
|
116
|
-
|
116
|
+
1.times do |run|
|
117
117
|
# srand(843284148793854177950180651080082381)
|
118
118
|
elements = init_data(100,3)
|
119
119
|
# elements.each {|eee| puts "#{eee[0]},#{eee[1]}"}
|
120
|
+
graphfile=nil;
|
120
121
|
# new_centers = Buncher::cluster(elements) {|elements,centers, initial_centers| puts "run #{run} setup";plot("/tmp/#{run}_centers_#{centers.size}.png",elements,initial_centers)}
|
121
122
|
new_centers = Buncher::cluster(elements,[1]*2) {|elements,centers, initial_centers|
|
122
|
-
|
123
|
+
graphfile = "/tmp/#{run}_centers_#{centers.size}.png"
|
124
|
+
plot(graphfile,elements,centers, initial_centers)
|
123
125
|
}
|
124
126
|
puts "run #{run}: k is #{new_centers.size}, seed was #{srand}"
|
125
127
|
puts "ERROR "*4 if new_centers.size != 3
|
126
128
|
puts
|
127
129
|
assert_equal(3,new_centers.size)
|
130
|
+
refute(new_centers.first.elements.empty?)
|
131
|
+
graphfile = "/tmp/#{run}_centers_3.png"
|
132
|
+
`open #{graphfile}`
|
128
133
|
end
|
129
134
|
end
|
130
|
-
# EOS
|
131
135
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: buncher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.3
|
4
|
+
version: 1.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-04-
|
12
|
+
date: 2015-04-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: minitest
|