buncher 1.0.3 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  require 'buncher/buncher'
2
2
  module Buncher
3
- VERSION = "1.0.3"
3
+ VERSION = "1.0.4"
4
4
  # your cluster needs to look like this. Make a bunch of them and pass them in. It's ok to pass in empty elements to start.
5
5
  class Cluster
6
6
  attr_accessor :elements
@@ -58,7 +58,7 @@ module Buncher
58
58
  end
59
59
  end
60
60
 
61
- # run the clustering algorithm until have calculated the current number of clusters, taken from this paper:
61
+ # run the clustering algorithm until we have calculated the best number of clusters, taken from this paper:
62
62
  # http://papers.nips.cc/paper/2526-learning-the-k-in-k-means.pdf
63
63
  def self.cluster(elements, weights,options={})
64
64
  solutions={}
@@ -76,11 +76,11 @@ module Buncher
76
76
  last_fK, last_sK, last_aK = fK(centers,last_sK, last_aK,weights)
77
77
  puts "summary #{number_clusters}: fK() = #{last_fK}, last_sK=#{last_sK} last_aK=#{last_aK} "
78
78
  solutions[last_fK]=centers if number_clusters >= min_size
79
- # break if number_clusters == 2 ## debugging
79
+ # break if number_clusters == 3 ## debugging
80
80
  end
81
81
  min_fK =solutions.keys.sort.first || 1.0
82
82
  if min_fK > 0.85
83
- nil # ie, not clustered at all
83
+ elements.map {|ele| Cluster.new(ele,[ele])} # ie, not clustered at all
84
84
  else
85
85
  solutions[min_fK]
86
86
  end
Binary file
@@ -1,10 +1,11 @@
1
1
  require "buncher"
2
2
  require 'rubystats'
3
3
  require 'gnuplot'
4
+ require "minitest/focus"
4
5
 
5
6
  def dump(centers)
6
- puts "centers are"
7
- centers.each {|ccc| puts "center #{ccc.center.inspect} #{ccc.elements[0]}, #{ccc.elements[1]}"}
7
+ puts "#{centers.size} centers are"
8
+ centers.each {|ccc| puts "center #{ccc.center.inspect} elements #{ccc.elements[0]}, #{ccc.elements[1]}"}
8
9
  end
9
10
 
10
11
  def init_data(number_points, number_clusters)
@@ -64,13 +65,19 @@ def plot(file_name,points,centers, initial_centers=nil)
64
65
  end
65
66
 
66
67
  class TestBuncher < Minitest::Test
67
- # WORKING=<<-'EOS'
68
+
68
69
  def test_distance
69
70
  element1= [1.0, 0.5, 0.533333333333333, 1.0, 0.0, 0.0, 0.333333333333333, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.333333333333333, 0.333333333333333, 0.666666666666667, 0.0, 0.0, 1.0, 0.333333333333333, 0.333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333333333333333, 0.0, 0.333333333333333, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333333333333333, 0.333333333333333, 0.0, 0.333333333333333, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.333333333333333, 0.0, 0.666666666666667, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
70
71
  element2= [1.0, 1.0, 0.5, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
71
72
  weights= [0.12158054711246201, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.0060790273556231, 0.0060790273556231, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155]
72
73
  assert_in_delta(Buncher::distance(element1,element2, weights),0.098,0.001)
73
74
  end
75
+ def test_distance_nan
76
+ element1 = [nil, 1, 1]
77
+ element2 = [nil, nil, 1]
78
+ weights = [1] * 3
79
+ assert_in_delta(Buncher::distance(element1,element2, weights),0.5,0.01)
80
+ end
74
81
  def test_should_find_the_one_center
75
82
  elements = [[1,1]]
76
83
  starting_centers = elements.sample(1).map {|aaa| Buncher::Cluster.new(aaa)}
@@ -80,9 +87,13 @@ class TestBuncher < Minitest::Test
80
87
  assert_in_delta(new_centers.first.center[1],1.0,0.01)
81
88
  end
82
89
 
90
+
83
91
  def test_choose_centers_wrapper
84
92
  elements = [[1,1]]
93
+ puts "test_choose_centers_wrapper - start"
85
94
  new_centers = Buncher::choose_centers(elements,[1]*2, 1)
95
+ puts "test_choose_centers_wrapper - well well"
96
+ puts new_centers.inspect
86
97
  dump(new_centers)
87
98
  assert_in_delta(new_centers.first.center[1],1.0,0.01)
88
99
  assert_in_delta(new_centers.first.center[1],1.0,0.01)
@@ -91,41 +102,34 @@ class TestBuncher < Minitest::Test
91
102
  elements = init_data(100,3)
92
103
  new_centers = Buncher::choose_centers(elements, [1]*2, 3)
93
104
  dump(new_centers)
94
- assert_equal(new_centers.size,3)
105
+ assert_equal(3, new_centers.size)
95
106
  end
96
- # EOS
107
+
108
+
97
109
  def test_min_size_works
98
- elements = init_data(100,4)
110
+ elements = init_data(100,1)
99
111
  new_centers = Buncher::cluster(elements,[1]*2,:min_size=>2)
100
- dump(new_centers)
101
- assert_equal(new_centers.size,4)
102
- end
103
-
104
- # WWW=<<-'EOS'
105
-
106
- def test_choose_centers_wrapper2
107
- elements = init_data(100,3)
108
- new_centers = Buncher::choose_centers(elements, [1]*2, 3)
109
- plot("/tmp/kmeans_initial_#{new_centers.size}.png",elements, new_centers)
110
- dump(new_centers)
111
- `open /tmp/kmeans_initial_#{new_centers.size}.png`
112
- assert_equal(new_centers.size,3)
112
+ assert(new_centers.size >=2,"at least 2 centers")
113
113
  end
114
-
114
+ focus
115
115
  def test_gaussian_distribution_of_100_points_in_3_clusters
116
- 12.times do |run|
116
+ 1.times do |run|
117
117
  # srand(843284148793854177950180651080082381)
118
118
  elements = init_data(100,3)
119
119
  # elements.each {|eee| puts "#{eee[0]},#{eee[1]}"}
120
+ graphfile=nil;
120
121
  # new_centers = Buncher::cluster(elements) {|elements,centers, initial_centers| puts "run #{run} setup";plot("/tmp/#{run}_centers_#{centers.size}.png",elements,initial_centers)}
121
122
  new_centers = Buncher::cluster(elements,[1]*2) {|elements,centers, initial_centers|
122
- plot("/tmp/#{run}_centers_#{centers.size}.png",elements,centers, initial_centers)
123
+ graphfile = "/tmp/#{run}_centers_#{centers.size}.png"
124
+ plot(graphfile,elements,centers, initial_centers)
123
125
  }
124
126
  puts "run #{run}: k is #{new_centers.size}, seed was #{srand}"
125
127
  puts "ERROR "*4 if new_centers.size != 3
126
128
  puts
127
129
  assert_equal(3,new_centers.size)
130
+ refute(new_centers.first.elements.empty?)
131
+ graphfile = "/tmp/#{run}_centers_3.png"
132
+ `open #{graphfile}`
128
133
  end
129
134
  end
130
- # EOS
131
135
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: buncher
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
4
+ version: 1.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-04-25 00:00:00.000000000 Z
12
+ date: 2015-04-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: minitest