buncher 1.0.3 → 1.0.4

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  require 'buncher/buncher'
2
2
  module Buncher
3
- VERSION = "1.0.3"
3
+ VERSION = "1.0.4"
4
4
  # your cluster needs to look like this. Make a bunch of them and pass them in. It's ok to pass in empty elements to start.
5
5
  class Cluster
6
6
  attr_accessor :elements
@@ -58,7 +58,7 @@ module Buncher
58
58
  end
59
59
  end
60
60
 
61
- # run the clustering algorithm until have calculated the current number of clusters, taken from this paper:
61
+ # run the clustering algorithm until we have calculated the best number of clusters, taken from this paper:
62
62
  # http://papers.nips.cc/paper/2526-learning-the-k-in-k-means.pdf
63
63
  def self.cluster(elements, weights,options={})
64
64
  solutions={}
@@ -76,11 +76,11 @@ module Buncher
76
76
  last_fK, last_sK, last_aK = fK(centers,last_sK, last_aK,weights)
77
77
  puts "summary #{number_clusters}: fK() = #{last_fK}, last_sK=#{last_sK} last_aK=#{last_aK} "
78
78
  solutions[last_fK]=centers if number_clusters >= min_size
79
- # break if number_clusters == 2 ## debugging
79
+ # break if number_clusters == 3 ## debugging
80
80
  end
81
81
  min_fK =solutions.keys.sort.first || 1.0
82
82
  if min_fK > 0.85
83
- nil # ie, not clustered at all
83
+ elements.map {|ele| Cluster.new(ele,[ele])} # ie, not clustered at all
84
84
  else
85
85
  solutions[min_fK]
86
86
  end
Binary file
@@ -1,10 +1,11 @@
1
1
  require "buncher"
2
2
  require 'rubystats'
3
3
  require 'gnuplot'
4
+ require "minitest/focus"
4
5
 
5
6
  def dump(centers)
6
- puts "centers are"
7
- centers.each {|ccc| puts "center #{ccc.center.inspect} #{ccc.elements[0]}, #{ccc.elements[1]}"}
7
+ puts "#{centers.size} centers are"
8
+ centers.each {|ccc| puts "center #{ccc.center.inspect} elements #{ccc.elements[0]}, #{ccc.elements[1]}"}
8
9
  end
9
10
 
10
11
  def init_data(number_points, number_clusters)
@@ -64,13 +65,19 @@ def plot(file_name,points,centers, initial_centers=nil)
64
65
  end
65
66
 
66
67
  class TestBuncher < Minitest::Test
67
- # WORKING=<<-'EOS'
68
+
68
69
  def test_distance
69
70
  element1= [1.0, 0.5, 0.533333333333333, 1.0, 0.0, 0.0, 0.333333333333333, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.333333333333333, 0.333333333333333, 0.666666666666667, 0.0, 0.0, 1.0, 0.333333333333333, 0.333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333333333333333, 0.0, 0.333333333333333, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333333333333333, 0.333333333333333, 0.0, 0.333333333333333, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.333333333333333, 0.0, 0.666666666666667, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
70
71
  element2= [1.0, 1.0, 0.5, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
71
72
  weights= [0.12158054711246201, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 
0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.0060790273556231, 0.0060790273556231, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155]
72
73
  assert_in_delta(Buncher::distance(element1,element2, weights),0.098,0.001)
73
74
  end
75
+ def test_distance_nan
76
+ element1 = [nil, 1, 1]
77
+ element2 = [nil, nil, 1]
78
+ weights = [1] * 3
79
+ assert_in_delta(Buncher::distance(element1,element2, weights),0.5,0.01)
80
+ end
74
81
  def test_should_find_the_one_center
75
82
  elements = [[1,1]]
76
83
  starting_centers = elements.sample(1).map {|aaa| Buncher::Cluster.new(aaa)}
@@ -80,9 +87,13 @@ class TestBuncher < Minitest::Test
80
87
  assert_in_delta(new_centers.first.center[1],1.0,0.01)
81
88
  end
82
89
 
90
+
83
91
  def test_choose_centers_wrapper
84
92
  elements = [[1,1]]
93
+ puts "test_choose_centers_wrapper - start"
85
94
  new_centers = Buncher::choose_centers(elements,[1]*2, 1)
95
+ puts "test_choose_centers_wrapper - well well"
96
+ puts new_centers.inspect
86
97
  dump(new_centers)
87
98
  assert_in_delta(new_centers.first.center[1],1.0,0.01)
88
99
  assert_in_delta(new_centers.first.center[1],1.0,0.01)
@@ -91,41 +102,34 @@ class TestBuncher < Minitest::Test
91
102
  elements = init_data(100,3)
92
103
  new_centers = Buncher::choose_centers(elements, [1]*2, 3)
93
104
  dump(new_centers)
94
- assert_equal(new_centers.size,3)
105
+ assert_equal(3, new_centers.size)
95
106
  end
96
- # EOS
107
+
108
+
97
109
  def test_min_size_works
98
- elements = init_data(100,4)
110
+ elements = init_data(100,1)
99
111
  new_centers = Buncher::cluster(elements,[1]*2,:min_size=>2)
100
- dump(new_centers)
101
- assert_equal(new_centers.size,4)
102
- end
103
-
104
- # WWW=<<-'EOS'
105
-
106
- def test_choose_centers_wrapper2
107
- elements = init_data(100,3)
108
- new_centers = Buncher::choose_centers(elements, [1]*2, 3)
109
- plot("/tmp/kmeans_initial_#{new_centers.size}.png",elements, new_centers)
110
- dump(new_centers)
111
- `open /tmp/kmeans_initial_#{new_centers.size}.png`
112
- assert_equal(new_centers.size,3)
112
+ assert(new_centers.size >=2,"at least 2 centers")
113
113
  end
114
-
114
+ focus
115
115
  def test_gaussian_distribution_of_100_points_in_3_clusters
116
- 12.times do |run|
116
+ 1.times do |run|
117
117
  # srand(843284148793854177950180651080082381)
118
118
  elements = init_data(100,3)
119
119
  # elements.each {|eee| puts "#{eee[0]},#{eee[1]}"}
120
+ graphfile=nil;
120
121
  # new_centers = Buncher::cluster(elements) {|elements,centers, initial_centers| puts "run #{run} setup";plot("/tmp/#{run}_centers_#{centers.size}.png",elements,initial_centers)}
121
122
  new_centers = Buncher::cluster(elements,[1]*2) {|elements,centers, initial_centers|
122
- plot("/tmp/#{run}_centers_#{centers.size}.png",elements,centers, initial_centers)
123
+ graphfile = "/tmp/#{run}_centers_#{centers.size}.png"
124
+ plot(graphfile,elements,centers, initial_centers)
123
125
  }
124
126
  puts "run #{run}: k is #{new_centers.size}, seed was #{srand}"
125
127
  puts "ERROR "*4 if new_centers.size != 3
126
128
  puts
127
129
  assert_equal(3,new_centers.size)
130
+ refute(new_centers.first.elements.empty?)
131
+ graphfile = "/tmp/#{run}_centers_3.png"
132
+ `open #{graphfile}`
128
133
  end
129
134
  end
130
- # EOS
131
135
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: buncher
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
4
+ version: 1.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-04-25 00:00:00.000000000 Z
12
+ date: 2015-04-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: minitest