buncher 1.0.1 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  require 'buncher/buncher'
2
2
  module Buncher
3
- VERSION = "1.0.1"
3
+ VERSION = "1.0.2"
4
4
  # your cluster needs to look like this. Make a bunch of them and pass them in. It's ok to pass in empty elements to start.
5
5
  class Cluster
6
6
  attr_accessor :elements
@@ -46,9 +46,9 @@ module Buncher
46
46
  end
47
47
  end
48
48
 
49
- def self.fK(centers,last_sK, last_aK)
49
+ def self.fK(centers,last_sK, last_aK,weights)
50
50
  # from here - http://www.ee.columbia.edu/~dpwe/papers/PhamDN05-kmeans.pdf
51
- sK = centers.inject(0) {|acc, val| acc + val.distortion}
51
+ sK = centers.inject(0) {|acc, val| acc + val.distortion(weights)}
52
52
  aK = calc_aK(centers, last_aK) if centers.size > 1
53
53
  if centers.size == 1 || (last_sK||0).zero?
54
54
  [1,sK, aK || 0]
@@ -60,21 +60,21 @@ module Buncher
60
60
 
61
61
  # Run the clustering algorithm until we have calculated the current number of clusters. The approach is taken from this paper:
62
62
  # http://papers.nips.cc/paper/2526-learning-the-k-in-k-means.pdf
63
- def self.cluster(elements)
63
+ def self.cluster(elements, weights)
64
64
  changed=true
65
65
  round=0
66
66
  solutions={}
67
67
  # Try every cluster count from 1 up to max_clusters (computed below). Going all the way up to #elements is
68
68
  # probably unnecessary; the idea was to cap it at something like 25% of the elements.
69
69
  not_clustered = last_sK = last_aK =last_fK=nil
70
- max_clusters=[1,elements.size].max
70
+ max_clusters=[1,(elements.size/2).floor].max
71
71
  (1..max_clusters).each do |number_clusters|
72
- initial_centers = choose_centers(elements, number_clusters) # C++ Native code
72
+ initial_centers = choose_centers(elements, weights, number_clusters) # C++ Native code
73
73
  centers = initial_centers.map(&:dup)
74
- centers = kmeans(centers,elements) ## C++ Native code
74
+ centers = kmeans(centers,elements,weights) ## C++ Native code
75
75
  yield(elements, centers, initial_centers) if block_given?
76
76
  not_clustered ||=centers
77
- last_fK, last_sK, last_aK = fK(centers,last_sK, last_aK)
77
+ last_fK, last_sK, last_aK = fK(centers,last_sK, last_aK,weights)
78
78
  puts "summary #{number_clusters}: fK() = #{last_fK}, last_sK=#{last_sK} last_aK=#{last_aK} "
79
79
  puts
80
80
  solutions[last_fK]=centers
Binary file
@@ -64,11 +64,17 @@ def plot(file_name,points,centers, initial_centers=nil)
64
64
  end
65
65
 
66
66
  class TestBuncher < Minitest::Test
67
- WORKING=<<-'EOS'
67
+ # WORKING=<<-'EOS'
68
+ def test_distance
69
+ element1= [1.0, 0.5, 0.533333333333333, 1.0, 0.0, 0.0, 0.333333333333333, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.333333333333333, 0.333333333333333, 0.666666666666667, 0.0, 0.0, 1.0, 0.333333333333333, 0.333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333333333333333, 0.0, 0.333333333333333, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333333333333333, 0.333333333333333, 0.0, 0.333333333333333, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.333333333333333, 0.0, 0.666666666666667, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
70
+ element2= [1.0, 1.0, 0.5, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
71
+ weights= [0.12158054711246201, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 
0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.0060790273556231, 0.0060790273556231, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155]
72
+ assert_in_delta(Buncher::distance(element1,element2, weights),0.098,0.001)
73
+ end
68
74
  def test_should_find_the_one_center
69
75
  elements = [[1,1]]
70
76
  starting_centers = elements.sample(1).map {|aaa| Buncher::Cluster.new(aaa)}
71
- new_centers = Buncher::kmeans(starting_centers, elements)
77
+ new_centers = Buncher::kmeans(starting_centers, elements, [1]*2)
72
78
  dump(new_centers)
73
79
  assert_in_delta(new_centers.first.center[0],1.0,0.01)
74
80
  assert_in_delta(new_centers.first.center[1],1.0,0.01)
@@ -76,32 +82,35 @@ WORKING=<<-'EOS'
76
82
 
77
83
  def test_choose_centers_wrapper
78
84
  elements = [[1,1]]
79
- new_centers = Buncher::choose_centers(elements, 1)
85
+ new_centers = Buncher::choose_centers(elements,[1]*2, 1)
80
86
  dump(new_centers)
81
87
  assert_in_delta(new_centers.first.center[1],1.0,0.01)
82
88
  assert_in_delta(new_centers.first.center[1],1.0,0.01)
83
89
  end
84
90
  def test_choose_centers_wrapper2
85
91
  elements = init_data(100,3)
86
- new_centers = Buncher::choose_centers(elements, 3)
92
+ new_centers = Buncher::choose_centers(elements, [1]*2, 3)
87
93
  dump(new_centers)
88
94
  assert_equal(new_centers.size,3)
89
95
  end
96
+ # EOS
90
97
  def test_should_find_one_cluster
91
98
  elements = [[1,1]]
92
- new_centers = Buncher::cluster(elements)
99
+ new_centers = Buncher::cluster(elements,[1]*2)
93
100
  dump(new_centers)
94
101
  assert_equal(new_centers.size,1)
95
102
  end
103
+
104
+ # WWW=<<-'EOS'
105
+
96
106
  def test_choose_centers_wrapper2
97
107
  elements = init_data(100,3)
98
- new_centers = Buncher::choose_centers(elements, 3)
108
+ new_centers = Buncher::choose_centers(elements, [1]*2, 3)
99
109
  plot("/tmp/kmeans_initial_#{new_centers.size}.png",elements, new_centers)
100
110
  dump(new_centers)
101
111
  `open /tmp/kmeans_initial_#{new_centers.size}.png`
102
112
  assert_equal(new_centers.size,3)
103
113
  end
104
- EOS
105
114
 
106
115
  def test_gaussian_distribution_of_100_points_in_3_clusters
107
116
  12.times do |run|
@@ -109,7 +118,7 @@ EOS
109
118
  elements = init_data(100,3)
110
119
  # elements.each {|eee| puts "#{eee[0]},#{eee[1]}"}
111
120
  # new_centers = Buncher::cluster(elements) {|elements,centers, initial_centers| puts "run #{run} setup";plot("/tmp/#{run}_centers_#{centers.size}.png",elements,initial_centers)}
112
- new_centers = Buncher::cluster(elements) {|elements,centers, initial_centers|
121
+ new_centers = Buncher::cluster(elements,[1]*2) {|elements,centers, initial_centers|
113
122
  plot("/tmp/#{run}_centers_#{centers.size}.png",elements,centers, initial_centers)
114
123
  }
115
124
  puts "run #{run}: k is #{new_centers.size}, seed was #{srand}"
@@ -118,4 +127,5 @@ EOS
118
127
  assert_equal(3,new_centers.size)
119
128
  end
120
129
  end
130
+ # EOS
121
131
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: buncher
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-04-24 00:00:00.000000000 Z
12
+ date: 2015-04-25 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: minitest