buncher 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  require 'buncher/buncher'
2
2
  module Buncher
3
- VERSION = "1.0.1"
3
+ VERSION = "1.0.2"
4
4
  # your cluster needs to look like this. Make a bunch of them and pass them in. It's ok to pass in empty elements to start.
5
5
  class Cluster
6
6
  attr_accessor :elements
@@ -46,9 +46,9 @@ module Buncher
46
46
  end
47
47
  end
48
48
 
49
- def self.fK(centers,last_sK, last_aK)
49
+ def self.fK(centers,last_sK, last_aK,weights)
50
50
  # from here - http://www.ee.columbia.edu/~dpwe/papers/PhamDN05-kmeans.pdf
51
- sK = centers.inject(0) {|acc, val| acc + val.distortion}
51
+ sK = centers.inject(0) {|acc, val| acc + val.distortion(weights)}
52
52
  aK = calc_aK(centers, last_aK) if centers.size > 1
53
53
  if centers.size == 1 || (last_sK||0).zero?
54
54
  [1,sK, aK || 0]
@@ -60,21 +60,21 @@ module Buncher
60
60
 
61
61
  # run the clustering algorithm until we have calculated the current number of clusters, taken from this paper:
62
62
  # http://papers.nips.cc/paper/2526-learning-the-k-in-k-means.pdf
63
- def self.cluster(elements)
63
+ def self.cluster(elements, weights)
64
64
  changed=true
65
65
  round=0
66
66
  solutions={}
67
67
  # try all the sizes of clusters up to #elements. Ok, sure we could probably do something like 25% .. ok, I did
68
68
  # that.
69
69
  not_clustered = last_sK = last_aK =last_fK=nil
70
- max_clusters=[1,elements.size].max
70
+ max_clusters=[1,(elements.size/2).floor].max
71
71
  (1..max_clusters).each do |number_clusters|
72
- initial_centers = choose_centers(elements, number_clusters) # C++ Native code
72
+ initial_centers = choose_centers(elements, weights, number_clusters) # C++ Native code
73
73
  centers = initial_centers.map(&:dup)
74
- centers = kmeans(centers,elements) ## C++ Native code
74
+ centers = kmeans(centers,elements,weights) ## C++ Native code
75
75
  yield(elements, centers, initial_centers) if block_given?
76
76
  not_clustered ||=centers
77
- last_fK, last_sK, last_aK = fK(centers,last_sK, last_aK)
77
+ last_fK, last_sK, last_aK = fK(centers,last_sK, last_aK,weights)
78
78
  puts "summary #{number_clusters}: fK() = #{last_fK}, last_sK=#{last_sK} last_aK=#{last_aK} "
79
79
  puts
80
80
  solutions[last_fK]=centers
Binary file
@@ -64,11 +64,17 @@ def plot(file_name,points,centers, initial_centers=nil)
64
64
  end
65
65
 
66
66
  class TestBuncher < Minitest::Test
67
- WORKING=<<-'EOS'
67
+ # WORKING=<<-'EOS'
68
+ def test_distance
69
+ element1= [1.0, 0.5, 0.533333333333333, 1.0, 0.0, 0.0, 0.333333333333333, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.333333333333333, 0.333333333333333, 0.666666666666667, 0.0, 0.0, 1.0, 0.333333333333333, 0.333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333333333333333, 0.0, 0.333333333333333, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333333333333333, 0.333333333333333, 0.0, 0.333333333333333, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.333333333333333, 0.0, 0.666666666666667, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
70
+ element2= [1.0, 1.0, 0.5, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
71
+ weights= [0.12158054711246201, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 
0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.0060790273556231, 0.0060790273556231, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155]
72
+ assert_in_delta(Buncher::distance(element1,element2, weights),0.098,0.001)
73
+ end
68
74
  def test_should_find_the_one_center
69
75
  elements = [[1,1]]
70
76
  starting_centers = elements.sample(1).map {|aaa| Buncher::Cluster.new(aaa)}
71
- new_centers = Buncher::kmeans(starting_centers, elements)
77
+ new_centers = Buncher::kmeans(starting_centers, elements, [1]*2)
72
78
  dump(new_centers)
73
79
  assert_in_delta(new_centers.first.center[0],1.0,0.01)
74
80
  assert_in_delta(new_centers.first.center[1],1.0,0.01)
@@ -76,32 +82,35 @@ WORKING=<<-'EOS'
76
82
 
77
83
  def test_choose_centers_wrapper
78
84
  elements = [[1,1]]
79
- new_centers = Buncher::choose_centers(elements, 1)
85
+ new_centers = Buncher::choose_centers(elements,[1]*2, 1)
80
86
  dump(new_centers)
81
87
  assert_in_delta(new_centers.first.center[1],1.0,0.01)
82
88
  assert_in_delta(new_centers.first.center[1],1.0,0.01)
83
89
  end
84
90
  def test_choose_centers_wrapper2
85
91
  elements = init_data(100,3)
86
- new_centers = Buncher::choose_centers(elements, 3)
92
+ new_centers = Buncher::choose_centers(elements, [1]*2, 3)
87
93
  dump(new_centers)
88
94
  assert_equal(new_centers.size,3)
89
95
  end
96
+ # EOS
90
97
  def test_should_find_one_cluster
91
98
  elements = [[1,1]]
92
- new_centers = Buncher::cluster(elements)
99
+ new_centers = Buncher::cluster(elements,[1]*2)
93
100
  dump(new_centers)
94
101
  assert_equal(new_centers.size,1)
95
102
  end
103
+
104
+ # WWW=<<-'EOS'
105
+
96
106
  def test_choose_centers_wrapper2
97
107
  elements = init_data(100,3)
98
- new_centers = Buncher::choose_centers(elements, 3)
108
+ new_centers = Buncher::choose_centers(elements, [1]*2, 3)
99
109
  plot("/tmp/kmeans_initial_#{new_centers.size}.png",elements, new_centers)
100
110
  dump(new_centers)
101
111
  `open /tmp/kmeans_initial_#{new_centers.size}.png`
102
112
  assert_equal(new_centers.size,3)
103
113
  end
104
- EOS
105
114
 
106
115
  def test_gaussian_distribution_of_100_points_in_3_clusters
107
116
  12.times do |run|
@@ -109,7 +118,7 @@ EOS
109
118
  elements = init_data(100,3)
110
119
  # elements.each {|eee| puts "#{eee[0]},#{eee[1]}"}
111
120
  # new_centers = Buncher::cluster(elements) {|elements,centers, initial_centers| puts "run #{run} setup";plot("/tmp/#{run}_centers_#{centers.size}.png",elements,initial_centers)}
112
- new_centers = Buncher::cluster(elements) {|elements,centers, initial_centers|
121
+ new_centers = Buncher::cluster(elements,[1]*2) {|elements,centers, initial_centers|
113
122
  plot("/tmp/#{run}_centers_#{centers.size}.png",elements,centers, initial_centers)
114
123
  }
115
124
  puts "run #{run}: k is #{new_centers.size}, seed was #{srand}"
@@ -118,4 +127,5 @@ EOS
118
127
  assert_equal(3,new_centers.size)
119
128
  end
120
129
  end
130
+ # EOS
121
131
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: buncher
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-04-24 00:00:00.000000000 Z
12
+ date: 2015-04-25 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: minitest