buncher 1.0.2 → 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. data/lib/buncher.rb +8 -10
  2. data/test/test_buncher.rb +5 -5
  3. metadata +1 -1
data/lib/buncher.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  require 'buncher/buncher'
2
2
  module Buncher
3
- VERSION = "1.0.2"
3
+ VERSION = "1.0.3"
4
4
  # your cluster needs to look like this. Make a bunch of them and pass them in. It's ok to pass in empty elements to start.
5
5
  class Cluster
6
6
  attr_accessor :elements
@@ -60,14 +60,13 @@ module Buncher
60
60
 
61
61
  # run the clustering algorithm until have calculated the current number of clusters, taken from this paper:
62
62
  # http://papers.nips.cc/paper/2526-learning-the-k-in-k-means.pdf
63
- def self.cluster(elements, weights)
64
- changed=true
65
- round=0
63
+ def self.cluster(elements, weights,options={})
66
64
  solutions={}
67
- # try all the sizes of clusters up to #elements. Ok, sure we could probably do something like 25% .. ok, I did
65
+ min_size=options[:min_size] || 1
66
+ # try all the sizes of clusters up to #elements. Ok, sure we could probably do something like 50% .. ok, I did
68
67
  # that.
69
68
  not_clustered = last_sK = last_aK =last_fK=nil
70
- max_clusters=[1,(elements.size/2).floor].max
69
+ max_clusters=[min_size,(elements.size/2).floor].max
71
70
  (1..max_clusters).each do |number_clusters|
72
71
  initial_centers = choose_centers(elements, weights, number_clusters) # C++ Native code
73
72
  centers = initial_centers.map(&:dup)
@@ -76,13 +75,12 @@ module Buncher
76
75
  not_clustered ||=centers
77
76
  last_fK, last_sK, last_aK = fK(centers,last_sK, last_aK,weights)
78
77
  puts "summary #{number_clusters}: fK() = #{last_fK}, last_sK=#{last_sK} last_aK=#{last_aK} "
79
- puts
80
- solutions[last_fK]=centers
78
+ solutions[last_fK]=centers if number_clusters >= min_size
81
79
  # break if number_clusters == 2 ## debugging
82
80
  end
83
- min_fK =solutions.keys.sort.first
81
+ min_fK =solutions.keys.sort.first || 1.0
84
82
  if min_fK > 0.85
85
- not_clustered # ie, not clustered at all
83
+ nil # ie, not clustered at all
86
84
  else
87
85
  solutions[min_fK]
88
86
  end
data/test/test_buncher.rb CHANGED
@@ -12,7 +12,7 @@ def init_data(number_points, number_clusters)
12
12
  elements=[]
13
13
  extra=0
14
14
  index=0
15
- seed=[[0,1],[0,0],[1,0]]
15
+ seed=[[0,1],[0,0],[1,0],[1,1]]
16
16
  number_clusters.times do
17
17
  # gens = [Rubystats::NormalDistribution.new(rand(0..1), 0.05), Rubystats::NormalDistribution.new(rand(0..1), 0.05)]
18
18
  gens = [Rubystats::NormalDistribution.new(seed[index][0], 0.05), Rubystats::NormalDistribution.new(seed[index][1], 0.05)]
@@ -94,11 +94,11 @@ class TestBuncher < Minitest::Test
94
94
  assert_equal(new_centers.size,3)
95
95
  end
96
96
  # EOS
97
- def test_should_find_one_cluster
98
- elements = [[1,1]]
99
- new_centers = Buncher::cluster(elements,[1]*2)
97
+ def test_min_size_works
98
+ elements = init_data(100,4)
99
+ new_centers = Buncher::cluster(elements,[1]*2,:min_size=>2)
100
100
  dump(new_centers)
101
- assert_equal(new_centers.size,1)
101
+ assert_equal(new_centers.size,4)
102
102
  end
103
103
 
104
104
  # WWW=<<-'EOS'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: buncher
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: