buncher 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. data/lib/buncher.rb +8 -10
  2. data/test/test_buncher.rb +5 -5
  3. metadata +1 -1
data/lib/buncher.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  require 'buncher/buncher'
2
2
  module Buncher
3
- VERSION = "1.0.2"
3
+ VERSION = "1.0.3"
4
4
  # your cluster needs to look like this. Make a bunch of them and pass them in. It's ok to pass in empty elements to start.
5
5
  class Cluster
6
6
  attr_accessor :elements
@@ -60,14 +60,13 @@ module Buncher
60
60
 
61
61
  # run the clustering algorithm until have calculated the current number of clusters, taken from this paper:
62
62
  # http://papers.nips.cc/paper/2526-learning-the-k-in-k-means.pdf
63
- def self.cluster(elements, weights)
64
- changed=true
65
- round=0
63
+ def self.cluster(elements, weights,options={})
66
64
  solutions={}
67
- # try all the sizes of clusters up to #elements. Ok, sure we could probably do something like 25% .. ok, I did
65
+ min_size=options[:min_size] || 1
66
+ # try all the sizes of clusters up to #elements. Ok, sure we could probably do something like 50% .. ok, I did
68
67
  # that.
69
68
  not_clustered = last_sK = last_aK =last_fK=nil
70
- max_clusters=[1,(elements.size/2).floor].max
69
+ max_clusters=[min_size,(elements.size/2).floor].max
71
70
  (1..max_clusters).each do |number_clusters|
72
71
  initial_centers = choose_centers(elements, weights, number_clusters) # C++ Native code
73
72
  centers = initial_centers.map(&:dup)
@@ -76,13 +75,12 @@ module Buncher
76
75
  not_clustered ||=centers
77
76
  last_fK, last_sK, last_aK = fK(centers,last_sK, last_aK,weights)
78
77
  puts "summary #{number_clusters}: fK() = #{last_fK}, last_sK=#{last_sK} last_aK=#{last_aK} "
79
- puts
80
- solutions[last_fK]=centers
78
+ solutions[last_fK]=centers if number_clusters >= min_size
81
79
  # break if number_clusters == 2 ## debugging
82
80
  end
83
- min_fK =solutions.keys.sort.first
81
+ min_fK =solutions.keys.sort.first || 1.0
84
82
  if min_fK > 0.85
85
- not_clustered # ie, not clustered at all
83
+ nil # ie, not clustered at all
86
84
  else
87
85
  solutions[min_fK]
88
86
  end
data/test/test_buncher.rb CHANGED
@@ -12,7 +12,7 @@ def init_data(number_points, number_clusters)
12
12
  elements=[]
13
13
  extra=0
14
14
  index=0
15
- seed=[[0,1],[0,0],[1,0]]
15
+ seed=[[0,1],[0,0],[1,0],[1,1]]
16
16
  number_clusters.times do
17
17
  # gens = [Rubystats::NormalDistribution.new(rand(0..1), 0.05), Rubystats::NormalDistribution.new(rand(0..1), 0.05)]
18
18
  gens = [Rubystats::NormalDistribution.new(seed[index][0], 0.05), Rubystats::NormalDistribution.new(seed[index][1], 0.05)]
@@ -94,11 +94,11 @@ class TestBuncher < Minitest::Test
94
94
  assert_equal(new_centers.size,3)
95
95
  end
96
96
  # EOS
97
- def test_should_find_one_cluster
98
- elements = [[1,1]]
99
- new_centers = Buncher::cluster(elements,[1]*2)
97
+ def test_min_size_works
98
+ elements = init_data(100,4)
99
+ new_centers = Buncher::cluster(elements,[1]*2,:min_size=>2)
100
100
  dump(new_centers)
101
- assert_equal(new_centers.size,1)
101
+ assert_equal(new_centers.size,4)
102
102
  end
103
103
 
104
104
  # WWW=<<-'EOS'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: buncher
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: