davidrichards-kmeans 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +33 -0
- data/VERSION.yml +4 -0
- data/bin/kmeans +30 -0
- data/lib/ext/enumerable.rb +57 -0
- data/lib/ext/object.rb +16 -0
- data/lib/include_kmeans.rb +2 -0
- data/lib/kmeans.rb +6 -0
- data/lib/kmeans/agent.rb +83 -0
- data/lib/kmeans/centroid.rb +45 -0
- data/lib/kmeans/node.rb +126 -0
- data/spec/ext/enumerable_spec.rb +45 -0
- data/spec/ext/object_spec.rb +23 -0
- data/spec/kmeans/agent_spec.rb +30 -0
- data/spec/kmeans/centroid_spec.rb +56 -0
- data/spec/kmeans/node_spec.rb +132 -0
- data/spec/kmeans_spec.rb +7 -0
- data/spec/spec_helper.rb +14 -0
- metadata +83 -0
data/README.rdoc
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
== KMeans
|
|
2
|
+
|
|
3
|
+
This is another KMeans implementation. I have a few things that I need to work on:
|
|
4
|
+
|
|
5
|
+
* Extract the NodeList information out of the Node to make it more flexible
|
|
6
|
+
* Work on an online version of this code
|
|
7
|
+
|
|
8
|
+
The basic idea is that I wanted to solve a problem for a client, but I didn't want to throw the code away. So you get this.
|
|
9
|
+
|
|
10
|
+
==Usage
|
|
11
|
+
|
|
12
|
+
For now, a very quick demonstration. This creates a one-dimensional node list of 100 random integers drawn from 1 through 3, 5, and 7 through 10 (4 and 6 are excluded).
|
|
13
|
+
|
|
14
|
+
@node_list = (1..100).map { while (n = rand(10) + 1) == 4 or n == 6; end; n}
|
|
15
|
+
@kmeans = KMeans::Agent.call(:k => 3, *@node_list)
|
|
16
|
+
puts @kmeans.centroids.inspect
|
|
17
|
+
# => [KMeans::Centroid:[223/26], KMeans::Centroid:[37/17], KMeans::Centroid:[5]]
|
|
18
|
+
@kmeans.centroids.map {|c| c.position.first.to_f}
|
|
19
|
+
# => [8.57692307692308, 2.17647058823529, 5.0]
|
|
20
|
+
|
|
21
|
+
I'll put a more interesting example together at another time.
|
|
22
|
+
|
|
23
|
+
==Installation
|
|
24
|
+
|
|
25
|
+
sudo gem install davidrichards-kmeans
|
|
26
|
+
|
|
27
|
+
=== Dependencies
|
|
28
|
+
|
|
29
|
+
* TeguGears
|
|
30
|
+
|
|
31
|
+
==COPYRIGHT
|
|
32
|
+
|
|
33
|
+
Copyright (c) 2009 David Richards. See LICENSE for details.
|
data/VERSION.yml
ADDED
data/bin/kmeans
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
#!/usr/bin/env ruby -wKU
# Console launcher for KMeans: prints the gem version read from VERSION.yml
# and replaces this process with an irb session that has lib/kmeans and
# lib/include_kmeans (plus any files given with -r) preloaded.
require 'yaml'
require 'optparse'

bin_dir = File.dirname(__FILE__)

version_hash = YAML.load_file(File.join(bin_dir, %w(.. VERSION.yml)))
version = [:major, :minor, :patch].map { |part| version_hash[part].to_s }.join(".")
kmeans_file = File.join(bin_dir, %w(.. lib kmeans))
config_file = File.join(bin_dir, %w(.. lib include_kmeans))

# Windows ships irb as a batch file. (?:...) is a non-capturing group; the
# previous "(:?" spelling only matched an optional literal colon.
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

# :irb defaults to the platform-specific executable detected above, and :r
# starts as a real list so that -r values are collected rather than appended
# to a String.
options = {:irb => irb, :r => []}
OptionParser.new do |opt|
  opt.banner = "Usage: kmeans [options]"
  # The argument is optional; keep the default when --irb is given bare.
  opt.on("--irb=[#{irb}]", 'Invoke a different irb.') { |v| options[:irb] = v if v }
  opt.on("-r=file", 'Require other files') { |v| options[:r] << v }
  opt.parse!(ARGV)
end

libs = " -r irb/completion -r #{kmeans_file} -r #{config_file}"
options[:r].each { |f| libs << " -r #{f}" }

puts "Loading KMeans (#{version})"

# exec never returns: the irb process takes over from here.
exec "#{options[:irb]} #{libs} --simple-prompt"
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# Numeric convenience helpers mixed into every Enumerable.
module Enumerable
  # Like map, but the block also receives the element's index.
  # Returns a new Array; the receiver is left untouched.
  def map_with_index
    val = []
    each_with_index { |e, i| val << yield(e, i) }
    val
  end

  # Destructive map_with_index: every slot is overwritten with the block's
  # result. The receiver must support []= (e.g. an Array).
  # Returns whatever each_with_index returns (the receiver, for an Array).
  def map_with_index!
    each_with_index { |e, i| self[i] = yield(e, i) }
  end

  # Multiplies each element by the entry of +ary+ at the same index.
  # Expects an array of scalars. Returns a new Array.
  def scale_with(ary)
    map_with_index { |e, i| e * ary[i] }
  end

  # Destructive version of scale_with.
  def scale_with!(ary)
    map_with_index! { |e, i| e * ary[i] }
  end

  # Adds up the elements. The accumulator starts as 0.0 when any element is
  # a Float and as 0 otherwise.
  #
  # Only defined when the running Ruby has no Enumerable#sum of its own, so
  # the built-in (which also accepts an initial value and a block) is never
  # clobbered.
  unless method_defined?(:sum)
    def sum
      val = any? { |e| e.is_a?(Float) } ? 0.0 : 0
      inject(val) { |s, e| s + e }
    end
  end

  # sum divided by size. The receiver must respond to size; the division
  # follows the usual rules for the operand types (Integer / Integer
  # truncates unless something like mathn changes that).
  def average
    sum / size
  end
  alias :mean :average
  alias :avg :average

  # Returns the position (or first position) of the minimal value. So,
  # [3,2,1,4,5,0].min_position is 5. Returns nil for an empty collection.
  def min_position
    best_value = nil
    best_index = nil
    each_with_index do |e, i|
      # Strict comparison keeps the first position on ties.
      if best_index.nil? || e < best_value
        best_value = e
        best_index = i
      end
    end
    best_index
  end

  # Returns the position (or first position) of the maximal value.
  # Returns nil for an empty collection.
  def max_position
    best_value = nil
    best_index = nil
    each_with_index do |e, i|
      if best_index.nil? || e > best_value
        best_value = e
        best_index = i
      end
    end
    best_index
  end
end
|
data/lib/ext/object.rb
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
class Object

  # Simpler way to get a random number between two values, given in either
  # order. For integers both ends are inclusive; as soon as one of the
  # bounds is a Float the work is handed to rand_in_floats.
  def rand_between(a, b)
    if a.is_a?(Float) || b.is_a?(Float)
      rand_in_floats(a, b)
    else
      span = (a - b).abs + 1
      low = [a, b].min
      rand(span) + low
    end
  end

  # Handles non-integers: scales rand (a Float in [0, 1)) by the distance
  # between the bounds and shifts it up by the smaller bound.
  def rand_in_floats(a, b)
    width = (a - b).abs
    low = [a, b].min
    (rand * width) + low
  end

end
|
data/lib/kmeans.rb
ADDED
data/lib/kmeans/agent.rb
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
module KMeans #:nodoc:
  # Runs the k-means loop over the nodes registered on Node: nodes are
  # assigned to their nearest centroid, centroids are rebalanced, and the
  # cycle repeats until an assignment pass moves nothing.
  class Agent

    # NOTE(review): TeguGears presumably supplies the class-level
    # call/process entry points used by the README and specs -- confirm.
    include TeguGears

    class << self
      # Only works if the agent was processed as an online algorithm.
      # Adds +node_list+ to the remembered agent and re-stabilizes it.
      # Returns false when no online agent has been recorded yet (reading
      # an unset class variable would otherwise raise NameError).
      def rebase(*node_list)
        return false unless class_variable_defined?(:@@agent) && @@agent
        @@agent.rebase(*node_list)
      end
    end

    # The number of clusters we're after
    attr_reader :k

    # Whether we're interested in keeping the results after processing. To re-process:
    # Agent.rebase(*new_node_list)
    attr_reader :online

    # The centroids used for the clustering
    attr_reader :centroids

    # All the affected nodes
    def nodes
      Node.nodes
    end

    # Clusters +node_list+ and returns self.
    #
    # +opts+ is either the number of clusters, or a Hash with:
    #   :k         - number of clusters
    #   :centroids - centroids to start from (random ones are inferred
    #                when omitted)
    #   :online    - keep nodes/centroids so Agent.rebase can extend them
    #   :scaling   - passed through to Node#distance; presumably one
    #                weight per dimension -- confirm against callers
    #
    # Example:
    # KMeans::Agent.call(3, [1,2,1], [2,1,3], ...)
    # KMeans::Agent.call(:k => 3, :centroids => [[1,2,3],[2,3,4],[3,4,2]], [1,2,1], [2,1,3], ...)
    def process(opts={}, *node_list)
      # A non-online agent starts every run from a clean slate.
      unless self.online
        Node.clear_nodes!
        @centroids = nil
      end

      Node.add_nodes(*node_list)

      if opts.is_a?(Hash)
        @scaling = opts.fetch(:scaling, false)
        @k = opts[:k]
        @centroids = opts.fetch(:centroids, false)
        @online = opts.fetch(:online, false)
      else
        @k = opts
      end

      stabilize_centroids
      # Remember the agent so the class-level rebase can find it.
      @@agent = self if self.online
      self
    end

    # Adds +node_list+ to the existing nodes and re-runs the clustering
    # with the current centroids. Returns self.
    def rebase(*node_list)
      Node.add_nodes(*node_list)
      stabilize_centroids
      self
    end

    protected

      # This is the core algorithm: assign nodes to centroids, rebalance
      # centroids, repeat until no new assignment is necessary.
      # The :scaling option is forwarded so it actually influences the
      # distance calculation.
      def stabilize_centroids
        @centroids ||= infer_centroids(@k)
        n = Node.cluster_to(@centroids, @scaling)
        while n > 0
          @centroids.each { |c| c.rebalance }
          n = Node.cluster_to(@centroids, @scaling)
        end
      end

      # Builds +k+ random centroids via Node.random_centroid.
      def infer_centroids(k)
        (1..k).map { Node.random_centroid }
      end

  end
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
module KMeans #:nodoc:
  # A cluster centre: a position plus the list of nodes currently
  # assigned to it.
  class Centroid

    # The position of the centroid, or a value for every dimension
    attr_reader :position

    def initialize(position)
      @position = position
    end

    # The nodes attached to this centroid (lazily created Array).
    def nodes
      @nodes ||= []
    end

    # Attaches every given node and points its centroid back at self.
    # Does not detach the node from a previous centroid; Node#move_to
    # handles that.
    def add_nodes(*new_nodes)
      new_nodes.each do |node|
        nodes << node
        node.centroid = self
      end
    end
    alias :add_node :add_nodes

    # Detaches every given node from this centroid. The node's centroid
    # reference is cleared only when it points at this centroid, so asking
    # the wrong centroid to remove a node no longer orphans it from the
    # centroid that really owns it.
    def remove_nodes(*old_nodes)
      old_nodes.each do |node|
        nodes.delete(node)
        node.centroid = nil if node.centroid.equal?(self)
      end
    end
    alias :remove_node :remove_nodes

    # Finds a new centroid based on the nodes currently attached to it:
    # each dimension becomes the mean of that dimension over all attached
    # nodes (mean comes from the Enumerable extension).
    # Returns true without moving when there are no nodes, otherwise the
    # new position.
    def rebalance
      return true if nodes.empty?
      size = nodes.first.position.size
      @position = (0...size).map do |i|
        nodes.map { |e| e.position[i] }.mean
      end
    end

    def inspect
      "KMeans::Centroid:#{self.position.inspect}"
    end

  end
end
|
data/lib/kmeans/node.rb
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
module KMeans #:nodoc:
  # A data point. Every Node registers itself in a class-level list when
  # it is created, and all nodes in that list must have the same number of
  # dimensions.
  class Node
    class << self

      # Clears out all nodes. Don't do this while working on a specific
      # problem, but in-between problems. This nodes-on-the-Node stuff needs
      # to be adjusted when I have some time.
      # The cached boundaries are dropped too, since they describe the
      # nodes being removed.
      def clear_nodes!
        @@boundaries = nil
        self.nodes.clear
      end

      # Moves every node to its nearest centroid (distances optionally
      # weighted by +scaling+, see Node#distance).
      # Returns the number of re-assignments made
      def cluster_to(centroids, scaling=nil)
        moved = 0
        self.nodes.each do |node|
          nearest = centroids.map { |centroid| node.distance(centroid, scaling) }.min_position
          target_centroid = centroids[nearest]
          next if target_centroid == node.centroid
          node.move_to(target_centroid)
          moved += 1
        end
        moved
      end

      # Gets the max and min on every dimension for every node, as one
      # [min, max] pair per dimension. Returns nil when there are no nodes.
      # The result is cached until the next node is added.
      def find_boundaries
        return nil if self.nodes.empty?
        return @@boundaries if @@boundaries
        # transpose turns the list of positions into one list per dimension.
        per_dimension = self.nodes.map { |node| node.position }.transpose
        @@boundaries = per_dimension.map { |values| [values.min, values.max] }
      end
      alias :boundaries :find_boundaries

      # A list of nodes initialized
      def nodes
        @@nodes ||= []
      end

      # Number of dimensions every node must have; set by the first node
      # added to an empty list.
      def dimension_size
        @@dimension_size
      end

      def dimension_size=(val)
        @@dimension_size = val
      end

      # Instantiates the node object and adds them to the list. Example:
      #   Node.add_nodes [1,2,3], [4,5,3], [2,1,3]
      #   Node.add_nodes *node_list
      # Each argument is a position list (or a single value for one
      # dimension); construction registers the node.
      def add_nodes(*positions)
        positions.each { |position| new(*position) }
      end

      # Adds a node, clears the boundaries cache, and checks the dimension
      # count. Anything that is not a Node is treated as a position list
      # and turned into a Node first.
      #
      # Raises ArgumentError when the node's dimension count differs from
      # the nodes already registered. Returns the node.
      def add_node(node)
        # Node#initialize registers the new instance by calling back into
        # this method, so construction alone completes the job. Falling
        # through here would register the same node a second time.
        return new(*node) unless node.is_a?(Node)

        self.dimension_size = node.position.size if self.nodes.empty?
        raise ArgumentError, "Node does not have the right number of positions" unless
          node.position.size == self.dimension_size
        self.nodes << node
        @@boundaries = nil
        node
      end

      # A centroid that fits between the boundaries on each dimension. The
      # boundaries for a 3-dimensional model might look like:
      #   [[1,5], [0,10], [1,100]]
      # This means that the first dimension can be between 1 and 5, the second
      # between 0 and 10, and the third between 1 and 100.
      def random_centroid
        position = self.boundaries.map { |low, high| rand_between(low, high) }
        Centroid.new(position)
      end

    end

    # The position on every dimension
    attr_reader :position

    # Records which centroid the node is assigned to
    attr_accessor :centroid

    # Creates a node based only on the position. The dimension
    # cardinality is enforced on the class level (I.e., all nodes must have
    # the same number of position.)
    def initialize(*position)
      @position = position
      Node.add_node(self)
    end

    # Euclidean distance to +centroid+ (defaults to the node's own
    # centroid). When +scaling+ is given, both coordinates of dimension i
    # are multiplied by scaling[i] before the difference is taken, so some
    # dimensions count more than others.
    def distance(centroid=nil, scaling=nil)
      centroid ||= self.centroid
      target = centroid.position
      squares = self.position.map_with_index do |value, i|
        delta = if scaling
          (target[i] * scaling[i]) - (value * scaling[i])
        else
          target[i] - value
        end
        delta ** 2
      end
      Math.sqrt(squares.sum)
    end

    # Detaches the node from its current centroid (if any) and attaches it
    # to +centroid+.
    def move_to(centroid)
      self.centroid.remove_node(self) if self.centroid
      centroid.add_node(self)
    end
  end
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../spec_helper")
|
|
2
|
+
|
|
3
|
+
describe Enumerable do
|
|
4
|
+
|
|
5
|
+
before do
|
|
6
|
+
@a = [1,2,3]
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
it "should have a map_with_index" do
|
|
10
|
+
@a.map_with_index {|e, i| i * 2}.should eql([0, 2, 4])
|
|
11
|
+
@a.should eql([1,2,3])
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
it "should have a destructive map with index" do
|
|
15
|
+
@a.map_with_index! {|e, i| i * 2}.should eql([0, 2, 4])
|
|
16
|
+
@a.should eql([0,2,4])
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
it "should be able to scale an array by another array" do
|
|
20
|
+
@a.scale_with([2,2,2]).should eql([2,4,6])
|
|
21
|
+
@a.should eql([1,2,3])
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
it "should have a dextructive scale with method" do
|
|
25
|
+
@a.scale_with!([2,2,2]).should eql([2,4,6])
|
|
26
|
+
@a.should eql([2,4,6])
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
it "should be able to do a simple sum" do
|
|
30
|
+
@a.sum.should eql(6)
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
it "should be able to calculate the mean" do
|
|
34
|
+
@a.mean.should eql(2)
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
it "should know the position of the minimum value" do
|
|
38
|
+
@a.min_position.should eql(0)
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
it "should know the position of the maximum value" do
|
|
42
|
+
@a.max_position.should eql(2)
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../spec_helper")
|
|
2
|
+
|
|
3
|
+
describe Object do
|
|
4
|
+
it "should be able to generate a random number between two integers" do
|
|
5
|
+
val = (1..100).map {rand_between(1,10)}
|
|
6
|
+
(val.min >= 1).should be_true
|
|
7
|
+
(val.max <= 10).should be_true
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
it "should be able to generate a random number between two floats" do
|
|
11
|
+
val = (1..100).map {rand_in_floats(1.0,10.0)}
|
|
12
|
+
(val.min >= 1).should be_true
|
|
13
|
+
(val.max <= 10).should be_true
|
|
14
|
+
val.all? {|v| v.should be_is_a(Float)}
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
it "should be able to work with floats from rand_between" do
|
|
18
|
+
val = (1..100).map {rand_between(1.0,10.0)}
|
|
19
|
+
(val.min >= 1).should be_true
|
|
20
|
+
(val.max <= 10).should be_true
|
|
21
|
+
val.all? {|v| v.should be_is_a(Float)}
|
|
22
|
+
end
|
|
23
|
+
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../spec_helper")
|
|
2
|
+
|
|
3
|
+
describe Agent do
|
|
4
|
+
before(:all) do
|
|
5
|
+
@node_list = [[1,1,1], [2,2,2], [3,3,3], [7,7,7], [8,8,8],[9,9,9]]
|
|
6
|
+
end
|
|
7
|
+
|
|
8
|
+
it "should be able to process things simply" do
|
|
9
|
+
agent = Agent.process(:k => 2, *@node_list)
|
|
10
|
+
centroid_map = agent.centroids.map {|c| c.position}
|
|
11
|
+
centroid_map.should be_include([2,2,2])
|
|
12
|
+
centroid_map.should be_include([8,8,8])
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
it "should be able to assume the first parameter is the k parameter in a simple usage situation" do
|
|
16
|
+
agent = Agent.process(2, *@node_list)
|
|
17
|
+
centroid_map = agent.centroids.map {|c| c.position}
|
|
18
|
+
centroid_map.should be_include([2,2,2])
|
|
19
|
+
centroid_map.should be_include([8,8,8])
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
it "should allow centroids to be passed in to simplify processing" do
|
|
23
|
+
c1 = Centroid.new([4,4,4])
|
|
24
|
+
c2 = Centroid.new([5,5,5])
|
|
25
|
+
agent = Agent.process(:k => 2, :centroids => [c1, c2], *@node_list)
|
|
26
|
+
c1.position.should eql([2,2,2])
|
|
27
|
+
c2.position.should eql([8,8,8])
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../spec_helper")
|
|
2
|
+
|
|
3
|
+
describe Centroid do
|
|
4
|
+
|
|
5
|
+
before do
|
|
6
|
+
@c = Centroid.new([1,2,3])
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
it "should initialize with a position list" do
|
|
10
|
+
c = Centroid.new([1,2,3])
|
|
11
|
+
c.position.should eql([1,2,3])
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
it "should have node storage" do
|
|
15
|
+
@c.nodes.should be_is_a(Array)
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
it "should be able to add a node, to the centroid and the node (an optimization decision)" do
|
|
19
|
+
@n = new_node
|
|
20
|
+
@c.add_node(@n)
|
|
21
|
+
@n.centroid.should eql(@c)
|
|
22
|
+
@c.nodes.should be_include(@n)
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
it "should be able to add many nodes at a time" do
|
|
26
|
+
@c.add_nodes(new_node, new_node, new_node)
|
|
27
|
+
@c.nodes.size.should eql(3)
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
it "should be able to remove a node from the centroid and the node" do
|
|
31
|
+
@n = new_node
|
|
32
|
+
@c.add_node(@n)
|
|
33
|
+
@c.remove_node(@n)
|
|
34
|
+
@c.nodes.should_not be_include(@n)
|
|
35
|
+
@n.centroid.should be_nil
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
it "should be able to remove a list of nodes" do
|
|
39
|
+
n1, n2, n3 = new_node, new_node, new_node
|
|
40
|
+
@c.add_node(n1, n2, n3)
|
|
41
|
+
@c.nodes.should eql([n1, n2, n3])
|
|
42
|
+
@c.remove_nodes(n2, n3)
|
|
43
|
+
@c.nodes.should eql([n1])
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
it "should be able to rebalance the center of the centroid, based on the nodes" do
|
|
47
|
+
n1 = Node.new(1,2,3)
|
|
48
|
+
@c.add_node(n1)
|
|
49
|
+
@c.rebalance
|
|
50
|
+
@c.position.should eql([1,2,3])
|
|
51
|
+
n2 = Node.new(2,4,6)
|
|
52
|
+
@c.add_node(n2)
|
|
53
|
+
@c.rebalance
|
|
54
|
+
@c.position.should eql([3/2, 6/2, 9/2])
|
|
55
|
+
end
|
|
56
|
+
end
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../spec_helper")
|
|
2
|
+
|
|
3
|
+
describe Node do
|
|
4
|
+
|
|
5
|
+
before do
|
|
6
|
+
Node.clear_nodes!
|
|
7
|
+
@@boundaries = nil
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
context "instance methods" do
|
|
11
|
+
before do
|
|
12
|
+
@node = Node.new(1,2,3)
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
it "should record the position at initialization" do
|
|
16
|
+
@node.position.should eql([1,2,3])
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
it "should know about all new nodes on the class level" do
|
|
20
|
+
Node.nodes.should be_include(@node)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
it "should have a centroid accessor" do
|
|
24
|
+
@node.centroid = :whatever
|
|
25
|
+
@node.centroid.should eql(:whatever)
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# square root of 1^2 + 2^2 + 3^2 == square root of 14
|
|
29
|
+
it "should be able to calculate the euclidian distance between a node and its centroid" do
|
|
30
|
+
@c = Centroid.new([0,0,0])
|
|
31
|
+
@c.add_node(@node)
|
|
32
|
+
@node.distance.should eql(Math.sqrt(14))
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
it "should be able to scale the distance, so that some dimensions count more than others" do
|
|
36
|
+
@c = Centroid.new([0,0,0])
|
|
37
|
+
@node.distance(@c, [3,1,1]).should be_close(4.69, 0.001)
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
it "should be moveable from one centroid to another" do
|
|
41
|
+
c1 = Centroid.new([0,0,0])
|
|
42
|
+
c2 = Centroid.new([1,1,1])
|
|
43
|
+
c1.add_node(@node)
|
|
44
|
+
@node.move_to(c2)
|
|
45
|
+
c1.nodes.should_not be_include(@node)
|
|
46
|
+
c2.nodes.should be_include(@node)
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
it "should be able to clear all existing nodes for pseudo flexibility" do
|
|
50
|
+
n1 = Node.new(1,2,3)
|
|
51
|
+
Node.nodes.should be_include(n1)
|
|
52
|
+
Node.clear_nodes!
|
|
53
|
+
Node.nodes.should be_empty
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
it "should be able to supply the max and min boundaries for each dimension" do
|
|
59
|
+
n1 = Node.new 1
|
|
60
|
+
n2 = Node.new 5
|
|
61
|
+
Node.boundaries.should eql([[1,5]])
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
it "should be able to find boundaries with the right cardinality" do
|
|
65
|
+
Node.new(1,2,3)
|
|
66
|
+
Node.boundaries.should_not be_nil
|
|
67
|
+
Node.send(:class_variable_get, :@@boundaries).should eql([[1,1], [2,2], [3,3]])
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
it "should be able to generate a random centroid within the limits of all the nodes already created" do
|
|
71
|
+
n1 = Node.new 1
|
|
72
|
+
n2 = Node.new 5
|
|
73
|
+
100.times do
|
|
74
|
+
Node.random_centroid.should be_is_a(Centroid)
|
|
75
|
+
(Node.random_centroid.position.first >= 1).should be_true
|
|
76
|
+
(Node.random_centroid.position.first <= 5).should be_true
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
it "should be able to add a node with just the dimensions" do
|
|
81
|
+
Node.clear_nodes!
|
|
82
|
+
Node.add_node([1,2,3])
|
|
83
|
+
Node.nodes.first.position.should eql([1,2,3])
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
it "should keep the cardinality of the dimensions consitent" do
|
|
87
|
+
Node.new(1,2,3)
|
|
88
|
+
lambda{Node.new(1,2)}.should raise_error(ArgumentError, 'Node does not have the right number of positions')
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
it "should reset the boundaries any time a new node is added" do
|
|
92
|
+
Node.new(1,2,3)
|
|
93
|
+
Node.find_boundaries
|
|
94
|
+
Node.send(:class_variable_get, :@@boundaries).should eql([[1,1], [2,2], [3,3]])
|
|
95
|
+
Node.new(2,2,2)
|
|
96
|
+
Node.send(:class_variable_get, :@@boundaries).should be_nil
|
|
97
|
+
Node.find_boundaries
|
|
98
|
+
Node.send(:class_variable_get, :@@boundaries).should eql([[1,2], [2,2], [2,3]])
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
it "should be able to add many nodes at a time" do
|
|
102
|
+
Node.add_nodes [1,2,3], [4,5,3], [2,1,3]
|
|
103
|
+
Node.nodes.map{|n| n.position}.should eql([[1,2,3], [4,5,3], [2,1,3]])
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
it "should know the dimension size" do
|
|
107
|
+
Node.add_node [1,2,3]
|
|
108
|
+
Node.dimension_size.should eql(3)
|
|
109
|
+
Node.clear_nodes!
|
|
110
|
+
Node.add_node [1,2]
|
|
111
|
+
Node.dimension_size.should eql(2)
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
it "should be able to cluster the nodes" do
|
|
115
|
+
n1 = Node.new 1,1,1
|
|
116
|
+
n2 = Node.new 2,2,2
|
|
117
|
+
n3 = Node.new 3,3,3
|
|
118
|
+
n4 = Node.new 7,7,7
|
|
119
|
+
n5 = Node.new 8,8,8
|
|
120
|
+
n6 = Node.new 9,9,9
|
|
121
|
+
c1 = Centroid.new([2,2,2])
|
|
122
|
+
c2 = Centroid.new([8,8,8])
|
|
123
|
+
Node.cluster_to([c1, c2])
|
|
124
|
+
c1.nodes.should be_include(n1)
|
|
125
|
+
c1.nodes.should be_include(n2)
|
|
126
|
+
c1.nodes.should be_include(n3)
|
|
127
|
+
c2.nodes.should be_include(n4)
|
|
128
|
+
c2.nodes.should be_include(n5)
|
|
129
|
+
c2.nodes.should be_include(n6)
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
end
|
data/spec/kmeans_spec.rb
ADDED
data/spec/spec_helper.rb
ADDED
metadata
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: davidrichards-kmeans
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.3
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- David Richards
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
|
|
12
|
+
date: 2009-07-22 00:00:00 -07:00
|
|
13
|
+
default_executable: kmeans
|
|
14
|
+
dependencies:
|
|
15
|
+
- !ruby/object:Gem::Dependency
|
|
16
|
+
name: davidrichards-tegu_gears
|
|
17
|
+
type: :runtime
|
|
18
|
+
version_requirement:
|
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
20
|
+
requirements:
|
|
21
|
+
- - ">="
|
|
22
|
+
- !ruby/object:Gem::Version
|
|
23
|
+
version: "0"
|
|
24
|
+
version:
|
|
25
|
+
description: A simple KMeans algorithm
|
|
26
|
+
email: davidlamontrichards@gmail.com
|
|
27
|
+
executables:
|
|
28
|
+
- kmeans
|
|
29
|
+
extensions: []
|
|
30
|
+
|
|
31
|
+
extra_rdoc_files: []
|
|
32
|
+
|
|
33
|
+
files:
|
|
34
|
+
- README.rdoc
|
|
35
|
+
- VERSION.yml
|
|
36
|
+
- bin/kmeans
|
|
37
|
+
- lib/ext
|
|
38
|
+
- lib/ext/enumerable.rb
|
|
39
|
+
- lib/ext/object.rb
|
|
40
|
+
- lib/include_kmeans.rb
|
|
41
|
+
- lib/kmeans
|
|
42
|
+
- lib/kmeans/agent.rb
|
|
43
|
+
- lib/kmeans/centroid.rb
|
|
44
|
+
- lib/kmeans/node.rb
|
|
45
|
+
- lib/kmeans.rb
|
|
46
|
+
- spec/ext
|
|
47
|
+
- spec/ext/enumerable_spec.rb
|
|
48
|
+
- spec/ext/object_spec.rb
|
|
49
|
+
- spec/kmeans
|
|
50
|
+
- spec/kmeans/agent_spec.rb
|
|
51
|
+
- spec/kmeans/centroid_spec.rb
|
|
52
|
+
- spec/kmeans/node_spec.rb
|
|
53
|
+
- spec/kmeans_spec.rb
|
|
54
|
+
- spec/spec_helper.rb
|
|
55
|
+
has_rdoc: true
|
|
56
|
+
homepage: http://github.com/davidrichards/kmeans
|
|
57
|
+
post_install_message:
|
|
58
|
+
rdoc_options:
|
|
59
|
+
- --inline-source
|
|
60
|
+
- --charset=UTF-8
|
|
61
|
+
require_paths:
|
|
62
|
+
- lib
|
|
63
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
64
|
+
requirements:
|
|
65
|
+
- - ">="
|
|
66
|
+
- !ruby/object:Gem::Version
|
|
67
|
+
version: "0"
|
|
68
|
+
version:
|
|
69
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
70
|
+
requirements:
|
|
71
|
+
- - ">="
|
|
72
|
+
- !ruby/object:Gem::Version
|
|
73
|
+
version: "0"
|
|
74
|
+
version:
|
|
75
|
+
requirements: []
|
|
76
|
+
|
|
77
|
+
rubyforge_project:
|
|
78
|
+
rubygems_version: 1.2.0
|
|
79
|
+
signing_key:
|
|
80
|
+
specification_version: 2
|
|
81
|
+
summary: KMeans for clustering
|
|
82
|
+
test_files: []
|
|
83
|
+
|