k_means 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +5 -0
- data/LICENSE +20 -0
- data/README.rdoc +50 -0
- data/Rakefile +57 -0
- data/VERSION +1 -0
- data/benchmark/benchmark_ai4r.rb +27 -0
- data/k_means.gemspec +65 -0
- data/lib/centroid.rb +49 -0
- data/lib/ext/enumerable.rb +10 -0
- data/lib/ext/object.rb +16 -0
- data/lib/k_means.rb +78 -0
- data/lib/node.rb +44 -0
- data/profiling/profile.rb +12 -0
- data/test/ext/test_enumerable.rb +11 -0
- data/test/ext/test_object.rb +18 -0
- data/test/helper.rb +13 -0
- data/test/test_centroid.rb +45 -0
- data/test/test_k_means.rb +24 -0
- data/test/test_node.rb +43 -0
- metadata +80 -0
data/.document
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 reddavis
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
= KMeans
|
2
|
+
|
3
|
+
Attempting to build a fast, memory efficient K-Means program.
|
4
|
+
|
5
|
+
== Install
|
6
|
+
|
7
|
+
gem sources -a http://gems.github.com
|
8
|
+
sudo gem install reddavis-k_means
|
9
|
+
|
10
|
+
== How To Use
|
11
|
+
require 'rubygems'
|
12
|
+
require 'k_means'
|
13
|
+
|
14
|
+
data = [[1,1], [1,2], [1,1], [1000, 1000], [500, 500]]
|
15
|
+
kmeans = KMeans.new(data, :centroids => 2)
|
16
|
+
kmeans.inspect # Use kmeans.view to get hold of the un-inspected array
|
17
|
+
=> [[3, 4], [0, 1, 2]]
|
18
|
+
|
19
|
+
== Benchmarks
|
20
|
+
|
21
|
+
# 1000 records with 50 dimensions
|
22
|
+
data = Array.new(1000) {Array.new(50) {rand(10)}}
|
23
|
+
ai4r_data = Ai4r::Data::DataSet.new(:data_items=> data)
|
24
|
+
|
25
|
+
# Clustering can happen in magical ways
|
26
|
+
# so let's run it multiple times
|
27
|
+
n = 5
|
28
|
+
|
29
|
+
Benchmark.bm do |x|
|
30
|
+
x.report('KMeans') do
|
31
|
+
n.times { KMeans.new(data) }
|
32
|
+
end
|
33
|
+
x.report("Ai4R") do
|
34
|
+
n.times do
|
35
|
+
b = Ai4r::Clusterers::KMeans.new
|
36
|
+
b.build(ai4r_data, 4)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
user system total real
|
41
|
+
KMeans 15.960000 0.030000 15.990000 ( 16.062639)
|
42
|
+
Ai4R 70.230000 0.180000 70.410000 ( 70.704843)
|
43
|
+
|
44
|
+
== Thanks
|
45
|
+
|
46
|
+
* David Richards - For his code reviews and all-round helpfulness. - http://github.com/davidrichards
|
47
|
+
|
48
|
+
== Copyright
|
49
|
+
|
50
|
+
Copyright (c) 2009 Red Davis. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
# Build, test, coverage and documentation tasks for the k_means gem.
require 'rubygems'
require 'rake'

# Packaging and release tasks come from Jeweler. The remaining tasks are still
# defined when Jeweler is not installed.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "k_means"
    gem.summary = %Q{K Means algorithm}
    gem.description = %Q{Attempting to create a fast, memory efficient KMeans}
    gem.email = "reddavis@gmail.com"
    gem.homepage = "http://github.com/reddavis/k_means"
    gem.authors = ["reddavis"]
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

# `rake test` runs every test/**/test_*.rb file with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# `rake rcov` is only functional when rcov is installed; otherwise it aborts
# with an installation hint.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

task :default => :test

# Newer Rake releases no longer ship 'rake/rdoctask'; the task lives in RDoc
# itself. Prefer that one and fall back to the legacy location so the Rakefile
# loads on both old and new installations.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # Strip the trailing newline of the VERSION file so it does not end up in the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "k_means #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.2
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# Times KMeans clustering on random data. The Ai4R comparison is kept at the
# bottom but is currently commented out.
require 'benchmark'
require 'rubygems'
require 'benchmarker'
require 'ai4r'
require File.dirname(__FILE__) + '/../lib/k_means'

# 500 records with 50 dimensions, each value an integer from 0 to 9
points = Array.new(500) do
  Array.new(50) { rand(10) }
end

ai4r_points = Ai4r::Data::DataSet.new(:data_items => points)

# Centroids start at random positions, so repeat the run a few times
runs = 2

Benchmarker.go('lib') do
  Benchmark.bm do |bench|
    bench.report('Mine') do
      runs.times { KMeans.new(points) }
    end
    # bench.report("Ai4R") do
    #   runs.times do
    #     clusterer = Ai4r::Clusterers::KMeans.new
    #     clusterer.build(ai4r_points, 4)
    #   end
    # end
  end
end
|
data/k_means.gemspec
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{k_means}
|
8
|
+
s.version = "0.0.2"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["reddavis"]
|
12
|
+
s.date = %q{2009-11-25}
|
13
|
+
s.description = %q{Attempting to create a fast, memory efficient KMeans}
|
14
|
+
s.email = %q{reddavis@gmail.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
".gitignore",
|
22
|
+
"LICENSE",
|
23
|
+
"README.rdoc",
|
24
|
+
"Rakefile",
|
25
|
+
"VERSION",
|
26
|
+
"benchmark/benchmark_ai4r.rb",
|
27
|
+
"k_means.gemspec",
|
28
|
+
"lib/centroid.rb",
|
29
|
+
"lib/ext/enumerable.rb",
|
30
|
+
"lib/ext/object.rb",
|
31
|
+
"lib/k_means.rb",
|
32
|
+
"lib/node.rb",
|
33
|
+
"profiling/profile.rb",
|
34
|
+
"test/ext/test_enumerable.rb",
|
35
|
+
"test/ext/test_object.rb",
|
36
|
+
"test/helper.rb",
|
37
|
+
"test/test_centroid.rb",
|
38
|
+
"test/test_k_means.rb",
|
39
|
+
"test/test_node.rb"
|
40
|
+
]
|
41
|
+
s.homepage = %q{http://github.com/reddavis/k_means}
|
42
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
43
|
+
s.require_paths = ["lib"]
|
44
|
+
s.rubygems_version = %q{1.3.5}
|
45
|
+
s.summary = %q{K Means algorithm}
|
46
|
+
s.test_files = [
|
47
|
+
"test/ext/test_enumerable.rb",
|
48
|
+
"test/ext/test_object.rb",
|
49
|
+
"test/helper.rb",
|
50
|
+
"test/test_centroid.rb",
|
51
|
+
"test/test_k_means.rb",
|
52
|
+
"test/test_node.rb"
|
53
|
+
]
|
54
|
+
|
55
|
+
if s.respond_to? :specification_version then
|
56
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
57
|
+
s.specification_version = 3
|
58
|
+
|
59
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
60
|
+
else
|
61
|
+
end
|
62
|
+
else
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
data/lib/centroid.rb
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# A cluster centre: a point with the same number of dimensions as the nodes,
# which is moved to the mean position of the nodes assigned to it.
class Centroid

  class << self
    # Builds +amount+ centroids at random positions inside the bounding box
    # of +nodes+.
    #
    # amount - number of centroids to create.
    # nodes  - non-empty Array of objects responding to #position with an
    #          Array of numbers (every node is expected to have the same
    #          number of dimensions as the first one).
    #
    # Returns an Array of Centroid.
    # Raises ArgumentError when +nodes+ is empty.
    def create_centroids(amount, nodes)
      raise ArgumentError, "cannot create centroids without nodes" if nodes.empty?

      ranges = create_ranges(nodes, nodes[0].position.size)
      (1..amount).map do
        # rand_between is provided by the Object extension in ext/object.rb
        new(ranges.map { |low, high| rand_between(low, high) })
      end
    end

    private

    # Computes the [minimum, maximum] value seen in each dimension.
    #
    # Both bounds are seeded from the first node instead of 0.0; seeding with
    # zero stretched the range to include 0 whenever all values of a dimension
    # were positive (or all negative), placing centroids outside the data.
    # Bounds are stored as Floats so centroids are drawn from a continuous
    # range.
    def create_ranges(nodes, dimensions)
      seed = nodes[0].position
      ranges = Array.new(dimensions) do |index|
        value = seed[index].to_f
        [value, value]
      end

      nodes.each do |node|
        node.position.each_with_index do |value, index|
          bounds = ranges[index]
          # Bottom range
          bounds[0] = value.to_f if value < bounds[0]
          # Top range
          bounds[1] = value.to_f if value > bounds[1]
        end
      end
      ranges
    end
  end

  attr_accessor :position

  # position - Array of numbers, one per dimension.
  def initialize(position)
    @position = position
  end

  # Moves the centroid to the mean position of +nodes+ (the per-dimension
  # average of their coordinates). The centroid is left where it is when
  # +nodes+ is empty, since there is nothing to average.
  def reposition(nodes)
    return if nodes.empty?

    # Float accumulators keep the division below from truncating integers.
    totals = [0.0] * nodes[0].position.size
    nodes.each do |node|
      node.position.each_with_index do |value, index|
        totals[index] += value
      end
    end
    @position = totals.map { |total| total / nodes.size }
  end

end
|
data/lib/ext/object.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
class Object

  # Simpler way to get a random number between two values. The bounds may be
  # given in either order.
  #
  # When both bounds are Integers the result is an Integer drawn from the
  # inclusive range. Any other numeric bound (Float, Rational, ...) takes the
  # continuous path in rand_in_floats; previously only Floats did, so e.g.
  # Rational bounds went through the integer arithmetic below.
  def rand_between(a, b)
    return rand_in_floats(a, b) unless a.is_a?(Integer) && b.is_a?(Integer)

    # +1 makes the upper bound reachable, since rand(n) returns 0...n
    span = (a - b).abs + 1
    rand(span) + [a, b].min
  end

  # Handles non-integers: scales rand (a Float from 0 up to, but not
  # including, 1) to the distance between the bounds and offsets it by the
  # smaller bound.
  def rand_in_floats(a, b)
    span = (a - b).abs
    (rand * span) + [a, b].min
  end

end
|
data/lib/k_means.rb
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
$: << File.dirname(__FILE__)
|
2
|
+
require 'centroid'
|
3
|
+
require 'node'
|
4
|
+
require 'ext/enumerable'
|
5
|
+
require 'ext/object'
|
6
|
+
|
7
|
+
# Clusters a data set with the k-means procedure: nodes are assigned to their
# nearest centroid and centroids are moved to the mean of their nodes, until no
# node changes centroid or the iteration cap is reached.
#
# Clustering runs inside the constructor; the result is read with #view.
class KMeans

  attr_reader :centroids, :nodes

  # data    - Array of positions, each an Array of numbers.
  # options - Hash of optional settings:
  #           :centroids      - number of clusters (default 4).
  #           :max_iterations - upper bound on assign/reposition rounds
  #                             (default 100).
  #           :verbose        - pass true to print the iteration count.
  def initialize(data, options={})
    k = options[:centroids] || 4
    @max_iterations = options[:max_iterations] || 100
    # Only a literal true switches logging on.
    @verbose = options[:verbose] == true

    @nodes = Node.create_nodes(data)
    @centroids = Centroid.create_centroids(k, @nodes)

    perform_cluster_process
  end

  # String form of the clusters (see #view).
  def inspect
    @centroid_pockets.inspect
  end

  # Returns an Array with one Array per centroid, each holding the indexes
  # (into the original data) of the nodes assigned to that centroid.
  def view
    @centroid_pockets
  end

  private

  # Alternates assignment and repositioning until an assignment round changes
  # nothing or @max_iterations rounds have run, then records the result.
  def perform_cluster_process
    iterations = 0
    updates = 1
    while updates > 0 && iterations < @max_iterations
      iterations += 1
      verbose_message("Iteration #{iterations}")
      updates = update_nodes
      reposition_centroids
    end
    place_nodes_into_pockets
  end

  # This creates an array of arrays
  # Each internal array represents a centroid
  # and each entry in it is the index of a node assigned to that centroid
  def place_nodes_into_pockets
    # Map each centroid (by identity) to its pocket so the nodes are walked
    # once instead of once per centroid.
    pocket_of = {}
    @centroids.each_with_index do |centroid, centroid_index|
      pocket_of[centroid.object_id] = centroid_index
    end

    pockets = Array.new(@centroids.size) { [] }
    @nodes.each_with_index do |node, node_index|
      pocket = pocket_of[node.closest_centroid.object_id]
      # A node without an assigned centroid belongs to no pocket.
      pockets[pocket] << node_index if pocket
    end
    @centroid_pockets = pockets
  end

  # Lets every node pick its closest centroid.
  # Returns the sum of the nodes' return values (each 0 or 1).
  def update_nodes
    @nodes.inject(0) do |sum, node|
      sum + node.update_closest_centroid(@centroids)
    end
  end

  # Hands every centroid the nodes currently assigned to it (possibly none).
  def reposition_centroids
    # Group the nodes in a single pass, keyed by centroid identity.
    members = Hash.new { |hash, key| hash[key] = [] }
    @nodes.each { |node| members[node.closest_centroid.object_id] << node }

    @centroids.each do |centroid|
      centroid.reposition(members[centroid.object_id])
    end
  end

  def verbose_message(message)
    puts message if @verbose
  end

end
|
data/lib/node.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# A single data point that remembers which centroid it is currently assigned
# to and how far away that centroid is.
class Node

  class << self
    # Wraps every position in +data+ in a Node, preserving order.
    def create_nodes(data)
      data.map { |position| new(position) }
    end
  end

  attr_accessor :position, :best_distance, :closest_centroid

  # position - Array of numbers, one per dimension.
  def initialize(position)
    @position = position
  end

  # Assigns the node to the nearest of +centroids+; the current centroid is
  # kept when nothing is strictly closer.
  #
  # The distance to the current centroid is re-measured on every call.
  # Centroids move between calls, so comparing against the distance stored
  # by the previous call would compare against a position that no longer
  # exists and could pin the node to a centroid that has moved away.
  #
  # Returns 1 when the assigned centroid changed (including the very first
  # assignment), 0 otherwise. An empty +centroids+ list changes nothing.
  def update_closest_centroid(centroids)
    return 0 if centroids.empty?

    previous = @closest_centroid
    @closest_centroid ||= centroids.first
    @best_distance = calculate_distance(@closest_centroid)

    centroids.each do |centroid|
      distance = calculate_distance(centroid)
      if distance < @best_distance
        @closest_centroid = centroid
        @best_distance = distance
      end
    end

    @closest_centroid.equal?(previous) ? 0 : 1
  end

  private

  # Distance from this node to +centroid+. Relies on #euclidean_distance being
  # available on the position array (k_means.rb requires 'ext/enumerable',
  # which presumably defines it).
  def calculate_distance(centroid)
    @position.euclidean_distance(centroid.position)
  end

end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# Profiles a single KMeans run over 100 random two-dimensional points and
# writes the report to standard output.
require File.dirname(__FILE__) + '/../lib/k_means'
require 'rubygems'
require 'ruby-prof'

points = Array.new(100) do
  Array.new(2) { rand }
end

profile = RubyProf.profile do
  KMeans.new(points)
end

RubyProf::FlatPrinter.new(profile).print(STDOUT, 0)
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Tests for the rand_between helper added to Object.
class TestObject < Test::Unit::TestCase
  context "Random Number Between" do

    should "return a number between 10 and 20" do
      n = rand_between(10, 20)
      assert_between(10..20, n)
    end

    should "return a float between 10.0 and 10.9" do
      n = rand_between(10.0, 10.9)
      # Check against the bounds that were actually requested; the previous
      # 10..11 range would also have accepted values above 10.9.
      assert_between(10.0..10.9, n)
      assert_kind_of Float, n
    end

  end
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'test/unit'
|
3
|
+
require 'shoulda'
|
4
|
+
|
5
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
6
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
7
|
+
require 'k_means'
|
8
|
+
|
9
|
+
class Test::Unit::TestCase
  # Asserts that +n+ lies within +range+ and reports both values on failure.
  def assert_between(range, n)
    assert range === n, "Expected #{n.inspect} to be within #{range.inspect}"
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Tests for Centroid creation and repositioning.
class TestCentroid < Test::Unit::TestCase
  context "A Centroid" do

    setup do
      @centroid = Centroid.new([1, 2, 3])
    end

    should "return an array" do
      assert_kind_of Array, @centroid.position
    end

    should "return an array of centroids" do
      centroids = Centroid.create_centroids(4, create_nodes)
      assert_kind_of Array, centroids
      assert_kind_of Centroid, centroids.first
    end

    should "create 4 centroids" do
      centroids = Centroid.create_centroids(4, create_nodes)
      assert_equal 4, centroids.size
    end

    should "reposition nodes" do
      # Per-dimension mean of [1,2,3] and [4,5,6], written out literally so
      # the test does not repeat the algorithm it is checking.
      @centroid.reposition(create_nodes)
      assert_equal [2.5, 3.5, 4.5], @centroid.position
    end

  end

  private

  # Two three-dimensional nodes shared by the tests above.
  def create_nodes
    Node.create_nodes([[1,2,3], [4,5,6]])
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Tests for the public surface of KMeans.
class TestKMeans < Test::Unit::TestCase
  context "A KMeans Instance" do

    setup do
      @data = Array.new(200) {Array.new(2) {rand}}
      @kmeans = KMeans.new(@data, :centroids => 2)
    end

    # #inspect is the string form of the clusters; #view is the raw array.
    should "inspect as a string" do
      assert_kind_of String, @kmeans.inspect
    end

    should "return an array with one pocket per centroid" do
      assert_kind_of Array, @kmeans.view
      assert_equal 2, @kmeans.view.size
    end

    should "have 2 centroids" do
      assert_equal 2, @kmeans.centroids.size
    end

    should "have 200 nodes" do
      assert_equal 200, @kmeans.nodes.size
    end

  end
end
|
data/test/test_node.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Tests for Node creation and centroid assignment.
class TestNode < Test::Unit::TestCase
  context "A Data Instance" do

    setup do
      @node = Node.new([4, 4])
    end

    should "return an array" do
      assert_kind_of Array, @node.position
    end

    should "create an array of nodes" do
      data = Array.new(10) {Array.new(2) {rand}}
      nodes = Node.create_nodes(data)
      assert_kind_of Array, nodes
    end

    should "create 10 nodes" do
      data = Array.new(10) {Array.new(2) {rand}}
      nodes = Node.create_nodes(data)
      assert_equal 10, nodes.size
    end

    should "initialize closest centroid" do
      # A fresh node has no centroid until the first update.
      assert_nil @node.closest_centroid
      centroids = [Centroid.new([4, 4]), Centroid.new([5, 4])]
      @node.update_closest_centroid(centroids)
      assert_not_equal nil, @node.closest_centroid
    end

    should "update closest centroid" do
      centroids = [Centroid.new([5, 4])]
      @node.update_closest_centroid(centroids)
      original = @node.closest_centroid
      # A centroid sitting exactly on the node must win over the previous one.
      @node.update_closest_centroid([Centroid.new([4,4])])
      assert_not_equal original, @node.closest_centroid
      assert_equal 0, @node.best_distance
    end

  end
end
|
metadata
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: k_means
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- reddavis
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-11-25 00:00:00 +00:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: Attempting to create a fast, memory efficient KMeans
|
17
|
+
email: reddavis@gmail.com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- LICENSE
|
24
|
+
- README.rdoc
|
25
|
+
files:
|
26
|
+
- .document
|
27
|
+
- .gitignore
|
28
|
+
- LICENSE
|
29
|
+
- README.rdoc
|
30
|
+
- Rakefile
|
31
|
+
- VERSION
|
32
|
+
- benchmark/benchmark_ai4r.rb
|
33
|
+
- k_means.gemspec
|
34
|
+
- lib/centroid.rb
|
35
|
+
- lib/ext/enumerable.rb
|
36
|
+
- lib/ext/object.rb
|
37
|
+
- lib/k_means.rb
|
38
|
+
- lib/node.rb
|
39
|
+
- profiling/profile.rb
|
40
|
+
- test/ext/test_enumerable.rb
|
41
|
+
- test/ext/test_object.rb
|
42
|
+
- test/helper.rb
|
43
|
+
- test/test_centroid.rb
|
44
|
+
- test/test_k_means.rb
|
45
|
+
- test/test_node.rb
|
46
|
+
has_rdoc: true
|
47
|
+
homepage: http://github.com/reddavis/k_means
|
48
|
+
licenses: []
|
49
|
+
|
50
|
+
post_install_message:
|
51
|
+
rdoc_options:
|
52
|
+
- --charset=UTF-8
|
53
|
+
require_paths:
|
54
|
+
- lib
|
55
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: "0"
|
60
|
+
version:
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
requirements:
|
63
|
+
- - ">="
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: "0"
|
66
|
+
version:
|
67
|
+
requirements: []
|
68
|
+
|
69
|
+
rubyforge_project:
|
70
|
+
rubygems_version: 1.3.5
|
71
|
+
signing_key:
|
72
|
+
specification_version: 3
|
73
|
+
summary: K Means algorithm
|
74
|
+
test_files:
|
75
|
+
- test/ext/test_enumerable.rb
|
76
|
+
- test/ext/test_object.rb
|
77
|
+
- test/helper.rb
|
78
|
+
- test/test_centroid.rb
|
79
|
+
- test/test_k_means.rb
|
80
|
+
- test/test_node.rb
|