carmenere 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/carmenere/algorithm.rb +28 -0
- data/lib/carmenere/cluster.rb +30 -0
- data/lib/carmenere/kmeans/algorithm.rb +57 -0
- data/lib/carmenere/kmeans/cluster.rb +6 -0
- data/lib/carmenere/node.rb +54 -0
- data/lib/carmenere/singlelinkage/algorithm.rb +41 -0
- data/lib/carmenere/singlelinkage/cluster.rb +33 -0
- data/lib/carmenere.rb +7 -0
- metadata +51 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: 7c713abdf23594300b13fe5ea26f28f668256315
|
|
4
|
+
data.tar.gz: a2fea35a639d442ce30e881292ebadc320525f8e
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 63fb5e4e6e823df504690f770a9c3c7b0f84899b1d6faec790d06052517b0ba9b358c45d6bb6953d1ea698b1fcaad7029c6087cadb484cab3286ab6829b895eb
|
|
7
|
+
data.tar.gz: 47a6dfbc4b553adaaf09febfefedad64ca19f5d5de9a89206f80bf1062bf369a5610be5d48ef229e5a548da8442dfc0446e2ff00763f7664c1eba75ccefd559b
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
require 'set'

module Carmenere
  # Abstract base class for the clustering algorithms.
  #
  # On construction it stores the target cluster count, a frozen Set of the
  # nodes, one single-node Cluster per node, and a precomputed matrix of
  # pairwise node distances. Subclasses implement #run.
  class Algorithm
    # @return [Set] frozen set of the nodes handed to the constructor
    attr_reader :nodes

    # @return [Hash] nested hash where distance_matrix[i][j] holds
    #   i.distance(j) for every ordered pair of distinct nodes
    attr_reader :distance_matrix

    # @param k [Integer] stored as @k; how it is used is up to the subclass
    # @param nodes [Enumerable] nodes to cluster; each must respond to
    #   #distance(other)
    def initialize(k, nodes)
      @k = k
      @nodes = Set.new(nodes).freeze
      # Every node starts out in a cluster of its own.
      @clusters = Set.new(@nodes.map { |node| Cluster.new([node]) })
      @distance_matrix = @nodes.each_with_object({}) do |i, rows|
        rows[i] = @nodes.each_with_object({}) do |j, row|
          # A node has no entry for itself.
          row[j] = i.distance(j) if i != j
        end
      end
    end

    # If a block is provided, it should yield each iteration of the clusters,
    # otherwise, just returns the final state of the space.
    #
    # @raise [NotImplementedError] always; subclasses must override
    def run
      raise NotImplementedError
    end
  end
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
require 'set'
|
|
2
|
+
|
|
3
|
+
module Carmenere
  # A Set of nodes that can measure its distance to another cluster and
  # render itself as "{a, b, c}".
  class Cluster < Set
    # Minimum distance to other cluster.
    #
    # Calls i.distance(j) for every member i of this cluster and every
    # member j of +other+ and keeps the smallest result.
    #
    # @param other [Enumerable] the other cluster
    # @return [Object, nil] the smallest pairwise distance, or nil when
    #   either cluster is empty
    def distance(other)
      closest = nil
      each do |mine|
        other.each do |theirs|
          d = mine.distance(theirs)
          closest = d if closest.nil? || closest > d
        end
      end
      closest
    end

    # @return [String] the members' #to_s values, comma-separated, wrapped
    #   in braces. Every member contributes a slot, even when its #to_s is
    #   empty.
    def to_s
      "{#{map(&:to_s).join(', ')}}"
    end

    # @return [String] same text as #to_s
    def inspect
      to_s
    end
  end
end
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
require 'set'
|
|
2
|
+
|
|
3
|
+
module Carmenere::KMeans
|
|
4
|
+
|
|
5
|
+
# Tells whether two centroid sets describe the same centroids.
#
# The sets match when they hold the same number of centroids and every
# old centroid has at least one new centroid with equal attributes
# (Node#attr_eql?).
#
# @param old_centroids [Set<Carmenere::Node>]
# @param new_centroids [Set<Carmenere::Node>]
# @return [Boolean]
# @raise [TypeError] if either argument is not a Set made only of
#   Carmenere::Node instances
def self.centroids_eql?(old_centroids, new_centroids)
  [old_centroids, new_centroids].each do |centroids|
    next if centroids.is_a?(Set) && centroids.all? { |c| c.is_a?(Carmenere::Node) }

    raise TypeError, "#{centroids.class} is not Set of Node"
  end

  old_centroids.count == new_centroids.count &&
    old_centroids.all? { |old| new_centroids.any? { |fresh| old.attr_eql?(fresh) } }
end
|
|
17
|
+
|
|
18
|
+
# K-means clustering: nodes are repeatedly assigned to their nearest
# centroid and the centroids are recomputed from the resulting clusters
# until Carmenere::KMeans.centroids_eql? reports no change.
class Algorithm < Carmenere::Algorithm
  # @return [Enumerable] the initial centroids passed to the constructor
  #   (not updated by #run)
  attr_reader :centroids

  # @param centroids [Enumerable<Carmenere::Node>] initial centroids; their
  #   count is forwarded to the superclass as k
  # @param nodes [Enumerable<Carmenere::Node>] nodes to cluster
  # @yieldparam cluster [Carmenere::KMeans::Cluster] a cluster whose mean is
  #   needed
  # @yieldreturn [Carmenere::Node] the new centroid for that cluster
  def initialize(centroids, nodes, &mean)
    super(centroids.count, nodes)
    @mean = mean
    @centroids = centroids
  end

  # Runs the algorithm to convergence.
  #
  # @yieldparam centroid_clusters [Hash] the centroid => cluster mapping
  #   produced by each iteration
  # @return [Hash] the centroid => cluster mapping of the last iteration;
  #   an empty hash when no iteration ran (e.g. no centroids were given)
  def run
    previous = Set.new
    current = Set.new(@centroids)
    centroid_clusters = {}
    until Carmenere::KMeans.centroids_eql?(previous, current)
      previous = current
      # Nearest centroid for every node.
      nearest = @nodes.each_with_object({}) do |node, assigned|
        assigned[node] = current.min_by { |centroid| node.distance(centroid) }
      end
      # Group the nodes under each centroid, matching centroids by attributes.
      centroid_clusters = current.each_with_object({}) do |centroid, clusters|
        members = nearest.select { |_, chosen| chosen.attr_eql?(centroid) }.keys
        clusters[centroid] = Carmenere::KMeans::Cluster.new(members)
      end
      yield centroid_clusters if block_given?
      # The next centroids are the means of the clusters just built.
      current = Set.new(centroid_clusters.values.map { |cluster| @mean.call(cluster) })
    end
    centroid_clusters
  end
end
|
|
56
|
+
|
|
57
|
+
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
module Carmenere
  # Represents any node that can be used regardless of algorithm or cluster.
  # The node's attributes, problem domain-specific, are stored in
  # Node#attributes.
  #
  # Nodes are ordered and hashed by name. Distances are memoized per pair
  # of nodes; the actual computation is supplied by the block given to
  # #distance.
  class Node
    include Comparable

    # @return [Object] the node's name, used for ordering, hashing and #to_s
    attr_reader :name

    # @return [Object] domain-specific attributes, compared by #attr_eql?
    attr_reader :attributes

    # @param name [Object] identifier of the node
    # @param attributes [Object] domain-specific payload
    def initialize(name, attributes)
      @name = name
      @cache = {}
      @attributes = attributes
    end

    # Orders nodes by name.
    #
    # @param node [Object] the object to compare against
    # @return [Integer, nil] result of comparing the names, or nil when
    #   +node+ has no name to compare with (so that e.g. `node == nil` is
    #   false instead of raising)
    def <=>(node)
      return nil unless node.respond_to?(:name)

      @name <=> node.name
    end

    # @return [Integer] hash of the name
    def hash
      @name.hash
    end

    # @return [String] the name as a String
    def to_s
      @name.to_s
    end

    # @return [String] same text as #to_s
    def inspect
      to_s
    end

    # Returns the distance to +other+, computing it at most once per pair.
    #
    # On a cache miss the block is called and its value is stored in the
    # caches of both nodes, so the reverse lookup is a hit too. Falsy
    # results (nil/false) are never treated as cached.
    #
    # @param other [Node] the node to measure against
    # @yieldreturn [Object] the distance between the two nodes
    # @return [Object] the cached or freshly computed distance
    # @raise [LocalJumpError] on a cache miss when no block is given
    def distance(other)
      cached = @cache[other]
      return cached if cached

      @cache[other] = other.cache[self] = yield
    end

    # @param other [Node]
    # @return [Boolean] whether both nodes carry equal attributes
    def attr_eql?(other)
      @attributes == other.attributes
    end

    protected

    # Exposed to other nodes only, so #distance can fill both caches.
    attr_reader :cache
  end
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
module Carmenere::SingleLinkage
  # Agglomerative single-linkage clustering: repeatedly merges the two
  # closest clusters. The constructor, #nodes and #distance_matrix are
  # inherited unchanged from Carmenere::Algorithm.
  class Algorithm < Carmenere::Algorithm
    # Performs up to (nodes.size - k) merges.
    #
    # Stops early when fewer than two clusters remain, so a k below 1 no
    # longer tries to merge a non-existent pair.
    #
    # @yieldparam clusters [Array] a snapshot array of the clusters after
    #   each merge
    # @return [Array] the clusters after the last merge
    def run
      (nodes.size - @k).times do
        closest = closest_clusters
        break if closest.nil?

        a, b = closest
        @clusters.delete(a)
        @clusters.delete(b)
        @clusters.add(Cluster.new(a | b))
        yield @clusters.to_a if block_given?
      end
      @clusters.to_a
    end

    # Finds the pair of distinct clusters with the smallest distance.
    #
    # @return [Array, nil] [cluster_a, cluster_b, distance] for the first
    #   closest pair found, or nil when there are fewer than two clusters
    def closest_clusters
      best = nil
      @clusters.each do |i|
        @clusters.each do |j|
          # A cluster is never paired with itself.
          next if i == j

          d = i.distance(j)
          best = [i, j, d] if best.nil? || best[2] > d
        end
      end
      best
    end
  end
end
|
|
41
|
+
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
module Carmenere::SingleLinkage
  # Cluster type created by the single-linkage algorithm when it merges two
  # clusters.
  #
  # #distance (minimum pairwise node distance), #to_s and #inspect are
  # inherited from Carmenere::Cluster, which already implements them the
  # same way this class used to duplicate.
  class Cluster < Carmenere::Cluster
    # Not meaningful on a cluster.
    #
    # @raise [NotImplementedError] always
    def run
      raise NotImplementedError
    end
  end
end
|
|
33
|
+
|
data/lib/carmenere.rb
ADDED
metadata
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: carmenere
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Johnny Lee Othon
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2015-04-18 00:00:00.000000000 Z
|
|
12
|
+
dependencies: []
|
|
13
|
+
description: Cluster algorithms
|
|
14
|
+
email: jleeothon@gmail.com
|
|
15
|
+
executables: []
|
|
16
|
+
extensions: []
|
|
17
|
+
extra_rdoc_files: []
|
|
18
|
+
files:
|
|
19
|
+
- lib/carmenere.rb
|
|
20
|
+
- lib/carmenere/algorithm.rb
|
|
21
|
+
- lib/carmenere/cluster.rb
|
|
22
|
+
- lib/carmenere/kmeans/algorithm.rb
|
|
23
|
+
- lib/carmenere/kmeans/cluster.rb
|
|
24
|
+
- lib/carmenere/node.rb
|
|
25
|
+
- lib/carmenere/singlelinkage/algorithm.rb
|
|
26
|
+
- lib/carmenere/singlelinkage/cluster.rb
|
|
27
|
+
homepage: https://github.com/jleeothon/carmenere
|
|
28
|
+
licenses:
|
|
29
|
+
- MIT
|
|
30
|
+
metadata: {}
|
|
31
|
+
post_install_message:
|
|
32
|
+
rdoc_options: []
|
|
33
|
+
require_paths:
|
|
34
|
+
- lib
|
|
35
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - ">="
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: '0'
|
|
40
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
41
|
+
requirements:
|
|
42
|
+
- - ">="
|
|
43
|
+
- !ruby/object:Gem::Version
|
|
44
|
+
version: '0'
|
|
45
|
+
requirements: []
|
|
46
|
+
rubyforge_project:
|
|
47
|
+
rubygems_version: 2.2.2
|
|
48
|
+
signing_key:
|
|
49
|
+
specification_version: 4
|
|
50
|
+
summary: Single Linkage and K-means algorithms for clustering
|
|
51
|
+
test_files: []
|