data_mining 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5da253c99db083a7e8a8694ef479299269b95c87
4
+ data.tar.gz: 873c011a2e26d9c1c11165a7ce9aa401d0d50250
5
+ SHA512:
6
+ metadata.gz: 6f2f01babec563518f49ddd12ac23a55865245ab6b1c9862b88836d5544eb2be6ca49574f8261ed6f0d215e5880c14804dc5f54ae1757220b1bc41a71eeb780f
7
+ data.tar.gz: 027e465483ca27939f8b43167ae9aacfd3cd709545e9bfb38c3a5cc31c0d39d5f2e1b23ec74d4c3319e01c5b4552c2f9ec41f15140ae5cb34e4b6ea73185be32
@@ -0,0 +1,105 @@
1
module DataMining
  # Density-based clustering / outlier-detection algorithm (DBSCAN).
  #
  # Points that have enough neighbors within +radius+ are "core" points and
  # seed a cluster; every point reachable from a core point joins that
  # cluster. Points that end up in no cluster are reported as outliers.
  #
  # The processing order is randomized (the input is shuffled), so cluster
  # numbering and the cluster chosen for a border point that is reachable
  # from more than one cluster may differ between runs.
  class DBScan
    # Find clusters and outliers
    #
    # Example:
    #   >> input = [[:p1, 1], [:p2, 2], [:p3, 10]]
    #   >> radius = 3
    #   >> min_points = 2
    #   >> dbscan = DataMining::DBScan.cluster(input, radius, min_points)
    #   >> dbscan.build!
    #   >>
    #   >> dbscan.clusters # gives array of clusters found, e.g. [{ 1 => [:p1, :p2] }]
    #   >>
    #   >> dbscan.outliers # gives array of outlier points found (here the point :p3)
    #
    # Arguments:
    #   data: (array of arrays, like [[:id, value], [:id2, value2]])
    #   radius: (integer) maximum distance between two neighboring values
    #   min_points: (integer) minimum cluster size, the point itself included
    #
    # Returns a DBScan instance; call #build! on it to run the algorithm.
    def self.cluster(data, radius, min_points)
      new(data, radius, min_points)
    end

    def initialize(data, radius, min_points)
      @data = data.map { |id, value| DataMining::Point.new(id, value) }
      @radius = radius
      @min_points = min_points
      @current_cluster_id = 0
      @clusters = {}
      @unvisited_points = @data.shuffle
    end

    # Runs the clustering and returns the clusters (see #clusters).
    # Calling it again is a no-op that returns the same result.
    def build!
      dbscan
      clusters
    end

    # Returns the points (DataMining::Point objects) that belong to no cluster.
    def outliers
      @data.reject(&:assigned_to_cluster?)
    end

    # Returns an array with one single-entry hash per cluster, mapping the
    # cluster number to the ids of its member points.
    def clusters
      @clusters.map { |cluster_id, points| { cluster_id => points.map(&:id) } }
    end

    private

    # Visits every point once; each unvisited core point seeds a new cluster.
    def dbscan
      while (point = @unvisited_points.pop)
        # Points swallowed by an earlier cluster expansion are already done.
        next if point.visited?

        point.visit!
        neighborhood = get_neighborhood(point)
        create_cluster(point, neighborhood) if core_object?(neighborhood)
      end
    end

    # Opens a new cluster seeded by +point+ and grows it from +neighborhood+.
    def create_cluster(point, neighborhood)
      @current_cluster_id += 1
      @clusters[@current_cluster_id] = []
      add_to_current_cluster(point)
      fill_current_cluster(neighborhood)
    end

    # Grows the current cluster with everything density-reachable from
    # +neighborhood+. Uses a work queue instead of recursion so that large
    # clusters cannot exhaust the call stack.
    def fill_current_cluster(neighborhood)
      pending = neighborhood.dup
      until pending.empty?
        neighbor = pending.shift
        # Border points that were visited earlier (and looked like noise back
        # then) must still be recorded as members of this cluster.
        add_to_current_cluster(neighbor)
        next if neighbor.visited?

        neighbor.visit!
        reachable = get_neighborhood(neighbor)
        pending.concat(reachable) if core_object?(reachable)
      end
    end

    # Records +point+ as a member of the current cluster unless it already
    # belongs to some cluster.
    def add_to_current_cluster(point)
      return if point.assigned_to_cluster?

      point.assign_to_cluster!
      @clusters[@current_cluster_id] << point
    end

    # All points within +radius+ of +point+, excluding +point+ itself.
    def get_neighborhood(point)
      @data.select { |other| neighbors?(other, point) }
    end

    # The neighborhood excludes the point itself, hence the "- 1".
    def core_object?(neighborhood)
      neighborhood.size >= @min_points - 1
    end

    def neighbors?(p1, p2)
      p1 != p2 && (p1.value - p2.value).abs <= @radius
    end
  end
end
104
+
105
+ require 'data_mining/point'
@@ -0,0 +1,35 @@
1
module DataMining
  # A single data point handled by the DBScan algorithm. Besides its id and
  # value it carries two one-way flags: whether it has been visited and
  # whether it has been assigned to a cluster.
  class Point
    attr_reader :id, :value

    # Arguments:
    #   id: (symbol) identifier of the point
    #   value: (integer) value of the point
    #
    # A new point starts out unvisited and outside of any cluster.
    def initialize(id, value)
      @id, @value = id, value
      @in_a_cluster = @visited = false
    end

    # Marks the point as visited; returns true.
    def visit!
      @visited = true
    end

    # True once #visit! has been called.
    def visited?
      @visited
    end

    # Marks the point as member of a cluster; returns true.
    def assign_to_cluster!
      @in_a_cluster = true
    end

    # True once #assign_to_cluster! has been called.
    def assigned_to_cluster?
      @in_a_cluster
    end
  end
end
@@ -0,0 +1,4 @@
1
# Top-level namespace of the data_mining gem.
module DataMining; end
3
+
4
+ require 'data_mining/dbscan'
metadata ADDED
@@ -0,0 +1,46 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: data_mining
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Manuel Stuefer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-06-23 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A collection of data mining algorithms
14
+ email: mstuefer@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/data_mining.rb
20
+ - lib/data_mining/dbscan.rb
21
+ - lib/data_mining/point.rb
22
+ homepage: http://rubygems.org/gems/data_mining
23
+ licenses:
24
+ - MIT
25
+ metadata: {}
26
+ post_install_message:
27
+ rdoc_options: []
28
+ require_paths:
29
+ - lib
30
+ required_ruby_version: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ required_rubygems_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ requirements: []
41
+ rubyforge_project:
42
+ rubygems_version: 2.4.6
43
+ signing_key:
44
+ specification_version: 4
45
+ summary: Data-Mining-Algorithms
46
+ test_files: []