data_mining 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5da253c99db083a7e8a8694ef479299269b95c87
4
+ data.tar.gz: 873c011a2e26d9c1c11165a7ce9aa401d0d50250
5
+ SHA512:
6
+ metadata.gz: 6f2f01babec563518f49ddd12ac23a55865245ab6b1c9862b88836d5544eb2be6ca49574f8261ed6f0d215e5880c14804dc5f54ae1757220b1bc41a71eeb780f
7
+ data.tar.gz: 027e465483ca27939f8b43167ae9aacfd3cd709545e9bfb38c3a5cc31c0d39d5f2e1b23ec74d4c3319e01c5b4552c2f9ec41f15140ae5cb34e4b6ea73185be32
@@ -0,0 +1,105 @@
1
module DataMining
  # Density-based clustering / outlier-detection algorithm (DBSCAN) working on
  # one numeric value per point.
  #
  # Example:
  #   >> input = [[:p1, 1], [:p2, 2], [:p3, 10]]
  #   >> radius = 3
  #   >> min_points = 2
  #   >> dbscan = DataMining::DBScan.cluster(input, radius, min_points)
  #   >> dbscan.build!
  #   >>
  #   >> dbscan.clusters # one cluster holding the ids :p1 and :p2
  #   >>
  #   >> dbscan.outliers # the point with id :p3
  class DBScan
    # Convenience constructor; clustering only happens once build! is called.
    #
    # Arguments:
    #   data: (array of arrays, like [[:id, value], [:id2, value2]])
    #   radius: (number) two points are neighbors when their values differ
    #           by at most this much
    #   min_points: (integer) a point is a core point when it has at least
    #               min_points - 1 neighbors (i.e. min_points counting itself)
    def self.cluster(data, radius, min_points)
      new(data, radius, min_points)
    end

    # Wraps every [id, value] pair in a DataMining::Point and prepares the
    # bookkeeping. The visiting order is randomized with shuffle.
    def initialize(data, radius, min_points)
      @data = data.map { |id, value| DataMining::Point.new(id, value) }
      @radius = radius
      @min_points = min_points
      @current_cluster_id = 0
      @clusters = {}
      @unvisited_points = @data.shuffle
    end

    # Runs the clustering and returns the same structure as #clusters.
    def build!
      dbscan
      clusters
    end

    # Returns the Point objects that were not assigned to any cluster.
    def outliers
      @data.reject(&:assigned_to_cluster?)
    end

    # Returns one single-entry hash per cluster: { cluster_id => [point ids] }.
    def clusters
      # map (not each) so that ids, rather than the Point objects, are returned
      @clusters.map { |cluster_id, points| { cluster_id => points.map(&:id) } }
    end

    private

    # Visits every point once; each unvisited core point seeds a new cluster.
    def dbscan
      until @unvisited_points.empty?
        point = @unvisited_points.pop
        # may already have been reached while an earlier cluster was expanded
        next if point.visited?

        point.visit!
        neighborhood = get_neighborhood(point)
        create_cluster(point, neighborhood) if core_object?(neighborhood)
      end
    end

    # Opens a new cluster seeded with +point+ and grows it from its neighbors.
    def create_cluster(point, neighborhood)
      @current_cluster_id += 1
      @clusters[@current_cluster_id] = []
      add_to_current_cluster(point)
      fill_current_cluster(neighborhood)
    end

    # Expands the current cluster over everything density-reachable from
    # +neighborhood+. Uses an explicit work queue instead of recursion so that
    # long chains of points cannot exhaust the call stack.
    def fill_current_cluster(neighborhood)
      pending = neighborhood.dup
      until pending.empty?
        candidate = pending.shift
        # Points visited earlier as noise are border points of this cluster
        # and must be recorded too, not merely flagged as assigned.
        add_to_current_cluster(candidate) unless candidate.assigned_to_cluster?
        next if candidate.visited?

        candidate.visit!
        reachable = get_neighborhood(candidate)
        next unless core_object?(reachable)

        # already assigned points have been handled and need no second look
        pending.concat(reachable.reject(&:assigned_to_cluster?))
      end
    end

    # Flags +point+ as clustered and records it in the current cluster.
    def add_to_current_cluster(point)
      point.assign_to_cluster!
      @clusters[@current_cluster_id] << point
    end

    # All points within @radius of +point+, excluding +point+ itself.
    def get_neighborhood(point)
      @data.select { |other| neighbors?(other, point) }
    end

    # The neighborhood never contains the point itself, hence the "- 1".
    def core_object?(neighborhood)
      neighborhood.size >= (@min_points - 1)
    end

    # Two distinct points are neighbors when their values are at most
    # @radius apart.
    def neighbors?(p1, p2)
      (p1.value - p2.value).abs <= @radius && p1 != p2
    end
  end
end
104
+
105
+ require 'data_mining/point'
@@ -0,0 +1,35 @@
1
module DataMining
  # A labelled value processed by the DBScan algorithm, together with the two
  # flags ("visited" and "assigned to a cluster") the algorithm toggles.
  class Point
    attr_reader :id, :value

    # Builds a point that starts out unvisited and not assigned to a cluster.
    #
    # Arguments:
    #   id: (symbol)
    #   value: (integer)
    def initialize(id, value)
      @id, @value = id, value
      @visited, @in_a_cluster = false, false
    end

    # Marks the point as visited.
    def visit!
      @visited = true
    end

    # True once visit! has been called.
    def visited?
      @visited
    end

    # Marks the point as belonging to a cluster.
    def assign_to_cluster!
      @in_a_cluster = true
    end

    # True once assign_to_cluster! has been called.
    def assigned_to_cluster?
      @in_a_cluster
    end
  end
end
@@ -0,0 +1,4 @@
1
# Top-level namespace of the gem; the algorithm classes are added to it by the
# files required below.
module DataMining
end
3
+
4
+ require 'data_mining/dbscan'
metadata ADDED
@@ -0,0 +1,46 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: data_mining
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Manuel Stuefer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-06-23 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A collection of data mining algorithms
14
+ email: mstuefer@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/data_mining.rb
20
+ - lib/data_mining/dbscan.rb
21
+ - lib/data_mining/point.rb
22
+ homepage: http://rubygems.org/gems/data_mining
23
+ licenses:
24
+ - MIT
25
+ metadata: {}
26
+ post_install_message:
27
+ rdoc_options: []
28
+ require_paths:
29
+ - lib
30
+ required_ruby_version: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ required_rubygems_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ requirements: []
41
+ rubyforge_project:
42
+ rubygems_version: 2.4.6
43
+ signing_key:
44
+ specification_version: 4
45
+ summary: Data-Mining-Algorithms
46
+ test_files: []