data_mining 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/data_mining.rb +1 -0
- data/lib/data_mining/apriori.rb +60 -0
- data/lib/data_mining/dbscan.rb +7 -8
- data/lib/data_mining/point.rb +1 -1
- metadata +31 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 60ae285b5d026048b81017c733fbbb7dc44742cf
|
4
|
+
data.tar.gz: da11d5c783dcc0926e9ea507b49eb120f509f069
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4067507c2086ed6f3d9ff5a641ff9e62f55f58d4a39d3eb06b85dae1544c57bf000a6c7ba0586a8aa31fd5f8e86274384a613bf680964a1d21d8d7af5b7c5b0a
|
7
|
+
data.tar.gz: 16c4bb014d46c2a209d1ad194797f98f3986455653efc642afa4d3f59e419f5671bac6c2a05620883f9adb57fd409f093991e5bee6f2ada0ac99402dca5bce59
|
data/lib/data_mining.rb
CHANGED
data/lib/data_mining/apriori.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
module DataMining
  # Apriori Algorithm for frequent set mining and association rule learning
  #
  # Finds every itemset that is contained in at least +minimum_support+
  # transactions. Items must be mutually comparable (they are sorted and
  # compared with +>+ while candidates are generated).
  #
  # Usage:
  #   apriori = DataMining::Apriori.new([[:t1, %w(a b)], [:t2, %w(a)]], 2)
  #   apriori.mine!   # => { 1 => [["a"]] }
  class Apriori
    # Frequent itemsets grouped by their size, e.g.
    # { 1 => [[a], [b]], 2 => [[a, b]] }. Empty until #mine! has run.
    attr_reader :results

    # transactions:    (array of arrays, like [[:id, [item, item2]], ...])
    #                  the first element of every transaction is treated as
    #                  its id and discarded, everything else is an item
    # minimum_support: (integer) minimum number of transactions an itemset
    #                  has to appear in to be considered frequent
    def initialize(transactions, minimum_support)
      # Work on flattened copies: the bang variants used previously modified
      # the caller's arrays and dropped transactions that were already flat
      # (flatten! returns nil when there is nothing to flatten).
      @transactions = transactions.map { |transaction| transaction.flatten.drop(1) }
      @min_support  = minimum_support
      @results      = {}
    end

    # Runs the algorithm and returns the results hash (also available
    # through #results afterwards).
    def mine!
      apriori
      results
    end

    private

    # Stores the frequent itemsets level by level (size 1, 2, ...) until a
    # level yields no frequent itemset.
    def apriori
      level    = 1
      itemsets = starting_set
      until itemsets.empty?
        @results[level] = itemsets
        level += 1
        itemsets = next_set(itemsets)
      end
    end

    # Frequent 1-itemsets, sorted by item so that larger candidates can be
    # built in ascending item order.
    def starting_set
      frequent_items.reject { |_, v| v < @min_support }.keys.sort.map { |i| [i] }
    end

    # Number of transactions each single item appears in.
    def frequent_items
      @transactions.each_with_object(Hash.new(0)) do |items, counts|
        # uniq: support is counted per transaction, so an item repeated
        # inside one transaction must only count once.
        items.uniq.each { |item| counts[item] += 1 }
      end
    end

    # Frequent itemsets one item larger than the given (frequent) itemsets.
    def next_set(itemsets)
      itemsets.each_with_object([]) do |set, frequent|
        possible_candidates(set, itemsets).each do |candidate|
          frequent.push(candidate) if satisfies_min_sup(candidate)
        end
      end
    end

    # Extends +itemset+ with the last item of every itemset of the same level
    # whose last item is greater than its own last item; keeps candidates
    # sorted and free of duplicates.
    def possible_candidates(itemset, itemsets)
      itemsets.map(&:last)
              .select { |item| item > itemset.last }
              .uniq
              .map { |item| itemset + [item] }
    end

    # True when +candidate+ is fully contained in at least @min_support
    # transactions.
    def satisfies_min_sup(candidate)
      @transactions.count { |entry| (candidate - entry).empty? } >= @min_support
    end
  end
end
|
data/lib/data_mining/dbscan.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module DataMining
|
2
|
-
# Density-Based clustering / Outlier-Detection
|
2
|
+
# Density-Based clustering / Outlier-Detection Algorithm
|
3
3
|
class DBScan
|
4
4
|
# Find clusters and outliers
|
5
5
|
#
|
@@ -18,11 +18,6 @@ module DataMining
|
|
18
18
|
# data: (array of arrays, like [[:id, value], [:id2, value2]]
|
19
19
|
# radius: (integer)
|
20
20
|
# min_points: (integer)
|
21
|
-
|
22
|
-
def self.cluster(data, radius, min_points)
|
23
|
-
DBScan.new(data, radius, min_points)
|
24
|
-
end
|
25
|
-
|
26
21
|
def initialize(data, radius, min_points)
|
27
22
|
@data = data.map { |i, v| DataMining::Point.new(i, v) }
|
28
23
|
@radius = radius
|
@@ -32,7 +27,7 @@ module DataMining
|
|
32
27
|
@unvisited_points = @data.shuffle
|
33
28
|
end
|
34
29
|
|
35
|
-
def
|
30
|
+
def cluster!
|
36
31
|
dbscan
|
37
32
|
clusters
|
38
33
|
end
|
@@ -109,7 +104,11 @@ module DataMining
|
|
109
104
|
def euclidean_distance(p1, p2)
|
110
105
|
p1 = p1.value
|
111
106
|
p2 = p2.value
|
112
|
-
Math.sqrt(
|
107
|
+
Math.sqrt(
|
108
|
+
p1.each_with_index.inject(0) do |sum, (v, i)|
|
109
|
+
sum + ((v - p2[i])**2)
|
110
|
+
end
|
111
|
+
)
|
113
112
|
end
|
114
113
|
end
|
115
114
|
end
|
data/lib/data_mining/point.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_mining
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Manuel Stuefer
|
@@ -9,7 +9,35 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
date: 2015-06-23 00:00:00.000000000 Z
|
12
|
-
dependencies:
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: minitest
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '5.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '5.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: minitest-reporters
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.0'
|
13
41
|
description: A collection of data mining algorithms
|
14
42
|
email: mstuefer@gmail.com
|
15
43
|
executables: []
|
@@ -17,6 +45,7 @@ extensions: []
|
|
17
45
|
extra_rdoc_files: []
|
18
46
|
files:
|
19
47
|
- lib/data_mining.rb
|
48
|
+
- lib/data_mining/apriori.rb
|
20
49
|
- lib/data_mining/dbscan.rb
|
21
50
|
- lib/data_mining/point.rb
|
22
51
|
homepage: https://github.com/mstuefer/data_mining
|