data_mining 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/data_mining.rb +1 -0
- data/lib/data_mining/apriori.rb +60 -0
- data/lib/data_mining/dbscan.rb +7 -8
- data/lib/data_mining/point.rb +1 -1
- metadata +31 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 60ae285b5d026048b81017c733fbbb7dc44742cf
|
4
|
+
data.tar.gz: da11d5c783dcc0926e9ea507b49eb120f509f069
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4067507c2086ed6f3d9ff5a641ff9e62f55f58d4a39d3eb06b85dae1544c57bf000a6c7ba0586a8aa31fd5f8e86274384a613bf680964a1d21d8d7af5b7c5b0a
|
7
|
+
data.tar.gz: 16c4bb014d46c2a209d1ad194797f98f3986455653efc642afa4d3f59e419f5671bac6c2a05620883f9adb57fd409f093991e5bee6f2ada0ac99402dca5bce59
|
data/lib/data_mining.rb
CHANGED
data/lib/data_mining/apriori.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
module DataMining
  # Apriori Algorithm for frequent set mining and association rule learning
  #
  # Usage:
  #   apriori = DataMining::Apriori.new([[:t1, [1, 2]], [:t2, [1, 2, 3]]], 2)
  #   apriori.mine! # => { 1 => [[1], [2]], 2 => [[1, 2]] }
  #   apriori.results # same hash, keyed by itemset size
  class Apriori
    # Hash mapping itemset size (1, 2, ...) to the sorted list of frequent
    # itemsets of that size. Empty until #mine! has been called.
    attr_reader :results

    # transactions:    array of arrays whose first element is the transaction
    #                  id and whose remaining (possibly nested) elements are
    #                  the items, like [[:id, [item, item2]], [:id2, [item]]]
    # minimum_support: minimum number of transactions an itemset has to
    #                  appear in to be considered frequent
    def initialize(transactions, minimum_support)
      # Work on flattened copies: the bang variant (flatten!) returns nil for
      # an already flat transaction, which would silently drop it, and it
      # would also modify the caller's arrays. Items are deduplicated because
      # support is counted once per transaction.
      @transactions = transactions.map do |transaction|
        transaction.flatten.drop(1).uniq
      end
      @min_support = minimum_support
      @results = {}
    end

    # Runs the algorithm, stores the frequent itemsets and returns them
    # (the same hash as #results).
    def mine!
      apriori
    end

    private

    # Level-wise search: starts with the frequent single items and keeps
    # extending the itemsets until no frequent itemset of the next size exists.
    def apriori
      size = 1
      itemsets = starting_set
      until itemsets.empty?
        @results[size] = itemsets
        itemsets = next_set(itemsets)
        size += 1
      end
      @results
    end

    # Frequent itemsets of size one, sorted by item.
    def starting_set
      frequent = frequent_items.select { |_, count| count >= @min_support }
      frequent.keys.sort.map { |item| [item] }
    end

    # Number of transactions each single item occurs in.
    def frequent_items
      @transactions.each_with_object(Hash.new(0)) do |items, counts|
        items.each { |item| counts[item] += 1 }
      end
    end

    # Frequent itemsets that are one item larger than the given ones.
    def next_set(itemsets)
      itemsets.flat_map do |set|
        possible_candidates(set, itemsets).select do |candidate|
          satisfies_min_sup(candidate)
        end
      end
    end

    # Extends itemset by every last item of the given itemsets that sorts
    # after its own last item, so candidates stay sorted and are generated
    # only once.
    def possible_candidates(itemset, itemsets)
      larger = itemsets.map(&:last).select { |item| item > itemset.last }
      larger.uniq.map { |item| itemset + [item] }
    end

    # True if at least @min_support transactions contain every item of the
    # candidate.
    def satisfies_min_sup(candidate)
      support = @transactions.count { |items| (candidate - items).empty? }
      support >= @min_support
    end
  end
end
|
data/lib/data_mining/dbscan.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module DataMining
|
2
|
-
# Density-Based clustering / Outlier-Detection
|
2
|
+
# Density-Based clustering / Outlier-Detection Algorithm
|
3
3
|
class DBScan
|
4
4
|
# Find clusters and outliers
|
5
5
|
#
|
@@ -18,11 +18,6 @@ module DataMining
|
|
18
18
|
# data: (array of arrays, like [[:id, value], [:id2, value2]]
|
19
19
|
# radius: (integer)
|
20
20
|
# min_points: (integer)
|
21
|
-
|
22
|
-
def self.cluster(data, radius, min_points)
|
23
|
-
DBScan.new(data, radius, min_points)
|
24
|
-
end
|
25
|
-
|
26
21
|
def initialize(data, radius, min_points)
|
27
22
|
@data = data.map { |i, v| DataMining::Point.new(i, v) }
|
28
23
|
@radius = radius
|
@@ -32,7 +27,7 @@ module DataMining
|
|
32
27
|
@unvisited_points = @data.shuffle
|
33
28
|
end
|
34
29
|
|
35
|
-
def
|
30
|
+
def cluster!
|
36
31
|
dbscan
|
37
32
|
clusters
|
38
33
|
end
|
@@ -109,7 +104,11 @@ module DataMining
|
|
109
104
|
def euclidean_distance(p1, p2)
|
110
105
|
p1 = p1.value
|
111
106
|
p2 = p2.value
|
112
|
-
Math.sqrt(
|
107
|
+
Math.sqrt(
|
108
|
+
p1.each_with_index.inject(0) do |sum, (v, i)|
|
109
|
+
sum + ((v - p2[i])**2)
|
110
|
+
end
|
111
|
+
)
|
113
112
|
end
|
114
113
|
end
|
115
114
|
end
|
data/lib/data_mining/point.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_mining
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Manuel Stuefer
|
@@ -9,7 +9,35 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
date: 2015-06-23 00:00:00.000000000 Z
|
12
|
-
dependencies:
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: minitest
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '5.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '5.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: minitest-reporters
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.0'
|
13
41
|
description: A collection of data mining algorithms
|
14
42
|
email: mstuefer@gmail.com
|
15
43
|
executables: []
|
@@ -17,6 +45,7 @@ extensions: []
|
|
17
45
|
extra_rdoc_files: []
|
18
46
|
files:
|
19
47
|
- lib/data_mining.rb
|
48
|
+
- lib/data_mining/apriori.rb
|
20
49
|
- lib/data_mining/dbscan.rb
|
21
50
|
- lib/data_mining/point.rb
|
22
51
|
homepage: https://github.com/mstuefer/data_mining
|