data_mining 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/data_mining.rb +1 -0
- data/lib/data_mining/apriori.rb +14 -10
- data/lib/data_mining/dbscan.rb +4 -17
- data/lib/data_mining/page_rank.rb +53 -0
- metadata +32 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 28e3a1a81909e9619dff89714def02389e68c9da
|
4
|
+
data.tar.gz: 2a2260b0fcc65003fdb037b1f859941c8100710d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 851076c8ca90e25ff3dcec0d6e02d513e8779dbbad190c386b7252ddc227c9b05bc822b4c5422d5cff7e27681d59b4086ff2ffcae08030327e9ed07d3fdd97a1
|
7
|
+
data.tar.gz: 83ea46cdadee44244b09265333041a239e0e5534df65ee58cd0f1833c01d2b48179df823ec6116e544d9f4a525df5608a7d86ee715d872f841f2b95d6809d8e6
|
data/lib/data_mining.rb
CHANGED
data/lib/data_mining/apriori.rb
CHANGED
@@ -1,28 +1,32 @@
|
|
1
1
|
module DataMining
|
2
2
|
# Apriori Algorithm for frequent set mining and association rule learning
|
3
3
|
class Apriori
|
4
|
+
# Find frequent item sets
|
5
|
+
#
|
6
|
+
# Arguments:
|
7
|
+
# transactions: (array of arrays, like [[:id, [transactions]] .. ])
|
8
|
+
# minimum_support: (integer)
|
4
9
|
attr_reader :results
|
5
10
|
|
6
11
|
def initialize(transactions, minimum_support)
|
7
12
|
@transactions = transactions.select(&:flatten!).each(&:shift)
|
8
13
|
@min_support = minimum_support
|
9
|
-
@results =
|
14
|
+
@results = []
|
10
15
|
end
|
11
16
|
|
12
17
|
def mine!
|
13
18
|
apriori
|
14
19
|
end
|
15
20
|
|
21
|
+
def item_sets_size(size)
|
22
|
+
@results[size - 1]
|
23
|
+
end
|
24
|
+
|
16
25
|
private
|
17
26
|
|
18
27
|
def apriori
|
19
|
-
|
20
|
-
|
21
|
-
while tmp.size > 0
|
22
|
-
@results[i] = tmp
|
23
|
-
i += 1
|
24
|
-
tmp = next_set(tmp)
|
25
|
-
end
|
28
|
+
@results << starting_set
|
29
|
+
@results << next_set(@results.last) until @results.last.empty?
|
26
30
|
end
|
27
31
|
|
28
32
|
def starting_set
|
@@ -38,14 +42,14 @@ module DataMining
|
|
38
42
|
def next_set(itemsets)
|
39
43
|
itemsets.each_with_object([]) do |set, arr|
|
40
44
|
possible_candidates(set, itemsets).each do |candidate|
|
41
|
-
arr
|
45
|
+
arr << candidate if satisfies_min_sup(candidate)
|
42
46
|
end
|
43
47
|
end
|
44
48
|
end
|
45
49
|
|
46
50
|
def possible_candidates(itemset, itemsets)
|
47
51
|
itemsets.each_with_object([]) do |set, arr|
|
48
|
-
arr
|
52
|
+
arr << (itemset + [set.last]) if set.last > itemset.last
|
49
53
|
end.uniq
|
50
54
|
end
|
51
55
|
|
data/lib/data_mining/dbscan.rb
CHANGED
@@ -3,19 +3,8 @@ module DataMining
|
|
3
3
|
class DBScan
|
4
4
|
# Find clusters and outliers
|
5
5
|
#
|
6
|
-
# Example:
|
7
|
-
# >> input = [[:p1, [1,1]], [:p2, [2,1]], [:p3, [10,11]]]
|
8
|
-
# >> radius = 3
|
9
|
-
# >> min_points = 2
|
10
|
-
# >> dbscan = DataMining::DBScan.cluster(input, radius, min_points)
|
11
|
-
# >> dbscan.build!
|
12
|
-
# >>
|
13
|
-
# >> dbscan.clusters # gives array of clusters found (:p1, :p2)
|
14
|
-
# >>
|
15
|
-
# >> dbscan.outliers # gives array of outliers found (:p3)
|
16
|
-
#
|
17
6
|
# Arguments:
|
18
|
-
# data: (array of arrays, like [[:id, value], [:id2, value2]]
|
7
|
+
# data: (array of arrays, like [[:id, value], [:id2, value2]])
|
19
8
|
# radius: (integer)
|
20
9
|
# min_points: (integer)
|
21
10
|
def initialize(data, radius, min_points)
|
@@ -29,7 +18,6 @@ module DataMining
|
|
29
18
|
|
30
19
|
def cluster!
|
31
20
|
dbscan
|
32
|
-
clusters
|
33
21
|
end
|
34
22
|
|
35
23
|
def outliers
|
@@ -78,11 +66,10 @@ module DataMining
|
|
78
66
|
fill_current_cluster(neighborhood) if core_object?(neighborhood)
|
79
67
|
end
|
80
68
|
|
81
|
-
# use map instead of each?
|
82
69
|
def get_neighborhood(point)
|
83
|
-
|
84
|
-
|
85
|
-
|
70
|
+
@data.each_with_object([]) do |p, neighborhood|
|
71
|
+
neighborhood << p if neighbors?(p, point)
|
72
|
+
end
|
86
73
|
end
|
87
74
|
|
88
75
|
def core_object?(neighborhood)
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module DataMining
|
2
|
+
# PageRank Algorithm to measure the importance of nodes in a graph
|
3
|
+
class PageRank
|
4
|
+
attr_reader :graph, :ranks
|
5
|
+
# Measure importance of nodes
|
6
|
+
#
|
7
|
+
# Arguments:
|
8
|
+
# graph: (array of arrays, like:
|
9
|
+
# [[:p1, [:p2]], [:p2, [:p1, :p3]], [:p3, [:p2]]])
|
10
|
+
# damping_factor: (double between 0 and 1)
|
11
|
+
def initialize(graph, damping_factor = 0.85, iterations = 100)
|
12
|
+
@graph = graph.to_h
|
13
|
+
# { :p1 => [:p2], :p2 => [:p1,:p3], :p3 => [:p2] }
|
14
|
+
@outlinks = Hash.new { |_, key| @graph[key].size }
|
15
|
+
# { :p1 => 1, :p2 => 2, :p3 => 1 }
|
16
|
+
@inlinks = Hash.new { |_, key| inlinks(key) }
|
17
|
+
# { :p1 => [:p2], :p2 => [:p1,:p3], :p3 => [:p2] }
|
18
|
+
@ranks = Hash.new(1.0 / @graph.size)
|
19
|
+
# { :p1 => 1/3, :p2 => 1/3, ... }
|
20
|
+
|
21
|
+
@damper = damping_factor
|
22
|
+
@iterations = iterations
|
23
|
+
end
|
24
|
+
|
25
|
+
def rank!
|
26
|
+
pagerank
|
27
|
+
end
|
28
|
+
|
29
|
+
private
|
30
|
+
|
31
|
+
def inlinks(key)
|
32
|
+
@graph.select { |_, v| v.include?(key) }.keys
|
33
|
+
end
|
34
|
+
|
35
|
+
def pagerank
|
36
|
+
@iterations.times { @ranks = next_state }
|
37
|
+
end
|
38
|
+
|
39
|
+
def next_state
|
40
|
+
@graph.each_with_object({}) do |(node, _), ranks|
|
41
|
+
ranks[node] = term + @damper * sum_incoming_scores(node)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def sum_incoming_scores(node)
|
46
|
+
@inlinks[node].map { |id| @ranks[id] / @outlinks[id] }.inject(:+).to_f
|
47
|
+
end
|
48
|
+
|
49
|
+
def term
|
50
|
+
@term ||= ((1 - @damper) / @graph.size)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
metadata
CHANGED
@@ -1,43 +1,57 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_mining
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Manuel Stuefer
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
date: 2015-06-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: minitest
|
15
14
|
requirement: !ruby/object:Gem::Requirement
|
16
15
|
requirements:
|
17
|
-
- -
|
16
|
+
- - ~>
|
18
17
|
- !ruby/object:Gem::Version
|
19
18
|
version: '5.7'
|
20
|
-
|
19
|
+
name: minitest
|
21
20
|
prerelease: false
|
21
|
+
type: :development
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ~>
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '5.7'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name: minitest-reporters
|
29
28
|
requirement: !ruby/object:Gem::Requirement
|
30
29
|
requirements:
|
31
|
-
- -
|
30
|
+
- - ~>
|
32
31
|
- !ruby/object:Gem::Version
|
33
32
|
version: '1.0'
|
34
|
-
|
33
|
+
name: minitest-reporters
|
35
34
|
prerelease: false
|
35
|
+
type: :development
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ~>
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '1.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - ~>
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '0.10'
|
47
|
+
name: simplecov
|
48
|
+
prerelease: false
|
49
|
+
type: :development
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0.10'
|
41
55
|
description: A collection of data mining algorithms
|
42
56
|
email: mstuefer@gmail.com
|
43
57
|
executables: []
|
@@ -45,31 +59,32 @@ extensions: []
|
|
45
59
|
extra_rdoc_files: []
|
46
60
|
files:
|
47
61
|
- lib/data_mining.rb
|
48
|
-
- lib/data_mining/apriori.rb
|
49
62
|
- lib/data_mining/dbscan.rb
|
50
63
|
- lib/data_mining/point.rb
|
64
|
+
- lib/data_mining/apriori.rb
|
65
|
+
- lib/data_mining/page_rank.rb
|
51
66
|
homepage: https://github.com/mstuefer/data_mining
|
52
67
|
licenses:
|
53
68
|
- MIT
|
54
69
|
metadata: {}
|
55
|
-
post_install_message:
|
70
|
+
post_install_message:
|
56
71
|
rdoc_options: []
|
57
72
|
require_paths:
|
58
73
|
- lib
|
59
74
|
required_ruby_version: !ruby/object:Gem::Requirement
|
60
75
|
requirements:
|
61
|
-
- -
|
76
|
+
- - '>='
|
62
77
|
- !ruby/object:Gem::Version
|
63
78
|
version: '0'
|
64
79
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
80
|
requirements:
|
66
|
-
- -
|
81
|
+
- - '>='
|
67
82
|
- !ruby/object:Gem::Version
|
68
83
|
version: '0'
|
69
84
|
requirements: []
|
70
|
-
rubyforge_project:
|
71
|
-
rubygems_version: 2.
|
72
|
-
signing_key:
|
85
|
+
rubyforge_project:
|
86
|
+
rubygems_version: 2.1.9
|
87
|
+
signing_key:
|
73
88
|
specification_version: 4
|
74
89
|
summary: Data-Mining-Algorithms
|
75
90
|
test_files: []
|