data_mining 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/data_mining.rb +1 -0
- data/lib/data_mining/apriori.rb +14 -10
- data/lib/data_mining/dbscan.rb +4 -17
- data/lib/data_mining/page_rank.rb +53 -0
- metadata +32 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 28e3a1a81909e9619dff89714def02389e68c9da
|
4
|
+
data.tar.gz: 2a2260b0fcc65003fdb037b1f859941c8100710d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 851076c8ca90e25ff3dcec0d6e02d513e8779dbbad190c386b7252ddc227c9b05bc822b4c5422d5cff7e27681d59b4086ff2ffcae08030327e9ed07d3fdd97a1
|
7
|
+
data.tar.gz: 83ea46cdadee44244b09265333041a239e0e5534df65ee58cd0f1833c01d2b48179df823ec6116e544d9f4a525df5608a7d86ee715d872f841f2b95d6809d8e6
|
data/lib/data_mining.rb
CHANGED
data/lib/data_mining/apriori.rb
CHANGED
@@ -1,28 +1,32 @@
|
|
1
1
|
module DataMining
|
2
2
|
# Apriori Algorithm for frequent set mining and association rule learning
|
3
3
|
class Apriori
|
4
|
+
# Find frequent item sets
|
5
|
+
#
|
6
|
+
# Arguments:
|
7
|
+
# transactions: (array of arrays, like [[:id, [transactions]] .. ])
|
8
|
+
# minimum_support: (integer)
|
4
9
|
attr_reader :results
|
5
10
|
|
6
11
|
def initialize(transactions, minimum_support)
|
7
12
|
@transactions = transactions.select(&:flatten!).each(&:shift)
|
8
13
|
@min_support = minimum_support
|
9
|
-
@results =
|
14
|
+
@results = []
|
10
15
|
end
|
11
16
|
|
12
17
|
def mine!
|
13
18
|
apriori
|
14
19
|
end
|
15
20
|
|
21
|
+
def item_sets_size(size)
|
22
|
+
@results[size - 1]
|
23
|
+
end
|
24
|
+
|
16
25
|
private
|
17
26
|
|
18
27
|
def apriori
|
19
|
-
|
20
|
-
|
21
|
-
while tmp.size > 0
|
22
|
-
@results[i] = tmp
|
23
|
-
i += 1
|
24
|
-
tmp = next_set(tmp)
|
25
|
-
end
|
28
|
+
@results << starting_set
|
29
|
+
@results << next_set(@results.last) until @results.last.empty?
|
26
30
|
end
|
27
31
|
|
28
32
|
def starting_set
|
@@ -38,14 +42,14 @@ module DataMining
|
|
38
42
|
def next_set(itemsets)
|
39
43
|
itemsets.each_with_object([]) do |set, arr|
|
40
44
|
possible_candidates(set, itemsets).each do |candidate|
|
41
|
-
arr
|
45
|
+
arr << candidate if satisfies_min_sup(candidate)
|
42
46
|
end
|
43
47
|
end
|
44
48
|
end
|
45
49
|
|
46
50
|
def possible_candidates(itemset, itemsets)
|
47
51
|
itemsets.each_with_object([]) do |set, arr|
|
48
|
-
arr
|
52
|
+
arr << (itemset + [set.last]) if set.last > itemset.last
|
49
53
|
end.uniq
|
50
54
|
end
|
51
55
|
|
data/lib/data_mining/dbscan.rb
CHANGED
@@ -3,19 +3,8 @@ module DataMining
|
|
3
3
|
class DBScan
|
4
4
|
# Find clusters and outliers
|
5
5
|
#
|
6
|
-
# Example:
|
7
|
-
# >> input = [[:p1, [1,1]], [:p2, [2,1]], [:p3, [10,11]]]
|
8
|
-
# >> radius = 3
|
9
|
-
# >> min_points = 2
|
10
|
-
# >> dbscan = DataMining::DBScan.cluster(input, radius, min_points)
|
11
|
-
# >> dbscan.build!
|
12
|
-
# >>
|
13
|
-
# >> dbscan.clusters # gives array of clusters found (:p1, :p2)
|
14
|
-
# >>
|
15
|
-
# >> dbscan.outliers # gives array of outliers found (:p3)
|
16
|
-
#
|
17
6
|
# Arguments:
|
18
|
-
# data: (array of arrays, like [[:id, value], [:id2, value2]]
|
7
|
+
# data: (array of arrays, like [[:id, value], [:id2, value2]])
|
19
8
|
# radius: (integer)
|
20
9
|
# min_points: (integer)
|
21
10
|
def initialize(data, radius, min_points)
|
@@ -29,7 +18,6 @@ module DataMining
|
|
29
18
|
|
30
19
|
def cluster!
|
31
20
|
dbscan
|
32
|
-
clusters
|
33
21
|
end
|
34
22
|
|
35
23
|
def outliers
|
@@ -78,11 +66,10 @@ module DataMining
|
|
78
66
|
fill_current_cluster(neighborhood) if core_object?(neighborhood)
|
79
67
|
end
|
80
68
|
|
81
|
-
# use map instead of each?
|
82
69
|
def get_neighborhood(point)
|
83
|
-
|
84
|
-
|
85
|
-
|
70
|
+
@data.each_with_object([]) do |p, neighborhood|
|
71
|
+
neighborhood << p if neighbors?(p, point)
|
72
|
+
end
|
86
73
|
end
|
87
74
|
|
88
75
|
def core_object?(neighborhood)
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module DataMining
|
2
|
+
# PageRank Algorithm to measure the importance of nodes in a graph
|
3
|
+
class PageRank
|
4
|
+
attr_reader :graph, :ranks
|
5
|
+
# Measure importance of nodes
|
6
|
+
#
|
7
|
+
# Arguments:
|
8
|
+
# graph: (array of arrays, like:
|
9
|
+
# [[:p1, [:p2]], [:p2, [:p1, :p3]], [:p3, [:p2]]]
|
10
|
+
# damping_factor: (double between 0 and 1)
|
11
|
+
def initialize(graph, damping_factor = 0.85, iterations = 100)
|
12
|
+
@graph = graph.to_h
|
13
|
+
# { :p1 => [:p2], :p2 => [:p1,:p3], :p3 => [:p2] }
|
14
|
+
@outlinks = Hash.new { |_, key| @graph[key].size }
|
15
|
+
# { :p1 => 1, :p2 => 2, :p3 => 1 }
|
16
|
+
@inlinks = Hash.new { |_, key| inlinks(key) }
|
17
|
+
# { :p1 => [:p2], :p2 => [:p1,:p3], :p3 => [:p2] }
|
18
|
+
@ranks = Hash.new(1.0 / @graph.size)
|
19
|
+
# { :p1 => 1/3, :p2 => 1/3, ... }
|
20
|
+
|
21
|
+
@damper = damping_factor
|
22
|
+
@iterations = iterations
|
23
|
+
end
|
24
|
+
|
25
|
+
def rank!
|
26
|
+
pagerank
|
27
|
+
end
|
28
|
+
|
29
|
+
private
|
30
|
+
|
31
|
+
def inlinks(key)
|
32
|
+
@graph.select { |_, v| v.include?(key) }.keys
|
33
|
+
end
|
34
|
+
|
35
|
+
def pagerank
|
36
|
+
@iterations.times { @ranks = next_state }
|
37
|
+
end
|
38
|
+
|
39
|
+
def next_state
|
40
|
+
@graph.each_with_object({}) do |(node, _), ranks|
|
41
|
+
ranks[node] = term + @damper * sum_incoming_scores(node)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def sum_incoming_scores(node)
|
46
|
+
@inlinks[node].map { |id| @ranks[id] / @outlinks[id] }.inject(:+).to_f
|
47
|
+
end
|
48
|
+
|
49
|
+
def term
|
50
|
+
@term ||= ((1 - @damper) / @graph.size)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
metadata
CHANGED
@@ -1,43 +1,57 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_mining
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Manuel Stuefer
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
date: 2015-06-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: minitest
|
15
14
|
requirement: !ruby/object:Gem::Requirement
|
16
15
|
requirements:
|
17
|
-
- -
|
16
|
+
- - ~>
|
18
17
|
- !ruby/object:Gem::Version
|
19
18
|
version: '5.7'
|
20
|
-
|
19
|
+
name: minitest
|
21
20
|
prerelease: false
|
21
|
+
type: :development
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ~>
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '5.7'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name: minitest-reporters
|
29
28
|
requirement: !ruby/object:Gem::Requirement
|
30
29
|
requirements:
|
31
|
-
- -
|
30
|
+
- - ~>
|
32
31
|
- !ruby/object:Gem::Version
|
33
32
|
version: '1.0'
|
34
|
-
|
33
|
+
name: minitest-reporters
|
35
34
|
prerelease: false
|
35
|
+
type: :development
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ~>
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '1.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - ~>
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '0.10'
|
47
|
+
name: simplecov
|
48
|
+
prerelease: false
|
49
|
+
type: :development
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0.10'
|
41
55
|
description: A collection of data mining algorithms
|
42
56
|
email: mstuefer@gmail.com
|
43
57
|
executables: []
|
@@ -45,31 +59,32 @@ extensions: []
|
|
45
59
|
extra_rdoc_files: []
|
46
60
|
files:
|
47
61
|
- lib/data_mining.rb
|
48
|
-
- lib/data_mining/apriori.rb
|
49
62
|
- lib/data_mining/dbscan.rb
|
50
63
|
- lib/data_mining/point.rb
|
64
|
+
- lib/data_mining/apriori.rb
|
65
|
+
- lib/data_mining/page_rank.rb
|
51
66
|
homepage: https://github.com/mstuefer/data_mining
|
52
67
|
licenses:
|
53
68
|
- MIT
|
54
69
|
metadata: {}
|
55
|
-
post_install_message:
|
70
|
+
post_install_message:
|
56
71
|
rdoc_options: []
|
57
72
|
require_paths:
|
58
73
|
- lib
|
59
74
|
required_ruby_version: !ruby/object:Gem::Requirement
|
60
75
|
requirements:
|
61
|
-
- -
|
76
|
+
- - '>='
|
62
77
|
- !ruby/object:Gem::Version
|
63
78
|
version: '0'
|
64
79
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
80
|
requirements:
|
66
|
-
- -
|
81
|
+
- - '>='
|
67
82
|
- !ruby/object:Gem::Version
|
68
83
|
version: '0'
|
69
84
|
requirements: []
|
70
|
-
rubyforge_project:
|
71
|
-
rubygems_version: 2.
|
72
|
-
signing_key:
|
85
|
+
rubyforge_project:
|
86
|
+
rubygems_version: 2.1.9
|
87
|
+
signing_key:
|
73
88
|
specification_version: 4
|
74
89
|
summary: Data-Mining-Algorithms
|
75
90
|
test_files: []
|