data_mining 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 60ae285b5d026048b81017c733fbbb7dc44742cf
4
- data.tar.gz: da11d5c783dcc0926e9ea507b49eb120f509f069
3
+ metadata.gz: 28e3a1a81909e9619dff89714def02389e68c9da
4
+ data.tar.gz: 2a2260b0fcc65003fdb037b1f859941c8100710d
5
5
  SHA512:
6
- metadata.gz: 4067507c2086ed6f3d9ff5a641ff9e62f55f58d4a39d3eb06b85dae1544c57bf000a6c7ba0586a8aa31fd5f8e86274384a613bf680964a1d21d8d7af5b7c5b0a
7
- data.tar.gz: 16c4bb014d46c2a209d1ad194797f98f3986455653efc642afa4d3f59e419f5671bac6c2a05620883f9adb57fd409f093991e5bee6f2ada0ac99402dca5bce59
6
+ metadata.gz: 851076c8ca90e25ff3dcec0d6e02d513e8779dbbad190c386b7252ddc227c9b05bc822b4c5422d5cff7e27681d59b4086ff2ffcae08030327e9ed07d3fdd97a1
7
+ data.tar.gz: 83ea46cdadee44244b09265333041a239e0e5534df65ee58cd0f1833c01d2b48179df823ec6116e544d9f4a525df5608a7d86ee715d872f841f2b95d6809d8e6
data/lib/data_mining.rb CHANGED
@@ -3,3 +3,4 @@ end
3
3
 
4
4
  require 'data_mining/dbscan'
5
5
  require 'data_mining/apriori'
6
+ require 'data_mining/page_rank'
@@ -1,28 +1,32 @@
1
1
  module DataMining
2
2
  # Apriori Algorithm for frequent set mining and association rule learning
3
3
  class Apriori
4
+ # Find frequent item sets
5
+ #
6
+ # Arguments:
7
+ # transactions: (array of arrays, like [[:id, [transactions]] .. ])
8
+ # minimum_support: (integer)
4
9
  attr_reader :results
5
10
 
6
11
  def initialize(transactions, minimum_support)
7
12
  @transactions = transactions.select(&:flatten!).each(&:shift)
8
13
  @min_support = minimum_support
9
- @results = {}
14
+ @results = []
10
15
  end
11
16
 
12
17
  def mine!
13
18
  apriori
14
19
  end
15
20
 
21
+ def item_sets_size(size)
22
+ @results[size - 1]
23
+ end
24
+
16
25
  private
17
26
 
18
27
  def apriori
19
- tmp = starting_set
20
- i = 1
21
- while tmp.size > 0
22
- @results[i] = tmp
23
- i += 1
24
- tmp = next_set(tmp)
25
- end
28
+ @results << starting_set
29
+ @results << next_set(@results.last) until @results.last.empty?
26
30
  end
27
31
 
28
32
  def starting_set
@@ -38,14 +42,14 @@ module DataMining
38
42
  def next_set(itemsets)
39
43
  itemsets.each_with_object([]) do |set, arr|
40
44
  possible_candidates(set, itemsets).each do |candidate|
41
- arr.push(candidate) if satisfies_min_sup(candidate)
45
+ arr << candidate if satisfies_min_sup(candidate)
42
46
  end
43
47
  end
44
48
  end
45
49
 
46
50
  def possible_candidates(itemset, itemsets)
47
51
  itemsets.each_with_object([]) do |set, arr|
48
- arr.push(itemset + [set.last]) if set.last > itemset.last
52
+ arr << (itemset + [set.last]) if set.last > itemset.last
49
53
  end.uniq
50
54
  end
51
55
 
@@ -3,19 +3,8 @@ module DataMining
3
3
  class DBScan
4
4
  # Find clusters and outliers
5
5
  #
6
- # Example:
7
- # >> input = [[:p1, [1,1]], [:p2, [2,1]], [:p3, [10,11]]]
8
- # >> radius = 3
9
- # >> min_points = 2
10
- # >> dbscan = DataMining::DBScan.cluster(input, radius, min_points)
11
- # >> dbscan.build!
12
- # >>
13
- # >> dbscan.clusters # gives array of clusters found (:p1, :p2)
14
- # >>
15
- # >> dbscan.outliers # gives array of outliers found (:p3)
16
- #
17
6
  # Arguments:
18
- # data: (array of arrays, like [[:id, value], [:id2, value2]]
7
+ # data: (array of arrays, like [[:id, value], [:id2, value2]])
19
8
  # radius: (integer)
20
9
  # min_points: (integer)
21
10
  def initialize(data, radius, min_points)
@@ -29,7 +18,6 @@ module DataMining
29
18
 
30
19
  def cluster!
31
20
  dbscan
32
- clusters
33
21
  end
34
22
 
35
23
  def outliers
@@ -78,11 +66,10 @@ module DataMining
78
66
  fill_current_cluster(neighborhood) if core_object?(neighborhood)
79
67
  end
80
68
 
81
- # use map instead of each?
82
69
  def get_neighborhood(point)
83
- neighborhood = []
84
- @data.each { |p| neighborhood << p if neighbors?(p, point) }
85
- neighborhood
70
+ @data.each_with_object([]) do |p, neighborhood|
71
+ neighborhood << p if neighbors?(p, point)
72
+ end
86
73
  end
87
74
 
88
75
  def core_object?(neighborhood)
@@ -0,0 +1,53 @@
1
module DataMining
  # PageRank Algorithm to measure the importance of nodes in a graph
  class PageRank
    # graph: the input converted to a hash of node => linked nodes
    # ranks: hash of node => score. Until rank! has completed at least one
    #        iteration this is an empty hash whose default value is
    #        1.0 / number of nodes.
    attr_reader :graph, :ranks

    # Measure importance of nodes
    #
    # Arguments:
    #   graph: (array of arrays, like:
    #     [[:p1, [:p2]], [:p2, [:p1, :p3]], [:p3, [:p2]]])
    #   damping_factor: (number between 0 and 1, defaults to 0.85)
    #   iterations: (integer, how many update rounds rank! performs,
    #     defaults to 100)
    def initialize(graph, damping_factor = 0.85, iterations = 100)
      @graph = graph.to_h
      # { :p1 => [:p2], :p2 => [:p1,:p3], :p3 => [:p2] }

      # Both lookup tables are filled lazily. The block stores each computed
      # value in the hash so that it is calculated once per node instead of
      # once per node per iteration.
      @outlinks = Hash.new { |cache, key| cache[key] = @graph[key].size }
      # { :p1 => 1, :p2 => 2, :p3 => 1 }
      @inlinks = Hash.new { |cache, key| cache[key] = inlinks(key) }
      # { :p1 => [:p2], :p2 => [:p1,:p3], :p3 => [:p2] }
      @ranks = Hash.new(1.0 / @graph.size)
      # { :p1 => 1/3, :p2 => 1/3, ... }

      @damper = damping_factor
      @iterations = iterations
    end

    # Runs the configured number of iterations; read the scores through
    # ranks afterwards.
    def rank!
      pagerank
    end

    private

    # Nodes whose link list contains key
    def inlinks(key)
      @graph.select { |_, targets| targets.include?(key) }.keys
    end

    # Replaces @ranks with the next state, @iterations times in a row
    def pagerank
      @iterations.times { @ranks = next_state }
    end

    # Builds a fresh node => score hash from the current @ranks, so every
    # node of one round is computed from the scores of the previous round
    def next_state
      @graph.each_with_object({}) do |(node, _), ranks|
        ranks[node] = term + @damper * sum_incoming_scores(node)
      end
    end

    # Sum of rank / number of outlinks over all nodes linking to node.
    # Starts from 0.0 so a node without incoming links contributes 0.0.
    def sum_incoming_scores(node)
      @inlinks[node].inject(0.0) do |total, id|
        total + @ranks[id] / @outlinks[id]
      end
    end

    # Constant share every node receives: (1 - damping factor) / node count.
    # Uses 1.0 so an Integer damping factor (0 or 1) is not truncated by
    # integer division.
    def term
      @term ||= ((1.0 - @damper) / @graph.size)
    end
  end
end
metadata CHANGED
@@ -1,43 +1,57 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_mining
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Manuel Stuefer
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
  date: 2015-06-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: minitest
15
14
  requirement: !ruby/object:Gem::Requirement
16
15
  requirements:
17
- - - "~>"
16
+ - - ~>
18
17
  - !ruby/object:Gem::Version
19
18
  version: '5.7'
20
- type: :development
19
+ name: minitest
21
20
  prerelease: false
21
+ type: :development
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ~>
25
25
  - !ruby/object:Gem::Version
26
26
  version: '5.7'
27
27
  - !ruby/object:Gem::Dependency
28
- name: minitest-reporters
29
28
  requirement: !ruby/object:Gem::Requirement
30
29
  requirements:
31
- - - "~>"
30
+ - - ~>
32
31
  - !ruby/object:Gem::Version
33
32
  version: '1.0'
34
- type: :development
33
+ name: minitest-reporters
35
34
  prerelease: false
35
+ type: :development
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - "~>"
38
+ - - ~>
39
39
  - !ruby/object:Gem::Version
40
40
  version: '1.0'
41
+ - !ruby/object:Gem::Dependency
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ~>
45
+ - !ruby/object:Gem::Version
46
+ version: '0.10'
47
+ name: simplecov
48
+ prerelease: false
49
+ type: :development
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '0.10'
41
55
  description: A collection of data mining algorithms
42
56
  email: mstuefer@gmail.com
43
57
  executables: []
@@ -45,31 +59,32 @@ extensions: []
45
59
  extra_rdoc_files: []
46
60
  files:
47
61
  - lib/data_mining.rb
48
- - lib/data_mining/apriori.rb
49
62
  - lib/data_mining/dbscan.rb
50
63
  - lib/data_mining/point.rb
64
+ - lib/data_mining/apriori.rb
65
+ - lib/data_mining/page_rank.rb
51
66
  homepage: https://github.com/mstuefer/data_mining
52
67
  licenses:
53
68
  - MIT
54
69
  metadata: {}
55
- post_install_message:
70
+ post_install_message:
56
71
  rdoc_options: []
57
72
  require_paths:
58
73
  - lib
59
74
  required_ruby_version: !ruby/object:Gem::Requirement
60
75
  requirements:
61
- - - ">="
76
+ - - '>='
62
77
  - !ruby/object:Gem::Version
63
78
  version: '0'
64
79
  required_rubygems_version: !ruby/object:Gem::Requirement
65
80
  requirements:
66
- - - ">="
81
+ - - '>='
67
82
  - !ruby/object:Gem::Version
68
83
  version: '0'
69
84
  requirements: []
70
- rubyforge_project:
71
- rubygems_version: 2.4.6
72
- signing_key:
85
+ rubyforge_project:
86
+ rubygems_version: 2.1.9
87
+ signing_key:
73
88
  specification_version: 4
74
89
  summary: Data-Mining-Algorithms
75
90
  test_files: []