data_mining 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 60ae285b5d026048b81017c733fbbb7dc44742cf
4
- data.tar.gz: da11d5c783dcc0926e9ea507b49eb120f509f069
3
+ metadata.gz: 28e3a1a81909e9619dff89714def02389e68c9da
4
+ data.tar.gz: 2a2260b0fcc65003fdb037b1f859941c8100710d
5
5
  SHA512:
6
- metadata.gz: 4067507c2086ed6f3d9ff5a641ff9e62f55f58d4a39d3eb06b85dae1544c57bf000a6c7ba0586a8aa31fd5f8e86274384a613bf680964a1d21d8d7af5b7c5b0a
7
- data.tar.gz: 16c4bb014d46c2a209d1ad194797f98f3986455653efc642afa4d3f59e419f5671bac6c2a05620883f9adb57fd409f093991e5bee6f2ada0ac99402dca5bce59
6
+ metadata.gz: 851076c8ca90e25ff3dcec0d6e02d513e8779dbbad190c386b7252ddc227c9b05bc822b4c5422d5cff7e27681d59b4086ff2ffcae08030327e9ed07d3fdd97a1
7
+ data.tar.gz: 83ea46cdadee44244b09265333041a239e0e5534df65ee58cd0f1833c01d2b48179df823ec6116e544d9f4a525df5608a7d86ee715d872f841f2b95d6809d8e6
data/lib/data_mining.rb CHANGED
@@ -3,3 +3,4 @@ end
3
3
 
4
4
  require 'data_mining/dbscan'
5
5
  require 'data_mining/apriori'
6
+ require 'data_mining/page_rank'
@@ -1,28 +1,32 @@
1
1
  module DataMining
2
2
  # Apriori Algorithm for frequent set mining and association rule learning
3
3
  class Apriori
4
+ # Find frequent item sets
5
+ #
6
+ # Arguments:
7
+ # transactions: (array of arrays, like [[:id, [transactions]] .. ])
8
+ # minimum_support: (integer)
4
9
  attr_reader :results
5
10
 
6
11
  def initialize(transactions, minimum_support)
7
12
  @transactions = transactions.select(&:flatten!).each(&:shift)
8
13
  @min_support = minimum_support
9
- @results = {}
14
+ @results = []
10
15
  end
11
16
 
12
17
  def mine!
13
18
  apriori
14
19
  end
15
20
 
21
+ def item_sets_size(size)
22
+ @results[size - 1]
23
+ end
24
+
16
25
  private
17
26
 
18
27
  def apriori
19
- tmp = starting_set
20
- i = 1
21
- while tmp.size > 0
22
- @results[i] = tmp
23
- i += 1
24
- tmp = next_set(tmp)
25
- end
28
+ @results << starting_set
29
+ @results << next_set(@results.last) until @results.last.empty?
26
30
  end
27
31
 
28
32
  def starting_set
@@ -38,14 +42,14 @@ module DataMining
38
42
  def next_set(itemsets)
39
43
  itemsets.each_with_object([]) do |set, arr|
40
44
  possible_candidates(set, itemsets).each do |candidate|
41
- arr.push(candidate) if satisfies_min_sup(candidate)
45
+ arr << candidate if satisfies_min_sup(candidate)
42
46
  end
43
47
  end
44
48
  end
45
49
 
46
50
  def possible_candidates(itemset, itemsets)
47
51
  itemsets.each_with_object([]) do |set, arr|
48
- arr.push(itemset + [set.last]) if set.last > itemset.last
52
+ arr << (itemset + [set.last]) if set.last > itemset.last
49
53
  end.uniq
50
54
  end
51
55
 
@@ -3,19 +3,8 @@ module DataMining
3
3
  class DBScan
4
4
  # Find clusters and outliers
5
5
  #
6
- # Example:
7
- # >> input = [[:p1, [1,1]], [:p2, [2,1]], [:p3, [10,11]]]
8
- # >> radius = 3
9
- # >> min_points = 2
10
- # >> dbscan = DataMining::DBScan.cluster(input, radius, min_points)
11
- # >> dbscan.build!
12
- # >>
13
- # >> dbscan.clusters # gives array of clusters found (:p1, :p2)
14
- # >>
15
- # >> dbscan.outliers # gives array of outliers found (:p3)
16
- #
17
6
  # Arguments:
18
- # data: (array of arrays, like [[:id, value], [:id2, value2]]
7
+ # data: (array of arrays, like [[:id, value], [:id2, value2]])
19
8
  # radius: (integer)
20
9
  # min_points: (integer)
21
10
  def initialize(data, radius, min_points)
@@ -29,7 +18,6 @@ module DataMining
29
18
 
30
19
  def cluster!
31
20
  dbscan
32
- clusters
33
21
  end
34
22
 
35
23
  def outliers
@@ -78,11 +66,10 @@ module DataMining
78
66
  fill_current_cluster(neighborhood) if core_object?(neighborhood)
79
67
  end
80
68
 
81
- # use map instead of each?
82
69
  def get_neighborhood(point)
83
- neighborhood = []
84
- @data.each { |p| neighborhood << p if neighbors?(p, point) }
85
- neighborhood
70
+ @data.each_with_object([]) do |p, neighborhood|
71
+ neighborhood << p if neighbors?(p, point)
72
+ end
86
73
  end
87
74
 
88
75
  def core_object?(neighborhood)
@@ -0,0 +1,53 @@
1
module DataMining
  # PageRank Algorithm to measure the importance of nodes in a graph
  class PageRank
    attr_reader :graph, :ranks

    # Measure importance of nodes
    #
    # Arguments:
    #   graph: (array of arrays, like:
    #           [[:p1, [:p2]], [:p2, [:p1, :p3]], [:p3, [:p2]]])
    #          each entry pairs a node id with the ids it links to
    #   damping_factor: (number between 0 and 1, defaults to 0.85)
    #   iterations: (integer, update rounds performed by rank!,
    #                defaults to 100)
    def initialize(graph, damping_factor = 0.85, iterations = 100)
      @graph = graph.to_h
      # { :p1 => [:p2], :p2 => [:p1,:p3], :p3 => [:p2] }
      # Both lookups are filled lazily and store what they compute, so the
      # scan over the graph runs once per node instead of once per
      # node per iteration.
      @outlinks = Hash.new { |cache, key| cache[key] = @graph[key].size }
      # { :p1 => 1, :p2 => 2, :p3 => 1 }
      @inlinks = Hash.new { |cache, key| cache[key] = inlinks(key) }
      # { :p1 => [:p2], :p2 => [:p1,:p3], :p3 => [:p2] }
      @ranks = Hash.new(1.0 / @graph.size)
      # { :p1 => 1/3, :p2 => 1/3, ... }

      @damper = damping_factor
      @iterations = iterations
    end

    # Runs the configured number of iterations; the resulting scores are
    # available through #ranks afterwards.
    def rank!
      pagerank
    end

    private

    # Ids of all nodes whose link list contains +key+.
    def inlinks(key)
      @graph.select { |_, targets| targets.include?(key) }.keys
    end

    def pagerank
      # Drop cached link data first so every run reads the link structure
      # of @graph as it is now (the hash is reachable through #graph).
      @inlinks.clear
      @outlinks.clear
      @iterations.times { @ranks = next_state }
    end

    # Builds the next rank table from the current one, one entry per
    # node of the graph.
    def next_state
      @graph.each_with_object({}) do |(node, _), ranks|
        ranks[node] = term + @damper * sum_incoming_scores(node)
      end
    end

    # Sum of rank / out-link-count over every node linking to +node+;
    # 0.0 when nothing links to it.
    def sum_incoming_scores(node)
      @inlinks[node].inject(0.0) do |sum, id|
        sum + @ranks[id] / @outlinks[id]
      end
    end

    # Constant (1 - damping_factor) / node_count share added to every node.
    # to_f forces Float division: with an Integer damping factor (0 or 1)
    # Integer division would truncate the term to 0.
    def term
      @term ||= (1 - @damper).to_f / @graph.size
    end
  end
end
metadata CHANGED
@@ -1,43 +1,57 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_mining
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Manuel Stuefer
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
  date: 2015-06-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: minitest
15
14
  requirement: !ruby/object:Gem::Requirement
16
15
  requirements:
17
- - - "~>"
16
+ - - ~>
18
17
  - !ruby/object:Gem::Version
19
18
  version: '5.7'
20
- type: :development
19
+ name: minitest
21
20
  prerelease: false
21
+ type: :development
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ~>
25
25
  - !ruby/object:Gem::Version
26
26
  version: '5.7'
27
27
  - !ruby/object:Gem::Dependency
28
- name: minitest-reporters
29
28
  requirement: !ruby/object:Gem::Requirement
30
29
  requirements:
31
- - - "~>"
30
+ - - ~>
32
31
  - !ruby/object:Gem::Version
33
32
  version: '1.0'
34
- type: :development
33
+ name: minitest-reporters
35
34
  prerelease: false
35
+ type: :development
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - "~>"
38
+ - - ~>
39
39
  - !ruby/object:Gem::Version
40
40
  version: '1.0'
41
+ - !ruby/object:Gem::Dependency
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ~>
45
+ - !ruby/object:Gem::Version
46
+ version: '0.10'
47
+ name: simplecov
48
+ prerelease: false
49
+ type: :development
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '0.10'
41
55
  description: A collection of data mining algorithms
42
56
  email: mstuefer@gmail.com
43
57
  executables: []
@@ -45,31 +59,32 @@ extensions: []
45
59
  extra_rdoc_files: []
46
60
  files:
47
61
  - lib/data_mining.rb
48
- - lib/data_mining/apriori.rb
49
62
  - lib/data_mining/dbscan.rb
50
63
  - lib/data_mining/point.rb
64
+ - lib/data_mining/apriori.rb
65
+ - lib/data_mining/page_rank.rb
51
66
  homepage: https://github.com/mstuefer/data_mining
52
67
  licenses:
53
68
  - MIT
54
69
  metadata: {}
55
- post_install_message:
70
+ post_install_message:
56
71
  rdoc_options: []
57
72
  require_paths:
58
73
  - lib
59
74
  required_ruby_version: !ruby/object:Gem::Requirement
60
75
  requirements:
61
- - - ">="
76
+ - - '>='
62
77
  - !ruby/object:Gem::Version
63
78
  version: '0'
64
79
  required_rubygems_version: !ruby/object:Gem::Requirement
65
80
  requirements:
66
- - - ">="
81
+ - - '>='
67
82
  - !ruby/object:Gem::Version
68
83
  version: '0'
69
84
  requirements: []
70
- rubyforge_project:
71
- rubygems_version: 2.4.6
72
- signing_key:
85
+ rubyforge_project:
86
+ rubygems_version: 2.1.9
87
+ signing_key:
73
88
  specification_version: 4
74
89
  summary: Data-Mining-Algorithms
75
90
  test_files: []