data_mining 0.0.2 → 0.0.3

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 974c994068ee3cf5b01c62f5b406f5ae327d17d3
4
- data.tar.gz: e95b21a5a62291a3342b32b2b16db1ba31ae9e99
3
+ metadata.gz: 60ae285b5d026048b81017c733fbbb7dc44742cf
4
+ data.tar.gz: da11d5c783dcc0926e9ea507b49eb120f509f069
5
5
  SHA512:
6
- metadata.gz: e81c069008ec95bcc3598b29437890a497917b33e3a873dacbca64265f98681b5f1f8e2484c7fe893597d09837574a35fc846c049cd68c33e159d17c78b5abd2
7
- data.tar.gz: b51412c6449a85a22ca5258d2731847abea52a498202bc12f6e4167c1ad28989f60e634c015e0f19409dde655b026a2dd20b5d3eb492f3b1ddbc9db83d9faeef
6
+ metadata.gz: 4067507c2086ed6f3d9ff5a641ff9e62f55f58d4a39d3eb06b85dae1544c57bf000a6c7ba0586a8aa31fd5f8e86274384a613bf680964a1d21d8d7af5b7c5b0a
7
+ data.tar.gz: 16c4bb014d46c2a209d1ad194797f98f3986455653efc642afa4d3f59e419f5671bac6c2a05620883f9adb57fd409f093991e5bee6f2ada0ac99402dca5bce59
data/lib/data_mining.rb CHANGED
@@ -2,3 +2,4 @@ module DataMining
2
2
  end
3
3
 
4
4
  require 'data_mining/dbscan'
5
+ require 'data_mining/apriori'
data/lib/data_mining/apriori.rb ADDED
@@ -0,0 +1,60 @@
1
+ module DataMining
2
+ # Apriori Algorithm for frequent set mining and association rule learning
3
+ class Apriori
4
+ attr_reader :results
5
+
6
+ def initialize(transactions, minimum_support)
7
+ @transactions = transactions.select(&:flatten!).each(&:shift)
8
+ @min_support = minimum_support
9
+ @results = {}
10
+ end
11
+
12
+ def mine!
13
+ apriori
14
+ end
15
+
16
+ private
17
+
18
+ def apriori
19
+ tmp = starting_set
20
+ i = 1
21
+ while tmp.size > 0
22
+ @results[i] = tmp
23
+ i += 1
24
+ tmp = next_set(tmp)
25
+ end
26
+ end
27
+
28
+ def starting_set
29
+ frequent_items.reject { |_, v| v < @min_support }.keys.sort.map { |i| [i] }
30
+ end
31
+
32
+ def frequent_items
33
+ @transactions.each_with_object(Hash.new(0)) do |sets, hash|
34
+ sets.each { |item| hash[item] += 1 }
35
+ end
36
+ end
37
+
38
+ def next_set(itemsets)
39
+ itemsets.each_with_object([]) do |set, arr|
40
+ possible_candidates(set, itemsets).each do |candidate|
41
+ arr.push(candidate) if satisfies_min_sup(candidate)
42
+ end
43
+ end
44
+ end
45
+
46
+ def possible_candidates(itemset, itemsets)
47
+ itemsets.each_with_object([]) do |set, arr|
48
+ arr.push(itemset + [set.last]) if set.last > itemset.last
49
+ end.uniq
50
+ end
51
+
52
+ def satisfies_min_sup(candidate)
53
+ return true if (@transactions.inject(0) do |counter, entry|
54
+ counter += 1 if (candidate - entry).empty?
55
+ counter
56
+ end >= @min_support)
57
+ false
58
+ end
59
+ end
60
+ end
data/lib/data_mining/dbscan.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  module DataMining
2
- # Density-Based clustering / Outlier-Detection Algorithu
2
+ # Density-Based clustering / Outlier-Detection Algorithm
3
3
  class DBScan
4
4
  # Find clusters and outliers
5
5
  #
@@ -18,11 +18,6 @@ module DataMining
18
18
  # data: (array of arrays, like [[:id, value], [:id2, value2]]
19
19
  # radius: (integer)
20
20
  # min_points: (integer)
21
-
22
- def self.cluster(data, radius, min_points)
23
- DBScan.new(data, radius, min_points)
24
- end
25
-
26
21
  def initialize(data, radius, min_points)
27
22
  @data = data.map { |i, v| DataMining::Point.new(i, v) }
28
23
  @radius = radius
@@ -32,7 +27,7 @@ module DataMining
32
27
  @unvisited_points = @data.shuffle
33
28
  end
34
29
 
35
- def build!
30
+ def cluster!
36
31
  dbscan
37
32
  clusters
38
33
  end
@@ -109,7 +104,11 @@ module DataMining
109
104
  def euclidean_distance(p1, p2)
110
105
  p1 = p1.value
111
106
  p2 = p2.value
112
- Math.sqrt(p1.each_with_index.inject(0) { |sum, (v, i)| sum + ((v - p2[i])**2) })
107
+ Math.sqrt(
108
+ p1.each_with_index.inject(0) do |sum, (v, i)|
109
+ sum + ((v - p2[i])**2)
110
+ end
111
+ )
113
112
  end
114
113
  end
115
114
  end
data/lib/data_mining/point.rb CHANGED
@@ -7,7 +7,7 @@ module DataMining
7
7
  #
8
8
  # Arguments:
9
9
  # id: (symbol)
10
- # value: (integer)
10
+ # value: (array)
11
11
 
12
12
  def initialize(id, value)
13
13
  @id = id
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_mining
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Manuel Stuefer
@@ -9,7 +9,35 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
  date: 2015-06-23 00:00:00.000000000 Z
12
- dependencies: []
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: minitest
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '5.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '5.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest-reporters
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.0'
13
41
  description: A collection of data mining algorithms
14
42
  email: mstuefer@gmail.com
15
43
  executables: []
@@ -17,6 +45,7 @@ extensions: []
17
45
  extra_rdoc_files: []
18
46
  files:
19
47
  - lib/data_mining.rb
48
+ - lib/data_mining/apriori.rb
20
49
  - lib/data_mining/dbscan.rb
21
50
  - lib/data_mining/point.rb
22
51
  homepage: https://github.com/mstuefer/data_mining