data_mining 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 974c994068ee3cf5b01c62f5b406f5ae327d17d3
4
- data.tar.gz: e95b21a5a62291a3342b32b2b16db1ba31ae9e99
3
+ metadata.gz: 60ae285b5d026048b81017c733fbbb7dc44742cf
4
+ data.tar.gz: da11d5c783dcc0926e9ea507b49eb120f509f069
5
5
  SHA512:
6
- metadata.gz: e81c069008ec95bcc3598b29437890a497917b33e3a873dacbca64265f98681b5f1f8e2484c7fe893597d09837574a35fc846c049cd68c33e159d17c78b5abd2
7
- data.tar.gz: b51412c6449a85a22ca5258d2731847abea52a498202bc12f6e4167c1ad28989f60e634c015e0f19409dde655b026a2dd20b5d3eb492f3b1ddbc9db83d9faeef
6
+ metadata.gz: 4067507c2086ed6f3d9ff5a641ff9e62f55f58d4a39d3eb06b85dae1544c57bf000a6c7ba0586a8aa31fd5f8e86274384a613bf680964a1d21d8d7af5b7c5b0a
7
+ data.tar.gz: 16c4bb014d46c2a209d1ad194797f98f3986455653efc642afa4d3f59e419f5671bac6c2a05620883f9adb57fd409f093991e5bee6f2ada0ac99402dca5bce59
data/lib/data_mining.rb CHANGED
@@ -2,3 +2,4 @@ module DataMining
2
2
  end
3
3
 
4
4
  require 'data_mining/dbscan'
5
+ require 'data_mining/apriori'
@@ -0,0 +1,60 @@
module DataMining
  # Apriori algorithm for frequent item set mining.
  #
  # Starting from the frequent single items, candidate sets are grown one
  # item at a time; a candidate is kept only when at least +minimum_support+
  # transactions contain all of its items.
  #
  # Example:
  #   apriori = DataMining::Apriori.new([[:t1, [1, 2]], [:t2, [1, 2]]], 2)
  #   apriori.mine!
  #   apriori.results  # => { 1 => [[1], [2]], 2 => [[1, 2]] }
  class Apriori
    # Hash mapping item set size => array of frequent item sets of that size
    # (each item set is a sorted array). Empty until #mine! has been called.
    attr_reader :results

    # Arguments:
    #   transactions: (array of arrays, like [[:id, [item, item2]], ...])
    #   minimum_support: (integer) minimum number of transactions that must
    #                    contain an item set for it to count as frequent
    #
    # The first element of every transaction is treated as its id and is
    # discarded; everything after it (nested or not) is the item list.
    def initialize(transactions, minimum_support)
      # Work on copies (flatten/drop) so the caller's arrays are left intact
      # and the same input can be mined more than once. Duplicate items
      # within one transaction are collapsed because support is counted per
      # transaction, not per occurrence.
      @transactions = transactions.map do |transaction|
        transaction.flatten.drop(1).uniq
      end
      @min_support = minimum_support
      @results = {}
    end

    # Runs the algorithm and returns the populated results hash.
    def mine!
      apriori
      results
    end

    private

    # Stores each level of frequent item sets under its set size until a
    # level comes back empty.
    def apriori
      level = starting_set
      size = 1
      until level.empty?
        @results[size] = level
        size += 1
        level = next_set(level)
      end
    end

    # Frequent 1-item sets, sorted by item. Items must be mutually
    # comparable because of the sort.
    def starting_set
      frequent = frequent_items.select { |_, count| count >= @min_support }
      frequent.keys.sort.map { |item| [item] }
    end

    # Hash of item => number of transactions containing that item.
    def frequent_items
      @transactions.each_with_object(Hash.new(0)) do |items, counts|
        items.each { |item| counts[item] += 1 }
      end
    end

    # Builds the next level: every candidate derived from the current
    # item sets that still satisfies the minimum support.
    def next_set(itemsets)
      itemsets.flat_map do |set|
        possible_candidates(set, itemsets).select do |candidate|
          satisfies_min_sup(candidate)
        end
      end
    end

    # Extends +itemset+ with each distinct last item of +itemsets+ that is
    # greater than its own last item, which keeps candidates sorted and
    # avoids generating the same set in different orders.
    def possible_candidates(itemset, itemsets)
      tail = itemset.last
      itemsets.map(&:last).uniq
              .select { |item| item > tail }
              .map { |item| itemset + [item] }
    end

    # True when at least @min_support transactions contain every item of
    # +candidate+.
    def satisfies_min_sup(candidate)
      support = @transactions.count { |entry| (candidate - entry).empty? }
      support >= @min_support
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  module DataMining
2
- # Density-Based clustering / Outlier-Detection Algorithu
2
+ # Density-Based clustering / Outlier-Detection Algorithm
3
3
  class DBScan
4
4
  # Find clusters and outliers
5
5
  #
@@ -18,11 +18,6 @@ module DataMining
18
18
  # data: (array of arrays, like [[:id, value], [:id2, value2]]
19
19
  # radius: (integer)
20
20
  # min_points: (integer)
21
-
22
- def self.cluster(data, radius, min_points)
23
- DBScan.new(data, radius, min_points)
24
- end
25
-
26
21
  def initialize(data, radius, min_points)
27
22
  @data = data.map { |i, v| DataMining::Point.new(i, v) }
28
23
  @radius = radius
@@ -32,7 +27,7 @@ module DataMining
32
27
  @unvisited_points = @data.shuffle
33
28
  end
34
29
 
35
- def build!
30
+ def cluster!
36
31
  dbscan
37
32
  clusters
38
33
  end
@@ -109,7 +104,11 @@ module DataMining
109
104
  def euclidean_distance(p1, p2)
110
105
  p1 = p1.value
111
106
  p2 = p2.value
112
- Math.sqrt(p1.each_with_index.inject(0) { |sum, (v, i)| sum + ((v - p2[i])**2) })
107
+ Math.sqrt(
108
+ p1.each_with_index.inject(0) do |sum, (v, i)|
109
+ sum + ((v - p2[i])**2)
110
+ end
111
+ )
113
112
  end
114
113
  end
115
114
  end
@@ -7,7 +7,7 @@ module DataMining
7
7
  #
8
8
  # Arguments:
9
9
  # id: (symbol)
10
- # value: (integer)
10
+ # value: (array)
11
11
 
12
12
  def initialize(id, value)
13
13
  @id = id
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_mining
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Manuel Stuefer
@@ -9,7 +9,35 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
  date: 2015-06-23 00:00:00.000000000 Z
12
- dependencies: []
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: minitest
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '5.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '5.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest-reporters
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.0'
13
41
  description: A collection of data mining algorithms
14
42
  email: mstuefer@gmail.com
15
43
  executables: []
@@ -17,6 +45,7 @@ extensions: []
17
45
  extra_rdoc_files: []
18
46
  files:
19
47
  - lib/data_mining.rb
48
+ - lib/data_mining/apriori.rb
20
49
  - lib/data_mining/dbscan.rb
21
50
  - lib/data_mining/point.rb
22
51
  homepage: https://github.com/mstuefer/data_mining