apriori-ruby 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 34233d76397403835023b9acc4fbc879105ecefd
4
+ data.tar.gz: 8f7c58b988c48461be7021268bb5957c66dd2488
5
+ SHA512:
6
+ metadata.gz: 4713c04c787db63bb82c4a61faca537b67c6f77bc2bcc6f482da3946380c70721e46f2dcbb4ead9d9cbb24e2b33abdd06262d5bd1445b90dce39d3715da2a6ff
7
+ data.tar.gz: 87d53ba9aef49e50e3761e28108413827442c32280a0fa03b5bf342e52113c15b437542416121488ab5066b1234a68a53f78efb3d7c50a8060bd671e319c6113
data/.gitignore ADDED
@@ -0,0 +1,34 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /doc/
21
+ /rdoc/
22
+
23
+ ## Environment normalisation:
24
+ /.bundle/
25
+ /lib/bundler/man/
26
+
27
+ # for a library or gem, you might want to ignore these files since the code is
28
+ # intended to run in multiple environments; otherwise, check them in:
29
+ # Gemfile.lock
30
+ # .ruby-version
31
+ # .ruby-gemset
32
+
33
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
34
+ .rvmrc
data/.rspec ADDED
@@ -0,0 +1,4 @@
1
+ --color
2
+ --warnings
3
+ --format documentation
4
+ --require spec_helper
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.1.2
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ # A sample Gemfile
2
+ source "https://rubygems.org"
3
+ gemspec
4
+ gem 'factory_girl'
data/Gemfile.lock ADDED
@@ -0,0 +1,46 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ apriori (0.0.1)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ activesupport (4.1.5)
10
+ i18n (~> 0.6, >= 0.6.9)
11
+ json (~> 1.7, >= 1.7.7)
12
+ minitest (~> 5.1)
13
+ thread_safe (~> 0.1)
14
+ tzinfo (~> 1.1)
15
+ diff-lcs (1.2.5)
16
+ factory_girl (4.4.0)
17
+ activesupport (>= 3.0.0)
18
+ i18n (0.6.11)
19
+ json (1.8.1)
20
+ minitest (5.4.0)
21
+ rake (10.3.2)
22
+ rspec (3.0.0)
23
+ rspec-core (~> 3.0.0)
24
+ rspec-expectations (~> 3.0.0)
25
+ rspec-mocks (~> 3.0.0)
26
+ rspec-core (3.0.4)
27
+ rspec-support (~> 3.0.0)
28
+ rspec-expectations (3.0.4)
29
+ diff-lcs (>= 1.2.0, < 2.0)
30
+ rspec-support (~> 3.0.0)
31
+ rspec-mocks (3.0.4)
32
+ rspec-support (~> 3.0.0)
33
+ rspec-support (3.0.4)
34
+ thread_safe (0.3.4)
35
+ tzinfo (1.2.2)
36
+ thread_safe (~> 0.1)
37
+
38
+ PLATFORMS
39
+ ruby
40
+
41
+ DEPENDENCIES
42
+ apriori!
43
+ bundler (~> 1.7)
44
+ factory_girl
45
+ rake (~> 10.0)
46
+ rspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Bryan Mulvihill
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,16 @@
1
+ Apriori Algorithm
2
+ ===========
3
+ http://en.wikipedia.org/wiki/Apriori_algorithm
4
+
5
+ Implementation Project for CS 634 - Data Mining
6
+
7
+ **Project is still in progress**
8
+ Requirements: Ruby 2.1.X
9
+ Installation
10
+ ```bash
11
+ gem install apriori-ruby
12
+ ```
13
+ Or add it to your Gemfile:
14
+ ```ruby
15
+ gem 'apriori-ruby'
16
+ ```
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/apriori.gemspec ADDED
@@ -0,0 +1,23 @@
1
+ # coding: utf-8
2
# Make lib/ loadable so the version constant can be read without installing the gem.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'apriori/version'

# Gem packaging definition for apriori-ruby.
Gem::Specification.new do |spec|
  # Identity and authorship.
  spec.name     = 'apriori-ruby'
  spec.version  = Apriori::VERSION
  spec.authors  = ['Bryan Mulvihill']
  spec.email    = ['mulvihill.bryan@gmail.com']
  spec.summary  = 'Ruby implementation of Apriori Algorithm'
  spec.homepage = 'https://github.com/bmulvihill/apriori'
  spec.license  = 'MIT'

  # Package every file tracked by git; executables and test files are
  # derived from that list by path prefix.
  tracked_files      = `git ls-files -z`.split("\x0")
  spec.files         = tracked_files
  spec.executables   = tracked_files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = tracked_files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # Development-only tooling.
  { 'bundler' => '~> 1.7', 'rake' => '~> 10.0' }.each do |gem_name, requirement|
    spec.add_development_dependency gem_name, requirement
  end
  spec.add_development_dependency 'rspec'
end
@@ -0,0 +1,16 @@
1
+ module Apriori
2
+ class Algorithm
3
+ attr_accessor :min_support, :min_confidence, :item_set
4
+
5
+ def initialize(item_set)
6
+ @item_set = item_set
7
+ end
8
+
9
+ def mine(min_support=0, min_confidence=0)
10
+ @min_support, @min_confidence = min_support, min_confidence
11
+ item_set.create_frequent_item_sets(min_support)
12
+ item_set.create_association_rules(min_confidence)
13
+ end
14
+
15
+ end
16
+ end
@@ -0,0 +1,71 @@
1
+ module Apriori
2
+ class ItemSet
3
+ attr_reader :data_set, :item_set, :candidates, :iteration, :min_support, :min_confidence
4
+
5
+ def initialize(data_set)
6
+ @data_set = data_set
7
+ end
8
+
9
+ def frequent_item_sets
10
+ @frequent_item_sets ||= []
11
+ end
12
+
13
+ def create_frequent_item_sets min_support
14
+ @min_support = min_support
15
+ @iteration = 0
16
+ @candidates = convert_initial_data_set
17
+ while candidates.any?
18
+ @iteration += 1
19
+ @candidates = list.make_candidates
20
+ frequent_item_sets << list unless iteration == 1
21
+ end
22
+ frequent_item_sets
23
+ end
24
+
25
+ def create_association_rules min_confidence
26
+ rules ={}
27
+ frequent_item_sets.each do |freq_lists|
28
+ freq_lists.sets.each do |set|
29
+ List.create_subsets(set).each do |combo|
30
+ rule_name = "#{combo.join(',')}=>#{(set.flatten - combo.flatten).join(',')}"
31
+ rules[rule_name] = confidence(combo.flatten, (set.flatten - combo.flatten))
32
+ end
33
+ end
34
+ end
35
+ rules.select{|k,v| v >= min_confidence}
36
+ end
37
+
38
+ def support item
39
+ (count_frequency(item).to_f / data_set.size) * 100
40
+ end
41
+
42
+ def confidence set1, set2
43
+ (support(set1 + set2) / support(set1)) * 100
44
+ end
45
+
46
+ def count_frequency set
47
+ data_set.map do |transaction, items|
48
+ contains_all?(items, set)
49
+ end.reject {|item| item == false }.size
50
+ end
51
+
52
+ def contains_all? set, subset
53
+ set.to_set.superset? subset.to_set
54
+ end
55
+
56
+ private
57
+
58
+ def list
59
+ @list ||= {}
60
+ @list[iteration] ||= List.new(reject_candidates, iteration)
61
+ end
62
+
63
+ def reject_candidates
64
+ candidates.reject{|item| support(item) < min_support}
65
+ end
66
+
67
+ def convert_initial_data_set
68
+ @data_set.values.flatten.uniq.map{|item| [item]}
69
+ end
70
+ end
71
+ end
@@ -0,0 +1,33 @@
1
+ module Apriori
2
+ class List
3
+ attr_reader :sets, :iteration
4
+
5
+ def initialize sets, iteration
6
+ @sets = sets
7
+ @iteration = iteration
8
+ end
9
+
10
+ def self.create_subsets set
11
+ (1).upto(set.size - 1).flat_map { |n| set.combination(n).to_a }
12
+ end
13
+
14
+ def make_candidates
15
+ if iteration <= 2
16
+ sets.flatten.combination(iteration).to_a
17
+ else
18
+ self_join prune
19
+ end
20
+ end
21
+
22
+ private
23
+
24
+ def self_join set
25
+ set.map{|a1| set.select{|a2| a1[0...-1] == a2[0...-1]}.flatten.uniq}.uniq
26
+ end
27
+
28
+ def prune
29
+ sets.reject{|a1| sets.select{|a2| a1[0...-1] == a2[0...-1]}.size == 1}
30
+ end
31
+
32
+ end
33
+ end
@@ -0,0 +1,3 @@
1
+ module Apriori
2
+ VERSION = "0.0.4"
3
+ end
data/lib/apriori.rb ADDED
@@ -0,0 +1,5 @@
1
+ require 'apriori/algorithm.rb'
2
+ require 'apriori/item_set.rb'
3
+ require 'apriori/list.rb'
4
+ require 'set'
5
+ require 'apriori/version.rb'
@@ -0,0 +1,18 @@
1
# Test factories: a sample transaction hash, an item set built from it, and
# an algorithm built from that item set.
FactoryGirl.define do
  factory :algorithm, class: Apriori::Algorithm do
    initialize_with { new(FactoryGirl.build(:item_set)) }
  end

  factory :item_set, class: Apriori::ItemSet do
    initialize_with { new(FactoryGirl.build(:sample_data)) }
  end

  # Five transactions (t1..t5); the built object is the plain attributes hash.
  factory :sample_data, class: Hash do
    t1(%w[Mango Onion Nintendo Keychain Eggs Yoyo])
    t2(%w[Doll Onion Nintendo Keychain Eggs Yoyo])
    t3(%w[Mango Apple Keychain Eggs])
    t4(%w[Mango Umbrella Corn Keychain Yoyo])
    t5(%w[Corn Onion Onion Keychain Icecream Eggs])
    initialize_with { attributes }
  end
end
@@ -0,0 +1,12 @@
1
describe Apriori::Algorithm do
  before do
    @apriori = FactoryGirl.build(:algorithm)
  end

  context '#mine' do
    it 'returns all association rules meeting the minimum support and confidence' do
      rules = @apriori.mine(50, 80)

      # Every returned rule must satisfy the confidence threshold.
      expect(rules).not_to be_empty
      expect(rules.values).to all(be >= 80)
      # Eggs and Keychain appear together in every transaction containing
      # Eggs in the sample data, so this rule has full confidence.
      expect(rules).to include('Eggs=>Keychain' => 100.0)
    end
  end
end
@@ -0,0 +1,50 @@
1
describe Apriori::ItemSet do
  # Item set over the five-transaction sample data from the factories.
  let(:item_set) { Apriori::ItemSet.new(FactoryGirl.build(:sample_data)) }

  context '#confidence' do
    it 'will return the confidence of a rule as a percentage' do
      confidence = item_set.confidence(['Eggs'], ['Onion', 'Keychain'])
      expect(confidence).to be_within(1e-9).of(75.0)
    end
  end

  context '#support' do
    it 'will return the support of an item' do
      expect(item_set.support(['Mango'])).to be_within(1e-9).of((3.to_f / 5) * 100)
    end
  end

  context '#create_association_rules' do
    it 'creates association rules for all combinations' do
      set = Apriori::ItemSet.new({ :t1 => ['1', '2', '3'], :t2 => ['1', '2', '4'], :t3 => ['1', '4', '5'] })
      set.create_frequent_item_sets(60)
      rules = set.create_association_rules(60)

      # Floats are compared with a tolerance rather than digit-for-digit.
      expect(rules.keys).to contain_exactly('1=>2', '2=>1', '1=>4', '4=>1')
      expect(rules['1=>2']).to be_within(1e-9).of(200.0 / 3)
      expect(rules['2=>1']).to be_within(1e-9).of(100.0)
      expect(rules['1=>4']).to be_within(1e-9).of(200.0 / 3)
      expect(rules['4=>1']).to be_within(1e-9).of(100.0)
    end
  end

  context '#count_frequency' do
    it 'will return the frequency of an item in the data set' do
      expect(item_set.count_frequency(['Mango'])).to eql(3)
    end
  end

  context '#contains_all?' do
    it 'will return true if one array contains all elements of sub set' do
      expect(item_set.contains_all?([1, 2, 3], [2, 1])).to be true
    end

    it 'will return false if one array does not contain all the elements of a sub set' do
      expect(item_set.contains_all?([1, 2, 3, 4], [2, 1, 5])).to be false
    end
  end
end
@@ -0,0 +1,33 @@
1
describe Apriori::List do
  context '#make_candidates' do
    it 'will return all combinations for first two iterations' do
      list = Apriori::List.new(['Hi1', 'Hi2', 'Hi3'], 2)
      expect(list.make_candidates).to eql([['Hi1', 'Hi2'], ['Hi1', 'Hi3'], ['Hi2', 'Hi3']])
    end

    it 'will return the self join after the first two iterations' do
      sets = [['Hi1', 'Hi2'], ['Hi1', 'Hi3'], ['Blah1', 'Blah2'], ['Blah1', 'Blah3']]
      list = Apriori::List.new(sets, 3)
      expect(list.make_candidates).to eql([['Hi1', 'Hi2', 'Hi3'], ['Blah1', 'Blah2', 'Blah3']])
    end

    it 'will prune elements' do
      # ['Blarg1', 'Blarg2'] shares its prefix with no other set, so it is dropped.
      sets = [['Hi1', 'Hi2'], ['Hi1', 'Hi3'], ['Blah1', 'Blah2'], ['Blah1', 'Blah3'], ['Blarg1', 'Blarg2']]
      list = Apriori::List.new(sets, 3)
      expect(list.make_candidates).to eql([['Hi1', 'Hi2', 'Hi3'], ['Blah1', 'Blah2', 'Blah3']])
    end
  end

  context '#self.create_subsets' do
    it 'returns nothing if the subset size is 1' do
      expect(Apriori::List.create_subsets([1])).to eql([])
    end

    it 'returns all possible subsets of an array' do
      expect(Apriori::List.create_subsets([1, 2, 3])).to eql([[1], [2], [3], [1, 2], [1, 3], [2, 3]])
    end
  end
end
@@ -0,0 +1,7 @@
1
+ require 'factory_girl'
2
+ require 'apriori'
3
+
4
# Load the factory definitions from FactoryGirl's definition paths.
FactoryGirl.find_definitions

# Make FactoryGirl's syntax methods available inside examples.
RSpec.configure { |rspec| rspec.include(FactoryGirl::Syntax::Methods) }
metadata ADDED
@@ -0,0 +1,110 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: apriori-ruby
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.4
5
+ platform: ruby
6
+ authors:
7
+ - Bryan Mulvihill
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-09-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description:
56
+ email:
57
+ - mulvihill.bryan@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - ".rspec"
64
+ - ".ruby-version"
65
+ - Gemfile
66
+ - Gemfile.lock
67
+ - LICENSE.txt
68
+ - README.md
69
+ - Rakefile
70
+ - apriori.gemspec
71
+ - lib/apriori.rb
72
+ - lib/apriori/algorithm.rb
73
+ - lib/apriori/item_set.rb
74
+ - lib/apriori/list.rb
75
+ - lib/apriori/version.rb
76
+ - spec/factories/apriori/apriori.rb
77
+ - spec/lib/apriori/algorithm_spec.rb
78
+ - spec/lib/apriori/item_set_spec.rb
79
+ - spec/lib/apriori/list_spec.rb
80
+ - spec/spec_helper.rb
81
+ homepage: https://github.com/bmulvihill/apriori
82
+ licenses:
83
+ - MIT
84
+ metadata: {}
85
+ post_install_message:
86
+ rdoc_options: []
87
+ require_paths:
88
+ - lib
89
+ required_ruby_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ required_rubygems_version: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - ">="
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ requirements: []
100
+ rubyforge_project:
101
+ rubygems_version: 2.2.2
102
+ signing_key:
103
+ specification_version: 4
104
+ summary: Ruby implementation of Apriori Algorithm
105
+ test_files:
106
+ - spec/factories/apriori/apriori.rb
107
+ - spec/lib/apriori/algorithm_spec.rb
108
+ - spec/lib/apriori/item_set_spec.rb
109
+ - spec/lib/apriori/list_spec.rb
110
+ - spec/spec_helper.rb