apriori-ruby 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 34233d76397403835023b9acc4fbc879105ecefd
4
+ data.tar.gz: 8f7c58b988c48461be7021268bb5957c66dd2488
5
+ SHA512:
6
+ metadata.gz: 4713c04c787db63bb82c4a61faca537b67c6f77bc2bcc6f482da3946380c70721e46f2dcbb4ead9d9cbb24e2b33abdd06262d5bd1445b90dce39d3715da2a6ff
7
+ data.tar.gz: 87d53ba9aef49e50e3761e28108413827442c32280a0fa03b5bf342e52113c15b437542416121488ab5066b1234a68a53f78efb3d7c50a8060bd671e319c6113
data/.gitignore ADDED
@@ -0,0 +1,34 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /doc/
21
+ /rdoc/
22
+
23
+ ## Environment normalisation:
24
+ /.bundle/
25
+ /lib/bundler/man/
26
+
27
+ # for a library or gem, you might want to ignore these files since the code is
28
+ # intended to run in multiple environments; otherwise, check them in:
29
+ # Gemfile.lock
30
+ # .ruby-version
31
+ # .ruby-gemset
32
+
33
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
34
+ .rvmrc
data/.rspec ADDED
@@ -0,0 +1,4 @@
1
+ --color
2
+ --warnings
3
+ --format documentation
4
+ --require spec_helper
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.1.2
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Bundler manifest used when developing the gem.
source 'https://rubygems.org'

# Dependencies declared in the gemspec.
gemspec

# Required by spec/spec_helper.rb to build the sample data used in the specs.
gem 'factory_girl'
data/Gemfile.lock ADDED
@@ -0,0 +1,46 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ apriori (0.0.1)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ activesupport (4.1.5)
10
+ i18n (~> 0.6, >= 0.6.9)
11
+ json (~> 1.7, >= 1.7.7)
12
+ minitest (~> 5.1)
13
+ thread_safe (~> 0.1)
14
+ tzinfo (~> 1.1)
15
+ diff-lcs (1.2.5)
16
+ factory_girl (4.4.0)
17
+ activesupport (>= 3.0.0)
18
+ i18n (0.6.11)
19
+ json (1.8.1)
20
+ minitest (5.4.0)
21
+ rake (10.3.2)
22
+ rspec (3.0.0)
23
+ rspec-core (~> 3.0.0)
24
+ rspec-expectations (~> 3.0.0)
25
+ rspec-mocks (~> 3.0.0)
26
+ rspec-core (3.0.4)
27
+ rspec-support (~> 3.0.0)
28
+ rspec-expectations (3.0.4)
29
+ diff-lcs (>= 1.2.0, < 2.0)
30
+ rspec-support (~> 3.0.0)
31
+ rspec-mocks (3.0.4)
32
+ rspec-support (~> 3.0.0)
33
+ rspec-support (3.0.4)
34
+ thread_safe (0.3.4)
35
+ tzinfo (1.2.2)
36
+ thread_safe (~> 0.1)
37
+
38
+ PLATFORMS
39
+ ruby
40
+
41
+ DEPENDENCIES
42
+ apriori!
43
+ bundler (~> 1.7)
44
+ factory_girl
45
+ rake (~> 10.0)
46
+ rspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Bryan Mulvihill
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,16 @@
1
+ Apriori Algorithm
2
+ ===========
3
+ http://en.wikipedia.org/wiki/Apriori_algorithm
4
+
5
+ Implementation Project for CS 634 - Data Mining
6
+
7
+ **Project is still in progress**
8
+ Requirements: Ruby 2.1.X
9
+ Installation
10
+ ```bash
11
+ gem install apriori-ruby
12
+ ```
13
+ Or add to Gemfile
14
+ ```ruby
15
+ gem 'apriori-ruby'
16
+ ```
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/apriori.gemspec ADDED
@@ -0,0 +1,23 @@
1
# coding: utf-8
# Gem specification for apriori-ruby.
# Puts ./lib on the load path so the version constant can be read before the
# gem is installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'apriori/version'

Gem::Specification.new do |spec|
  spec.name          = "apriori-ruby"
  spec.version       = Apriori::VERSION
  spec.authors       = ["Bryan Mulvihill"]
  spec.email         = ["mulvihill.bryan@gmail.com"]
  spec.summary       = %q{Ruby implementation of Apriori Algorithm}
  # RubyGems warns at build time when a gem ships without a description.
  spec.description   = %q{Mines frequent item sets and association rules from a hash of transactions using the Apriori algorithm.}
  spec.homepage      = "https://github.com/bmulvihill/apriori"
  spec.license       = "MIT"

  # Package every file tracked by git; -z / "\x0" keeps odd file names intact.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency 'rspec'
end
@@ -0,0 +1,16 @@
1
+ module Apriori
2
+ class Algorithm
3
+ attr_accessor :min_support, :min_confidence, :item_set
4
+
5
+ def initialize(item_set)
6
+ @item_set = item_set
7
+ end
8
+
9
+ def mine(min_support=0, min_confidence=0)
10
+ @min_support, @min_confidence = min_support, min_confidence
11
+ item_set.create_frequent_item_sets(min_support)
12
+ item_set.create_association_rules(min_confidence)
13
+ end
14
+
15
+ end
16
+ end
@@ -0,0 +1,71 @@
1
module Apriori
  # Wraps a transaction data set and derives frequent item sets and
  # association rules from it.
  #
  # The data set is a Hash whose values are Arrays of items (one Array per
  # transaction); the keys are only used as transaction identifiers.
  class ItemSet
    attr_reader :data_set, :item_set, :candidates, :iteration, :min_support, :min_confidence

    # @param data_set [Hash] transaction id => Array of items
    def initialize(data_set)
      @data_set = data_set
    end

    # @return [Array<List>] lists collected by the last call to
    #   create_frequent_item_sets (empty before the first call)
    def frequent_item_sets
      @frequent_item_sets ||= []
    end

    # Iteratively filters candidates by support and generates the next round
    # of candidates until none are left.
    #
    # @param min_support [Numeric] minimum support, as a percentage (0..100)
    # @return [Array<List>] the collected lists; the list of iteration 1 is
    #   not stored
    def create_frequent_item_sets(min_support)
      @min_support = min_support
      @iteration = 0
      # Start from a clean slate: without this a second run would append to
      # the previous results and reuse lists filtered with the old threshold.
      @frequent_item_sets = []
      @list = {}
      @candidates = convert_initial_data_set
      while candidates.any?
        @iteration += 1
        @candidates = list.make_candidates
        frequent_item_sets << list unless iteration == 1
      end
      frequent_item_sets
    end

    # Builds "antecedent=>consequent" rules from every proper, non-empty
    # subset of each collected set.
    #
    # @param min_confidence [Numeric] minimum confidence, as a percentage
    # @return [Hash{String=>Float}] rule name => confidence, restricted to
    #   rules whose confidence is at least min_confidence
    def create_association_rules(min_confidence)
      @min_confidence = min_confidence
      rules = {}
      frequent_item_sets.each do |freq_list|
        freq_list.sets.each do |set|
          items = set.flatten
          List.create_subsets(set).each do |combo|
            antecedent = combo.flatten
            consequent = items - antecedent
            rule_name = "#{combo.join(',')}=>#{consequent.join(',')}"
            rules[rule_name] = confidence(antecedent, consequent)
          end
        end
      end
      rules.select { |_name, value| value >= min_confidence }
    end

    # @param item [Array] items that must all occur in a transaction
    # @return [Float] percentage of transactions containing every item;
    #   0.0 for an empty data set (instead of NaN)
    def support(item)
      return 0.0 if data_set.empty?

      (count_frequency(item).to_f / data_set.size) * 100
    end

    # @param set1 [Array] antecedent items
    # @param set2 [Array] consequent items
    # @return [Float] support(set1 + set2) relative to support(set1), as a
    #   percentage; 0.0 when set1 never occurs (instead of NaN)
    def confidence(set1, set2)
      antecedent_support = support(set1)
      return 0.0 if antecedent_support.zero?

      (support(set1 + set2) / antecedent_support) * 100
    end

    # @param set [Array] items to look for
    # @return [Integer] number of transactions containing every item of set
    def count_frequency(set)
      data_set.count { |_transaction, items| contains_all?(items, set) }
    end

    # @return [Boolean] true when every element of subset is also in set
    def contains_all?(set, subset)
      set.to_set.superset?(subset.to_set)
    end

    private

    # List for the current iteration, built once per iteration from the
    # candidates that meet the minimum support.
    def list
      @list ||= {}
      @list[iteration] ||= List.new(reject_candidates, iteration)
    end

    # Candidates whose support reaches min_support.
    def reject_candidates
      candidates.reject { |item| support(item) < min_support }
    end

    # Every distinct item of the data set wrapped in its own one-element array.
    def convert_initial_data_set
      @data_set.values.flatten.uniq.map { |item| [item] }
    end
  end
end
@@ -0,0 +1,33 @@
1
module Apriori
  # Holds the item sets of one iteration and generates the candidate sets
  # for the next one.
  class List
    attr_reader :sets, :iteration

    # @param sets [Array<Array>] item sets of this iteration
    # @param iteration [Integer] size of the candidates to generate
    def initialize(sets, iteration)
      @sets = sets
      @iteration = iteration
    end

    # @param set [Array] an item set
    # @return [Array<Array>] every non-empty proper subset of set, smallest
    #   first; empty when set has fewer than two elements
    def self.create_subsets(set)
      1.upto(set.size - 1).flat_map { |n| set.combination(n).to_a }
    end

    # For the first two iterations every combination of the individual items
    # is a candidate; afterwards candidates come from joining sets that share
    # all but their last item.
    #
    # @return [Array<Array>] candidate item sets
    def make_candidates
      return sets.flatten.combination(iteration).to_a if iteration <= 2

      self_join(prune)
    end

    private

    # Joins each PAIR of sets that agree on everything but the last item into
    # one candidate that is exactly one item longer. Merging the whole prefix
    # group at once would turn [1,2],[1,3],[1,4] into [1,2,3,4] and skip the
    # three-item candidates entirely.
    def self_join(set)
      set.uniq
         .combination(2)
         .select { |left, right| left[0...-1] == right[0...-1] }
         .map { |left, right| left + [right.last] }
         .uniq
    end

    # Drops sets whose prefix (all but the last item) is shared with no other
    # set, since they cannot take part in a join.
    def prune
      prefix_counts = sets.each_with_object(Hash.new(0)) do |set, counts|
        counts[set[0...-1]] += 1
      end
      sets.reject { |set| prefix_counts[set[0...-1]] == 1 }
    end
  end
end
@@ -0,0 +1,3 @@
1
module Apriori
  # Gem version, read by the gemspec. Frozen so it cannot be mutated at runtime.
  VERSION = "0.0.4".freeze
end
data/lib/apriori.rb ADDED
@@ -0,0 +1,5 @@
1
+ require 'apriori/algorithm.rb'
2
+ require 'apriori/item_set.rb'
3
+ require 'apriori/list.rb'
4
+ require 'set'
5
+ require 'apriori/version.rb'
@@ -0,0 +1,18 @@
1
# Factories used by the specs: a five-transaction sample data set plus the
# ItemSet and Algorithm objects built on top of it.
FactoryGirl.define do
  factory :algorithm, class: Apriori::Algorithm do
    initialize_with do
      new(FactoryGirl.build(:item_set))
    end
  end

  factory :item_set, class: Apriori::ItemSet do
    initialize_with do
      new(FactoryGirl.build(:sample_data))
    end
  end

  # Builds a plain Hash of transaction id => items from the attributes below.
  factory :sample_data, class: Hash do
    t1(%w[Mango Onion Nintendo Keychain Eggs Yoyo])
    t2(%w[Doll Onion Nintendo Keychain Eggs Yoyo])
    t3(%w[Mango Apple Keychain Eggs])
    t4(%w[Mango Umbrella Corn Keychain Yoyo])
    t5(%w[Corn Onion Onion Keychain Icecream Eggs])
    initialize_with { attributes }
  end
end
@@ -0,0 +1,12 @@
1
describe Apriori::Algorithm do
  before do
    @apriori = FactoryGirl.build(:algorithm)
  end

  context '#mine' do
    it 'returns all association rules meeting the minimum support and confidence' do
      rules = @apriori.mine(50)

      # In the sample data Mango appears in 3 of 5 transactions and always
      # together with Keychain, so the rule holds with full confidence.
      expect(rules).to include('Mango=>Keychain' => 100.0)
      # Keychain appears in every transaction but Mango only in 3 of them.
      expect(rules).to have_key('Keychain=>Mango')
    end

    it 'drops rules below the minimum confidence' do
      rules = @apriori.mine(50, 80)

      expect(rules).to include('Mango=>Keychain' => 100.0)
      expect(rules).not_to have_key('Keychain=>Mango')
      expect(rules.values.all? { |confidence| confidence >= 80 }).to be true
    end
  end
end
@@ -0,0 +1,50 @@
1
# Specs for the support/confidence arithmetic and rule generation of ItemSet,
# run against the sample data factory.
describe Apriori::ItemSet do
  before do
    @item_set = Apriori::ItemSet.new(FactoryGirl.build(:sample_data))
  end

  context '#confidence' do
    it 'will return a rule with support and confidence' do
      antecedent = ['Eggs']
      consequent = ['Onion', 'Keychain']

      result = @item_set.confidence(antecedent, consequent)

      expect(result).to eql(75.0)
    end
  end

  context '#support' do
    it 'will return the support of an item' do
      expected = (3.to_f/5) * 100

      expect(@item_set.support(['Mango'])).to eql(expected)
    end
  end

  context '#create_association_rules' do
    it 'creates association rules for all combinations' do
      transactions = {
        :t1 => ['1','2','3'],
        :t2 => ['1','2','4'],
        :t3 => ['1','4','5']
      }
      expected_rules = {
        "1=>2" => 66.66666666666666,
        "2=>1" => 100.0,
        "1=>4" => 66.66666666666666,
        "4=>1" => 100.0
      }
      @set = Apriori::ItemSet.new(transactions)
      @set.create_frequent_item_sets(60)

      expect(@set.create_association_rules(60)).to eql(expected_rules)
    end
  end

  context '#count_frequency' do
    it 'will return the frequency of an item in the data set' do
      expect(@item_set.count_frequency(['Mango'])).to eql(3)
    end
  end

  context '#contains_all?' do
    it 'will return true if one array contains all elements of sub set' do
      result = @item_set.contains_all?([1,2,3], [2,1])

      expect(result).to be true
    end

    it 'will return false if one array does not contain all the elements of a sub set' do
      result = @item_set.contains_all?([1,2,3,4], [2,1,5])

      expect(result).to be false
    end
  end
end
@@ -0,0 +1,33 @@
1
# Specs for candidate generation and subset enumeration in List.
describe Apriori::List do
  context '#make_candidates' do
    it 'will return all combinations for first two iterations' do
      array = ['Hi1','Hi2','Hi3']
      list = Apriori::List.new(array, 2)
      expect(list.make_candidates).to eql([['Hi1','Hi2'],['Hi1','Hi3'],['Hi2','Hi3']])
    end

    it 'will return the self join after the first two iterations' do
      array = [['Hi1','Hi2'], ['Hi1','Hi3'], ['Blah1', 'Blah2'], ['Blah1', 'Blah3']]
      list = Apriori::List.new(array, 3)
      expect(list.make_candidates).to eql([['Hi1','Hi2','Hi3'],['Blah1','Blah2','Blah3']])
    end

    it 'will prune elements' do
      # ['Blarg1', 'Blarg2'] shares its prefix with no other set and must not
      # contribute a candidate.
      array = [['Hi1','Hi2'], ['Hi1','Hi3'], ['Blah1', 'Blah2'], ['Blah1', 'Blah3'], ['Blarg1', 'Blarg2']]
      list = Apriori::List.new(array, 3)
      expect(list.make_candidates).to eql([['Hi1','Hi2','Hi3'],['Blah1','Blah2','Blah3']])
    end
  end

  context '#self.create_subsets' do
    # The unused `array` locals were removed: the project runs RSpec with
    # --warnings, which reports them as "assigned but unused variable".
    it 'returns nothing if the subset size is 1' do
      expect(Apriori::List.create_subsets([1])).to eql([])
    end

    it 'returns all possible subsets of an array' do
      expect(Apriori::List.create_subsets([1,2,3])).to eql([[1],[2],[3],[1,2],[1,3],[2,3]])
    end
  end
end
@@ -0,0 +1,7 @@
1
+ require 'factory_girl'
2
+ require 'apriori'
3
+
4
# Make FactoryGirl's syntax methods available inside examples and load the
# factory definitions once while RSpec is being configured.
RSpec.configure do |rspec|
  rspec.include(FactoryGirl::Syntax::Methods)
  FactoryGirl.find_definitions
end
metadata ADDED
@@ -0,0 +1,110 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: apriori-ruby
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.4
5
+ platform: ruby
6
+ authors:
7
+ - Bryan Mulvihill
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-09-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description:
56
+ email:
57
+ - mulvihill.bryan@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - ".rspec"
64
+ - ".ruby-version"
65
+ - Gemfile
66
+ - Gemfile.lock
67
+ - LICENSE.txt
68
+ - README.md
69
+ - Rakefile
70
+ - apriori.gemspec
71
+ - lib/apriori.rb
72
+ - lib/apriori/algorithm.rb
73
+ - lib/apriori/item_set.rb
74
+ - lib/apriori/list.rb
75
+ - lib/apriori/version.rb
76
+ - spec/factories/apriori/apriori.rb
77
+ - spec/lib/apriori/algorithm_spec.rb
78
+ - spec/lib/apriori/item_set_spec.rb
79
+ - spec/lib/apriori/list_spec.rb
80
+ - spec/spec_helper.rb
81
+ homepage: https://github.com/bmulvihill/apriori
82
+ licenses:
83
+ - MIT
84
+ metadata: {}
85
+ post_install_message:
86
+ rdoc_options: []
87
+ require_paths:
88
+ - lib
89
+ required_ruby_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ required_rubygems_version: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - ">="
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ requirements: []
100
+ rubyforge_project:
101
+ rubygems_version: 2.2.2
102
+ signing_key:
103
+ specification_version: 4
104
+ summary: Ruby implementation of Apriori Algorithm
105
+ test_files:
106
+ - spec/factories/apriori/apriori.rb
107
+ - spec/lib/apriori/algorithm_spec.rb
108
+ - spec/lib/apriori/item_set_spec.rb
109
+ - spec/lib/apriori/list_spec.rb
110
+ - spec/spec_helper.rb