apriori 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +16 -0
- data/License.txt +20 -0
- data/Manifest.txt +121 -0
- data/README.txt +149 -0
- data/Rakefile +15 -0
- data/TODO.txt +60 -0
- data/attic/c_ext_test1/MyTest/MyTest.c +23 -0
- data/attic/c_ext_test1/MyTest/extconf.rb +11 -0
- data/attic/c_ext_test1/mytest.rb +10 -0
- data/attic/test.c +12 -0
- data/config/hoe.rb +81 -0
- data/config/requirements.rb +29 -0
- data/examples/01_simple_example.rb +32 -0
- data/examples/02_small_file_example.rb +17 -0
- data/examples/03_large_file_example.rb +22 -0
- data/examples/test_data/market_basket_basic_test.dat +9 -0
- data/ext/Apriori.c +149 -0
- data/ext/Makefile +149 -0
- data/ext/apriori/doc/apriori.html +1301 -0
- data/ext/apriori/doc/arem.gp +68 -0
- data/ext/apriori/doc/c_rev.gp +89 -0
- data/ext/apriori/doc/chi2.tex +156 -0
- data/ext/apriori/doc/copying +504 -0
- data/ext/apriori/doc/line.gif +0 -0
- data/ext/apriori/doc/uparrow.gif +0 -0
- data/ext/apriori/ex/flg2set +15 -0
- data/ext/apriori/ex/hdr2set +13 -0
- data/ext/apriori/ex/readme +71 -0
- data/ext/apriori/ex/row2set +7 -0
- data/ext/apriori/ex/rulesort +24 -0
- data/ext/apriori/ex/tab2set +9 -0
- data/ext/apriori/ex/test.app +2 -0
- data/ext/apriori/ex/test.rul +9 -0
- data/ext/apriori/ex/test1.rul +43 -0
- data/ext/apriori/ex/test1.tab +10 -0
- data/ext/apriori/ex/test2.tab +10 -0
- data/ext/apriori/ex/test3.tab +30 -0
- data/ext/apriori/ex/test4.tab +11 -0
- data/ext/apriori/ex/test5.tab +39 -0
- data/ext/apriori/ex/tid2set +23 -0
- data/ext/apriori/ex/xhdr2set +33 -0
- data/ext/apriori/src/apriori.c +750 -0
- data/ext/apriori/src/apriori.dsp +120 -0
- data/ext/apriori/src/apriori.dsw +29 -0
- data/ext/apriori/src/apriori.mak +99 -0
- data/ext/apriori/src/istree.c +1411 -0
- data/ext/apriori/src/istree.h +160 -0
- data/ext/apriori/src/makefile +105 -0
- data/ext/apriori/src/tract.c +870 -0
- data/ext/apriori/src/tract.h +261 -0
- data/ext/apriori_wrapper.c +757 -0
- data/ext/apriori_wrapper.h +10 -0
- data/ext/extconf.rb +32 -0
- data/ext/math/doc/copying +504 -0
- data/ext/math/src/chi2.c +151 -0
- data/ext/math/src/chi2.h +27 -0
- data/ext/math/src/choose.c +71 -0
- data/ext/math/src/choose.h +16 -0
- data/ext/math/src/gamma.c +446 -0
- data/ext/math/src/gamma.h +39 -0
- data/ext/math/src/intexp.c +35 -0
- data/ext/math/src/intexp.h +15 -0
- data/ext/math/src/makefile +164 -0
- data/ext/math/src/math.mak +48 -0
- data/ext/math/src/normal.c +387 -0
- data/ext/math/src/normal.h +44 -0
- data/ext/math/src/radfn.c +113 -0
- data/ext/math/src/radfn.h +34 -0
- data/ext/math/src/zeta.c +49 -0
- data/ext/math/src/zeta.h +15 -0
- data/ext/pre-clean.rb +8 -0
- data/ext/pre-setup.rb +9 -0
- data/ext/util/doc/copying +504 -0
- data/ext/util/src/listops.c +76 -0
- data/ext/util/src/listops.h +26 -0
- data/ext/util/src/makefile +103 -0
- data/ext/util/src/memsys.c +84 -0
- data/ext/util/src/memsys.h +42 -0
- data/ext/util/src/nstats.c +288 -0
- data/ext/util/src/nstats.h +69 -0
- data/ext/util/src/params.c +86 -0
- data/ext/util/src/params.h +19 -0
- data/ext/util/src/parse.c +133 -0
- data/ext/util/src/parse.h +81 -0
- data/ext/util/src/scan.c +767 -0
- data/ext/util/src/scan.h +111 -0
- data/ext/util/src/symtab.c +443 -0
- data/ext/util/src/symtab.h +121 -0
- data/ext/util/src/tabscan.c +279 -0
- data/ext/util/src/tabscan.h +99 -0
- data/ext/util/src/util.mak +91 -0
- data/ext/util/src/vecops.c +317 -0
- data/ext/util/src/vecops.h +42 -0
- data/lib/apriori.rb +133 -0
- data/lib/apriori/adapter.rb +13 -0
- data/lib/apriori/association_rule.rb +89 -0
- data/lib/apriori/version.rb +9 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/txt2html +82 -0
- data/setup.rb +1585 -0
- data/tasks/apriori.rake +20 -0
- data/tasks/attic.rake +28 -0
- data/tasks/deployment.rake +34 -0
- data/tasks/environment.rake +7 -0
- data/tasks/install.rake +13 -0
- data/tasks/website.rake +17 -0
- data/test/apriori_test.rb +13 -0
- data/test/fixtures/market_basket_results_test.txt +5 -0
- data/test/fixtures/market_basket_string_test.txt +7 -0
- data/test/fixtures/results.txt +2 -0
- data/test/fixtures/sample.txt +7 -0
- data/test/test_helper.rb +5 -0
- data/test/unit/test_apriori.rb +68 -0
- data/test/unit/test_itemsets_and_parsing.rb +82 -0
- data/website/index.html +248 -0
- data/website/index.txt +152 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +142 -0
- data/website/template.html.erb +49 -0
- metadata +226 -0
data/tasks/apriori.rake
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
namespace :apriori do
  # Fetching the example data sets is not automated; this task only prints
  # the commands a user can run by hand.
  desc "Download the example data"
  task :get_example_data do
    instructions = [
      "Sorry! This isn't implemented in a portable way just yet.",
      "If you're on a *nix system try:",
      "",
      "curl -0 http://fimi.cs.helsinki.fi/data/kosarak.dat > examples/test_data/kosarak.dat",
      "curl -0 http://fimi.cs.helsinki.fi/data/retail.dat > examples/test_data/retail.dat",
      "",
      "Patches welcome! (http://github.com/jashmenn/apriori/tree/master)"
    ]
    # puts writes each array element on its own line.
    puts instructions

    # Unfinished sketch of an automated download, kept for reference:
    # open("http://www.ruby-lang.org/") {|f|
    #   f.each_line {|line| p line}
    # }
  end
end
|
data/tasks/attic.rake
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# This task is not actually used.
# Use "ruby setup.rb" instead.
# This task is just here to show you the idea of what is going on.
#
# Absolute path of the project's ext/ directory. This file sits in tasks/,
# so ext/ is one level above this file's own directory.
ext_base = File.expand_path(File.join(File.dirname(__FILE__), "..", "ext"))
|
|
5
|
+
# Cleans and rebuilds the bundled C sources and the Ruby extension, then
# runs the smoke script. Illustrative only; "ruby setup.rb" is the real path.
task "get_it_done" do
  # Each directory is handled in its own subshell and every step is chained
  # with &&, so a failed `cd` can never let the `rm` commands run in the
  # wrong directory. `rm -f` keeps an already-clean tree from being an error.
  # Plain `cd` in a subshell replaces bash-only pushd/popd.
  steps = [
    "(cd #{ext_base}/util/src && rm -f *.o && make)",
    # math/src is only cleaned; it does not need to be made here.
    "(cd #{ext_base}/math/src && rm -f *.o)",
    "(cd #{ext_base}/apriori/src && rm -f *.o && make)",
    "(cd #{ext_base} && rm -f *.o *.bundle Makefile && ruby extconf.rb && make)",
    "ruby -d test/apriori_test.rb"
  ]
  sh steps.join(" && ")
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
desc 'Release the website and new gem version'
task :deploy => [:check_version, :website, :release] do
  # Only prints a reminder; the SVN tag itself is not created here.
  svn_root = "svn+ssh://#{rubyforge_username}@rubyforge.org/var/svn/#{PATH}"
  puts "Remember to create SVN tag:"
  puts "svn copy #{svn_root}/trunk #{svn_root}/tags/REL-#{VERS} "
  puts "Suggested comment:"
  puts "Tagging release #{CHANGES}"
end

desc 'Runs tasks website_generate and install_gem as a local deployment of the gem'
task :local_deploy => [
  :website_generate,
  :install_gem
]
|
|
12
|
+
|
|
13
|
+
# Release guard: stops the run unless VERSION=x.y.z was supplied in the
# environment and it equals VERS.
task :check_version do
  # abort writes the message to stderr and exits with status 1. A bare
  # `exit` would report success to the calling shell even though the
  # check failed.
  abort 'Must pass a VERSION=x.y.z release version' unless ENV['VERSION']

  unless ENV['VERSION'] == VERS
    abort "Please update your version.rb to match the release version, currently #{VERS}"
  end
end
|
|
23
|
+
|
|
24
|
+
desc 'Install the package as a gem, without generating documentation(ri/rdoc)'
task :install_gem_no_doc => [:clean, :package] do
  # sudo is left out when Hoe::WINDOZE is truthy (presumably a Windows flag).
  sudo_prefix = Hoe::WINDOZE ? '' : 'sudo '
  sh "#{sudo_prefix}gem install pkg/*.gem --no-rdoc --no-ri"
end
|
|
28
|
+
|
|
29
|
+
namespace :manifest do
  desc 'Recreate Manifest.txt to include ALL files'
  task :refresh do
    # Run through a subshell literal: the command's output is captured and
    # discarded, and a non-zero exit status does not raise.
    %x{rake check_manifest | patch -p0 > Manifest.txt}
  end
end
|
data/tasks/install.rake
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Just compile the extension; don't do a typical setup.rb install.
task :extension do
  # Runs the three setup.rb phases in order; sh stops at the first failure.
  %w[clean config setup].each do |phase|
    sh "ruby setup.rb #{phase}"
  end
end
|
|
7
|
+
|
|
8
|
+
namespace :github do
  desc "Generate the Gemspec for github"
  task :generate_gemspec do
    # Redirects the output of the debug_gem task into the gemspec file.
    gemspec_file = "apriori.gemspec"
    sh "rake --silent debug_gem > #{gemspec_file}"
  end
end
|
data/tasks/website.rake
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
desc 'Generate website files'
task :website_generate => :ruby_env do
  # Every .txt under website/ except top-level version*.txt files.
  sources = Dir['website/**/*.txt'] - Dir['website/version*.txt']
  sources.each do |txt|
    # Output goes next to the source with a trailing "txt" swapped for "html".
    html = txt.gsub(/txt$/, 'html')
    sh %{ #{RUBY_APP} script/txt2html #{txt} > #{html} }
  end
end
|
|
7
|
+
|
|
8
|
+
desc 'Upload website files to rubyforge'
task :website_upload do
  # rsync the local website/ directory to the project's rubyforge web root.
  destination = "#{rubyforge_username}@rubyforge.org:/var/www/gforge-projects/#{PATH}/"
  sh %{rsync -aCv website/ #{destination}}
end

desc 'Generate and upload website files'
task :website => [
  :website_generate,
  :website_upload,
  :publish_docs
]
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Ad-hoc smoke script: loads the library from ../lib, runs do_apriori on the
# sample fixture and prints whatever it returns.
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), "..", "lib")
require 'apriori'
include Apriori

apriori_args = ["apriori", "test/fixtures/sample.txt", "test/fixtures/results.txt"]
puts do_apriori(apriori_args)

# Three blank lines after the result.
3.times { puts }

# Earlier experiments, kept for reference:
# puts test_hash_ap("one", "two", :hello => "world")
# puts do_apriori
# puts find_association_rules("one", "two", :hello => "world")

# ruby -r 'Apriori/apriori' -e 'include Apriori; puts test_converting_array(["apriori", "test/sample.txt", "test/results.txt"])'
# puts test1
# # => 10
# pp help
|
data/test/test_helper.rb
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
require File.dirname(__FILE__) + '/../test_helper.rb'
|
|
2
|
+
|
|
3
|
+
# Exercises Apriori.find_association_rules with a fixture file and with
# in-memory transaction arrays.
class TestApriori < Test::Unit::TestCase

  # Manual equivalent, for reference:
  # require 'apriori'; include Apriori; puts do_apriori(["apriori", "test/fixtures/sample.txt", "test/fixtures/results.txt"])

  # Default options on a fixture file should return something truthy.
  def test_truth
    assert Apriori.find_association_rules(string_fixture)
  end

  # Every supported option may be passed together.
  def test_calling_all_the_options
    found = Apriori.find_association_rules(string_fixture,
                                           :min_items => 2,
                                           :max_items => 5,
                                           :min_support => 1,
                                           :max_support => 100,
                                           :min_confidence => 20)
    assert found
  end

  # Transactions can be handed over as an array of arrays instead of a file.
  def test_giving_the_transactions
    rules = Apriori.find_association_rules(basket_transactions,
                                           :min_items => 2,
                                           :max_items => 5,
                                           :min_support => 1,
                                           :max_support => 100,
                                           :min_confidence => 20)

    expected = Apriori::AssociationRule.parse_line("apple <- cheese (50.0/3, 100.0)")
    assert expected
    assert rules.include?(expected)

    # puts rules.join("\n")
  end

  # Asking for rules that cannot exist must yield an empty result rather
  # than terminating the process.
  def test_returning_no_results
    # crazy numbers that will never return any rules
    rules = Apriori.find_association_rules(basket_transactions,
                                           :min_items => 5,
                                           :max_items => 5,
                                           :min_support => 100,
                                           :max_support => 100,
                                           :min_confidence => 20)

    assert_equal 0, rules.size

    # also, make notes that if you get more rules, a common thing to do is to reduce the min_support to a very small float
  end

  private

  # Path of the market-basket fixture used by the file-based tests.
  def string_fixture
    File.join(FIXTURES_DIR + "/market_basket_string_test.txt")
  end

  # Six small shopping baskets shared by the in-memory tests.
  def basket_transactions
    [ %w{beer doritos},
      %w{apple cheese},
      %w{beer doritos},
      %w{apple cheese},
      %w{apple cheese},
      %w{apple doritos} ]
  end

end
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
require File.dirname(__FILE__) + '/../test_helper.rb'
|
|
2
|
+
|
|
3
|
+
# Covers AssociationRule parsing, file loading, string round-tripping and
# equality.
class TestAssociationRulesAndParsingApriori < Test::Unit::TestCase
  include Apriori

  # Loading the results fixture yields five rules, including two known ones.
  def test_reading_from_a_file
    fixture = File.join(FIXTURES_DIR + "/market_basket_results_test.txt")
    rules = AssociationRule.from_file(fixture)
    assert rules
    assert_equal 5, rules.size

    [ "apple <- doritos (50.0/3, 33.3)",
      "foo <- bar baz bangle (66.7/4, 75.0)" ].each do |line|
      parsed = AssociationRule.parse_line(line)
      assert parsed
      assert rules.include?(parsed)
    end
  end

  # Each line must parse into the expected attributes and print back verbatim.
  def test_parsing_individual_lines
    assert_parses "doritos <- beer (33.3/2, 100.0)",
                  :consequent => "doritos",
                  :antecedent => ["beer"],
                  :support => 33.3,
                  :num_antecedent_transactions => 2,
                  :confidence => 100.0

    assert_parses "apple <- doritos (50.0/3, 33.3)",
                  :consequent => "apple",
                  :antecedent => ["doritos"],
                  :support => 50.0,
                  :num_antecedent_transactions => 3,
                  :confidence => 33.3

    # No "/count" after the support: the transaction count is nil.
    assert_parses "foo <- bar baz (66.7, 75.0)",
                  :consequent => "foo",
                  :antecedent => ["bar", "baz"],
                  :support => 66.7,
                  :num_antecedent_transactions => nil,
                  :confidence => 75.0

    assert_parses "foo <- bar baz bangle (66.7/4, 75.0)",
                  :consequent => "foo",
                  :antecedent => ["bar", "baz", "bangle"],
                  :support => 66.7,
                  :num_antecedent_transactions => 4,
                  :confidence => 75.0
  end

  # Two rules parsed from the same text compare equal.
  def test_association_rule_equality
    first  = AssociationRule.parse_line("doritos <- beer (33.3/2, 100.0)")
    assert first
    second = AssociationRule.parse_line("doritos <- beer (33.3/2, 100.0)")
    assert second
    assert_equal first, second
  end

  private

  # Parses +line+, checks every attribute listed in +wanted+ and verifies
  # that to_s reproduces +line+ exactly.
  def assert_parses(line, wanted)
    rule = AssociationRule.parse_line(line)
    assert rule
    wanted.each do |key, value|
      assert_equal value, rule.send(key), "Expected itemset '#{key}' to be '#{value}'"
    end
    assert_equal line, rule.to_s
  end

end
|
data/website/index.html
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
|
2
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
|
3
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
|
4
|
+
<head>
|
|
5
|
+
<link rel="stylesheet" href="stylesheets/screen.css" type="text/css" media="screen" />
|
|
6
|
+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
|
7
|
+
<title>
|
|
8
|
+
apriori
|
|
9
|
+
</title>
|
|
10
|
+
<script src="javascripts/rounded_corners_lite.inc.js" type="text/javascript"></script>
|
|
11
|
+
<style>
|
|
12
|
+
|
|
13
|
+
</style>
|
|
14
|
+
<script type="text/javascript">
|
|
15
|
+
// Rounds the corners of the "version" box once the page has loaded.
window.onload = function() {
  // Declared with var so the options object stays local to this handler
  // instead of becoming an implicit global.
  var settings = {
    tl: { radius: 10 },
    tr: { radius: 10 },
    bl: { radius: 10 },
    br: { radius: 10 },
    antiAlias: true,
    autoPad: true,
    validTags: ["div"]
  };
  var versionBox = new curvyCorners(settings, document.getElementById("version"));
  versionBox.applyCornersToAll();
}
|
|
28
|
+
</script>
|
|
29
|
+
</head>
|
|
30
|
+
<body>
|
|
31
|
+
<div id="main">
|
|
32
|
+
|
|
33
|
+
<h1>apriori</h1>
|
|
34
|
+
<div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/apriori"; return false'>
|
|
35
|
+
<p>Get Version</p>
|
|
36
|
+
<a href="http://rubyforge.org/projects/apriori" class="numbers">0.2.1</a>
|
|
37
|
+
<img src="images/2118201782_4e87521dc9_m.jpg" alt="" />
|
|
38
|
+
</div>
|
|
39
|
+
<h1>→ ‘find item associations’</h1>
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
<h2>What</h2>
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
<p>Ruby Apriori is a library to efficiently find item association rules within
|
|
46
|
+
large sets of transactions. This library provides a Ruby interface to Christian
|
|
47
|
+
Borgelt’s C implementation of this algorithm.</p>
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
<p>From Christian Borgelt’s <a href="http://www.borgelt.net/apriori.html">Apriori</a> documentation:</p>
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
<blockquote>
|
|
54
|
+
<p>Association rule induction is a powerful method for so-called market basket
|
|
55
|
+
analysis, which aims at finding regularities in the shopping behavior of
|
|
56
|
+
customers.</p>
|
|
57
|
+
</blockquote>
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
<blockquote>
|
|
61
|
+
<p>With the induction of association rules one tries to find sets of
|
|
62
|
+
products that are frequently bought together, so that from the presence of
|
|
63
|
+
certain products in a shopping cart one can infer (with a high probability)
|
|
64
|
+
that certain other products are present.</p>
|
|
65
|
+
</blockquote>
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
<blockquote>
|
|
69
|
+
<p>An association rule is a rule like “If a customer buys wine and bread, he often
|
|
70
|
+
buys cheese, too.”</p>
|
|
71
|
+
</blockquote>
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
<blockquote>
|
|
75
|
+
<p>An association rule states that if we pick a customer at random and find out
|
|
76
|
+
that he selected certain items (bought certain products, chose certain options
|
|
77
|
+
etc.), we can be confident, quantified by a percentage, that he also selected
|
|
78
|
+
certain other items (bought certain other products, chose certain other options
|
|
79
|
+
etc.).</p>
|
|
80
|
+
</blockquote>
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
<p>This Ruby library provides a convenient way to use this algorithm from Ruby.</p>
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
<p>Original Apriori C code by Christian Borgelt.</p>
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
<h2>Installing</h2>
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
<p><pre class='syntax'><span class="ident">gem</span> <span class="ident">install</span> <span class="punct">--</span><span class="ident">source</span> <span class="ident">http</span><span class="punct">:/</span><span class="regex"></span><span class="punct">/</span><span class="ident">gems</span><span class="punct">.</span><span class="ident">github</span><span class="punct">.</span><span class="ident">com</span> <span class="ident">jashmenn</span><span class="punct">-</span><span class="ident">apriori</span></pre></p>
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
<h2>The Algorithm:</h2>
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
<p>This document is not an introduction to the Apriori algorithm. To find out more about Apriori see:</p>
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
<ul>
|
|
102
|
+
<li><a href="http://www.borgelt.net/papers/cstat_02.pdf">http://www.borgelt.net/papers/cstat_02.pdf</a></li>
|
|
103
|
+
<li><a href="http://www.borgelt.net/papers/fimi_03.pdf">http://www.borgelt.net/papers/fimi_03.pdf</a></li>
|
|
104
|
+
<li><a href="http://www.borgelt.net/apriori.html">http://www.borgelt.net/apriori.html</a></li>
|
|
105
|
+
<li><a href="http://en.wikipedia.org/wiki/Apriori_algorithm">http://en.wikipedia.org/wiki/Apriori_algorithm</a></li>
|
|
106
|
+
</ul>
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
<h2>Demonstration of usage</h2>
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
<p><pre class='syntax'>
|
|
113
|
+
<span class="ident">require</span> <span class="punct">'</span><span class="string">apriori</span><span class="punct">'</span>
|
|
114
|
+
|
|
115
|
+
<span class="ident">transactions</span> <span class="punct">=</span> <span class="punct">[</span> <span class="punct">%w{</span><span class="string">beer doritos</span><span class="punct">},</span>
|
|
116
|
+
<span class="punct">%w{</span><span class="string">apple cheese</span><span class="punct">},</span>
|
|
117
|
+
<span class="punct">%w{</span><span class="string">beer doritos</span><span class="punct">},</span>
|
|
118
|
+
<span class="punct">%w{</span><span class="string">apple cheese</span><span class="punct">},</span>
|
|
119
|
+
<span class="punct">%w{</span><span class="string">apple cheese</span><span class="punct">},</span>
|
|
120
|
+
<span class="punct">%w{</span><span class="string">apple doritos</span><span class="punct">}</span> <span class="punct">]</span>
|
|
121
|
+
|
|
122
|
+
<span class="ident">rules</span> <span class="punct">=</span> <span class="constant">Apriori</span><span class="punct">.</span><span class="ident">find_association_rules</span><span class="punct">(</span><span class="ident">transactions</span><span class="punct">,</span>
|
|
123
|
+
<span class="symbol">:min_items</span> <span class="punct">=></span> <span class="number">2</span><span class="punct">,</span>
|
|
124
|
+
<span class="symbol">:max_items</span> <span class="punct">=></span> <span class="number">5</span><span class="punct">,</span>
|
|
125
|
+
<span class="symbol">:min_support</span> <span class="punct">=></span> <span class="number">1</span><span class="punct">,</span>
|
|
126
|
+
<span class="symbol">:max_support</span> <span class="punct">=></span> <span class="number">100</span><span class="punct">,</span>
|
|
127
|
+
<span class="symbol">:min_confidence</span> <span class="punct">=></span> <span class="number">20</span><span class="punct">)</span>
|
|
128
|
+
|
|
129
|
+
<span class="ident">puts</span> <span class="ident">rules</span><span class="punct">.</span><span class="ident">join</span><span class="punct">("</span><span class="string"><span class="escape">\n</span></span><span class="punct">")</span>
|
|
130
|
+
|
|
131
|
+
<span class="comment"># Results: </span>
|
|
132
|
+
<span class="comment"># doritos <- beer (33.3/2, 100.0)</span>
|
|
133
|
+
<span class="comment"># beer <- doritos (50.0/3, 66.7)</span>
|
|
134
|
+
<span class="comment"># apple <- doritos (50.0/3, 33.3)</span>
|
|
135
|
+
<span class="comment"># doritos <- apple (66.7/4, 25.0)</span>
|
|
136
|
+
<span class="comment"># apple <- cheese (50.0/3, 100.0)</span>
|
|
137
|
+
<span class="comment"># cheese <- apple (66.7/4, 75.0)</span>
|
|
138
|
+
|
|
139
|
+
<span class="comment"># NOTE:</span>
|
|
140
|
+
<span class="comment"># doritos <- beer (33.3/2, 100.0)</span>
|
|
141
|
+
<span class="comment"># means: </span>
|
|
142
|
+
<span class="comment"># * beer appears in 33.3% (2 total) of the transactions (the support)</span>
|
|
143
|
+
<span class="comment"># * beer implies doritos 100% of the time (the confidence)</span>
|
|
144
|
+
</pre></p>
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
<p>See the <ins>examples</ins> directory for more examples of usage.</p>
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
<h2>Forum</h2>
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
<p><a href="http://groups.google.com/group/apriori">http://groups.google.com/group/apriori</a></p>
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
<p><span class="caps">TODO</span> – create Google Group – apriori</p>
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
<h2>How to submit patches</h2>
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
<ul>
|
|
163
|
+
<li>github: <a href="http://github.com/jashmenn/apriori/tree/master">http://github.com/jashmenn/apriori/tree/master</a></li>
|
|
164
|
+
</ul>
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
<pre>git clone git://github.com/jashmenn/apriori.git</pre>
|
|
168
|
+
|
|
169
|
+
<h3>Build and test instructions</h3>
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
<pre>cd apriori
|
|
173
|
+
rake test
|
|
174
|
+
rake install_gem</pre>
|
|
175
|
+
|
|
176
|
+
<h3>Troubleshooting</h3>
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
<p>If you get the error:</p>
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
<pre><code>undefined method `add_development_dependency' for #<Gem::Specification:0x2aabcba63ed8></code></pre>
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
<p>This is because hoe 1.7.0 requires rubygems 1.2.0. To upgrade, simply do the following:</p>
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
<pre><code>gem update --system</code></pre>
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
<h2>License</h2>
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
<h3>Apriori C code</h3>
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
<p>Copyright © Christian Borgelt</p>
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
<p>Modified and under the <span class="caps">LGPL</span> license.
|
|
201
|
+
See <code>ext/apriori/doc/copying</code> for details.</p>
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
<h3>Ruby Apriori Extension</h3>
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
<p>(The <span class="caps">MIT</span> License)</p>
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
<p>Copyright © 2008 Nate Murray</p>
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
<p>Permission is hereby granted, free of charge, to any person obtaining
|
|
214
|
+
a copy of this software and associated documentation files (the
|
|
215
|
+
‘Software’), to deal in the Software without restriction, including
|
|
216
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
217
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
218
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
219
|
+
the following conditions:</p>
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
<p>The above copyright notice and this permission notice shall be
|
|
223
|
+
included in all copies or substantial portions of the Software.</p>
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
<p><span class="caps">THE SOFTWARE IS PROVIDED</span> ‘AS IS’, <span class="caps">WITHOUT WARRANTY OF ANY KIND</span>,
|
|
227
|
+
<span class="caps">EXPRESS OR IMPLIED</span>, INCLUDING <span class="caps">BUT NOT LIMITED TO THE WARRANTIES OF</span>
|
|
228
|
+
<span class="caps">MERCHANTABILITY</span>, FITNESS <span class="caps">FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT</span>.
|
|
229
|
+
<span class="caps">IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY</span>
|
|
230
|
+
<span class="caps">CLAIM</span>, DAMAGES <span class="caps">OR OTHER LIABILITY</span>, WHETHER <span class="caps">IN AN ACTION OF CONTRACT</span>,
|
|
231
|
+
<span class="caps">TORT OR OTHERWISE</span>, ARISING <span class="caps">FROM</span>, OUT <span class="caps">OF OR IN CONNECTION WITH THE</span>
|
|
232
|
+
<span class="caps">SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE</span>.</p>
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
<h2>Contact</h2>
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
<p>Comments are welcome. Send an email to <a href="mailto:nate@natemurray.com">Nate Murray</a> via the <a href="http://groups.google.com/group/apriori">forum</a></p>
|
|
239
|
+
<p class="coda">
|
|
240
|
+
<a href="mailto:nate@natemurray.com">Nate Murray</a>, 8th September 2008<br />
|
|
241
|
+
Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
|
|
242
|
+
</p>
|
|
243
|
+
</div>
|
|
244
|
+
|
|
245
|
+
<!-- insert site tracking codes here, like Google Urchin -->
|
|
246
|
+
|
|
247
|
+
</body>
|
|
248
|
+
</html>
|