apriori 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (122) hide show
  1. data/History.txt +16 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +121 -0
  4. data/README.txt +149 -0
  5. data/Rakefile +15 -0
  6. data/TODO.txt +60 -0
  7. data/attic/c_ext_test1/MyTest/MyTest.c +23 -0
  8. data/attic/c_ext_test1/MyTest/extconf.rb +11 -0
  9. data/attic/c_ext_test1/mytest.rb +10 -0
  10. data/attic/test.c +12 -0
  11. data/config/hoe.rb +81 -0
  12. data/config/requirements.rb +29 -0
  13. data/examples/01_simple_example.rb +32 -0
  14. data/examples/02_small_file_example.rb +17 -0
  15. data/examples/03_large_file_example.rb +22 -0
  16. data/examples/test_data/market_basket_basic_test.dat +9 -0
  17. data/ext/Apriori.c +149 -0
  18. data/ext/Makefile +149 -0
  19. data/ext/apriori/doc/apriori.html +1301 -0
  20. data/ext/apriori/doc/arem.gp +68 -0
  21. data/ext/apriori/doc/c_rev.gp +89 -0
  22. data/ext/apriori/doc/chi2.tex +156 -0
  23. data/ext/apriori/doc/copying +504 -0
  24. data/ext/apriori/doc/line.gif +0 -0
  25. data/ext/apriori/doc/uparrow.gif +0 -0
  26. data/ext/apriori/ex/flg2set +15 -0
  27. data/ext/apriori/ex/hdr2set +13 -0
  28. data/ext/apriori/ex/readme +71 -0
  29. data/ext/apriori/ex/row2set +7 -0
  30. data/ext/apriori/ex/rulesort +24 -0
  31. data/ext/apriori/ex/tab2set +9 -0
  32. data/ext/apriori/ex/test.app +2 -0
  33. data/ext/apriori/ex/test.rul +9 -0
  34. data/ext/apriori/ex/test1.rul +43 -0
  35. data/ext/apriori/ex/test1.tab +10 -0
  36. data/ext/apriori/ex/test2.tab +10 -0
  37. data/ext/apriori/ex/test3.tab +30 -0
  38. data/ext/apriori/ex/test4.tab +11 -0
  39. data/ext/apriori/ex/test5.tab +39 -0
  40. data/ext/apriori/ex/tid2set +23 -0
  41. data/ext/apriori/ex/xhdr2set +33 -0
  42. data/ext/apriori/src/apriori.c +750 -0
  43. data/ext/apriori/src/apriori.dsp +120 -0
  44. data/ext/apriori/src/apriori.dsw +29 -0
  45. data/ext/apriori/src/apriori.mak +99 -0
  46. data/ext/apriori/src/istree.c +1411 -0
  47. data/ext/apriori/src/istree.h +160 -0
  48. data/ext/apriori/src/makefile +105 -0
  49. data/ext/apriori/src/tract.c +870 -0
  50. data/ext/apriori/src/tract.h +261 -0
  51. data/ext/apriori_wrapper.c +757 -0
  52. data/ext/apriori_wrapper.h +10 -0
  53. data/ext/extconf.rb +32 -0
  54. data/ext/math/doc/copying +504 -0
  55. data/ext/math/src/chi2.c +151 -0
  56. data/ext/math/src/chi2.h +27 -0
  57. data/ext/math/src/choose.c +71 -0
  58. data/ext/math/src/choose.h +16 -0
  59. data/ext/math/src/gamma.c +446 -0
  60. data/ext/math/src/gamma.h +39 -0
  61. data/ext/math/src/intexp.c +35 -0
  62. data/ext/math/src/intexp.h +15 -0
  63. data/ext/math/src/makefile +164 -0
  64. data/ext/math/src/math.mak +48 -0
  65. data/ext/math/src/normal.c +387 -0
  66. data/ext/math/src/normal.h +44 -0
  67. data/ext/math/src/radfn.c +113 -0
  68. data/ext/math/src/radfn.h +34 -0
  69. data/ext/math/src/zeta.c +49 -0
  70. data/ext/math/src/zeta.h +15 -0
  71. data/ext/pre-clean.rb +8 -0
  72. data/ext/pre-setup.rb +9 -0
  73. data/ext/util/doc/copying +504 -0
  74. data/ext/util/src/listops.c +76 -0
  75. data/ext/util/src/listops.h +26 -0
  76. data/ext/util/src/makefile +103 -0
  77. data/ext/util/src/memsys.c +84 -0
  78. data/ext/util/src/memsys.h +42 -0
  79. data/ext/util/src/nstats.c +288 -0
  80. data/ext/util/src/nstats.h +69 -0
  81. data/ext/util/src/params.c +86 -0
  82. data/ext/util/src/params.h +19 -0
  83. data/ext/util/src/parse.c +133 -0
  84. data/ext/util/src/parse.h +81 -0
  85. data/ext/util/src/scan.c +767 -0
  86. data/ext/util/src/scan.h +111 -0
  87. data/ext/util/src/symtab.c +443 -0
  88. data/ext/util/src/symtab.h +121 -0
  89. data/ext/util/src/tabscan.c +279 -0
  90. data/ext/util/src/tabscan.h +99 -0
  91. data/ext/util/src/util.mak +91 -0
  92. data/ext/util/src/vecops.c +317 -0
  93. data/ext/util/src/vecops.h +42 -0
  94. data/lib/apriori.rb +133 -0
  95. data/lib/apriori/adapter.rb +13 -0
  96. data/lib/apriori/association_rule.rb +89 -0
  97. data/lib/apriori/version.rb +9 -0
  98. data/script/console +10 -0
  99. data/script/destroy +14 -0
  100. data/script/generate +14 -0
  101. data/script/txt2html +82 -0
  102. data/setup.rb +1585 -0
  103. data/tasks/apriori.rake +20 -0
  104. data/tasks/attic.rake +28 -0
  105. data/tasks/deployment.rake +34 -0
  106. data/tasks/environment.rake +7 -0
  107. data/tasks/install.rake +13 -0
  108. data/tasks/website.rake +17 -0
  109. data/test/apriori_test.rb +13 -0
  110. data/test/fixtures/market_basket_results_test.txt +5 -0
  111. data/test/fixtures/market_basket_string_test.txt +7 -0
  112. data/test/fixtures/results.txt +2 -0
  113. data/test/fixtures/sample.txt +7 -0
  114. data/test/test_helper.rb +5 -0
  115. data/test/unit/test_apriori.rb +68 -0
  116. data/test/unit/test_itemsets_and_parsing.rb +82 -0
  117. data/website/index.html +248 -0
  118. data/website/index.txt +152 -0
  119. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  120. data/website/stylesheets/screen.css +142 -0
  121. data/website/template.html.erb +49 -0
  122. metadata +226 -0
@@ -0,0 +1,16 @@
1
+ == 0.2.1 2008-09-12
2
+ * updated requirements to check for the proper rubygems version
3
+
4
+ == 0.2.0 2008-09-12
5
+ * changed the formatting of the association rules' to_s output to be more intuitive to those not familiar with formal probability notation.
6
+
7
+ == 0.1.2 2008-09-05
8
+ * fixed some bugs on machines with case-sensitive file systems
9
+
10
+ == 0.1.1 2008-09-04
11
+ * created first rubygem
12
+
13
+ == 0.0.1 2008-08-30
14
+
15
+ * 1 major enhancement:
16
+ * Initial release
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 Nate Murray
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,121 @@
1
+ History.txt
2
+ License.txt
3
+ Manifest.txt
4
+ README.txt
5
+ Rakefile
6
+ TODO.txt
7
+ attic/c_ext_test1/MyTest/MyTest.c
8
+ attic/c_ext_test1/MyTest/extconf.rb
9
+ attic/c_ext_test1/mytest.rb
10
+ attic/test.c
11
+ config/hoe.rb
12
+ config/requirements.rb
13
+ examples/01_simple_example.rb
14
+ examples/02_small_file_example.rb
15
+ examples/03_large_file_example.rb
16
+ examples/test_data/market_basket_basic_test.dat
17
+ ext/Apriori.c
18
+ ext/Makefile
19
+ ext/apriori/doc/apriori.html
20
+ ext/apriori/doc/arem.gp
21
+ ext/apriori/doc/c_rev.gp
22
+ ext/apriori/doc/chi2.tex
23
+ ext/apriori/doc/copying
24
+ ext/apriori/doc/line.gif
25
+ ext/apriori/doc/uparrow.gif
26
+ ext/apriori/ex/flg2set
27
+ ext/apriori/ex/hdr2set
28
+ ext/apriori/ex/readme
29
+ ext/apriori/ex/row2set
30
+ ext/apriori/ex/rulesort
31
+ ext/apriori/ex/tab2set
32
+ ext/apriori/ex/test.app
33
+ ext/apriori/ex/test.rul
34
+ ext/apriori/ex/test1.rul
35
+ ext/apriori/ex/test1.tab
36
+ ext/apriori/ex/test2.tab
37
+ ext/apriori/ex/test3.tab
38
+ ext/apriori/ex/test4.tab
39
+ ext/apriori/ex/test5.tab
40
+ ext/apriori/ex/tid2set
41
+ ext/apriori/ex/xhdr2set
42
+ ext/apriori/src/apriori.c
43
+ ext/apriori/src/apriori.dsp
44
+ ext/apriori/src/apriori.dsw
45
+ ext/apriori/src/apriori.mak
46
+ ext/apriori/src/istree.c
47
+ ext/apriori/src/istree.h
48
+ ext/apriori/src/makefile
49
+ ext/apriori/src/tract.c
50
+ ext/apriori/src/tract.h
51
+ ext/apriori_wrapper.c
52
+ ext/apriori_wrapper.h
53
+ ext/extconf.rb
54
+ ext/math/doc/copying
55
+ ext/math/src/chi2.c
56
+ ext/math/src/chi2.h
57
+ ext/math/src/choose.c
58
+ ext/math/src/choose.h
59
+ ext/math/src/gamma.c
60
+ ext/math/src/gamma.h
61
+ ext/math/src/intexp.c
62
+ ext/math/src/intexp.h
63
+ ext/math/src/makefile
64
+ ext/math/src/math.mak
65
+ ext/math/src/normal.c
66
+ ext/math/src/normal.h
67
+ ext/math/src/radfn.c
68
+ ext/math/src/radfn.h
69
+ ext/math/src/zeta.c
70
+ ext/math/src/zeta.h
71
+ ext/pre-clean.rb
72
+ ext/pre-setup.rb
73
+ ext/util/doc/copying
74
+ ext/util/src/listops.c
75
+ ext/util/src/listops.h
76
+ ext/util/src/makefile
77
+ ext/util/src/memsys.c
78
+ ext/util/src/memsys.h
79
+ ext/util/src/nstats.c
80
+ ext/util/src/nstats.h
81
+ ext/util/src/params.c
82
+ ext/util/src/params.h
83
+ ext/util/src/parse.c
84
+ ext/util/src/parse.h
85
+ ext/util/src/scan.c
86
+ ext/util/src/scan.h
87
+ ext/util/src/symtab.c
88
+ ext/util/src/symtab.h
89
+ ext/util/src/tabscan.c
90
+ ext/util/src/tabscan.h
91
+ ext/util/src/util.mak
92
+ ext/util/src/vecops.c
93
+ ext/util/src/vecops.h
94
+ lib/apriori.rb
95
+ lib/apriori/adapter.rb
96
+ lib/apriori/association_rule.rb
97
+ lib/apriori/version.rb
98
+ script/console
99
+ script/destroy
100
+ script/generate
101
+ script/txt2html
102
+ setup.rb
103
+ tasks/apriori.rake
104
+ tasks/attic.rake
105
+ tasks/deployment.rake
106
+ tasks/environment.rake
107
+ tasks/website.rake
108
+ tasks/install.rake
109
+ test/apriori_test.rb
110
+ test/fixtures/market_basket_results_test.txt
111
+ test/fixtures/market_basket_string_test.txt
112
+ test/fixtures/results.txt
113
+ test/fixtures/sample.txt
114
+ test/test_helper.rb
115
+ test/unit/test_apriori.rb
116
+ test/unit/test_itemsets_and_parsing.rb
117
+ website/index.html
118
+ website/index.txt
119
+ website/javascripts/rounded_corners_lite.inc.js
120
+ website/stylesheets/screen.css
121
+ website/template.html.erb
@@ -0,0 +1,149 @@
1
+ = apriori
2
+
3
+ * This project can be found at: http://github.com/jashmenn/apriori/tree/master
4
+ * Christian Borgelt's original C code can be found at: http://www.borgelt.net/apriori.html
5
+
6
+ == DESCRIPTION:
7
+
8
+ Ruby Apriori is a library to efficiently find item association rules within
9
+ large sets of transactions. This library provides a Ruby interface to Christian
10
+ Borgelt's C implementation of this algorithm.
11
+
12
+ From Christian Borgelt's Apriori[http://www.borgelt.net/apriori.html] documentation:
13
+
14
+ Association rule induction is a powerful method for so-called market basket
15
+ analysis, which aims at finding regularities in the shopping behavior of
16
+ customers.
17
+
18
+ With the induction of association rules one tries to find sets of
19
+ products that are frequently bought together, so that from the presence of
20
+ certain products in a shopping cart one can infer (with a high probability)
21
+ that certain other products are present.
22
+
23
+ An association rule is a rule like "If a customer buys wine and bread, he often
24
+ buys cheese, too."
25
+
26
+ An association rule states that if we pick a customer at random and find out
27
+ that he selected certain items (bought certain products, chose certain options
28
+ etc.), we can be confident, quantified by a percentage, that he also selected
29
+ certain other items (bought certain other products, chose certain other options
30
+ etc.).
31
+
32
+ This Ruby library provides a convenient way to use this algorithm from Ruby.
33
+
34
+ Original Apriori C code by Christian Borgelt.
35
+
36
+ == THE ALGORITHM:
37
+
38
+ This document is not an introduction to the Apriori algorithm. To find out more about Apriori see:
39
+
40
+ * http://www.borgelt.net/papers/cstat_02.pdf
41
+ * http://www.borgelt.net/papers/fimi_03.pdf
42
+ * http://www.borgelt.net/apriori.html
43
+ * http://en.wikipedia.org/wiki/Apriori_algorithm
44
+
45
+ == FEATURES:
46
+
47
+ * Supports easy use from Ruby data types
48
+ * Supports large data files
49
+
50
+ == EXAMPLE USAGE:
51
+
52
+ require 'apriori'
53
+
54
+ transactions = [ %w{beer doritos},
55
+ %w{apple cheese},
56
+ %w{beer doritos},
57
+ %w{apple cheese},
58
+ %w{apple cheese},
59
+ %w{apple doritos} ]
60
+
61
+ rules = Apriori.find_association_rules(transactions,
62
+ :min_items => 2,
63
+ :max_items => 5,
64
+ :min_support => 1,
65
+ :max_support => 100,
66
+ :min_confidence => 20)
67
+
68
+ puts rules.join("\n")
69
+
70
+ # Results:
71
+ # doritos <- beer (33.3/2, 100.0)
72
+ # beer <- doritos (50.0/3, 66.7)
73
+ # apple <- doritos (50.0/3, 33.3)
74
+ # doritos <- apple (66.7/4, 25.0)
75
+ # apple <- cheese (50.0/3, 100.0)
76
+ # cheese <- apple (66.7/4, 75.0)
77
+
78
+ # NOTE:
79
+ # doritos <- beer (33.3/2, 100.0)
80
+ # means:
81
+ # * beer appears in 33.3% (2 total) of the transactions (the support)
82
+ # * beer implies doritos 100% of the time (the confidence)
83
+
84
+ See the +examples+ directory for more examples of usage.
85
+
86
+ == EXAMPLE DATA:
87
+
88
+ Example data can be found at:
89
+
90
+ http://fimi.cs.helsinki.fi/data/
91
+
92
+ == REQUIREMENTS:
93
+
94
+ This library is compiled using a slightly modified version of Christian
95
+ Borgelt's original C implementation of Apriori. The original code can be found
96
+ at: http://www.borgelt.net/apriori.html
97
+
98
+ * +gcc+ or similar compiler
99
+ * <tt>ruby.h</tt>
100
+
101
+ == INSTALL:
102
+
103
+ * gem install --source http://gems.github.com jashmenn-apriori
104
+
105
+ == FAQ:
106
+
107
+ If you get the error:
108
+
109
+ undefined method `add_development_dependency' for #<Gem::Specification:0x2aabcba63ed8>
110
+
111
+ This is because hoe 1.7.0 requires rubygems 1.2.0. To upgrade, simply do the following:
112
+
113
+ gem update --system
114
+
115
+ == LICENSE:
116
+
117
+ === Apriori C code
118
+
119
+ Copyright (c) Christian Borgelt
120
+
121
+ Modified and distributed under the LGPL.
122
+ See ext/apriori/doc/copying for details.
123
+
124
+ === Ruby Apriori Extension
125
+
126
+ (The MIT License)
127
+
128
+ Copyright (c) 2008 Nate Murray
129
+
130
+ Permission is hereby granted, free of charge, to any person obtaining
131
+ a copy of this software and associated documentation files (the
132
+ 'Software'), to deal in the Software without restriction, including
133
+ without limitation the rights to use, copy, modify, merge, publish,
134
+ distribute, sublicense, and/or sell copies of the Software, and to
135
+ permit persons to whom the Software is furnished to do so, subject to
136
+ the following conditions:
137
+
138
+ The above copyright notice and this permission notice shall be
139
+ included in all copies or substantial portions of the Software.
140
+
141
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
142
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
143
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
144
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
145
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
146
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
147
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
148
+
149
+ =========================================================================
@@ -0,0 +1,15 @@
1
+ require 'config/requirements'
2
+ require 'config/hoe' # setup Hoe + all gem configuration
3
+
4
+ class Rake::Task
5
+ def abandon
6
+ prerequisites.clear
7
+ @actions.clear
8
+ end
9
+ end
10
+
11
+ Dir['tasks/**/*.rake'].each { |rake| load rake }
12
+
13
+ Rake::Task[:default].abandon
14
+ task :default => :extension
15
+
@@ -0,0 +1,60 @@
1
+ == 0.1.0 2008-09-04
2
+ * make a ruby gem
3
+ * deal with association rules that only have 1 item
4
+
5
+ * newgem thoughts:
6
+ * Ability for README.txt and website to be the same thing
7
+ * syntax highlighting on website
8
+
9
+ == 0.0.1 2008-09-02
10
+
11
+ + Clean up documentation
12
+ * website
13
+ + README.txt
14
+ + get documentation to build
15
+ + create example files
16
+ + apriori.rb
17
+
18
+ + Clean up licensing everywhere
19
+ + get the typical install to work
20
+ + clean up the rakefile
21
+ + refactor find_itemsets to be find_association_rules
22
+
23
+ = What options to support later:
24
+
25
+ -t# target type (default: association rules)
26
+ (s: item sets, c: closed item sets, m: maximal item sets,
27
+ r: association rules, h: association hyperedges)
28
+ -k# item separator for output (default: " ")
29
+ -p# output format for support/confidence (default: "%.1f")
30
+ -y print lift value (confidence divided by prior)
31
+ -g write output in scannable form (quote certain characters)
32
+
33
+ -q# sort items w.r.t. their frequency (default: 2)
34
+ (1: ascending, -1: descending, 0: do not sort,
35
+ 2: ascending, -2: descending w.r.t. transaction size sum)
36
+ -b/f/r# blank characters, field and record separators
37
+ (default: " \t\r", " \t", "\n")
38
+ appfile file stating item appearances (optional)
39
+
40
+
41
+ = What options to support even later:
42
+ -e# additional evaluation measure (default: none)
43
+ -! print a list of additional evaluation measures
44
+ -d# minimal value of additional evaluation measure (default: 10%)
45
+ -v print value of additional rule evaluation measure
46
+ -l do not load transactions into memory (work on input file)
47
+ -u# filter unused items from transactions (default: 0.1)
48
+ (0: do not filter items w.r.t. usage in sets,
49
+ <0: fraction of removed items for filtering,
50
+ >0: take execution times ratio into account)
51
+ -h do not organize transactions as a prefix tree
52
+ -j use quicksort to sort the transactions (default: heapsort)
53
+ -z minimize memory usage (default: maximize speed)
54
+ -C# comment characters (default: "#")
55
+
56
+
57
+ Don't even know what they do:
58
+ -o use original definition of the support of a rule (body & head)
59
+ -x extended support output (print both rule support types)
60
+
@@ -0,0 +1,23 @@
1
+ // Include the Ruby headers and goodies
2
+ #include "ruby.h"
3
+
4
+ // Defining a space for information and references about the module to be stored internally
5
+ VALUE MyTest = Qnil;
6
+
7
+ // Prototype for the initialization method - Ruby calls this, not you
8
+ void Init_mytest();
9
+
10
+ // Prototype for our method 'test1' - methods are prefixed by 'method_' here
11
+ VALUE method_test1(VALUE self);
12
+
13
+ // The initialization method for this module
14
+ void Init_mytest() {
15
+ MyTest = rb_define_module("MyTest");
16
+ rb_define_method(MyTest, "test1", method_test1, 0);
17
+ }
18
+
19
+ // Our 'test1' method.. it simply returns a value of '10' for now.
20
+ VALUE method_test1(VALUE self) {
21
+ int x = 10;
22
+ return INT2NUM(x);
23
+ }
@@ -0,0 +1,11 @@
1
+ # Loads mkmf which is used to make makefiles for Ruby extensions
2
+ require 'mkmf'
3
+
4
+ # Give it a name
5
+ extension_name = 'mytest'
6
+
7
+ # The destination
8
+ dir_config(extension_name)
9
+
10
+ # Do the work
11
+ create_makefile(extension_name)
@@ -0,0 +1,10 @@
1
+ # Load in the extension (on OS X this loads ./MyTest/mytest.bundle - unsure about Linux, possibly mytest.so)
2
+ require 'MyTest/mytest'
3
+
4
+ # MyTest is now a module, so we need to include it
5
+ include MyTest
6
+
7
+ # Call and print the result from the test1 method
8
+ puts test1
9
+
10
+ # => 10
@@ -0,0 +1,12 @@
1
+ #include "apriori_wrapper.h"
2
+
3
+ /*
4
+ #include "scan.h"
5
+ #include "tract.h"
6
+ #include "istree.h"
7
+ */
8
+
9
+ int main (int argc, char *argv[])
10
+ {
11
+ return 0;
12
+ }