apriori 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. data/History.txt +16 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +121 -0
  4. data/README.txt +149 -0
  5. data/Rakefile +15 -0
  6. data/TODO.txt +60 -0
  7. data/attic/c_ext_test1/MyTest/MyTest.c +23 -0
  8. data/attic/c_ext_test1/MyTest/extconf.rb +11 -0
  9. data/attic/c_ext_test1/mytest.rb +10 -0
  10. data/attic/test.c +12 -0
  11. data/config/hoe.rb +81 -0
  12. data/config/requirements.rb +29 -0
  13. data/examples/01_simple_example.rb +32 -0
  14. data/examples/02_small_file_example.rb +17 -0
  15. data/examples/03_large_file_example.rb +22 -0
  16. data/examples/test_data/market_basket_basic_test.dat +9 -0
  17. data/ext/Apriori.c +149 -0
  18. data/ext/Makefile +149 -0
  19. data/ext/apriori/doc/apriori.html +1301 -0
  20. data/ext/apriori/doc/arem.gp +68 -0
  21. data/ext/apriori/doc/c_rev.gp +89 -0
  22. data/ext/apriori/doc/chi2.tex +156 -0
  23. data/ext/apriori/doc/copying +504 -0
  24. data/ext/apriori/doc/line.gif +0 -0
  25. data/ext/apriori/doc/uparrow.gif +0 -0
  26. data/ext/apriori/ex/flg2set +15 -0
  27. data/ext/apriori/ex/hdr2set +13 -0
  28. data/ext/apriori/ex/readme +71 -0
  29. data/ext/apriori/ex/row2set +7 -0
  30. data/ext/apriori/ex/rulesort +24 -0
  31. data/ext/apriori/ex/tab2set +9 -0
  32. data/ext/apriori/ex/test.app +2 -0
  33. data/ext/apriori/ex/test.rul +9 -0
  34. data/ext/apriori/ex/test1.rul +43 -0
  35. data/ext/apriori/ex/test1.tab +10 -0
  36. data/ext/apriori/ex/test2.tab +10 -0
  37. data/ext/apriori/ex/test3.tab +30 -0
  38. data/ext/apriori/ex/test4.tab +11 -0
  39. data/ext/apriori/ex/test5.tab +39 -0
  40. data/ext/apriori/ex/tid2set +23 -0
  41. data/ext/apriori/ex/xhdr2set +33 -0
  42. data/ext/apriori/src/apriori.c +750 -0
  43. data/ext/apriori/src/apriori.dsp +120 -0
  44. data/ext/apriori/src/apriori.dsw +29 -0
  45. data/ext/apriori/src/apriori.mak +99 -0
  46. data/ext/apriori/src/istree.c +1411 -0
  47. data/ext/apriori/src/istree.h +160 -0
  48. data/ext/apriori/src/makefile +105 -0
  49. data/ext/apriori/src/tract.c +870 -0
  50. data/ext/apriori/src/tract.h +261 -0
  51. data/ext/apriori_wrapper.c +757 -0
  52. data/ext/apriori_wrapper.h +10 -0
  53. data/ext/extconf.rb +32 -0
  54. data/ext/math/doc/copying +504 -0
  55. data/ext/math/src/chi2.c +151 -0
  56. data/ext/math/src/chi2.h +27 -0
  57. data/ext/math/src/choose.c +71 -0
  58. data/ext/math/src/choose.h +16 -0
  59. data/ext/math/src/gamma.c +446 -0
  60. data/ext/math/src/gamma.h +39 -0
  61. data/ext/math/src/intexp.c +35 -0
  62. data/ext/math/src/intexp.h +15 -0
  63. data/ext/math/src/makefile +164 -0
  64. data/ext/math/src/math.mak +48 -0
  65. data/ext/math/src/normal.c +387 -0
  66. data/ext/math/src/normal.h +44 -0
  67. data/ext/math/src/radfn.c +113 -0
  68. data/ext/math/src/radfn.h +34 -0
  69. data/ext/math/src/zeta.c +49 -0
  70. data/ext/math/src/zeta.h +15 -0
  71. data/ext/pre-clean.rb +8 -0
  72. data/ext/pre-setup.rb +9 -0
  73. data/ext/util/doc/copying +504 -0
  74. data/ext/util/src/listops.c +76 -0
  75. data/ext/util/src/listops.h +26 -0
  76. data/ext/util/src/makefile +103 -0
  77. data/ext/util/src/memsys.c +84 -0
  78. data/ext/util/src/memsys.h +42 -0
  79. data/ext/util/src/nstats.c +288 -0
  80. data/ext/util/src/nstats.h +69 -0
  81. data/ext/util/src/params.c +86 -0
  82. data/ext/util/src/params.h +19 -0
  83. data/ext/util/src/parse.c +133 -0
  84. data/ext/util/src/parse.h +81 -0
  85. data/ext/util/src/scan.c +767 -0
  86. data/ext/util/src/scan.h +111 -0
  87. data/ext/util/src/symtab.c +443 -0
  88. data/ext/util/src/symtab.h +121 -0
  89. data/ext/util/src/tabscan.c +279 -0
  90. data/ext/util/src/tabscan.h +99 -0
  91. data/ext/util/src/util.mak +91 -0
  92. data/ext/util/src/vecops.c +317 -0
  93. data/ext/util/src/vecops.h +42 -0
  94. data/lib/apriori.rb +133 -0
  95. data/lib/apriori/adapter.rb +13 -0
  96. data/lib/apriori/association_rule.rb +89 -0
  97. data/lib/apriori/version.rb +9 -0
  98. data/script/console +10 -0
  99. data/script/destroy +14 -0
  100. data/script/generate +14 -0
  101. data/script/txt2html +82 -0
  102. data/setup.rb +1585 -0
  103. data/tasks/apriori.rake +20 -0
  104. data/tasks/attic.rake +28 -0
  105. data/tasks/deployment.rake +34 -0
  106. data/tasks/environment.rake +7 -0
  107. data/tasks/install.rake +13 -0
  108. data/tasks/website.rake +17 -0
  109. data/test/apriori_test.rb +13 -0
  110. data/test/fixtures/market_basket_results_test.txt +5 -0
  111. data/test/fixtures/market_basket_string_test.txt +7 -0
  112. data/test/fixtures/results.txt +2 -0
  113. data/test/fixtures/sample.txt +7 -0
  114. data/test/test_helper.rb +5 -0
  115. data/test/unit/test_apriori.rb +68 -0
  116. data/test/unit/test_itemsets_and_parsing.rb +82 -0
  117. data/website/index.html +248 -0
  118. data/website/index.txt +152 -0
  119. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  120. data/website/stylesheets/screen.css +142 -0
  121. data/website/template.html.erb +49 -0
  122. metadata +226 -0
@@ -0,0 +1,1301 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
2
+ <!-- ===================================================================
3
+ File : apriori.html
4
+ Contents: Description of apriori program
5
+ Author : Christian Borgelt
6
+ ==================================================================== -->
7
+ <html>
8
+ <head>
9
+ <title>Apriori Documentation</title>
10
+ </head>
11
+
12
+ <!-- =============================================================== -->
13
+
14
+ <body bgcolor=white>
15
+ <h1><a name="top">Apriori</a></h1>
16
+ <h3>Finding Association Rules/Hyperedges with the Apriori Algorithm</h3>
17
+
18
+ <!-- =============================================================== -->
19
+ <p><img src="line.gif" alt="" height=7 width=704></p>
20
+
21
+ <h3>Contents</h3>
22
+ <ul type=disc>
23
+ <li><a href="#intro">Introduction</a></li>
24
+ <li><a href="#terms">Support and Confidence</a>
25
+ <ul type=circle>
26
+ <li><a href="#suppset">Support of an Item Set</a></li>
27
+ <li><a href="#confrule">Confidence of an Association Rule</a></li>
28
+ <li><a href="#supprule">Support of an Association Rule</a></li>
29
+ </ul></li>
30
+ <li><a href="#target">Target Types</a>
31
+ <ul type=circle>
32
+ <li><a href="#assrules">Association Rules</a></li>
33
+ <li><a href="#itemsets">Frequent Item Sets</a></li>
34
+ <li><a href="#closed">Closed Item Sets</a></li>
35
+ <li><a href="#maximal">Maximal Item Sets</a></li>
36
+ <li><a href="#hyperedges">Association Hyperedges</a></li>
37
+ </ul></li>
38
+ <li><a href="#select">Extended Rule Selection</a>
39
+ <ul type=circle>
40
+ <li><a href="#diff">
41
+ Absolute Confidence Difference to Prior</a></li>
42
+ <li><a href="#quotient">
43
+ Difference of Confidence Quotient to 1</a></li>
44
+ <li><a href="#improve">
45
+ Absolute Difference of Improvement Value to 1</a></li>
46
+ <li><a href="#info">
47
+ Information Difference to Prior</a></li>
48
+ <li><a href="#chi2">
49
+ Normalized chi<sup>2</sup> Measure</a></li>
50
+ <li><a href="#behavior">
51
+ Selection Behavior of the Measures</a></li>
52
+ <li><a href="#appear">Item Appearances</a></li>
53
+ </ul></li>
54
+ <li><a href="#select">Extended Item Set Selection</a>
55
+ <ul type=circle>
56
+ <li><a href="#logquot">
57
+ Binary Logarithm of Support Quotient</a></li>
58
+ <li><a href="#suppquot">
59
+ Difference of Support Quotient to 1</a></li>
60
+ </ul></li>
61
+ <li><a href="#tatree">Transaction Prefix Tree</a></li>
62
+ <li><a href="#options">Program Invocation and Options</a></li>
63
+ <li><a href="#input">Input Format</a>
64
+ <ul type=circle>
65
+ <li><a href="#transin">Format of the Transactions File</a></li>
66
+ <li><a href="#appearin">Format of the Item Appearances File</a></li>
67
+ </ul></li>
68
+ <li><a href="#output">Output Format</a>
69
+ <ul type=circle>
70
+ <li><a href="#ruleout">Output Format for Association Rules</a></li>
71
+ <li><a href="#setout">Output Format for Frequent Item Sets</a></li>
72
+ <li><a href="#edgeout">Output Format for Association Hyperedges</a>
73
+ </li>
74
+ </ul></li>
75
+ <li><a href="#compopt">Compilation Options</a></li>
76
+ <li><a href="#copying">Copying</a></li>
77
+ <li><a href="#download">Download</a></li>
78
+ <li><a href="#contact">Contact</a></li>
79
+ </ul>
80
+
81
+ <!-- =============================================================== -->
82
+ <p><img src="line.gif" alt="" height=7 width=704></p>
83
+
84
+ <h3><a name="intro">Introduction</a></h3>
85
+
86
+ <p>Association rule induction [Agrawal et al. 1993] is a powerful method
87
+ for so-called <i>market basket analysis</i>, which aims at finding
88
+ regularities in the shopping behavior of customers of supermarkets,
89
+ mail-order companies and the like. With the induction of association
90
+ rules one tries to find sets of products that are frequently bought
91
+ together, so that from the presence of certain products in a shopping
92
+ cart one can infer (with a high probability) that certain other products
93
+ are present. Such information, expressed in the form of rules, can
94
+ often be used to increase the number of items sold, for instance, by
95
+ appropriately arranging the products in the shelves of a supermarket
96
+ (they may, for example, be placed adjacent to each other in order to
97
+ invite even more customers to buy them together) or by directly
98
+ suggesting items to a customer, which may be of interest for him/her.
99
+ </p>
100
+
101
+ <p>An <i>association rule</i> is a rule like "If a customer buys wine
102
+ and bread, he often buys cheese, too." It expresses an association
103
+ between (sets of) <i>items</i>, which may be products of a supermarket
104
+ or a mail-order company, special equipment options of a car, optional
105
+ services offered by telecommunication companies etc. An association
106
+ rule states that if we pick a customer at random and find out that
107
+ he selected certain items (bought certain products, chose certain
108
+ options etc.), we can be confident, quantified by a percentage, that
109
+ he also selected certain other items (bought certain other products,
110
+ chose certain other options etc.).</p>
111
+
112
+ <p>Of course, we do not want just any association rules, we want
113
+ "good" rules, rules that are "expressive" and "reliable". The standard
114
+ measures to assess association rules are the <i>support</i> and the
115
+ <i>confidence</i> of a rule, both of which are computed from the
116
+ <i>support</i> of certain item sets. These notions are discussed
117
+ <a href="#terms">here</a> in more detail. However, these standard
118
+ criteria are often not sufficient to restrict the set of rules to
119
+ the interesting ones. Therefore some additional rule evaluation
120
+ measures are considered <a href="#select">here</a>.</p>
121
+
122
+ <p>The main problem of association rule induction is that there are
123
+ so many possible rules. For example, for the product range of a
124
+ supermarket, which may consist of several thousand different products,
125
+ there are billions of possible association rules. It is obvious that
126
+ such a vast amount of rules cannot be processed by inspecting each
127
+ one in turn. Therefore efficient algorithms are needed that restrict
128
+ the search space and check only a subset of all rules, but, if possible,
129
+ without missing important rules. One such algorithm is the apriori
130
+ algorithm, which was developed by [Agrawal et al. 1994] and which
131
+ is implemented in a specific way in my apriori program. A brief
132
+ description of some implementation aspects can be found in these
133
+ papers:</p>
134
+ <ul type=disc>
135
+ <li><b>Induction of Association Rules: Apriori Implementation</b><br>
136
+ Christian Borgelt and Rudolf Kruse<br>
137
+ <i>15th Conference on Computational Statistics</i>
138
+ (Compstat 2002, Berlin, Germany)<br>
139
+ Physica Verlag, Heidelberg, Germany 2002<br>
140
+ (6 pages)
141
+ <a href="http://fuzzy.cs.uni-magdeburg.de/~borgelt/papers/cstat_02.pdf">
142
+ cstat_02.pdf</a> (105 kb)
143
+ <a href="http://fuzzy.cs.uni-magdeburg.de/~borgelt/papers/cstat_02.ps.gz">
144
+ cstat_02.ps.gz</a> (91 kb)</li>
145
+ <li><b>Efficient Implementations of Apriori and Eclat</b><br>
146
+ Christian Borgelt.<br>
147
+ <i>Workshop of Frequent Item Set Mining Implementations</i>
148
+ (FIMI 2003, Melbourne, FL, USA).<br>
149
+ (9 pages)
150
+ <a href="http://fuzzy.cs.uni-magdeburg.de/~borgelt/papers/fimi_03.pdf">
151
+ fimi_03.pdf</a> (304 kb)
152
+ <a href="http://fuzzy.cs.uni-magdeburg.de/~borgelt/papers/fimi_03.ps.gz">
153
+ fimi_03.ps.gz</a> (197 kb)</li>
154
+ </ul>
155
+
156
+ <p>By the way: Earlier versions of my apriori program
157
+ are incorporated in the well-known data mining tool
158
+ <a href="http://www.spss.com/Clementine/">Clementine</a>
159
+ (apriori version 1.8 in Clementine version 5.0,
160
+ apriori version 2.7 in Clementine version 7.0), available from
161
+ <a href="http://www.spss.com">SPSS</a>. Newer versions of Clementine
162
+ still use my program, but I am not completely sure about the version
163
+ number of the underlying apriori program.</p>
164
+
165
+ <p>Enjoy,<br>
166
+ <a href="http://fuzzy.cs.uni-magdeburg.de/~borgelt/">
167
+ Christian Borgelt</a></p>
168
+
169
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
170
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
171
+ <td width=5></td>
172
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
173
+ </table>
174
+
175
+ <!-- =============================================================== -->
176
+ <p><img src="line.gif" alt="" height=7 width=704></p>
177
+
178
+ <h3><a name="terms">Support and Confidence</a></h3>
179
+
180
+ <h4><a name="suppset">Support of an Item Set</a></h4>
181
+
182
+ <p>Let T be the set of all transactions under consideration, e.g.,
183
+ let T be the set of all "baskets" or "carts" of products bought by the
184
+ customers of a supermarket - on a given day if you like. The support
185
+ of an item set S is the percentage of those transactions in T which
186
+ contain S. In the supermarket example this is the number of "baskets"
187
+ that contain a given set S of products, for example S = { bread, wine,
188
+ cheese }. If U is the set of all transactions that contain all items
189
+ in S, then</p>
190
+ <p>support(S) = (|U| / |T|) *100%,</p>
191
+ <p>where |U| and |T| are the number of elements in U and T,
192
+ respectively. For example, if a customer buys the set
193
+ X = { milk, bread, apples, wine, sausages, cheese, onions, potatoes }
194
+ then S is obviously a subset of X, hence X is in U. If there are 318
195
+ customers and 242 of them buy such a set X or a similar one that
196
+ contains S, then support(S) = 76.1%.</p>
197
+
198
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
199
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
200
+ <td width=5></td>
201
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
202
+ </table>
203
+
204
+ <!-- =============================================================== -->
205
+
206
+ <h4><a name="confrule">Confidence of an Association Rule</a></h4>
207
+
208
+ <p>This is the measure used by [Agrawal et al. 1993], the inventors of
209
+ the apriori algorithm, to evaluate association rules. The confidence
210
+ of a rule R = "A and B -&gt; C" is the support of the set of all items
211
+ that appear in the rule divided by the support of the antecedent of
212
+ the rule, i.e.</p>
213
+ <p>confidence(R) = (support({A, B, C}) / support({A, B})) *100%.</p>
214
+ <p>More intuitively, the confidence of a rule is the number of cases in
215
+ which the rule is correct relative to the number of cases in which it
216
+ is applicable. For example, let R = "wine and bread -&gt; cheese". If a
217
+ customer buys wine and bread, then the rule is applicable and it says
218
+ that he/she can be expected to buy cheese. If he/she does not buy wine
219
+ or does not buy bread or buys neither, then the rule is not applicable
220
+ and thus (obviously) does not say anything about this customer.</p>
221
+
222
+ <p>If the rule is applicable, it says that the customer can be expected
223
+ to buy cheese. But he/she may or may not buy cheese, that is, the rule
224
+ may or may not be correct. Of course, we are interested in how good the
225
+ rule is, i.e., how often its prediction that the customer buys cheese
226
+ is correct. The rule confidence measures this: It states the percentage
227
+ of cases in which the rule is correct. It computes the percentage
228
+ relative to the number of cases in which the antecedent holds, since
229
+ these are the cases in which the rule makes a prediction that can be
230
+ true or false. If the antecedent does not hold, then the rule does not
231
+ make a prediction, so these cases are excluded.</p>
232
+
233
+ <p>With this measure a rule is selected if its confidence exceeds or
234
+ is equal to a given lower limit. That is, we look for rules that have
235
+ a high probability of being true, i.e., we look for "good" rules, which
236
+ make correct (or very often correct) predictions. My apriori program
237
+ always uses this measure to select association rules. The default value
238
+ for the confidence limit is 80%. It can be changed with the option
239
+ <tt>-c</tt>.</p>
240
+
241
+ <p>In addition to the rule confidence my apriori program lets you
242
+ select several other rule evaluation measures, which are explained
243
+ below, but it will also use rule confidence. If you want to rely
244
+ entirely on some other measure, you can do so by setting the minimal
245
+ rule confidence to zero. (Attention: If you have a large number of
246
+ items, setting the minimal rule confidence to zero can result in
247
+ <i>very</i> high memory consumption.)</p>
248
+
249
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
250
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
251
+ <td width=5></td>
252
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
253
+ </table>
254
+
255
+ <!-- =============================================================== -->
256
+
257
+ <h4><a name="supprule">Support of an Association Rule</a></h4>
258
+
259
+ <p>The support of rules may cause some confusion, because I use this
260
+ term in a different way than [Agrawal et al. 1993] do. For them, the
261
+ support of a rule "A and B -&gt; C" is the support of the set {A, B, C}.
262
+ This is fine if rule confidence is the only rule evaluation measure,
263
+ but it causes problems if some other measure is used. For these other
264
+ measures it is often much more appropriate to call the support of the
265
+ antecedent of the rule, i.e. the support of {A, B} in the example above,
266
+ the support of the rule.</p>
267
+
268
+ <p>The difference can also be stated in the following way: For [Agrawal
269
+ et al. 1993], the support of the rule is the (relative) number of cases
270
+ in which the rule is correct (i.e., in which the presence of the item C
271
+ follows from the presence of the items A and B), whereas for me (and
272
+ thus my apriori program) the support of a rule is the (relative) number
273
+ of cases in which it is applicable (i.e., in which the antecedent of the
274
+ rule holds), although in some of these cases it may be false (because
275
+ only the items A and B are present, but the item C is missing).</p>
276
+
277
+ <p>One reason for this, as already mentioned, is that the definition
278
+ of [Agrawal et al. 1993] does not work well for evaluation measures
279
+ other than rule confidence. This is explained in more detail below.
280
+ Another reason is that I prefer the support of a rule to say something
281
+ about the "statistical" support of a rule and its confidence, i.e.,
282
+ from how many cases the confidence is computed in order to express
283
+ how well founded the assertion about the confidence is.</p>
284
+
285
+ <p>Maybe an example will make this clearer. Suppose you have a die which
286
+ you suspect to be biased. To test this hypothesis, you throw the die,
287
+ say, a thousand times. 307 times the 6 turns up. Hence you assume that
288
+ the die is actually biased, since the relative frequency is about 30%
289
+ although for an unbiased die it should be around 17%. Now, what is the
290
+ "statistical" support of this assertion, i.e., on how many experiments
291
+ does it rest? Obviously it rests on all 1000 experiments and not only
292
+ on the 307 experiments in which the 6 turned up. This is so, simply
293
+ because you had to do 1000 experiments to find out that the relative
294
+ frequency is around 30% and not only the 307 in which a 6 turned up.</p>
295
+
296
+ <p>Or suppose you are doing an opinion poll to find out about the
297
+ acceptance of a certain political party, maybe with the usual question
298
+ "If an election were held next Sunday ...?" You ask 2000 persons, of
299
+ which 857 say that they would vote for the party you are interested in.
300
+ What is the support of the assertion that this party would get around
301
+ 43% of all votes? It is the size of your sample, i.e., all 2000 persons,
302
+ and not only the 857 that answered in the positive. Again you had to ask
303
+ all 2000 people to find out about the percentage of 43%. Of course, you
304
+ could have asked fewer people, say, 100, of which, say, 43 said that
305
+ they would vote for the party, but then your assertion would be less
306
+ reliable, because it is less "supported". The number of votes for the
307
+ party could also be 40% or 50%, because of some random influences. Such
308
+ deviations are much less likely, if you asked 2000 persons, since then
309
+ the random influences can be expected to cancel out.</p>
310
+
311
+ <p>The rule support can be used to select association rules by stating
312
+ a lower bound for the support of a rule. This is equivalent to saying
313
+ that you are interested only in such rules that have a large enough
314
+ statistical basis (since my apriori program uses the term "support"
315
+ in my interpretation and not in the one used by [Agrawal et al. 1993]).
316
+ The default value for the support limit is 10%. It can be changed
317
+ with the option <tt>-s</tt>. If the number given is negative, it is
318
+ interpreted as an absolute number (number of transactions) rather than
319
+ a percentage. (Note that in this case the option <tt>-a</tt> reverses
320
+ its meaning: if it is not given only the absolute support is printed,
321
+ if it is added, the relative support is printed, too.) The lower bound
322
+ for the rule support is combined with the lower bound for the rule
323
+ confidence, i.e., my apriori program generates only rules the confidence
324
+ of which is greater than or equal to the confidence limit <i>and</i> the
325
+ support of which is greater than or equal to the support limit.</p>
326
+
327
+ <p>Despite the above arguments in favor of my definition of the support
328
+ of an association rule, a rule support compatibility mode is available.
329
+ With the option <tt>-o</tt> the original rule support definition can be
330
+ selected. In this case the support of an association rule is the support
331
+ of the set with the items in the antecedent and the consequent of the
332
+ rule, i.e. the support of a rule as defined in [Agrawal et al. 1993].
333
+ </p>
334
+
335
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
336
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
337
+ <td width=5></td>
338
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
339
+ </table>
340
+
341
+ <!-- =============================================================== -->
342
+ <p><img src="line.gif" alt="" height=7 width=704></p>
343
+
344
+ <h3><a name="target">Target Types</a></h3>
345
+
346
+ <p>The target type, which can be selected via the option <tt>-t</tt>,
347
+ is either association rules (option <tt>-tr</tt>, default), frequent
348
+ item sets (option <tt>-ts</tt>), closed item sets (option <tt>-tc</tt>),
349
+ maximal item sets (option <tt>-tm</tt>), or association hyperedges
350
+ (option <tt>-th</tt>).</p>
351
+
352
+ <!-- =============================================================== -->
353
+
354
+ <h4><a name="assrules">Association Rules (default, option -tr)</a></h4>
355
+
356
+ <p>By default my apriori program produces association rules with
357
+ a single item in the consequent. The restriction to single item
358
+ consequents is due to the following considerations: In the first place,
359
+ association rule mining usually produces too many rules even if one
360
+ confines oneself to rules with only one item in the consequent. So why
361
+ should one make the situation worse by allowing more than one item in
362
+ the consequent? (It merely blows up the output size.)</p>
363
+
364
+ <p>Secondly, I do not know any application in which rules with more
365
+ than one item in the consequent are of any real use. The reason, in
366
+ my opinion, is that such more complex rules add almost nothing to the
367
+ insights about the data set. To understand this, consider the simpler
368
+ rules that correspond to a rule with multiple items in the consequent,
369
+ that is, rules having the same antecedent and consequents with only
370
+ single items from the consequent of the complex rule. All of these
371
+ rules must necessarily be in the output, because neither their support
372
+ nor their confidence can be less than that of the more complex rule.
373
+ That is, if you have a rule c d &lt;- a b, you will necessarily also
374
+ have the rules c &lt;- a b and d &lt;- a b in the output. Of course,
375
+ these latter two rules together do <i>not</i> say the same as the more
376
+ complex rule. However, what do you gain from the additional information
377
+ the more complex rule gives you? How can you use it? And is this little
378
+ extra information worth having to analyze a much bigger rule set?</p>
379
+
380
+ <!-- =============================================================== -->
381
+
382
+ <h4><a name="itemsets">Frequent Item Sets (option -ts)</a></h4>
383
+
384
+ <p>Sometimes one may not want to find association rules, but only the
385
+ frequent item sets underlying them. That is, one wants to find all
386
+ item sets with a support exceeding a certain threshold. My apriori
387
+ program supports this search, too: If the option <tt>-ts</tt> is
388
+ given, only frequent item sets are determined.</p>
389
+
390
+ <!-- =============================================================== -->
391
+
392
+ <h4><a name="closed">Closed Item Sets (option -tc)</a></h4>
393
+
394
+ <p>A frequent item set is called <i>closed</i> if no superset has the
395
+ same support. If the option <tt>-tc</tt> is given, the found frequent
396
+ item sets are subsequently filtered and only the closed item sets
397
+ are kept.</p>
398
+
399
+ <!-- =============================================================== -->
400
+
401
+ <h4><a name="maximal">Maximal Item Sets (option -tm)</a></h4>
402
+
403
+ <p>A frequent item set is called <i>maximal</i> if no superset is
404
+ frequent, i.e., has a support exceeding the minimal support. If the
405
+ option <tt>-tm</tt> is given, the found frequent item sets are
406
+ subsequently filtered and only the maximal item sets are kept.</p>
407
+
408
+ <!-- =============================================================== -->
409
+
410
+ <h4><a name="hyperedges">Association Hyperedges (option -th)</a></h4>
411
+
412
+ <p>My apriori program can also find association hyperedges, i.e., sets
413
+ of items that are strongly predictive w.r.t. each other. In this mode
414
+ no rules are generated, only item sets are selected. The selection
415
+ criterion is as follows: Given an item set with enough support (option
416
+ <tt>-s</tt>), all rules are checked which can be formed using this set
417
+ with all items appearing in the rule. For example, for the item set
418
+ {a b c}, the rules c &lt;- a b, b &lt;- a c, a &lt;- b c would be
419
+ considered. The confidences of these rules are computed and averaged.
420
+ If the resulting average confidence is greater than the minimal
421
+ confidence (option <tt>-c</tt>), the item set is selected. (I am
422
+ grateful to Bastien Duclaux for requesting the possibility to generate
423
+ association hyperedges.)</p>
424
+
425
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
426
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
427
+ <td width=5></td>
428
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
429
+ </table>
430
+
431
+ <!-- =============================================================== -->
432
+ <p><img src="line.gif" alt="" height=7 width=704></p>
433
+
434
+ <h3><a name="select">Extended Rule Selection</a></h3>
435
+
436
+ <p>If rules are selected using the rule confidence, the following
437
+ problem arises: "Good" rules (rules that are often true) are not
438
+ always "interesting" rules (rules that reveal something about the
439
+ interdependence of the items). You certainly know the examples that
440
+ are usually given to illustrate this fact. For instance, it is easy
441
+ to find out in a medical database that the rule "pregnant -&gt; female"
442
+ is true with a confidence of 100%. Hence it is a perfect rule, it
443
+ never fails, but, of course, this is not very surprising. Although
444
+ the measures explained below cannot deal with this problem (which is
445
+ semantical), they may be able to improve on the results in a related
446
+ case.</p>
447
+
448
+ <p>Let us look at the supermarket example again and let us assume
449
+ that 60% of all customers buy some kind of bread. Consider the rule
450
+ "cheese -&gt; bread", which holds with a confidence of, say, 62%.
451
+ Is this an important rule? Obviously not, since the fact that the
452
+ customer buys cheese does not have a significant influence on him/her
453
+ buying bread: The percentages are almost the same. But if you had set
454
+ a confidence limit of 60%, you would get both rules "-&gt; bread"
455
+ (confidence 60%) and "cheese -&gt; bread" (confidence 62%), although
456
+ the first would suffice (the first, since it is the simpler of the
457
+ two). The idea of all measures that can be used in addition to or instead
458
+ of rule confidence is to handle such situations and to suppress the
459
+ second rule.</p>
460
+
461
+ <p>In addition, consider the following case: Assume that the confidence
462
+ of the rule "cheese -&gt; bread" is not 62% but 35%. With a confidence
463
+ limit of 60% it would not be selected, but it may be very important to
464
+ know about this rule! Together with cheese bread is bought much less
465
+ frequently than it is bought at all. Is cheese some kind of substitute
466
+ for bread, so that one does not need any bread, if one has cheese? Ok,
467
+ maybe this is not a very good example. However, what can be seen is
468
+ that a rule with low confidence can be very interesting, since it may
469
+ capture an important influence. Furthermore, this is a way to express
470
+ negation (though only in the consequent of a rule), since
471
+ "cheese -&gt; bread" with confidence 35% is obviously equivalent to
472
+ "cheese -&gt; no bread" with confidence 65%. This also makes clear
473
+ why the support of the item set that contains all items in the body
474
+ <i>and</i> the head of the rule is not appropriate for this measure.
475
+ An important rule may have confidence 0 and thus a support (in the
476
+ interpretation of [Agrawal et al. 1993]) of 0. Hence it is not
477
+ reasonable to set a lower bound for this kind of support.</p>
478
+
479
+ <p>I hope that the intention underlying all this is already clear:
480
+ Potentially interesting rules differ significantly in their confidence
481
+ from the confidence of rules with the same consequent, but a simpler
482
+ antecedent. Adding an item to the antecedent is informative only if it
483
+ significantly changes the confidence of the rule. Otherwise the simpler
484
+ rule suffices.</p>
485
+
486
+ <p>Unfortunately the measures other than rule confidence do not solve
487
+ the rule selection problem in the very general form in which it was
488
+ stated above. It is not that easy to deal with all rules that have a
489
+ simpler antecedent, to keep track of which of these rules were selected
490
+ (this obviously influences the selection of more complicated rules),
491
+ to deal with the special type of Poincare paradox that can occur, etc.
492
+ Hence the measures always compare the confidence of a rule with the
493
+ confidence of the rule with empty antecedent, i.e. with the relative
494
+ frequency of the consequent.</p>
495
+
496
+ <p>I call the confidence of a rule with empty antecedent the prior
497
+ confidence, since it is the confidence that the item in the consequent
498
+ of the rule will be present in an item set prior to any information
499
+ about other items that are present. The confidence of a rule with
500
+ non-empty antecedent (and the same consequent) I call the posterior
501
+ confidence, since it is the confidence that the item in the consequent
502
+ of the rule will be present after it gets known that the items in the
503
+ antecedent of the rule are present.</p>
504
+
505
+ <p>All measures that can be used in addition to rule confidence are
506
+ computed from these two values: the prior confidence and the posterior
507
+ confidence. Only those rules are selected for which the value of the
508
+ chosen additional evaluation measure exceeds or is equal to a certain
509
+ limit. The measures are chosen with the option <tt>-e</tt>, the limit
510
+ is passed to the program via the option <tt>-d</tt>. The default value
511
+ for the limit is 10%.</p>
512
+
513
+ <p>All additional rule evaluation measures are combined with the limits
514
+ for rule confidence and rule support. I.e., my apriori program selects
515
+ only those rules the confidence of which is greater than or equal to
516
+ the confidence limit, the support of which is greater than or equal to
517
+ the support limit, <i>and</i> for which the additional evaluation value
518
+ is greater than or equal to the limit for this measure. The default is
519
+ to use no additional evaluation measure, i.e., to rely only on rule
520
+ confidence and rule support. Of course you can remove the restriction
521
+ that the rule confidence must exceed a certain limit by simply setting
522
+ this limit to zero. In this case rules are selected using only the
523
+ limits for the rule support and the additional evaluation measure.
524
+ (Attention: If you have a large number of items, setting the minimal
525
+ rule confidence to zero can result in <i>very</i> high memory
526
+ consumption.)</p>
527
+
528
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
529
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
530
+ <td width=5></td>
531
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
532
+ </table>
533
+
534
+ <!-- =============================================================== -->
535
+
536
+ <h4><a name="diff">Absolute Confidence Difference to Prior
537
+ (option <tt>-ed</tt> or <tt>-e1</tt>)</a></h4>
538
+
539
+ <p>The simplest way to compare the two confidences is to compute the
540
+ absolute value of their difference. I.e., if "-&gt; bread" has a
541
+ confidence of 60% and "cheese -&gt; bread" has a confidence of 62%,
542
+ then the value of this measure is 2%. The parameter given via the
543
+ option <tt>-d</tt> to the program states a lower bound for this
544
+ difference. It follows that this measure selects rules the confidence
545
+ of which differs by more than a given threshold from the corresponding
546
+ prior confidence. For example, with the option <tt>-d20</tt> (and, of
547
+ course, the option <tt>-ed</tt> to select the measure) for the item
548
+ "bread" only rules with a confidence less than 40% or greater than 80%
549
+ would be selected. Of course, for other items, with a different prior
550
+ confidence, the upper and lower bounds are different, too.</p>
551
+
552
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
553
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
554
+ <td width=5></td>
555
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
556
+ </table>
557
+
558
+ <!-- =============================================================== -->
559
+
560
+ <h4><a name="quotient">Difference of Confidence Quotient to 1
561
+ (option <tt>-eq</tt> or <tt>-e2</tt>)</a></h4>
562
+
563
+ <p>An equally simple way to compare the two confidences is to compute
564
+ their quotient. Since either the prior or the posterior confidence
565
+ can be greater (which was handled by computing the absolute value
566
+ for the previous measure), this quotient or its reciprocal, whichever
567
+ is smaller, is then compared to one. A quotient of one says that the
568
+ rule is not interesting, since the prior and the posterior confidence
569
+ are identical. The more the quotient differs from one, the more
570
+ "interesting" the rule is. Hence, just as above, a lower bound for
571
+ this difference is given via the option <tt>-d</tt>. For the bread
572
+ example, with the option <tt>-d20</tt> rules with a confidence less
573
+ than or equal to (1 -20%) *60% = 0.8 *60% = 48% or a confidence greater
574
+ than or equal to 60% / (1 -20%) = 60% / 0.8 = 75% are selected. The
575
+ difference between this measure and the absolute confidence difference
576
+ to the prior is that the deviation that is considered to be significant
577
+ depends on the prior confidence. If it is high, then the deviation of
578
+ the posterior confidence must also be high, and if it is low, then
579
+ the deviation need only be low. For example, if "-&gt; bread" had a
580
+ confidence of only 30%, then the option <tt>-d20</tt> (just as above)
581
+ would select rules the confidence of which is less than 0.8 *30% = 24%
582
+ or greater than 30% /0.8 = 37.5%. As you can see, for a prior confidence
583
+ of 60% the deviation has to be at least 12%/15%, for a prior confidence
584
+ of 30% it has to be only 6%/7.5% in order to make a rule eligible.
585
+ The idea is that an increment of the confidence from 30% to 40% is more
586
+ important than an increment from 60% to 70%, since the relative change
587
+ is greater.</p>
588
+
589
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
590
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
591
+ <td width=5></td>
592
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
593
+ </table>
594
+
595
+ <!-- =============================================================== -->
596
+
597
+ <h4><a name="improve">Absolute Difference of Improvement Value to 1
598
+ (option <tt>-ea</tt> or <tt>-e3</tt>)</a></h4>
599
+
600
+ <p>This measure is very similar to the preceding one. Actually, if
601
+ the confidence of a rule is smaller than the prior confidence, then
602
+ it coincides with it. The improvement value is simply the posterior
603
+ confidence divided by the prior confidence. It is greater than
604
+ one if the confidence increases due to the antecedent, and it is
605
+ smaller than one if the confidence decreases due to the antecedent.
606
+ By computing the absolute value of the difference to one, the
607
+ improvement value can easily be made a rule selection measure.
608
+ The advantage of this measure over the preceding one is that it is
609
+ symmetric w.r.t. changes of the confidence due to the antecedent of
610
+ a rule. For the bread example, with the option <tt>-d20</tt> rules with
611
+ a confidence less than or equal to (1 -20%) *60% = 0.8 *60% = 48% or a
612
+ confidence greater than or equal to (1 +20%) *60% = 1.2 * 60% = 72%
613
+ are selected. (Note the difference of 72% compared to 75% for the
614
+ preceding measure.) Similarly, for the second bread example
615
+ discussed above, the numbers are 0.8 *30% = 24% and 1.2 *30% = 36%.
616
+ Note that this is the only measure for which a value greater than 100
617
+ may be specified with the <tt>-d</tt> option, since it can exceed
618
+ 100% if the posterior confidence of a rule exceeds twice the prior
619
+ confidence. (I am grateful to Roland Jonscher, who pointed out this
620
+ measure to me.)</p>
621
+
622
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
623
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
624
+ <td width=5></td>
625
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
626
+ </table>
627
+
628
+ <!-- =============================================================== -->
629
+
630
+ <h4><a name="info">Information Difference to Prior
631
+ (option <tt>-ei</tt> or <tt>-e4</tt>)</a></h4>
632
+
633
+ <p>This measure is simply the information gain criterion that can be
634
+ used in decision tree learners like C4.5 to select the split attributes.
635
+ Its idea is as follows: Without any further information about other
636
+ items in the set, we have a certain probability (or, to be exact, a
637
+ relative frequency) distribution for, say "bread" and "no bread".
638
+ Let us assume it is 60% : 40% (prior confidence of the item "bread",
639
+ just as above). This distribution has a certain entropy</p>
640
+ <p>H = - P(bread) log<sub>2</sub> P(bread)
641
+ - P(no bread) log<sub>2</sub> P(no bread),</p>
642
+ <p>where P(bread) is equivalent to the support of "bread", which in
643
+ turn is equivalent to the prior confidence of "bread". The entropy of a
644
+ probability distribution is, intuitively, a lower bound on the number
645
+ of yes-no-questions you have to ask in order to determine the actual
646
+ value. This cannot be understood very well with only two possible
647
+ values, but it can be made to work for this case too. I skip the
648
+ details here, they are not that important.</p>
649
+
650
+ <p>After we get the information that the items in the antecedent of
651
+ the rule are present (say, cheese), we have a different probability
652
+ distribution, say 35% : 65%. I.e., P(bread|cheese) = 0.35 and
653
+ P(no bread|cheese) = 0.65. If we also know the support of the item
654
+ "cheese" (let it be P(cheese) = 0.4 and P(no cheese) = 0.6), then
655
+ we can also compute the probabilities P(bread|no cheese) = 0.77 and
656
+ P(no bread|no cheese) = 0.23. Hence we have two posterior probability
657
+ distributions. The question now is: How much information do we receive
658
+ from observing the antecedent of the rule? Information is measured
659
+ as a reduction of entropy. Hence the entropies of the two conditional
660
+ probability distributions (for "cheese" and "no cheese") are computed
661
+ and summed weighted with the probability of their occurrence (i.e.,
662
+ the relative frequency of "cheese" and "no cheese", respectively).
663
+ This gives the expected value of the posterior or conditional entropy.
664
+ The difference of this value to the prior entropy (see above) is the
665
+ gain in information from the antecedent of the rule or, as I called
666
+ it, the information difference to the prior.</p>
667
+
668
+ <p>The value that can be given via the <tt>-d</tt> option is a lower
669
+ bound for the information gain, measured in hundredths of a bit. Since
670
+ all items can only be present or absent, the information gain can be
671
+ at most one bit. Therefore a percent value is still reasonable.</p>
672
+
673
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
674
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
675
+ <td width=5></td>
676
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
677
+ </table>
678
+
679
+ <!-- =============================================================== -->
680
+
681
+ <h4><a name="chi2">Normalized</a> chi<sup>2</sup> Measure
682
+ (option <tt>-ec</tt> or <tt>-e5</tt>)</h4>
683
+
684
+ <p>The chi<sup>2</sup> measure is well known from statistics. It is
685
+ often used to measure the difference between a supposed independent
686
+ distribution of two discrete variables and the actual joint distribution
687
+ in order to determine how strongly two variables depend on each other.
688
+ This measure (as it is defined in statistics) contains the number of
689
+ cases it is computed from as a factor. This is not very appropriate
690
+ if one wants to evaluate rules that can have varying support. Hence
691
+ this factor is removed by simply dividing the measure by the number
692
+ of item sets (the total number, i.e. with the names used above, the
693
+ number of sets in X). With this normalization, the chi<sup>2</sup>
694
+ measure can assume values between 0 (no dependence) and 1 (very strong
695
+ dependence). The value that can be given via the <tt>-d</tt> option is
696
+ a lower bound for the strength of the dependence of the head on the
697
+ body in percent (0 - no dependence, 100 - perfect dependence). Only
698
+ those rules are selected, in which the head depends on the body with
699
+ a higher degree of dependence.</p>
700
+
701
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
702
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
703
+ <td width=5></td>
704
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
705
+ </table>
706
+
707
+ <!-- =============================================================== -->
708
+
709
+ <h4><a name="behavior">Selection Behavior of the Measures</a></h4>
710
+
711
+ <p>In the directory <tt>apriori/doc</tt> you can find a Gnuplot script
712
+ named <tt>arem.gp</tt> (<tt>arem</tt> stands for additional rule
713
+ evaluation measures) which visualizes the behavior of the additional
714
+ rule evaluation measures. This script draws eight 3d graphs, one for
715
+ the absolute confidence difference, one for the difference of the
716
+ confidence quotient to one, three for the information difference to
717
+ the prior confidence and three for the normalized chi<sup>2</sup>
718
+ measure. All graphs show the value of an additional rule evaluation
719
+ measure over a plane defined by the prior and the posterior confidence
720
+ of a rule. The latter two measures need three graphs, since they depend
721
+ on the antecedent support of a rule as a third parameter. Setting a
722
+ minimal value for an additional rule evaluation measure is like
723
+ flooding the corresponding graph landscape up to a certain level
724
+ (given as a percentage, since all considered measures assume values
725
+ between 0 and 1). Only those rules are selected that sit on dry land.
726
+ </p>
727
+
728
+ <p>The first graph shows the behavior of the absolute confidence
729
+ difference. For the diagonal, i.e. the line where the prior and the
730
+ posterior confidence are identical, its value is zero (as expected).
731
+ The more the two confidences differ, the higher the value of this
732
+ measure gets, but in a linear way.</p>
733
+
734
+ <p>The second graph shows the behavior of the difference of the confidence quotient
735
+ to one. Again its value is zero for the diagonal (as the value of
736
+ all measures is) and becomes greater the more the prior and the
737
+ posterior confidence differ. But it is much steeper for a small
738
+ prior confidence than for a large one and it is non-linear.</p>
739
+
740
+ <p>The third to fifth graphs show the information difference to the
741
+ prior confidence for an antecedent support (which is identical to the
742
+ rule support in my interpretation, see above) of 0.2 (20%), 0.3 (30%)
743
+ and 0.4 (40%). The regions at the margins, where the measure is zero,
744
+ correspond to impossible combinations of prior and posterior confidence
745
+ and antecedent support. As you can see, the valley gets narrower with
746
+ increasing antecedent support. I.e., with the same minimal value for
747
+ this measure, rules with low antecedent support need a higher confidence
748
+ difference to be selected than rules with a high antecedent support.
749
+ This nicely models the statistical significance of confidence changes.
750
+ If you only have a few cases to support your rule, even a large
751
+ deviation from the prior confidence can be explained by random
752
+ fluctuations, since only a few transactions need to be different to
753
+ produce a considerable change. However, if the antecedent support
754
+ is large, even a small deviation (in percent) has to be considered
755
+ significant (non random), since it takes a lot of changes to
756
+ transactions to produce even a small change in the percentage.
757
+ This dependence on the antecedent support of the rule and the fact that the
758
+ valley is not pointed at the diagonal (which means that even a low
759
+ minimal value can exclude a lot of rules) is the main difference
760
+ between the information gain and the normalized chi<sup>2</sup>
761
+ measure on the one hand and the absolute confidence difference and
762
+ difference of the confidence quotient to one on the other.</p>
763
+
764
+ <p>The sixth to eighth graphs show the normalized chi<sup>2</sup> measure
765
+ for an antecedent support of 0.2, 0.3, and 0.4. The valleys are very
766
+ similar to those for the information difference to the prior confidence,
767
+ they only have slightly steeper flanks, especially for low antecedent
768
+ support. So in practice there is no big difference between the
769
+ information difference and the normalized chi<sup>2</sup> measure.</p>
770
+
771
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
772
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
773
+ <td width=5></td>
774
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
775
+ </table>
776
+
777
+ <!-- =============================================================== -->
778
+
779
+ <h4><a name="appear">Item Appearances</a></h4>
780
+
781
+ <p>My apriori program provides a simple way to restrict the rules to
782
+ generate w.r.t. the items that shall appear in them. It accepts a third
783
+ optional input file, in which item appearances can be given. For each
784
+ item it can be stated whether it may appear in the body (antecedent)
785
+ of a rule, in the head (consequent), or in both. A description of the
786
+ format of this additional input file, including examples, can be found
787
+ <a href="#appearin">here</a>. If no item appearances file is given, the
788
+ rule selection is not restricted. (I am grateful to the people at
789
+ Integral Solutions Ltd., who developed the well-known data mining tool
790
+ <a href="http://www.spss.com/Clementine/">Clementine</a>, but are now
791
+ part of <a href="http://www.spss.com">SPSS</a>, for requesting the
792
+ possibility to restrict item appearances.)</p>
793
+
794
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
795
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
796
+ <td width=5></td>
797
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
798
+ </table>
799
+
800
+ <!-- =============================================================== -->
801
+ <p><img src="line.gif" alt="" height=7 width=704></p>
802
+
803
+ <h3><a name="select">Extended Item Set Selection</a></h3>
804
+
805
+ <p>Since version 4.20 there are extended selection possibilities for
806
+ frequent item sets, too. (These were added due to a cooperation with
807
+ Sonja Gruen, FU Berlin.)</p>
808
+
809
+ <!-- =============================================================== -->
810
+
811
+ <h4><a name="logquot">Binary Logarithm of Support Quotient</a></h4>
812
+
813
+ <p>An expected value for the support of an item set is computed from
814
+ the support values of the individual items, assuming independence.
815
+ Then the binary logarithm of the quotient of actual support and
816
+ expected support is computed. A minimum value for this measure can
817
+ be set with the option <tt>-d</tt>. In this case only frequent item
818
+ sets for which this measure exceeds the given threshold are kept.</p>
819
+
820
+ <!-- =============================================================== -->
821
+
822
+ <h4><a name="suppquot">Difference of Support Quotient to 1</a></h4>
823
+
824
+ <p>As with the preceding measure the quotient of actual and expected
825
+ support of an item set is computed and compared to 1 (a value of 1
826
+ signifies independence of the items). A minimum value for this measure
827
+ can be set with the option <tt>-d</tt>. In this case only frequent item
828
+ sets for which this measure exceeds the given threshold are kept.</p>
829
+
830
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
831
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
832
+ <td width=5></td>
833
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
834
+ </table>
835
+
836
+ <!-- =============================================================== -->
837
+ <p><img src="line.gif" alt="" height=7 width=704></p>
838
+
839
+ <h3><a name="tatree">Transaction Prefix Tree</a></h3>
840
+
841
+ <p>The counting process can be sped up by organizing the transactions
842
+ into a prefix tree. That is, the items in each transaction are sorted
843
+ and then transactions with the same prefix are grouped together and
844
+ are counted, as one may say, in parallel. This way of organizing the
845
+ transactions was added in version 4.03 and is the default behavior now.
846
+ If you prefer that the transactions are treated individually (i.e., the
847
+ transactions are stored in a simple list and only one transaction is
848
+ counted at a time), use the option <tt>-h</tt>.</p>
849
+
850
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
851
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
852
+ <td width=5></td>
853
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
854
+ </table>
855
+
856
+ <!-- =============================================================== -->
857
+ <p><img src="line.gif" alt="" height=7 width=704></p>
858
+
859
+ <h3><a name="options">Program Invocation and Options</a></h3>
860
+
861
+ <p>My apriori program is invoked as follows:</p>
862
+ <p><tt>apriori [options] infile outfile [appfile]</tt></p>
863
+ <p>The normal arguments are:</p>
864
+ <table border=0 cellpadding=0 cellspacing=0>
865
+ <tr><td>infile</td><td width=10></td>
866
+ <td>file to read transactions from</td></tr>
867
+ <tr><td>outfile</td><td></td>
868
+ <td>file to write association rules / hyperedges to</td></tr>
869
+ <tr><td>appfile</td><td></td>
870
+ <td>file stating item appearances (optional)</td></tr>
871
+ </table>
872
+ <p>The possible options are:</p>
873
+ <table border=0 cellpadding=0 cellspacing=0>
874
+ <tr><td><tt>-t#</tt></td><td width=10></td>
875
+ <td>target type (default: association rules)</td></tr>
876
+ <tr><td><tt></tt></td><td width=10></td>
877
+ <td>(s: itemsets, c: closed itemsets, m: maximal itemsets,<br>
878
+ <font color="white">(</font>r: association rules,
879
+ h: association hyperedges)</td></tr>
880
+ <tr><td><tt>-m#</tt></td><td></td>
881
+ <td>minimal number of items per set/rule/hyperedge
882
+ (default: 1)</td></tr>
883
+ <tr><td><tt>-n#</tt></td><td></td>
884
+ <td>maximal number of items per set/rule/hyperedge
885
+ (default: 5)</td></tr>
886
+ <tr><td><tt>-s#</tt></td><td></td>
887
+ <td>minimal support of a set/rule/hyperedge
888
+ (default: 10%)</td></tr>
889
+ <tr><td><tt>-S#</tt></td><td></td>
890
+ <td>maximal support of a set/rule/hyperedge
891
+ (default: 100%)</td></tr>
892
+ <tr><td><tt>-c#</tt></td><td></td>
893
+ <td>minimal confidence of a rule/hyperedge
894
+ (default: 80%)</td></tr>
895
+ <tr><td><tt>-o</tt></td><td></td>
896
+ <td>use original definition of the support of a rule
897
+ (body &amp; head)</td></tr>
898
+ <tr><td><tt>-k#</tt></td><td></td>
899
+ <td>item separator for output (default: " ")</td></tr>
900
+ <tr><td><tt>-p#</tt></td><td></td>
901
+ <td>output format for support/confidence (default: "%.1f%%")</td></tr>
902
+ <tr><td><tt>-x</tt></td><td></td>
903
+ <td>extended support output (print both rule support types)
904
+ </td></tr>
905
+ <tr><td><tt>-a</tt></td><td></td>
906
+ <td>print absolute support (number of transactions)</td></tr>
907
+ <tr><td><tt>-y</tt></td><td></td>
908
+ <td>print lift value (confidence divided by prior)</td></tr>
909
+ <tr><td><tt>-e#</tt></td><td></td>
910
+ <td>additional rule evaluation measure (default: none)</td></tr>
911
+ <tr><td><tt>-!</tt></td><td></td>
912
+ <td>print a list of additional rule evaluation measures</td></tr>
913
+ <tr><td><tt>-d#</tt></td><td></td>
914
+ <td>minimal value of additional evaluation measure
915
+ (default: 10%)</td></tr>
916
+ <tr><td><tt>-v</tt></td><td></td>
917
+ <td>print value of additional rule evaluation measure</td></tr>
918
+ <tr><td><tt>-g</tt></td><td></td>
919
+ <td>write output in scannable form
920
+ (quote certain characters)</td></tr>
921
+ <tr><td><tt>-l</tt></td><td></td>
922
+ <td>do not load transactions into memory
923
+ (work on input file)</td></tr>
924
+ <tr><td><tt>-q#</tt></td><td></td>
925
+ <td>sort items w.r.t. their frequency (default: 1)</td></tr>
926
+ <tr><td><tt></tt></td><td></td>
927
+ <td>(1: ascending, -1: descending, 0: do not sort,</td></tr>
928
+ <tr><td><tt></tt></td><td></td>
929
+ <td><font color="white">(</font>2: ascending, -2: descending
930
+ w.r.t. transaction size sum)</td></tr>
931
+ <tr><td><tt>-u#</tt></td><td></td>
932
+ <td>filter unused items from transactions (default: 0.5)</td></tr>
933
+ <tr><td><tt></tt></td><td></td>
934
+ <td>(0: do not filter items w.r.t. usage in item sets,<br>
935
+ &lt;0: fraction of removed items for filtering,<br>
936
+ &gt;0: take execution times ratio into account)</td></tr>
937
+ <tr><td><tt>-h</tt></td><td></td>
938
+ <td>do not organize transactions as a prefix tree</td></tr>
939
+ <tr><td><tt>-j</tt></td><td></td>
940
+ <td>use quicksort to sort the transactions (default: heapsort)
941
+ </td></tr>
942
+ <tr><td><tt>-z</tt></td><td></td>
943
+ <td>minimize memory usage (default: maximize speed)</td></tr>
944
+ <tr><td><tt>-i#</tt></td><td></td>
945
+ <td>ignore records starting with characters in the given
946
+ string</td></tr>
947
+ <tr><td valign="top"><tt>-b/f/r#</tt></td><td></td>
948
+ <td>blank characters, field and record separators</td></tr>
949
+ <tr><td><tt></tt></td><td></td>
950
+ <td>(default: "<tt> \t\r</tt>", "<tt> \t</tt>", "<tt>\n</tt>")
951
+ </td></tr>
952
+ </table>
953
+ <p>(<tt>#</tt> always means a number, a letter, or a string that
954
+ specifies the parameter of the option.)</p>
955
+ <p>Note that the effect of the option <tt>-z</tt> can depend heavily
956
+ on how the items are sorted (option <tt>-q</tt>). Highest savings
957
+ in memory usually result if items are sorted with descending
958
+ frequency (<tt>-q-1</tt>). However, this often worsens the
959
+ processing time considerably.</p>
960
+ <p>A note on the option <tt>-j</tt>: Constructing the prefix tree for
961
+ the transactions requires sorting the transactions. Since version
962
+ 4.17 heap sort is the default sorting method for the transactions,
963
+ because it turned out that in conjunction with the item sorting
964
+ (and especially for artificial datasets like T10I4D100K) quicksort
965
+ can lead to very bad processing times (almost worst case behavior,
966
+ i.e., O(n<sup>2</sup>) run time for the sorting). However, sometimes
967
+ this is not a problem and then quicksort is slightly faster, which
968
+ can be activated with the option <tt>-j</tt>.</p>
969
+
970
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
971
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
972
+ <td width=5></td>
973
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
974
+ </table>
975
+
976
+ <!-- =============================================================== -->
977
+ <p><img src="line.gif" alt="" height=7 width=704></p>
978
+
979
+ <h3><a name="input">Input Format</a></h3>
980
+
981
+ <h4><a name="transin">Format of the Transactions File</a></h4>
982
+
983
+ <p>A text file structured by field and record separators and blanks.
984
+ Record separators, not surprisingly, separate records, usually lines,
985
+ field separators fields (or columns), usually words. Blanks are used
986
+ to fill fields (columns), e.g. to align them. In the transactions
987
+ file each record must contain one transaction, i.e. a list of item
988
+ identifiers, which are separated by field separators. An empty record
989
+ is interpreted as an empty transaction.</p>
990
+
991
+ <p>Examples can be found in the directory <tt>apriori/ex</tt> in the
992
+ source package. Refer to the file <tt>apriori/ex/readme</tt>, which
993
+ explains how to process the different example files in the directory
994
+ <tt>apriori/ex</tt> in the source package.</p>
995
+
996
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
997
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
998
+ <td width=5></td>
999
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
1000
+ </table>
1001
+
1002
+ <!-- =============================================================== -->
1003
+
1004
+ <h4><a name="appearin">Format of the Item Appearances File</a></h4>
1005
+
1006
+ <p>A text file structured by field and record separators and blanks.
1007
+ (Note: For this file the same field and record separators and blanks
1008
+ are used as for the transactions file.)</p>
1009
+
1010
+ <p>The first record, which must have one field, contains the default
1011
+ appearance to be used with all items not mentioned in the appearances
1012
+ file. Other records state the appearance of specific items. The first
1013
+ field states the item, the second the appearance indicator. If no
1014
+ appearance indicator is given, the item will be ignored (i.e. may
1015
+ appear neither in the body (antecedent) nor in the head (consequent)
1016
+ of a rule). Empty records are ignored.</p>
1017
+
1018
+ <p>The following appearance indicators are recognized:</p>
1019
+ <ul type=circle>
1020
+ <li>item may appear only in rule bodies (antecedents):<br>
1021
+ <tt>i in b body a ante antecedent</tt></li>
1022
+ <li>item may appear only in rule heads (consequents):<br>
1023
+ <tt>o out h head c cons consequent</tt></li>
1024
+ <li>item may appear in rule bodies (antecedents)
1025
+ or in rule heads (consequents):<br>
1026
+ <tt>io inout bh b&amp;h ac a&amp;c both</tt></li>
1027
+ <li>item may appear neither in rule bodies (antecedents)
1028
+ nor in rule heads (consequents):<br>
1029
+ <tt>n neither none ign ignore -</tt></li>
1030
+ </ul>
1031
+
1032
+ <p><b>Example 1:</b>
1033
+ Generate only rules with item "x" in the consequent.</p>
1034
+ <p><tt>in<br>
1035
+ x out</tt></p>
1036
+
1037
+ <p><b>Example 2:</b>
1038
+ Item "x" may appear only in a rule head (consequent),
1039
+ item "y" only in a rule body (antecedent);
1040
+ appearance of all other items is not restricted.</p>
1041
+ <p><tt>both<br>
1042
+ x head<br>
1043
+ y body</tt></p>
1044
+
1045
+ <p>Providing no item appearances file is equivalent to an item
1046
+ appearances file containing only an indicator like "both", which
1047
+ does not restrict the appearance of any items.</p>
1048
+
1049
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
1050
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
1051
+ <td width=5></td>
1052
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
1053
+ </table>
1054
+
1055
+ <!-- =============================================================== -->
1056
+ <p><img src="line.gif" alt="" height=7 width=704></p>
1057
+
1058
+ <h3><a name="output">Output Format</a></h3>
1059
+
1060
+ <h4><a name="ruleout">Output Format for Association Rules</a></h4>
1061
+
1062
+ <p>Each line of the output file contains one association rule in the
1063
+ format</p>
1064
+ <p><tt>c &lt;- a b ... (x%, y%)</tt></p>
1065
+ <p>where a, b, and c are item identifiers, and</p>
1066
+
1067
+ <table border=0 cellpadding=0 cellspacing=0>
1068
+ <tr><td valign=top>x</td><td width=10></td>
1069
+ <td>the percentage of transactions that contain all items appearing
1070
+ in the rule body (antecedent), that is, in the example above,
1071
+ a and b. (support of the rule, i.e., the support in my
1072
+ interpretation)</td></tr>
1073
+ <tr><td valign=top>y</td><td></td>
1074
+ <td>the confidence of the rule, which is computed as the quotient of
1075
+ the percentage of transactions that contain all items appearing in
1076
+ the rule body (antecedent) and the rule head (consequent) - that is,
1077
+ in the example above, a, b, and c - and the above percentage x.</td>
1078
+ </tr>
1079
+ </table>
1080
+
1081
+ <p>If the option -o is used, x is replaced by the rule support in the
1082
+ original definition (i.e., the one used by [Agrawal et al. 1993]),
1083
+ namely the percentage of transactions that contain all items appearing
1084
+ in the rule body (antecedent) and the rule head (consequent), that is, in
1085
+ the example above, a, b, and c. The value of y, however, is still
1086
+ computed from the value of x as described above.</p>
1087
+
1088
+ <p>If the option -x is given, both types of rule support (support of
1089
+ all items in the rule and support of the items in the body/antecedent
1090
+ of the rule) will be printed. The confidence of a rule (see above) is
1091
+ the quotient of the two support values (* 100%), i.e., a rule will
1092
+ be printed as</p>
1093
+ <p><tt>c &lt;- a b ... (x<sub>1</sub>%, x<sub>2</sub>%, y%)</tt></p>
1094
+ <p>where x<sub>1</sub> is the support of the set of all items in the
1095
+ rule, x<sub>2</sub> is the support of the set of items in the body
1096
+ (antecedent) of the rule, and y = x<sub>1</sub>/x<sub>2</sub> * 100%
1097
+ is the confidence of the rule.</p>
1098
+
1099
+ <p>If the option -a is given, the support percentage x is supplemented
1100
+ by the absolute number of transactions underlying it:</p>
1101
+ <p><tt>c &lt;- a b ... (x%/s, y%)</tt></p>
1102
+ <p>where s is the absolute number of transactions. If the option -x is
1103
+ given, the absolute support is printed for both types of rule support.
1104
+ </p>
1105
+
1106
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
1107
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
1108
+ <td width=5></td>
1109
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
1110
+ </table>
1111
+
1112
+ <!-- =============================================================== -->
1113
+
1114
+ <h4><a name="setout">Output Format for Frequent Item Sets</a></h4>
1115
+
1116
+ <p>Each line of the output file contains one item set in the format</p>
1117
+ <p><tt>a b c ... (x%)</tt></p>
1118
+ <p>where a, b, and c are item identifiers and x is the percentage of
1119
+ transactions that contain this item set (item set support).</p>
1120
+
1121
+ <p>If the option -a is given, this percentage is supplemented by the
1122
+ absolute number of transactions underlying it:</p>
1123
+ <p><tt>a b c ... (x%/s)</tt></p>
1124
+ <p>where s is the absolute number of transactions.</p>
1125
+
1126
+ <p>If the option -x is given, the percentage of transactions that are
1127
+ identical to the item set is printed, too (whereas the normal support
1128
+ is the percentage of transactions that are a superset of the item set):
1129
+ </p>
1130
+ <p><tt>a b c ... (x%, y%)</tt></p>
1131
+ <p>where x is the normal item set support and y is the percentage of
1132
+ transactions identical to the item set. (This output option was added
1133
+ in response to a request by Laura Maruster.) If the option -a is also
1134
+ given, both percentages are supplemented by the absolute number of
1135
+ transactions underlying these percentages.</p>
1136
+
1137
+ <p>Note that for frequent item sets the option -x cannot be combined
1138
+ with the option -y. That is, in order to compute the second support
1139
+ measure for item sets, the transactions have to be loaded into memory.
1140
+ </p>
1141
+
1142
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
1143
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
1144
+ <td width=5></td>
1145
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
1146
+ </table>
1147
+
1148
+ <!-- =============================================================== -->
1149
+
1150
+ <h4><a name="edgeout">Output Format for Association Hyperedges</a></h4>
1151
+
1152
+ <p>Each line of the output file contains one hyperedge in the format</p>
1153
+ <p><tt>a b c ... (x%, y%)</tt></p>
1154
+ <p>where a, b, and c are item identifiers, and</p>
1155
+
1156
+ <table border=0 cellpadding=0 cellspacing=0>
1157
+ <tr><td valign=top>x</td><td width=10></td>
1158
+ <td>the percentage of transactions that contain all items appearing
1159
+ in the hyperedge, that is, in the example above, a, b, and c.</td>
1160
+ </tr>
1161
+ <tr><td valign=top>y</td><td></td>
1162
+ <td>the average confidence of all rules that can be formed using
1163
+ the items in the hyperedge with all items appearing in the rule
1164
+ (see above), i.e., for the example above, the average confidence
1165
+ of the rules c &lt;- a b, b &lt;- a c, and a &lt;- b c.</td></tr>
1166
+ </table>
1167
+
1168
+ <p>If the option -a is given, the support percentage x is supplemented
1169
+ by the absolute number of transactions underlying it:</p>
1170
+ <p><tt>a b c ... (x%/s, y%)</tt></p>
1171
+ <p>where s is the absolute number of transactions.</p>
1172
+
1173
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
1174
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
1175
+ <td width=5></td>
1176
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
1177
+ </table>
1178
+
1179
+ <!-- =============================================================== -->
1180
+ <p><img src="line.gif" alt="" height=7 width=704></p>
1181
+
1182
+ <h3><a name="compopt">Compilation Options</a></h3>
1183
+
1184
+ <p>The program can be compiled with two additional compilation options
1185
+ (see <tt>makefile</tt>), namely <tt>-DBENCH</tt> and <tt>-DARCH64</tt>.
1186
+ </p>
1187
+
1188
+ <p>Compiling the program with <tt>-DBENCH</tt> produces a version that
1189
+ prints some benchmark information on termination, in particular about
1190
+ the memory used during the item set tree construction (number of nodes,
1191
+ counters, necessary counters, child pointers, necessary child pointers).
1192
+ Collecting the memory usage information slightly, but negligibly
1193
+ increases the execution time.</p>
1194
+
1195
+ <p>Compiling the program with <tt>-DARCH64</tt> produces a version for
1196
+ 64 bit machines (architecture model: pointers are 64 bits, integers are
1197
+ 32 bits wide), by removing some alignment issues in the transaction and
1198
+ item set tree representations, which would otherwise lead to bus errors.
1199
+ These adaptations slightly, but negligibly increase memory consumption.
1200
+ (I am grateful to Anthony Casaletto, SPSS Inc., for helping me a lot to
1201
+ identify these alignment problems, by compiling and testing the program
1202
+ on a 64 bit machine, since I do not have access to one.)</p>
1203
+
1204
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
1205
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
1206
+ <td width=5></td>
1207
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
1208
+ </table>
1209
+
1210
+ <!-- =============================================================== -->
1211
+ <p><img src="line.gif" alt="" height=7 width=704></p>
1212
+
1213
+ <h3><a name="copying">Copying</a></h3>
1214
+
1215
+ <p>apriori -
1216
+ find association rules/hyperedges with apriori algorithm<br>
1217
+ copyright &copy; 1996-2003 Christian Borgelt</p>
1218
+
1219
+ <p>This program is free software; you can redistribute it and/or
1220
+ modify it under the terms of the
1221
+ <a href="http://www.fsf.org/copyleft/lesser.html">
1222
+ GNU Lesser (Library) General Public License</a> as published by the
1223
+ <a href="http://www.fsf.org">Free Software Foundation</a>.</p>
1224
+
1225
+ <p>This program is distributed in the hope that it will be useful,
1226
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1227
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1228
+ <a href="http://www.fsf.org/copyleft/lesser.html">
1229
+ GNU Lesser (Library) General Public License</a> for more details.</p>
1230
+
1231
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
1232
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
1233
+ <td width=5></td>
1234
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
1235
+ </table>
1236
+
1237
+ <!-- =============================================================== -->
1238
+ <p><img src="line.gif" alt="" height=7 width=704></p>
1239
+
1240
+ <h3><a name="download">Download</a></h3>
1241
+
1242
+ <p><a href="http://fuzzy.cs.uni-magdeburg.de/~borgelt/apriori.html">
1243
+ Download page</a> with most recent version.</p>
1244
+
1245
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
1246
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
1247
+ <td width=5></td>
1248
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
1249
+ </table>
1250
+
1251
+ <!-- =============================================================== -->
1252
+ <p><img src="line.gif" alt="" height=7 width=704></p>
1253
+
1254
+ <h3><a name="contact">Contact</a></h3>
1255
+
1256
+ <table border=0 cellpadding=0 cellspacing=0>
1257
+ <tr><td valign=top>Snail mail:</td><td width=10></td>
1258
+ <td><a href="http://fuzzy.cs.uni-magdeburg.de/~borgelt/index.html">
1259
+ Christian Borgelt</a><br>
1260
+ <a href="http://fuzzy.cs.uni-magdeburg.de/index.html">
1261
+ Working Group Neural Networks and Fuzzy Systems</a><br>
1262
+ <a href="http://www-iws.cs.uni-magdeburg.de/iws.html">
1263
+ Department of Knowledge Processing and Language Engineering</a><br>
1264
+ <a href="http://www.cs.uni-magdeburg.de/">
1265
+ School of Computer Science</a><br>
1266
+ <a href="http://www.uni-magdeburg.de/">
1267
+ Otto-von-Guericke-University of Magdeburg</a><br>
1268
+ Universit&auml;tsplatz 2<br>
1269
+ D-39106 Magdeburg<br>
1270
+ Germany</td></tr>
1271
+ <tr><td valign=top>E-mail:</td><td></td>
1272
+ <td><a href="mailto:christian.borgelt@cs.uni-magdeburg.de">
1273
+ christian.borgelt@cs.uni-magdeburg.de</a><br>
1274
+ <a href="mailto:borgelt@iws.cs.uni-magdeburg.de">
1275
+ borgelt@iws.cs.uni-magdeburg.de</a></td></tr>
1276
+ <tr><td>Phone:</td><td></td>
1277
+ <td>+49 391 67 12700</td></tr>
1278
+ <tr><td>Fax:</td><td></td>
1279
+ <td>+49 391 67 12018</td></tr>
1280
+ <tr><td>Office:</td><td></td>
1281
+ <td>29.015</td></tr>
1282
+ </table>
1283
+
1284
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
1285
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
1286
+ <td width=5></td>
1287
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
1288
+ </table>
1289
+
1290
+ <!-- =============================================================== -->
1291
+ <p><img src="line.gif" alt="" height=7 width=704></p>
1292
+
1293
+ <address>&copy; 2002-2004
1294
+ <a href="mailto:borgelt@iws.cs.uni-magdeburg.de">Christian Borgelt</a>
1295
+ </address>
1296
+ <!-- Created: Thu May 24 12:28:05 CEST 2001 -->
1297
+ <!-- hhmts start -->
1298
+ Last modified: Tue Nov 23 13:49:10 CET 2004
1299
+ <!-- hhmts end -->
1300
+ </body>
1301
+ </html>