apriori 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (122) hide show
  1. data/History.txt +16 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +121 -0
  4. data/README.txt +149 -0
  5. data/Rakefile +15 -0
  6. data/TODO.txt +60 -0
  7. data/attic/c_ext_test1/MyTest/MyTest.c +23 -0
  8. data/attic/c_ext_test1/MyTest/extconf.rb +11 -0
  9. data/attic/c_ext_test1/mytest.rb +10 -0
  10. data/attic/test.c +12 -0
  11. data/config/hoe.rb +81 -0
  12. data/config/requirements.rb +29 -0
  13. data/examples/01_simple_example.rb +32 -0
  14. data/examples/02_small_file_example.rb +17 -0
  15. data/examples/03_large_file_example.rb +22 -0
  16. data/examples/test_data/market_basket_basic_test.dat +9 -0
  17. data/ext/Apriori.c +149 -0
  18. data/ext/Makefile +149 -0
  19. data/ext/apriori/doc/apriori.html +1301 -0
  20. data/ext/apriori/doc/arem.gp +68 -0
  21. data/ext/apriori/doc/c_rev.gp +89 -0
  22. data/ext/apriori/doc/chi2.tex +156 -0
  23. data/ext/apriori/doc/copying +504 -0
  24. data/ext/apriori/doc/line.gif +0 -0
  25. data/ext/apriori/doc/uparrow.gif +0 -0
  26. data/ext/apriori/ex/flg2set +15 -0
  27. data/ext/apriori/ex/hdr2set +13 -0
  28. data/ext/apriori/ex/readme +71 -0
  29. data/ext/apriori/ex/row2set +7 -0
  30. data/ext/apriori/ex/rulesort +24 -0
  31. data/ext/apriori/ex/tab2set +9 -0
  32. data/ext/apriori/ex/test.app +2 -0
  33. data/ext/apriori/ex/test.rul +9 -0
  34. data/ext/apriori/ex/test1.rul +43 -0
  35. data/ext/apriori/ex/test1.tab +10 -0
  36. data/ext/apriori/ex/test2.tab +10 -0
  37. data/ext/apriori/ex/test3.tab +30 -0
  38. data/ext/apriori/ex/test4.tab +11 -0
  39. data/ext/apriori/ex/test5.tab +39 -0
  40. data/ext/apriori/ex/tid2set +23 -0
  41. data/ext/apriori/ex/xhdr2set +33 -0
  42. data/ext/apriori/src/apriori.c +750 -0
  43. data/ext/apriori/src/apriori.dsp +120 -0
  44. data/ext/apriori/src/apriori.dsw +29 -0
  45. data/ext/apriori/src/apriori.mak +99 -0
  46. data/ext/apriori/src/istree.c +1411 -0
  47. data/ext/apriori/src/istree.h +160 -0
  48. data/ext/apriori/src/makefile +105 -0
  49. data/ext/apriori/src/tract.c +870 -0
  50. data/ext/apriori/src/tract.h +261 -0
  51. data/ext/apriori_wrapper.c +757 -0
  52. data/ext/apriori_wrapper.h +10 -0
  53. data/ext/extconf.rb +32 -0
  54. data/ext/math/doc/copying +504 -0
  55. data/ext/math/src/chi2.c +151 -0
  56. data/ext/math/src/chi2.h +27 -0
  57. data/ext/math/src/choose.c +71 -0
  58. data/ext/math/src/choose.h +16 -0
  59. data/ext/math/src/gamma.c +446 -0
  60. data/ext/math/src/gamma.h +39 -0
  61. data/ext/math/src/intexp.c +35 -0
  62. data/ext/math/src/intexp.h +15 -0
  63. data/ext/math/src/makefile +164 -0
  64. data/ext/math/src/math.mak +48 -0
  65. data/ext/math/src/normal.c +387 -0
  66. data/ext/math/src/normal.h +44 -0
  67. data/ext/math/src/radfn.c +113 -0
  68. data/ext/math/src/radfn.h +34 -0
  69. data/ext/math/src/zeta.c +49 -0
  70. data/ext/math/src/zeta.h +15 -0
  71. data/ext/pre-clean.rb +8 -0
  72. data/ext/pre-setup.rb +9 -0
  73. data/ext/util/doc/copying +504 -0
  74. data/ext/util/src/listops.c +76 -0
  75. data/ext/util/src/listops.h +26 -0
  76. data/ext/util/src/makefile +103 -0
  77. data/ext/util/src/memsys.c +84 -0
  78. data/ext/util/src/memsys.h +42 -0
  79. data/ext/util/src/nstats.c +288 -0
  80. data/ext/util/src/nstats.h +69 -0
  81. data/ext/util/src/params.c +86 -0
  82. data/ext/util/src/params.h +19 -0
  83. data/ext/util/src/parse.c +133 -0
  84. data/ext/util/src/parse.h +81 -0
  85. data/ext/util/src/scan.c +767 -0
  86. data/ext/util/src/scan.h +111 -0
  87. data/ext/util/src/symtab.c +443 -0
  88. data/ext/util/src/symtab.h +121 -0
  89. data/ext/util/src/tabscan.c +279 -0
  90. data/ext/util/src/tabscan.h +99 -0
  91. data/ext/util/src/util.mak +91 -0
  92. data/ext/util/src/vecops.c +317 -0
  93. data/ext/util/src/vecops.h +42 -0
  94. data/lib/apriori.rb +133 -0
  95. data/lib/apriori/adapter.rb +13 -0
  96. data/lib/apriori/association_rule.rb +89 -0
  97. data/lib/apriori/version.rb +9 -0
  98. data/script/console +10 -0
  99. data/script/destroy +14 -0
  100. data/script/generate +14 -0
  101. data/script/txt2html +82 -0
  102. data/setup.rb +1585 -0
  103. data/tasks/apriori.rake +20 -0
  104. data/tasks/attic.rake +28 -0
  105. data/tasks/deployment.rake +34 -0
  106. data/tasks/environment.rake +7 -0
  107. data/tasks/install.rake +13 -0
  108. data/tasks/website.rake +17 -0
  109. data/test/apriori_test.rb +13 -0
  110. data/test/fixtures/market_basket_results_test.txt +5 -0
  111. data/test/fixtures/market_basket_string_test.txt +7 -0
  112. data/test/fixtures/results.txt +2 -0
  113. data/test/fixtures/sample.txt +7 -0
  114. data/test/test_helper.rb +5 -0
  115. data/test/unit/test_apriori.rb +68 -0
  116. data/test/unit/test_itemsets_and_parsing.rb +82 -0
  117. data/website/index.html +248 -0
  118. data/website/index.txt +152 -0
  119. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  120. data/website/stylesheets/screen.css +142 -0
  121. data/website/template.html.erb +49 -0
  122. metadata +226 -0
@@ -0,0 +1,1301 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
2
+ <!-- ===================================================================
3
+ File : apriori.html
4
+ Contents: Description of apriori program
5
+ Author : Christian Borgelt
6
+ ==================================================================== -->
7
+ <html>
8
+ <head>
9
+ <title>Apriori Documentation</title>
10
+ </head>
11
+
12
+ <!-- =============================================================== -->
13
+
14
+ <body bgcolor=white>
15
+ <h1><a name="top">Apriori</a></h1>
16
+ <h3>Finding Association Rules/Hyperedges with the Apriori Algorithm</h3>
17
+
18
+ <!-- =============================================================== -->
19
+ <p><img src="line.gif" alt="" height=7 width=704></p>
20
+
21
+ <h3>Contents</h3>
22
+ <ul type=disc>
23
+ <li><a href="#intro">Introduction</a></li>
24
+ <li><a href="#terms">Support and Confidence</a>
25
+ <ul type=circle>
26
+ <li><a href="#suppset">Support of an Item Set</a></li>
27
+ <li><a href="#confrule">Confidence of an Association Rule</a></li>
28
+ <li><a href="#supprule">Support of an Association Rule</a></li>
29
+ </ul></li>
30
+ <li><a href="#target">Target Types</a>
31
+ <ul type=circle>
32
+ <li><a href="#assrules">Association Rules</a></li>
33
+ <li><a href="#itemsets">Frequent Item Sets</a></li>
34
+ <li><a href="#closed">Closed Item Sets</a></li>
35
+ <li><a href="#maximal">Maximal Item Sets</a></li>
36
+ <li><a href="#hyperedges">Association Hyperedges</a></li>
37
+ </ul></li>
38
+ <li><a href="#select">Extended Rule Selection</a>
39
+ <ul type=circle>
40
+ <li><a href="#diff">
41
+ Absolute Confidence Difference to Prior</a></li>
42
+ <li><a href="#quotient">
43
+ Difference of Confidence Quotient to 1</a></li>
44
+ <li><a href="#improve">
45
+ Absolute Difference of Improvement Value to 1</a></li>
46
+ <li><a href="#info">
47
+ Information Difference to Prior</a></li>
48
+ <li><a href="#chi2">
49
+ Normalized chi<sup>2</sup> Measure</a></li>
50
+ <li><a href="#behavior">
51
+ Selection Behavior of the Measures</a></li>
52
+ <li><a href="#appear">Item Appearances</a></li>
53
+ </ul></li>
54
+ <li><a href="#select">Extended Item Set Selection</a>
55
+ <ul type=circle>
56
+ <li><a href="#logquot">
57
+ Binary Logarithm of Support Quotient</a></li>
58
+ <li><a href="#suppquot">
59
+ Difference of Support Quotient to 1</a></li>
60
+ </ul></li>
61
+ <li><a href="#tatree">Transaction Prefix Tree</a></li>
62
+ <li><a href="#options">Program Invocation and Options</a></li>
63
+ <li><a href="#input">Input Format</a>
64
+ <ul type=circle>
65
+ <li><a href="#transin">Format of the Transactions File</a></li>
66
+ <li><a href="#appearin">Format of the Item Appearances File</a></li>
67
+ </ul></li>
68
+ <li><a href="#output">Output Format</a>
69
+ <ul type=circle>
70
+ <li><a href="#ruleout">Output Format for Association Rules</a></li>
71
+ <li><a href="#setout">Output Format for Frequent Item Sets</a></li>
72
+ <li><a href="#edgeout">Output Format for Association Hyperedges</a>
73
+ </li>
74
+ </ul></li>
75
+ <li><a href="#compopt">Compilation Options</a></li>
76
+ <li><a href="#copying">Copying</a></li>
77
+ <li><a href="#download">Download</a></li>
78
+ <li><a href="#contact">Contact</a></li>
79
+ </ul>
80
+
81
+ <!-- =============================================================== -->
82
+ <p><img src="line.gif" alt="" height=7 width=704></p>
83
+
84
+ <h3><a name="intro">Introduction</a></h3>
85
+
86
+ <p>Association rule induction [Agrawal et al. 1993] is a powerful method
87
+ for so-called <i>market basket analysis</i>, which aims at finding
88
+ regularities in the shopping behavior of customers of supermarkets,
89
+ mail-order companies and the like. With the induction of association
90
+ rules one tries to find sets of products that are frequently bought
91
+ together, so that from the presence of certain products in a shopping
92
+ cart one can infer (with a high probability) that certain other products
93
+ are present. Such information, expressed in the form of rules, can
94
+ often be used to increase the number of items sold, for instance, by
95
+ appropriately arranging the products in the shelves of a supermarket
96
+ (they may, for example, be placed adjacent to each other in order to
97
+ invite even more customers to buy them together) or by directly
98
+ suggesting items to a customer, which may be of interest for him/her.
99
+ </p>
100
+
101
+ <p>An <i>association rule</i> is a rule like "If a customer buys wine
102
+ and bread, he often buys cheese, too." It expresses an association
103
+ between (sets of) <i>items</i>, which may be products of a supermarket
104
+ or a mail-order company, special equipment options of a car, optional
105
+ services offered by telecommunication companies etc. An association
106
+ rule states that if we pick a customer at random and find out that
107
+ he selected certain items (bought certain products, chose certain
108
+ options etc.), we can be confident, quantified by a percentage, that
109
+ he also selected certain other items (bought certain other products,
110
+ chose certain other options etc.).</p>
111
+
112
+ <p>Of course, we do not want just any association rules, we want
113
+ "good" rules, rules that are "expressive" and "reliable". The standard
114
+ measures to assess association rules are the <i>support</i> and the
115
+ <i>confidence</i> of a rule, both of which are computed from the
116
+ <i>support</i> of certain item sets. These notions are discussed
117
+ <a href="#terms">here</a> in more detail. However, these standard
118
+ criteria are often not sufficient to restrict the set of rules to
119
+ the interesting ones. Therefore some additional rule evaluation
120
+ measures are considered <a href="#select">here</a>.</p>
121
+
122
+ <p>The main problem of association rule induction is that there are
123
+ so many possible rules. For example, for the product range of a
124
+ supermarket, which may consist of several thousand different products,
125
+ there are billions of possible association rules. It is obvious that
126
+ such a vast amount of rules cannot be processed by inspecting each
127
+ one in turn. Therefore efficient algorithms are needed that restrict
128
+ the search space and check only a subset of all rules, but, if possible,
129
+ without missing important rules. One such algorithm is the apriori
130
+ algorithm, which was developed by [Agrawal et al. 1994] and which
131
+ is implemented in a specific way in my apriori program. A brief
132
+ description of some implementation aspects can be found in these
133
+ papers:</p>
134
+ <ul type=disc>
135
+ <li><b>Induction of Association Rules: Apriori Implementation</b><br>
136
+ Christian Borgelt and Rudolf Kruse<br>
137
+ <i>15th Conference on Computational Statistics</i>
138
+ (Compstat 2002, Berlin, Germany)<br>
139
+ Physica Verlag, Heidelberg, Germany 2002<br>
140
+ (6 pages)
141
+ <a href="http://fuzzy.cs.uni-magdeburg.de/~borgelt/papers/cstat_02.pdf">
142
+ cstat_02.pdf</a> (105 kb)
143
+ <a href="http://fuzzy.cs.uni-magdeburg.de/~borgelt/papers/cstat_02.ps.gz">
144
+ cstat_02.ps.gz</a> (91 kb)</li>
145
+ <li><b>Efficient Implementations of Apriori and Eclat</b><br>
146
+ Christian Borgelt.<br>
147
+ <i>Workshop of Frequent Item Set Mining Implementations</i>
148
+ (FIMI 2003, Melbourne, FL, USA).<br>
149
+ (9 pages)
150
+ <a href="http://fuzzy.cs.uni-magdeburg.de/~borgelt/papers/fimi_03.pdf">
151
+ fimi_03.pdf</a> (304 kb)
152
+ <a href="http://fuzzy.cs.uni-magdeburg.de/~borgelt/papers/fimi_03.ps.gz">
153
+ fimi_03.ps.gz</a> (197 kb)</li>
154
+ </ul>
155
+
156
+ <p>By the way: Earlier versions of my apriori program
157
+ are incorporated in the well-known data mining tool
158
+ <a href="http://www.spss.com/Clementine/">Clementine</a>
159
+ (apriori version 1.8 in Clementine version 5.0,
160
+ apriori version 2.7 in Clementine version 7.0), available from
161
+ <a href="http://www.spss.com">SPSS</a>. Newer versions of Clementine
162
+ still use my program, but I am not completely sure about the version
163
+ number of the underlying apriori program.</p>
164
+
165
+ <p>Enjoy,<br>
166
+ <a href="http://fuzzy.cs.uni-magdeburg.de/~borgelt/">
167
+ Christian Borgelt</a></p>
168
+
169
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
170
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
171
+ <td width=5></td>
172
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
173
+ </table>
174
+
175
+ <!-- =============================================================== -->
176
+ <p><img src="line.gif" alt="" height=7 width=704></p>
177
+
178
+ <h3><a name="terms">Support and Confidence</a></h3>
179
+
180
+ <h4><a name="suppset">Support of an Item Set</a></h4>
181
+
182
+ <p>Let T be the set of all transactions under consideration, e.g.,
183
+ let T be the set of all "baskets" or "carts" of products bought by the
184
+ customers of a supermarket - on a given day if you like. The support
185
+ of an item set S is the percentage of those transactions in T which
186
+ contain S. In the supermarket example this is the relative number of "baskets"
187
+ that contain a given set S of products, for example S = { bread, wine,
188
+ cheese }. If U is the set of all transactions that contain all items
189
+ in S, then</p>
190
+ <p>support(S) = (|U| / |T|) *100%,</p>
191
+ <p>where |U| and |T| are the number of elements in U and T,
192
+ respectively. For example, if a customer buys the set
193
+ X = { milk, bread, apples, wine, sausages, cheese, onions, potatoes }
194
+ then S is obviously a subset of X, hence X is in U. If there are 318
195
+ customers and 242 of them buy such a set X or a similar one that
196
+ contains S, then support(S) = 76.1%.</p>
197
+
198
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
199
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
200
+ <td width=5></td>
201
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
202
+ </table>
203
+
204
+ <!-- =============================================================== -->
205
+
206
+ <h4><a name="confrule">Confidence of an Association Rule</a></h4>
207
+
208
+ <p>This is the measure used by [Agrawal et al. 1993], the inventors of
209
+ the apriori algorithm, to evaluate association rules. The confidence
210
+ of a rule R = "A and B -&gt; C" is the support of the set of all items
211
+ that appear in the rule divided by the support of the antecedent of
212
+ the rule, i.e.</p>
213
+ <p>confidence(R) = (support({A, B, C}) / support({A, B})) *100%.</p>
214
+ <p>More intuitively, the confidence of a rule is the number of cases in
215
+ which the rule is correct relative to the number of cases in which it
216
+ is applicable. For example, let R = "wine and bread -&gt; cheese". If a
217
+ customer buys wine and bread, then the rule is applicable and it says
218
+ that he/she can be expected to buy cheese. If he/she does not buy wine
219
+ or does not buy bread or buys neither, then the rule is not applicable
220
+ and thus (obviously) does not say anything about this customer.</p>
221
+
222
+ <p>If the rule is applicable, it says that the customer can be expected
223
+ to buy cheese. But he/she may or may not buy cheese, that is, the rule
224
+ may or may not be correct. Of course, we are interested in how good the
225
+ rule is, i.e., how often its prediction that the customer buys cheese
226
+ is correct. The rule confidence measures this: It states the percentage
227
+ of cases in which the rule is correct. It computes the percentage
228
+ relative to the number of cases in which the antecedent holds, since
229
+ these are the cases in which the rule makes a prediction that can be
230
+ true or false. If the antecedent does not hold, then the rule does not
231
+ make a prediction, so these cases are excluded.</p>
232
+
233
+ <p>With this measure a rule is selected if its confidence exceeds or
234
+ is equal to a given lower limit. That is, we look for rules that have
235
+ a high probability of being true, i.e., we look for "good" rules, which
236
+ make correct (or very often correct) predictions. My apriori program
237
+ always uses this measure to select association rules. The default value
238
+ for the confidence limit is 80%. It can be changed with the option
239
+ <tt>-c</tt>.</p>
240
+
241
+ <p>In addition to the rule confidence my apriori program lets you
242
+ select several other rule evaluation measures, which are explained
243
+ below, but it will also use rule confidence. If you want to rely
244
+ entirely on some other measure, you can do so by setting the minimal
245
+ rule confidence to zero. (Attention: If you have a large number of
246
+ items, setting the minimal rule confidence to zero can result in
247
+ <i>very</i> high memory consumption.)</p>
248
+
249
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
250
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
251
+ <td width=5></td>
252
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
253
+ </table>
254
+
255
+ <!-- =============================================================== -->
256
+
257
+ <h4><a name="supprule">Support of an Association Rule</a></h4>
258
+
259
+ <p>The support of rules may cause some confusion, because I use this
260
+ term in a different way than [Agrawal et al. 1993] do. For them, the
261
+ support of a rule "A and B -&gt; C" is the support of the set {A, B, C}.
262
+ This is fine if rule confidence is the only rule evaluation measure,
263
+ but it causes problems if some other measure is used. For these other
264
+ measures it is often much more appropriate to call the support of the
265
+ antecedent of the rule, i.e. the support of {A, B} in the example above,
266
+ the support of the rule.</p>
267
+
268
+ <p>The difference can also be stated in the following way: For [Agrawal
269
+ et al. 1993], the support of the rule is the (relative) number of cases
270
+ in which the rule is correct (i.e., in which the presence of the item C
271
+ follows from the presence of the items A and B), whereas for me (and
272
+ thus my apriori program) the support of a rule is the (relative) number
273
+ of cases in which it is applicable (i.e., in which the antecedent of the
274
+ rule holds), although in some of these cases it may be false (because
275
+ only the items A and B are present, but the item C is missing).</p>
276
+
277
+ <p>One reason for this, as already mentioned, is that the definition
278
+ of [Agrawal et al. 1993] does not work well for evaluation measures
279
+ other than rule confidence. This is explained in more detail below.
280
+ Another reason is that I prefer the support of a rule to say something
281
+ about the "statistical" support of a rule and its confidence, i.e.,
282
+ from how many cases the confidence is computed in order to express
283
+ how well founded the assertion about the confidence is.</p>
284
+
285
+ <p>Maybe an example will make this clearer. Suppose you have a die which
286
+ you suspect to be biased. To test this hypothesis, you throw the die,
287
+ say, a thousand times. 307 times the 6 turns up. Hence you assume that
288
+ the die is actually biased, since the relative frequency is about 30%
289
+ although for an unbiased die it should be around 17%. Now, what is the
290
+ "statistical" support of this assertion, i.e., on how many experiments
291
+ does it rest? Obviously it rests on all 1000 experiments and not only
292
+ on the 307 experiments in which the 6 turned up. This is so, simply
293
+ because you had to do 1000 experiments to find out that the relative
294
+ frequency is around 30% and not only the 307 in which a 6 turned up.</p>
295
+
296
+ <p>Or suppose you are doing an opinion poll to find out about the
297
+ acceptance of a certain political party, maybe with the usual question
298
+ "If an election were held next Sunday ...?" You ask 2000 persons, of
299
+ which 857 say that they would vote for the party you are interested in.
300
+ What is the support of the assertion that this party would get around
301
+ 43% of all votes? It is the size of your sample, i.e., all 2000 persons,
302
+ and not only the 857 that answered in the positive. Again you had to ask
303
+ all 2000 people to find out about the percentage of 43%. Of course, you
304
+ could have asked fewer people, say, 100, of which, say, 43 said that
305
+ they would vote for the party, but then your assertion would be less
306
+ reliable, because it is less "supported". The number of votes for the
307
+ party could also be 40% or 50%, because of some random influences. Such
308
+ deviations are much less likely, if you asked 2000 persons, since then
309
+ the random influences can be expected to cancel out.</p>
310
+
311
+ <p>The rule support can be used to select association rules by stating
312
+ a lower bound for the support of a rule. This is equivalent to saying
313
+ that you are interested only in such rules that have a large enough
314
+ statistical basis (since my apriori program uses the term "support"
315
+ in my interpretation and not in the one used by [Agrawal et al. 1993]).
316
+ The default value for the support limit is 10%. It can be changed
317
+ with the option <tt>-s</tt>. If the number given is negative, it is
318
+ interpreted as an absolute number (number of transactions) rather than
319
+ a percentage. (Note that in this case the option <tt>-a</tt> reverses
320
+ its meaning: if it is not given only the absolute support is printed,
321
+ if it is added, the relative support is printed, too.) The lower bound
322
+ for the rule support is combined with the lower bound for the rule
323
+ confidence, i.e., my apriori program generates only rules the confidence
324
+ of which is greater than or equal to the confidence limit <i>and</i> the
325
+ support of which is greater than or equal to the support limit.</p>
326
+
327
+ <p>Despite the above arguments in favor of my definition of the support
328
+ of an association rule, a rule support compatibility mode is available.
329
+ With the option <tt>-o</tt> the original rule support definition can be
330
+ selected. In this case the support of an association rule is the support
331
+ of the set with the items in the antecedent and the consequent of the
332
+ rule, i.e. the support of a rule as defined in [Agrawal et al. 1993].
333
+ </p>
334
+
335
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
336
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
337
+ <td width=5></td>
338
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
339
+ </table>
340
+
341
+ <!-- =============================================================== -->
342
+ <p><img src="line.gif" alt="" height=7 width=704></p>
343
+
344
+ <h3><a name="target">Target Types</a></h3>
345
+
346
+ <p>The target type, which can be selected via the option <tt>-t</tt>,
347
+ is either association rules (option <tt>-tr</tt>, default), frequent
348
+ item sets (option <tt>-ts</tt>), closed item sets (option <tt>-tc</tt>),
349
+ maximal item sets (option <tt>-tm</tt>), or association hyperedges
350
+ (option <tt>-th</tt>).</p>
351
+
352
+ <!-- =============================================================== -->
353
+
354
+ <h4><a name="assrules">Association Rules (default, option -tr)</a></h4>
355
+
356
+ <p>By default my apriori program produces association rules with
357
+ a single item in the consequent. The restriction to single item
358
+ consequents is due to the following considerations: In the first place,
359
+ association rule mining usually produces too many rules even if one
360
+ confines oneself to rules with only one item in the consequent. So why
361
+ should one make the situation worse by allowing more than one item in
362
+ the consequent? (It merely blows up the output size.)</p>
363
+
364
+ <p>Secondly, I do not know any application in which rules with more
365
+ than one item in the consequent are of any real use. The reason, in
366
+ my opinion, is that such more complex rules add almost nothing to the
367
+ insights about the data set. To understand this, consider the simpler
368
+ rules that correspond to a rule with multiple items in the consequent,
369
+ that is, rules having the same antecedent and consequents with only
370
+ single items from the consequent of the complex rule. All of these
371
+ rules must necessarily be in the output, because neither their support
372
+ nor their confidence can be less than that of the more complex rule.
373
+ That is, if you have a rule c d &lt;- a b, you will necessarily also
374
+ have the rules c &lt;- a b and d &lt;- a b in the output. Of course,
375
+ these latter two rules together do <i>not</i> say the same as the more
376
+ complex rule. However, what do you gain from the additional information
377
+ the more complex rule gives you? How can you use it? And is this little
378
+ extra information worth having to analyze a much bigger rule set?</p>
379
+
380
+ <!-- =============================================================== -->
381
+
382
+ <h4><a name="itemsets">Frequent Item Sets (option -ts)</a></h4>
383
+
384
+ <p>Sometimes one may not want to find association rules, but only the
385
+ frequent item sets underlying them. That is, one wants to find all
386
+ item sets with a support exceeding a certain threshold. My apriori
387
+ program supports this search, too: If the option <tt>-ts</tt> is
388
+ given, only frequent item sets are determined.</p>
389
+
390
+ <!-- =============================================================== -->
391
+
392
+ <h4><a name="closed">Closed Item Sets (option -tc)</a></h4>
393
+
394
+ <p>A frequent item set is called <i>closed</i> if no superset has the
395
+ same support. If the option <tt>-tc</tt> is given, the found frequent
396
+ item sets are subsequently filtered and only the closed item sets
397
+ are kept.</p>
398
+
399
+ <!-- =============================================================== -->
400
+
401
+ <h4><a name="maximal">Maximal Item Sets (option -tm)</a></h4>
402
+
403
+ <p>A frequent item set is called <i>maximal</i> if no superset is
404
+ frequent, i.e., has a support exceeding the minimal support. If the
405
+ option <tt>-tm</tt> is given, the found frequent item sets are
406
+ subsequently filtered and only the maximal item sets are kept.</p>
407
+
408
+ <!-- =============================================================== -->
409
+
410
+ <h4><a name="hyperedges">Association Hyperedges (option -th)</a></h4>
411
+
412
+ <p>My apriori program can also find association hyperedges, i.e., sets
413
+ of items that are strongly predictive w.r.t. each other. In this mode
414
+ no rules are generated, only item sets are selected. The selection
415
+ criterion is as follows: Given an item set with enough support (option
416
+ <tt>-s</tt>), all rules are checked which can be formed using this set
417
+ with all items appearing in the rule. For example, for the item set
418
+ {a b c}, the rules c &lt;- a b, b &lt;- a c, a &lt;- b c would be
419
+ considered. The confidences of these rules are computed and averaged.
420
+ If the resulting average confidence is greater than the minimal
421
+ confidence (option <tt>-c</tt>), the item set is selected. (I am
422
+ grateful to Bastien Duclaux for requesting the possibility to generate
423
+ association hyperedges.)</p>
424
+
425
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
426
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
427
+ <td width=5></td>
428
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
429
+ </table>
430
+
431
+ <!-- =============================================================== -->
432
+ <p><img src="line.gif" alt="" height=7 width=704></p>
433
+
434
+ <h3><a name="select">Extended Rule Selection</a></h3>
435
+
436
+ <p>If rules are selected using the rule confidence, the following
437
+ problem arises: "Good" rules (rules that are often true) are not
438
+ always "interesting" rules (rules that reveal something about the
439
+ interdependence of the items). You certainly know the examples that
440
+ are usually given to illustrate this fact. For instance, it is easy
441
+ to find out in a medical database that the rule "pregnant -&gt; female"
442
+ is true with a confidence of 100%. Hence it is a perfect rule, it
443
+ never fails, but, of course, this is not very surprising. Although
444
+ the measures explained below cannot deal with this problem (which is
445
+ semantical), they may be able to improve on the results in a related
446
+ case.</p>
447
+
448
+ <p>Let us look at the supermarket example again and let us assume
449
+ that 60% of all customers buy some kind of bread. Consider the rule
450
+ "cheese -&gt; bread", which holds with a confidence of, say, 62%.
451
+ Is this an important rule? Obviously not, since the fact that the
452
+ customer buys cheese does not have a significant influence on him/her
453
+ buying bread: The percentages are almost the same. But if you had set
454
+ a confidence limit of 60%, you would get both rules "-&gt; bread"
455
+ (confidence 60%) and "cheese -&gt; bread" (confidence 62%), although
456
+ the first would suffice (the first, since it is the simpler of the
457
+ two). The idea of all measures that can be used in addition to or instead
458
+ of rule confidence is to handle such situations and to suppress the
459
+ second rule.</p>
460
+
461
+ <p>In addition, consider the following case: Assume that the confidence
462
+ of the rule "cheese -&gt; bread" is not 62% but 35%. With a confidence
463
+ limit of 60% it would not be selected, but it may be very important to
464
+ know about this rule! Together with cheese bread is bought much less
465
+ frequently than it is bought at all. Is cheese some kind of substitute
466
+ for bread, so that one does not need any bread, if one has cheese? Ok,
467
+ maybe this is not a very good example. However, what can be seen is
468
+ that a rule with low confidence can be very interesting, since it may
469
+ capture an important influence. Furthermore, this is a way to express
470
+ negation (though only in the consequent of a rule), since
471
+ "cheese -&gt; bread" with confidence 35% is obviously equivalent to
472
+ "cheese -&gt; no bread" with confidence 65%. This also makes clear
473
+ why the support of the item set that contains all items in the body
474
+ <i>and</i> the head of the rule is not appropriate for this measure.
475
+ An important rule may have confidence 0 and thus a support (in the
476
+ interpretation of [Agrawal et al. 1993]) of 0. Hence it is not
477
+ reasonable to set a lower bound for this kind of support.</p>
478
+
479
+ <p>I hope that the intention underlying all this is already clear:
480
+ Potentially interesting rules differ significantly in their confidence
481
+ from the confidence of rules with the same consequent, but a simpler
482
+ antecedent. Adding an item to the antecedent is informative only if it
483
+ significantly changes the confidence of the rule. Otherwise the simpler
484
+ rule suffices.</p>
485
+
486
+ <p>Unfortunately the measures other than rule confidence do not solve
487
+ the rule selection problem in the very general form in which it was
488
+ stated above. It is not that easy to deal with all rules that have a
489
+ simpler antecedent, to keep track of which of these rules were selected
490
+ (this obviously influences the selection of more complicated rules),
491
+ to deal with the special type of Poincare paradox that can occur, etc.
492
+ Hence the measures always compare the confidence of a rule with the
493
+ confidence of the rule with empty antecedent, i.e. with the relative
494
+ frequency of the consequent.</p>
495
+
496
+ <p>I call the confidence of a rule with empty antecedent the prior
497
+ confidence, since it is the confidence that the item in the consequent
498
+ of the rule will be present in an item set prior to any information
499
+ about other items that are present. The confidence of a rule with
500
+ non-empty antecedent (and the same consequent) I call the posterior
501
+ confidence, since it is the confidence that the item in the consequent
502
+ of the rule will be present after it gets known that the items in the
503
+ antecedent of the rule are present.</p>
504
+
505
+ <p>All measures that can be used in addition to rule confidence are
506
+ computed from these two values: the prior confidence and the posterior
507
+ confidence. Only those rules are selected for which the value of the
508
+ chosen additional evaluation measure exceeds or is equal to a certain
509
+ limit. The measures are chosen with the option <tt>-e</tt>, the limit
510
+ is passed to the program via the option <tt>-d</tt>. The default value
511
+ for the limit is 10%.</p>
512
+
513
+ <p>All additional rule evaluation measures are combined with the limits
514
+ for rule confidence and rule support. I.e., my apriori program selects
515
+ only those rules the confidence of which is greater than or equal to
516
+ the confidence limit, the support of which is greater than or equal to
517
+ the support limit, <i>and</i> for which the additional evaluation value
518
+ is greater than or equal to the limit for this measure. The default is
519
+ to use no additional evaluation measure, i.e., to rely only on rule
520
+ confidence and rule support. Of course you can remove the restriction
521
+ that the rule confidence must exceed a certain limit by simply setting
522
+ this limit to zero. In this case rules are selected using only the
523
+ limits for the rule support and the additional evaluation measure.
524
+ (Attention: If you have a large number of items, setting the minimal
525
+ rule confidence to zero can result in <i>very</i> high memory
526
+ consumption.)</p>
527
+
528
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
529
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
530
+ <td width=5></td>
531
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
532
+ </table>
533
+
534
+ <!-- =============================================================== -->
535
+
536
+ <h4><a name="diff">Absolute Confidence Difference to Prior
537
+ (option <tt>-ed</tt> or <tt>-e1</tt>)</a></h4>
538
+
539
+ <p>The simplest way to compare the two confidences is to compute the
540
+ absolute value of their difference. I.e., if "-&gt; bread" has a
541
+ confidence of 60% and "cheese -&gt; bread" has a confidence of 62%,
542
+ then the value of this measure is 2%. The parameter given via the
543
+ option <tt>-d</tt> to the program states a lower bound for this
544
+ difference. It follows that this measure selects rules the confidence
545
+ of which differs by more than a given threshold from the corresponding
546
+ prior confidence. For example, with the option <tt>-d20</tt> (and, of
547
+ course, the option <tt>-ed</tt> to select the measure) for the item
548
+ "bread" only rules with a confidence less than 40% or greater than 80%
549
+ would be selected. Of course, for other items, with a different prior
550
+ confidence, the upper and lower bounds are different, too.</p>
551
+
552
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
553
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
554
+ <td width=5></td>
555
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
556
+ </table>
557
+
558
+ <!-- =============================================================== -->
559
+
560
+ <h4><a name="quotient">Difference of Confidence Quotient to 1
561
+ (option <tt>-eq</tt> or <tt>-e2</tt>)</a></h4>
562
+
563
+ <p>An equally simple way to compare the two confidences is to compute
564
+ their quotient. Since either the prior or the posterior confidence
565
+ can be greater (which was handled by computing the absolute value
566
+ for the previous measure), this quotient or its reciprocal, whichever
567
+ is smaller, is then compared to one. A quotient of one says that the
568
+ rule is not interesting, since the prior and the posterior confidence
569
+ are identical. The more the quotient differs from one, the more
570
+ "interesting" the rule is. Hence, just as above, a lower bound for
571
+ this difference is given via the option <tt>-d</tt>. For the bread
572
+ example, with the option <tt>-d20</tt> rules with a confidence less
573
+ than or equal to (1 -20%) *60% = 0.8 *60% = 48% or a confidence greater
574
+ than or equal to 60% / (1 -20%) = 60% / 0.8 = 75% are selected. The
575
+ difference between this measure and the absolute confidence difference
576
+ to the prior is that the deviation that is considered to be significant
577
+ depends on the prior confidence. If it is high, then the deviation of
578
+ the posterior confidence must also be high, and if it is low, then
579
+ the deviation need only be low. For example, if "-&gt; bread" had a
580
+ confidence of only 30%, then the option <tt>-d20</tt> (just as above)
581
+ would select rules the confidence of which is less than 0.8 *30% = 24%
582
+ or greater than 30% /0.8 = 37.5%. As you can see, for a prior confidence
583
+ of 60% the deviation has to be at least 12%/15%, for a prior confidence
584
+ of 30% it has to be only 6%/7.5% in order to make a rule eligible.
585
+ The idea is that an increment of the confidence from 30% to 40% is more
586
+ important than an increment from 60% to 70%, since the relative change
587
+ is greater.</p>
588
+
589
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
590
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
591
+ <td width=5></td>
592
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
593
+ </table>
594
+
595
+ <!-- =============================================================== -->
596
+
597
+ <h4><a name="improve">Absolute Difference of Improvement Value to 1
598
+ (option <tt>-ea</tt> or <tt>-e3</tt>)</a></h4>
599
+
600
+ <p>This measure is very similar to the preceding one. Actually, if
601
+ the confidence of a rule is smaller than the prior confidence, then
602
+ it coincides with it. The improvement value is simply the posterior
603
+ confidence divided by the prior confidence. It is greater than
604
+ one if the confidence increases due to the antecedent, and it is
605
+ smaller than one if the confidence decreases due to the antecedent.
606
+ By computing the absolute value of the difference to one, the
607
+ improvement value can easily be made a rule selection measure.
608
+ The advantage of this measure over the preceding one is that it is
609
+ symmetric w.r.t. changes of the confidence due to the antecedent of
610
+ a rule. For the bread example, with the option <tt>-d20</tt> rules with
611
+ a confidence less than or equal to (1 -20%) *60% = 0.8 *60% = 48% or a
612
+ confidence greater than or equal to (1 +20%) *60% = 1.2 * 60% = 72%
613
+ are selected. (Note the difference of 72% compared to 75% for the
614
+ preceding measure.) Similarly, for the second bread example
615
+ discussed above, the numbers are 0.8 *30% = 24% and 1.2 *30% = 36%.
616
+ Note that this is the only measure for which a value greater than 100
617
+ may be specified with the <tt>-d</tt> option, since it can exceed
618
+ 100% if the posterior confidence of a rule exceeds twice the prior
619
+ confidence. (I am grateful to Roland Jonscher, who pointed out this
620
+ measure to me.)</p>
621
+
622
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
623
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
624
+ <td width=5></td>
625
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
626
+ </table>
627
+
628
+ <!-- =============================================================== -->
629
+
630
+ <h4><a name="info">Information Difference to Prior
631
+ (option <tt>-ei</tt> or <tt>-e4</tt>)</a></h4>
632
+
633
+ <p>This measure is simply the information gain criterion that can be
634
+ used in decision tree learners like C4.5 to select the split attributes.
635
+ Its idea is as follows: Without any further information about other
636
+ items in the set, we have a certain probability (or, to be exact, a
637
+ relative frequency) distribution for, say "bread" and "no bread".
638
+ Let us assume it is 60% : 40% (prior confidence of the item "bread",
639
+ just as above). This distribution has a certain entropy</p>
640
+ <p>H = - P(bread) log<sub>2</sub> P(bread)
641
+ - P(no bread) log<sub>2</sub> P(no bread),</p>
642
+ <p>where P(bread) is equivalent to the support of "bread", which in
643
+ turn is equivalent to the prior confidence of "bread". The entropy of a
644
+ probability distribution is, intuitively, a lower bound on the number
645
+ of yes-no-questions you have to ask in order to determine the actual
646
+ value. This cannot be understood very well with only two possible
647
+ values, but it can be made to work for this case too. I skip the
648
+ details here, they are not that important.</p>
649
+
650
+ <p>After we get the information that the items in the antecedent of
651
+ the rule are present (say, cheese), we have a different probability
652
+ distribution, say 35% : 65%. I.e., P(bread|cheese) = 0.35 and
653
+ P(no bread|cheese) = 0.65. If we also know the support of the item
654
+ "cheese" (let it be P(cheese) = 0.4 and P(no cheese) = 0.6), then
655
+ we can also compute the probabilities P(bread|no cheese) = 0.77 and
656
+ P(no bread|no cheese) = 0.23. Hence we have two posterior probability
657
+ distributions. The question now is: How much information do we receive
658
+ from observing the antecedent of the rule? Information is measured
659
+ as a reduction of entropy. Hence the entropies of the two conditional
660
+ probability distributions (for "cheese" and "no cheese") are computed
661
+ and summed weighted with the probability of their occurrence (i.e.,
662
+ the relative frequency of "cheese" and "no cheese", respectively).
663
+ This gives the expected value of the posterior or conditional entropy.
664
+ The difference of this value to the prior entropy (see above) is the
665
+ gain in information from the antecedent of the rule or, as I called
666
+ it, the information difference to the prior.</p>
667
+
668
+ <p>The value that can be given via the <tt>-d</tt> option is a lower
669
+ bound for the information gain, measured in hundredths of a bit. Since
670
+ all items can only be present or absent, the information gain can be
671
+ at most one bit. Therefore a percent value is still reasonable.</p>
672
+
673
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
674
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
675
+ <td width=5></td>
676
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
677
+ </table>
678
+
679
+ <!-- =============================================================== -->
680
+
681
+ <h4><a name="chi2">Normalized</a> chi<sup>2</sup> Measure
682
+ (option <tt>-ec</tt> or <tt>-e5</tt>)</h4>
683
+
684
+ <p>The chi<sup>2</sup> measure is well known from statistics. It is
685
+ often used to measure the difference between a supposed independent
686
+ distribution of two discrete variables and the actual joint distribution
687
+ in order to determine how strongly two variables depend on each other.
688
+ This measure (as it is defined in statistics) contains the number of
689
+ cases it is computed from as a factor. This is not very appropriate
690
+ if one wants to evaluate rules that can have varying support. Hence
691
+ this factor is removed by simply dividing the measure by the number
692
+ of item sets (the total number, i.e. with the names used above, the
693
+ number of sets in X). With this normalization, the chi<sup>2</sup>
694
+ measure can assume values between 0 (no dependence) and 1 (very strong
695
+ dependence). The value that can be given via the <tt>-d</tt> option is
696
+ a lower bound for the strength of the dependence of the head on the
697
+ body in percent (0 - no dependence, 100 - perfect dependence). Only
698
+ those rules are selected, in which the head depends on the body with
699
+ a higher degree of dependence.</p>
700
+
701
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
702
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
703
+ <td width=5></td>
704
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
705
+ </table>
706
+
707
+ <!-- =============================================================== -->
708
+
709
+ <h4><a name="behavior">Selection Behavior of the Measures</a></h4>
710
+
711
+ <p>In the directory <tt>apriori/doc</tt> you can find a Gnuplot script
712
+ named <tt>arem.gp</tt> (<tt>arem</tt> stands for additional rule
713
+ evaluation measures) which visualizes the behavior of the additional
714
+ rule evaluation measures. This script draws eight 3d graphs, one for
715
+ the absolute confidence difference, one for the difference of the
716
+ confidence quotient to one, three for the information difference to
717
+ the prior confidence and three for the normalized chi<sup>2</sup>
718
+ measure. All graphs show the value of an additional rule evaluation
719
+ measure over a plane defined by the prior and the posterior confidence
720
+ of a rule. The latter two measures need three graphs, since they depend
721
+ on the antecedent support of a rule as a third parameter. Setting a
722
+ minimal value for an additional rule evaluation measure is like
723
+ flooding the corresponding graph landscape up to a certain level
724
+ (given as a percentage, since all considered measures assume values
725
+ between 0 and 1). Only those rules are selected that sit on dry land.
726
+ </p>
727
+
728
+ <p>The first graph shows the behavior of the absolute confidence
729
+ difference. For the diagonal, i.e. the line where the prior and the
730
+ posterior confidence are identical, its value is zero (as expected).
731
+ The more the two confidences differ, the higher the value of this
732
+ measure gets, but in a linear way.</p>
733
+
734
+ <p>The second graph shows the behavior of the difference of the confidence quotient
735
+ to one. Again its value is zero for the diagonal (as the value of
736
+ all measures is) and becomes greater the more the prior and the
737
+ posterior confidence differ. But it is much steeper for a small
738
+ prior confidence than for a large one and it is non-linear.</p>
739
+
740
+ <p>The third to fifth graphs show the information difference to the
741
+ prior confidence for an antecedent support (which is identical to the
742
+ rule support in my interpretation, see above) of 0.2 (20%), 0.3 (30%)
743
+ and 0.4 (40%). The regions at the margins, where the measure is zero,
744
+ correspond to impossible combinations of prior and posterior confidence
745
+ and antecedent support. As you can see, the valley gets narrower with
746
+ increasing antecedent support. I.e., with the same minimal value for
747
+ this measure, rules with low antecedent support need a higher confidence
748
+ difference to be selected than rules with a high antecedent support.
749
+ This nicely models the statistical significance of confidence changes.
750
+ If you only have a few cases to support your rule, even a large
751
+ deviation from the prior confidence can be explained by random
752
+ fluctuations, since only a few transactions need to be different to
753
+ produce a considerable change. However, if the antecedent support
754
+ is large, even a small deviation (in percent) has to be considered
755
+ significant (non random), since it takes a lot of changes to
756
+ transactions to produce even a small change in the percentage.
757
+ This dependence on the antecedent support of the rule and that the
758
+ valley is not pointed at the diagonal (which means that even a low
759
+ minimal value can exclude a lot of rules) is the main difference
760
+ between the information gain and the normalized chi<sup>2</sup>
761
+ measure on the one hand and the absolute confidence difference and
762
+ difference of the confidence quotient to one on the other.</p>
763
+
764
+ <p>The sixth to eighth graphs show the normalized chi<sup>2</sup> measure
765
+ for an antecedent support of 0.2, 0.3, and 0.4. The valleys are very
766
+ similar to those for the information difference to the prior confidence,
767
+ they only have slightly steeper flanks, especially for low antecedent
768
+ support. So in practice there is no big difference between the
769
+ information difference and the normalized chi<sup>2</sup> measure.</p>
770
+
771
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
772
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
773
+ <td width=5></td>
774
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
775
+ </table>
776
+
777
+ <!-- =============================================================== -->
778
+
779
+ <h4><a name="appear">Item Appearances</a></h4>
780
+
781
+ <p>My apriori program provides a simple way to restrict the rules to
782
+ generate w.r.t. the items that shall appear in them. It accepts a third
783
+ optional input file, in which item appearances can be given. For each
784
+ item it can be stated whether it may appear in the body (antecedent)
785
+ of a rule, in the head (consequent), or in both. A description of the
786
+ format of this additional input file, including examples, can be found
787
+ <a href="#appearin">here</a>. If no item appearances file is given, the
788
+ rule selection is not restricted. (I am grateful to the people at
789
+ Integral Solutions Ltd., who developed the well-known data mining tool
790
+ <a href="http://www.spss.com/Clementine/">Clementine</a>, but are now
791
+ part of <a href="http://www.spss.com">SPSS</a>, for requesting the
792
+ possibility to restrict item appearances.)</p>
793
+
794
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
795
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
796
+ <td width=5></td>
797
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
798
+ </table>
799
+
800
+ <!-- =============================================================== -->
801
+ <p><img src="line.gif" alt="" height=7 width=704></p>
802
+
803
+ <h3><a name="select">Extended Item Set Selection</a></h3>
804
+
805
+ <p>Since version 4.20 there are extended selection possibilities for
806
+ frequent item sets, too. (These were added due to a cooperation with
807
+ Sonja Gruen, FU Berlin.)</p>
808
+
809
+ <!-- =============================================================== -->
810
+
811
+ <h4><a name="logquot">Binary Logarithm of Support Quotient</a></h4>
812
+
813
+ <p>An expected value for the support of an item set is computed from
814
+ the support values of the individual items, assuming independence.
815
+ Then the binary logarithm of the quotient of actual support and
816
+ expected support is computed. A minimum value for this measure can
817
+ be set with the option <tt>-d</tt>. In this case only frequent item
818
+ sets for which this measure exceeds the given threshold are kept.</p>
819
+
820
+ <!-- =============================================================== -->
821
+
822
+ <h4><a name="suppquot">Difference of Support Quotient to 1</a></h4>
823
+
824
+ <p>As with the preceding measure the quotient of actual and expected
825
+ support of an item set is computed and compared to 1 (a value of 1
826
+ signifies independence of the items). A minimum value for this measure
827
+ can be set with the option <tt>-d</tt>. In this case only frequent item
828
+ sets for which this measure exceeds the given threshold are kept.</p>
829
+
830
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
831
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
832
+ <td width=5></td>
833
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
834
+ </table>
835
+
836
+ <!-- =============================================================== -->
837
+ <p><img src="line.gif" alt="" height=7 width=704></p>
838
+
839
+ <h3><a name="tatree">Transaction Prefix Tree</a></h3>
840
+
841
+ <p>The counting process can be sped up by organizing the transactions
842
+ into a prefix tree. That is, the items in each transaction are sorted
843
+ and then transactions with the same prefix are grouped together and
844
+ are counted, as one may say, in parallel. This way of organizing the
845
+ transactions was added in version 4.03 and is the default behavior now.
846
+ If you prefer that the transactions are treated individually (i.e., the
847
+ transactions are stored in a simple list and only one transaction is
848
+ counted at a time), use the option <tt>-h</tt>.</p>
849
+
850
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
851
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
852
+ <td width=5></td>
853
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
854
+ </table>
855
+
856
+ <!-- =============================================================== -->
857
+ <p><img src="line.gif" alt="" height=7 width=704></p>
858
+
859
+ <h3><a name="options">Program Invocation and Options</a></h3>
860
+
861
+ <p>My apriori program is invoked as follows:</p>
862
+ <p><tt>apriori [options] infile outfile [appfile]</tt></p>
863
+ <p>The normal arguments are:</p>
864
+ <table border=0 cellpadding=0 cellspacing=0>
865
+ <tr><td>infile</td><td width=10></td>
866
+ <td>file to read transactions from</td></tr>
867
+ <tr><td>outfile</td><td></td>
868
+ <td>file to write association rules / hyperedges to</td></tr>
869
+ <tr><td>appfile</td><td></td>
870
+ <td>file stating item appearances (optional)</td></tr>
871
+ </table>
872
+ <p>The possible options are:</p>
873
+ <table border=0 cellpadding=0 cellspacing=0>
874
+ <tr><td><tt>-t#</tt></td><td width=10></td>
875
+ <td>target type (default: association rules)</td></tr>
876
+ <tr><td><tt></tt></td><td width=10></td>
877
+ <td>(s: itemsets, c: closed itemsets, m: maximal itemsets,<br>
878
+ <font color="white">(</font>r: association rules,
879
+ h: association hyperedges)</td></tr>
880
+ <tr><td><tt>-m#</tt></td><td></td>
881
+ <td>minimal number of items per set/rule/hyperedge
882
+ (default: 1)</td></tr>
883
+ <tr><td><tt>-n#</tt></td><td></td>
884
+ <td>maximal number of items per set/rule/hyperedge
885
+ (default: 5)</td></tr>
886
+ <tr><td><tt>-s#</tt></td><td></td>
887
+ <td>minimal support of a set/rule/hyperedge
888
+ (default: 10%)</td></tr>
889
+ <tr><td><tt>-S#</tt></td><td></td>
890
+ <td>maximal support of a set/rule/hyperedge
891
+ (default: 100%)</td></tr>
892
+ <tr><td><tt>-c#</tt></td><td></td>
893
+ <td>minimal confidence of a rule/hyperedge
894
+ (default: 80%)</td></tr>
895
+ <tr><td><tt>-o</tt></td><td></td>
896
+ <td>use original definition of the support of a rule
897
+ (body &amp; head)</td></tr>
898
+ <tr><td><tt>-k#</tt></td><td></td>
899
+ <td>item separator for output (default: " ")</td></tr>
900
+ <tr><td><tt>-p#</tt></td><td></td>
901
+ <td>output format for support/confidence (default: "%.1f%%")</td></tr>
902
+ <tr><td><tt>-x</tt></td><td></td>
903
+ <td>extended support output (print both rule support types)
904
+ </td></tr>
905
+ <tr><td><tt>-a</tt></td><td></td>
906
+ <td>print absolute support (number of transactions)</td></tr>
907
+ <tr><td><tt>-y</tt></td><td></td>
908
+ <td>print lift value (confidence divided by prior)</td></tr>
909
+ <tr><td><tt>-e#</tt></td><td></td>
910
+ <td>additional rule evaluation measure (default: none)</td></tr>
911
+ <tr><td><tt>-!</tt></td><td></td>
912
+ <td>print a list of additional rule evaluation measures</td></tr>
913
+ <tr><td><tt>-d#</tt></td><td></td>
914
+ <td>minimal value of additional evaluation measure
915
+ (default: 10%)</td></tr>
916
+ <tr><td><tt>-v</tt></td><td></td>
917
+ <td>print value of additional rule evaluation measure</td></tr>
918
+ <tr><td><tt>-g</tt></td><td></td>
919
+ <td>write output in scannable form
920
+ (quote certain characters)</td></tr>
921
+ <tr><td><tt>-l</tt></td><td></td>
922
+ <td>do not load transactions into memory
923
+ (work on input file)</td></tr>
924
+ <tr><td><tt>-q#</tt></td><td></td>
925
+ <td>sort items w.r.t. their frequency (default: 1)</td></tr>
926
+ <tr><td><tt></tt></td><td></td>
927
+ <td>(1: ascending, -1: descending, 0: do not sort,</td></tr>
928
+ <tr><td><tt></tt></td><td></td>
929
+ <td><font color="white">(</font>2: ascending, -2: descending
930
+ w.r.t. transaction size sum)</td></tr>
931
+ <tr><td><tt>-u#</tt></td><td></td>
932
+ <td>filter unused items from transactions (default: 0.5)</td></tr>
933
+ <tr><td><tt></tt></td><td></td>
934
+ <td>(0: do not filter items w.r.t. usage in item sets,<br>
935
+ &lt;0: fraction of removed items for filtering,<br>
936
+ &gt;0: take execution times ratio into account)</td></tr>
937
+ <tr><td><tt>-h</tt></td><td></td>
938
+ <td>do not organize transactions as a prefix tree</td></tr>
939
+ <tr><td><tt>-j</tt></td><td></td>
940
+ <td>use quicksort to sort the transactions (default: heapsort)
941
+ </td></tr>
942
+ <tr><td><tt>-z</tt></td><td></td>
943
+ <td>minimize memory usage (default: maximize speed)</td></tr>
944
+ <tr><td><tt>-i#</tt></td><td></td>
945
+ <td>ignore records starting with characters in the given
946
+ string</td></tr>
947
+ <tr><td valign="top"><tt>-b/f/r#</tt></td><td></td>
948
+ <td>blank characters, field and record separators</td></tr>
949
+ <tr><td><tt></tt></td><td></td>
950
+ <td>(default: "<tt> \t\r</tt>", "<tt> \t</tt>", "<tt>\n</tt>")
951
+ </td></tr>
952
+ </table>
953
+ <p>(<tt>#</tt> always means a number, a letter, or a string that
954
+ specifies the parameter of the option.)</p>
955
+ <p>Note that the effect of the option <tt>-z</tt> can depend heavily
956
+ on how the items are sorted (option <tt>-q</tt>). Highest savings
957
+ in memory usually result if items are sorted with descending
958
+ frequency (<tt>-q-1</tt>). However, this often worsens the
959
+ processing time considerably.</p>
960
+ <p>A note on the option <tt>-j</tt>: Constructing the prefix tree for
961
+ the transactions requires sorting the transactions. Since version
962
+ 4.17 heap sort is the default sorting method for the transactions,
963
+ because it turned out that in conjunction with the item sorting
964
+ (and especially for artificial datasets like T10I4D100K) quicksort
965
+ can lead to very bad processing times (almost worst case behavior,
966
+ i.e., O(n<sup>2</sup>) run time for the sorting). However, sometimes
967
+ this is not a problem and then quicksort is slightly faster, which
968
+ can be activated with the option -j.</p>
969
+
970
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
971
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
972
+ <td width=5></td>
973
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
974
+ </table>
975
+
976
+ <!-- =============================================================== -->
977
+ <p><img src="line.gif" alt="" height=7 width=704></p>
978
+
979
+ <h3><a name="input">Input Format</a></h3>
980
+
981
+ <h4><a name="transin">Format of the Transactions File</a></h4>
982
+
983
+ <p>A text file structured by field and record separators and blanks.
984
+ Record separators, not surprisingly, separate records, usually lines,
985
+ field separators fields (or columns), usually words. Blanks are used
986
+ to fill fields (columns), e.g. to align them. In the transactions
987
+ file each record must contain one transaction, i.e. a list of item
988
+ identifiers, which are separated by field separators. An empty record
989
+ is interpreted as an empty transaction.</p>
990
+
991
+ <p>Examples can be found in the directory <tt>apriori/ex</tt> in the
992
+ source package. Refer to the file <tt>apriori/ex/readme</tt>, which
993
+ explains how to process the different example files in the directory
994
+ <tt>apriori/ex</tt> in the source package.</p>
995
+
996
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
997
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
998
+ <td width=5></td>
999
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
1000
+ </table>
1001
+
1002
+ <!-- =============================================================== -->
1003
+
1004
+ <h4><a name="appearin">Format of the Item Appearances File</a></h4>
1005
+
1006
+ <p>A text file structured by field and record separators and blanks.
1007
+ (Note: For this file the same field and record separators and blanks
1008
+ are used as for the transactions file.)</p>
1009
+
1010
+ <p>The first record, which must have one field, contains the default
1011
+ appearance to be used with all items not mentioned in the appearances
1012
+ file. Other records state the appearance of specific items. The first
1013
+ field states the item, the second the appearance indicator. If no
1014
+ appearance indicator is given, the item will be ignored (i.e. may
1015
+ appear neither in the body (antecedent) nor in the head (consequent)
1016
+ of a rule). Empty records are ignored.</p>
1017
+
1018
+ <p>The following appearance indicators are recognized:</p>
1019
+ <ul type=circle>
1020
+ <li>item may appear only in rule bodies (antecedents):<br>
1021
+ <tt>i in b body a ante antecedent</tt></li>
1022
+ <li>item may appear only in rule heads (consequents):<br>
1023
+ <tt>o out h head c cons consequent</tt></li>
1024
+ <li>item may appear in rule bodies (antecedents)
1025
+ or in rule heads (consequents):<br>
1026
+ <tt>io inout bh b&amp;h ac a&amp;c both</tt></li>
1027
+ <li>item may appear neither in rule bodies (antecedents)
1028
+ nor in rule heads (consequents):<br>
1029
+ <tt>n neither none ign ignore -</tt></li>
1030
+ </ul>
1031
+
1032
+ <p><b>Example 1:</b>
1033
+ Generate only rules with item "x" in the consequent.</p>
1034
+ <p><tt>in<br>
1035
+ x out</tt></p>
1036
+
1037
+ <p><b>Example 2:</b>
1038
+ Item "x" may appear only in a rule head (consequent),
1039
+ item "y" only in a rule body (antecedent);
1040
+ appearance of all other items is not restricted.</p>
1041
+ <p><tt>both<br>
1042
+ x head<br>
1043
+ y body</tt></p>
1044
+
1045
+ <p>Providing no item appearances file is equivalent to an item
1046
+ appearances file containing only an indicator like "both", which
1047
+ does not restrict the appearance of any items.</p>
1048
+
1049
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
1050
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
1051
+ <td width=5></td>
1052
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
1053
+ </table>
1054
+
1055
+ <!-- =============================================================== -->
1056
+ <p><img src="line.gif" alt="" height=7 width=704></p>
1057
+
1058
+ <h3><a name="output">Output Format</a></h3>
1059
+
1060
+ <h4><a name="ruleout">Output Format for Association Rules</a></h4>
1061
+
1062
+ <p>Each line of the output file contains one association rule in the
1063
+ format</p>
1064
+ <p><tt>c &lt;- a b ... (x%, y%)</tt></p>
1065
+ <p>where a, b, and c are item identifiers, and</p>
1066
+
1067
+ <table border=0 cellpadding=0 cellspacing=0>
1068
+ <tr><td valign=top>x</td><td width=10></td>
1069
+ <td>the percentage of transactions that contain all items appearing
1070
+ in the rule body (antecedent), that is, in the example above,
1071
+ a and b. (support of the rule, i.e., the support in my
1072
+ interpretation)</td></tr>
1073
+ <tr><td valign=top>y</td><td></td>
1074
+ <td>the confidence of the rule, which is computed as the quotient of
1075
+ the percentage of transactions that contain all items appearing in
1076
+ the rule body (antecedent) and the rule head (consequent) - that is,
1077
+ in the example above, a, b, and c - and the above percentage x.</td>
1078
+ </tr>
1079
+ </table>
1080
+
1081
+ <p>If the option -o is used, x is replaced by the rule support in the
1082
+ original definition (i.e., the one used by [Agrawal et al. 1993]),
1083
+ namely the percentage of transactions that contain all items appearing
1084
+ in the rule body (antecedent) and the rule head (consequent), that is, in
1085
+ the example above, a, b, and c. The value of y, however, is still
1086
+ computed relative to the support of the rule body, as described above.</p>
1087
+
1088
+ <p>If the option -x is given, both types of rule support (support of
1089
+ all items in the rule and support of the items in the body/antecedent
1090
+ of the rule) will be printed. The confidence of a rule (see above) is
1091
+ the quotient of the two support values (* 100%), i.e., a rule will
1092
+ be printed as</p>
1093
+ <p><tt>c &lt;- a b ... (x<sub>1</sub>%, x<sub>2</sub>%, y%)</tt></p>
1094
+ <p>where x<sub>1</sub> is the support of the set of all items in the
1095
+ rule, x<sub>2</sub> is the support of the set of items in the body
1096
+ (antecedent) of the rule, and y = x<sub>1</sub>/x<sub>2</sub> * 100%
1097
+ is the confidence of the rule.</p>
1098
+
1099
+ <p>If the option -a is given, the support percentage x is supplemented
1100
+ by the absolute number of transactions underlying it:</p>
1101
+ <p><tt>c &lt;- a b ... (x%/s, y%)</tt></p>
1102
+ <p>where s is the absolute number of transactions. If the option -x is
1103
+ given, the absolute support is printed for both types of rule support.
1104
+ </p>
1105
+
1106
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
1107
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
1108
+ <td width=5></td>
1109
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
1110
+ </table>
1111
+
1112
+ <!-- =============================================================== -->
1113
+
1114
+ <h4><a name="setout">Output Format for Frequent Item Sets</a></h4>
1115
+
1116
+ <p>Each line of the output file contains one item set in the format</p>
1117
+ <p><tt>a b c ... (x%)</tt></p>
1118
+ <p>where a, b, and c are item identifiers and x is the percentage of
1119
+ transactions that contain this item set (item set support).</p>
1120
+
1121
+ <p>If the option -a is given, this percentage is supplemented by the
1122
+ absolute number of transactions underlying it:</p>
1123
+ <p><tt>a b c ... (x%/s)</tt></p>
1124
+ <p>where s is the absolute number of transactions.</p>
1125
+
1126
+ <p>If the option -x is given, the percentage of transactions that are
1127
+ identical to the item set is printed, too (whereas the normal support
1128
+ is the percentage of transactions that are a superset of the item set):
1129
+ </p>
1130
+ <p><tt>a b c ... (x%, y%)</tt></p>
1131
+ <p>where x is the normal item set support and y is the percentage of
1132
+ transactions identical to the item set. (This output option was added
1133
+ in response to a request by Laura Maruster.) If the option -a is also
1134
+ given, both percentages are supplemented by the absolute number of
1135
+ transactions underlying these percentages.</p>
1136
+
1137
+ <p>Note that for frequent item sets the option -x cannot be combined
1138
+ with the option -y. That is, in order to compute the second support
1139
+ measure for item sets, the transactions have to be loaded into memory.
1140
+ </p>
1141
+
1142
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
1143
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
1144
+ <td width=5></td>
1145
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
1146
+ </table>
1147
+
1148
+ <!-- =============================================================== -->
1149
+
1150
+ <h4><a name="edgeout">Output Format for Association Hyperedges</a></h4>
1151
+
1152
+ <p>Each line of the output file contains one hyperedge in the format</p>
1153
+ <p><tt>a b c ... (x%, y%)</tt></p>
1154
+ <p>where a, b, and c are item identifiers, and</p>
1155
+
1156
+ <table border=0 cellpadding=0 cellspacing=0>
1157
+ <tr><td valign=top>x</td><td width=10></td>
1158
+ <td>the percentage of transactions that contain all items appearing
1159
+ in the hyperedge, that is, in the example above, a, b, and c.</td>
1160
+ </tr>
1161
+ <tr><td valign=top>y</td><td></td>
1162
+ <td>the average confidence of all rules that can be formed using
1163
+ the items in the hyperedge with all items appearing in the rule
1164
+ (see above), i.e., for the example above, the average confidence
1165
+ of the rules c &lt;- a b, b &lt;- a c, and a &lt;- b c.</td></tr>
1166
+ </table>
1167
+
1168
+ <p>If the option -a is given, the support percentage x is supplemented
1169
+ by the absolute number of transactions underlying it:</p>
1170
+ <p><tt>a b c ... (x%/s, y%)</tt></p>
1171
+ <p>where s is the absolute number of transactions.</p>
1172
+
1173
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
1174
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
1175
+ <td width=5></td>
1176
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
1177
+ </table>
1178
+
1179
+ <!-- =============================================================== -->
1180
+ <p><img src="line.gif" alt="" height=7 width=704></p>
1181
+
1182
+ <h3><a name="compopt">Compilation Options</a></h3>
1183
+
1184
+ <p>The program can be compiled with two additional compilation options
1185
+ (see <tt>makefile</tt>), namely <tt>-DBENCH</tt> and <tt>-DARCH64</tt>.
1186
+ </p>
1187
+
1188
+ <p>Compiling the program with <tt>-DBENCH</tt> produces a version that
1189
+ prints some benchmark information on termination, in particular about
1190
+ the memory used during the item set tree construction (number of nodes,
1191
+ counters, necessary counters, child pointers, necessary child pointers).
1192
+ Collecting the memory usage information slightly, but negligibly
1193
+ increases the execution time.</p>
1194
+
1195
+ <p>Compiling the program with <tt>-DARCH64</tt> produces a version for
1196
+ 64 bit machines (architecture model: pointers are 64 bits, integers are
1197
+ 32 bits wide), by removing some alignment issues in the transaction and
1198
+ item set tree representations, which would otherwise lead to bus errors.
1199
+ These adaptations slightly, but negligibly increase memory consumption.
1200
+ (I am grateful to Anthony Casaletto, SPSS Inc., for helping me a lot to
1201
+ identify these alignment problems, by compiling and testing the program
1202
+ on a 64 bit machine, since I do not have access to one.)</p>
1203
+
1204
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
1205
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
1206
+ <td width=5></td>
1207
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
1208
+ </table>
1209
+
1210
+ <!-- =============================================================== -->
1211
+ <p><img src="line.gif" alt="" height=7 width=704></p>
1212
+
1213
+ <h3><a name="copying">Copying</a></h3>
1214
+
1215
+ <p>apriori -
1216
+ find association rules/hyperedges with apriori algorithm<br>
1217
+ copyright &copy; 1996-2003 Christian Borgelt</p>
1218
+
1219
+ <p>This program is free software; you can redistribute it and/or
1220
+ modify it under the terms of the
1221
+ <a href="http://www.fsf.org/copyleft/lesser.html">
1222
+ GNU Lesser (Library) General Public License</a> as published by the
1223
+ <a href="http://www.fsf.org">Free Software Foundation</a>.</p>
1224
+
1225
+ <p>This program is distributed in the hope that it will be useful,
1226
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1227
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1228
+ <a href="http://www.fsf.org/copyleft/lesser.html">
1229
+ GNU Lesser (Library) General Public License</a> for more details.</p>
1230
+
1231
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
1232
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
1233
+ <td width=5></td>
1234
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
1235
+ </table>
1236
+
1237
+ <!-- =============================================================== -->
1238
+ <p><img src="line.gif" alt="" height=7 width=704></p>
1239
+
1240
+ <h3><a name="download">Download</a></h3>
1241
+
1242
+ <p><a href="http://fuzzy.cs.uni-magdeburg.de/~borgelt/apriori.html">
1243
+ Download page</a> with most recent version.</p>
1244
+
1245
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
1246
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
1247
+ <td width=5></td>
1248
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
1249
+ </table>
1250
+
1251
+ <!-- =============================================================== -->
1252
+ <p><img src="line.gif" alt="" height=7 width=704></p>
1253
+
1254
+ <h3><a name="contact">Contact</a></h3>
1255
+
1256
+ <table border=0 cellpadding=0 cellspacing=0>
1257
+ <tr><td valign=top>Snail mail:</td><td width=10></td>
1258
+ <td><a href="http://fuzzy.cs.uni-magdeburg.de/~borgelt/index.html">
1259
+ Christian Borgelt</a><br>
1260
+ <a href="http://fuzzy.cs.uni-magdeburg.de/index.html">
1261
+ Working Group Neural Networks and Fuzzy Systems</a><br>
1262
+ <a href="http://www-iws.cs.uni-magdeburg.de/iws.html">
1263
+ Department of Knowledge Processing and Language Engineering</a><br>
1264
+ <a href="http://www.cs.uni-magdeburg.de/">
1265
+ School of Computer Science</a><br>
1266
+ <a href="http://www.uni-magdeburg.de/">
1267
+ Otto-von-Guericke-University of Magdeburg</a><br>
1268
+ Universit&auml;tsplatz 2<br>
1269
+ D-39106 Magdeburg<br>
1270
+ Germany</td></tr>
1271
+ <tr><td valign=top>E-mail:</td><td></td>
1272
+ <td><a href="mailto:christian.borgelt@cs.uni-magdeburg.de">
1273
+ christian.borgelt@cs.uni-magdeburg.de</a><br>
1274
+ <a href="mailto:borgelt@iws.cs.uni-magdeburg.de">
1275
+ borgelt@iws.cs.uni-magdeburg.de</a></td></tr>
1276
+ <tr><td>Phone:</td><td></td>
1277
+ <td>+49 391 67 12700</td></tr>
1278
+ <tr><td>Fax:</td><td></td>
1279
+ <td>+49 391 67 12018</td></tr>
1280
+ <tr><td>Office:</td><td></td>
1281
+ <td>29.015</td></tr>
1282
+ </table>
1283
+
1284
+ <table width="100%" border=0 cellpadding=0 cellspacing=0>
1285
+ <tr><td width="95%" align=right><a href="#top">back to the top</a></td>
1286
+ <td width=5></td>
1287
+ <td><a href="#top"><img src="uparrow.gif" border=0></a></td></tr>
1288
+ </table>
1289
+
1290
+ <!-- =============================================================== -->
1291
+ <p><img src="line.gif" alt="" height=7 width=704></p>
1292
+
1293
+ <address>&copy; 2002-2004
1294
+ <a href="mailto:borgelt@iws.cs.uni-magdeburg.de">Christian Borgelt</a>
1295
+ </address>
1296
+ <!-- Created: Thu May 24 12:28:05 CEST 2001 -->
1297
+ <!-- hhmts start -->
1298
+ Last modified: Tue Nov 23 13:49:10 CET 2004
1299
+ <!-- hhmts end -->
1300
+ </body>
1301
+ </html>