jashmenn-apriori 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/License.txt +20 -0
- data/Manifest.txt +121 -0
- data/README.txt +139 -0
- data/Rakefile +4 -0
- data/TODO.txt +60 -0
- data/attic/c_ext_test1/MyTest/MyTest.c +23 -0
- data/attic/c_ext_test1/MyTest/extconf.rb +11 -0
- data/attic/c_ext_test1/mytest.rb +10 -0
- data/attic/test.c +12 -0
- data/config/hoe.rb +77 -0
- data/config/requirements.rb +15 -0
- data/examples/01_simple_example.rb +23 -0
- data/examples/02_small_file_example.rb +17 -0
- data/examples/03_large_file_example.rb +22 -0
- data/examples/test_data/market_basket_basic_test.dat +9 -0
- data/ext/Apriori.c +149 -0
- data/ext/Makefile +149 -0
- data/ext/apriori/doc/apriori.html +1301 -0
- data/ext/apriori/doc/arem.gp +68 -0
- data/ext/apriori/doc/c_rev.gp +89 -0
- data/ext/apriori/doc/chi2.tex +156 -0
- data/ext/apriori/doc/copying +504 -0
- data/ext/apriori/doc/line.gif +0 -0
- data/ext/apriori/doc/uparrow.gif +0 -0
- data/ext/apriori/ex/flg2set +15 -0
- data/ext/apriori/ex/hdr2set +13 -0
- data/ext/apriori/ex/readme +71 -0
- data/ext/apriori/ex/row2set +7 -0
- data/ext/apriori/ex/rulesort +24 -0
- data/ext/apriori/ex/tab2set +9 -0
- data/ext/apriori/ex/test.app +2 -0
- data/ext/apriori/ex/test.rul +9 -0
- data/ext/apriori/ex/test1.rul +43 -0
- data/ext/apriori/ex/test1.tab +10 -0
- data/ext/apriori/ex/test2.tab +10 -0
- data/ext/apriori/ex/test3.tab +30 -0
- data/ext/apriori/ex/test4.tab +11 -0
- data/ext/apriori/ex/test5.tab +39 -0
- data/ext/apriori/ex/tid2set +23 -0
- data/ext/apriori/ex/xhdr2set +33 -0
- data/ext/apriori/src/apriori.c +750 -0
- data/ext/apriori/src/apriori.dsp +120 -0
- data/ext/apriori/src/apriori.dsw +29 -0
- data/ext/apriori/src/apriori.mak +99 -0
- data/ext/apriori/src/istree.c +1411 -0
- data/ext/apriori/src/istree.h +160 -0
- data/ext/apriori/src/makefile +105 -0
- data/ext/apriori/src/tract.c +870 -0
- data/ext/apriori/src/tract.h +261 -0
- data/ext/apriori_wrapper.c +757 -0
- data/ext/apriori_wrapper.h +10 -0
- data/ext/extconf.rb +32 -0
- data/ext/math/doc/copying +504 -0
- data/ext/math/src/chi2.c +151 -0
- data/ext/math/src/chi2.h +27 -0
- data/ext/math/src/choose.c +71 -0
- data/ext/math/src/choose.h +16 -0
- data/ext/math/src/gamma.c +446 -0
- data/ext/math/src/gamma.h +39 -0
- data/ext/math/src/intexp.c +35 -0
- data/ext/math/src/intexp.h +15 -0
- data/ext/math/src/makefile +164 -0
- data/ext/math/src/math.mak +48 -0
- data/ext/math/src/normal.c +387 -0
- data/ext/math/src/normal.h +44 -0
- data/ext/math/src/radfn.c +113 -0
- data/ext/math/src/radfn.h +34 -0
- data/ext/math/src/zeta.c +49 -0
- data/ext/math/src/zeta.h +15 -0
- data/ext/pre-clean.rb +8 -0
- data/ext/pre-setup.rb +9 -0
- data/ext/util/doc/copying +504 -0
- data/ext/util/src/listops.c +76 -0
- data/ext/util/src/listops.h +26 -0
- data/ext/util/src/makefile +103 -0
- data/ext/util/src/memsys.c +84 -0
- data/ext/util/src/memsys.h +42 -0
- data/ext/util/src/nstats.c +288 -0
- data/ext/util/src/nstats.h +69 -0
- data/ext/util/src/params.c +86 -0
- data/ext/util/src/params.h +19 -0
- data/ext/util/src/parse.c +133 -0
- data/ext/util/src/parse.h +81 -0
- data/ext/util/src/scan.c +767 -0
- data/ext/util/src/scan.h +111 -0
- data/ext/util/src/symtab.c +443 -0
- data/ext/util/src/symtab.h +121 -0
- data/ext/util/src/tabscan.c +279 -0
- data/ext/util/src/tabscan.h +99 -0
- data/ext/util/src/util.mak +91 -0
- data/ext/util/src/vecops.c +317 -0
- data/ext/util/src/vecops.h +42 -0
- data/lib/apriori/adapter.rb +13 -0
- data/lib/apriori/association_rule.rb +85 -0
- data/lib/apriori/version.rb +9 -0
- data/lib/apriori.rb +133 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/txt2html +82 -0
- data/setup.rb +1585 -0
- data/tasks/apriori.rake +20 -0
- data/tasks/attic.rake +28 -0
- data/tasks/deployment.rake +34 -0
- data/tasks/environment.rake +7 -0
- data/tasks/install.rake +6 -0
- data/tasks/website.rake +17 -0
- data/test/apriori_test.rb +13 -0
- data/test/fixtures/market_basket_results_test.txt +5 -0
- data/test/fixtures/market_basket_string_test.txt +7 -0
- data/test/fixtures/results.txt +2 -0
- data/test/fixtures/sample.txt +7 -0
- data/test/test_helper.rb +5 -0
- data/test/unit/test_apriori.rb +68 -0
- data/test/unit/test_itemsets_and_parsing.rb +82 -0
- data/website/index.html +233 -0
- data/website/index.txt +142 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +142 -0
- data/website/template.html.erb +49 -0
- metadata +188 -0
@@ -0,0 +1,68 @@
|
|
1
|
+
#-----------------------------------------------------------------------
|
2
|
+
# File : arem.gp
|
3
|
+
# Contents: visualization of the selection behaviour
|
4
|
+
# of the additional rule evaluation measures
|
5
|
+
# Author : Christian Borgelt
|
6
|
+
# History : 22.09.1998 file created
|
7
|
+
#-----------------------------------------------------------------------
|
8
|
+
# set terminal postscript
|
9
|
+
|
10
|
+
set view 50,325,1
|
11
|
+
set hidden3d
|
12
|
+
set isosamples 20,20
|
13
|
+
|
14
|
+
set xrange [0.001:0.999]
|
15
|
+
set yrange [0.001:0.999]
|
16
|
+
set zrange [0:1]
|
17
|
+
|
18
|
+
min(a,b) = a < b ? a : b
|
19
|
+
|
20
|
+
diff(x,y) = abs(x -y)
|
21
|
+
|
22
|
+
quot(x,y) = 1 -min(x/y, y/x)
|
23
|
+
|
24
|
+
info(x,y,s) = (x >= y*s) && (1-x >= s *(1-y)) \
|
25
|
+
? (s*y*log(y/x) +(x-s*y)*log((x-s*y)/(x*(1-s))) \
|
26
|
+
+ s*(1-y)*log((1-y)/(1-x)) \
|
27
|
+
+ (1-x-s*(1-y))*log((1-x-s*(1-y))/((1-x)*(1-s)))) /log(2) : 0
|
28
|
+
|
29
|
+
chi2(x,y,s) = (x >= y*s) && (1-x >= s *(1-y)) \
|
30
|
+
? (x*s -y*s)**2 /(x*(1-x)*s*(1-s)) : 0
|
31
|
+
|
32
|
+
set title "d_diff (independent of antecedent support)"
|
33
|
+
set xlabel "c_prior"
|
34
|
+
set ylabel "c_post"
|
35
|
+
set zlabel "d_diff"
|
36
|
+
splot diff(x,y)
|
37
|
+
pause -1 "Hit return to continue"
|
38
|
+
|
39
|
+
set title "d_quot (independent of antecedent support)"
|
40
|
+
set zlabel "d_quot"
|
41
|
+
splot quot(x,y)
|
42
|
+
pause -1 "Hit return to continue"
|
43
|
+
|
44
|
+
set zlabel "d_info"
|
45
|
+
set title "d_info with antecedent support 0.2"
|
46
|
+
splot info(x,y,0.2)
|
47
|
+
pause -1 "Hit return to continue"
|
48
|
+
|
49
|
+
set title "d_info with antecedent support 0.3"
|
50
|
+
splot info(x,y,0.3)
|
51
|
+
pause -1 "Hit return to continue"
|
52
|
+
|
53
|
+
set title "d_info with antecedent support 0.4"
|
54
|
+
splot info(x,y,0.4)
|
55
|
+
pause -1 "Hit return to continue"
|
56
|
+
|
57
|
+
set zlabel "d_chi^2"
|
58
|
+
set title "d_chi^2 with antecedent support 0.2"
|
59
|
+
splot chi2(x,y,0.2)
|
60
|
+
pause -1 "Hit return to continue"
|
61
|
+
|
62
|
+
set title "d_chi^2 with antecedent support 0.3"
|
63
|
+
splot chi2(x,y,0.3)
|
64
|
+
pause -1 "Hit return to continue"
|
65
|
+
|
66
|
+
set title "d_chi^2 with antecedent support 0.4"
|
67
|
+
splot chi2(x,y,0.4)
|
68
|
+
pause -1 "Hit return to continue"
|
@@ -0,0 +1,89 @@
|
|
1
|
+
#-----------------------------------------------------------------------
|
2
|
+
# File : c_rev.gp
|
3
|
+
# Contents: visualization of the dependence between posterior confidence
|
4
|
+
# and reversed confidence, i.e. the confidence of the reversed
|
5
|
+
# rule antecedent <- consequent
|
6
|
+
# Author : Christian Borgelt
|
7
|
+
# History : 23.09.1998 file created
|
8
|
+
#-----------------------------------------------------------------------
|
9
|
+
# set terminal postscript
|
10
|
+
|
11
|
+
set view 50,330,1
|
12
|
+
set xrange [0.001:0.999]
|
13
|
+
set yrange [0.001:0.999]
|
14
|
+
set zrange [0:1]
|
15
|
+
set isosamples 20
|
16
|
+
set hidden3d
|
17
|
+
|
18
|
+
set xlabel "c_post"
|
19
|
+
set ylabel "c_rev"
|
20
|
+
|
21
|
+
min(a,b) = a < b ? a : b
|
22
|
+
|
23
|
+
diff(x,y,p) = (1 -p -(y*p)/x +y*p >= 0) ? abs(x -p) : 0
|
24
|
+
|
25
|
+
quot(x,y,p) = (1 -p -(y*p)/x +y*p >= 0) ? 1 -min(x/p,p/x) : 0
|
26
|
+
|
27
|
+
i(x,y,p) = (y*p*log((y*p)/(p*(y*p)/x)) \
|
28
|
+
+ (p-y*p)*log((p-y*p)/(p*(1-(y*p)/x))) \
|
29
|
+
+ ((y*p)/x-y*p)*log(((y*p)/x-y*p)/((1-p)*(y*p)/x)) \
|
30
|
+
+ (1-p-(y*p)/x+y*p)*log((1-p-(y*p)/x+y*p) \
|
31
|
+
/((1-p)*(1-(y*p)/x)))) /log(2)
|
32
|
+
|
33
|
+
info(x,y,p) = (1 -p -(y*p)/x +y*p >= 0) ? i(x,y,p) : 0
|
34
|
+
|
35
|
+
chi2(x,y,p) = (1 -p -(y*p)/x +y*p >= 0) \
|
36
|
+
? (p*((y*p)/x)-y*p)**2 /(p*(1-p)*((y*p)/x)*(1-(y*p)/x)) : 0
|
37
|
+
|
38
|
+
set zlabel "d_diff"
|
39
|
+
set title "prior confidence 0.2"
|
40
|
+
splot diff(x,y,0.2)
|
41
|
+
pause -1 "Hit return to continue"
|
42
|
+
|
43
|
+
set title "prior confidence 0.3"
|
44
|
+
splot diff(x,y,0.3)
|
45
|
+
pause -1 "Hit return to continue"
|
46
|
+
|
47
|
+
set title "prior confidence 0.4"
|
48
|
+
splot diff(x,y,0.4)
|
49
|
+
pause -1 "Hit return to continue"
|
50
|
+
|
51
|
+
set zlabel "d_quot"
|
52
|
+
set title "prior confidence 0.2"
|
53
|
+
splot quot(x,y,0.2)
|
54
|
+
pause -1 "Hit return to continue"
|
55
|
+
|
56
|
+
set title "prior confidence 0.3"
|
57
|
+
splot quot(x,y,0.3)
|
58
|
+
pause -1 "Hit return to continue"
|
59
|
+
|
60
|
+
set title "prior confidence 0.4"
|
61
|
+
splot quot(x,y,0.4)
|
62
|
+
pause -1 "Hit return to continue"
|
63
|
+
|
64
|
+
set zlabel "d_info"
|
65
|
+
set title "prior confidence 0.2"
|
66
|
+
splot info(x,y,0.2)
|
67
|
+
pause -1 "Hit return to continue"
|
68
|
+
|
69
|
+
set title "prior confidence 0.3"
|
70
|
+
splot info(x,y,0.3)
|
71
|
+
pause -1 "Hit return to continue"
|
72
|
+
|
73
|
+
set title "prior confidence 0.4"
|
74
|
+
splot info(x,y,0.4)
|
75
|
+
pause -1 "Hit return to continue"
|
76
|
+
|
77
|
+
set zlabel "d_chi^2"
|
78
|
+
set title "prior confidence 0.2"
|
79
|
+
splot chi2(x,y,0.2)
|
80
|
+
pause -1 "Hit return to continue"
|
81
|
+
|
82
|
+
set title "prior confidence 0.3"
|
83
|
+
splot chi2(x,y,0.3)
|
84
|
+
pause -1 "Hit return to continue"
|
85
|
+
|
86
|
+
set title "prior confidence 0.4"
|
87
|
+
splot chi2(x,y,0.4)
|
88
|
+
pause -1 "Hit return to continue"
|
89
|
+
|
@@ -0,0 +1,156 @@
|
|
1
|
+
\documentclass[a4paper]{article}
|
2
|
+
\oddsidemargin 2.1mm
|
3
|
+
\textwidth 155mm
|
4
|
+
\topmargin -12mm
|
5
|
+
\textheight 230mm
|
6
|
+
|
7
|
+
\def\tabstrut{\rule{0pt}{2.4ex}}
|
8
|
+
\def\eq{\!\!\!=\!\!\!}
|
9
|
+
|
10
|
+
\begin{document}
|
11
|
+
|
12
|
+
\subsection*{The Normalized $\chi^2$ Measure
|
13
|
+
for Association Rule Evaluation}
|
14
|
+
|
15
|
+
Let $C$ and $A$ be two attributes with domains
|
16
|
+
$\mbox{dom}(A) = \{ a_1, \ldots, a_{n_A} \}$ and
|
17
|
+
$\mbox{dom}(C) = \{ c_1, \ldots, c_{n_C} \}$, respectively,
|
18
|
+
and let $\cal X$ be a dataset over $C$ and $A$.
|
19
|
+
Let $N_{ij}$, $1 \le i \le n_C$, $1 \le j \le n_A$, be the number of
|
20
|
+
sample cases in $\cal X$, which contain both the attribute values~$c_i$
|
21
|
+
and $a_j$. Furthermore, let
|
22
|
+
\[ N_{i.} = \sum_{j=1}^{n_A} N_{ij}, \qquad
|
23
|
+
N_{.j} = \sum_{i=1}^{n_C} N_{ij}, \qquad\mbox{and}\qquad
|
24
|
+
N_{..} = \sum_{i=1}^{n_C} \sum_{j=1}^{n_A} N_{ij} = |{\cal X}|. \]
|
25
|
+
Finally, let
|
26
|
+
\[ p_{i.} = \frac{N_{i.}}{N_{..}}, \qquad
|
27
|
+
p_{.j} = \frac{N_{.j}}{N_{..}}, \qquad\mbox{and}\qquad
|
28
|
+
p_{ij} = \frac{N_{ij}}{N_{..}} \]
|
29
|
+
be the probabilities of the attribute values and their combinations,
|
30
|
+
as they can be estimated from these numbers. Then the well-known
|
31
|
+
$\chi^2$ measure is usually defined as
|
32
|
+
\begin{eqnarray*}
|
33
|
+
\chi^2(C,A)
|
34
|
+
& = & \sum_{i=1}^{n_C} \sum_{j=1}^{n_A}
|
35
|
+
\frac{(E_{ij} -N_{ij})^2}{E_{ij}}
|
36
|
+
\qquad\mbox{where}\quad E_{ij} = \frac{N_{i.}N_{.j}}{N_{..}} \\
|
37
|
+
& = & \sum_{i=1}^{n_C} \sum_{j=1}^{n_A}
|
38
|
+
\frac{\left(\frac{N_{i.}N_{.j}}{N_{..}} -N_{ij}\right)^2}
|
39
|
+
{\frac{N_{i.}N_{.j}}{N_{..}}}
|
40
|
+
~~=~~ \sum_{i=1}^{n_C} \sum_{j=1}^{n_A}
|
41
|
+
\frac{N_{..}^2 \left(\frac{N_{i.\phantom{j}}}{N_{..}}
|
42
|
+
\frac{N_{.j}}{N_{..}}
|
43
|
+
- \frac{N_{ij}}{N_{..}}\right)^2}
|
44
|
+
{N_{..}\; \frac{N_{i.\phantom{j}}}{N_{..}}
|
45
|
+
\frac{N_{.j}}{N_{..}}} \\
|
46
|
+
& = & N_{..} \sum_{i=1}^{n_C} \sum_{j=1}^{n_A}
|
47
|
+
\frac{(p_{i.}\;p_{.j} - p_{ij})^2}{p_{i.}\;p_{.j}}
|
48
|
+
~~=~~ \frac{1}{N_{..}} \sum_{i=1}^{n_C} \sum_{j=1}^{n_A}
|
49
|
+
\frac{(N_{i.}\;N_{.j} - N_{..}N_{ij})^2}{N_{i.}\;N_{.j}}.
|
50
|
+
\end{eqnarray*}
|
51
|
+
This measure is often normalized by dividing it by the
|
52
|
+
size~$N_{..} = |{\cal X}|$ of the dataset to remove the
|
53
|
+
dependence on the number of sample cases.
|
54
|
+
|
55
|
+
For association rule evaluation, $C$ refers to the consequent and $A$ to
|
56
|
+
the antecedent of the rule. Both have two values, which we denote by
|
57
|
+
$c_0$, $c_1$ and $a_0$, $a_1$, respectively. $c_0$ means that the
|
58
|
+
consequent of the rule is not satisfied, $c_1$ that it is satisfied;
|
59
|
+
likewise for $A$. Then we have to compute the $\chi^2$ measure from
|
60
|
+
the $2 \times 2$ contingency table
|
61
|
+
\begin{center}
|
62
|
+
\begin{tabular}{|l|c|c|l|} \cline{2-3}
|
63
|
+
\multicolumn{1}{l|}{}
|
64
|
+
& $a_0$ & $a_1$ \\ \hline
|
65
|
+
$c_0$ & $N_{00}$ & $N_{01}$ & $N_{0.}$\tabstrut \\ \hline
|
66
|
+
$c_1$ & $N_{10}$ & $N_{11}$ & $N_{1.}$\tabstrut \\ \hline
|
67
|
+
\multicolumn{1}{l|}{}
|
68
|
+
& $N_{.0}$ & $N_{.1}$ & $N_{..}$\tabstrut \\ \cline{2-4}
|
69
|
+
\end{tabular}
|
70
|
+
\end{center}
|
71
|
+
or the estimated probability table
|
72
|
+
\begin{center}
|
73
|
+
\begin{tabular}{|l|c|c|l|} \cline{2-3}
|
74
|
+
\multicolumn{1}{l|}{}
|
75
|
+
& $a_0$ & $a_1$ \\ \hline
|
76
|
+
$c_0$ & $p_{00}$ & $p_{01}$ & $p_{0.}$\tabstrut \\ \hline
|
77
|
+
$c_1$ & $p_{10}$ & $p_{11}$ & $p_{1.}$\tabstrut \\ \hline
|
78
|
+
\multicolumn{1}{l|}{}
|
79
|
+
& $p_{.0}$ & $p_{.1}$ & $1$\tabstrut \\ \cline{2-4}
|
80
|
+
\end{tabular}
|
81
|
+
\end{center}
|
82
|
+
That is, we have
|
83
|
+
\begin{eqnarray*}
|
84
|
+
\frac{\chi^2(C,A)}{N_{..}}
|
85
|
+
& = & \sum_{i=0}^1 \sum_{j=0}^1
|
86
|
+
\frac{(p_{i.}\;p_{.j} - p_{ij})^2}{p_{i.}\;p_{.j}} \\
|
87
|
+
& = & \frac{(p_{0.}\;p_{.0} -p_{00})^2}{p_{0.}\;p_{.0}}
|
88
|
+
+ \frac{(p_{0.}\;p_{.1} -p_{01})^2}{p_{0.}\;p_{.1}}
|
89
|
+
+ \frac{(p_{1.}\;p_{.0} -p_{10})^2}{p_{1.}\;p_{.0}}
|
90
|
+
+ \frac{(p_{1.}\;p_{.1} -p_{11})^2}{p_{1.}\;p_{.1}}
|
91
|
+
\end{eqnarray*}
|
92
|
+
Now we can exploit
|
93
|
+
\[ p_{00} + p_{01} = p_{0.}, \quad
|
94
|
+
p_{10} + p_{11} = p_{1.}, \quad
|
95
|
+
p_{00} + p_{10} = p_{.0}, \quad
|
96
|
+
p_{01} + p_{11} = p_{.1}, \quad
|
97
|
+
p_{0.} + p_{1.} = 1, \quad
|
98
|
+
p_{.0} + p_{.1} = 1, \]
|
99
|
+
which leads to
|
100
|
+
\begin{eqnarray*}
|
101
|
+
p_{0.}\;p_{.0} -p_{00}
|
102
|
+
& = & (1 -p_{1.})(1 -p_{.1}) -(1 -p_{1.} -p_{.1} +p_{11})
|
103
|
+
~~=~~ p_{1.}\;p_{.1} -p_{11}, \\
|
104
|
+
p_{0.}\;p_{.1} -p_{01}
|
105
|
+
& = & (1 -p_{1.})p_{.1} -(p_{.1} -p_{11})
|
106
|
+
~~=~~ p_{11} -p_{1.}\;p_{.1}, \\
|
107
|
+
p_{1.}\;p_{.0} -p_{10}
|
108
|
+
& = & p_{1.}(1 -p_{.1}) -(p_{1.} -p_{11})
|
109
|
+
~~=~~ p_{11} -p_{1.}\;p_{.1}.
|
110
|
+
\end{eqnarray*}
|
111
|
+
Therefore we obtain
|
112
|
+
\begin{eqnarray*}
|
113
|
+
\frac{\chi^2(C,A)}{N_{..}}
|
114
|
+
& = & \frac{(p_{1.}\;p_{.1} -p_{11})^2}{(1 -p_{1.})(1 -p_{.1})}
|
115
|
+
+ \frac{(p_{1.}\;p_{.1} -p_{11})^2}{(1 -p_{1.})\;p_{.1}}
|
116
|
+
+ \frac{(p_{1.}\;p_{.1} -p_{11})^2}{p_{1.}(1 -p_{.1})}
|
117
|
+
+ \frac{(p_{1.}\;p_{.1} -p_{11})^2}{p_{1.}\;p_{.1}} \\
|
118
|
+
& = & \frac{(p_{1.}\;p_{.1} -p_{11})^2
|
119
|
+
(p_{1.}\;p_{.1}
|
120
|
+
+p_{1.}(1 -p_{.1})
|
121
|
+
+(1 -p_{1.})p_{.1}
|
122
|
+
+(1 -p_{1.})(1 -p_{.1}))}
|
123
|
+
{p_{1.}(1 -p_{1.})p_{.1}(1 -p_{.1})} \\
|
124
|
+
& = & \frac{(p_{1.}\;p_{.1} -p_{11})^2
|
125
|
+
(p_{1.}\;p_{.1}
|
126
|
+
+p_{1.} -p_{1.}\;p_{.1}
|
127
|
+
+p_{.1} -p_{1.}\;p_{.1}
|
128
|
+
+1 -p_{1.} -p_{.1} +p_{1.}\;p_{.1})}
|
129
|
+
{p_{1.}(1 -p_{1.})p_{.1}(1 -p_{.1})} \\
|
130
|
+
& = & \frac{(p_{1.}\;p_{.1} -p_{11})^2}
|
131
|
+
{p_{1.}(1 -p_{1.})p_{.1}(1 -p_{.1})}.
|
132
|
+
\end{eqnarray*}
|
133
|
+
In the program, $p_{1.}$ (argument {\tt head}), $p_{.1}$
|
134
|
+
(argument {\tt body}) and $p_{1|1} = \frac{p_{11}}{p_{.1}}$
|
135
|
+
(argument {\tt post}, rule confidence) are passed to the routine
|
136
|
+
that computes the measure, so the actual computation is
|
137
|
+
\begin{eqnarray*}
|
138
|
+
\frac{\chi^2(C,A)}{N_{..}}
|
139
|
+
& = & \frac{(p_{1.}\;p_{.1} -p_{1|1}\;p_{.1})^2}
|
140
|
+
{p_{1.}(1 -p_{1.})p_{.1}(1 -p_{.1})}
|
141
|
+
~~=~~ \frac{((p_{1.} -p_{1|1})p_{.1})^2}
|
142
|
+
{p_{1.}(1 -p_{1.})p_{.1}(1 -p_{.1})}.
|
143
|
+
\end{eqnarray*}
|
144
|
+
In an analogous way the measure can also be computed from the absolute
|
145
|
+
frequencies $N_{ij}$, $N_{i.}$, $N_{.j}$ and $N_{..}$, namely as
|
146
|
+
\begin{eqnarray*}
|
147
|
+
\frac{\chi^2(C,A)}{N_{..}}
|
148
|
+
& = & \frac{(N_{1.}N_{.1} -N_{..}N_{11})^2}
|
149
|
+
{N_{1.}(N_{..} -N_{1.})N_{.1}(N_{..} -N_{.1})}.
|
150
|
+
\end{eqnarray*}
|
151
|
+
\end{document}
|
152
|
+
|
153
|
+
%%% Local Variables:
|
154
|
+
%%% mode: latex
|
155
|
+
%%% TeX-master: t
|
156
|
+
%%% End:
|