jashmenn-apriori 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/License.txt +20 -0
- data/Manifest.txt +121 -0
- data/README.txt +139 -0
- data/Rakefile +4 -0
- data/TODO.txt +60 -0
- data/attic/c_ext_test1/MyTest/MyTest.c +23 -0
- data/attic/c_ext_test1/MyTest/extconf.rb +11 -0
- data/attic/c_ext_test1/mytest.rb +10 -0
- data/attic/test.c +12 -0
- data/config/hoe.rb +77 -0
- data/config/requirements.rb +15 -0
- data/examples/01_simple_example.rb +23 -0
- data/examples/02_small_file_example.rb +17 -0
- data/examples/03_large_file_example.rb +22 -0
- data/examples/test_data/market_basket_basic_test.dat +9 -0
- data/ext/Apriori.c +149 -0
- data/ext/Makefile +149 -0
- data/ext/apriori/doc/apriori.html +1301 -0
- data/ext/apriori/doc/arem.gp +68 -0
- data/ext/apriori/doc/c_rev.gp +89 -0
- data/ext/apriori/doc/chi2.tex +156 -0
- data/ext/apriori/doc/copying +504 -0
- data/ext/apriori/doc/line.gif +0 -0
- data/ext/apriori/doc/uparrow.gif +0 -0
- data/ext/apriori/ex/flg2set +15 -0
- data/ext/apriori/ex/hdr2set +13 -0
- data/ext/apriori/ex/readme +71 -0
- data/ext/apriori/ex/row2set +7 -0
- data/ext/apriori/ex/rulesort +24 -0
- data/ext/apriori/ex/tab2set +9 -0
- data/ext/apriori/ex/test.app +2 -0
- data/ext/apriori/ex/test.rul +9 -0
- data/ext/apriori/ex/test1.rul +43 -0
- data/ext/apriori/ex/test1.tab +10 -0
- data/ext/apriori/ex/test2.tab +10 -0
- data/ext/apriori/ex/test3.tab +30 -0
- data/ext/apriori/ex/test4.tab +11 -0
- data/ext/apriori/ex/test5.tab +39 -0
- data/ext/apriori/ex/tid2set +23 -0
- data/ext/apriori/ex/xhdr2set +33 -0
- data/ext/apriori/src/apriori.c +750 -0
- data/ext/apriori/src/apriori.dsp +120 -0
- data/ext/apriori/src/apriori.dsw +29 -0
- data/ext/apriori/src/apriori.mak +99 -0
- data/ext/apriori/src/istree.c +1411 -0
- data/ext/apriori/src/istree.h +160 -0
- data/ext/apriori/src/makefile +105 -0
- data/ext/apriori/src/tract.c +870 -0
- data/ext/apriori/src/tract.h +261 -0
- data/ext/apriori_wrapper.c +757 -0
- data/ext/apriori_wrapper.h +10 -0
- data/ext/extconf.rb +32 -0
- data/ext/math/doc/copying +504 -0
- data/ext/math/src/chi2.c +151 -0
- data/ext/math/src/chi2.h +27 -0
- data/ext/math/src/choose.c +71 -0
- data/ext/math/src/choose.h +16 -0
- data/ext/math/src/gamma.c +446 -0
- data/ext/math/src/gamma.h +39 -0
- data/ext/math/src/intexp.c +35 -0
- data/ext/math/src/intexp.h +15 -0
- data/ext/math/src/makefile +164 -0
- data/ext/math/src/math.mak +48 -0
- data/ext/math/src/normal.c +387 -0
- data/ext/math/src/normal.h +44 -0
- data/ext/math/src/radfn.c +113 -0
- data/ext/math/src/radfn.h +34 -0
- data/ext/math/src/zeta.c +49 -0
- data/ext/math/src/zeta.h +15 -0
- data/ext/pre-clean.rb +8 -0
- data/ext/pre-setup.rb +9 -0
- data/ext/util/doc/copying +504 -0
- data/ext/util/src/listops.c +76 -0
- data/ext/util/src/listops.h +26 -0
- data/ext/util/src/makefile +103 -0
- data/ext/util/src/memsys.c +84 -0
- data/ext/util/src/memsys.h +42 -0
- data/ext/util/src/nstats.c +288 -0
- data/ext/util/src/nstats.h +69 -0
- data/ext/util/src/params.c +86 -0
- data/ext/util/src/params.h +19 -0
- data/ext/util/src/parse.c +133 -0
- data/ext/util/src/parse.h +81 -0
- data/ext/util/src/scan.c +767 -0
- data/ext/util/src/scan.h +111 -0
- data/ext/util/src/symtab.c +443 -0
- data/ext/util/src/symtab.h +121 -0
- data/ext/util/src/tabscan.c +279 -0
- data/ext/util/src/tabscan.h +99 -0
- data/ext/util/src/util.mak +91 -0
- data/ext/util/src/vecops.c +317 -0
- data/ext/util/src/vecops.h +42 -0
- data/lib/apriori/adapter.rb +13 -0
- data/lib/apriori/association_rule.rb +85 -0
- data/lib/apriori/version.rb +9 -0
- data/lib/apriori.rb +133 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/txt2html +82 -0
- data/setup.rb +1585 -0
- data/tasks/apriori.rake +20 -0
- data/tasks/attic.rake +28 -0
- data/tasks/deployment.rake +34 -0
- data/tasks/environment.rake +7 -0
- data/tasks/install.rake +6 -0
- data/tasks/website.rake +17 -0
- data/test/apriori_test.rb +13 -0
- data/test/fixtures/market_basket_results_test.txt +5 -0
- data/test/fixtures/market_basket_string_test.txt +7 -0
- data/test/fixtures/results.txt +2 -0
- data/test/fixtures/sample.txt +7 -0
- data/test/test_helper.rb +5 -0
- data/test/unit/test_apriori.rb +68 -0
- data/test/unit/test_itemsets_and_parsing.rb +82 -0
- data/website/index.html +233 -0
- data/website/index.txt +142 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +142 -0
- data/website/template.html.erb +49 -0
- metadata +188 -0
@@ -0,0 +1,160 @@
|
|
1
|
+
/*----------------------------------------------------------------------
|
2
|
+
File : istree.h
|
3
|
+
Contents: item set tree management
|
4
|
+
Author : Christian Borgelt
|
5
|
+
History : 1996.01.22 file created
|
6
|
+
1996.01.29 ISNODE.offset and ISNODE.id added
|
7
|
+
1996.02.08 ISTREE.tacnt, ISTREE.curr, ISTREE.index,
|
8
|
+
ISTREE.head and ISTREE.conf added
|
9
|
+
1996.03.28 support made relative to number of item sets
|
10
|
+
1996.11.23 ISTREE.levels (first nodes of each level) added
|
11
|
+
1996.11.24 ISTREE.arem (add. rule evaluation measure) added
|
12
|
+
1997.08.18 chi^2 evaluation measure added
|
13
|
+
parameter 'minlen' added to function ist_init()
|
14
|
+
1998.02.11 parameter 'minval' added to function ist_init()
|
15
|
+
1998.05.14 item set tree navigation functions added
|
16
|
+
1998.08.08 parameter 'apps' added to function ist_create()
|
17
|
+
1998.08.20 structure ISNODE redesigned
|
18
|
+
1998.09.07 function ist_hedge added
|
19
|
+
1998.12.08 function ist_gettac added,
|
20
|
+
float changed to double
|
21
|
+
1999.02.05 long int changed to int
|
22
|
+
1999.08.26 functions ist_first and ist_last added
|
23
|
+
1999.11.05 rule evaluation measure EM_AIMP added
|
24
|
+
1999.11.08 parameter 'aval' added to function ist_rule
|
25
|
+
2001.04.01 functions ist_set and ist_getcntx added
|
26
|
+
2001.12.28 sort function moved to module tract
|
27
|
+
2002.02.07 function ist_clear removed, ist_settac added
|
28
|
+
2002.02.11 optional use of identifier maps in nodes added
|
29
|
+
2002.02.12 ist_first and ist_last replaced by ist_next
|
30
|
+
2003.03.12 parameter lift added to function ist_rule
|
31
|
+
2003.07.17 functions ist_itemcnt and ist_check added
|
32
|
+
2003.07.18 function ist_maxfrq added (item set filter)
|
33
|
+
2003.08.11 item set filtering generalized (ist_filter)
|
34
|
+
2004.05.09 parameter 'aval' added to function ist_set
|
35
|
+
2008.03.24 creation based on ITEMSET structure
|
36
|
+
----------------------------------------------------------------------*/
|
37
|
+
#ifndef __ISTREE__
|
38
|
+
#define __ISTREE__
|
39
|
+
#include "tract.h"
|
40
|
+
|
41
|
+
/*----------------------------------------------------------------------
|
42
|
+
Preprocessor Definitions
|
43
|
+
----------------------------------------------------------------------*/
|
44
|
+
/* --- additional evaluation measures --- */
|
45
|
+
#define EM_NONE 0 /* no measure */
|
46
|
+
#define EM_DIFF 1 /* absolute conf. difference to prior */
|
47
|
+
#define EM_QUOT 2 /* difference of conf. quotient to 1 */
|
48
|
+
#define EM_AIMP 3 /* abs. diff. of improvement to 1 */
|
49
|
+
#define EM_INFO 4 /* information difference to prior */
|
50
|
+
#define EM_CHI2 5 /* normalized chi^2 measure */
|
51
|
+
#define EM_PVAL 6 /* p-value of chi^2 measure */
|
52
|
+
#define EM_UNKNOWN 7 /* unknown measure */
|
53
|
+
|
54
|
+
/* --- item appearances --- */
|
55
|
+
#define IST_IGNORE 0 /* ignore item */
|
56
|
+
#define IST_BODY 1 /* item may appear in rule body */
|
57
|
+
#define IST_HEAD 2 /* item may appear in rule head */
|
58
|
+
#define IST_BOTH (IST_HEAD|IST_BODY)
|
59
|
+
|
60
|
+
/* --- search mode flags --- */
|
61
|
+
#define IST_MEMOPT 4 /* optimize memory usage */
|
62
|
+
|
63
|
+
/* --- item set filter modes --- */
|
64
|
+
#define IST_CLEAR 0 /* clear markers */
|
65
|
+
#define IST_CLOSED 1 /* closed item sets */
|
66
|
+
#define IST_MAXFRQ 2 /* maximal item sets */
|
67
|
+
|
68
|
+
/*----------------------------------------------------------------------
|
69
|
+
Type Definitions
|
70
|
+
----------------------------------------------------------------------*/
|
71
|
+
typedef struct _isnode { /* --- item set node --- */
|
72
|
+
struct _isnode *parent; /* parent node */
|
73
|
+
struct _isnode *succ; /* successor node on same level */
|
74
|
+
int id; /* identifier used in parent node */
|
75
|
+
int chcnt; /* number of child nodes */
|
76
|
+
int size; /* size of counter vector */
|
77
|
+
int offset; /* offset of counter vector */
|
78
|
+
int cnts[1]; /* counter vector */
|
79
|
+
} ISNODE; /* (item set node) */
|
80
|
+
|
81
|
+
typedef struct { /* --- item set tree --- */
|
82
|
+
ITEMSET *set; /* underlying item set */
|
83
|
+
int mode; /* search mode (e.g. support def.) */
|
84
|
+
int tacnt; /* number of transactions */
|
85
|
+
int vsz; /* size of level vector */
|
86
|
+
int height; /* tree height (number of levels) */
|
87
|
+
ISNODE **lvls; /* first node of each level */
|
88
|
+
int rule; /* minimal support of an assoc. rule */
|
89
|
+
int supp; /* minimal support of an item set */
|
90
|
+
double conf; /* minimal confidence of a rule */
|
91
|
+
int arem; /* additional rule evaluation measure */
|
92
|
+
double minval; /* minimal evaluation measure value */
|
93
|
+
ISNODE *curr; /* current node for traversal */
|
94
|
+
int size; /* size of item set/rule/hyperedge */
|
95
|
+
ISNODE *node; /* item set node for extraction */
|
96
|
+
int index; /* index in item set node */
|
97
|
+
ISNODE *head; /* head item node for extraction */
|
98
|
+
int item; /* head item of previous rule */
|
99
|
+
int *buf; /* buffer for paths (support check) */
|
100
|
+
int *path; /* current path / (partial) item set */
|
101
|
+
int plen; /* current path length */
|
102
|
+
int hdonly; /* head only item in current set */
|
103
|
+
int *map; /* to create identifier maps */
|
104
|
+
#ifdef BENCH /* if benchmark version */
|
105
|
+
int sccnt; /* number of support counters */
|
106
|
+
int scnec; /* number of necessary supp. counters */
|
107
|
+
int cpcnt; /* number of child pointers */
|
108
|
+
int cpnec; /* number of necessary child pointers */
|
109
|
+
int bytes; /* number of bytes used */
|
110
|
+
#endif
|
111
|
+
} ISTREE; /* (item set tree) */
|
112
|
+
|
113
|
+
/*----------------------------------------------------------------------
|
114
|
+
Functions
|
115
|
+
----------------------------------------------------------------------*/
|
116
|
+
extern ISTREE* ist_create (ITEMSET *set, int mode,
|
117
|
+
int supp, double conf);
|
118
|
+
extern void ist_delete (ISTREE *ist);
|
119
|
+
extern int ist_itemcnt (ISTREE *ist);
|
120
|
+
|
121
|
+
extern void ist_count (ISTREE *ist, int *set, int cnt);
|
122
|
+
extern void ist_countx (ISTREE *ist, TATREE *tat);
|
123
|
+
extern int ist_settac (ISTREE *ist, int cnt);
|
124
|
+
extern int ist_gettac (ISTREE *ist);
|
125
|
+
extern int ist_check (ISTREE *ist, char *marks);
|
126
|
+
extern int ist_addlvl (ISTREE *ist);
|
127
|
+
extern int ist_height (ISTREE *ist);
|
128
|
+
|
129
|
+
extern void ist_up (ISTREE *ist, int root);
|
130
|
+
extern int ist_down (ISTREE *ist, int item);
|
131
|
+
extern int ist_next (ISTREE *ist, int item);
|
132
|
+
extern void ist_setcnt (ISTREE *ist, int item, int cnt);
|
133
|
+
extern int ist_getcnt (ISTREE *ist, int item);
|
134
|
+
extern int ist_getcntx (ISTREE *ist, int *set, int cnt);
|
135
|
+
|
136
|
+
extern void ist_filter (ISTREE *ist, int mode);
|
137
|
+
extern void ist_init (ISTREE *ist, int minlen,
|
138
|
+
int arem, double minval);
|
139
|
+
extern int ist_set (ISTREE *ist, int *set, int *supp,
|
140
|
+
double *aval);
|
141
|
+
extern int ist_rule (ISTREE *ist, int *rule, int *supp,
|
142
|
+
double *conf, double *lift, double *aval);
|
143
|
+
extern int ist_hedge (ISTREE *ist, int *hedge, int *supp,
|
144
|
+
double *conf, double *aval);
|
145
|
+
extern int ist_group (ISTREE *ist, int *asmb, int *supp,
|
146
|
+
double *aval);
|
147
|
+
|
148
|
+
#ifndef NDEBUG
|
149
|
+
extern void ist_show (ISTREE *ist);
|
150
|
+
#endif
|
151
|
+
|
152
|
+
/*----------------------------------------------------------------------
|
153
|
+
Preprocessor Definitions
|
154
|
+
----------------------------------------------------------------------*/
|
155
|
+
#define ist_itemcnt(t)     ((t)->lvls[0]->size)  /* counter vector size of the first node on level 0 (member is 'lvls', not 'levels') */
|
156
|
+
#define ist_settac(t,n) ((t)->tacnt = (n))
|
157
|
+
#define ist_gettac(t) ((t)->tacnt)
|
158
|
+
#define ist_height(t) ((t)->height)
|
159
|
+
|
160
|
+
#endif
|
@@ -0,0 +1,105 @@
|
|
1
|
+
#-----------------------------------------------------------------------
|
2
|
+
# File : makefile
|
3
|
+
# Contents: build apriori program
|
4
|
+
# Author : Christian Borgelt
|
5
|
+
# History : ??.??.1995 file created
|
6
|
+
# 1997.10.13 macro ADDFLAGS added
|
7
|
+
# 1997.12.07 minor improvements
|
8
|
+
# 1998.01.04 table scanner management added
|
9
|
+
# 1999.11.11 vector operations module added
|
10
|
+
# 2000.11.04 modules vecops, symtab, and tabscan made external
|
11
|
+
# 2001.11.18 module tract (transaction management) added
|
12
|
+
# 2003.12.12 preprocessor definition ARCH64 added
|
13
|
+
#-----------------------------------------------------------------------
|
14
|
+
CC = gcc
|
15
|
+
CFBASE = -ansi -Wall -pedantic -I$(UTILDIR) -I$(MATHDIR) $(ADDFLAGS)
|
16
|
+
CFLAGS = $(CFBASE) -DNDEBUG -O3
|
17
|
+
# CFLAGS = $(CFBASE) -DNDEBUG -O3 -DBENCH
|
18
|
+
# CFLAGS = $(CFBASE) -DNDEBUG -O3 -DARCH64
|
19
|
+
# CFLAGS = $(CFBASE) -g
|
20
|
+
# CFLAGS = $(CFBASE) -g -DARCH64
|
21
|
+
# CFLAGS = $(CFBASE) -g -DSTORAGE $(ADDINC)
|
22
|
+
LDFLAGS =
|
23
|
+
LIBS = -lm
|
24
|
+
# ADDINC = -I../../misc/src
|
25
|
+
# ADDOBJ = storage.o
|
26
|
+
|
27
|
+
UTILDIR = ../../util/src
|
28
|
+
MATHDIR = ../../math/src
|
29
|
+
HDRS = $(UTILDIR)/vecops.h $(UTILDIR)/symtab.h \
|
30
|
+
$(UTILDIR)/tabscan.h $(UTILDIR)/scan.h \
|
31
|
+
$(MATHDIR)/gamma.h $(MATHDIR)/chi2.h \
|
32
|
+
tract.h istree.h
|
33
|
+
OBJS = $(UTILDIR)/vecops.o $(UTILDIR)/nimap.o \
|
34
|
+
$(UTILDIR)/tabscan.o $(UTILDIR)/scform.o \
|
35
|
+
$(MATHDIR)/gamma.o $(MATHDIR)/chi2.o \
|
36
|
+
tract.o istree.o apriori.o $(ADDOBJ)
|
37
|
+
|
38
|
+
#-----------------------------------------------------------------------
|
39
|
+
# Build Program
|
40
|
+
#-----------------------------------------------------------------------
|
41
|
+
# Default goal: build the apriori program.
# The command-style targets are declared phony so that a stray file named
# 'all', 'install', 'clean' or 'localclean' cannot mask them.
.PHONY: all install clean localclean
all: apriori
|
42
|
+
|
43
|
+
apriori: $(OBJS) makefile
|
44
|
+
$(CC) $(LDFLAGS) $(OBJS) $(LIBS) -o $@
|
45
|
+
|
46
|
+
#-----------------------------------------------------------------------
|
47
|
+
# Main Program
|
48
|
+
#-----------------------------------------------------------------------
|
49
|
+
apriori.o: tract.h istree.h $(UTILDIR)/symtab.h
|
50
|
+
apriori.o: apriori.c makefile
|
51
|
+
$(CC) $(CFLAGS) -c apriori.c -o $@
|
52
|
+
|
53
|
+
#-----------------------------------------------------------------------
|
54
|
+
# Item and Transaction Management
|
55
|
+
#-----------------------------------------------------------------------
|
56
|
+
tract.o: tract.h $(UTILDIR)/symtab.h
|
57
|
+
tract.o: tract.c makefile
|
58
|
+
$(CC) $(CFLAGS) -c tract.c -o $@
|
59
|
+
|
60
|
+
#-----------------------------------------------------------------------
|
61
|
+
# Frequent Item Set Tree Management
|
62
|
+
#-----------------------------------------------------------------------
|
63
|
+
istree.o: istree.h tract.h $(MATHDIR)/gamma.h
|
64
|
+
istree.o: istree.c makefile
|
65
|
+
$(CC) $(CFLAGS) -c istree.c -o $@
|
66
|
+
|
67
|
+
#-----------------------------------------------------------------------
|
68
|
+
# External Modules
|
69
|
+
#-----------------------------------------------------------------------
|
70
|
+
# Build vecops.o through the makefile in $(UTILDIR).
# '&&' keeps the sub-make from running in this directory if cd fails;
# the quotes keep a multi-word ADDFLAGS value as one assignment.
$(UTILDIR)/vecops.o:
	cd $(UTILDIR) && $(MAKE) vecops.o ADDFLAGS='$(ADDFLAGS)'
|
72
|
+
# Build nimap.o through the makefile in $(UTILDIR).
# '&&' keeps the sub-make from running in this directory if cd fails;
# the quotes keep a multi-word ADDFLAGS value as one assignment.
$(UTILDIR)/nimap.o:
	cd $(UTILDIR) && $(MAKE) nimap.o ADDFLAGS='$(ADDFLAGS)'
|
74
|
+
# Build tabscan.o through the makefile in $(UTILDIR).
# '&&' keeps the sub-make from running in this directory if cd fails;
# the quotes keep a multi-word ADDFLAGS value as one assignment.
$(UTILDIR)/tabscan.o:
	cd $(UTILDIR) && $(MAKE) tabscan.o ADDFLAGS='$(ADDFLAGS)'
|
76
|
+
# Build scform.o through the makefile in $(UTILDIR).
# '&&' keeps the sub-make from running in this directory if cd fails;
# the quotes keep a multi-word ADDFLAGS value as one assignment.
$(UTILDIR)/scform.o:
	cd $(UTILDIR) && $(MAKE) scform.o ADDFLAGS='$(ADDFLAGS)'
|
78
|
+
# Build gamma.o through the makefile in $(MATHDIR).
# '&&' keeps the sub-make from running in this directory if cd fails;
# the quotes keep a multi-word ADDFLAGS value as one assignment.
$(MATHDIR)/gamma.o:
	cd $(MATHDIR) && $(MAKE) gamma.o ADDFLAGS='$(ADDFLAGS)'
|
80
|
+
# Build chi2.o through the makefile in $(MATHDIR).
# '&&' keeps the sub-make from running in this directory if cd fails;
# the quotes keep a multi-word ADDFLAGS value as one assignment.
$(MATHDIR)/chi2.o:
	cd $(MATHDIR) && $(MAKE) chi2.o ADDFLAGS='$(ADDFLAGS)'
|
82
|
+
|
83
|
+
#-----------------------------------------------------------------------
|
84
|
+
# Storage Debugging
|
85
|
+
#-----------------------------------------------------------------------
|
86
|
+
storage.o: ../../misc/src/storage.h
|
87
|
+
storage.o: ../../misc/src/storage.c
|
88
|
+
$(CC) $(CFLAGS) -c ../../misc/src/storage.c -o $@
|
89
|
+
|
90
|
+
#-----------------------------------------------------------------------
|
91
|
+
# Install
|
92
|
+
#-----------------------------------------------------------------------
|
93
|
+
# Copy the program into the user's bin directory.
# Depends on 'apriori' so that installing from a clean tree builds the
# binary first instead of failing in cp.
install: apriori
	cp apriori $(HOME)/bin
|
95
|
+
|
96
|
+
#-----------------------------------------------------------------------
|
97
|
+
# Clean up
|
98
|
+
#-----------------------------------------------------------------------
|
99
|
+
# Remove local build products, then run 'clean' in the util and math
# module directories.  '&&' is required here: with ';' a failed cd would
# run '$(MAKE) clean' in this directory again and recurse without end.
clean:
	rm -f *.o *~ *.flc core apriori
	cd $(UTILDIR) && $(MAKE) clean
	cd $(MATHDIR) && $(MAKE) clean
|
103
|
+
|
104
|
+
localclean:
|
105
|
+
rm -f *.o *~ *.flc core apriori
|