jashmenn-apriori 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/License.txt +20 -0
- data/Manifest.txt +121 -0
- data/README.txt +139 -0
- data/Rakefile +4 -0
- data/TODO.txt +60 -0
- data/attic/c_ext_test1/MyTest/MyTest.c +23 -0
- data/attic/c_ext_test1/MyTest/extconf.rb +11 -0
- data/attic/c_ext_test1/mytest.rb +10 -0
- data/attic/test.c +12 -0
- data/config/hoe.rb +77 -0
- data/config/requirements.rb +15 -0
- data/examples/01_simple_example.rb +23 -0
- data/examples/02_small_file_example.rb +17 -0
- data/examples/03_large_file_example.rb +22 -0
- data/examples/test_data/market_basket_basic_test.dat +9 -0
- data/ext/Apriori.c +149 -0
- data/ext/Makefile +149 -0
- data/ext/apriori/doc/apriori.html +1301 -0
- data/ext/apriori/doc/arem.gp +68 -0
- data/ext/apriori/doc/c_rev.gp +89 -0
- data/ext/apriori/doc/chi2.tex +156 -0
- data/ext/apriori/doc/copying +504 -0
- data/ext/apriori/doc/line.gif +0 -0
- data/ext/apriori/doc/uparrow.gif +0 -0
- data/ext/apriori/ex/flg2set +15 -0
- data/ext/apriori/ex/hdr2set +13 -0
- data/ext/apriori/ex/readme +71 -0
- data/ext/apriori/ex/row2set +7 -0
- data/ext/apriori/ex/rulesort +24 -0
- data/ext/apriori/ex/tab2set +9 -0
- data/ext/apriori/ex/test.app +2 -0
- data/ext/apriori/ex/test.rul +9 -0
- data/ext/apriori/ex/test1.rul +43 -0
- data/ext/apriori/ex/test1.tab +10 -0
- data/ext/apriori/ex/test2.tab +10 -0
- data/ext/apriori/ex/test3.tab +30 -0
- data/ext/apriori/ex/test4.tab +11 -0
- data/ext/apriori/ex/test5.tab +39 -0
- data/ext/apriori/ex/tid2set +23 -0
- data/ext/apriori/ex/xhdr2set +33 -0
- data/ext/apriori/src/apriori.c +750 -0
- data/ext/apriori/src/apriori.dsp +120 -0
- data/ext/apriori/src/apriori.dsw +29 -0
- data/ext/apriori/src/apriori.mak +99 -0
- data/ext/apriori/src/istree.c +1411 -0
- data/ext/apriori/src/istree.h +160 -0
- data/ext/apriori/src/makefile +105 -0
- data/ext/apriori/src/tract.c +870 -0
- data/ext/apriori/src/tract.h +261 -0
- data/ext/apriori_wrapper.c +757 -0
- data/ext/apriori_wrapper.h +10 -0
- data/ext/extconf.rb +32 -0
- data/ext/math/doc/copying +504 -0
- data/ext/math/src/chi2.c +151 -0
- data/ext/math/src/chi2.h +27 -0
- data/ext/math/src/choose.c +71 -0
- data/ext/math/src/choose.h +16 -0
- data/ext/math/src/gamma.c +446 -0
- data/ext/math/src/gamma.h +39 -0
- data/ext/math/src/intexp.c +35 -0
- data/ext/math/src/intexp.h +15 -0
- data/ext/math/src/makefile +164 -0
- data/ext/math/src/math.mak +48 -0
- data/ext/math/src/normal.c +387 -0
- data/ext/math/src/normal.h +44 -0
- data/ext/math/src/radfn.c +113 -0
- data/ext/math/src/radfn.h +34 -0
- data/ext/math/src/zeta.c +49 -0
- data/ext/math/src/zeta.h +15 -0
- data/ext/pre-clean.rb +8 -0
- data/ext/pre-setup.rb +9 -0
- data/ext/util/doc/copying +504 -0
- data/ext/util/src/listops.c +76 -0
- data/ext/util/src/listops.h +26 -0
- data/ext/util/src/makefile +103 -0
- data/ext/util/src/memsys.c +84 -0
- data/ext/util/src/memsys.h +42 -0
- data/ext/util/src/nstats.c +288 -0
- data/ext/util/src/nstats.h +69 -0
- data/ext/util/src/params.c +86 -0
- data/ext/util/src/params.h +19 -0
- data/ext/util/src/parse.c +133 -0
- data/ext/util/src/parse.h +81 -0
- data/ext/util/src/scan.c +767 -0
- data/ext/util/src/scan.h +111 -0
- data/ext/util/src/symtab.c +443 -0
- data/ext/util/src/symtab.h +121 -0
- data/ext/util/src/tabscan.c +279 -0
- data/ext/util/src/tabscan.h +99 -0
- data/ext/util/src/util.mak +91 -0
- data/ext/util/src/vecops.c +317 -0
- data/ext/util/src/vecops.h +42 -0
- data/lib/apriori/adapter.rb +13 -0
- data/lib/apriori/association_rule.rb +85 -0
- data/lib/apriori/version.rb +9 -0
- data/lib/apriori.rb +133 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/txt2html +82 -0
- data/setup.rb +1585 -0
- data/tasks/apriori.rake +20 -0
- data/tasks/attic.rake +28 -0
- data/tasks/deployment.rake +34 -0
- data/tasks/environment.rake +7 -0
- data/tasks/install.rake +6 -0
- data/tasks/website.rake +17 -0
- data/test/apriori_test.rb +13 -0
- data/test/fixtures/market_basket_results_test.txt +5 -0
- data/test/fixtures/market_basket_string_test.txt +7 -0
- data/test/fixtures/results.txt +2 -0
- data/test/fixtures/sample.txt +7 -0
- data/test/test_helper.rb +5 -0
- data/test/unit/test_apriori.rb +68 -0
- data/test/unit/test_itemsets_and_parsing.rb +82 -0
- data/website/index.html +233 -0
- data/website/index.txt +142 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +142 -0
- data/website/template.html.erb +49 -0
- metadata +188 -0
@@ -0,0 +1,261 @@
|
|
1
|
+
/*----------------------------------------------------------------------
  File    : tract.h
  Contents: item and transaction management
  Author  : Christian Borgelt
  History : 2001.11.18 file created from file apriori.c
            2001.12.28 first version completed
            2001.01.02 ta_sort mapped to v_intsort
            2002.02.19 transaction tree functions added
            2003.07.17 functions is_filter, ta_filter, tas_filter added
            2003.08.21 parameter 'heap' added to tas_sort, tat_create
            2003.09.12 function tas_total added
            2003.09.20 empty transactions in input made possible
            2004.11.20 function tat_mark added
            2004.12.11 access functions for extended frequency added
            2004.12.15 function nim_trunc added
            2006.11.26 structures ISFMTR and ISEVAL added
            2007.02.13 adapted to modified tabscan module
            2008.06.30 support argument to ise_eval changed to double
----------------------------------------------------------------------*/
|
20
|
+
#ifndef __TRACT__
|
21
|
+
#define __TRACT__
|
22
|
+
#ifndef NIMAPFN
|
23
|
+
#define NIMAPFN
|
24
|
+
#endif
|
25
|
+
#include "vecops.h"
|
26
|
+
#include "symtab.h"
|
27
|
+
#include "tabscan.h"
|
28
|
+
|
29
|
+
/*----------------------------------------------------------------------
  Preprocessor Definitions
----------------------------------------------------------------------*/
|
32
|
+
/* --- item appearance flags --- */
/* Bit flags that state where an item may appear in a rule.  APP_BOTH is
   the bitwise OR of APP_HEAD and APP_BODY; APP_NONE has no bit set. */
#define APP_NONE    0x00        /* item should be ignored */
#define APP_BODY    0x01        /* item may appear in rule body */
#define APP_HEAD    0x02        /* item may appear in rule head */
#define APP_BOTH    (APP_HEAD|APP_BODY)  /* body and head flags combined */
|
37
|
+
|
38
|
+
/* --- error codes --- */
/* E_NONE is zero and every failure code is negative.  The values -1 to
   -4 are used for memory and file errors, the values -16 to -20 for
   errors concerning items, appearance indicators and fields. */
#define E_NONE        0         /* no error */
#define E_NOMEM     (-1)        /* not enough memory */
#define E_FOPEN     (-2)        /* cannot open file */
#define E_FREAD     (-3)        /* read error on file */
#define E_FWRITE    (-4)        /* write error on file */

#define E_ITEMEXP  (-16)        /* item expected */
#define E_DUPITEM  (-17)        /* duplicate item */
#define E_APPEXP   (-18)        /* appearance indicator expected */
#define E_UNKAPP   (-19)        /* unknown appearance indicator */
#define E_FLDCNT   (-20)        /* too many fields */
|
50
|
+
|
51
|
+
/*----------------------------------------------------------------------
  Type Definitions
----------------------------------------------------------------------*/
|
54
|
+
/* Per-item record.  The accessor macros of this header (is_getfrq,
   is_getxfq, is_getapp, ...) cast the result of nim_byid() to ITEM*
   in order to reach the fields frq, xfq and app. */
typedef struct {                /* --- an item --- */
  int id;                       /* item identifier */
  int frq;                      /* frequency in transactions */
  int xfq;                      /* extended frequency (t.a. sizes) */
  int app;                      /* appearance indicator */
} ITEM;                         /* (item) */
|
60
|
+
|
61
|
+
typedef struct { /* --- an item set --- */
|
62
|
+
NIMAP *nimap; /* name/identifier map */
|
63
|
+
TABSCAN *tscan; /* table scanner */
|
64
|
+
char chars[4]; /* special characters */
|
65
|
+
int tac; /* transaction counter */
|
66
|
+
int app; /* default appearance indicator */
|
67
|
+
int vsz; /* size of transaction buffer */
|
68
|
+
int cnt; /* number of items in transaction */
|
69
|
+
int *items; /* items in transaction */
|
70
|
+
} ITEMSET; /* (item set) */
|
71
|
+
|
72
|
+
/* Item set evaluator.  lsums is declared with a single element and is
   the last member.
   NOTE(review): presumably the structure is over-allocated so that
   lsums can hold more than one entry -- confirm in the implementation.
   The declaration is deliberately left as [1] because changing it
   would change sizeof(ISEVAL). */
typedef struct {                /* --- an item set evaluator --- */
  double logta;                 /* logarithm of num. of transactions */
  double *logfs;                /* logarithms of item frequencies */
  double lsums[1];              /* sums of logarithms for prefixes */
} ISEVAL;                       /* (item set evaluator) */
|
77
|
+
|
78
|
+
/* Item set formatter.  buf and len are the fields read by the macros
   isf_print and isf_length of this header.  names is declared with a
   single element and is the last member.
   NOTE(review): presumably the structure is over-allocated so that
   names can hold one entry per item -- confirm in the implementation.
   The declaration is deliberately left as [1] because changing it
   would change sizeof(ISFMTR). */
typedef struct {                /* --- item set formatter --- */
  int        cnt;               /* number of formatted item names */
  int        len;               /* length of description in buffer */
  int        *offs;             /* prefix lengths in output buffer */
  char       *buf;              /* output buffer */
  const char *names[1];         /* formatted item names */
} ISFMTR;                       /* (item set formatter) */
|
85
|
+
|
86
|
+
/* A transaction: an item counter followed by the item identifiers.
   items is declared with a single element and is the last member.
   NOTE(review): presumably a TRACT is over-allocated so that items
   holds cnt entries -- confirm in the implementation.  The declaration
   is deliberately left as [1] because changing it would change
   sizeof(TRACT). */
typedef struct {                /* --- a transaction --- */
  int cnt;                      /* number of items */
  int items[1];                 /* item identifier vector */
} TRACT;                        /* (transaction) */
|
90
|
+
|
91
|
+
typedef struct { /* --- a transaction set --- */
|
92
|
+
ITEMSET *itemset; /* underlying item set */
|
93
|
+
int max; /* maximum number of items per t.a. */
|
94
|
+
int vsz; /* size of transaction vector */
|
95
|
+
int cnt; /* number of transactions */
|
96
|
+
int total; /* total number of items */
|
97
|
+
TRACT **tracts; /* transaction vector */
|
98
|
+
} TASET; /* (transaction set) */
|
99
|
+
|
100
|
+
/* A node of a transaction tree.  items is declared with a single
   element and is the last member; the macro tat_child of this header
   treats the memory that follows the first `size` entries of items as
   a vector of TATREE pointers.
   NOTE(review): presumably a node is over-allocated to hold `size`
   items plus the child pointers -- confirm in the implementation.  The
   declaration is deliberately left as [1] because changing it would
   change sizeof(TATREE). */
typedef struct _tatree {        /* --- a transaction tree (node) --- */
  int cnt;                      /* number of transactions */
  int max;                      /* size of largest transaction */
  int size;                     /* node size (number of children) */
  int items[1];                 /* next items in rep. transactions */
} TATREE;                       /* (transaction tree) */
|
106
|
+
|
107
|
+
/*----------------------------------------------------------------------
|
108
|
+
Item Set Functions
|
109
|
+
----------------------------------------------------------------------*/
|
110
|
+
extern ITEMSET* is_create (int cnt);
|
111
|
+
extern void is_delete (ITEMSET *iset);
|
112
|
+
extern TABSCAN* is_tabscan (ITEMSET *iset);
|
113
|
+
extern void is_chars (ITEMSET *iset, const char *blanks,
|
114
|
+
const char *fldseps,
|
115
|
+
const char *recseps,
|
116
|
+
const char *cominds);
|
117
|
+
|
118
|
+
extern int is_cnt (ITEMSET *iset);
|
119
|
+
extern int is_item (ITEMSET *iset, const char *name);
|
120
|
+
extern const char* is_name (ITEMSET *iset, int item);
|
121
|
+
|
122
|
+
extern int is_gettac (ITEMSET *iset);
|
123
|
+
extern int is_settac (ITEMSET *iset, int cnt);
|
124
|
+
extern int is_addtac (ITEMSET *iset, int cnt);
|
125
|
+
extern int is_getfrq (ITEMSET *iset, int item);
|
126
|
+
extern int is_setfrq (ITEMSET *iset, int item, int frq);
|
127
|
+
extern int is_addfrq (ITEMSET *iset, int item, int frq);
|
128
|
+
extern int is_getxfq (ITEMSET *iset, int item);
|
129
|
+
extern int is_setxfq (ITEMSET *iset, int item, int frq);
|
130
|
+
extern int is_getapp (ITEMSET *iset, int item);
|
131
|
+
extern int is_setapp (ITEMSET *iset, int item, int app);
|
132
|
+
|
133
|
+
extern int is_readapp (ITEMSET *iset, FILE *file);
|
134
|
+
extern int is_read (ITEMSET *iset, FILE *file);
|
135
|
+
|
136
|
+
extern int is_recode (ITEMSET *iset, int minfrq,
|
137
|
+
int dir, int *map);
|
138
|
+
extern void is_trunc (ITEMSET *iset, int cnt);
|
139
|
+
extern int is_filter (ITEMSET *iset, const char *marks);
|
140
|
+
extern int is_tsize (ITEMSET *iset);
|
141
|
+
extern int* is_tract (ITEMSET *iset);
|
142
|
+
|
143
|
+
/*----------------------------------------------------------------------
|
144
|
+
Item Set Evaluation Functions
|
145
|
+
----------------------------------------------------------------------*/
|
146
|
+
extern ISEVAL* ise_create (ITEMSET *iset, int tacnt);
|
147
|
+
extern void ise_delete (ISEVAL *eval);
|
148
|
+
extern double ise_eval (ISEVAL *eval, int *ids, int cnt, int pre,
|
149
|
+
double supp);
|
150
|
+
|
151
|
+
/*----------------------------------------------------------------------
|
152
|
+
Item Set Formatting Functions
|
153
|
+
----------------------------------------------------------------------*/
|
154
|
+
extern ISFMTR* isf_create (ITEMSET *iset, int scan);
|
155
|
+
extern void isf_delete (ISFMTR *fmt);
|
156
|
+
extern const char* isf_format (ISFMTR *fmt, int *ids, int cnt, int pre);
|
157
|
+
extern int isf_length (ISFMTR *fmt);
|
158
|
+
extern void isf_print (ISFMTR *fmt, FILE *out);
|
159
|
+
|
160
|
+
/*----------------------------------------------------------------------
  Transaction Functions
----------------------------------------------------------------------*/
/* Prototypes of functions that work on a plain vector of n item
   identifiers.  ta_sort is also defined as a function-like macro
   further down in this header (it expands to a call of v_intsort). */
extern void        ta_sort    (int *items, int n);
extern int         ta_unique  (int *items, int n);
extern int         ta_filter  (int *items, int n, const char *marks);
|
166
|
+
|
167
|
+
/*----------------------------------------------------------------------
|
168
|
+
Transaction Set Functions
|
169
|
+
----------------------------------------------------------------------*/
|
170
|
+
extern TASET* tas_create (ITEMSET *itemset);
|
171
|
+
extern void tas_delete (TASET *taset, int delis);
|
172
|
+
extern ITEMSET* tas_itemset (TASET *taset);
|
173
|
+
|
174
|
+
extern int tas_cnt (TASET *taset);
|
175
|
+
extern int tas_add (TASET *taset, const int *items, int n);
|
176
|
+
extern int* tas_tract (TASET *taset, int index);
|
177
|
+
extern int tas_tsize (TASET *taset, int index);
|
178
|
+
extern int tas_total (TASET *taset);
|
179
|
+
|
180
|
+
extern void tas_recode (TASET *taset, int *map, int cnt);
|
181
|
+
extern int tas_filter (TASET *taset, const char *marks);
|
182
|
+
extern void tas_shuffle (TASET *taset, double randfn(void));
|
183
|
+
extern void tas_sort (TASET *taset, int heap);
|
184
|
+
extern int tas_occur (TASET *taset, const int *items, int n);
|
185
|
+
|
186
|
+
#ifndef NDEBUG
|
187
|
+
extern void tas_show (TASET *taset);
|
188
|
+
#endif
|
189
|
+
|
190
|
+
/*----------------------------------------------------------------------
|
191
|
+
Transaction Tree Functions
|
192
|
+
----------------------------------------------------------------------*/
|
193
|
+
extern TATREE* tat_create (TASET *taset, int heap);
|
194
|
+
extern void tat_delete (TATREE *tat);
|
195
|
+
extern int tat_cnt (TATREE *tat);
|
196
|
+
extern int tat_max (TATREE *tat);
|
197
|
+
extern int tat_size (TATREE *tat);
|
198
|
+
extern int* tat_items (TATREE *tat);
|
199
|
+
extern int tat_item (TATREE *tat, int index);
|
200
|
+
extern TATREE* tat_child (TATREE *tat, int index);
|
201
|
+
extern void tat_mark (TATREE *tat);
|
202
|
+
|
203
|
+
#ifndef NDEBUG
|
204
|
+
extern void tat_show (TATREE *tat);
|
205
|
+
#endif
|
206
|
+
|
207
|
+
/*----------------------------------------------------------------------
  Preprocessor Definitions
----------------------------------------------------------------------*/
|
210
|
+
/* Macro implementations of the simple item set accessors.
   - is_tabscan, is_gettac, is_settac, is_addtac, is_tsize and is_tract
     read or write a field of the item set `s` directly;
   - is_cnt, is_name and is_trunc pass the name/identifier map of `s`
     on to nim_cnt, nim_name/nim_byid and nim_trunc;
   - the frq/xfq/app accessors cast the result of nim_byid() for item
     `i` to ITEM* and read or write the corresponding field.
   The set/add macros are assignment expressions, so their value is the
   value stored in the field. */
#define is_tabscan(s)     ((s)->tscan)

#define is_cnt(s)         nim_cnt((s)->nimap)
#define is_name(s,i)      nim_name(nim_byid((s)->nimap, i))
#define is_gettac(s)      ((s)->tac)
#define is_settac(s,n)    ((s)->tac = (n))
#define is_addtac(s,n)    ((s)->tac += (n))
#define is_getfrq(s,i)    (((ITEM*)nim_byid((s)->nimap, i))->frq)
#define is_setfrq(s,i,f)  (((ITEM*)nim_byid((s)->nimap, i))->frq = (f))
#define is_addfrq(s,i,f)  (((ITEM*)nim_byid((s)->nimap, i))->frq += (f))
#define is_getxfq(s,i)    (((ITEM*)nim_byid((s)->nimap, i))->xfq)
#define is_setxfq(s,i,f)  (((ITEM*)nim_byid((s)->nimap, i))->xfq = (f))
#define is_getapp(s,i)    (((ITEM*)nim_byid((s)->nimap, i))->app)
#define is_setapp(s,i,a)  (((ITEM*)nim_byid((s)->nimap, i))->app = (a))

#define is_trunc(s,n)     nim_trunc((s)->nimap, n)

#define is_tsize(s)       ((s)->cnt)
#define is_tract(s)       ((s)->items)
|
229
|
+
|
230
|
+
/*--------------------------------------------------------------------*/
|
231
|
+
/* An item set evaluator is released by passing it straight to free(). */
#define ise_delete(eval)  free(eval)
|
232
|
+
|
233
|
+
/*--------------------------------------------------------------------*/
|
234
|
+
/* Macro implementations of the formatter accessors.  isf_length yields
   the len field of formatter `f`; isf_print writes len characters
   from the buf field to stream `o` with fwrite, so the value of the
   macro expression is the result of that fwrite call. */
#define isf_length(f)     ((f)->len)
#define isf_print(f,o)    fwrite((f)->buf, sizeof(char), (f)->len, o)
|
236
|
+
|
237
|
+
/*--------------------------------------------------------------------*/
|
238
|
+
/* Sorting an item vector is delegated unchanged to v_intsort(). */
#define ta_sort(items,cnt) v_intsort(items,cnt)
|
239
|
+
|
240
|
+
/*--------------------------------------------------------------------*/
|
241
|
+
/* Macro implementations of the transaction set accessors.
   - tas_itemset, tas_cnt, tas_max and tas_total read a field of the
     transaction set `s` directly;
   - tas_tract and tas_tsize look up transaction `i` in the tracts
     vector and yield its item vector and its item counter;
   - tas_shuffle passes the tracts vector, the number of transactions
     and the function `f` on to v_shuffle.
   Neither `s` nor `i` is checked in any way by these macros. */
#define tas_itemset(s)    ((s)->itemset)
#define tas_cnt(s)        ((s)->cnt)
#define tas_max(s)        ((s)->max)

#define tas_tract(s,i)    ((s)->tracts[i]->items)
#define tas_tsize(s,i)    ((s)->tracts[i]->cnt)
#define tas_total(s)      ((s)->total)

#define tas_shuffle(s,f)  v_shuffle((s)->tracts, (s)->cnt, f)
|
250
|
+
|
251
|
+
/*--------------------------------------------------------------------*/
|
252
|
+
/* Macro implementations of the transaction tree accessors.  tat_cnt,
   tat_max, tat_size, tat_item and tat_items read a field of the tree
   node `t` directly.
   tat_child is defined as a macro only if ARCH64 is not defined: it
   takes the address that follows the first `size` entries of the
   items vector, treats it as a vector of TATREE pointers and yields
   entry `i` of that vector.  If ARCH64 is defined, no macro exists and
   a use of tat_child refers to the function of that name.
   NOTE(review): the macro is presumably withheld on 64 bit systems
   because of the alignment of the pointer vector -- confirm in the
   implementation. */
#define tat_cnt(t)        ((t)->cnt)
#define tat_max(t)        ((t)->max)
#define tat_size(t)       ((t)->size)
#define tat_item(t,i)     ((t)->items[i])
#define tat_items(t)      ((t)->items)
#ifndef ARCH64
#define tat_child(t,i)    (((TATREE**)((t)->items +(t)->size))[i])
#endif
|
260
|
+
|
261
|
+
#endif
|