jashmenn-apriori 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (122) hide show
  1. data/History.txt +4 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +121 -0
  4. data/README.txt +139 -0
  5. data/Rakefile +4 -0
  6. data/TODO.txt +60 -0
  7. data/attic/c_ext_test1/MyTest/MyTest.c +23 -0
  8. data/attic/c_ext_test1/MyTest/extconf.rb +11 -0
  9. data/attic/c_ext_test1/mytest.rb +10 -0
  10. data/attic/test.c +12 -0
  11. data/config/hoe.rb +77 -0
  12. data/config/requirements.rb +15 -0
  13. data/examples/01_simple_example.rb +23 -0
  14. data/examples/02_small_file_example.rb +17 -0
  15. data/examples/03_large_file_example.rb +22 -0
  16. data/examples/test_data/market_basket_basic_test.dat +9 -0
  17. data/ext/Apriori.c +149 -0
  18. data/ext/Makefile +149 -0
  19. data/ext/apriori/doc/apriori.html +1301 -0
  20. data/ext/apriori/doc/arem.gp +68 -0
  21. data/ext/apriori/doc/c_rev.gp +89 -0
  22. data/ext/apriori/doc/chi2.tex +156 -0
  23. data/ext/apriori/doc/copying +504 -0
  24. data/ext/apriori/doc/line.gif +0 -0
  25. data/ext/apriori/doc/uparrow.gif +0 -0
  26. data/ext/apriori/ex/flg2set +15 -0
  27. data/ext/apriori/ex/hdr2set +13 -0
  28. data/ext/apriori/ex/readme +71 -0
  29. data/ext/apriori/ex/row2set +7 -0
  30. data/ext/apriori/ex/rulesort +24 -0
  31. data/ext/apriori/ex/tab2set +9 -0
  32. data/ext/apriori/ex/test.app +2 -0
  33. data/ext/apriori/ex/test.rul +9 -0
  34. data/ext/apriori/ex/test1.rul +43 -0
  35. data/ext/apriori/ex/test1.tab +10 -0
  36. data/ext/apriori/ex/test2.tab +10 -0
  37. data/ext/apriori/ex/test3.tab +30 -0
  38. data/ext/apriori/ex/test4.tab +11 -0
  39. data/ext/apriori/ex/test5.tab +39 -0
  40. data/ext/apriori/ex/tid2set +23 -0
  41. data/ext/apriori/ex/xhdr2set +33 -0
  42. data/ext/apriori/src/apriori.c +750 -0
  43. data/ext/apriori/src/apriori.dsp +120 -0
  44. data/ext/apriori/src/apriori.dsw +29 -0
  45. data/ext/apriori/src/apriori.mak +99 -0
  46. data/ext/apriori/src/istree.c +1411 -0
  47. data/ext/apriori/src/istree.h +160 -0
  48. data/ext/apriori/src/makefile +105 -0
  49. data/ext/apriori/src/tract.c +870 -0
  50. data/ext/apriori/src/tract.h +261 -0
  51. data/ext/apriori_wrapper.c +757 -0
  52. data/ext/apriori_wrapper.h +10 -0
  53. data/ext/extconf.rb +32 -0
  54. data/ext/math/doc/copying +504 -0
  55. data/ext/math/src/chi2.c +151 -0
  56. data/ext/math/src/chi2.h +27 -0
  57. data/ext/math/src/choose.c +71 -0
  58. data/ext/math/src/choose.h +16 -0
  59. data/ext/math/src/gamma.c +446 -0
  60. data/ext/math/src/gamma.h +39 -0
  61. data/ext/math/src/intexp.c +35 -0
  62. data/ext/math/src/intexp.h +15 -0
  63. data/ext/math/src/makefile +164 -0
  64. data/ext/math/src/math.mak +48 -0
  65. data/ext/math/src/normal.c +387 -0
  66. data/ext/math/src/normal.h +44 -0
  67. data/ext/math/src/radfn.c +113 -0
  68. data/ext/math/src/radfn.h +34 -0
  69. data/ext/math/src/zeta.c +49 -0
  70. data/ext/math/src/zeta.h +15 -0
  71. data/ext/pre-clean.rb +8 -0
  72. data/ext/pre-setup.rb +9 -0
  73. data/ext/util/doc/copying +504 -0
  74. data/ext/util/src/listops.c +76 -0
  75. data/ext/util/src/listops.h +26 -0
  76. data/ext/util/src/makefile +103 -0
  77. data/ext/util/src/memsys.c +84 -0
  78. data/ext/util/src/memsys.h +42 -0
  79. data/ext/util/src/nstats.c +288 -0
  80. data/ext/util/src/nstats.h +69 -0
  81. data/ext/util/src/params.c +86 -0
  82. data/ext/util/src/params.h +19 -0
  83. data/ext/util/src/parse.c +133 -0
  84. data/ext/util/src/parse.h +81 -0
  85. data/ext/util/src/scan.c +767 -0
  86. data/ext/util/src/scan.h +111 -0
  87. data/ext/util/src/symtab.c +443 -0
  88. data/ext/util/src/symtab.h +121 -0
  89. data/ext/util/src/tabscan.c +279 -0
  90. data/ext/util/src/tabscan.h +99 -0
  91. data/ext/util/src/util.mak +91 -0
  92. data/ext/util/src/vecops.c +317 -0
  93. data/ext/util/src/vecops.h +42 -0
  94. data/lib/apriori/adapter.rb +13 -0
  95. data/lib/apriori/association_rule.rb +85 -0
  96. data/lib/apriori/version.rb +9 -0
  97. data/lib/apriori.rb +133 -0
  98. data/script/console +10 -0
  99. data/script/destroy +14 -0
  100. data/script/generate +14 -0
  101. data/script/txt2html +82 -0
  102. data/setup.rb +1585 -0
  103. data/tasks/apriori.rake +20 -0
  104. data/tasks/attic.rake +28 -0
  105. data/tasks/deployment.rake +34 -0
  106. data/tasks/environment.rake +7 -0
  107. data/tasks/install.rake +6 -0
  108. data/tasks/website.rake +17 -0
  109. data/test/apriori_test.rb +13 -0
  110. data/test/fixtures/market_basket_results_test.txt +5 -0
  111. data/test/fixtures/market_basket_string_test.txt +7 -0
  112. data/test/fixtures/results.txt +2 -0
  113. data/test/fixtures/sample.txt +7 -0
  114. data/test/test_helper.rb +5 -0
  115. data/test/unit/test_apriori.rb +68 -0
  116. data/test/unit/test_itemsets_and_parsing.rb +82 -0
  117. data/website/index.html +233 -0
  118. data/website/index.txt +142 -0
  119. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  120. data/website/stylesheets/screen.css +142 -0
  121. data/website/template.html.erb +49 -0
  122. metadata +188 -0
@@ -0,0 +1,261 @@
1
+ /*----------------------------------------------------------------------
2
+ File : tract.h
3
+ Contents: item and transaction management
4
+ Author : Christian Borgelt
5
+ History : 2001.11.18 file created from file apriori.c
6
+ 2001.12.28 first version completed
7
+ 2002.01.02 ta_sort mapped to v_intsort
8
+ 2002.02.19 transaction tree functions added
9
+ 2003.07.17 functions is_filter, ta_filter, tas_filter added
10
+ 2003.08.21 parameter 'heap' added to tas_sort, tat_create
11
+ 2003.09.12 function tas_total added
12
+ 2003.09.20 empty transactions in input made possible
13
+ 2004.11.20 function tat_mark added
14
+ 2004.12.11 access functions for extended frequency added
15
+ 2004.12.15 function nim_trunc added
16
+ 2006.11.26 structures ISFMTR and ISEVAL added
17
+ 2007.02.13 adapted to modified tabscan module
18
+ 2008.06.30 support argument to ise_eval changed to double
19
+ ----------------------------------------------------------------------*/
20
+ #ifndef __TRACT__
21
+ #define __TRACT__
22
+ #ifndef NIMAPFN /* make sure NIMAPFN is defined before symtab.h is included below */
23
+ #define NIMAPFN /* NOTE(review): presumably switches on the NIMAP (name/identifier map) part of symtab.h - confirm there */
24
+ #endif
25
+ #include "vecops.h"
26
+ #include "symtab.h"
27
+ #include "tabscan.h"
28
+
29
+ /*----------------------------------------------------------------------
30
+ Preprocessor Definitions
31
+ ----------------------------------------------------------------------*/
32
+ /* --- item appearance flags (bit values; APP_BOTH is the OR of the two single flags) --- */
33
+ #define APP_NONE 0x00 /* item should be ignored */
34
+ #define APP_BODY 0x01 /* item may appear in rule body */
35
+ #define APP_HEAD 0x02 /* item may appear in rule head */
36
+ #define APP_BOTH (APP_HEAD|APP_BODY) /* item may appear in rule body and in rule head */
37
+
38
+ /* --- error codes (E_NONE is 0, every failure code is negative; -1..-4 memory/file errors, -16..-20 input format errors) --- */
39
+ #define E_NONE 0 /* no error */
40
+ #define E_NOMEM (-1) /* not enough memory */
41
+ #define E_FOPEN (-2) /* cannot open file */
42
+ #define E_FREAD (-3) /* read error on file */
43
+ #define E_FWRITE (-4) /* write error on file */
44
+
45
+ #define E_ITEMEXP (-16) /* item expected */
46
+ #define E_DUPITEM (-17) /* duplicate item */
47
+ #define E_APPEXP (-18) /* appearance indicator expected */
48
+ #define E_UNKAPP (-19) /* unknown appearance indicator */
49
+ #define E_FLDCNT (-20) /* too many fields */
50
+
51
+ /*----------------------------------------------------------------------
52
+ Type Definitions
53
+ ----------------------------------------------------------------------*/
54
+ typedef struct { /* --- an item: the record the is_getfrq ... is_setapp macros obtain by casting the result of nim_byid --- */
55
+ int id; /* item identifier */
56
+ int frq; /* frequency in transactions (accessed by is_getfrq, is_setfrq, is_addfrq) */
57
+ int xfq; /* extended frequency (t.a. sizes) (accessed by is_getxfq, is_setxfq) */
58
+ int app; /* appearance indicator (accessed by is_getapp, is_setapp); NOTE(review): presumably one of the APP_ flags - confirm */
59
+ } ITEM; /* (item) */
60
+
61
+ typedef struct { /* --- an item set: item name map plus a buffer for the current transaction --- */
62
+ NIMAP *nimap; /* name/identifier map (used by is_cnt, is_name, is_trunc and the item field accessors) */
63
+ TABSCAN *tscan; /* table scanner (returned by is_tabscan) */
64
+ char chars[4]; /* special characters; NOTE(review): presumably one each for the four classes passed to is_chars - confirm */
65
+ int tac; /* transaction counter (accessed by is_gettac, is_settac, is_addtac) */
66
+ int app; /* default appearance indicator */
67
+ int vsz; /* size of transaction buffer */
68
+ int cnt; /* number of items in transaction (returned by is_tsize) */
69
+ int *items; /* items in transaction (returned by is_tract) */
70
+ } ITEMSET; /* (item set) */
71
+
72
+ typedef struct { /* --- an item set evaluator (released with a plain free(), see ise_delete) --- */
73
+ double logta; /* logarithm of num. of transactions */
74
+ double *logfs; /* logarithms of item frequencies */
75
+ double lsums[1]; /* sums of logarithms for prefixes; last member, declared with one element - NOTE(review): presumably over-allocated by ise_create, confirm */
76
+ } ISEVAL; /* (item set evaluator) */
77
+
78
+ typedef struct { /* --- item set formatter: holds formatted item names and an output buffer --- */
79
+ int cnt; /* number of formatted item names */
80
+ int len; /* length of description in buffer (returned by isf_length, byte count written by isf_print) */
81
+ int *offs; /* prefix lengths in output buffer */
82
+ char *buf; /* output buffer (written to a stream by isf_print) */
83
+ const char *names[1]; /* formatted item names; last member, declared with one element - NOTE(review): presumably over-allocated by isf_create, confirm */
84
+ } ISFMTR; /* (item set formatter) */
85
+
86
+ typedef struct { /* --- a transaction: item count followed by the item identifiers --- */
87
+ int cnt; /* number of items (returned by tas_tsize) */
88
+ int items[1]; /* item identifier vector (returned by tas_tract); last member, declared with one element - NOTE(review): presumably sized to cnt on allocation, confirm */
89
+ } TRACT; /* (transaction) */
90
+
91
+ typedef struct { /* --- a transaction set: vector of transactions over one item set --- */
92
+ ITEMSET *itemset; /* underlying item set (returned by tas_itemset) */
93
+ int max; /* maximum number of items per t.a. (returned by tas_max) */
94
+ int vsz; /* size of transaction vector */
95
+ int cnt; /* number of transactions (returned by tas_cnt) */
96
+ int total; /* total number of items (returned by tas_total) */
97
+ TRACT **tracts; /* transaction vector (indexed by tas_tract and tas_tsize, permuted by tas_shuffle) */
98
+ } TASET; /* (transaction set) */
99
+
100
+ typedef struct _tatree { /* --- a transaction tree (node); without ARCH64 the tat_child macro reads the child pointers from the memory directly behind the first 'size' entries of items --- */
101
+ int cnt; /* number of transactions (returned by tat_cnt) */
102
+ int max; /* size of largest transaction (returned by tat_max) */
103
+ int size; /* node size (number of children) (returned by tat_size) */
104
+ int items[1]; /* next items in rep. transactions (tat_items, tat_item); last member, declared with one element */
105
+ } TATREE; /* (transaction tree) */
106
+
107
+ /*----------------------------------------------------------------------
108
+ Item Set Functions
109
+ ----------------------------------------------------------------------*/
110
+ extern ITEMSET* is_create (int cnt); /* real function (no macro in this header); NOTE(review): meaning of cnt is not visible here - see tract.c */
111
+ extern void is_delete (ITEMSET *iset); /* real function (no macro in this header) */
112
+ extern TABSCAN* is_tabscan (ITEMSET *iset); /* shadowed by macro: yields iset->tscan */
113
+ extern void is_chars (ITEMSET *iset, const char *blanks,
114
+ const char *fldseps,
115
+ const char *recseps,
116
+ const char *cominds); /* real function; NOTE(review): presumably fills ITEMSET.chars from the four character classes - confirm */
117
+
118
+ extern int is_cnt (ITEMSET *iset); /* shadowed by macro: nim_cnt(iset->nimap) */
119
+ extern int is_item (ITEMSET *iset, const char *name); /* real function; NOTE(review): presumably maps an item name to its identifier - confirm */
120
+ extern const char* is_name (ITEMSET *iset, int item); /* shadowed by macro: nim_name(nim_byid(iset->nimap, item)) */
121
+
122
+ extern int is_gettac (ITEMSET *iset); /* shadowed by macro: reads iset->tac */
123
+ extern int is_settac (ITEMSET *iset, int cnt); /* shadowed by macro: assigns cnt to iset->tac, yields the assigned value */
124
+ extern int is_addtac (ITEMSET *iset, int cnt); /* shadowed by macro: adds cnt to iset->tac, yields the new value */
125
+ extern int is_getfrq (ITEMSET *iset, int item); /* shadowed by macro: reads frq of the ITEM found by nim_byid */
126
+ extern int is_setfrq (ITEMSET *iset, int item, int frq); /* shadowed by macro: assigns frq of that ITEM */
127
+ extern int is_addfrq (ITEMSET *iset, int item, int frq); /* shadowed by macro: adds to frq of that ITEM */
128
+ extern int is_getxfq (ITEMSET *iset, int item); /* shadowed by macro: reads xfq of that ITEM */
129
+ extern int is_setxfq (ITEMSET *iset, int item, int frq); /* shadowed by macro: assigns xfq of that ITEM */
130
+ extern int is_getapp (ITEMSET *iset, int item); /* shadowed by macro: reads app of that ITEM */
131
+ extern int is_setapp (ITEMSET *iset, int item, int app); /* shadowed by macro: assigns app of that ITEM */
132
+
133
+ extern int is_readapp (ITEMSET *iset, FILE *file); /* real function; NOTE(review): result is presumably E_NONE or a negative E_ code - confirm */
134
+ extern int is_read (ITEMSET *iset, FILE *file); /* real function; NOTE(review): presumably reads the next transaction into iset->items / iset->cnt - confirm */
135
+
136
+ extern int is_recode (ITEMSET *iset, int minfrq,
137
+ int dir, int *map); /* real function; semantics of minfrq, dir and map are not visible here - see tract.c */
138
+ extern void is_trunc (ITEMSET *iset, int cnt); /* shadowed by macro: nim_trunc(iset->nimap, cnt) */
139
+ extern int is_filter (ITEMSET *iset, const char *marks); /* real function (no macro in this header) */
140
+ extern int is_tsize (ITEMSET *iset); /* shadowed by macro: yields iset->cnt */
141
+ extern int* is_tract (ITEMSET *iset); /* shadowed by macro: yields iset->items */
142
+
143
+ /*----------------------------------------------------------------------
144
+ Item Set Evaluation Functions
145
+ ----------------------------------------------------------------------*/
146
+ extern ISEVAL* ise_create (ITEMSET *iset, int tacnt); /* real function (no macro in this header) */
147
+ extern void ise_delete (ISEVAL *eval); /* shadowed by macro: free(eval) */
148
+ extern double ise_eval (ISEVAL *eval, int *ids, int cnt, int pre,
149
+ double supp); /* real function; supp is the support argument (a double since 2008.06.30, see file history) */
150
+
151
+ /*----------------------------------------------------------------------
152
+ Item Set Formatting Functions
153
+ ----------------------------------------------------------------------*/
154
+ extern ISFMTR* isf_create (ITEMSET *iset, int scan); /* real function (no macro in this header) */
155
+ extern void isf_delete (ISFMTR *fmt); /* real function (no macro in this header) */
156
+ extern const char* isf_format (ISFMTR *fmt, int *ids, int cnt, int pre); /* real function; NOTE(review): presumably fills fmt->buf / fmt->len - confirm */
157
+ extern int isf_length (ISFMTR *fmt); /* shadowed by macro: yields fmt->len */
158
+ extern void isf_print (ISFMTR *fmt, FILE *out); /* shadowed by macro: fwrite of fmt->len chars from fmt->buf to out (the macro yields fwrite's result, unlike this void prototype) */
159
+
160
+ /*----------------------------------------------------------------------
161
+ Transaction Functions
162
+ ----------------------------------------------------------------------*/
163
+ extern void ta_sort (int *items, int n); /* shadowed by macro: v_intsort(items, n) */
164
+ extern int ta_unique (int *items, int n); /* real function; NOTE(review): presumably removes duplicates and returns the new count - confirm */
165
+ extern int ta_filter (int *items, int n, const char *marks); /* real function (no macro in this header) */
166
+
167
+ /*----------------------------------------------------------------------
168
+ Transaction Set Functions
169
+ ----------------------------------------------------------------------*/
170
+ extern TASET* tas_create (ITEMSET *itemset); /* real function (no macro in this header) */
171
+ extern void tas_delete (TASET *taset, int delis); /* real function; NOTE(review): delis presumably tells whether the item set is deleted too - confirm */
172
+ extern ITEMSET* tas_itemset (TASET *taset); /* shadowed by macro: yields taset->itemset */
173
+
174
+ extern int tas_cnt (TASET *taset); /* shadowed by macro: yields taset->cnt */
175
+ extern int tas_add (TASET *taset, const int *items, int n); /* real function (no macro in this header) */
176
+ extern int* tas_tract (TASET *taset, int index); /* shadowed by macro: taset->tracts[index]->items, no range check */
177
+ extern int tas_tsize (TASET *taset, int index); /* shadowed by macro: taset->tracts[index]->cnt, no range check */
178
+ extern int tas_total (TASET *taset); /* shadowed by macro: yields taset->total */
179
+
180
+ extern void tas_recode (TASET *taset, int *map, int cnt); /* real function (no macro in this header) */
181
+ extern int tas_filter (TASET *taset, const char *marks); /* real function (no macro in this header) */
182
+ extern void tas_shuffle (TASET *taset, double randfn(void)); /* shadowed by macro: v_shuffle(taset->tracts, taset->cnt, randfn) */
183
+ extern void tas_sort (TASET *taset, int heap); /* real function; parameter heap was added 2003.08.21 (see file history) */
184
+ extern int tas_occur (TASET *taset, const int *items, int n); /* real function (no macro in this header) */
185
+
186
+ #ifndef NDEBUG /* tas_show is declared only in builds without NDEBUG */
187
+ extern void tas_show (TASET *taset);
188
+ #endif
189
+
190
+ /*----------------------------------------------------------------------
191
+ Transaction Tree Functions
192
+ ----------------------------------------------------------------------*/
193
+ extern TATREE* tat_create (TASET *taset, int heap); /* real function; parameter heap was added 2003.08.21 (see file history) */
194
+ extern void tat_delete (TATREE *tat); /* real function (no macro in this header) */
195
+ extern int tat_cnt (TATREE *tat); /* shadowed by macro: yields tat->cnt */
196
+ extern int tat_max (TATREE *tat); /* shadowed by macro: yields tat->max */
197
+ extern int tat_size (TATREE *tat); /* shadowed by macro: yields tat->size */
198
+ extern int* tat_items (TATREE *tat); /* shadowed by macro: yields tat->items */
199
+ extern int tat_item (TATREE *tat, int index); /* shadowed by macro: tat->items[index], no range check */
200
+ extern TATREE* tat_child (TATREE *tat, int index); /* shadowed by a macro only when ARCH64 is not defined; otherwise this prototype is the only declaration */
201
+ extern void tat_mark (TATREE *tat); /* real function (no macro in this header) */
202
+
203
+ #ifndef NDEBUG /* tat_show is declared only in builds without NDEBUG */
204
+ extern void tat_show (TATREE *tat);
205
+ #endif
206
+
207
+ /*----------------------------------------------------------------------
208
+ Preprocessor Definitions
209
+ ----------------------------------------------------------------------*/
210
+ #define is_tabscan(s) ((s)->tscan) /* table scanner of item set s */
211
+
212
+ #define is_cnt(s) nim_cnt((s)->nimap) /* nim_cnt of the name/identifier map; NOTE(review): presumably the number of items - confirm in symtab.h */
213
+ #define is_name(s,i) nim_name(nim_byid((s)->nimap, i)) /* nim_name of the map entry looked up by identifier i */
214
+ #define is_gettac(s) ((s)->tac) /* read the transaction counter */
215
+ #define is_settac(s,n) ((s)->tac = (n)) /* set the transaction counter to n */
216
+ #define is_addtac(s,n) ((s)->tac += (n)) /* add n to the transaction counter */
217
+ #define is_getfrq(s,i) (((ITEM*)nim_byid((s)->nimap, i))->frq) /* read frq of item i; the nim_byid result is cast to ITEM* and not checked */
218
+ #define is_setfrq(s,i,f) (((ITEM*)nim_byid((s)->nimap, i))->frq = (f)) /* set frq of item i to f */
219
+ #define is_addfrq(s,i,f) (((ITEM*)nim_byid((s)->nimap, i))->frq += (f)) /* add f to frq of item i */
220
+ #define is_getxfq(s,i) (((ITEM*)nim_byid((s)->nimap, i))->xfq) /* read xfq (extended frequency) of item i */
221
+ #define is_setxfq(s,i,f) (((ITEM*)nim_byid((s)->nimap, i))->xfq = (f)) /* set xfq of item i to f */
222
+ #define is_getapp(s,i) (((ITEM*)nim_byid((s)->nimap, i))->app) /* read the appearance indicator of item i */
223
+ #define is_setapp(s,i,a) (((ITEM*)nim_byid((s)->nimap, i))->app = (a)) /* set the appearance indicator of item i to a */
224
+
225
+ #define is_trunc(s,n) nim_trunc((s)->nimap, n) /* forwards to nim_trunc on the name/identifier map */
226
+
227
+ #define is_tsize(s) ((s)->cnt) /* number of items in the item set's transaction buffer */
228
+ #define is_tract(s) ((s)->items) /* items in the item set's transaction buffer */
229
+
230
+ /*--------------------------------------------------------------------*/
231
+ #define ise_delete(e) free(e) /* an evaluator is released with a single free(); this header does not include stdlib.h itself */
232
+
233
+ /*--------------------------------------------------------------------*/
234
+ #define isf_length(f) ((f)->len) /* length of description in the formatter's buffer */
235
+ #define isf_print(f,o) fwrite((f)->buf, sizeof(char), (f)->len, o) /* write len chars of buf to stream o; the expression yields fwrite's return value */
236
+
237
+ /*--------------------------------------------------------------------*/
238
+ #define ta_sort(v,n) v_intsort(v,n) /* forwards to v_intsort; NOTE(review): presumably declared in vecops.h - confirm */
239
+
240
+ /*--------------------------------------------------------------------*/
241
+ #define tas_itemset(s) ((s)->itemset) /* underlying item set */
242
+ #define tas_cnt(s) ((s)->cnt) /* number of transactions */
243
+ #define tas_max(s) ((s)->max) /* maximum number of items per t.a.; this macro has no matching extern prototype in this header */
244
+
245
+ #define tas_tract(s,i) ((s)->tracts[i]->items) /* item vector of transaction i (no range check) */
246
+ #define tas_tsize(s,i) ((s)->tracts[i]->cnt) /* number of items of transaction i (no range check) */
247
+ #define tas_total(s) ((s)->total) /* total number of items */
248
+
249
+ #define tas_shuffle(s,f) v_shuffle((s)->tracts, (s)->cnt, f) /* forwards the transaction vector, its length and function f to v_shuffle */
250
+
251
+ /*--------------------------------------------------------------------*/
252
+ #define tat_cnt(t) ((t)->cnt) /* number of transactions */
253
+ #define tat_max(t) ((t)->max) /* size of largest transaction */
254
+ #define tat_size(t) ((t)->size) /* node size (number of children) */
255
+ #define tat_item(t,i) ((t)->items[i]) /* i-th item of the node (no range check) */
256
+ #define tat_items(t) ((t)->items) /* item vector of the node */
257
+ #ifndef ARCH64 /* the macro version of tat_child exists only without ARCH64 */
258
+ #define tat_child(t,i) (((TATREE**)((t)->items +(t)->size))[i]) /* treats the memory right behind items[size-1] as a vector of TATREE* and yields its i-th entry */
259
+ #endif /* NOTE(review): with ARCH64 tat_child is presumably a function in tract.c - confirm */
260
+
261
+ #endif