jashmenn-apriori 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (122) hide show
  1. data/History.txt +4 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +121 -0
  4. data/README.txt +139 -0
  5. data/Rakefile +4 -0
  6. data/TODO.txt +60 -0
  7. data/attic/c_ext_test1/MyTest/MyTest.c +23 -0
  8. data/attic/c_ext_test1/MyTest/extconf.rb +11 -0
  9. data/attic/c_ext_test1/mytest.rb +10 -0
  10. data/attic/test.c +12 -0
  11. data/config/hoe.rb +77 -0
  12. data/config/requirements.rb +15 -0
  13. data/examples/01_simple_example.rb +23 -0
  14. data/examples/02_small_file_example.rb +17 -0
  15. data/examples/03_large_file_example.rb +22 -0
  16. data/examples/test_data/market_basket_basic_test.dat +9 -0
  17. data/ext/Apriori.c +149 -0
  18. data/ext/Makefile +149 -0
  19. data/ext/apriori/doc/apriori.html +1301 -0
  20. data/ext/apriori/doc/arem.gp +68 -0
  21. data/ext/apriori/doc/c_rev.gp +89 -0
  22. data/ext/apriori/doc/chi2.tex +156 -0
  23. data/ext/apriori/doc/copying +504 -0
  24. data/ext/apriori/doc/line.gif +0 -0
  25. data/ext/apriori/doc/uparrow.gif +0 -0
  26. data/ext/apriori/ex/flg2set +15 -0
  27. data/ext/apriori/ex/hdr2set +13 -0
  28. data/ext/apriori/ex/readme +71 -0
  29. data/ext/apriori/ex/row2set +7 -0
  30. data/ext/apriori/ex/rulesort +24 -0
  31. data/ext/apriori/ex/tab2set +9 -0
  32. data/ext/apriori/ex/test.app +2 -0
  33. data/ext/apriori/ex/test.rul +9 -0
  34. data/ext/apriori/ex/test1.rul +43 -0
  35. data/ext/apriori/ex/test1.tab +10 -0
  36. data/ext/apriori/ex/test2.tab +10 -0
  37. data/ext/apriori/ex/test3.tab +30 -0
  38. data/ext/apriori/ex/test4.tab +11 -0
  39. data/ext/apriori/ex/test5.tab +39 -0
  40. data/ext/apriori/ex/tid2set +23 -0
  41. data/ext/apriori/ex/xhdr2set +33 -0
  42. data/ext/apriori/src/apriori.c +750 -0
  43. data/ext/apriori/src/apriori.dsp +120 -0
  44. data/ext/apriori/src/apriori.dsw +29 -0
  45. data/ext/apriori/src/apriori.mak +99 -0
  46. data/ext/apriori/src/istree.c +1411 -0
  47. data/ext/apriori/src/istree.h +160 -0
  48. data/ext/apriori/src/makefile +105 -0
  49. data/ext/apriori/src/tract.c +870 -0
  50. data/ext/apriori/src/tract.h +261 -0
  51. data/ext/apriori_wrapper.c +757 -0
  52. data/ext/apriori_wrapper.h +10 -0
  53. data/ext/extconf.rb +32 -0
  54. data/ext/math/doc/copying +504 -0
  55. data/ext/math/src/chi2.c +151 -0
  56. data/ext/math/src/chi2.h +27 -0
  57. data/ext/math/src/choose.c +71 -0
  58. data/ext/math/src/choose.h +16 -0
  59. data/ext/math/src/gamma.c +446 -0
  60. data/ext/math/src/gamma.h +39 -0
  61. data/ext/math/src/intexp.c +35 -0
  62. data/ext/math/src/intexp.h +15 -0
  63. data/ext/math/src/makefile +164 -0
  64. data/ext/math/src/math.mak +48 -0
  65. data/ext/math/src/normal.c +387 -0
  66. data/ext/math/src/normal.h +44 -0
  67. data/ext/math/src/radfn.c +113 -0
  68. data/ext/math/src/radfn.h +34 -0
  69. data/ext/math/src/zeta.c +49 -0
  70. data/ext/math/src/zeta.h +15 -0
  71. data/ext/pre-clean.rb +8 -0
  72. data/ext/pre-setup.rb +9 -0
  73. data/ext/util/doc/copying +504 -0
  74. data/ext/util/src/listops.c +76 -0
  75. data/ext/util/src/listops.h +26 -0
  76. data/ext/util/src/makefile +103 -0
  77. data/ext/util/src/memsys.c +84 -0
  78. data/ext/util/src/memsys.h +42 -0
  79. data/ext/util/src/nstats.c +288 -0
  80. data/ext/util/src/nstats.h +69 -0
  81. data/ext/util/src/params.c +86 -0
  82. data/ext/util/src/params.h +19 -0
  83. data/ext/util/src/parse.c +133 -0
  84. data/ext/util/src/parse.h +81 -0
  85. data/ext/util/src/scan.c +767 -0
  86. data/ext/util/src/scan.h +111 -0
  87. data/ext/util/src/symtab.c +443 -0
  88. data/ext/util/src/symtab.h +121 -0
  89. data/ext/util/src/tabscan.c +279 -0
  90. data/ext/util/src/tabscan.h +99 -0
  91. data/ext/util/src/util.mak +91 -0
  92. data/ext/util/src/vecops.c +317 -0
  93. data/ext/util/src/vecops.h +42 -0
  94. data/lib/apriori/adapter.rb +13 -0
  95. data/lib/apriori/association_rule.rb +85 -0
  96. data/lib/apriori/version.rb +9 -0
  97. data/lib/apriori.rb +133 -0
  98. data/script/console +10 -0
  99. data/script/destroy +14 -0
  100. data/script/generate +14 -0
  101. data/script/txt2html +82 -0
  102. data/setup.rb +1585 -0
  103. data/tasks/apriori.rake +20 -0
  104. data/tasks/attic.rake +28 -0
  105. data/tasks/deployment.rake +34 -0
  106. data/tasks/environment.rake +7 -0
  107. data/tasks/install.rake +6 -0
  108. data/tasks/website.rake +17 -0
  109. data/test/apriori_test.rb +13 -0
  110. data/test/fixtures/market_basket_results_test.txt +5 -0
  111. data/test/fixtures/market_basket_string_test.txt +7 -0
  112. data/test/fixtures/results.txt +2 -0
  113. data/test/fixtures/sample.txt +7 -0
  114. data/test/test_helper.rb +5 -0
  115. data/test/unit/test_apriori.rb +68 -0
  116. data/test/unit/test_itemsets_and_parsing.rb +82 -0
  117. data/website/index.html +233 -0
  118. data/website/index.txt +142 -0
  119. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  120. data/website/stylesheets/screen.css +142 -0
  121. data/website/template.html.erb +49 -0
  122. metadata +188 -0
@@ -0,0 +1,160 @@
1
+ /*----------------------------------------------------------------------
2
+ File : istree.h
3
+ Contents: item set tree management
4
+ Author : Christian Borgelt
5
+ History : 1996.01.22 file created
6
+ 1996.01.29 ISNODE.offset and ISNODE.id added
7
+ 1996.02.08 ISTREE.tacnt, ISTREE.curr, ISTREE.index,
8
+ ISTREE.head and ISTREE.conf added
9
+ 1996.03.28 support made relative to number of item sets
10
+ 1996.11.23 ISTREE.levels (first nodes of each level) added
11
+ 1996.11.24 ISTREE.arem (add. rule evaluation measure) added
12
+ 1997.08.18 chi^2 evaluation measure added
13
+ parameter 'minlen' added to function ist_init()
14
+ 1998.02.11 parameter 'minval' added to function ist_init()
15
+ 1998.05.14 item set tree navigation functions added
16
+ 1998.08.08 parameter 'apps' added to function ist_create()
17
+ 1998.08.20 structure ISNODE redesigned
18
+ 1998.09.07 function ist_hedge added
19
+ 1998.12.08 function ist_gettac added,
20
+ float changed to double
21
+ 1999.02.05 long int changed to int
22
+ 1999.08.26 functions ist_first and ist_last added
23
+ 1999.11.05 rule evaluation measure EM_AIMP added
24
+ 1999.11.08 parameter 'aval' added to function ist_rule
25
+ 2001.04.01 functions ist_set and ist_getcntx added
26
+ 2001.12.28 sort function moved to module tract
27
+ 2002.02.07 function ist_clear removed, ist_settac added
28
+ 2002.02.11 optional use of identifier maps in nodes added
29
+ 2002.02.12 ist_first and ist_last replaced by ist_next
30
+ 2003.03.12 parameter lift added to function ist_rule
31
+ 2003.07.17 functions ist_itemcnt and ist_check added
32
+ 2003.07.18 function ist_maxfrq added (item set filter)
33
+ 2003.08.11 item set filtering generalized (ist_filter)
34
+ 2004.05.09 parameter 'aval' added to function ist_set
35
+ 2008.03.24 creation based on ITEMSET structure
36
+ ----------------------------------------------------------------------*/
37
+ #ifndef __ISTREE__
38
+ #define __ISTREE__
39
+ #include "tract.h"
40
+
41
+ /*----------------------------------------------------------------------
42
+ Preprocessor Definitions
43
+ ----------------------------------------------------------------------*/
44
+ /* --- additional evaluation measures --- */
45
+ #define EM_NONE 0 /* no measure */
46
+ #define EM_DIFF 1 /* absolute conf. difference to prior */
47
+ #define EM_QUOT 2 /* difference of conf. quotient to 1 */
48
+ #define EM_AIMP 3 /* abs. diff. of improvement to 1 */
49
+ #define EM_INFO 4 /* information difference to prior */
50
+ #define EM_CHI2 5 /* normalized chi^2 measure */
51
+ #define EM_PVAL 6 /* p-value of chi^2 measure */
52
+ #define EM_UNKNOWN 7 /* unknown measure */
53
+
54
+ /* --- item appearances --- */
55
+ #define IST_IGNORE 0 /* ignore item */
56
+ #define IST_BODY 1 /* item may appear in rule body */
57
+ #define IST_HEAD 2 /* item may appear in rule head */
58
+ #define IST_BOTH (IST_HEAD|IST_BODY)
59
+
60
+ /* --- search mode flags --- */
61
+ #define IST_MEMOPT 4 /* optimize memory usage */
62
+
63
+ /* --- item set filter modes --- */
64
+ #define IST_CLEAR 0 /* clear markers */
65
+ #define IST_CLOSED 1 /* closed item sets */
66
+ #define IST_MAXFRQ 2 /* maximal item sets */
67
+
68
+ /*----------------------------------------------------------------------
69
+ Type Definitions
70
+ ----------------------------------------------------------------------*/
71
+ typedef struct _isnode { /* --- item set node --- */
72
+ struct _isnode *parent; /* parent node */
73
+ struct _isnode *succ; /* successor node on same level */
74
+ int id; /* identifier used in parent node */
75
+ int chcnt; /* number of child nodes */
76
+ int size; /* size of counter vector */
77
+ int offset; /* offset of counter vector */
78
+ int cnts[1]; /* counter vector */
79
+ } ISNODE; /* (item set node) */
80
+
81
+ typedef struct { /* --- item set tree --- */
82
+ ITEMSET *set; /* underlying item set */
83
+ int mode; /* search mode (e.g. support def.) */
84
+ int tacnt; /* number of transactions */
85
+ int vsz; /* size of level vector */
86
+ int height; /* tree height (number of levels) */
87
+ ISNODE **lvls; /* first node of each level */
88
+ int rule; /* minimal support of an assoc. rule */
89
+ int supp; /* minimal support of an item set */
90
+ double conf; /* minimal confidence of a rule */
91
+ int arem; /* additional rule evaluation measure */
92
+ double minval; /* minimal evaluation measure value */
93
+ ISNODE *curr; /* current node for traversal */
94
+ int size; /* size of item set/rule/hyperedge */
95
+ ISNODE *node; /* item set node for extraction */
96
+ int index; /* index in item set node */
97
+ ISNODE *head; /* head item node for extraction */
98
+ int item; /* head item of previous rule */
99
+ int *buf; /* buffer for paths (support check) */
100
+ int *path; /* current path / (partial) item set */
101
+ int plen; /* current path length */
102
+ int hdonly; /* head only item in current set */
103
+ int *map; /* to create identifier maps */
104
+ #ifdef BENCH /* if benchmark version */
105
+ int sccnt; /* number of support counters */
106
+ int scnec; /* number of necessary supp. counters */
107
+ int cpcnt; /* number of child pointers */
108
+ int cpnec; /* number of necessary child pointers */
109
+ int bytes; /* number of bytes used */
110
+ #endif
111
+ } ISTREE; /* (item set tree) */
112
+
113
+ /*----------------------------------------------------------------------
114
+ Functions
115
+ ----------------------------------------------------------------------*/
116
+ extern ISTREE* ist_create (ITEMSET *set, int mode,
117
+ int supp, double conf);
118
+ extern void ist_delete (ISTREE *ist);
119
+ extern int ist_itemcnt (ISTREE *ist);
120
+
121
+ extern void ist_count (ISTREE *ist, int *set, int cnt);
122
+ extern void ist_countx (ISTREE *ist, TATREE *tat);
123
+ extern int ist_settac (ISTREE *ist, int cnt);
124
+ extern int ist_gettac (ISTREE *ist);
125
+ extern int ist_check (ISTREE *ist, char *marks);
126
+ extern int ist_addlvl (ISTREE *ist);
127
+ extern int ist_height (ISTREE *ist);
128
+
129
+ extern void ist_up (ISTREE *ist, int root);
130
+ extern int ist_down (ISTREE *ist, int item);
131
+ extern int ist_next (ISTREE *ist, int item);
132
+ extern void ist_setcnt (ISTREE *ist, int item, int cnt);
133
+ extern int ist_getcnt (ISTREE *ist, int item);
134
+ extern int ist_getcntx (ISTREE *ist, int *set, int cnt);
135
+
136
+ extern void ist_filter (ISTREE *ist, int mode);
137
+ extern void ist_init (ISTREE *ist, int minlen,
138
+ int arem, double minval);
139
+ extern int ist_set (ISTREE *ist, int *set, int *supp,
140
+ double *aval);
141
+ extern int ist_rule (ISTREE *ist, int *rule, int *supp,
142
+ double *conf, double *lift, double *aval);
143
+ extern int ist_hedge (ISTREE *ist, int *hedge, int *supp,
144
+ double *conf, double *aval);
145
+ extern int ist_group (ISTREE *ist, int *asmb, int *supp,
146
+ double *aval);
147
+
148
+ #ifndef NDEBUG
149
+ extern void ist_show (ISTREE *ist);
150
+ #endif
151
+
152
+ /*----------------------------------------------------------------------
153
+ Preprocessor Definitions
154
+ ----------------------------------------------------------------------*/
155
+ #define ist_itemcnt(t) ((t)->lvls[0]->size) /* counter vector size of the first level-0 node; ISTREE field is 'lvls' */
156
+ #define ist_settac(t,n) ((t)->tacnt = (n))
157
+ #define ist_gettac(t) ((t)->tacnt)
158
+ #define ist_height(t) ((t)->height)
159
+
160
+ #endif
@@ -0,0 +1,105 @@
1
+ #-----------------------------------------------------------------------
2
+ # File : makefile
3
+ # Contents: build apriori program
4
+ # Author : Christian Borgelt
5
+ # History : ??.??.1995 file created
6
+ # 1997.10.13 macro ADDFLAGS added
7
+ # 1997.12.07 minor improvements
8
+ # 1998.01.04 table scanner management added
9
+ # 1999.11.11 vector operations module added
10
+ # 2000.11.04 modules vecops, symtab, and tabscan made external
11
+ # 2001.11.18 module tract (transaction management) added
12
+ # 2003.12.12 preprocessor definition ARCH64 added
13
+ #-----------------------------------------------------------------------
14
+ CC = gcc
15
+ CFBASE = -ansi -Wall -pedantic -I$(UTILDIR) -I$(MATHDIR) $(ADDFLAGS)
16
+ CFLAGS = $(CFBASE) -DNDEBUG -O3
17
+ # CFLAGS = $(CFBASE) -DNDEBUG -O3 -DBENCH
18
+ # CFLAGS = $(CFBASE) -DNDEBUG -O3 -DARCH64
19
+ # CFLAGS = $(CFBASE) -g
20
+ # CFLAGS = $(CFBASE) -g -DARCH64
21
+ # CFLAGS = $(CFBASE) -g -DSTORAGE $(ADDINC)
22
+ LDFLAGS =
23
+ LIBS = -lm
24
+ # ADDINC = -I../../misc/src
25
+ # ADDOBJ = storage.o
26
+
27
+ UTILDIR = ../../util/src
28
+ MATHDIR = ../../math/src
29
+ HDRS = $(UTILDIR)/vecops.h $(UTILDIR)/symtab.h \
30
+ $(UTILDIR)/tabscan.h $(UTILDIR)/scan.h \
31
+ $(MATHDIR)/gamma.h $(MATHDIR)/chi2.h \
32
+ tract.h istree.h
33
+ OBJS = $(UTILDIR)/vecops.o $(UTILDIR)/nimap.o \
34
+ $(UTILDIR)/tabscan.o $(UTILDIR)/scform.o \
35
+ $(MATHDIR)/gamma.o $(MATHDIR)/chi2.o \
36
+ tract.o istree.o apriori.o $(ADDOBJ)
37
+
38
+ #-----------------------------------------------------------------------
39
+ # Build Program
40
+ #-----------------------------------------------------------------------
41
+ all: apriori
42
+
43
+ apriori: $(OBJS) makefile
44
+ $(CC) $(LDFLAGS) $(OBJS) $(LIBS) -o $@
45
+
46
+ #-----------------------------------------------------------------------
47
+ # Main Program
48
+ #-----------------------------------------------------------------------
49
+ apriori.o: tract.h istree.h $(UTILDIR)/symtab.h
50
+ apriori.o: apriori.c makefile
51
+ $(CC) $(CFLAGS) -c apriori.c -o $@
52
+
53
+ #-----------------------------------------------------------------------
54
+ # Item and Transaction Management
55
+ #-----------------------------------------------------------------------
56
+ tract.o: tract.h $(UTILDIR)/symtab.h
57
+ tract.o: tract.c makefile
58
+ $(CC) $(CFLAGS) -c tract.c -o $@
59
+
60
+ #-----------------------------------------------------------------------
61
+ # Frequent Item Set Tree Management
62
+ #-----------------------------------------------------------------------
63
+ istree.o: istree.h tract.h $(MATHDIR)/gamma.h
64
+ istree.o: istree.c makefile
65
+ $(CC) $(CFLAGS) -c istree.c -o $@
66
+
67
+ #-----------------------------------------------------------------------
68
+ # External Modules
69
+ #-----------------------------------------------------------------------
70
+ $(UTILDIR)/vecops.o: # delegated to the sub-make; '&&' keeps it from running here if cd fails
71
+ cd $(UTILDIR) && $(MAKE) vecops.o ADDFLAGS='$(ADDFLAGS)'
72
+ $(UTILDIR)/nimap.o: # ADDFLAGS is quoted so several flags stay one argument
73
+ cd $(UTILDIR) && $(MAKE) nimap.o ADDFLAGS='$(ADDFLAGS)'
74
+ $(UTILDIR)/tabscan.o:
75
+ cd $(UTILDIR) && $(MAKE) tabscan.o ADDFLAGS='$(ADDFLAGS)'
76
+ $(UTILDIR)/scform.o:
77
+ cd $(UTILDIR) && $(MAKE) scform.o ADDFLAGS='$(ADDFLAGS)'
78
+ $(MATHDIR)/gamma.o:
79
+ cd $(MATHDIR) && $(MAKE) gamma.o ADDFLAGS='$(ADDFLAGS)'
80
+ $(MATHDIR)/chi2.o:
81
+ cd $(MATHDIR) && $(MAKE) chi2.o ADDFLAGS='$(ADDFLAGS)'
82
+
83
+ #-----------------------------------------------------------------------
84
+ # Storage Debugging
85
+ #-----------------------------------------------------------------------
86
+ storage.o: ../../misc/src/storage.h
87
+ storage.o: ../../misc/src/storage.c
88
+ $(CC) $(CFLAGS) -c ../../misc/src/storage.c -o $@
89
+
90
+ #-----------------------------------------------------------------------
91
+ # Install
92
+ #-----------------------------------------------------------------------
93
+ install: apriori # build (or refresh) the program before copying it to $(HOME)/bin
94
+ cp apriori $(HOME)/bin
95
+
96
+ #-----------------------------------------------------------------------
97
+ # Clean up
98
+ #-----------------------------------------------------------------------
99
+ clean: # remove local build products, then clean both external module directories
100
+ rm -f *.o *~ *.flc core apriori
101
+ cd $(UTILDIR) && $(MAKE) clean
102
+ cd $(MATHDIR) && $(MAKE) clean
103
+
104
+ localclean: # remove build products in this directory only; no sub-make is run
105
+ rm -f *.o *~ *.flc core apriori