apriori 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +16 -0
- data/License.txt +20 -0
- data/Manifest.txt +121 -0
- data/README.txt +149 -0
- data/Rakefile +15 -0
- data/TODO.txt +60 -0
- data/attic/c_ext_test1/MyTest/MyTest.c +23 -0
- data/attic/c_ext_test1/MyTest/extconf.rb +11 -0
- data/attic/c_ext_test1/mytest.rb +10 -0
- data/attic/test.c +12 -0
- data/config/hoe.rb +81 -0
- data/config/requirements.rb +29 -0
- data/examples/01_simple_example.rb +32 -0
- data/examples/02_small_file_example.rb +17 -0
- data/examples/03_large_file_example.rb +22 -0
- data/examples/test_data/market_basket_basic_test.dat +9 -0
- data/ext/Apriori.c +149 -0
- data/ext/Makefile +149 -0
- data/ext/apriori/doc/apriori.html +1301 -0
- data/ext/apriori/doc/arem.gp +68 -0
- data/ext/apriori/doc/c_rev.gp +89 -0
- data/ext/apriori/doc/chi2.tex +156 -0
- data/ext/apriori/doc/copying +504 -0
- data/ext/apriori/doc/line.gif +0 -0
- data/ext/apriori/doc/uparrow.gif +0 -0
- data/ext/apriori/ex/flg2set +15 -0
- data/ext/apriori/ex/hdr2set +13 -0
- data/ext/apriori/ex/readme +71 -0
- data/ext/apriori/ex/row2set +7 -0
- data/ext/apriori/ex/rulesort +24 -0
- data/ext/apriori/ex/tab2set +9 -0
- data/ext/apriori/ex/test.app +2 -0
- data/ext/apriori/ex/test.rul +9 -0
- data/ext/apriori/ex/test1.rul +43 -0
- data/ext/apriori/ex/test1.tab +10 -0
- data/ext/apriori/ex/test2.tab +10 -0
- data/ext/apriori/ex/test3.tab +30 -0
- data/ext/apriori/ex/test4.tab +11 -0
- data/ext/apriori/ex/test5.tab +39 -0
- data/ext/apriori/ex/tid2set +23 -0
- data/ext/apriori/ex/xhdr2set +33 -0
- data/ext/apriori/src/apriori.c +750 -0
- data/ext/apriori/src/apriori.dsp +120 -0
- data/ext/apriori/src/apriori.dsw +29 -0
- data/ext/apriori/src/apriori.mak +99 -0
- data/ext/apriori/src/istree.c +1411 -0
- data/ext/apriori/src/istree.h +160 -0
- data/ext/apriori/src/makefile +105 -0
- data/ext/apriori/src/tract.c +870 -0
- data/ext/apriori/src/tract.h +261 -0
- data/ext/apriori_wrapper.c +757 -0
- data/ext/apriori_wrapper.h +10 -0
- data/ext/extconf.rb +32 -0
- data/ext/math/doc/copying +504 -0
- data/ext/math/src/chi2.c +151 -0
- data/ext/math/src/chi2.h +27 -0
- data/ext/math/src/choose.c +71 -0
- data/ext/math/src/choose.h +16 -0
- data/ext/math/src/gamma.c +446 -0
- data/ext/math/src/gamma.h +39 -0
- data/ext/math/src/intexp.c +35 -0
- data/ext/math/src/intexp.h +15 -0
- data/ext/math/src/makefile +164 -0
- data/ext/math/src/math.mak +48 -0
- data/ext/math/src/normal.c +387 -0
- data/ext/math/src/normal.h +44 -0
- data/ext/math/src/radfn.c +113 -0
- data/ext/math/src/radfn.h +34 -0
- data/ext/math/src/zeta.c +49 -0
- data/ext/math/src/zeta.h +15 -0
- data/ext/pre-clean.rb +8 -0
- data/ext/pre-setup.rb +9 -0
- data/ext/util/doc/copying +504 -0
- data/ext/util/src/listops.c +76 -0
- data/ext/util/src/listops.h +26 -0
- data/ext/util/src/makefile +103 -0
- data/ext/util/src/memsys.c +84 -0
- data/ext/util/src/memsys.h +42 -0
- data/ext/util/src/nstats.c +288 -0
- data/ext/util/src/nstats.h +69 -0
- data/ext/util/src/params.c +86 -0
- data/ext/util/src/params.h +19 -0
- data/ext/util/src/parse.c +133 -0
- data/ext/util/src/parse.h +81 -0
- data/ext/util/src/scan.c +767 -0
- data/ext/util/src/scan.h +111 -0
- data/ext/util/src/symtab.c +443 -0
- data/ext/util/src/symtab.h +121 -0
- data/ext/util/src/tabscan.c +279 -0
- data/ext/util/src/tabscan.h +99 -0
- data/ext/util/src/util.mak +91 -0
- data/ext/util/src/vecops.c +317 -0
- data/ext/util/src/vecops.h +42 -0
- data/lib/apriori.rb +133 -0
- data/lib/apriori/adapter.rb +13 -0
- data/lib/apriori/association_rule.rb +89 -0
- data/lib/apriori/version.rb +9 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/txt2html +82 -0
- data/setup.rb +1585 -0
- data/tasks/apriori.rake +20 -0
- data/tasks/attic.rake +28 -0
- data/tasks/deployment.rake +34 -0
- data/tasks/environment.rake +7 -0
- data/tasks/install.rake +13 -0
- data/tasks/website.rake +17 -0
- data/test/apriori_test.rb +13 -0
- data/test/fixtures/market_basket_results_test.txt +5 -0
- data/test/fixtures/market_basket_string_test.txt +7 -0
- data/test/fixtures/results.txt +2 -0
- data/test/fixtures/sample.txt +7 -0
- data/test/test_helper.rb +5 -0
- data/test/unit/test_apriori.rb +68 -0
- data/test/unit/test_itemsets_and_parsing.rb +82 -0
- data/website/index.html +248 -0
- data/website/index.txt +152 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +142 -0
- data/website/template.html.erb +49 -0
- metadata +226 -0
data/ext/util/src/scan.h
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
/*----------------------------------------------------------------------
|
2
|
+
File : scan.h
|
3
|
+
Contents: scanner (lexical analysis of a character stream)
|
4
|
+
Author : Christian Borgelt
|
5
|
+
History : 1996.01.16 file created
|
6
|
+
1996.02.21 definition of BUFSIZE made global
|
7
|
+
1996.03.17 special tokens for keywords removed
|
8
|
+
1998.02.08 recover and error message functions added
|
9
|
+
1998.02.13 token T_RGT (right arrow '->') added
|
10
|
+
1998.03.04 definitions of T_ID and T_NUM exchanged
|
11
|
+
1998.04.17 token T_LFT (left arrow '<-') added
|
12
|
+
1998.05.27 token T_CMP (two char comparison operator) added
|
13
|
+
1998.05.31 field f removed from struct TOKVAL
|
14
|
+
2000.11.23 functions sc_len and sc_form added
|
15
|
+
2001.07.15 scanner made an object
|
16
|
+
2006.02.02 token T_DASH (undirected edge '--') added
|
17
|
+
----------------------------------------------------------------------*/
|
18
|
+
#ifndef __SCAN__
|
19
|
+
#define __SCAN__
|
20
|
+
#include <stdio.h>
|
21
|
+
|
22
|
+
/*----------------------------------------------------------------------
|
23
|
+
Preprocessor Definitions
|
24
|
+
----------------------------------------------------------------------*/
|
25
|
+
/* --- sizes --- */
|
26
|
+
#define SC_BUFSIZE 1024 /* size of scan buffer */
|
27
|
+
|
28
|
+
#ifdef SC_SCAN
|
29
|
+
/* --- tokens --- */
|
30
|
+
/* Token codes returned by the scanner for multi-character tokens. */
#define T_EOF       256         /* end of file reached */
#define T_NUM       257         /* number (floating point) */
#define T_ID        258         /* identifier or string */
#define T_RGT       259         /* right arrow '->' */
#define T_LFT       260         /* left arrow '<-' */
#define T_DASH      261         /* dash '--' (undirected edge) */
#define T_CMP       262         /* two character comparison, e.g. '<=' */

/* --- error codes (zero means success, all failures are negative) --- */
#define E_NONE        0         /* no error */
#define E_NOMEM     (-1)        /* not enough memory */
#define E_FOPEN     (-2)        /* file open failed */
#define E_FREAD     (-3)        /* file read failed */
#define E_FWRITE    (-4)        /* file write failed (unused) */
#define E_CHAR      (-5)        /* invalid character */
#define E_BUFOVF    (-6)        /* scan buffer overflow */
#define E_UNTSTR    (-7)        /* unterminated string */
#define E_UNTCOM    (-8)        /* unterminated comment */
#define E_STATE     (-9)        /* invalid scanner state */
#define E_GARBAGE  (-10)        /* garbage at end of file */
|
50
|
+
|
51
|
+
#endif
|
52
|
+
/*----------------------------------------------------------------------
|
53
|
+
Type Definitions
|
54
|
+
----------------------------------------------------------------------*/
|
55
|
+
#ifdef SC_SCAN
|
56
|
+
typedef struct { /* --- scanner --- */
|
57
|
+
FILE *file; /* file to scan */
|
58
|
+
int start; /* start line of comment */
|
59
|
+
int line, pline; /* input line of file */
|
60
|
+
int token, ptoken; /* token */
|
61
|
+
int len, plen; /* length of token value */
|
62
|
+
int back; /* flag for backward step */
|
63
|
+
char *value; /* token value */
|
64
|
+
char buf[2][SC_BUFSIZE]; /* scan buffers */
|
65
|
+
FILE *errfile; /* error output stream */
|
66
|
+
int lncnt; /* error message line count */
|
67
|
+
int msgcnt; /* number of add. error messages */
|
68
|
+
const char **msgs; /* additional error messages */
|
69
|
+
char fname[1]; /* name of file to scan */
|
70
|
+
} SCAN; /* (scanner) */
|
71
|
+
#endif
|
72
|
+
|
73
|
+
/*----------------------------------------------------------------------
|
74
|
+
Functions
|
75
|
+
----------------------------------------------------------------------*/
|
76
|
+
extern int sc_fmtlen (const char *s, int *len);
|
77
|
+
extern int sc_format (char *dst, const char *src, int quotes);
|
78
|
+
|
79
|
+
#ifdef SC_SCAN
|
80
|
+
extern SCAN* sc_create (const char *fname);
|
81
|
+
extern void sc_delete (SCAN *scan);
|
82
|
+
extern const char* sc_fname (SCAN *scan);
|
83
|
+
extern int sc_line (SCAN *scan);
|
84
|
+
|
85
|
+
extern int sc_next (SCAN *scan);
|
86
|
+
extern int sc_nexter (SCAN *scan);
|
87
|
+
extern int sc_back (SCAN *scan);
|
88
|
+
|
89
|
+
extern int sc_token (SCAN *scan);
|
90
|
+
extern const char* sc_value (SCAN *scan);
|
91
|
+
extern int sc_len (SCAN *scan);
|
92
|
+
extern int sc_eof (SCAN *scan);
|
93
|
+
|
94
|
+
extern int sc_recover (SCAN *scan,
|
95
|
+
int stop, int beg, int end, int level);
|
96
|
+
extern void sc_errfile (SCAN *scan, FILE *file, int lncnt);
|
97
|
+
extern void sc_errmsgs (SCAN *scan, const char *msgs[], int cnt);
|
98
|
+
extern int sc_error (SCAN *scan, int code, ...);
|
99
|
+
|
100
|
+
/*----------------------------------------------------------------------
|
101
|
+
Preprocessor Definitions
|
102
|
+
----------------------------------------------------------------------*/
|
103
|
+
/* Field accessors; each one simply reads the named member of the     */
/* scanner object 's' (the argument is evaluated exactly once).       */
#define sc_fname(s)     ((const char*)((s)->fname))
#define sc_line(s)      ((s)->line)

#define sc_token(s)     ((s)->token)
#define sc_value(s)     ((const char*)((s)->value))
#define sc_len(s)       ((s)->len)
|
109
|
+
|
110
|
+
#endif /* #ifdef SC_SCAN */
|
111
|
+
#endif
|
@@ -0,0 +1,443 @@
|
|
1
|
+
/*----------------------------------------------------------------------
|
2
|
+
File : symtab.c
|
3
|
+
Contents: symbol table management
|
4
|
+
Author : Christian Borgelt
|
5
|
+
History : 1995.10.22 file created
|
6
|
+
1995.10.30 functions made independent of symbol data
|
7
|
+
1995.11.26 symbol types and visibility levels added
|
8
|
+
1996.01.04 st_clear added
|
9
|
+
1996.02.27 st_insert modified
|
10
|
+
1996.06.28 dynamic bucket vector enlargement added
|
11
|
+
1996.07.04 bug in bucket reorganization removed
|
12
|
+
1997.04.01 functions st_clear and st_remove combined
|
13
|
+
1997.07.29 minor improvements
|
14
|
+
1997.08.05 minor improvements
|
15
|
+
1997.11.16 some comments improved
|
16
|
+
1998.02.06 default table sizes changed
|
17
|
+
1998.05.31 list of all symbols removed
|
18
|
+
1998.06.20 deletion function moved to st_create
|
19
|
+
1998.07.14 minor improvements
|
20
|
+
1998.09.01 bug in function _sort removed, assertions added
|
21
|
+
1998.09.25 hash function improved
|
22
|
+
1998.09.28 types ULONG and CCHAR removed, st_stats added
|
23
|
+
1999.02.04 long int changed to int
|
24
|
+
1999.11.10 name/identifier map management added
|
25
|
+
2003.08.15 renamed new to nel in st_insert (C++ compat.)
|
26
|
+
2004.12.15 function nim_trunc added
|
27
|
+
2004.12.28 bug in function nim_trunc fixed
|
28
|
+
----------------------------------------------------------------------*/
|
29
|
+
#include <stdio.h>
|
30
|
+
#include <stdlib.h>
|
31
|
+
#include <string.h>
|
32
|
+
#include <limits.h>
|
33
|
+
#include <assert.h>
|
34
|
+
#include "symtab.h"
|
35
|
+
#ifdef NIMAPFN
|
36
|
+
#include "vecops.h"
|
37
|
+
#endif
|
38
|
+
#ifdef STORAGE
|
39
|
+
#include "storage.h"
|
40
|
+
#endif
|
41
|
+
|
42
|
+
/*----------------------------------------------------------------------
|
43
|
+
Preprocessor Definitions
|
44
|
+
----------------------------------------------------------------------*/
|
45
|
+
/* Sizes used when the caller of st_create passes a non-positive value */
#define DFLT_INIT     1023      /* default initial hash table size */
#if INT_MAX > 32767             /* int is wider than 16 bits */
#define DFLT_MAX   1048575      /* default maximal hash table size */
#else                           /* int has only 16 bits */
#define DFLT_MAX     16383      /* default maximal hash table size */
#endif
#define BLKSIZE        256      /* block size for identifier vector */
|
52
|
+
|
53
|
+
/*----------------------------------------------------------------------
|
54
|
+
Default Hash Function
|
55
|
+
----------------------------------------------------------------------*/
|
56
|
+
|
57
|
+
/* Default hash function: starts from the symbol type and folds in    */
/* every character of the name with a shift-and-xor step.             */
/* Returns the unreduced hash value; the caller takes it modulo the   */
/* bucket vector size.                                                */
static unsigned _hdflt (const char *name, int type)
{                               /* --- default hash function */
  unsigned    hash = (unsigned)type;  /* running hash value */
  const char  *p;               /* to traverse the name */

  for (p = name; *p != '\0'; p++)
    hash ^= (hash << 3) ^ (unsigned)(*p);
  return hash;                  /* return the computed hash value */
}  /* _hdflt() */
|
64
|
+
|
65
|
+
/*----------------------------------------------------------------------
|
66
|
+
Auxiliary Functions
|
67
|
+
----------------------------------------------------------------------*/
|
68
|
+
|
69
|
+
/* Deletes every symbol of the table: each bucket is emptied, the     */
/* optional deletion function is called on the data area that follows */
/* the element header, and the element itself is freed.               */
/* The symbol counter of the table is NOT changed here.               */
static void _delsym (SYMTAB *tab)
{                               /* --- delete all symbols */
  int  b;                       /* index of the current bucket */
  STE  *cur, *next;             /* current element and its successor */

  for (b = tab->size -1; b >= 0; b--) {
    cur = tab->bvec[b];         /* detach the bucket list before */
    tab->bvec[b] = NULL;        /* any of its elements is deleted */
    for ( ; cur != NULL; cur = next) {
      next = cur->succ;         /* note successor before freeing */
      if (tab->delfn != NULL)   /* user data starts right behind */
        tab->delfn(cur +1);     /* the symbol table element */
      free(cur);                /* deallocate the element */
    }
  }
}  /* _delsym() */
|
85
|
+
|
86
|
+
/*--------------------------------------------------------------------*/
|
87
|
+
|
88
|
+
/* Merges the leading cnt[0] elements of in[0] with the leading       */
/* cnt[1] elements of in[1] and appends the result at *out.           */
/* The element with the strictly higher level is taken from in[0],    */
/* otherwise the head of in[1] is taken.                              */
/* On return in[0] and in[1] point behind the consumed elements, the  */
/* output list is NULL-terminated, and the address of its terminating */
/* link is returned so that further elements can be appended there.   */
static STE** _merge (STE *in[], int cnt[], STE **out)
{                               /* --- merge two lists into one */
  int  src;                     /* index of the input list to use */
  STE  *pick;                   /* element moved to the output */

  do {                          /* compare and merge loop */
    src  = (in[0]->level > in[1]->level) ? 0 : 1;
    pick = in[src];             /* take the head of the chosen list, */
    *out = pick;                /* append it to the output list, */
    out  = &pick->succ;         /* advance the output end and */
    in[src] = pick->succ;       /* remove it from the input list */
    cnt[src] -= 1;              /* one element less in this run */
  } while (cnt[src] > 0);       /* until one run is exhausted */
  src ^= 1;                     /* switch to the other input list */
  *out = in[src];               /* and append its remaining run */
  while (--cnt[src] >= 0)       /* skip over the rest of the run */
    out = &(*out)->succ;
  in[src] = *out;               /* input continues behind the run */
  *out = NULL;                  /* terminate the output list */
  return out;                   /* return the new end of the output */
}  /* _merge() */
|
105
|
+
|
106
|
+
/*--------------------------------------------------------------------*/
|
107
|
+
|
108
|
+
/* Sorts a bucket list by visibility level using a bottom-up merge    */
/* sort on linked lists (ordering is the one applied by _merge: the   */
/* higher level comes first).                                         */
/* The list is split into two halves, runs of doubling length are     */
/* merged alternately into two output lists, and an unpaired run is   */
/* folded into a separate remainder collection that is merged in at   */
/* the very end. Returns the head of the sorted list.                 */
static STE* _sort (STE *list)
{                               /* --- sort a hash bucket list */
  STE  *walk;                   /* to traverse the list */
  STE  *mid;                    /* last element of the first half */
  STE  *rest;                   /* remainder collection */
  STE  *in[2], *out[2];         /* input and output lists */
  STE  **end[2];                /* ends of the output lists */
  int  cnt[2];                  /* number of elements to merge */
  int  run;                     /* run length in the input lists */
  int  rem;                     /* elements in remainder collection */
  int  oid;                     /* index of the output list / toggle */

  if (list == NULL)             /* an empty list is already sorted */
    return NULL;
  out[0] = list;                /* first half starts at the head */
  mid    = list;                /* advance 'mid' one step for every */
  oid    = 0;                   /* second element that is visited */
  for (walk = list->succ; walk != NULL; walk = walk->succ) {
    oid ^= 1;
    if (oid == 0) mid = mid->succ;
  }
  out[1]    = mid->succ;        /* second half starts behind 'mid' */
  mid->succ = NULL;             /* cut the list into two parts */
  rest = NULL;                  /* there is no remainder yet */
  run  = 1;                     /* start with runs of length 1 */
  rem  = 0;
  while (out[1] != NULL) {      /* while there are two lists */
    in[0]  = out[0];            /* the output of the last pass */
    in[1]  = out[1];            /* becomes the input of this one */
    end[0] = out;               /* reinitialize the end pointers */
    end[1] = out +1;
    out[1] = NULL;
    oid    = 0;                 /* start with the first output list */
    do {                        /* merge one run from each input */
      cnt[0] = run;
      cnt[1] = run;
      end[oid] = _merge(in, cnt, end[oid]);
      oid ^= 1;                 /* alternate the output list */
    } while (in[1] != NULL);    /* while the second list has elements */
    if (in[0] != NULL) {        /* if a run of the first list is left */
      if (rest == NULL)         /* without a remainder collection */
        rest = in[0];           /* the run simply becomes it */
      else {                    /* otherwise merge the run into */
        cnt[0] = run;           /* the existing remainder collection */
        cnt[1] = rem;
        in[1]  = rest;
        _merge(in, cnt, &rest);
      }
      rem += run;               /* the remainder grew by one run */
    }
    run <<= 1;                  /* double the run length */
  }
  if (rem > 0) {                /* if there is a remainder collection */
    in[0] = out[0];  cnt[0] = run;
    in[1] = rest;    cnt[1] = rem;
    _merge(in, cnt, out);       /* merge it into the output list */
  }                             /* (the result is stored in out[0]) */
  return out[0];                /* return the sorted list */
}  /* _sort() */
|
152
|
+
|
153
|
+
/*--------------------------------------------------------------------*/
|
154
|
+
|
155
|
+
/* Enlarges the bucket vector of a hash table (to twice the old size  */
/* plus one, limited by tab->max) and redistributes all symbols.      */
/* Afterwards every bucket list is sorted by visibility level again.  */
/*                                                                    */
/* The vector is reallocated BEFORE the bucket lists are touched: if  */
/* the allocation fails the table is left exactly as it was. (Linking */
/* the lists together first would leave the old buckets chained into  */
/* one another on failure, so that a later deletion of all symbols    */
/* would free elements more than once.)                               */
static void _reorg (SYMTAB *tab)
{                               /* --- reorganize a hash table */
  int  i;                       /* loop variable, bucket index */
  int  old;                     /* old bucket vector size */
  int  size;                    /* new bucket vector size */
  STE  **p;                     /* new bucket vector, link buffer */
  STE  *ste;                    /* to traverse symbol table elements */
  STE  *list = NULL;            /* list of all symbols */

  old  = tab->size;             /* compute 2*old+1 without overflow */
  size = (old > (tab->max -1) /2) ? tab->max : (old << 1) +1;
  if (size <= old) return;      /* never shrink the bucket vector */
  p = (STE**)realloc(tab->bvec, (size_t)size *sizeof(STE*));
  if (!p) return;               /* on failure keep the old table */
  tab->bvec = p;                /* set the new bucket vector */
  tab->size = size;             /* and its size */
  for (p = &list, i = old; --i >= 0; ) {
    *p = tab->bvec[i];          /* traverse the old buckets and */
    while (*p) p = &(*p)->succ; /* link all bucket lists together */
  }                             /* (collect symbols) */
  for (i = size; --i >= 0; )    /* clear all hash buckets, also the */
    tab->bvec[i] = NULL;        /* ones realloc left uninitialized */
  while (list) {                /* traverse the list of all symbols */
    ste = list; list = list->succ;
    i = tab->hash(ste->name, ste->type) %size;
    ste->succ = tab->bvec[i];   /* compute the new bucket index and */
    tab->bvec[i] = ste;         /* insert the symbol at the head */
  }
  for (i = size; --i >= 0; )    /* restore the ordering by level */
    tab->bvec[i] = _sort(tab->bvec[i]);
}  /* _reorg() */
|
185
|
+
|
186
|
+
/*----------------------------------------------------------------------
|
187
|
+
Symbol Table Functions
|
188
|
+
----------------------------------------------------------------------*/
|
189
|
+
|
190
|
+
/* Creates an empty symbol table.                                     */
/*   init   initial bucket vector size (<= 0: DFLT_INIT)              */
/*   max    maximal bucket vector size (<= 0: DFLT_MAX)               */
/*   hash   hash function (NULL: built-in default _hdflt)             */
/*   delfn  function called on the data of a deleted symbol, or NULL  */
/* Returns the new table or NULL if memory allocation fails.          */
SYMTAB* st_create (int init, int max, HASHFN hash, SYMFN delfn)
{                               /* --- create a symbol table */
  SYMTAB  *tab;                 /* created symbol table */

  if (init <= 0) init = DFLT_INIT;
  if (max  <= 0) max  = DFLT_MAX;
  tab = (SYMTAB*)malloc(sizeof(SYMTAB));
  if (tab == NULL)              /* allocate the table body */
    return NULL;
  tab->bvec = (STE**)calloc(init, sizeof(STE*));
  if (tab->bvec == NULL) {      /* allocate a zeroed bucket vector */
    free(tab);
    return NULL;
  }
  tab->size  = init;            /* note the vector sizes */
  tab->max   = max;
  tab->cnt   = 0;               /* there are no symbols yet and */
  tab->level = 0;               /* the visibility level is 0 */
  tab->hash  = (hash != NULL) ? hash : _hdflt;
  tab->delfn = delfn;
  tab->ids   = NULL;            /* a plain symbol table has no */
  tab->vsz   = INT_MAX;         /* identifier vector */
  return tab;                   /* return the created symbol table */
}  /* st_create() */
|
209
|
+
|
210
|
+
/*--------------------------------------------------------------------*/
|
211
|
+
|
212
|
+
/* Destroys a symbol table: all symbols (via _delsym), the bucket     */
/* vector, the identifier vector (if any) and the table body itself.  */
void ap_st_delete (SYMTAB *tab)
{                               /* --- delete a symbol table */
  assert(tab && tab->bvec);     /* check argument */
  _delsym(tab);                 /* delete all symbols first */
  free(tab->bvec);              /* then the bucket vector */
  free(tab->ids);               /* free(NULL) is a no-op */
  free(tab);                    /* finally the table body */
}  /* ap_st_delete() */
|
220
|
+
|
221
|
+
/*--------------------------------------------------------------------*/
|
222
|
+
|
223
|
+
/* Inserts a symbol with the given name and type on the current       */
/* visibility level and reserves 'size' bytes of user data for it.    */
/* Returns                                                            */
/*   EXISTS  if the first element with this name and type found in    */
/*           the bucket is on the current level,                      */
/*   NULL    if memory allocation fails,                              */
/*   otherwise a pointer to the data area of the new symbol.          */
/* The element header, the data area and the copy of the name are     */
/* allocated as one memory block (in this order).                     */
/* With NIMAPFN defined and an identifier vector present, the first   */
/* int of the data area receives the symbol's identifier.             */
void* ap_st_insert (SYMTAB *tab, const char *name, int type,
                    unsigned size)
{                               /* --- insert a symbol */
  unsigned  hval;               /* hash value of name and type */
  int       slot;               /* index of the hash bucket */
  STE       *hit;               /* element with same name and type */
  STE       *elem;              /* new symbol table element */

  assert(tab && name            /* check the function arguments */
      && ((size >= sizeof(int)) || (tab->vsz == INT_MAX)));
  if ((tab->size < tab->max)    /* if the table may still grow and */
  &&  (tab->cnt /4 > tab->size))/* the buckets are rather full, */
    _reorg(tab);                /* reorganize the hash table */

  hval = tab->hash(name, type); /* find the bucket and search it */
  slot = hval % tab->size;      /* for the given name and type */
  for (hit = tab->bvec[slot]; hit != NULL; hit = hit->succ)
    if ((hit->type == type) && (strcmp(name, hit->name) == 0))
      break;
  if ((hit != NULL) && (hit->level == tab->level))
    return EXISTS;              /* symbol exists on current level */

#ifdef NIMAPFN                  /* if name/identifier map management */
  if (tab->cnt >= tab->vsz) {   /* if the identifier vector is full */
    int  grown;                 /* enlarged vector size */
    int  **ids;                 /* enlarged identifier vector */
    if (tab->vsz > BLKSIZE) grown = tab->vsz +(tab->vsz >> 1);
    else                    grown = tab->vsz +BLKSIZE;
    ids = (int**)realloc(tab->ids, grown *sizeof(int*));
    if (ids == NULL) return NULL;
    tab->ids = ids;             /* set the new vector and its size */
    tab->vsz = grown;           /* (never reached for plain symbol */
  }                             /* tables, where vsz is INT_MAX) */
#endif

  elem = (STE*)malloc(sizeof(STE) +size +strlen(name) +1);
  if (elem == NULL)             /* allocate header, data and name */
    return NULL;
  elem->name  = (char*)(elem +1) +size;  /* name follows the data */
  strcpy(elem->name, name);     /* note the symbol name, */
  elem->type  = type;           /* the symbol type, and the */
  elem->level = tab->level;     /* current visibility level */
  elem->succ  = tab->bvec[slot];/* insert the new symbol at */
  tab->bvec[slot] = elem;       /* the head of the bucket list */
  elem += 1;                    /* from here on: the data area */
#ifdef NIMAPFN                  /* if name/identifier maps are */
  if (tab->ids != NULL) {       /* supported and this is such a map */
    *(int*)elem = tab->cnt;     /* set the symbol identifier and */
    tab->ids[tab->cnt] = (int*)elem;  /* register the new symbol */
  }
#endif
  tab->cnt += 1;                /* count the new symbol */
  return elem;                  /* return pointer to the data area */
}  /* ap_st_insert() */
|
277
|
+
|
278
|
+
/*--------------------------------------------------------------------*/
|
279
|
+
|
280
|
+
/* Removes one symbol or, if 'name' is NULL, all symbols.             */
/* When all symbols are removed, the symbol counter and the           */
/* visibility level are reset to 0.                                   */
/* Returns 0 on success and -1 if no symbol with the given name and   */
/* type exists.                                                       */
int st_remove (SYMTAB *tab, const char *name, int type)
{                               /* --- remove a symbol/all symbols */
  int  slot;                    /* index of the hash bucket */
  STE  **link;                  /* link that points to the element */
  STE  *victim;                 /* element to remove */

  assert(tab);                  /* check for a valid symbol table */

  if (name == NULL) {           /* --- remove all symbols --- */
    _delsym(tab);               /* delete the symbols and */
    tab->cnt = tab->level = 0;  /* reset counter and level */
    return 0;                   /* return 'ok' */
  }

  /* --- remove one symbol --- */
  slot = tab->hash(name, type) % tab->size;
  for (link = tab->bvec +slot; *link != NULL; link = &(*link)->succ)
    if (((*link)->type == type) && (strcmp(name, (*link)->name) == 0))
      break;                    /* stop at the first matching element */
  victim = *link;
  if (victim == NULL)           /* if the symbol does not exist, */
    return -1;                  /* abort the function */
  *link = victim->succ;         /* unlink the element from its bucket */
  if (tab->delfn != NULL)       /* delete the user data, which */
    tab->delfn(victim +1);      /* follows the element header, */
  free(victim);                 /* and the element itself */
  tab->cnt--;                   /* one symbol less in the table */
  return 0;                     /* return 'ok' */
}  /* st_remove() */
|
311
|
+
|
312
|
+
/*--------------------------------------------------------------------*/
|
313
|
+
/* NOTE: Ruby core already has a function named 'st_lookup', hence the 'ap_' prefix. */
|
314
|
+
|
315
|
+
/* Looks up a symbol by name and type.                                */
/* Returns a pointer to the data area of the first matching element   */
/* in the hash bucket, or NULL if there is no such symbol.            */
void* ap_st_lookup (SYMTAB *tab, const char *name, int type)
{                               /* --- look up a symbol */
  int  slot;                    /* index of the hash bucket */
  STE  *cur;                    /* to traverse the bucket list */

  assert(tab && name);          /* check arguments */
  slot = tab->hash(name, type) % tab->size;
  for (cur = tab->bvec[slot]; cur != NULL; cur = cur->succ) {
    if (cur->type != type)      /* skip elements of another type */
      continue;
    if (strcmp(name, cur->name) == 0)
      return cur +1;            /* data follows the element header */
  }
  return NULL;                  /* return 'not found' */
}  /* ap_st_lookup() */
|
331
|
+
|
332
|
+
/*--------------------------------------------------------------------*/
|
333
|
+
|
334
|
+
/* Leaves the current visibility level: from the head of every bucket */
/* list all elements whose level is at least the current level are    */
/* deleted, then the level is decremented. Does nothing on level 0.   */
/* Only leading elements are examined, i.e. the bucket lists are      */
/* expected to hold the highest levels first.                         */
void st_endblk (SYMTAB *tab)
{                               /* --- remove one visibility level */
  int  b;                       /* index of the current bucket */
  STE  *cur, *dead;             /* to traverse the bucket lists */

  assert(tab);                  /* check for a valid symbol table */
  if (tab->level <= 0)          /* if on level 0, abort */
    return;
  for (b = tab->size -1; b >= 0; b--) {
    cur = tab->bvec[b];         /* strip the leading elements that */
    while ((cur != NULL)        /* belong to the level that ends */
    &&     (cur->level >= tab->level)) {
      dead = cur;               /* note the element and */
      cur  = cur->succ;         /* step to its successor */
      if (tab->delfn != NULL)   /* delete the user data and */
        tab->delfn(dead +1);
      free(dead);               /* the symbol table element */
      tab->cnt--;               /* and count the removal */
    }
    tab->bvec[b] = cur;         /* set the new start of the list */
  }
  tab->level--;                 /* go up one level */
}  /* st_endblk() */
|
355
|
+
|
356
|
+
/*--------------------------------------------------------------------*/
|
357
|
+
#ifndef NDEBUG
|
358
|
+
|
359
|
+
void st_stats (const SYMTAB *tab)
|
360
|
+
{ /* --- compute and print statistics */
|
361
|
+
const STE *ste; /* to traverse bucket lists */
|
362
|
+
int i; /* loop variable */
|
363
|
+
int used; /* number of used hash buckets */
|
364
|
+
int len; /* length of current bucket list */
|
365
|
+
int min, max; /* min. and max. bucket list length */
|
366
|
+
int cnts[10]; /* counter for bucket list lengths */
|
367
|
+
|
368
|
+
assert(tab); /* check for a valid symbol table */
|
369
|
+
min = INT_MAX; max = used = 0;/* initialize variables */
|
370
|
+
for (i = 10; --i >= 0; ) cnts[i] = 0;
|
371
|
+
for (i = tab->size; --i >= 0; ) { /* traverse bucket vector */
|
372
|
+
len = 0; /* determine bucket list length */
|
373
|
+
for (ste = tab->bvec[i]; ste; ste = ste->succ) len++;
|
374
|
+
if (len > 0) used++; /* count used hash buckets */
|
375
|
+
if (len < min) min = len; /* determine minimal and */
|
376
|
+
if (len > max) max = len; /* maximal list length */
|
377
|
+
cnts[(len >= 9) ? 9 : len]++;
|
378
|
+
} /* count list length */
|
379
|
+
printf("number of symbols : %d\n", tab->cnt);
|
380
|
+
printf("number of hash buckets: %d\n", tab->size);
|
381
|
+
printf("used hash buckets : %d\n", used);
|
382
|
+
printf("minimal list length : %d\n", min);
|
383
|
+
printf("maximal list length : %d\n", max);
|
384
|
+
printf("average list length : %g\n", (double)tab->cnt/tab->size);
|
385
|
+
printf("ditto, of used buckets: %g\n", (double)tab->cnt/used);
|
386
|
+
printf("length distribution :\n");
|
387
|
+
for (i = 0; i < 9; i++) printf("%3d ", i);
|
388
|
+
printf(" >8\n");
|
389
|
+
for (i = 0; i < 9; i++) printf("%3d ", cnts[i]);
|
390
|
+
printf("%3d\n", cnts[9]);
|
391
|
+
} /* st_stats() */
|
392
|
+
|
393
|
+
#endif
|
394
|
+
/*----------------------------------------------------------------------
|
395
|
+
Name/Identifier Map Functions
|
396
|
+
----------------------------------------------------------------------*/
|
397
|
+
#ifdef NIMAPFN
|
398
|
+
|
399
|
+
/* Creates a name/identifier map: a symbol table (see st_create for   */
/* the parameters) whose identifier vector size is set to 0, so that  */
/* the first insertion allocates the identifier vector.               */
/* Returns the new map or NULL if the table cannot be created.        */
NIMAP* nim_create (int init, int max, HASHFN hash, SYMFN delfn)
{                               /* --- create a name/identifier map */
  NIMAP  *nim = st_create(init, max, hash, delfn);

  if (nim != NULL)              /* if the table could be created, */
    nim->vsz = 0;               /* clear the id. vector size */
  return nim;                   /* return created name/id map */
}  /* nim_create() */
|
408
|
+
|
409
|
+
/*--------------------------------------------------------------------*/
|
410
|
+
|
411
|
+
/* Sorts the identifier vector of a name/identifier map with v_sort   */
/* (using cmpfn and data) and renumbers the identifiers so that each  */
/* one equals its new position in the vector.                         */
/*   map  optional conversion map that is filled while renumbering    */
/*        (NULL: no map is built)                                     */
/*   dir  < 0: map[new] = old   (backward map)                        */
/*        else map[old] = new   (forward map)                         */
void nim_sort (NIMAP *nim, SYMCMPFN cmpfn, void *data,
               int *map, int dir)
{                               /* --- sort name/identifier map */
  int  k;                       /* new identifier, vector index */
  int  **ids;                   /* the sorted identifier vector */

  assert(nim && cmpfn);         /* check the function arguments */
  v_sort(nim->ids, nim->cnt, cmpfn, data);
  ids = nim->ids;               /* traverse the vector backwards */
  if (map == NULL) {            /* if no conversion map is requested, */
    for (k = nim->cnt -1; k >= 0; k--)
      *ids[k] = k;              /* just set the new identifiers */
  }
  else if (dir < 0) {           /* if backward map (i.e. new -> old) */
    for (k = nim->cnt -1; k >= 0; k--) {
      map[k]  = *ids[k];        /* note the old identifier first, */
      *ids[k] = k;              /* then overwrite it with the new */
    }
  }
  else {                        /* if forward map (i.e. old -> new) */
    for (k = nim->cnt -1; k >= 0; k--) {
      map[*ids[k]] = k;         /* index with the old identifier, */
      *ids[k]      = k;         /* then overwrite it with the new */
    }
  }
}  /* nim_sort() */
|
430
|
+
|
431
|
+
/*--------------------------------------------------------------------*/
|
432
|
+
|
433
|
+
/* Truncates a name/identifier map to its first n mappings by         */
/* removing the symbol with the highest identifier until at most n    */
/* symbols are left.                                                  */
/* A negative n is treated as 0 (otherwise the loop would read the    */
/* identifier vector at index -1 once the map is empty). The loop     */
/* also stops if a symbol cannot be removed, because the symbol       */
/* counter would then never decrease.                                 */
void nim_trunc (NIMAP *nim, int n)
{                               /* --- truncate name/identifier map */
  int  *id;                     /* to access the identifiers */

  assert(nim);                  /* check for a valid map */
  if (n < 0) n = 0;             /* there cannot be less than 0 items */
  while (nim->cnt > n) {        /* while to remove mappings */
    id = nim->ids[nim->cnt -1]; /* get the last identifier object */
    if (st_remove(nim, st_name(id), 0) != 0)
      break;                    /* give up if the removal failed */
  }
}  /* nim_trunc() */
|
442
|
+
|
443
|
+
#endif
|