apriori 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (122) hide show
  1. data/History.txt +16 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +121 -0
  4. data/README.txt +149 -0
  5. data/Rakefile +15 -0
  6. data/TODO.txt +60 -0
  7. data/attic/c_ext_test1/MyTest/MyTest.c +23 -0
  8. data/attic/c_ext_test1/MyTest/extconf.rb +11 -0
  9. data/attic/c_ext_test1/mytest.rb +10 -0
  10. data/attic/test.c +12 -0
  11. data/config/hoe.rb +81 -0
  12. data/config/requirements.rb +29 -0
  13. data/examples/01_simple_example.rb +32 -0
  14. data/examples/02_small_file_example.rb +17 -0
  15. data/examples/03_large_file_example.rb +22 -0
  16. data/examples/test_data/market_basket_basic_test.dat +9 -0
  17. data/ext/Apriori.c +149 -0
  18. data/ext/Makefile +149 -0
  19. data/ext/apriori/doc/apriori.html +1301 -0
  20. data/ext/apriori/doc/arem.gp +68 -0
  21. data/ext/apriori/doc/c_rev.gp +89 -0
  22. data/ext/apriori/doc/chi2.tex +156 -0
  23. data/ext/apriori/doc/copying +504 -0
  24. data/ext/apriori/doc/line.gif +0 -0
  25. data/ext/apriori/doc/uparrow.gif +0 -0
  26. data/ext/apriori/ex/flg2set +15 -0
  27. data/ext/apriori/ex/hdr2set +13 -0
  28. data/ext/apriori/ex/readme +71 -0
  29. data/ext/apriori/ex/row2set +7 -0
  30. data/ext/apriori/ex/rulesort +24 -0
  31. data/ext/apriori/ex/tab2set +9 -0
  32. data/ext/apriori/ex/test.app +2 -0
  33. data/ext/apriori/ex/test.rul +9 -0
  34. data/ext/apriori/ex/test1.rul +43 -0
  35. data/ext/apriori/ex/test1.tab +10 -0
  36. data/ext/apriori/ex/test2.tab +10 -0
  37. data/ext/apriori/ex/test3.tab +30 -0
  38. data/ext/apriori/ex/test4.tab +11 -0
  39. data/ext/apriori/ex/test5.tab +39 -0
  40. data/ext/apriori/ex/tid2set +23 -0
  41. data/ext/apriori/ex/xhdr2set +33 -0
  42. data/ext/apriori/src/apriori.c +750 -0
  43. data/ext/apriori/src/apriori.dsp +120 -0
  44. data/ext/apriori/src/apriori.dsw +29 -0
  45. data/ext/apriori/src/apriori.mak +99 -0
  46. data/ext/apriori/src/istree.c +1411 -0
  47. data/ext/apriori/src/istree.h +160 -0
  48. data/ext/apriori/src/makefile +105 -0
  49. data/ext/apriori/src/tract.c +870 -0
  50. data/ext/apriori/src/tract.h +261 -0
  51. data/ext/apriori_wrapper.c +757 -0
  52. data/ext/apriori_wrapper.h +10 -0
  53. data/ext/extconf.rb +32 -0
  54. data/ext/math/doc/copying +504 -0
  55. data/ext/math/src/chi2.c +151 -0
  56. data/ext/math/src/chi2.h +27 -0
  57. data/ext/math/src/choose.c +71 -0
  58. data/ext/math/src/choose.h +16 -0
  59. data/ext/math/src/gamma.c +446 -0
  60. data/ext/math/src/gamma.h +39 -0
  61. data/ext/math/src/intexp.c +35 -0
  62. data/ext/math/src/intexp.h +15 -0
  63. data/ext/math/src/makefile +164 -0
  64. data/ext/math/src/math.mak +48 -0
  65. data/ext/math/src/normal.c +387 -0
  66. data/ext/math/src/normal.h +44 -0
  67. data/ext/math/src/radfn.c +113 -0
  68. data/ext/math/src/radfn.h +34 -0
  69. data/ext/math/src/zeta.c +49 -0
  70. data/ext/math/src/zeta.h +15 -0
  71. data/ext/pre-clean.rb +8 -0
  72. data/ext/pre-setup.rb +9 -0
  73. data/ext/util/doc/copying +504 -0
  74. data/ext/util/src/listops.c +76 -0
  75. data/ext/util/src/listops.h +26 -0
  76. data/ext/util/src/makefile +103 -0
  77. data/ext/util/src/memsys.c +84 -0
  78. data/ext/util/src/memsys.h +42 -0
  79. data/ext/util/src/nstats.c +288 -0
  80. data/ext/util/src/nstats.h +69 -0
  81. data/ext/util/src/params.c +86 -0
  82. data/ext/util/src/params.h +19 -0
  83. data/ext/util/src/parse.c +133 -0
  84. data/ext/util/src/parse.h +81 -0
  85. data/ext/util/src/scan.c +767 -0
  86. data/ext/util/src/scan.h +111 -0
  87. data/ext/util/src/symtab.c +443 -0
  88. data/ext/util/src/symtab.h +121 -0
  89. data/ext/util/src/tabscan.c +279 -0
  90. data/ext/util/src/tabscan.h +99 -0
  91. data/ext/util/src/util.mak +91 -0
  92. data/ext/util/src/vecops.c +317 -0
  93. data/ext/util/src/vecops.h +42 -0
  94. data/lib/apriori.rb +133 -0
  95. data/lib/apriori/adapter.rb +13 -0
  96. data/lib/apriori/association_rule.rb +89 -0
  97. data/lib/apriori/version.rb +9 -0
  98. data/script/console +10 -0
  99. data/script/destroy +14 -0
  100. data/script/generate +14 -0
  101. data/script/txt2html +82 -0
  102. data/setup.rb +1585 -0
  103. data/tasks/apriori.rake +20 -0
  104. data/tasks/attic.rake +28 -0
  105. data/tasks/deployment.rake +34 -0
  106. data/tasks/environment.rake +7 -0
  107. data/tasks/install.rake +13 -0
  108. data/tasks/website.rake +17 -0
  109. data/test/apriori_test.rb +13 -0
  110. data/test/fixtures/market_basket_results_test.txt +5 -0
  111. data/test/fixtures/market_basket_string_test.txt +7 -0
  112. data/test/fixtures/results.txt +2 -0
  113. data/test/fixtures/sample.txt +7 -0
  114. data/test/test_helper.rb +5 -0
  115. data/test/unit/test_apriori.rb +68 -0
  116. data/test/unit/test_itemsets_and_parsing.rb +82 -0
  117. data/website/index.html +248 -0
  118. data/website/index.txt +152 -0
  119. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  120. data/website/stylesheets/screen.css +142 -0
  121. data/website/template.html.erb +49 -0
  122. metadata +226 -0
@@ -0,0 +1,111 @@
1
+ /*----------------------------------------------------------------------
2
+ File : scan.h
3
+ Contents: scanner (lexical analysis of a character stream)
4
+ Author : Christian Borgelt
5
+ History : 1996.01.16 file created
6
+ 1996.02.21 definition of BUFSIZE made global
7
+ 1996.03.17 special tokens for keywords removed
8
+ 1998.02.08 recover and error message functions added
9
+ 1998.02.13 token T_RGT (right arrow '->') added
10
+ 1998.03.04 definitions of T_ID and T_NUM exchanged
11
+ 1998.04.17 token T_LFT (left arrow '<-') added
12
+ 1998.05.27 token T_CMP (two char comparison operator) added
13
+ 1998.05.31 field f removed from struct TOKVAL
14
+ 2000.11.23 functions sc_len and sc_form added
15
+ 2001.07.15 scanner made an object
16
+ 2006.02.02 token T_DASH (undirected edge '--') added
17
+ ----------------------------------------------------------------------*/
18
+ #ifndef __SCAN__
19
+ #define __SCAN__
20
+ #include <stdio.h>
21
+
22
+ /*----------------------------------------------------------------------
23
+ Preprocessor Definitions
24
+ ----------------------------------------------------------------------*/
25
+ /* --- sizes --- */
26
+ #define SC_BUFSIZE 1024 /* size of scan buffer */
27
+
28
+ #ifdef SC_SCAN
29
+ /* --- tokens (only visible when SC_SCAN is defined; codes start at 256, presumably so that a single character can serve as its own token code -- TODO confirm in scan.c) --- */
30
+ #define T_EOF 256 /* end of file */
31
+ #define T_NUM 257 /* number (floating point) */
32
+ #define T_ID 258 /* identifier or string */
33
+ #define T_RGT 259 /* right arrow '->' */
34
+ #define T_LFT 260 /* left arrow '<-' */
35
+ #define T_DASH 261 /* dash '--' (undirected edge) */
36
+ #define T_CMP 262 /* two char. comparison, e.g. '<=' */
37
+
38
+ /* --- error codes (E_NONE is zero, every failure code is negative) --- */
39
+ #define E_NONE 0 /* no error */
40
+ #define E_NOMEM (-1) /* not enough memory */
41
+ #define E_FOPEN (-2) /* file open failed */
42
+ #define E_FREAD (-3) /* file read failed */
43
+ #define E_FWRITE (-4) /* file write failed (unused) */
44
+ #define E_CHAR (-5) /* invalid character */
45
+ #define E_BUFOVF (-6) /* scan buffer overflow */
46
+ #define E_UNTSTR (-7) /* unterminated string */
47
+ #define E_UNTCOM (-8) /* unterminated comment */
48
+ #define E_STATE (-9) /* invalid scanner state */
49
+ #define E_GARBAGE (-10) /* garbage at end of file */
50
+
51
+ #endif
52
+ /*----------------------------------------------------------------------
53
+ Type Definitions
54
+ ----------------------------------------------------------------------*/
55
+ #ifdef SC_SCAN
56
+ typedef struct { /* --- scanner state (type exists only when SC_SCAN is defined) --- */
57
+ FILE *file; /* file to scan */
58
+ int start; /* start line of comment (presumably reported with E_UNTCOM -- confirm) */
59
+ int line, pline; /* input line of file; pline is presumably the line of the previous token -- confirm */
60
+ int token, ptoken; /* token; ptoken is presumably the previous token (see 'back') -- confirm */
61
+ int len, plen; /* length of token value; plen presumably that of the previous token -- confirm */
62
+ int back; /* flag for backward step */
63
+ char *value; /* token value (presumably points into one of the buffers in 'buf' -- confirm) */
64
+ char buf[2][SC_BUFSIZE]; /* two scan buffers of SC_BUFSIZE characters each */
65
+ FILE *errfile; /* error output stream */
66
+ int lncnt; /* error message line count */
67
+ int msgcnt; /* number of add. error messages */
68
+ const char **msgs; /* additional error messages */
69
+ char fname[1]; /* name of file to scan; last member, declared with one element -- presumably over-allocated by sc_create to hold the full name, confirm in scan.c */
70
+ } SCAN; /* (scanner) */
71
+ #endif
72
+
73
+ /*----------------------------------------------------------------------
74
+ Functions
75
+ ----------------------------------------------------------------------*/
76
+ extern int sc_fmtlen (const char *s, int *len); /* available without SC_SCAN */
77
+ extern int sc_format (char *dst, const char *src, int quotes); /* available without SC_SCAN */
78
+
79
+ #ifdef SC_SCAN /* everything below needs the SCAN type */
80
+ extern SCAN* sc_create (const char *fname);
81
+ extern void sc_delete (SCAN *scan);
82
+ extern const char* sc_fname (SCAN *scan);
83
+ extern int sc_line (SCAN *scan);
84
+
85
+ extern int sc_next (SCAN *scan);
86
+ extern int sc_nexter (SCAN *scan);
87
+ extern int sc_back (SCAN *scan);
88
+
89
+ extern int sc_token (SCAN *scan);
90
+ extern const char* sc_value (SCAN *scan);
91
+ extern int sc_len (SCAN *scan);
92
+ extern int sc_eof (SCAN *scan); /* no accessor macro is defined for this one below */
93
+
94
+ extern int sc_recover (SCAN *scan,
95
+ int stop, int beg, int end, int level);
96
+ extern void sc_errfile (SCAN *scan, FILE *file, int lncnt); /* presumably sets SCAN.errfile and SCAN.lncnt -- confirm */
97
+ extern void sc_errmsgs (SCAN *scan, const char *msgs[], int cnt); /* presumably sets SCAN.msgs and SCAN.msgcnt -- confirm */
98
+ extern int sc_error (SCAN *scan, int code, ...); /* variadic: further arguments follow 'code' */
99
+
100
+ /*----------------------------------------------------------------------
101
+ Preprocessor Definitions (accessor macros: these function-like macros
102
+ shadow the equally named functions declared above and read the SCAN
fields directly; each evaluates its argument exactly once)
----------------------------------------------------------------------*/
103
+ #define sc_fname(s) ((const char*)(s)->fname)
104
+ #define sc_line(s) ((s)->line)
105
+
106
+ #define sc_token(s) ((s)->token)
107
+ #define sc_value(s) ((const char*)(s)->value)
108
+ #define sc_len(s) ((s)->len)
109
+
110
+ #endif /* #ifdef SC_SCAN */
111
+ #endif
@@ -0,0 +1,443 @@
1
+ /*----------------------------------------------------------------------
2
+ File : symtab.c
3
+ Contents: symbol table management
4
+ Author : Christian Borgelt
5
+ History : 1995.10.22 file created
6
+ 1995.10.30 functions made independent of symbol data
7
+ 1995.11.26 symbol types and visibility levels added
8
+ 1996.01.04 st_clear added
9
+ 1996.02.27 st_insert modified
10
+ 1996.06.28 dynamic bucket vector enlargement added
11
+ 1996.07.04 bug in bucket reorganization removed
12
+ 1997.04.01 functions st_clear and st_remove combined
13
+ 1997.07.29 minor improvements
14
+ 1997.08.05 minor improvements
15
+ 1997.11.16 some comments improved
16
+ 1998.02.06 default table sizes changed
17
+ 1998.05.31 list of all symbols removed
18
+ 1998.06.20 deletion function moved to st_create
19
+ 1998.07.14 minor improvements
20
+ 1998.09.01 bug in function _sort removed, assertions added
21
+ 1998.09.25 hash function improved
22
+ 1998.09.28 types ULONG and CCHAR removed, st_stats added
23
+ 1999.02.04 long int changed to int
24
+ 1999.11.10 name/identifier map management added
25
+ 2003.08.15 renamed new to nel in st_insert (C++ compat.)
26
+ 2004.12.15 function nim_trunc added
27
+ 2004.12.28 bug in function nim_trunc fixed
28
+ ----------------------------------------------------------------------*/
29
+ #include <stdio.h>
30
+ #include <stdlib.h>
31
+ #include <string.h>
32
+ #include <limits.h>
33
+ #include <assert.h>
34
+ #include "symtab.h"
35
+ #ifdef NIMAPFN
36
+ #include "vecops.h"
37
+ #endif
38
+ #ifdef STORAGE
39
+ #include "storage.h"
40
+ #endif
41
+
42
+ /*----------------------------------------------------------------------
43
+ Preprocessor Definitions
44
+ ----------------------------------------------------------------------*/
45
+ #define DFLT_INIT 1023 /* default initial hash table size */
46
+ #if (INT_MAX > 32767)
47
+ #define DFLT_MAX 1048575 /* default maximal hash table size */
48
+ #else
49
+ #define DFLT_MAX 16383 /* default maximal hash table size */
50
+ #endif
51
+ #define BLKSIZE 256 /* block size for identifier vector */
52
+
53
+ /*----------------------------------------------------------------------
54
+ Default Hash Function
55
+ ----------------------------------------------------------------------*/
56
+
57
/* Default hash function.
 *
 * Starts from the symbol type and folds every byte of the NUL-terminated
 * name into the value with a shift-and-xor step.
 *
 * The bytes are read as unsigned char: converting a plain (possibly
 * signed) char >= 0x80 straight to unsigned would sign-extend it, which
 * sets all high bits and makes the result depend on the platform's char
 * signedness.  For 7-bit names the result is unchanged.
 *
 * name  symbol name (must not be NULL)
 * type  symbol type, used as the initial hash value
 * returns the hash value (the caller reduces it modulo the table size)
 */
static unsigned _hdflt (const char *name, int type)
{                               /* --- default hash function */
  const unsigned char *s = (const unsigned char*)name;
  unsigned h = (unsigned)type;  /* hash value */

  while (*s)                    /* fold in one byte per iteration */
    h ^= (h << 3) ^ (unsigned)*s++;
  return h;                     /* return the computed hash value */
}  /* _hdflt() */
64
+
65
+ /*----------------------------------------------------------------------
66
+ Auxiliary Functions
67
+ ----------------------------------------------------------------------*/
68
+
69
+ static void _delsym (SYMTAB *tab)
70
+ { /* --- delete all symbols */
71
+ int i; /* loop variable */
72
+ STE *ste, *tmp; /* to traverse the symbol list */
73
+
74
+ for (i = tab->size; --i >= 0; ) { /* traverse bucket vector */
75
+ ste = tab->bvec[i]; /* get the next bucket list, */
76
+ tab->bvec[i] = NULL; /* clear the bucket vector entry, */
77
+ while (ste) { /* and traverse the bucket list */
78
+ tmp = ste; /* note the symbol to delete */
79
+ ste = ste->succ; /* and get the next symbol */
80
+ if (tab->delfn) tab->delfn(tmp +1);
81
+ free(tmp); /* if a deletion function is given, */
82
+ } /* call it and then deallocate */
83
+ } /* the symbol table element */
84
+ } /* _delsym() */
85
+
86
+ /*--------------------------------------------------------------------*/
87
+
88
+ static STE** _merge (STE *in[], int cnt[], STE **out)
89
+ { /* --- merge two lists into one: takes cnt[0] elements from in[0] and cnt[1] from in[1], appends them at *out ordered by descending level, advances in[] past the consumed elements and returns the address of the terminating succ link (assumes both heads are non-NULL and each list holds at least cnt[] elements) */
90
+ int k; /* index of input list */
91
+
92
+ do { /* compare and merge loop */
93
+ k = (in[0]->level > in[1]->level) ? 0 : 1; /* on equal levels list 1 is taken first */
94
+ *out = in[k]; /* append the element on the higher */
95
+ out = &(*out)->succ; /* level to the output list and */
96
+ in[k] = *out; /* remove it from the input list */
97
+ } while (--cnt[k] > 0); /* until the quota of the list just used is exhausted */
98
+ *out = in[k ^= 1]; /* switch to the other list and append its remaining elements */
99
+ while (--cnt[k] >= 0) /* skip over the cnt[k] elements still due from it */
100
+ out = &(*out)->succ; /* go to the successor element */
101
+ in[k] = *out; /* whatever follows is the new start of that input list */
102
+ *out = NULL; /* terminate the output list and */
103
+ return out; /* return new end of the output list */
104
+ } /* _merge() */
105
+
106
+ /*--------------------------------------------------------------------*/
107
+
108
+ static STE* _sort (STE *list)
109
+ { /* --- sort a hash bucket list by descending visibility level with an iterative merge sort built on _merge(); returns the new list head (an empty list is returned unchanged) */
110
+ STE *ste; /* to traverse the list, buffer */
111
+ STE *in[2], *out[2]; /* input and output lists */
112
+ STE **end[2]; /* ends of output lists */
113
+ int cnt[2]; /* number of elements to merge */
114
+ int run; /* run length in input lists */
115
+ int rem; /* elements in remainder collection */
116
+ int oid; /* index of output list */
117
+
118
+ if (!list) return list; /* empty lists need not to be sorted */
119
+ oid = 0; out[0] = list; /* traverse list elements */
120
+ for (ste = list->succ; ste; ste = ste->succ)
121
+ if ((oid ^= 1) == 0) list = list->succ; /* advance 'list' on every second step so it stops at the middle */
122
+ out[1] = list->succ; /* split list into two parts (the first gets the extra element if the count is odd) */
123
+ list = list->succ = NULL; /* cut the first part off and start with an empty remainder collection */
124
+ run = 1; rem = 0; /* and run length */
125
+ while (out[1]) { /* while there are two lists */
126
+ in [0] = out[0]; in [1] = out[1]; /* move output list to input */
127
+ end[0] = out; end[1] = out+1; /* reinitialize end pointers */
128
+ out[1] = NULL; oid = 0; /* start with 1st output list */
129
+ do { /* merge loop */
130
+ cnt[0] = cnt[1] = run; /* merge run elements from the */
131
+ end[oid] = _merge(in, cnt, end[oid]); /* input lists */
132
+ oid ^= 1; /* toggle index of output list */
133
+ } while (in[1]); /* while both lists are not empty */
134
+ if (in[0]) { /* if there is one input list left */
135
+ if (!list) /* if there is no rem. collection, */
136
+ list = in[0]; /* just note the rem. input list */
137
+ else { /* if there is a rem. collection, */
138
+ cnt[0] = run; cnt[1] = rem; in[1] = list;
139
+ _merge(in, cnt, &list); /* merge it and the input list to */
140
+ } /* get the new remainder collection */
141
+ rem += run; /* there are now run more elements */
142
+ } /* in the remainder collection */
143
+ run <<= 1; /* double run length */
144
+ } /* while (out[1]) .. */
145
+ if (rem > 0) { /* if there is a rem. collection */
146
+ in[0] = out[0]; cnt[0] = run;
147
+ in[1] = list; cnt[1] = rem;
148
+ _merge(in, cnt, out); /* merge it to the output list */
149
+ } /* and store the result in out[0] */
150
+ return out[0]; /* return the sorted list */
151
+ } /* _sort() */
152
+
153
+ /*--------------------------------------------------------------------*/
154
+
155
+ static void _reorg (SYMTAB *tab)
156
+ { /* --- reorganize a hash table */
157
+ int i; /* loop variable */
158
+ int size; /* new bucket vector size */
159
+ STE **p; /* new bucket vector, buffer */
160
+ STE *ste; /* to traverse symbol table elements */
161
+ STE *list = NULL; /* list of all symbols */
162
+
163
+ size = (tab->size << 1) +1; /* calculate new vector size */
164
+ if (size > tab->max) /* if new size exceeds maximum, */
165
+ size = tab->max; /* set the maximal size */
166
+ for (p = &list, i = tab->size; --i >= 0; ) {
167
+ *p = tab->bvec[i]; /* traverse the bucket vector and */
168
+ while (*p) p = &(*p)->succ; /* link all bucket lists together */
169
+ } /* (collect symbols) */
170
+ p = (STE**)realloc(tab->bvec, size *sizeof(STE*));
171
+ if (!p) return; /* enlarge bucket vector */
172
+ tab->bvec = p; /* set new bucket vector */
173
+ tab->size = size; /* and its size */
174
+ for (p += i = size; --i >= 0; )
175
+ *--p = NULL; /* clear the hash buckets */
176
+ while (list) { /* traverse list of all symbols */
177
+ ste = list; list = list->succ; /* get next symbol */
178
+ i = tab->hash(ste->name, ste->type) %size;
179
+ ste->succ = tab->bvec[i]; /* compute the hash bucket index */
180
+ tab->bvec[i] = ste; /* and insert the symbol at */
181
+ } /* the head of the bucket list */
182
+ for (i = size; --i >= 0; ) /* sort bucket lists according to */
183
+ tab->bvec[i] = _sort(tab->bvec[i]); /* the visibility level */
184
+ } /* _reorg() */
185
+
186
+ /*----------------------------------------------------------------------
187
+ Symbol Table Functions
188
+ ----------------------------------------------------------------------*/
189
+
190
+ SYMTAB* st_create (int init, int max, HASHFN hash, SYMFN delfn)
191
+ { /* --- create a symbol table */
192
+ SYMTAB *tab; /* created symbol table */
193
+
194
+ if (init <= 0) init = DFLT_INIT; /* check and adapt initial */
195
+ if (max <= 0) max = DFLT_MAX; /* and maximal vector size */
196
+ tab = (SYMTAB*)malloc(sizeof(SYMTAB));
197
+ if (!tab) return NULL; /* allocate symbol table body */
198
+ tab->bvec = (STE**)calloc(init, sizeof(STE*));
199
+ if (!tab->bvec) { free(tab); return NULL; }
200
+ tab->level = tab->cnt = 0; /* allocate bucket vector */
201
+ tab->size = init; /* and initialize fields */
202
+ tab->max = max; /* of symbol table body */
203
+ tab->hash = (hash) ? hash : _hdflt;
204
+ tab->delfn = delfn;
205
+ tab->vsz = INT_MAX;
206
+ tab->ids = NULL;
207
+ return tab; /* return created symbol table */
208
+ } /* st_create() */
209
+
210
+ /*--------------------------------------------------------------------*/
211
+
212
+ void ap_st_delete (SYMTAB *tab)
213
+ { /* --- delete a symbol table */
214
+ assert(tab && tab->bvec); /* check argument */
215
+ _delsym(tab); /* delete all symbols, */
216
+ free(tab->bvec); /* the bucket vector, */
217
+ if (tab->ids) free(tab->ids); /* the identifier vector, */
218
+ free(tab); /* and the symbol table body */
219
+ } /* ap_st_delete() */
220
+
221
+ /*--------------------------------------------------------------------*/
222
+
223
+ void* ap_st_insert (SYMTAB *tab, const char *name, int type,
224
+ unsigned size)
225
+ { /* --- insert a symbol */
226
+ unsigned h; /* hash value */
227
+ int i; /* index of hash bucket */
228
+ STE *ste; /* to traverse bucket list */
229
+ STE *nel; /* new symbol table element */
230
+
231
+ assert(tab && name /* check the function arguments */
232
+ && ((size >= sizeof(int)) || (tab->vsz == INT_MAX)));
233
+ if ((tab->cnt /4 > tab->size) /* if buckets are rather full and */
234
+ && (tab->size < tab->max)) /* table does not have maximal size, */
235
+ _reorg(tab); /* reorganize the hash table */
236
+
237
+ h = tab->hash(name, type); /* compute the hash value and */
238
+ i = h % tab->size; /* the index of the hash bucket */
239
+ ste = tab->bvec[i]; /* get first element in bucket */
240
+ while (ste) { /* traverse the bucket list */
241
+ if ((type == ste->type) /* if symbol found */
242
+ && (strcmp(name, ste->name) == 0))
243
+ break; /* abort the loop */
244
+ ste = ste->succ; /* otherwise get the successor */
245
+ } /* element in the hash bucket */
246
+ if (ste /* if symbol found on current level */
247
+ && (ste->level == tab->level))
248
+ return EXISTS; /* return 'symbol exists' */
249
+
250
+ #ifdef NIMAPFN /* if name/identifier map management */
251
+ if (tab->cnt >= tab->vsz) { /* if the identifier vector is full */
252
+ int vsz, **tmp; /* (new) id vector and its size */
253
+ vsz = tab->vsz +((tab->vsz > BLKSIZE) ? tab->vsz >> 1 : BLKSIZE);
254
+ tmp = (int**)realloc(tab->ids, vsz *sizeof(int*));
255
+ if (!tmp) return NULL; /* resize the identifier vector and */
256
+ tab->ids = tmp; tab->vsz = vsz; /* set new vector and its size */
257
+ } /* (no resizing for symbol tables */
258
+ #endif /* since then tab->vsz = MAX_INT) */
259
+
260
+ nel = (STE*)malloc(sizeof(STE) +size +strlen(name) +1);
261
+ if (!nel) return NULL; /* allocate memory for new symbol */
262
+ nel->name = (char*)(nel+1) +size; /* and organize it */
263
+ strcpy(nel->name, name); /* note the symbol name, */
264
+ nel->type = type; /* the symbol type, and the */
265
+ nel->level = tab->level; /* current visibility level */
266
+ nel->succ = tab->bvec[i]; /* insert new symbol at the head */
267
+ tab->bvec[i] = nel++; /* of the bucket list */
268
+ #ifdef NIMAPFN /* if name/identifier maps are */
269
+ if (tab->ids) { /* supported and this is such a map */
270
+ tab->ids[tab->cnt] = (int*)nel;
271
+ *(int*)nel = tab->cnt; /* store the new symbol */
272
+ } /* in the identifier vector */
273
+ #endif /* and set the symbol identifier */
274
+ tab->cnt++; /* increment the symbol counter */
275
+ return nel; /* return pointer to data field */
276
+ } /* ap_st_insert() */
277
+
278
+ /*--------------------------------------------------------------------*/
279
+
280
+ int st_remove (SYMTAB *tab, const char *name, int type)
281
+ { /* --- remove a symbol/all symbols */
282
+ int i; /* index of hash bucket */
283
+ STE **p, *ste; /* to traverse bucket list */
284
+
285
+ assert(tab); /* check for a valid symbol table */
286
+
287
+ /* --- remove all symbols --- */
288
+ if (!name) { /* if no symbol name given */
289
+ _delsym(tab); /* delete all symbols */
290
+ tab->cnt = tab->level = 0; /* reset visibility level */
291
+ return 0; /* and symbol counter */
292
+ } /* and return 'ok' */
293
+
294
+ /* --- remove one symbol --- */
295
+ i = tab->hash(name, type) % tab->size;
296
+ p = tab->bvec +i; /* compute index of hash bucket */
297
+ while (*p) { /* and traverse bucket list */
298
+ if (((*p)->type == type) /* if symbol found */
299
+ && (strcmp(name, (*p)->name) == 0))
300
+ break; /* abort loop */
301
+ p = &(*p)->succ; /* otherwise get successor */
302
+ } /* in hash bucket */
303
+ ste = *p; /* if the symbol does not exist, */
304
+ if (!ste) return -1; /* abort the function */
305
+ *p = ste->succ; /* remove symbol from hash bucket */
306
+ if (tab->delfn) tab->delfn(ste +1); /* delete user data */
307
+ free(ste); /* and symbol table element */
308
+ tab->cnt--; /* decrement symbol counter */
309
+ return 0; /* return 'ok' */
310
+ } /* st_remove() */
311
+
312
+ /*--------------------------------------------------------------------*/
313
+ /* Ruby core also defines a function named 'st_lookup', so the lookup function below carries the prefix 'ap_' to avoid the name clash. */
314
+
315
+ void* ap_st_lookup (SYMTAB *tab, const char *name, int type)
316
+ { /* --- look up a symbol */
317
+ int i; /* index of hash bucket */
318
+ STE *ste; /* to traverse bucket list */
319
+
320
+ assert(tab && name); /* check arguments */
321
+ i = tab->hash(name, type) % tab->size;
322
+ ste = tab->bvec[i]; /* compute index of hash bucket */
323
+ while (ste) { /* and traverse bucket list */
324
+ if ((ste->type == type) /* if symbol found */
325
+ && (strcmp(name, ste->name) == 0))
326
+ return ste +1; /* return pointer to assoc. data */
327
+ ste = ste->succ; /* otherwise get successor */
328
+ } /* in hash bucket */
329
+ return NULL; /* return 'not found' */
330
+ } /* ap_st_lookup() */
331
+
332
+ /*--------------------------------------------------------------------*/
333
+
334
+ void st_endblk (SYMTAB *tab)
335
+ { /* --- remove one visibility level */
336
+ int i; /* loop variable */
337
+ STE *ste, *tmp; /* to traverse bucket lists */
338
+
339
+ assert(tab); /* check for a valid symbol table */
340
+ if (tab->level <= 0) return; /* if on level 0, abort */
341
+ for (i = tab->size; --i >= 0; ) { /* traverse bucket vector */
342
+ ste = tab->bvec[i]; /* get next bucket list */
343
+ while (ste /* and remove all symbols */
344
+ && (ste->level >= tab->level)) { /* of higher level */
345
+ tmp = ste; /* note symbol and */
346
+ ste = ste->succ; /* get successor */
347
+ if (tab->delfn) tab->delfn(tmp +1);
348
+ free(tmp); /* delete user data and */
349
+ tab->cnt--; /* symbol table element */
350
+ } /* and decrement symbol counter */
351
+ tab->bvec[i] = ste; /* set new start of bucket list */
352
+ }
353
+ tab->level--; /* go up one level */
354
+ } /* st_endblk() */
355
+
356
+ /*--------------------------------------------------------------------*/
357
+ #ifndef NDEBUG
358
+
359
+ void st_stats (const SYMTAB *tab)
360
+ { /* --- compute and print statistics */
361
+ const STE *ste; /* to traverse bucket lists */
362
+ int i; /* loop variable */
363
+ int used; /* number of used hash buckets */
364
+ int len; /* length of current bucket list */
365
+ int min, max; /* min. and max. bucket list length */
366
+ int cnts[10]; /* counter for bucket list lengths */
367
+
368
+ assert(tab); /* check for a valid symbol table */
369
+ min = INT_MAX; max = used = 0;/* initialize variables */
370
+ for (i = 10; --i >= 0; ) cnts[i] = 0;
371
+ for (i = tab->size; --i >= 0; ) { /* traverse bucket vector */
372
+ len = 0; /* determine bucket list length */
373
+ for (ste = tab->bvec[i]; ste; ste = ste->succ) len++;
374
+ if (len > 0) used++; /* count used hash buckets */
375
+ if (len < min) min = len; /* determine minimal and */
376
+ if (len > max) max = len; /* maximal list length */
377
+ cnts[(len >= 9) ? 9 : len]++;
378
+ } /* count list length */
379
+ printf("number of symbols : %d\n", tab->cnt);
380
+ printf("number of hash buckets: %d\n", tab->size);
381
+ printf("used hash buckets : %d\n", used);
382
+ printf("minimal list length : %d\n", min);
383
+ printf("maximal list length : %d\n", max);
384
+ printf("average list length : %g\n", (double)tab->cnt/tab->size);
385
+ printf("ditto, of used buckets: %g\n", (double)tab->cnt/used);
386
+ printf("length distribution :\n");
387
+ for (i = 0; i < 9; i++) printf("%3d ", i);
388
+ printf(" >8\n");
389
+ for (i = 0; i < 9; i++) printf("%3d ", cnts[i]);
390
+ printf("%3d\n", cnts[9]);
391
+ } /* st_stats() */
392
+
393
+ #endif
394
+ /*----------------------------------------------------------------------
395
+ Name/Identifier Map Functions
396
+ ----------------------------------------------------------------------*/
397
+ #ifdef NIMAPFN
398
+
399
+ NIMAP* nim_create (int init, int max, HASHFN hash, SYMFN delfn)
400
+ { /* --- create a name/identifier map */
401
+ NIMAP *nim; /* created name/identifier map */
402
+
403
+ nim = st_create(init, max, hash, delfn);
404
+ if (!nim) return NULL; /* create a name/identifier map */
405
+ nim->vsz = 0; /* and clear the id. vector size */
406
+ return nim; /* return created name/id map */
407
+ } /* nim_create() */
408
+
409
+ /*--------------------------------------------------------------------*/
410
+
411
+ void nim_sort (NIMAP *nim, SYMCMPFN cmpfn, void *data,
412
+ int *map, int dir)
413
+ { /* --- sort name/identifier map */
414
+ int i; /* loop variable */
415
+ int **p; /* to traverse the value vector */
416
+
417
+ assert(nim && cmpfn); /* check the function arguments */
418
+ v_sort(nim->ids, nim->cnt, cmpfn, data);
419
+ if (!map) { /* if no conversion map is requested */
420
+ for (p = nim->ids +(i = nim->cnt); --i >= 0; )
421
+ **--p = i; } /* just set new identifiers */
422
+ else { /* if a conversion map is requested, */
423
+ p = nim->ids +(i = nim->cnt); /* traverse the sorted vector */
424
+ if (dir < 0) /* if backward map (i.e. new -> old) */
425
+ while (--i >= 0) { map[i] = **--p; **p = i; }
426
+ else /* if forward map (i.e. old -> new) */
427
+ while (--i >= 0) { map[**--p] = i; **p = i; }
428
+ } /* (build conversion map) */
429
+ } /* nim_sort() */
430
+
431
+ /*--------------------------------------------------------------------*/
432
+
433
+ void nim_trunc (NIMAP *nim, int n)
434
+ { /* --- truncate name/identifier map */
435
+ int *id; /* to access the identifiers */
436
+
437
+ while (nim->cnt > n) { /* while to remove mappings */
438
+ id = nim->ids[nim->cnt -1]; /* get the identifier object */
439
+ st_remove(nim, st_name(id), 0);
440
+ } /* remove the symbol table element */
441
+ } /* nim_trunc() */
442
+
443
+ #endif