apriori 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. data/History.txt +16 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +121 -0
  4. data/README.txt +149 -0
  5. data/Rakefile +15 -0
  6. data/TODO.txt +60 -0
  7. data/attic/c_ext_test1/MyTest/MyTest.c +23 -0
  8. data/attic/c_ext_test1/MyTest/extconf.rb +11 -0
  9. data/attic/c_ext_test1/mytest.rb +10 -0
  10. data/attic/test.c +12 -0
  11. data/config/hoe.rb +81 -0
  12. data/config/requirements.rb +29 -0
  13. data/examples/01_simple_example.rb +32 -0
  14. data/examples/02_small_file_example.rb +17 -0
  15. data/examples/03_large_file_example.rb +22 -0
  16. data/examples/test_data/market_basket_basic_test.dat +9 -0
  17. data/ext/Apriori.c +149 -0
  18. data/ext/Makefile +149 -0
  19. data/ext/apriori/doc/apriori.html +1301 -0
  20. data/ext/apriori/doc/arem.gp +68 -0
  21. data/ext/apriori/doc/c_rev.gp +89 -0
  22. data/ext/apriori/doc/chi2.tex +156 -0
  23. data/ext/apriori/doc/copying +504 -0
  24. data/ext/apriori/doc/line.gif +0 -0
  25. data/ext/apriori/doc/uparrow.gif +0 -0
  26. data/ext/apriori/ex/flg2set +15 -0
  27. data/ext/apriori/ex/hdr2set +13 -0
  28. data/ext/apriori/ex/readme +71 -0
  29. data/ext/apriori/ex/row2set +7 -0
  30. data/ext/apriori/ex/rulesort +24 -0
  31. data/ext/apriori/ex/tab2set +9 -0
  32. data/ext/apriori/ex/test.app +2 -0
  33. data/ext/apriori/ex/test.rul +9 -0
  34. data/ext/apriori/ex/test1.rul +43 -0
  35. data/ext/apriori/ex/test1.tab +10 -0
  36. data/ext/apriori/ex/test2.tab +10 -0
  37. data/ext/apriori/ex/test3.tab +30 -0
  38. data/ext/apriori/ex/test4.tab +11 -0
  39. data/ext/apriori/ex/test5.tab +39 -0
  40. data/ext/apriori/ex/tid2set +23 -0
  41. data/ext/apriori/ex/xhdr2set +33 -0
  42. data/ext/apriori/src/apriori.c +750 -0
  43. data/ext/apriori/src/apriori.dsp +120 -0
  44. data/ext/apriori/src/apriori.dsw +29 -0
  45. data/ext/apriori/src/apriori.mak +99 -0
  46. data/ext/apriori/src/istree.c +1411 -0
  47. data/ext/apriori/src/istree.h +160 -0
  48. data/ext/apriori/src/makefile +105 -0
  49. data/ext/apriori/src/tract.c +870 -0
  50. data/ext/apriori/src/tract.h +261 -0
  51. data/ext/apriori_wrapper.c +757 -0
  52. data/ext/apriori_wrapper.h +10 -0
  53. data/ext/extconf.rb +32 -0
  54. data/ext/math/doc/copying +504 -0
  55. data/ext/math/src/chi2.c +151 -0
  56. data/ext/math/src/chi2.h +27 -0
  57. data/ext/math/src/choose.c +71 -0
  58. data/ext/math/src/choose.h +16 -0
  59. data/ext/math/src/gamma.c +446 -0
  60. data/ext/math/src/gamma.h +39 -0
  61. data/ext/math/src/intexp.c +35 -0
  62. data/ext/math/src/intexp.h +15 -0
  63. data/ext/math/src/makefile +164 -0
  64. data/ext/math/src/math.mak +48 -0
  65. data/ext/math/src/normal.c +387 -0
  66. data/ext/math/src/normal.h +44 -0
  67. data/ext/math/src/radfn.c +113 -0
  68. data/ext/math/src/radfn.h +34 -0
  69. data/ext/math/src/zeta.c +49 -0
  70. data/ext/math/src/zeta.h +15 -0
  71. data/ext/pre-clean.rb +8 -0
  72. data/ext/pre-setup.rb +9 -0
  73. data/ext/util/doc/copying +504 -0
  74. data/ext/util/src/listops.c +76 -0
  75. data/ext/util/src/listops.h +26 -0
  76. data/ext/util/src/makefile +103 -0
  77. data/ext/util/src/memsys.c +84 -0
  78. data/ext/util/src/memsys.h +42 -0
  79. data/ext/util/src/nstats.c +288 -0
  80. data/ext/util/src/nstats.h +69 -0
  81. data/ext/util/src/params.c +86 -0
  82. data/ext/util/src/params.h +19 -0
  83. data/ext/util/src/parse.c +133 -0
  84. data/ext/util/src/parse.h +81 -0
  85. data/ext/util/src/scan.c +767 -0
  86. data/ext/util/src/scan.h +111 -0
  87. data/ext/util/src/symtab.c +443 -0
  88. data/ext/util/src/symtab.h +121 -0
  89. data/ext/util/src/tabscan.c +279 -0
  90. data/ext/util/src/tabscan.h +99 -0
  91. data/ext/util/src/util.mak +91 -0
  92. data/ext/util/src/vecops.c +317 -0
  93. data/ext/util/src/vecops.h +42 -0
  94. data/lib/apriori.rb +133 -0
  95. data/lib/apriori/adapter.rb +13 -0
  96. data/lib/apriori/association_rule.rb +89 -0
  97. data/lib/apriori/version.rb +9 -0
  98. data/script/console +10 -0
  99. data/script/destroy +14 -0
  100. data/script/generate +14 -0
  101. data/script/txt2html +82 -0
  102. data/setup.rb +1585 -0
  103. data/tasks/apriori.rake +20 -0
  104. data/tasks/attic.rake +28 -0
  105. data/tasks/deployment.rake +34 -0
  106. data/tasks/environment.rake +7 -0
  107. data/tasks/install.rake +13 -0
  108. data/tasks/website.rake +17 -0
  109. data/test/apriori_test.rb +13 -0
  110. data/test/fixtures/market_basket_results_test.txt +5 -0
  111. data/test/fixtures/market_basket_string_test.txt +7 -0
  112. data/test/fixtures/results.txt +2 -0
  113. data/test/fixtures/sample.txt +7 -0
  114. data/test/test_helper.rb +5 -0
  115. data/test/unit/test_apriori.rb +68 -0
  116. data/test/unit/test_itemsets_and_parsing.rb +82 -0
  117. data/website/index.html +248 -0
  118. data/website/index.txt +152 -0
  119. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  120. data/website/stylesheets/screen.css +142 -0
  121. data/website/template.html.erb +49 -0
  122. metadata +226 -0
@@ -0,0 +1,111 @@
1
+ /*----------------------------------------------------------------------
2
+ File : scan.h
3
+ Contents: scanner (lexical analysis of a character stream)
4
+ Author : Christian Borgelt
5
+ History : 1996.01.16 file created
6
+ 1996.02.21 definition of BUFSIZE made global
7
+ 1996.03.17 special tokens for keywords removed
8
+ 1998.02.08 recover and error message functions added
9
+ 1998.02.13 token T_RGT (right arrow '->') added
10
+ 1998.03.04 definitions of T_ID and T_NUM exchanged
11
+ 1998.04.17 token T_LFT (left arrow '<-') added
12
+ 1998.05.27 token T_CMP (two char comparison operator) added
13
+ 1998.05.31 field f removed from struct TOKVAL
14
+ 2000.11.23 functions sc_len and sc_form added
15
+ 2001.07.15 scanner made an object
16
+ 2006.02.02 token T_DASH (undirected edge '--') added
17
+ ----------------------------------------------------------------------*/
18
+ #ifndef __SCAN__
19
+ #define __SCAN__
20
+ #include <stdio.h>
21
+
22
+ /*----------------------------------------------------------------------
23
+ Preprocessor Definitions
24
+ ----------------------------------------------------------------------*/
25
+ /* --- sizes --- */
26
+ #define SC_BUFSIZE 1024 /* size of scan buffer */
27
+
28
+ #ifdef SC_SCAN
29
+ /* --- tokens --- */
30
+ #define T_EOF 256 /* end of file */
31
+ #define T_NUM 257 /* number (floating point) */
32
+ #define T_ID 258 /* identifier or string */
33
+ #define T_RGT 259 /* right arrow '->' */
34
+ #define T_LFT 260 /* left arrow '<-' */
35
+ #define T_DASH 261 /* dash '--' */
36
+ #define T_CMP 262 /* two char. comparison, e.g. '<=' */
37
+
38
+ /* --- error codes --- */
39
+ #define E_NONE 0 /* no error */
40
+ #define E_NOMEM (-1) /* not enough memory */
41
+ #define E_FOPEN (-2) /* file open failed */
42
+ #define E_FREAD (-3) /* file read failed */
43
+ #define E_FWRITE (-4) /* file write failed (unused) */
44
+ #define E_CHAR (-5) /* invalid character */
45
+ #define E_BUFOVF (-6) /* scan buffer overflow */
46
+ #define E_UNTSTR (-7) /* unterminated string */
47
+ #define E_UNTCOM (-8) /* unterminated comment */
48
+ #define E_STATE (-9) /* invalid scanner state */
49
+ #define E_GARBAGE (-10) /* garbage at end of file */
50
+
51
+ #endif
52
+ /*----------------------------------------------------------------------
53
+ Type Definitions
54
+ ----------------------------------------------------------------------*/
55
+ #ifdef SC_SCAN
56
+ typedef struct { /* --- scanner --- */
57
+ FILE *file; /* file to scan */
58
+ int start; /* start line of comment */
59
+ int line, pline; /* input line of file */
60
+ int token, ptoken; /* token */
61
+ int len, plen; /* length of token value */
62
+ int back; /* flag for backward step */
63
+ char *value; /* token value */
64
+ char buf[2][SC_BUFSIZE]; /* scan buffers */
65
+ FILE *errfile; /* error output stream */
66
+ int lncnt; /* error message line count */
67
+ int msgcnt; /* number of add. error messages */
68
+ const char **msgs; /* additional error messages */
69
+ char fname[1]; /* name of file to scan */
70
+ } SCAN; /* (scanner) */
71
+ #endif
72
+
73
+ /*----------------------------------------------------------------------
74
+ Functions
75
+ ----------------------------------------------------------------------*/
76
+ extern int sc_fmtlen (const char *s, int *len);
77
+ extern int sc_format (char *dst, const char *src, int quotes);
78
+
79
+ #ifdef SC_SCAN
80
+ extern SCAN* sc_create (const char *fname);
81
+ extern void sc_delete (SCAN *scan);
82
+ extern const char* sc_fname (SCAN *scan);
83
+ extern int sc_line (SCAN *scan);
84
+
85
+ extern int sc_next (SCAN *scan);
86
+ extern int sc_nexter (SCAN *scan);
87
+ extern int sc_back (SCAN *scan);
88
+
89
+ extern int sc_token (SCAN *scan);
90
+ extern const char* sc_value (SCAN *scan);
91
+ extern int sc_len (SCAN *scan);
92
+ extern int sc_eof (SCAN *scan);
93
+
94
+ extern int sc_recover (SCAN *scan,
95
+ int stop, int beg, int end, int level);
96
+ extern void sc_errfile (SCAN *scan, FILE *file, int lncnt);
97
+ extern void sc_errmsgs (SCAN *scan, const char *msgs[], int cnt);
98
+ extern int sc_error (SCAN *scan, int code, ...);
99
+
100
+ /*----------------------------------------------------------------------
101
+ Preprocessor Definitions
102
+ ----------------------------------------------------------------------*/
103
+ #define sc_fname(s) ((const char*)(s)->fname)
104
+ #define sc_line(s) ((s)->line)
105
+
106
+ #define sc_token(s) ((s)->token)
107
+ #define sc_value(s) ((const char*)(s)->value)
108
+ #define sc_len(s) ((s)->len)
109
+
110
+ #endif /* #ifdef SC_SCAN */
111
+ #endif
@@ -0,0 +1,443 @@
1
+ /*----------------------------------------------------------------------
2
+ File : symtab.c
3
+ Contents: symbol table management
4
+ Author : Christian Borgelt
5
+ History : 1995.10.22 file created
6
+ 1995.10.30 functions made independent of symbol data
7
+ 1995.11.26 symbol types and visibility levels added
8
+ 1996.01.04 st_clear added
9
+ 1996.02.27 st_insert modified
10
+ 1996.06.28 dynamic bucket vector enlargement added
11
+ 1996.07.04 bug in bucket reorganization removed
12
+ 1997.04.01 functions st_clear and st_remove combined
13
+ 1997.07.29 minor improvements
14
+ 1997.08.05 minor improvements
15
+ 1997.11.16 some comments improved
16
+ 1998.02.06 default table sizes changed
17
+ 1998.05.31 list of all symbols removed
18
+ 1998.06.20 deletion function moved to st_create
19
+ 1998.07.14 minor improvements
20
+ 1998.09.01 bug in function _sort removed, assertions added
21
+ 1998.09.25 hash function improved
22
+ 1998.09.28 types ULONG and CCHAR removed, st_stats added
23
+ 1999.02.04 long int changed to int
24
+ 1999.11.10 name/identifier map management added
25
+ 2003.08.15 renamed new to nel in st_insert (C++ compat.)
26
+ 2004.12.15 function nim_trunc added
27
+ 2004.12.28 bug in function nim_trunc fixed
28
+ ----------------------------------------------------------------------*/
29
+ #include <stdio.h>
30
+ #include <stdlib.h>
31
+ #include <string.h>
32
+ #include <limits.h>
33
+ #include <assert.h>
34
+ #include "symtab.h"
35
+ #ifdef NIMAPFN
36
+ #include "vecops.h"
37
+ #endif
38
+ #ifdef STORAGE
39
+ #include "storage.h"
40
+ #endif
41
+
42
+ /*----------------------------------------------------------------------
43
+ Preprocessor Definitions
44
+ ----------------------------------------------------------------------*/
45
+ #define DFLT_INIT 1023 /* default initial hash table size */
46
+ #if (INT_MAX > 32767)
47
+ #define DFLT_MAX 1048575 /* default maximal hash table size */
48
+ #else
49
+ #define DFLT_MAX 16383 /* default maximal hash table size */
50
+ #endif
51
+ #define BLKSIZE 256 /* block size for identifier vector */
52
+
53
+ /*----------------------------------------------------------------------
54
+ Default Hash Function
55
+ ----------------------------------------------------------------------*/
56
+
57
/* Default hash function.
   The hash value is seeded with the symbol type; each character of the
   name is then folded in with a shift-by-three and exclusive-or step.
   Returns the raw hash value (not reduced to a bucket index). */
static unsigned _hdflt (const char *name, int type)
{
  unsigned   hval = type;       /* hash value, seeded with the type */
  const char *p;                /* to traverse the symbol name */

  for (p = name; *p != '\0'; p++) {
    unsigned mix;               /* contribution of this character */
    mix   = (hval << 3) ^ (unsigned)(*p);
    hval ^= mix;                /* fold it into the hash value */
  }
  return hval;                  /* return the computed hash value */
}  /* _hdflt() */
64
+
65
+ /*----------------------------------------------------------------------
66
+ Auxiliary Functions
67
+ ----------------------------------------------------------------------*/
68
+
69
+ static void _delsym (SYMTAB *tab)
70
+ { /* --- delete all symbols */
71
+ int i; /* loop variable */
72
+ STE *ste, *tmp; /* to traverse the symbol list */
73
+
74
+ for (i = tab->size; --i >= 0; ) { /* traverse bucket vector */
75
+ ste = tab->bvec[i]; /* get the next bucket list, */
76
+ tab->bvec[i] = NULL; /* clear the bucket vector entry, */
77
+ while (ste) { /* and traverse the bucket list */
78
+ tmp = ste; /* note the symbol to delete */
79
+ ste = ste->succ; /* and get the next symbol */
80
+ if (tab->delfn) tab->delfn(tmp +1);
81
+ free(tmp); /* if a deletion function is given, */
82
+ } /* call it and then deallocate */
83
+ } /* the symbol table element */
84
+ } /* _delsym() */
85
+
86
+ /*--------------------------------------------------------------------*/
87
+
88
+ static STE** _merge (STE *in[], int cnt[], STE **out)
89
+ { /* --- merge two lists into one */
90
+ int k; /* index of input list */
91
+
92
+ do { /* compare and merge loop */
93
+ k = (in[0]->level > in[1]->level) ? 0 : 1;
94
+ *out = in[k]; /* append the element on the higher */
95
+ out = &(*out)->succ; /* level to the output list and */
96
+ in[k] = *out; /* remove it from the input list */
97
+ } while (--cnt[k] > 0); /* while both lists are not empty */
98
+ *out = in[k ^= 1]; /* append remaining elements */
99
+ while (--cnt[k] >= 0) /* while not at the end of the list */
100
+ out = &(*out)->succ; /* go to the successor element */
101
+ in[k] = *out; /* set new start of the input list */
102
+ *out = NULL; /* terminate the output list and */
103
+ return out; /* return new end of the output list */
104
+ } /* _merge() */
105
+
106
+ /*--------------------------------------------------------------------*/
107
+
108
+ static STE* _sort (STE *list)
109
+ { /* --- sort a hash bucket list */
110
+ STE *ste; /* to traverse the list, buffer */
111
+ STE *in[2], *out[2]; /* input and output lists */
112
+ STE **end[2]; /* ends of output lists */
113
+ int cnt[2]; /* number of elements to merge */
114
+ int run; /* run length in input lists */
115
+ int rem; /* elements in remainder collection */
116
+ int oid; /* index of output list */
117
+
118
+ if (!list) return list; /* empty lists need not to be sorted */
119
+ oid = 0; out[0] = list; /* traverse list elements */
120
+ for (ste = list->succ; ste; ste = ste->succ)
121
+ if ((oid ^= 1) == 0) list = list->succ;
122
+ out[1] = list->succ; /* split list into two equal parts */
123
+ list = list->succ = NULL; /* initialize remainder collection */
124
+ run = 1; rem = 0; /* and run length */
125
+ while (out[1]) { /* while there are two lists */
126
+ in [0] = out[0]; in [1] = out[1]; /* move output list to input */
127
+ end[0] = out; end[1] = out+1; /* reinitialize end pointers */
128
+ out[1] = NULL; oid = 0; /* start with 1st output list */
129
+ do { /* merge loop */
130
+ cnt[0] = cnt[1] = run; /* merge run elements from the */
131
+ end[oid] = _merge(in, cnt, end[oid]); /* input lists */
132
+ oid ^= 1; /* toggle index of output list */
133
+ } while (in[1]); /* while both lists are not empty */
134
+ if (in[0]) { /* if there is one input list left */
135
+ if (!list) /* if there is no rem. collection, */
136
+ list = in[0]; /* just note the rem. input list */
137
+ else { /* if there is a rem. collection, */
138
+ cnt[0] = run; cnt[1] = rem; in[1] = list;
139
+ _merge(in, cnt, &list); /* merge it and the input list to */
140
+ } /* get the new renmainder collection */
141
+ rem += run; /* there are now run more elements */
142
+ } /* in the remainder collection */
143
+ run <<= 1; /* double run length */
144
+ } /* while (out[1]) .. */
145
+ if (rem > 0) { /* if there is a rem. collection */
146
+ in[0] = out[0]; cnt[0] = run;
147
+ in[1] = list; cnt[1] = rem;
148
+ _merge(in, cnt, out); /* merge it to the output list */
149
+ } /* and store the result in out[0] */
150
+ return out[0]; /* return the sorted list */
151
+ } /* _sort() */
152
+
153
+ /*--------------------------------------------------------------------*/
154
+
155
+ static void _reorg (SYMTAB *tab)
156
+ { /* --- reorganize a hash table */
157
+ int i; /* loop variable */
158
+ int size; /* new bucket vector size */
159
+ STE **p; /* new bucket vector, buffer */
160
+ STE *ste; /* to traverse symbol table elements */
161
+ STE *list = NULL; /* list of all symbols */
162
+
163
+ size = (tab->size << 1) +1; /* calculate new vector size */
164
+ if (size > tab->max) /* if new size exceeds maximum, */
165
+ size = tab->max; /* set the maximal size */
166
+ for (p = &list, i = tab->size; --i >= 0; ) {
167
+ *p = tab->bvec[i]; /* traverse the bucket vector and */
168
+ while (*p) p = &(*p)->succ; /* link all bucket lists together */
169
+ } /* (collect symbols) */
170
+ p = (STE**)realloc(tab->bvec, size *sizeof(STE*));
171
+ if (!p) return; /* enlarge bucket vector */
172
+ tab->bvec = p; /* set new bucket vector */
173
+ tab->size = size; /* and its size */
174
+ for (p += i = size; --i >= 0; )
175
+ *--p = NULL; /* clear the hash buckets */
176
+ while (list) { /* traverse list of all symbols */
177
+ ste = list; list = list->succ; /* get next symbol */
178
+ i = tab->hash(ste->name, ste->type) %size;
179
+ ste->succ = tab->bvec[i]; /* compute the hash bucket index */
180
+ tab->bvec[i] = ste; /* and insert the symbol at */
181
+ } /* the head of the bucket list */
182
+ for (i = size; --i >= 0; ) /* sort bucket lists according to */
183
+ tab->bvec[i] = _sort(tab->bvec[i]); /* the visibility level */
184
+ } /* _reorg() */
185
+
186
+ /*----------------------------------------------------------------------
187
+ Symbol Table Functions
188
+ ----------------------------------------------------------------------*/
189
+
190
+ SYMTAB* st_create (int init, int max, HASHFN hash, SYMFN delfn)
191
+ { /* --- create a symbol table */
192
+ SYMTAB *tab; /* created symbol table */
193
+
194
+ if (init <= 0) init = DFLT_INIT; /* check and adapt initial */
195
+ if (max <= 0) max = DFLT_MAX; /* and maximal vector size */
196
+ tab = (SYMTAB*)malloc(sizeof(SYMTAB));
197
+ if (!tab) return NULL; /* allocate symbol table body */
198
+ tab->bvec = (STE**)calloc(init, sizeof(STE*));
199
+ if (!tab->bvec) { free(tab); return NULL; }
200
+ tab->level = tab->cnt = 0; /* allocate bucket vector */
201
+ tab->size = init; /* and initialize fields */
202
+ tab->max = max; /* of symbol table body */
203
+ tab->hash = (hash) ? hash : _hdflt;
204
+ tab->delfn = delfn;
205
+ tab->vsz = INT_MAX;
206
+ tab->ids = NULL;
207
+ return tab; /* return created symbol table */
208
+ } /* st_create() */
209
+
210
+ /*--------------------------------------------------------------------*/
211
+
212
+ void ap_st_delete (SYMTAB *tab)
213
+ { /* --- delete a symbol table */
214
+ assert(tab && tab->bvec); /* check argument */
215
+ _delsym(tab); /* delete all symbols, */
216
+ free(tab->bvec); /* the bucket vector, */
217
+ if (tab->ids) free(tab->ids); /* the identifier vector, */
218
+ free(tab); /* and the symbol table body */
219
+ } /* ap_st_delete() */
220
+
221
+ /*--------------------------------------------------------------------*/
222
+
223
+ void* ap_st_insert (SYMTAB *tab, const char *name, int type,
224
+ unsigned size)
225
+ { /* --- insert a symbol */
226
+ unsigned h; /* hash value */
227
+ int i; /* index of hash bucket */
228
+ STE *ste; /* to traverse bucket list */
229
+ STE *nel; /* new symbol table element */
230
+
231
+ assert(tab && name /* check the function arguments */
232
+ && ((size >= sizeof(int)) || (tab->vsz == INT_MAX)));
233
+ if ((tab->cnt /4 > tab->size) /* if buckets are rather full and */
234
+ && (tab->size < tab->max)) /* table does not have maximal size, */
235
+ _reorg(tab); /* reorganize the hash table */
236
+
237
+ h = tab->hash(name, type); /* compute the hash value and */
238
+ i = h % tab->size; /* the index of the hash bucket */
239
+ ste = tab->bvec[i]; /* get first element in bucket */
240
+ while (ste) { /* traverse the bucket list */
241
+ if ((type == ste->type) /* if symbol found */
242
+ && (strcmp(name, ste->name) == 0))
243
+ break; /* abort the loop */
244
+ ste = ste->succ; /* otherwise get the successor */
245
+ } /* element in the hash bucket */
246
+ if (ste /* if symbol found on current level */
247
+ && (ste->level == tab->level))
248
+ return EXISTS; /* return 'symbol exists' */
249
+
250
+ #ifdef NIMAPFN /* if name/identifier map management */
251
+ if (tab->cnt >= tab->vsz) { /* if the identifier vector is full */
252
+ int vsz, **tmp; /* (new) id vector and its size */
253
+ vsz = tab->vsz +((tab->vsz > BLKSIZE) ? tab->vsz >> 1 : BLKSIZE);
254
+ tmp = (int**)realloc(tab->ids, vsz *sizeof(int*));
255
+ if (!tmp) return NULL; /* resize the identifier vector and */
256
+ tab->ids = tmp; tab->vsz = vsz; /* set new vector and its size */
257
+ } /* (no resizing for symbol tables */
258
+ #endif /* since then tab->vsz = MAX_INT) */
259
+
260
+ nel = (STE*)malloc(sizeof(STE) +size +strlen(name) +1);
261
+ if (!nel) return NULL; /* allocate memory for new symbol */
262
+ nel->name = (char*)(nel+1) +size; /* and organize it */
263
+ strcpy(nel->name, name); /* note the symbol name, */
264
+ nel->type = type; /* the symbol type, and the */
265
+ nel->level = tab->level; /* current visibility level */
266
+ nel->succ = tab->bvec[i]; /* insert new symbol at the head */
267
+ tab->bvec[i] = nel++; /* of the bucket list */
268
+ #ifdef NIMAPFN /* if name/identifier maps are */
269
+ if (tab->ids) { /* supported and this is such a map */
270
+ tab->ids[tab->cnt] = (int*)nel;
271
+ *(int*)nel = tab->cnt; /* store the new symbol */
272
+ } /* in the identifier vector */
273
+ #endif /* and set the symbol identifier */
274
+ tab->cnt++; /* increment the symbol counter */
275
+ return nel; /* return pointer to data field */
276
+ } /* ap_st_insert() */
277
+
278
+ /*--------------------------------------------------------------------*/
279
+
280
+ int st_remove (SYMTAB *tab, const char *name, int type)
281
+ { /* --- remove a symbol/all symbols */
282
+ int i; /* index of hash bucket */
283
+ STE **p, *ste; /* to traverse bucket list */
284
+
285
+ assert(tab); /* check for a valid symbol table */
286
+
287
+ /* --- remove all symbols --- */
288
+ if (!name) { /* if no symbol name given */
289
+ _delsym(tab); /* delete all symbols */
290
+ tab->cnt = tab->level = 0; /* reset visibility level */
291
+ return 0; /* and symbol counter */
292
+ } /* and return 'ok' */
293
+
294
+ /* --- remove one symbol --- */
295
+ i = tab->hash(name, type) % tab->size;
296
+ p = tab->bvec +i; /* compute index of hash bucket */
297
+ while (*p) { /* and traverse bucket list */
298
+ if (((*p)->type == type) /* if symbol found */
299
+ && (strcmp(name, (*p)->name) == 0))
300
+ break; /* abort loop */
301
+ p = &(*p)->succ; /* otherwise get successor */
302
+ } /* in hash bucket */
303
+ ste = *p; /* if the symbol does not exist, */
304
+ if (!ste) return -1; /* abort the function */
305
+ *p = ste->succ; /* remove symbol from hash bucket */
306
+ if (tab->delfn) tab->delfn(ste +1); /* delete user data */
307
+ free(ste); /* and symbol table element */
308
+ tab->cnt--; /* decrement symbol counter */
309
+ return 0; /* return 'ok' */
310
+ } /* st_remove() */
311
+
312
+ /*--------------------------------------------------------------------*/
313
+ /* The problem is: ruby core has a function 'st_lookup'. so now what? */
314
+
315
+ void* ap_st_lookup (SYMTAB *tab, const char *name, int type)
316
+ { /* --- look up a symbol */
317
+ int i; /* index of hash bucket */
318
+ STE *ste; /* to traverse bucket list */
319
+
320
+ assert(tab && name); /* check arguments */
321
+ i = tab->hash(name, type) % tab->size;
322
+ ste = tab->bvec[i]; /* compute index of hash bucket */
323
+ while (ste) { /* and traverse bucket list */
324
+ if ((ste->type == type) /* if symbol found */
325
+ && (strcmp(name, ste->name) == 0))
326
+ return ste +1; /* return pointer to assoc. data */
327
+ ste = ste->succ; /* otherwise get successor */
328
+ } /* in hash bucket */
329
+ return NULL; /* return 'not found' */
330
+ } /* ap_st_lookup() */
331
+
332
+ /*--------------------------------------------------------------------*/
333
+
334
+ void st_endblk (SYMTAB *tab)
335
+ { /* --- remove one visibility level */
336
+ int i; /* loop variable */
337
+ STE *ste, *tmp; /* to traverse bucket lists */
338
+
339
+ assert(tab); /* check for a valid symbol table */
340
+ if (tab->level <= 0) return; /* if on level 0, abort */
341
+ for (i = tab->size; --i >= 0; ) { /* traverse bucket vector */
342
+ ste = tab->bvec[i]; /* get next bucket list */
343
+ while (ste /* and remove all symbols */
344
+ && (ste->level >= tab->level)) { /* of higher level */
345
+ tmp = ste; /* note symbol and */
346
+ ste = ste->succ; /* get successor */
347
+ if (tab->delfn) tab->delfn(tmp +1);
348
+ free(tmp); /* delete user data and */
349
+ tab->cnt--; /* symbol table element */
350
+ } /* and decrement symbol counter */
351
+ tab->bvec[i] = ste; /* set new start of bucket list */
352
+ }
353
+ tab->level--; /* go up one level */
354
+ } /* st_endblk() */
355
+
356
+ /*--------------------------------------------------------------------*/
357
+ #ifndef NDEBUG
358
+
359
+ void st_stats (const SYMTAB *tab)
360
+ { /* --- compute and print statistics */
361
+ const STE *ste; /* to traverse bucket lists */
362
+ int i; /* loop variable */
363
+ int used; /* number of used hash buckets */
364
+ int len; /* length of current bucket list */
365
+ int min, max; /* min. and max. bucket list length */
366
+ int cnts[10]; /* counter for bucket list lengths */
367
+
368
+ assert(tab); /* check for a valid symbol table */
369
+ min = INT_MAX; max = used = 0;/* initialize variables */
370
+ for (i = 10; --i >= 0; ) cnts[i] = 0;
371
+ for (i = tab->size; --i >= 0; ) { /* traverse bucket vector */
372
+ len = 0; /* determine bucket list length */
373
+ for (ste = tab->bvec[i]; ste; ste = ste->succ) len++;
374
+ if (len > 0) used++; /* count used hash buckets */
375
+ if (len < min) min = len; /* determine minimal and */
376
+ if (len > max) max = len; /* maximal list length */
377
+ cnts[(len >= 9) ? 9 : len]++;
378
+ } /* count list length */
379
+ printf("number of symbols : %d\n", tab->cnt);
380
+ printf("number of hash buckets: %d\n", tab->size);
381
+ printf("used hash buckets : %d\n", used);
382
+ printf("minimal list length : %d\n", min);
383
+ printf("maximal list length : %d\n", max);
384
+ printf("average list length : %g\n", (double)tab->cnt/tab->size);
385
+ printf("ditto, of used buckets: %g\n", (double)tab->cnt/used);
386
+ printf("length distribution :\n");
387
+ for (i = 0; i < 9; i++) printf("%3d ", i);
388
+ printf(" >8\n");
389
+ for (i = 0; i < 9; i++) printf("%3d ", cnts[i]);
390
+ printf("%3d\n", cnts[9]);
391
+ } /* st_stats() */
392
+
393
+ #endif
394
+ /*----------------------------------------------------------------------
395
+ Name/Identifier Map Functions
396
+ ----------------------------------------------------------------------*/
397
+ #ifdef NIMAPFN
398
+
399
+ NIMAP* nim_create (int init, int max, HASHFN hash, SYMFN delfn)
400
+ { /* --- create a name/identifier map */
401
+ NIMAP *nim; /* created name/identifier map */
402
+
403
+ nim = st_create(init, max, hash, delfn);
404
+ if (!nim) return NULL; /* create a name/identifier map */
405
+ nim->vsz = 0; /* and clear the id. vector size */
406
+ return nim; /* return created name/id map */
407
+ } /* nim_create() */
408
+
409
+ /*--------------------------------------------------------------------*/
410
+
411
+ void nim_sort (NIMAP *nim, SYMCMPFN cmpfn, void *data,
412
+ int *map, int dir)
413
+ { /* --- sort name/identifier map */
414
+ int i; /* loop variable */
415
+ int **p; /* to traverse the value vector */
416
+
417
+ assert(nim && cmpfn); /* check the function arguments */
418
+ v_sort(nim->ids, nim->cnt, cmpfn, data);
419
+ if (!map) { /* if no conversion map is requested */
420
+ for (p = nim->ids +(i = nim->cnt); --i >= 0; )
421
+ **--p = i; } /* just set new identifiers */
422
+ else { /* if a conversion map is requested, */
423
+ p = nim->ids +(i = nim->cnt); /* traverse the sorted vector */
424
+ if (dir < 0) /* if backward map (i.e. new -> old) */
425
+ while (--i >= 0) { map[i] = **--p; **p = i; }
426
+ else /* if forward map (i.e. old -> new) */
427
+ while (--i >= 0) { map[**--p] = i; **p = i; }
428
+ } /* (build conversion map) */
429
+ } /* nim_sort() */
430
+
431
+ /*--------------------------------------------------------------------*/
432
+
433
+ void nim_trunc (NIMAP *nim, int n)
434
+ { /* --- truncate name/identifier map */
435
+ int *id; /* to access the identifiers */
436
+
437
+ while (nim->cnt > n) { /* while to remove mappings */
438
+ id = nim->ids[nim->cnt -1]; /* get the identifier object */
439
+ st_remove(nim, st_name(id), 0);
440
+ } /* remove the symbol table element */
441
+ } /* nim_trunc() */
442
+
443
+ #endif