apriori 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +16 -0
- data/License.txt +20 -0
- data/Manifest.txt +121 -0
- data/README.txt +149 -0
- data/Rakefile +15 -0
- data/TODO.txt +60 -0
- data/attic/c_ext_test1/MyTest/MyTest.c +23 -0
- data/attic/c_ext_test1/MyTest/extconf.rb +11 -0
- data/attic/c_ext_test1/mytest.rb +10 -0
- data/attic/test.c +12 -0
- data/config/hoe.rb +81 -0
- data/config/requirements.rb +29 -0
- data/examples/01_simple_example.rb +32 -0
- data/examples/02_small_file_example.rb +17 -0
- data/examples/03_large_file_example.rb +22 -0
- data/examples/test_data/market_basket_basic_test.dat +9 -0
- data/ext/Apriori.c +149 -0
- data/ext/Makefile +149 -0
- data/ext/apriori/doc/apriori.html +1301 -0
- data/ext/apriori/doc/arem.gp +68 -0
- data/ext/apriori/doc/c_rev.gp +89 -0
- data/ext/apriori/doc/chi2.tex +156 -0
- data/ext/apriori/doc/copying +504 -0
- data/ext/apriori/doc/line.gif +0 -0
- data/ext/apriori/doc/uparrow.gif +0 -0
- data/ext/apriori/ex/flg2set +15 -0
- data/ext/apriori/ex/hdr2set +13 -0
- data/ext/apriori/ex/readme +71 -0
- data/ext/apriori/ex/row2set +7 -0
- data/ext/apriori/ex/rulesort +24 -0
- data/ext/apriori/ex/tab2set +9 -0
- data/ext/apriori/ex/test.app +2 -0
- data/ext/apriori/ex/test.rul +9 -0
- data/ext/apriori/ex/test1.rul +43 -0
- data/ext/apriori/ex/test1.tab +10 -0
- data/ext/apriori/ex/test2.tab +10 -0
- data/ext/apriori/ex/test3.tab +30 -0
- data/ext/apriori/ex/test4.tab +11 -0
- data/ext/apriori/ex/test5.tab +39 -0
- data/ext/apriori/ex/tid2set +23 -0
- data/ext/apriori/ex/xhdr2set +33 -0
- data/ext/apriori/src/apriori.c +750 -0
- data/ext/apriori/src/apriori.dsp +120 -0
- data/ext/apriori/src/apriori.dsw +29 -0
- data/ext/apriori/src/apriori.mak +99 -0
- data/ext/apriori/src/istree.c +1411 -0
- data/ext/apriori/src/istree.h +160 -0
- data/ext/apriori/src/makefile +105 -0
- data/ext/apriori/src/tract.c +870 -0
- data/ext/apriori/src/tract.h +261 -0
- data/ext/apriori_wrapper.c +757 -0
- data/ext/apriori_wrapper.h +10 -0
- data/ext/extconf.rb +32 -0
- data/ext/math/doc/copying +504 -0
- data/ext/math/src/chi2.c +151 -0
- data/ext/math/src/chi2.h +27 -0
- data/ext/math/src/choose.c +71 -0
- data/ext/math/src/choose.h +16 -0
- data/ext/math/src/gamma.c +446 -0
- data/ext/math/src/gamma.h +39 -0
- data/ext/math/src/intexp.c +35 -0
- data/ext/math/src/intexp.h +15 -0
- data/ext/math/src/makefile +164 -0
- data/ext/math/src/math.mak +48 -0
- data/ext/math/src/normal.c +387 -0
- data/ext/math/src/normal.h +44 -0
- data/ext/math/src/radfn.c +113 -0
- data/ext/math/src/radfn.h +34 -0
- data/ext/math/src/zeta.c +49 -0
- data/ext/math/src/zeta.h +15 -0
- data/ext/pre-clean.rb +8 -0
- data/ext/pre-setup.rb +9 -0
- data/ext/util/doc/copying +504 -0
- data/ext/util/src/listops.c +76 -0
- data/ext/util/src/listops.h +26 -0
- data/ext/util/src/makefile +103 -0
- data/ext/util/src/memsys.c +84 -0
- data/ext/util/src/memsys.h +42 -0
- data/ext/util/src/nstats.c +288 -0
- data/ext/util/src/nstats.h +69 -0
- data/ext/util/src/params.c +86 -0
- data/ext/util/src/params.h +19 -0
- data/ext/util/src/parse.c +133 -0
- data/ext/util/src/parse.h +81 -0
- data/ext/util/src/scan.c +767 -0
- data/ext/util/src/scan.h +111 -0
- data/ext/util/src/symtab.c +443 -0
- data/ext/util/src/symtab.h +121 -0
- data/ext/util/src/tabscan.c +279 -0
- data/ext/util/src/tabscan.h +99 -0
- data/ext/util/src/util.mak +91 -0
- data/ext/util/src/vecops.c +317 -0
- data/ext/util/src/vecops.h +42 -0
- data/lib/apriori.rb +133 -0
- data/lib/apriori/adapter.rb +13 -0
- data/lib/apriori/association_rule.rb +89 -0
- data/lib/apriori/version.rb +9 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/txt2html +82 -0
- data/setup.rb +1585 -0
- data/tasks/apriori.rake +20 -0
- data/tasks/attic.rake +28 -0
- data/tasks/deployment.rake +34 -0
- data/tasks/environment.rake +7 -0
- data/tasks/install.rake +13 -0
- data/tasks/website.rake +17 -0
- data/test/apriori_test.rb +13 -0
- data/test/fixtures/market_basket_results_test.txt +5 -0
- data/test/fixtures/market_basket_string_test.txt +7 -0
- data/test/fixtures/results.txt +2 -0
- data/test/fixtures/sample.txt +7 -0
- data/test/test_helper.rb +5 -0
- data/test/unit/test_apriori.rb +68 -0
- data/test/unit/test_itemsets_and_parsing.rb +82 -0
- data/website/index.html +248 -0
- data/website/index.txt +152 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +142 -0
- data/website/template.html.erb +49 -0
- metadata +226 -0
@@ -0,0 +1,69 @@
|
|
1
|
+
/*----------------------------------------------------------------------
|
2
|
+
File : nstats.h
|
3
|
+
Contents: management of normalization statistics
|
4
|
+
Author : Christian Borgelt
|
5
|
+
History : 2003.08.12 file created
|
6
|
+
2004.08.12 description and parse function added
|
7
|
+
----------------------------------------------------------------------*/
|
8
|
+
#ifndef __NSTATS__
|
9
|
+
#define __NSTATS__
|
10
|
+
#include <stdio.h>
|
11
|
+
#ifdef NST_PARSE
|
12
|
+
#include "parse.h"
|
13
|
+
#endif
|
14
|
+
|
15
|
+
/*----------------------------------------------------------------------
|
16
|
+
Type Definitions
|
17
|
+
----------------------------------------------------------------------*/
|
18
|
+
typedef struct { /* --- numerical statistics --- */
|
19
|
+
int dim; /* dimension of data space */
|
20
|
+
double reg; /* number of registered patterns */
|
21
|
+
double *mins; /* minimal data values */
|
22
|
+
double *maxs; /* maximal data values */
|
23
|
+
double *sums; /* sums of data values */
|
24
|
+
double *sqrs; /* sums of squared data values */
|
25
|
+
double *offs; /* offsets for data scaling */
|
26
|
+
double facs[1]; /* factors for data scaling */
|
27
|
+
} NSTATS; /* (numerical statistics) */
|
28
|
+
|
29
|
+
/*----------------------------------------------------------------------
|
30
|
+
Functions
|
31
|
+
----------------------------------------------------------------------*/
|
32
|
+
extern NSTATS* nst_create (int dim);
|
33
|
+
extern void nst_delete (NSTATS *nst);
|
34
|
+
extern int nst_dim (NSTATS *nst);
|
35
|
+
|
36
|
+
extern void nst_reg (NSTATS *nst, const double *vec,
|
37
|
+
double weight);
|
38
|
+
extern void nst_range (NSTATS *nst, int idx,
|
39
|
+
double min, double max);
|
40
|
+
extern void nst_expand (NSTATS *nst, int idx, double factor);
|
41
|
+
extern void nst_scale (NSTATS *nst, int idx,
|
42
|
+
double off, double fac);
|
43
|
+
|
44
|
+
extern double nst_min (NSTATS *nst, int idx);
|
45
|
+
extern double nst_max (NSTATS *nst, int idx);
|
46
|
+
extern double nst_offset (NSTATS *nst, int idx);
|
47
|
+
extern double nst_factor (NSTATS *nst, int idx);
|
48
|
+
|
49
|
+
extern void nst_norm (NSTATS *nst, const double *vec, double *res);
|
50
|
+
extern void nst_inorm (NSTATS *nst, const double *vec, double *res);
|
51
|
+
extern void nst_center (NSTATS *nst, double *vec);
|
52
|
+
extern void nst_spans (NSTATS *nst, double *vec);
|
53
|
+
|
54
|
+
extern int nst_desc (NSTATS *nst, FILE *file,
|
55
|
+
const char *indent, int maxlen);
|
56
|
+
#ifdef NST_PARSE
|
57
|
+
extern NSTATS* nst_parse (SCAN *scan, int dim);
|
58
|
+
#endif
|
59
|
+
|
60
|
+
/*----------------------------------------------------------------------
|
61
|
+
Preprocessor Definitions
|
62
|
+
----------------------------------------------------------------------*/
|
63
|
+
#define nst_dim(s) ((s)->dim)
|
64
|
+
#define nst_min(s,i) ((s)->mins[i])
|
65
|
+
#define nst_max(s,i) ((s)->maxs[i])
|
66
|
+
#define nst_offset(s,i) ((s)->offs[i])
|
67
|
+
#define nst_factor(s,i) ((s)->facs[i])
|
68
|
+
|
69
|
+
#endif
|
@@ -0,0 +1,86 @@
|
|
1
|
+
/*----------------------------------------------------------------------
|
2
|
+
File : params.c
|
3
|
+
Contents: command line parameter retrieval
|
4
|
+
Author : Christian Borgelt
|
5
|
+
History : 2003.06.05 file created
|
6
|
+
----------------------------------------------------------------------*/
|
7
|
+
#include <stdarg.h>
#include <stdlib.h>
#include <limits.h>
#include <assert.h>
#include "params.h"
|
11
|
+
|
12
|
+
/*----------------------------------------------------------------------
|
13
|
+
Functions
|
14
|
+
----------------------------------------------------------------------*/
|
15
|
+
|
16
|
+
/*----------------------------------------------------------------------
  getints: read up to n colon-separated decimal integers from s.

  s    string to parse, e.g. "3:10:-1"
  end  receives the position at which the last strtol() call stopped
  n    maximum number of values to read (must be > 0)
  ...  n pointers to int that receive the parsed values, in order

  Returns the number of values stored (0..n). Values outside the range
  of int are saturated to INT_MIN/INT_MAX instead of being truncated
  by the long -> int conversion.
----------------------------------------------------------------------*/
int getints (char *s, char **end, int n, ...)
{                               /* --- get integer parameters */
  va_list args;                 /* list of variable arguments */
  int     k = 0;                /* number of parameters stored */
  long    v;                    /* value as delivered by strtol */

  assert(s && end && (n > 0));  /* check the function arguments */
  va_start(args, n);            /* get variable arguments */
  while (k < n) {               /* traverse the arguments */
    v = strtol(s, end, 10);     /* get the next parameter and */
    if (*end == s) break;       /* stop on an empty parameter */
    if      (v > INT_MAX) v = INT_MAX;  /* saturate values that */
    else if (v < INT_MIN) v = INT_MIN;  /* do not fit into an int */
    *(va_arg(args, int*)) = (int)v;     /* store the parameter */
    k++;                        /* and count it */
    s = *end;                   /* continue only if a colon */
    if (*s++ != ':') break;     /* separates the next parameter */
  }
  va_end(args);                 /* end argument evaluation */
  return k;                     /* return the number of parameters */
}  /* getints() */
|
33
|
+
|
34
|
+
/*--------------------------------------------------------------------*/
|
35
|
+
|
36
|
+
/*----------------------------------------------------------------------
  getdbls: read up to n colon-separated floating point numbers from s.

  s    string to parse, e.g. "0.5:1e-3"
  end  receives the position at which the last strtod() call stopped
  n    maximum number of values to read (must be > 0)
  ...  n pointers to double that receive the parsed values, in order

  Returns the number of values stored (0..n).
----------------------------------------------------------------------*/
int getdbls (char *s, char **end, int n, ...)
{                               /* --- get double parameters */
  va_list ap;                   /* variable argument cursor */
  int     cnt;                  /* number of values stored so far */
  double  val;                  /* value just parsed */
  double  *dst;                 /* where to store the value */

  assert(s && end && (n > 0));  /* check the function arguments */
  va_start(ap, n);              /* start reading the destinations */
  for (cnt = 0; cnt < n; ) {    /* read at most n values */
    val = strtod(s, end);       /* parse the next number; nothing */
    if (*end == s) break;       /* consumed means empty parameter */
    dst  = va_arg(ap, double*); /* fetch the next destination */
    *dst = val;                 /* and store the value there */
    cnt++;                      /* count the stored value */
    s = *end;                   /* a further value must be */
    if (*s != ':') break;       /* introduced by a colon, */
    s++;                        /* which is skipped */
  }
  va_end(ap);                   /* end argument evaluation */
  return cnt;                   /* return the number of values */
}  /* getdbls() */
|
54
|
+
|
55
|
+
/*--------------------------------------------------------------------*/
|
56
|
+
|
57
|
+
/*----------------------------------------------------------------------
  getintvec: read up to n colon-separated decimal integers into p.

  s    string to parse, e.g. "3:10:-1"
  end  receives the position at which the last strtol() call stopped
  n    capacity of p, i.e. maximum number of values to read (> 0)
  p    vector that receives the parsed values, starting at p[0]

  Returns the number of values stored (0..n). Values outside the range
  of int are saturated to INT_MIN/INT_MAX instead of being truncated
  by the long -> int conversion.
----------------------------------------------------------------------*/
int getintvec (char *s, char **end, int n, int *p)
{                               /* --- get integer parameter vector */
  int  k = 0;                   /* number of parameters stored */
  long v;                       /* value as delivered by strtol */

  assert(s && end && p && (n > 0));  /* check function arguments */
  while (k < n) {               /* traverse the arguments */
    v = strtol(s, end, 10);     /* get the next parameter and */
    if (*end == s) break;       /* stop on an empty parameter */
    if      (v > INT_MAX) v = INT_MAX;  /* saturate values that */
    else if (v < INT_MIN) v = INT_MIN;  /* do not fit into an int */
    p[k++] = (int)v;            /* store and count the parameter */
    s = *end;                   /* continue only if a colon */
    if (*s++ != ':') break;     /* separates the next parameter */
  }
  return k;                     /* return the number of parameters */
}  /* getintvec() */
|
70
|
+
|
71
|
+
/*--------------------------------------------------------------------*/
|
72
|
+
|
73
|
+
/*----------------------------------------------------------------------
  getdblvec: read up to n colon-separated floating point numbers into p.

  s    string to parse, e.g. "0.5:1e-3"
  end  receives the position at which the last strtod() call stopped
  n    maximum number of values to read (must be > 0)
  p    vector that receives the parsed values, starting at p[0]

  Returns the number of values stored (0..n).
----------------------------------------------------------------------*/
int getdblvec (char *s, char **end, int n, double *p)
{                               /* --- get double parameter vector */
  int    cnt;                   /* number of values stored so far */
  double val;                   /* value just parsed */

  assert(s && end && (n > 0));  /* check the function arguments */
  for (cnt = 0; cnt < n; ) {    /* read at most n values */
    val = strtod(s, end);       /* parse the next number; nothing */
    if (*end == s) break;       /* consumed means empty parameter */
    p[cnt] = val;               /* store the value */
    cnt++;                      /* and count it */
    s = *end;                   /* a further value must be */
    if (*s != ':') break;       /* introduced by a colon, */
    s++;                        /* which is skipped */
  }
  return cnt;                   /* return the number of values */
}  /* getdblvec() */
|
@@ -0,0 +1,19 @@
|
|
1
|
+
/*----------------------------------------------------------------------
|
2
|
+
File : params.h
|
3
|
+
Contents: command line parameter retrieval
|
4
|
+
Author : Christian Borgelt
|
5
|
+
History : 2003.06.05 file created
|
6
|
+
----------------------------------------------------------------------*/
|
7
|
+
#ifndef __PARAMS__
|
8
|
+
#define __PARAMS__
|
9
|
+
|
10
|
+
/*----------------------------------------------------------------------
|
11
|
+
Functions
|
12
|
+
----------------------------------------------------------------------*/
|
13
|
+
extern int getints (char *s, char **end, int n, ...);
|
14
|
+
extern int getdbls (char *s, char **end, int n, ...);
|
15
|
+
|
16
|
+
extern int getintvec (char *s, char **end, int n, int *p);
|
17
|
+
extern int getdblvec (char *s, char **end, int n, double *p);
|
18
|
+
|
19
|
+
#endif
|
@@ -0,0 +1,133 @@
|
|
1
|
+
/*----------------------------------------------------------------------
|
2
|
+
File : parse.c
|
3
|
+
Contents: parser utilities
|
4
|
+
Author : Christian Borgelt
|
5
|
+
History : 2004.08.12 file created
|
6
|
+
2006.02.02 error E_EDGE added
|
7
|
+
2007.01.16 error E_MSDCNT added
|
8
|
+
----------------------------------------------------------------------*/
|
9
|
+
#include <string.h>
|
10
|
+
#include <assert.h>
|
11
|
+
#include "parse.h"
|
12
|
+
|
13
|
+
/*----------------------------------------------------------------------
|
14
|
+
Constants
|
15
|
+
----------------------------------------------------------------------*/
|
16
|
+
#ifdef GERMAN /* deutsche Texte */
|
17
|
+
static const char *errmsgs[] = { /* Fehlermeldungen */
|
18
|
+
/* E_CHREXP -16 */ "\"%c\" erwartet statt %s",
|
19
|
+
/* E_STREXP -17 */ "\"%s\" erwartet statt %s",
|
20
|
+
/* E_NUMEXP -18 */ "Zahl erwartet statt %s",
|
21
|
+
/* E_NUMBER -19 */ "ungültige Zahl %s",
|
22
|
+
|
23
|
+
/* E_ATTEXP -20 */ "Attribut erwartet statt %s",
|
24
|
+
/* E_UNKATT -21 */ "unbekanntes Attribut %s",
|
25
|
+
/* E_DUPATT -22 */ "doppeltes Attribut %s",
|
26
|
+
/* E_MISATT -23 */ "Attribut %s fehlt",
|
27
|
+
/* E_ATTRIB -24 */ "ungültiges Attribut %s",
|
28
|
+
/* E_ATTYPE -25 */ "Attribut %s hat falschen Typ",
|
29
|
+
|
30
|
+
/* E_VALEXP -26 */ "Attributwert erwartet statt %s",
|
31
|
+
/* E_UNKVAL -27 */ "unbekannter Attributwert %s",
|
32
|
+
/* E_DUPVAL -28 */ "doppelter Attributwert %s",
|
33
|
+
/* E_MISVAL -29 */ "fehlender Attributwert %s",
|
34
|
+
|
35
|
+
/* E_CLSEXP -30 */ "Klassenattribut erwartet statt %s",
|
36
|
+
/* E_UNKCLS -31 */ "unbekannte Klasse %s",
|
37
|
+
/* E_DUPCLS -32 */ "doppelte Klasse %s",
|
38
|
+
/* E_MISCLS -33 */ "Klasse %s fehlt",
|
39
|
+
/* E_CLSTYPE -34 */ "Klassenattribut %s hat falschen Typ",
|
40
|
+
/* E_CLSCNT -35 */ "Klassenattribut %s hat zu wenige Werte",
|
41
|
+
|
42
|
+
/* E_DOMAIN -36 */ "ungültiger Wertebereich %s",
|
43
|
+
|
44
|
+
/* E_PAREXP -37 */ "Parameter erwartet statt %s",
|
45
|
+
/* E_CMPOP -38 */ "ungültiger Vergleichsoperator %s",
|
46
|
+
/* E_COVMAT -39 */ "ungültige Kovarianzmatrix",
|
47
|
+
|
48
|
+
/* E_DUPCDL -40 */ "doppelte Kandidatenliste für Attribut %s\n",
|
49
|
+
/* E_RANGE -41 */ "ungültiger Kandidatenbereich",
|
50
|
+
/* E_CAND -42 */ "ungültiger Kandidat %s",
|
51
|
+
/* E_LINK -43 */ "ungültiger Verweis",
|
52
|
+
|
53
|
+
/* E_LYRCNT -44 */ "ungültige Anzahl Schichten",
|
54
|
+
/* E_UNITCNT -45 */ "ungültige Anzahl Einheiten",
|
55
|
+
|
56
|
+
/* E_EDGE -46 */ "ungültiger Kantentyp %s",
|
57
|
+
|
58
|
+
/* E_MSDCNT -47 */ "falsche Anzahl Zugehörigkeitsgrade",
|
59
|
+
};
|
60
|
+
#else /* English texts */
|
61
|
+
static const char *errmsgs[] = { /* error messages */
|
62
|
+
/* E_CHREXP -16 */ "\"%c\" expected instead of %s",
|
63
|
+
/* E_STREXP -17 */ "\"%s\" expected instead of %s",
|
64
|
+
/* E_NUMEXP -18 */ "number expected instead of %s",
|
65
|
+
/* E_NUMBER -19 */ "invalid number %s",
|
66
|
+
|
67
|
+
/* E_ATTEXP -20 */ "attribute expected instead of %s",
|
68
|
+
/* E_UNKATT -21 */ "unknown attribute %s",
|
69
|
+
/* E_DUPATT -22 */ "duplicate attribute %s",
|
70
|
+
/* E_MISATT -23 */ "missing attribute %s",
|
71
|
+
/* E_ATTRIB -24 */ "invalid attribute %s",
|
72
|
+
/* E_ATTYPE -25 */ "attribute %s has wrong type",
|
73
|
+
|
74
|
+
/* E_VALEXP -26 */ "attribute value expected instead of %s",
|
75
|
+
/* E_UNKVAL -27 */ "unknown attribute value %s",
|
76
|
+
/* E_DUPVAL -28 */ "duplicate attribute value %s",
|
77
|
+
/* E_MISVAL -29 */ "missing attribute value %s",
|
78
|
+
|
79
|
+
/* E_CLSEXP -30 */ "class value expected instead of %s",
|
80
|
+
/* E_UNKCLS -31 */ "unknown class value %s",
|
81
|
+
/* E_DUPCLS -32 */ "duplicate class value %s",
|
82
|
+
/* E_MISCLS -33 */ "missing class value %s",
|
83
|
+
/* E_CLSTYPE -34 */ "class attribute %s has wrong type",
|
84
|
+
/* E_CLSCNT -35 */ "class attribute %s has too few values",
|
85
|
+
|
86
|
+
/* E_DOMAIN -36 */ "invalid attribute domain %s",
|
87
|
+
|
88
|
+
/* E_PAREXP -37 */ "parameter expected instead of %s",
|
89
|
+
/* E_CMPOP -38 */ "invalid comparison operator %s",
|
90
|
+
/* E_COVMAT -39 */ "invalid covariance matrix",
|
91
|
+
|
92
|
+
/* E_DUPCDL -40 */ "duplicate candidate list for attribute %s\n",
|
93
|
+
/* E_RANGE -41 */ "invalid candidate range",
|
94
|
+
/* E_CAND -42 */ "invalid candidate %s",
|
95
|
+
/* E_LINK -43 */ "invalid link",
|
96
|
+
|
97
|
+
/* E_LYRCNT -44 */ "invalid number of layers",
|
98
|
+
/* E_UNITCNT -45 */ "invalid number of units",
|
99
|
+
|
100
|
+
/* E_EDGE -46 */ "invalid edge type %s",
|
101
|
+
|
102
|
+
/* E_MSDCNT -47 */ "wrong number of membership degrees",
|
103
|
+
};
|
104
|
+
#endif
|
105
|
+
#define MSGCNT (int)(sizeof(errmsgs)/sizeof(const char*))
|
106
|
+
|
107
|
+
/*----------------------------------------------------------------------
|
108
|
+
Functions
|
109
|
+
----------------------------------------------------------------------*/
|
110
|
+
|
111
|
+
/*----------------------------------------------------------------------
  pa_init: prepare a scanner for parsing by handing it this module's
  table of additional error messages (errmsgs, MSGCNT entries).
----------------------------------------------------------------------*/
void pa_init (SCAN *scan)
{                               /* --- initialize parsing */
  sc_errmsgs(scan, errmsgs, MSGCNT);  /* pass the message table */
}  /* pa_init() */
|
113
|
+
|
114
|
+
/*--------------------------------------------------------------------*/
|
115
|
+
|
116
|
+
/*----------------------------------------------------------------------
  pa_error: report a parse error through sc_error().

  scan  scanner in which the error occurred (must not be NULL)
  code  error code (one of the E_... codes of parse.h)
  c     expected character; only used for E_CHREXP
  s     expected string for E_STREXP, or the name to report for the
        "duplicate"/"missing"/"invalid candidate" codes; may be NULL,
        in which case the current token value is reported instead

  Returns the result of sc_error().

  The text to report is always cut to 255 characters before it is
  formatted, regardless of whether it comes from the caller or from
  the scanner. The original code formatted a caller-supplied name of
  arbitrary length straight into the fixed size buffer dst.
  NOTE(review): the 256/1024 sizing suggests sc_format() can expand a
  character into a multi-character escape sequence -- confirm in scan.c.
----------------------------------------------------------------------*/
int pa_error (SCAN *scan, int code, int c, const char *s)
{                               /* --- report a parse error */
  char       src[256], dst[1024];  /* buffers for string formatting */
  const char *txt;              /* text to insert into the message */
  int        named;             /* whether code reports a given name */

  assert(scan);                 /* check the function arguments */
  named = (code == E_DUPATT) || (code == E_MISATT)
       || (code == E_DUPVAL) || (code == E_MISVAL)
       || (code == E_DUPCLS) || (code == E_MISCLS)
       || (code == E_CAND);     /* "missing ..." style messages */
  txt = (named && s) ? s        /* report the given name if any, */
      : sc_value(scan);         /* otherwise the token value */
  strncpy(src, txt, sizeof(src) -1);   /* copy a bounded prefix */
  src[sizeof(src) -1] = '\0';   /* and make sure it is terminated */
  sc_format(dst, src, 1);       /* format the (truncated) text */
  if (code == E_CHREXP)         /* print an error message */
    return sc_error(scan, code, c, dst);
  if (code == E_STREXP)         /* never pass NULL for a "%s" */
    return sc_error(scan, code, s ? s : "", dst);
  return sc_error(scan, code, dst);
}  /* pa_error() */
|
@@ -0,0 +1,81 @@
|
|
1
|
+
/*----------------------------------------------------------------------
|
2
|
+
File : parse.h
|
3
|
+
Contents: parser utilities
|
4
|
+
Author : Christian Borgelt
|
5
|
+
History : 2004.08.12 file created
|
6
|
+
2006.02.02 error E_EDGE added
|
7
|
+
2007.01.16 error E_MSDCNT added
|
8
|
+
----------------------------------------------------------------------*/
|
9
|
+
#ifndef __PARSE__
|
10
|
+
#define __PARSE__
|
11
|
+
#ifndef SC_SCAN
|
12
|
+
#define SC_SCAN
|
13
|
+
#endif
|
14
|
+
#include "scan.h"
|
15
|
+
|
16
|
+
/*----------------------------------------------------------------------
|
17
|
+
Preprocessor Definitions
|
18
|
+
----------------------------------------------------------------------*/
|
19
|
+
/* --- error codes --- */
|
20
|
+
#define E_CHREXP (-16) /* character expected */
|
21
|
+
#define E_STREXP (-17) /* string expected */
|
22
|
+
#define E_NUMEXP (-18) /* number expected */
|
23
|
+
#define E_NUMBER (-19) /* invalid number */
|
24
|
+
|
25
|
+
#define E_ATTEXP (-20) /* attribute expected */
|
26
|
+
#define E_UNKATT (-21) /* unknown attribute */
|
27
|
+
#define E_DUPATT (-22) /* duplicate attribute */
|
28
|
+
#define E_MISATT (-23) /* missing attribute */
|
29
|
+
#define E_ATTRIB (-24) /* invalid attribute */
|
30
|
+
#define E_ATTYPE (-25) /* wrong attribute type */
|
31
|
+
|
32
|
+
#define E_VALEXP (-26) /* attribute value expected */
|
33
|
+
#define E_UNKVAL (-27) /* unknown attribute value */
|
34
|
+
#define E_DUPVAL (-28) /* duplicate attribute value */
|
35
|
+
#define E_MISVAL (-29) /* missing attribute value */
|
36
|
+
|
37
|
+
#define E_CLSEXP (-30) /* class value expected */
|
38
|
+
#define E_UNKCLS (-31) /* unknown class value */
|
39
|
+
#define E_DUPCLS (-32) /* duplicate class value */
|
40
|
+
#define E_MISCLS (-33) /* missing class value */
|
41
|
+
#define E_CLSTYPE (-34) /* class attribute must be nominal */
|
42
|
+
#define E_CLSCNT (-35) /* class attribute has too few values */
|
43
|
+
|
44
|
+
#define E_DOMAIN (-36) /* invalid attribute domain */
|
45
|
+
|
46
|
+
#define E_PAREXP (-37) /* parameter expected */
|
47
|
+
#define E_CMPOP (-38) /* invalid comparison operator */
|
48
|
+
#define E_COVMAT (-39) /* invalid covariance matrix */
|
49
|
+
|
50
|
+
#define E_DUPCDL (-40) /* duplicate candidate list */
|
51
|
+
#define E_RANGE (-41) /* invalid candidate range */
|
52
|
+
#define E_CAND (-42) /* invalid candidate */
|
53
|
+
#define E_LINK (-43) /* invalid link */
|
54
|
+
|
55
|
+
#define E_LYRCNT (-44) /* invalid number of layers */
|
56
|
+
#define E_UNITCNT (-45) /* invalid number of units */
|
57
|
+
|
58
|
+
#define E_EDGE (-46) /* invalid edge type */
|
59
|
+
|
60
|
+
#define E_MSDCNT (-47) /* wrong number of membership degrees */
|
61
|
+
|
62
|
+
/*----------------------------------------------------------------------
|
63
|
+
Functions
|
64
|
+
----------------------------------------------------------------------*/
|
65
|
+
extern void pa_init (SCAN *scan);
|
66
|
+
extern int pa_error (SCAN *scan, int code, int c, const char *s);
|
67
|
+
|
68
|
+
/*----------------------------------------------------------------------
|
69
|
+
Preprocessor Definitions
|
70
|
+
----------------------------------------------------------------------*/
|
71
|
+
/* --- parser shortcuts ---
   All of these expand to code that refers to a variable named `scan`
   and may execute a `return`, so they can only be used inside functions
   that have such a variable and return int.
   The statement-like macros are wrapped in do { ... } while (0) so that
   each behaves as one statement: a bare `if` would capture a following
   `else` of the caller (dangling else). */
#define ERROR(c)    return pa_error(scan, c,        -1, NULL)
#define XERROR(c,s) return pa_error(scan, c,        -1, s)
#define ERR_CHR(c)  return pa_error(scan, E_CHREXP,  c, NULL)
#define ERR_STR(s)  return pa_error(scan, E_STREXP, -1, s)
/* read the next token; return the scanner error if that fails */
#define GET_TOK()   do { if (sc_next(scan) < 0) \
                           return sc_error(scan, sc_token(scan)); \
                    } while (0)
/* check that the current token is c, then read the next token */
#define GET_CHR(c)  do { if (sc_token(scan) != (c)) ERR_CHR(c); \
                         GET_TOK(); \
                    } while (0)
/* recover from an error; return 1 if this reaches the end of file */
#define RECOVER()   do { if (sc_recover(scan, ';', '{', '}', 0) == T_EOF) \
                           return 1; \
                    } while (0)
|
81
|
+
#endif
|
data/ext/util/src/scan.c
ADDED
@@ -0,0 +1,767 @@
|
|
1
|
+
/*----------------------------------------------------------------------
|
2
|
+
File : scan.c
|
3
|
+
Contents: scanner (lexical analysis of a character stream)
|
4
|
+
Author : Christian Borgelt
|
5
|
+
History : 1996.01.16 file created
|
6
|
+
1996.02.21 identifier recognition made more flexible
|
7
|
+
1996.03.17 keyword tokens removed
|
8
|
+
1996.04.15 duplicate state removed from sc_next
|
9
|
+
1997.07.29 < and > declared active (for decision trees)
|
10
|
+
1997.09.08 escape sequences in strings made possible
|
11
|
+
1997.09.11 single characters stored also in scn->value
|
12
|
+
1998.02.08 recover and error message functions added
|
13
|
+
1998.02.09 bug in state S_NUMPT concerning "-." removed
|
14
|
+
1998.02.13 token T_RGT ('->') added
|
15
|
+
1998.03.04 returned tokens changed for some states
|
16
|
+
1998.04.17 token T_LFT ('<-') added
|
17
|
+
1998.05.27 token T_CMP (two char comparison operator) added
|
18
|
+
1998.05.31 token conversion to number removed
|
19
|
+
1999.02.08 reading from standard input made possible
|
20
|
+
1999.04.29 quoted string parsing improved
|
21
|
+
1999.11.13 token string length stored in scn->len
|
22
|
+
2000.11.23 functions sc_fmtlen and sc_format added
|
23
|
+
2001.07.15 scanner made an object, state definitions added
|
24
|
+
2001.07.16 characters with code > 127 made printable
|
25
|
+
look ahead functionality added (sc_back)
|
26
|
+
2006.02.02 token T_DASH (undirected edge '--') added
|
27
|
+
----------------------------------------------------------------------*/
|
28
|
+
#include <stdio.h>
|
29
|
+
#include <stdlib.h>
|
30
|
+
#include <string.h>
|
31
|
+
#include <stdarg.h>
|
32
|
+
#include <assert.h>
|
33
|
+
#include "scan.h"
|
34
|
+
#ifdef STORAGE
|
35
|
+
#include "storage.h"
|
36
|
+
#endif
|
37
|
+
|
38
|
+
/*----------------------------------------------------------------------
|
39
|
+
Preprocessor Definitions
|
40
|
+
----------------------------------------------------------------------*/
|
41
|
+
#ifdef SC_SCAN
|
42
|
+
/* --- character classes --- */
|
43
|
+
#define C_INVALID 0 /* invalid character */
|
44
|
+
#define C_SPACE 1 /* white space, e.g. ' ' '\t' '\n' */
|
45
|
+
#define C_LETTER 2 /* letter or underscore '_' */
|
46
|
+
#define C_DIGIT 3 /* digit */
|
47
|
+
#define C_POINT 4 /* point, '.' */
|
48
|
+
#define C_SIGN 5 /* sign, '+' or '-' */
|
49
|
+
#define C_SLASH 6 /* slash, '/' */
|
50
|
+
#define C_QUOTE 7 /* quote, e.g. '"' '`' */
|
51
|
+
#define C_CMPOP 8 /* comparison operator, e.g. '<' */
|
52
|
+
#define C_ACTIVE 9 /* active characters, e.g. ',' '(' */
|
53
|
+
|
54
|
+
/* --- scanner states --- */
|
55
|
+
#define S_SPACE 0 /* skipping white space */
|
56
|
+
#define S_ID 1 /* reading identifier */
|
57
|
+
#define S_NUMDIG 2 /* reading number, digit */
|
58
|
+
#define S_NUMPT 3 /* reading number, decimal point */
|
59
|
+
#define S_FRAC 4 /* reading number, digit and point */
|
60
|
+
#define S_EXPIND 5 /* reading exponent, indicator */
|
61
|
+
#define S_EXPSGN 6 /* reading exponent, sign */
|
62
|
+
#define S_EXPDIG 7 /* reading exponent, digit */
|
63
|
+
#define S_SIGN 8 /* sign read */
|
64
|
+
#define S_CMPOP 9 /* reading comparison operator */
|
65
|
+
#define S_STRING 10 /* reading quoted string */
|
66
|
+
#define S_ESC 11 /* reading escaped character */
|
67
|
+
#define S_OCT1 12 /* reading octal number, 1 digit */
|
68
|
+
#define S_OCT2 13 /* reading octal number, 2 digits */
|
69
|
+
#define S_HEX1 14 /* reading hexad. number, 1 digit */
|
70
|
+
#define S_HEX2 15 /* reading hexad. number, 2 digits */
|
71
|
+
#define S_SLASH 16 /* slash read */
|
72
|
+
#define S_CPPCOM 17 /* reading C++ comment */
|
73
|
+
#define S_CCOM1 18 /* reading C comment */
|
74
|
+
#define S_CCOM2 19 /* reading C comment, possible end */
|
75
|
+
#define S_CCOM3 20 /* reading C comment, possible start */
|
76
|
+
|
77
|
+
/* --- functions --- */
|
78
|
+
#define UNGETC(s,c) do { if ((c) == EOF) break; \
|
79
|
+
if ((c) == '\n') (s)->line--; \
|
80
|
+
ungetc(c, (s)->file); } while (0)
|
81
|
+
|
82
|
+
/* --- additional error codes --- */
|
83
|
+
#define E_UNKNOWN (-11) /* unknown error */
|
84
|
+
#define MSGOFFSET (-16) /* offset for add. error messages */
|
85
|
+
|
86
|
+
/* --- texts --- */
|
87
|
+
#ifdef GERMAN /* deutsche Texte */
|
88
|
+
#define FILETXT "Datei"
|
89
|
+
#define LINETXT "Zeile"
|
90
|
+
#else /* English texts */
|
91
|
+
#define FILETXT "file"
|
92
|
+
#define LINETXT "line"
|
93
|
+
#endif /* #ifdef GERMAN .. #else .. */
|
94
|
+
#endif /* #ifdef SC_SCAN */
|
95
|
+
|
96
|
+
/*----------------------------------------------------------------------
|
97
|
+
Constants
|
98
|
+
----------------------------------------------------------------------*/
|
99
|
+
static const char _scftab[256] = { /* scanable form classes */
|
100
|
+
/* NUL SOH STX ETX EOT ENQ ACK BEL */
|
101
|
+
/* 00 */ 2, 2, 2, 2, 2, 2, 2, 'a',
|
102
|
+
/* BS HT LF VT FF CR SO SI */
|
103
|
+
'b', 't', 'n', 'v', 'f', 'r', 2, 2,
|
104
|
+
/* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */
|
105
|
+
/* 10 */ 2, 2, 2, 2, 2, 2, 2, 2,
|
106
|
+
/* CAN EM SUB ESC FS GS RS US */
|
107
|
+
2, 2, 2, 2, 2, 2, 2, 2,
|
108
|
+
/* ' ' '!' '"' '#' '$' '%' '&' ''' */
|
109
|
+
/* 20 */ 1, 1, '"', 1, 1, 1, 1, 1,
|
110
|
+
/* '(' ')' '*' '+' ',' '-' '.' '/' */
|
111
|
+
1, 1, 1, 0, 1, 0, 0, 1,
|
112
|
+
/* '0' '1' '2' '3' '4' '5' '6' '7' */
|
113
|
+
/* 30 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
114
|
+
/* '8' '9' ':' ';' '<' '=' '>' '?' */
|
115
|
+
0, 0, 1, 1, 1, 1, 1, 1,
|
116
|
+
/* '@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' */
|
117
|
+
/* 40 */ 1, 0, 0, 0, 0, 0, 0, 0,
|
118
|
+
/* 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' */
|
119
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
120
|
+
/* 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' */
|
121
|
+
/* 50 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
122
|
+
/* 'X' 'Y' 'Z' '[' '\' ']' '^' '_' */
|
123
|
+
0, 0, 0, 1, '\\', 1, 1, 0,
|
124
|
+
/* '`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' */
|
125
|
+
/* 60 */ 1, 0, 0, 0, 0, 0, 0, 0,
|
126
|
+
/* 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' */
|
127
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
128
|
+
/* 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' */
|
129
|
+
/* 70 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
130
|
+
/* 'x' 'y' 'z' '{' '|' '}' '~' DEL */
|
131
|
+
0, 0, 0, 1, 1, 1, 1, 2,
|
132
|
+
/* 80 */ 1, 1, 1, 1, 1, 1, 1, 1,
|
133
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
134
|
+
/* 90 */ 1, 1, 1, 1, 1, 1, 1, 1,
|
135
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
136
|
+
/* a0 */ 1, 1, 1, 1, 1, 1, 1, 1,
|
137
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
138
|
+
/* b0 */ 1, 1, 1, 1, 1, 1, 1, 1,
|
139
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
140
|
+
/* c0 */ 1, 1, 1, 1, 1, 1, 1, 1,
|
141
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
142
|
+
/* d0 */ 1, 1, 1, 1, 1, 1, 1, 1,
|
143
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
144
|
+
/* e0 */ 1, 1, 1, 1, 1, 1, 1, 1,
|
145
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
146
|
+
/* f0 */ 1, 1, 1, 1, 1, 1, 1, 1,
|
147
|
+
1, 1, 1, 1, 1, 1, 1, 1 };
|
148
|
+
|
149
|
+
#ifdef SC_SCAN
|
150
|
+
static const char _ccltab[256] = { /* character classes */
|
151
|
+
/* NUL SOH STX ETX EOT ENQ ACK BEL */
|
152
|
+
/* 00 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
153
|
+
/* BS HT LF VT FF CR SO SI */
|
154
|
+
0, 1, 1, 1, 1, 1, 0, 0,
|
155
|
+
/* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */
|
156
|
+
/* 10 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
157
|
+
/* CAN EM SUB ESC FS GS RS US */
|
158
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
159
|
+
/* ' ' '!' '"' '#' '$' '%' '&' ''' */
|
160
|
+
/* 20 */ 1, 8, 7, 9, 9, 9, 9, 7,
|
161
|
+
/* '(' ')' '*' '+' ',' '-' '.' '/' */
|
162
|
+
9, 9, 9, 5, 9, 5, 4, 6,
|
163
|
+
/* '0' '1' '2' '3' '4' '5' '6' '7' */
|
164
|
+
/* 30 */ 3, 3, 3, 3, 3, 3, 3, 3,
|
165
|
+
/* '8' '9' ':' ';' '<' '=' '>' '?' */
|
166
|
+
3, 3, 9, 9, 8, 8, 8, 9,
|
167
|
+
/* '@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' */
|
168
|
+
/* 40 */ 0, 2, 2, 2, 2, 2, 2, 2,
|
169
|
+
/* 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' */
|
170
|
+
2, 2, 2, 2, 2, 2, 2, 2,
|
171
|
+
/* 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' */
|
172
|
+
/* 50 */ 2, 2, 2, 2, 2, 2, 2, 2,
|
173
|
+
/* 'X' 'Y' 'Z' '[' '\' ']' '^' '_' */
|
174
|
+
2, 2, 2, 9, 9, 9, 9, 2,
|
175
|
+
/* '`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' */
|
176
|
+
/* 60 */ 7, 2, 2, 2, 2, 2, 2, 2,
|
177
|
+
/* 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' */
|
178
|
+
2, 2, 2, 2, 2, 2, 2, 2,
|
179
|
+
/* 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' */
|
180
|
+
/* 70 */ 2, 2, 2, 2, 2, 2, 2, 2,
|
181
|
+
/* 'x' 'y' 'z' '{' '|' '}' '~' DEL */
|
182
|
+
2, 2, 2, 9, 9, 9, 9, 0,
|
183
|
+
/* 80 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
184
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
185
|
+
/* 90 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
186
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
187
|
+
/* a0 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
188
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
189
|
+
/* b0 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
190
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
191
|
+
/* c0 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
192
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
193
|
+
/* d0 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
194
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
195
|
+
/* e0 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
196
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
197
|
+
/* f0 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
198
|
+
0, 0, 0, 0, 0, 0, 0, 0 };
|
199
|
+
|
200
|
+
#ifdef GERMAN /* deutsche Texte */
|
201
|
+
/* German error message texts. sc_error() selects an entry with the
 * negated error code, so entry i belongs to error code -i.
 * The umlauts were replaced by U+FFFD in this copy of the file and
 * are restored here.
 * NOTE(review): assumes the source file is stored and compiled as
 * UTF-8 -- confirm against the build's source character set. */
static const char *_errmsgs[] = {
  /* E_NONE      0 */  "kein Fehler",
  /* E_NOMEM    -1 */  "nicht genug Speicher",
  /* E_FOPEN    -2 */  "Öffnen fehlgeschlagen",
  /* E_FREAD    -3 */  "Lesefehler",
  /* E_FWRITE   -4 */  "Schreibfehler",
  /* E_CHAR     -5 */  "ungültiges Zeichen '%c' (0x%02x)",
  /* E_BUFOVF   -6 */  "Pufferüberlauf",
  /* E_UNTSTR   -7 */  "unbeendete Zeichenkette",
  /* E_UNTCOM   -8 */  "unerwartetes Dateiende in Kommentar "
                       "(Anfang in Zeile %d)",
  /* E_STATE    -9 */  "ungültiger Scannerzustand",
  /* E_GARBAGE -10 */  "ungültiger Text am Dateiende",
  /* E_UNKNOWN -11 */  "unbekannter Fehler"
};
|
216
|
+
#else /* English texts */
|
217
|
+
/* English error message texts. sc_error() selects an entry with the
 * negated error code, so entry i belongs to error code -i. */
static const char *_errmsgs[] = {
  "no error",                           /*   0: E_NONE    */
  "not enough memory",                  /*  -1: E_NOMEM   */
  "file open failed",                   /*  -2: E_FOPEN   */
  "file read failed",                   /*  -3: E_FREAD   */
  "file write failed",                  /*  -4: E_FWRITE  */
  "invalid character '%c' (0x%02x)",    /*  -5: E_CHAR    */
  "scan buffer overflow",               /*  -6: E_BUFOVF  */
  "unterminated string",                /*  -7: E_UNTSTR  */
  "unexpected end of file in comment started on line %d",
                                        /*  -8: E_UNTCOM  */
  "invalid scanner state",              /*  -9: E_STATE   */
  "garbage at end of file",             /* -10: E_GARBAGE */
  "unknown error"                       /* -11: E_UNKNOWN */
};
|
232
|
+
#endif /* #ifdef GERMAN .. #else .. */
|
233
|
+
#endif /* #ifdef SC_SCAN */
|
234
|
+
|
235
|
+
/*----------------------------------------------------------------------
|
236
|
+
Auxiliary Functions
|
237
|
+
----------------------------------------------------------------------*/
|
238
|
+
#ifdef SC_SCAN
|
239
|
+
|
240
|
+
/* Exchange the current and the previous token description of a scanner.
 * The value pointer is flipped to the other of the two scan buffers and
 * the length, line and token fields trade places with their 'p...'
 * counterparts. Returns the token that is current after the exchange. */
static int _swap (SCAN *scan)
{
  int held;                     /* value in transit during an exchange */

  /* the value always points to one of the two buffers: flip it */
  scan->value = (scan->value == scan->buf[0]) ? scan->buf[1]
                                              : scan->buf[0];

  held = scan->plen;
  scan->plen = scan->len;
  scan->len  = held;

  held = scan->pline;
  scan->pline = scan->line;
  scan->line  = held;

  held = scan->ptoken;
  scan->ptoken = scan->token;
  scan->token  = held;

  return held;                  /* the token that is now current */
}  /* _swap() */
|
251
|
+
|
252
|
+
#endif
|
253
|
+
/*----------------------------------------------------------------------
|
254
|
+
Main Functions
|
255
|
+
----------------------------------------------------------------------*/
|
256
|
+
|
257
|
+
int sc_fmtlen (const char *s, int *len)
|
258
|
+
{ /* --- length of a formatted name */
|
259
|
+
int n = 0, k = 0; /* number of (additional) characters */
|
260
|
+
int q = 0; /* quote flag (default: no quotes) */
|
261
|
+
|
262
|
+
assert(s); /* check the function arguments */
|
263
|
+
while (*s) { /* while not at end of name */
|
264
|
+
n++; /* count character */
|
265
|
+
switch (_scftab[(unsigned char)*s++]) {
|
266
|
+
case 0: break;
|
267
|
+
case 1: q = 2; break;
|
268
|
+
case 2: k += 3; q = 2; break;
|
269
|
+
default: k += 1; q = 2; break;
|
270
|
+
} /* sum additional characters and */
|
271
|
+
} /* set quote flag (if necessary) */
|
272
|
+
if (len) *len = n; /* store normal length and */
|
273
|
+
return n +k +q; /* return length of scanable form */
|
274
|
+
} /* sc_fmtlen() */
|
275
|
+
|
276
|
+
/*--------------------------------------------------------------------*/
|
277
|
+
|
278
|
+
int sc_format (char *dst, const char *src, int quotes)
|
279
|
+
{ /* --- format name in scanable form */
|
280
|
+
char *d; const char *s; /* to traverse buffer and name */
|
281
|
+
int c, cls; /* character and character class */
|
282
|
+
int t; /* temporary buffer */
|
283
|
+
|
284
|
+
assert(dst && src); /* check the function arguments */
|
285
|
+
if (!*src) quotes = 1; /* an empty name needs quotes */
|
286
|
+
if (!quotes) { /* if quotes are not mandatory, */
|
287
|
+
for (s = src; *s; ) /* traverse the string to convert */
|
288
|
+
if (_scftab[(unsigned char)*s++] != 0) {
|
289
|
+
quotes = 1; break; } /* if a character needs quotes, */
|
290
|
+
} /* set the quotes flag and abort */
|
291
|
+
d = dst; /* get the destination and */
|
292
|
+
if (quotes) *d++ = '"'; /* store a quote if necessary */
|
293
|
+
while (*src) { /* traverse the characters */
|
294
|
+
c = (unsigned char)*src++;/* get the next character */
|
295
|
+
cls = _scftab[c]; /* and its character class */
|
296
|
+
if (cls < 2) /* if it is a normal character, */
|
297
|
+
*d++ = c; /* just store it */
|
298
|
+
else if (cls > 2) { /* if it is an ANSI escape character, */
|
299
|
+
*d++ = '\\'; *d++ = cls;} /* store it as '\c' */
|
300
|
+
else { /* if it is any other character */
|
301
|
+
*d++ = '\\'; *d++ = 'x';
|
302
|
+
t = c >> 4; *d++ = (t > 9) ? (t -10 +'a') : (t +'0');
|
303
|
+
t = c & 0xf; *d++ = (t > 9) ? (t -10 +'a') : (t +'0');
|
304
|
+
} /* store the character code */
|
305
|
+
} /* as a hexadecimal number */
|
306
|
+
if (quotes) *d++ = '"'; /* store the closing quote */
|
307
|
+
*d = '\0'; /* and terminate the string */
|
308
|
+
return (int)(d -dst); /* return the length of the result */
|
309
|
+
} /* sc_format() */
|
310
|
+
|
311
|
+
/*--------------------------------------------------------------------*/
|
312
|
+
#ifdef SC_SCAN
|
313
|
+
|
314
|
+
SCAN* sc_create (const char *fname)
|
315
|
+
{ /* --- create a scanner */
|
316
|
+
const char *fn = fname; /* buffer for filename */
|
317
|
+
SCAN *scan; /* created scanner */
|
318
|
+
|
319
|
+
if (!fn || !*fn) fname = "<stdin>";
|
320
|
+
scan = (SCAN*)malloc(sizeof(SCAN) +strlen(fname));
|
321
|
+
if (!scan) return NULL; /* allocate memory for a scanner */
|
322
|
+
strcpy(scan->fname, fname); /* and note the file name */
|
323
|
+
if (!fn || !*fn) /* if no file name is given, */
|
324
|
+
scan->file = stdin; /* read from standard input */
|
325
|
+
else { /* if a file name is given, */
|
326
|
+
scan->file = fopen(fn,"r"); /* open the file for reading */
|
327
|
+
if (!scan->file) { free(scan); return NULL; }
|
328
|
+
}
|
329
|
+
scan->line = 1; /* initialize the fields */
|
330
|
+
scan->token = scan->len = scan->start = 0;
|
331
|
+
scan->value = scan->buf[0]; scan->buf[0][0] = '\0';
|
332
|
+
scan->back = 0;
|
333
|
+
scan->errfile = stderr;
|
334
|
+
scan->msgcnt = scan->lncnt = 0;
|
335
|
+
scan->msgs = NULL;
|
336
|
+
return scan; /* return created scanner */
|
337
|
+
} /* sc_create() */
|
338
|
+
|
339
|
+
/*--------------------------------------------------------------------*/
|
340
|
+
|
341
|
+
/* Delete a scanner: close its input file, unless the scanner reads
 * from standard input, and release the scanner structure. */
void sc_delete (SCAN *scan)
{
  FILE *input = scan->file;     /* the stream the scanner reads from */

  if (input != stdin)
    fclose(input);              /* stdin is left open */
  free(scan);
}  /* sc_delete() */
|
346
|
+
|
347
|
+
/*--------------------------------------------------------------------*/
|
348
|
+
|
349
|
+
/* Read the next token from the scanner's input.
 *
 * If sc_back() was called before, no input is read; the token
 * information is swapped back and the (already scanned) token is
 * returned. Otherwise the current token is noted as the previous one,
 * the scan buffers are exchanged and a finite state automaton consumes
 * characters until a token is complete.
 *
 * The token text is stored in scan->value, its length in scan->len and
 * its code in scan->token, which is also the return value. Error codes
 * (E_...) are returned the same way; sc_nexter() treats negative
 * results as errors. */
int sc_next (SCAN *scan)
{                               /* --- get next token */
  int  c, ccl;                  /* character and character class */
  int  quote = 0;               /* quote at the start of a string */
  int  ec    = 0;               /* escaped character */
  int  state = 0;               /* state of automaton */
  int  level = 0;               /* comment nesting level */
  char *p;                      /* to traverse the scan buffer */
  char *end;                    /* end of the scan buffer */

  if (scan->back) {             /* if a step backwards has been made, */
    scan->back = 0;             /* clear the corresponding flag, */
    return _swap(scan);         /* swap back the token information, */
  }                             /* and return the current token */
  scan->pline  = scan->line;    /* note the relevant information */
  scan->ptoken = scan->token;   /* of the current token */
  scan->plen   = scan->len;     /* and swap scan buffers */
  if (scan->value == scan->buf[0]) scan->value = p = scan->buf[1];
  else                             scan->value = p = scan->buf[0];
  end = p +SC_BUFSIZE -1;       /* get the end of the scan buffer */

  while (1) {                   /* read loop */
    c   = getc(scan->file);     /* get character and character class */
    ccl = (c < 0) ? EOF : _ccltab[c];
    if (c == '\n') scan->line++; /* count the line */

    switch (state) {            /* evaluate state of automaton */

      case S_SPACE:             /* --- skip white space */
        switch (ccl) {          /* evaluate character category */
          case C_SPACE : /* do nothing */             break;
          case C_LETTER: *p++  = c; state = S_ID;     break;
          case C_DIGIT : *p++  = c; state = S_NUMDIG; break;
          case C_POINT : *p++  = c; state = S_NUMPT;  break;
          case C_SIGN  : *p++  = c; state = S_SIGN;   break;
          case C_CMPOP : *p++  = c; state = S_CMPOP;  break;
          case C_QUOTE : quote = c; state = S_STRING; break;
          case C_SLASH :            state = S_SLASH;  break;
          case C_ACTIVE: *p++ = c; *p = '\0'; scan->len = 1;
                         return scan->token = c;
          /* NOTE(review): "<eof>" has 5 characters, but the length */
          /* is set to 4; kept unchanged from the original code */
          case EOF     : strcpy(p, "<eof>");  scan->len = 4;
                         return scan->token = (ferror(scan->file))
                                            ? E_FREAD : T_EOF;
          default      : *p++ = c; *p = '\0'; scan->len = 1;
                         return scan->token = E_CHAR;
        } break;

      case S_ID:                /* --- identifier (letter read) */
        if ((ccl == C_LETTER)   /* if another letter */
        ||  (ccl == C_DIGIT)    /* or a digit */
        ||  (ccl == C_POINT)    /* or a decimal point */
        ||  (ccl == C_SIGN)) {  /* or a sign follows */
          if (p >= end) return scan->token = E_BUFOVF;
          *p++ = c; break;      /* buffer character */
        }                       /* otherwise */
        UNGETC(scan, c);        /* put back last character, */
        *p = '\0';              /* terminate string in buffer */
        scan->len = (int)(p -scan->value); /* set string length */
        return scan->token = T_ID;   /* and return 'identifier' */

      /* In the number states the character is buffered before its  */
      /* class is examined; if it ends the token, '*--p' removes it */
      /* again while terminating the string. */

      case S_NUMDIG:            /* --- number (digit read) */
        if (p < end) *p++ = c;  /* buffer character */
        else return scan->token = E_BUFOVF;
        if (ccl == C_DIGIT)     /* if another digit follows, */
          break;                /* do nothing */
        if (ccl == C_POINT) {   /* if a decimal point follows, */
          state = S_FRAC;   break; } /* go to 'fraction' state */
        if ((c == 'e')          /* if an exponent indicator follows */
        ||  (c == 'E')) {       /* (lower- or uppercase), */
          state = S_EXPIND; break; } /* go to 'exponent' state */
        if ((ccl == C_LETTER)   /* if a letter */
        ||  (ccl == C_SIGN)) {  /* or a sign follows, */
          state = S_ID; break;  /* go to 'identifier' state */
        }                       /* otherwise */
        UNGETC(scan, c);        /* put back last character, */
        *--p = '\0';            /* terminate string in buffer */
        scan->len = (int)(p -scan->value); /* set string length */
        return scan->token = T_NUM;      /* and return 'number' */

      case S_NUMPT:             /* --- number (point read) */
        if (p < end) *p++ = c;  /* buffer character */
        else return scan->token = E_BUFOVF;
        if (ccl == C_DIGIT) {   /* if a digit follows, */
          state = S_FRAC; break; }   /* go to 'fraction' state */
        if ((ccl == C_LETTER)   /* if a letter */
        ||  (ccl == C_POINT)    /* or a decimal point */
        ||  (ccl == C_SIGN)) {  /* or a sign follows */
          state = S_ID; break;  /* go to 'identifier' state */
        }                       /* otherwise */
        UNGETC(scan, c);        /* put back last character, */
        *--p = '\0';            /* terminate string in buffer */
        scan->len = (int)(p -scan->value); /* set string length */
        return scan->token = T_ID;   /* and return 'identifier' */

      case S_FRAC:              /* --- number (digit & point read) */
        if (p < end) *p++ = c;  /* buffer character */
        else return scan->token = E_BUFOVF;
        if (ccl == C_DIGIT)     /* if another digit follows, */
          break;                /* do nothing else */
        if ((c == 'e')          /* if an exponent indicator follows, */
        ||  (c == 'E')) {       /* (lower- or uppercase), */
          state = S_EXPIND; break; } /* go to exponent state */
        if ((ccl == C_LETTER)   /* if a letter */
        ||  (ccl == C_POINT)    /* or a decimal point */
        ||  (ccl == C_SIGN)) {  /* or a sign follows, */
          state = S_ID; break;  /* go to 'identifier' state */
        }                       /* otherwise */
        UNGETC(scan, c);        /* put back last character, */
        *--p = '\0';            /* terminate string in buffer */
        scan->len = (int)(p -scan->value); /* set string length */
        return scan->token = T_NUM;      /* and return 'number' */

      case S_EXPIND:            /* --- exponent (indicator read) */
        if (p < end) *p++ = c;  /* buffer character */
        else return scan->token = E_BUFOVF;
        if (ccl == C_SIGN) {    /* if a sign follows, */
          state = S_EXPSGN; break; } /* go to 2nd 'exponent' state */
        if (ccl == C_DIGIT) {   /* if a digit follows, */
          state = S_EXPDIG; break; } /* go to 3rd 'exponent' state */
        if ((ccl == C_LETTER)   /* if a letter */
        ||  (ccl == C_POINT)) { /* or a decimal point follows */
          state = S_ID; break;  /* go to 'identifier' state */
        }                       /* otherwise */
        UNGETC(scan, c);        /* put back last character, */
        *--p = '\0';            /* terminate string in buffer */
        scan->len = (int)(p -scan->value); /* set string length */
        return scan->token = T_ID;   /* and return 'identifier' */

      case S_EXPSGN:            /* --- exponent (sign read) */
        if (p < end) *p++ = c;  /* buffer character */
        else return scan->token = E_BUFOVF;
        if (ccl == C_DIGIT) {   /* if a digit follows, */
          state = S_EXPDIG; break; } /* go to 3rd 'exponent' state */
        if ((ccl == C_LETTER)   /* if a letter */
        ||  (ccl == C_POINT)    /* or a decimal point */
        ||  (ccl == C_SIGN)) {  /* or a sign follows */
          state = S_ID; break;  /* go to 'identifier' state */
        }                       /* otherwise */
        UNGETC(scan, c);        /* put back last character, */
        *--p = '\0';            /* terminate string in buffer */
        scan->len = (int)(p -scan->value); /* set string length */
        return scan->token = T_ID;   /* and return 'identifier' */

      case S_EXPDIG:            /* --- exponent (digit read) */
        if (p < end) *p++ = c;  /* buffer character */
        else return scan->token = E_BUFOVF;
        if (ccl == C_DIGIT)     /* if another digit follows, */
          break;                /* do nothing else */
        if ((ccl == C_LETTER)   /* if a letter */
        ||  (ccl == C_POINT)    /* or a decimal point */
        ||  (ccl == C_SIGN)) {  /* or a sign follows, */
          state = S_ID; break;  /* go to 'identifier' state */
        }                       /* otherwise */
        UNGETC(scan, c);        /* put back last character, */
        *--p = '\0';            /* terminate string in buffer */
        scan->len = (int)(p -scan->value); /* set string length */
        return scan->token = T_NUM;      /* and return 'number' */

      case S_SIGN:              /* --- number (sign read) */
        *p++ = c;               /* buffer character */
        if (ccl == C_DIGIT) {   /* if a digit follows, */
          state = S_NUMDIG; break; } /* go to 'number' state */
        if (ccl == C_POINT) {   /* if a decimal point follows, */
          state = S_NUMPT;  break; } /* go to fraction state */
        if ((c == '-')          /* if a '-' follows and previous */
        &&  (scan->value[0] == '-')) {  /* char was a minus sign */
          *p = '\0'; scan->len = 2; return scan->token = T_DASH; }
        if ((c == '>')          /* if a '>' follows and previous */
        &&  (scan->value[0] == '-')) {  /* char was a minus sign */
          *p = '\0'; scan->len = 2; return scan->token = T_RGT; }
        if ((ccl == C_LETTER)   /* if a letter */
        ||  (ccl == C_SIGN)) {  /* or a sign follows, */
          state = S_ID; break; }     /* go to 'identifier' state */
        UNGETC(scan, c);        /* otherwise put back last character, */
        *--p = '\0';            /* terminate string in buffer */
        scan->len = (int)(p -scan->value); /* set string length */
        return scan->token = T_ID;   /* and return 'identifier' */

      case S_CMPOP:             /* --- comparison operator read */
        if ((c == '-')          /* if a minus sign follows and */
        &&  (scan->value[0] == '<')) {  /* prev. char was a '<' */
          *p++ = '-';       scan->token = T_LFT; }
        else if (c == '=') {    /* if an equal sign follows */
          *p++ = '=';       scan->token = T_CMP; }
        else {                  /* if anything else follows */
          UNGETC(scan, c);  scan->token = scan->value[0]; }
        *p = '\0';              /* terminate string in buffer */
        scan->len = (int)(p -scan->value); /* set string length */
        return scan->token;     /* and return the token read */

      case S_STRING:            /* --- quoted string */
        if ((c == '\n') || (c == EOF))  /* if end of line or file, */
          return scan->token = E_UNTSTR; /* string is unterminated */
        if (c != quote) {       /* if not at end of string */
          if (p >= end) return scan->token = E_BUFOVF;
          if (c == '\\') {      /* if escaped character follows, */
            state = S_ESC; break; }  /* go to escaped char state */
          *p++ = c; break;      /* otherwise buffer character */
        }                       /* if at end of string, */
        *p = '\0';              /* terminate string in buffer */
        scan->len = (int)(p -scan->value); /* set string length */
        return scan->token = T_ID;   /* and return 'identifier' */

      case S_ESC:               /* --- after '\' in quoted string */
        if ((c >= '0') && (c <= '7')) {        /* if octal digit, */
          ec = c -'0'; state = S_OCT1; break; }/* evaluate digit */
        if (c == 'x') {         /* if hexadecimal character code, */
          state = S_HEX1; break; } /* go to hexadecimal evaluation */
        switch (c) {            /* evaluate character after '\' */
          case  'a': c = '\a'; break;
          case  'b': c = '\b'; break;
          case  'f': c = '\f'; break;
          case  'n': c = '\n'; break;
          case  'r': c = '\r'; break;
          case  't': c = '\t'; break;
          case  'v': c = '\v'; break;
          case '\n': c = -1;   break;  /* escaped newline: store nothing */
          default  :           break;
        }                       /* get escaped character */
        if (c >= 0) *p++ = c;   /* and store it, then */
        state = S_STRING; break;/* return to quoted string state */

      case S_OCT1:              /* --- escaped octal number 1 */
        if ((c >= '0')          /* if an octal digit follows, */
        &&  (c <= '7')) {       /* evaluate it */
          ec = ec *8 +c -'0'; state = S_OCT2; break; }
        UNGETC(scan, c);        /* otherwise put back last character */
        *p++  = ec;             /* store escaped character and */
        state = S_STRING; break;/* return to quoted string state */

      case S_OCT2:              /* --- escaped octal number 2 */
        /* The original tested (c >= '0') || (c <= '7'), which holds */
        /* for every character, so whatever followed two octal digits */
        /* was folded into the character code and consumed. */
        if ((c >= '0') && (c <= '7'))
          ec = ec *8 +c -'0';   /* if octal digit, evaluate it */
        else UNGETC(scan, c);   /* otherwise put back last character */
        *p++  = ec;             /* store escaped character and */
        state = S_STRING; break;/* return to quoted string state */

      case S_HEX1:              /* --- escaped hexadecimal number 1 */
        if (ccl == C_DIGIT) {   /* if hexadecimal digit, evaluate it */
          ec = c -'0';     state = S_HEX2; break; }
        if ((c >= 'a') && (c <= 'f')) {
          ec = c -'a' +10; state = S_HEX2; break; }
        if ((c >= 'A') && (c <= 'F')) {
          ec = c -'A' +10; state = S_HEX2; break; }
        UNGETC(scan, c);        /* otherwise put back last character */
        *p++  = 'x';            /* store escaped character ('x') and */
        state = S_STRING; break;/* return to quoted string state */

      case S_HEX2:              /* --- escaped hexadecimal number 2 */
        if (ccl == C_DIGIT)     /* if hexadecimal digit, evaluate it */
          ec = ec*16 +c -'0';
        else if ((c >= 'a') && (c <= 'f'))
          ec = ec*16 +c -'a' +10;
        else if ((c >= 'A') && (c <= 'F'))
          ec = ec*16 +c -'A' +10;
        else UNGETC(scan, c);   /* otherwise put back last character */
        *p++  = ec;             /* store escaped character and */
        state = S_STRING; break;/* return to quoted string state */

      case S_SLASH:             /* --- slash '/' */
        if (c == '/') {         /* if C++ style comment, then */
          state = S_CPPCOM; break; }   /* skip to end of line */
        if (c == '*') {         /* if C style comment */
          scan->start = scan->line; level = 1;
          state = S_CCOM1; break;    /* note start line, init. level */
        }                       /* and go to first 'comment' state */
        UNGETC(scan, c);        /* otherwise put back last character */
        *p++ = '/'; *p = '\0';  /* store character in buffer */
        scan->len = 1;          /* set string length and */
        return scan->token = '/';  /* return `character' */

      case S_CPPCOM:            /* --- C++ style comment */
        if ((c == '\n')         /* if at end of line */
        ||  (c == EOF))         /* or at end of file */
          state = S_SPACE;      /* return to white space skipping */
        break;                  /* (skip to end of line) */

      case S_CCOM1:             /* --- C style comment 1 */
        if      (c == EOF)      /* if end of file, abort */
          return scan->token = E_UNTCOM;
        if      (c == '*')      /* if possibly 'end of comment', */
          state = S_CCOM2;      /* go to 2nd 'comment' state */
        else if (c == '/')      /* if possibly 'start of comment', */
          state = S_CCOM3;      /* go to 3rd 'comment' state */
        break;

      case S_CCOM2:             /* --- C style comment 2 */
        if      (c == EOF)      /* if end of file, abort */
          return scan->token = E_UNTCOM;
        if      (c == '/') {    /* if end of comment found */
          if (--level <= 0) state = S_SPACE;
          else              state = S_CCOM1; }
        else if (c != '*')      /* if end of comment impossible */
          state = S_CCOM1;      /* return to comment skipping */
        break;                  /* (possible start of comment) */

      case S_CCOM3:             /* --- C style comment 3 */
        if      (c == EOF)      /* if end of file, abort */
          return scan->token = E_UNTCOM;
        if      (c == '*') {    /* if start of comment found */
          level++; state = S_CCOM1; }
        else if (c != '/')      /* if start of comment impossible */
          state = S_CCOM1;      /* return to comment skipping */
        break;                  /* (possible end of comment) */

      default:                  /* if state is invalid, abort */
        return scan->token = E_STATE;

    }  /* switch() */
  }  /* while(1) */
}  /* sc_next() */
|
660
|
+
|
661
|
+
/*--------------------------------------------------------------------*/
|
662
|
+
|
663
|
+
/* Read the next token and report a scan error, if one occurred.
 * A negative result of sc_next() is passed to sc_error(), whose result
 * is returned; otherwise the token just read is returned. */
int sc_nexter (SCAN *scan)
{
  int result = sc_next(scan);   /* get the next token */

  if (result >= 0)
    return scan->token;         /* regular token: pass it on */
  return sc_error(scan, scan->token);
}  /* sc_nexter() */
|
668
|
+
|
669
|
+
/*--------------------------------------------------------------------*/
|
670
|
+
|
671
|
+
/* Go back one token. Only a single step backwards is possible: if one
 * has already been made, nothing changes and the current token is
 * returned. Otherwise the step is flagged, the token information is
 * swapped and the previous token is returned. */
int sc_back (SCAN *scan)
{
  if (!scan->back) {            /* no step backwards pending yet */
    scan->back = -1;            /* flag it and */
    return _swap(scan);         /* make the previous token current */
  }
  return scan->token;           /* a second step is impossible */
}  /* sc_back() */
|
678
|
+
|
679
|
+
/*--------------------------------------------------------------------*/
|
680
|
+
|
681
|
+
/* Check for end of file. Returns 1 if the current token is T_EOF;
 * otherwise reports E_GARBAGE through sc_error() and returns 0. */
int sc_eof (SCAN *scan)
{
  if (scan->token != T_EOF) {   /* something other than end of file */
    sc_error(scan, E_GARBAGE);  /* is left: report it */
    return 0;
  }
  return 1;                     /* end of file has been reached */
}  /* sc_eof() */
|
687
|
+
|
688
|
+
/*--------------------------------------------------------------------*/
|
689
|
+
|
690
|
+
/* Recover from an error by skipping tokens.
 *
 * stop   token at which skipping ends
 * beg    token that opens a nesting level
 * end    token that closes a nesting level
 * level  nesting level at the time of the call
 *
 * Tokens are consumed until the stop token or T_EOF is current, or
 * until an end token brings the level counter down to zero or below.
 * If a scan error occurs while skipping, its code is returned at once.
 * Unless end of file has been reached, the token that ended the
 * skipping is consumed as well; the then current token is returned. */
int sc_recover (SCAN *scan, int stop, int beg, int end, int level)
{
  int tok;                      /* the current token */

  for (;;) {
    tok = scan->token;
    if ((tok == stop) || (tok == T_EOF))
      break;                    /* stop token or end of file reached */
    if (tok == beg)
      level++;                  /* one nesting level deeper */
    else if (tok == end) {
      if (--level <= 0) break;  /* back on the level to return to */
    }
    if (sc_next(scan) < 0)      /* consume the token and */
      return scan->token;       /* give up on a scan error */
  }
  if (scan->token != T_EOF)
    sc_next(scan);              /* consume the stop or end token */
  return scan->token;
}  /* sc_recover() */
|
705
|
+
|
706
|
+
/*--------------------------------------------------------------------*/
|
707
|
+
|
708
|
+
/* Set the file for error output and the line count that controls the
 * layout of error messages. A NULL file selects stderr. */
void sc_errfile (SCAN *scan, FILE *file, int lncnt)
{
  assert(scan);
  if (!file)
    file = stderr;              /* fall back to standard error */
  scan->errfile = file;
  scan->lncnt   = lncnt;
}  /* sc_errfile() */
|
714
|
+
|
715
|
+
/*--------------------------------------------------------------------*/
|
716
|
+
|
717
|
+
/* Set additional error messages: note the message vector and the
 * number of messages it contains in the scanner. */
void sc_errmsgs (SCAN *scan, const char *msgs[], int cnt)
{
  assert(scan);
  scan->msgcnt = cnt;           /* number of additional messages */
  scan->msgs   = msgs;          /* and the vector that holds them */
}  /* sc_errmsgs() */
|
723
|
+
|
724
|
+
/*--------------------------------------------------------------------*/
|
725
|
+
|
726
|
+
/* Print an error message to the scanner's error file.
 *
 * code  error code; values >= 0 are treated as E_NONE
 * ...   arguments for the format specifiers of the message (not used
 *       for E_CHAR and E_UNTCOM, whose arguments are taken from the
 *       scanner itself)
 *
 * The message starts with the file name and, except for E_NONE,
 * E_FOPEN and E_UNTCOM, the current line number; scan->lncnt controls
 * where line breaks are placed. Codes above E_UNKNOWN select a built-in
 * message, other codes select one of the messages set with
 * sc_errmsgs(). If no message is available, the 'unknown error'
 * message is printed and E_UNKNOWN becomes the result.
 * Returns the (possibly replaced) error code. */
int sc_error (SCAN *scan, int code, ...)
{                               /* --- print an error message */
  va_list    args;              /* variable argument list */
  const char *msg;              /* error message */
  int        c, pc;             /* the invalid character */
  int        tmp;               /* index of an additional message */

  assert(scan);                 /* check the function arguments */
  if (scan->lncnt <= 0)         /* if line count is zero or negative, */
    putc('\n', scan->errfile);  /* start a new output line */
  fprintf(scan->errfile, FILETXT" %s", scan->fname);
                                /* print the file name */
  if ((code != E_NONE)          /* if an error occurred, */
  &&  (code != E_FOPEN)         /* but not 'file open failed' */
  &&  (code != E_UNTCOM)) {     /* and not 'unterminated comment' */
    fputs((scan->lncnt > 2) ? ",\n" : ", ", scan->errfile);
    fprintf(scan->errfile, LINETXT" %d", scan->line);
  }                             /* print line number */
  fputs((scan->lncnt > 1) ? ":\n" : ": ", scan->errfile);

  if (code >= 0) code = E_NONE; /* check error code and */
  tmp = MSGOFFSET -code;        /* get error message text */
  if (code > E_UNKNOWN)
    msg = _errmsgs[-code];      /* built-in message */
  /* The index is checked against both ends of the vector and the  */
  /* vector itself must exist: a negative index or a NULL vector   */
  /* would otherwise be dereferenced. */
  else if (scan->msgs && (tmp >= 0) && (tmp < scan->msgcnt))
    msg = scan->msgs[tmp];      /* additional message */
  else
    msg = NULL;                 /* no message for this code */
  if (!msg) msg = _errmsgs[-(code = E_UNKNOWN)];

  switch (code) {               /* special error handling */
    case E_CHAR  : c = pc = (unsigned char)scan->value[0];
                   if (c < ' ') pc = ' ';  /* show control chars as blank */
                   fprintf (scan->errfile, msg, pc, c);        break;
    case E_UNTCOM: fprintf (scan->errfile, msg, scan->start);  break;
    default      : va_start(args, code);  /* get variable arguments */
                   vfprintf(scan->errfile, msg, args);
                   va_end(args);          break;
  }                             /* print the error message */
  if (scan->lncnt > 0)          /* if line count is positive, */
    putc('\n', scan->errfile);  /* terminate output line */
  return code;                  /* return error code */
}  /* sc_error() */
|
766
|
+
|
767
|
+
#endif
|