apriori 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. data/History.txt +16 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +121 -0
  4. data/README.txt +149 -0
  5. data/Rakefile +15 -0
  6. data/TODO.txt +60 -0
  7. data/attic/c_ext_test1/MyTest/MyTest.c +23 -0
  8. data/attic/c_ext_test1/MyTest/extconf.rb +11 -0
  9. data/attic/c_ext_test1/mytest.rb +10 -0
  10. data/attic/test.c +12 -0
  11. data/config/hoe.rb +81 -0
  12. data/config/requirements.rb +29 -0
  13. data/examples/01_simple_example.rb +32 -0
  14. data/examples/02_small_file_example.rb +17 -0
  15. data/examples/03_large_file_example.rb +22 -0
  16. data/examples/test_data/market_basket_basic_test.dat +9 -0
  17. data/ext/Apriori.c +149 -0
  18. data/ext/Makefile +149 -0
  19. data/ext/apriori/doc/apriori.html +1301 -0
  20. data/ext/apriori/doc/arem.gp +68 -0
  21. data/ext/apriori/doc/c_rev.gp +89 -0
  22. data/ext/apriori/doc/chi2.tex +156 -0
  23. data/ext/apriori/doc/copying +504 -0
  24. data/ext/apriori/doc/line.gif +0 -0
  25. data/ext/apriori/doc/uparrow.gif +0 -0
  26. data/ext/apriori/ex/flg2set +15 -0
  27. data/ext/apriori/ex/hdr2set +13 -0
  28. data/ext/apriori/ex/readme +71 -0
  29. data/ext/apriori/ex/row2set +7 -0
  30. data/ext/apriori/ex/rulesort +24 -0
  31. data/ext/apriori/ex/tab2set +9 -0
  32. data/ext/apriori/ex/test.app +2 -0
  33. data/ext/apriori/ex/test.rul +9 -0
  34. data/ext/apriori/ex/test1.rul +43 -0
  35. data/ext/apriori/ex/test1.tab +10 -0
  36. data/ext/apriori/ex/test2.tab +10 -0
  37. data/ext/apriori/ex/test3.tab +30 -0
  38. data/ext/apriori/ex/test4.tab +11 -0
  39. data/ext/apriori/ex/test5.tab +39 -0
  40. data/ext/apriori/ex/tid2set +23 -0
  41. data/ext/apriori/ex/xhdr2set +33 -0
  42. data/ext/apriori/src/apriori.c +750 -0
  43. data/ext/apriori/src/apriori.dsp +120 -0
  44. data/ext/apriori/src/apriori.dsw +29 -0
  45. data/ext/apriori/src/apriori.mak +99 -0
  46. data/ext/apriori/src/istree.c +1411 -0
  47. data/ext/apriori/src/istree.h +160 -0
  48. data/ext/apriori/src/makefile +105 -0
  49. data/ext/apriori/src/tract.c +870 -0
  50. data/ext/apriori/src/tract.h +261 -0
  51. data/ext/apriori_wrapper.c +757 -0
  52. data/ext/apriori_wrapper.h +10 -0
  53. data/ext/extconf.rb +32 -0
  54. data/ext/math/doc/copying +504 -0
  55. data/ext/math/src/chi2.c +151 -0
  56. data/ext/math/src/chi2.h +27 -0
  57. data/ext/math/src/choose.c +71 -0
  58. data/ext/math/src/choose.h +16 -0
  59. data/ext/math/src/gamma.c +446 -0
  60. data/ext/math/src/gamma.h +39 -0
  61. data/ext/math/src/intexp.c +35 -0
  62. data/ext/math/src/intexp.h +15 -0
  63. data/ext/math/src/makefile +164 -0
  64. data/ext/math/src/math.mak +48 -0
  65. data/ext/math/src/normal.c +387 -0
  66. data/ext/math/src/normal.h +44 -0
  67. data/ext/math/src/radfn.c +113 -0
  68. data/ext/math/src/radfn.h +34 -0
  69. data/ext/math/src/zeta.c +49 -0
  70. data/ext/math/src/zeta.h +15 -0
  71. data/ext/pre-clean.rb +8 -0
  72. data/ext/pre-setup.rb +9 -0
  73. data/ext/util/doc/copying +504 -0
  74. data/ext/util/src/listops.c +76 -0
  75. data/ext/util/src/listops.h +26 -0
  76. data/ext/util/src/makefile +103 -0
  77. data/ext/util/src/memsys.c +84 -0
  78. data/ext/util/src/memsys.h +42 -0
  79. data/ext/util/src/nstats.c +288 -0
  80. data/ext/util/src/nstats.h +69 -0
  81. data/ext/util/src/params.c +86 -0
  82. data/ext/util/src/params.h +19 -0
  83. data/ext/util/src/parse.c +133 -0
  84. data/ext/util/src/parse.h +81 -0
  85. data/ext/util/src/scan.c +767 -0
  86. data/ext/util/src/scan.h +111 -0
  87. data/ext/util/src/symtab.c +443 -0
  88. data/ext/util/src/symtab.h +121 -0
  89. data/ext/util/src/tabscan.c +279 -0
  90. data/ext/util/src/tabscan.h +99 -0
  91. data/ext/util/src/util.mak +91 -0
  92. data/ext/util/src/vecops.c +317 -0
  93. data/ext/util/src/vecops.h +42 -0
  94. data/lib/apriori.rb +133 -0
  95. data/lib/apriori/adapter.rb +13 -0
  96. data/lib/apriori/association_rule.rb +89 -0
  97. data/lib/apriori/version.rb +9 -0
  98. data/script/console +10 -0
  99. data/script/destroy +14 -0
  100. data/script/generate +14 -0
  101. data/script/txt2html +82 -0
  102. data/setup.rb +1585 -0
  103. data/tasks/apriori.rake +20 -0
  104. data/tasks/attic.rake +28 -0
  105. data/tasks/deployment.rake +34 -0
  106. data/tasks/environment.rake +7 -0
  107. data/tasks/install.rake +13 -0
  108. data/tasks/website.rake +17 -0
  109. data/test/apriori_test.rb +13 -0
  110. data/test/fixtures/market_basket_results_test.txt +5 -0
  111. data/test/fixtures/market_basket_string_test.txt +7 -0
  112. data/test/fixtures/results.txt +2 -0
  113. data/test/fixtures/sample.txt +7 -0
  114. data/test/test_helper.rb +5 -0
  115. data/test/unit/test_apriori.rb +68 -0
  116. data/test/unit/test_itemsets_and_parsing.rb +82 -0
  117. data/website/index.html +248 -0
  118. data/website/index.txt +152 -0
  119. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  120. data/website/stylesheets/screen.css +142 -0
  121. data/website/template.html.erb +49 -0
  122. metadata +226 -0
@@ -0,0 +1,69 @@
1
+ /*----------------------------------------------------------------------
2
+ File : nstats.h
3
+ Contents: management of normalization statistics
4
+ Author : Christian Borgelt
5
+ History : 2003.08.12 file created
6
+ 2004.08.12 description and parse function added
7
+ ----------------------------------------------------------------------*/
8
+ #ifndef __NSTATS__
9
+ #define __NSTATS__
10
+ #include <stdio.h>
11
+ #ifdef NST_PARSE
12
+ #include "parse.h"
13
+ #endif
14
+
15
+ /*----------------------------------------------------------------------
16
+ Type Definitions
17
+ ----------------------------------------------------------------------*/
18
+ typedef struct { /* --- numerical statistics; accessed through the nst_*() functions/macros declared in this header --- */
19
+ int dim; /* dimension of data space (value returned by nst_dim()) */
20
+ double reg; /* number of registered patterns (a double; presumably a sum of the weights passed to nst_reg() -- TODO confirm) */
21
+ double *mins; /* minimal data values (element i is read by nst_min(s,i)) */
22
+ double *maxs; /* maximal data values (element i is read by nst_max(s,i)) */
23
+ double *sums; /* sums of data values */
24
+ double *sqrs; /* sums of squared data values */
25
+ double *offs; /* offsets for data scaling (element i is read by nst_offset(s,i)) */
26
+ double facs[1]; /* factors for data scaling (element i is read by nst_factor(s,i)); NOTE(review): declared with one element but indexed by i -- presumably over-allocated by nst_create(), confirm before changing the layout */
27
+ } NSTATS; /* (numerical statistics) */
28
+
29
+ /*----------------------------------------------------------------------
30
+ Functions
31
+ ----------------------------------------------------------------------*/
32
+ extern NSTATS* nst_create (int dim);
33
+ extern void nst_delete (NSTATS *nst);
34
+ extern int nst_dim (NSTATS *nst);
35
+
36
+ extern void nst_reg (NSTATS *nst, const double *vec,
37
+ double weight);
38
+ extern void nst_range (NSTATS *nst, int idx,
39
+ double min, double max);
40
+ extern void nst_expand (NSTATS *nst, int idx, double factor);
41
+ extern void nst_scale (NSTATS *nst, int idx,
42
+ double off, double fac);
43
+
44
+ extern double nst_min (NSTATS *nst, int idx);
45
+ extern double nst_max (NSTATS *nst, int idx);
46
+ extern double nst_offset (NSTATS *nst, int idx);
47
+ extern double nst_factor (NSTATS *nst, int idx);
48
+
49
+ extern void nst_norm (NSTATS *nst, const double *vec, double *res);
50
+ extern void nst_inorm (NSTATS *nst, const double *vec, double *res);
51
+ extern void nst_center (NSTATS *nst, double *vec);
52
+ extern void nst_spans (NSTATS *nst, double *vec);
53
+
54
+ extern int nst_desc (NSTATS *nst, FILE *file,
55
+ const char *indent, int maxlen);
56
+ #ifdef NST_PARSE
57
+ extern NSTATS* nst_parse (SCAN *scan, int dim);
58
+ #endif
59
+
60
+ /*----------------------------------------------------------------------
61
+ Preprocessor Definitions
62
+ ----------------------------------------------------------------------*/
63
+ #define nst_dim(s) ((s)->dim)
64
+ #define nst_min(s,i) ((s)->mins[i])
65
+ #define nst_max(s,i) ((s)->maxs[i])
66
+ #define nst_offset(s,i) ((s)->offs[i])
67
+ #define nst_factor(s,i) ((s)->facs[i])
68
+
69
+ #endif
@@ -0,0 +1,86 @@
1
+ /*----------------------------------------------------------------------
2
+ File : params.c
3
+ Contents: command line parameter retrieval
4
+ Author : Christian Borgelt
5
+ History : 2003.06.05 file created
6
+ ----------------------------------------------------------------------*/
7
+ #include <stdarg.h>
8
+ #include <stdlib.h>
9
+ #include <assert.h>
10
+ #include "params.h"
11
+
12
+ /*----------------------------------------------------------------------
13
+ Functions
14
+ ----------------------------------------------------------------------*/
15
+
16
int getints (char *s, char **end, int n, ...)
{                               /* --- get integer parameters */
  va_list ap;                   /* cursor over the int* destinations */
  int     count;                /* number of values stored so far */
  int     *dest;                /* destination of the current value */
  long    val;                  /* value as delivered by strtol() */

  assert(s && end && (n > 0));  /* check the function arguments */
  va_start(ap, n);              /* destinations follow the count n */
  /* Parse at most n decimal integers separated by single colons.  */
  /* The loop increment skips the colon and is only reached if one */
  /* was actually found after the number just read.                */
  for (count = 0; count < n; s++) {
    val = strtol(s, end, 10);   /* try to read the next number */
    if (*end == s) break;       /* nothing consumed: empty field */
    dest  = va_arg(ap, int*);   /* fetch the next destination and */
    *dest = (int)val;           /* store the converted value */
    count++;                    /* count the stored value */
    s = *end;                   /* continue behind the number */
    if (*s != ':') break;       /* without a colon the list ends */
  }
  va_end(ap);                   /* end argument evaluation */
  return count;                 /* return the number of values read */
}  /* getints() */
33
+
34
+ /*--------------------------------------------------------------------*/
35
+
36
int getdbls (char *s, char **end, int n, ...)
{                               /* --- get double parameters */
  va_list ap;                   /* cursor over the double* destinations */
  int     count;                /* number of values stored so far */
  double  *dest;                /* destination of the current value */
  double  val;                  /* value as delivered by strtod() */

  assert(s && end && (n > 0));  /* check the function arguments */
  va_start(ap, n);              /* destinations follow the count n */
  /* Parse at most n floating point numbers separated by single    */
  /* colons.  The loop increment skips the colon and is only       */
  /* reached if one was actually found after the number just read. */
  for (count = 0; count < n; s++) {
    val = strtod(s, end);       /* try to read the next number */
    if (*end == s) break;       /* nothing consumed: empty field */
    dest  = va_arg(ap, double*);/* fetch the next destination and */
    *dest = val;                /* store the converted value */
    count++;                    /* count the stored value */
    s = *end;                   /* continue behind the number */
    if (*s != ':') break;       /* without a colon the list ends */
  }
  va_end(ap);                   /* end argument evaluation */
  return count;                 /* return the number of values read */
}  /* getdbls() */
54
+
55
+ /*--------------------------------------------------------------------*/
56
+
57
int getintvec (char *s, char **end, int n, int *p)
{                               /* --- get integer parameter vector */
  int  cnt;                     /* number of values stored in p */
  long val;                     /* value as delivered by strtol() */

  assert(s && end && (n > 0));  /* check the function arguments */
  /* Read at most n colon separated integers into p[0..n-1].  The  */
  /* loop increment steps over the colon that ended the last field */
  /* and is only reached if that colon was present.                */
  for (cnt = 0; cnt < n; s = *end +1) {
    val = strtol(s, end, 10);   /* try to read the next number */
    if (*end == s) break;       /* nothing consumed: empty field */
    p[cnt] = (int)val;          /* store the value and */
    cnt++;                      /* count it */
    if (**end != ':') break;    /* without a colon the list ends */
  }
  return cnt;                   /* return the number of values read */
}  /* getintvec() */
70
+
71
+ /*--------------------------------------------------------------------*/
72
+
73
int getdblvec (char *s, char **end, int n, double *p)
{                               /* --- get double parameter vector */
  int    cnt;                   /* number of values stored in p */
  double val;                   /* value as delivered by strtod() */

  assert(s && end && (n > 0));  /* check the function arguments */
  /* Read at most n colon separated numbers into p[0..n-1].  The   */
  /* loop increment steps over the colon that ended the last field */
  /* and is only reached if that colon was present.                */
  for (cnt = 0; cnt < n; s = *end +1) {
    val = strtod(s, end);       /* try to read the next number */
    if (*end == s) break;       /* nothing consumed: empty field */
    p[cnt] = val;               /* store the value and */
    cnt++;                      /* count it */
    if (**end != ':') break;    /* without a colon the list ends */
  }
  return cnt;                   /* return the number of values read */
}  /* getdblvec() */
@@ -0,0 +1,19 @@
1
+ /*----------------------------------------------------------------------
2
+ File : params.h
3
+ Contents: command line parameter retrieval
4
+ Author : Christian Borgelt
5
+ History : 2003.06.05 file created
6
+ ----------------------------------------------------------------------*/
7
+ #ifndef __PARAMS__
8
+ #define __PARAMS__
9
+
10
+ /*----------------------------------------------------------------------
11
+ Functions
12
+ ----------------------------------------------------------------------*/
13
+ extern int getints (char *s, char **end, int n, ...); /* read up to n ':'-separated decimal integers from s into the int* varargs; *end is set by strtol(); returns the number stored */
14
+ extern int getdbls (char *s, char **end, int n, ...); /* like getints(), but reads doubles with strtod() into double* varargs */
15
+
16
+ extern int getintvec (char *s, char **end, int n, int *p); /* read up to n ':'-separated integers into p[0..n-1]; returns the number stored */
17
+ extern int getdblvec (char *s, char **end, int n, double *p); /* read up to n ':'-separated doubles into p[0..n-1]; returns the number stored */
18
+
19
+ #endif
@@ -0,0 +1,133 @@
1
+ /*----------------------------------------------------------------------
2
+ File : parse.c
3
+ Contents: parser utilities
4
+ Author : Christian Borgelt
5
+ History : 2004.08.12 file created
6
+ 2006.02.02 error E_EDGE added
7
+ 2007.01.16 error E_MSDCNT added
8
+ ----------------------------------------------------------------------*/
9
+ #include <string.h>
10
+ #include <assert.h>
11
+ #include "parse.h"
12
+
13
+ /*----------------------------------------------------------------------
14
+ Constants
15
+ ----------------------------------------------------------------------*/
16
+ #ifdef GERMAN /* deutsche Texte */
17
+ static const char *errmsgs[] = { /* Fehlermeldungen */
18
+ /* E_CHREXP -16 */ "\"%c\" erwartet statt %s",
19
+ /* E_STREXP -17 */ "\"%s\" erwartet statt %s",
20
+ /* E_NUMEXP -18 */ "Zahl erwartet statt %s",
21
+ /* E_NUMBER -19 */ "ungültige Zahl %s",
22
+
23
+ /* E_ATTEXP -20 */ "Attribut erwartet statt %s",
24
+ /* E_UNKATT -21 */ "unbekanntes Attribut %s",
25
+ /* E_DUPATT -22 */ "doppeltes Attribut %s",
26
+ /* E_MISATT -23 */ "Attribut %s fehlt",
27
+ /* E_ATTRIB -24 */ "ungültiges Attribut %s",
28
+ /* E_ATTYPE -25 */ "Attribut %s hat falschen Typ",
29
+
30
+ /* E_VALEXP -26 */ "Attributwert erwartet statt %s",
31
+ /* E_UNKVAL -27 */ "unbekannter Attributwert %s",
32
+ /* E_DUPVAL -28 */ "doppelter Attributwert %s",
33
+ /* E_MISVAL -29 */ "fehlender Attributwert %s",
34
+
35
+ /* E_CLSEXP -30 */ "Klassenattribut erwartet statt %s",
36
+ /* E_UNKCLS -31 */ "unbekannte Klasse %s",
37
+ /* E_DUPCLS -32 */ "doppelte Klasse %s",
38
+ /* E_MISCLS -33 */ "Klasse %s fehlt",
39
+ /* E_CLSTYPE -34 */ "Klassenattribut %s hat falschen Typ",
40
+ /* E_CLSCNT -35 */ "Klassenattribut %s hat zu wenige Werte",
41
+
42
+ /* E_DOMAIN -36 */ "ungültiger Wertebereich %s",
43
+
44
+ /* E_PAREXP -37 */ "Parameter erwartet statt %s",
45
+ /* E_CMPOP -38 */ "ungültiger Vergleichsoperator %s",
46
+ /* E_COVMAT -39 */ "ungültige Kovarianzmatrix",
47
+
48
+ /* E_DUPCDL -40 */ "doppelte Kandidatenliste für Attribut %s\n",
49
+ /* E_RANGE -41 */ "ungültiger Kandidatenbereich",
50
+ /* E_CAND -42 */ "ungültiger Kandidat %s",
51
+ /* E_LINK -43 */ "ungültiger Verweis",
52
+
53
+ /* E_LYRCNT -44 */ "ungültige Anzahl Schichten",
54
+ /* E_UNITCNT -45 */ "ungültige Anzahl Einheiten",
55
+
56
+ /* E_EDGE -46 */ "ungültiger Kantentyp %s",
57
+
58
+ /* E_MSDCNT -47 */ "falsche Anzahl Zugehörigkeitsgrade",
59
+ };
60
+ #else /* English texts */
61
+ static const char *errmsgs[] = { /* error messages */
62
+ /* E_CHREXP -16 */ "\"%c\" expected instead of %s",
63
+ /* E_STREXP -17 */ "\"%s\" expected instead of %s",
64
+ /* E_NUMEXP -18 */ "number expected instead of %s",
65
+ /* E_NUMBER -19 */ "invalid number %s",
66
+
67
+ /* E_ATTEXP -20 */ "attribute expected instead of %s",
68
+ /* E_UNKATT -21 */ "unknown attribute %s",
69
+ /* E_DUPATT -22 */ "duplicate attribute %s",
70
+ /* E_MISATT -23 */ "missing attribute %s",
71
+ /* E_ATTRIB -24 */ "invalid attribute %s",
72
+ /* E_ATTYPE -25 */ "attribute %s has wrong type",
73
+
74
+ /* E_VALEXP -26 */ "attribute value expected instead of %s",
75
+ /* E_UNKVAL -27 */ "unknown attribute value %s",
76
+ /* E_DUPVAL -28 */ "duplicate attribute value %s",
77
+ /* E_MISVAL -29 */ "missing attribute value %s",
78
+
79
+ /* E_CLSEXP -30 */ "class value expected instead of %s",
80
+ /* E_UNKCLS -31 */ "unknown class value %s",
81
+ /* E_DUPCLS -32 */ "duplicate class value %s",
82
+ /* E_MISCLS -33 */ "missing class value %s",
83
+ /* E_CLSTYPE -34 */ "class attribute %s has wrong type",
84
+ /* E_CLSCNT -35 */ "class attribute %s has too few values",
85
+
86
+ /* E_DOMAIN -36 */ "invalid attribute domain %s",
87
+
88
+ /* E_PAREXP -37 */ "parameter expected instead of %s",
89
+ /* E_CMPOP -38 */ "invalid comparison operator %s",
90
+ /* E_COVMAT -39 */ "invalid covariance matrix",
91
+
92
+ /* E_DUPCDL -40 */ "duplicate candidate list for attribute %s\n",
93
+ /* E_RANGE -41 */ "invalid candidate range",
94
+ /* E_CAND -42 */ "invalid candidate %s",
95
+ /* E_LINK -43 */ "invalid link",
96
+
97
+ /* E_LYRCNT -44 */ "invalid number of layers",
98
+ /* E_UNITCNT -45 */ "invalid number of units",
99
+
100
+ /* E_EDGE -46 */ "invalid edge type %s",
101
+
102
+ /* E_MSDCNT -47 */ "wrong number of membership degrees",
103
+ };
104
+ #endif
105
+ #define MSGCNT (int)(sizeof(errmsgs)/sizeof(const char*))
106
+
107
+ /*----------------------------------------------------------------------
108
+ Functions
109
+ ----------------------------------------------------------------------*/
110
+
111
+ void pa_init (SCAN *scan) /* --- initialize parsing */
112
+ { sc_errmsgs(scan, errmsgs, MSGCNT); }
113
+
114
+ /*--------------------------------------------------------------------*/
115
+
116
+ int pa_error (SCAN *scan, int code, int c, const char *s)
117
+ { /* --- report a parse error */
118
+ char src[256], dst[1024]; /* buffers for string formating */
119
+
120
+ assert(scan); /* check the function arguments */
121
+ if (((code == E_DUPATT) || (code == E_MISATT)
122
+ || (code == E_DUPVAL) || (code == E_MISVAL)
123
+ || (code == E_DUPCLS) || (code == E_MISCLS)
124
+ || (code == E_CAND)) && s)
125
+ sc_format(dst, s, 1); /* if "missing ..." error message, */
126
+ else { /* format the given name */
127
+ strncpy(src, sc_value(scan), 255); src[255] = '\0';
128
+ sc_format(dst, src, 1); /* if normal error message, */
129
+ } /* copy and format the token value */
130
+ if (code == E_CHREXP) return sc_error(scan, code, c, dst);
131
+ else if (code == E_STREXP) return sc_error(scan, code, s, dst);
132
+ else return sc_error(scan, code, dst);
133
+ } /* _paerr() */ /* print an error message */
@@ -0,0 +1,81 @@
1
+ /*----------------------------------------------------------------------
2
+ File : parse.h
3
+ Contents: parser utilities
4
+ Author : Christian Borgelt
5
+ History : 2004.08.12 file created
6
+ 2006.02.02 error E_EDGE added
7
+ 2007.01.16 error E_MSDCNT added
8
+ ----------------------------------------------------------------------*/
9
+ #ifndef __PARSE__
10
+ #define __PARSE__
11
+ #ifndef SC_SCAN
12
+ #define SC_SCAN
13
+ #endif
14
+ #include "scan.h"
15
+
16
+ /*----------------------------------------------------------------------
17
+ Preprocessor Definitions
18
+ ----------------------------------------------------------------------*/
19
+ /* --- error codes --- */
20
+ #define E_CHREXP (-16) /* character expected */
21
+ #define E_STREXP (-17) /* string expected */
22
+ #define E_NUMEXP (-18) /* number expected */
23
+ #define E_NUMBER (-19) /* invalid number */
24
+
25
+ #define E_ATTEXP (-20) /* attribute expected */
26
+ #define E_UNKATT (-21) /* unknown attribute */
27
+ #define E_DUPATT (-22) /* duplicate attribute */
28
+ #define E_MISATT (-23) /* missing attribute */
29
+ #define E_ATTRIB (-24) /* invalid attribute */
30
+ #define E_ATTYPE (-25) /* wrong attribute type */
31
+
32
+ #define E_VALEXP (-26) /* attribute value expected */
33
+ #define E_UNKVAL (-27) /* unknown attribute value */
34
+ #define E_DUPVAL (-28) /* duplicate attribute value */
35
+ #define E_MISVAL (-29) /* missing attribute value */
36
+
37
+ #define E_CLSEXP (-30) /* class value expected */
38
+ #define E_UNKCLS (-31) /* unknown class value */
39
+ #define E_DUPCLS (-32) /* duplicate class value */
40
+ #define E_MISCLS (-33) /* missing class value */
41
+ #define E_CLSTYPE (-34) /* class attribute must be nominal */
42
+ #define E_CLSCNT (-35) /* class attribute has too few values */
43
+
44
+ #define E_DOMAIN (-36) /* invalid attribute domain */
45
+
46
+ #define E_PAREXP (-37) /* parameter expected */
47
+ #define E_CMPOP (-38) /* invalid comparison operator */
48
+ #define E_COVMAT (-39) /* invalid covariance matrix */
49
+
50
+ #define E_DUPCDL (-40) /* duplicate candidate list */
51
+ #define E_RANGE (-41) /* invalid candidate range */
52
+ #define E_CAND (-42) /* invalid candidate */
53
+ #define E_LINK (-43) /* invalid link */
54
+
55
+ #define E_LYRCNT (-44) /* invalid number of layers */
56
+ #define E_UNITCNT (-45) /* invalid number of units */
57
+
58
+ #define E_EDGE (-46) /* invalid edge type */
59
+
60
+ #define E_MSDCNT (-47) /* wrong number of membership degrees */
61
+
62
+ /*----------------------------------------------------------------------
63
+ Functions
64
+ ----------------------------------------------------------------------*/
65
+ extern void pa_init (SCAN *scan);
66
+ extern int pa_error (SCAN *scan, int code, int c, const char *s);
67
+
68
+ /*----------------------------------------------------------------------
69
+ Preprocessor Definitions
70
+ ----------------------------------------------------------------------*/
71
+ #define ERROR(c) return pa_error(scan, c, -1, NULL) /* NOTE: expands to a return statement and needs a variable named scan in scope */
72
+ #define XERROR(c,s) return pa_error(scan, c, -1, s) /* like ERROR(c), but passes the name s on to pa_error() */
73
+ #define ERR_CHR(c) return pa_error(scan, E_CHREXP, c, NULL) /* return with a "character c expected" error */
74
+ #define ERR_STR(s) return pa_error(scan, E_STREXP, -1, s) /* return with a "string s expected" error */
75
+ #define GET_TOK() if (sc_next(scan) < 0) \
76
+ return sc_error(scan, sc_token(scan)) /* get the next token; on a negative sc_next() result return sc_error() for the current token */
77
+ #define GET_CHR(c) if (sc_token(scan) != (c)) ERR_CHR(c); \
78
+ else GET_TOK() /* check that the current token is c, then get the next one; NOTE(review): expands to an if/else ending in an else-less if, so take care when using it directly before an else */
79
+ #define RECOVER() if (sc_recover(scan, ';', '{', '}', 0) == T_EOF) \
80
+ return 1 /* call sc_recover(); leave the enclosing function with 1 if it yields T_EOF */
81
+ #endif
@@ -0,0 +1,767 @@
1
+ /*----------------------------------------------------------------------
2
+ File : scan.c
3
+ Contents: scanner (lexical analysis of a character stream)
4
+ Author : Christian Borgelt
5
+ History : 1996.01.16 file created
6
+ 1996.02.21 identifier recognition made more flexible
7
+ 1996.03.17 keyword tokens removed
8
+ 1996.04.15 duplicate state removed from sc_next
9
+ 1997.07.29 < and > declared active (for decision trees)
10
+ 1997.09.08 escape sequences in strings made possible
11
+ 1997.09.11 single characters stored also in scn->value
12
+ 1998.02.08 recover and error message functions added
13
+ 1998.02.09 bug in state S_NUMPT concerning "-." removed
14
+ 1998.02.13 token T_RGT ('->') added
15
+ 1998.03.04 returned tokens changed for some states
16
+ 1998.04.17 token T_LFT ('<-') added
17
+ 1998.05.27 token T_CMP (two char comparison operator) added
18
+ 1998.05.31 token conversion to number removed
19
+ 1999.02.08 reading from standard input made possible
20
+ 1999.04.29 quoted string parsing improved
21
+ 1999.11.13 token string length stored in scn->len
22
+ 2000.11.23 functions sc_fmtlen and sc_format added
23
+ 2001.07.15 scanner made an object, state definitions added
24
+ 2001.07.16 characters with code > 127 made printable
25
+ look ahead functionality added (sc_back)
26
+ 2006.02.02 token T_DASH (undirected edge '--') added
27
+ ----------------------------------------------------------------------*/
28
+ #include <stdio.h>
29
+ #include <stdlib.h>
30
+ #include <string.h>
31
+ #include <stdarg.h>
32
+ #include <assert.h>
33
+ #include "scan.h"
34
+ #ifdef STORAGE
35
+ #include "storage.h"
36
+ #endif
37
+
38
+ /*----------------------------------------------------------------------
39
+ Preprocessor Definitions
40
+ ----------------------------------------------------------------------*/
41
+ #ifdef SC_SCAN
42
+ /* --- character classes --- */
43
+ #define C_INVALID 0 /* invalid character */
44
+ #define C_SPACE 1 /* white space, e.g. ' ' '\t' '\n' */
45
+ #define C_LETTER 2 /* letter or underscore '_' */
46
+ #define C_DIGIT 3 /* digit */
47
+ #define C_POINT 4 /* point, '.' */
48
+ #define C_SIGN 5 /* sign, '+' or '-' */
49
+ #define C_SLASH 6 /* slash, '/' */
50
+ #define C_QUOTE 7 /* quote, e.g. '"' '`' */
51
+ #define C_CMPOP 8 /* comparison operator, e.g. '<' */
52
+ #define C_ACTIVE 9 /* active characters, e.g. ',' '(' */
53
+
54
+ /* --- scanner states --- */
55
+ #define S_SPACE 0 /* skipping white space */
56
+ #define S_ID 1 /* reading identifier */
57
+ #define S_NUMDIG 2 /* reading number, digit */
58
+ #define S_NUMPT 3 /* reading number, decimal point */
59
+ #define S_FRAC 4 /* reading number, digit and point */
60
+ #define S_EXPIND 5 /* reading exponent, indicator */
61
+ #define S_EXPSGN 6 /* reading exponent, sign */
62
+ #define S_EXPDIG 7 /* reading exponent, digit */
63
+ #define S_SIGN 8 /* sign read */
64
+ #define S_CMPOP 9 /* reading comparison operator */
65
+ #define S_STRING 10 /* reading quoted string */
66
+ #define S_ESC 11 /* reading escaped character */
67
+ #define S_OCT1 12 /* reading octal number, 1 digit */
68
+ #define S_OCT2 13 /* reading octal number, 2 digits */
69
+ #define S_HEX1 14 /* reading hexad. number, 1 digit */
70
+ #define S_HEX2 15 /* reading hexad. number, 2 digits */
71
+ #define S_SLASH 16 /* slash read */
72
+ #define S_CPPCOM 17 /* reading C++ comment */
73
+ #define S_CCOM1 18 /* reading C comment */
74
+ #define S_CCOM2 19 /* reading C comment, possible end */
75
+ #define S_CCOM3 20 /* reading C comment, possible start */
76
+
77
+ /* --- functions --- */
78
+ #define UNGETC(s,c) do { if ((c) == EOF) break; \
79
+ if ((c) == '\n') (s)->line--; \
80
+ ungetc(c, (s)->file); } while (0)
81
+
82
+ /* --- additional error codes --- */
83
+ #define E_UNKNOWN (-11) /* unknown error */
84
+ #define MSGOFFSET (-16) /* offset for add. error messages */
85
+
86
+ /* --- texts --- */
87
+ #ifdef GERMAN /* deutsche Texte */
88
+ #define FILETXT "Datei"
89
+ #define LINETXT "Zeile"
90
+ #else /* English texts */
91
+ #define FILETXT "file"
92
+ #define LINETXT "line"
93
+ #endif /* #ifdef GERMAN .. #else .. */
94
+ #endif /* #ifdef SC_SCAN */
95
+
96
+ /*----------------------------------------------------------------------
97
+ Constants
98
+ ----------------------------------------------------------------------*/
99
+ static const char _scftab[256] = { /* scanable form classes */
100
+ /* NUL SOH STX ETX EOT ENQ ACK BEL */
101
+ /* 00 */ 2, 2, 2, 2, 2, 2, 2, 'a',
102
+ /* BS HT LF VT FF CR SO SI */
103
+ 'b', 't', 'n', 'v', 'f', 'r', 2, 2,
104
+ /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */
105
+ /* 10 */ 2, 2, 2, 2, 2, 2, 2, 2,
106
+ /* CAN EM SUB ESC FS GS RS US */
107
+ 2, 2, 2, 2, 2, 2, 2, 2,
108
+ /* ' ' '!' '"' '#' '$' '%' '&' ''' */
109
+ /* 20 */ 1, 1, '"', 1, 1, 1, 1, 1,
110
+ /* '(' ')' '*' '+' ',' '-' '.' '/' */
111
+ 1, 1, 1, 0, 1, 0, 0, 1,
112
+ /* '0' '1' '2' '3' '4' '5' '6' '7' */
113
+ /* 30 */ 0, 0, 0, 0, 0, 0, 0, 0,
114
+ /* '8' '9' ':' ';' '<' '=' '>' '?' */
115
+ 0, 0, 1, 1, 1, 1, 1, 1,
116
+ /* '@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' */
117
+ /* 40 */ 1, 0, 0, 0, 0, 0, 0, 0,
118
+ /* 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' */
119
+ 0, 0, 0, 0, 0, 0, 0, 0,
120
+ /* 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' */
121
+ /* 50 */ 0, 0, 0, 0, 0, 0, 0, 0,
122
+ /* 'X' 'Y' 'Z' '[' '\' ']' '^' '_' */
123
+ 0, 0, 0, 1, '\\', 1, 1, 0,
124
+ /* '`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' */
125
+ /* 60 */ 1, 0, 0, 0, 0, 0, 0, 0,
126
+ /* 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' */
127
+ 0, 0, 0, 0, 0, 0, 0, 0,
128
+ /* 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' */
129
+ /* 70 */ 0, 0, 0, 0, 0, 0, 0, 0,
130
+ /* 'x' 'y' 'z' '{' '|' '}' '~' DEL */
131
+ 0, 0, 0, 1, 1, 1, 1, 2,
132
+ /* 80 */ 1, 1, 1, 1, 1, 1, 1, 1,
133
+ 1, 1, 1, 1, 1, 1, 1, 1,
134
+ /* 90 */ 1, 1, 1, 1, 1, 1, 1, 1,
135
+ 1, 1, 1, 1, 1, 1, 1, 1,
136
+ /* a0 */ 1, 1, 1, 1, 1, 1, 1, 1,
137
+ 1, 1, 1, 1, 1, 1, 1, 1,
138
+ /* b0 */ 1, 1, 1, 1, 1, 1, 1, 1,
139
+ 1, 1, 1, 1, 1, 1, 1, 1,
140
+ /* c0 */ 1, 1, 1, 1, 1, 1, 1, 1,
141
+ 1, 1, 1, 1, 1, 1, 1, 1,
142
+ /* d0 */ 1, 1, 1, 1, 1, 1, 1, 1,
143
+ 1, 1, 1, 1, 1, 1, 1, 1,
144
+ /* e0 */ 1, 1, 1, 1, 1, 1, 1, 1,
145
+ 1, 1, 1, 1, 1, 1, 1, 1,
146
+ /* f0 */ 1, 1, 1, 1, 1, 1, 1, 1,
147
+ 1, 1, 1, 1, 1, 1, 1, 1 };
148
+
149
+ #ifdef SC_SCAN
150
+ static const char _ccltab[256] = { /* character classes */
151
+ /* NUL SOH STX ETX EOT ENQ ACK BEL */
152
+ /* 00 */ 0, 0, 0, 0, 0, 0, 0, 0,
153
+ /* BS HT LF VT FF CR SO SI */
154
+ 0, 1, 1, 1, 1, 1, 0, 0,
155
+ /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */
156
+ /* 10 */ 0, 0, 0, 0, 0, 0, 0, 0,
157
+ /* CAN EM SUB ESC FS GS RS US */
158
+ 0, 0, 0, 0, 0, 0, 0, 0,
159
+ /* ' ' '!' '"' '#' '$' '%' '&' ''' */
160
+ /* 20 */ 1, 8, 7, 9, 9, 9, 9, 7,
161
+ /* '(' ')' '*' '+' ',' '-' '.' '/' */
162
+ 9, 9, 9, 5, 9, 5, 4, 6,
163
+ /* '0' '1' '2' '3' '4' '5' '6' '7' */
164
+ /* 30 */ 3, 3, 3, 3, 3, 3, 3, 3,
165
+ /* '8' '9' ':' ';' '<' '=' '>' '?' */
166
+ 3, 3, 9, 9, 8, 8, 8, 9,
167
+ /* '@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' */
168
+ /* 40 */ 0, 2, 2, 2, 2, 2, 2, 2,
169
+ /* 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' */
170
+ 2, 2, 2, 2, 2, 2, 2, 2,
171
+ /* 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' */
172
+ /* 50 */ 2, 2, 2, 2, 2, 2, 2, 2,
173
+ /* 'X' 'Y' 'Z' '[' '\' ']' '^' '_' */
174
+ 2, 2, 2, 9, 9, 9, 9, 2,
175
+ /* '`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' */
176
+ /* 60 */ 7, 2, 2, 2, 2, 2, 2, 2,
177
+ /* 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' */
178
+ 2, 2, 2, 2, 2, 2, 2, 2,
179
+ /* 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' */
180
+ /* 70 */ 2, 2, 2, 2, 2, 2, 2, 2,
181
+ /* 'x' 'y' 'z' '{' '|' '}' '~' DEL */
182
+ 2, 2, 2, 9, 9, 9, 9, 0,
183
+ /* 80 */ 0, 0, 0, 0, 0, 0, 0, 0,
184
+ 0, 0, 0, 0, 0, 0, 0, 0,
185
+ /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0,
186
+ 0, 0, 0, 0, 0, 0, 0, 0,
187
+ /* a0 */ 0, 0, 0, 0, 0, 0, 0, 0,
188
+ 0, 0, 0, 0, 0, 0, 0, 0,
189
+ /* b0 */ 0, 0, 0, 0, 0, 0, 0, 0,
190
+ 0, 0, 0, 0, 0, 0, 0, 0,
191
+ /* c0 */ 0, 0, 0, 0, 0, 0, 0, 0,
192
+ 0, 0, 0, 0, 0, 0, 0, 0,
193
+ /* d0 */ 0, 0, 0, 0, 0, 0, 0, 0,
194
+ 0, 0, 0, 0, 0, 0, 0, 0,
195
+ /* e0 */ 0, 0, 0, 0, 0, 0, 0, 0,
196
+ 0, 0, 0, 0, 0, 0, 0, 0,
197
+ /* f0 */ 0, 0, 0, 0, 0, 0, 0, 0,
198
+ 0, 0, 0, 0, 0, 0, 0, 0 };
199
+
200
+ #ifdef GERMAN /* deutsche Texte */
201
+ static const char *_errmsgs[] = { /* error messages */
202
+ /* E_NONE 0 */ "kein Fehler",
203
+ /* E_NOMEM -1 */ "nicht genug Speicher",
204
+ /* E_FOPEN -2 */ "�ffnen fehlgeschlagen",
205
+ /* E_FREAD -3 */ "Lesefehler",
206
+ /* E_FWRITE -4 */ "Schreibfehler",
207
+ /* E_CHAR -5 */ "ung�ltiges Zeichen '%c' (0x%02x)",
208
+ /* E_BUFOVF -6 */ "Puffer�berlauf",
209
+ /* E_UNTSTR -7 */ "unbeendete Zeichenkette",
210
+ /* E_UNTCOM -8 */ "unerwartetes Dateiende in Kommentar "
211
+ "(Anfang in Zeile %d)",
212
+ /* E_STATE -9 */ "ung�ltiger Scannerzustand",
213
+ /* E_GARBAGE -10 */ "ung�ltiger Text am Dateiende",
214
+ /* E_UNKNOWN -11 */ "unbekannter Fehler"
215
+ };
216
+ #else /* English texts */
217
+ static const char *_errmsgs[] = { /* error messages */
218
+ /* E_NONE 0 */ "no error",
219
+ /* E_NOMEM -1 */ "not enough memory",
220
+ /* E_FOPEN -2 */ "file open failed",
221
+ /* E_FREAD -3 */ "file read failed",
222
+ /* E_FWRITE -4 */ "file write failed",
223
+ /* E_CHAR -5 */ "invalid character '%c' (0x%02x)",
224
+ /* E_BUFOVF -6 */ "scan buffer overflow",
225
+ /* E_UNTSTR -7 */ "unterminated string",
226
+ /* E_UNTCOM -8 */ "unexpected end of file in comment "
227
+ "started on line %d",
228
+ /* E_STATE -9 */ "invalid scanner state",
229
+ /* E_GARBAGE -10 */ "garbage at end of file",
230
+ /* E_UNKNOWN -11 */ "unknown error"
231
+ };
232
+ #endif /* #ifdef GERMAN .. #else .. */
233
+ #endif /* #ifdef SC_SCAN */
234
+
235
+ /*----------------------------------------------------------------------
236
+ Auxiliary Functions
237
+ ----------------------------------------------------------------------*/
238
+ #ifdef SC_SCAN
239
+
240
+ static int _swap (SCAN *scan)
241
+ { /* --- swap token information */
242
+ int t; /* swap buffer */
243
+
244
+ if (scan->value == scan->buf[0]) scan->value = scan->buf[1];
245
+ else scan->value = scan->buf[0];
246
+ t = scan->plen; scan->plen = scan->len; scan->len = t;
247
+ t = scan->pline; scan->pline = scan->line; scan->line = t;
248
+ t = scan->ptoken; scan->ptoken = scan->token; scan->token = t;
249
+ return t; /* return the new token */
250
+ } /* _swap() */
251
+
252
+ #endif
253
+ /*----------------------------------------------------------------------
254
+ Main Functions
255
+ ----------------------------------------------------------------------*/
256
+
257
+ int sc_fmtlen (const char *s, int *len)
258
+ { /* --- length of a formatted name */
259
+ int n = 0, k = 0; /* number of (additional) characters */
260
+ int q = 0; /* quote flag (default: no quotes) */
261
+
262
+ assert(s); /* check the function arguments */
263
+ while (*s) { /* while not at end of name */
264
+ n++; /* count character */
265
+ switch (_scftab[(unsigned char)*s++]) {
266
+ case 0: break;
267
+ case 1: q = 2; break;
268
+ case 2: k += 3; q = 2; break;
269
+ default: k += 1; q = 2; break;
270
+ } /* sum additional characters and */
271
+ } /* set quote flag (if necessary) */
272
+ if (len) *len = n; /* store normal length and */
273
+ return n +k +q; /* return length of scanable form */
274
+ } /* sc_fmtlen() */
275
+
276
+ /*--------------------------------------------------------------------*/
277
+
278
+ int sc_format (char *dst, const char *src, int quotes)
279
+ { /* --- format name in scanable form */
280
+ char *d; const char *s; /* to traverse buffer and name */
281
+ int c, cls; /* character and character class */
282
+ int t; /* temporary buffer */
283
+
284
+ assert(dst && src); /* check the function arguments */
285
+ if (!*src) quotes = 1; /* an empty name needs quotes */
286
+ if (!quotes) { /* if quotes are not mandatory, */
287
+ for (s = src; *s; ) /* traverse the string to convert */
288
+ if (_scftab[(unsigned char)*s++] != 0) {
289
+ quotes = 1; break; } /* if a character needs quotes, */
290
+ } /* set the quotes flag and abort */
291
+ d = dst; /* get the destination and */
292
+ if (quotes) *d++ = '"'; /* store a quote if necessary */
293
+ while (*src) { /* traverse the characters */
294
+ c = (unsigned char)*src++;/* get the next character */
295
+ cls = _scftab[c]; /* and its character class */
296
+ if (cls < 2) /* if it is a normal character, */
297
+ *d++ = c; /* just store it */
298
+ else if (cls > 2) { /* if it is an ANSI escape character, */
299
+ *d++ = '\\'; *d++ = cls;} /* store it as '\c' */
300
+ else { /* if it is any other character */
301
+ *d++ = '\\'; *d++ = 'x';
302
+ t = c >> 4; *d++ = (t > 9) ? (t -10 +'a') : (t +'0');
303
+ t = c & 0xf; *d++ = (t > 9) ? (t -10 +'a') : (t +'0');
304
+ } /* store the character code */
305
+ } /* as a hexadecimal number */
306
+ if (quotes) *d++ = '"'; /* store the closing quote */
307
+ *d = '\0'; /* and terminate the string */
308
+ return (int)(d -dst); /* return the length of the result */
309
+ } /* sc_format() */
310
+
311
+ /*--------------------------------------------------------------------*/
312
+ #ifdef SC_SCAN
313
+
314
+ SCAN* sc_create (const char *fname)
315
+ { /* --- create a scanner */
316
+ const char *fn = fname; /* buffer for filename */
317
+ SCAN *scan; /* created scanner */
318
+
319
+ if (!fn || !*fn) fname = "<stdin>";
320
+ scan = (SCAN*)malloc(sizeof(SCAN) +strlen(fname));
321
+ if (!scan) return NULL; /* allocate memory for a scanner */
322
+ strcpy(scan->fname, fname); /* and note the file name */
323
+ if (!fn || !*fn) /* if no file name is given, */
324
+ scan->file = stdin; /* read from standard input */
325
+ else { /* if a file name is given, */
326
+ scan->file = fopen(fn,"r"); /* open the file for reading */
327
+ if (!scan->file) { free(scan); return NULL; }
328
+ }
329
+ scan->line = 1; /* initialize the fields */
330
+ scan->token = scan->len = scan->start = 0;
331
+ scan->value = scan->buf[0]; scan->buf[0][0] = '\0';
332
+ scan->back = 0;
333
+ scan->errfile = stderr;
334
+ scan->msgcnt = scan->lncnt = 0;
335
+ scan->msgs = NULL;
336
+ return scan; /* return created scanner */
337
+ } /* sc_create() */
338
+
339
+ /*--------------------------------------------------------------------*/
340
+
341
+ void sc_delete (SCAN *scan)
342
+ { /* --- delete a scanner */
343
+ if (scan->file != stdin) fclose(scan->file);
344
+ free(scan); /* close the input file and */
345
+ } /* sc_delete() */ /* delete the scanner structure */
346
+
347
+ /*--------------------------------------------------------------------*/
348
+
349
+ int sc_next (SCAN *scan)
350
+ { /* --- get next token */
351
+ int c, ccl; /* character and character class */
352
+ int quote = 0; /* quote at the start of a string */
353
+ int ec = 0; /* escaped character */
354
+ int state = 0; /* state of automaton */
355
+ int level = 0; /* comment nesting level */
356
+ char *p; /* to traverse the scan buffer */
357
+ char *end; /* end of the scan buffer */
358
+
359
+ if (scan->back) { /* if a step backwards has been made, */
360
+ scan->back = 0; /* clear the corresponding flag, */
361
+ return _swap(scan); /* swap back the token information, */
362
+ } /* and return the current token */
363
+ scan->pline = scan->line; /* note the relevant information */
364
+ scan->ptoken = scan->token; /* of the current token */
365
+ scan->plen = scan->len; /* and swap scan buffers */
366
+ if (scan->value == scan->buf[0]) scan->value = p = scan->buf[1];
367
+ else scan->value = p = scan->buf[0];
368
+ end = p +SC_BUFSIZE -1; /* get the end of the scan buffer */
369
+
370
+ while (1) { /* read loop */
371
+ c = getc(scan->file); /* get character and character class */
372
+ ccl = (c < 0) ? EOF : _ccltab[c];
373
+ if (c == '\n') scan->line++; /* count the line */
374
+
375
+ switch (state) { /* evaluate state of automaton */
376
+
377
+ case S_SPACE: /* --- skip white space */
378
+ switch (ccl) { /* evaluate character category */
379
+ case C_SPACE : /* do nothing */ break;
380
+ case C_LETTER: *p++ = c; state = S_ID; break;
381
+ case C_DIGIT : *p++ = c; state = S_NUMDIG; break;
382
+ case C_POINT : *p++ = c; state = S_NUMPT; break;
383
+ case C_SIGN : *p++ = c; state = S_SIGN; break;
384
+ case C_CMPOP : *p++ = c; state = S_CMPOP; break;
385
+ case C_QUOTE : quote = c; state = S_STRING; break;
386
+ case C_SLASH : state = S_SLASH; break;
387
+ case C_ACTIVE: *p++ = c; *p = '\0'; scan->len = 1;
388
+ return scan->token = c;
389
+ case EOF : strcpy(p, "<eof>"); scan->len = 4;
390
+ return scan->token = (ferror(scan->file))
391
+ ? E_FREAD : T_EOF;
392
+ default : *p++ = c; *p = '\0'; scan->len = 1;
393
+ return scan->token = E_CHAR;
394
+ } break;
395
+
396
+ case S_ID: /* --- identifier (letter read) */
397
+ if ((ccl == C_LETTER) /* if another letter */
398
+ || (ccl == C_DIGIT) /* or a digit */
399
+ || (ccl == C_POINT) /* or a decimal point */
400
+ || (ccl == C_SIGN)) { /* or a sign follows */
401
+ if (p >= end) return scan->token = E_BUFOVF;
402
+ *p++ = c; break; /* buffer character */
403
+ } /* otherwise */
404
+ UNGETC(scan, c); /* put back last character, */
405
+ *p = '\0'; /* terminate string in buffer */
406
+ scan->len = (int)(p -scan->value); /* set string length */
407
+ return scan->token = T_ID; /* and return 'identifier' */
408
+
409
+ case S_NUMDIG: /* --- number (digit read) */
410
+ if (p < end) *p++ = c; /* buffer character */
411
+ else return scan->token = E_BUFOVF;
412
+ if (ccl == C_DIGIT) /* if another digit follows, */
413
+ break; /* do nothing */
414
+ if (ccl == C_POINT) { /* if a decimal point follows, */
415
+ state = S_FRAC; break; } /* go to 'fraction' state */
416
+ if ((c == 'e') /* if an exponent indicator follows */
417
+ || (c == 'E')) { /* (lower- or uppercase), */
418
+ state = S_EXPIND; break; } /* go to 'exponent' state */
419
+ if ((ccl == C_LETTER) /* if a letter */
420
+ || (ccl == C_SIGN)) { /* or a sign follows, */
421
+ state = S_ID; break; /* go to 'identifier' state */
422
+ } /* otherwise */
423
+ UNGETC(scan, c); /* put back last character, */
424
+ *--p = '\0'; /* terminate string in buffer */
425
+ scan->len = (int)(p -scan->value); /* set string length */
426
+ return scan->token = T_NUM; /* and return 'number' */
427
+
428
+ case S_NUMPT: /* --- number (point read) */
429
+ if (p < end) *p++ = c; /* buffer character */
430
+ else return scan->token = E_BUFOVF;
431
+ if (ccl == C_DIGIT) { /* if a digit follows, */
432
+ state = S_FRAC; break; } /* go to 'fraction' state */
433
+ if ((ccl == C_LETTER) /* if a letter */
434
+ || (ccl == C_POINT) /* or a decimal point */
435
+ || (ccl == C_SIGN)) { /* or a sign follows */
436
+ state = S_ID; break; /* go to 'identifier' state */
437
+ } /* otherwise */
438
+ UNGETC(scan, c); /* put back last character, */
439
+ *--p = '\0'; /* terminate string in buffer */
440
+ scan->len = (int)(p -scan->value); /* set string length */
441
+ return scan->token = T_ID; /* and return 'identifier' */
442
+
443
+ case S_FRAC: /* --- number (digit & point read) */
444
+ if (p < end) *p++ = c; /* buffer character */
445
+ else return scan->token = E_BUFOVF;
446
+ if (ccl == C_DIGIT) /* if another digit follows, */
447
+ break; /* do nothing else */
448
+ if ((c == 'e') /* if an exponent indicator follows, */
449
+ || (c == 'E')) { /* (lower- or uppercase), */
450
+ state = S_EXPIND; break; } /* go to exponent state */
451
+ if ((ccl == C_LETTER) /* if a letter */
452
+ || (ccl == C_POINT) /* or a decimal point */
453
+ || (ccl == C_SIGN)) { /* or a sign follows, */
454
+ state = S_ID; break; /* go to 'identifier' state */
455
+ } /* otherwise */
456
+ UNGETC(scan, c); /* put back last character, */
457
+ *--p = '\0'; /* terminate string in buffer */
458
+ scan->len = (int)(p -scan->value); /* set string length */
459
+ return scan->token = T_NUM; /* and return 'number' */
460
+
461
+ case S_EXPIND: /* --- exponent (indicator read) */
462
+ if (p < end) *p++ = c; /* buffer character */
463
+ else return scan->token = E_BUFOVF;
464
+ if (ccl == C_SIGN) { /* if a sign follows, */
465
+ state = S_EXPSGN; break; } /* go to 2nd 'exponent' state */
466
+ if (ccl == C_DIGIT) { /* if a digit follows, */
467
+ state = S_EXPDIG; break; } /* go to 3rd 'exponent' state */
468
+ if ((ccl == C_LETTER) /* if a letter */
469
+ || (ccl == C_POINT)) { /* or a decimal point follows */
470
+ state = S_ID; break; /* go to 'identifier' state */
471
+ } /* otherwise */
472
+ UNGETC(scan, c); /* put back last character, */
473
+ *--p = '\0'; /* terminate string in buffer */
474
+ scan->len = (int)(p -scan->value); /* set string length */
475
+ return scan->token = T_ID; /* and return 'identifier' */
476
+
477
+ case S_EXPSGN: /* --- exponent (sign read) */
478
+ if (p < end) *p++ = c; /* buffer character */
479
+ else return scan->token = E_BUFOVF;
480
+ if (ccl == C_DIGIT) { /* if a digit follows, */
481
+ state = S_EXPDIG; break;} /* do nothing else */
482
+ if ((ccl == C_LETTER) /* if a letter */
483
+ || (ccl == C_POINT) /* or a decimal point */
484
+ || (ccl == C_SIGN)) { /* or a sign follows */
485
+ state = S_ID; break; /* go to 'identifier' state */
486
+ } /* otherwise */
487
+ UNGETC(scan, c); /* put back last character, */
488
+ *--p = '\0'; /* terminate string in buffer */
489
+ scan->len = (int)(p -scan->value); /* set string length */
490
+ return scan->token = T_ID; /* and return 'identifier' */
491
+
492
+ case S_EXPDIG: /* --- exponent (digit read) */
493
+ if (p < end) *p++ = c; /* buffer character */
494
+ else return scan->token = E_BUFOVF;
495
+ if (ccl == C_DIGIT) /* if another digit follows, */
496
+ break; /* do nothing else */
497
+ if ((ccl == C_LETTER) /* if a letter */
498
+ || (ccl == C_POINT) /* or a decimal point */
499
+ || (ccl == C_SIGN)) { /* or a sign follows, */
500
+ state = S_ID; break; /* go to 'identifier' state */
501
+ } /* otherwise */
502
+ UNGETC(scan, c); /* put back last character, */
503
+ *--p = '\0'; /* terminate string in buffer */
504
+ scan->len = (int)(p -scan->value); /* set string length */
505
+ return scan->token = T_NUM; /* and return 'number' */
506
+
507
+ case S_SIGN: /* --- number (sign read) */
508
+ *p++ = c; /* buffer character */
509
+ if (ccl == C_DIGIT) { /* if a digit follows, */
510
+ state = S_NUMDIG; break; } /* go to 'number' state */
511
+ if (ccl == C_POINT) { /* if a decimal point follows, */
512
+ state = S_NUMPT; break; } /* go to fraction state */
513
+ if ((c == '-') /* if a '-' follows and previous */
514
+ && (scan->value[0] == '-')) { /* char was a minus sign */
515
+ *p = '\0'; scan->len = 2; return scan->token = T_DASH; }
516
+ if ((c == '>') /* if a '>' follows and previous */
517
+ && (scan->value[0] == '-')) { /* char was a minus sign */
518
+ *p = '\0'; scan->len = 2; return scan->token = T_RGT; }
519
+ if ((ccl == C_LETTER) /* if a letter */
520
+ || (ccl == C_SIGN)) { /* or a sign follows, */
521
+ state = S_ID; break; } /* go to 'identifier' state */
522
+ UNGETC(scan, c); /* otherwise put back last character, */
523
+ *--p = '\0'; /* terminate string in buffer */
524
+ scan->len = (int)(p -scan->value); /* set string length */
525
+ return scan->token = T_ID; /* and return 'identifier' */
526
+
527
+ case S_CMPOP: /* --- comparison operator read */
528
+ if ((c == '-') /* if a minus sign follows and */
529
+ && (scan->value[0] == '<')) { /* prev. char was a '<' */
530
+ *p++ = '-'; scan->token = T_LFT; }
531
+ else if (c == '=') { /* if an equal sign follows */
532
+ *p++ = '='; scan->token = T_CMP; }
533
+ else { /* if anything else follows */
534
+ UNGETC(scan, c); scan->token = scan->value[0]; }
535
+ *p = '\0'; /* terminate string in buffer */
536
+ scan->len = (int)(p -scan->value); /* set string length */
537
+ return scan->token; /* and return the token read */
538
+
539
+ case S_STRING: /* --- quoted string */
540
+ if ((c == '\n') || (c == EOF)) /* if end of line or file, */
541
+ return scan->token = E_UNTSTR; /* string is unterminated */
542
+ if (c != quote) { /* if not at end of string */
543
+ if (p >= end) return scan->token = E_BUFOVF;
544
+ if (c == '\\') { /* if escaped character follows, */
545
+ state = S_ESC; break; } /* go to escaped char state */
546
+ *p++ = c; break; /* otherwise buffer character */
547
+ } /* if at end of string, */
548
+ *p = '\0'; /* terminate string in buffer */
549
+ scan->len = (int)(p -scan->value); /* set string length */
550
+ return scan->token = T_ID; /* and return 'identifier' */
551
+
552
+ case S_ESC: /* --- after '\' in quoted string */
553
+ if ((c >= '0') && (c <= '7')) { /* if octal digit, */
554
+ ec = c -'0'; state = S_OCT1; break; }/* evaluate digit */
555
+ if (c == 'x') { /* if hexadecimal character code, */
556
+ state = S_HEX1; break;} /* go to hexadecimal evaluation */
557
+ switch (c) { /* evaluate character after '\' */
558
+ case 'a': c = '\a'; break;
559
+ case 'b': c = '\b'; break;
560
+ case 'f': c = '\f'; break;
561
+ case 'n': c = '\n'; break;
562
+ case 'r': c = '\r'; break;
563
+ case 't': c = '\t'; break;
564
+ case 'v': c = '\v'; break;
565
+ case '\n': c = -1; break;
566
+ default : break;
567
+ } /* get escaped character */
568
+ if (c >= 0) *p++ = c; /* and store it, then */
569
+ state = S_STRING; break;/* return to quoted string state */
570
+
571
+ case S_OCT1: /* --- escaped octal number 1 */
572
+ if ((c >= '0') /* if an octal digit follows, */
573
+ && (c <= '7')) { /* evaluate it */
574
+ ec = ec *8 +c -'0'; state = S_OCT2; break; }
575
+ UNGETC(scan, c); /* otherwise put back last character */
576
+ *p++ = ec; /* store escaped character and */
577
+ state = S_STRING; break;/* return to quoted string state */
578
+
579
+ case S_OCT2: /* --- escaped octal number 2 */
580
+ if ((c >= '0') || (c <= '7'))
581
+ ec = ec *8 +c -'0'; /* if octal digit, evaluate it */
582
+ else UNGETC(scan, c); /* otherwise put back last character */
583
+ *p++ = ec; /* store escaped character and */
584
+ state = S_STRING; break;/* return to quoted string state */
585
+
586
+ case S_HEX1: /* --- escaped hexadecimal number 1 */
587
+ if (ccl == C_DIGIT) { /* if hexadecimal digit, evaluate it */
588
+ ec = c -'0'; state = S_HEX2; break; }
589
+ if ((c >= 'a') && (c <= 'f')) {
590
+ ec = c -'a' +10; state = S_HEX2; break; }
591
+ if ((c >= 'A') && (c <= 'F')) {
592
+ ec = c -'A' +10; state = S_HEX2; break; }
593
+ UNGETC(scan, c); /* otherwise put back last character */
594
+ *p++ = 'x'; /* store escaped character ('x') and */
595
+ state = S_STRING; break;/* return to quoted string state */
596
+
597
+ case S_HEX2: /* --- escaped hexadecimal number 2 */
598
+ if (ccl == C_DIGIT) /* if hexadecimal digit, evaluate it */
599
+ ec = ec*16 +c -'0';
600
+ else if ((c >= 'a') && (c <= 'f'))
601
+ ec = ec*16 +c -'a' +10;
602
+ else if ((c >= 'A') && (c <= 'F'))
603
+ ec = ec*16 +c -'A' +10;
604
+ else UNGETC(scan, c); /* otherwise put back last character */
605
+ *p++ = ec; /* store escaped character and */
606
+ state = S_STRING; break;/* return to quoted string state */
607
+
608
+ case S_SLASH: /* --- slash '/' */
609
+ if (c == '/') { /* if C++ style comment, then */
610
+ state = S_CPPCOM; break; } /* skip to end of line */
611
+ if (c == '*') { /* if C style comment */
612
+ scan->start = scan->line; level = 1;
613
+ state = S_CCOM1; break; /* note start line, init. level */
614
+ } /* and go to first 'comment' state */
615
+ UNGETC(scan, c); /* otherwise put back last character */
616
+ *p++ = '/'; *p = '\0'; /* store character in buffer */
617
+ scan->len = 1; /* set string length and */
618
+ return scan->token = '/'; /* return `character' */
619
+
620
+ case S_CPPCOM: /* --- C++ style comment */
621
+ if ((c == '\n') /* if at end of line */
622
+ || (c == EOF)) /* or at end of file */
623
+ state = S_SPACE; /* return to white space skipping */
624
+ break; /* (skip to end of line) */
625
+
626
+ case S_CCOM1: /* --- C style comment 1 */
627
+ if (c == EOF) /* if end of file, abort */
628
+ return scan->token = E_UNTCOM;
629
+ if (c == '*') /* if possibly 'end of comment', */
630
+ state = S_CCOM2; /* go to 2nd 'comment' state */
631
+ else if (c == '/') /* if possibly 'start of comment', */
632
+ state = S_CCOM3; /* go to 3rd 'comment' state */
633
+ break;
634
+
635
+ case S_CCOM2: /* --- C style comment 2 */
636
+ if (c == EOF) /* if end of file, abort */
637
+ return scan->token = E_UNTCOM;
638
+ if (c == '/') { /* if end of comment found */
639
+ if (--level <= 0) state = S_SPACE;
640
+ else state = S_CCOM1; }
641
+ else if (c != '*') /* if end of comment impossible */
642
+ state = S_CCOM1; /* return to comment skipping */
643
+ break; /* (possible start of comment) */
644
+
645
+ case S_CCOM3: /* --- C style comment 3 */
646
+ if (c == EOF) /* if end of file, abort */
647
+ return scan->token = E_UNTCOM;
648
+ if (c == '*') { /* if start of comment found */
649
+ level++; state = S_CCOM1; }
650
+ else if (c != '/') /* if start of comment impossible */
651
+ state = S_CCOM1; /* return to comment skipping */
652
+ break; /* (possible end of comment) */
653
+
654
+ default: /* if state is invalid, abort */
655
+ return scan->token = E_STATE;
656
+
657
+ } /* switch() */
658
+ } /* while(1) */
659
+ } /* sc_next() */
660
+
661
+ /*--------------------------------------------------------------------*/
662
+
663
+ int sc_nexter (SCAN *scan)
664
+ { /* --- get next token error reporting */
665
+ if (sc_next(scan) < 0) return sc_error(scan, scan->token);
666
+ return scan->token; /* get next token, report error, */
667
+ } /* sc_nexter() */ /* and return next token */
668
+
669
+ /*--------------------------------------------------------------------*/
670
+
671
+ int sc_back (SCAN *scan)
672
+ { /* --- go back one token */
673
+ if (scan->back) /* a second step backwards */
674
+ return scan->token; /* is impossible, so do nothing */
675
+ scan->back = -1; /* set the step backward flag */
676
+ return _swap(scan); /* swap the token information */
677
+ } /* sc_back() */ /* and return the previous token */
678
+
679
+ /*--------------------------------------------------------------------*/
680
+
681
+ int sc_eof (SCAN *scan)
682
+ { /* --- check for end of file */
683
+ if (scan->token == T_EOF) return 1;
684
+ sc_error(scan, E_GARBAGE); /* check for end of file */
685
+ return 0; /* and report an error */
686
+ } /* sc_eof() */ /* if it is not reached */
687
+
688
+ /*--------------------------------------------------------------------*/
689
+
690
+ int sc_recover (SCAN *scan, int stop, int beg, int end, int level)
691
+ { /* --- recover from an error */
692
+ while ((scan->token != stop) /* while at stop token */
693
+ && (scan->token != T_EOF)) { /* and not at end of file */
694
+ if (scan->token == beg) /* if begin level token found, */
695
+ level++; /* increment level counter */
696
+ else if ((scan->token == end) /* if end level token found */
697
+ && (--level <= 0)) /* and on level to return to, */
698
+ break; /* abort loop */
699
+ if (sc_next(scan) < 0) return scan->token;
700
+ } /* consume token */
701
+ if (scan->token != T_EOF) /* if not at end of file, */
702
+ sc_next(scan); /* consume token (stop or end) */
703
+ return scan->token; /* return the next token */
704
+ } /* sc_recover() */
705
+
706
+ /*--------------------------------------------------------------------*/
707
+
708
+ void sc_errfile (SCAN *scan, FILE *file, int lncnt)
709
+ { /* --- set file for error output */
710
+ assert(scan); /* check the function arguments */
711
+ scan->errfile = (file) ? file : stderr;
712
+ scan->lncnt = lncnt; /* set file and line count */
713
+ } /* sc_errfile() */
714
+
715
+ /*--------------------------------------------------------------------*/
716
+
717
+ void sc_errmsgs (SCAN *scan, const char *msgs[], int cnt)
718
+ { /* --- set additonal error messages */
719
+ assert(scan); /* check the function arguments */
720
+ scan->msgs = msgs; /* note error message vector */
721
+ scan->msgcnt = cnt; /* and number of error messages */
722
+ } /* sc_errmsgs() */
723
+
724
+ /*--------------------------------------------------------------------*/
725
+
726
+ int sc_error (SCAN *scan, int code, ...)
727
+ { /* --- print an error message */
728
+ va_list args; /* variable argument list */
729
+ const char *msg; /* error message */
730
+ int c, pc; /* the invalid character */
731
+ int tmp; /* temporary buffer */
732
+
733
+ assert(scan); /* check the function arguments */
734
+ if (scan->lncnt <= 0) /* if line count is zero or negative, */
735
+ putc('\n', scan->errfile); /* start a new output line */
736
+ fprintf(scan->errfile, FILETXT" %s", scan->fname);
737
+ /* print the file name */
738
+ if ((code != E_NONE) /* if an error occurred, */
739
+ && (code != E_FOPEN) /* but not 'file open failed' */
740
+ && (code != E_UNTCOM)) { /* and not 'unterminated comment' */
741
+ fputs((scan->lncnt > 2) ? ",\n" : ", ", scan->errfile);
742
+ fprintf(scan->errfile, LINETXT" %d", scan->line);
743
+ } /* print line number */
744
+ fputs((scan->lncnt > 1) ? ":\n" : ": ", scan->errfile);
745
+
746
+ if (code >= 0) code = E_NONE; /* check error code and */
747
+ tmp = MSGOFFSET -code; /* get error message text */
748
+ if (code > E_UNKNOWN) msg = _errmsgs[-code];
749
+ else if (tmp < scan->msgcnt) msg = scan->msgs[tmp];
750
+ else msg = NULL;
751
+ if (!msg) msg = _errmsgs[-(code = E_UNKNOWN)];
752
+
753
+ switch (code) { /* special error handling */
754
+ case E_CHAR : c = pc = (unsigned char)scan->value[0];
755
+ if (c < ' ') pc = ' ';
756
+ fprintf (scan->errfile, msg, pc, c); break;
757
+ case E_UNTCOM: fprintf (scan->errfile, msg, scan->start); break;
758
+ default : va_start(args, code); /* get variable arguments */
759
+ vfprintf(scan->errfile, msg, args);
760
+ va_end(args); break; /* print error message and */
761
+ } /* end variable argument evaluation */
762
+ if (scan->lncnt > 0) /* if line count is positive, */
763
+ putc('\n', scan->errfile); /* terminate output line */
764
+ return code; /* return error code */
765
+ } /* sc_error() */
766
+
767
+ #endif