apriori-rails 0.2.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (122) hide show
  1. data/History.txt +22 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +121 -0
  4. data/README.txt +149 -0
  5. data/Rakefile +17 -0
  6. data/TODO.txt +60 -0
  7. data/attic/c_ext_test1/MyTest/MyTest.c +23 -0
  8. data/attic/c_ext_test1/MyTest/extconf.rb +11 -0
  9. data/attic/c_ext_test1/mytest.rb +10 -0
  10. data/attic/test.c +12 -0
  11. data/config/hoe.rb +88 -0
  12. data/config/requirements.rb +29 -0
  13. data/examples/01_simple_example.rb +39 -0
  14. data/examples/02_small_file_example.rb +17 -0
  15. data/examples/03_large_file_example.rb +22 -0
  16. data/examples/test_data/market_basket_basic_test.dat +9 -0
  17. data/ext/Apriori.c +149 -0
  18. data/ext/Makefile +149 -0
  19. data/ext/apriori/doc/apriori.html +1301 -0
  20. data/ext/apriori/doc/arem.gp +68 -0
  21. data/ext/apriori/doc/c_rev.gp +89 -0
  22. data/ext/apriori/doc/chi2.tex +156 -0
  23. data/ext/apriori/doc/copying +504 -0
  24. data/ext/apriori/doc/line.gif +0 -0
  25. data/ext/apriori/doc/uparrow.gif +0 -0
  26. data/ext/apriori/ex/flg2set +15 -0
  27. data/ext/apriori/ex/hdr2set +13 -0
  28. data/ext/apriori/ex/readme +71 -0
  29. data/ext/apriori/ex/row2set +7 -0
  30. data/ext/apriori/ex/rulesort +24 -0
  31. data/ext/apriori/ex/tab2set +9 -0
  32. data/ext/apriori/ex/test.app +2 -0
  33. data/ext/apriori/ex/test.rul +9 -0
  34. data/ext/apriori/ex/test1.rul +43 -0
  35. data/ext/apriori/ex/test1.tab +10 -0
  36. data/ext/apriori/ex/test2.tab +10 -0
  37. data/ext/apriori/ex/test3.tab +30 -0
  38. data/ext/apriori/ex/test4.tab +11 -0
  39. data/ext/apriori/ex/test5.tab +39 -0
  40. data/ext/apriori/ex/tid2set +23 -0
  41. data/ext/apriori/ex/xhdr2set +33 -0
  42. data/ext/apriori/src/apriori.c +750 -0
  43. data/ext/apriori/src/apriori.dsp +120 -0
  44. data/ext/apriori/src/apriori.dsw +29 -0
  45. data/ext/apriori/src/apriori.mak +99 -0
  46. data/ext/apriori/src/istree.c +1411 -0
  47. data/ext/apriori/src/istree.h +160 -0
  48. data/ext/apriori/src/makefile +105 -0
  49. data/ext/apriori/src/tract.c +870 -0
  50. data/ext/apriori/src/tract.h +261 -0
  51. data/ext/apriori_wrapper.c +757 -0
  52. data/ext/apriori_wrapper.h +10 -0
  53. data/ext/extconf.rb +32 -0
  54. data/ext/math/doc/copying +504 -0
  55. data/ext/math/src/chi2.c +151 -0
  56. data/ext/math/src/chi2.h +27 -0
  57. data/ext/math/src/choose.c +71 -0
  58. data/ext/math/src/choose.h +16 -0
  59. data/ext/math/src/gamma.c +446 -0
  60. data/ext/math/src/gamma.h +39 -0
  61. data/ext/math/src/intexp.c +35 -0
  62. data/ext/math/src/intexp.h +15 -0
  63. data/ext/math/src/makefile +164 -0
  64. data/ext/math/src/math.mak +48 -0
  65. data/ext/math/src/normal.c +387 -0
  66. data/ext/math/src/normal.h +44 -0
  67. data/ext/math/src/radfn.c +113 -0
  68. data/ext/math/src/radfn.h +34 -0
  69. data/ext/math/src/zeta.c +49 -0
  70. data/ext/math/src/zeta.h +15 -0
  71. data/ext/pre-clean.rb +8 -0
  72. data/ext/pre-setup.rb +9 -0
  73. data/ext/util/doc/copying +504 -0
  74. data/ext/util/src/listops.c +76 -0
  75. data/ext/util/src/listops.h +26 -0
  76. data/ext/util/src/makefile +103 -0
  77. data/ext/util/src/memsys.c +84 -0
  78. data/ext/util/src/memsys.h +42 -0
  79. data/ext/util/src/nstats.c +288 -0
  80. data/ext/util/src/nstats.h +69 -0
  81. data/ext/util/src/params.c +86 -0
  82. data/ext/util/src/params.h +19 -0
  83. data/ext/util/src/parse.c +133 -0
  84. data/ext/util/src/parse.h +81 -0
  85. data/ext/util/src/scan.c +767 -0
  86. data/ext/util/src/scan.h +111 -0
  87. data/ext/util/src/symtab.c +443 -0
  88. data/ext/util/src/symtab.h +121 -0
  89. data/ext/util/src/tabscan.c +279 -0
  90. data/ext/util/src/tabscan.h +99 -0
  91. data/ext/util/src/util.mak +91 -0
  92. data/ext/util/src/vecops.c +317 -0
  93. data/ext/util/src/vecops.h +42 -0
  94. data/lib/apriori.rb +133 -0
  95. data/lib/apriori/adapter.rb +13 -0
  96. data/lib/apriori/association_rule.rb +97 -0
  97. data/lib/apriori/version.rb +3 -0
  98. data/script/console +10 -0
  99. data/script/destroy +14 -0
  100. data/script/generate +14 -0
  101. data/script/txt2html +82 -0
  102. data/setup.rb +1585 -0
  103. data/tasks/apriori.rake +20 -0
  104. data/tasks/attic.rake +28 -0
  105. data/tasks/deployment.rake +34 -0
  106. data/tasks/environment.rake +7 -0
  107. data/tasks/install.rake +13 -0
  108. data/tasks/website.rake +17 -0
  109. data/test/apriori_test.rb +13 -0
  110. data/test/fixtures/market_basket_results_test.txt +5 -0
  111. data/test/fixtures/market_basket_string_test.txt +7 -0
  112. data/test/fixtures/results.txt +2 -0
  113. data/test/fixtures/sample.txt +7 -0
  114. data/test/test_helper.rb +5 -0
  115. data/test/unit/test_apriori.rb +68 -0
  116. data/test/unit/test_itemsets_and_parsing.rb +82 -0
  117. data/website/index.html +251 -0
  118. data/website/index.txt +154 -0
  119. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  120. data/website/stylesheets/screen.css +142 -0
  121. data/website/template.html.erb +49 -0
  122. metadata +267 -0
@@ -0,0 +1,69 @@
1
+ /*----------------------------------------------------------------------
2
+ File : nstats.h
3
+ Contents: management of normalization statistics
4
+ Author : Christian Borgelt
5
+ History : 2003.08.12 file created
6
+ 2004.08.12 description and parse function added
7
+ ----------------------------------------------------------------------*/
8
+ #ifndef __NSTATS__
9
+ #define __NSTATS__
10
+ #include <stdio.h>
11
+ #ifdef NST_PARSE
12
+ #include "parse.h"
13
+ #endif
14
+
15
+ /*----------------------------------------------------------------------
16
+ Type Definitions
17
+ ----------------------------------------------------------------------*/
18
+ typedef struct { /* --- numerical statistics --- */
19
+ int dim; /* dimension of data space */
20
+ double reg; /* number of registered patterns */
21
+ double *mins; /* minimal data values */
22
+ double *maxs; /* maximal data values */
23
+ double *sums; /* sums of data values */
24
+ double *sqrs; /* sums of squared data values */
25
+ double *offs; /* offsets for data scaling */
26
+ double facs[1]; /* factors for data scaling */
27
+ } NSTATS; /* (numerical statistics) */
28
+
29
+ /*----------------------------------------------------------------------
30
+ Functions
31
+ ----------------------------------------------------------------------*/
32
+ extern NSTATS* nst_create (int dim);
33
+ extern void nst_delete (NSTATS *nst);
34
+ extern int nst_dim (NSTATS *nst);
35
+
36
+ extern void nst_reg (NSTATS *nst, const double *vec,
37
+ double weight);
38
+ extern void nst_range (NSTATS *nst, int idx,
39
+ double min, double max);
40
+ extern void nst_expand (NSTATS *nst, int idx, double factor);
41
+ extern void nst_scale (NSTATS *nst, int idx,
42
+ double off, double fac);
43
+
44
+ extern double nst_min (NSTATS *nst, int idx);
45
+ extern double nst_max (NSTATS *nst, int idx);
46
+ extern double nst_offset (NSTATS *nst, int idx);
47
+ extern double nst_factor (NSTATS *nst, int idx);
48
+
49
+ extern void nst_norm (NSTATS *nst, const double *vec, double *res);
50
+ extern void nst_inorm (NSTATS *nst, const double *vec, double *res);
51
+ extern void nst_center (NSTATS *nst, double *vec);
52
+ extern void nst_spans (NSTATS *nst, double *vec);
53
+
54
+ extern int nst_desc (NSTATS *nst, FILE *file,
55
+ const char *indent, int maxlen);
56
+ #ifdef NST_PARSE
57
+ extern NSTATS* nst_parse (SCAN *scan, int dim);
58
+ #endif
59
+
60
+ /*----------------------------------------------------------------------
61
+ Preprocessor Definitions
62
+ ----------------------------------------------------------------------*/
63
+ #define nst_dim(s) ((s)->dim)
64
+ #define nst_min(s,i) ((s)->mins[i])
65
+ #define nst_max(s,i) ((s)->maxs[i])
66
+ #define nst_offset(s,i) ((s)->offs[i])
67
+ #define nst_factor(s,i) ((s)->facs[i])
68
+
69
+ #endif
@@ -0,0 +1,86 @@
1
+ /*----------------------------------------------------------------------
2
+ File : params.c
3
+ Contents: command line parameter retrieval
4
+ Author : Christian Borgelt
5
+ History : 2003.06.05 file created
6
+ ----------------------------------------------------------------------*/
7
+ #include <stdarg.h>
8
+ #include <stdlib.h>
9
+ #include <assert.h>
10
+ #include "params.h"
11
+
12
+ /*----------------------------------------------------------------------
13
+ Functions
14
+ ----------------------------------------------------------------------*/
15
+
16
+ int getints (char *s, char **end, int n, ...)
17
+ { /* --- get integer parameters */
18
+ va_list args; /* list of variable arguments */
19
+ int k = 0, t; /* parameter counter, buffer */
20
+
21
+ assert(s && end && (n > 0)); /* check the function arguments */
22
+ va_start(args, n); /* get variable arguments */
23
+ while (k < n) { /* traverse the arguments */
24
+ t = (int)strtol(s, end,10); /* get the next parameter and */
25
+ if (*end == s) break; /* check for an empty parameter */
26
+ *(va_arg(args, int*)) = t; /* store the parameter */
27
+ k++; /* and count it */
28
+ s = *end; if (*s++ != ':') break;
29
+ } /* check for a colon */
30
+ va_end(args); /* end argument evaluation */
31
+ return k; /* return the number of parameters */
32
+ } /* getints() */
33
+
34
+ /*--------------------------------------------------------------------*/
35
+
36
+ int getdbls (char *s, char **end, int n, ...)
37
+ { /* --- get double parameters */
38
+ va_list args; /* list of variable arguments */
39
+ int k = 0; /* parameter counter */
40
+ double t; /* temporary buffer */
41
+
42
+ assert(s && end && (n > 0)); /* check the function arguments */
43
+ va_start(args, n); /* get variable arguments */
44
+ while (k < n) { /* traverse the arguments */
45
+ t = strtod(s, end); /* get the next parameter and */
46
+ if (*end == s) break; /* check for an empty parameter */
47
+ *(va_arg(args, double*)) = t; /* store the parameter */
48
+ k++; /* and count it */
49
+ s = *end; if (*s++ != ':') break;
50
+ } /* check for a colon */
51
+ va_end(args); /* end argument evaluation */
52
+ return k; /* return the number of parameters */
53
+ } /* getdbls() */
54
+
55
+ /*--------------------------------------------------------------------*/
56
+
57
+ int getintvec (char *s, char **end, int n, int *p)
58
+ { /* --- get integer parameter vector */
59
+ int k = 0, t; /* parameter counter, buffer */
60
+
61
+ assert(s && end && (n > 0)); /* check the function arguments */
62
+ while (k < n) { /* traverse the arguments */
63
+ t = (int)strtol(s, end,10); /* get the next parameter and */
64
+ if (*end == s) break; /* check for an empty parameter */
65
+ p[k++] = t; /* store and count the parameter */
66
+ s = *end; if (*s++ != ':') break;
67
+ } /* check for a colon */
68
+ return k; /* return the number of parameters */
69
+ } /* getintvec() */
70
+
71
+ /*--------------------------------------------------------------------*/
72
+
73
+ int getdblvec (char *s, char **end, int n, double *p)
74
+ { /* --- get double parameter vector */
75
+ int k = 0; /* parameter counter */
76
+ double t; /* temporary buffer */
77
+
78
+ assert(s && end && (n > 0)); /* check the function arguments */
79
+ while (k < n) { /* traverse the arguments */
80
+ t = strtod(s, end); /* get the next parameter and */
81
+ if (*end == s) break; /* check for an empty parameter */
82
+ p[k++] = t; /* store and count the parameter */
83
+ s = *end; if (*s++ != ':') break;
84
+ } /* check for a colon */
85
+ return k; /* return the number of parameters */
86
+ } /* getdblvec() */
@@ -0,0 +1,19 @@
1
+ /*----------------------------------------------------------------------
2
+ File : params.h
3
+ Contents: command line parameter retrieval
4
+ Author : Christian Borgelt
5
+ History : 2003.06.05 file created
6
+ ----------------------------------------------------------------------*/
7
+ #ifndef __PARAMS__
8
+ #define __PARAMS__
9
+
10
+ /*----------------------------------------------------------------------
11
+ Functions
12
+ ----------------------------------------------------------------------*/
13
+ extern int getints (char *s, char **end, int n, ...);
14
+ extern int getdbls (char *s, char **end, int n, ...);
15
+
16
+ extern int getintvec (char *s, char **end, int n, int *p);
17
+ extern int getdblvec (char *s, char **end, int n, double *p);
18
+
19
+ #endif
@@ -0,0 +1,133 @@
1
+ /*----------------------------------------------------------------------
2
+ File : parse.c
3
+ Contents: parser utilities
4
+ Author : Christian Borgelt
5
+ History : 2004.08.12 file created
6
+ 2006.02.02 error E_EDGE added
7
+ 2007.01.16 error E_MSDCNT added
8
+ ----------------------------------------------------------------------*/
9
+ #include <string.h>
10
+ #include <assert.h>
11
+ #include "parse.h"
12
+
13
+ /*----------------------------------------------------------------------
14
+ Constants
15
+ ----------------------------------------------------------------------*/
16
+ #ifdef GERMAN /* deutsche Texte */
17
+ static const char *errmsgs[] = { /* Fehlermeldungen */
18
+ /* E_CHREXP -16 */ "\"%c\" erwartet statt %s",
19
+ /* E_STREXP -17 */ "\"%s\" erwartet statt %s",
20
+ /* E_NUMEXP -18 */ "Zahl erwartet statt %s",
21
+ /* E_NUMBER -19 */ "ungültige Zahl %s",
22
+
23
+ /* E_ATTEXP -20 */ "Attribut erwartet statt %s",
24
+ /* E_UNKATT -21 */ "unbekanntes Attribut %s",
25
+ /* E_DUPATT -22 */ "doppeltes Attribut %s",
26
+ /* E_MISATT -23 */ "Attribut %s fehlt",
27
+ /* E_ATTRIB -24 */ "ungültiges Attribut %s",
28
+ /* E_ATTYPE -25 */ "Attribut %s hat falschen Typ",
29
+
30
+ /* E_VALEXP -26 */ "Attributwert erwartet statt %s",
31
+ /* E_UNKVAL -27 */ "unbekannter Attributwert %s",
32
+ /* E_DUPVAL -28 */ "doppelter Attributwert %s",
33
+ /* E_MISVAL -29 */ "fehlender Attributwert %s",
34
+
35
+ /* E_CLSEXP -30 */ "Klassenattribut erwartet statt %s",
36
+ /* E_UNKCLS -31 */ "unbekannte Klasse %s",
37
+ /* E_DUPCLS -32 */ "doppelte Klasse %s",
38
+ /* E_MISCLS -33 */ "Klasse %s fehlt",
39
+ /* E_CLSTYPE -34 */ "Klassenattribut %s hat falschen Typ",
40
+ /* E_CLSCNT -35 */ "Klassenattribut %s hat zu wenige Werte",
41
+
42
+ /* E_DOMAIN -36 */ "ungültiger Wertebereich %s",
43
+
44
+ /* E_PAREXP -37 */ "Parameter erwartet statt %s",
45
+ /* E_CMPOP -38 */ "ungültiger Vergleichsoperator %s",
46
+ /* E_COVMAT -39 */ "ungültige Kovarianzmatrix",
47
+
48
+ /* E_DUPCDL -40 */ "doppelte Kandidatenliste für Attribut %s\n",
49
+ /* E_RANGE -41 */ "ungültiger Kandidatenbereich",
50
+ /* E_CAND -42 */ "ungültiger Kandidat %s",
51
+ /* E_LINK -43 */ "ungültiger Verweis",
52
+
53
+ /* E_LYRCNT -44 */ "ungültige Anzahl Schichten",
54
+ /* E_UNITCNT -45 */ "ungültige Anzahl Einheiten",
55
+
56
+ /* E_EDGE -46 */ "ungültiger Kantentyp %s",
57
+
58
+ /* E_MSDCNT -47 */ "falsche Anzahl Zugehörigkeitsgrade",
59
+ };
60
+ #else /* English texts */
61
+ static const char *errmsgs[] = { /* error messages */
62
+ /* E_CHREXP -16 */ "\"%c\" expected instead of %s",
63
+ /* E_STREXP -17 */ "\"%s\" expected instead of %s",
64
+ /* E_NUMEXP -18 */ "number expected instead of %s",
65
+ /* E_NUMBER -19 */ "invalid number %s",
66
+
67
+ /* E_ATTEXP -20 */ "attribute expected instead of %s",
68
+ /* E_UNKATT -21 */ "unknown attribute %s",
69
+ /* E_DUPATT -22 */ "duplicate attribute %s",
70
+ /* E_MISATT -23 */ "missing attribute %s",
71
+ /* E_ATTRIB -24 */ "invalid attribute %s",
72
+ /* E_ATTYPE -25 */ "attribute %s has wrong type",
73
+
74
+ /* E_VALEXP -26 */ "attribute value expected instead of %s",
75
+ /* E_UNKVAL -27 */ "unknown attribute value %s",
76
+ /* E_DUPVAL -28 */ "duplicate attribute value %s",
77
+ /* E_MISVAL -29 */ "missing attribute value %s",
78
+
79
+ /* E_CLSEXP -30 */ "class value expected instead of %s",
80
+ /* E_UNKCLS -31 */ "unknown class value %s",
81
+ /* E_DUPCLS -32 */ "duplicate class value %s",
82
+ /* E_MISCLS -33 */ "missing class value %s",
83
+ /* E_CLSTYPE -34 */ "class attribute %s has wrong type",
84
+ /* E_CLSCNT -35 */ "class attribute %s has too few values",
85
+
86
+ /* E_DOMAIN -36 */ "invalid attribute domain %s",
87
+
88
+ /* E_PAREXP -37 */ "parameter expected instead of %s",
89
+ /* E_CMPOP -38 */ "invalid comparison operator %s",
90
+ /* E_COVMAT -39 */ "invalid covariance matrix",
91
+
92
+ /* E_DUPCDL -40 */ "duplicate candidate list for attribute %s\n",
93
+ /* E_RANGE -41 */ "invalid candidate range",
94
+ /* E_CAND -42 */ "invalid candidate %s",
95
+ /* E_LINK -43 */ "invalid link",
96
+
97
+ /* E_LYRCNT -44 */ "invalid number of layers",
98
+ /* E_UNITCNT -45 */ "invalid number of units",
99
+
100
+ /* E_EDGE -46 */ "invalid edge type %s",
101
+
102
+ /* E_MSDCNT -47 */ "wrong number of membership degrees",
103
+ };
104
+ #endif
105
+ #define MSGCNT (int)(sizeof(errmsgs)/sizeof(const char*))
106
+
107
+ /*----------------------------------------------------------------------
108
+ Functions
109
+ ----------------------------------------------------------------------*/
110
+
111
+ void pa_init (SCAN *scan) /* --- initialize parsing */
112
+ { sc_errmsgs(scan, errmsgs, MSGCNT); }
113
+
114
+ /*--------------------------------------------------------------------*/
115
+
116
+ int pa_error (SCAN *scan, int code, int c, const char *s)
117
+ { /* --- report a parse error */
118
+ char src[256], dst[1024]; /* buffers for string formatting */
119
+
120
+ assert(scan); /* check the function arguments */
121
+ if (((code == E_DUPATT) || (code == E_MISATT)
122
+ || (code == E_DUPVAL) || (code == E_MISVAL)
123
+ || (code == E_DUPCLS) || (code == E_MISCLS)
124
+ || (code == E_CAND)) && s)
125
+ sc_format(dst, s, 1); /* if "missing ..." error message, */
126
+ else { /* format the given name */
127
+ strncpy(src, sc_value(scan), 255); src[255] = '\0';
128
+ sc_format(dst, src, 1); /* if normal error message, */
129
+ } /* copy and format the token value */
130
+ if (code == E_CHREXP) return sc_error(scan, code, c, dst);
131
+ else if (code == E_STREXP) return sc_error(scan, code, s, dst);
132
+ else return sc_error(scan, code, dst);
133
+ } /* pa_error() */ /* print an error message */
@@ -0,0 +1,81 @@
1
+ /*----------------------------------------------------------------------
2
+ File : parse.h
3
+ Contents: parser utilities
4
+ Author : Christian Borgelt
5
+ History : 2004.08.12 file created
6
+ 2006.02.02 error E_EDGE added
7
+ 2007.01.16 error E_MSDCNT added
8
+ ----------------------------------------------------------------------*/
9
+ #ifndef __PARSE__
10
+ #define __PARSE__
11
+ #ifndef SC_SCAN
12
+ #define SC_SCAN
13
+ #endif
14
+ #include "scan.h"
15
+
16
+ /*----------------------------------------------------------------------
17
+ Preprocessor Definitions
18
+ ----------------------------------------------------------------------*/
19
+ /* --- error codes --- */
20
+ #define E_CHREXP (-16) /* character expected */
21
+ #define E_STREXP (-17) /* string expected */
22
+ #define E_NUMEXP (-18) /* number expected */
23
+ #define E_NUMBER (-19) /* invalid number */
24
+
25
+ #define E_ATTEXP (-20) /* attribute expected */
26
+ #define E_UNKATT (-21) /* unknown attribute */
27
+ #define E_DUPATT (-22) /* duplicate attribute */
28
+ #define E_MISATT (-23) /* missing attribute */
29
+ #define E_ATTRIB (-24) /* invalid attribute */
30
+ #define E_ATTYPE (-25) /* wrong attribute type */
31
+
32
+ #define E_VALEXP (-26) /* attribute value expected */
33
+ #define E_UNKVAL (-27) /* unknown attribute value */
34
+ #define E_DUPVAL (-28) /* duplicate attribute value */
35
+ #define E_MISVAL (-29) /* missing attribute value */
36
+
37
+ #define E_CLSEXP (-30) /* class value expected */
38
+ #define E_UNKCLS (-31) /* unknown class value */
39
+ #define E_DUPCLS (-32) /* duplicate class value */
40
+ #define E_MISCLS (-33) /* missing class value */
41
+ #define E_CLSTYPE (-34) /* class attribute must be nominal */
42
+ #define E_CLSCNT (-35) /* class attribute has too few values */
43
+
44
+ #define E_DOMAIN (-36) /* invalid attribute domain */
45
+
46
+ #define E_PAREXP (-37) /* parameter expected */
47
+ #define E_CMPOP (-38) /* invalid comparison operator */
48
+ #define E_COVMAT (-39) /* invalid covariance matrix */
49
+
50
+ #define E_DUPCDL (-40) /* duplicate candidate list */
51
+ #define E_RANGE (-41) /* invalid candidate range */
52
+ #define E_CAND (-42) /* invalid candidate */
53
+ #define E_LINK (-43) /* invalid link */
54
+
55
+ #define E_LYRCNT (-44) /* invalid number of layers */
56
+ #define E_UNITCNT (-45) /* invalid number of units */
57
+
58
+ #define E_EDGE (-46) /* invalid edge type */
59
+
60
+ #define E_MSDCNT (-47) /* wrong number of membership degrees */
61
+
62
+ /*----------------------------------------------------------------------
63
+ Functions
64
+ ----------------------------------------------------------------------*/
65
+ extern void pa_init (SCAN *scan);
66
+ extern int pa_error (SCAN *scan, int code, int c, const char *s);
67
+
68
+ /*----------------------------------------------------------------------
69
+ Preprocessor Definitions
70
+ ----------------------------------------------------------------------*/
71
+ #define ERROR(c) return pa_error(scan, c, -1, NULL)
72
+ #define XERROR(c,s) return pa_error(scan, c, -1, s)
73
+ #define ERR_CHR(c) return pa_error(scan, E_CHREXP, c, NULL)
74
+ #define ERR_STR(s) return pa_error(scan, E_STREXP, -1, s)
75
+ #define GET_TOK() if (sc_next(scan) < 0) \
76
+ return sc_error(scan, sc_token(scan))
77
+ #define GET_CHR(c) if (sc_token(scan) != (c)) ERR_CHR(c); \
78
+ else GET_TOK()
79
+ #define RECOVER() if (sc_recover(scan, ';', '{', '}', 0) == T_EOF) \
80
+ return 1
81
+ #endif
@@ -0,0 +1,767 @@
1
+ /*----------------------------------------------------------------------
2
+ File : scan.c
3
+ Contents: scanner (lexical analysis of a character stream)
4
+ Author : Christian Borgelt
5
+ History : 1996.01.16 file created
6
+ 1996.02.21 identifier recognition made more flexible
7
+ 1996.03.17 keyword tokens removed
8
+ 1996.04.15 duplicate state removed from sc_next
9
+ 1997.07.29 < and > declared active (for decision trees)
10
+ 1997.09.08 escape sequences in strings made possible
11
+ 1997.09.11 single characters stored also in scn->value
12
+ 1998.02.08 recover and error message functions added
13
+ 1998.02.09 bug in state S_NUMPT concerning "-." removed
14
+ 1998.02.13 token T_RGT ('->') added
15
+ 1998.03.04 returned tokens changed for some states
16
+ 1998.04.17 token T_LFT ('<-') added
17
+ 1998.05.27 token T_CMP (two char comparison operator) added
18
+ 1998.05.31 token conversion to number removed
19
+ 1999.02.08 reading from standard input made possible
20
+ 1999.04.29 quoted string parsing improved
21
+ 1999.11.13 token string length stored in scn->len
22
+ 2000.11.23 functions sc_fmtlen and sc_format added
23
+ 2001.07.15 scanner made an object, state definitions added
24
+ 2001.07.16 characters with code > 127 made printable
25
+ look ahead functionality added (sc_back)
26
+ 2006.02.02 token T_DASH (undirected edge '--') added
27
+ ----------------------------------------------------------------------*/
28
+ #include <stdio.h>
29
+ #include <stdlib.h>
30
+ #include <string.h>
31
+ #include <stdarg.h>
32
+ #include <assert.h>
33
+ #include "scan.h"
34
+ #ifdef STORAGE
35
+ #include "storage.h"
36
+ #endif
37
+
38
+ /*----------------------------------------------------------------------
39
+ Preprocessor Definitions
40
+ ----------------------------------------------------------------------*/
41
+ #ifdef SC_SCAN
42
+ /* --- character classes --- */
43
+ #define C_INVALID 0 /* invalid character */
44
+ #define C_SPACE 1 /* white space, e.g. ' ' '\t' '\n' */
45
+ #define C_LETTER 2 /* letter or underscore '_' */
46
+ #define C_DIGIT 3 /* digit */
47
+ #define C_POINT 4 /* point, '.' */
48
+ #define C_SIGN 5 /* sign, '+' or '-' */
49
+ #define C_SLASH 6 /* slash, '/' */
50
+ #define C_QUOTE 7 /* quote, e.g. '"' '`' */
51
+ #define C_CMPOP 8 /* comparison operator, e.g. '<' */
52
+ #define C_ACTIVE 9 /* active characters, e.g. ',' '(' */
53
+
54
+ /* --- scanner states --- */
55
+ #define S_SPACE 0 /* skipping white space */
56
+ #define S_ID 1 /* reading identifier */
57
+ #define S_NUMDIG 2 /* reading number, digit */
58
+ #define S_NUMPT 3 /* reading number, decimal point */
59
+ #define S_FRAC 4 /* reading number, digit and point */
60
+ #define S_EXPIND 5 /* reading exponent, indicator */
61
+ #define S_EXPSGN 6 /* reading exponent, sign */
62
+ #define S_EXPDIG 7 /* reading exponent, digit */
63
+ #define S_SIGN 8 /* sign read */
64
+ #define S_CMPOP 9 /* reading comparison operator */
65
+ #define S_STRING 10 /* reading quoted string */
66
+ #define S_ESC 11 /* reading escaped character */
67
+ #define S_OCT1 12 /* reading octal number, 1 digit */
68
+ #define S_OCT2 13 /* reading octal number, 2 digits */
69
+ #define S_HEX1 14 /* reading hexad. number, 1 digit */
70
+ #define S_HEX2 15 /* reading hexad. number, 2 digits */
71
+ #define S_SLASH 16 /* slash read */
72
+ #define S_CPPCOM 17 /* reading C++ comment */
73
+ #define S_CCOM1 18 /* reading C comment */
74
+ #define S_CCOM2 19 /* reading C comment, possible end */
75
+ #define S_CCOM3 20 /* reading C comment, possible start */
76
+
77
+ /* --- functions --- */
78
+ #define UNGETC(s,c) do { if ((c) == EOF) break; \
79
+ if ((c) == '\n') (s)->line--; \
80
+ ungetc(c, (s)->file); } while (0)
81
+
82
+ /* --- additional error codes --- */
83
+ #define E_UNKNOWN (-11) /* unknown error */
84
+ #define MSGOFFSET (-16) /* offset for add. error messages */
85
+
86
+ /* --- texts --- */
87
+ #ifdef GERMAN /* deutsche Texte */
88
+ #define FILETXT "Datei"
89
+ #define LINETXT "Zeile"
90
+ #else /* English texts */
91
+ #define FILETXT "file"
92
+ #define LINETXT "line"
93
+ #endif /* #ifdef GERMAN .. #else .. */
94
+ #endif /* #ifdef SC_SCAN */
95
+
96
+ /*----------------------------------------------------------------------
97
+ Constants
98
+ ----------------------------------------------------------------------*/
99
+ static const char _scftab[256] = { /* scanable form classes */
100
+ /* NUL SOH STX ETX EOT ENQ ACK BEL */
101
+ /* 00 */ 2, 2, 2, 2, 2, 2, 2, 'a',
102
+ /* BS HT LF VT FF CR SO SI */
103
+ 'b', 't', 'n', 'v', 'f', 'r', 2, 2,
104
+ /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */
105
+ /* 10 */ 2, 2, 2, 2, 2, 2, 2, 2,
106
+ /* CAN EM SUB ESC FS GS RS US */
107
+ 2, 2, 2, 2, 2, 2, 2, 2,
108
+ /* ' ' '!' '"' '#' '$' '%' '&' ''' */
109
+ /* 20 */ 1, 1, '"', 1, 1, 1, 1, 1,
110
+ /* '(' ')' '*' '+' ',' '-' '.' '/' */
111
+ 1, 1, 1, 0, 1, 0, 0, 1,
112
+ /* '0' '1' '2' '3' '4' '5' '6' '7' */
113
+ /* 30 */ 0, 0, 0, 0, 0, 0, 0, 0,
114
+ /* '8' '9' ':' ';' '<' '=' '>' '?' */
115
+ 0, 0, 1, 1, 1, 1, 1, 1,
116
+ /* '@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' */
117
+ /* 40 */ 1, 0, 0, 0, 0, 0, 0, 0,
118
+ /* 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' */
119
+ 0, 0, 0, 0, 0, 0, 0, 0,
120
+ /* 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' */
121
+ /* 50 */ 0, 0, 0, 0, 0, 0, 0, 0,
122
+ /* 'X' 'Y' 'Z' '[' '\' ']' '^' '_' */
123
+ 0, 0, 0, 1, '\\', 1, 1, 0,
124
+ /* '`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' */
125
+ /* 60 */ 1, 0, 0, 0, 0, 0, 0, 0,
126
+ /* 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' */
127
+ 0, 0, 0, 0, 0, 0, 0, 0,
128
+ /* 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' */
129
+ /* 70 */ 0, 0, 0, 0, 0, 0, 0, 0,
130
+ /* 'x' 'y' 'z' '{' '|' '}' '~' DEL */
131
+ 0, 0, 0, 1, 1, 1, 1, 2,
132
+ /* 80 */ 1, 1, 1, 1, 1, 1, 1, 1,
133
+ 1, 1, 1, 1, 1, 1, 1, 1,
134
+ /* 90 */ 1, 1, 1, 1, 1, 1, 1, 1,
135
+ 1, 1, 1, 1, 1, 1, 1, 1,
136
+ /* a0 */ 1, 1, 1, 1, 1, 1, 1, 1,
137
+ 1, 1, 1, 1, 1, 1, 1, 1,
138
+ /* b0 */ 1, 1, 1, 1, 1, 1, 1, 1,
139
+ 1, 1, 1, 1, 1, 1, 1, 1,
140
+ /* c0 */ 1, 1, 1, 1, 1, 1, 1, 1,
141
+ 1, 1, 1, 1, 1, 1, 1, 1,
142
+ /* d0 */ 1, 1, 1, 1, 1, 1, 1, 1,
143
+ 1, 1, 1, 1, 1, 1, 1, 1,
144
+ /* e0 */ 1, 1, 1, 1, 1, 1, 1, 1,
145
+ 1, 1, 1, 1, 1, 1, 1, 1,
146
+ /* f0 */ 1, 1, 1, 1, 1, 1, 1, 1,
147
+ 1, 1, 1, 1, 1, 1, 1, 1 };
148
+
149
+ #ifdef SC_SCAN
150
+ static const char _ccltab[256] = { /* character classes */
151
+ /* NUL SOH STX ETX EOT ENQ ACK BEL */
152
+ /* 00 */ 0, 0, 0, 0, 0, 0, 0, 0,
153
+ /* BS HT LF VT FF CR SO SI */
154
+ 0, 1, 1, 1, 1, 1, 0, 0,
155
+ /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */
156
+ /* 10 */ 0, 0, 0, 0, 0, 0, 0, 0,
157
+ /* CAN EM SUB ESC FS GS RS US */
158
+ 0, 0, 0, 0, 0, 0, 0, 0,
159
+ /* ' ' '!' '"' '#' '$' '%' '&' ''' */
160
+ /* 20 */ 1, 8, 7, 9, 9, 9, 9, 7,
161
+ /* '(' ')' '*' '+' ',' '-' '.' '/' */
162
+ 9, 9, 9, 5, 9, 5, 4, 6,
163
+ /* '0' '1' '2' '3' '4' '5' '6' '7' */
164
+ /* 30 */ 3, 3, 3, 3, 3, 3, 3, 3,
165
+ /* '8' '9' ':' ';' '<' '=' '>' '?' */
166
+ 3, 3, 9, 9, 8, 8, 8, 9,
167
+ /* '@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' */
168
+ /* 40 */ 0, 2, 2, 2, 2, 2, 2, 2,
169
+ /* 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' */
170
+ 2, 2, 2, 2, 2, 2, 2, 2,
171
+ /* 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' */
172
+ /* 50 */ 2, 2, 2, 2, 2, 2, 2, 2,
173
+ /* 'X' 'Y' 'Z' '[' '\' ']' '^' '_' */
174
+ 2, 2, 2, 9, 9, 9, 9, 2,
175
+ /* '`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' */
176
+ /* 60 */ 7, 2, 2, 2, 2, 2, 2, 2,
177
+ /* 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' */
178
+ 2, 2, 2, 2, 2, 2, 2, 2,
179
+ /* 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' */
180
+ /* 70 */ 2, 2, 2, 2, 2, 2, 2, 2,
181
+ /* 'x' 'y' 'z' '{' '|' '}' '~' DEL */
182
+ 2, 2, 2, 9, 9, 9, 9, 0,
183
+ /* 80 */ 0, 0, 0, 0, 0, 0, 0, 0,
184
+ 0, 0, 0, 0, 0, 0, 0, 0,
185
+ /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0,
186
+ 0, 0, 0, 0, 0, 0, 0, 0,
187
+ /* a0 */ 0, 0, 0, 0, 0, 0, 0, 0,
188
+ 0, 0, 0, 0, 0, 0, 0, 0,
189
+ /* b0 */ 0, 0, 0, 0, 0, 0, 0, 0,
190
+ 0, 0, 0, 0, 0, 0, 0, 0,
191
+ /* c0 */ 0, 0, 0, 0, 0, 0, 0, 0,
192
+ 0, 0, 0, 0, 0, 0, 0, 0,
193
+ /* d0 */ 0, 0, 0, 0, 0, 0, 0, 0,
194
+ 0, 0, 0, 0, 0, 0, 0, 0,
195
+ /* e0 */ 0, 0, 0, 0, 0, 0, 0, 0,
196
+ 0, 0, 0, 0, 0, 0, 0, 0,
197
+ /* f0 */ 0, 0, 0, 0, 0, 0, 0, 0,
198
+ 0, 0, 0, 0, 0, 0, 0, 0 };
199
+
200
+ #ifdef GERMAN /* deutsche Texte */
201
+ static const char *_errmsgs[] = { /* error messages */
202
+ /* E_NONE 0 */ "kein Fehler",
203
+ /* E_NOMEM -1 */ "nicht genug Speicher",
204
+ /* E_FOPEN -2 */ "Öffnen fehlgeschlagen",
205
+ /* E_FREAD -3 */ "Lesefehler",
206
+ /* E_FWRITE -4 */ "Schreibfehler",
207
+ /* E_CHAR -5 */ "ungültiges Zeichen '%c' (0x%02x)",
208
+ /* E_BUFOVF -6 */ "Pufferüberlauf",
209
+ /* E_UNTSTR -7 */ "unbeendete Zeichenkette",
210
+ /* E_UNTCOM -8 */ "unerwartetes Dateiende in Kommentar "
211
+ "(Anfang in Zeile %d)",
212
+ /* E_STATE -9 */ "ungültiger Scannerzustand",
213
+ /* E_GARBAGE -10 */ "ungültiger Text am Dateiende",
214
+ /* E_UNKNOWN -11 */ "unbekannter Fehler"
215
+ };
216
+ #else /* English texts */
217
+ static const char *_errmsgs[] = { /* error messages */
218
+ /* E_NONE 0 */ "no error",
219
+ /* E_NOMEM -1 */ "not enough memory",
220
+ /* E_FOPEN -2 */ "file open failed",
221
+ /* E_FREAD -3 */ "file read failed",
222
+ /* E_FWRITE -4 */ "file write failed",
223
+ /* E_CHAR -5 */ "invalid character '%c' (0x%02x)",
224
+ /* E_BUFOVF -6 */ "scan buffer overflow",
225
+ /* E_UNTSTR -7 */ "unterminated string",
226
+ /* E_UNTCOM -8 */ "unexpected end of file in comment "
227
+ "started on line %d",
228
+ /* E_STATE -9 */ "invalid scanner state",
229
+ /* E_GARBAGE -10 */ "garbage at end of file",
230
+ /* E_UNKNOWN -11 */ "unknown error"
231
+ };
232
+ #endif /* #ifdef GERMAN .. #else .. */
233
+ #endif /* #ifdef SC_SCAN */
234
+
235
+ /*----------------------------------------------------------------------
236
+ Auxiliary Functions
237
+ ----------------------------------------------------------------------*/
238
+ #ifdef SC_SCAN
239
+
240
+ static int _swap (SCAN *scan)
241
+ { /* --- swap token information */
242
+ int t; /* swap buffer */
243
+
244
+ if (scan->value == scan->buf[0]) scan->value = scan->buf[1];
245
+ else scan->value = scan->buf[0];
246
+ t = scan->plen; scan->plen = scan->len; scan->len = t;
247
+ t = scan->pline; scan->pline = scan->line; scan->line = t;
248
+ t = scan->ptoken; scan->ptoken = scan->token; scan->token = t;
249
+ return t; /* return the new token */
250
+ } /* _swap() */
251
+
252
+ #endif
253
+ /*----------------------------------------------------------------------
254
+ Main Functions
255
+ ----------------------------------------------------------------------*/
256
+
257
+ int sc_fmtlen (const char *s, int *len)
258
+ { /* --- length of a formatted name */
259
+ int n = 0, k = 0; /* number of (additional) characters */
260
+ int q = 0; /* quote flag (default: no quotes) */
261
+
262
+ assert(s); /* check the function arguments */
263
+ while (*s) { /* while not at end of name */
264
+ n++; /* count character */
265
+ switch (_scftab[(unsigned char)*s++]) {
266
+ case 0: break;
267
+ case 1: q = 2; break;
268
+ case 2: k += 3; q = 2; break;
269
+ default: k += 1; q = 2; break;
270
+ } /* sum additional characters and */
271
+ } /* set quote flag (if necessary) */
272
+ if (len) *len = n; /* store normal length and */
273
+ return n +k +q; /* return length of scanable form */
274
+ } /* sc_fmtlen() */
275
+
276
+ /*--------------------------------------------------------------------*/
277
+
278
+ int sc_format (char *dst, const char *src, int quotes)
279
+ { /* --- format name in scanable form */
280
+ char *d; const char *s; /* to traverse buffer and name */
281
+ int c, cls; /* character and character class */
282
+ int t; /* temporary buffer */
283
+
284
+ assert(dst && src); /* check the function arguments */
285
+ if (!*src) quotes = 1; /* an empty name needs quotes */
286
+ if (!quotes) { /* if quotes are not mandatory, */
287
+ for (s = src; *s; ) /* traverse the string to convert */
288
+ if (_scftab[(unsigned char)*s++] != 0) {
289
+ quotes = 1; break; } /* if a character needs quotes, */
290
+ } /* set the quotes flag and abort */
291
+ d = dst; /* get the destination and */
292
+ if (quotes) *d++ = '"'; /* store a quote if necessary */
293
+ while (*src) { /* traverse the characters */
294
+ c = (unsigned char)*src++;/* get the next character */
295
+ cls = _scftab[c]; /* and its character class */
296
+ if (cls < 2) /* if it is a normal character, */
297
+ *d++ = c; /* just store it */
298
+ else if (cls > 2) { /* if it is an ANSI escape character, */
299
+ *d++ = '\\'; *d++ = cls;} /* store it as '\c' */
300
+ else { /* if it is any other character */
301
+ *d++ = '\\'; *d++ = 'x';
302
+ t = c >> 4; *d++ = (t > 9) ? (t -10 +'a') : (t +'0');
303
+ t = c & 0xf; *d++ = (t > 9) ? (t -10 +'a') : (t +'0');
304
+ } /* store the character code */
305
+ } /* as a hexadecimal number */
306
+ if (quotes) *d++ = '"'; /* store the closing quote */
307
+ *d = '\0'; /* and terminate the string */
308
+ return (int)(d -dst); /* return the length of the result */
309
+ } /* sc_format() */
310
+
311
+ /*--------------------------------------------------------------------*/
312
+ #ifdef SC_SCAN
313
+
314
+ SCAN* sc_create (const char *fname)
315
+ { /* --- create a scanner */
316
+ const char *fn = fname; /* buffer for filename */
317
+ SCAN *scan; /* created scanner */
318
+
319
+ if (!fn || !*fn) fname = "<stdin>";
320
+ scan = (SCAN*)malloc(sizeof(SCAN) +strlen(fname));
321
+ if (!scan) return NULL; /* allocate memory for a scanner */
322
+ strcpy(scan->fname, fname); /* and note the file name */
323
+ if (!fn || !*fn) /* if no file name is given, */
324
+ scan->file = stdin; /* read from standard input */
325
+ else { /* if a file name is given, */
326
+ scan->file = fopen(fn,"r"); /* open the file for reading */
327
+ if (!scan->file) { free(scan); return NULL; }
328
+ }
329
+ scan->line = 1; /* initialize the fields */
330
+ scan->token = scan->len = scan->start = 0;
331
+ scan->value = scan->buf[0]; scan->buf[0][0] = '\0';
332
+ scan->back = 0;
333
+ scan->errfile = stderr;
334
+ scan->msgcnt = scan->lncnt = 0;
335
+ scan->msgs = NULL;
336
+ return scan; /* return created scanner */
337
+ } /* sc_create() */
338
+
339
+ /*--------------------------------------------------------------------*/
340
+
341
+ void sc_delete (SCAN *scan)
342
+ { /* --- delete a scanner */
343
+ if (scan->file != stdin) fclose(scan->file);
344
+ free(scan); /* close the input file and */
345
+ } /* sc_delete() */ /* delete the scanner structure */
346
+
347
+ /*--------------------------------------------------------------------*/
348
+
349
+ int sc_next (SCAN *scan)
350
+ { /* --- get next token */
351
+ int c, ccl; /* character and character class */
352
+ int quote = 0; /* quote at the start of a string */
353
+ int ec = 0; /* escaped character */
354
+ int state = 0; /* state of automaton */
355
+ int level = 0; /* comment nesting level */
356
+ char *p; /* to traverse the scan buffer */
357
+ char *end; /* end of the scan buffer */
358
+
359
+ if (scan->back) { /* if a step backwards has been made, */
360
+ scan->back = 0; /* clear the corresponding flag, */
361
+ return _swap(scan); /* swap back the token information, */
362
+ } /* and return the current token */
363
+ scan->pline = scan->line; /* note the relevant information */
364
+ scan->ptoken = scan->token; /* of the current token */
365
+ scan->plen = scan->len; /* and swap scan buffers */
366
+ if (scan->value == scan->buf[0]) scan->value = p = scan->buf[1];
367
+ else scan->value = p = scan->buf[0];
368
+ end = p +SC_BUFSIZE -1; /* get the end of the scan buffer */
369
+
370
+ while (1) { /* read loop */
371
+ c = getc(scan->file); /* get character and character class */
372
+ ccl = (c < 0) ? EOF : _ccltab[c];
373
+ if (c == '\n') scan->line++; /* count the line */
374
+
375
+ switch (state) { /* evaluate state of automaton */
376
+
377
+ case S_SPACE: /* --- skip white space */
378
+ switch (ccl) { /* evaluate character category */
379
+ case C_SPACE : /* do nothing */ break;
380
+ case C_LETTER: *p++ = c; state = S_ID; break;
381
+ case C_DIGIT : *p++ = c; state = S_NUMDIG; break;
382
+ case C_POINT : *p++ = c; state = S_NUMPT; break;
383
+ case C_SIGN : *p++ = c; state = S_SIGN; break;
384
+ case C_CMPOP : *p++ = c; state = S_CMPOP; break;
385
+ case C_QUOTE : quote = c; state = S_STRING; break;
386
+ case C_SLASH : state = S_SLASH; break;
387
+ case C_ACTIVE: *p++ = c; *p = '\0'; scan->len = 1;
388
+ return scan->token = c;
389
+ case EOF : strcpy(p, "<eof>"); scan->len = 4;
390
+ return scan->token = (ferror(scan->file))
391
+ ? E_FREAD : T_EOF;
392
+ default : *p++ = c; *p = '\0'; scan->len = 1;
393
+ return scan->token = E_CHAR;
394
+ } break;
395
+
396
+ case S_ID: /* --- identifier (letter read) */
397
+ if ((ccl == C_LETTER) /* if another letter */
398
+ || (ccl == C_DIGIT) /* or a digit */
399
+ || (ccl == C_POINT) /* or a decimal point */
400
+ || (ccl == C_SIGN)) { /* or a sign follows */
401
+ if (p >= end) return scan->token = E_BUFOVF;
402
+ *p++ = c; break; /* buffer character */
403
+ } /* otherwise */
404
+ UNGETC(scan, c); /* put back last character, */
405
+ *p = '\0'; /* terminate string in buffer */
406
+ scan->len = (int)(p -scan->value); /* set string length */
407
+ return scan->token = T_ID; /* and return 'identifier' */
408
+
409
+ case S_NUMDIG: /* --- number (digit read) */
410
+ if (p < end) *p++ = c; /* buffer character */
411
+ else return scan->token = E_BUFOVF;
412
+ if (ccl == C_DIGIT) /* if another digit follows, */
413
+ break; /* do nothing */
414
+ if (ccl == C_POINT) { /* if a decimal point follows, */
415
+ state = S_FRAC; break; } /* go to 'fraction' state */
416
+ if ((c == 'e') /* if an exponent indicator follows */
417
+ || (c == 'E')) { /* (lower- or uppercase), */
418
+ state = S_EXPIND; break; } /* go to 'exponent' state */
419
+ if ((ccl == C_LETTER) /* if a letter */
420
+ || (ccl == C_SIGN)) { /* or a sign follows, */
421
+ state = S_ID; break; /* go to 'identifier' state */
422
+ } /* otherwise */
423
+ UNGETC(scan, c); /* put back last character, */
424
+ *--p = '\0'; /* terminate string in buffer */
425
+ scan->len = (int)(p -scan->value); /* set string length */
426
+ return scan->token = T_NUM; /* and return 'number' */
427
+
428
+ case S_NUMPT: /* --- number (point read) */
429
+ if (p < end) *p++ = c; /* buffer character */
430
+ else return scan->token = E_BUFOVF;
431
+ if (ccl == C_DIGIT) { /* if a digit follows, */
432
+ state = S_FRAC; break; } /* go to 'fraction' state */
433
+ if ((ccl == C_LETTER) /* if a letter */
434
+ || (ccl == C_POINT) /* or a decimal point */
435
+ || (ccl == C_SIGN)) { /* or a sign follows */
436
+ state = S_ID; break; /* go to 'identifier' state */
437
+ } /* otherwise */
438
+ UNGETC(scan, c); /* put back last character, */
439
+ *--p = '\0'; /* terminate string in buffer */
440
+ scan->len = (int)(p -scan->value); /* set string length */
441
+ return scan->token = T_ID; /* and return 'identifier' */
442
+
443
+ case S_FRAC: /* --- number (digit & point read) */
444
+ if (p < end) *p++ = c; /* buffer character */
445
+ else return scan->token = E_BUFOVF;
446
+ if (ccl == C_DIGIT) /* if another digit follows, */
447
+ break; /* do nothing else */
448
+ if ((c == 'e') /* if an exponent indicator follows, */
449
+ || (c == 'E')) { /* (lower- or uppercase), */
450
+ state = S_EXPIND; break; } /* go to exponent state */
451
+ if ((ccl == C_LETTER) /* if a letter */
452
+ || (ccl == C_POINT) /* or a decimal point */
453
+ || (ccl == C_SIGN)) { /* or a sign follows, */
454
+ state = S_ID; break; /* go to 'identifier' state */
455
+ } /* otherwise */
456
+ UNGETC(scan, c); /* put back last character, */
457
+ *--p = '\0'; /* terminate string in buffer */
458
+ scan->len = (int)(p -scan->value); /* set string length */
459
+ return scan->token = T_NUM; /* and return 'number' */
460
+
461
+ case S_EXPIND: /* --- exponent (indicator read) */
462
+ if (p < end) *p++ = c; /* buffer character */
463
+ else return scan->token = E_BUFOVF;
464
+ if (ccl == C_SIGN) { /* if a sign follows, */
465
+ state = S_EXPSGN; break; } /* go to 2nd 'exponent' state */
466
+ if (ccl == C_DIGIT) { /* if a digit follows, */
467
+ state = S_EXPDIG; break; } /* go to 3rd 'exponent' state */
468
+ if ((ccl == C_LETTER) /* if a letter */
469
+ || (ccl == C_POINT)) { /* or a decimal point follows */
470
+ state = S_ID; break; /* go to 'identifier' state */
471
+ } /* otherwise */
472
+ UNGETC(scan, c); /* put back last character, */
473
+ *--p = '\0'; /* terminate string in buffer */
474
+ scan->len = (int)(p -scan->value); /* set string length */
475
+ return scan->token = T_ID; /* and return 'identifier' */
476
+
477
+ case S_EXPSGN: /* --- exponent (sign read) */
478
+ if (p < end) *p++ = c; /* buffer character */
479
+ else return scan->token = E_BUFOVF;
480
+ if (ccl == C_DIGIT) { /* if a digit follows, */
481
+ state = S_EXPDIG; break;} /* do nothing else */
482
+ if ((ccl == C_LETTER) /* if a letter */
483
+ || (ccl == C_POINT) /* or a decimal point */
484
+ || (ccl == C_SIGN)) { /* or a sign follows */
485
+ state = S_ID; break; /* go to 'identifier' state */
486
+ } /* otherwise */
487
+ UNGETC(scan, c); /* put back last character, */
488
+ *--p = '\0'; /* terminate string in buffer */
489
+ scan->len = (int)(p -scan->value); /* set string length */
490
+ return scan->token = T_ID; /* and return 'identifier' */
491
+
492
+ case S_EXPDIG: /* --- exponent (digit read) */
493
+ if (p < end) *p++ = c; /* buffer character */
494
+ else return scan->token = E_BUFOVF;
495
+ if (ccl == C_DIGIT) /* if another digit follows, */
496
+ break; /* do nothing else */
497
+ if ((ccl == C_LETTER) /* if a letter */
498
+ || (ccl == C_POINT) /* or a decimal point */
499
+ || (ccl == C_SIGN)) { /* or a sign follows, */
500
+ state = S_ID; break; /* go to 'identifier' state */
501
+ } /* otherwise */
502
+ UNGETC(scan, c); /* put back last character, */
503
+ *--p = '\0'; /* terminate string in buffer */
504
+ scan->len = (int)(p -scan->value); /* set string length */
505
+ return scan->token = T_NUM; /* and return 'number' */
506
+
507
+ case S_SIGN: /* --- number (sign read) */
508
+ *p++ = c; /* buffer character */
509
+ if (ccl == C_DIGIT) { /* if a digit follows, */
510
+ state = S_NUMDIG; break; } /* go to 'number' state */
511
+ if (ccl == C_POINT) { /* if a decimal point follows, */
512
+ state = S_NUMPT; break; } /* go to fraction state */
513
+ if ((c == '-') /* if a '-' follows and previous */
514
+ && (scan->value[0] == '-')) { /* char was a minus sign */
515
+ *p = '\0'; scan->len = 2; return scan->token = T_DASH; }
516
+ if ((c == '>') /* if a '>' follows and previous */
517
+ && (scan->value[0] == '-')) { /* char was a minus sign */
518
+ *p = '\0'; scan->len = 2; return scan->token = T_RGT; }
519
+ if ((ccl == C_LETTER) /* if a letter */
520
+ || (ccl == C_SIGN)) { /* or a sign follows, */
521
+ state = S_ID; break; } /* go to 'identifier' state */
522
+ UNGETC(scan, c); /* otherwise put back last character, */
523
+ *--p = '\0'; /* terminate string in buffer */
524
+ scan->len = (int)(p -scan->value); /* set string length */
525
+ return scan->token = T_ID; /* and return 'identifier' */
526
+
527
+ case S_CMPOP: /* --- comparison operator read */
528
+ if ((c == '-') /* if a minus sign follows and */
529
+ && (scan->value[0] == '<')) { /* prev. char was a '<' */
530
+ *p++ = '-'; scan->token = T_LFT; }
531
+ else if (c == '=') { /* if an equal sign follows */
532
+ *p++ = '='; scan->token = T_CMP; }
533
+ else { /* if anything else follows */
534
+ UNGETC(scan, c); scan->token = scan->value[0]; }
535
+ *p = '\0'; /* terminate string in buffer */
536
+ scan->len = (int)(p -scan->value); /* set string length */
537
+ return scan->token; /* and return the token read */
538
+
539
+ case S_STRING: /* --- quoted string */
540
+ if ((c == '\n') || (c == EOF)) /* if end of line or file, */
541
+ return scan->token = E_UNTSTR; /* string is unterminated */
542
+ if (c != quote) { /* if not at end of string */
543
+ if (p >= end) return scan->token = E_BUFOVF;
544
+ if (c == '\\') { /* if escaped character follows, */
545
+ state = S_ESC; break; } /* go to escaped char state */
546
+ *p++ = c; break; /* otherwise buffer character */
547
+ } /* if at end of string, */
548
+ *p = '\0'; /* terminate string in buffer */
549
+ scan->len = (int)(p -scan->value); /* set string length */
550
+ return scan->token = T_ID; /* and return 'identifier' */
551
+
552
+ case S_ESC: /* --- after '\' in quoted string */
553
+ if ((c >= '0') && (c <= '7')) { /* if octal digit, */
554
+ ec = c -'0'; state = S_OCT1; break; }/* evaluate digit */
555
+ if (c == 'x') { /* if hexadecimal character code, */
556
+ state = S_HEX1; break;} /* go to hexadecimal evaluation */
557
+ switch (c) { /* evaluate character after '\' */
558
+ case 'a': c = '\a'; break;
559
+ case 'b': c = '\b'; break;
560
+ case 'f': c = '\f'; break;
561
+ case 'n': c = '\n'; break;
562
+ case 'r': c = '\r'; break;
563
+ case 't': c = '\t'; break;
564
+ case 'v': c = '\v'; break;
565
+ case '\n': c = -1; break;
566
+ default : break;
567
+ } /* get escaped character */
568
+ if (c >= 0) *p++ = c; /* and store it, then */
569
+ state = S_STRING; break;/* return to quoted string state */
570
+
571
+ case S_OCT1: /* --- escaped octal number 1 */
572
+ if ((c >= '0') /* if an octal digit follows, */
573
+ && (c <= '7')) { /* evaluate it */
574
+ ec = ec *8 +c -'0'; state = S_OCT2; break; }
575
+ UNGETC(scan, c); /* otherwise put back last character */
576
+ *p++ = ec; /* store escaped character and */
577
+ state = S_STRING; break;/* return to quoted string state */
578
+
579
+ case S_OCT2: /* --- escaped octal number 2 */
580
+ if ((c >= '0') || (c <= '7'))
581
+ ec = ec *8 +c -'0'; /* if octal digit, evaluate it */
582
+ else UNGETC(scan, c); /* otherwise put back last character */
583
+ *p++ = ec; /* store escaped character and */
584
+ state = S_STRING; break;/* return to quoted string state */
585
+
586
+ case S_HEX1: /* --- escaped hexadecimal number 1 */
587
+ if (ccl == C_DIGIT) { /* if hexadecimal digit, evaluate it */
588
+ ec = c -'0'; state = S_HEX2; break; }
589
+ if ((c >= 'a') && (c <= 'f')) {
590
+ ec = c -'a' +10; state = S_HEX2; break; }
591
+ if ((c >= 'A') && (c <= 'F')) {
592
+ ec = c -'A' +10; state = S_HEX2; break; }
593
+ UNGETC(scan, c); /* otherwise put back last character */
594
+ *p++ = 'x'; /* store escaped character ('x') and */
595
+ state = S_STRING; break;/* return to quoted string state */
596
+
597
+ case S_HEX2: /* --- escaped hexadecimal number 2 */
598
+ if (ccl == C_DIGIT) /* if hexadecimal digit, evaluate it */
599
+ ec = ec*16 +c -'0';
600
+ else if ((c >= 'a') && (c <= 'f'))
601
+ ec = ec*16 +c -'a' +10;
602
+ else if ((c >= 'A') && (c <= 'F'))
603
+ ec = ec*16 +c -'A' +10;
604
+ else UNGETC(scan, c); /* otherwise put back last character */
605
+ *p++ = ec; /* store escaped character and */
606
+ state = S_STRING; break;/* return to quoted string state */
607
+
608
+ case S_SLASH: /* --- slash '/' */
609
+ if (c == '/') { /* if C++ style comment, then */
610
+ state = S_CPPCOM; break; } /* skip to end of line */
611
+ if (c == '*') { /* if C style comment */
612
+ scan->start = scan->line; level = 1;
613
+ state = S_CCOM1; break; /* note start line, init. level */
614
+ } /* and go to first 'comment' state */
615
+ UNGETC(scan, c); /* otherwise put back last character */
616
+ *p++ = '/'; *p = '\0'; /* store character in buffer */
617
+ scan->len = 1; /* set string length and */
618
+ return scan->token = '/'; /* return `character' */
619
+
620
+ case S_CPPCOM: /* --- C++ style comment */
621
+ if ((c == '\n') /* if at end of line */
622
+ || (c == EOF)) /* or at end of file */
623
+ state = S_SPACE; /* return to white space skipping */
624
+ break; /* (skip to end of line) */
625
+
626
+ case S_CCOM1: /* --- C style comment 1 */
627
+ if (c == EOF) /* if end of file, abort */
628
+ return scan->token = E_UNTCOM;
629
+ if (c == '*') /* if possibly 'end of comment', */
630
+ state = S_CCOM2; /* go to 2nd 'comment' state */
631
+ else if (c == '/') /* if possibly 'start of comment', */
632
+ state = S_CCOM3; /* go to 3rd 'comment' state */
633
+ break;
634
+
635
+ case S_CCOM2: /* --- C style comment 2 */
636
+ if (c == EOF) /* if end of file, abort */
637
+ return scan->token = E_UNTCOM;
638
+ if (c == '/') { /* if end of comment found */
639
+ if (--level <= 0) state = S_SPACE;
640
+ else state = S_CCOM1; }
641
+ else if (c != '*') /* if end of comment impossible */
642
+ state = S_CCOM1; /* return to comment skipping */
643
+ break; /* (possible start of comment) */
644
+
645
+ case S_CCOM3: /* --- C style comment 3 */
646
+ if (c == EOF) /* if end of file, abort */
647
+ return scan->token = E_UNTCOM;
648
+ if (c == '*') { /* if start of comment found */
649
+ level++; state = S_CCOM1; }
650
+ else if (c != '/') /* if start of comment impossible */
651
+ state = S_CCOM1; /* return to comment skipping */
652
+ break; /* (possible end of comment) */
653
+
654
+ default: /* if state is invalid, abort */
655
+ return scan->token = E_STATE;
656
+
657
+ } /* switch() */
658
+ } /* while(1) */
659
+ } /* sc_next() */
660
+
661
+ /*--------------------------------------------------------------------*/
662
+
663
+ int sc_nexter (SCAN *scan)
664
+ { /* --- get next token error reporting */
665
+ if (sc_next(scan) < 0) return sc_error(scan, scan->token);
666
+ return scan->token; /* get next token, report error, */
667
+ } /* sc_nexter() */ /* and return next token */
668
+
669
+ /*--------------------------------------------------------------------*/
670
+
671
+ int sc_back (SCAN *scan)
672
+ { /* --- go back one token */
673
+ if (scan->back) /* a second step backwards */
674
+ return scan->token; /* is impossible, so do nothing */
675
+ scan->back = -1; /* set the step backward flag */
676
+ return _swap(scan); /* swap the token information */
677
+ } /* sc_back() */ /* and return the previous token */
678
+
679
+ /*--------------------------------------------------------------------*/
680
+
681
+ int sc_eof (SCAN *scan)
682
+ { /* --- check for end of file */
683
+ if (scan->token == T_EOF) return 1;
684
+ sc_error(scan, E_GARBAGE); /* check for end of file */
685
+ return 0; /* and report an error */
686
+ } /* sc_eof() */ /* if it is not reached */
687
+
688
+ /*--------------------------------------------------------------------*/
689
+
690
+ int sc_recover (SCAN *scan, int stop, int beg, int end, int level)
691
+ { /* --- recover from an error */
692
+ while ((scan->token != stop) /* while at stop token */
693
+ && (scan->token != T_EOF)) { /* and not at end of file */
694
+ if (scan->token == beg) /* if begin level token found, */
695
+ level++; /* increment level counter */
696
+ else if ((scan->token == end) /* if end level token found */
697
+ && (--level <= 0)) /* and on level to return to, */
698
+ break; /* abort loop */
699
+ if (sc_next(scan) < 0) return scan->token;
700
+ } /* consume token */
701
+ if (scan->token != T_EOF) /* if not at end of file, */
702
+ sc_next(scan); /* consume token (stop or end) */
703
+ return scan->token; /* return the next token */
704
+ } /* sc_recover() */
705
+
706
+ /*--------------------------------------------------------------------*/
707
+
708
+ void sc_errfile (SCAN *scan, FILE *file, int lncnt)
709
+ { /* --- set file for error output */
710
+ assert(scan); /* check the function arguments */
711
+ scan->errfile = (file) ? file : stderr;
712
+ scan->lncnt = lncnt; /* set file and line count */
713
+ } /* sc_errfile() */
714
+
715
+ /*--------------------------------------------------------------------*/
716
+
717
+ void sc_errmsgs (SCAN *scan, const char *msgs[], int cnt)
718
+ { /* --- set additonal error messages */
719
+ assert(scan); /* check the function arguments */
720
+ scan->msgs = msgs; /* note error message vector */
721
+ scan->msgcnt = cnt; /* and number of error messages */
722
+ } /* sc_errmsgs() */
723
+
724
+ /*--------------------------------------------------------------------*/
725
+
726
+ int sc_error (SCAN *scan, int code, ...)
727
+ { /* --- print an error message */
728
+ va_list args; /* variable argument list */
729
+ const char *msg; /* error message */
730
+ int c, pc; /* the invalid character */
731
+ int tmp; /* temporary buffer */
732
+
733
+ assert(scan); /* check the function arguments */
734
+ if (scan->lncnt <= 0) /* if line count is zero or negative, */
735
+ putc('\n', scan->errfile); /* start a new output line */
736
+ fprintf(scan->errfile, FILETXT" %s", scan->fname);
737
+ /* print the file name */
738
+ if ((code != E_NONE) /* if an error occurred, */
739
+ && (code != E_FOPEN) /* but not 'file open failed' */
740
+ && (code != E_UNTCOM)) { /* and not 'unterminated comment' */
741
+ fputs((scan->lncnt > 2) ? ",\n" : ", ", scan->errfile);
742
+ fprintf(scan->errfile, LINETXT" %d", scan->line);
743
+ } /* print line number */
744
+ fputs((scan->lncnt > 1) ? ":\n" : ": ", scan->errfile);
745
+
746
+ if (code >= 0) code = E_NONE; /* check error code and */
747
+ tmp = MSGOFFSET -code; /* get error message text */
748
+ if (code > E_UNKNOWN) msg = _errmsgs[-code];
749
+ else if (tmp < scan->msgcnt) msg = scan->msgs[tmp];
750
+ else msg = NULL;
751
+ if (!msg) msg = _errmsgs[-(code = E_UNKNOWN)];
752
+
753
+ switch (code) { /* special error handling */
754
+ case E_CHAR : c = pc = (unsigned char)scan->value[0];
755
+ if (c < ' ') pc = ' ';
756
+ fprintf (scan->errfile, msg, pc, c); break;
757
+ case E_UNTCOM: fprintf (scan->errfile, msg, scan->start); break;
758
+ default : va_start(args, code); /* get variable arguments */
759
+ vfprintf(scan->errfile, msg, args);
760
+ va_end(args); break; /* print error message and */
761
+ } /* end variable argument evaluation */
762
+ if (scan->lncnt > 0) /* if line count is positive, */
763
+ putc('\n', scan->errfile); /* terminate output line */
764
+ return code; /* return error code */
765
+ } /* sc_error() */
766
+
767
+ #endif