ruco-cpp 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/Rakefile +17 -0
  3. data/bin/console +14 -0
  4. data/bin/ruco +30 -0
  5. data/bin/setup +7 -0
  6. data/data/ruco/Parser.frame +359 -0
  7. data/data/ruco/Scanner.frame +896 -0
  8. data/data/ruco/picojson/Changes +14 -0
  9. data/data/ruco/picojson/LICENSE +25 -0
  10. data/data/ruco/picojson/Makefile +8 -0
  11. data/data/ruco/picojson/README.mkdn +183 -0
  12. data/data/ruco/picojson/examples/github-issues.cc +110 -0
  13. data/data/ruco/picojson/examples/iostream.cc +70 -0
  14. data/data/ruco/picojson/examples/streaming.cc +76 -0
  15. data/data/ruco/picojson/picojson.h +1299 -0
  16. data/ext/cocor/Action.cpp +81 -0
  17. data/ext/cocor/Action.h +59 -0
  18. data/ext/cocor/ArrayList.cpp +79 -0
  19. data/ext/cocor/ArrayList.h +52 -0
  20. data/ext/cocor/BitArray.cpp +156 -0
  21. data/ext/cocor/BitArray.h +68 -0
  22. data/ext/cocor/CharClass.cpp +42 -0
  23. data/ext/cocor/CharClass.h +48 -0
  24. data/ext/cocor/CharSet.cpp +166 -0
  25. data/ext/cocor/CharSet.h +68 -0
  26. data/ext/cocor/Coco.atg +528 -0
  27. data/ext/cocor/Coco.cpp +173 -0
  28. data/ext/cocor/Comment.cpp +45 -0
  29. data/ext/cocor/Comment.h +51 -0
  30. data/ext/cocor/Copyright.frame +27 -0
  31. data/ext/cocor/DFA.cpp +865 -0
  32. data/ext/cocor/DFA.h +132 -0
  33. data/ext/cocor/Generator.cpp +182 -0
  34. data/ext/cocor/Generator.h +61 -0
  35. data/ext/cocor/Graph.h +59 -0
  36. data/ext/cocor/HashTable.cpp +115 -0
  37. data/ext/cocor/HashTable.h +84 -0
  38. data/ext/cocor/Makefile +11 -0
  39. data/ext/cocor/Melted.cpp +39 -0
  40. data/ext/cocor/Melted.h +51 -0
  41. data/ext/cocor/Node.cpp +69 -0
  42. data/ext/cocor/Node.h +86 -0
  43. data/ext/cocor/Parser.cpp +925 -0
  44. data/ext/cocor/Parser.frame +326 -0
  45. data/ext/cocor/Parser.h +153 -0
  46. data/ext/cocor/ParserGen.cpp +486 -0
  47. data/ext/cocor/ParserGen.h +99 -0
  48. data/ext/cocor/Position.cpp +37 -0
  49. data/ext/cocor/Position.h +46 -0
  50. data/ext/cocor/README.md +12 -0
  51. data/ext/cocor/Scanner.cpp +833 -0
  52. data/ext/cocor/Scanner.frame +897 -0
  53. data/ext/cocor/Scanner.h +291 -0
  54. data/ext/cocor/Sets.h +84 -0
  55. data/ext/cocor/SortedList.cpp +141 -0
  56. data/ext/cocor/SortedList.h +68 -0
  57. data/ext/cocor/State.cpp +77 -0
  58. data/ext/cocor/State.h +55 -0
  59. data/ext/cocor/StringBuilder.cpp +88 -0
  60. data/ext/cocor/StringBuilder.h +29 -0
  61. data/ext/cocor/Symbol.cpp +61 -0
  62. data/ext/cocor/Symbol.h +70 -0
  63. data/ext/cocor/Tab.cpp +1248 -0
  64. data/ext/cocor/Tab.h +245 -0
  65. data/ext/cocor/Target.cpp +41 -0
  66. data/ext/cocor/Target.h +48 -0
  67. data/ext/cocor/build.bat +3 -0
  68. data/ext/cocor/build.sh +4 -0
  69. data/ext/cocor/coc.bat +1 -0
  70. data/ext/cocor/coc.sh +2 -0
  71. data/ext/cocor/cocor_ruby_ext.cpp +124 -0
  72. data/ext/cocor/cygBuild.bat +1 -0
  73. data/ext/cocor/extconf.rb +5 -0
  74. data/ext/cocor/mingwbuild.bat +2 -0
  75. data/ext/cocor/mkmf.log +57 -0
  76. data/ext/cocor/zipsources.bat +1 -0
  77. data/lib/cocor.rb +14 -0
  78. data/lib/ruco/version.rb +3 -0
  79. data/lib/ruco.rb +728 -0
  80. metadata +195 -0
@@ -0,0 +1,925 @@
1
+ /*----------------------------------------------------------------------
2
+ Compiler Generator Coco/R,
3
+ Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz
4
+ extended by M. Loeberbauer & A. Woess, Univ. of Linz
5
+ ported to C++ by Csaba Balazs, University of Szeged
6
+ with improvements by Pat Terry, Rhodes University
7
+
8
+ This program is free software; you can redistribute it and/or modify it
9
+ under the terms of the GNU General Public License as published by the
10
+ Free Software Foundation; either version 2, or (at your option) any
11
+ later version.
12
+
13
+ This program is distributed in the hope that it will be useful, but
14
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16
+ for more details.
17
+
18
+ You should have received a copy of the GNU General Public License along
19
+ with this program; if not, write to the Free Software Foundation, Inc.,
20
+ 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21
+
22
+ As an exception, it is allowed to write an extension of Coco/R that is
23
+ used as a plugin in non-free software.
24
+
25
+ If not otherwise stated, any source code generated by Coco/R (other than
26
+ Coco/R itself) does not fall under the GNU General Public License.
27
+ -----------------------------------------------------------------------*/
28
+
29
+
30
+ #include <wchar.h>
31
+ #include "Parser.h"
32
+ #include "Scanner.h"
33
+
34
+
35
+ namespace Coco {
36
+
37
+
38
+ void Parser::SynErr(int n) {
39
+ if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n);
40
+ errDist = 0;
41
+ }
42
+
43
+ void Parser::SemErr(const wchar_t* msg) {
44
+ if (errDist >= minErrDist) errors->Error(t->line, t->col, msg);
45
+ errDist = 0;
46
+ }
47
+
48
+ void Parser::Get() {
49
+ for (;;) {
50
+ t = la;
51
+ la = scanner->Scan();
52
+ if (la->kind <= maxT) { ++errDist; break; }
53
+ if (la->kind == _ddtSym) {
54
+ tab->SetDDT(la->val);
55
+ }
56
+ if (la->kind == _optionSym) {
57
+ tab->SetOption(la->val);
58
+ }
59
+
60
+ if (dummyToken != t) {
61
+ dummyToken->kind = t->kind;
62
+ dummyToken->pos = t->pos;
63
+ dummyToken->col = t->col;
64
+ dummyToken->line = t->line;
65
+ dummyToken->next = NULL;
66
+ coco_string_delete(dummyToken->val);
67
+ dummyToken->val = coco_string_create(t->val);
68
+ t = dummyToken;
69
+ }
70
+ la = t;
71
+ }
72
+ }
73
+
74
+ void Parser::Expect(int n) {
75
+ if (la->kind==n) Get(); else { SynErr(n); }
76
+ }
77
+
78
+ void Parser::ExpectWeak(int n, int follow) {
79
+ if (la->kind == n) Get();
80
+ else {
81
+ SynErr(n);
82
+ while (!StartOf(follow)) Get();
83
+ }
84
+ }
85
+
86
+ bool Parser::WeakSeparator(int n, int syFol, int repFol) {
87
+ if (la->kind == n) {Get(); return true;}
88
+ else if (StartOf(repFol)) {return false;}
89
+ else {
90
+ SynErr(n);
91
+ while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) {
92
+ Get();
93
+ }
94
+ return StartOf(syFol);
95
+ }
96
+ }
97
+
98
+ void Parser::Coco() {
99
+ Symbol *sym; Graph *g, *g1, *g2; wchar_t* gramName = NULL; CharSet *s;
100
+ int beg = la->pos; int line = la->line;
101
+ while (StartOf(1)) {
102
+ Get();
103
+ }
104
+ if (la->pos != beg) {
105
+ pgen->usingPos = new Position(beg, t->pos + coco_string_length(t->val), 0, line);
106
+ }
107
+
108
+ Expect(6 /* "COMPILER" */);
109
+ genScanner = true;
110
+ tab->ignored = new CharSet();
111
+ Expect(_ident);
112
+ gramName = coco_string_create(t->val);
113
+ beg = la->pos;
114
+ line = la->line;
115
+
116
+ while (StartOf(2)) {
117
+ Get();
118
+ }
119
+ tab->semDeclPos = new Position(beg, la->pos, 0, line);
120
+ if (la->kind == 7 /* "IGNORECASE" */) {
121
+ Get();
122
+ dfa->ignoreCase = true;
123
+ }
124
+ if (la->kind == 8 /* "CHARACTERS" */) {
125
+ Get();
126
+ while (la->kind == _ident) {
127
+ SetDecl();
128
+ }
129
+ }
130
+ if (la->kind == 9 /* "TOKENS" */) {
131
+ Get();
132
+ while (la->kind == _ident || la->kind == _string || la->kind == _char) {
133
+ TokenDecl(Node::t);
134
+ }
135
+ }
136
+ if (la->kind == 10 /* "PRAGMAS" */) {
137
+ Get();
138
+ while (la->kind == _ident || la->kind == _string || la->kind == _char) {
139
+ TokenDecl(Node::pr);
140
+ }
141
+ }
142
+ while (la->kind == 11 /* "COMMENTS" */) {
143
+ Get();
144
+ bool nested = false;
145
+ Expect(12 /* "FROM" */);
146
+ TokenExpr(g1);
147
+ Expect(13 /* "TO" */);
148
+ TokenExpr(g2);
149
+ if (la->kind == 14 /* "NESTED" */) {
150
+ Get();
151
+ nested = true;
152
+ }
153
+ dfa->NewComment(g1->l, g2->l, nested);
154
+ }
155
+ while (la->kind == 15 /* "IGNORE" */) {
156
+ Get();
157
+ Set(s);
158
+ tab->ignored->Or(s);
159
+ }
160
+ while (!(la->kind == _EOF || la->kind == 16 /* "PRODUCTIONS" */)) {SynErr(42); Get();}
161
+ Expect(16 /* "PRODUCTIONS" */);
162
+ if (genScanner) dfa->MakeDeterministic();
163
+ tab->DeleteNodes();
164
+
165
+ while (la->kind == _ident) {
166
+ Get();
167
+ sym = tab->FindSym(t->val);
168
+ bool undef = (sym == NULL);
169
+ if (undef) sym = tab->NewSym(Node::nt, t->val, t->line);
170
+ else {
171
+ if (sym->typ == Node::nt) {
172
+ if (sym->graph != NULL) SemErr(L"name declared twice");
173
+ } else SemErr(L"this symbol kind not allowed on left side of production");
174
+ sym->line = t->line;
175
+ }
176
+ bool noAttrs = (sym->attrPos == NULL);
177
+ sym->attrPos = NULL;
178
+
179
+ if (la->kind == 24 /* "<" */ || la->kind == 26 /* "<." */) {
180
+ AttrDecl(sym);
181
+ }
182
+ if (!undef)
183
+ if (noAttrs != (sym->attrPos == NULL))
184
+ SemErr(L"attribute mismatch between declaration and use of this symbol");
185
+
186
+ if (la->kind == 39 /* "(." */) {
187
+ SemText(sym->semPos);
188
+ }
189
+ ExpectWeak(17 /* "=" */, 3);
190
+ Expression(g);
191
+ sym->graph = g->l;
192
+ tab->Finish(g);
193
+
194
+ ExpectWeak(18 /* "." */, 4);
195
+ }
196
+ Expect(19 /* "END" */);
197
+ Expect(_ident);
198
+ if (!coco_string_equal(gramName, t->val))
199
+ SemErr(L"name does not match grammar name");
200
+ tab->gramSy = tab->FindSym(gramName);
201
+ if (tab->gramSy == NULL)
202
+ SemErr(L"missing production for grammar name");
203
+ else {
204
+ sym = tab->gramSy;
205
+ if (sym->attrPos != NULL)
206
+ SemErr(L"grammar symbol must not have attributes");
207
+ }
208
+ tab->noSym = tab->NewSym(Node::t, L"???", 0); // noSym gets highest number
209
+ tab->SetupAnys();
210
+ tab->RenumberPragmas();
211
+ if (tab->ddt[2]) tab->PrintNodes();
212
+ if (errors->count == 0) {
213
+ wprintf(L"checking\n");
214
+ tab->CompSymbolSets();
215
+ if (tab->ddt[7]) tab->XRef();
216
+ if (tab->GrammarOk()) {
217
+ wprintf(L"parser");
218
+ pgen->WriteParser();
219
+ if (genScanner) {
220
+ wprintf(L" + scanner");
221
+ dfa->WriteScanner();
222
+ if (tab->ddt[0]) dfa->PrintStates();
223
+ }
224
+ wprintf(L" generated\n");
225
+ if (tab->ddt[8]) pgen->WriteStatistics();
226
+ }
227
+ }
228
+ if (tab->ddt[6]) tab->PrintSymbolTable();
229
+
230
+ Expect(18 /* "." */);
231
+ }
232
+
233
+ void Parser::SetDecl() {
234
+ CharSet *s;
235
+ Expect(_ident);
236
+ wchar_t *name = coco_string_create(t->val);
237
+ CharClass *c = tab->FindCharClass(name);
238
+ if (c != NULL) SemErr(L"name declared twice");
239
+
240
+ Expect(17 /* "=" */);
241
+ Set(s);
242
+ if (s->Elements() == 0) SemErr(L"character set must not be empty");
243
+ tab->NewCharClass(name, s);
244
+
245
+ Expect(18 /* "." */);
246
+ }
247
+
248
+ void Parser::TokenDecl(int typ) {
249
+ wchar_t* name = NULL; int kind; Symbol *sym; Graph *g;
250
+ Sym(name, kind);
251
+ sym = tab->FindSym(name);
252
+ if (sym != NULL) SemErr(L"name declared twice");
253
+ else {
254
+ sym = tab->NewSym(typ, name, t->line);
255
+ sym->tokenKind = Symbol::fixedToken;
256
+ }
257
+ tokenString = NULL;
258
+
259
+ while (!(StartOf(5))) {SynErr(43); Get();}
260
+ if (la->kind == 17 /* "=" */) {
261
+ Get();
262
+ TokenExpr(g);
263
+ Expect(18 /* "." */);
264
+ if (kind == str) SemErr(L"a literal must not be declared with a structure");
265
+ tab->Finish(g);
266
+ if (tokenString == NULL || coco_string_equal(tokenString, noString))
267
+ dfa->ConvertToStates(g->l, sym);
268
+ else { // TokenExpr is a single string
269
+ if ((*(tab->literals))[tokenString] != NULL)
270
+ SemErr(L"token string declared twice");
271
+ tab->literals->Set(tokenString, sym);
272
+ dfa->MatchLiteral(tokenString, sym);
273
+ }
274
+
275
+ } else if (StartOf(6)) {
276
+ if (kind == id) genScanner = false;
277
+ else dfa->MatchLiteral(sym->name, sym);
278
+
279
+ } else SynErr(44);
280
+ if (la->kind == 39 /* "(." */) {
281
+ SemText(sym->semPos);
282
+ if (typ != Node::pr) SemErr(L"semantic action not allowed here");
283
+ }
284
+ }
285
+
286
+ void Parser::TokenExpr(Graph* &g) {
287
+ Graph *g2;
288
+ TokenTerm(g);
289
+ bool first = true;
290
+ while (WeakSeparator(28 /* "|" */,8,7) ) {
291
+ TokenTerm(g2);
292
+ if (first) { tab->MakeFirstAlt(g); first = false; }
293
+ tab->MakeAlternative(g, g2);
294
+
295
+ }
296
+ }
297
+
298
+ void Parser::Set(CharSet* &s) {
299
+ CharSet *s2;
300
+ SimSet(s);
301
+ while (la->kind == 20 /* "+" */ || la->kind == 21 /* "-" */) {
302
+ if (la->kind == 20 /* "+" */) {
303
+ Get();
304
+ SimSet(s2);
305
+ s->Or(s2);
306
+ } else {
307
+ Get();
308
+ SimSet(s2);
309
+ s->Subtract(s2);
310
+ }
311
+ }
312
+ }
313
+
314
+ void Parser::AttrDecl(Symbol *sym) {
315
+ if (la->kind == 24 /* "<" */) {
316
+ Get();
317
+ int beg = la->pos; int col = la->col; int line = la->line;
318
+ while (StartOf(9)) {
319
+ if (StartOf(10)) {
320
+ Get();
321
+ } else {
322
+ Get();
323
+ SemErr(L"bad string in attributes");
324
+ }
325
+ }
326
+ Expect(25 /* ">" */);
327
+ if (t->pos > beg)
328
+ sym->attrPos = new Position(beg, t->pos, col, line);
329
+ } else if (la->kind == 26 /* "<." */) {
330
+ Get();
331
+ int beg = la->pos; int col = la->col; int line = la->line;
332
+ while (StartOf(11)) {
333
+ if (StartOf(12)) {
334
+ Get();
335
+ } else {
336
+ Get();
337
+ SemErr(L"bad string in attributes");
338
+ }
339
+ }
340
+ Expect(27 /* ".>" */);
341
+ if (t->pos > beg)
342
+ sym->attrPos = new Position(beg, t->pos, col, line);
343
+ } else SynErr(45);
344
+ }
345
+
346
+ void Parser::SemText(Position* &pos) {
347
+ Expect(39 /* "(." */);
348
+ int beg = la->pos; int col = la->col; int line = t->line;
349
+ while (StartOf(13)) {
350
+ if (StartOf(14)) {
351
+ Get();
352
+ } else if (la->kind == _badString) {
353
+ Get();
354
+ SemErr(L"bad string in semantic action");
355
+ } else {
356
+ Get();
357
+ SemErr(L"missing end of previous semantic action");
358
+ }
359
+ }
360
+ Expect(40 /* ".)" */);
361
+ pos = new Position(beg, t->pos, col, line);
362
+ }
363
+
364
+ void Parser::Expression(Graph* &g) {
365
+ Graph *g2;
366
+ Term(g);
367
+ bool first = true;
368
+ while (WeakSeparator(28 /* "|" */,16,15) ) {
369
+ Term(g2);
370
+ if (first) { tab->MakeFirstAlt(g); first = false; }
371
+ tab->MakeAlternative(g, g2);
372
+
373
+ }
374
+ }
375
+
376
+ void Parser::SimSet(CharSet* &s) {
377
+ int n1, n2;
378
+ s = new CharSet();
379
+ if (la->kind == _ident) {
380
+ Get();
381
+ CharClass *c = tab->FindCharClass(t->val);
382
+ if (c == NULL) SemErr(L"undefined name"); else s->Or(c->set);
383
+
384
+ } else if (la->kind == _string) {
385
+ Get();
386
+ wchar_t *subName2 = coco_string_create(t->val, 1, coco_string_length(t->val)-2);
387
+ wchar_t *name = tab->Unescape(subName2);
388
+ coco_string_delete(subName2);
389
+ wchar_t ch;
390
+ int len = coco_string_length(name);
391
+ for(int i=0; i < len; i++) {
392
+ ch = name[i];
393
+ if (dfa->ignoreCase) {
394
+ if ((L'A' <= ch) && (ch <= L'Z')) ch = ch - (L'A' - L'a'); // ch.ToLower()
395
+ }
396
+ s->Set(ch);
397
+ }
398
+ coco_string_delete(name);
399
+
400
+ } else if (la->kind == _char) {
401
+ Char(n1);
402
+ s->Set(n1);
403
+ if (la->kind == 22 /* ".." */) {
404
+ Get();
405
+ Char(n2);
406
+ for (int i = n1; i <= n2; i++) s->Set(i);
407
+ }
408
+ } else if (la->kind == 23 /* "ANY" */) {
409
+ Get();
410
+ s = new CharSet(); s->Fill();
411
+ } else SynErr(46);
412
+ }
413
+
414
+ void Parser::Char(int &n) {
415
+ Expect(_char);
416
+ n = 0;
417
+ wchar_t* subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2);
418
+ wchar_t* name = tab->Unescape(subName);
419
+ coco_string_delete(subName);
420
+
421
+ // "<= 1" instead of "== 1" to allow the escape sequence '\0' in c++
422
+ if (coco_string_length(name) <= 1) n = name[0];
423
+ else SemErr(L"unacceptable character value");
424
+ coco_string_delete(name);
425
+ if (dfa->ignoreCase && (((wchar_t) n) >= 'A') && (((wchar_t) n) <= 'Z')) n += 32;
426
+
427
+ }
428
+
429
+ void Parser::Sym(wchar_t* &name, int &kind) {
430
+ name = coco_string_create(L"???"); kind = id;
431
+ if (la->kind == _ident) {
432
+ Get();
433
+ kind = id; coco_string_delete(name); name = coco_string_create(t->val);
434
+ } else if (la->kind == _string || la->kind == _char) {
435
+ if (la->kind == _string) {
436
+ Get();
437
+ coco_string_delete(name); name = coco_string_create(t->val);
438
+ } else {
439
+ Get();
440
+ wchar_t *subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2);
441
+ coco_string_delete(name);
442
+ name = coco_string_create_append(L"\"", subName);
443
+ coco_string_delete(subName);
444
+ coco_string_merge(name, L"\"");
445
+
446
+ }
447
+ kind = str;
448
+ if (dfa->ignoreCase) {
449
+ wchar_t *oldName = name;
450
+ name = coco_string_create_lower(name);
451
+ coco_string_delete(oldName);
452
+ }
453
+ if (coco_string_indexof(name, ' ') >= 0)
454
+ SemErr(L"literal tokens must not contain blanks");
455
+ } else SynErr(47);
456
+ }
457
+
458
+ void Parser::Term(Graph* &g) {
459
+ Graph *g2; Node *rslv = NULL; g = NULL;
460
+ if (StartOf(17)) {
461
+ if (la->kind == 37 /* "IF" */) {
462
+ rslv = tab->NewNode(Node::rslv, (Symbol*)NULL, la->line);
463
+ Resolver(rslv->pos);
464
+ g = new Graph(rslv);
465
+ }
466
+ Factor(g2);
467
+ if (rslv != NULL) tab->MakeSequence(g, g2);
468
+ else g = g2;
469
+ while (StartOf(18)) {
470
+ Factor(g2);
471
+ tab->MakeSequence(g, g2);
472
+ }
473
+ } else if (StartOf(19)) {
474
+ g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0));
475
+ } else SynErr(48);
476
+ if (g == NULL) // invalid start of Term
477
+ g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0));
478
+ }
479
+
480
+ void Parser::Resolver(Position* &pos) {
481
+ Expect(37 /* "IF" */);
482
+ Expect(30 /* "(" */);
483
+ int beg = la->pos; int col = la->col; int line = la->line;
484
+ Condition();
485
+ pos = new Position(beg, t->pos, col, line);
486
+ }
487
+
488
+ void Parser::Factor(Graph* &g) {
489
+ wchar_t* name = NULL; int kind; Position *pos; bool weak = false;
490
+ g = NULL;
491
+
492
+ switch (la->kind) {
493
+ case _ident: case _string: case _char: case 29 /* "WEAK" */: {
494
+ if (la->kind == 29 /* "WEAK" */) {
495
+ Get();
496
+ weak = true;
497
+ }
498
+ Sym(name, kind);
499
+ Symbol *sym = tab->FindSym(name);
500
+ if (sym == NULL && kind == str)
501
+ sym = (Symbol*)((*(tab->literals))[name]);
502
+ bool undef = (sym == NULL);
503
+ if (undef) {
504
+ if (kind == id)
505
+ sym = tab->NewSym(Node::nt, name, 0); // forward nt
506
+ else if (genScanner) {
507
+ sym = tab->NewSym(Node::t, name, t->line);
508
+ dfa->MatchLiteral(sym->name, sym);
509
+ } else { // undefined string in production
510
+ SemErr(L"undefined string in production");
511
+ sym = tab->eofSy; // dummy
512
+ }
513
+ }
514
+ int typ = sym->typ;
515
+ if (typ != Node::t && typ != Node::nt)
516
+ SemErr(L"this symbol kind is not allowed in a production");
517
+ if (weak) {
518
+ if (typ == Node::t) typ = Node::wt;
519
+ else SemErr(L"only terminals may be weak");
520
+ }
521
+ Node *p = tab->NewNode(typ, sym, t->line);
522
+ g = new Graph(p);
523
+
524
+ if (la->kind == 24 /* "<" */ || la->kind == 26 /* "<." */) {
525
+ Attribs(p);
526
+ if (kind != id) SemErr(L"a literal must not have attributes");
527
+ }
528
+ if (undef)
529
+ sym->attrPos = p->pos; // dummy
530
+ else if ((p->pos == NULL) != (sym->attrPos == NULL))
531
+ SemErr(L"attribute mismatch between declaration and use of this symbol");
532
+
533
+ break;
534
+ }
535
+ case 30 /* "(" */: {
536
+ Get();
537
+ Expression(g);
538
+ Expect(31 /* ")" */);
539
+ break;
540
+ }
541
+ case 32 /* "[" */: {
542
+ Get();
543
+ Expression(g);
544
+ Expect(33 /* "]" */);
545
+ tab->MakeOption(g);
546
+ break;
547
+ }
548
+ case 34 /* "{" */: {
549
+ Get();
550
+ Expression(g);
551
+ Expect(35 /* "}" */);
552
+ tab->MakeIteration(g);
553
+ break;
554
+ }
555
+ case 39 /* "(." */: {
556
+ SemText(pos);
557
+ Node *p = tab->NewNode(Node::sem, (Symbol*)NULL, 0);
558
+ p->pos = pos;
559
+ g = new Graph(p);
560
+
561
+ break;
562
+ }
563
+ case 23 /* "ANY" */: {
564
+ Get();
565
+ Node *p = tab->NewNode(Node::any, (Symbol*)NULL, 0); // p.set is set in tab->SetupAnys
566
+ g = new Graph(p);
567
+
568
+ break;
569
+ }
570
+ case 36 /* "SYNC" */: {
571
+ Get();
572
+ Node *p = tab->NewNode(Node::sync, (Symbol*)NULL, 0);
573
+ g = new Graph(p);
574
+
575
+ break;
576
+ }
577
+ default: SynErr(49); break;
578
+ }
579
+ if (g == NULL) // invalid start of Factor
580
+ g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0));
581
+
582
+ }
583
+
584
+ void Parser::Attribs(Node *p) {
585
+ if (la->kind == 24 /* "<" */) {
586
+ Get();
587
+ int beg = la->pos; int col = la->col; int line = la->line;
588
+ while (StartOf(9)) {
589
+ if (StartOf(10)) {
590
+ Get();
591
+ } else {
592
+ Get();
593
+ SemErr(L"bad string in attributes");
594
+ }
595
+ }
596
+ Expect(25 /* ">" */);
597
+ if (t->pos > beg) p->pos = new Position(beg, t->pos, col, line);
598
+ } else if (la->kind == 26 /* "<." */) {
599
+ Get();
600
+ int beg = la->pos; int col = la->col; int line = la->line;
601
+ while (StartOf(11)) {
602
+ if (StartOf(12)) {
603
+ Get();
604
+ } else {
605
+ Get();
606
+ SemErr(L"bad string in attributes");
607
+ }
608
+ }
609
+ Expect(27 /* ".>" */);
610
+ if (t->pos > beg) p->pos = new Position(beg, t->pos, col, line);
611
+ } else SynErr(50);
612
+ }
613
+
614
+ void Parser::Condition() {
615
+ while (StartOf(20)) {
616
+ if (la->kind == 30 /* "(" */) {
617
+ Get();
618
+ Condition();
619
+ } else {
620
+ Get();
621
+ }
622
+ }
623
+ Expect(31 /* ")" */);
624
+ }
625
+
626
+ void Parser::TokenTerm(Graph* &g) {
627
+ Graph *g2;
628
+ TokenFactor(g);
629
+ while (StartOf(8)) {
630
+ TokenFactor(g2);
631
+ tab->MakeSequence(g, g2);
632
+ }
633
+ if (la->kind == 38 /* "CONTEXT" */) {
634
+ Get();
635
+ Expect(30 /* "(" */);
636
+ TokenExpr(g2);
637
+ tab->SetContextTrans(g2->l); dfa->hasCtxMoves = true;
638
+ tab->MakeSequence(g, g2);
639
+ Expect(31 /* ")" */);
640
+ }
641
+ }
642
+
643
+ void Parser::TokenFactor(Graph* &g) {
644
+ wchar_t* name = NULL; int kind;
645
+ g = NULL;
646
+ if (la->kind == _ident || la->kind == _string || la->kind == _char) {
647
+ Sym(name, kind);
648
+ if (kind == id) {
649
+ CharClass *c = tab->FindCharClass(name);
650
+ if (c == NULL) {
651
+ SemErr(L"undefined name");
652
+ c = tab->NewCharClass(name, new CharSet());
653
+ }
654
+ Node *p = tab->NewNode(Node::clas, (Symbol*)NULL, 0); p->val = c->n;
655
+ g = new Graph(p);
656
+ tokenString = coco_string_create(noString);
657
+ } else { // str
658
+ g = tab->StrToGraph(name);
659
+ if (tokenString == NULL) tokenString = coco_string_create(name);
660
+ else tokenString = coco_string_create(noString);
661
+ }
662
+
663
+ } else if (la->kind == 30 /* "(" */) {
664
+ Get();
665
+ TokenExpr(g);
666
+ Expect(31 /* ")" */);
667
+ } else if (la->kind == 32 /* "[" */) {
668
+ Get();
669
+ TokenExpr(g);
670
+ Expect(33 /* "]" */);
671
+ tab->MakeOption(g); tokenString = coco_string_create(noString);
672
+ } else if (la->kind == 34 /* "{" */) {
673
+ Get();
674
+ TokenExpr(g);
675
+ Expect(35 /* "}" */);
676
+ tab->MakeIteration(g); tokenString = coco_string_create(noString);
677
+ } else SynErr(51);
678
+ if (g == NULL) // invalid start of TokenFactor
679
+ g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0));
680
+ }
681
+
682
+
683
+
684
+
685
+ // If the user declared a method Init and a method Destroy, they should
686
+ // be called in the constructor and the destructor respectively.
687
+ //
688
+ // The following templates are used to recognize if the user declared
689
+ // the methods Init and Destroy.
690
+
691
// Compile-time probe: InitExists is non-zero when &T::Init is usable as a
// template argument of type void (T::*)(), and zero otherwise.
template<typename T>
struct ParserInitExistsRecognizer {
    // Can only be named for a U whose &U::Init has the required type.
    template<typename U, void (U::*)() = &U::Init>
    struct InitProbe {};

    // Result tags, told apart purely by their size.
    struct AbsentTag  { char pad[1]; };
    struct PresentTag { char pad[2]; };

    // Fallback overload: always viable, worst possible match.
    template<typename U>
    static AbsentTag detect(...);

    // Preferred overload: only viable when InitProbe<U> can be formed.
    template<typename U>
    static PresentTag detect(InitProbe<U>*);

    enum { InitExists = (sizeof(detect<T>(NULL)) == sizeof(PresentTag)) };
};
714
+
715
// Compile-time probe: DestroyExists is non-zero when &T::Destroy is usable
// as a template argument of type void (T::*)(), and zero otherwise.
template<typename T>
struct ParserDestroyExistsRecognizer {
    // Can only be named for a U whose &U::Destroy has the required type.
    template<typename U, void (U::*)() = &U::Destroy>
    struct DestroyProbe {};

    // Result tags, told apart purely by their size.
    struct AbsentTag  { char pad[1]; };
    struct PresentTag { char pad[2]; };

    // Fallback overload: always viable, worst possible match.
    template<typename U>
    static AbsentTag detect(...);

    // Preferred overload: only viable when DestroyProbe<U> can be formed.
    template<typename U>
    static PresentTag detect(DestroyProbe<U>*);

    enum { DestroyExists = (sizeof(detect<T>(NULL)) == sizeof(PresentTag)) };
};
738
+
739
+ // The following templates are used to call the Init and Destroy methods if they exist.
740
+
741
+ // Generic case of the ParserInitCaller, gets used if the Init method is missing
742
+ template<typename T, bool = ParserInitExistsRecognizer<T>::InitExists>
743
+ struct ParserInitCaller {
744
+ static void CallInit(T *t) {
745
+ // nothing to do
746
+ }
747
+ };
748
+
749
+ // True case of the ParserInitCaller, gets used if the Init method exists
750
+ template<typename T>
751
+ struct ParserInitCaller<T, true> {
752
+ static void CallInit(T *t) {
753
+ t->Init();
754
+ }
755
+ };
756
+
757
+ // Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing
758
+ template<typename T, bool = ParserDestroyExistsRecognizer<T>::DestroyExists>
759
+ struct ParserDestroyCaller {
760
+ static void CallDestroy(T *t) {
761
+ // nothing to do
762
+ }
763
+ };
764
+
765
+ // True case of the ParserDestroyCaller, gets used if the Destroy method exists
766
+ template<typename T>
767
+ struct ParserDestroyCaller<T, true> {
768
+ static void CallDestroy(T *t) {
769
+ t->Destroy();
770
+ }
771
+ };
772
+
773
+ void Parser::Parse() {
774
+ t = NULL;
775
+ la = dummyToken = new Token();
776
+ la->val = coco_string_create(L"Dummy Token");
777
+ Get();
778
+ Coco();
779
+ Expect(0);
780
+ }
781
+
782
+ Parser::Parser(Scanner *scanner) {
783
+ maxT = 41;
784
+
785
+ ParserInitCaller<Parser>::CallInit(this);
786
+ dummyToken = NULL;
787
+ t = la = NULL;
788
+ minErrDist = 2;
789
+ errDist = minErrDist;
790
+ this->scanner = scanner;
791
+ errors = new Errors();
792
+ }
793
+
794
+ bool Parser::StartOf(int s) {
795
+ const bool T = true;
796
+ const bool x = false;
797
+
798
+ static bool set[21][43] = {
799
+ {T,T,x,T, x,T,x,x, x,x,T,T, x,x,x,T, T,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x},
800
+ {x,T,T,T, T,T,x,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x},
801
+ {x,T,T,T, T,T,T,x, x,x,x,x, T,T,T,x, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x},
802
+ {T,T,x,T, x,T,x,x, x,x,T,T, x,x,x,T, T,T,T,x, x,x,x,T, x,x,x,x, T,T,T,x, T,x,T,x, T,T,x,T, x,x,x},
803
+ {T,T,x,T, x,T,x,x, x,x,T,T, x,x,x,T, T,T,x,T, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x},
804
+ {T,T,x,T, x,T,x,x, x,x,T,T, x,x,x,T, T,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x},
805
+ {x,T,x,T, x,T,x,x, x,x,T,T, x,x,x,T, T,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x},
806
+ {x,x,x,x, x,x,x,x, x,x,x,T, x,T,T,T, T,x,T,x, x,x,x,x, x,x,x,x, x,x,x,T, x,T,x,T, x,x,x,x, x,x,x},
807
+ {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,T,x, T,x,T,x, x,x,x,x, x,x,x},
808
+ {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,x,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x},
809
+ {x,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,x,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x},
810
+ {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x},
811
+ {x,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x},
812
+ {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, x,T,x},
813
+ {x,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x, x,T,x},
814
+ {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,T,x, x,x,x,x, x,x,x,x, x,x,x,T, x,T,x,T, x,x,x,x, x,x,x},
815
+ {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,T,x, x,x,x,T, x,x,x,x, T,T,T,T, T,T,T,T, T,T,x,T, x,x,x},
816
+ {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x,x, x,T,T,x, T,x,T,x, T,T,x,T, x,x,x},
817
+ {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x,x, x,T,T,x, T,x,T,x, T,x,x,T, x,x,x},
818
+ {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,T,x, x,x,x,x, x,x,x,x, T,x,x,T, x,T,x,T, x,x,x,x, x,x,x},
819
+ {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x, T,T,T,T, T,T,T,T, T,T,x}
820
+ };
821
+
822
+
823
+
824
+ return set[s][la->kind];
825
+ }
826
+
827
+ Parser::~Parser() {
828
+ ParserDestroyCaller<Parser>::CallDestroy(this);
829
+ delete errors;
830
+ delete dummyToken;
831
+ }
832
+
833
+ Errors::Errors() {
834
+ count = 0;
835
+ }
836
+
837
+ void Errors::SynErr(int line, int col, int n) {
838
+ wchar_t* s;
839
+ switch (n) {
840
+ case 0: s = coco_string_create(L"EOF expected"); break;
841
+ case 1: s = coco_string_create(L"ident expected"); break;
842
+ case 2: s = coco_string_create(L"number expected"); break;
843
+ case 3: s = coco_string_create(L"string expected"); break;
844
+ case 4: s = coco_string_create(L"badString expected"); break;
845
+ case 5: s = coco_string_create(L"char expected"); break;
846
+ case 6: s = coco_string_create(L"\"COMPILER\" expected"); break;
847
+ case 7: s = coco_string_create(L"\"IGNORECASE\" expected"); break;
848
+ case 8: s = coco_string_create(L"\"CHARACTERS\" expected"); break;
849
+ case 9: s = coco_string_create(L"\"TOKENS\" expected"); break;
850
+ case 10: s = coco_string_create(L"\"PRAGMAS\" expected"); break;
851
+ case 11: s = coco_string_create(L"\"COMMENTS\" expected"); break;
852
+ case 12: s = coco_string_create(L"\"FROM\" expected"); break;
853
+ case 13: s = coco_string_create(L"\"TO\" expected"); break;
854
+ case 14: s = coco_string_create(L"\"NESTED\" expected"); break;
855
+ case 15: s = coco_string_create(L"\"IGNORE\" expected"); break;
856
+ case 16: s = coco_string_create(L"\"PRODUCTIONS\" expected"); break;
857
+ case 17: s = coco_string_create(L"\"=\" expected"); break;
858
+ case 18: s = coco_string_create(L"\".\" expected"); break;
859
+ case 19: s = coco_string_create(L"\"END\" expected"); break;
860
+ case 20: s = coco_string_create(L"\"+\" expected"); break;
861
+ case 21: s = coco_string_create(L"\"-\" expected"); break;
862
+ case 22: s = coco_string_create(L"\"..\" expected"); break;
863
+ case 23: s = coco_string_create(L"\"ANY\" expected"); break;
864
+ case 24: s = coco_string_create(L"\"<\" expected"); break;
865
+ case 25: s = coco_string_create(L"\">\" expected"); break;
866
+ case 26: s = coco_string_create(L"\"<.\" expected"); break;
867
+ case 27: s = coco_string_create(L"\".>\" expected"); break;
868
+ case 28: s = coco_string_create(L"\"|\" expected"); break;
869
+ case 29: s = coco_string_create(L"\"WEAK\" expected"); break;
870
+ case 30: s = coco_string_create(L"\"(\" expected"); break;
871
+ case 31: s = coco_string_create(L"\")\" expected"); break;
872
+ case 32: s = coco_string_create(L"\"[\" expected"); break;
873
+ case 33: s = coco_string_create(L"\"]\" expected"); break;
874
+ case 34: s = coco_string_create(L"\"{\" expected"); break;
875
+ case 35: s = coco_string_create(L"\"}\" expected"); break;
876
+ case 36: s = coco_string_create(L"\"SYNC\" expected"); break;
877
+ case 37: s = coco_string_create(L"\"IF\" expected"); break;
878
+ case 38: s = coco_string_create(L"\"CONTEXT\" expected"); break;
879
+ case 39: s = coco_string_create(L"\"(.\" expected"); break;
880
+ case 40: s = coco_string_create(L"\".)\" expected"); break;
881
+ case 41: s = coco_string_create(L"??? expected"); break;
882
+ case 42: s = coco_string_create(L"this symbol not expected in Coco"); break;
883
+ case 43: s = coco_string_create(L"this symbol not expected in TokenDecl"); break;
884
+ case 44: s = coco_string_create(L"invalid TokenDecl"); break;
885
+ case 45: s = coco_string_create(L"invalid AttrDecl"); break;
886
+ case 46: s = coco_string_create(L"invalid SimSet"); break;
887
+ case 47: s = coco_string_create(L"invalid Sym"); break;
888
+ case 48: s = coco_string_create(L"invalid Term"); break;
889
+ case 49: s = coco_string_create(L"invalid Factor"); break;
890
+ case 50: s = coco_string_create(L"invalid Attribs"); break;
891
+ case 51: s = coco_string_create(L"invalid TokenFactor"); break;
892
+
893
+ default:
894
+ {
895
+ wchar_t format[20];
896
+ coco_swprintf(format, 20, L"error %d", n);
897
+ s = coco_string_create(format);
898
+ }
899
+ break;
900
+ }
901
+ wprintf(L"-- line %d col %d: %ls\n", line, col, s);
902
+ coco_string_delete(s);
903
+ count++;
904
+ }
905
+
906
+ void Errors::Error(int line, int col, const wchar_t *s) {
907
+ wprintf(L"-- line %d col %d: %ls\n", line, col, s);
908
+ count++;
909
+ }
910
+
911
+ void Errors::Warning(int line, int col, const wchar_t *s) {
912
+ wprintf(L"-- line %d col %d: %ls\n", line, col, s);
913
+ }
914
+
915
+ void Errors::Warning(const wchar_t *s) {
916
+ wprintf(L"%ls\n", s);
917
+ }
918
+
919
+ void Errors::Exception(const wchar_t* s) {
920
+ wprintf(L"%ls", s);
921
+ exit(1);
922
+ }
923
+
924
+ } // namespace
925
+