ruco-cpp 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/Rakefile +17 -0
  3. data/bin/console +14 -0
  4. data/bin/ruco +30 -0
  5. data/bin/setup +7 -0
  6. data/data/ruco/Parser.frame +359 -0
  7. data/data/ruco/Scanner.frame +896 -0
  8. data/data/ruco/picojson/Changes +14 -0
  9. data/data/ruco/picojson/LICENSE +25 -0
  10. data/data/ruco/picojson/Makefile +8 -0
  11. data/data/ruco/picojson/README.mkdn +183 -0
  12. data/data/ruco/picojson/examples/github-issues.cc +110 -0
  13. data/data/ruco/picojson/examples/iostream.cc +70 -0
  14. data/data/ruco/picojson/examples/streaming.cc +76 -0
  15. data/data/ruco/picojson/picojson.h +1299 -0
  16. data/ext/cocor/Action.cpp +81 -0
  17. data/ext/cocor/Action.h +59 -0
  18. data/ext/cocor/ArrayList.cpp +79 -0
  19. data/ext/cocor/ArrayList.h +52 -0
  20. data/ext/cocor/BitArray.cpp +156 -0
  21. data/ext/cocor/BitArray.h +68 -0
  22. data/ext/cocor/CharClass.cpp +42 -0
  23. data/ext/cocor/CharClass.h +48 -0
  24. data/ext/cocor/CharSet.cpp +166 -0
  25. data/ext/cocor/CharSet.h +68 -0
  26. data/ext/cocor/Coco.atg +528 -0
  27. data/ext/cocor/Coco.cpp +173 -0
  28. data/ext/cocor/Comment.cpp +45 -0
  29. data/ext/cocor/Comment.h +51 -0
  30. data/ext/cocor/Copyright.frame +27 -0
  31. data/ext/cocor/DFA.cpp +865 -0
  32. data/ext/cocor/DFA.h +132 -0
  33. data/ext/cocor/Generator.cpp +182 -0
  34. data/ext/cocor/Generator.h +61 -0
  35. data/ext/cocor/Graph.h +59 -0
  36. data/ext/cocor/HashTable.cpp +115 -0
  37. data/ext/cocor/HashTable.h +84 -0
  38. data/ext/cocor/Makefile +11 -0
  39. data/ext/cocor/Melted.cpp +39 -0
  40. data/ext/cocor/Melted.h +51 -0
  41. data/ext/cocor/Node.cpp +69 -0
  42. data/ext/cocor/Node.h +86 -0
  43. data/ext/cocor/Parser.cpp +925 -0
  44. data/ext/cocor/Parser.frame +326 -0
  45. data/ext/cocor/Parser.h +153 -0
  46. data/ext/cocor/ParserGen.cpp +486 -0
  47. data/ext/cocor/ParserGen.h +99 -0
  48. data/ext/cocor/Position.cpp +37 -0
  49. data/ext/cocor/Position.h +46 -0
  50. data/ext/cocor/README.md +12 -0
  51. data/ext/cocor/Scanner.cpp +833 -0
  52. data/ext/cocor/Scanner.frame +897 -0
  53. data/ext/cocor/Scanner.h +291 -0
  54. data/ext/cocor/Sets.h +84 -0
  55. data/ext/cocor/SortedList.cpp +141 -0
  56. data/ext/cocor/SortedList.h +68 -0
  57. data/ext/cocor/State.cpp +77 -0
  58. data/ext/cocor/State.h +55 -0
  59. data/ext/cocor/StringBuilder.cpp +88 -0
  60. data/ext/cocor/StringBuilder.h +29 -0
  61. data/ext/cocor/Symbol.cpp +61 -0
  62. data/ext/cocor/Symbol.h +70 -0
  63. data/ext/cocor/Tab.cpp +1248 -0
  64. data/ext/cocor/Tab.h +245 -0
  65. data/ext/cocor/Target.cpp +41 -0
  66. data/ext/cocor/Target.h +48 -0
  67. data/ext/cocor/build.bat +3 -0
  68. data/ext/cocor/build.sh +4 -0
  69. data/ext/cocor/coc.bat +1 -0
  70. data/ext/cocor/coc.sh +2 -0
  71. data/ext/cocor/cocor_ruby_ext.cpp +124 -0
  72. data/ext/cocor/cygBuild.bat +1 -0
  73. data/ext/cocor/extconf.rb +5 -0
  74. data/ext/cocor/mingwbuild.bat +2 -0
  75. data/ext/cocor/mkmf.log +57 -0
  76. data/ext/cocor/zipsources.bat +1 -0
  77. data/lib/cocor.rb +14 -0
  78. data/lib/ruco/version.rb +3 -0
  79. data/lib/ruco.rb +728 -0
  80. metadata +195 -0
data/ext/cocor/DFA.cpp ADDED
@@ -0,0 +1,865 @@
1
+ /*-------------------------------------------------------------------------
2
+ DFA -- Generation of the Scanner Automaton
3
+ Compiler Generator Coco/R,
4
+ Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz
5
+ extended by M. Loeberbauer & A. Woess, Univ. of Linz
6
+ ported to C++ by Csaba Balazs, University of Szeged
7
+ with improvements by Pat Terry, Rhodes University
8
+
9
+ This program is free software; you can redistribute it and/or modify it
10
+ under the terms of the GNU General Public License as published by the
11
+ Free Software Foundation; either version 2, or (at your option) any
12
+ later version.
13
+
14
+ This program is distributed in the hope that it will be useful, but
15
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17
+ for more details.
18
+
19
+ You should have received a copy of the GNU General Public License along
20
+ with this program; if not, write to the Free Software Foundation, Inc.,
21
+ 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22
+
23
+ As an exception, it is allowed to write an extension of Coco/R that is
24
+ used as a plugin in non-free software.
25
+
26
+ If not otherwise stated, any source code generated by Coco/R (other than
27
+ Coco/R itself) does not fall under the GNU General Public License.
28
+ -------------------------------------------------------------------------*/
29
+
30
+ #include <stdlib.h>
31
+ #include <wchar.h>
32
+ #include "DFA.h"
33
+ #include "Tab.h"
34
+ #include "Parser.h"
35
+ #include "BitArray.h"
36
+ #include "Scanner.h"
37
+ #include "Generator.h"
38
+
39
+ namespace Coco {
40
+
41
+ //---------- Output primitives
42
+ wchar_t* DFA::Ch(wchar_t ch) {
43
+ wchar_t* format = new wchar_t[10];
44
+ if (ch < L' ' || ch >= 127 || ch == L'\'' || ch == L'\\')
45
+ coco_swprintf(format, 10, L"%d\0", (int) ch);
46
+ else
47
+ coco_swprintf(format, 10, L"L'%lc'\0", (int) ch);
48
+ return format;
49
+ }
50
+
51
+ wchar_t* DFA::ChCond(wchar_t ch) {
52
+ wchar_t* format = new wchar_t[20];
53
+ wchar_t* res = Ch(ch);
54
+ coco_swprintf(format, 20, L"ch == %ls\0", res);
55
+ delete [] res;
56
+ return format;
57
+ }
58
+
59
+ void DFA::PutRange(CharSet *s) {
60
+ for (CharSet::Range *r = s->head; r != NULL; r = r->next) {
61
+ if (r->from == r->to) {
62
+ wchar_t *from = Ch((wchar_t) r->from);
63
+ fwprintf(gen, L"ch == %ls", from);
64
+ delete [] from;
65
+ } else if (r->from == 0) {
66
+ wchar_t *to = Ch((wchar_t) r->to);
67
+ fwprintf(gen, L"ch <= %ls", to);
68
+ delete [] to;
69
+ } else {
70
+ wchar_t *from = Ch((wchar_t) r->from);
71
+ wchar_t *to = Ch((wchar_t) r->to);
72
+ fwprintf(gen, L"(ch >= %ls && ch <= %ls)", from, to);
73
+ delete [] from; delete [] to;
74
+ }
75
+ if (r->next != NULL) fwprintf(gen, L" || ");
76
+ }
77
+ }
78
+
79
+
80
+ //---------- State handling
81
+
82
+ State* DFA::NewState() {
83
+ State *s = new State(); s->nr = ++lastStateNr;
84
+ if (firstState == NULL) firstState = s; else lastState->next = s;
85
+ lastState = s;
86
+ return s;
87
+ }
88
+
89
+ void DFA::NewTransition(State *from, State *to, int typ, int sym, int tc) {
90
+ Target *t = new Target(to);
91
+ Action *a = new Action(typ, sym, tc); a->target = t;
92
+ from->AddAction(a);
93
+ if (typ == Node::clas) curSy->tokenKind = Symbol::classToken;
94
+ }
95
+
96
+ void DFA::CombineShifts() {
97
+ State *state;
98
+ Action *a, *b, *c;
99
+ CharSet *seta, *setb;
100
+ for (state = firstState; state != NULL; state = state->next) {
101
+ for (a = state->firstAction; a != NULL; a = a->next) {
102
+ b = a->next;
103
+ while (b != NULL)
104
+ if (a->target->state == b->target->state && a->tc == b->tc) {
105
+ seta = a->Symbols(tab); setb = b->Symbols(tab);
106
+ seta->Or(setb);
107
+ a->ShiftWith(seta, tab);
108
+ c = b; b = b->next; state->DetachAction(c);
109
+ } else b = b->next;
110
+ }
111
+ }
112
+ }
113
+
114
+ void DFA::FindUsedStates(State *state, BitArray *used) {
115
+ if ((*used)[state->nr]) return;
116
+ used->Set(state->nr, true);
117
+ for (Action *a = state->firstAction; a != NULL; a = a->next)
118
+ FindUsedStates(a->target->state, used);
119
+ }
120
+
121
+ void DFA::DeleteRedundantStates() {
122
+ //State *newState = new State[State::lastNr + 1];
123
+ State **newState = (State**) malloc (sizeof(State*) * (lastStateNr + 1));
124
+ BitArray *used = new BitArray(lastStateNr + 1);
125
+ FindUsedStates(firstState, used);
126
+ // combine equal final states
127
+ for (State *s1 = firstState->next; s1 != NULL; s1 = s1->next) // firstState cannot be final
128
+ if ((*used)[s1->nr] && s1->endOf != NULL && s1->firstAction == NULL && !(s1->ctx))
129
+ for (State *s2 = s1->next; s2 != NULL; s2 = s2->next)
130
+ if ((*used)[s2->nr] && s1->endOf == s2->endOf && s2->firstAction == NULL && !(s2->ctx)) {
131
+ used->Set(s2->nr, false); newState[s2->nr] = s1;
132
+ }
133
+
134
+ State *state;
135
+ for (state = firstState; state != NULL; state = state->next)
136
+ if ((*used)[state->nr])
137
+ for (Action *a = state->firstAction; a != NULL; a = a->next)
138
+ if (!((*used)[a->target->state->nr]))
139
+ a->target->state = newState[a->target->state->nr];
140
+ // delete unused states
141
+ lastState = firstState; lastStateNr = 0; // firstState has number 0
142
+ for (state = firstState->next; state != NULL; state = state->next)
143
+ if ((*used)[state->nr]) {state->nr = ++lastStateNr; lastState = state;}
144
+ else lastState->next = state->next;
145
+ free (newState);
146
+ delete used;
147
+ }
148
+
149
+ State* DFA::TheState(Node *p) {
150
+ State *state;
151
+ if (p == NULL) {state = NewState(); state->endOf = curSy; return state;}
152
+ else return p->state;
153
+ }
154
+
155
+ void DFA::Step(State *from, Node *p, BitArray *stepped) {
156
+ if (p == NULL) return;
157
+ stepped->Set(p->n, true);
158
+
159
+ if (p->typ == Node::clas || p->typ == Node::chr) {
160
+ NewTransition(from, TheState(p->next), p->typ, p->val, p->code);
161
+ } else if (p->typ == Node::alt) {
162
+ Step(from, p->sub, stepped); Step(from, p->down, stepped);
163
+ } else if (p->typ == Node::iter) {
164
+ if (tab->DelSubGraph(p->sub)) {
165
+ parser->SemErr(L"contents of {...} must not be deletable");
166
+ return;
167
+ }
168
+ if (p->next != NULL && !((*stepped)[p->next->n])) Step(from, p->next, stepped);
169
+ Step(from, p->sub, stepped);
170
+ if (p->state != from) {
171
+ BitArray *newStepped = new BitArray(tab->nodes->Count);
172
+ Step(p->state, p, newStepped);
173
+ delete newStepped;
174
+ }
175
+ } else if (p->typ == Node::opt) {
176
+ if (p->next != NULL && !((*stepped)[p->next->n])) Step(from, p->next, stepped);
177
+ Step(from, p->sub, stepped);
178
+ }
179
+ }
180
+
181
+ // Assigns a state n.state to every node n. There will be a transition from
182
+ // n.state to n.next.state triggered by n.val. All nodes in an alternative
183
+ // chain are represented by the same state.
184
+ // Numbering scheme:
185
+ // - any node after a chr, clas, opt, or alt, must get a new number
186
+ // - if a nested structure starts with an iteration the iter node must get a new number
187
+ // - if an iteration follows an iteration, it must get a new number
188
+ void DFA::NumberNodes(Node *p, State *state, bool renumIter) {
189
+ if (p == NULL) return;
190
+ if (p->state != NULL) return; // already visited;
191
+ if ((state == NULL) || ((p->typ == Node::iter) && renumIter)) state = NewState();
192
+ p->state = state;
193
+ if (tab->DelGraph(p)) state->endOf = curSy;
194
+
195
+ if (p->typ == Node::clas || p->typ == Node::chr) {
196
+ NumberNodes(p->next, NULL, false);
197
+ } else if (p->typ == Node::opt) {
198
+ NumberNodes(p->next, NULL, false);
199
+ NumberNodes(p->sub, state, true);
200
+ } else if (p->typ == Node::iter) {
201
+ NumberNodes(p->next, state, true);
202
+ NumberNodes(p->sub, state, true);
203
+ } else if (p->typ == Node::alt) {
204
+ NumberNodes(p->next, NULL, false);
205
+ NumberNodes(p->sub, state, true);
206
+ NumberNodes(p->down, state, renumIter);
207
+ }
208
+ }
209
+
210
+ void DFA::FindTrans (Node *p, bool start, BitArray *marked) {
211
+ if (p == NULL || (*marked)[p->n]) return;
212
+ marked->Set(p->n, true);
213
+ if (start) {
214
+ BitArray *stepped = new BitArray(tab->nodes->Count);
215
+ Step(p->state, p, stepped); // start of group of equally numbered nodes
216
+ delete stepped;
217
+ }
218
+
219
+ if (p->typ == Node::clas || p->typ == Node::chr) {
220
+ FindTrans(p->next, true, marked);
221
+ } else if (p->typ == Node::opt) {
222
+ FindTrans(p->next, true, marked); FindTrans(p->sub, false, marked);
223
+ } else if (p->typ == Node::iter) {
224
+ FindTrans(p->next, false, marked); FindTrans(p->sub, false, marked);
225
+ } else if (p->typ == Node::alt) {
226
+ FindTrans(p->sub, false, marked); FindTrans(p->down, false, marked);
227
+ }
228
+ }
229
+
230
+ void DFA::ConvertToStates(Node *p, Symbol *sym) {
231
+ curGraph = p; curSy = sym;
232
+ if (tab->DelGraph(curGraph)) {
233
+ parser->SemErr(L"token might be empty");
234
+ return;
235
+ }
236
+ NumberNodes(curGraph, firstState, true);
237
+ FindTrans(curGraph, true, new BitArray(tab->nodes->Count));
238
+ if (p->typ == Node::iter) {
239
+ BitArray *stepped = new BitArray(tab->nodes->Count);
240
+ Step(firstState, p, stepped);
241
+ delete stepped;
242
+ }
243
+ }
244
+
245
+ // match string against current automaton; store it either as a fixedToken or as a litToken
246
+ void DFA::MatchLiteral(wchar_t* s, Symbol *sym) {
247
+ wchar_t *subS = coco_string_create(s, 1, coco_string_length(s)-2);
248
+ s = tab->Unescape(subS);
249
+ coco_string_delete(subS);
250
+ int i, len = coco_string_length(s);
251
+ State *state = firstState;
252
+ Action *a = NULL;
253
+ for (i = 0; i < len; i++) { // try to match s against existing DFA
254
+ a = FindAction(state, s[i]);
255
+ if (a == NULL) break;
256
+ state = a->target->state;
257
+ }
258
+ // if s was not totally consumed or leads to a non-final state => make new DFA from it
259
+ if (i != len || state->endOf == NULL) {
260
+ state = firstState; i = 0; a = NULL;
261
+ dirtyDFA = true;
262
+ }
263
+ for (; i < len; i++) { // make new DFA for s[i..len-1]
264
+ State *to = NewState();
265
+ NewTransition(state, to, Node::chr, s[i], Node::normalTrans);
266
+ state = to;
267
+ }
268
+ coco_string_delete(s);
269
+ Symbol *matchedSym = state->endOf;
270
+ if (state->endOf == NULL) {
271
+ state->endOf = sym;
272
+ } else if (matchedSym->tokenKind == Symbol::fixedToken || (a != NULL && a->tc == Node::contextTrans)) {
273
+ // s matched a token with a fixed definition or a token with an appendix that will be cut off
274
+ wchar_t format[200];
275
+ coco_swprintf(format, 200, L"tokens %ls and %ls cannot be distinguished", sym->name, matchedSym->name);
276
+ parser->SemErr(format);
277
+ } else { // matchedSym == classToken || classLitToken
278
+ matchedSym->tokenKind = Symbol::classLitToken;
279
+ sym->tokenKind = Symbol::litToken;
280
+ }
281
+ }
282
+
283
+ void DFA::SplitActions(State *state, Action *a, Action *b) {
284
+ Action *c; CharSet *seta, *setb, *setc;
285
+ seta = a->Symbols(tab); setb = b->Symbols(tab);
286
+ if (seta->Equals(setb)) {
287
+ a->AddTargets(b);
288
+ state->DetachAction(b);
289
+ } else if (seta->Includes(setb)) {
290
+ setc = seta->Clone(); setc->Subtract(setb);
291
+ b->AddTargets(a);
292
+ a->ShiftWith(setc, tab);
293
+ } else if (setb->Includes(seta)) {
294
+ setc = setb->Clone(); setc->Subtract(seta);
295
+ a->AddTargets(b);
296
+ b->ShiftWith(setc, tab);
297
+ } else {
298
+ setc = seta->Clone(); setc->And(setb);
299
+ seta->Subtract(setc);
300
+ setb->Subtract(setc);
301
+ a->ShiftWith(seta, tab);
302
+ b->ShiftWith(setb, tab);
303
+ c = new Action(0, 0, Node::normalTrans); // typ and sym are set in ShiftWith
304
+ c->AddTargets(a);
305
+ c->AddTargets(b);
306
+ c->ShiftWith(setc, tab);
307
+ state->AddAction(c);
308
+ }
309
+ }
310
+
311
+ bool DFA::Overlap(Action *a, Action *b) {
312
+ CharSet *seta, *setb;
313
+ if (a->typ == Node::chr)
314
+ if (b->typ == Node::chr) return (a->sym == b->sym);
315
+ else {setb = tab->CharClassSet(b->sym); return setb->Get(a->sym);}
316
+ else {
317
+ seta = tab->CharClassSet(a->sym);
318
+ if (b->typ == Node::chr) return seta->Get(b->sym);
319
+ else {setb = tab->CharClassSet(b->sym); return seta->Intersects(setb);}
320
+ }
321
+ }
322
+
323
+ bool DFA::MakeUnique(State *state) { // return true if actions were split
324
+ bool changed = false;
325
+ for (Action *a = state->firstAction; a != NULL; a = a->next)
326
+ for (Action *b = a->next; b != NULL; b = b->next)
327
+ if (Overlap(a, b)) {
328
+ SplitActions(state, a, b);
329
+ changed = true;
330
+ }
331
+ return changed;
332
+ }
333
+
334
+ void DFA::MeltStates(State *state) {
335
+ bool changed, ctx;
336
+ BitArray *targets;
337
+ Symbol *endOf;
338
+ for (Action *action = state->firstAction; action != NULL; action = action->next) {
339
+ if (action->target->next != NULL) {
340
+ GetTargetStates(action, targets, endOf, ctx);
341
+ Melted *melt = StateWithSet(targets);
342
+ if (melt == NULL) {
343
+ State *s = NewState(); s->endOf = endOf; s->ctx = ctx;
344
+ for (Target *targ = action->target; targ != NULL; targ = targ->next)
345
+ s->MeltWith(targ->state);
346
+ do {changed = MakeUnique(s);} while (changed);
347
+ melt = NewMelted(targets, s);
348
+ }
349
+ action->target->next = NULL;
350
+ action->target->state = melt->state;
351
+ }
352
+ }
353
+ }
354
+
355
+ void DFA::FindCtxStates() {
356
+ for (State *state = firstState; state != NULL; state = state->next)
357
+ for (Action *a = state->firstAction; a != NULL; a = a->next)
358
+ if (a->tc == Node::contextTrans) a->target->state->ctx = true;
359
+ }
360
+
361
+ void DFA::MakeDeterministic() {
362
+ State *state;
363
+ bool changed;
364
+ lastSimState = lastState->nr;
365
+ maxStates = 2 * lastSimState; // heuristic for set size in Melted.set
366
+ FindCtxStates();
367
+ for (state = firstState; state != NULL; state = state->next)
368
+ do {changed = MakeUnique(state);} while (changed);
369
+ for (state = firstState; state != NULL; state = state->next)
370
+ MeltStates(state);
371
+ DeleteRedundantStates();
372
+ CombineShifts();
373
+ }
374
+
375
+ void DFA::PrintStates() {
376
+ fwprintf(trace, L"\n");
377
+ fwprintf(trace, L"---------- states ----------\n");
378
+ for (State *state = firstState; state != NULL; state = state->next) {
379
+ bool first = true;
380
+ if (state->endOf == NULL) fwprintf(trace, L" ");
381
+ else {
382
+ wchar_t *paddedName = tab->Name(state->endOf->name);
383
+ fwprintf(trace, L"E(%12s)", paddedName);
384
+ coco_string_delete(paddedName);
385
+ }
386
+ fwprintf(trace, L"%3d:", state->nr);
387
+ if (state->firstAction == NULL) fwprintf(trace, L"\n");
388
+ for (Action *action = state->firstAction; action != NULL; action = action->next) {
389
+ if (first) {fwprintf(trace, L" "); first = false;} else fwprintf(trace, L" ");
390
+
391
+ if (action->typ == Node::clas) fwprintf(trace, L"%ls", ((CharClass*)(*tab->classes)[action->sym])->name);
392
+ else fwprintf(trace, L"%3s", Ch((wchar_t)action->sym));
393
+ for (Target *targ = action->target; targ != NULL; targ = targ->next) {
394
+ fwprintf(trace, L"%3d", targ->state->nr);
395
+ }
396
+ if (action->tc == Node::contextTrans) fwprintf(trace, L" context\n"); else fwprintf(trace, L"\n");
397
+ }
398
+ }
399
+ fwprintf(trace, L"\n---------- character classes ----------\n");
400
+ tab->WriteCharClasses();
401
+ }
402
+
403
+ //---------------------------- actions --------------------------------
404
+
405
+ Action* DFA::FindAction(State *state, wchar_t ch) {
406
+ for (Action *a = state->firstAction; a != NULL; a = a->next)
407
+ if (a->typ == Node::chr && ch == a->sym) return a;
408
+ else if (a->typ == Node::clas) {
409
+ CharSet *s = tab->CharClassSet(a->sym);
410
+ if (s->Get(ch)) return a;
411
+ }
412
+ return NULL;
413
+ }
414
+
415
+
416
+ void DFA::GetTargetStates(Action *a, BitArray* &targets, Symbol* &endOf, bool &ctx) {
417
+ // compute the set of target states
418
+ targets = new BitArray(maxStates); endOf = NULL;
419
+ ctx = false;
420
+ for (Target *t = a->target; t != NULL; t = t->next) {
421
+ int stateNr = t->state->nr;
422
+ if (stateNr <= lastSimState) { targets->Set(stateNr, true); }
423
+ else { targets->Or(MeltedSet(stateNr)); }
424
+ if (t->state->endOf != NULL) {
425
+ if (endOf == NULL || endOf == t->state->endOf) {
426
+ endOf = t->state->endOf;
427
+ }
428
+ else {
429
+ wprintf(L"Tokens %ls and %ls cannot be distinguished\n", endOf->name, t->state->endOf->name);
430
+ errors->count++;
431
+ }
432
+ }
433
+ if (t->state->ctx) {
434
+ ctx = true;
435
+ // The following check seems to be unnecessary. It reported an error
436
+ // if a symbol + context was the prefix of another symbol, e.g.
437
+ // s1 = "a" "b" "c".
438
+ // s2 = "a" CONTEXT("b").
439
+ // But this is ok.
440
+ // if (t.state.endOf != null) {
441
+ // Console.WriteLine("Ambiguous context clause");
442
+ // Errors.count++;
443
+ // }
444
+ }
445
+ }
446
+ }
447
+
448
+
449
+ //------------------------- melted states ------------------------------
450
+
451
+
452
+ Melted* DFA::NewMelted(BitArray *set, State *state) {
453
+ Melted *m = new Melted(set, state);
454
+ m->next = firstMelted; firstMelted = m;
455
+ return m;
456
+
457
+ }
458
+
459
+ BitArray* DFA::MeltedSet(int nr) {
460
+ Melted *m = firstMelted;
461
+ while (m != NULL) {
462
+ if (m->state->nr == nr) return m->set; else m = m->next;
463
+ }
464
+ //Errors::Exception("-- compiler error in Melted::Set");
465
+ //throw new Exception("-- compiler error in Melted::Set");
466
+ return NULL;
467
+ }
468
+
469
+ Melted* DFA::StateWithSet(BitArray *s) {
470
+ for (Melted *m = firstMelted; m != NULL; m = m->next)
471
+ if (Sets::Equals(s, m->set)) return m;
472
+ return NULL;
473
+ }
474
+
475
+
476
+ //------------------------ comments --------------------------------
477
+
478
+ wchar_t* DFA::CommentStr(Node *p) {
479
+ StringBuilder s = StringBuilder();
480
+ while (p != NULL) {
481
+ if (p->typ == Node::chr) {
482
+ s.Append((wchar_t)p->val);
483
+ } else if (p->typ == Node::clas) {
484
+ CharSet *set = tab->CharClassSet(p->val);
485
+ if (set->Elements() != 1) parser->SemErr(L"character set contains more than 1 character");
486
+ s.Append((wchar_t) set->First());
487
+ }
488
+ else parser->SemErr(L"comment delimiters may not be structured");
489
+ p = p->next;
490
+ }
491
+ if (s.GetLength() == 0 || s.GetLength() > 2) {
492
+ parser->SemErr(L"comment delimiters must be 1 or 2 characters long");
493
+ s = StringBuilder(L"?");
494
+ }
495
+ return s.ToString();
496
+ }
497
+
498
+
499
+ void DFA::NewComment(Node *from, Node *to, bool nested) {
500
+ Comment *c = new Comment(CommentStr(from), CommentStr(to), nested);
501
+ c->next = firstComment; firstComment = c;
502
+ }
503
+
504
+
505
+ //------------------------ scanner generation ----------------------
506
+
507
+ void DFA::GenComBody(Comment *com) {
508
+ fwprintf(gen, L"\t\tfor(;;) {\n");
509
+
510
+ wchar_t* res = ChCond(com->stop[0]);
511
+ fwprintf(gen, L"\t\t\tif (%ls) ", res);
512
+ fwprintf(gen, L"{\n");
513
+ delete [] res;
514
+
515
+ if (coco_string_length(com->stop) == 1) {
516
+ fwprintf(gen, L"\t\t\t\tlevel--;\n");
517
+ fwprintf(gen, L"\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }\n");
518
+ fwprintf(gen, L"\t\t\t\tNextCh();\n");
519
+ } else {
520
+ fwprintf(gen, L"\t\t\t\tNextCh();\n");
521
+ wchar_t* res = ChCond(com->stop[1]);
522
+ fwprintf(gen, L"\t\t\t\tif (%ls) {\n", res);
523
+ delete [] res;
524
+ fwprintf(gen, L"\t\t\t\t\tlevel--;\n");
525
+ fwprintf(gen, L"\t\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }\n");
526
+ fwprintf(gen, L"\t\t\t\t\tNextCh();\n");
527
+ fwprintf(gen, L"\t\t\t\t}\n");
528
+ }
529
+ if (com->nested) {
530
+ fwprintf(gen, L"\t\t\t}");
531
+ wchar_t* res = ChCond(com->start[0]);
532
+ fwprintf(gen, L" else if (%ls) ", res);
533
+ delete [] res;
534
+ fwprintf(gen, L"{\n");
535
+ if (coco_string_length(com->stop) == 1)
536
+ fwprintf(gen, L"\t\t\t\tlevel++; NextCh();\n");
537
+ else {
538
+ fwprintf(gen, L"\t\t\t\tNextCh();\n");
539
+ wchar_t* res = ChCond(com->start[1]);
540
+ fwprintf(gen, L"\t\t\t\tif (%ls) ", res);
541
+ delete [] res;
542
+ fwprintf(gen, L"{\n");
543
+ fwprintf(gen, L"\t\t\t\t\tlevel++; NextCh();\n");
544
+ fwprintf(gen, L"\t\t\t\t}\n");
545
+ }
546
+ }
547
+ fwprintf(gen, L"\t\t\t} else if (ch == buffer->EoF) return false;\n");
548
+ fwprintf(gen, L"\t\t\telse NextCh();\n");
549
+ fwprintf(gen, L"\t\t}\n");
550
+ }
551
+
552
+ void DFA::GenCommentHeader(Comment *com, int i) {
553
+ fwprintf(gen, L"\tbool Comment%d();\n", i);
554
+ }
555
+
556
+ void DFA::GenComment(Comment *com, int i) {
557
+ fwprintf(gen, L"\n");
558
+ fwprintf(gen, L"bool Scanner::Comment%d() ", i);
559
+ fwprintf(gen, L"{\n");
560
+ fwprintf(gen, L"\tint level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;\n");
561
+ if (coco_string_length(com->start) == 1) {
562
+ fwprintf(gen, L"\tNextCh();\n");
563
+ GenComBody(com);
564
+ } else {
565
+ fwprintf(gen, L"\tNextCh();\n");
566
+ wchar_t* res = ChCond(com->start[1]);
567
+ fwprintf(gen, L"\tif (%ls) ", res);
568
+ delete [] res;
569
+ fwprintf(gen, L"{\n");
570
+
571
+ fwprintf(gen, L"\t\tNextCh();\n");
572
+ GenComBody(com);
573
+
574
+ fwprintf(gen, L"\t} else {\n");
575
+ fwprintf(gen, L"\t\tbuffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0;\n");
576
+ fwprintf(gen, L"\t}\n");
577
+ fwprintf(gen, L"\treturn false;\n");
578
+ }
579
+ fwprintf(gen, L"}\n");
580
+ }
581
+
582
+ wchar_t* DFA::SymName(Symbol *sym) { // real name value is stored in Tab.literals
583
+ if (('a'<=sym->name[0] && sym->name[0]<='z') ||
584
+ ('A'<=sym->name[0] && sym->name[0]<='Z')) { //Char::IsLetter(sym->name[0])
585
+
586
+ Iterator *iter = tab->literals->GetIterator();
587
+ while (iter->HasNext()) {
588
+ DictionaryEntry *e = iter->Next();
589
+ if (e->val == sym) { return e->key; }
590
+ }
591
+ }
592
+ return sym->name;
593
+ }
594
+
595
+ void DFA::GenLiterals () {
596
+ Symbol *sym;
597
+
598
+ ArrayList *ts[2];
599
+ ts[0] = tab->terminals;
600
+ ts[1] = tab->pragmas;
601
+
602
+ for (int i = 0; i < 2; ++i) {
603
+ for (int j = 0; j < ts[i]->Count; j++) {
604
+ sym = (Symbol*) ((*(ts[i]))[j]);
605
+ if (sym->tokenKind == Symbol::litToken) {
606
+ wchar_t* name = coco_string_create(SymName(sym));
607
+ if (ignoreCase) {
608
+ wchar_t *oldName = name;
609
+ name = coco_string_create_lower(name);
610
+ coco_string_delete(oldName);
611
+ }
612
+ // sym.name stores literals with quotes, e.g. "\"Literal\""
613
+
614
+ fwprintf(gen, L"\tkeywords.set(L");
615
+ // write keyword, escape non printable characters
616
+ for (int k = 0; name[k] != L'\0'; k++) {
617
+ wchar_t c = name[k];
618
+ fwprintf(gen, (c >= 32 && c <= 127) ? L"%lc" : L"\\x%04x", c);
619
+ }
620
+ fwprintf(gen, L", %d);\n", sym->n);
621
+
622
+ coco_string_delete(name);
623
+ }
624
+ }
625
+ }
626
+ }
627
+
628
+ int DFA::GenNamespaceOpen(const wchar_t *nsName) {
629
+ if (nsName == NULL || coco_string_length(nsName) == 0) {
630
+ return 0;
631
+ }
632
+ const int len = coco_string_length(nsName);
633
+ int startPos = 0;
634
+ int nrOfNs = 0;
635
+ do {
636
+ int curLen = coco_string_indexof(nsName + startPos, COCO_CPP_NAMESPACE_SEPARATOR);
637
+ if (curLen == -1) { curLen = len - startPos; }
638
+ wchar_t *curNs = coco_string_create(nsName, startPos, curLen);
639
+ fwprintf(gen, L"namespace %ls {\n", curNs);
640
+ coco_string_delete(curNs);
641
+ startPos = startPos + curLen + 1;
642
+ if (startPos < len && nsName[startPos] == COCO_CPP_NAMESPACE_SEPARATOR) {
643
+ ++startPos;
644
+ }
645
+ ++nrOfNs;
646
+ } while (startPos < len);
647
+ return nrOfNs;
648
+ }
649
+
650
+ void DFA::GenNamespaceClose(int nrOfNs) {
651
+ for (int i = 0; i < nrOfNs; ++i) {
652
+ fwprintf(gen, L"} // namespace\n");
653
+ }
654
+ }
655
+
656
+ void DFA::CheckLabels() {
657
+ int i;
658
+ State *state;
659
+ Action *action;
660
+
661
+ for (i=0; i < lastStateNr+1; i++) {
662
+ existLabel[i] = false;
663
+ }
664
+
665
+ for (state = firstState->next; state != NULL; state = state->next) {
666
+ for (action = state->firstAction; action != NULL; action = action->next) {
667
+ existLabel[action->target->state->nr] = true;
668
+ }
669
+ }
670
+ }
671
+
672
+ void DFA::WriteState(State *state) {
673
+ Symbol *endOf = state->endOf;
674
+ fwprintf(gen, L"\t\tcase %d:\n", state->nr);
675
+ if (existLabel[state->nr])
676
+ fwprintf(gen, L"\t\t\tcase_%d:\n", state->nr);
677
+
678
+ if (endOf != NULL && state->firstAction != NULL) {
679
+ fwprintf(gen, L"\t\t\trecEnd = pos; recKind = %d;\n", endOf->n);
680
+ }
681
+ bool ctxEnd = state->ctx;
682
+
683
+ for (Action *action = state->firstAction; action != NULL; action = action->next) {
684
+ if (action == state->firstAction) fwprintf(gen, L"\t\t\tif (");
685
+ else fwprintf(gen, L"\t\t\telse if (");
686
+ if (action->typ == Node::chr) {
687
+ wchar_t* res = ChCond((wchar_t)action->sym);
688
+ fwprintf(gen, L"%ls", res);
689
+ delete [] res;
690
+ } else PutRange(tab->CharClassSet(action->sym));
691
+ fwprintf(gen, L") {");
692
+
693
+ if (action->tc == Node::contextTrans) {
694
+ fwprintf(gen, L"apx++; "); ctxEnd = false;
695
+ } else if (state->ctx)
696
+ fwprintf(gen, L"apx = 0; ");
697
+ fwprintf(gen, L"AddCh(); goto case_%d;", action->target->state->nr);
698
+ fwprintf(gen, L"}\n");
699
+ }
700
+ if (state->firstAction == NULL)
701
+ fwprintf(gen, L"\t\t\t{");
702
+ else
703
+ fwprintf(gen, L"\t\t\telse {");
704
+ if (ctxEnd) { // final context state: cut appendix
705
+ fwprintf(gen, L"\n");
706
+ fwprintf(gen, L"\t\t\t\ttlen -= apx;\n");
707
+ fwprintf(gen, L"\t\t\t\tSetScannerBehindT();");
708
+
709
+ fwprintf(gen, L"\t\t\t\tbuffer->SetPos(t->pos); NextCh(); line = t->line; col = t->col;\n");
710
+ fwprintf(gen, L"\t\t\t\tfor (int i = 0; i < tlen; i++) NextCh();\n");
711
+ fwprintf(gen, L"\t\t\t\t");
712
+ }
713
+ if (endOf == NULL) {
714
+ fwprintf(gen, L"goto case_0;}\n");
715
+ } else {
716
+ fwprintf(gen, L"t->kind = %d; ", endOf->n);
717
+ if (endOf->tokenKind == Symbol::classLitToken) {
718
+ if (ignoreCase) {
719
+ fwprintf(gen, L"wchar_t *literal = coco_string_create_lower(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;}\n");
720
+ } else {
721
+ fwprintf(gen, L"wchar_t *literal = coco_string_create(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;}\n");
722
+ }
723
+ } else {
724
+ fwprintf(gen, L"break;}\n");
725
+ }
726
+ }
727
+ }
728
+
729
+ void DFA::WriteStartTab() {
730
+ bool firstRange = true;
731
+ for (Action *action = firstState->firstAction; action != NULL; action = action->next) {
732
+ int targetState = action->target->state->nr;
733
+ if (action->typ == Node::chr) {
734
+ fwprintf(gen, L"\tstart.set(%d, %d);\n", action->sym, targetState);
735
+ } else {
736
+ CharSet *s = tab->CharClassSet(action->sym);
737
+ for (CharSet::Range *r = s->head; r != NULL; r = r->next) {
738
+ if (firstRange) {
739
+ firstRange = false;
740
+ fwprintf(gen, L"\tint i;\n");
741
+ }
742
+ fwprintf(gen, L"\tfor (i = %d; i <= %d; ++i) start.set(i, %d);\n", r->from, r->to, targetState);
743
+ }
744
+ }
745
+ }
746
+ fwprintf(gen, L"\t\tstart.set(Buffer::EoF, -1);\n");
747
+ }
748
+
749
+ void DFA::WriteScanner() {
750
+ Generator g = Generator(tab, errors);
751
+ fram = g.OpenFrame(L"Scanner.frame");
752
+ gen = g.OpenGen(L"Scanner.h");
753
+ if (dirtyDFA) MakeDeterministic();
754
+
755
+ // Header
756
+ g.GenCopyright();
757
+ g.SkipFramePart(L"-->begin");
758
+
759
+ g.CopyFramePart(L"-->prefix");
760
+ g.GenPrefixFromNamespace();
761
+
762
+ g.CopyFramePart(L"-->prefix");
763
+ g.GenPrefixFromNamespace();
764
+
765
+ g.CopyFramePart(L"-->namespace_open");
766
+ int nrOfNs = GenNamespaceOpen(tab->nsName);
767
+
768
+ g.CopyFramePart(L"-->casing0");
769
+ if (ignoreCase) {
770
+ fwprintf(gen, L"\twchar_t valCh; // current input character (for token.val)\n");
771
+ }
772
+ g.CopyFramePart(L"-->commentsheader");
773
+ Comment *com = firstComment;
774
+ int cmdIdx = 0;
775
+ while (com != NULL) {
776
+ GenCommentHeader(com, cmdIdx);
777
+ com = com->next; cmdIdx++;
778
+ }
779
+
780
+ g.CopyFramePart(L"-->namespace_close");
781
+ GenNamespaceClose(nrOfNs);
782
+
783
+ g.CopyFramePart(L"-->implementation");
784
+ fclose(gen);
785
+
786
+ // Source
787
+ gen = g.OpenGen(L"Scanner.cpp");
788
+ g.GenCopyright();
789
+ g.SkipFramePart(L"-->begin");
790
+ g.CopyFramePart(L"-->namespace_open");
791
+ nrOfNs = GenNamespaceOpen(tab->nsName);
792
+
793
+ g.CopyFramePart(L"-->declarations");
794
+ fwprintf(gen, L"\tmaxT = %d;\n", tab->terminals->Count - 1);
795
+ fwprintf(gen, L"\tnoSym = %d;\n", tab->noSym->n);
796
+ WriteStartTab();
797
+ GenLiterals();
798
+
799
+ g.CopyFramePart(L"-->initialization");
800
+ g.CopyFramePart(L"-->casing1");
801
+ if (ignoreCase) {
802
+ fwprintf(gen, L"\t\tvalCh = ch;\n");
803
+ fwprintf(gen, L"\t\tif ('A' <= ch && ch <= 'Z') ch = ch - 'A' + 'a'; // ch.ToLower()");
804
+ }
805
+ g.CopyFramePart(L"-->casing2");
806
+ fwprintf(gen, L"\t\ttval[tlen++] = ");
807
+ if (ignoreCase) fwprintf(gen, L"valCh;"); else fwprintf(gen, L"ch;");
808
+
809
+ g.CopyFramePart(L"-->comments");
810
+ com = firstComment; cmdIdx = 0;
811
+ while (com != NULL) {
812
+ GenComment(com, cmdIdx);
813
+ com = com->next; cmdIdx++;
814
+ }
815
+
816
+ g.CopyFramePart(L"-->scan1");
817
+ fwprintf(gen, L"\t\t\t");
818
+ if (tab->ignored->Elements() > 0) { PutRange(tab->ignored); } else { fwprintf(gen, L"false"); }
819
+
820
+ g.CopyFramePart(L"-->scan2");
821
+ if (firstComment != NULL) {
822
+ fwprintf(gen, L"\tif (");
823
+ com = firstComment; cmdIdx = 0;
824
+ while (com != NULL) {
825
+ wchar_t* res = ChCond(com->start[0]);
826
+ fwprintf(gen, L"(%ls && Comment%d())", res, cmdIdx);
827
+ delete [] res;
828
+ if (com->next != NULL) {
829
+ fwprintf(gen, L" || ");
830
+ }
831
+ com = com->next; cmdIdx++;
832
+ }
833
+ fwprintf(gen, L") return NextToken();");
834
+ }
835
+ if (hasCtxMoves) { fwprintf(gen, L"\n"); fwprintf(gen, L"\tint apx = 0;"); } /* pdt */
836
+ g.CopyFramePart(L"-->scan3");
837
+
838
+ /* CSB 02-10-05 check the Labels */
839
+ existLabel = new bool[lastStateNr+1];
840
+ CheckLabels();
841
+ for (State *state = firstState->next; state != NULL; state = state->next)
842
+ WriteState(state);
843
+ delete [] existLabel;
844
+
845
+ g.CopyFramePart(L"-->namespace_close");
846
+ GenNamespaceClose(nrOfNs);
847
+
848
+ g.CopyFramePart(NULL);
849
+ fclose(gen);
850
+ }
851
+
852
+ DFA::DFA(Parser *parser) {
853
+ this->parser = parser;
854
+ tab = parser->tab;
855
+ errors = parser->errors;
856
+ trace = parser->trace;
857
+ firstState = NULL; lastState = NULL; lastStateNr = -1;
858
+ firstState = NewState();
859
+ firstMelted = NULL; firstComment = NULL;
860
+ ignoreCase = false;
861
+ dirtyDFA = false;
862
+ hasCtxMoves = false;
863
+ }
864
+
865
+ }; // namespace