ruco-cpp 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/Rakefile +17 -0
  3. data/bin/console +14 -0
  4. data/bin/ruco +30 -0
  5. data/bin/setup +7 -0
  6. data/data/ruco/Parser.frame +359 -0
  7. data/data/ruco/Scanner.frame +896 -0
  8. data/data/ruco/picojson/Changes +14 -0
  9. data/data/ruco/picojson/LICENSE +25 -0
  10. data/data/ruco/picojson/Makefile +8 -0
  11. data/data/ruco/picojson/README.mkdn +183 -0
  12. data/data/ruco/picojson/examples/github-issues.cc +110 -0
  13. data/data/ruco/picojson/examples/iostream.cc +70 -0
  14. data/data/ruco/picojson/examples/streaming.cc +76 -0
  15. data/data/ruco/picojson/picojson.h +1299 -0
  16. data/ext/cocor/Action.cpp +81 -0
  17. data/ext/cocor/Action.h +59 -0
  18. data/ext/cocor/ArrayList.cpp +79 -0
  19. data/ext/cocor/ArrayList.h +52 -0
  20. data/ext/cocor/BitArray.cpp +156 -0
  21. data/ext/cocor/BitArray.h +68 -0
  22. data/ext/cocor/CharClass.cpp +42 -0
  23. data/ext/cocor/CharClass.h +48 -0
  24. data/ext/cocor/CharSet.cpp +166 -0
  25. data/ext/cocor/CharSet.h +68 -0
  26. data/ext/cocor/Coco.atg +528 -0
  27. data/ext/cocor/Coco.cpp +173 -0
  28. data/ext/cocor/Comment.cpp +45 -0
  29. data/ext/cocor/Comment.h +51 -0
  30. data/ext/cocor/Copyright.frame +27 -0
  31. data/ext/cocor/DFA.cpp +865 -0
  32. data/ext/cocor/DFA.h +132 -0
  33. data/ext/cocor/Generator.cpp +182 -0
  34. data/ext/cocor/Generator.h +61 -0
  35. data/ext/cocor/Graph.h +59 -0
  36. data/ext/cocor/HashTable.cpp +115 -0
  37. data/ext/cocor/HashTable.h +84 -0
  38. data/ext/cocor/Makefile +11 -0
  39. data/ext/cocor/Melted.cpp +39 -0
  40. data/ext/cocor/Melted.h +51 -0
  41. data/ext/cocor/Node.cpp +69 -0
  42. data/ext/cocor/Node.h +86 -0
  43. data/ext/cocor/Parser.cpp +925 -0
  44. data/ext/cocor/Parser.frame +326 -0
  45. data/ext/cocor/Parser.h +153 -0
  46. data/ext/cocor/ParserGen.cpp +486 -0
  47. data/ext/cocor/ParserGen.h +99 -0
  48. data/ext/cocor/Position.cpp +37 -0
  49. data/ext/cocor/Position.h +46 -0
  50. data/ext/cocor/README.md +12 -0
  51. data/ext/cocor/Scanner.cpp +833 -0
  52. data/ext/cocor/Scanner.frame +897 -0
  53. data/ext/cocor/Scanner.h +291 -0
  54. data/ext/cocor/Sets.h +84 -0
  55. data/ext/cocor/SortedList.cpp +141 -0
  56. data/ext/cocor/SortedList.h +68 -0
  57. data/ext/cocor/State.cpp +77 -0
  58. data/ext/cocor/State.h +55 -0
  59. data/ext/cocor/StringBuilder.cpp +88 -0
  60. data/ext/cocor/StringBuilder.h +29 -0
  61. data/ext/cocor/Symbol.cpp +61 -0
  62. data/ext/cocor/Symbol.h +70 -0
  63. data/ext/cocor/Tab.cpp +1248 -0
  64. data/ext/cocor/Tab.h +245 -0
  65. data/ext/cocor/Target.cpp +41 -0
  66. data/ext/cocor/Target.h +48 -0
  67. data/ext/cocor/build.bat +3 -0
  68. data/ext/cocor/build.sh +4 -0
  69. data/ext/cocor/coc.bat +1 -0
  70. data/ext/cocor/coc.sh +2 -0
  71. data/ext/cocor/cocor_ruby_ext.cpp +124 -0
  72. data/ext/cocor/cygBuild.bat +1 -0
  73. data/ext/cocor/extconf.rb +5 -0
  74. data/ext/cocor/mingwbuild.bat +2 -0
  75. data/ext/cocor/mkmf.log +57 -0
  76. data/ext/cocor/zipsources.bat +1 -0
  77. data/lib/cocor.rb +14 -0
  78. data/lib/ruco/version.rb +3 -0
  79. data/lib/ruco.rb +728 -0
  80. metadata +195 -0
data/ext/cocor/DFA.cpp ADDED
@@ -0,0 +1,865 @@
1
+ /*-------------------------------------------------------------------------
2
+ DFA -- Generation of the Scanner Automaton
3
+ Compiler Generator Coco/R,
4
+ Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz
5
+ extended by M. Loeberbauer & A. Woess, Univ. of Linz
6
+ ported to C++ by Csaba Balazs, University of Szeged
7
+ with improvements by Pat Terry, Rhodes University
8
+
9
+ This program is free software; you can redistribute it and/or modify it
10
+ under the terms of the GNU General Public License as published by the
11
+ Free Software Foundation; either version 2, or (at your option) any
12
+ later version.
13
+
14
+ This program is distributed in the hope that it will be useful, but
15
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17
+ for more details.
18
+
19
+ You should have received a copy of the GNU General Public License along
20
+ with this program; if not, write to the Free Software Foundation, Inc.,
21
+ 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22
+
23
+ As an exception, it is allowed to write an extension of Coco/R that is
24
+ used as a plugin in non-free software.
25
+
26
+ If not otherwise stated, any source code generated by Coco/R (other than
27
+ Coco/R itself) does not fall under the GNU General Public License.
28
+ -------------------------------------------------------------------------*/
29
+
30
+ #include <stdlib.h>
31
+ #include <wchar.h>
32
+ #include "DFA.h"
33
+ #include "Tab.h"
34
+ #include "Parser.h"
35
+ #include "BitArray.h"
36
+ #include "Scanner.h"
37
+ #include "Generator.h"
38
+
39
+ namespace Coco {
40
+
41
+ //---------- Output primitives
42
+ wchar_t* DFA::Ch(wchar_t ch) {
43
+ wchar_t* format = new wchar_t[10];
44
+ if (ch < L' ' || ch >= 127 || ch == L'\'' || ch == L'\\')
45
+ coco_swprintf(format, 10, L"%d\0", (int) ch);
46
+ else
47
+ coco_swprintf(format, 10, L"L'%lc'\0", (int) ch);
48
+ return format;
49
+ }
50
+
51
+ wchar_t* DFA::ChCond(wchar_t ch) {
52
+ wchar_t* format = new wchar_t[20];
53
+ wchar_t* res = Ch(ch);
54
+ coco_swprintf(format, 20, L"ch == %ls\0", res);
55
+ delete [] res;
56
+ return format;
57
+ }
58
+
59
+ void DFA::PutRange(CharSet *s) {
60
+ for (CharSet::Range *r = s->head; r != NULL; r = r->next) {
61
+ if (r->from == r->to) {
62
+ wchar_t *from = Ch((wchar_t) r->from);
63
+ fwprintf(gen, L"ch == %ls", from);
64
+ delete [] from;
65
+ } else if (r->from == 0) {
66
+ wchar_t *to = Ch((wchar_t) r->to);
67
+ fwprintf(gen, L"ch <= %ls", to);
68
+ delete [] to;
69
+ } else {
70
+ wchar_t *from = Ch((wchar_t) r->from);
71
+ wchar_t *to = Ch((wchar_t) r->to);
72
+ fwprintf(gen, L"(ch >= %ls && ch <= %ls)", from, to);
73
+ delete [] from; delete [] to;
74
+ }
75
+ if (r->next != NULL) fwprintf(gen, L" || ");
76
+ }
77
+ }
78
+
79
+
80
+ //---------- State handling
81
+
82
+ State* DFA::NewState() {
83
+ State *s = new State(); s->nr = ++lastStateNr;
84
+ if (firstState == NULL) firstState = s; else lastState->next = s;
85
+ lastState = s;
86
+ return s;
87
+ }
88
+
89
+ void DFA::NewTransition(State *from, State *to, int typ, int sym, int tc) {
90
+ Target *t = new Target(to);
91
+ Action *a = new Action(typ, sym, tc); a->target = t;
92
+ from->AddAction(a);
93
+ if (typ == Node::clas) curSy->tokenKind = Symbol::classToken;
94
+ }
95
+
96
+ void DFA::CombineShifts() {
97
+ State *state;
98
+ Action *a, *b, *c;
99
+ CharSet *seta, *setb;
100
+ for (state = firstState; state != NULL; state = state->next) {
101
+ for (a = state->firstAction; a != NULL; a = a->next) {
102
+ b = a->next;
103
+ while (b != NULL)
104
+ if (a->target->state == b->target->state && a->tc == b->tc) {
105
+ seta = a->Symbols(tab); setb = b->Symbols(tab);
106
+ seta->Or(setb);
107
+ a->ShiftWith(seta, tab);
108
+ c = b; b = b->next; state->DetachAction(c);
109
+ } else b = b->next;
110
+ }
111
+ }
112
+ }
113
+
114
+ void DFA::FindUsedStates(State *state, BitArray *used) {
115
+ if ((*used)[state->nr]) return;
116
+ used->Set(state->nr, true);
117
+ for (Action *a = state->firstAction; a != NULL; a = a->next)
118
+ FindUsedStates(a->target->state, used);
119
+ }
120
+
121
+ void DFA::DeleteRedundantStates() {
122
+ //State *newState = new State[State::lastNr + 1];
123
+ State **newState = (State**) malloc (sizeof(State*) * (lastStateNr + 1));
124
+ BitArray *used = new BitArray(lastStateNr + 1);
125
+ FindUsedStates(firstState, used);
126
+ // combine equal final states
127
+ for (State *s1 = firstState->next; s1 != NULL; s1 = s1->next) // firstState cannot be final
128
+ if ((*used)[s1->nr] && s1->endOf != NULL && s1->firstAction == NULL && !(s1->ctx))
129
+ for (State *s2 = s1->next; s2 != NULL; s2 = s2->next)
130
+ if ((*used)[s2->nr] && s1->endOf == s2->endOf && s2->firstAction == NULL && !(s2->ctx)) {
131
+ used->Set(s2->nr, false); newState[s2->nr] = s1;
132
+ }
133
+
134
+ State *state;
135
+ for (state = firstState; state != NULL; state = state->next)
136
+ if ((*used)[state->nr])
137
+ for (Action *a = state->firstAction; a != NULL; a = a->next)
138
+ if (!((*used)[a->target->state->nr]))
139
+ a->target->state = newState[a->target->state->nr];
140
+ // delete unused states
141
+ lastState = firstState; lastStateNr = 0; // firstState has number 0
142
+ for (state = firstState->next; state != NULL; state = state->next)
143
+ if ((*used)[state->nr]) {state->nr = ++lastStateNr; lastState = state;}
144
+ else lastState->next = state->next;
145
+ free (newState);
146
+ delete used;
147
+ }
148
+
149
+ State* DFA::TheState(Node *p) {
150
+ State *state;
151
+ if (p == NULL) {state = NewState(); state->endOf = curSy; return state;}
152
+ else return p->state;
153
+ }
154
+
155
+ void DFA::Step(State *from, Node *p, BitArray *stepped) {
156
+ if (p == NULL) return;
157
+ stepped->Set(p->n, true);
158
+
159
+ if (p->typ == Node::clas || p->typ == Node::chr) {
160
+ NewTransition(from, TheState(p->next), p->typ, p->val, p->code);
161
+ } else if (p->typ == Node::alt) {
162
+ Step(from, p->sub, stepped); Step(from, p->down, stepped);
163
+ } else if (p->typ == Node::iter) {
164
+ if (tab->DelSubGraph(p->sub)) {
165
+ parser->SemErr(L"contents of {...} must not be deletable");
166
+ return;
167
+ }
168
+ if (p->next != NULL && !((*stepped)[p->next->n])) Step(from, p->next, stepped);
169
+ Step(from, p->sub, stepped);
170
+ if (p->state != from) {
171
+ BitArray *newStepped = new BitArray(tab->nodes->Count);
172
+ Step(p->state, p, newStepped);
173
+ delete newStepped;
174
+ }
175
+ } else if (p->typ == Node::opt) {
176
+ if (p->next != NULL && !((*stepped)[p->next->n])) Step(from, p->next, stepped);
177
+ Step(from, p->sub, stepped);
178
+ }
179
+ }
180
+
181
+ // Assigns a state n.state to every node n. There will be a transition from
182
+ // n.state to n.next.state triggered by n.val. All nodes in an alternative
183
+ // chain are represented by the same state.
184
+ // Numbering scheme:
185
+ // - any node after a chr, clas, opt, or alt, must get a new number
186
+ // - if a nested structure starts with an iteration the iter node must get a new number
187
+ // - if an iteration follows an iteration, it must get a new number
188
+ void DFA::NumberNodes(Node *p, State *state, bool renumIter) {
189
+ if (p == NULL) return;
190
+ if (p->state != NULL) return; // already visited;
191
+ if ((state == NULL) || ((p->typ == Node::iter) && renumIter)) state = NewState();
192
+ p->state = state;
193
+ if (tab->DelGraph(p)) state->endOf = curSy;
194
+
195
+ if (p->typ == Node::clas || p->typ == Node::chr) {
196
+ NumberNodes(p->next, NULL, false);
197
+ } else if (p->typ == Node::opt) {
198
+ NumberNodes(p->next, NULL, false);
199
+ NumberNodes(p->sub, state, true);
200
+ } else if (p->typ == Node::iter) {
201
+ NumberNodes(p->next, state, true);
202
+ NumberNodes(p->sub, state, true);
203
+ } else if (p->typ == Node::alt) {
204
+ NumberNodes(p->next, NULL, false);
205
+ NumberNodes(p->sub, state, true);
206
+ NumberNodes(p->down, state, renumIter);
207
+ }
208
+ }
209
+
210
+ void DFA::FindTrans (Node *p, bool start, BitArray *marked) {
211
+ if (p == NULL || (*marked)[p->n]) return;
212
+ marked->Set(p->n, true);
213
+ if (start) {
214
+ BitArray *stepped = new BitArray(tab->nodes->Count);
215
+ Step(p->state, p, stepped); // start of group of equally numbered nodes
216
+ delete stepped;
217
+ }
218
+
219
+ if (p->typ == Node::clas || p->typ == Node::chr) {
220
+ FindTrans(p->next, true, marked);
221
+ } else if (p->typ == Node::opt) {
222
+ FindTrans(p->next, true, marked); FindTrans(p->sub, false, marked);
223
+ } else if (p->typ == Node::iter) {
224
+ FindTrans(p->next, false, marked); FindTrans(p->sub, false, marked);
225
+ } else if (p->typ == Node::alt) {
226
+ FindTrans(p->sub, false, marked); FindTrans(p->down, false, marked);
227
+ }
228
+ }
229
+
230
+ void DFA::ConvertToStates(Node *p, Symbol *sym) {
231
+ curGraph = p; curSy = sym;
232
+ if (tab->DelGraph(curGraph)) {
233
+ parser->SemErr(L"token might be empty");
234
+ return;
235
+ }
236
+ NumberNodes(curGraph, firstState, true);
237
+ FindTrans(curGraph, true, new BitArray(tab->nodes->Count));
238
+ if (p->typ == Node::iter) {
239
+ BitArray *stepped = new BitArray(tab->nodes->Count);
240
+ Step(firstState, p, stepped);
241
+ delete stepped;
242
+ }
243
+ }
244
+
245
+ // match string against current automaton; store it either as a fixedToken or as a litToken
246
+ void DFA::MatchLiteral(wchar_t* s, Symbol *sym) {
247
+ wchar_t *subS = coco_string_create(s, 1, coco_string_length(s)-2);
248
+ s = tab->Unescape(subS);
249
+ coco_string_delete(subS);
250
+ int i, len = coco_string_length(s);
251
+ State *state = firstState;
252
+ Action *a = NULL;
253
+ for (i = 0; i < len; i++) { // try to match s against existing DFA
254
+ a = FindAction(state, s[i]);
255
+ if (a == NULL) break;
256
+ state = a->target->state;
257
+ }
258
+ // if s was not totally consumed or leads to a non-final state => make new DFA from it
259
+ if (i != len || state->endOf == NULL) {
260
+ state = firstState; i = 0; a = NULL;
261
+ dirtyDFA = true;
262
+ }
263
+ for (; i < len; i++) { // make new DFA for s[i..len-1]
264
+ State *to = NewState();
265
+ NewTransition(state, to, Node::chr, s[i], Node::normalTrans);
266
+ state = to;
267
+ }
268
+ coco_string_delete(s);
269
+ Symbol *matchedSym = state->endOf;
270
+ if (state->endOf == NULL) {
271
+ state->endOf = sym;
272
+ } else if (matchedSym->tokenKind == Symbol::fixedToken || (a != NULL && a->tc == Node::contextTrans)) {
273
+ // s matched a token with a fixed definition or a token with an appendix that will be cut off
274
+ wchar_t format[200];
275
+ coco_swprintf(format, 200, L"tokens %ls and %ls cannot be distinguished", sym->name, matchedSym->name);
276
+ parser->SemErr(format);
277
+ } else { // matchedSym == classToken || classLitToken
278
+ matchedSym->tokenKind = Symbol::classLitToken;
279
+ sym->tokenKind = Symbol::litToken;
280
+ }
281
+ }
282
+
283
+ void DFA::SplitActions(State *state, Action *a, Action *b) {
284
+ Action *c; CharSet *seta, *setb, *setc;
285
+ seta = a->Symbols(tab); setb = b->Symbols(tab);
286
+ if (seta->Equals(setb)) {
287
+ a->AddTargets(b);
288
+ state->DetachAction(b);
289
+ } else if (seta->Includes(setb)) {
290
+ setc = seta->Clone(); setc->Subtract(setb);
291
+ b->AddTargets(a);
292
+ a->ShiftWith(setc, tab);
293
+ } else if (setb->Includes(seta)) {
294
+ setc = setb->Clone(); setc->Subtract(seta);
295
+ a->AddTargets(b);
296
+ b->ShiftWith(setc, tab);
297
+ } else {
298
+ setc = seta->Clone(); setc->And(setb);
299
+ seta->Subtract(setc);
300
+ setb->Subtract(setc);
301
+ a->ShiftWith(seta, tab);
302
+ b->ShiftWith(setb, tab);
303
+ c = new Action(0, 0, Node::normalTrans); // typ and sym are set in ShiftWith
304
+ c->AddTargets(a);
305
+ c->AddTargets(b);
306
+ c->ShiftWith(setc, tab);
307
+ state->AddAction(c);
308
+ }
309
+ }
310
+
311
+ bool DFA::Overlap(Action *a, Action *b) {
312
+ CharSet *seta, *setb;
313
+ if (a->typ == Node::chr)
314
+ if (b->typ == Node::chr) return (a->sym == b->sym);
315
+ else {setb = tab->CharClassSet(b->sym); return setb->Get(a->sym);}
316
+ else {
317
+ seta = tab->CharClassSet(a->sym);
318
+ if (b->typ == Node::chr) return seta->Get(b->sym);
319
+ else {setb = tab->CharClassSet(b->sym); return seta->Intersects(setb);}
320
+ }
321
+ }
322
+
323
+ bool DFA::MakeUnique(State *state) { // return true if actions were split
324
+ bool changed = false;
325
+ for (Action *a = state->firstAction; a != NULL; a = a->next)
326
+ for (Action *b = a->next; b != NULL; b = b->next)
327
+ if (Overlap(a, b)) {
328
+ SplitActions(state, a, b);
329
+ changed = true;
330
+ }
331
+ return changed;
332
+ }
333
+
334
+ void DFA::MeltStates(State *state) {
335
+ bool changed, ctx;
336
+ BitArray *targets;
337
+ Symbol *endOf;
338
+ for (Action *action = state->firstAction; action != NULL; action = action->next) {
339
+ if (action->target->next != NULL) {
340
+ GetTargetStates(action, targets, endOf, ctx);
341
+ Melted *melt = StateWithSet(targets);
342
+ if (melt == NULL) {
343
+ State *s = NewState(); s->endOf = endOf; s->ctx = ctx;
344
+ for (Target *targ = action->target; targ != NULL; targ = targ->next)
345
+ s->MeltWith(targ->state);
346
+ do {changed = MakeUnique(s);} while (changed);
347
+ melt = NewMelted(targets, s);
348
+ }
349
+ action->target->next = NULL;
350
+ action->target->state = melt->state;
351
+ }
352
+ }
353
+ }
354
+
355
+ void DFA::FindCtxStates() {
356
+ for (State *state = firstState; state != NULL; state = state->next)
357
+ for (Action *a = state->firstAction; a != NULL; a = a->next)
358
+ if (a->tc == Node::contextTrans) a->target->state->ctx = true;
359
+ }
360
+
361
+ void DFA::MakeDeterministic() {
362
+ State *state;
363
+ bool changed;
364
+ lastSimState = lastState->nr;
365
+ maxStates = 2 * lastSimState; // heuristic for set size in Melted.set
366
+ FindCtxStates();
367
+ for (state = firstState; state != NULL; state = state->next)
368
+ do {changed = MakeUnique(state);} while (changed);
369
+ for (state = firstState; state != NULL; state = state->next)
370
+ MeltStates(state);
371
+ DeleteRedundantStates();
372
+ CombineShifts();
373
+ }
374
+
375
+ void DFA::PrintStates() {
376
+ fwprintf(trace, L"\n");
377
+ fwprintf(trace, L"---------- states ----------\n");
378
+ for (State *state = firstState; state != NULL; state = state->next) {
379
+ bool first = true;
380
+ if (state->endOf == NULL) fwprintf(trace, L" ");
381
+ else {
382
+ wchar_t *paddedName = tab->Name(state->endOf->name);
383
+ fwprintf(trace, L"E(%12s)", paddedName);
384
+ coco_string_delete(paddedName);
385
+ }
386
+ fwprintf(trace, L"%3d:", state->nr);
387
+ if (state->firstAction == NULL) fwprintf(trace, L"\n");
388
+ for (Action *action = state->firstAction; action != NULL; action = action->next) {
389
+ if (first) {fwprintf(trace, L" "); first = false;} else fwprintf(trace, L" ");
390
+
391
+ if (action->typ == Node::clas) fwprintf(trace, L"%ls", ((CharClass*)(*tab->classes)[action->sym])->name);
392
+ else fwprintf(trace, L"%3s", Ch((wchar_t)action->sym));
393
+ for (Target *targ = action->target; targ != NULL; targ = targ->next) {
394
+ fwprintf(trace, L"%3d", targ->state->nr);
395
+ }
396
+ if (action->tc == Node::contextTrans) fwprintf(trace, L" context\n"); else fwprintf(trace, L"\n");
397
+ }
398
+ }
399
+ fwprintf(trace, L"\n---------- character classes ----------\n");
400
+ tab->WriteCharClasses();
401
+ }
402
+
403
+ //---------------------------- actions --------------------------------
404
+
405
+ Action* DFA::FindAction(State *state, wchar_t ch) {
406
+ for (Action *a = state->firstAction; a != NULL; a = a->next)
407
+ if (a->typ == Node::chr && ch == a->sym) return a;
408
+ else if (a->typ == Node::clas) {
409
+ CharSet *s = tab->CharClassSet(a->sym);
410
+ if (s->Get(ch)) return a;
411
+ }
412
+ return NULL;
413
+ }
414
+
415
+
416
+ void DFA::GetTargetStates(Action *a, BitArray* &targets, Symbol* &endOf, bool &ctx) {
417
+ // compute the set of target states
418
+ targets = new BitArray(maxStates); endOf = NULL;
419
+ ctx = false;
420
+ for (Target *t = a->target; t != NULL; t = t->next) {
421
+ int stateNr = t->state->nr;
422
+ if (stateNr <= lastSimState) { targets->Set(stateNr, true); }
423
+ else { targets->Or(MeltedSet(stateNr)); }
424
+ if (t->state->endOf != NULL) {
425
+ if (endOf == NULL || endOf == t->state->endOf) {
426
+ endOf = t->state->endOf;
427
+ }
428
+ else {
429
+ wprintf(L"Tokens %ls and %ls cannot be distinguished\n", endOf->name, t->state->endOf->name);
430
+ errors->count++;
431
+ }
432
+ }
433
+ if (t->state->ctx) {
434
+ ctx = true;
435
+ // The following check seems to be unnecessary. It reported an error
436
+ // if a symbol + context was the prefix of another symbol, e.g.
437
+ // s1 = "a" "b" "c".
438
+ // s2 = "a" CONTEXT("b").
439
+ // But this is ok.
440
+ // if (t.state.endOf != null) {
441
+ // Console.WriteLine("Ambiguous context clause");
442
+ // Errors.count++;
443
+ // }
444
+ }
445
+ }
446
+ }
447
+
448
+
449
+ //------------------------- melted states ------------------------------
450
+
451
+
452
+ Melted* DFA::NewMelted(BitArray *set, State *state) {
453
+ Melted *m = new Melted(set, state);
454
+ m->next = firstMelted; firstMelted = m;
455
+ return m;
456
+
457
+ }
458
+
459
+ BitArray* DFA::MeltedSet(int nr) {
460
+ Melted *m = firstMelted;
461
+ while (m != NULL) {
462
+ if (m->state->nr == nr) return m->set; else m = m->next;
463
+ }
464
+ //Errors::Exception("-- compiler error in Melted::Set");
465
+ //throw new Exception("-- compiler error in Melted::Set");
466
+ return NULL;
467
+ }
468
+
469
+ Melted* DFA::StateWithSet(BitArray *s) {
470
+ for (Melted *m = firstMelted; m != NULL; m = m->next)
471
+ if (Sets::Equals(s, m->set)) return m;
472
+ return NULL;
473
+ }
474
+
475
+
476
+ //------------------------ comments --------------------------------
477
+
478
+ wchar_t* DFA::CommentStr(Node *p) {
479
+ StringBuilder s = StringBuilder();
480
+ while (p != NULL) {
481
+ if (p->typ == Node::chr) {
482
+ s.Append((wchar_t)p->val);
483
+ } else if (p->typ == Node::clas) {
484
+ CharSet *set = tab->CharClassSet(p->val);
485
+ if (set->Elements() != 1) parser->SemErr(L"character set contains more than 1 character");
486
+ s.Append((wchar_t) set->First());
487
+ }
488
+ else parser->SemErr(L"comment delimiters may not be structured");
489
+ p = p->next;
490
+ }
491
+ if (s.GetLength() == 0 || s.GetLength() > 2) {
492
+ parser->SemErr(L"comment delimiters must be 1 or 2 characters long");
493
+ s = StringBuilder(L"?");
494
+ }
495
+ return s.ToString();
496
+ }
497
+
498
+
499
+ void DFA::NewComment(Node *from, Node *to, bool nested) {
500
+ Comment *c = new Comment(CommentStr(from), CommentStr(to), nested);
501
+ c->next = firstComment; firstComment = c;
502
+ }
503
+
504
+
505
+ //------------------------ scanner generation ----------------------
506
+
507
+ void DFA::GenComBody(Comment *com) {
508
+ fwprintf(gen, L"\t\tfor(;;) {\n");
509
+
510
+ wchar_t* res = ChCond(com->stop[0]);
511
+ fwprintf(gen, L"\t\t\tif (%ls) ", res);
512
+ fwprintf(gen, L"{\n");
513
+ delete [] res;
514
+
515
+ if (coco_string_length(com->stop) == 1) {
516
+ fwprintf(gen, L"\t\t\t\tlevel--;\n");
517
+ fwprintf(gen, L"\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }\n");
518
+ fwprintf(gen, L"\t\t\t\tNextCh();\n");
519
+ } else {
520
+ fwprintf(gen, L"\t\t\t\tNextCh();\n");
521
+ wchar_t* res = ChCond(com->stop[1]);
522
+ fwprintf(gen, L"\t\t\t\tif (%ls) {\n", res);
523
+ delete [] res;
524
+ fwprintf(gen, L"\t\t\t\t\tlevel--;\n");
525
+ fwprintf(gen, L"\t\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }\n");
526
+ fwprintf(gen, L"\t\t\t\t\tNextCh();\n");
527
+ fwprintf(gen, L"\t\t\t\t}\n");
528
+ }
529
+ if (com->nested) {
530
+ fwprintf(gen, L"\t\t\t}");
531
+ wchar_t* res = ChCond(com->start[0]);
532
+ fwprintf(gen, L" else if (%ls) ", res);
533
+ delete [] res;
534
+ fwprintf(gen, L"{\n");
535
+ if (coco_string_length(com->stop) == 1)
536
+ fwprintf(gen, L"\t\t\t\tlevel++; NextCh();\n");
537
+ else {
538
+ fwprintf(gen, L"\t\t\t\tNextCh();\n");
539
+ wchar_t* res = ChCond(com->start[1]);
540
+ fwprintf(gen, L"\t\t\t\tif (%ls) ", res);
541
+ delete [] res;
542
+ fwprintf(gen, L"{\n");
543
+ fwprintf(gen, L"\t\t\t\t\tlevel++; NextCh();\n");
544
+ fwprintf(gen, L"\t\t\t\t}\n");
545
+ }
546
+ }
547
+ fwprintf(gen, L"\t\t\t} else if (ch == buffer->EoF) return false;\n");
548
+ fwprintf(gen, L"\t\t\telse NextCh();\n");
549
+ fwprintf(gen, L"\t\t}\n");
550
+ }
551
+
552
+ void DFA::GenCommentHeader(Comment *com, int i) {
553
+ fwprintf(gen, L"\tbool Comment%d();\n", i);
554
+ }
555
+
556
+ void DFA::GenComment(Comment *com, int i) {
557
+ fwprintf(gen, L"\n");
558
+ fwprintf(gen, L"bool Scanner::Comment%d() ", i);
559
+ fwprintf(gen, L"{\n");
560
+ fwprintf(gen, L"\tint level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;\n");
561
+ if (coco_string_length(com->start) == 1) {
562
+ fwprintf(gen, L"\tNextCh();\n");
563
+ GenComBody(com);
564
+ } else {
565
+ fwprintf(gen, L"\tNextCh();\n");
566
+ wchar_t* res = ChCond(com->start[1]);
567
+ fwprintf(gen, L"\tif (%ls) ", res);
568
+ delete [] res;
569
+ fwprintf(gen, L"{\n");
570
+
571
+ fwprintf(gen, L"\t\tNextCh();\n");
572
+ GenComBody(com);
573
+
574
+ fwprintf(gen, L"\t} else {\n");
575
+ fwprintf(gen, L"\t\tbuffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0;\n");
576
+ fwprintf(gen, L"\t}\n");
577
+ fwprintf(gen, L"\treturn false;\n");
578
+ }
579
+ fwprintf(gen, L"}\n");
580
+ }
581
+
582
+ wchar_t* DFA::SymName(Symbol *sym) { // real name value is stored in Tab.literals
583
+ if (('a'<=sym->name[0] && sym->name[0]<='z') ||
584
+ ('A'<=sym->name[0] && sym->name[0]<='Z')) { //Char::IsLetter(sym->name[0])
585
+
586
+ Iterator *iter = tab->literals->GetIterator();
587
+ while (iter->HasNext()) {
588
+ DictionaryEntry *e = iter->Next();
589
+ if (e->val == sym) { return e->key; }
590
+ }
591
+ }
592
+ return sym->name;
593
+ }
594
+
595
+ void DFA::GenLiterals () {
596
+ Symbol *sym;
597
+
598
+ ArrayList *ts[2];
599
+ ts[0] = tab->terminals;
600
+ ts[1] = tab->pragmas;
601
+
602
+ for (int i = 0; i < 2; ++i) {
603
+ for (int j = 0; j < ts[i]->Count; j++) {
604
+ sym = (Symbol*) ((*(ts[i]))[j]);
605
+ if (sym->tokenKind == Symbol::litToken) {
606
+ wchar_t* name = coco_string_create(SymName(sym));
607
+ if (ignoreCase) {
608
+ wchar_t *oldName = name;
609
+ name = coco_string_create_lower(name);
610
+ coco_string_delete(oldName);
611
+ }
612
+ // sym.name stores literals with quotes, e.g. "\"Literal\""
613
+
614
+ fwprintf(gen, L"\tkeywords.set(L");
615
+ // write keyword, escape non printable characters
616
+ for (int k = 0; name[k] != L'\0'; k++) {
617
+ wchar_t c = name[k];
618
+ fwprintf(gen, (c >= 32 && c <= 127) ? L"%lc" : L"\\x%04x", c);
619
+ }
620
+ fwprintf(gen, L", %d);\n", sym->n);
621
+
622
+ coco_string_delete(name);
623
+ }
624
+ }
625
+ }
626
+ }
627
+
628
+ int DFA::GenNamespaceOpen(const wchar_t *nsName) {
629
+ if (nsName == NULL || coco_string_length(nsName) == 0) {
630
+ return 0;
631
+ }
632
+ const int len = coco_string_length(nsName);
633
+ int startPos = 0;
634
+ int nrOfNs = 0;
635
+ do {
636
+ int curLen = coco_string_indexof(nsName + startPos, COCO_CPP_NAMESPACE_SEPARATOR);
637
+ if (curLen == -1) { curLen = len - startPos; }
638
+ wchar_t *curNs = coco_string_create(nsName, startPos, curLen);
639
+ fwprintf(gen, L"namespace %ls {\n", curNs);
640
+ coco_string_delete(curNs);
641
+ startPos = startPos + curLen + 1;
642
+ if (startPos < len && nsName[startPos] == COCO_CPP_NAMESPACE_SEPARATOR) {
643
+ ++startPos;
644
+ }
645
+ ++nrOfNs;
646
+ } while (startPos < len);
647
+ return nrOfNs;
648
+ }
649
+
650
+ void DFA::GenNamespaceClose(int nrOfNs) {
651
+ for (int i = 0; i < nrOfNs; ++i) {
652
+ fwprintf(gen, L"} // namespace\n");
653
+ }
654
+ }
655
+
656
+ void DFA::CheckLabels() {
657
+ int i;
658
+ State *state;
659
+ Action *action;
660
+
661
+ for (i=0; i < lastStateNr+1; i++) {
662
+ existLabel[i] = false;
663
+ }
664
+
665
+ for (state = firstState->next; state != NULL; state = state->next) {
666
+ for (action = state->firstAction; action != NULL; action = action->next) {
667
+ existLabel[action->target->state->nr] = true;
668
+ }
669
+ }
670
+ }
671
+
672
+ void DFA::WriteState(State *state) {
673
+ Symbol *endOf = state->endOf;
674
+ fwprintf(gen, L"\t\tcase %d:\n", state->nr);
675
+ if (existLabel[state->nr])
676
+ fwprintf(gen, L"\t\t\tcase_%d:\n", state->nr);
677
+
678
+ if (endOf != NULL && state->firstAction != NULL) {
679
+ fwprintf(gen, L"\t\t\trecEnd = pos; recKind = %d;\n", endOf->n);
680
+ }
681
+ bool ctxEnd = state->ctx;
682
+
683
+ for (Action *action = state->firstAction; action != NULL; action = action->next) {
684
+ if (action == state->firstAction) fwprintf(gen, L"\t\t\tif (");
685
+ else fwprintf(gen, L"\t\t\telse if (");
686
+ if (action->typ == Node::chr) {
687
+ wchar_t* res = ChCond((wchar_t)action->sym);
688
+ fwprintf(gen, L"%ls", res);
689
+ delete [] res;
690
+ } else PutRange(tab->CharClassSet(action->sym));
691
+ fwprintf(gen, L") {");
692
+
693
+ if (action->tc == Node::contextTrans) {
694
+ fwprintf(gen, L"apx++; "); ctxEnd = false;
695
+ } else if (state->ctx)
696
+ fwprintf(gen, L"apx = 0; ");
697
+ fwprintf(gen, L"AddCh(); goto case_%d;", action->target->state->nr);
698
+ fwprintf(gen, L"}\n");
699
+ }
700
+ if (state->firstAction == NULL)
701
+ fwprintf(gen, L"\t\t\t{");
702
+ else
703
+ fwprintf(gen, L"\t\t\telse {");
704
+ if (ctxEnd) { // final context state: cut appendix
705
+ fwprintf(gen, L"\n");
706
+ fwprintf(gen, L"\t\t\t\ttlen -= apx;\n");
707
+ fwprintf(gen, L"\t\t\t\tSetScannerBehindT();");
708
+
709
+ fwprintf(gen, L"\t\t\t\tbuffer->SetPos(t->pos); NextCh(); line = t->line; col = t->col;\n");
710
+ fwprintf(gen, L"\t\t\t\tfor (int i = 0; i < tlen; i++) NextCh();\n");
711
+ fwprintf(gen, L"\t\t\t\t");
712
+ }
713
+ if (endOf == NULL) {
714
+ fwprintf(gen, L"goto case_0;}\n");
715
+ } else {
716
+ fwprintf(gen, L"t->kind = %d; ", endOf->n);
717
+ if (endOf->tokenKind == Symbol::classLitToken) {
718
+ if (ignoreCase) {
719
+ fwprintf(gen, L"wchar_t *literal = coco_string_create_lower(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;}\n");
720
+ } else {
721
+ fwprintf(gen, L"wchar_t *literal = coco_string_create(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;}\n");
722
+ }
723
+ } else {
724
+ fwprintf(gen, L"break;}\n");
725
+ }
726
+ }
727
+ }
728
+
729
+ void DFA::WriteStartTab() {
730
+ bool firstRange = true;
731
+ for (Action *action = firstState->firstAction; action != NULL; action = action->next) {
732
+ int targetState = action->target->state->nr;
733
+ if (action->typ == Node::chr) {
734
+ fwprintf(gen, L"\tstart.set(%d, %d);\n", action->sym, targetState);
735
+ } else {
736
+ CharSet *s = tab->CharClassSet(action->sym);
737
+ for (CharSet::Range *r = s->head; r != NULL; r = r->next) {
738
+ if (firstRange) {
739
+ firstRange = false;
740
+ fwprintf(gen, L"\tint i;\n");
741
+ }
742
+ fwprintf(gen, L"\tfor (i = %d; i <= %d; ++i) start.set(i, %d);\n", r->from, r->to, targetState);
743
+ }
744
+ }
745
+ }
746
+ fwprintf(gen, L"\t\tstart.set(Buffer::EoF, -1);\n");
747
+ }
748
+
749
+ void DFA::WriteScanner() {
750
+ Generator g = Generator(tab, errors);
751
+ fram = g.OpenFrame(L"Scanner.frame");
752
+ gen = g.OpenGen(L"Scanner.h");
753
+ if (dirtyDFA) MakeDeterministic();
754
+
755
+ // Header
756
+ g.GenCopyright();
757
+ g.SkipFramePart(L"-->begin");
758
+
759
+ g.CopyFramePart(L"-->prefix");
760
+ g.GenPrefixFromNamespace();
761
+
762
+ g.CopyFramePart(L"-->prefix");
763
+ g.GenPrefixFromNamespace();
764
+
765
+ g.CopyFramePart(L"-->namespace_open");
766
+ int nrOfNs = GenNamespaceOpen(tab->nsName);
767
+
768
+ g.CopyFramePart(L"-->casing0");
769
+ if (ignoreCase) {
770
+ fwprintf(gen, L"\twchar_t valCh; // current input character (for token.val)\n");
771
+ }
772
+ g.CopyFramePart(L"-->commentsheader");
773
+ Comment *com = firstComment;
774
+ int cmdIdx = 0;
775
+ while (com != NULL) {
776
+ GenCommentHeader(com, cmdIdx);
777
+ com = com->next; cmdIdx++;
778
+ }
779
+
780
+ g.CopyFramePart(L"-->namespace_close");
781
+ GenNamespaceClose(nrOfNs);
782
+
783
+ g.CopyFramePart(L"-->implementation");
784
+ fclose(gen);
785
+
786
+ // Source
787
+ gen = g.OpenGen(L"Scanner.cpp");
788
+ g.GenCopyright();
789
+ g.SkipFramePart(L"-->begin");
790
+ g.CopyFramePart(L"-->namespace_open");
791
+ nrOfNs = GenNamespaceOpen(tab->nsName);
792
+
793
+ g.CopyFramePart(L"-->declarations");
794
+ fwprintf(gen, L"\tmaxT = %d;\n", tab->terminals->Count - 1);
795
+ fwprintf(gen, L"\tnoSym = %d;\n", tab->noSym->n);
796
+ WriteStartTab();
797
+ GenLiterals();
798
+
799
+ g.CopyFramePart(L"-->initialization");
800
+ g.CopyFramePart(L"-->casing1");
801
+ if (ignoreCase) {
802
+ fwprintf(gen, L"\t\tvalCh = ch;\n");
803
+ fwprintf(gen, L"\t\tif ('A' <= ch && ch <= 'Z') ch = ch - 'A' + 'a'; // ch.ToLower()");
804
+ }
805
+ g.CopyFramePart(L"-->casing2");
806
+ fwprintf(gen, L"\t\ttval[tlen++] = ");
807
+ if (ignoreCase) fwprintf(gen, L"valCh;"); else fwprintf(gen, L"ch;");
808
+
809
+ g.CopyFramePart(L"-->comments");
810
+ com = firstComment; cmdIdx = 0;
811
+ while (com != NULL) {
812
+ GenComment(com, cmdIdx);
813
+ com = com->next; cmdIdx++;
814
+ }
815
+
816
+ g.CopyFramePart(L"-->scan1");
817
+ fwprintf(gen, L"\t\t\t");
818
+ if (tab->ignored->Elements() > 0) { PutRange(tab->ignored); } else { fwprintf(gen, L"false"); }
819
+
820
+ g.CopyFramePart(L"-->scan2");
821
+ if (firstComment != NULL) {
822
+ fwprintf(gen, L"\tif (");
823
+ com = firstComment; cmdIdx = 0;
824
+ while (com != NULL) {
825
+ wchar_t* res = ChCond(com->start[0]);
826
+ fwprintf(gen, L"(%ls && Comment%d())", res, cmdIdx);
827
+ delete [] res;
828
+ if (com->next != NULL) {
829
+ fwprintf(gen, L" || ");
830
+ }
831
+ com = com->next; cmdIdx++;
832
+ }
833
+ fwprintf(gen, L") return NextToken();");
834
+ }
835
+ if (hasCtxMoves) { fwprintf(gen, L"\n"); fwprintf(gen, L"\tint apx = 0;"); } /* pdt */
836
+ g.CopyFramePart(L"-->scan3");
837
+
838
+ /* CSB 02-10-05 check the Labels */
839
+ existLabel = new bool[lastStateNr+1];
840
+ CheckLabels();
841
+ for (State *state = firstState->next; state != NULL; state = state->next)
842
+ WriteState(state);
843
+ delete [] existLabel;
844
+
845
+ g.CopyFramePart(L"-->namespace_close");
846
+ GenNamespaceClose(nrOfNs);
847
+
848
+ g.CopyFramePart(NULL);
849
+ fclose(gen);
850
+ }
851
+
852
+ DFA::DFA(Parser *parser) {
853
+ this->parser = parser;
854
+ tab = parser->tab;
855
+ errors = parser->errors;
856
+ trace = parser->trace;
857
+ firstState = NULL; lastState = NULL; lastStateNr = -1;
858
+ firstState = NewState();
859
+ firstMelted = NULL; firstComment = NULL;
860
+ ignoreCase = false;
861
+ dirtyDFA = false;
862
+ hasCtxMoves = false;
863
+ }
864
+
865
+ }; // namespace