ruco-cpp 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/Rakefile +17 -0
  3. data/bin/console +14 -0
  4. data/bin/ruco +30 -0
  5. data/bin/setup +7 -0
  6. data/data/ruco/Parser.frame +359 -0
  7. data/data/ruco/Scanner.frame +896 -0
  8. data/data/ruco/picojson/Changes +14 -0
  9. data/data/ruco/picojson/LICENSE +25 -0
  10. data/data/ruco/picojson/Makefile +8 -0
  11. data/data/ruco/picojson/README.mkdn +183 -0
  12. data/data/ruco/picojson/examples/github-issues.cc +110 -0
  13. data/data/ruco/picojson/examples/iostream.cc +70 -0
  14. data/data/ruco/picojson/examples/streaming.cc +76 -0
  15. data/data/ruco/picojson/picojson.h +1299 -0
  16. data/ext/cocor/Action.cpp +81 -0
  17. data/ext/cocor/Action.h +59 -0
  18. data/ext/cocor/ArrayList.cpp +79 -0
  19. data/ext/cocor/ArrayList.h +52 -0
  20. data/ext/cocor/BitArray.cpp +156 -0
  21. data/ext/cocor/BitArray.h +68 -0
  22. data/ext/cocor/CharClass.cpp +42 -0
  23. data/ext/cocor/CharClass.h +48 -0
  24. data/ext/cocor/CharSet.cpp +166 -0
  25. data/ext/cocor/CharSet.h +68 -0
  26. data/ext/cocor/Coco.atg +528 -0
  27. data/ext/cocor/Coco.cpp +173 -0
  28. data/ext/cocor/Comment.cpp +45 -0
  29. data/ext/cocor/Comment.h +51 -0
  30. data/ext/cocor/Copyright.frame +27 -0
  31. data/ext/cocor/DFA.cpp +865 -0
  32. data/ext/cocor/DFA.h +132 -0
  33. data/ext/cocor/Generator.cpp +182 -0
  34. data/ext/cocor/Generator.h +61 -0
  35. data/ext/cocor/Graph.h +59 -0
  36. data/ext/cocor/HashTable.cpp +115 -0
  37. data/ext/cocor/HashTable.h +84 -0
  38. data/ext/cocor/Makefile +11 -0
  39. data/ext/cocor/Melted.cpp +39 -0
  40. data/ext/cocor/Melted.h +51 -0
  41. data/ext/cocor/Node.cpp +69 -0
  42. data/ext/cocor/Node.h +86 -0
  43. data/ext/cocor/Parser.cpp +925 -0
  44. data/ext/cocor/Parser.frame +326 -0
  45. data/ext/cocor/Parser.h +153 -0
  46. data/ext/cocor/ParserGen.cpp +486 -0
  47. data/ext/cocor/ParserGen.h +99 -0
  48. data/ext/cocor/Position.cpp +37 -0
  49. data/ext/cocor/Position.h +46 -0
  50. data/ext/cocor/README.md +12 -0
  51. data/ext/cocor/Scanner.cpp +833 -0
  52. data/ext/cocor/Scanner.frame +897 -0
  53. data/ext/cocor/Scanner.h +291 -0
  54. data/ext/cocor/Sets.h +84 -0
  55. data/ext/cocor/SortedList.cpp +141 -0
  56. data/ext/cocor/SortedList.h +68 -0
  57. data/ext/cocor/State.cpp +77 -0
  58. data/ext/cocor/State.h +55 -0
  59. data/ext/cocor/StringBuilder.cpp +88 -0
  60. data/ext/cocor/StringBuilder.h +29 -0
  61. data/ext/cocor/Symbol.cpp +61 -0
  62. data/ext/cocor/Symbol.h +70 -0
  63. data/ext/cocor/Tab.cpp +1248 -0
  64. data/ext/cocor/Tab.h +245 -0
  65. data/ext/cocor/Target.cpp +41 -0
  66. data/ext/cocor/Target.h +48 -0
  67. data/ext/cocor/build.bat +3 -0
  68. data/ext/cocor/build.sh +4 -0
  69. data/ext/cocor/coc.bat +1 -0
  70. data/ext/cocor/coc.sh +2 -0
  71. data/ext/cocor/cocor_ruby_ext.cpp +124 -0
  72. data/ext/cocor/cygBuild.bat +1 -0
  73. data/ext/cocor/extconf.rb +5 -0
  74. data/ext/cocor/mingwbuild.bat +2 -0
  75. data/ext/cocor/mkmf.log +57 -0
  76. data/ext/cocor/zipsources.bat +1 -0
  77. data/lib/cocor.rb +14 -0
  78. data/lib/ruco/version.rb +3 -0
  79. data/lib/ruco.rb +728 -0
  80. metadata +195 -0
@@ -0,0 +1,833 @@
1
+ /*----------------------------------------------------------------------
2
+ Compiler Generator Coco/R,
3
+ Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz
4
+ extended by M. Loeberbauer & A. Woess, Univ. of Linz
5
+ ported to C++ by Csaba Balazs, University of Szeged
6
+ with improvements by Pat Terry, Rhodes University
7
+
8
+ This program is free software; you can redistribute it and/or modify it
9
+ under the terms of the GNU General Public License as published by the
10
+ Free Software Foundation; either version 2, or (at your option) any
11
+ later version.
12
+
13
+ This program is distributed in the hope that it will be useful, but
14
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16
+ for more details.
17
+
18
+ You should have received a copy of the GNU General Public License along
19
+ with this program; if not, write to the Free Software Foundation, Inc.,
20
+ 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21
+
22
+ As an exception, it is allowed to write an extension of Coco/R that is
23
+ used as a plugin in non-free software.
24
+
25
+ If not otherwise stated, any source code generated by Coco/R (other than
26
+ Coco/R itself) does not fall under the GNU General Public License.
27
+ -----------------------------------------------------------------------*/
28
+
29
+
30
+ #include <memory.h>
31
+ #include <string.h>
32
+ #include "Scanner.h"
33
+
34
+ namespace Coco {
35
+
36
+
37
+
38
+ // string handling, wide character
39
+
40
+
41
+ wchar_t* coco_string_create(const wchar_t* value) {
42
+ return coco_string_create(value, 0);
43
+ }
44
+
45
+ wchar_t* coco_string_create(const wchar_t *value, int startIndex) {
46
+ int valueLen = 0;
47
+ int len = 0;
48
+
49
+ if (value) {
50
+ valueLen = wcslen(value);
51
+ len = valueLen - startIndex;
52
+ }
53
+
54
+ return coco_string_create(value, startIndex, len);
55
+ }
56
+
57
// Returns a newly allocated, NUL-terminated copy of `length` characters of
// `value`, starting at `startIndex`.
//
// value      - source string; may be NULL, in which case an empty string
//              is returned.
// startIndex - zero-based offset of the first character to copy.
// length     - number of characters to copy; values <= 0 yield an empty
//              string instead of a negative array size.
//
// The result is allocated with new[] and must be released with delete[]
// (e.g. through coco_string_delete).
wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) {
	int len = 0;
	if (value && length > 0) { len = length; }

	wchar_t* data = new wchar_t[len + 1];
	// Only touch the source when there is something to copy, so that a NULL
	// `value` is never offset or handed to wcsncpy.
	if (len > 0) { wcsncpy(data, value + startIndex, len); }
	data[len] = 0;

	return data;
}
68
+
69
// Returns a newly allocated copy of `data` in which the characters
// 'a'..'z' are replaced by 'A'..'Z'; every other character is copied as is.
// A NULL input yields NULL.
wchar_t* coco_string_create_upper(const wchar_t* data) {
	if (data == NULL) { return NULL; }

	const int count = wcslen(data);
	wchar_t* upper = new wchar_t[count + 1];

	// The loop also visits index `count` (the terminator), which is outside
	// 'a'..'z' and is therefore copied unchanged.
	for (int k = 0; k <= count; ++k) {
		const wchar_t c = data[k];
		const bool isLower = (c >= L'a') && (c <= L'z');
		upper[k] = isLower ? c + (L'A' - L'a') : c;
	}

	upper[count] = L'\0';
	return upper;
}
87
+
88
+ wchar_t* coco_string_create_lower(const wchar_t* data) {
89
+ if (!data) { return NULL; }
90
+ int dataLen = wcslen(data);
91
+ return coco_string_create_lower(data, 0, dataLen);
92
+ }
93
+
94
// Returns a newly allocated, NUL-terminated lower-case copy of `dataLen`
// characters of `data`, starting at `startIndex`.
//
// data       - source buffer; NULL yields NULL. It does not have to be
//              NUL-terminated: only [startIndex, startIndex + dataLen) is
//              read.
// startIndex - zero-based offset of the first character to convert.
// dataLen    - number of characters to convert; negative values are
//              treated as zero.
//
// Only 'A'..'Z' are mapped to 'a'..'z'; other characters are copied as is.
wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) {
	if (!data) { return NULL; }
	if (dataLen < 0) { dataLen = 0; }

	wchar_t* newData = new wchar_t[dataLen + 1];

	// Strictly `<`: reading data[startIndex + dataLen] would step one
	// element past the requested range.
	for (int i = 0; i < dataLen; i++) {
		const wchar_t ch = data[startIndex + i];
		if ((L'A' <= ch) && (ch <= L'Z')) {
			newData[i] = ch - (L'A' - L'a');
		}
		else { newData[i] = ch; }
	}
	newData[dataLen] = L'\0';
	return newData;
}
109
+
110
// Returns a newly allocated string holding `data1` followed by `data2`.
// Either argument may be NULL and then contributes nothing.
wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) {
	const int headLen = data1 ? wcslen(data1) : 0;
	const int tailLen = data2 ? wcslen(data2) : 0;
	const int total = headLen + tailLen;

	wchar_t* joined = new wchar_t[total + 1];

	if (data1) { wcscpy(joined, data1); }
	if (data2) { wcscpy(joined + headLen, data2); }

	joined[total] = 0;
	return joined;
}
127
+
128
// Returns a newly allocated string holding `target` followed by the single
// character `appendix`.
//
// target   - string to extend; may be NULL, which is treated as empty.
// appendix - character placed after the copied text.
//
// The result is allocated with new[] and must be released with delete[].
wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) {
	const int targetLen = target ? (int) wcslen(target) : 0;
	wchar_t* data = new wchar_t[targetLen + 2];
	// Skip the copy for an empty or NULL target so that wcsncpy never
	// receives a NULL source pointer.
	if (targetLen > 0) { wcsncpy(data, target, targetLen); }
	data[targetLen] = appendix;
	data[targetLen + 1] = 0;
	return data;
}
136
+
137
// Frees a wide string allocated with new[] and resets the caller's pointer
// to NULL. Passing a NULL pointer is harmless.
void coco_string_delete(wchar_t* &data) {
	wchar_t* released = data;
	data = NULL;
	delete [] released;
}
141
+
142
// Returns the number of characters in `data`, or 0 when `data` is NULL.
int coco_string_length(const wchar_t* data) {
	if (!data) { return 0; }
	return wcslen(data);
}
146
+
147
// Reports whether `data` ends with the suffix `end`.
// Both arguments must be non-NULL, NUL-terminated strings.
bool coco_string_endswith(const wchar_t* data, const wchar_t *end) {
	const int dataLen = wcslen(data);
	const int endLen = wcslen(end);
	if (endLen > dataLen) { return false; }
	// Compare the last endLen characters of data against the suffix.
	const wchar_t* tail = data + dataLen - endLen;
	return wcscmp(tail, end) == 0;
}
152
+
153
// Returns the zero-based index of the first occurrence of `value` in
// `data`, or -1 when the character does not occur.
int coco_string_indexof(const wchar_t* data, const wchar_t value) {
	const wchar_t* hit = wcschr(data, value);
	if (!hit) { return -1; }
	return (hit - data);
}
159
+
160
// Returns the zero-based index of the last occurrence of `value` in
// `data`, or -1 when the character does not occur.
int coco_string_lastindexof(const wchar_t* data, const wchar_t value) {
	const wchar_t* hit = wcsrchr(data, value);
	if (!hit) { return -1; }
	return (hit - data);
}
166
+
167
+ void coco_string_merge(wchar_t* &target, const wchar_t* appendix) {
168
+ if (!appendix) { return; }
169
+ wchar_t* data = coco_string_create_append(target, appendix);
170
+ delete [] target;
171
+ target = data;
172
+ }
173
+
174
// Reports whether the two non-NULL wide strings have identical contents.
bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) {
	const int order = wcscmp(data1, data2);
	return order == 0;
}
177
+
178
// Compares two non-NULL wide strings and returns the result of wcscmp:
// zero when equal, negative when data1 sorts first, positive otherwise.
int coco_string_compareto(const wchar_t* data1, const wchar_t* data2) {
	const int order = wcscmp(data1, data2);
	return order;
}
181
+
182
// Computes a non-negative hash of the wide string `data`.
//
// data - string to hash; NULL hashes to 0.
//
// The recurrence is h = (h * 7) ^ ch for each character, and the absolute
// value of the final h is returned. The arithmetic is carried out in
// unsigned int so that long strings wrap around instead of triggering
// signed-overflow undefined behaviour. The one value whose magnitude does
// not fit in an int (the most negative int) is mapped to 0, so the result
// is never negative.
int coco_string_hash(const wchar_t *data) {
	if (!data) { return 0; }

	const unsigned int maxInt = ~0u >> 1;   // largest value an int can hold
	unsigned int bits = 0;
	for (; *data != 0; ++data) {
		bits = (bits * 7u) ^ (unsigned int) *data;
	}

	// bits > maxInt corresponds to a negative int; take its magnitude.
	if (bits > maxInt) { bits = 0u - bits; }
	// Still out of range only for the most negative int, which has no
	// positive counterpart.
	if (bits > maxInt) { bits = 0; }
	return (int) bits;
}
192
+
193
+ // string handling, ascii character
194
+
195
// Returns a newly allocated wide string in which each byte of the narrow
// string `value` becomes one wide character with the same numeric value
// (0..255).
//
// value - NUL-terminated narrow string; NULL yields an empty wide string.
//
// Each byte is widened through unsigned char so that bytes >= 0x80 do not
// turn into negative wchar_t values on platforms where plain char is signed.
wchar_t* coco_string_create(const char* value) {
	int len = 0;
	if (value) { len = strlen(value); }
	wchar_t* data = new wchar_t[len + 1];
	for (int i = 0; i < len; ++i) { data[i] = (wchar_t) (unsigned char) value[i]; }
	data[len] = 0;
	return data;
}
203
+
204
// Returns a newly allocated narrow string in which each wide character of
// `value` is cast to char. A NULL `value` yields an empty string.
char* coco_string_create_char(const wchar_t *value) {
	const int count = value ? (int) wcslen(value) : 0;
	char* narrow = new char[count + 1];
	int k = 0;
	while (k < count) {
		narrow[k] = (char) value[k];
		++k;
	}
	narrow[count] = 0;
	return narrow;
}
211
+
212
// Frees a narrow string allocated with new[] and resets the caller's
// pointer to NULL. Passing a NULL pointer is harmless.
void coco_string_delete(char* &data) {
	char* released = data;
	data = NULL;
	delete [] released;
}
216
+
217
+
218
+ Token::Token() {
219
+ kind = 0;
220
+ pos = 0;
221
+ col = 0;
222
+ line = 0;
223
+ val = NULL;
224
+ next = NULL;
225
+ }
226
+
227
+ Token::~Token() {
228
+ coco_string_delete(val);
229
+ }
230
+
231
+ Buffer::Buffer(FILE* s, bool isUserStream) {
232
+ // ensure binary read on windows
233
+ #if _MSC_VER >= 1300
234
+ _setmode(_fileno(s), _O_BINARY);
235
+ #endif
236
+ stream = s; this->isUserStream = isUserStream;
237
+ if (CanSeek()) {
238
+ fseek(s, 0, SEEK_END);
239
+ fileLen = ftell(s);
240
+ fseek(s, 0, SEEK_SET);
241
+ bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH;
242
+ bufStart = INT_MAX; // nothing in the buffer so far
243
+ } else {
244
+ fileLen = bufLen = bufStart = 0;
245
+ }
246
+ bufCapacity = (bufLen>0) ? bufLen : COCO_MIN_BUFFER_LENGTH;
247
+ buf = new unsigned char[bufCapacity];
248
+ if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start)
249
+ else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid
250
+ if (bufLen == fileLen && CanSeek()) Close();
251
+ }
252
+
253
+ Buffer::Buffer(Buffer *b) {
254
+ buf = b->buf;
255
+ bufCapacity = b->bufCapacity;
256
+ b->buf = NULL;
257
+ bufStart = b->bufStart;
258
+ bufLen = b->bufLen;
259
+ fileLen = b->fileLen;
260
+ bufPos = b->bufPos;
261
+ stream = b->stream;
262
+ b->stream = NULL;
263
+ isUserStream = b->isUserStream;
264
+ }
265
+
266
+ Buffer::Buffer(const unsigned char* buf, int len) {
267
+ this->buf = new unsigned char[len];
268
+ memcpy(this->buf, buf, len*sizeof(unsigned char));
269
+ bufStart = 0;
270
+ bufCapacity = bufLen = len;
271
+ fileLen = len;
272
+ bufPos = 0;
273
+ stream = NULL;
274
+ }
275
+
276
+ Buffer::~Buffer() {
277
+ Close();
278
+ if (buf != NULL) {
279
+ delete [] buf;
280
+ buf = NULL;
281
+ }
282
+ }
283
+
284
+ void Buffer::Close() {
285
+ if (!isUserStream && stream != NULL) {
286
+ fclose(stream);
287
+ stream = NULL;
288
+ }
289
+ }
290
+
291
+ int Buffer::Read() {
292
+ if (bufPos < bufLen) {
293
+ return buf[bufPos++];
294
+ } else if (GetPos() < fileLen) {
295
+ SetPos(GetPos()); // shift buffer start to Pos
296
+ return buf[bufPos++];
297
+ } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) {
298
+ return buf[bufPos++];
299
+ } else {
300
+ return EoF;
301
+ }
302
+ }
303
+
304
+ int Buffer::Peek() {
305
+ int curPos = GetPos();
306
+ int ch = Read();
307
+ SetPos(curPos);
308
+ return ch;
309
+ }
310
+
311
+ // beg .. begin, zero-based, inclusive, in byte
312
+ // end .. end, zero-based, exclusive, in byte
313
+ wchar_t* Buffer::GetString(int beg, int end) {
314
+ int len = 0;
315
+ wchar_t *buf = new wchar_t[end - beg];
316
+ int oldPos = GetPos();
317
+ SetPos(beg);
318
+ while (GetPos() < end) buf[len++] = (wchar_t) Read();
319
+ SetPos(oldPos);
320
+ wchar_t *res = coco_string_create(buf, 0, len);
321
+ coco_string_delete(buf);
322
+ return res;
323
+ }
324
+
325
+ int Buffer::GetPos() {
326
+ return bufPos + bufStart;
327
+ }
328
+
329
+ void Buffer::SetPos(int value) {
330
+ if ((value >= fileLen) && (stream != NULL) && !CanSeek()) {
331
+ // Wanted position is after buffer and the stream
332
+ // is not seek-able e.g. network or console,
333
+ // thus we have to read the stream manually till
334
+ // the wanted position is in sight.
335
+ while ((value >= fileLen) && (ReadNextStreamChunk() > 0));
336
+ }
337
+
338
+ if ((value < 0) || (value > fileLen)) {
339
+ wprintf(L"--- buffer out of bounds access, position: %d\n", value);
340
+ exit(1);
341
+ }
342
+
343
+ if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer
344
+ bufPos = value - bufStart;
345
+ } else if (stream != NULL) { // must be swapped in
346
+ fseek(stream, value, SEEK_SET);
347
+ bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream);
348
+ bufStart = value; bufPos = 0;
349
+ } else {
350
+ bufPos = fileLen - bufStart; // make Pos return fileLen
351
+ }
352
+ }
353
+
354
+ // Read the next chunk of bytes from the stream, increases the buffer
355
+ // if needed and updates the fields fileLen and bufLen.
356
+ // Returns the number of bytes read.
357
+ int Buffer::ReadNextStreamChunk() {
358
+ int free = bufCapacity - bufLen;
359
+ if (free == 0) {
360
+ // in the case of a growing input stream
361
+ // we can neither seek in the stream, nor can we
362
+ // foresee the maximum length, thus we must adapt
363
+ // the buffer size on demand.
364
+ bufCapacity = bufLen * 2;
365
+ unsigned char *newBuf = new unsigned char[bufCapacity];
366
+ memcpy(newBuf, buf, bufLen*sizeof(unsigned char));
367
+ delete [] buf;
368
+ buf = newBuf;
369
+ free = bufLen;
370
+ }
371
+ int read = fread(buf + bufLen, sizeof(unsigned char), free, stream);
372
+ if (read > 0) {
373
+ fileLen = bufLen = (bufLen + read);
374
+ return read;
375
+ }
376
+ // end of stream reached
377
+ return 0;
378
+ }
379
+
380
+ bool Buffer::CanSeek() {
381
+ return (stream != NULL) && (ftell(stream) != -1);
382
+ }
383
+
384
+ int UTF8Buffer::Read() {
385
+ int ch;
386
+ do {
387
+ ch = Buffer::Read();
388
+ // until we find a utf8 start (0xxxxxxx or 11xxxxxx)
389
+ } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF));
390
+ if (ch < 128 || ch == EoF) {
391
+ // nothing to do, first 127 chars are the same in ascii and utf8
392
+ // 0xxxxxxx or end of file character
393
+ } else if ((ch & 0xF0) == 0xF0) {
394
+ // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
395
+ int c1 = ch & 0x07; ch = Buffer::Read();
396
+ int c2 = ch & 0x3F; ch = Buffer::Read();
397
+ int c3 = ch & 0x3F; ch = Buffer::Read();
398
+ int c4 = ch & 0x3F;
399
+ ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4;
400
+ } else if ((ch & 0xE0) == 0xE0) {
401
+ // 1110xxxx 10xxxxxx 10xxxxxx
402
+ int c1 = ch & 0x0F; ch = Buffer::Read();
403
+ int c2 = ch & 0x3F; ch = Buffer::Read();
404
+ int c3 = ch & 0x3F;
405
+ ch = (((c1 << 6) | c2) << 6) | c3;
406
+ } else if ((ch & 0xC0) == 0xC0) {
407
+ // 110xxxxx 10xxxxxx
408
+ int c1 = ch & 0x1F; ch = Buffer::Read();
409
+ int c2 = ch & 0x3F;
410
+ ch = (c1 << 6) | c2;
411
+ }
412
+ return ch;
413
+ }
414
+
415
+ Scanner::Scanner(const unsigned char* buf, int len) {
416
+ buffer = new Buffer(buf, len);
417
+ Init();
418
+ }
419
+
420
+ Scanner::Scanner(const wchar_t* fileName) {
421
+ FILE* stream;
422
+ char *chFileName = coco_string_create_char(fileName);
423
+ if ((stream = fopen(chFileName, "rb")) == NULL) {
424
+ wprintf(L"--- Cannot open file %ls\n", fileName);
425
+ exit(1);
426
+ }
427
+ coco_string_delete(chFileName);
428
+ buffer = new Buffer(stream, false);
429
+ Init();
430
+ }
431
+
432
+ Scanner::Scanner(FILE* s) {
433
+ buffer = new Buffer(s, true);
434
+ Init();
435
+ }
436
+
437
+ Scanner::~Scanner() {
438
+ char* cur = (char*) firstHeap;
439
+
440
+ while(cur != NULL) {
441
+ cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE);
442
+ free(firstHeap);
443
+ firstHeap = cur;
444
+ }
445
+ delete [] tval;
446
+ delete buffer;
447
+ }
448
+
449
// Scanner::Init - common set-up called by every Scanner constructor after
// `buffer` has been assigned.
// It (1) sets EOL, eofSym, maxT and noSym, (2) fills `start` with the state
// number associated with each starting character code, (3) registers the
// keyword literals with their token kinds, (4) allocates the token text
// array and the first heap block, (5) reads the first character and, when
// it is 0xEF, checks for the rest of a UTF-8 byte order mark and switches
// to a UTF8Buffer, and (6) creates the initial dummy token.
+ void Scanner::Init() {
450
+ EOL = '\n';
451
+ eofSym = 0;
452
+ maxT = 41;
453
+ noSym = 41;
454
+ int i;
// character codes 65..90 (A-Z), 95 (_) and 97..122 (a-z) map to state 1,
// 48..57 (0-9) to state 2; the remaining entries are single characters
455
+ for (i = 65; i <= 90; ++i) start.set(i, 1);
456
+ for (i = 95; i <= 95; ++i) start.set(i, 1);
457
+ for (i = 97; i <= 122; ++i) start.set(i, 1);
458
+ for (i = 48; i <= 57; ++i) start.set(i, 2);
459
+ start.set(34, 12);
460
+ start.set(39, 5);
461
+ start.set(36, 13);
462
+ start.set(61, 16);
463
+ start.set(46, 31);
464
+ start.set(43, 17);
465
+ start.set(45, 18);
466
+ start.set(60, 32);
467
+ start.set(62, 20);
468
+ start.set(124, 23);
469
+ start.set(40, 33);
470
+ start.set(41, 24);
471
+ start.set(91, 25);
472
+ start.set(93, 26);
473
+ start.set(123, 27);
474
+ start.set(125, 28);
475
+ start.set(Buffer::EoF, -1);
// keyword literal -> token kind; NextToken looks these up for kind-1 tokens
476
+ keywords.set(L"COMPILER", 6);
477
+ keywords.set(L"IGNORECASE", 7);
478
+ keywords.set(L"CHARACTERS", 8);
479
+ keywords.set(L"TOKENS", 9);
480
+ keywords.set(L"PRAGMAS", 10);
481
+ keywords.set(L"COMMENTS", 11);
482
+ keywords.set(L"FROM", 12);
483
+ keywords.set(L"TO", 13);
484
+ keywords.set(L"NESTED", 14);
485
+ keywords.set(L"IGNORE", 15);
486
+ keywords.set(L"PRODUCTIONS", 16);
487
+ keywords.set(L"END", 19);
488
+ keywords.set(L"ANY", 23);
489
+ keywords.set(L"WEAK", 29);
490
+ keywords.set(L"SYNC", 36);
491
+ keywords.set(L"IF", 37);
492
+ keywords.set(L"CONTEXT", 38);
493
+
494
+
495
+ tvalLength = 128;
496
+ tval = new wchar_t[tvalLength]; // text of current token
497
+
498
+ // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block
499
+ heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*));
500
+ firstHeap = heap;
501
+ heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE);
502
+ *heapEnd = 0;
503
+ heapTop = heap;
504
+ if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) {
505
+ wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n");
506
+ exit(1);
507
+ }
508
+
509
+ pos = -1; line = 1; col = 0; charPos = -1;
510
+ oldEols = 0;
511
+ NextCh();
512
+ if (ch == 0xEF) { // check optional byte order mark for UTF-8
513
+ NextCh(); int ch1 = ch;
514
+ NextCh(); int ch2 = ch;
515
+ if (ch1 != 0xBB || ch2 != 0xBF) {
516
+ wprintf(L"Illegal byte order mark at start of file");
517
+ exit(1);
518
+ }
// wrap the current buffer in a UTF8Buffer, restart column and character
// counting, and read the first character after the mark
519
+ Buffer *oldBuf = buffer;
520
+ buffer = new UTF8Buffer(buffer); col = 0; charPos = -1;
521
+ delete oldBuf; oldBuf = NULL;
522
+ NextCh();
523
+ }
524
+
525
+
526
+ pt = tokens = CreateToken(); // first token is a dummy
527
+ }
528
+
529
+ void Scanner::NextCh() {
530
+ if (oldEols > 0) { ch = EOL; oldEols--; }
531
+ else {
532
+ pos = buffer->GetPos();
533
+ // buffer reads unicode chars, if UTF8 has been detected
534
+ ch = buffer->Read(); col++; charPos++;
535
+ // replace isolated '\r' by '\n' in order to make
536
+ // eol handling uniform across Windows, Unix and Mac
537
+ if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL;
538
+ if (ch == EOL) { line++; col = 0; }
539
+ }
540
+
541
+ }
542
+
543
+ void Scanner::AddCh() {
544
+ if (tlen >= tvalLength) {
545
+ tvalLength *= 2;
546
+ wchar_t *newBuf = new wchar_t[tvalLength];
547
+ memcpy(newBuf, tval, tlen*sizeof(wchar_t));
548
+ delete [] tval;
549
+ tval = newBuf;
550
+ }
551
+ if (ch != Buffer::EoF) {
552
+ tval[tlen++] = ch;
553
+ NextCh();
554
+ }
555
+ }
556
+
557
+
558
+ bool Scanner::Comment0() {
559
+ int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;
560
+ NextCh();
561
+ if (ch == L'/') {
562
+ NextCh();
563
+ for(;;) {
564
+ if (ch == 10) {
565
+ level--;
566
+ if (level == 0) { oldEols = line - line0; NextCh(); return true; }
567
+ NextCh();
568
+ } else if (ch == buffer->EoF) return false;
569
+ else NextCh();
570
+ }
571
+ } else {
572
+ buffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0;
573
+ }
574
+ return false;
575
+ }
576
+
577
+ bool Scanner::Comment1() {
578
+ int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;
579
+ NextCh();
580
+ if (ch == L'*') {
581
+ NextCh();
582
+ for(;;) {
583
+ if (ch == L'*') {
584
+ NextCh();
585
+ if (ch == L'/') {
586
+ level--;
587
+ if (level == 0) { oldEols = line - line0; NextCh(); return true; }
588
+ NextCh();
589
+ }
590
+ } else if (ch == L'/') {
591
+ NextCh();
592
+ if (ch == L'*') {
593
+ level++; NextCh();
594
+ }
595
+ } else if (ch == buffer->EoF) return false;
596
+ else NextCh();
597
+ }
598
+ } else {
599
+ buffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0;
600
+ }
601
+ return false;
602
+ }
603
+
604
+
605
+ void Scanner::CreateHeapBlock() {
606
+ void* newHeap;
607
+ char* cur = (char*) firstHeap;
608
+
609
+ while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) {
610
+ cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE));
611
+ free(firstHeap);
612
+ firstHeap = cur;
613
+ }
614
+
615
+ // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block
616
+ newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*));
617
+ *heapEnd = newHeap;
618
+ heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE);
619
+ *heapEnd = 0;
620
+ heap = newHeap;
621
+ heapTop = heap;
622
+ }
623
+
624
+ Token* Scanner::CreateToken() {
625
+ Token *t;
626
+ if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) {
627
+ CreateHeapBlock();
628
+ }
629
+ t = (Token*) heapTop;
630
+ heapTop = (void*) ((char*) heapTop + sizeof(Token));
631
+ t->val = NULL;
632
+ t->next = NULL;
633
+ return t;
634
+ }
635
+
636
+ void Scanner::AppendVal(Token *t) {
637
+ int reqMem = (tlen + 1) * sizeof(wchar_t);
638
+ if (((char*) heapTop + reqMem) >= (char*) heapEnd) {
639
+ if (reqMem > COCO_HEAP_BLOCK_SIZE) {
640
+ wprintf(L"--- Too long token value\n");
641
+ exit(1);
642
+ }
643
+ CreateHeapBlock();
644
+ }
645
+ t->val = (wchar_t*) heapTop;
646
+ heapTop = (void*) ((char*) heapTop + reqMem);
647
+
648
+ wcsncpy(t->val, tval, tlen);
649
+ t->val[tlen] = L'\0';
650
+ }
651
+
652
// Scanner::NextToken - scans the next token from the input and returns it.
// Blanks and the character codes 9, 10 and 13 are skipped first; when a
// comment is recognised by Comment0 or Comment1 the function starts over.
// A token is then created and stamped with the current pos/col/line/charPos,
// the start state for `ch` is looked up in `start`, and the switch below
// runs the state machine. After the switch the collected text is attached
// to the token with AppendVal.
+ Token* Scanner::NextToken() {
653
+ while (ch == ' ' ||
654
+ (ch >= 9 && ch <= 10) || ch == 13
655
+ ) NextCh();
656
+ if ((ch == L'/' && Comment0()) || (ch == L'/' && Comment1())) return NextToken();
657
+ int recKind = noSym;
658
+ int recEnd = pos;
659
+ t = CreateToken();
660
+ t->pos = pos; t->col = col; t->line = line; t->charPos = charPos;
661
+ int state = start.state(ch);
662
+ tlen = 0; AddCh();
663
+
// Each `case N:` / `case_N:` pair is one state: the switch enters at the
// start state and `goto case_N` performs the transitions. States that can
// end a token record its kind and end position in recKind / recEnd.
664
+ switch (state) {
665
+ case -1: { t->kind = eofSym; break; } // NextCh already done
// State 0: no transition was possible. If a token kind was recorded
// earlier, the text is cut back to recEnd and the scanner is repositioned
// behind it; otherwise recKind is still noSym.
666
+ case 0: {
667
+ case_0:
668
+ if (recKind != noSym) {
669
+ tlen = recEnd - t->pos;
670
+ SetScannerBehindT();
671
+ }
672
+ t->kind = recKind; break;
673
+ } // NextCh already done
674
+ case 1:
675
+ case_1:
676
+ recEnd = pos; recKind = 1;
677
+ if ((ch >= L'0' && ch <= L'9') || (ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_1;}
678
+ else {t->kind = 1; wchar_t *literal = coco_string_create(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;}
679
+ case 2:
680
+ case_2:
681
+ recEnd = pos; recKind = 2;
682
+ if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_2;}
683
+ else {t->kind = 2; break;}
684
+ case 3:
685
+ case_3:
686
+ {t->kind = 3; break;}
687
+ case 4:
688
+ case_4:
689
+ {t->kind = 4; break;}
690
+ case 5:
691
+ if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= L'&') || (ch >= L'(' && ch <= L'[') || (ch >= L']' && ch <= 65535)) {AddCh(); goto case_6;}
692
+ else if (ch == 92) {AddCh(); goto case_7;}
693
+ else {goto case_0;}
694
+ case 6:
695
+ case_6:
696
+ if (ch == 39) {AddCh(); goto case_9;}
697
+ else {goto case_0;}
698
+ case 7:
699
+ case_7:
700
+ if ((ch >= L' ' && ch <= L'~')) {AddCh(); goto case_8;}
701
+ else {goto case_0;}
702
+ case 8:
703
+ case_8:
704
+ if ((ch >= L'0' && ch <= L'9') || (ch >= L'a' && ch <= L'f')) {AddCh(); goto case_8;}
705
+ else if (ch == 39) {AddCh(); goto case_9;}
706
+ else {goto case_0;}
707
+ case 9:
708
+ case_9:
709
+ {t->kind = 5; break;}
710
+ case 10:
711
+ case_10:
712
+ recEnd = pos; recKind = 42;
713
+ if ((ch >= L'0' && ch <= L'9') || (ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_10;}
714
+ else {t->kind = 42; break;}
715
+ case 11:
716
+ case_11:
717
+ recEnd = pos; recKind = 43;
718
+ if ((ch >= L'-' && ch <= L'.') || (ch >= L'0' && ch <= L':') || (ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_11;}
719
+ else {t->kind = 43; break;}
720
+ case 12:
721
+ case_12:
722
+ if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= L'!') || (ch >= L'#' && ch <= L'[') || (ch >= L']' && ch <= 65535)) {AddCh(); goto case_12;}
723
+ else if (ch == 10 || ch == 13) {AddCh(); goto case_4;}
724
+ else if (ch == L'"') {AddCh(); goto case_3;}
725
+ else if (ch == 92) {AddCh(); goto case_14;}
726
+ else {goto case_0;}
727
+ case 13:
728
+ recEnd = pos; recKind = 42;
729
+ if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_10;}
730
+ else if ((ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_15;}
731
+ else {t->kind = 42; break;}
732
+ case 14:
733
+ case_14:
734
+ if ((ch >= L' ' && ch <= L'~')) {AddCh(); goto case_12;}
735
+ else {goto case_0;}
736
+ case 15:
737
+ case_15:
738
+ recEnd = pos; recKind = 42;
739
+ if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_10;}
740
+ else if ((ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_15;}
741
+ else if (ch == L'=') {AddCh(); goto case_11;}
742
+ else {t->kind = 42; break;}
743
+ case 16:
744
+ {t->kind = 17; break;}
745
+ case 17:
746
+ {t->kind = 20; break;}
747
+ case 18:
748
+ {t->kind = 21; break;}
749
+ case 19:
750
+ case_19:
751
+ {t->kind = 22; break;}
752
+ case 20:
753
+ {t->kind = 25; break;}
754
+ case 21:
755
+ case_21:
756
+ {t->kind = 26; break;}
757
+ case 22:
758
+ case_22:
759
+ {t->kind = 27; break;}
760
+ case 23:
761
+ {t->kind = 28; break;}
762
+ case 24:
763
+ {t->kind = 31; break;}
764
+ case 25:
765
+ {t->kind = 32; break;}
766
+ case 26:
767
+ {t->kind = 33; break;}
768
+ case 27:
769
+ {t->kind = 34; break;}
770
+ case 28:
771
+ {t->kind = 35; break;}
772
+ case 29:
773
+ case_29:
774
+ {t->kind = 39; break;}
775
+ case 30:
776
+ case_30:
777
+ {t->kind = 40; break;}
778
+ case 31:
779
+ recEnd = pos; recKind = 18;
780
+ if (ch == L'.') {AddCh(); goto case_19;}
781
+ else if (ch == L'>') {AddCh(); goto case_22;}
782
+ else if (ch == L')') {AddCh(); goto case_30;}
783
+ else {t->kind = 18; break;}
784
+ case 32:
785
+ recEnd = pos; recKind = 24;
786
+ if (ch == L'.') {AddCh(); goto case_21;}
787
+ else {t->kind = 24; break;}
788
+ case 33:
789
+ recEnd = pos; recKind = 30;
790
+ if (ch == L'.') {AddCh(); goto case_29;}
791
+ else {t->kind = 30; break;}
792
+
793
+ }
794
+ AppendVal(t);
795
+ return t;
796
+ }
797
+
798
+ void Scanner::SetScannerBehindT() {
799
+ buffer->SetPos(t->pos);
800
+ NextCh();
801
+ line = t->line; col = t->col; charPos = t->charPos;
802
+ for (int i = 0; i < tlen; i++) NextCh();
803
+ }
804
+
805
+ // get the next token (possibly a token already seen during peeking)
806
+ Token* Scanner::Scan() {
807
+ if (tokens->next == NULL) {
808
+ return pt = tokens = NextToken();
809
+ } else {
810
+ pt = tokens = tokens->next;
811
+ return tokens;
812
+ }
813
+ }
814
+
815
+ // peek for the next token, ignore pragmas
816
+ Token* Scanner::Peek() {
817
+ do {
818
+ if (pt->next == NULL) {
819
+ pt->next = NextToken();
820
+ }
821
+ pt = pt->next;
822
+ } while (pt->kind > maxT); // skip pragmas
823
+
824
+ return pt;
825
+ }
826
+
827
+ // make sure that peeking starts at the current scan position
828
+ void Scanner::ResetPeek() {
829
+ pt = tokens;
830
+ }
831
+
832
+ } // namespace
833
+