ruco-cpp 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/Rakefile +17 -0
  3. data/bin/console +14 -0
  4. data/bin/ruco +30 -0
  5. data/bin/setup +7 -0
  6. data/data/ruco/Parser.frame +359 -0
  7. data/data/ruco/Scanner.frame +896 -0
  8. data/data/ruco/picojson/Changes +14 -0
  9. data/data/ruco/picojson/LICENSE +25 -0
  10. data/data/ruco/picojson/Makefile +8 -0
  11. data/data/ruco/picojson/README.mkdn +183 -0
  12. data/data/ruco/picojson/examples/github-issues.cc +110 -0
  13. data/data/ruco/picojson/examples/iostream.cc +70 -0
  14. data/data/ruco/picojson/examples/streaming.cc +76 -0
  15. data/data/ruco/picojson/picojson.h +1299 -0
  16. data/ext/cocor/Action.cpp +81 -0
  17. data/ext/cocor/Action.h +59 -0
  18. data/ext/cocor/ArrayList.cpp +79 -0
  19. data/ext/cocor/ArrayList.h +52 -0
  20. data/ext/cocor/BitArray.cpp +156 -0
  21. data/ext/cocor/BitArray.h +68 -0
  22. data/ext/cocor/CharClass.cpp +42 -0
  23. data/ext/cocor/CharClass.h +48 -0
  24. data/ext/cocor/CharSet.cpp +166 -0
  25. data/ext/cocor/CharSet.h +68 -0
  26. data/ext/cocor/Coco.atg +528 -0
  27. data/ext/cocor/Coco.cpp +173 -0
  28. data/ext/cocor/Comment.cpp +45 -0
  29. data/ext/cocor/Comment.h +51 -0
  30. data/ext/cocor/Copyright.frame +27 -0
  31. data/ext/cocor/DFA.cpp +865 -0
  32. data/ext/cocor/DFA.h +132 -0
  33. data/ext/cocor/Generator.cpp +182 -0
  34. data/ext/cocor/Generator.h +61 -0
  35. data/ext/cocor/Graph.h +59 -0
  36. data/ext/cocor/HashTable.cpp +115 -0
  37. data/ext/cocor/HashTable.h +84 -0
  38. data/ext/cocor/Makefile +11 -0
  39. data/ext/cocor/Melted.cpp +39 -0
  40. data/ext/cocor/Melted.h +51 -0
  41. data/ext/cocor/Node.cpp +69 -0
  42. data/ext/cocor/Node.h +86 -0
  43. data/ext/cocor/Parser.cpp +925 -0
  44. data/ext/cocor/Parser.frame +326 -0
  45. data/ext/cocor/Parser.h +153 -0
  46. data/ext/cocor/ParserGen.cpp +486 -0
  47. data/ext/cocor/ParserGen.h +99 -0
  48. data/ext/cocor/Position.cpp +37 -0
  49. data/ext/cocor/Position.h +46 -0
  50. data/ext/cocor/README.md +12 -0
  51. data/ext/cocor/Scanner.cpp +833 -0
  52. data/ext/cocor/Scanner.frame +897 -0
  53. data/ext/cocor/Scanner.h +291 -0
  54. data/ext/cocor/Sets.h +84 -0
  55. data/ext/cocor/SortedList.cpp +141 -0
  56. data/ext/cocor/SortedList.h +68 -0
  57. data/ext/cocor/State.cpp +77 -0
  58. data/ext/cocor/State.h +55 -0
  59. data/ext/cocor/StringBuilder.cpp +88 -0
  60. data/ext/cocor/StringBuilder.h +29 -0
  61. data/ext/cocor/Symbol.cpp +61 -0
  62. data/ext/cocor/Symbol.h +70 -0
  63. data/ext/cocor/Tab.cpp +1248 -0
  64. data/ext/cocor/Tab.h +245 -0
  65. data/ext/cocor/Target.cpp +41 -0
  66. data/ext/cocor/Target.h +48 -0
  67. data/ext/cocor/build.bat +3 -0
  68. data/ext/cocor/build.sh +4 -0
  69. data/ext/cocor/coc.bat +1 -0
  70. data/ext/cocor/coc.sh +2 -0
  71. data/ext/cocor/cocor_ruby_ext.cpp +124 -0
  72. data/ext/cocor/cygBuild.bat +1 -0
  73. data/ext/cocor/extconf.rb +5 -0
  74. data/ext/cocor/mingwbuild.bat +2 -0
  75. data/ext/cocor/mkmf.log +57 -0
  76. data/ext/cocor/zipsources.bat +1 -0
  77. data/lib/cocor.rb +14 -0
  78. data/lib/ruco/version.rb +3 -0
  79. data/lib/ruco.rb +728 -0
  80. metadata +195 -0
@@ -0,0 +1,897 @@
1
+ /*----------------------------------------------------------------------
2
+ Compiler Generator Coco/R,
3
+ Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz
4
+ extended by M. Loeberbauer & A. Woess, Univ. of Linz
5
+ ported to C++ by Csaba Balazs, University of Szeged
6
+ with improvements by Pat Terry, Rhodes University
7
+
8
+ This program is free software; you can redistribute it and/or modify it
9
+ under the terms of the GNU General Public License as published by the
10
+ Free Software Foundation; either version 2, or (at your option) any
11
+ later version.
12
+
13
+ This program is distributed in the hope that it will be useful, but
14
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16
+ for more details.
17
+
18
+ You should have received a copy of the GNU General Public License along
19
+ with this program; if not, write to the Free Software Foundation, Inc.,
20
+ 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21
+
22
+ As an exception, it is allowed to write an extension of Coco/R that is
23
+ used as a plugin in non-free software.
24
+
25
+ If not otherwise stated, any source code generated by Coco/R (other than
26
+ Coco/R itself) does not fall under the GNU General Public License.
27
+ -----------------------------------------------------------------------*/
28
+
29
+ /*----------------------------------------------------------------------
30
+ Scanner.h Specification
31
+ -----------------------------------------------------------------------*/
32
+
33
+ -->begin
34
+
35
+ #if !defined(-->prefixCOCO_SCANNER_H__)
36
+ #define -->prefixCOCO_SCANNER_H__
37
+
38
+ #include <limits.h>
39
+ #include <stdio.h>
40
+ #include <stdlib.h>
41
+ #include <string.h>
42
+ #include <wchar.h>
43
+
44
+ // io.h and fcntl are used to ensure binary read from streams on windows
45
+ #if _MSC_VER >= 1300
46
+ #include <io.h>
47
+ #include <fcntl.h>
48
+ #endif
49
+
50
+ #if _MSC_VER >= 1400
51
+ #define coco_swprintf swprintf_s
52
+ #elif _MSC_VER >= 1300
53
+ #define coco_swprintf _snwprintf
54
+ #elif defined __MINGW32__
55
+ #define coco_swprintf _snwprintf
56
+ #else
57
+ // assume every other compiler knows swprintf
58
+ #define coco_swprintf swprintf
59
+ #endif
60
+
61
// Largest character code the scanner distinguishes; Buffer::EoF is defined
// as COCO_WCHAR_MAX + 1 so that it can never collide with a real character.
#define COCO_WCHAR_MAX 65535
// Capacity used for the input buffer when the stream length is unknown or 0.
#define COCO_MIN_BUFFER_LENGTH 1024
// Upper bound for the part of a seekable stream that is kept in memory at once.
#define COCO_MAX_BUFFER_LENGTH (64*COCO_MIN_BUFFER_LENGTH)
// Payload size in bytes of one token heap block; each block is allocated
// with room for one extra pointer that links to the next block.
#define COCO_HEAP_BLOCK_SIZE (64*1024)
// NOTE(review): not referenced in this frame; presumably used by generated
// code when handling C++ namespace names -- confirm.
#define COCO_CPP_NAMESPACE_SEPARATOR L':'
66
+
67
+ -->namespace_open
68
+
69
// string handling, wide character
//
// Every function returning wchar_t* / char* hands back an array allocated
// with new[]; release it with the matching coco_string_delete overload.

// Copy of the whole string (empty string when value is NULL).
wchar_t* coco_string_create(const wchar_t *value);
// Copy of value starting at startIndex up to the terminator.
wchar_t* coco_string_create(const wchar_t *value, int startIndex);
// Copy of length characters of value starting at startIndex.
wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length);
// Copy with 'a'..'z' mapped to 'A'..'Z'; returns NULL for NULL input.
wchar_t* coco_string_create_upper(const wchar_t* data);
// Copy with 'A'..'Z' mapped to 'a'..'z'; returns NULL for NULL input.
wchar_t* coco_string_create_lower(const wchar_t* data);
// Lower-cased copy of dataLen characters starting at startIndex.
wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen);
// New string holding data1 followed by data2 (NULL counts as empty).
wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2);
// New string holding data followed by the single character value.
wchar_t* coco_string_create_append(const wchar_t* data, const wchar_t value);
// delete[]s data and resets the caller's pointer to NULL.
void coco_string_delete(wchar_t* &data);
// Length in characters; 0 for NULL.
int coco_string_length(const wchar_t* data);
// True if data ends with value. Neither argument may be NULL.
bool coco_string_endswith(const wchar_t* data, const wchar_t *value);
// Index of the first occurrence of value in data, or -1.
int coco_string_indexof(const wchar_t* data, const wchar_t value);
// Index of the last occurrence of value in data, or -1.
int coco_string_lastindexof(const wchar_t* data, const wchar_t value);
// Replaces data by a new string data+value and frees the old one.
void coco_string_merge(wchar_t* &data, const wchar_t* value);
// True if both strings compare equal with wcscmp. Neither may be NULL.
bool coco_string_equal(const wchar_t* data1, const wchar_t* data2);
// wcscmp-style ordering of the two strings. Neither may be NULL.
int coco_string_compareto(const wchar_t* data1, const wchar_t* data2);
// Hash value of data; 0 for NULL.
int coco_string_hash(const wchar_t* data);

// string handling, ascii character

// Widens each char of value to a wchar_t (empty string when value is NULL).
wchar_t* coco_string_create(const char *value);
// Narrows each wchar_t of value to a char by plain cast.
char* coco_string_create_char(const wchar_t *value);
// delete[]s data and resets the caller's pointer to NULL.
void coco_string_delete(char* &data);
92
+
93
+
94
// One lexical token handed from the scanner to the parser.
// Tokens form a singly linked list through `next`, which lets the scanner
// keep tokens that were produced by peeking ahead.
class Token
{
public:
	int kind;     // token kind
	int pos;      // token position in bytes in the source text (starting at 0)
	int charPos;  // token position in characters in the source text (starting at 0)
	int col;      // token column (starting at 1)
	int line;     // token line (starting at 1)
	wchar_t* val; // token value
	Token *next;  // ML 2005-03-11 Peek tokens are kept in linked list

	Token();
	~Token();
};
108
+
109
// Byte-oriented input buffer over a FILE* stream or an in-memory copy.
class Buffer {
// This Buffer supports the following cases:
// 1) seekable stream (file)
//    a) whole stream in buffer
//    b) part of stream in buffer
// 2) non seekable stream (network, console)
private:
	unsigned char *buf; // input buffer
	int bufCapacity;    // capacity of buf
	int bufStart;       // position of first byte in buffer relative to input stream
	int bufLen;         // length of buffer
	int fileLen;        // length of input stream (may change if the stream is no file)
	int bufPos;         // current position in buffer
	FILE* stream;       // input stream (seekable)
	bool isUserStream;  // was the stream opened by the user?
	                    // (if so, Close() leaves it open)

	// Appends the next chunk of a non-seekable stream to buf, growing it if needed.
	int ReadNextStreamChunk();
	bool CanSeek();     // true if stream can be seeked otherwise false

public:
	// End-of-input marker returned by Read(); lies outside 0..COCO_WCHAR_MAX.
	static const int EoF = COCO_WCHAR_MAX + 1;

	// Reads from stream s; isUserStream tells whether the caller keeps ownership of s.
	Buffer(FILE* s, bool isUserStream);
	// Reads from a private copy of the len bytes at buf.
	Buffer(const unsigned char* buf, int len);
	// Takes over the byte array and stream of *b (b is left without either).
	Buffer(Buffer *b);
	virtual ~Buffer();

	virtual void Close();                         // closes the stream unless it is a user stream
	virtual int Read();                           // next byte, or EoF
	virtual int Peek();                           // next byte without consuming it
	virtual wchar_t* GetString(int beg, int end); // text in byte range [beg, end)
	virtual int GetPos();                         // current position in the input stream
	virtual void SetPos(int value);               // move to position value
};
143
+
144
// Buffer whose Read() decodes UTF-8 byte sequences into single code points.
class UTF8Buffer : public Buffer {
public:
	// Takes over the contents of b (see Buffer(Buffer*)).
	UTF8Buffer(Buffer *b) : Buffer(b) {};
	// Returns the next decoded character, or EoF.
	virtual int Read();
};
149
+
150
+ //-----------------------------------------------------------------------------------
151
+ // StartStates -- maps characters to start states of tokens
152
+ //-----------------------------------------------------------------------------------
153
class StartStates {
private:
	// One (character, state) pair in a bucket chain.
	class Elem {
	public:
		int key, val;
		Elem *next;
		Elem(int key, int val) : key(key), val(val), next(NULL) {}
	};

	Elem **tab;   // array of 128 bucket heads

	// Bucket index for a character code (negative keys wrap as unsigned).
	static int bucketOf(int key) { return (int) (((unsigned int) key) % 128); }

public:
	// Creates an empty table.
	StartStates() {
		tab = new Elem*[128];
		for (int b = 0; b < 128; ++b) tab[b] = NULL;
	}

	// Frees all chain entries and the bucket array.
	virtual ~StartStates() {
		for (int b = 0; b < 128; ++b) {
			for (Elem *cur = tab[b]; cur != NULL; ) {
				Elem *doomed = cur;
				cur = cur->next;
				delete doomed;
			}
		}
		delete [] tab;
	}

	// Registers val as the start state for character key.
	// A later entry for the same key hides earlier ones.
	void set(int key, int val) {
		Elem *&head = tab[bucketOf(key)];
		Elem *fresh = new Elem(key, val);
		fresh->next = head;
		head = fresh;
	}

	// Returns the start state registered for key, or 0 if there is none.
	int state(int key) {
		for (Elem *cur = tab[bucketOf(key)]; cur != NULL; cur = cur->next) {
			if (cur->key == key) return cur->val;
		}
		return 0;
	}
};
190
+
191
+ //-------------------------------------------------------------------------------------------
192
+ // KeywordMap -- maps strings to integers (identifiers to keyword kinds)
193
+ //-------------------------------------------------------------------------------------------
194
+ class KeywordMap {
195
+ private:
196
+ class Elem {
197
+ public:
198
+ wchar_t *key;
199
+ int val;
200
+ Elem *next;
201
+ Elem(const wchar_t *key, int val) { this->key = coco_string_create(key); this->val = val; next = NULL; }
202
+ virtual ~Elem() { coco_string_delete(key); }
203
+ };
204
+
205
+ Elem **tab;
206
+
207
+ public:
208
+ KeywordMap() { tab = new Elem*[128]; memset(tab, 0, 128 * sizeof(Elem*)); }
209
+ virtual ~KeywordMap() {
210
+ for (int i = 0; i < 128; ++i) {
211
+ Elem *e = tab[i];
212
+ while (e != NULL) {
213
+ Elem *next = e->next;
214
+ delete e;
215
+ e = next;
216
+ }
217
+ }
218
+ delete [] tab;
219
+ }
220
+
221
+ void set(const wchar_t *key, int val) {
222
+ Elem *e = new Elem(key, val);
223
+ int k = coco_string_hash(key) % 128;
224
+ e->next = tab[k]; tab[k] = e;
225
+ }
226
+
227
+ int get(const wchar_t *key, int defaultVal) {
228
+ Elem *e = tab[coco_string_hash(key) % 128];
229
+ while (e != NULL && !coco_string_equal(e->key, key)) e = e->next;
230
+ return e == NULL ? defaultVal : e->val;
231
+ }
232
+ };
233
+
234
// Scanner skeleton of the frame file. The lines starting with "-->" are
// placeholders that the generator replaces with grammar-specific code.
class Scanner {
private:
	// Token storage: a chain of malloc'ed blocks of COCO_HEAP_BLOCK_SIZE bytes,
	// each followed by a pointer to the next block.
	void *firstHeap;  // oldest block that is still allocated
	void *heap;       // block currently being filled
	void *heapTop;    // next free byte inside heap
	void **heapEnd;   // link slot at the end of heap (also marks its end)

	unsigned char EOL;
	int eofSym;
	int noSym;
	int maxT;
	int charSetSize;
	StartStates start;
	KeywordMap keywords;

	Token *t;         // current token
	wchar_t *tval;    // text of current token
	int tvalLength;   // allocated capacity of tval, in characters
	int tlen;         // length of current token

	Token *tokens;    // list of tokens already peeked (first token is a dummy)
	Token *pt;        // current peek token

	int ch;           // current input character
-->casing0
	int pos;          // byte position of current character
	int charPos;      // position by unicode characters starting with 0
	int line;         // line number of current character
	int col;          // column number of current character
	int oldEols;      // EOLs that appeared in a comment;

	void CreateHeapBlock();
	Token* CreateToken();
	void AppendVal(Token *t);
	void SetScannerBehindT();

	void Init();
	void NextCh();
	void AddCh();
-->commentsheader
	Token* NextToken();

public:
	Buffer *buffer;   // scanner buffer

	// Scans a copy of the len bytes at buf.
	Scanner(const unsigned char* buf, int len);
	// Opens and scans the named file; exits the process if it cannot be opened.
	Scanner(const wchar_t* fileName);
	// Scans an already opened stream, which stays owned by the caller.
	Scanner(FILE* s);
	~Scanner();
	Token* Scan();     // next token (possibly one already seen by Peek)
	Token* Peek();     // look ahead one more token, skipping pragmas
	void ResetPeek();  // restart peeking at the current scan position

}; // end Scanner
288
+
289
+ -->namespace_close
290
+
291
+ #endif
292
+
293
+ -->implementation
294
+
295
+ /*----------------------------------------------------------------------
296
+ Scanner.cpp Specification
297
+ -----------------------------------------------------------------------*/
298
+
299
+ -->begin
300
+
301
+ #include <memory.h>
302
+ #include <string.h>
303
+ #include "Scanner.h"
304
+
305
+ -->namespace_open
306
+
307
+
308
+ // string handling, wide character
309
+
310
+
311
+ wchar_t* coco_string_create(const wchar_t* value) {
312
+ return coco_string_create(value, 0);
313
+ }
314
+
315
+ wchar_t* coco_string_create(const wchar_t *value, int startIndex) {
316
+ int valueLen = 0;
317
+ int len = 0;
318
+
319
+ if (value) {
320
+ valueLen = wcslen(value);
321
+ len = valueLen - startIndex;
322
+ }
323
+
324
+ return coco_string_create(value, startIndex, len);
325
+ }
326
+
327
// Returns a newly allocated, zero-terminated copy of `length` characters of
// `value`, starting at `startIndex`.
//
// A NULL `value` or a non-positive `length` yields an empty string; the
// source pointer is not touched in that case. The result is allocated with
// new[] and must be released with coco_string_delete.
wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) {
	// Clamp so that neither the allocation size nor the copy count can go
	// negative (e.g. when a caller's startIndex lies past the end).
	const int len = (value != NULL && length > 0) ? length : 0;

	wchar_t* data = new wchar_t[len + 1];
	if (len > 0) { wcsncpy(data, value + startIndex, len); }
	data[len] = 0;

	return data;
}
338
+
339
// Returns a newly allocated copy of `data` in which the letters 'a'..'z'
// are replaced by 'A'..'Z'; all other characters are copied unchanged.
// Returns NULL when `data` is NULL.
wchar_t* coco_string_create_upper(const wchar_t* data) {
	if (data == NULL) return NULL;

	const int count = wcslen(data);
	wchar_t *upper = new wchar_t[count + 1];

	for (int k = 0; k < count; ++k) {
		const wchar_t c = data[k];
		const bool isLowerLetter = (c >= L'a') && (c <= L'z');
		upper[k] = isLowerLetter ? (wchar_t) (c + (L'A' - L'a')) : c;
	}
	upper[count] = L'\0';

	return upper;
}
357
+
358
+ wchar_t* coco_string_create_lower(const wchar_t* data) {
359
+ if (!data) { return NULL; }
360
+ int dataLen = wcslen(data);
361
+ return coco_string_create_lower(data, 0, dataLen);
362
+ }
363
+
364
// Returns a newly allocated, zero-terminated copy of the `dataLen`
// characters of `data` starting at `startIndex`, with 'A'..'Z' replaced by
// 'a'..'z'. Returns NULL when `data` is NULL.
//
// Only data[startIndex .. startIndex + dataLen - 1] is read, so `data` does
// not have to be zero-terminated (the scanner passes its token text buffer,
// which is not).
wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) {
	if (!data) { return NULL; }

	wchar_t* newData = new wchar_t[dataLen + 1];

	// Stop before dataLen: the element at startIndex + dataLen is outside the
	// requested range and may lie outside the caller's buffer.
	for (int i = 0; i < dataLen; i++) {
		const wchar_t ch = data[startIndex + i];
		if ((L'A' <= ch) && (ch <= L'Z')) {
			newData[i] = ch - (L'A' - L'a');
		}
		else { newData[i] = ch; }
	}
	newData[dataLen] = L'\0';
	return newData;
}
379
+
380
// Returns a newly allocated string consisting of `data1` followed by
// `data2`. A NULL argument is treated as an empty string.
wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) {
	const int headLen = (data1 != NULL) ? (int) wcslen(data1) : 0;
	const int tailLen = (data2 != NULL) ? (int) wcslen(data2) : 0;
	const int total = headLen + tailLen;

	wchar_t* joined = new wchar_t[total + 1];
	if (data1 != NULL) { wcscpy(joined, data1); }
	if (data2 != NULL) { wcscpy(joined + headLen, data2); }
	joined[total] = 0;

	return joined;
}
397
+
398
// Returns a newly allocated string consisting of `target` followed by the
// single character `appendix`. A NULL `target` is treated as an empty
// string, in which case no copy from `target` is attempted.
wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) {
	const int targetLen = (target != NULL) ? (int) wcslen(target) : 0;
	wchar_t* data = new wchar_t[targetLen + 2];
	// Never hand a NULL source to wcsncpy, even with a count of 0.
	if (targetLen > 0) { wcsncpy(data, target, targetLen); }
	data[targetLen] = appendix;
	data[targetLen + 1] = 0;
	return data;
}
406
+
407
// Releases a string obtained from one of the coco_string_create* functions
// and resets the caller's pointer to NULL. A NULL pointer is accepted.
void coco_string_delete(wchar_t* &data) {
	wchar_t* doomed = data;
	data = NULL;
	delete [] doomed;
}
411
+
412
// Returns the number of characters in `data`, or 0 when `data` is NULL.
int coco_string_length(const wchar_t* data) {
	return (data != NULL) ? (int) wcslen(data) : 0;
}
416
+
417
// Returns true if `data` ends with the string `end`.
// Both arguments must be non-NULL.
bool coco_string_endswith(const wchar_t* data, const wchar_t *end) {
	const int dataLen = wcslen(data);
	const int endLen = wcslen(end);
	if (endLen > dataLen) return false;   // suffix longer than the string
	const wchar_t* tail = data + dataLen - endLen;
	return wcscmp(tail, end) == 0;
}
422
+
423
// Returns the index of the first occurrence of `value` in `data`,
// or -1 if it does not occur. `data` must be non-NULL.
int coco_string_indexof(const wchar_t* data, const wchar_t value) {
	const wchar_t* hit = wcschr(data, value);
	return (hit != NULL) ? (int) (hit - data) : -1;
}
429
+
430
// Returns the index of the last occurrence of `value` in `data`,
// or -1 if it does not occur. `data` must be non-NULL.
int coco_string_lastindexof(const wchar_t* data, const wchar_t value) {
	const wchar_t* hit = wcsrchr(data, value);
	return (hit != NULL) ? (int) (hit - data) : -1;
}
436
+
437
+ void coco_string_merge(wchar_t* &target, const wchar_t* appendix) {
438
+ if (!appendix) { return; }
439
+ wchar_t* data = coco_string_create_append(target, appendix);
440
+ delete [] target;
441
+ target = data;
442
+ }
443
+
444
// Returns true if the two strings have identical contents.
// Both arguments must be non-NULL.
bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) {
	const int order = wcscmp(data1, data2);
	return order == 0;
}
447
+
448
// Three-way comparison of two strings with the semantics of wcscmp:
// negative, zero or positive as `data1` sorts before, equal to or after
// `data2`. Both arguments must be non-NULL.
int coco_string_compareto(const wchar_t* data1, const wchar_t* data2) {
	const int order = wcscmp(data1, data2);
	return order;
}
451
+
452
// Returns a non-negative hash value for `data` (0 for NULL or "").
//
// The value is h = (h * 7) ^ c folded over all characters. The arithmetic
// is carried out on unsigned int so that long strings wrap around instead
// of triggering signed overflow; for strings that never overflow the result
// is the plain value of that recurrence.
int coco_string_hash(const wchar_t *data) {
	if (!data) { return 0; }

	unsigned int acc = 0;
	for (; *data != 0; ++data) {
		acc = (acc * 7u) ^ static_cast<unsigned int>(*data);
	}

	int h = static_cast<int>(acc);
	// Callers use the hash as "hash % 128" to index a table, so it must not
	// be negative. INT_MIN has no positive counterpart and is mapped to 0.
	if (h < 0) { h = (h == INT_MIN) ? 0 : -h; }
	return h;
}
462
+
463
+ // string handling, ascii character
464
+
465
// Returns a newly allocated wide string in which every char of `value` has
// been converted to wchar_t by a plain cast (no character-set conversion).
// A NULL `value` yields an empty string.
wchar_t* coco_string_create(const char* value) {
	const int count = (value != NULL) ? (int) strlen(value) : 0;
	wchar_t* wide = new wchar_t[count + 1];
	wchar_t* out = wide;
	for (int k = 0; k < count; ++k) { *out++ = (wchar_t) value[k]; }
	*out = 0;
	return wide;
}
473
+
474
+ char* coco_string_create_char(const wchar_t *value) {
475
+ int len = coco_string_length(value);
476
+ char *res = new char[len + 1];
477
+ for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; }
478
+ res[len] = 0;
479
+ return res;
480
+ }
481
+
482
// Releases a narrow string obtained from coco_string_create_char and resets
// the caller's pointer to NULL. A NULL pointer is accepted.
void coco_string_delete(char* &data) {
	char* doomed = data;
	data = NULL;
	delete [] doomed;
}
486
+
487
+
488
+ Token::Token() {
489
+ kind = 0;
490
+ pos = 0;
491
+ col = 0;
492
+ line = 0;
493
+ val = NULL;
494
+ next = NULL;
495
+ }
496
+
497
+ Token::~Token() {
498
+ coco_string_delete(val);
499
+ }
500
+
501
+ Buffer::Buffer(FILE* s, bool isUserStream) {
502
+ // ensure binary read on windows
503
+ #if _MSC_VER >= 1300
504
+ _setmode(_fileno(s), _O_BINARY);
505
+ #endif
506
+ stream = s; this->isUserStream = isUserStream;
507
+ if (CanSeek()) {
508
+ fseek(s, 0, SEEK_END);
509
+ fileLen = ftell(s);
510
+ fseek(s, 0, SEEK_SET);
511
+ bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH;
512
+ bufStart = INT_MAX; // nothing in the buffer so far
513
+ } else {
514
+ fileLen = bufLen = bufStart = 0;
515
+ }
516
+ bufCapacity = (bufLen>0) ? bufLen : COCO_MIN_BUFFER_LENGTH;
517
+ buf = new unsigned char[bufCapacity];
518
+ if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start)
519
+ else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid
520
+ if (bufLen == fileLen && CanSeek()) Close();
521
+ }
522
+
523
+ Buffer::Buffer(Buffer *b) {
524
+ buf = b->buf;
525
+ bufCapacity = b->bufCapacity;
526
+ b->buf = NULL;
527
+ bufStart = b->bufStart;
528
+ bufLen = b->bufLen;
529
+ fileLen = b->fileLen;
530
+ bufPos = b->bufPos;
531
+ stream = b->stream;
532
+ b->stream = NULL;
533
+ isUserStream = b->isUserStream;
534
+ }
535
+
536
+ Buffer::Buffer(const unsigned char* buf, int len) {
537
+ this->buf = new unsigned char[len];
538
+ memcpy(this->buf, buf, len*sizeof(unsigned char));
539
+ bufStart = 0;
540
+ bufCapacity = bufLen = len;
541
+ fileLen = len;
542
+ bufPos = 0;
543
+ stream = NULL;
544
+ }
545
+
546
+ Buffer::~Buffer() {
547
+ Close();
548
+ if (buf != NULL) {
549
+ delete [] buf;
550
+ buf = NULL;
551
+ }
552
+ }
553
+
554
+ void Buffer::Close() {
555
+ if (!isUserStream && stream != NULL) {
556
+ fclose(stream);
557
+ stream = NULL;
558
+ }
559
+ }
560
+
561
+ int Buffer::Read() {
562
+ if (bufPos < bufLen) {
563
+ return buf[bufPos++];
564
+ } else if (GetPos() < fileLen) {
565
+ SetPos(GetPos()); // shift buffer start to Pos
566
+ return buf[bufPos++];
567
+ } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) {
568
+ return buf[bufPos++];
569
+ } else {
570
+ return EoF;
571
+ }
572
+ }
573
+
574
+ int Buffer::Peek() {
575
+ int curPos = GetPos();
576
+ int ch = Read();
577
+ SetPos(curPos);
578
+ return ch;
579
+ }
580
+
581
+ // beg .. begin, zero-based, inclusive, in byte
582
+ // end .. end, zero-based, exclusive, in byte
583
+ wchar_t* Buffer::GetString(int beg, int end) {
584
+ int len = 0;
585
+ wchar_t *buf = new wchar_t[end - beg];
586
+ int oldPos = GetPos();
587
+ SetPos(beg);
588
+ while (GetPos() < end) buf[len++] = (wchar_t) Read();
589
+ SetPos(oldPos);
590
+ wchar_t *res = coco_string_create(buf, 0, len);
591
+ coco_string_delete(buf);
592
+ return res;
593
+ }
594
+
595
+ int Buffer::GetPos() {
596
+ return bufPos + bufStart;
597
+ }
598
+
599
+ void Buffer::SetPos(int value) {
600
+ if ((value >= fileLen) && (stream != NULL) && !CanSeek()) {
601
+ // Wanted position is after buffer and the stream
602
+ // is not seek-able e.g. network or console,
603
+ // thus we have to read the stream manually till
604
+ // the wanted position is in sight.
605
+ while ((value >= fileLen) && (ReadNextStreamChunk() > 0));
606
+ }
607
+
608
+ if ((value < 0) || (value > fileLen)) {
609
+ wprintf(L"--- buffer out of bounds access, position: %d\n", value);
610
+ exit(1);
611
+ }
612
+
613
+ if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer
614
+ bufPos = value - bufStart;
615
+ } else if (stream != NULL) { // must be swapped in
616
+ fseek(stream, value, SEEK_SET);
617
+ bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream);
618
+ bufStart = value; bufPos = 0;
619
+ } else {
620
+ bufPos = fileLen - bufStart; // make Pos return fileLen
621
+ }
622
+ }
623
+
624
+ // Read the next chunk of bytes from the stream, increases the buffer
625
+ // if needed and updates the fields fileLen and bufLen.
626
+ // Returns the number of bytes read.
627
+ int Buffer::ReadNextStreamChunk() {
628
+ int free = bufCapacity - bufLen;
629
+ if (free == 0) {
630
+ // in the case of a growing input stream
631
+ // we can neither seek in the stream, nor can we
632
+ // foresee the maximum length, thus we must adapt
633
+ // the buffer size on demand.
634
+ bufCapacity = bufLen * 2;
635
+ unsigned char *newBuf = new unsigned char[bufCapacity];
636
+ memcpy(newBuf, buf, bufLen*sizeof(unsigned char));
637
+ delete [] buf;
638
+ buf = newBuf;
639
+ free = bufLen;
640
+ }
641
+ int read = fread(buf + bufLen, sizeof(unsigned char), free, stream);
642
+ if (read > 0) {
643
+ fileLen = bufLen = (bufLen + read);
644
+ return read;
645
+ }
646
+ // end of stream reached
647
+ return 0;
648
+ }
649
+
650
+ bool Buffer::CanSeek() {
651
+ return (stream != NULL) && (ftell(stream) != -1);
652
+ }
653
+
654
+ int UTF8Buffer::Read() {
655
+ int ch;
656
+ do {
657
+ ch = Buffer::Read();
658
+ // until we find a utf8 start (0xxxxxxx or 11xxxxxx)
659
+ } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF));
660
+ if (ch < 128 || ch == EoF) {
661
+ // nothing to do, first 127 chars are the same in ascii and utf8
662
+ // 0xxxxxxx or end of file character
663
+ } else if ((ch & 0xF0) == 0xF0) {
664
+ // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
665
+ int c1 = ch & 0x07; ch = Buffer::Read();
666
+ int c2 = ch & 0x3F; ch = Buffer::Read();
667
+ int c3 = ch & 0x3F; ch = Buffer::Read();
668
+ int c4 = ch & 0x3F;
669
+ ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4;
670
+ } else if ((ch & 0xE0) == 0xE0) {
671
+ // 1110xxxx 10xxxxxx 10xxxxxx
672
+ int c1 = ch & 0x0F; ch = Buffer::Read();
673
+ int c2 = ch & 0x3F; ch = Buffer::Read();
674
+ int c3 = ch & 0x3F;
675
+ ch = (((c1 << 6) | c2) << 6) | c3;
676
+ } else if ((ch & 0xC0) == 0xC0) {
677
+ // 110xxxxx 10xxxxxx
678
+ int c1 = ch & 0x1F; ch = Buffer::Read();
679
+ int c2 = ch & 0x3F;
680
+ ch = (c1 << 6) | c2;
681
+ }
682
+ return ch;
683
+ }
684
+
685
+ Scanner::Scanner(const unsigned char* buf, int len) {
686
+ buffer = new Buffer(buf, len);
687
+ Init();
688
+ }
689
+
690
+ Scanner::Scanner(const wchar_t* fileName) {
691
+ FILE* stream;
692
+ char *chFileName = coco_string_create_char(fileName);
693
+ if ((stream = fopen(chFileName, "rb")) == NULL) {
694
+ wprintf(L"--- Cannot open file %ls\n", fileName);
695
+ exit(1);
696
+ }
697
+ coco_string_delete(chFileName);
698
+ buffer = new Buffer(stream, false);
699
+ Init();
700
+ }
701
+
702
+ Scanner::Scanner(FILE* s) {
703
+ buffer = new Buffer(s, true);
704
+ Init();
705
+ }
706
+
707
+ Scanner::~Scanner() {
708
+ char* cur = (char*) firstHeap;
709
+
710
+ while(cur != NULL) {
711
+ cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE);
712
+ free(firstHeap);
713
+ firstHeap = cur;
714
+ }
715
+ delete [] tval;
716
+ delete buffer;
717
+ }
718
+
719
// Common initialisation used by all constructors; `buffer` must already be
// set. Sets up the token text buffer and the first token heap block, reads
// the first character, switches to a UTF8Buffer when the input starts with
// a UTF-8 byte order mark, and creates the dummy head of the token list.
// The "-->" lines are filled in by the generator.
void Scanner::Init() {
	EOL    = '\n';
	eofSym = 0;
-->declarations

	tvalLength = 128;
	tval = new wchar_t[tvalLength]; // text of current token

	// COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block
	heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*));
	firstHeap = heap;
	// heapEnd addresses the link slot behind the payload; 0 = no next block
	heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE);
	*heapEnd = 0;
	heapTop = heap;
	if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) {
		wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n");
		exit(1);
	}

	// NextCh() increments col and charPos, so after the first call below
	// col is 1 and charPos is 0 (unless the character is an end of line).
	pos = -1; line = 1; col = 0; charPos = -1;
	oldEols = 0;
	NextCh();
	if (ch == 0xEF) { // check optional byte order mark for UTF-8
		NextCh(); int ch1 = ch;
		NextCh(); int ch2 = ch;
		if (ch1 != 0xBB || ch2 != 0xBF) {
			wprintf(L"Illegal byte order mark at start of file");
			exit(1);
		}
		// The UTF8Buffer takes over the byte array and stream of the old
		// buffer, so deleting the old one does not release them.
		Buffer *oldBuf = buffer;
		buffer = new UTF8Buffer(buffer); col = 0; charPos = -1;
		delete oldBuf; oldBuf = NULL;
		NextCh();
	}

-->initialization
	pt = tokens = CreateToken(); // first token is a dummy
}
757
+
758
// Advances to the next input character and stores it in `ch`, keeping
// pos, charPos, col and line up to date. Pending end-of-line characters
// recorded in oldEols are delivered first, without reading from the buffer.
void Scanner::NextCh() {
	if (oldEols > 0) { ch = EOL; oldEols--; }
	else {
		pos = buffer->GetPos();
		// buffer reads unicode chars, if UTF8 has been detected
		ch = buffer->Read(); col++; charPos++;
		// replace isolated '\r' by '\n' in order to make
		// eol handling uniform across Windows, Unix and Mac
		if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL;
		if (ch == EOL) { line++; col = 0; }
	}
-->casing1
}
771
+
772
// Makes room in the token text buffer (doubling it when full) and, unless
// the end of input has been reached, runs the generated code at the marker
// below and advances to the next character.
// NOTE(review): the code inserted at "-->casing2" presumably stores `ch`
// in tval and increments tlen -- confirm against the generator.
void Scanner::AddCh() {
	if (tlen >= tvalLength) {
		tvalLength *= 2;
		wchar_t *newBuf = new wchar_t[tvalLength];
		memcpy(newBuf, tval, tlen*sizeof(wchar_t));
		delete [] tval;
		tval = newBuf;
	}
	if (ch != Buffer::EoF) {
-->casing2
		NextCh();
	}
}
785
+
786
+ -->comments
787
+
788
+ void Scanner::CreateHeapBlock() {
789
+ void* newHeap;
790
+ char* cur = (char*) firstHeap;
791
+
792
+ while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) {
793
+ cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE));
794
+ free(firstHeap);
795
+ firstHeap = cur;
796
+ }
797
+
798
+ // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block
799
+ newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*));
800
+ *heapEnd = newHeap;
801
+ heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE);
802
+ *heapEnd = 0;
803
+ heap = newHeap;
804
+ heapTop = heap;
805
+ }
806
+
807
+ Token* Scanner::CreateToken() {
808
+ Token *t;
809
+ if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) {
810
+ CreateHeapBlock();
811
+ }
812
+ t = (Token*) heapTop;
813
+ heapTop = (void*) ((char*) heapTop + sizeof(Token));
814
+ t->val = NULL;
815
+ t->next = NULL;
816
+ return t;
817
+ }
818
+
819
+ void Scanner::AppendVal(Token *t) {
820
+ int reqMem = (tlen + 1) * sizeof(wchar_t);
821
+ if (((char*) heapTop + reqMem) >= (char*) heapEnd) {
822
+ if (reqMem > COCO_HEAP_BLOCK_SIZE) {
823
+ wprintf(L"--- Too long token value\n");
824
+ exit(1);
825
+ }
826
+ CreateHeapBlock();
827
+ }
828
+ t->val = (wchar_t*) heapTop;
829
+ heapTop = (void*) ((char*) heapTop + reqMem);
830
+
831
+ wcsncpy(t->val, tval, tlen);
832
+ t->val[tlen] = L'\0';
833
+ }
834
+
835
// Scans and returns the next token from the input.
// This is the skeleton of the generated automaton: the "-->" lines are
// replaced by the generator, and the inserted code may refer to the local
// names and the case_0 label used here -- do not rename them.
Token* Scanner::NextToken() {
	// skip blanks (the generated condition at scan1 extends this test)
	while (ch == ' ' ||
-->scan1
	) NextCh();
-->scan2
	int recKind = noSym;   // kind of the last recognised token, if any
	int recEnd = pos;      // byte position belonging to recKind
	t = CreateToken();
	t->pos = pos; t->col = col; t->line = line; t->charPos = charPos;
	int state = start.state(ch);
	tlen = 0; AddCh();

	switch (state) {
		case -1: { t->kind = eofSym; break; } // NextCh already done
		case 0: {
			case_0:
			// If a token was recognised earlier on, cut the text back to
			// its end and reposition the scanner right behind it.
			if (recKind != noSym) {
				tlen = recEnd - t->pos;
				SetScannerBehindT();
			}
			t->kind = recKind; break;
		} // NextCh already done
-->scan3
	}
	AppendVal(t);
	return t;
}
862
+
863
+ void Scanner::SetScannerBehindT() {
864
+ buffer->SetPos(t->pos);
865
+ NextCh();
866
+ line = t->line; col = t->col; charPos = t->charPos;
867
+ for (int i = 0; i < tlen; i++) NextCh();
868
+ }
869
+
870
+ // get the next token (possibly a token already seen during peeking)
871
+ Token* Scanner::Scan() {
872
+ if (tokens->next == NULL) {
873
+ return pt = tokens = NextToken();
874
+ } else {
875
+ pt = tokens = tokens->next;
876
+ return tokens;
877
+ }
878
+ }
879
+
880
+ // peek for the next token, ignore pragmas
881
+ Token* Scanner::Peek() {
882
+ do {
883
+ if (pt->next == NULL) {
884
+ pt->next = NextToken();
885
+ }
886
+ pt = pt->next;
887
+ } while (pt->kind > maxT); // skip pragmas
888
+
889
+ return pt;
890
+ }
891
+
892
+ // make sure that peeking starts at the current scan position
893
+ void Scanner::ResetPeek() {
894
+ pt = tokens;
895
+ }
896
+
897
+ -->namespace_close