ruco-cpp 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/Rakefile +17 -0
  3. data/bin/console +14 -0
  4. data/bin/ruco +30 -0
  5. data/bin/setup +7 -0
  6. data/data/ruco/Parser.frame +359 -0
  7. data/data/ruco/Scanner.frame +896 -0
  8. data/data/ruco/picojson/Changes +14 -0
  9. data/data/ruco/picojson/LICENSE +25 -0
  10. data/data/ruco/picojson/Makefile +8 -0
  11. data/data/ruco/picojson/README.mkdn +183 -0
  12. data/data/ruco/picojson/examples/github-issues.cc +110 -0
  13. data/data/ruco/picojson/examples/iostream.cc +70 -0
  14. data/data/ruco/picojson/examples/streaming.cc +76 -0
  15. data/data/ruco/picojson/picojson.h +1299 -0
  16. data/ext/cocor/Action.cpp +81 -0
  17. data/ext/cocor/Action.h +59 -0
  18. data/ext/cocor/ArrayList.cpp +79 -0
  19. data/ext/cocor/ArrayList.h +52 -0
  20. data/ext/cocor/BitArray.cpp +156 -0
  21. data/ext/cocor/BitArray.h +68 -0
  22. data/ext/cocor/CharClass.cpp +42 -0
  23. data/ext/cocor/CharClass.h +48 -0
  24. data/ext/cocor/CharSet.cpp +166 -0
  25. data/ext/cocor/CharSet.h +68 -0
  26. data/ext/cocor/Coco.atg +528 -0
  27. data/ext/cocor/Coco.cpp +173 -0
  28. data/ext/cocor/Comment.cpp +45 -0
  29. data/ext/cocor/Comment.h +51 -0
  30. data/ext/cocor/Copyright.frame +27 -0
  31. data/ext/cocor/DFA.cpp +865 -0
  32. data/ext/cocor/DFA.h +132 -0
  33. data/ext/cocor/Generator.cpp +182 -0
  34. data/ext/cocor/Generator.h +61 -0
  35. data/ext/cocor/Graph.h +59 -0
  36. data/ext/cocor/HashTable.cpp +115 -0
  37. data/ext/cocor/HashTable.h +84 -0
  38. data/ext/cocor/Makefile +11 -0
  39. data/ext/cocor/Melted.cpp +39 -0
  40. data/ext/cocor/Melted.h +51 -0
  41. data/ext/cocor/Node.cpp +69 -0
  42. data/ext/cocor/Node.h +86 -0
  43. data/ext/cocor/Parser.cpp +925 -0
  44. data/ext/cocor/Parser.frame +326 -0
  45. data/ext/cocor/Parser.h +153 -0
  46. data/ext/cocor/ParserGen.cpp +486 -0
  47. data/ext/cocor/ParserGen.h +99 -0
  48. data/ext/cocor/Position.cpp +37 -0
  49. data/ext/cocor/Position.h +46 -0
  50. data/ext/cocor/README.md +12 -0
  51. data/ext/cocor/Scanner.cpp +833 -0
  52. data/ext/cocor/Scanner.frame +897 -0
  53. data/ext/cocor/Scanner.h +291 -0
  54. data/ext/cocor/Sets.h +84 -0
  55. data/ext/cocor/SortedList.cpp +141 -0
  56. data/ext/cocor/SortedList.h +68 -0
  57. data/ext/cocor/State.cpp +77 -0
  58. data/ext/cocor/State.h +55 -0
  59. data/ext/cocor/StringBuilder.cpp +88 -0
  60. data/ext/cocor/StringBuilder.h +29 -0
  61. data/ext/cocor/Symbol.cpp +61 -0
  62. data/ext/cocor/Symbol.h +70 -0
  63. data/ext/cocor/Tab.cpp +1248 -0
  64. data/ext/cocor/Tab.h +245 -0
  65. data/ext/cocor/Target.cpp +41 -0
  66. data/ext/cocor/Target.h +48 -0
  67. data/ext/cocor/build.bat +3 -0
  68. data/ext/cocor/build.sh +4 -0
  69. data/ext/cocor/coc.bat +1 -0
  70. data/ext/cocor/coc.sh +2 -0
  71. data/ext/cocor/cocor_ruby_ext.cpp +124 -0
  72. data/ext/cocor/cygBuild.bat +1 -0
  73. data/ext/cocor/extconf.rb +5 -0
  74. data/ext/cocor/mingwbuild.bat +2 -0
  75. data/ext/cocor/mkmf.log +57 -0
  76. data/ext/cocor/zipsources.bat +1 -0
  77. data/lib/cocor.rb +14 -0
  78. data/lib/ruco/version.rb +3 -0
  79. data/lib/ruco.rb +728 -0
  80. metadata +195 -0
@@ -0,0 +1,896 @@
1
+ /*----------------------------------------------------------------------
2
+ Compiler Generator Coco/R,
3
+ Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz
4
+ extended by M. Loeberbauer & A. Woess, Univ. of Linz
5
+ ported to C++ by Csaba Balazs, University of Szeged
6
+ with improvements by Pat Terry, Rhodes University
7
+
8
+ This program is free software; you can redistribute it and/or modify it
9
+ under the terms of the GNU General Public License as published by the
10
+ Free Software Foundation; either version 2, or (at your option) any
11
+ later version.
12
+
13
+ This program is distributed in the hope that it will be useful, but
14
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16
+ for more details.
17
+
18
+ You should have received a copy of the GNU General Public License along
19
+ with this program; if not, write to the Free Software Foundation, Inc.,
20
+ 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21
+
22
+ As an exception, it is allowed to write an extension of Coco/R that is
23
+ used as a plugin in non-free software.
24
+
25
+ If not otherwise stated, any source code generated by Coco/R (other than
26
+ Coco/R itself) does not fall under the GNU General Public License.
27
+ -----------------------------------------------------------------------*/
28
+
29
+ /*----------------------------------------------------------------------
30
+ Scanner.h Specification
31
+ -----------------------------------------------------------------------*/
32
+
33
+ -->begin
34
+
35
+ #if !defined(-->prefixCOCO_SCANNER_H__)
36
+ #define -->prefixCOCO_SCANNER_H__
37
+
38
+ #include <limits.h>
39
+ #include <stdio.h>
40
+ #include <stdlib.h>
41
+ #include <string.h>
42
+ #include <wchar.h>
43
+
44
+ // io.h and fcntl are used to ensure binary read from streams on windows
45
+ #if _MSC_VER >= 1300
46
+ #include <io.h>
47
+ #include <fcntl.h>
48
+ #endif
49
+
50
+ #if _MSC_VER >= 1400
51
+ #define coco_swprintf swprintf_s
52
+ #elif _MSC_VER >= 1300
53
+ #define coco_swprintf _snwprintf
54
+ #elif defined __MINGW32__
55
+ #define coco_swprintf _snwprintf
56
+ #else
57
+ // assume every other compiler knows swprintf
58
+ #define coco_swprintf swprintf
59
+ #endif
60
+
61
+ #define COCO_WCHAR_MAX 65535
62
+ #define COCO_MIN_BUFFER_LENGTH 1024
63
+ #define COCO_MAX_BUFFER_LENGTH (64*COCO_MIN_BUFFER_LENGTH)
64
+ #define COCO_HEAP_BLOCK_SIZE (64*1024)
65
+ #define COCO_CPP_NAMESPACE_SEPARATOR L':'
66
+
67
+ -->namespace_open
68
+
69
+ // string handling, wide character
70
+ wchar_t* coco_string_create(const wchar_t *value);
71
+ wchar_t* coco_string_create(const wchar_t *value, int startIndex);
72
+ wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length);
73
+ wchar_t* coco_string_create_upper(const wchar_t* data);
74
+ wchar_t* coco_string_create_lower(const wchar_t* data);
75
+ wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen);
76
+ wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2);
77
+ wchar_t* coco_string_create_append(const wchar_t* data, const wchar_t value);
78
+ void coco_string_delete(wchar_t* &data);
79
+ int coco_string_length(const wchar_t* data);
80
+ bool coco_string_endswith(const wchar_t* data, const wchar_t *value);
81
+ int coco_string_indexof(const wchar_t* data, const wchar_t value);
82
+ int coco_string_lastindexof(const wchar_t* data, const wchar_t value);
83
+ void coco_string_merge(wchar_t* &data, const wchar_t* value);
84
+ bool coco_string_equal(const wchar_t* data1, const wchar_t* data2);
85
+ int coco_string_compareto(const wchar_t* data1, const wchar_t* data2);
86
+ int coco_string_hash(const wchar_t* data);
87
+
88
+ // string handling, ascii character
89
+ wchar_t* coco_string_create(const char *value);
90
+ char* coco_string_create_char(const wchar_t *value);
91
+ void coco_string_delete(char* &data);
92
+
93
+
94
// One scanned token. Tokens are chained through `next` so that tokens
// produced while peeking stay available until they are consumed.
class Token
{
public:
	int kind;       // token kind
	int pos;        // byte offset of the token in the source text (0-based)
	int charPos;    // character offset of the token in the source text (0-based)
	int col;        // column of the token (1-based)
	int line;       // line of the token (1-based)
	wchar_t* val;   // token text
	Token *next;    // following token in the peek list (ML 2005-03-11)

	Token();
	~Token();
};
108
+
109
+ class Buffer {
110
+ // This Buffer supports the following cases:
111
+ // 1) seekable stream (file)
112
+ // a) whole stream in buffer
113
+ // b) part of stream in buffer
114
+ // 2) non seekable stream (network, console)
115
+ private:
116
+ char *buf; // input buffer
117
+ int bufCapacity; // capacity of buf
118
+ int bufStart; // position of first byte in buffer relative to input stream
119
+ int bufLen; // length of buffer
120
+ int fileLen; // length of input stream (may change if the stream is no file)
121
+ int bufPos; // current position in buffer
122
+ FILE* stream; // input stream (seekable)
123
+ bool isUserStream; // was the stream opened by the user?
124
+
125
+ int ReadNextStreamChunk();
126
+ bool CanSeek(); // true if stream can be seeked otherwise false
127
+
128
+ public:
129
+ static const int EoF = COCO_WCHAR_MAX + 1;
130
+
131
+ Buffer(FILE* s, bool isUserStream);
132
+ Buffer(const char* buf, size_t len);
133
+ Buffer(Buffer *b);
134
+ virtual ~Buffer();
135
+
136
+ virtual void Close();
137
+ virtual int Read();
138
+ virtual int Peek();
139
+ virtual wchar_t* GetString(int beg, int end);
140
+ virtual int GetPos();
141
+ virtual void SetPos(int value);
142
+ };
143
+
144
+ class UTF8Buffer : public Buffer {
145
+ public:
146
+ UTF8Buffer(Buffer *b) : Buffer(b) {};
147
+ virtual int Read();
148
+ };
149
+
150
+ //-----------------------------------------------------------------------------------
151
+ // StartStates -- maps characters to start states of tokens
152
+ //-----------------------------------------------------------------------------------
153
class StartStates {
private:
	// One chained hash entry: character code -> start state.
	class Elem {
	public:
		int key, val;
		Elem *next;
		Elem(int key, int val) : key(key), val(val), next(NULL) {}
	};

	Elem **tab;   // 128 bucket heads

	// Bucket index of a key; the unsigned cast keeps negative keys in range.
	static int bucketOf(int key) { return (int) (((unsigned int) key) % 128); }

public:
	// Creates an empty table (all buckets NULL).
	StartStates() {
		tab = new Elem*[128];
		for (int i = 0; i < 128; ++i) { tab[i] = NULL; }
	}

	// Releases every chained entry and then the bucket array.
	virtual ~StartStates() {
		for (int i = 0; i < 128; ++i) {
			for (Elem *e = tab[i]; e != NULL; ) {
				Elem *doomed = e;
				e = e->next;
				delete doomed;
			}
		}
		delete [] tab;
	}

	// Adds a mapping key -> val at the front of its bucket, so a later
	// set() for the same key shadows an earlier one.
	void set(int key, int val) {
		const int k = bucketOf(key);
		Elem *e = new Elem(key, val);
		e->next = tab[k];
		tab[k] = e;
	}

	// Returns the state stored for key, or 0 when the key is unknown.
	int state(int key) {
		for (Elem *e = tab[bucketOf(key)]; e != NULL; e = e->next) {
			if (e->key == key) { return e->val; }
		}
		return 0;
	}
};
190
+
191
+ //-------------------------------------------------------------------------------------------
192
+ // KeywordMap -- maps strings to integers (identifiers to keyword kinds)
193
+ //-------------------------------------------------------------------------------------------
194
+ class KeywordMap {
195
+ private:
196
+ class Elem {
197
+ public:
198
+ wchar_t *key;
199
+ int val;
200
+ Elem *next;
201
+ Elem(const wchar_t *key, int val) { this->key = coco_string_create(key); this->val = val; next = NULL; }
202
+ virtual ~Elem() { coco_string_delete(key); }
203
+ };
204
+
205
+ Elem **tab;
206
+
207
+ public:
208
+ KeywordMap() { tab = new Elem*[128]; memset(tab, 0, 128 * sizeof(Elem*)); }
209
+ virtual ~KeywordMap() {
210
+ for (int i = 0; i < 128; ++i) {
211
+ Elem *e = tab[i];
212
+ while (e != NULL) {
213
+ Elem *next = e->next;
214
+ delete e;
215
+ e = next;
216
+ }
217
+ }
218
+ delete [] tab;
219
+ }
220
+
221
+ void set(const wchar_t *key, int val) {
222
+ Elem *e = new Elem(key, val);
223
+ int k = coco_string_hash(key) % 128;
224
+ e->next = tab[k]; tab[k] = e;
225
+ }
226
+
227
+ int get(const wchar_t *key, int defaultVal) {
228
+ Elem *e = tab[coco_string_hash(key) % 128];
229
+ while (e != NULL && !coco_string_equal(e->key, key)) e = e->next;
230
+ return e == NULL ? defaultVal : e->val;
231
+ }
232
+ };
233
+
234
+ class Scanner {
235
+ private:
236
+ void *firstHeap;
237
+ void *heap;
238
+ void *heapTop;
239
+ void **heapEnd;
240
+
241
+ unsigned char EOL;
242
+ int eofSym;
243
+ int noSym;
244
+ int maxT;
245
+ StartStates start;
246
+ KeywordMap keywords;
247
+
248
+ Token *t; // current token
249
+ wchar_t *tval; // text of current token
250
+ int tvalLength; // length of text of current token
251
+ int tlen; // length of current token
252
+
253
+ Token *tokens; // list of tokens already peeked (first token is a dummy)
254
+ Token *pt; // current peek token
255
+
256
+ int ch; // current input character
257
+ -->casing0
258
+ int pos; // byte position of current character
259
+ int charPos; // position by unicode characters starting with 0
260
+ int line; // line number of current character
261
+ int col; // column number of current character
262
+ int oldEols; // EOLs that appeared in a comment;
263
+
264
+ void CreateHeapBlock();
265
+ Token* CreateToken();
266
+ void AppendVal(Token *t);
267
+ void SetScannerBehindT();
268
+
269
+ void Init();
270
+ void NextCh();
271
+ void AddCh();
272
+ -->commentsheader
273
+ Token* NextToken();
274
+
275
+ public:
276
+ Buffer *buffer; // scanner buffer
277
+
278
+ Scanner(const char* buf, size_t len);
279
+ Scanner(const wchar_t* fileName);
280
+ Scanner(FILE* s);
281
+ ~Scanner();
282
+ Token* Scan();
283
+ Token* Peek();
284
+ void ResetPeek();
285
+
286
+ }; // end Scanner
287
+
288
+ -->namespace_close
289
+
290
+ #endif
291
+
292
+ -->implementation
293
+
294
+ /*----------------------------------------------------------------------
295
+ Scanner.cpp Specification
296
+ -----------------------------------------------------------------------*/
297
+
298
+ -->begin
299
+
300
+ #include <memory.h>
301
+ #include <string.h>
302
+ #include "Scanner.h"
303
+
304
+ -->namespace_open
305
+
306
+
307
+ // string handling, wide character
308
+
309
+
310
+ wchar_t* coco_string_create(const wchar_t* value) {
311
+ return coco_string_create(value, 0);
312
+ }
313
+
314
+ wchar_t* coco_string_create(const wchar_t *value, int startIndex) {
315
+ int valueLen = 0;
316
+ int len = 0;
317
+
318
+ if (value) {
319
+ valueLen = wcslen(value);
320
+ len = valueLen - startIndex;
321
+ }
322
+
323
+ return coco_string_create(value, startIndex, len);
324
+ }
325
+
326
// Returns a newly allocated, zero-terminated string holding `length`
// characters of `value`, starting at `startIndex`.
//
// A NULL `value` or a non-positive `length` yields an empty string; in that
// case the source pointer is never touched and the allocation size can never
// become zero or negative. The caller releases the result with
// coco_string_delete.
wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) {
	const int len = (value != NULL && length > 0) ? length : 0;
	wchar_t* data = new wchar_t[len + 1];

	// Copy only when there is something to copy, so NULL is never dereferenced.
	if (len > 0) { wcsncpy(data, value + startIndex, len); }
	data[len] = 0;

	return data;
}
337
+
338
// Returns a newly allocated copy of data with the letters 'a'..'z' replaced
// by 'A'..'Z'; all other characters are copied unchanged.
// A NULL input yields NULL.
wchar_t* coco_string_create_upper(const wchar_t* data) {
	if (data == NULL) { return NULL; }

	const int n = wcslen(data);
	wchar_t *upper = new wchar_t[n + 1];

	for (int k = 0; k < n; ++k) {
		const wchar_t c = data[k];
		if (c >= L'a' && c <= L'z') {
			upper[k] = c + (L'A' - L'a');
		} else {
			upper[k] = c;
		}
	}
	upper[n] = L'\0';
	return upper;
}
356
+
357
+ wchar_t* coco_string_create_lower(const wchar_t* data) {
358
+ if (!data) { return NULL; }
359
+ int dataLen = wcslen(data);
360
+ return coco_string_create_lower(data, 0, dataLen);
361
+ }
362
+
363
// Returns a newly allocated, zero-terminated lower-case copy of the
// `dataLen` characters of `data` that start at `startIndex`. Only the
// letters 'A'..'Z' are converted; other characters are copied unchanged.
//
// A NULL `data` yields NULL. A negative `dataLen` is treated as 0.
// Exactly `dataLen` characters are read, so the window may end at the
// last element of a buffer that has no terminator.
wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) {
	if (!data) { return NULL; }

	const int len = (dataLen > 0) ? dataLen : 0;
	wchar_t* newData = new wchar_t[len + 1];

	// Strictly less than len: the element after the window is never read.
	for (int i = 0; i < len; i++) {
		const wchar_t ch = data[startIndex + i];
		if ((L'A' <= ch) && (ch <= L'Z')) {
			newData[i] = ch - (L'A' - L'a');
		}
		else { newData[i] = ch; }
	}
	newData[len] = L'\0';
	return newData;
}
378
+
379
// Returns a newly allocated string holding data1 followed by data2.
// Either argument may be NULL and then contributes nothing.
wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) {
	const int len1 = data1 ? (int) wcslen(data1) : 0;
	const int len2 = data2 ? (int) wcslen(data2) : 0;
	const int total = len1 + len2;

	wchar_t* joined = new wchar_t[total + 1];
	if (data1 != NULL) { wcscpy(joined, data1); }
	if (data2 != NULL) { wcscpy(joined + len1, data2); }
	joined[total] = 0;

	return joined;
}
396
+
397
+ wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) {
398
+ int targetLen = coco_string_length(target);
399
+ wchar_t* data = new wchar_t[targetLen + 2];
400
+ wcsncpy(data, target, targetLen);
401
+ data[targetLen] = appendix;
402
+ data[targetLen + 1] = 0;
403
+ return data;
404
+ }
405
+
406
// Releases a wide string allocated with new[] and resets the caller's
// pointer to NULL.
void coco_string_delete(wchar_t* &data) {
	wchar_t* doomed = data;
	data = NULL;
	delete [] doomed;
}
410
+
411
// Returns the number of characters in data, or 0 for a NULL pointer.
int coco_string_length(const wchar_t* data) {
	if (data == NULL) { return 0; }
	return wcslen(data);
}
415
+
416
// Returns true when `data` ends with the string `end`.
//
// A NULL argument yields false instead of crashing in wcslen, in line with
// the NULL tolerance of the other string helpers. An empty `end` matches
// every non-NULL `data`.
bool coco_string_endswith(const wchar_t* data, const wchar_t *end) {
	if (data == NULL || end == NULL) { return false; }
	const int dataLen = wcslen(data);
	const int endLen = wcslen(end);
	// Compare the last endLen characters of data with end.
	return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0);
}
421
+
422
// Returns the index of the first occurrence of value in data,
// or -1 when value does not occur.
int coco_string_indexof(const wchar_t* data, const wchar_t value) {
	const wchar_t* hit = wcschr(data, value);
	return (hit != NULL) ? (int) (hit - data) : -1;
}
428
+
429
// Returns the index of the last occurrence of value in data,
// or -1 when value does not occur.
int coco_string_lastindexof(const wchar_t* data, const wchar_t value) {
	const wchar_t* hit = wcsrchr(data, value);
	return (hit != NULL) ? (int) (hit - data) : -1;
}
435
+
436
+ void coco_string_merge(wchar_t* &target, const wchar_t* appendix) {
437
+ if (!appendix) { return; }
438
+ wchar_t* data = coco_string_create_append(target, appendix);
439
+ delete [] target;
440
+ target = data;
441
+ }
442
+
443
// Returns true when both strings contain exactly the same characters.
bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) {
	const int order = wcscmp(data1, data2);
	return order == 0;
}
446
+
447
// Compares two strings with wcscmp and returns its result: negative,
// zero or positive when data1 sorts before, equal to or after data2.
int coco_string_compareto(const wchar_t* data1, const wchar_t* data2) {
	const int order = wcscmp(data1, data2);
	return order;
}
450
+
451
// Returns a non-negative hash of the string; NULL and "" both hash to 0.
//
// The hash is h = (h * 7) ^ ch folded over all characters. It is computed
// in unsigned arithmetic so that wrap-around is well defined (the signed
// form overflows on long strings). A wrapped value that would be negative
// as an int is negated, as before; the one value that has no positive
// counterpart is mapped to 0, so the result can always be used with
// `% tableSize` as an array index.
int coco_string_hash(const wchar_t *data) {
	if (!data) { return 0; }

	unsigned int h = 0;
	for (; *data != 0; ++data) {
		h = (h * 7u) ^ (unsigned int) *data;
	}

	const unsigned int maxInt = (unsigned int) INT_MAX;
	if (h > maxInt) { h = 0u - h; }   // magnitude of the wrapped negative value
	if (h > maxInt) { h = 0; }        // only the most negative value lands here
	return (int) h;
}
461
+
462
+ // string handling, ascii character
463
+
464
// Returns a newly allocated wide string in which every byte of `value` is
// widened to one wchar_t. A NULL `value` yields an empty string.
//
// Each byte is widened through unsigned char, so bytes >= 0x80 map to
// 0x80..0xFF rather than being sign-extended to negative values on
// platforms where plain char is signed.
wchar_t* coco_string_create(const char* value) {
	int len = 0;
	if (value) { len = strlen(value); }
	wchar_t* data = new wchar_t[len + 1];
	for (int i = 0; i < len; ++i) {
		data[i] = (wchar_t) (unsigned char) value[i];
	}
	data[len] = 0;
	return data;
}
472
+
473
+ char* coco_string_create_char(const wchar_t *value) {
474
+ int len = coco_string_length(value);
475
+ char *res = new char[len + 1];
476
+ for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; }
477
+ res[len] = 0;
478
+ return res;
479
+ }
480
+
481
// Releases a narrow string allocated with new[] and resets the caller's
// pointer to NULL.
void coco_string_delete(char* &data) {
	char* doomed = data;
	data = NULL;
	delete [] doomed;
}
485
+
486
+
487
+ Token::Token() {
488
+ kind = 0;
489
+ pos = 0;
490
+ col = 0;
491
+ line = 0;
492
+ val = NULL;
493
+ next = NULL;
494
+ }
495
+
496
+ Token::~Token() {
497
+ coco_string_delete(val);
498
+ }
499
+
500
+ Buffer::Buffer(FILE* s, bool isUserStream) {
501
+ // ensure binary read on windows
502
+ #if _MSC_VER >= 1300
503
+ _setmode(_fileno(s), _O_BINARY);
504
+ #endif
505
+ stream = s; this->isUserStream = isUserStream;
506
+ if (CanSeek()) {
507
+ fseek(s, 0, SEEK_END);
508
+ fileLen = ftell(s);
509
+ fseek(s, 0, SEEK_SET);
510
+ bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH;
511
+ bufStart = INT_MAX; // nothing in the buffer so far
512
+ } else {
513
+ fileLen = bufLen = bufStart = 0;
514
+ }
515
+ bufCapacity = (bufLen>0) ? bufLen : COCO_MIN_BUFFER_LENGTH;
516
+ buf = new char[bufCapacity];
517
+ if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start)
518
+ else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid
519
+ if (bufLen == fileLen && CanSeek()) Close();
520
+ }
521
+
522
+ Buffer::Buffer(Buffer *b) {
523
+ buf = b->buf;
524
+ bufCapacity = b->bufCapacity;
525
+ b->buf = NULL;
526
+ bufStart = b->bufStart;
527
+ bufLen = b->bufLen;
528
+ fileLen = b->fileLen;
529
+ bufPos = b->bufPos;
530
+ stream = b->stream;
531
+ b->stream = NULL;
532
+ isUserStream = b->isUserStream;
533
+ }
534
+
535
+ Buffer::Buffer(const char* buf, size_t len) {
536
+ this->buf = new char[len];
537
+ memcpy(this->buf, buf, len*sizeof(unsigned char));
538
+ bufStart = 0;
539
+ bufCapacity = bufLen = len;
540
+ fileLen = len;
541
+ bufPos = 0;
542
+ stream = NULL;
543
+ }
544
+
545
+ Buffer::~Buffer() {
546
+ Close();
547
+ if (buf != NULL) {
548
+ delete [] buf;
549
+ buf = NULL;
550
+ }
551
+ }
552
+
553
+ void Buffer::Close() {
554
+ if (!isUserStream && stream != NULL) {
555
+ fclose(stream);
556
+ stream = NULL;
557
+ }
558
+ }
559
+
560
+ int Buffer::Read() {
561
+ if (bufPos < bufLen) {
562
+ return buf[bufPos++];
563
+ } else if (GetPos() < fileLen) {
564
+ SetPos(GetPos()); // shift buffer start to Pos
565
+ return buf[bufPos++];
566
+ } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) {
567
+ return buf[bufPos++];
568
+ } else {
569
+ return EoF;
570
+ }
571
+ }
572
+
573
+ int Buffer::Peek() {
574
+ int curPos = GetPos();
575
+ int ch = Read();
576
+ SetPos(curPos);
577
+ return ch;
578
+ }
579
+
580
+ // beg .. begin, zero-based, inclusive, in byte
581
+ // end .. end, zero-based, exclusive, in byte
582
+ wchar_t* Buffer::GetString(int beg, int end) {
583
+ int len = 0;
584
+ wchar_t *buf = new wchar_t[end - beg];
585
+ int oldPos = GetPos();
586
+ SetPos(beg);
587
+ while (GetPos() < end) buf[len++] = (wchar_t) Read();
588
+ SetPos(oldPos);
589
+ wchar_t *res = coco_string_create(buf, 0, len);
590
+ coco_string_delete(buf);
591
+ return res;
592
+ }
593
+
594
+ int Buffer::GetPos() {
595
+ return bufPos + bufStart;
596
+ }
597
+
598
+ void Buffer::SetPos(int value) {
599
+ if ((value >= fileLen) && (stream != NULL) && !CanSeek()) {
600
+ // Wanted position is after buffer and the stream
601
+ // is not seek-able e.g. network or console,
602
+ // thus we have to read the stream manually till
603
+ // the wanted position is in sight.
604
+ while ((value >= fileLen) && (ReadNextStreamChunk() > 0));
605
+ }
606
+
607
+ if ((value < 0) || (value > fileLen)) {
608
+ wprintf(L"--- buffer out of bounds access, position: %d\n", value);
609
+ exit(1);
610
+ }
611
+
612
+ if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer
613
+ bufPos = value - bufStart;
614
+ } else if (stream != NULL) { // must be swapped in
615
+ fseek(stream, value, SEEK_SET);
616
+ bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream);
617
+ bufStart = value; bufPos = 0;
618
+ } else {
619
+ bufPos = fileLen - bufStart; // make Pos return fileLen
620
+ }
621
+ }
622
+
623
+ // Read the next chunk of bytes from the stream, increases the buffer
624
+ // if needed and updates the fields fileLen and bufLen.
625
+ // Returns the number of bytes read.
626
+ int Buffer::ReadNextStreamChunk() {
627
+ int free = bufCapacity - bufLen;
628
+ if (free == 0) {
629
+ // in the case of a growing input stream
630
+ // we can neither seek in the stream, nor can we
631
+ // foresee the maximum length, thus we must adapt
632
+ // the buffer size on demand.
633
+ bufCapacity = bufLen * 2;
634
+ char *newBuf = new char[bufCapacity];
635
+ memcpy(newBuf, buf, bufLen*sizeof(char));
636
+ delete [] buf;
637
+ buf = newBuf;
638
+ free = bufLen;
639
+ }
640
+ int read = fread(buf + bufLen, sizeof(unsigned char), free, stream);
641
+ if (read > 0) {
642
+ fileLen = bufLen = (bufLen + read);
643
+ return read;
644
+ }
645
+ // end of stream reached
646
+ return 0;
647
+ }
648
+
649
+ bool Buffer::CanSeek() {
650
+ return (stream != NULL) && (ftell(stream) != -1);
651
+ }
652
+
653
+ int UTF8Buffer::Read() {
654
+ int ch;
655
+ do {
656
+ ch = Buffer::Read();
657
+ // until we find a utf8 start (0xxxxxxx or 11xxxxxx)
658
+ } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF));
659
+ if (ch < 128 || ch == EoF) {
660
+ // nothing to do, first 127 chars are the same in ascii and utf8
661
+ // 0xxxxxxx or end of file character
662
+ } else if ((ch & 0xF0) == 0xF0) {
663
+ // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
664
+ int c1 = ch & 0x07; ch = Buffer::Read();
665
+ int c2 = ch & 0x3F; ch = Buffer::Read();
666
+ int c3 = ch & 0x3F; ch = Buffer::Read();
667
+ int c4 = ch & 0x3F;
668
+ ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4;
669
+ } else if ((ch & 0xE0) == 0xE0) {
670
+ // 1110xxxx 10xxxxxx 10xxxxxx
671
+ int c1 = ch & 0x0F; ch = Buffer::Read();
672
+ int c2 = ch & 0x3F; ch = Buffer::Read();
673
+ int c3 = ch & 0x3F;
674
+ ch = (((c1 << 6) | c2) << 6) | c3;
675
+ } else if ((ch & 0xC0) == 0xC0) {
676
+ // 110xxxxx 10xxxxxx
677
+ int c1 = ch & 0x1F; ch = Buffer::Read();
678
+ int c2 = ch & 0x3F;
679
+ ch = (c1 << 6) | c2;
680
+ }
681
+ return ch;
682
+ }
683
+
684
+ Scanner::Scanner(const char* buf, size_t len) {
685
+ buffer = new Buffer(buf, len);
686
+ Init();
687
+ }
688
+
689
+ Scanner::Scanner(const wchar_t* fileName) {
690
+ FILE* stream;
691
+ char *chFileName = coco_string_create_char(fileName);
692
+ if ((stream = fopen(chFileName, "rb")) == NULL) {
693
+ wprintf(L"--- Cannot open file %ls\n", fileName);
694
+ exit(1);
695
+ }
696
+ coco_string_delete(chFileName);
697
+ buffer = new Buffer(stream, false);
698
+ Init();
699
+ }
700
+
701
+ Scanner::Scanner(FILE* s) {
702
+ buffer = new Buffer(s, true);
703
+ Init();
704
+ }
705
+
706
+ Scanner::~Scanner() {
707
+ char* cur = (char*) firstHeap;
708
+
709
+ while(cur != NULL) {
710
+ cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE);
711
+ free(firstHeap);
712
+ firstHeap = cur;
713
+ }
714
+ delete [] tval;
715
+ delete buffer;
716
+ }
717
+
718
+ void Scanner::Init() {
719
+ EOL = '\n';
720
+ eofSym = 0;
721
+ -->declarations
722
+
723
+ tvalLength = 128;
724
+ tval = new wchar_t[tvalLength]; // text of current token
725
+
726
+ // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block
727
+ heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*));
728
+ firstHeap = heap;
729
+ heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE);
730
+ *heapEnd = 0;
731
+ heapTop = heap;
732
+ if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) {
733
+ wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n");
734
+ exit(1);
735
+ }
736
+
737
+ pos = -1; line = 1; col = 0; charPos = -1;
738
+ oldEols = 0;
739
+ NextCh();
740
+ if (ch == 0xEF) { // check optional byte order mark for UTF-8
741
+ NextCh(); int ch1 = ch;
742
+ NextCh(); int ch2 = ch;
743
+ if (ch1 != 0xBB || ch2 != 0xBF) {
744
+ wprintf(L"Illegal byte order mark at start of file");
745
+ exit(1);
746
+ }
747
+ Buffer *oldBuf = buffer;
748
+ buffer = new UTF8Buffer(buffer); col = 0; charPos = -1;
749
+ delete oldBuf; oldBuf = NULL;
750
+ NextCh();
751
+ }
752
+
753
+ -->initialization
754
+ pt = tokens = CreateToken(); // first token is a dummy
755
+ }
756
+
757
+ void Scanner::NextCh() {
758
+ if (oldEols > 0) { ch = EOL; oldEols--; }
759
+ else {
760
+ pos = buffer->GetPos();
761
+ // buffer reads unicode chars, if UTF8 has been detected
762
+ ch = buffer->Read(); col++; charPos++;
763
+ // replace isolated '\r' by '\n' in order to make
764
+ // eol handling uniform across Windows, Unix and Mac
765
+ if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL;
766
+ if (ch == EOL) { line++; col = 0; }
767
+ }
768
+ -->casing1
769
+ }
770
+
771
+ void Scanner::AddCh() {
772
+ if (tlen >= tvalLength) {
773
+ tvalLength *= 2;
774
+ wchar_t *newBuf = new wchar_t[tvalLength];
775
+ memcpy(newBuf, tval, tlen*sizeof(wchar_t));
776
+ delete [] tval;
777
+ tval = newBuf;
778
+ }
779
+ if (ch != Buffer::EoF) {
780
+ -->casing2
781
+ NextCh();
782
+ }
783
+ }
784
+
785
+ -->comments
786
+
787
+ void Scanner::CreateHeapBlock() {
788
+ void* newHeap;
789
+ char* cur = (char*) firstHeap;
790
+
791
+ while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) {
792
+ cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE));
793
+ free(firstHeap);
794
+ firstHeap = cur;
795
+ }
796
+
797
+ // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block
798
+ newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*));
799
+ *heapEnd = newHeap;
800
+ heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE);
801
+ *heapEnd = 0;
802
+ heap = newHeap;
803
+ heapTop = heap;
804
+ }
805
+
806
+ Token* Scanner::CreateToken() {
807
+ Token *t;
808
+ if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) {
809
+ CreateHeapBlock();
810
+ }
811
+ t = (Token*) heapTop;
812
+ heapTop = (void*) ((char*) heapTop + sizeof(Token));
813
+ t->val = NULL;
814
+ t->next = NULL;
815
+ return t;
816
+ }
817
+
818
+ void Scanner::AppendVal(Token *t) {
819
+ int reqMem = (tlen + 1) * sizeof(wchar_t);
820
+ if (((char*) heapTop + reqMem) >= (char*) heapEnd) {
821
+ if (reqMem > COCO_HEAP_BLOCK_SIZE) {
822
+ wprintf(L"--- Too long token value\n");
823
+ exit(1);
824
+ }
825
+ CreateHeapBlock();
826
+ }
827
+ t->val = (wchar_t*) heapTop;
828
+ heapTop = (void*) ((char*) heapTop + reqMem);
829
+
830
+ wcsncpy(t->val, tval, tlen);
831
+ t->val[tlen] = L'\0';
832
+ }
833
+
834
+ Token* Scanner::NextToken() {
835
+ while (ch == ' ' ||
836
+ -->scan1
837
+ ) NextCh();
838
+ -->scan2
839
+ int recKind = noSym;
840
+ int recEnd = pos;
841
+ t = CreateToken();
842
+ t->pos = pos; t->col = col; t->line = line; t->charPos = charPos;
843
+ int state = start.state(ch);
844
+ tlen = 0; AddCh();
845
+
846
+ switch (state) {
847
+ case -1: { t->kind = eofSym; break; } // NextCh already done
848
+ case 0: {
849
+ case_0:
850
+ if (recKind != noSym) {
851
+ tlen = recEnd - t->pos;
852
+ SetScannerBehindT();
853
+ }
854
+ t->kind = recKind; break;
855
+ } // NextCh already done
856
+ -->scan3
857
+ }
858
+ AppendVal(t);
859
+ return t;
860
+ }
861
+
862
+ void Scanner::SetScannerBehindT() {
863
+ buffer->SetPos(t->pos);
864
+ NextCh();
865
+ line = t->line; col = t->col; charPos = t->charPos;
866
+ for (int i = 0; i < tlen; i++) NextCh();
867
+ }
868
+
869
+ // get the next token (possibly a token already seen during peeking)
870
+ Token* Scanner::Scan() {
871
+ if (tokens->next == NULL) {
872
+ return pt = tokens = NextToken();
873
+ } else {
874
+ pt = tokens = tokens->next;
875
+ return tokens;
876
+ }
877
+ }
878
+
879
+ // peek for the next token, ignore pragmas
880
+ Token* Scanner::Peek() {
881
+ do {
882
+ if (pt->next == NULL) {
883
+ pt->next = NextToken();
884
+ }
885
+ pt = pt->next;
886
+ } while (pt->kind > maxT); // skip pragmas
887
+
888
+ return pt;
889
+ }
890
+
891
+ // make sure that peeking starts at the current scan position
892
+ void Scanner::ResetPeek() {
893
+ pt = tokens;
894
+ }
895
+
896
+ -->namespace_close