ruco-cpp 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Rakefile +17 -0
- data/bin/console +14 -0
- data/bin/ruco +30 -0
- data/bin/setup +7 -0
- data/data/ruco/Parser.frame +359 -0
- data/data/ruco/Scanner.frame +896 -0
- data/data/ruco/picojson/Changes +14 -0
- data/data/ruco/picojson/LICENSE +25 -0
- data/data/ruco/picojson/Makefile +8 -0
- data/data/ruco/picojson/README.mkdn +183 -0
- data/data/ruco/picojson/examples/github-issues.cc +110 -0
- data/data/ruco/picojson/examples/iostream.cc +70 -0
- data/data/ruco/picojson/examples/streaming.cc +76 -0
- data/data/ruco/picojson/picojson.h +1299 -0
- data/ext/cocor/Action.cpp +81 -0
- data/ext/cocor/Action.h +59 -0
- data/ext/cocor/ArrayList.cpp +79 -0
- data/ext/cocor/ArrayList.h +52 -0
- data/ext/cocor/BitArray.cpp +156 -0
- data/ext/cocor/BitArray.h +68 -0
- data/ext/cocor/CharClass.cpp +42 -0
- data/ext/cocor/CharClass.h +48 -0
- data/ext/cocor/CharSet.cpp +166 -0
- data/ext/cocor/CharSet.h +68 -0
- data/ext/cocor/Coco.atg +528 -0
- data/ext/cocor/Coco.cpp +173 -0
- data/ext/cocor/Comment.cpp +45 -0
- data/ext/cocor/Comment.h +51 -0
- data/ext/cocor/Copyright.frame +27 -0
- data/ext/cocor/DFA.cpp +865 -0
- data/ext/cocor/DFA.h +132 -0
- data/ext/cocor/Generator.cpp +182 -0
- data/ext/cocor/Generator.h +61 -0
- data/ext/cocor/Graph.h +59 -0
- data/ext/cocor/HashTable.cpp +115 -0
- data/ext/cocor/HashTable.h +84 -0
- data/ext/cocor/Makefile +11 -0
- data/ext/cocor/Melted.cpp +39 -0
- data/ext/cocor/Melted.h +51 -0
- data/ext/cocor/Node.cpp +69 -0
- data/ext/cocor/Node.h +86 -0
- data/ext/cocor/Parser.cpp +925 -0
- data/ext/cocor/Parser.frame +326 -0
- data/ext/cocor/Parser.h +153 -0
- data/ext/cocor/ParserGen.cpp +486 -0
- data/ext/cocor/ParserGen.h +99 -0
- data/ext/cocor/Position.cpp +37 -0
- data/ext/cocor/Position.h +46 -0
- data/ext/cocor/README.md +12 -0
- data/ext/cocor/Scanner.cpp +833 -0
- data/ext/cocor/Scanner.frame +897 -0
- data/ext/cocor/Scanner.h +291 -0
- data/ext/cocor/Sets.h +84 -0
- data/ext/cocor/SortedList.cpp +141 -0
- data/ext/cocor/SortedList.h +68 -0
- data/ext/cocor/State.cpp +77 -0
- data/ext/cocor/State.h +55 -0
- data/ext/cocor/StringBuilder.cpp +88 -0
- data/ext/cocor/StringBuilder.h +29 -0
- data/ext/cocor/Symbol.cpp +61 -0
- data/ext/cocor/Symbol.h +70 -0
- data/ext/cocor/Tab.cpp +1248 -0
- data/ext/cocor/Tab.h +245 -0
- data/ext/cocor/Target.cpp +41 -0
- data/ext/cocor/Target.h +48 -0
- data/ext/cocor/build.bat +3 -0
- data/ext/cocor/build.sh +4 -0
- data/ext/cocor/coc.bat +1 -0
- data/ext/cocor/coc.sh +2 -0
- data/ext/cocor/cocor_ruby_ext.cpp +124 -0
- data/ext/cocor/cygBuild.bat +1 -0
- data/ext/cocor/extconf.rb +5 -0
- data/ext/cocor/mingwbuild.bat +2 -0
- data/ext/cocor/mkmf.log +57 -0
- data/ext/cocor/zipsources.bat +1 -0
- data/lib/cocor.rb +14 -0
- data/lib/ruco/version.rb +3 -0
- data/lib/ruco.rb +728 -0
- metadata +195 -0
@@ -0,0 +1,896 @@
|
|
1
|
+
/*----------------------------------------------------------------------
|
2
|
+
Compiler Generator Coco/R,
|
3
|
+
Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz
|
4
|
+
extended by M. Loeberbauer & A. Woess, Univ. of Linz
|
5
|
+
ported to C++ by Csaba Balazs, University of Szeged
|
6
|
+
with improvements by Pat Terry, Rhodes University
|
7
|
+
|
8
|
+
This program is free software; you can redistribute it and/or modify it
|
9
|
+
under the terms of the GNU General Public License as published by the
|
10
|
+
Free Software Foundation; either version 2, or (at your option) any
|
11
|
+
later version.
|
12
|
+
|
13
|
+
This program is distributed in the hope that it will be useful, but
|
14
|
+
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
15
|
+
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
16
|
+
for more details.
|
17
|
+
|
18
|
+
You should have received a copy of the GNU General Public License along
|
19
|
+
with this program; if not, write to the Free Software Foundation, Inc.,
|
20
|
+
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
21
|
+
|
22
|
+
As an exception, it is allowed to write an extension of Coco/R that is
|
23
|
+
used as a plugin in non-free software.
|
24
|
+
|
25
|
+
If not otherwise stated, any source code generated by Coco/R (other than
|
26
|
+
Coco/R itself) does not fall under the GNU General Public License.
|
27
|
+
-----------------------------------------------------------------------*/
|
28
|
+
|
29
|
+
/*----------------------------------------------------------------------
|
30
|
+
Scanner.h Specification
|
31
|
+
-----------------------------------------------------------------------*/
|
32
|
+
|
33
|
+
-->begin
|
34
|
+
|
35
|
+
#if !defined(-->prefixCOCO_SCANNER_H__)
|
36
|
+
#define -->prefixCOCO_SCANNER_H__
|
37
|
+
|
38
|
+
#include <limits.h>
|
39
|
+
#include <stdio.h>
|
40
|
+
#include <stdlib.h>
|
41
|
+
#include <string.h>
|
42
|
+
#include <wchar.h>
|
43
|
+
|
44
|
+
// io.h and fcntl are used to ensure binary read from streams on windows
|
45
|
+
#if _MSC_VER >= 1300
|
46
|
+
#include <io.h>
|
47
|
+
#include <fcntl.h>
|
48
|
+
#endif
|
49
|
+
|
50
|
+
#if _MSC_VER >= 1400
|
51
|
+
#define coco_swprintf swprintf_s
|
52
|
+
#elif _MSC_VER >= 1300
|
53
|
+
#define coco_swprintf _snwprintf
|
54
|
+
#elif defined __MINGW32__
|
55
|
+
#define coco_swprintf _snwprintf
|
56
|
+
#else
|
57
|
+
// assume every other compiler knows swprintf
|
58
|
+
#define coco_swprintf swprintf
|
59
|
+
#endif
|
60
|
+
|
61
|
+
#define COCO_WCHAR_MAX 65535
|
62
|
+
#define COCO_MIN_BUFFER_LENGTH 1024
|
63
|
+
#define COCO_MAX_BUFFER_LENGTH (64*COCO_MIN_BUFFER_LENGTH)
|
64
|
+
#define COCO_HEAP_BLOCK_SIZE (64*1024)
|
65
|
+
#define COCO_CPP_NAMESPACE_SEPARATOR L':'
|
66
|
+
|
67
|
+
-->namespace_open
|
68
|
+
|
69
|
+
// string handling, wide character
|
70
|
+
wchar_t* coco_string_create(const wchar_t *value);
|
71
|
+
wchar_t* coco_string_create(const wchar_t *value, int startIndex);
|
72
|
+
wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length);
|
73
|
+
wchar_t* coco_string_create_upper(const wchar_t* data);
|
74
|
+
wchar_t* coco_string_create_lower(const wchar_t* data);
|
75
|
+
wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen);
|
76
|
+
wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2);
|
77
|
+
wchar_t* coco_string_create_append(const wchar_t* data, const wchar_t value);
|
78
|
+
void coco_string_delete(wchar_t* &data);
|
79
|
+
int coco_string_length(const wchar_t* data);
|
80
|
+
bool coco_string_endswith(const wchar_t* data, const wchar_t *value);
|
81
|
+
int coco_string_indexof(const wchar_t* data, const wchar_t value);
|
82
|
+
int coco_string_lastindexof(const wchar_t* data, const wchar_t value);
|
83
|
+
void coco_string_merge(wchar_t* &data, const wchar_t* value);
|
84
|
+
bool coco_string_equal(const wchar_t* data1, const wchar_t* data2);
|
85
|
+
int coco_string_compareto(const wchar_t* data1, const wchar_t* data2);
|
86
|
+
int coco_string_hash(const wchar_t* data);
|
87
|
+
|
88
|
+
// string handling, ascii character
|
89
|
+
wchar_t* coco_string_create(const char *value);
|
90
|
+
char* coco_string_create_char(const wchar_t *value);
|
91
|
+
void coco_string_delete(char* &data);
|
92
|
+
|
93
|
+
|
94
|
+
class Token
{
public:
    // What the token is.
    int kind;      // token kind

    // Where the token starts in the source text.
    int pos;       // byte offset (starting at 0)
    int charPos;   // character offset (starting at 0)
    int col;       // column (starting at 1)
    int line;      // line (starting at 1)

    wchar_t* val;  // token value (text)
    Token *next;   // ML 2005-03-11 Peek tokens are kept in linked list

    Token();
    ~Token();
};
|
108
|
+
|
109
|
+
class Buffer {
|
110
|
+
// This Buffer supports the following cases:
|
111
|
+
// 1) seekable stream (file)
|
112
|
+
// a) whole stream in buffer
|
113
|
+
// b) part of stream in buffer
|
114
|
+
// 2) non seekable stream (network, console)
|
115
|
+
private:
|
116
|
+
char *buf; // input buffer
|
117
|
+
int bufCapacity; // capacity of buf
|
118
|
+
int bufStart; // position of first byte in buffer relative to input stream
|
119
|
+
int bufLen; // length of buffer
|
120
|
+
int fileLen; // length of input stream (may change if the stream is no file)
|
121
|
+
int bufPos; // current position in buffer
|
122
|
+
FILE* stream; // input stream (seekable)
|
123
|
+
bool isUserStream; // was the stream opened by the user?
|
124
|
+
|
125
|
+
int ReadNextStreamChunk();
|
126
|
+
bool CanSeek(); // true if stream can be seeked otherwise false
|
127
|
+
|
128
|
+
public:
|
129
|
+
static const int EoF = COCO_WCHAR_MAX + 1;
|
130
|
+
|
131
|
+
Buffer(FILE* s, bool isUserStream);
|
132
|
+
Buffer(const char* buf, size_t len);
|
133
|
+
Buffer(Buffer *b);
|
134
|
+
virtual ~Buffer();
|
135
|
+
|
136
|
+
virtual void Close();
|
137
|
+
virtual int Read();
|
138
|
+
virtual int Peek();
|
139
|
+
virtual wchar_t* GetString(int beg, int end);
|
140
|
+
virtual int GetPos();
|
141
|
+
virtual void SetPos(int value);
|
142
|
+
};
|
143
|
+
|
144
|
+
class UTF8Buffer : public Buffer {
|
145
|
+
public:
|
146
|
+
UTF8Buffer(Buffer *b) : Buffer(b) {};
|
147
|
+
virtual int Read();
|
148
|
+
};
|
149
|
+
|
150
|
+
//-----------------------------------------------------------------------------------
|
151
|
+
// StartStates -- maps characters to start states of tokens
|
152
|
+
//-----------------------------------------------------------------------------------
|
153
|
+
class StartStates {
private:
    // One key/value pair inside a bucket's singly linked chain.
    class Elem {
    public:
        int key, val;
        Elem *next;
        Elem(int key, int val) : key(key), val(val), next(NULL) {}
    };

    Elem **tab;  // 128 bucket heads, indexed by (unsigned) key % 128

public:
    // Creates the table with every bucket empty.
    StartStates() {
        tab = new Elem*[128];
        for (int bucket = 0; bucket < 128; ++bucket) { tab[bucket] = NULL; }
    }

    // Releases every chained element and then the bucket array.
    virtual ~StartStates() {
        for (int bucket = 0; bucket < 128; ++bucket) {
            for (Elem *cur = tab[bucket]; cur != NULL; ) {
                Elem *following = cur->next;
                delete cur;
                cur = following;
            }
        }
        delete [] tab;
    }

    // Registers val for key. The new entry is pushed at the head of its
    // bucket, so it shadows any earlier entry with the same key.
    void set(int key, int val) {
        Elem **head = &tab[((unsigned int) key) % 128];
        Elem *fresh = new Elem(key, val);
        fresh->next = *head;
        *head = fresh;
    }

    // Returns the value stored for key, or 0 when the key is unknown.
    int state(int key) {
        for (Elem *cur = tab[((unsigned int) key) % 128]; cur != NULL; cur = cur->next) {
            if (cur->key == key) { return cur->val; }
        }
        return 0;
    }
};
|
190
|
+
|
191
|
+
//-------------------------------------------------------------------------------------------
|
192
|
+
// KeywordMap -- maps strings to integers (identifiers to keyword kinds)
|
193
|
+
//-------------------------------------------------------------------------------------------
|
194
|
+
class KeywordMap {
|
195
|
+
private:
|
196
|
+
class Elem {
|
197
|
+
public:
|
198
|
+
wchar_t *key;
|
199
|
+
int val;
|
200
|
+
Elem *next;
|
201
|
+
Elem(const wchar_t *key, int val) { this->key = coco_string_create(key); this->val = val; next = NULL; }
|
202
|
+
virtual ~Elem() { coco_string_delete(key); }
|
203
|
+
};
|
204
|
+
|
205
|
+
Elem **tab;
|
206
|
+
|
207
|
+
public:
|
208
|
+
KeywordMap() { tab = new Elem*[128]; memset(tab, 0, 128 * sizeof(Elem*)); }
|
209
|
+
virtual ~KeywordMap() {
|
210
|
+
for (int i = 0; i < 128; ++i) {
|
211
|
+
Elem *e = tab[i];
|
212
|
+
while (e != NULL) {
|
213
|
+
Elem *next = e->next;
|
214
|
+
delete e;
|
215
|
+
e = next;
|
216
|
+
}
|
217
|
+
}
|
218
|
+
delete [] tab;
|
219
|
+
}
|
220
|
+
|
221
|
+
void set(const wchar_t *key, int val) {
|
222
|
+
Elem *e = new Elem(key, val);
|
223
|
+
int k = coco_string_hash(key) % 128;
|
224
|
+
e->next = tab[k]; tab[k] = e;
|
225
|
+
}
|
226
|
+
|
227
|
+
int get(const wchar_t *key, int defaultVal) {
|
228
|
+
Elem *e = tab[coco_string_hash(key) % 128];
|
229
|
+
while (e != NULL && !coco_string_equal(e->key, key)) e = e->next;
|
230
|
+
return e == NULL ? defaultVal : e->val;
|
231
|
+
}
|
232
|
+
};
|
233
|
+
|
234
|
+
class Scanner {
|
235
|
+
private:
|
236
|
+
void *firstHeap;
|
237
|
+
void *heap;
|
238
|
+
void *heapTop;
|
239
|
+
void **heapEnd;
|
240
|
+
|
241
|
+
unsigned char EOL;
|
242
|
+
int eofSym;
|
243
|
+
int noSym;
|
244
|
+
int maxT;
|
245
|
+
StartStates start;
|
246
|
+
KeywordMap keywords;
|
247
|
+
|
248
|
+
Token *t; // current token
|
249
|
+
wchar_t *tval; // text of current token
|
250
|
+
int tvalLength; // length of text of current token
|
251
|
+
int tlen; // length of current token
|
252
|
+
|
253
|
+
Token *tokens; // list of tokens already peeked (first token is a dummy)
|
254
|
+
Token *pt; // current peek token
|
255
|
+
|
256
|
+
int ch; // current input character
|
257
|
+
-->casing0
|
258
|
+
int pos; // byte position of current character
|
259
|
+
int charPos; // position by unicode characters starting with 0
|
260
|
+
int line; // line number of current character
|
261
|
+
int col; // column number of current character
|
262
|
+
int oldEols; // EOLs that appeared in a comment;
|
263
|
+
|
264
|
+
void CreateHeapBlock();
|
265
|
+
Token* CreateToken();
|
266
|
+
void AppendVal(Token *t);
|
267
|
+
void SetScannerBehindT();
|
268
|
+
|
269
|
+
void Init();
|
270
|
+
void NextCh();
|
271
|
+
void AddCh();
|
272
|
+
-->commentsheader
|
273
|
+
Token* NextToken();
|
274
|
+
|
275
|
+
public:
|
276
|
+
Buffer *buffer; // scanner buffer
|
277
|
+
|
278
|
+
Scanner(const char* buf, size_t len);
|
279
|
+
Scanner(const wchar_t* fileName);
|
280
|
+
Scanner(FILE* s);
|
281
|
+
~Scanner();
|
282
|
+
Token* Scan();
|
283
|
+
Token* Peek();
|
284
|
+
void ResetPeek();
|
285
|
+
|
286
|
+
}; // end Scanner
|
287
|
+
|
288
|
+
-->namespace_close
|
289
|
+
|
290
|
+
#endif
|
291
|
+
|
292
|
+
-->implementation
|
293
|
+
|
294
|
+
/*----------------------------------------------------------------------
|
295
|
+
Scanner.cpp Specification
|
296
|
+
-----------------------------------------------------------------------*/
|
297
|
+
|
298
|
+
-->begin
|
299
|
+
|
300
|
+
#include <memory.h>
|
301
|
+
#include <string.h>
|
302
|
+
#include "Scanner.h"
|
303
|
+
|
304
|
+
-->namespace_open
|
305
|
+
|
306
|
+
|
307
|
+
// string handling, wide character
|
308
|
+
|
309
|
+
|
310
|
+
wchar_t* coco_string_create(const wchar_t* value) {
|
311
|
+
return coco_string_create(value, 0);
|
312
|
+
}
|
313
|
+
|
314
|
+
wchar_t* coco_string_create(const wchar_t *value, int startIndex) {
|
315
|
+
int valueLen = 0;
|
316
|
+
int len = 0;
|
317
|
+
|
318
|
+
if (value) {
|
319
|
+
valueLen = wcslen(value);
|
320
|
+
len = valueLen - startIndex;
|
321
|
+
}
|
322
|
+
|
323
|
+
return coco_string_create(value, startIndex, len);
|
324
|
+
}
|
325
|
+
|
326
|
+
// Returns a newly allocated, zero-terminated string holding at most
// length characters of value, starting at startIndex.
//
// A NULL value or a non-positive length yields an empty string; in that
// case wcsncpy is not called at all, so no NULL pointer is handed to it
// and no negative array size is requested.
// As with wcsncpy, copying stops at a terminator inside the range and
// the remainder is zero-filled.
wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) {
    const int len = (value && length > 0) ? length : 0;
    wchar_t* data = new wchar_t[len + 1];

    if (len > 0) { wcsncpy(data, value + startIndex, len); }
    data[len] = 0;

    return data;
}
|
337
|
+
|
338
|
+
// Returns a newly allocated copy of data in which the letters a..z are
// replaced by A..Z; every other character is copied unchanged.
// Returns NULL when data is NULL.
wchar_t* coco_string_create_upper(const wchar_t* data) {
    if (data == NULL) { return NULL; }

    const int count = (int) wcslen(data);
    wchar_t *upper = new wchar_t[count + 1];

    for (int idx = 0; idx < count; ++idx) {
        const wchar_t c = data[idx];
        upper[idx] = ((c >= L'a') && (c <= L'z')) ? (wchar_t) (c + (L'A' - L'a')) : c;
    }
    upper[count] = L'\0';

    return upper;
}
|
356
|
+
|
357
|
+
wchar_t* coco_string_create_lower(const wchar_t* data) {
|
358
|
+
if (!data) { return NULL; }
|
359
|
+
int dataLen = wcslen(data);
|
360
|
+
return coco_string_create_lower(data, 0, dataLen);
|
361
|
+
}
|
362
|
+
|
363
|
+
// Returns a newly allocated, zero-terminated string holding the dataLen
// characters of data that start at startIndex, with the letters A..Z
// replaced by a..z. Returns NULL when data is NULL.
//
// Only data[startIndex .. startIndex + dataLen - 1] is read. The former
// loop bound (i <= dataLen) also read the element just past the range,
// which lies outside the array when the range ends at the array's end
// and the source is not zero-terminated.
wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) {
    if (!data) { return NULL; }
    if (dataLen < 0) { dataLen = 0; }  // never request a negative array size

    wchar_t* newData = new wchar_t[dataLen + 1];

    for (int i = 0; i < dataLen; i++) {
        const wchar_t ch = data[startIndex + i];
        if ((L'A' <= ch) && (ch <= L'Z')) {
            newData[i] = ch - (L'A' - L'a');
        }
        else { newData[i] = ch; }
    }
    newData[dataLen] = L'\0';
    return newData;
}
|
378
|
+
|
379
|
+
// Returns a newly allocated string consisting of data1 followed by
// data2. Either argument may be NULL and then contributes nothing.
wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) {
    const int headLen = data1 ? (int) wcslen(data1) : 0;
    const int tailLen = data2 ? (int) wcslen(data2) : 0;

    wchar_t* joined = new wchar_t[headLen + tailLen + 1];

    if (data1) { wcscpy(joined, data1); }
    if (data2) { wcscpy(joined + headLen, data2); }
    joined[headLen + tailLen] = 0;

    return joined;
}
|
396
|
+
|
397
|
+
wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) {
|
398
|
+
int targetLen = coco_string_length(target);
|
399
|
+
wchar_t* data = new wchar_t[targetLen + 2];
|
400
|
+
wcsncpy(data, target, targetLen);
|
401
|
+
data[targetLen] = appendix;
|
402
|
+
data[targetLen + 1] = 0;
|
403
|
+
return data;
|
404
|
+
}
|
405
|
+
|
406
|
+
// Frees a wide string obtained from the coco_string_create* functions
// and resets the caller's pointer to NULL. A NULL pointer is accepted.
void coco_string_delete(wchar_t* &data) {
    wchar_t* doomed = data;
    data = NULL;
    delete [] doomed;
}
|
410
|
+
|
411
|
+
// Returns the number of characters in data; a NULL pointer counts as
// an empty string.
int coco_string_length(const wchar_t* data) {
    return data ? (int) wcslen(data) : 0;
}
|
415
|
+
|
416
|
+
// Returns true when the string data ends with the string end.
// Both arguments must be non-NULL.
bool coco_string_endswith(const wchar_t* data, const wchar_t *end) {
    const int dataLen = (int) wcslen(data);
    const int endLen = (int) wcslen(end);
    if (endLen > dataLen) { return false; }
    return wcscmp(data + (dataLen - endLen), end) == 0;
}
|
421
|
+
|
422
|
+
// Returns the index of the first occurrence of value in data, or -1
// when it does not occur. data must be non-NULL.
int coco_string_indexof(const wchar_t* data, const wchar_t value) {
    const wchar_t* hit = wcschr(data, value);
    return hit ? (int) (hit - data) : -1;
}
|
428
|
+
|
429
|
+
// Returns the index of the last occurrence of value in data, or -1
// when it does not occur. data must be non-NULL.
int coco_string_lastindexof(const wchar_t* data, const wchar_t value) {
    const wchar_t* hit = wcsrchr(data, value);
    return hit ? (int) (hit - data) : -1;
}
|
435
|
+
|
436
|
+
void coco_string_merge(wchar_t* &target, const wchar_t* appendix) {
|
437
|
+
if (!appendix) { return; }
|
438
|
+
wchar_t* data = coco_string_create_append(target, appendix);
|
439
|
+
delete [] target;
|
440
|
+
target = data;
|
441
|
+
}
|
442
|
+
|
443
|
+
// Returns true when both strings have identical contents.
// Both arguments must be non-NULL.
bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) {
    const int order = wcscmp(data1, data2);
    return order == 0;
}
|
446
|
+
|
447
|
+
// Three-way comparison of two strings with wcscmp semantics: negative,
// zero or positive when data1 sorts before, equal to or after data2.
// Both arguments must be non-NULL.
int coco_string_compareto(const wchar_t* data1, const wchar_t* data2) {
    const int order = wcscmp(data1, data2);
    return order;
}
|
450
|
+
|
451
|
+
// Returns a non-negative hash of data (0 for NULL or the empty string),
// computed as h = (h * 7) ^ character over all characters.
//
// The arithmetic is done on unsigned int so that wrap-around is well
// defined; the former signed version overflowed (undefined behaviour)
// for longer strings and in the final negation. Results are unchanged
// wherever the old code did not overflow.
int coco_string_hash(const wchar_t *data) {
    if (!data) { return 0; }

    unsigned int h = 0;
    for (; *data != 0; ++data) {
        h = (h * 7u) ^ (unsigned int) *data;
    }

    // Values that would be negative as int are negated, as before ...
    if (h > (unsigned int) INT_MAX) { h = 0u - h; }
    // ... and the one value whose negation is still out of range maps to 0.
    h &= (unsigned int) INT_MAX;
    return (int) h;
}
|
461
|
+
|
462
|
+
// string handling, ascii character
|
463
|
+
|
464
|
+
// Returns a newly allocated wide string in which every byte of value is
// widened to one character. A NULL value yields an empty string.
//
// Each byte goes through unsigned char first: where plain char is
// signed, a direct cast sign-extends bytes >= 0x80 into large or
// negative wchar_t values instead of 0x80..0xFF.
wchar_t* coco_string_create(const char* value) {
    int len = 0;
    if (value) { len = (int) strlen(value); }
    wchar_t* data = new wchar_t[len + 1];
    for (int i = 0; i < len; ++i) { data[i] = (wchar_t) (unsigned char) value[i]; }
    data[len] = 0;
    return data;
}
|
472
|
+
|
473
|
+
char* coco_string_create_char(const wchar_t *value) {
|
474
|
+
int len = coco_string_length(value);
|
475
|
+
char *res = new char[len + 1];
|
476
|
+
for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; }
|
477
|
+
res[len] = 0;
|
478
|
+
return res;
|
479
|
+
}
|
480
|
+
|
481
|
+
// Frees a narrow string obtained from coco_string_create_char and
// resets the caller's pointer to NULL. A NULL pointer is accepted.
void coco_string_delete(char* &data) {
    char* doomed = data;
    data = NULL;
    delete [] doomed;
}
|
485
|
+
|
486
|
+
|
487
|
+
// Creates an empty token: all positions zero, no value, no successor.
// charPos is now initialized as well; it used to be left indeterminate.
Token::Token() {
    kind    = 0;
    pos     = 0;
    charPos = 0;
    col     = 0;
    line    = 0;
    val     = NULL;
    next    = NULL;
}
|
495
|
+
|
496
|
+
// Releases the token text with delete[] and clears the pointer.
// NOTE(review): tokens produced by Scanner::CreateToken live in the
// scanner's malloc'ed heap blocks and are released with free() in
// ~Scanner without this destructor being run - confirm it is only
// reached for tokens whose val was allocated with new[].
Token::~Token() {
    delete [] val;
    val = NULL;
}
|
499
|
+
|
500
|
+
// Creates a buffer over the stream s. isUserStream records whether the
// caller owns the stream; Close() only closes streams it does not.
//
// Seekable stream: the length is measured and up to
// COCO_MAX_BUFFER_LENGTH bytes are loaded; when the whole input fits,
// the stream is closed right away.
// Non-seekable stream: starts empty with COCO_MIN_BUFFER_LENGTH bytes
// of capacity; data is pulled in on demand.
Buffer::Buffer(FILE* s, bool isUserStream) {
// ensure binary read on windows
#if _MSC_VER >= 1300
    _setmode(_fileno(s), _O_BINARY);
#endif
    this->isUserStream = isUserStream;
    stream = s;

    if (!CanSeek()) {
        fileLen = 0;
        bufLen = 0;
        bufStart = 0;
    } else {
        fseek(s, 0, SEEK_END);
        fileLen = ftell(s);
        fseek(s, 0, SEEK_SET);
        bufLen = fileLen;
        if (bufLen >= COCO_MAX_BUFFER_LENGTH) { bufLen = COCO_MAX_BUFFER_LENGTH; }
        bufStart = INT_MAX; // nothing in the buffer so far
    }

    bufCapacity = COCO_MIN_BUFFER_LENGTH;
    if (bufLen > 0) { bufCapacity = bufLen; }
    buf = new char[bufCapacity];

    if (fileLen > 0) {
        SetPos(0);  // setup buffer to position 0 (start)
    } else {
        bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid
    }

    if (bufLen == fileLen && CanSeek()) { Close(); }
}
|
521
|
+
|
522
|
+
// Takes over the state of b: all fields are copied, then b's byte array
// and stream pointers are set to NULL so that destroying b releases
// neither of them.
Buffer::Buffer(Buffer *b) {
    // plain value fields
    bufCapacity  = b->bufCapacity;
    bufStart     = b->bufStart;
    bufLen       = b->bufLen;
    fileLen      = b->fileLen;
    bufPos       = b->bufPos;
    isUserStream = b->isUserStream;

    // resources that change hands
    buf    = b->buf;
    stream = b->stream;
    b->buf    = NULL;
    b->stream = NULL;
}
|
534
|
+
|
535
|
+
// Creates a buffer over a private copy of the len bytes at buf; there
// is no underlying stream.
Buffer::Buffer(const char* buf, size_t len) {
    this->buf = new char[len];
    // Nothing to copy for an empty input (and buf may be NULL then).
    if (len > 0) { memcpy(this->buf, buf, len * sizeof(char)); }
    bufStart = 0;
    bufCapacity = bufLen = (int) len;
    fileLen = (int) len;
    bufPos = 0;
    stream = NULL;
    // Previously left uninitialized although Close() reads it and
    // Buffer(Buffer*) copies it. There is no stream at all here, so no
    // caller-owned stream either.
    isUserStream = false;
}
|
544
|
+
|
545
|
+
// Closes the stream (when Close() decides this buffer is responsible
// for it) and frees the byte array.
Buffer::~Buffer() {
    Close();
    delete [] buf;  // no-op when buf is NULL
    buf = NULL;
}
|
552
|
+
|
553
|
+
void Buffer::Close() {
|
554
|
+
if (!isUserStream && stream != NULL) {
|
555
|
+
fclose(stream);
|
556
|
+
stream = NULL;
|
557
|
+
}
|
558
|
+
}
|
559
|
+
|
560
|
+
int Buffer::Read() {
|
561
|
+
if (bufPos < bufLen) {
|
562
|
+
return buf[bufPos++];
|
563
|
+
} else if (GetPos() < fileLen) {
|
564
|
+
SetPos(GetPos()); // shift buffer start to Pos
|
565
|
+
return buf[bufPos++];
|
566
|
+
} else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) {
|
567
|
+
return buf[bufPos++];
|
568
|
+
} else {
|
569
|
+
return EoF;
|
570
|
+
}
|
571
|
+
}
|
572
|
+
|
573
|
+
int Buffer::Peek() {
|
574
|
+
int curPos = GetPos();
|
575
|
+
int ch = Read();
|
576
|
+
SetPos(curPos);
|
577
|
+
return ch;
|
578
|
+
}
|
579
|
+
|
580
|
+
// beg .. begin, zero-based, inclusive, in byte
|
581
|
+
// end .. end, zero-based, exclusive, in byte
|
582
|
+
wchar_t* Buffer::GetString(int beg, int end) {
|
583
|
+
int len = 0;
|
584
|
+
wchar_t *buf = new wchar_t[end - beg];
|
585
|
+
int oldPos = GetPos();
|
586
|
+
SetPos(beg);
|
587
|
+
while (GetPos() < end) buf[len++] = (wchar_t) Read();
|
588
|
+
SetPos(oldPos);
|
589
|
+
wchar_t *res = coco_string_create(buf, 0, len);
|
590
|
+
coco_string_delete(buf);
|
591
|
+
return res;
|
592
|
+
}
|
593
|
+
|
594
|
+
int Buffer::GetPos() {
|
595
|
+
return bufPos + bufStart;
|
596
|
+
}
|
597
|
+
|
598
|
+
void Buffer::SetPos(int value) {
|
599
|
+
if ((value >= fileLen) && (stream != NULL) && !CanSeek()) {
|
600
|
+
// Wanted position is after buffer and the stream
|
601
|
+
// is not seek-able e.g. network or console,
|
602
|
+
// thus we have to read the stream manually till
|
603
|
+
// the wanted position is in sight.
|
604
|
+
while ((value >= fileLen) && (ReadNextStreamChunk() > 0));
|
605
|
+
}
|
606
|
+
|
607
|
+
if ((value < 0) || (value > fileLen)) {
|
608
|
+
wprintf(L"--- buffer out of bounds access, position: %d\n", value);
|
609
|
+
exit(1);
|
610
|
+
}
|
611
|
+
|
612
|
+
if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer
|
613
|
+
bufPos = value - bufStart;
|
614
|
+
} else if (stream != NULL) { // must be swapped in
|
615
|
+
fseek(stream, value, SEEK_SET);
|
616
|
+
bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream);
|
617
|
+
bufStart = value; bufPos = 0;
|
618
|
+
} else {
|
619
|
+
bufPos = fileLen - bufStart; // make Pos return fileLen
|
620
|
+
}
|
621
|
+
}
|
622
|
+
|
623
|
+
// Read the next chunk of bytes from the stream, increases the buffer
|
624
|
+
// if needed and updates the fields fileLen and bufLen.
|
625
|
+
// Returns the number of bytes read.
|
626
|
+
int Buffer::ReadNextStreamChunk() {
|
627
|
+
int free = bufCapacity - bufLen;
|
628
|
+
if (free == 0) {
|
629
|
+
// in the case of a growing input stream
|
630
|
+
// we can neither seek in the stream, nor can we
|
631
|
+
// foresee the maximum length, thus we must adapt
|
632
|
+
// the buffer size on demand.
|
633
|
+
bufCapacity = bufLen * 2;
|
634
|
+
char *newBuf = new char[bufCapacity];
|
635
|
+
memcpy(newBuf, buf, bufLen*sizeof(char));
|
636
|
+
delete [] buf;
|
637
|
+
buf = newBuf;
|
638
|
+
free = bufLen;
|
639
|
+
}
|
640
|
+
int read = fread(buf + bufLen, sizeof(unsigned char), free, stream);
|
641
|
+
if (read > 0) {
|
642
|
+
fileLen = bufLen = (bufLen + read);
|
643
|
+
return read;
|
644
|
+
}
|
645
|
+
// end of stream reached
|
646
|
+
return 0;
|
647
|
+
}
|
648
|
+
|
649
|
+
bool Buffer::CanSeek() {
|
650
|
+
return (stream != NULL) && (ftell(stream) != -1);
|
651
|
+
}
|
652
|
+
|
653
|
+
// Reads one UTF-8 encoded character and returns its code point, or EoF.
// Leading bytes that cannot start a character (10xxxxxx) are skipped.
// Continuation bytes are not validated; only their low six bits are used.
int UTF8Buffer::Read() {
    int ch;
    do {
        ch = Buffer::Read();
        // until we find a utf8 start (0xxxxxxx or 11xxxxxx)
    } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF));

    // nothing to do, first 127 chars are the same in ascii and utf8
    // 0xxxxxxx or end of file character
    if (ch < 128 || ch == EoF) { return ch; }

    int code;       // payload bits collected so far
    int following;  // number of continuation bytes still to read
    if ((ch & 0xF0) == 0xF0) {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        code = ch & 0x07; following = 3;
    } else if ((ch & 0xE0) == 0xE0) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        code = ch & 0x0F; following = 2;
    } else if ((ch & 0xC0) == 0xC0) {
        // 110xxxxx 10xxxxxx
        code = ch & 0x1F; following = 1;
    } else {
        return ch;
    }

    for (; following > 0; --following) {
        code = (code << 6) | (Buffer::Read() & 0x3F);
    }
    return code;
}
|
683
|
+
|
684
|
+
// Creates a scanner over the len bytes at buf (the Buffer keeps its
// own copy) and reads the first character.
Scanner::Scanner(const char* buf, size_t len) {
    Buffer *memoryBuffer = new Buffer(buf, len);
    buffer = memoryBuffer;
    Init();
}
|
688
|
+
|
689
|
+
// Creates a scanner over the file fileName, opened in binary mode.
// The stream belongs to the scanner's buffer, which closes it.
// If the file cannot be opened, a message is printed and the program
// terminates with exit(1).
Scanner::Scanner(const wchar_t* fileName) {
    char *chFileName = coco_string_create_char(fileName);
    FILE* stream = fopen(chFileName, "rb");
    if (stream == NULL) {
        wprintf(L"--- Cannot open file %ls\n", fileName);
        exit(1);
    }
    coco_string_delete(chFileName);

    buffer = new Buffer(stream, false);
    Init();
}
|
700
|
+
|
701
|
+
// Creates a scanner over a stream supplied by the caller. The stream is
// flagged as user-owned, so the buffer never closes it.
Scanner::Scanner(FILE* s) {
    Buffer *streamBuffer = new Buffer(s, true);
    buffer = streamBuffer;
    Init();
}
|
705
|
+
|
706
|
+
// Frees the chain of token heap blocks, the token text scratch array
// and the input buffer.
Scanner::~Scanner() {
    while (firstHeap != NULL) {
        // the link to the following block is stored right behind the payload
        char* following = *(char**) ((char*) firstHeap + COCO_HEAP_BLOCK_SIZE);
        free(firstHeap);
        firstHeap = following;
    }
    delete [] tval;
    delete buffer;
}
|
717
|
+
|
718
|
+
void Scanner::Init() {
|
719
|
+
EOL = '\n';
|
720
|
+
eofSym = 0;
|
721
|
+
-->declarations
|
722
|
+
|
723
|
+
tvalLength = 128;
|
724
|
+
tval = new wchar_t[tvalLength]; // text of current token
|
725
|
+
|
726
|
+
// COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block
|
727
|
+
heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*));
|
728
|
+
firstHeap = heap;
|
729
|
+
heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE);
|
730
|
+
*heapEnd = 0;
|
731
|
+
heapTop = heap;
|
732
|
+
if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) {
|
733
|
+
wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n");
|
734
|
+
exit(1);
|
735
|
+
}
|
736
|
+
|
737
|
+
pos = -1; line = 1; col = 0; charPos = -1;
|
738
|
+
oldEols = 0;
|
739
|
+
NextCh();
|
740
|
+
if (ch == 0xEF) { // check optional byte order mark for UTF-8
|
741
|
+
NextCh(); int ch1 = ch;
|
742
|
+
NextCh(); int ch2 = ch;
|
743
|
+
if (ch1 != 0xBB || ch2 != 0xBF) {
|
744
|
+
wprintf(L"Illegal byte order mark at start of file");
|
745
|
+
exit(1);
|
746
|
+
}
|
747
|
+
Buffer *oldBuf = buffer;
|
748
|
+
buffer = new UTF8Buffer(buffer); col = 0; charPos = -1;
|
749
|
+
delete oldBuf; oldBuf = NULL;
|
750
|
+
NextCh();
|
751
|
+
}
|
752
|
+
|
753
|
+
-->initialization
|
754
|
+
pt = tokens = CreateToken(); // first token is a dummy
|
755
|
+
}
|
756
|
+
|
757
|
+
void Scanner::NextCh() {
|
758
|
+
if (oldEols > 0) { ch = EOL; oldEols--; }
|
759
|
+
else {
|
760
|
+
pos = buffer->GetPos();
|
761
|
+
// buffer reads unicode chars, if UTF8 has been detected
|
762
|
+
ch = buffer->Read(); col++; charPos++;
|
763
|
+
// replace isolated '\r' by '\n' in order to make
|
764
|
+
// eol handling uniform across Windows, Unix and Mac
|
765
|
+
if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL;
|
766
|
+
if (ch == EOL) { line++; col = 0; }
|
767
|
+
}
|
768
|
+
-->casing1
|
769
|
+
}
|
770
|
+
|
771
|
+
void Scanner::AddCh() {
|
772
|
+
if (tlen >= tvalLength) {
|
773
|
+
tvalLength *= 2;
|
774
|
+
wchar_t *newBuf = new wchar_t[tvalLength];
|
775
|
+
memcpy(newBuf, tval, tlen*sizeof(wchar_t));
|
776
|
+
delete [] tval;
|
777
|
+
tval = newBuf;
|
778
|
+
}
|
779
|
+
if (ch != Buffer::EoF) {
|
780
|
+
-->casing2
|
781
|
+
NextCh();
|
782
|
+
}
|
783
|
+
}
|
784
|
+
|
785
|
+
-->comments
|
786
|
+
|
787
|
+
void Scanner::CreateHeapBlock() {
|
788
|
+
void* newHeap;
|
789
|
+
char* cur = (char*) firstHeap;
|
790
|
+
|
791
|
+
while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) {
|
792
|
+
cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE));
|
793
|
+
free(firstHeap);
|
794
|
+
firstHeap = cur;
|
795
|
+
}
|
796
|
+
|
797
|
+
// COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block
|
798
|
+
newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*));
|
799
|
+
*heapEnd = newHeap;
|
800
|
+
heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE);
|
801
|
+
*heapEnd = 0;
|
802
|
+
heap = newHeap;
|
803
|
+
heapTop = heap;
|
804
|
+
}
|
805
|
+
|
806
|
+
// Carves a Token out of the current heap block, starting a new block
// when the current one cannot hold it. Only val and next are
// initialized (to NULL); the Token constructor is not run.
Token* Scanner::CreateToken() {
    char* const limit = (char*) heapEnd;
    if (((char*) heapTop + (int) sizeof(Token)) >= limit) {
        CreateHeapBlock();
    }

    Token *fresh = (Token*) heapTop;
    heapTop = (void*) ((char*) heapTop + sizeof(Token));
    fresh->val = NULL;
    fresh->next = NULL;
    return fresh;
}
|
817
|
+
|
818
|
+
// Stores the text of the current token (the first tlen characters of
// tval) in the token heap, zero-terminated, and points t->val at it.
// A value that cannot fit into one heap block prints a diagnostic and
// terminates the program with exit(1).
void Scanner::AppendVal(Token *t) {
    int reqMem = (tlen + 1) * sizeof(wchar_t);
    // Round the reservation up to a multiple of the pointer size. Tokens
    // are carved from the same block right behind this text and contain
    // pointers; without rounding, an odd number of wchar_t could leave
    // heapTop misaligned for the next Token. The block size is itself a
    // multiple of the pointer size, so the limit check below is unchanged.
    const int align = (int) sizeof(void*);
    reqMem = ((reqMem + align - 1) / align) * align;

    if (((char*) heapTop + reqMem) >= (char*) heapEnd) {
        if (reqMem > COCO_HEAP_BLOCK_SIZE) {
            wprintf(L"--- Too long token value\n");
            exit(1);
        }
        CreateHeapBlock();
    }
    t->val = (wchar_t*) heapTop;
    heapTop = (void*) ((char*) heapTop + reqMem);

    wcsncpy(t->val, tval, tlen);
    t->val[tlen] = L'\0';
}
|
833
|
+
|
834
|
+
// Scans and returns the next token from the input.
// The three marker lines are generator insertion points and must stay
// on lines of their own. The local names and the case_0 label are kept
// as they are because, presumably, the inserted code refers to them.
Token* Scanner::NextToken() {
    // skip blanks plus whatever the generator adds to the condition
    while (ch == ' ' ||
-->scan1
    ) NextCh();
-->scan2
    int recKind = noSym;
    int recEnd = pos;
    t = CreateToken();
    t->pos = pos; t->col = col; t->line = line; t->charPos = charPos;
    int state = start.state(ch);
    tlen = 0; AddCh();

    switch (state) {
        case -1: { t->kind = eofSym; break; } // NextCh already done
        case 0: {
            case_0:
            // fall back to the last recognized token kind, if any
            if (recKind != noSym) {
                tlen = recEnd - t->pos;
                SetScannerBehindT();
            }
            t->kind = recKind; break;
        } // NextCh already done
-->scan3
    }
    AppendVal(t);
    return t;
}
|
861
|
+
|
862
|
+
// Repositions the scanner right behind the first tlen characters of the
// current token t: seeks back to the token start, restores line, col
// and charPos from the token, then re-reads tlen characters.
void Scanner::SetScannerBehindT() {
    buffer->SetPos(t->pos);
    NextCh();
    line = t->line;
    col = t->col;
    charPos = t->charPos;

    int consumed = 0;
    while (consumed < tlen) {
        NextCh();
        ++consumed;
    }
}
|
868
|
+
|
869
|
+
// get the next token (possibly a token already seen during peeking)
|
870
|
+
Token* Scanner::Scan() {
|
871
|
+
if (tokens->next == NULL) {
|
872
|
+
return pt = tokens = NextToken();
|
873
|
+
} else {
|
874
|
+
pt = tokens = tokens->next;
|
875
|
+
return tokens;
|
876
|
+
}
|
877
|
+
}
|
878
|
+
|
879
|
+
// peek for the next token, ignore pragmas
|
880
|
+
Token* Scanner::Peek() {
|
881
|
+
do {
|
882
|
+
if (pt->next == NULL) {
|
883
|
+
pt->next = NextToken();
|
884
|
+
}
|
885
|
+
pt = pt->next;
|
886
|
+
} while (pt->kind > maxT); // skip pragmas
|
887
|
+
|
888
|
+
return pt;
|
889
|
+
}
|
890
|
+
|
891
|
+
// make sure that peeking starts at the current scan position
|
892
|
+
void Scanner::ResetPeek() {
|
893
|
+
pt = tokens;
|
894
|
+
}
|
895
|
+
|
896
|
+
-->namespace_close
|