s2 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,130 @@
1
+
2
+
3
+ #if !defined(S2_COCO_PARSER_H__)
4
+ #define S2_COCO_PARSER_H__
5
+
6
+ #include <iostream>
7
+ #include <memory>
8
+ #include "S2.hpp"
9
+
10
+
11
+ #include "Scanner.h"
12
+
13
+ namespace S2 {
14
+
15
+
16
/// Value type describing one parser diagnostic: a line, a column and a
/// wide-character message. All three are fixed at construction time.
class ParserException {
public:
	/// Stores the given position and a copy of the message text.
	ParserException(int line, int col, std::wstring message)
		: line(line), col(col), message(message)
	{}

	/// Line passed to the constructor.
	int LineNumber() const { return line; }

	/// Column passed to the constructor.
	int ColumnNumber() const { return col; }

	/// Copy of the message passed to the constructor.
	std::wstring GetMessage() const { return message; }

private:
	int line;
	int col;
	std::wstring message;
};
42
+
43
+ class Errors {
44
+ public:
45
+ int count; // number of errors detected
46
+ std::vector<ParserException> warnings;
47
+
48
+ Errors();
49
+ void SynErr(int line, int col, int n);
50
+ void Error(int line, int col, const wchar_t *s);
51
+ void Warning(int line, int col, const wchar_t *s);
52
+ void Warning(const wchar_t *s);
53
+ void Exception(const wchar_t *s);
54
+
55
+ }; // Errors
56
+
57
+ class Parser {
58
+ private:
59
+ enum {
60
+ _EOF=0,
61
+ _pascalcase=1,
62
+ _camelcase=2,
63
+ _number=3,
64
+ _hexinteger=4,
65
+ _string=5,
66
+ _badString=6,
67
+ _char=7,
68
+ _endOfLine=8,
69
+ _customTokenTypeVariable=9,
70
+ _ddtSym=22,
71
+ _optionSym=23
72
+ };
73
+ int maxT;
74
+
75
+ Token *dummyToken;
76
+ int errDist;
77
+ int minErrDist;
78
+
79
+ void SynErr(int n);
80
+ void Get();
81
+ void Expect(int n);
82
+ bool StartOf(int s);
83
+ void ExpectWeak(int n, int follow);
84
+ bool WeakSeparator(int n, int syFol, int repFol);
85
+
86
+ public:
87
+ Scanner *scanner;
88
+ Errors *errors;
89
+
90
+ Token *t; // last recognized token
91
+ Token *la; // lookahead token
92
+
93
+ S2Ptr s2;
94
+
95
+
96
+
97
+ Parser(Scanner *scanner);
98
+ ~Parser();
99
+ void SemErr(const wchar_t* msg);
100
+
101
+ void S2();
102
+ void Statement(StatementPtr& production);
103
+ void TypeVariable(TypeVariablePtr& production);
104
+ void StructName(StructNamePtr& production);
105
+ void MemberName(MemberNamePtr& production);
106
+ void NumberLiteral(NumberLiteralPtr& production);
107
+ void StringLiteral(StringLiteralPtr& production);
108
+ void TypeIdentifier(TypeIdentifierPtr& production);
109
+ void TypeParameterArguments(TypeParameterArgumentsPtr& production);
110
+ void TypeExpression(TypeExpressionPtr& production);
111
+ void TypeDeclaration(TypeDeclarationPtr& production);
112
+ void TypeParameters(TypeParametersPtr& production);
113
+ void NumberLit(NumberLitPtr& production);
114
+ void Expression(ExpressionPtr& production);
115
+ void AttributeParam(AttributeParamPtr& production);
116
+ void AttributeParamList(AttributeParamListPtr& production);
117
+ void Attribute(AttributePtr& production);
118
+ void Member(MemberPtr& production);
119
+ void Structure(StructurePtr& production);
120
+ void Import(ImportPtr& production);
121
+
122
+ void Parse();
123
+
124
+ }; // end Parser
125
+
126
+ } // namespace
127
+
128
+
129
+ #endif
130
+
@@ -0,0 +1,246 @@
1
+
2
+ #ifndef S2_HPP
3
+ #define S2_HPP
4
+
5
+ /*
6
+ WARNING: This file is generated using ruco. Please modify the .ruco file if you wish to change anything
7
+ https://github.com/davidsiaw/ruco
8
+ */
9
+
10
+ #include <string>
11
+ #include <memory>
12
+ #include <vector>
13
+
14
+ namespace S2
15
+ {
16
+
17
/// Discriminator returned by Statement::get_statement_type().
enum StatementType
{
	STRUCTURE_STATEMENT,
	IMPORT_STATEMENT
};

/// Abstract base of the statement node classes.
class Statement
{
public:
	/// Position fields start at zero instead of holding indeterminate values.
	Statement() : _line(0), _col(0) {}

	/// Virtual so that destroying a derived node through a Statement*
	/// runs the derived destructor.
	virtual ~Statement() {}

	unsigned _line, _col;

	/// Identifies the concrete statement class.
	virtual StatementType get_statement_type() const = 0;
};
typedef std::shared_ptr<Statement> StatementPtr;
typedef std::vector<StatementPtr> StatementArray;
31
+
32
+ class S2
33
+ {
34
+ public:
35
+ unsigned _line, _col;
36
+ StatementArray statements;
37
+ };
38
+ typedef std::shared_ptr<S2> S2Ptr;
39
+ typedef std::vector<S2Ptr> S2Array;
40
+
41
/// Node holding the text of a structure name in `content`.
class StructName
{
public:
	/// Position fields start at zero instead of holding indeterminate values.
	StructName() : _line(0), _col(0) {}

	unsigned _line, _col;
	std::wstring content;
};
typedef std::shared_ptr<StructName> StructNamePtr;
typedef std::vector<StructNamePtr> StructNameArray;
49
+
50
/// Node holding the text of a type variable in `content`.
class TypeVariable
{
public:
	/// Position fields start at zero instead of holding indeterminate values.
	TypeVariable() : _line(0), _col(0) {}

	unsigned _line, _col;
	std::wstring content;
};
typedef std::shared_ptr<TypeVariable> TypeVariablePtr;
typedef std::vector<TypeVariablePtr> TypeVariableArray;
58
+
59
/// Discriminator returned by TypeExpression::get_typeexpression_type().
enum TypeExpressionType
{
	TYPEIDENTIFIER_TYPEEXPRESSION
};

/// Abstract base of the type-expression node classes.
class TypeExpression
{
public:
	/// Position fields start at zero instead of holding indeterminate values.
	TypeExpression() : _line(0), _col(0) {}

	/// Virtual so that destroying a derived node through a TypeExpression*
	/// runs the derived destructor.
	virtual ~TypeExpression() {}

	unsigned _line, _col;

	/// Identifies the concrete type-expression class.
	virtual TypeExpressionType get_typeexpression_type() const = 0;
};
typedef std::shared_ptr<TypeExpression> TypeExpressionPtr;
typedef std::vector<TypeExpressionPtr> TypeExpressionArray;
72
+
73
+ class TypeParameterArguments
74
+ {
75
+ public:
76
+ unsigned _line, _col;
77
+ TypeExpressionArray typeexpressions;
78
+ };
79
+ typedef std::shared_ptr<TypeParameterArguments> TypeParameterArgumentsPtr;
80
+ typedef std::vector<TypeParameterArgumentsPtr> TypeParameterArgumentsArray;
81
+
82
+ class TypeIdentifier : public TypeExpression
83
+ {
84
+ public:
85
+ unsigned _line, _col;
86
+ StructNamePtr structname;
87
+ TypeVariablePtr typevariable;
88
+ TypeParameterArgumentsArray typeparameterarguments;
89
+ virtual TypeExpressionType get_typeexpression_type() const
90
+ {
91
+ return TYPEIDENTIFIER_TYPEEXPRESSION;
92
+ }
93
+
94
+ };
95
+ typedef std::shared_ptr<TypeIdentifier> TypeIdentifierPtr;
96
+ typedef std::vector<TypeIdentifierPtr> TypeIdentifierArray;
97
+
98
+ class TypeParameters
99
+ {
100
+ public:
101
+ unsigned _line, _col;
102
+ TypeVariableArray typevariables;
103
+ };
104
+ typedef std::shared_ptr<TypeParameters> TypeParametersPtr;
105
+ typedef std::vector<TypeParametersPtr> TypeParametersArray;
106
+
107
+ class TypeDeclaration
108
+ {
109
+ public:
110
+ unsigned _line, _col;
111
+ StructNamePtr structname;
112
+ TypeParametersArray typeparameters;
113
+ };
114
+ typedef std::shared_ptr<TypeDeclaration> TypeDeclarationPtr;
115
+ typedef std::vector<TypeDeclarationPtr> TypeDeclarationArray;
116
+
117
/// Node holding the text of a number literal in `content`.
class NumberLiteral
{
public:
	/// Position fields start at zero instead of holding indeterminate values.
	NumberLiteral() : _line(0), _col(0) {}

	unsigned _line, _col;
	std::wstring content;
};
typedef std::shared_ptr<NumberLiteral> NumberLiteralPtr;
typedef std::vector<NumberLiteralPtr> NumberLiteralArray;
125
+
126
/// Discriminator returned by Expression::get_expression_type().
enum ExpressionType
{
	NUMBERLIT_EXPRESSION
};

/// Abstract base of the expression node classes.
class Expression
{
public:
	/// Position fields start at zero instead of holding indeterminate values.
	Expression() : _line(0), _col(0) {}

	/// Virtual so that destroying a derived node through an Expression*
	/// runs the derived destructor.
	virtual ~Expression() {}

	unsigned _line, _col;

	/// Identifies the concrete expression class.
	virtual ExpressionType get_expression_type() const = 0;
};
typedef std::shared_ptr<Expression> ExpressionPtr;
typedef std::vector<ExpressionPtr> ExpressionArray;
139
+
140
+ class NumberLit : public Expression
141
+ {
142
+ public:
143
+ unsigned _line, _col;
144
+ NumberLiteralPtr numberliteral;
145
+ virtual ExpressionType get_expression_type() const
146
+ {
147
+ return NUMBERLIT_EXPRESSION;
148
+ }
149
+
150
+ };
151
+ typedef std::shared_ptr<NumberLit> NumberLitPtr;
152
+ typedef std::vector<NumberLitPtr> NumberLitArray;
153
+
154
/// Node holding the text of a member name in `content`.
class MemberName
{
public:
	/// Position fields start at zero instead of holding indeterminate values.
	MemberName() : _line(0), _col(0) {}

	unsigned _line, _col;
	std::wstring content;
};
typedef std::shared_ptr<MemberName> MemberNamePtr;
typedef std::vector<MemberNamePtr> MemberNameArray;
162
+
163
+ class AttributeParam
164
+ {
165
+ public:
166
+ unsigned _line, _col;
167
+ MemberNamePtr membername;
168
+ ExpressionPtr expression;
169
+ };
170
+ typedef std::shared_ptr<AttributeParam> AttributeParamPtr;
171
+ typedef std::vector<AttributeParamPtr> AttributeParamArray;
172
+
173
+ class AttributeParamList
174
+ {
175
+ public:
176
+ unsigned _line, _col;
177
+ AttributeParamArray attributeparams;
178
+ };
179
+ typedef std::shared_ptr<AttributeParamList> AttributeParamListPtr;
180
+ typedef std::vector<AttributeParamListPtr> AttributeParamListArray;
181
+
182
+ class Attribute
183
+ {
184
+ public:
185
+ unsigned _line, _col;
186
+ TypeExpressionPtr typeexpression;
187
+ AttributeParamListArray attributeparamlists;
188
+ };
189
+ typedef std::shared_ptr<Attribute> AttributePtr;
190
+ typedef std::vector<AttributePtr> AttributeArray;
191
+
192
+ class Member
193
+ {
194
+ public:
195
+ unsigned _line, _col;
196
+ AttributeArray attributes;
197
+ TypeIdentifierPtr typeidentifier;
198
+ MemberNameArray membernames;
199
+ };
200
+ typedef std::shared_ptr<Member> MemberPtr;
201
+ typedef std::vector<MemberPtr> MemberArray;
202
+
203
+ class Structure : public Statement
204
+ {
205
+ public:
206
+ unsigned _line, _col;
207
+ AttributeArray attributes;
208
+ TypeDeclarationPtr typedeclaration;
209
+ MemberArray members;
210
+ virtual StatementType get_statement_type() const
211
+ {
212
+ return STRUCTURE_STATEMENT;
213
+ }
214
+
215
+ };
216
+ typedef std::shared_ptr<Structure> StructurePtr;
217
+ typedef std::vector<StructurePtr> StructureArray;
218
+
219
/// Node holding the text of a string literal in `content`.
class StringLiteral
{
public:
	/// Position fields start at zero instead of holding indeterminate values.
	StringLiteral() : _line(0), _col(0) {}

	unsigned _line, _col;
	std::wstring content;
};
typedef std::shared_ptr<StringLiteral> StringLiteralPtr;
typedef std::vector<StringLiteralPtr> StringLiteralArray;
227
+
228
+ class Import : public Statement
229
+ {
230
+ public:
231
+ unsigned _line, _col;
232
+ StringLiteralPtr stringliteral;
233
+ virtual StatementType get_statement_type() const
234
+ {
235
+ return IMPORT_STATEMENT;
236
+ }
237
+
238
+ };
239
+ typedef std::shared_ptr<Import> ImportPtr;
240
+ typedef std::vector<ImportPtr> ImportArray;
241
+
242
+
243
+ }
244
+
245
+ #endif // S2_HPP
246
+
@@ -0,0 +1,796 @@
1
+
2
+
3
+ #include <memory.h>
4
+ #include <string.h>
5
+ #include "Scanner.h"
6
+
7
+ namespace S2 {
8
+
9
+
10
+
11
+ // string handling, wide character
12
+
13
+
14
// Returns a new[]-allocated, NUL-terminated copy of at most `length`
// characters of `value`, starting at `startIndex`. The caller owns the
// result (release it with delete[] / coco_string_delete).
//
// `value` need not be NUL-terminated as long as `length` characters are
// readable from `startIndex`; copying stops early at a NUL (wcsncpy
// semantics). A NULL `value` or a non-positive `length` yields an empty
// string instead of touching `value`.
wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) {
	const int len = (value && length > 0) ? length : 0;
	wchar_t* data = new wchar_t[len + 1];

	// Only dereference `value` when there is something to copy.
	if (len > 0) { wcsncpy(data, value + startIndex, len); }
	data[len] = 0;

	return data;
}

// Returns a new[]-allocated copy of the NUL-terminated `value` from
// `startIndex` to its end. `startIndex` is clamped to [0, length of value],
// so an out-of-range index yields an empty string rather than an
// out-of-bounds read or a negative allocation size. NULL yields "".
wchar_t* coco_string_create(const wchar_t *value, int startIndex) {
	int len = 0;

	if (value) {
		const int valueLen = static_cast<int>(wcslen(value));
		if (startIndex < 0) { startIndex = 0; }
		if (startIndex > valueLen) { startIndex = valueLen; }
		len = valueLen - startIndex;
	}

	return coco_string_create(value, startIndex, len);
}

// Returns a new[]-allocated copy of the whole NUL-terminated `value`
// (an empty string when `value` is NULL).
wchar_t* coco_string_create(const wchar_t* value) {
	return coco_string_create(value, 0);
}
41
+
42
// Returns a new[]-allocated copy of `data` in which the letters L'a'..L'z'
// are replaced by L'A'..L'Z'; every other character is copied unchanged.
// Returns NULL when `data` is NULL. The caller owns the result.
wchar_t* coco_string_create_upper(const wchar_t* data) {
	if (data == NULL) { return NULL; }

	const int count = static_cast<int>(wcslen(data));
	wchar_t* upper = new wchar_t[count + 1];

	wchar_t* out = upper;
	for (const wchar_t* in = data; *in != L'\0'; ++in, ++out) {
		const bool isLower = (L'a' <= *in) && (*in <= L'z');
		*out = isLower ? (*in + (L'A' - L'a')) : *in;
	}
	*out = L'\0';

	return upper;
}
60
+
61
// Returns a new[]-allocated, NUL-terminated lower-case copy of the
// `dataLen` characters of `data` starting at `startIndex`. Only L'A'..L'Z'
// are converted. Returns NULL when `data` is NULL; a negative `dataLen`
// is treated as 0. The caller owns the result.
//
// Exactly `dataLen` characters are read: the element just past the range
// is never touched, so `data` does not have to be NUL-terminated.
wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) {
	if (!data) { return NULL; }
	if (dataLen < 0) { dataLen = 0; }

	wchar_t* newData = new wchar_t[dataLen + 1];

	for (int i = 0; i < dataLen; i++) {
		const wchar_t ch = data[startIndex + i];
		if ((L'A' <= ch) && (ch <= L'Z')) {
			newData[i] = ch - (L'A' - L'a');
		}
		else { newData[i] = ch; }
	}
	newData[dataLen] = L'\0';
	return newData;
}

// Returns a new[]-allocated lower-case copy of the whole NUL-terminated
// `data`, or NULL when `data` is NULL.
wchar_t* coco_string_create_lower(const wchar_t* data) {
	if (!data) { return NULL; }
	return coco_string_create_lower(data, 0, static_cast<int>(wcslen(data)));
}
82
+
83
// Returns a new[]-allocated string holding `data1` followed by `data2`.
// Either argument may be NULL and then contributes nothing. The caller
// owns the result.
wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) {
	const int headLen = data1 ? static_cast<int>(wcslen(data1)) : 0;
	const int tailLen = data2 ? static_cast<int>(wcslen(data2)) : 0;
	const int total = headLen + tailLen;

	wchar_t* joined = new wchar_t[total + 1];

	if (data1) { wcscpy(joined, data1); }
	if (data2) { wcscpy(joined + headLen, data2); }
	joined[total] = 0;

	return joined;
}
100
+
101
// Returns a new[]-allocated string holding `target` followed by the single
// character `appendix`. A NULL `target` is treated as the empty string and
// is never passed to wcsncpy. The caller owns the result.
wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) {
	const int targetLen = target ? static_cast<int>(wcslen(target)) : 0;
	wchar_t* data = new wchar_t[targetLen + 2];
	if (targetLen > 0) { wcsncpy(data, target, targetLen); }
	data[targetLen] = appendix;
	data[targetLen + 1] = 0;
	return data;
}
109
+
110
// Releases a string obtained from new[] and resets the caller's pointer to
// NULL. Passing a NULL pointer is harmless.
void coco_string_delete(wchar_t* &data) {
	wchar_t* doomed = data;
	data = NULL;
	delete [] doomed;
}
114
+
115
// Length of the NUL-terminated `data` in characters; 0 for NULL.
int coco_string_length(const wchar_t* data) {
	return data ? static_cast<int>(wcslen(data)) : 0;
}
119
+
120
// True when the NUL-terminated `data` ends with the NUL-terminated `end`.
// Both arguments must be non-NULL.
bool coco_string_endswith(const wchar_t* data, const wchar_t *end) {
	const int dataLen = static_cast<int>(wcslen(data));
	const int endLen = static_cast<int>(wcslen(end));
	if (endLen > dataLen) { return false; }

	const wchar_t* tail = data + dataLen - endLen;
	return wcscmp(tail, end) == 0;
}
125
+
126
// Index of the first occurrence of `value` in the NUL-terminated `data`,
// or -1 when it does not occur. `data` must be non-NULL.
int coco_string_indexof(const wchar_t* data, const wchar_t value) {
	const wchar_t* hit = wcschr(data, value);
	return hit ? static_cast<int>(hit - data) : -1;
}
132
+
133
// Index of the last occurrence of `value` in the NUL-terminated `data`,
// or -1 when it does not occur. `data` must be non-NULL.
int coco_string_lastindexof(const wchar_t* data, const wchar_t value) {
	const wchar_t* hit = wcsrchr(data, value);
	return hit ? static_cast<int>(hit - data) : -1;
}
139
+
140
+ void coco_string_merge(wchar_t* &target, const wchar_t* appendix) {
141
+ if (!appendix) { return; }
142
+ wchar_t* data = coco_string_create_append(target, appendix);
143
+ delete [] target;
144
+ target = data;
145
+ }
146
+
147
// True when both NUL-terminated strings have identical contents.
// Both arguments must be non-NULL.
bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) {
	const int order = wcscmp(data1, data2);
	return order == 0;
}
150
+
151
// Three-way comparison of two NUL-terminated strings; forwards the result
// of wcscmp (negative, zero or positive). Both arguments must be non-NULL.
int coco_string_compareto(const wchar_t* data1, const wchar_t* data2) {
	const int order = wcscmp(data1, data2);
	return order;
}
154
+
155
// Non-negative hash of the NUL-terminated `data` (0 for NULL or "").
//
// Computes h = (h * 7) ^ ch per character, then takes the absolute value.
// The arithmetic is done in unsigned int so that long strings wrap
// instead of overflowing a signed int; any input that did not overflow
// before produces the same value as before.
int coco_string_hash(const wchar_t *data) {
	if (!data) { return 0; }

	unsigned int h = 0;
	for (; *data != 0; ++data) {
		h = (h * 7u) ^ static_cast<unsigned int>(*data);
	}

	// Largest value representable in an int of the same width.
	const unsigned int maxInt = ~0u >> 1;

	// Values above maxInt correspond to "negative" hashes: negate them
	// (modulo 2^N), then mask so the single value that is its own negation
	// cannot come out negative.
	if (h > maxInt) { h = 0u - h; }
	h &= maxInt;

	return static_cast<int>(h);
}
165
+
166
+ // string handling, ascii character
167
+
168
// Widens a NUL-terminated narrow string into a new[]-allocated wide string,
// one wchar_t per byte. NULL yields an empty string. The caller owns the
// result.
//
// Each byte is widened through unsigned char, so bytes >= 0x80 map to
// 0x80..0xFF instead of being sign-extended where plain char is signed.
wchar_t* coco_string_create(const char* value) {
	int len = 0;
	if (value) { len = static_cast<int>(strlen(value)); }
	wchar_t* data = new wchar_t[len + 1];
	for (int i = 0; i < len; ++i) {
		data[i] = static_cast<wchar_t>(static_cast<unsigned char>(value[i]));
	}
	data[len] = 0;
	return data;
}
176
+
177
// Narrows a NUL-terminated wide string into a new[]-allocated char string
// by casting each character to char. NULL yields an empty string. The
// caller owns the result.
char* coco_string_create_char(const wchar_t *value) {
	const int count = value ? static_cast<int>(wcslen(value)) : 0;
	char* narrow = new char[count + 1];

	int i = 0;
	while (i < count) {
		narrow[i] = (char) value[i];
		++i;
	}
	narrow[count] = 0;

	return narrow;
}
184
+
185
// Releases a char string obtained from new[] and resets the caller's
// pointer to NULL. Passing a NULL pointer is harmless.
void coco_string_delete(char* &data) {
	char* doomed = data;
	data = NULL;
	delete [] doomed;
}
189
+
190
+
191
+ Token::Token() {
192
+ kind = 0;
193
+ pos = 0;
194
+ col = 0;
195
+ line = 0;
196
+ val = NULL;
197
+ next = NULL;
198
+ }
199
+
200
+ Token::~Token() {
201
+ coco_string_delete(val);
202
+ }
203
+
204
+ Buffer::Buffer(FILE* s, bool isUserStream) {
205
+ // ensure binary read on windows
206
+ #if _MSC_VER >= 1300
207
+ _setmode(_fileno(s), _O_BINARY);
208
+ #endif
209
+ stream = s; this->isUserStream = isUserStream;
210
+ if (CanSeek()) {
211
+ fseek(s, 0, SEEK_END);
212
+ fileLen = ftell(s);
213
+ fseek(s, 0, SEEK_SET);
214
+ bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH;
215
+ bufStart = INT_MAX; // nothing in the buffer so far
216
+ } else {
217
+ fileLen = bufLen = bufStart = 0;
218
+ }
219
+ bufCapacity = (bufLen>0) ? bufLen : COCO_MIN_BUFFER_LENGTH;
220
+ buf = new char[bufCapacity];
221
+ if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start)
222
+ else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid
223
+ if (bufLen == fileLen && CanSeek()) Close();
224
+ }
225
+
226
+ Buffer::Buffer(Buffer *b) {
227
+ buf = b->buf;
228
+ bufCapacity = b->bufCapacity;
229
+ b->buf = NULL;
230
+ bufStart = b->bufStart;
231
+ bufLen = b->bufLen;
232
+ fileLen = b->fileLen;
233
+ bufPos = b->bufPos;
234
+ stream = b->stream;
235
+ b->stream = NULL;
236
+ isUserStream = b->isUserStream;
237
+ }
238
+
239
+ Buffer::Buffer(const char* buf, size_t len) {
240
+ this->buf = new char[len];
241
+ memcpy(this->buf, buf, len*sizeof(unsigned char));
242
+ bufStart = 0;
243
+ bufCapacity = bufLen = len;
244
+ fileLen = len;
245
+ bufPos = 0;
246
+ stream = NULL;
247
+ }
248
+
249
+ Buffer::~Buffer() {
250
+ Close();
251
+ if (buf != NULL) {
252
+ delete [] buf;
253
+ buf = NULL;
254
+ }
255
+ }
256
+
257
+ void Buffer::Close() {
258
+ if (!isUserStream && stream != NULL) {
259
+ fclose(stream);
260
+ stream = NULL;
261
+ }
262
+ }
263
+
264
+ int Buffer::Read() {
265
+ if (bufPos < bufLen) {
266
+ return buf[bufPos++];
267
+ } else if (GetPos() < fileLen) {
268
+ SetPos(GetPos()); // shift buffer start to Pos
269
+ return buf[bufPos++];
270
+ } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) {
271
+ return buf[bufPos++];
272
+ } else {
273
+ return EoF;
274
+ }
275
+ }
276
+
277
+ int Buffer::Peek() {
278
+ int curPos = GetPos();
279
+ int ch = Read();
280
+ SetPos(curPos);
281
+ return ch;
282
+ }
283
+
284
+ // beg .. begin, zero-based, inclusive, in byte
285
+ // end .. end, zero-based, exclusive, in byte
286
+ wchar_t* Buffer::GetString(int beg, int end) {
287
+ int len = 0;
288
+ wchar_t *buf = new wchar_t[end - beg];
289
+ int oldPos = GetPos();
290
+ SetPos(beg);
291
+ while (GetPos() < end) buf[len++] = (wchar_t) Read();
292
+ SetPos(oldPos);
293
+ wchar_t *res = coco_string_create(buf, 0, len);
294
+ coco_string_delete(buf);
295
+ return res;
296
+ }
297
+
298
+ int Buffer::GetPos() {
299
+ return bufPos + bufStart;
300
+ }
301
+
302
+ void Buffer::SetPos(int value) {
303
+ if ((value >= fileLen) && (stream != NULL) && !CanSeek()) {
304
+ // Wanted position is after buffer and the stream
305
+ // is not seek-able e.g. network or console,
306
+ // thus we have to read the stream manually till
307
+ // the wanted position is in sight.
308
+ while ((value >= fileLen) && (ReadNextStreamChunk() > 0));
309
+ }
310
+
311
+ if ((value < 0) || (value > fileLen)) {
312
+ wprintf(L"--- buffer out of bounds access, position: %d\n", value);
313
+ exit(1);
314
+ }
315
+
316
+ if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer
317
+ bufPos = value - bufStart;
318
+ } else if (stream != NULL) { // must be swapped in
319
+ fseek(stream, value, SEEK_SET);
320
+ bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream);
321
+ bufStart = value; bufPos = 0;
322
+ } else {
323
+ bufPos = fileLen - bufStart; // make Pos return fileLen
324
+ }
325
+ }
326
+
327
+ // Read the next chunk of bytes from the stream, increases the buffer
328
+ // if needed and updates the fields fileLen and bufLen.
329
+ // Returns the number of bytes read.
330
+ int Buffer::ReadNextStreamChunk() {
331
+ int free = bufCapacity - bufLen;
332
+ if (free == 0) {
333
+ // in the case of a growing input stream
334
+ // we can neither seek in the stream, nor can we
335
+ // foresee the maximum length, thus we must adapt
336
+ // the buffer size on demand.
337
+ bufCapacity = bufLen * 2;
338
+ char *newBuf = new char[bufCapacity];
339
+ memcpy(newBuf, buf, bufLen*sizeof(char));
340
+ delete [] buf;
341
+ buf = newBuf;
342
+ free = bufLen;
343
+ }
344
+ int read = fread(buf + bufLen, sizeof(unsigned char), free, stream);
345
+ if (read > 0) {
346
+ fileLen = bufLen = (bufLen + read);
347
+ return read;
348
+ }
349
+ // end of stream reached
350
+ return 0;
351
+ }
352
+
353
+ bool Buffer::CanSeek() {
354
+ return (stream != NULL) && (ftell(stream) != -1);
355
+ }
356
+
357
+ int UTF8Buffer::Read() {
358
+ int ch;
359
+ do {
360
+ ch = Buffer::Read();
361
+ // until we find a utf8 start (0xxxxxxx or 11xxxxxx)
362
+ } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF));
363
+ if (ch < 128 || ch == EoF) {
364
+ // nothing to do, first 127 chars are the same in ascii and utf8
365
+ // 0xxxxxxx or end of file character
366
+ } else if ((ch & 0xF0) == 0xF0) {
367
+ // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
368
+ int c1 = ch & 0x07; ch = Buffer::Read();
369
+ int c2 = ch & 0x3F; ch = Buffer::Read();
370
+ int c3 = ch & 0x3F; ch = Buffer::Read();
371
+ int c4 = ch & 0x3F;
372
+ ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4;
373
+ } else if ((ch & 0xE0) == 0xE0) {
374
+ // 1110xxxx 10xxxxxx 10xxxxxx
375
+ int c1 = ch & 0x0F; ch = Buffer::Read();
376
+ int c2 = ch & 0x3F; ch = Buffer::Read();
377
+ int c3 = ch & 0x3F;
378
+ ch = (((c1 << 6) | c2) << 6) | c3;
379
+ } else if ((ch & 0xC0) == 0xC0) {
380
+ // 110xxxxx 10xxxxxx
381
+ int c1 = ch & 0x1F; ch = Buffer::Read();
382
+ int c2 = ch & 0x3F;
383
+ ch = (c1 << 6) | c2;
384
+ }
385
+ return ch;
386
+ }
387
+
388
+ Scanner::Scanner(const char* buf, size_t len) {
389
+ buffer = new Buffer(buf, len);
390
+ Init();
391
+ }
392
+
393
+ Scanner::Scanner(const wchar_t* fileName) {
394
+ FILE* stream;
395
+ char *chFileName = coco_string_create_char(fileName);
396
+ if ((stream = fopen(chFileName, "rb")) == NULL) {
397
+ wprintf(L"--- Cannot open file %ls\n", fileName);
398
+ exit(1);
399
+ }
400
+ coco_string_delete(chFileName);
401
+ buffer = new Buffer(stream, false);
402
+ Init();
403
+ }
404
+
405
+ Scanner::Scanner(FILE* s) {
406
+ buffer = new Buffer(s, true);
407
+ Init();
408
+ }
409
+
410
+ Scanner::~Scanner() {
411
+ char* cur = (char*) firstHeap;
412
+
413
+ while(cur != NULL) {
414
+ cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE);
415
+ free(firstHeap);
416
+ firstHeap = cur;
417
+ }
418
+ delete [] tval;
419
+ delete buffer;
420
+ }
421
+
422
+ void Scanner::Init() {
423
+ EOL = '\n';
424
+ eofSym = 0;
425
+ maxT = 21;
426
+ noSym = 21;
427
+ int i;
428
+ for (i = 65; i <= 90; ++i) start.set(i, 1);
429
+ for (i = 97; i <= 122; ++i) start.set(i, 2);
430
+ for (i = 49; i <= 57; ++i) start.set(i, 3);
431
+ for (i = 10; i <= 10; ++i) start.set(i, 15);
432
+ for (i = 13; i <= 13; ++i) start.set(i, 15);
433
+ start.set(48, 19);
434
+ start.set(34, 20);
435
+ start.set(39, 10);
436
+ start.set(36, 21);
437
+ start.set(60, 24);
438
+ start.set(44, 25);
439
+ start.set(62, 26);
440
+ start.set(61, 27);
441
+ start.set(58, 28);
442
+ start.set(91, 29);
443
+ start.set(93, 30);
444
+ start.set(123, 31);
445
+ start.set(125, 32);
446
+ start.set(Buffer::EoF, -1);
447
+ keywords.set(L"struct", 17);
448
+ keywords.set(L"import", 20);
449
+
450
+
451
+ tvalLength = 128;
452
+ tval = new wchar_t[tvalLength]; // text of current token
453
+
454
+ // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block
455
+ heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*));
456
+ firstHeap = heap;
457
+ heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE);
458
+ *heapEnd = 0;
459
+ heapTop = heap;
460
+ if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) {
461
+ wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n");
462
+ exit(1);
463
+ }
464
+
465
+ pos = -1; line = 1; col = 0; charPos = -1;
466
+ oldEols = 0;
467
+ NextCh();
468
+ if (ch == 0xEF) { // check optional byte order mark for UTF-8
469
+ NextCh(); int ch1 = ch;
470
+ NextCh(); int ch2 = ch;
471
+ if (ch1 != 0xBB || ch2 != 0xBF) {
472
+ wprintf(L"Illegal byte order mark at start of file");
473
+ exit(1);
474
+ }
475
+ Buffer *oldBuf = buffer;
476
+ buffer = new UTF8Buffer(buffer); col = 0; charPos = -1;
477
+ delete oldBuf; oldBuf = NULL;
478
+ NextCh();
479
+ }
480
+
481
+
482
+ pt = tokens = CreateToken(); // first token is a dummy
483
+ }
484
+
485
+ void Scanner::NextCh() {
486
+ if (oldEols > 0) { ch = EOL; oldEols--; }
487
+ else {
488
+ pos = buffer->GetPos();
489
+ // buffer reads unicode chars, if UTF8 has been detected
490
+ ch = buffer->Read(); col++; charPos++;
491
+ // replace isolated '\r' by '\n' in order to make
492
+ // eol handling uniform across Windows, Unix and Mac
493
+ if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL;
494
+ if (ch == EOL) { line++; col = 0; }
495
+ }
496
+
497
+ }
498
+
499
+ void Scanner::AddCh() {
500
+ if (tlen >= tvalLength) {
501
+ tvalLength *= 2;
502
+ wchar_t *newBuf = new wchar_t[tvalLength];
503
+ memcpy(newBuf, tval, tlen*sizeof(wchar_t));
504
+ delete [] tval;
505
+ tval = newBuf;
506
+ }
507
+ if (ch != Buffer::EoF) {
508
+ tval[tlen++] = ch;
509
+ NextCh();
510
+ }
511
+ }
512
+
513
+
514
+ bool Scanner::Comment0() {
515
+ int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;
516
+ NextCh();
517
+ if (ch == L'/') {
518
+ NextCh();
519
+ for(;;) {
520
+ if (ch == 10) {
521
+ level--;
522
+ if (level == 0) { oldEols = line - line0; NextCh(); return true; }
523
+ NextCh();
524
+ } else if (ch == buffer->EoF) return false;
525
+ else NextCh();
526
+ }
527
+ } else {
528
+ buffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0;
529
+ }
530
+ return false;
531
+ }
532
+
533
+ bool Scanner::Comment1() {
534
+ int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;
535
+ NextCh();
536
+ if (ch == L'*') {
537
+ NextCh();
538
+ for(;;) {
539
+ if (ch == L'*') {
540
+ NextCh();
541
+ if (ch == L'/') {
542
+ level--;
543
+ if (level == 0) { oldEols = line - line0; NextCh(); return true; }
544
+ NextCh();
545
+ }
546
+ } else if (ch == L'/') {
547
+ NextCh();
548
+ if (ch == L'*') {
549
+ level++; NextCh();
550
+ }
551
+ } else if (ch == buffer->EoF) return false;
552
+ else NextCh();
553
+ }
554
+ } else {
555
+ buffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0;
556
+ }
557
+ return false;
558
+ }
559
+
560
+
561
+ void Scanner::CreateHeapBlock() {
562
+ void* newHeap;
563
+ char* cur = (char*) firstHeap;
564
+
565
+ while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) {
566
+ cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE));
567
+ free(firstHeap);
568
+ firstHeap = cur;
569
+ }
570
+
571
+ // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block
572
+ newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*));
573
+ *heapEnd = newHeap;
574
+ heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE);
575
+ *heapEnd = 0;
576
+ heap = newHeap;
577
+ heapTop = heap;
578
+ }
579
+
580
+ Token* Scanner::CreateToken() {
581
+ Token *t;
582
+ if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) {
583
+ CreateHeapBlock();
584
+ }
585
+ t = (Token*) heapTop;
586
+ heapTop = (void*) ((char*) heapTop + sizeof(Token));
587
+ t->val = NULL;
588
+ t->next = NULL;
589
+ return t;
590
+ }
591
+
592
+ void Scanner::AppendVal(Token *t) {
593
+ int reqMem = (tlen + 1) * sizeof(wchar_t);
594
+ if (((char*) heapTop + reqMem) >= (char*) heapEnd) {
595
+ if (reqMem > COCO_HEAP_BLOCK_SIZE) {
596
+ wprintf(L"--- Too long token value\n");
597
+ exit(1);
598
+ }
599
+ CreateHeapBlock();
600
+ }
601
+ t->val = (wchar_t*) heapTop;
602
+ heapTop = (void*) ((char*) heapTop + reqMem);
603
+
604
+ wcsncpy(t->val, tval, tlen);
605
+ t->val[tlen] = L'\0';
606
+ }
607
+
608
+ Token* Scanner::NextToken() {
609
+ while (ch == ' ' ||
610
+ (ch >= 9 && ch <= 10) || ch == 13
611
+ ) NextCh();
612
+ if ((ch == L'/' && Comment0()) || (ch == L'/' && Comment1())) return NextToken();
613
+ int recKind = noSym;
614
+ int recEnd = pos;
615
+ t = CreateToken();
616
+ t->pos = pos; t->col = col; t->line = line; t->charPos = charPos;
617
+ int state = start.state(ch);
618
+ tlen = 0; AddCh();
619
+
620
+ switch (state) {
621
+ case -1: { t->kind = eofSym; break; } // NextCh already done
622
+ case 0: {
623
+ case_0:
624
+ if (recKind != noSym) {
625
+ tlen = recEnd - t->pos;
626
+ SetScannerBehindT();
627
+ }
628
+ t->kind = recKind; break;
629
+ } // NextCh already done
630
+ case 1:
631
+ case_1:
632
+ recEnd = pos; recKind = 1;
633
+ if ((ch >= L'0' && ch <= L'9') || (ch >= L'A' && ch <= L'Z') || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_1;}
634
+ else {t->kind = 1; break;}
635
+ case 2:
636
+ case_2:
637
+ recEnd = pos; recKind = 2;
638
+ if ((ch >= L'0' && ch <= L'9') || (ch >= L'A' && ch <= L'Z') || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_2;}
639
+ else {t->kind = 2; wchar_t *literal = coco_string_create(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;}
640
+ case 3:
641
+ case_3:
642
+ recEnd = pos; recKind = 3;
643
+ if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_3;}
644
+ else if (ch == L'.') {AddCh(); goto case_4;}
645
+ else {t->kind = 3; break;}
646
+ case 4:
647
+ case_4:
648
+ if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_5;}
649
+ else {goto case_0;}
650
+ case 5:
651
+ case_5:
652
+ recEnd = pos; recKind = 3;
653
+ if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_5;}
654
+ else {t->kind = 3; break;}
655
+ case 6:
656
+ case_6:
657
+ if ((ch >= L'0' && ch <= L'9') || (ch >= L'a' && ch <= L'f')) {AddCh(); goto case_7;}
658
+ else {goto case_0;}
659
+ case 7:
660
+ case_7:
661
+ recEnd = pos; recKind = 4;
662
+ if ((ch >= L'0' && ch <= L'9') || (ch >= L'a' && ch <= L'f')) {AddCh(); goto case_7;}
663
+ else {t->kind = 4; break;}
664
+ case 8:
665
+ case_8:
666
+ {t->kind = 5; break;}
667
+ case 9:
668
+ case_9:
669
+ {t->kind = 6; break;}
670
+ case 10:
671
+ if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= L'&') || (ch >= L'(' && ch <= L'[') || (ch >= L']' && ch <= 65535)) {AddCh(); goto case_11;}
672
+ else if (ch == 92) {AddCh(); goto case_12;}
673
+ else {goto case_0;}
674
+ case 11:
675
+ case_11:
676
+ if (ch == 39) {AddCh(); goto case_14;}
677
+ else {goto case_0;}
678
+ case 12:
679
+ case_12:
680
+ if ((ch >= L' ' && ch <= L'~')) {AddCh(); goto case_13;}
681
+ else {goto case_0;}
682
+ case 13:
683
+ case_13:
684
+ if ((ch >= L'0' && ch <= L'9') || (ch >= L'a' && ch <= L'f')) {AddCh(); goto case_13;}
685
+ else if (ch == 39) {AddCh(); goto case_14;}
686
+ else {goto case_0;}
687
+ case 14:
688
+ case_14:
689
+ {t->kind = 7; break;}
690
+ case 15:
691
+ {t->kind = 8; break;}
692
+ case 16:
693
+ case_16:
694
+ recEnd = pos; recKind = 9;
695
+ if ((ch >= L'0' && ch <= L'9') || (ch >= L'A' && ch <= L'Z') || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_16;}
696
+ else {t->kind = 9; break;}
697
+ case 17:
698
+ case_17:
699
+ recEnd = pos; recKind = 22;
700
+ if ((ch >= L'0' && ch <= L'9') || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_17;}
701
+ else {t->kind = 22; break;}
702
+ case 18:
703
+ case_18:
704
+ recEnd = pos; recKind = 23;
705
+ if ((ch >= L'-' && ch <= L'.') || (ch >= L'0' && ch <= L':') || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_18;}
706
+ else {t->kind = 23; break;}
707
+ case 19:
708
+ recEnd = pos; recKind = 3;
709
+ if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_3;}
710
+ else if (ch == L'.') {AddCh(); goto case_4;}
711
+ else if (ch == L'x') {AddCh(); goto case_6;}
712
+ else {t->kind = 3; break;}
713
+ case 20:
714
+ case_20:
715
+ if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= L'!') || (ch >= L'#' && ch <= L'[') || (ch >= L']' && ch <= 65535)) {AddCh(); goto case_20;}
716
+ else if (ch == 10 || ch == 13) {AddCh(); goto case_9;}
717
+ else if (ch == L'"') {AddCh(); goto case_8;}
718
+ else if (ch == 92) {AddCh(); goto case_22;}
719
+ else {goto case_0;}
720
+ case 21:
721
+ recEnd = pos; recKind = 22;
722
+ if ((ch >= L'A' && ch <= L'Z')) {AddCh(); goto case_16;}
723
+ else if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_17;}
724
+ else if ((ch >= L'a' && ch <= L'z')) {AddCh(); goto case_23;}
725
+ else {t->kind = 22; break;}
726
+ case 22:
727
+ case_22:
728
+ if ((ch >= L' ' && ch <= L'~')) {AddCh(); goto case_20;}
729
+ else {goto case_0;}
730
+ case 23:
731
+ case_23:
732
+ recEnd = pos; recKind = 22;
733
+ if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_17;}
734
+ else if ((ch >= L'a' && ch <= L'z')) {AddCh(); goto case_23;}
735
+ else if (ch == L'=') {AddCh(); goto case_18;}
736
+ else {t->kind = 22; break;}
737
+ case 24:
738
+ {t->kind = 10; break;}
739
+ case 25:
740
+ {t->kind = 11; break;}
741
+ case 26:
742
+ {t->kind = 12; break;}
743
+ case 27:
744
+ {t->kind = 13; break;}
745
+ case 28:
746
+ {t->kind = 14; break;}
747
+ case 29:
748
+ {t->kind = 15; break;}
749
+ case 30:
750
+ {t->kind = 16; break;}
751
+ case 31:
752
+ {t->kind = 18; break;}
753
+ case 32:
754
+ {t->kind = 19; break;}
755
+
756
+ }
757
+ AppendVal(t);
758
+ return t;
759
+ }
760
+
761
+ void Scanner::SetScannerBehindT() {
762
+ buffer->SetPos(t->pos);
763
+ NextCh();
764
+ line = t->line; col = t->col; charPos = t->charPos;
765
+ for (int i = 0; i < tlen; i++) NextCh();
766
+ }
767
+
768
+ // get the next token (possibly a token already seen during peeking)
769
+ Token* Scanner::Scan() {
770
+ if (tokens->next == NULL) {
771
+ return pt = tokens = NextToken();
772
+ } else {
773
+ pt = tokens = tokens->next;
774
+ return tokens;
775
+ }
776
+ }
777
+
778
+ // peek for the next token, ignore pragmas
779
+ Token* Scanner::Peek() {
780
+ do {
781
+ if (pt->next == NULL) {
782
+ pt->next = NextToken();
783
+ }
784
+ pt = pt->next;
785
+ } while (pt->kind > maxT); // skip pragmas
786
+
787
+ return pt;
788
+ }
789
+
790
+ // make sure that peeking starts at the current scan position
791
+ void Scanner::ResetPeek() {
792
+ pt = tokens;
793
+ }
794
+
795
+ } // namespace
796
+