s2 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Rakefile +22 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/ext/s2/s2_parse/Makefile +52 -0
- data/ext/s2/s2_parse/Parser.cpp +525 -0
- data/ext/s2/s2_parse/Parser.h +130 -0
- data/ext/s2/s2_parse/S2.hpp +246 -0
- data/ext/s2/s2_parse/Scanner.cpp +796 -0
- data/ext/s2/s2_parse/Scanner.h +263 -0
- data/ext/s2/s2_parse/extconf.rb +5 -0
- data/ext/s2/s2_parse/parse_s2.cpp +630 -0
- data/ext/s2/s2_parse/parse_s2.hpp +42 -0
- data/ext/s2/s2_parse/picojson.hpp +1299 -0
- data/ext/s2/s2_parse/s2.atg +321 -0
- data/ext/s2/s2_parse/s2.ruco +93 -0
- data/ext/s2/s2_parse/s2_parse.cpp +70 -0
- data/lib/s2.rb +5 -0
- data/lib/s2/display.rb +21 -0
- data/lib/s2/internal/c.rb +128 -0
- data/lib/s2/s2_parse.rb +18 -0
- data/lib/s2/version.rb +3 -0
- metadata +151 -0
@@ -0,0 +1,130 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
#if !defined(S2_COCO_PARSER_H__)
|
4
|
+
#define S2_COCO_PARSER_H__
|
5
|
+
|
6
|
+
#include <iostream>
|
7
|
+
#include <memory>
|
8
|
+
#include "S2.hpp"
|
9
|
+
|
10
|
+
|
11
|
+
#include "Scanner.h"
|
12
|
+
|
13
|
+
namespace S2 {
|
14
|
+
|
15
|
+
|
16
|
+
// Value object describing one parser diagnostic: a line, a column and the
// message text that were supplied when the object was built.
class ParserException {
private:
	int line;
	int col;
	std::wstring message;

public:
	// Keeps copies of the given position and message.
	ParserException(int line, int col, std::wstring message)
		: line(line), col(col), message(message)
	{}

	// Line value handed to the constructor.
	int LineNumber() const { return line; }

	// Column value handed to the constructor.
	int ColumnNumber() const { return col; }

	// Copy of the stored message text.
	std::wstring GetMessage() const { return message; }
};
|
42
|
+
|
43
|
+
class Errors {
|
44
|
+
public:
|
45
|
+
int count; // number of errors detected
|
46
|
+
std::vector<ParserException> warnings;
|
47
|
+
|
48
|
+
Errors();
|
49
|
+
void SynErr(int line, int col, int n);
|
50
|
+
void Error(int line, int col, const wchar_t *s);
|
51
|
+
void Warning(int line, int col, const wchar_t *s);
|
52
|
+
void Warning(const wchar_t *s);
|
53
|
+
void Exception(const wchar_t *s);
|
54
|
+
|
55
|
+
}; // Errors
|
56
|
+
|
57
|
+
class Parser {
|
58
|
+
private:
|
59
|
+
enum {
|
60
|
+
_EOF=0,
|
61
|
+
_pascalcase=1,
|
62
|
+
_camelcase=2,
|
63
|
+
_number=3,
|
64
|
+
_hexinteger=4,
|
65
|
+
_string=5,
|
66
|
+
_badString=6,
|
67
|
+
_char=7,
|
68
|
+
_endOfLine=8,
|
69
|
+
_customTokenTypeVariable=9,
|
70
|
+
_ddtSym=22,
|
71
|
+
_optionSym=23
|
72
|
+
};
|
73
|
+
int maxT;
|
74
|
+
|
75
|
+
Token *dummyToken;
|
76
|
+
int errDist;
|
77
|
+
int minErrDist;
|
78
|
+
|
79
|
+
void SynErr(int n);
|
80
|
+
void Get();
|
81
|
+
void Expect(int n);
|
82
|
+
bool StartOf(int s);
|
83
|
+
void ExpectWeak(int n, int follow);
|
84
|
+
bool WeakSeparator(int n, int syFol, int repFol);
|
85
|
+
|
86
|
+
public:
|
87
|
+
Scanner *scanner;
|
88
|
+
Errors *errors;
|
89
|
+
|
90
|
+
Token *t; // last recognized token
|
91
|
+
Token *la; // lookahead token
|
92
|
+
|
93
|
+
S2Ptr s2;
|
94
|
+
|
95
|
+
|
96
|
+
|
97
|
+
Parser(Scanner *scanner);
|
98
|
+
~Parser();
|
99
|
+
void SemErr(const wchar_t* msg);
|
100
|
+
|
101
|
+
void S2();
|
102
|
+
void Statement(StatementPtr& production);
|
103
|
+
void TypeVariable(TypeVariablePtr& production);
|
104
|
+
void StructName(StructNamePtr& production);
|
105
|
+
void MemberName(MemberNamePtr& production);
|
106
|
+
void NumberLiteral(NumberLiteralPtr& production);
|
107
|
+
void StringLiteral(StringLiteralPtr& production);
|
108
|
+
void TypeIdentifier(TypeIdentifierPtr& production);
|
109
|
+
void TypeParameterArguments(TypeParameterArgumentsPtr& production);
|
110
|
+
void TypeExpression(TypeExpressionPtr& production);
|
111
|
+
void TypeDeclaration(TypeDeclarationPtr& production);
|
112
|
+
void TypeParameters(TypeParametersPtr& production);
|
113
|
+
void NumberLit(NumberLitPtr& production);
|
114
|
+
void Expression(ExpressionPtr& production);
|
115
|
+
void AttributeParam(AttributeParamPtr& production);
|
116
|
+
void AttributeParamList(AttributeParamListPtr& production);
|
117
|
+
void Attribute(AttributePtr& production);
|
118
|
+
void Member(MemberPtr& production);
|
119
|
+
void Structure(StructurePtr& production);
|
120
|
+
void Import(ImportPtr& production);
|
121
|
+
|
122
|
+
void Parse();
|
123
|
+
|
124
|
+
}; // end Parser
|
125
|
+
|
126
|
+
} // namespace
|
127
|
+
|
128
|
+
|
129
|
+
#endif
|
130
|
+
|
@@ -0,0 +1,246 @@
|
|
1
|
+
|
2
|
+
#ifndef S2_HPP
|
3
|
+
#define S2_HPP
|
4
|
+
|
5
|
+
/*
|
6
|
+
WARNING: This file is generated using ruco. Please modify the .ruco file if you wish to change anything
|
7
|
+
https://github.com/davidsiaw/ruco
|
8
|
+
*/
|
9
|
+
|
10
|
+
#include <string>
|
11
|
+
#include <memory>
|
12
|
+
#include <vector>
|
13
|
+
|
14
|
+
namespace S2
{
	// AST node types for the S2 grammar. Every node carries `_line`/`_col`
	// (presumably the source position of the construct; confirm against the
	// parser). Nodes are passed around as std::shared_ptr; each node type
	// also gets an `...Array` typedef for a vector of such pointers.
	//
	// NOTE(review): derived node types redeclare `_line`/`_col`, which hides
	// the members of the same name in their base class. That layout is kept
	// as-is here so existing users are unaffected.

	// Discriminator returned by Statement::get_statement_type().
	enum StatementType
	{
		STRUCTURE_STATEMENT,
		IMPORT_STATEMENT
	};

	// Abstract base of all top-level statements.
	class Statement
	{
	public:
		// Polymorphic base: a virtual destructor makes destruction through a
		// Statement pointer well-defined.
		virtual ~Statement() = default;

		unsigned _line, _col;
		virtual StatementType get_statement_type() const = 0;
	};
	typedef std::shared_ptr<Statement> StatementPtr;
	typedef std::vector<StatementPtr> StatementArray;

	// Root node: the list of statements.
	class S2
	{
	public:
		unsigned _line, _col;
		StatementArray statements;
	};
	typedef std::shared_ptr<S2> S2Ptr;
	typedef std::vector<S2Ptr> S2Array;

	// Leaf node holding text in `content`.
	class StructName
	{
	public:
		unsigned _line, _col;
		std::wstring content;
	};
	typedef std::shared_ptr<StructName> StructNamePtr;
	typedef std::vector<StructNamePtr> StructNameArray;

	// Leaf node holding text in `content`.
	class TypeVariable
	{
	public:
		unsigned _line, _col;
		std::wstring content;
	};
	typedef std::shared_ptr<TypeVariable> TypeVariablePtr;
	typedef std::vector<TypeVariablePtr> TypeVariableArray;

	// Discriminator returned by TypeExpression::get_typeexpression_type().
	enum TypeExpressionType
	{
		TYPEIDENTIFIER_TYPEEXPRESSION
	};

	// Abstract base of all type expressions.
	class TypeExpression
	{
	public:
		// Polymorphic base: see Statement.
		virtual ~TypeExpression() = default;

		unsigned _line, _col;
		virtual TypeExpressionType get_typeexpression_type() const = 0;
	};
	typedef std::shared_ptr<TypeExpression> TypeExpressionPtr;
	typedef std::vector<TypeExpressionPtr> TypeExpressionArray;

	// A list of type expressions.
	class TypeParameterArguments
	{
	public:
		unsigned _line, _col;
		TypeExpressionArray typeexpressions;
	};
	typedef std::shared_ptr<TypeParameterArguments> TypeParameterArgumentsPtr;
	typedef std::vector<TypeParameterArgumentsPtr> TypeParameterArgumentsArray;

	// The only concrete TypeExpression.
	class TypeIdentifier : public TypeExpression
	{
	public:
		unsigned _line, _col;
		StructNamePtr structname;
		TypeVariablePtr typevariable;
		TypeParameterArgumentsArray typeparameterarguments;
		TypeExpressionType get_typeexpression_type() const override
		{
			return TYPEIDENTIFIER_TYPEEXPRESSION;
		}
	};
	typedef std::shared_ptr<TypeIdentifier> TypeIdentifierPtr;
	typedef std::vector<TypeIdentifierPtr> TypeIdentifierArray;

	// A list of type variables.
	class TypeParameters
	{
	public:
		unsigned _line, _col;
		TypeVariableArray typevariables;
	};
	typedef std::shared_ptr<TypeParameters> TypeParametersPtr;
	typedef std::vector<TypeParametersPtr> TypeParametersArray;

	// A struct name together with its type-parameter lists.
	class TypeDeclaration
	{
	public:
		unsigned _line, _col;
		StructNamePtr structname;
		TypeParametersArray typeparameters;
	};
	typedef std::shared_ptr<TypeDeclaration> TypeDeclarationPtr;
	typedef std::vector<TypeDeclarationPtr> TypeDeclarationArray;

	// Leaf node holding text in `content`.
	class NumberLiteral
	{
	public:
		unsigned _line, _col;
		std::wstring content;
	};
	typedef std::shared_ptr<NumberLiteral> NumberLiteralPtr;
	typedef std::vector<NumberLiteralPtr> NumberLiteralArray;

	// Discriminator returned by Expression::get_expression_type().
	enum ExpressionType
	{
		NUMBERLIT_EXPRESSION
	};

	// Abstract base of all expressions.
	class Expression
	{
	public:
		// Polymorphic base: see Statement.
		virtual ~Expression() = default;

		unsigned _line, _col;
		virtual ExpressionType get_expression_type() const = 0;
	};
	typedef std::shared_ptr<Expression> ExpressionPtr;
	typedef std::vector<ExpressionPtr> ExpressionArray;

	// The only concrete Expression: wraps a NumberLiteral.
	class NumberLit : public Expression
	{
	public:
		unsigned _line, _col;
		NumberLiteralPtr numberliteral;
		ExpressionType get_expression_type() const override
		{
			return NUMBERLIT_EXPRESSION;
		}
	};
	typedef std::shared_ptr<NumberLit> NumberLitPtr;
	typedef std::vector<NumberLitPtr> NumberLitArray;

	// Leaf node holding text in `content`.
	class MemberName
	{
	public:
		unsigned _line, _col;
		std::wstring content;
	};
	typedef std::shared_ptr<MemberName> MemberNamePtr;
	typedef std::vector<MemberNamePtr> MemberNameArray;

	// A member name paired with an expression.
	class AttributeParam
	{
	public:
		unsigned _line, _col;
		MemberNamePtr membername;
		ExpressionPtr expression;
	};
	typedef std::shared_ptr<AttributeParam> AttributeParamPtr;
	typedef std::vector<AttributeParamPtr> AttributeParamArray;

	// A list of attribute parameters.
	class AttributeParamList
	{
	public:
		unsigned _line, _col;
		AttributeParamArray attributeparams;
	};
	typedef std::shared_ptr<AttributeParamList> AttributeParamListPtr;
	typedef std::vector<AttributeParamListPtr> AttributeParamListArray;

	// A type expression plus its parameter lists.
	class Attribute
	{
	public:
		unsigned _line, _col;
		TypeExpressionPtr typeexpression;
		AttributeParamListArray attributeparamlists;
	};
	typedef std::shared_ptr<Attribute> AttributePtr;
	typedef std::vector<AttributePtr> AttributeArray;

	// A structure member: attributes, a type identifier and member names.
	class Member
	{
	public:
		unsigned _line, _col;
		AttributeArray attributes;
		TypeIdentifierPtr typeidentifier;
		MemberNameArray membernames;
	};
	typedef std::shared_ptr<Member> MemberPtr;
	typedef std::vector<MemberPtr> MemberArray;

	// Statement kind STRUCTURE_STATEMENT.
	class Structure : public Statement
	{
	public:
		unsigned _line, _col;
		AttributeArray attributes;
		TypeDeclarationPtr typedeclaration;
		MemberArray members;
		StatementType get_statement_type() const override
		{
			return STRUCTURE_STATEMENT;
		}
	};
	typedef std::shared_ptr<Structure> StructurePtr;
	typedef std::vector<StructurePtr> StructureArray;

	// Leaf node holding text in `content`.
	class StringLiteral
	{
	public:
		unsigned _line, _col;
		std::wstring content;
	};
	typedef std::shared_ptr<StringLiteral> StringLiteralPtr;
	typedef std::vector<StringLiteralPtr> StringLiteralArray;

	// Statement kind IMPORT_STATEMENT: wraps a StringLiteral.
	class Import : public Statement
	{
	public:
		unsigned _line, _col;
		StringLiteralPtr stringliteral;
		StatementType get_statement_type() const override
		{
			return IMPORT_STATEMENT;
		}
	};
	typedef std::shared_ptr<Import> ImportPtr;
	typedef std::vector<ImportPtr> ImportArray;

}
|
244
|
+
|
245
|
+
#endif // S2_HPP
|
246
|
+
|
@@ -0,0 +1,796 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
#include <memory.h>
|
4
|
+
#include <string.h>
|
5
|
+
#include "Scanner.h"
|
6
|
+
|
7
|
+
namespace S2 {
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
// string handling, wide character
|
12
|
+
|
13
|
+
|
14
|
+
wchar_t* coco_string_create(const wchar_t* value) {
|
15
|
+
return coco_string_create(value, 0);
|
16
|
+
}
|
17
|
+
|
18
|
+
wchar_t* coco_string_create(const wchar_t *value, int startIndex) {
|
19
|
+
int valueLen = 0;
|
20
|
+
int len = 0;
|
21
|
+
|
22
|
+
if (value) {
|
23
|
+
valueLen = wcslen(value);
|
24
|
+
len = valueLen - startIndex;
|
25
|
+
}
|
26
|
+
|
27
|
+
return coco_string_create(value, startIndex, len);
|
28
|
+
}
|
29
|
+
|
30
|
+
// Returns a newly allocated, NUL-terminated copy of `length` characters of
// `value` starting at `startIndex`. The caller owns the returned array and
// must release it with delete[].
//
// A NULL `value` or a non-positive `length` yields an empty string; in that
// case `value` is never dereferenced or offset, and nothing is written
// outside the allocation.
wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) {
	const int len = (value && length > 0) ? length : 0;
	wchar_t* data = new wchar_t[len + 1];

	if (len > 0) {
		wcsncpy(data, value + startIndex, len);
	}
	data[len] = 0;

	return data;
}
|
41
|
+
|
42
|
+
// Returns a newly allocated copy of `data` with the letters a-z replaced by
// A-Z; every other character is copied unchanged. Returns NULL for NULL
// input. The caller owns the returned array.
wchar_t* coco_string_create_upper(const wchar_t* data) {
	if (data == NULL) { return NULL; }

	const int count = wcslen(data);
	wchar_t* upper = new wchar_t[count + 1];

	for (int k = 0; k < count; ++k) {
		const wchar_t c = data[k];
		if (c >= L'a' && c <= L'z') {
			upper[k] = c + (L'A' - L'a');
		} else {
			upper[k] = c;
		}
	}
	upper[count] = L'\0';

	return upper;
}
|
60
|
+
|
61
|
+
wchar_t* coco_string_create_lower(const wchar_t* data) {
|
62
|
+
if (!data) { return NULL; }
|
63
|
+
int dataLen = wcslen(data);
|
64
|
+
return coco_string_create_lower(data, 0, dataLen);
|
65
|
+
}
|
66
|
+
|
67
|
+
// Returns a newly allocated, NUL-terminated copy of the `dataLen` characters
// of `data` starting at `startIndex`, with A-Z replaced by a-z. Returns NULL
// for NULL input. The caller owns the returned array.
//
// Only the characters inside the requested range are read, so the source
// does not need to be NUL-terminated at `startIndex + dataLen`.
wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) {
	if (!data) { return NULL; }

	wchar_t* newData = new wchar_t[dataLen + 1];

	for (int i = 0; i < dataLen; i++) {
		const wchar_t ch = data[startIndex + i];
		if ((L'A' <= ch) && (ch <= L'Z')) {
			newData[i] = ch - (L'A' - L'a');
		}
		else { newData[i] = ch; }
	}
	newData[dataLen] = L'\0';
	return newData;
}
|
82
|
+
|
83
|
+
// Returns a newly allocated string holding `data1` followed by `data2`.
// Either argument may be NULL, in which case it contributes nothing.
// The caller owns the returned array.
wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) {
	int headLen = 0;
	int tailLen = 0;
	if (data1) { headLen = wcslen(data1); }
	if (data2) { tailLen = wcslen(data2); }

	const int total = headLen + tailLen;
	wchar_t* joined = new wchar_t[total + 1];

	if (data1) { wmemcpy(joined, data1, headLen); }
	if (data2) { wmemcpy(joined + headLen, data2, tailLen); }
	joined[total] = 0;

	return joined;
}
|
100
|
+
|
101
|
+
// Returns a newly allocated string holding `target` followed by the single
// character `appendix`. A NULL `target` is treated as the empty string and is
// never handed to the copy routine. The caller owns the returned array.
wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) {
	int targetLen = 0;
	if (target) { targetLen = wcslen(target); }

	wchar_t* data = new wchar_t[targetLen + 2];
	if (targetLen > 0) {
		wcsncpy(data, target, targetLen);
	}
	data[targetLen] = appendix;
	data[targetLen + 1] = 0;
	return data;
}
|
109
|
+
|
110
|
+
// Releases a string allocated with new[] and resets the caller's pointer
// to NULL. Passing a NULL pointer is harmless.
void coco_string_delete(wchar_t* &data) {
	wchar_t* doomed = data;
	data = NULL;
	delete [] doomed;
}
|
114
|
+
|
115
|
+
// Length of `data` in characters; a NULL pointer counts as length zero.
int coco_string_length(const wchar_t* data) {
	if (data == NULL) {
		return 0;
	}
	return wcslen(data);
}
|
119
|
+
|
120
|
+
// True when `data` ends with the string `end`. Both arguments must be
// non-NULL.
bool coco_string_endswith(const wchar_t* data, const wchar_t *end) {
	const int dataLen = wcslen(data);
	const int endLen = wcslen(end);
	if (endLen > dataLen) {
		return false;
	}
	const wchar_t* tail = data + dataLen - endLen;
	return wcscmp(tail, end) == 0;
}
|
125
|
+
|
126
|
+
// Index of the first occurrence of `value` in `data`, or -1 when absent.
int coco_string_indexof(const wchar_t* data, const wchar_t value) {
	const wchar_t* hit = wcschr(data, value);
	if (hit == NULL) {
		return -1;
	}
	return (hit - data);
}
|
132
|
+
|
133
|
+
// Index of the last occurrence of `value` in `data`, or -1 when absent.
int coco_string_lastindexof(const wchar_t* data, const wchar_t value) {
	const wchar_t* hit = wcsrchr(data, value);
	if (hit == NULL) {
		return -1;
	}
	return (hit - data);
}
|
139
|
+
|
140
|
+
void coco_string_merge(wchar_t* &target, const wchar_t* appendix) {
|
141
|
+
if (!appendix) { return; }
|
142
|
+
wchar_t* data = coco_string_create_append(target, appendix);
|
143
|
+
delete [] target;
|
144
|
+
target = data;
|
145
|
+
}
|
146
|
+
|
147
|
+
// True when both strings have identical contents. Both must be non-NULL.
bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) {
	const int order = wcscmp(data1, data2);
	return order == 0;
}
|
150
|
+
|
151
|
+
// Three-way comparison with the same result as wcscmp(data1, data2).
int coco_string_compareto(const wchar_t* data1, const wchar_t* data2) {
	const int order = wcscmp(data1, data2);
	return order;
}
|
154
|
+
|
155
|
+
// Hashes a wide string with the recurrence h = (h * 7) ^ ch and returns a
// non-negative value. A NULL pointer hashes to 0.
//
// The accumulation is done in unsigned arithmetic, so wrap-around on long
// inputs is well-defined rather than signed-overflow undefined behaviour.
// The one bit pattern whose magnitude does not fit in an int is mapped to 0,
// so the result can never be negative.
int coco_string_hash(const wchar_t *data) {
	if (!data) { return 0; }

	unsigned int acc = 0;
	for (; *data != 0; ++data) {
		acc = (acc * 7u) ^ static_cast<unsigned int>(*data);
	}

	int h = static_cast<int>(acc);
	if (h < 0) {
		h = static_cast<int>(0u - acc);  // negate without signed overflow
		if (h < 0) { h = 0; }            // only the most negative int lands here
	}
	return h;
}
|
165
|
+
|
166
|
+
// string handling, ascii character
|
167
|
+
|
168
|
+
// Widens a narrow string into a newly allocated wide string, one wchar_t per
// input byte. A NULL `value` yields an empty string. The caller owns the
// returned array.
//
// Each byte is widened through unsigned char so that bytes >= 0x80 map to
// 0x80..0xFF instead of being sign-extended where plain char is signed.
wchar_t* coco_string_create(const char* value) {
	int len = 0;
	if (value) { len = strlen(value); }

	wchar_t* data = new wchar_t[len + 1];
	for (int i = 0; i < len; ++i) {
		data[i] = static_cast<wchar_t>(static_cast<unsigned char>(value[i]));
	}
	data[len] = 0;
	return data;
}
|
176
|
+
|
177
|
+
// Narrows a wide string into a newly allocated char string by casting each
// character to char. A NULL `value` yields an empty string. The caller owns
// the returned array.
char* coco_string_create_char(const wchar_t *value) {
	int count = 0;
	if (value) { count = wcslen(value); }

	char *narrow = new char[count + 1];
	int k = 0;
	while (k < count) {
		narrow[k] = (char) value[k];
		++k;
	}
	narrow[count] = 0;
	return narrow;
}
|
184
|
+
|
185
|
+
// Releases a char string allocated with new[] and resets the caller's
// pointer to NULL. Passing a NULL pointer is harmless.
void coco_string_delete(char* &data) {
	char* doomed = data;
	data = NULL;
	delete [] doomed;
}
|
189
|
+
|
190
|
+
|
191
|
+
// Builds an empty token: kind 0, all position fields zero, no text and no
// successor. `charPos` is zeroed as well so that no field of a
// default-constructed token is left indeterminate.
Token::Token() {
	kind    = 0;
	pos     = 0;
	charPos = 0;
	col     = 0;
	line    = 0;
	val     = NULL;
	next    = NULL;
}
|
199
|
+
|
200
|
+
// Releases the token text and clears the pointer.
Token::~Token() {
	wchar_t* text = val;
	val = NULL;
	delete [] text;
}
|
203
|
+
|
204
|
+
// Builds a buffer over a stdio stream. `isUserStream` records whether the
// caller keeps ownership of the stream; Close() only closes streams that are
// not user streams.
//
// For a seekable stream the file length is measured up front and at most
// COCO_MAX_BUFFER_LENGTH bytes are buffered at a time. For a non-seekable
// stream all lengths start at zero.
Buffer::Buffer(FILE* s, bool isUserStream) {
// ensure binary read on windows
#if _MSC_VER >= 1300
	_setmode(_fileno(s), _O_BINARY);
#endif
	this->stream = s;
	this->isUserStream = isUserStream;

	if (!CanSeek()) {
		fileLen = bufLen = bufStart = 0;
	} else {
		fseek(s, 0, SEEK_END);
		fileLen = ftell(s);
		fseek(s, 0, SEEK_SET);
		if (fileLen < COCO_MAX_BUFFER_LENGTH) {
			bufLen = fileLen;
		} else {
			bufLen = COCO_MAX_BUFFER_LENGTH;
		}
		bufStart = INT_MAX; // nothing in the buffer so far
	}

	bufCapacity = (bufLen > 0) ? bufLen : COCO_MIN_BUFFER_LENGTH;
	buf = new char[bufCapacity];

	if (fileLen > 0) {
		SetPos(0);          // setup buffer to position 0 (start)
	} else {
		bufPos = 0;         // index 0 is already after the file, thus Pos = 0 is invalid
	}

	// The whole file fits in the buffer, so the stream is no longer needed.
	if (bufLen == fileLen && CanSeek()) {
		Close();
	}
}
|
225
|
+
|
226
|
+
// Takes over the state of `b`: every field is copied, and `b` gives up its
// byte array and its stream (both are set to NULL in `b`).
Buffer::Buffer(Buffer *b) {
	// plain bookkeeping fields
	bufCapacity  = b->bufCapacity;
	bufStart     = b->bufStart;
	bufLen       = b->bufLen;
	fileLen      = b->fileLen;
	bufPos       = b->bufPos;
	isUserStream = b->isUserStream;

	// resources: transfer and detach from the source
	buf = b->buf;
	b->buf = NULL;
	stream = b->stream;
	b->stream = NULL;
}
|
238
|
+
|
239
|
+
// Builds a buffer over a private copy of `len` bytes from `buf`. No stream
// is attached: the whole input is already in memory.
//
// `isUserStream` is set explicitly, because Close() (called from the
// destructor) reads it and the take-over constructor copies it.
Buffer::Buffer(const char* buf, size_t len) {
	this->buf = new char[len];
	if (len > 0) {
		memcpy(this->buf, buf, len*sizeof(unsigned char));
	}
	bufStart = 0;
	bufCapacity = bufLen = len;
	fileLen = len;
	bufPos = 0;
	stream = NULL;
	isUserStream = false; // there is no stream to own or to close
}
|
248
|
+
|
249
|
+
// Closes the stream via Close() and frees the byte array.
Buffer::~Buffer() {
	Close();
	delete [] buf;   // deleting a NULL pointer is a no-op
	buf = NULL;
}
|
256
|
+
|
257
|
+
void Buffer::Close() {
|
258
|
+
if (!isUserStream && stream != NULL) {
|
259
|
+
fclose(stream);
|
260
|
+
stream = NULL;
|
261
|
+
}
|
262
|
+
}
|
263
|
+
|
264
|
+
int Buffer::Read() {
|
265
|
+
if (bufPos < bufLen) {
|
266
|
+
return buf[bufPos++];
|
267
|
+
} else if (GetPos() < fileLen) {
|
268
|
+
SetPos(GetPos()); // shift buffer start to Pos
|
269
|
+
return buf[bufPos++];
|
270
|
+
} else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) {
|
271
|
+
return buf[bufPos++];
|
272
|
+
} else {
|
273
|
+
return EoF;
|
274
|
+
}
|
275
|
+
}
|
276
|
+
|
277
|
+
int Buffer::Peek() {
|
278
|
+
int curPos = GetPos();
|
279
|
+
int ch = Read();
|
280
|
+
SetPos(curPos);
|
281
|
+
return ch;
|
282
|
+
}
|
283
|
+
|
284
|
+
// beg .. begin, zero-based, inclusive, in byte
|
285
|
+
// end .. end, zero-based, exclusive, in byte
|
286
|
+
wchar_t* Buffer::GetString(int beg, int end) {
|
287
|
+
int len = 0;
|
288
|
+
wchar_t *buf = new wchar_t[end - beg];
|
289
|
+
int oldPos = GetPos();
|
290
|
+
SetPos(beg);
|
291
|
+
while (GetPos() < end) buf[len++] = (wchar_t) Read();
|
292
|
+
SetPos(oldPos);
|
293
|
+
wchar_t *res = coco_string_create(buf, 0, len);
|
294
|
+
coco_string_delete(buf);
|
295
|
+
return res;
|
296
|
+
}
|
297
|
+
|
298
|
+
int Buffer::GetPos() {
|
299
|
+
return bufPos + bufStart;
|
300
|
+
}
|
301
|
+
|
302
|
+
void Buffer::SetPos(int value) {
|
303
|
+
if ((value >= fileLen) && (stream != NULL) && !CanSeek()) {
|
304
|
+
// Wanted position is after buffer and the stream
|
305
|
+
// is not seek-able e.g. network or console,
|
306
|
+
// thus we have to read the stream manually till
|
307
|
+
// the wanted position is in sight.
|
308
|
+
while ((value >= fileLen) && (ReadNextStreamChunk() > 0));
|
309
|
+
}
|
310
|
+
|
311
|
+
if ((value < 0) || (value > fileLen)) {
|
312
|
+
wprintf(L"--- buffer out of bounds access, position: %d\n", value);
|
313
|
+
exit(1);
|
314
|
+
}
|
315
|
+
|
316
|
+
if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer
|
317
|
+
bufPos = value - bufStart;
|
318
|
+
} else if (stream != NULL) { // must be swapped in
|
319
|
+
fseek(stream, value, SEEK_SET);
|
320
|
+
bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream);
|
321
|
+
bufStart = value; bufPos = 0;
|
322
|
+
} else {
|
323
|
+
bufPos = fileLen - bufStart; // make Pos return fileLen
|
324
|
+
}
|
325
|
+
}
|
326
|
+
|
327
|
+
// Read the next chunk of bytes from the stream, increases the buffer
|
328
|
+
// if needed and updates the fields fileLen and bufLen.
|
329
|
+
// Returns the number of bytes read.
|
330
|
+
int Buffer::ReadNextStreamChunk() {
|
331
|
+
int free = bufCapacity - bufLen;
|
332
|
+
if (free == 0) {
|
333
|
+
// in the case of a growing input stream
|
334
|
+
// we can neither seek in the stream, nor can we
|
335
|
+
// foresee the maximum length, thus we must adapt
|
336
|
+
// the buffer size on demand.
|
337
|
+
bufCapacity = bufLen * 2;
|
338
|
+
char *newBuf = new char[bufCapacity];
|
339
|
+
memcpy(newBuf, buf, bufLen*sizeof(char));
|
340
|
+
delete [] buf;
|
341
|
+
buf = newBuf;
|
342
|
+
free = bufLen;
|
343
|
+
}
|
344
|
+
int read = fread(buf + bufLen, sizeof(unsigned char), free, stream);
|
345
|
+
if (read > 0) {
|
346
|
+
fileLen = bufLen = (bufLen + read);
|
347
|
+
return read;
|
348
|
+
}
|
349
|
+
// end of stream reached
|
350
|
+
return 0;
|
351
|
+
}
|
352
|
+
|
353
|
+
bool Buffer::CanSeek() {
|
354
|
+
return (stream != NULL) && (ftell(stream) != -1);
|
355
|
+
}
|
356
|
+
|
357
|
+
// Reads one UTF-8 encoded character from the underlying byte buffer and
// returns its code point, or EoF. Bytes that cannot start a sequence are
// skipped first. Continuation bytes are masked with 0x3F without further
// validation.
int UTF8Buffer::Read() {
	// skip until a utf8 start byte (0xxxxxxx or 11xxxxxx) or EoF
	int ch = Buffer::Read();
	while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)) {
		ch = Buffer::Read();
	}

	if (ch < 128 || ch == EoF) {
		// first 127 chars are the same in ascii and utf8
		// 0xxxxxxx or end of file character
		return ch;
	}

	int code = ch;
	if ((ch & 0xF0) == 0xF0) {
		// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		code = ch & 0x07;
		code = (code << 6) | (Buffer::Read() & 0x3F);
		code = (code << 6) | (Buffer::Read() & 0x3F);
		code = (code << 6) | (Buffer::Read() & 0x3F);
	} else if ((ch & 0xE0) == 0xE0) {
		// 1110xxxx 10xxxxxx 10xxxxxx
		code = ch & 0x0F;
		code = (code << 6) | (Buffer::Read() & 0x3F);
		code = (code << 6) | (Buffer::Read() & 0x3F);
	} else if ((ch & 0xC0) == 0xC0) {
		// 110xxxxx 10xxxxxx
		code = ch & 0x1F;
		code = (code << 6) | (Buffer::Read() & 0x3F);
	}
	return code;
}
|
387
|
+
|
388
|
+
// Builds a scanner over an in-memory byte range; the buffer copies the bytes.
Scanner::Scanner(const char* buf, size_t len) {
	this->buffer = new Buffer(buf, len);
	Init();
}
|
392
|
+
|
393
|
+
// Builds a scanner that reads the named file in binary mode.
// Terminates the process with a diagnostic if the file cannot be opened.
Scanner::Scanner(const wchar_t* fileName) {
	char *narrowName = coco_string_create_char(fileName);
	FILE* stream = fopen(narrowName, "rb");
	if (stream == NULL) {
		wprintf(L"--- Cannot open file %ls\n", fileName);
		exit(1);
	}
	coco_string_delete(narrowName);

	// false: the buffer may close the stream itself
	buffer = new Buffer(stream, false);
	Init();
}
|
404
|
+
|
405
|
+
// Builds a scanner over a caller-supplied stream. The stream is marked as a
// user stream, so the buffer will not close it.
Scanner::Scanner(FILE* s) {
	this->buffer = new Buffer(s, true);
	Init();
}
|
409
|
+
|
410
|
+
// Frees every block of the token heap, then the token text buffer and the
// input buffer. Each heap block stores the pointer to the next block right
// behind its COCO_HEAP_BLOCK_SIZE payload bytes.
Scanner::~Scanner() {
	char* block = (char*) firstHeap;
	while (block != NULL) {
		char* following = *(char**) (block + COCO_HEAP_BLOCK_SIZE);
		free(firstHeap);
		firstHeap = following;
		block = following;
	}

	delete [] tval;
	delete buffer;
}
|
421
|
+
|
422
|
+
void Scanner::Init() {
|
423
|
+
EOL = '\n';
|
424
|
+
eofSym = 0;
|
425
|
+
maxT = 21;
|
426
|
+
noSym = 21;
|
427
|
+
int i;
|
428
|
+
for (i = 65; i <= 90; ++i) start.set(i, 1);
|
429
|
+
for (i = 97; i <= 122; ++i) start.set(i, 2);
|
430
|
+
for (i = 49; i <= 57; ++i) start.set(i, 3);
|
431
|
+
for (i = 10; i <= 10; ++i) start.set(i, 15);
|
432
|
+
for (i = 13; i <= 13; ++i) start.set(i, 15);
|
433
|
+
start.set(48, 19);
|
434
|
+
start.set(34, 20);
|
435
|
+
start.set(39, 10);
|
436
|
+
start.set(36, 21);
|
437
|
+
start.set(60, 24);
|
438
|
+
start.set(44, 25);
|
439
|
+
start.set(62, 26);
|
440
|
+
start.set(61, 27);
|
441
|
+
start.set(58, 28);
|
442
|
+
start.set(91, 29);
|
443
|
+
start.set(93, 30);
|
444
|
+
start.set(123, 31);
|
445
|
+
start.set(125, 32);
|
446
|
+
start.set(Buffer::EoF, -1);
|
447
|
+
keywords.set(L"struct", 17);
|
448
|
+
keywords.set(L"import", 20);
|
449
|
+
|
450
|
+
|
451
|
+
tvalLength = 128;
|
452
|
+
tval = new wchar_t[tvalLength]; // text of current token
|
453
|
+
|
454
|
+
// COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block
|
455
|
+
heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*));
|
456
|
+
firstHeap = heap;
|
457
|
+
heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE);
|
458
|
+
*heapEnd = 0;
|
459
|
+
heapTop = heap;
|
460
|
+
if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) {
|
461
|
+
wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n");
|
462
|
+
exit(1);
|
463
|
+
}
|
464
|
+
|
465
|
+
pos = -1; line = 1; col = 0; charPos = -1;
|
466
|
+
oldEols = 0;
|
467
|
+
NextCh();
|
468
|
+
if (ch == 0xEF) { // check optional byte order mark for UTF-8
|
469
|
+
NextCh(); int ch1 = ch;
|
470
|
+
NextCh(); int ch2 = ch;
|
471
|
+
if (ch1 != 0xBB || ch2 != 0xBF) {
|
472
|
+
wprintf(L"Illegal byte order mark at start of file");
|
473
|
+
exit(1);
|
474
|
+
}
|
475
|
+
Buffer *oldBuf = buffer;
|
476
|
+
buffer = new UTF8Buffer(buffer); col = 0; charPos = -1;
|
477
|
+
delete oldBuf; oldBuf = NULL;
|
478
|
+
NextCh();
|
479
|
+
}
|
480
|
+
|
481
|
+
|
482
|
+
pt = tokens = CreateToken(); // first token is a dummy
|
483
|
+
}
|
484
|
+
|
485
|
+
void Scanner::NextCh() {
|
486
|
+
if (oldEols > 0) { ch = EOL; oldEols--; }
|
487
|
+
else {
|
488
|
+
pos = buffer->GetPos();
|
489
|
+
// buffer reads unicode chars, if UTF8 has been detected
|
490
|
+
ch = buffer->Read(); col++; charPos++;
|
491
|
+
// replace isolated '\r' by '\n' in order to make
|
492
|
+
// eol handling uniform across Windows, Unix and Mac
|
493
|
+
if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL;
|
494
|
+
if (ch == EOL) { line++; col = 0; }
|
495
|
+
}
|
496
|
+
|
497
|
+
}
|
498
|
+
|
499
|
+
void Scanner::AddCh() {
|
500
|
+
if (tlen >= tvalLength) {
|
501
|
+
tvalLength *= 2;
|
502
|
+
wchar_t *newBuf = new wchar_t[tvalLength];
|
503
|
+
memcpy(newBuf, tval, tlen*sizeof(wchar_t));
|
504
|
+
delete [] tval;
|
505
|
+
tval = newBuf;
|
506
|
+
}
|
507
|
+
if (ch != Buffer::EoF) {
|
508
|
+
tval[tlen++] = ch;
|
509
|
+
NextCh();
|
510
|
+
}
|
511
|
+
}
|
512
|
+
|
513
|
+
|
514
|
+
bool Scanner::Comment0() {
|
515
|
+
int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;
|
516
|
+
NextCh();
|
517
|
+
if (ch == L'/') {
|
518
|
+
NextCh();
|
519
|
+
for(;;) {
|
520
|
+
if (ch == 10) {
|
521
|
+
level--;
|
522
|
+
if (level == 0) { oldEols = line - line0; NextCh(); return true; }
|
523
|
+
NextCh();
|
524
|
+
} else if (ch == buffer->EoF) return false;
|
525
|
+
else NextCh();
|
526
|
+
}
|
527
|
+
} else {
|
528
|
+
buffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0;
|
529
|
+
}
|
530
|
+
return false;
|
531
|
+
}
|
532
|
+
|
533
|
+
bool Scanner::Comment1() {
|
534
|
+
int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;
|
535
|
+
NextCh();
|
536
|
+
if (ch == L'*') {
|
537
|
+
NextCh();
|
538
|
+
for(;;) {
|
539
|
+
if (ch == L'*') {
|
540
|
+
NextCh();
|
541
|
+
if (ch == L'/') {
|
542
|
+
level--;
|
543
|
+
if (level == 0) { oldEols = line - line0; NextCh(); return true; }
|
544
|
+
NextCh();
|
545
|
+
}
|
546
|
+
} else if (ch == L'/') {
|
547
|
+
NextCh();
|
548
|
+
if (ch == L'*') {
|
549
|
+
level++; NextCh();
|
550
|
+
}
|
551
|
+
} else if (ch == buffer->EoF) return false;
|
552
|
+
else NextCh();
|
553
|
+
}
|
554
|
+
} else {
|
555
|
+
buffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0;
|
556
|
+
}
|
557
|
+
return false;
|
558
|
+
}
|
559
|
+
|
560
|
+
|
561
|
+
void Scanner::CreateHeapBlock() {
|
562
|
+
void* newHeap;
|
563
|
+
char* cur = (char*) firstHeap;
|
564
|
+
|
565
|
+
while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) {
|
566
|
+
cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE));
|
567
|
+
free(firstHeap);
|
568
|
+
firstHeap = cur;
|
569
|
+
}
|
570
|
+
|
571
|
+
// COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block
|
572
|
+
newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*));
|
573
|
+
*heapEnd = newHeap;
|
574
|
+
heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE);
|
575
|
+
*heapEnd = 0;
|
576
|
+
heap = newHeap;
|
577
|
+
heapTop = heap;
|
578
|
+
}
|
579
|
+
|
580
|
+
// Carves the storage for one Token out of the token heap, starting a new
// heap block when the current one cannot hold it. Only `val` and `next` are
// initialised here (both NULL); no constructor is run.
Token* Scanner::CreateToken() {
	const bool blockFull =
		((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd;
	if (blockFull) {
		CreateHeapBlock();
	}

	Token *fresh = (Token*) heapTop;
	heapTop = (void*) ((char*) heapTop + sizeof(Token));
	fresh->val = NULL;
	fresh->next = NULL;
	return fresh;
}
|
591
|
+
|
592
|
+
// Copies the current token text (`tlen` characters of `tval`) into the token
// heap as a NUL-terminated string and points `t->val` at it.
// Terminates the process when the text cannot fit into a single heap block.
void Scanner::AppendVal(Token *t) {
	const int needed = (tlen + 1) * sizeof(wchar_t);

	const bool blockFull = ((char*) heapTop + needed) >= (char*) heapEnd;
	if (blockFull) {
		if (needed > COCO_HEAP_BLOCK_SIZE) {
			wprintf(L"--- Too long token value\n");
			exit(1);
		}
		CreateHeapBlock();
	}

	wchar_t* slot = (wchar_t*) heapTop;
	heapTop = (void*) ((char*) heapTop + needed);
	t->val = slot;

	wcsncpy(slot, tval, tlen);
	slot[tlen] = L'\0';
}
|
607
|
+
|
608
|
+
// Reads the next token from the input and returns it.
//
// Leading blanks (space, TAB, LF, CR) and comments are skipped first. The
// token is then recognised by a table-free DFA: `start.state(ch)` selects the
// entry state and each `case N` / `case_N` pair below is one DFA state.
// States that may end a token record the position and kind in `recEnd` /
// `recKind`; state 0 is the "no further transition" state, which falls back
// to the last recorded match (or to `noSym` when there is none).
// Finally the token text is copied into the token via AppendVal().
Token* Scanner::NextToken() {
    // Skip blanks and comments. This is a loop rather than a recursive call
    // so that a long run of consecutive comments cannot exhaust the stack;
    // every iteration does exactly what re-entering the function would do.
    for (;;) {
        while (ch == ' ' ||
               (ch >= 9 && ch <= 10) || ch == 13
        ) NextCh();
        if ((ch == L'/' && Comment0()) || (ch == L'/' && Comment1())) continue;
        break;
    }

    int recKind = noSym;   // kind of the last accepting state passed through
    int recEnd = pos;      // input position at that accepting state
    t = CreateToken();
    t->pos = pos; t->col = col; t->line = line; t->charPos = charPos;
    int state = start.state(ch);
    tlen = 0; AddCh();

    switch (state) {
        case -1: { t->kind = eofSym; break; } // NextCh already done
        case 0: {
            case_0:
            // Dead end: rewind to the last accepted prefix, if any.
            if (recKind != noSym) {
                tlen = recEnd - t->pos;
                SetScannerBehindT();
            }
            t->kind = recKind; break;
        } // NextCh already done
        case 1:
            case_1:
            recEnd = pos; recKind = 1;
            if ((ch >= L'0' && ch <= L'9') || (ch >= L'A' && ch <= L'Z') || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_1;}
            else {t->kind = 1; break;}
        case 2:
            case_2:
            recEnd = pos; recKind = 2;
            if ((ch >= L'0' && ch <= L'9') || (ch >= L'A' && ch <= L'Z') || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_2;}
            // The keyword table may replace kind 2 with a keyword's kind.
            else {t->kind = 2; wchar_t *literal = coco_string_create(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;}
        case 3:
            case_3:
            recEnd = pos; recKind = 3;
            if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_3;}
            else if (ch == L'.') {AddCh(); goto case_4;}
            else {t->kind = 3; break;}
        case 4:
            case_4:
            if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_5;}
            else {goto case_0;}
        case 5:
            case_5:
            recEnd = pos; recKind = 3;
            if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_5;}
            else {t->kind = 3; break;}
        case 6:
            case_6:
            if ((ch >= L'0' && ch <= L'9') || (ch >= L'a' && ch <= L'f')) {AddCh(); goto case_7;}
            else {goto case_0;}
        case 7:
            case_7:
            recEnd = pos; recKind = 4;
            if ((ch >= L'0' && ch <= L'9') || (ch >= L'a' && ch <= L'f')) {AddCh(); goto case_7;}
            else {t->kind = 4; break;}
        case 8:
            case_8:
            {t->kind = 5; break;}
        case 9:
            case_9:
            {t->kind = 6; break;}
        case 10:
            // Any character except LF (10), CR (13), ' (39) and \ (92).
            if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= L'&') || (ch >= L'(' && ch <= L'[') || (ch >= L']' && ch <= 65535)) {AddCh(); goto case_11;}
            else if (ch == 92) {AddCh(); goto case_12;}
            else {goto case_0;}
        case 11:
            case_11:
            if (ch == 39) {AddCh(); goto case_14;}
            else {goto case_0;}
        case 12:
            case_12:
            if ((ch >= L' ' && ch <= L'~')) {AddCh(); goto case_13;}
            else {goto case_0;}
        case 13:
            case_13:
            if ((ch >= L'0' && ch <= L'9') || (ch >= L'a' && ch <= L'f')) {AddCh(); goto case_13;}
            else if (ch == 39) {AddCh(); goto case_14;}
            else {goto case_0;}
        case 14:
            case_14:
            {t->kind = 7; break;}
        case 15:
            {t->kind = 8; break;}
        case 16:
            case_16:
            recEnd = pos; recKind = 9;
            if ((ch >= L'0' && ch <= L'9') || (ch >= L'A' && ch <= L'Z') || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_16;}
            else {t->kind = 9; break;}
        case 17:
            case_17:
            recEnd = pos; recKind = 22;
            if ((ch >= L'0' && ch <= L'9') || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_17;}
            else {t->kind = 22; break;}
        case 18:
            case_18:
            recEnd = pos; recKind = 23;
            if ((ch >= L'-' && ch <= L'.') || (ch >= L'0' && ch <= L':') || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_18;}
            else {t->kind = 23; break;}
        case 19:
            recEnd = pos; recKind = 3;
            if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_3;}
            else if (ch == L'.') {AddCh(); goto case_4;}
            else if (ch == L'x') {AddCh(); goto case_6;}
            else {t->kind = 3; break;}
        case 20:
            case_20:
            // Any character except LF (10), CR (13), " (34) and \ (92).
            if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= L'!') || (ch >= L'#' && ch <= L'[') || (ch >= L']' && ch <= 65535)) {AddCh(); goto case_20;}
            else if (ch == 10 || ch == 13) {AddCh(); goto case_9;}
            else if (ch == L'"') {AddCh(); goto case_8;}
            else if (ch == 92) {AddCh(); goto case_22;}
            else {goto case_0;}
        case 21:
            recEnd = pos; recKind = 22;
            if ((ch >= L'A' && ch <= L'Z')) {AddCh(); goto case_16;}
            else if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_17;}
            else if ((ch >= L'a' && ch <= L'z')) {AddCh(); goto case_23;}
            else {t->kind = 22; break;}
        case 22:
            case_22:
            if ((ch >= L' ' && ch <= L'~')) {AddCh(); goto case_20;}
            else {goto case_0;}
        case 23:
            case_23:
            recEnd = pos; recKind = 22;
            if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_17;}
            else if ((ch >= L'a' && ch <= L'z')) {AddCh(); goto case_23;}
            else if (ch == L'=') {AddCh(); goto case_18;}
            else {t->kind = 22; break;}
        // States 24-32 accept immediately; the character was already
        // consumed by the AddCh() call before the switch.
        case 24:
            {t->kind = 10; break;}
        case 25:
            {t->kind = 11; break;}
        case 26:
            {t->kind = 12; break;}
        case 27:
            {t->kind = 13; break;}
        case 28:
            {t->kind = 14; break;}
        case 29:
            {t->kind = 15; break;}
        case 30:
            {t->kind = 16; break;}
        case 31:
            {t->kind = 18; break;}
        case 32:
            {t->kind = 19; break;}

    }
    AppendVal(t);
    return t;
}
|
760
|
+
|
761
|
+
// Rewinds the input to the start of the current token `t`, restores the
// line/column/character counters recorded in it, and then re-reads `tlen`
// characters so that scanning continues right behind the token text.
void Scanner::SetScannerBehindT() {
    buffer->SetPos(t->pos);
    NextCh();

    // Restore the counters after the NextCh() above has run.
    line = t->line;
    col = t->col;
    charPos = t->charPos;

    int consumed = 0;
    while (consumed < tlen) {
        NextCh();
        ++consumed;
    }
}
|
767
|
+
|
768
|
+
// get the next token (possibly a token already seen during peeking)
|
769
|
+
Token* Scanner::Scan() {
|
770
|
+
if (tokens->next == NULL) {
|
771
|
+
return pt = tokens = NextToken();
|
772
|
+
} else {
|
773
|
+
pt = tokens = tokens->next;
|
774
|
+
return tokens;
|
775
|
+
}
|
776
|
+
}
|
777
|
+
|
778
|
+
// peek for the next token, ignore pragmas
|
779
|
+
Token* Scanner::Peek() {
|
780
|
+
do {
|
781
|
+
if (pt->next == NULL) {
|
782
|
+
pt->next = NextToken();
|
783
|
+
}
|
784
|
+
pt = pt->next;
|
785
|
+
} while (pt->kind > maxT); // skip pragmas
|
786
|
+
|
787
|
+
return pt;
|
788
|
+
}
|
789
|
+
|
790
|
+
// make sure that peeking starts at the current scan position
|
791
|
+
void Scanner::ResetPeek() {
|
792
|
+
pt = tokens;
|
793
|
+
}
|
794
|
+
|
795
|
+
} // namespace
|
796
|
+
|