myco 0.1.0.dev
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +2 -0
- data/bin/myco +7 -0
- data/lib/myco/backtrace.rb +56 -0
- data/lib/myco/bootstrap/component.rb +142 -0
- data/lib/myco/bootstrap/empty_object.rb +4 -0
- data/lib/myco/bootstrap/file_toplevel.rb +5 -0
- data/lib/myco/bootstrap/find_constant.rb +86 -0
- data/lib/myco/bootstrap/instance.rb +52 -0
- data/lib/myco/bootstrap/meme.rb +160 -0
- data/lib/myco/bootstrap/void.rb +40 -0
- data/lib/myco/bootstrap.my +15 -0
- data/lib/myco/bootstrap.rb +10 -0
- data/lib/myco/command.my +33 -0
- data/lib/myco/core/BasicObject.my +46 -0
- data/lib/myco/core/Category.my +5 -0
- data/lib/myco/core/Decorator.my +18 -0
- data/lib/myco/core/FileToplevel.my +23 -0
- data/lib/myco/core/Object.my +24 -0
- data/lib/myco/core/Switch.my +31 -0
- data/lib/myco/eval.rb +63 -0
- data/lib/myco/parser/ast/constant_access.rb +29 -0
- data/lib/myco/parser/ast/constant_define.rb +40 -0
- data/lib/myco/parser/ast/constant_reopen.rb +47 -0
- data/lib/myco/parser/ast/declare_category.rb +51 -0
- data/lib/myco/parser/ast/declare_decorator.rb +35 -0
- data/lib/myco/parser/ast/declare_file.rb +54 -0
- data/lib/myco/parser/ast/declare_meme.rb +44 -0
- data/lib/myco/parser/ast/declare_object.rb +75 -0
- data/lib/myco/parser/ast/declare_string.rb +37 -0
- data/lib/myco/parser/ast/invoke.rb +66 -0
- data/lib/myco/parser/ast/local_variable_access_ambiguous.rb +38 -0
- data/lib/myco/parser/ast/misc.rb +61 -0
- data/lib/myco/parser/ast/myco_module_scope.rb +58 -0
- data/lib/myco/parser/ast/quest.rb +82 -0
- data/lib/myco/parser/ast.rb +15 -0
- data/lib/myco/parser/builder.output +3995 -0
- data/lib/myco/parser/builder.racc +585 -0
- data/lib/myco/parser/builder.rb +1592 -0
- data/lib/myco/parser/lexer.rb +2306 -0
- data/lib/myco/parser/lexer.rl +393 -0
- data/lib/myco/parser/lexer_char_classes.rl +56 -0
- data/lib/myco/parser/lexer_common.rb +95 -0
- data/lib/myco/parser/lexer_skeleton.rl +154 -0
- data/lib/myco/parser/peg_parser.kpeg +759 -0
- data/lib/myco/parser/peg_parser.rb +7094 -0
- data/lib/myco/parser.rb +40 -0
- data/lib/myco/tools/OptionParser.my +38 -0
- data/lib/myco/tools/mycompile.my +51 -0
- data/lib/myco/toolset.rb +16 -0
- data/lib/myco/version.rb +22 -0
- data/lib/myco.rb +15 -0
- metadata +247 -0
@@ -0,0 +1,393 @@
|
|
1
|
+
|
2
|
+
%%machine lexer; # %
|
3
|
+
|
4
|
+
%%{
|
5
|
+
# %
|
6
|
+
constant = c_upper c_alnum* ;
|
7
|
+
identifier = c_lower c_alnum* ;
|
8
|
+
|
9
|
+
comment = '#' (any - c_eol)*; # end-of-line comment
|
10
|
+
|
11
|
+
integer = [0-9]+ ;
|
12
|
+
float = [0-9]+ '.' [0-9]+ ;
|
13
|
+
|
14
|
+
strbody_norm = ^('\\' | '"');
|
15
|
+
strbody = strbody_norm* ('\\' c_any strbody_norm*)*;
|
16
|
+
|
17
|
+
# "foo bar"
|
18
|
+
#
|
19
|
+
string = (
|
20
|
+
zlen % { note_begin :string }
|
21
|
+
'"' % { note :string, :T_STRING_BEGIN; note :string }
|
22
|
+
strbody % { note :string, :T_STRING_BODY; note :string }
|
23
|
+
'"' % { note :string, :T_STRING_END }
|
24
|
+
);
|
25
|
+
|
26
|
+
# id: foo
|
27
|
+
#
|
28
|
+
declid = (
|
29
|
+
zlen % { note_begin :declid }
|
30
|
+
'id' % { note :declid, :T_DECLID_TAG }
|
31
|
+
c_space*
|
32
|
+
':'
|
33
|
+
c_space* % { note :declid }
|
34
|
+
identifier % { note :declid, :T_DECLID_VALUE }
|
35
|
+
);
|
36
|
+
|
37
|
+
# [foo]
|
38
|
+
#
|
39
|
+
category = (
|
40
|
+
zlen % { note_begin :category }
|
41
|
+
'[' % { note :category, :T_CATEGORY_BEGIN }
|
42
|
+
c_space* % { note :category }
|
43
|
+
identifier % { note :category, :T_CATEGORY_BODY }
|
44
|
+
c_space* % { note :category }
|
45
|
+
']' % { note :category, :T_CATEGORY_END }
|
46
|
+
);
|
47
|
+
|
48
|
+
# :foo
|
49
|
+
# :"bar baz"
|
50
|
+
#
|
51
|
+
symbol = (
|
52
|
+
':' % { note_begin :symbol }
|
53
|
+
(
|
54
|
+
(
|
55
|
+
identifier % { note :symbol, :T_SYMBOL; }
|
56
|
+
)
|
57
|
+
| (
|
58
|
+
'"' % { note :symbol, :T_SYMSTR_BEGIN; note :symbol }
|
59
|
+
strbody % { note :symbol, :T_SYMSTR_BODY; note :symbol }
|
60
|
+
'"' % { note :symbol, :T_SYMSTR_END; }
|
61
|
+
)
|
62
|
+
)
|
63
|
+
);
|
64
|
+
|
65
|
+
# Foo
|
66
|
+
# ::Bar
|
67
|
+
# Foo::Bar
|
68
|
+
# ::Foo::Bar::Baz
|
69
|
+
#
|
70
|
+
sconstant = (
|
71
|
+
zlen % { note_begin :sconstant }
|
72
|
+
(
|
73
|
+
'::' % { note :sconstant, :T_SCOPE; note :sconstant }
|
74
|
+
)? (
|
75
|
+
constant % { note :sconstant, :T_CONSTANT; note :sconstant }
|
76
|
+
'::' % { note :sconstant, :T_SCOPE; note :sconstant }
|
77
|
+
)*
|
78
|
+
constant % { note :sconstant, :T_CONSTANT }
|
79
|
+
);
|
80
|
+
|
81
|
+
# Foo,Bar,Baz
|
82
|
+
#
|
83
|
+
constant_list = (
|
84
|
+
zlen % { note_begin :constant_list, nil }
|
85
|
+
sconstant % { xfer_notes :sconstant, :constant_list }
|
86
|
+
(
|
87
|
+
c_space* % { note :constant_list }
|
88
|
+
',' % { note :constant_list, :T_CONST_SEP }
|
89
|
+
c_space_nl*
|
90
|
+
sconstant % { xfer_notes :sconstant, :constant_list }
|
91
|
+
)*
|
92
|
+
);
|
93
|
+
|
94
|
+
# Foo <
|
95
|
+
#
|
96
|
+
cdefn_begin = (
|
97
|
+
zlen % { note_begin :cdefn_begin }
|
98
|
+
constant % { note :cdefn_begin, :T_CONSTANT }
|
99
|
+
c_space* % { note :cdefn_begin }
|
100
|
+
'<' % { note :cdefn_begin, :T_DEFINE }
|
101
|
+
) % {
|
102
|
+
emit_notes :cdefn_begin
|
103
|
+
};
|
104
|
+
|
105
|
+
# Object {
|
106
|
+
#
|
107
|
+
decl_begin = (
|
108
|
+
(cdefn_begin c_space_nl*)?
|
109
|
+
constant_list
|
110
|
+
c_space_nl* % { note_begin :decl_begin }
|
111
|
+
'{' % { note :decl_begin, :T_DECLARE_BEGIN }
|
112
|
+
) % {
|
113
|
+
emit_notes :constant_list
|
114
|
+
emit_notes :decl_begin
|
115
|
+
};
|
116
|
+
|
117
|
+
# Starting delimiter for a string declaration
|
118
|
+
#
|
119
|
+
# Can be any string of characters following a
|
120
|
+
# constant name + whitespace that is not ambiguous
|
121
|
+
# with some other construction
|
122
|
+
#
|
123
|
+
# The ending delimiter is calculated from the starting delimiter as follows:
|
124
|
+
# The string of characters is reversed.
|
125
|
+
# If there are groups of "alphabetical" characters,
|
126
|
+
# the intra-group order remains intact.
|
127
|
+
# If there are non-alphabetical characters with "directionality",
|
128
|
+
# the "opposite" characters are substituted.
|
129
|
+
#
|
130
|
+
dstr_delim = (
|
131
|
+
^(c_space_nl|'{'|':'|',')
|
132
|
+
^(c_space_nl)+
|
133
|
+
);
|
134
|
+
|
135
|
+
# Object @@@
|
136
|
+
# ...
|
137
|
+
# @@@
|
138
|
+
#
|
139
|
+
# Matches the opening of a string declaration (constant list, whitespace,
# starting delimiter), emits the corresponding tokens and derives the
# ending delimiter that the dstr_body machine will look for.
dstr_begin = (
  constant_list
  c_space+     % { mark :space }
  dstr_delim   % { grab :delim, kram(:space) }
) % {
  emit_notes :constant_list

  start, stop = @stored[:delim]
  emit :T_DECLSTR_BEGIN, start, stop

  # Table of replacement characters to use when calculating
  # the ending delimiter from the starting delimiter.
  # Directional characters are replaced with their opposite.
  @dstr_replace_table ||= %w{
    < > ( ) { } [ ]
  }

  # Calculate the ending delimiter and store it in @dstr_delim, which is
  # the variable the dstr_body machine compares each line against.
  @dstr_delim = text(start, stop) \
    .split(/(?<=[^a-zA-Z])|(?=[^a-zA-Z])/)
    .map { |str|
      idx = @dstr_replace_table.find_index(str)
      idx.nil? ? str :
        (idx.odd? ? @dstr_replace_table[idx-1] : @dstr_replace_table[idx+1])
    }
    .reverse
    .join ''
};
|
167
|
+
|
168
|
+
# identifier (
|
169
|
+
#
|
170
|
+
args_begin = (
|
171
|
+
zlen % { note_begin :args_begin }
|
172
|
+
identifier % { note :args_begin, :T_IDENTIFIER }
|
173
|
+
c_space_nl* % { note :args_begin }
|
174
|
+
'(' % { note :args_begin, :T_ARGS_BEGIN }
|
175
|
+
);
|
176
|
+
|
177
|
+
##
|
178
|
+
# Top level machine
|
179
|
+
|
180
|
+
main := |*
|
181
|
+
c_space;
|
182
|
+
comment;
|
183
|
+
|
184
|
+
decl_begin => { fcall decl_body; };
|
185
|
+
dstr_begin => { fcall dstr_body; };
|
186
|
+
|
187
|
+
string => { emit_notes :string };
|
188
|
+
declid => { emit_notes :declid };
|
189
|
+
category => { emit_notes :category };
|
190
|
+
identifier => { emit :T_IDENTIFIER };
|
191
|
+
constant => { emit :T_CONSTANT };
|
192
|
+
'::' => { emit :T_SCOPE };
|
193
|
+
|
194
|
+
':' => { fcall pre_meme; };
|
195
|
+
|
196
|
+
';' => { emit :T_EXPR_SEP };
|
197
|
+
c_nl => { emit :T_EXPR_SEP };
|
198
|
+
|
199
|
+
c_eof => { emit :T_DECLARE_END };
|
200
|
+
any => { error :main };
|
201
|
+
*|;
|
202
|
+
|
203
|
+
##
|
204
|
+
# Declarative body machine
|
205
|
+
|
206
|
+
decl_body := |*
|
207
|
+
c_space;
|
208
|
+
comment;
|
209
|
+
|
210
|
+
(c_eol|';') => { emit :T_EXPR_SEP };
|
211
|
+
|
212
|
+
decl_begin => { fcall decl_body; };
|
213
|
+
dstr_begin => { fcall dstr_body; };
|
214
|
+
|
215
|
+
string => { emit_notes :string };
|
216
|
+
declid => { emit_notes :declid };
|
217
|
+
category => { emit_notes :category };
|
218
|
+
identifier => { emit :T_IDENTIFIER };
|
219
|
+
constant => { emit :T_CONSTANT };
|
220
|
+
'::' => { emit :T_SCOPE };
|
221
|
+
|
222
|
+
':' => { fcall pre_meme; };
|
223
|
+
|
224
|
+
'}' => { emit :T_DECLARE_END; fret; };
|
225
|
+
|
226
|
+
any => { error :decl_body };
|
227
|
+
*|;
|
228
|
+
|
229
|
+
##
|
230
|
+
# Pre-meme body sub-machines
|
231
|
+
|
232
|
+
pre_meme := |*
|
233
|
+
c_space_nl+;
|
234
|
+
comment;
|
235
|
+
|
236
|
+
# Parameters are specified within '|'s
|
237
|
+
'|' => { emit :T_PARAMS_BEGIN; bpush :param; fcall meme_body; };
|
238
|
+
|
239
|
+
# A meme begins with either a '{' or some other item for inline
|
240
|
+
^(c_space_nl|'{'|'|') =>
|
241
|
+
{ fhold; emit :T_MEME_BEGIN, @ts, @ts; bpush :meml; fgoto meme_body; };
|
242
|
+
'{' => { emit :T_MEME_BEGIN; bpush :meme; fgoto meme_body; };
|
243
|
+
|
244
|
+
any => { error :pre_meme };
|
245
|
+
*|;
|
246
|
+
|
247
|
+
##
|
248
|
+
# Declarative string machine
|
249
|
+
|
250
|
+
# Consumes a declared string one line at a time until a line whose first
# non-blank word equals the stored ending delimiter is found.
dstr_body := |*
  (
    c_nl       % { mark :newline }
    (^c_nl)*   % { grab :line, kram(:newline) }
  ) => {
    start, stop = @stored[:line]
    line_text   = text(start, stop)

    raise "No known delimiter for string declaration." \
      if @dstr_delim.nil?

    match = /^(\s*)(\S+)/.match(line_text)
    if match && match[2] == @dstr_delim
      # No body start has been recorded when the ending delimiter is on
      # the very first line; emit an empty body at this position instead
      # of letting the token run up to the end of the delimiter line.
      emit :T_DECLSTR_BODY, (@dstr_body_start || start), start
      @dstr_body_start = nil
      @dstr_delim      = nil

      # The end token starts after the leading whitespace of the line.
      emit :T_DECLSTR_END, start + match[1].size, stop
      fret;
    else
      # Remember where the body begins; later lines keep the first value.
      @dstr_body_start ||= start
    end
  };
*|;
|
273
|
+
|
274
|
+
##
|
275
|
+
# Meme body machine
|
276
|
+
|
277
|
+
meme_body := |*
|
278
|
+
c_space+;
|
279
|
+
comment;
|
280
|
+
|
281
|
+
decl_begin => { fcall decl_body; };
|
282
|
+
dstr_begin => { fcall dstr_body; };
|
283
|
+
|
284
|
+
args_begin => { emit_notes :args_begin; bpush :args; fcall meme_body; };
|
285
|
+
'(' => { emit :T_PAREN_BEGIN; bpush :paren; fcall meme_body; };
|
286
|
+
'[' => { emit :T_ARRAY_BEGIN; bpush :array; fcall meme_body; };
|
287
|
+
'{' => { emit :T_MEME_BEGIN; bpush :meme; fcall meme_body; };
|
288
|
+
|
289
|
+
'self' => { emit :T_SELF };
|
290
|
+
'null' => { emit :T_NULL };
|
291
|
+
'void' => { emit :T_VOID };
|
292
|
+
'true' => { emit :T_TRUE };
|
293
|
+
'false' => { emit :T_FALSE };
|
294
|
+
integer => { emit :T_INTEGER };
|
295
|
+
float => { emit :T_FLOAT };
|
296
|
+
constant => { emit :T_CONSTANT };
|
297
|
+
identifier => { emit :T_IDENTIFIER };
|
298
|
+
'.' => { emit :T_DOT };
|
299
|
+
'?' => { emit :T_QUEST };
|
300
|
+
'::' => { emit :T_SCOPE };
|
301
|
+
'=' => { emit :T_ASSIGN };
|
302
|
+
'+' => { emit :T_OP_PLUS };
|
303
|
+
'-' => { emit :T_OP_MINUS };
|
304
|
+
'*' => { emit :T_OP_MULT };
|
305
|
+
'/' => { emit :T_OP_DIV };
|
306
|
+
'%' => { emit :T_OP_MOD };
|
307
|
+
'**' => { emit :T_OP_EXP };
|
308
|
+
('<'|'>'|'<='|'>='|'=='|'==='|'<=>'|'=~')
|
309
|
+
=> { emit :T_OP_COMPARE };
|
310
|
+
'&&' => { emit :T_OP_AND };
|
311
|
+
'||' => { emit :T_OP_OR };
|
312
|
+
|
313
|
+
symbol => { emit_notes :symbol };
|
314
|
+
string => { emit_notes :string };
|
315
|
+
|
316
|
+
'\\\n'; # Escaped newline - ignore
|
317
|
+
|
318
|
+
|
319
|
+
'&' => {
|
320
|
+
case bthis
|
321
|
+
when :param; emit :T_OP_TOPROC
|
322
|
+
when :args; emit :T_OP_TOPROC
|
323
|
+
else; error :meme_body
|
324
|
+
end
|
325
|
+
};
|
326
|
+
|
327
|
+
',' => {
|
328
|
+
case bthis
|
329
|
+
when :args; emit :T_ARG_SEP
|
330
|
+
when :param; emit :T_ARG_SEP
|
331
|
+
when :array; emit :T_ARG_SEP
|
332
|
+
else; error :meme_body
|
333
|
+
end
|
334
|
+
};
|
335
|
+
|
336
|
+
';' => {
|
337
|
+
case bthis
|
338
|
+
when :meme; emit :T_EXPR_SEP
|
339
|
+
when :meml; emit :T_EXPR_SEP
|
340
|
+
when :paren; emit :T_EXPR_SEP
|
341
|
+
else; error :meme_body
|
342
|
+
end
|
343
|
+
};
|
344
|
+
|
345
|
+
c_eol => {
|
346
|
+
case bthis
|
347
|
+
when :meme; emit :T_EXPR_SEP
|
348
|
+
when :meml; emit :T_MEME_END, @ts, @ts; fhold; bpop; fret;
|
349
|
+
when :paren; emit :T_EXPR_SEP
|
350
|
+
when :args; emit :T_ARG_SEP
|
351
|
+
when :param; emit :T_ARG_SEP
|
352
|
+
when :array; emit :T_ARG_SEP
|
353
|
+
else; error :meme_body
|
354
|
+
end
|
355
|
+
};
|
356
|
+
|
357
|
+
'}' => {
|
358
|
+
case bthis
|
359
|
+
when :meme; emit :T_MEME_END; bpop; fret;
|
360
|
+
else; error :meme_body
|
361
|
+
end
|
362
|
+
};
|
363
|
+
|
364
|
+
')' => {
|
365
|
+
case bthis
|
366
|
+
when :args; emit :T_ARGS_END; bpop; fret;
|
367
|
+
when :paren; emit :T_PAREN_END; bpop; fret;
|
368
|
+
else; error :meme_body
|
369
|
+
end
|
370
|
+
};
|
371
|
+
|
372
|
+
']' => {
|
373
|
+
case bthis
|
374
|
+
when :array; emit :T_ARRAY_END; bpop; fret;
|
375
|
+
else; error :meme_body
|
376
|
+
end
|
377
|
+
};
|
378
|
+
|
379
|
+
'|' => {
|
380
|
+
case bthis
|
381
|
+
when :param; emit :T_PARAMS_END; bpop; fret;
|
382
|
+
when :meme; emit :T_PARAMS_BEGIN; bpush :param; fcall meme_body;
|
383
|
+
when :meml; emit :T_PARAMS_BEGIN; bpush :param; fcall meme_body;
|
384
|
+
when :paren; emit :T_PARAMS_BEGIN; bpush :param; fcall meme_body;
|
385
|
+
else; error :meme_body
|
386
|
+
end
|
387
|
+
};
|
388
|
+
|
389
|
+
any => { error :meme_body };
|
390
|
+
*|;
|
391
|
+
|
392
|
+
}%%
|
393
|
+
# %
|
@@ -0,0 +1,56 @@
|
|
1
|
+
|
2
|
+
%%machine lexer; # %
|
3
|
+
|
4
|
+
%%{
|
5
|
+
# %
|
6
|
+
##
|
7
|
+
# Basic character types - taken from:
|
8
|
+
# https://github.com/whitequark/parser/blob/master/lib/parser/lexer.rl
|
9
|
+
#
|
10
|
+
# License for whitequark/parser reproduced below.
|
11
|
+
|
12
|
+
# Copyright (c) 2013 Peter Zotov <whitequark@whitequark.org>
|
13
|
+
#
|
14
|
+
# Parts of the source are derived from ruby_parser:
|
15
|
+
# Copyright (c) Ryan Davis, seattle.rb
|
16
|
+
#
|
17
|
+
# MIT License
|
18
|
+
#
|
19
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
20
|
+
# a copy of this software and associated documentation files (the
|
21
|
+
# "Software"), to deal in the Software without restriction, including
|
22
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
23
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
24
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
25
|
+
# the following conditions:
|
26
|
+
#
|
27
|
+
# The above copyright notice and this permission notice shall be
|
28
|
+
# included in all copies or substantial portions of the Software.
|
29
|
+
#
|
30
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
31
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
32
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
33
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
34
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
35
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
36
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
37
|
+
|
38
|
+
c_nl = '\n' $ do_nl;
|
39
|
+
c_space = [ \t\r\f\v];
|
40
|
+
c_space_nl = c_space | c_nl;
|
41
|
+
|
42
|
+
c_eof = 0x04 | 0x1a | 0 | zlen; # ^D, ^Z, \0, EOF
|
43
|
+
c_eol = c_nl | c_eof;
|
44
|
+
c_any = any - c_eof;
|
45
|
+
|
46
|
+
c_nl_zlen = c_nl | zlen;
|
47
|
+
c_line = any - c_nl_zlen;
|
48
|
+
|
49
|
+
c_unicode = c_any - 0x00..0x7f;
|
50
|
+
c_upper = [A-Z];
|
51
|
+
c_lower = [a-z_] | c_unicode;
|
52
|
+
c_alpha = c_lower | c_upper;
|
53
|
+
c_alnum = c_alpha | [0-9];
|
54
|
+
|
55
|
+
}%%
|
56
|
+
# %
|
@@ -0,0 +1,95 @@
|
|
1
|
+
|
2
|
+
class CodeTools::Parser
|
3
|
+
class Lexer
|
4
|
+
|
5
|
+
# Resets the bookkeeping shared by the lexer actions: recorded newline
# offsets, named marks / note queues, stored ranges and the bracket stack.
# Warns when a previous run left entries on the bracket stack.
def reset_common
  if @bstack && !@bstack.empty?
    warn "Lexer still has items on @bstack: #{@bstack.inspect}"
  end

  @newlines = [0] # seeded so that #locate always finds a preceding entry
  @marks    = {}
  @stored   = {}
  @bstack   = []
end
|
14
|
+
|
15
|
+
# Remembers a position under +name+; defaults to the current value of @p.
# Returns the stored position.
def mark(name, pos = @p)
  @marks.store(name, pos)
end
|
18
|
+
|
19
|
+
# Removes the mark stored under +name+ and returns its value
# (nil when no such mark exists).
def kram(name)
  @marks.delete(name)
end
|
22
|
+
|
23
|
+
# Stores the range [start, stop] under +name+ in @stored and returns it.
# Defaults to the range from @ts to @p.
def grab(name, start = @ts, stop = @p)
  @stored.store(name, [start, stop])
end
|
26
|
+
|
27
|
+
# Starts a fresh note queue under +queue_name+, replacing any existing one.
# The queue is seeded with +pos+ (the current @p by default); passing nil
# starts it empty. Returns the new queue.
def note_begin(queue_name, pos = @p)
  @marks[queue_name] = pos ? [pos] : []
end
|
30
|
+
|
31
|
+
# Appends +pos+ (the current @p by default) to the note queue named
# +queue_name+, followed by +type+ when one is given. The queue is created
# on demand. Returns the queue.
def note(queue_name, type = nil, pos = @p)
  queue = (@marks[queue_name] ||= [])
  type ? queue.push(pos, type) : queue.push(pos)
end
|
37
|
+
|
38
|
+
# Drops the last +count+ entries from the note queue named +queue_name+
# (creating the queue when it does not exist yet) and returns the queue.
def unnote(queue_name, count = 1)
  (@marks[queue_name] ||= []).tap { |queue| queue.pop(count) }
end
|
43
|
+
|
44
|
+
# Emits one token per complete (start, stop, type) triple in the note queue
# named +queue_name+, then empties the queue. Incomplete trailing entries
# are discarded without emitting. Returns the emptied queue.
def emit_notes(queue_name)
  queue = @marks[queue_name] || []
  queue.each_slice(3) do |start, stop, type|
    next unless start && stop && type
    emit type, start, stop
  end
  queue.clear
end
|
49
|
+
|
50
|
+
# Moves every entry of the note queue +queue_name_a+ to the end of the
# queue +queue_name_b+ (created on demand), leaving the source queue empty.
# Returns the destination queue.
def xfer_notes(queue_name_a, queue_name_b)
  queue_a = @marks[queue_name_a] || []
  queue_b = (@marks[queue_name_b] ||= [])

  # Transferring a queue onto itself is a no-op; moving entries one at a
  # time from a queue to itself would never terminate.
  return queue_b if queue_a.equal?(queue_b)

  queue_b.concat(queue_a)
  queue_a.clear
  queue_b
end
|
56
|
+
|
57
|
+
# Pushes +name+ onto the bracket stack and returns the stack.
def bpush(name)
  @bstack.push(name)
end
|
60
|
+
|
61
|
+
# Returns the entry on top of the bracket stack without removing it
# (nil when the stack is empty).
def bthis
  @bstack[-1]
end
|
64
|
+
|
65
|
+
# Removes and returns the entry on top of the bracket stack
# (nil when the stack is empty).
def bpop
  @bstack.pop
end
|
68
|
+
|
69
|
+
# Reports unexpected input by printing a diagnostic with Kernel#warn.
# The message names the machine (+location+), the current token text, an
# optional +hint+, and dumps @marks, @stored, @bstack and @stack.
# It does not raise; the return value is that of Kernel#warn.
def error(location, hint = nil)
  headline = "Lexer met unexpected character(s) in #{location.inspect}: #{text.inspect}"
  headline += "; " + hint.to_s if hint

  report = [
    headline,
    "@marks = #{@marks}",
    "@stored = #{@stored}",
    "@bstack = #{@bstack}",
    "@stack = #{@stack}"
  ]
  warn report.join("\n") + "\n"
end
|
79
|
+
|
80
|
+
|
81
|
+
# Records the current position @p as a newline offset, unless that offset
# has been recorded before.
def do_nl
  return if @newlines.include?(@p)
  @newlines << @p
end
|
84
|
+
|
85
|
+
# Appends a token to @tokens. A token is a three-element Array holding the
# type, the text between +start+ and +stop+, and the location of +start+
# as reported by #locate. The range defaults to @ts...@te.
# Returns @tokens.
def emit(type, start = @ts, stop = @te)
  token = [type, text(start, stop), locate(start)]
  @tokens.push(token)
end
|
88
|
+
|
89
|
+
# Translates an offset into @data into a [row, column] pair, using the
# offsets recorded in @newlines. The row is the number of recorded offsets
# that are not greater than +index+; the column is measured from the last
# of those offsets, plus one.
#
# NOTE(review): the seed offset 0 marks the start of the first line, while
# offsets added by #do_nl appear to be the positions of the newline
# characters themselves, so columns on later rows look like they start at
# 2 rather than 1 - confirm before relying on exact column values.
def locate(index)
  preceding = @newlines.take_while { |offset| offset <= index }
  [preceding.size, index - preceding.last + 1]
end
|
93
|
+
|
94
|
+
end
|
95
|
+
end
|
@@ -0,0 +1,154 @@
|
|
1
|
+
|
2
|
+
##
|
3
|
+
# Basic Lexer skeleton - taken from gist:
|
4
|
+
# https://gist.github.com/YorickPeterse/10658884
|
5
|
+
# Originally from:
|
6
|
+
# https://github.com/YorickPeterse/oga/blob/master/lib/oga/xml/lexer.rl
|
7
|
+
#
|
8
|
+
# License for the source gist reproduced below.
|
9
|
+
|
10
|
+
# Copyright (c) 2014, Yorick Peterse
|
11
|
+
#
|
12
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
13
|
+
# of this software and associated documentation files (the "Software"), to deal
|
14
|
+
# in the Software without restriction, including without limitation the rights
|
15
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
16
|
+
# copies of the Software, and to permit persons to whom the Software is
|
17
|
+
# furnished to do so, subject to the following conditions:
|
18
|
+
#
|
19
|
+
# The above copyright notice and this permission notice shall be included in
|
20
|
+
# all copies or substantial portions of the Software.
|
21
|
+
#
|
22
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
23
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
24
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
25
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
26
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
27
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
28
|
+
# THE SOFTWARE.
|
29
|
+
|
30
|
+
%%machine lexer; # %
|
31
|
+
|
32
|
+
class CodeTools::Parser
|
33
|
+
class Lexer
|
34
|
+
%% write data; # %
|
35
|
+
|
36
|
+
##
# Builds a lexer for the given source text. The text is unpacked into an
# Array of Unicode codepoints, a trailing 0 is appended, and the lexer
# state is reset.
#
# @param [String] data The data to lex.
#
def initialize(data)
  codepoints = data.unpack('U*')
  codepoints << 0
  @data = codepoints

  reset
end
|
44
|
+
|
45
|
+
##
# Resets the internal state of the lexer. Typically you do not need to call
# this method yourself, as it is called by #initialize and by #lex once the
# input has been lexed.
#
def reset
  # Token boundaries and collected output
  @line     = 1
  @ts       = nil
  @te       = nil
  @tokens   = []
  @elements = []

  # State of the generated machine
  @stack = []
  @top   = 0
  @cs    = self.class.lexer_start
  @act   = 0

  # Input cursor: begin at the first element and run to the end of @data
  @p   = 0
  @eof = @data.length
  @pe  = @eof

  reset_common
end
|
65
|
+
|
66
|
+
##
# Lexes the data given to #initialize and returns an Array of tokens. Each
# token is an Array in the following format, as built by #emit:
#
#     [TYPE, TEXT, [ROW, COLUMN]]
#
# The type is a Symbol, the text is a String and the position is the pair
# returned by #locate for the start of the token.
#
# This method takes no arguments and resets the internal state of the
# lexer after consuming the input.
#
# @return [Array]
# @see #advance
#
def lex
  tokens = []

  # #advance hands back nil once no token is left.
  while (token = advance)
    tokens << token
  end

  reset

  tokens
end
|
92
|
+
|
93
|
+
##
# Advances through the input and generates the corresponding tokens.
#
# This method takes no arguments and does *not* reset the internal state
# of the lexer. It runs the generated machine code and then removes and
# returns the oldest token queued in @tokens.
#
# @return [Array, nil] the next token, or nil when no token is queued
#
def advance
  # Local copies of the class-level machine tables, fetched with send.
  # Presumably the generated code below refers to them by these names -
  # verify against the generated lexer.rb.
  _lexer_actions            = self.class.send :_lexer_actions
  _lexer_range_lengths      = self.class.send :_lexer_range_lengths
  _lexer_trans_actions      = self.class.send :_lexer_trans_actions
  _lexer_key_offsets        = self.class.send :_lexer_key_offsets
  _lexer_index_offsets      = self.class.send :_lexer_index_offsets
  _lexer_to_state_actions   = self.class.send :_lexer_to_state_actions
  _lexer_trans_keys         = self.class.send :_lexer_trans_keys
  _lexer_from_state_actions = self.class.send :_lexer_from_state_actions
  _lexer_single_lengths     = self.class.send :_lexer_single_lengths
  _lexer_trans_targs        = self.class.send :_lexer_trans_targs
  _lexer_eof_trans          = self.class.send :_lexer_eof_trans
  _lexer_indicies           = self.class.send :_lexer_indicies

  %% write exec;
  # %

  @tokens.shift
end
|
120
|
+
|
121
|
+
private
|
122
|
+
|
123
|
+
##
# Returns the text of the current buffer between the supplied start
# (inclusive) and stop (exclusive) positions, packed back into a String.
#
# By default `@ts` and `@te` are used as the start/stop position.
#
# @param [Integer] start
# @param [Integer] stop
# @return [String]
#
def text(start = @ts, stop = @te)
  codepoints = @data[start...stop]
  codepoints.pack('U*')
end
|
136
|
+
|
137
|
+
|
138
|
+
%%{
|
139
|
+
# %
|
140
|
+
# Use instance variables for `ts` and friends.
|
141
|
+
access @;
|
142
|
+
getkey (@data[@p] || 0);
|
143
|
+
variable p @p;
|
144
|
+
variable pe @pe;
|
145
|
+
variable eof @eof;
|
146
|
+
|
147
|
+
action do_nl { do_nl }
|
148
|
+
|
149
|
+
include "lexer_char_classes.rl"; # Basic character classes
|
150
|
+
include "lexer.rl"; # Main rules file
|
151
|
+
}%%
|
152
|
+
# %
|
153
|
+
end
|
154
|
+
end
|