myco 0.1.0.dev
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +2 -0
- data/bin/myco +7 -0
- data/lib/myco/backtrace.rb +56 -0
- data/lib/myco/bootstrap/component.rb +142 -0
- data/lib/myco/bootstrap/empty_object.rb +4 -0
- data/lib/myco/bootstrap/file_toplevel.rb +5 -0
- data/lib/myco/bootstrap/find_constant.rb +86 -0
- data/lib/myco/bootstrap/instance.rb +52 -0
- data/lib/myco/bootstrap/meme.rb +160 -0
- data/lib/myco/bootstrap/void.rb +40 -0
- data/lib/myco/bootstrap.my +15 -0
- data/lib/myco/bootstrap.rb +10 -0
- data/lib/myco/command.my +33 -0
- data/lib/myco/core/BasicObject.my +46 -0
- data/lib/myco/core/Category.my +5 -0
- data/lib/myco/core/Decorator.my +18 -0
- data/lib/myco/core/FileToplevel.my +23 -0
- data/lib/myco/core/Object.my +24 -0
- data/lib/myco/core/Switch.my +31 -0
- data/lib/myco/eval.rb +63 -0
- data/lib/myco/parser/ast/constant_access.rb +29 -0
- data/lib/myco/parser/ast/constant_define.rb +40 -0
- data/lib/myco/parser/ast/constant_reopen.rb +47 -0
- data/lib/myco/parser/ast/declare_category.rb +51 -0
- data/lib/myco/parser/ast/declare_decorator.rb +35 -0
- data/lib/myco/parser/ast/declare_file.rb +54 -0
- data/lib/myco/parser/ast/declare_meme.rb +44 -0
- data/lib/myco/parser/ast/declare_object.rb +75 -0
- data/lib/myco/parser/ast/declare_string.rb +37 -0
- data/lib/myco/parser/ast/invoke.rb +66 -0
- data/lib/myco/parser/ast/local_variable_access_ambiguous.rb +38 -0
- data/lib/myco/parser/ast/misc.rb +61 -0
- data/lib/myco/parser/ast/myco_module_scope.rb +58 -0
- data/lib/myco/parser/ast/quest.rb +82 -0
- data/lib/myco/parser/ast.rb +15 -0
- data/lib/myco/parser/builder.output +3995 -0
- data/lib/myco/parser/builder.racc +585 -0
- data/lib/myco/parser/builder.rb +1592 -0
- data/lib/myco/parser/lexer.rb +2306 -0
- data/lib/myco/parser/lexer.rl +393 -0
- data/lib/myco/parser/lexer_char_classes.rl +56 -0
- data/lib/myco/parser/lexer_common.rb +95 -0
- data/lib/myco/parser/lexer_skeleton.rl +154 -0
- data/lib/myco/parser/peg_parser.kpeg +759 -0
- data/lib/myco/parser/peg_parser.rb +7094 -0
- data/lib/myco/parser.rb +40 -0
- data/lib/myco/tools/OptionParser.my +38 -0
- data/lib/myco/tools/mycompile.my +51 -0
- data/lib/myco/toolset.rb +16 -0
- data/lib/myco/version.rb +22 -0
- data/lib/myco.rb +15 -0
- metadata +247 -0
@@ -0,0 +1,393 @@
|
|
1
|
+
|
2
|
+
%%machine lexer; # %
|
3
|
+
|
4
|
+
%%{
|
5
|
+
# %
|
6
|
+
# Names: a constant starts with c_upper, an identifier with c_lower;
# both continue with any number of c_alnum characters.
constant     = c_upper c_alnum* ;
identifier   = c_lower c_alnum* ;

# End-of-line comment: '#' followed by anything that is not c_eol.
comment      = '#' (any - c_eol)*;

# Unsigned numeric literals; a float needs digits on both sides of '.'.
integer      = [0-9]+ ;
float        = [0-9]+ '.' [0-9]+ ;

# String contents: strbody_norm is any single character other than a
# backslash or a double quote; strbody additionally allows a backslash
# followed by one c_any character between runs of strbody_norm.
strbody_norm = ^('\\' | '"');
strbody      = strbody_norm* ('\\' c_any strbody_norm*)*;
|
16
|
+
|
17
|
+
# "foo bar"
|
18
|
+
#
|
19
|
+
# Each leaving action records the current position in the :string note
# queue; the resulting T_STRING_BEGIN / T_STRING_BODY / T_STRING_END
# entries are emitted later by emit_notes :string.
string = (
  zlen    % { note_begin :string }
  '"'     % { note :string, :T_STRING_BEGIN; note :string }
  strbody % { note :string, :T_STRING_BODY;  note :string }
  '"'     % { note :string, :T_STRING_END }
);
|
25
|
+
|
26
|
+
# id: foo
|
27
|
+
#
|
28
|
+
# The literal tag 'id', optional spaces, ':', optional spaces, then an
# identifier. Notes for T_DECLID_TAG and T_DECLID_VALUE are queued under
# :declid; the colon and surrounding spaces produce no token.
declid = (
  zlen       % { note_begin :declid }
  'id'       % { note :declid, :T_DECLID_TAG }
  c_space*
  ':'
  c_space*   % { note :declid }
  identifier % { note :declid, :T_DECLID_VALUE }
);
|
36
|
+
|
37
|
+
# [foo]
|
38
|
+
#
|
39
|
+
# A bracketed identifier with optional inner spaces. Notes for
# T_CATEGORY_BEGIN, T_CATEGORY_BODY and T_CATEGORY_END are queued
# under :category.
category = (
  zlen       % { note_begin :category }
  '['        % { note :category, :T_CATEGORY_BEGIN }
  c_space*   % { note :category }
  identifier % { note :category, :T_CATEGORY_BODY }
  c_space*   % { note :category }
  ']'        % { note :category, :T_CATEGORY_END }
);
|
47
|
+
|
48
|
+
# :foo
|
49
|
+
# :"bar baz"
|
50
|
+
#
|
51
|
+
# A colon followed by either a bare identifier (T_SYMBOL) or a
# double-quoted body (T_SYMSTR_BEGIN / T_SYMSTR_BODY / T_SYMSTR_END).
# The note queue is started after the colon, so the colon itself is not
# part of any queued token.
symbol = (
  ':' % { note_begin :symbol }
  (
    (
      identifier % { note :symbol, :T_SYMBOL; }
    )
  | (
      '"'     % { note :symbol, :T_SYMSTR_BEGIN; note :symbol }
      strbody % { note :symbol, :T_SYMSTR_BODY;  note :symbol }
      '"'     % { note :symbol, :T_SYMSTR_END; }
    )
  )
);
|
64
|
+
|
65
|
+
# Foo
|
66
|
+
# ::Bar
|
67
|
+
# Foo::Bar
|
68
|
+
# ::Foo::Bar::Baz
|
69
|
+
#
|
70
|
+
# A possibly scoped constant: an optional leading '::', any number of
# "Constant::" prefixes, then a final constant. T_SCOPE and T_CONSTANT
# notes are queued under :sconstant in source order.
sconstant = (
  zlen % { note_begin :sconstant }
  (
    '::'     % { note :sconstant, :T_SCOPE;    note :sconstant }
  )? (
    constant % { note :sconstant, :T_CONSTANT; note :sconstant }
    '::'     % { note :sconstant, :T_SCOPE;    note :sconstant }
  )*
  constant   % { note :sconstant, :T_CONSTANT }
);
|
80
|
+
|
81
|
+
# Foo,Bar,Baz
|
82
|
+
#
|
83
|
+
# One or more sconstants separated by commas. The :constant_list queue
# starts empty (note_begin with a nil position); after each sconstant
# its queued notes are moved over with xfer_notes, and each comma adds a
# T_CONST_SEP note. Newlines are accepted after a comma, not before it.
constant_list = (
  zlen      % { note_begin :constant_list, nil }
  sconstant % { xfer_notes :sconstant, :constant_list }
  (
    c_space*  % { note :constant_list }
    ','       % { note :constant_list, :T_CONST_SEP }
    c_space_nl*
    sconstant % { xfer_notes :sconstant, :constant_list }
  )*
);
|
93
|
+
|
94
|
+
# Foo <
|
95
|
+
#
|
96
|
+
# A constant, optional spaces, then '<'. The queued T_CONSTANT and
# T_DEFINE notes are emitted as soon as the whole pattern is left.
cdefn_begin = (
  zlen     % { note_begin :cdefn_begin }
  constant % { note :cdefn_begin, :T_CONSTANT }
  c_space* % { note :cdefn_begin }
  '<'      % { note :cdefn_begin, :T_DEFINE }
) % {
  emit_notes :cdefn_begin
};
|
104
|
+
|
105
|
+
# Object {
|
106
|
+
#
|
107
|
+
# An optional cdefn_begin prefix, a constant_list, optional whitespace
# or newlines, then '{'. On leaving, the constant_list notes are emitted
# first, followed by the T_DECLARE_BEGIN note queued under :decl_begin.
decl_begin = (
  (cdefn_begin c_space_nl*)?
  constant_list
  c_space_nl* % { note_begin :decl_begin }
  '{'         % { note :decl_begin, :T_DECLARE_BEGIN }
) % {
  emit_notes :constant_list
  emit_notes :decl_begin
};
|
116
|
+
|
117
|
+
# Starting delimiter for a string declaration
|
118
|
+
#
|
119
|
+
# Can be any string of characters following a
|
120
|
+
# constant name + whitespace that is not ambiguous
|
121
|
+
# with some other construction
|
122
|
+
#
|
123
|
+
# The ending delimiter is calculated from the starting delimiter as follows:
|
124
|
+
# The string of characters is reversed.
|
125
|
+
# If there are groups of "alphabetical" characters,
|
126
|
+
# the intra-group order remains intact.
|
127
|
+
# If there are non-alphabetical characters with "directionality",
|
128
|
+
# the "opposite" characters are substituted.
|
129
|
+
#
|
130
|
+
# At least two characters: the first may not be whitespace, a newline,
# '{', ':' or ','; every following one may not be whitespace or a
# newline.
dstr_delim = (
  ^(c_space_nl|'{'|':'|',')
  ^(c_space_nl)+
);
|
134
|
+
|
135
|
+
# Object @@@
|
136
|
+
# ...
|
137
|
+
# @@@
|
138
|
+
#
|
139
|
+
# A constant_list, at least one space, then a dstr_delim.
#
# On leaving the pattern the action:
#   1. emits the queued constant_list tokens,
#   2. emits T_DECLSTR_BEGIN covering the delimiter text, and
#   3. computes the matching ending delimiter and stores it in
#      @dstr_delim, which is the variable the dstr_body machine reads
#      and clears.
#
# The ending delimiter is built by splitting the starting delimiter so
# that runs of ASCII letters stay together and every other character is
# a piece of its own, swapping each piece found in the replacement table
# for its partner, and reversing the order of the pieces.
# For example "<<<" gives ">>>" and "(foo[" gives "]foo)".
dstr_begin = (
  constant_list
  c_space+   % { mark :space }
  dstr_delim % { grab :delim, kram(:space) }
) % {
  emit_notes :constant_list

  start, stop = @stored[:delim]
  emit :T_DECLSTR_BEGIN, start, stop

  # Table of replacement characters to use when calculating
  # the ending delimiter from the starting delimiter.
  # Directional characters are replaced with their opposite.
  @dstr_replace_table ||= %w{
    < > ( ) { } [ ]
  }

  # Calculate the ending delimiter to look for and store it
  # under the name that dstr_body checks.
  @dstr_delim = text(start, stop) \
    .split(/(?<=[^a-zA-Z])|(?=[^a-zA-Z])/)
    .map { |str|
      idx = @dstr_replace_table.find_index(str)
      idx.nil? ? str :
        (idx.odd? ? @dstr_replace_table[idx-1] : @dstr_replace_table[idx+1])
    }
    .reverse
    .join ''
};
|
167
|
+
|
168
|
+
# identifier (
|
169
|
+
#
|
170
|
+
# An identifier, optional whitespace or newlines, then '('. Notes for
# T_IDENTIFIER and T_ARGS_BEGIN are queued under :args_begin.
args_begin = (
  zlen        % { note_begin :args_begin }
  identifier  % { note :args_begin, :T_IDENTIFIER }
  c_space_nl* % { note :args_begin }
  '('         % { note :args_begin, :T_ARGS_BEGIN }
);
|
176
|
+
|
177
|
+
##
|
178
|
+
# Top level machine
|
179
|
+
|
180
|
+
# Entry scanner. Spaces and comments produce no token. Declaration and
# string-declaration openers call into their body machines, a lone ':'
# calls pre_meme, and any character matched by nothing else is reported
# through error.
main := |*
  c_space;
  comment;

  decl_begin  => { fcall decl_body; };
  dstr_begin  => { fcall dstr_body; };

  string      => { emit_notes :string };
  declid      => { emit_notes :declid };
  category    => { emit_notes :category };
  identifier  => { emit :T_IDENTIFIER };
  constant    => { emit :T_CONSTANT };
  '::'        => { emit :T_SCOPE };

  ':'         => { fcall pre_meme; };

  ';'         => { emit :T_EXPR_SEP };
  c_nl        => { emit :T_EXPR_SEP };

  c_eof       => { emit :T_DECLARE_END };
  any         => { error :main };
*|;
|
202
|
+
|
203
|
+
##
|
204
|
+
# Declarative body machine
|
205
|
+
|
206
|
+
# Scanner for the inside of a '{ ... }' declaration. It accepts the same
# items as main, but both ';' and c_eol emit T_EXPR_SEP, and a closing
# '}' emits T_DECLARE_END and returns to the calling machine.
decl_body := |*
  c_space;
  comment;

  (c_eol|';') => { emit :T_EXPR_SEP };

  decl_begin  => { fcall decl_body; };
  dstr_begin  => { fcall dstr_body; };

  string      => { emit_notes :string };
  declid      => { emit_notes :declid };
  category    => { emit_notes :category };
  identifier  => { emit :T_IDENTIFIER };
  constant    => { emit :T_CONSTANT };
  '::'        => { emit :T_SCOPE };

  ':'         => { fcall pre_meme; };

  '}'         => { emit :T_DECLARE_END; fret; };

  any         => { error :decl_body };
*|;
|
228
|
+
|
229
|
+
##
|
230
|
+
# Pre-meme body sub-machines
|
231
|
+
|
232
|
+
# Scanner entered after a ':'; it decides how the meme body starts and
# then hands over to meme_body.
pre_meme := |*
  c_space_nl+;
  comment;

  # A '|' opens a parameter list. meme_body is entered with fcall, so
  # scanning resumes here once the parameter list has been closed.
  '|' => { emit :T_PARAMS_BEGIN; bpush :param; fcall meme_body; };

  # Anything else that is not whitespace starts an inline meme: the
  # character is held back (fhold) and a zero-width T_MEME_BEGIN is
  # emitted at its position.
  ^(c_space_nl|'{'|'|') =>
    { fhold; emit :T_MEME_BEGIN, @ts, @ts; bpush :meml; fgoto meme_body; };

  # A '{' starts a braced meme.
  '{' => { emit :T_MEME_BEGIN; bpush :meme; fgoto meme_body; };

  any => { error :pre_meme };
*|;
|
246
|
+
|
247
|
+
##
|
248
|
+
# Declarative string machine
|
249
|
+
|
250
|
+
# Scanner for the body of a string declaration. It consumes one line at
# a time (a newline followed by everything up to the next newline) and
# compares the first run of non-whitespace characters on that line with
# the ending delimiter held in @dstr_delim.
dstr_body := |*
  (
    c_nl     % { mark :newline }
    (^c_nl)* % { grab :line, kram(:newline) }
  ) => {
    start, stop = @stored[:line];
    line_text = text start, stop

    raise "No known delimiter for string declaration." \
      if @dstr_delim.nil?

    match = /^(\s*)(\S+)/.match(line_text)

    if match && match[2] == @dstr_delim
      # Delimiter line: emit the body collected so far, clear the
      # per-declaration state, emit the end token and return.
      emit :T_DECLSTR_BODY, *@dstr_body_start, start
      @dstr_body_start = nil
      @dstr_delim = nil

      emit :T_DECLSTR_END, start+match[1].size, stop
      fret;
    else
      # Ordinary line: remember where the body started, once.
      @dstr_body_start ||= start
    end
  };
*|;
|
273
|
+
|
274
|
+
##
|
275
|
+
# Meme body machine
|
276
|
+
|
277
|
+
# Scanner for executable meme bodies. Nested constructs push a marker
# with bpush and re-enter this machine with fcall; the matching closer
# pops the marker and returns with fret. Characters whose meaning depends
# on the enclosing construct are dispatched on bthis, the marker on top
# of the stack.
meme_body := |*
  c_space+;
  comment;

  decl_begin => { fcall decl_body; };
  dstr_begin => { fcall dstr_body; };

  args_begin => { emit_notes :args_begin; bpush :args;  fcall meme_body; };
  '('        => { emit :T_PAREN_BEGIN;    bpush :paren; fcall meme_body; };
  '['        => { emit :T_ARRAY_BEGIN;    bpush :array; fcall meme_body; };
  '{'        => { emit :T_MEME_BEGIN;     bpush :meme;  fcall meme_body; };

  'self'     => { emit :T_SELF };
  'null'     => { emit :T_NULL };
  'void'     => { emit :T_VOID };
  'true'     => { emit :T_TRUE };
  'false'    => { emit :T_FALSE };
  integer    => { emit :T_INTEGER };
  float      => { emit :T_FLOAT };
  constant   => { emit :T_CONSTANT };
  identifier => { emit :T_IDENTIFIER };
  '.'        => { emit :T_DOT };
  '?'        => { emit :T_QUEST };
  '::'       => { emit :T_SCOPE };
  '='        => { emit :T_ASSIGN };
  '+'        => { emit :T_OP_PLUS };
  '-'        => { emit :T_OP_MINUS };
  '*'        => { emit :T_OP_MULT };
  '/'        => { emit :T_OP_DIV };
  '%'        => { emit :T_OP_MOD };
  '**'       => { emit :T_OP_EXP };
  ('<'|'>'|'<='|'>='|'=='|'==='|'<=>'|'=~')
             => { emit :T_OP_COMPARE };
  '&&'       => { emit :T_OP_AND };
  '||'       => { emit :T_OP_OR };

  symbol     => { emit_notes :symbol };
  string     => { emit_notes :string };

  '\\\n'; # Escaped newline - ignore

  # Only accepted inside parameter or argument lists.
  '&' => {
    case bthis
    when :param, :args; emit :T_OP_TOPROC
    else;               error :meme_body
    end
  };

  # Separates items in argument lists, parameter lists and arrays.
  ',' => {
    case bthis
    when :args, :param, :array; emit :T_ARG_SEP
    else;                       error :meme_body
    end
  };

  # Separates expressions in memes and parenthesized groups.
  ';' => {
    case bthis
    when :meme, :meml, :paren; emit :T_EXPR_SEP
    else;                      error :meme_body
    end
  };

  # End of line: a separator in most contexts, but it terminates an
  # inline meme (:meml) with a zero-width T_MEME_END and is held back
  # for the calling machine to scan again.
  c_eol => {
    case bthis
    when :meme, :paren;         emit :T_EXPR_SEP
    when :meml;                 emit :T_MEME_END, @ts, @ts; fhold; bpop; fret;
    when :args, :param, :array; emit :T_ARG_SEP
    else;                       error :meme_body
    end
  };

  '}' => {
    case bthis
    when :meme; emit :T_MEME_END; bpop; fret;
    else;       error :meme_body
    end
  };

  ')' => {
    case bthis
    when :args;  emit :T_ARGS_END;  bpop; fret;
    when :paren; emit :T_PAREN_END; bpop; fret;
    else;        error :meme_body
    end
  };

  ']' => {
    case bthis
    when :array; emit :T_ARRAY_END; bpop; fret;
    else;        error :meme_body
    end
  };

  # Closes the current parameter list, or opens a new one when seen
  # inside a meme or a parenthesized group.
  '|' => {
    case bthis
    when :param;               emit :T_PARAMS_END;   bpop; fret;
    when :meme, :meml, :paren; emit :T_PARAMS_BEGIN; bpush :param; fcall meme_body;
    else;                      error :meme_body
    end
  };

  any => { error :meme_body };
*|;
|
391
|
+
|
392
|
+
}%%
|
393
|
+
# %
|
@@ -0,0 +1,56 @@
|
|
1
|
+
|
2
|
+
%%machine lexer; # %
|
3
|
+
|
4
|
+
%%{
|
5
|
+
# %
|
6
|
+
##
|
7
|
+
# Basic character types - taken from:
|
8
|
+
# https://github.com/whitequark/parser/blob/master/lib/parser/lexer.rl
|
9
|
+
#
|
10
|
+
# License for whitequark/parser reproduced below.
|
11
|
+
|
12
|
+
# Copyright (c) 2013 Peter Zotov <whitequark@whitequark.org>
|
13
|
+
#
|
14
|
+
# Parts of the source are derived from ruby_parser:
|
15
|
+
# Copyright (c) Ryan Davis, seattle.rb
|
16
|
+
#
|
17
|
+
# MIT License
|
18
|
+
#
|
19
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
20
|
+
# a copy of this software and associated documentation files (the
|
21
|
+
# "Software"), to deal in the Software without restriction, including
|
22
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
23
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
24
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
25
|
+
# the following conditions:
|
26
|
+
#
|
27
|
+
# The above copyright notice and this permission notice shall be
|
28
|
+
# included in all copies or substantial portions of the Software.
|
29
|
+
#
|
30
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
31
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
32
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
33
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
34
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
35
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
36
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
37
|
+
|
38
|
+
# Newline; every transition over it runs the do_nl action.
c_nl       = '\n' $ do_nl;
# Horizontal whitespace (no newline), and whitespace including newline.
c_space    = [ \t\r\f\v];
c_space_nl = c_space | c_nl;

# End of input markers, end of line, and any character that is not an
# end of input marker.
c_eof      = 0x04 | 0x1a | 0 | zlen; # ^D, ^Z, \0, EOF
c_eol      = c_nl | c_eof;
c_any      = any - c_eof;

c_nl_zlen  = c_nl | zlen;
c_line     = any - c_nl_zlen;

# Letter classes. Everything above 0x7f counts as a lowercase letter, so
# such characters may start an identifier but not a constant.
c_unicode  = c_any - 0x00..0x7f;
c_upper    = [A-Z];
c_lower    = [a-z_] | c_unicode;
c_alpha    = c_lower | c_upper;
c_alnum    = c_alpha | [0-9];
|
54
|
+
|
55
|
+
}%%
|
56
|
+
# %
|
@@ -0,0 +1,95 @@
|
|
1
|
+
|
2
|
+
class CodeTools::Parser
|
3
|
+
class Lexer
|
4
|
+
|
5
|
+
# Resets the bookkeeping shared by the lexer actions: recorded newline
# positions, named marks / note queues, stored ranges and the stack of
# open constructs. Warns first if the construct stack still has entries.
#
# @return [Array] the new, empty @bstack
def reset_common
  leftover = @bstack
  unless leftover.nil? || leftover.empty?
    warn "Lexer still has items on @bstack: #{leftover.inspect}"
  end

  @newlines = [0]
  @marks = {}
  @stored = {}
  @bstack = []
end
|
14
|
+
|
15
|
+
# Remembers a position under the given name.
#
# @param name [Symbol] key to store the position under
# @param pos [Integer] position to remember; defaults to the current @p
# @return [Integer] the stored position
def mark(name, pos = @p)
  @marks.store(name, pos)
end
|
18
|
+
|
19
|
+
# Removes a named entry from @marks and hands back its value.
#
# @param name [Symbol] key previously used with #mark
# @return [Object, nil] the removed value, or nil if nothing was stored
def kram(name)
  removed = @marks.delete(name)
  removed
end
|
22
|
+
|
23
|
+
# Stores a [start, stop] pair under the given name in @stored.
#
# @param name [Symbol] key to store the pair under
# @param start [Integer] defaults to @ts
# @param stop [Integer] defaults to @p
# @return [Array] the stored [start, stop] pair
def grab(name, start = @ts, stop = @p)
  @stored.store(name, [start, stop])
end
|
26
|
+
|
27
|
+
# Starts (or restarts) the note queue with the given name, replacing any
# previous contents.
#
# @param queue_name [Symbol] key of the queue in @marks
# @param pos [Integer, nil] first entry of the new queue; defaults to
#   the current @p. A nil (or false) value starts the queue empty.
# @return [Array] the new queue
def note_begin(queue_name, pos = @p)
  @marks[queue_name] = pos ? [pos] : []
end
|
30
|
+
|
31
|
+
# Appends a position, and optionally a token type after it, to a note
# queue, creating the queue if it does not exist yet.
#
# @param queue_name [Symbol] key of the queue in @marks
# @param type [Symbol, nil] token type appended after the position
# @param pos [Integer] position to append; defaults to the current @p
# @return [Array] the queue
def note(queue_name, type = nil, pos = @p)
  @marks[queue_name] ||= []
  entries = @marks[queue_name]
  type ? entries.push(pos, type) : entries.push(pos)
end
|
37
|
+
|
38
|
+
# Drops the last entries of a note queue, creating the queue if it does
# not exist yet.
#
# @param queue_name [Symbol] key of the queue in @marks
# @param count [Integer] number of trailing entries to remove
# @return [Array] the queue after removal
def unnote(queue_name, count = 1)
  (@marks[queue_name] ||= []).tap { |entries| entries.pop(count) }
end
|
43
|
+
|
44
|
+
# Emits one token for every complete (start, stop, type) triple in a
# note queue, then empties the queue. Triples with a missing member,
# such as a trailing start position, are skipped.
#
# @param queue_name [Symbol] key of the queue in @marks
# @return [Array] the emptied queue
def emit_notes(queue_name)
  pending = @marks[queue_name] || []
  pending.each_slice(3) do |start, stop, type|
    next unless start && stop && type
    emit type, start, stop
  end
  pending.clear
end
|
49
|
+
|
50
|
+
# Moves every entry of one note queue to the end of another, leaving the
# source queue empty. The destination queue is created if needed.
#
# @param queue_name_a [Symbol] queue to take entries from
# @param queue_name_b [Symbol] queue to append entries to
# @return [Array] the destination queue
def xfer_notes(queue_name_a, queue_name_b)
  source = @marks[queue_name_a] || []
  target = (@marks[queue_name_b] ||= [])
  target.concat(source.shift(source.length))
end
|
56
|
+
|
57
|
+
# Pushes a marker for a newly opened construct onto @bstack.
#
# @param name [Symbol] marker to push
# @return [Array] the stack
def bpush(name)
  @bstack.push(name)
end
|
60
|
+
|
61
|
+
# Returns the marker on top of @bstack without removing it.
#
# @return [Symbol, nil] the top marker, or nil when the stack is empty
def bthis
  @bstack[-1]
end
|
64
|
+
|
65
|
+
# Removes and returns the marker on top of @bstack.
#
# @return [Symbol, nil] the removed marker, or nil when the stack is empty
def bpop
  closed = @bstack.pop
  closed
end
|
68
|
+
|
69
|
+
# Reports unexpected input through warn. The message names the machine
# that hit the problem, shows the current token text, and dumps the
# lexer bookkeeping variables. Nothing is raised.
#
# @param location [Symbol] name of the machine reporting the problem
# @param hint [Object, nil] extra text appended to the first line
# @return [nil]
def error(location, hint = nil)
  headline = "Lexer met unexpected character(s) in #{location.inspect}: #{text.inspect}"
  headline = "#{headline}; #{hint}" if hint

  state = [
    "@marks = #{@marks}",
    "@stored = #{@stored}",
    "@bstack = #{@bstack}",
    "@stack = #{@stack}",
  ]

  # The trailing empty element makes the message end with a newline.
  warn [headline, *state, ""].join("\n")
end
|
79
|
+
|
80
|
+
|
81
|
+
# Records the current position @p as a newline position, unless that
# position has been recorded before.
#
# @return [Array, nil] @newlines when a position was added, otherwise nil
def do_nl
  return if @newlines.include?(@p)

  @newlines.push(@p)
end
|
84
|
+
|
85
|
+
# Appends a token to @tokens. A token is a three-element array: the
# type, the source text between start and stop, and the location
# computed from start.
#
# @param type [Symbol] token type
# @param start [Integer] defaults to @ts
# @param stop [Integer] defaults to @te
# @return [Array] @tokens
def emit(type, start = @ts, stop = @te)
  token = [type, text(start, stop), locate(start)]
  @tokens.push(token)
end
|
88
|
+
|
89
|
+
# Converts a position in the input into a [row, column] pair using the
# recorded newline positions.
#
# The row is the number of leading entries of @newlines that are not
# greater than the index; the column is the distance from the last of
# those entries, plus one.
#
# @param index [Integer] position in the input
# @return [Array<Integer>] the [row, column] pair
def locate(index)
  preceding = @newlines.take_while { |newline_pos| newline_pos <= index }
  row = preceding.size
  col = index - preceding.last + 1
  [row, col]
end
|
93
|
+
|
94
|
+
end
|
95
|
+
end
|
@@ -0,0 +1,154 @@
|
|
1
|
+
|
2
|
+
##
|
3
|
+
# Basic Lexer skeleton - taken from gist:
|
4
|
+
# https://gist.github.com/YorickPeterse/10658884
|
5
|
+
# Originally from:
|
6
|
+
# https://github.com/YorickPeterse/oga/blob/master/lib/oga/xml/lexer.rl
|
7
|
+
#
|
8
|
+
# License for the source gist reproduced below.
|
9
|
+
|
10
|
+
# Copyright (c) 2014, Yorick Peterse
|
11
|
+
#
|
12
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
13
|
+
# of this software and associated documentation files (the "Software"), to deal
|
14
|
+
# in the Software without restriction, including without limitation the rights
|
15
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
16
|
+
# copies of the Software, and to permit persons to whom the Software is
|
17
|
+
# furnished to do so, subject to the following conditions:
|
18
|
+
#
|
19
|
+
# The above copyright notice and this permission notice shall be included in
|
20
|
+
# all copies or substantial portions of the Software.
|
21
|
+
#
|
22
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
23
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
24
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
25
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
26
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
27
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
28
|
+
# THE SOFTWARE.
|
29
|
+
|
30
|
+
%%machine lexer; # %
|
31
|
+
|
32
|
+
class CodeTools::Parser
|
33
|
+
class Lexer
|
34
|
+
%% write data; # %
|
35
|
+
|
36
|
+
##
# Converts the input into an array of code points with a trailing 0
# appended, then resets the lexer state.
#
# @param [String] data The data to lex.
#
def initialize(data)
  @data = data.unpack('U*').push(0)

  reset
end
|
44
|
+
|
45
|
+
##
# Resets the internal state of the lexer. Typically you don't need to
# call this method yourself as it's called by #lex after lexing a given
# String.
#
# The scan position goes back to the start of the data, the machine
# state goes back to the start state of the generated lexer, and
# #reset_common clears the bookkeeping shared by the lexer actions.
#
def reset
  @line = 1
  @ts = @te = nil
  @tokens = []
  @stack = []
  @elements = []
  @top = @act = @p = 0
  @cs = self.class.lexer_start
  @eof = @pe = @data.length
  reset_common
end
|
65
|
+
|
66
|
+
##
# Lexes the data given to the constructor and returns an Array of
# tokens. Each token is an Array in the format built by #emit:
#
#     [TYPE, VALUE, [ROW, COLUMN]]
#
# The type is a symbol and the value is a String.
#
# This method resets the internal state of the lexer after consuming the
# input.
#
# @return [Array]
# @see #advance
#
def lex
  collected = []

  loop do
    token = advance
    break unless token

    collected << token
  end

  reset

  collected
end
|
92
|
+
|
93
|
+
##
# Runs the generated lexer machine and returns the next queued token.
#
# This method does *not* reset the internal state of the lexer.
#
# The local variables below keep the names of the generated tables
# because the code inserted by `write exec` refers to them by name.
#
# @return [Array, nil] the first token in @tokens, or nil when none is left
#
def advance
  _lexer_actions            = self.class.send(:_lexer_actions)
  _lexer_range_lengths      = self.class.send(:_lexer_range_lengths)
  _lexer_trans_actions      = self.class.send(:_lexer_trans_actions)
  _lexer_key_offsets        = self.class.send(:_lexer_key_offsets)
  _lexer_index_offsets      = self.class.send(:_lexer_index_offsets)
  _lexer_to_state_actions   = self.class.send(:_lexer_to_state_actions)
  _lexer_trans_keys         = self.class.send(:_lexer_trans_keys)
  _lexer_from_state_actions = self.class.send(:_lexer_from_state_actions)
  _lexer_single_lengths     = self.class.send(:_lexer_single_lengths)
  _lexer_trans_targs        = self.class.send(:_lexer_trans_targs)
  _lexer_eof_trans          = self.class.send(:_lexer_eof_trans)
  _lexer_indicies           = self.class.send(:_lexer_indicies)

  %% write exec;
  # %

  @tokens.shift
end
|
120
|
+
|
121
|
+
private
|
122
|
+
|
123
|
+
##
# Returns the text of the current buffer based on the supplied start and
# stop position. The stop position is exclusive.
#
# By default `@ts` and `@te` are used as the start/stop position.
#
# @param [Fixnum] start
# @param [Fixnum] stop
# @return [String]
#
def text(start = @ts, stop = @te)
  codepoints = @data[start...stop]
  codepoints.pack('U*')
end
|
136
|
+
|
137
|
+
|
138
|
+
%%{
  # %
  # Use instance variables for `ts` and friends.
  access @;
  # Read the current character from @data; positions past the end read as 0.
  getkey (@data[@p] || 0);
  variable p   @p;
  variable pe  @pe;
  variable eof @eof;

  # Ragel action that forwards to the Ruby method of the same name.
  action do_nl { do_nl }

  include "lexer_char_classes.rl"; # Basic character classes
  include "lexer.rl"; # Main rules file
}%%
|
152
|
+
# %
|
153
|
+
end
|
154
|
+
end
|