myco 0.1.0.dev

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +2 -0
  3. data/bin/myco +7 -0
  4. data/lib/myco/backtrace.rb +56 -0
  5. data/lib/myco/bootstrap/component.rb +142 -0
  6. data/lib/myco/bootstrap/empty_object.rb +4 -0
  7. data/lib/myco/bootstrap/file_toplevel.rb +5 -0
  8. data/lib/myco/bootstrap/find_constant.rb +86 -0
  9. data/lib/myco/bootstrap/instance.rb +52 -0
  10. data/lib/myco/bootstrap/meme.rb +160 -0
  11. data/lib/myco/bootstrap/void.rb +40 -0
  12. data/lib/myco/bootstrap.my +15 -0
  13. data/lib/myco/bootstrap.rb +10 -0
  14. data/lib/myco/command.my +33 -0
  15. data/lib/myco/core/BasicObject.my +46 -0
  16. data/lib/myco/core/Category.my +5 -0
  17. data/lib/myco/core/Decorator.my +18 -0
  18. data/lib/myco/core/FileToplevel.my +23 -0
  19. data/lib/myco/core/Object.my +24 -0
  20. data/lib/myco/core/Switch.my +31 -0
  21. data/lib/myco/eval.rb +63 -0
  22. data/lib/myco/parser/ast/constant_access.rb +29 -0
  23. data/lib/myco/parser/ast/constant_define.rb +40 -0
  24. data/lib/myco/parser/ast/constant_reopen.rb +47 -0
  25. data/lib/myco/parser/ast/declare_category.rb +51 -0
  26. data/lib/myco/parser/ast/declare_decorator.rb +35 -0
  27. data/lib/myco/parser/ast/declare_file.rb +54 -0
  28. data/lib/myco/parser/ast/declare_meme.rb +44 -0
  29. data/lib/myco/parser/ast/declare_object.rb +75 -0
  30. data/lib/myco/parser/ast/declare_string.rb +37 -0
  31. data/lib/myco/parser/ast/invoke.rb +66 -0
  32. data/lib/myco/parser/ast/local_variable_access_ambiguous.rb +38 -0
  33. data/lib/myco/parser/ast/misc.rb +61 -0
  34. data/lib/myco/parser/ast/myco_module_scope.rb +58 -0
  35. data/lib/myco/parser/ast/quest.rb +82 -0
  36. data/lib/myco/parser/ast.rb +15 -0
  37. data/lib/myco/parser/builder.output +3995 -0
  38. data/lib/myco/parser/builder.racc +585 -0
  39. data/lib/myco/parser/builder.rb +1592 -0
  40. data/lib/myco/parser/lexer.rb +2306 -0
  41. data/lib/myco/parser/lexer.rl +393 -0
  42. data/lib/myco/parser/lexer_char_classes.rl +56 -0
  43. data/lib/myco/parser/lexer_common.rb +95 -0
  44. data/lib/myco/parser/lexer_skeleton.rl +154 -0
  45. data/lib/myco/parser/peg_parser.kpeg +759 -0
  46. data/lib/myco/parser/peg_parser.rb +7094 -0
  47. data/lib/myco/parser.rb +40 -0
  48. data/lib/myco/tools/OptionParser.my +38 -0
  49. data/lib/myco/tools/mycompile.my +51 -0
  50. data/lib/myco/toolset.rb +16 -0
  51. data/lib/myco/version.rb +22 -0
  52. data/lib/myco.rb +15 -0
  53. metadata +247 -0
@@ -0,0 +1,393 @@
1
+
2
+ %%machine lexer; # %
3
+
4
+ %%{
5
+ # %
6
+ constant = c_upper c_alnum* ;
7
+ identifier = c_lower c_alnum* ;
8
+
9
+ comment = '#' (any - c_eol)*; # end-of-line comment
10
+
11
+ integer = [0-9]+ ;
12
+ float = [0-9]+ '.' [0-9]+ ;
13
+
14
+ strbody_norm = ^('\\' | '"');
15
+ strbody = strbody_norm* ('\\' c_any strbody_norm*)*;
16
+
17
+ # "foo bar"
18
+ #
19
+ string = (
20
+ zlen % { note_begin :string }
21
+ '"' % { note :string, :T_STRING_BEGIN; note :string }
22
+ strbody % { note :string, :T_STRING_BODY; note :string }
23
+ '"' % { note :string, :T_STRING_END }
24
+ );
25
+
26
+ # id: foo
27
+ #
28
+ declid = (
29
+ zlen % { note_begin :declid }
30
+ 'id' % { note :declid, :T_DECLID_TAG }
31
+ c_space*
32
+ ':'
33
+ c_space* % { note :declid }
34
+ identifier % { note :declid, :T_DECLID_VALUE }
35
+ );
36
+
37
+ # [foo]
38
+ #
39
+ category = (
40
+ zlen % { note_begin :category }
41
+ '[' % { note :category, :T_CATEGORY_BEGIN }
42
+ c_space* % { note :category }
43
+ identifier % { note :category, :T_CATEGORY_BODY }
44
+ c_space* % { note :category }
45
+ ']' % { note :category, :T_CATEGORY_END }
46
+ );
47
+
48
+ # :foo
49
+ # :"bar baz"
50
+ #
51
+ symbol = (
52
+ ':' % { note_begin :symbol }
53
+ (
54
+ (
55
+ identifier % { note :symbol, :T_SYMBOL; }
56
+ )
57
+ | (
58
+ '"' % { note :symbol, :T_SYMSTR_BEGIN; note :symbol }
59
+ strbody % { note :symbol, :T_SYMSTR_BODY; note :symbol }
60
+ '"' % { note :symbol, :T_SYMSTR_END; }
61
+ )
62
+ )
63
+ );
64
+
65
+ # Foo
66
+ # ::Bar
67
+ # Foo::Bar
68
+ # ::Foo::Bar::Baz
69
+ #
70
+ sconstant = (
71
+ zlen % { note_begin :sconstant }
72
+ (
73
+ '::' % { note :sconstant, :T_SCOPE; note :sconstant }
74
+ )? (
75
+ constant % { note :sconstant, :T_CONSTANT; note :sconstant }
76
+ '::' % { note :sconstant, :T_SCOPE; note :sconstant }
77
+ )*
78
+ constant % { note :sconstant, :T_CONSTANT }
79
+ );
80
+
81
+ # Foo,Bar,Baz
82
+ #
83
+ constant_list = (
84
+ zlen % { note_begin :constant_list, nil }
85
+ sconstant % { xfer_notes :sconstant, :constant_list }
86
+ (
87
+ c_space* % { note :constant_list }
88
+ ',' % { note :constant_list, :T_CONST_SEP }
89
+ c_space_nl*
90
+ sconstant % { xfer_notes :sconstant, :constant_list }
91
+ )*
92
+ );
93
+
94
+ # Foo <
95
+ #
96
+ cdefn_begin = (
97
+ zlen % { note_begin :cdefn_begin }
98
+ constant % { note :cdefn_begin, :T_CONSTANT }
99
+ c_space* % { note :cdefn_begin }
100
+ '<' % { note :cdefn_begin, :T_DEFINE }
101
+ ) % {
102
+ emit_notes :cdefn_begin
103
+ };
104
+
105
+ # Object {
106
+ #
107
+ decl_begin = (
108
+ (cdefn_begin c_space_nl*)?
109
+ constant_list
110
+ c_space_nl* % { note_begin :decl_begin }
111
+ '{' % { note :decl_begin, :T_DECLARE_BEGIN }
112
+ ) % {
113
+ emit_notes :constant_list
114
+ emit_notes :decl_begin
115
+ };
116
+
117
+ # Starting delimiter for a string declaration
118
+ #
119
+ # Can be any string of characters following a
120
+ # constant name + whitespace that is not ambiguous
121
+ # with some other construction
122
+ #
123
+ # The ending delimiter will be calculated from it as follows:
124
+ # The string of characters is reversed.
125
+ # If there are groups of "alphabetical" characters,
126
+ # the intra-group order remains intact.
127
+ # If there are non-alphabetical characters with "directionality",
128
+ # the "opposite" characters are substituted.
129
+ #
130
+ dstr_delim = (
131
+ ^(c_space_nl|'{'|':'|',')
132
+ ^(c_space_nl)+
133
+ );
134
+
135
+ # Object @@@
136
+ # ...
137
+ # @@@
138
+ # Emits T_DECLSTR_BEGIN and stores the closing delimiter in @dstr_delim.
139
+ dstr_begin = (
140
+ constant_list
141
+ c_space+ % { mark :space }
142
+ dstr_delim % { grab :delim, kram(:space) }
143
+ ) % {
144
+ emit_notes :constant_list
145
+
146
+ start, stop = @stored[:delim]
147
+ emit :T_DECLSTR_BEGIN, start, stop
148
+
149
+ # Table of replacement characters to use when calculating
150
+ # the ending delimiter from the starting delimiter.
151
+ # Directional characters are replaced with their opposite.
152
+ @dstr_replace_table ||= %w{
153
+ < > ( ) { } [ ]
154
+ }
155
+
156
+ # Calculate the ending delimiter and store it where dstr_body reads it
157
+ @dstr_delim = text(start, stop) \
158
+ .split(/(?<=[^a-zA-Z])|(?=[^a-zA-Z])/)
159
+ .map { |str|
160
+ idx = @dstr_replace_table.find_index(str)
161
+ idx.nil? ? str :
162
+ (idx.odd? ? @dstr_replace_table[idx-1] : @dstr_replace_table[idx+1])
163
+ }
164
+ .reverse
165
+ .join ''
166
+ };
167
+
168
+ # identifier (
169
+ #
170
+ args_begin = (
171
+ zlen % { note_begin :args_begin }
172
+ identifier % { note :args_begin, :T_IDENTIFIER }
173
+ c_space_nl* % { note :args_begin }
174
+ '(' % { note :args_begin, :T_ARGS_BEGIN }
175
+ );
176
+
177
+ ##
178
+ # Top level machine
179
+
180
+ main := |*
181
+ c_space;
182
+ comment;
183
+
184
+ decl_begin => { fcall decl_body; };
185
+ dstr_begin => { fcall dstr_body; };
186
+
187
+ string => { emit_notes :string };
188
+ declid => { emit_notes :declid };
189
+ category => { emit_notes :category };
190
+ identifier => { emit :T_IDENTIFIER };
191
+ constant => { emit :T_CONSTANT };
192
+ '::' => { emit :T_SCOPE };
193
+
194
+ ':' => { fcall pre_meme; };
195
+
196
+ ';' => { emit :T_EXPR_SEP };
197
+ c_nl => { emit :T_EXPR_SEP };
198
+
199
+ c_eof => { emit :T_DECLARE_END };
200
+ any => { error :main };
201
+ *|;
202
+
203
+ ##
204
+ # Declarative body machine
205
+
206
+ decl_body := |*
207
+ c_space;
208
+ comment;
209
+
210
+ (c_eol|';') => { emit :T_EXPR_SEP };
211
+
212
+ decl_begin => { fcall decl_body; };
213
+ dstr_begin => { fcall dstr_body; };
214
+
215
+ string => { emit_notes :string };
216
+ declid => { emit_notes :declid };
217
+ category => { emit_notes :category };
218
+ identifier => { emit :T_IDENTIFIER };
219
+ constant => { emit :T_CONSTANT };
220
+ '::' => { emit :T_SCOPE };
221
+
222
+ ':' => { fcall pre_meme; };
223
+
224
+ '}' => { emit :T_DECLARE_END; fret; };
225
+
226
+ any => { error :decl_body };
227
+ *|;
228
+
229
+ ##
230
+ # Pre-meme body sub-machines
231
+
232
+ pre_meme := |*
233
+ c_space_nl+;
234
+ comment;
235
+
236
+ # Parameters are specified within '|'s
237
+ '|' => { emit :T_PARAMS_BEGIN; bpush :param; fcall meme_body; };
238
+
239
+ # A meme begins with either a '{' or some other item for inline
240
+ ^(c_space_nl|'{'|'|') =>
241
+ { fhold; emit :T_MEME_BEGIN, @ts, @ts; bpush :meml; fgoto meme_body; };
242
+ '{' => { emit :T_MEME_BEGIN; bpush :meme; fgoto meme_body; };
243
+
244
+ any => { error :pre_meme };
245
+ *|;
246
+
247
+ ##
248
+ # Declarative string machine - reads whole lines until one begins with @dstr_delim
249
+
250
+ dstr_body := |*
251
+ (
252
+ c_nl % { mark :newline }
253
+ (^c_nl)* % { grab :line, kram(:newline) }
254
+ ) => {
255
+ start, stop = @stored[:line];
256
+ line_text = text start, stop
257
+
258
+ raise "No known delimiter for string declaration." \
259
+ if @dstr_delim.nil?
260
+
261
+ if (line_text =~ /^(\s*)(\S+)/; $2==@dstr_delim)
262
+ emit :T_DECLSTR_BODY, (@dstr_body_start || start), start # no body lines seen: emit an empty body
263
+ @dstr_body_start = nil
264
+ @dstr_delim = nil
265
+
266
+ emit :T_DECLSTR_END, start+$1.size, stop
267
+ fret;
268
+ else
269
+ @dstr_body_start ||= start
270
+ end
271
+ };
272
+ *|;
273
+
274
+ ##
275
+ # Meme body machine
276
+
277
+ meme_body := |*
278
+ c_space+;
279
+ comment;
280
+
281
+ decl_begin => { fcall decl_body; };
282
+ dstr_begin => { fcall dstr_body; };
283
+
284
+ args_begin => { emit_notes :args_begin; bpush :args; fcall meme_body; };
285
+ '(' => { emit :T_PAREN_BEGIN; bpush :paren; fcall meme_body; };
286
+ '[' => { emit :T_ARRAY_BEGIN; bpush :array; fcall meme_body; };
287
+ '{' => { emit :T_MEME_BEGIN; bpush :meme; fcall meme_body; };
288
+
289
+ 'self' => { emit :T_SELF };
290
+ 'null' => { emit :T_NULL };
291
+ 'void' => { emit :T_VOID };
292
+ 'true' => { emit :T_TRUE };
293
+ 'false' => { emit :T_FALSE };
294
+ integer => { emit :T_INTEGER };
295
+ float => { emit :T_FLOAT };
296
+ constant => { emit :T_CONSTANT };
297
+ identifier => { emit :T_IDENTIFIER };
298
+ '.' => { emit :T_DOT };
299
+ '?' => { emit :T_QUEST };
300
+ '::' => { emit :T_SCOPE };
301
+ '=' => { emit :T_ASSIGN };
302
+ '+' => { emit :T_OP_PLUS };
303
+ '-' => { emit :T_OP_MINUS };
304
+ '*' => { emit :T_OP_MULT };
305
+ '/' => { emit :T_OP_DIV };
306
+ '%' => { emit :T_OP_MOD };
307
+ '**' => { emit :T_OP_EXP };
308
+ ('<'|'>'|'<='|'>='|'=='|'==='|'<=>'|'=~')
309
+ => { emit :T_OP_COMPARE };
310
+ '&&' => { emit :T_OP_AND };
311
+ '||' => { emit :T_OP_OR };
312
+
313
+ symbol => { emit_notes :symbol };
314
+ string => { emit_notes :string };
315
+
316
+ '\\\n'; # Escaped newline - ignore
317
+
318
+
319
+ '&' => {
320
+ case bthis
321
+ when :param; emit :T_OP_TOPROC
322
+ when :args; emit :T_OP_TOPROC
323
+ else; error :meme_body
324
+ end
325
+ };
326
+
327
+ ',' => {
328
+ case bthis
329
+ when :args; emit :T_ARG_SEP
330
+ when :param; emit :T_ARG_SEP
331
+ when :array; emit :T_ARG_SEP
332
+ else; error :meme_body
333
+ end
334
+ };
335
+
336
+ ';' => {
337
+ case bthis
338
+ when :meme; emit :T_EXPR_SEP
339
+ when :meml; emit :T_EXPR_SEP
340
+ when :paren; emit :T_EXPR_SEP
341
+ else; error :meme_body
342
+ end
343
+ };
344
+
345
+ c_eol => {
346
+ case bthis
347
+ when :meme; emit :T_EXPR_SEP
348
+ when :meml; emit :T_MEME_END, @ts, @ts; fhold; bpop; fret;
349
+ when :paren; emit :T_EXPR_SEP
350
+ when :args; emit :T_ARG_SEP
351
+ when :param; emit :T_ARG_SEP
352
+ when :array; emit :T_ARG_SEP
353
+ else; error :meme_body
354
+ end
355
+ };
356
+
357
+ '}' => {
358
+ case bthis
359
+ when :meme; emit :T_MEME_END; bpop; fret;
360
+ else; error :meme_body
361
+ end
362
+ };
363
+
364
+ ')' => {
365
+ case bthis
366
+ when :args; emit :T_ARGS_END; bpop; fret;
367
+ when :paren; emit :T_PAREN_END; bpop; fret;
368
+ else; error :meme_body
369
+ end
370
+ };
371
+
372
+ ']' => {
373
+ case bthis
374
+ when :array; emit :T_ARRAY_END; bpop; fret;
375
+ else; error :meme_body
376
+ end
377
+ };
378
+
379
+ '|' => {
380
+ case bthis
381
+ when :param; emit :T_PARAMS_END; bpop; fret;
382
+ when :meme; emit :T_PARAMS_BEGIN; bpush :param; fcall meme_body;
383
+ when :meml; emit :T_PARAMS_BEGIN; bpush :param; fcall meme_body;
384
+ when :paren; emit :T_PARAMS_BEGIN; bpush :param; fcall meme_body;
385
+ else; error :meme_body
386
+ end
387
+ };
388
+
389
+ any => { error :meme_body };
390
+ *|;
391
+
392
+ }%%
393
+ # %
@@ -0,0 +1,56 @@
1
+
2
+ %%machine lexer; # %
3
+
4
+ %%{
5
+ # %
6
+ ##
7
+ # Basic character types - taken from:
8
+ # https://github.com/whitequark/parser/blob/master/lib/parser/lexer.rl
9
+ #
10
+ # License for whitequark/parser reproduced below.
11
+
12
+ # Copyright (c) 2013 Peter Zotov <whitequark@whitequark.org>
13
+ #
14
+ # Parts of the source are derived from ruby_parser:
15
+ # Copyright (c) Ryan Davis, seattle.rb
16
+ #
17
+ # MIT License
18
+ #
19
+ # Permission is hereby granted, free of charge, to any person obtaining
20
+ # a copy of this software and associated documentation files (the
21
+ # "Software"), to deal in the Software without restriction, including
22
+ # without limitation the rights to use, copy, modify, merge, publish,
23
+ # distribute, sublicense, and/or sell copies of the Software, and to
24
+ # permit persons to whom the Software is furnished to do so, subject to
25
+ # the following conditions:
26
+ #
27
+ # The above copyright notice and this permission notice shall be
28
+ # included in all copies or substantial portions of the Software.
29
+ #
30
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
31
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
33
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
34
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
35
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
36
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
37
+
38
+ c_nl = '\n' $ do_nl;
39
+ c_space = [ \t\r\f\v];
40
+ c_space_nl = c_space | c_nl;
41
+
42
+ c_eof = 0x04 | 0x1a | 0 | zlen; # ^D, ^Z, \0, EOF
43
+ c_eol = c_nl | c_eof;
44
+ c_any = any - c_eof;
45
+
46
+ c_nl_zlen = c_nl | zlen;
47
+ c_line = any - c_nl_zlen;
48
+
49
+ c_unicode = c_any - 0x00..0x7f;
50
+ c_upper = [A-Z];
51
+ c_lower = [a-z_] | c_unicode;
52
+ c_alpha = c_lower | c_upper;
53
+ c_alnum = c_alpha | [0-9];
54
+
55
+ }%%
56
+ # %
@@ -0,0 +1,95 @@
1
+
2
+ class CodeTools::Parser
3
+ class Lexer
4
+
5
+ def reset_common
6
+ warn "Lexer still has items on @bstack: #{@bstack.inspect}" \
7
+ if @bstack and !@bstack.empty?
8
+
9
+ @newlines = [0]
10
+ @marks = {}
11
+ @stored = {}
12
+ @bstack = []
13
+ end
14
+
15
+ def mark name, pos=@p
16
+ @marks[name] = pos
17
+ end
18
+
19
+ def kram name
20
+ @marks.delete name
21
+ end
22
+
23
+ def grab name, start=@ts, stop=@p
24
+ @stored[name] = [start, stop]
25
+ end
26
+
27
+ def note_begin queue_name, pos=@p
28
+ queue = @marks[queue_name] = (pos ? [pos] : [])
29
+ end
30
+
31
+ def note queue_name, type=nil, pos=@p
32
+ queue = (@marks[queue_name] ||= [])
33
+ queue << pos
34
+ queue << type if type
35
+ queue
36
+ end
37
+
38
+ def unnote queue_name, count=1
39
+ queue = (@marks[queue_name] ||= [])
40
+ queue.pop count
41
+ queue
42
+ end
43
+
44
+ def emit_notes queue_name
45
+ queue = (@marks[queue_name] || [])
46
+ queue.each_slice(3) { |a,b,c| emit c,a,b if a && b && c }
47
+ queue.clear
48
+ end
49
+
50
+ def xfer_notes queue_name_a, queue_name_b
51
+ queue_a = (@marks[queue_name_a] || [])
52
+ queue_b = (@marks[queue_name_b] ||= [])
53
+ queue_b << queue_a.shift until queue_a.empty?
54
+ queue_b
55
+ end
56
+
57
+ def bpush name
58
+ @bstack << name
59
+ end
60
+
61
+ def bthis
62
+ @bstack.last
63
+ end
64
+
65
+ def bpop
66
+ @bstack.pop
67
+ end
68
+
69
+ def error(location, hint=nil)
70
+ str = "Lexer met unexpected character(s) in #{location.inspect}: #{text.inspect}"
71
+ str += "; "+hint.to_s if hint
72
+ str += "\n@marks = #{@marks }"
73
+ str += "\n@stored = #{@stored}"
74
+ str += "\n@bstack = #{@bstack}"
75
+ str += "\n@stack = #{@stack}"
76
+ str += "\n"
77
+ warn str
78
+ end
79
+
80
+
81
+ def do_nl
82
+ @newlines << @p unless @newlines.include? @p
83
+ end
84
+
85
+ def emit(type, start = @ts, stop = @te)
86
+ @tokens << [type, text(start,stop), locate(start)]
87
+ end
88
+
89
+ def locate index
90
+ ary = @newlines.take_while { |i| i <= index }
91
+ row, col = ary.size, index-ary.last+1
92
+ end
93
+
94
+ end
95
+ end
@@ -0,0 +1,154 @@
1
+
2
+ ##
3
+ # Basic Lexer skeleton - taken from gist:
4
+ # https://gist.github.com/YorickPeterse/10658884
5
+ # Originally from:
6
+ # https://github.com/YorickPeterse/oga/blob/master/lib/oga/xml/lexer.rl
7
+ #
8
+ # License for the source gist reproduced below.
9
+
10
+ # Copyright (c) 2014, Yorick Peterse
11
+ #
12
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
13
+ # of this software and associated documentation files (the "Software"), to deal
14
+ # in the Software without restriction, including without limitation the rights
15
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16
+ # copies of the Software, and to permit persons to whom the Software is
17
+ # furnished to do so, subject to the following conditions:
18
+ #
19
+ # The above copyright notice and this permission notice shall be included in
20
+ # all copies or substantial portions of the Software.
21
+ #
22
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
28
+ # THE SOFTWARE.
29
+
30
+ %%machine lexer; # %
31
+
32
+ class CodeTools::Parser
33
+ class Lexer
34
+ %% write data; # %
35
+
36
+ ##
37
+ # @param [String] data The data to lex.
38
+ #
39
+ def initialize(data)
40
+ @data = data.unpack('U*') << 0
41
+
42
+ reset
43
+ end
44
+
45
+ ##
46
+ # Resets the internal state of the lexer. Typically you don't need to
47
+ # call this method yourself as its called by #lex after lexing a given
48
+ # String.
49
+ #
50
+ def reset
51
+ @line = 1
52
+ @ts = nil
53
+ @te = nil
54
+ @tokens = []
55
+ @stack = []
56
+ @top = 0
57
+ @cs = self.class.lexer_start
58
+ @act = 0
59
+ @elements = []
60
+ @eof = @data.length
61
+ @p = 0
62
+ @pe = @eof
63
+ reset_common
64
+ end
65
+
66
+ ##
67
+ # Lexes the supplied String and returns an Array of tokens. Each token is
68
+ # an Array in the following format:
69
+ #
70
+ # [TYPE, VALUE, [ROW, COLUMN]]
71
+ #
72
+ # The type is a Symbol, the value is a String; [ROW, COLUMN] locates the token start.
73
+ #
74
+ # This method resets the internal state of the lexer after consuming the
75
+ # input.
76
+ #
77
+ # The input is the String that was passed to #initialize; no arguments are taken.
78
+ # @return [Array]
79
+ # @see #advance
80
+ #
81
+ def lex
82
+ tokens = []
83
+
84
+ while token = advance
85
+ tokens << token
86
+ end
87
+
88
+ reset
89
+
90
+ return tokens
91
+ end
92
+
93
+ ##
94
+ # Advances through the input and generates the corresponding tokens.
95
+ #
96
+ # This method does *not* reset the internal state of the lexer.
97
+ #
98
+ # The input is the String that was passed to #initialize; no arguments are taken.
99
+ # @return [Array]
100
+ #
101
+ def advance
102
+ _lexer_actions = self.class.send :_lexer_actions
103
+ _lexer_range_lengths = self.class.send :_lexer_range_lengths
104
+ _lexer_trans_actions = self.class.send :_lexer_trans_actions
105
+ _lexer_key_offsets = self.class.send :_lexer_key_offsets
106
+ _lexer_index_offsets = self.class.send :_lexer_index_offsets
107
+ _lexer_to_state_actions = self.class.send :_lexer_to_state_actions
108
+ _lexer_trans_keys = self.class.send :_lexer_trans_keys
109
+ _lexer_from_state_actions = self.class.send :_lexer_from_state_actions
110
+ _lexer_single_lengths = self.class.send :_lexer_single_lengths
111
+ _lexer_trans_targs = self.class.send :_lexer_trans_targs
112
+ _lexer_eof_trans = self.class.send :_lexer_eof_trans
113
+ _lexer_indicies = self.class.send :_lexer_indicies
114
+
115
+ %% write exec;
116
+ # %
117
+
118
+ return @tokens.shift
119
+ end
120
+
121
+ private
122
+
123
+ ##
124
+ # Returns the text of the current buffer based on the supplied start and
125
+ # stop position.
126
+ #
127
+ # By default `@ts` and `@te` are used as the start/stop position.
128
+ #
129
+ # @param [Fixnum] start
130
+ # @param [Fixnum] stop
131
+ # @return [String]
132
+ #
133
+ def text(start = @ts, stop = @te)
134
+ return @data[start...stop].pack('U*')
135
+ end
136
+
137
+
138
+ %%{
139
+ # %
140
+ # Use instance variables for `ts` and friends.
141
+ access @;
142
+ getkey (@data[@p] || 0);
143
+ variable p @p;
144
+ variable pe @pe;
145
+ variable eof @eof;
146
+
147
+ action do_nl { do_nl }
148
+
149
+ include "lexer_char_classes.rl"; # Basic character classes
150
+ include "lexer.rl"; # Main rules file
151
+ }%%
152
+ # %
153
+ end
154
+ end