myco 0.1.0.dev

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +2 -0
  3. data/bin/myco +7 -0
  4. data/lib/myco/backtrace.rb +56 -0
  5. data/lib/myco/bootstrap/component.rb +142 -0
  6. data/lib/myco/bootstrap/empty_object.rb +4 -0
  7. data/lib/myco/bootstrap/file_toplevel.rb +5 -0
  8. data/lib/myco/bootstrap/find_constant.rb +86 -0
  9. data/lib/myco/bootstrap/instance.rb +52 -0
  10. data/lib/myco/bootstrap/meme.rb +160 -0
  11. data/lib/myco/bootstrap/void.rb +40 -0
  12. data/lib/myco/bootstrap.my +15 -0
  13. data/lib/myco/bootstrap.rb +10 -0
  14. data/lib/myco/command.my +33 -0
  15. data/lib/myco/core/BasicObject.my +46 -0
  16. data/lib/myco/core/Category.my +5 -0
  17. data/lib/myco/core/Decorator.my +18 -0
  18. data/lib/myco/core/FileToplevel.my +23 -0
  19. data/lib/myco/core/Object.my +24 -0
  20. data/lib/myco/core/Switch.my +31 -0
  21. data/lib/myco/eval.rb +63 -0
  22. data/lib/myco/parser/ast/constant_access.rb +29 -0
  23. data/lib/myco/parser/ast/constant_define.rb +40 -0
  24. data/lib/myco/parser/ast/constant_reopen.rb +47 -0
  25. data/lib/myco/parser/ast/declare_category.rb +51 -0
  26. data/lib/myco/parser/ast/declare_decorator.rb +35 -0
  27. data/lib/myco/parser/ast/declare_file.rb +54 -0
  28. data/lib/myco/parser/ast/declare_meme.rb +44 -0
  29. data/lib/myco/parser/ast/declare_object.rb +75 -0
  30. data/lib/myco/parser/ast/declare_string.rb +37 -0
  31. data/lib/myco/parser/ast/invoke.rb +66 -0
  32. data/lib/myco/parser/ast/local_variable_access_ambiguous.rb +38 -0
  33. data/lib/myco/parser/ast/misc.rb +61 -0
  34. data/lib/myco/parser/ast/myco_module_scope.rb +58 -0
  35. data/lib/myco/parser/ast/quest.rb +82 -0
  36. data/lib/myco/parser/ast.rb +15 -0
  37. data/lib/myco/parser/builder.output +3995 -0
  38. data/lib/myco/parser/builder.racc +585 -0
  39. data/lib/myco/parser/builder.rb +1592 -0
  40. data/lib/myco/parser/lexer.rb +2306 -0
  41. data/lib/myco/parser/lexer.rl +393 -0
  42. data/lib/myco/parser/lexer_char_classes.rl +56 -0
  43. data/lib/myco/parser/lexer_common.rb +95 -0
  44. data/lib/myco/parser/lexer_skeleton.rl +154 -0
  45. data/lib/myco/parser/peg_parser.kpeg +759 -0
  46. data/lib/myco/parser/peg_parser.rb +7094 -0
  47. data/lib/myco/parser.rb +40 -0
  48. data/lib/myco/tools/OptionParser.my +38 -0
  49. data/lib/myco/tools/mycompile.my +51 -0
  50. data/lib/myco/toolset.rb +16 -0
  51. data/lib/myco/version.rb +22 -0
  52. data/lib/myco.rb +15 -0
  53. metadata +247 -0
@@ -0,0 +1,393 @@
1
+
2
+ %%machine lexer; # %
3
+
4
+ %%{
5
+ # %
6
+ constant = c_upper c_alnum* ;
7
+ identifier = c_lower c_alnum* ;
8
+
9
+ comment = '#' (any - c_eol)*; # end-of-line comment
10
+
11
+ integer = [0-9]+ ;
12
+ float = [0-9]+ '.' [0-9]+ ;
13
+
14
+ strbody_norm = ^('\\' | '"');
15
+ strbody = strbody_norm* ('\\' c_any strbody_norm*)*;
16
+
17
+ # "foo bar"
18
+ #
19
+ string = (
20
+ zlen % { note_begin :string }
21
+ '"' % { note :string, :T_STRING_BEGIN; note :string }
22
+ strbody % { note :string, :T_STRING_BODY; note :string }
23
+ '"' % { note :string, :T_STRING_END }
24
+ );
25
+
26
+ # id: foo
27
+ #
28
+ declid = (
29
+ zlen % { note_begin :declid }
30
+ 'id' % { note :declid, :T_DECLID_TAG }
31
+ c_space*
32
+ ':'
33
+ c_space* % { note :declid }
34
+ identifier % { note :declid, :T_DECLID_VALUE }
35
+ );
36
+
37
+ # [foo]
38
+ #
39
+ category = (
40
+ zlen % { note_begin :category }
41
+ '[' % { note :category, :T_CATEGORY_BEGIN }
42
+ c_space* % { note :category }
43
+ identifier % { note :category, :T_CATEGORY_BODY }
44
+ c_space* % { note :category }
45
+ ']' % { note :category, :T_CATEGORY_END }
46
+ );
47
+
48
+ # :foo
49
+ # :"bar baz"
50
+ #
51
+ symbol = (
52
+ ':' % { note_begin :symbol }
53
+ (
54
+ (
55
+ identifier % { note :symbol, :T_SYMBOL; }
56
+ )
57
+ | (
58
+ '"' % { note :symbol, :T_SYMSTR_BEGIN; note :symbol }
59
+ strbody % { note :symbol, :T_SYMSTR_BODY; note :symbol }
60
+ '"' % { note :symbol, :T_SYMSTR_END; }
61
+ )
62
+ )
63
+ );
64
+
65
+ # Foo
66
+ # ::Bar
67
+ # Foo::Bar
68
+ # ::Foo::Bar::Baz
69
+ #
70
+ sconstant = (
71
+ zlen % { note_begin :sconstant }
72
+ (
73
+ '::' % { note :sconstant, :T_SCOPE; note :sconstant }
74
+ )? (
75
+ constant % { note :sconstant, :T_CONSTANT; note :sconstant }
76
+ '::' % { note :sconstant, :T_SCOPE; note :sconstant }
77
+ )*
78
+ constant % { note :sconstant, :T_CONSTANT }
79
+ );
80
+
81
+ # Foo,Bar,Baz
82
+ #
83
+ constant_list = (
84
+ zlen % { note_begin :constant_list, nil }
85
+ sconstant % { xfer_notes :sconstant, :constant_list }
86
+ (
87
+ c_space* % { note :constant_list }
88
+ ',' % { note :constant_list, :T_CONST_SEP }
89
+ c_space_nl*
90
+ sconstant % { xfer_notes :sconstant, :constant_list }
91
+ )*
92
+ );
93
+
94
+ # Foo <
95
+ #
96
+ cdefn_begin = (
97
+ zlen % { note_begin :cdefn_begin }
98
+ constant % { note :cdefn_begin, :T_CONSTANT }
99
+ c_space* % { note :cdefn_begin }
100
+ '<' % { note :cdefn_begin, :T_DEFINE }
101
+ ) % {
102
+ emit_notes :cdefn_begin
103
+ };
104
+
105
+ # Object {
106
+ #
107
+ decl_begin = (
108
+ (cdefn_begin c_space_nl*)?
109
+ constant_list
110
+ c_space_nl* % { note_begin :decl_begin }
111
+ '{' % { note :decl_begin, :T_DECLARE_BEGIN }
112
+ ) % {
113
+ emit_notes :constant_list
114
+ emit_notes :decl_begin
115
+ };
116
+
117
+ # Starting delimiter for a string declaration
118
+ #
119
+ # Can be any string of characters following a
120
+ # constant name + whitespace that is not ambiguous
121
+ # with some other construction
122
+ #
123
+ # The ending delimiter is calculated from the starting delimiter as follows:
124
+ # The string of characters is reversed.
125
+ # If there are groups of "alphabetical" characters,
126
+ # the intra-group order remains intact.
127
+ # If there are non-alphabetical characters with "directionality",
128
+ # the "opposite" characters are substituted.
129
+ #
130
+ dstr_delim = (
131
+ ^(c_space_nl|'{'|':'|',')
132
+ ^(c_space_nl)+
133
+ );
134
+
135
+ # Object @@@
136
+ # ...
137
+ # @@@
138
+ #
139
dstr_begin = (
  constant_list
  c_space+    % { mark :space }
  dstr_delim  % { grab :delim, kram(:space) }
) % {
  emit_notes :constant_list

  # The starting delimiter spans from the end of the whitespace that
  # follows the constant list to the end of the dstr_delim match.
  start, stop = @stored[:delim]
  emit :T_DECLSTR_BEGIN, start, stop

  # Table of replacement characters to use when calculating
  # the ending delimiter from the starting delimiter.
  # Directional characters are replaced with their opposite.
  @dstr_replace_table ||= %w{
    < > ( ) { } [ ]
  }

  # Calculate the ending delimiter to look for and store it in
  # @dstr_delim, which is the variable the dstr_body machine compares
  # each line against and resets to nil once the string is closed.
  # The text is split at every boundary next to a non-alphabetical
  # character, so alphabetical runs stay intact while the pieces
  # are reversed and directional characters are swapped.
  @dstr_delim = text(start, stop) \
    .split(/(?<=[^a-zA-Z])|(?=[^a-zA-Z])/)
    .map { |str|
      idx = @dstr_replace_table.find_index(str)
      idx.nil? ? str :
        (idx.odd? ? @dstr_replace_table[idx-1] : @dstr_replace_table[idx+1])
    }
    .reverse
    .join ''
};
167
+
168
+ # identifier (
169
+ #
170
+ args_begin = (
171
+ zlen % { note_begin :args_begin }
172
+ identifier % { note :args_begin, :T_IDENTIFIER }
173
+ c_space_nl* % { note :args_begin }
174
+ '(' % { note :args_begin, :T_ARGS_BEGIN }
175
+ );
176
+
177
+ ##
178
+ # Top level machine
179
+
180
+ main := |*
181
+ c_space;
182
+ comment;
183
+
184
+ decl_begin => { fcall decl_body; };
185
+ dstr_begin => { fcall dstr_body; };
186
+
187
+ string => { emit_notes :string };
188
+ declid => { emit_notes :declid };
189
+ category => { emit_notes :category };
190
+ identifier => { emit :T_IDENTIFIER };
191
+ constant => { emit :T_CONSTANT };
192
+ '::' => { emit :T_SCOPE };
193
+
194
+ ':' => { fcall pre_meme; };
195
+
196
+ ';' => { emit :T_EXPR_SEP };
197
+ c_nl => { emit :T_EXPR_SEP };
198
+
199
+ c_eof => { emit :T_DECLARE_END };
200
+ any => { error :main };
201
+ *|;
202
+
203
+ ##
204
+ # Declarative body machine
205
+
206
+ decl_body := |*
207
+ c_space;
208
+ comment;
209
+
210
+ (c_eol|';') => { emit :T_EXPR_SEP };
211
+
212
+ decl_begin => { fcall decl_body; };
213
+ dstr_begin => { fcall dstr_body; };
214
+
215
+ string => { emit_notes :string };
216
+ declid => { emit_notes :declid };
217
+ category => { emit_notes :category };
218
+ identifier => { emit :T_IDENTIFIER };
219
+ constant => { emit :T_CONSTANT };
220
+ '::' => { emit :T_SCOPE };
221
+
222
+ ':' => { fcall pre_meme; };
223
+
224
+ '}' => { emit :T_DECLARE_END; fret; };
225
+
226
+ any => { error :decl_body };
227
+ *|;
228
+
229
+ ##
230
+ # Pre-meme body sub-machines
231
+
232
+ pre_meme := |*
233
+ c_space_nl+;
234
+ comment;
235
+
236
+ # Parameters are specified within '|'s
237
+ '|' => { emit :T_PARAMS_BEGIN; bpush :param; fcall meme_body; };
238
+
239
+ # A meme begins with either a '{' or some other item for inline
240
+ ^(c_space_nl|'{'|'|') =>
241
+ { fhold; emit :T_MEME_BEGIN, @ts, @ts; bpush :meml; fgoto meme_body; };
242
+ '{' => { emit :T_MEME_BEGIN; bpush :meme; fgoto meme_body; };
243
+
244
+ any => { error :pre_meme };
245
+ *|;
246
+
247
+ ##
248
+ # Declarative string machine
249
+
250
dstr_body := |*
  (
    c_nl     % { mark :newline }
    (^c_nl)* % { grab :line, kram(:newline) }
  ) => {
    # Each match is one newline plus the line that follows it;
    # start and stop delimit that line without the leading newline.
    start, stop = @stored[:line];
    line_text = text start, stop

    raise "No known delimiter for string declaration." \
      if @dstr_delim.nil?

    # The line closes the string when its first run of non-whitespace
    # characters equals the expected ending delimiter.
    if (line_text =~ /^(\s*)(\S+)/; $2==@dstr_delim)
      # The body runs from the first body line up to the start of the
      # delimiter line. When the delimiter sits on the very first line
      # no body start was recorded, so use an empty span here rather
      # than letting the stop position default to the end of the match.
      body_start = @dstr_body_start || start
      emit :T_DECLSTR_BODY, body_start, start
      @dstr_body_start = nil
      @dstr_delim = nil

      # Skip the indentation in front of the delimiter.
      emit :T_DECLSTR_END, start+$1.size, stop
      fret;
    else
      # Remember where the body began; later lines keep the first value.
      @dstr_body_start ||= start
    end
  };
*|;
273
+
274
+ ##
275
+ # Meme body machine
276
+
277
+ meme_body := |*
278
+ c_space+;
279
+ comment;
280
+
281
+ decl_begin => { fcall decl_body; };
282
+ dstr_begin => { fcall dstr_body; };
283
+
284
+ args_begin => { emit_notes :args_begin; bpush :args; fcall meme_body; };
285
+ '(' => { emit :T_PAREN_BEGIN; bpush :paren; fcall meme_body; };
286
+ '[' => { emit :T_ARRAY_BEGIN; bpush :array; fcall meme_body; };
287
+ '{' => { emit :T_MEME_BEGIN; bpush :meme; fcall meme_body; };
288
+
289
+ 'self' => { emit :T_SELF };
290
+ 'null' => { emit :T_NULL };
291
+ 'void' => { emit :T_VOID };
292
+ 'true' => { emit :T_TRUE };
293
+ 'false' => { emit :T_FALSE };
294
+ integer => { emit :T_INTEGER };
295
+ float => { emit :T_FLOAT };
296
+ constant => { emit :T_CONSTANT };
297
+ identifier => { emit :T_IDENTIFIER };
298
+ '.' => { emit :T_DOT };
299
+ '?' => { emit :T_QUEST };
300
+ '::' => { emit :T_SCOPE };
301
+ '=' => { emit :T_ASSIGN };
302
+ '+' => { emit :T_OP_PLUS };
303
+ '-' => { emit :T_OP_MINUS };
304
+ '*' => { emit :T_OP_MULT };
305
+ '/' => { emit :T_OP_DIV };
306
+ '%' => { emit :T_OP_MOD };
307
+ '**' => { emit :T_OP_EXP };
308
+ ('<'|'>'|'<='|'>='|'=='|'==='|'<=>'|'=~')
309
+ => { emit :T_OP_COMPARE };
310
+ '&&' => { emit :T_OP_AND };
311
+ '||' => { emit :T_OP_OR };
312
+
313
+ symbol => { emit_notes :symbol };
314
+ string => { emit_notes :string };
315
+
316
+ '\\\n'; # Escaped newline - ignore
317
+
318
+
319
+ '&' => {
320
+ case bthis
321
+ when :param; emit :T_OP_TOPROC
322
+ when :args; emit :T_OP_TOPROC
323
+ else; error :meme_body
324
+ end
325
+ };
326
+
327
+ ',' => {
328
+ case bthis
329
+ when :args; emit :T_ARG_SEP
330
+ when :param; emit :T_ARG_SEP
331
+ when :array; emit :T_ARG_SEP
332
+ else; error :meme_body
333
+ end
334
+ };
335
+
336
+ ';' => {
337
+ case bthis
338
+ when :meme; emit :T_EXPR_SEP
339
+ when :meml; emit :T_EXPR_SEP
340
+ when :paren; emit :T_EXPR_SEP
341
+ else; error :meme_body
342
+ end
343
+ };
344
+
345
+ c_eol => {
346
+ case bthis
347
+ when :meme; emit :T_EXPR_SEP
348
+ when :meml; emit :T_MEME_END, @ts, @ts; fhold; bpop; fret;
349
+ when :paren; emit :T_EXPR_SEP
350
+ when :args; emit :T_ARG_SEP
351
+ when :param; emit :T_ARG_SEP
352
+ when :array; emit :T_ARG_SEP
353
+ else; error :meme_body
354
+ end
355
+ };
356
+
357
+ '}' => {
358
+ case bthis
359
+ when :meme; emit :T_MEME_END; bpop; fret;
360
+ else; error :meme_body
361
+ end
362
+ };
363
+
364
+ ')' => {
365
+ case bthis
366
+ when :args; emit :T_ARGS_END; bpop; fret;
367
+ when :paren; emit :T_PAREN_END; bpop; fret;
368
+ else; error :meme_body
369
+ end
370
+ };
371
+
372
+ ']' => {
373
+ case bthis
374
+ when :array; emit :T_ARRAY_END; bpop; fret;
375
+ else; error :meme_body
376
+ end
377
+ };
378
+
379
+ '|' => {
380
+ case bthis
381
+ when :param; emit :T_PARAMS_END; bpop; fret;
382
+ when :meme; emit :T_PARAMS_BEGIN; bpush :param; fcall meme_body;
383
+ when :meml; emit :T_PARAMS_BEGIN; bpush :param; fcall meme_body;
384
+ when :paren; emit :T_PARAMS_BEGIN; bpush :param; fcall meme_body;
385
+ else; error :meme_body
386
+ end
387
+ };
388
+
389
+ any => { error :meme_body };
390
+ *|;
391
+
392
+ }%%
393
+ # %
@@ -0,0 +1,56 @@
1
+
2
+ %%machine lexer; # %
3
+
4
+ %%{
5
+ # %
6
+ ##
7
+ # Basic character types - taken from:
8
+ # https://github.com/whitequark/parser/blob/master/lib/parser/lexer.rl
9
+ #
10
+ # License for whitequark/parser reproduced below.
11
+
12
+ # Copyright (c) 2013 Peter Zotov <whitequark@whitequark.org>
13
+ #
14
+ # Parts of the source are derived from ruby_parser:
15
+ # Copyright (c) Ryan Davis, seattle.rb
16
+ #
17
+ # MIT License
18
+ #
19
+ # Permission is hereby granted, free of charge, to any person obtaining
20
+ # a copy of this software and associated documentation files (the
21
+ # "Software"), to deal in the Software without restriction, including
22
+ # without limitation the rights to use, copy, modify, merge, publish,
23
+ # distribute, sublicense, and/or sell copies of the Software, and to
24
+ # permit persons to whom the Software is furnished to do so, subject to
25
+ # the following conditions:
26
+ #
27
+ # The above copyright notice and this permission notice shall be
28
+ # included in all copies or substantial portions of the Software.
29
+ #
30
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
31
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
33
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
34
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
35
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
36
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
37
+
38
# Newline character; the do_nl action is embedded on its transitions.
c_nl = '\n' $ do_nl;
# Non-newline whitespace: space, tab, carriage return, form feed, vertical tab.
c_space = [ \t\r\f\v];
# Any whitespace, newline included.
c_space_nl = c_space | c_nl;

# End-of-input markers.
c_eof = 0x04 | 0x1a | 0 | zlen; # ^D, ^Z, \0, EOF
# End of line: a newline or an end-of-input marker.
c_eol = c_nl | c_eof;
# Any character that is not an end-of-input marker.
c_any = any - c_eof;

# A newline or the zero-length match.
c_nl_zlen = c_nl | zlen;
# Any character other than a newline.
c_line = any - c_nl_zlen;

# Characters outside the 0x00..0x7f range.
c_unicode = c_any - 0x00..0x7f;
# Upper-case ASCII letters.
c_upper = [A-Z];
# Lower-case ASCII letters, underscore, and anything matched by c_unicode.
c_lower = [a-z_] | c_unicode;
# Letters of either case.
c_alpha = c_lower | c_upper;
# Letters and decimal digits.
c_alnum = c_alpha | [0-9];
54
+
55
+ }%%
56
+ # %
@@ -0,0 +1,95 @@
1
+
2
+ class CodeTools::Parser
3
+ class Lexer
4
+
5
# Re-initialises the bookkeeping shared by the lexer actions: the list of
# newline positions, the named marks, the stored spans and the block stack.
# Warns when the block stack still holds entries from the previous run.
#
# @return [Array] the fresh, empty block stack
def reset_common
  leftovers = @bstack
  if leftovers && !leftovers.empty?
    warn "Lexer still has items on @bstack: #{leftovers.inspect}"
  end

  @stored   = {}
  @marks    = {}
  @newlines = [0]
  @bstack   = []
end
14
+
15
# Remembers a position under the given name, replacing any earlier mark.
#
# @param name the key to file the position under
# @param pos the position to remember; defaults to the current @p
# @return the stored position
def mark(name, pos = @p)
  @marks.store(name, pos)
end
18
+
19
# Removes the mark filed under +name+ and hands back its position.
#
# @param name the key of the mark to consume
# @return the position that was stored, or nil when there was no such mark
def kram(name)
  @marks.delete(name)
end
22
+
23
# Stores a start/stop pair under the given name for later retrieval
# from @stored.
#
# @param name the key to file the span under
# @param start the first position; defaults to @ts
# @param stop the end position; defaults to the current @p
# @return [Array] the stored two-element span
def grab(name, start = @ts, stop = @p)
  @stored.store(name, [start, stop])
end
26
+
27
# Starts a fresh note queue under +queue_name+, discarding any previous one.
# The queue is seeded with +pos+ unless +pos+ is nil or false.
#
# @param queue_name the key of the queue inside @marks
# @param pos the opening position; defaults to the current @p
# @return [Array] the new queue
def note_begin(queue_name, pos = @p)
  fresh = []
  fresh << pos if pos
  @marks[queue_name] = fresh
end
30
+
31
# Appends a position, and optionally a token type after it, to the named
# note queue. The queue is created when it does not exist yet.
#
# @param queue_name the key of the queue inside @marks
# @param type the token type to append after the position, if any
# @param pos the position to append; defaults to the current @p
# @return [Array] the queue itself
def note(queue_name, type = nil, pos = @p)
  additions = [pos]
  additions.push(type) if type
  (@marks[queue_name] ||= []).concat(additions)
end
37
+
38
# Drops the last +count+ entries from the named note queue. The queue is
# created when it does not exist yet.
#
# @param queue_name the key of the queue inside @marks
# @param count how many trailing entries to remove
# @return [Array] the queue itself
def unnote(queue_name, count = 1)
  (@marks[queue_name] ||= []).tap { |entries| entries.pop(count) }
end
43
+
44
# Turns the named note queue into tokens and empties it. The queue is read
# in groups of three (start position, stop position, token type); a group
# with any missing or falsy member is skipped.
#
# @param queue_name the key of the queue inside @marks
# @return [Array] the emptied queue
def emit_notes(queue_name)
  pending = @marks[queue_name] || []
  pending.each_slice(3) do |start, stop, type|
    next unless start && stop && type
    emit(type, start, stop)
  end
  pending.clear
end
49
+
50
# Moves every entry of queue A to the end of queue B, keeping their order
# and leaving queue A empty. Queue B is created when it does not exist yet.
#
# @param queue_name_a the key of the queue to drain
# @param queue_name_b the key of the queue to append to
# @return [Array] queue B
def xfer_notes(queue_name_a, queue_name_b)
  source = @marks[queue_name_a] || []
  target = (@marks[queue_name_b] ||= [])
  target.concat(source.shift(source.size))
end
56
+
57
# Pushes a block kind onto the block stack.
#
# @return [Array] the block stack
def bpush(name)
  @bstack.push(name)
end

# Reports the block kind on top of the block stack without removing it.
#
# @return the topmost entry, or nil when the stack is empty
def bthis
  @bstack[-1]
end

# Removes the topmost block kind from the block stack.
#
# @return the removed entry, or nil when the stack is empty
def bpop
  @bstack.pop
end
68
+
69
# Reports unexpected input by writing a warning that names the machine the
# lexer was in, the offending text, an optional hint and a dump of the
# lexer's bookkeeping variables. This only warns; it does not raise.
#
# @param location the machine or rule the lexer was in
# @param hint optional extra detail appended to the first line
# @return [nil]
def error(location, hint = nil)
  pieces = ["Lexer met unexpected character(s) in #{location.inspect}: #{text.inspect}"]
  pieces.push("; ", hint.to_s) if hint
  pieces.push(
    "\n@marks = #{@marks}",
    "\n@stored = #{@stored}",
    "\n@bstack = #{@bstack}",
    "\n@stack = #{@stack}",
    "\n"
  )
  warn pieces.join
end
79
+
80
+
81
# Records the current position @p in the list of newline positions,
# unless that position has been recorded already.
#
# @return [Array, nil] the list when a position was added, nil otherwise
def do_nl
  return if @newlines.include?(@p)

  @newlines.push(@p)
end
84
+
85
# Appends a token to @tokens. A token is a three-element Array holding the
# type, the text between +start+ and +stop+, and the location of +start+.
#
# @param type the token type
# @param start where the token text begins; defaults to @ts
# @param stop where the token text ends; defaults to @te
# @return [Array] the token list
def emit(type, start = @ts, stop = @te)
  value    = text(start, stop)
  location = locate(start)
  @tokens.push([type, value, location])
end
88
+
89
# Converts a position in the input into a [row, column] pair, using the
# recorded newline positions that are not greater than +index+.
#
# NOTE(review): @newlines is seeded with 0 while later entries look like
# the positions of the newline characters themselves, so the column
# origin on the first row may differ from later rows - confirm.
#
# @param index the position to locate
# @return [Array] the row and the column
def locate(index)
  preceding = @newlines.take_while { |newline_pos| newline_pos <= index }
  [preceding.size, index - preceding.last + 1]
end
93
+
94
+ end
95
+ end
@@ -0,0 +1,154 @@
1
+
2
+ ##
3
+ # Basic Lexer skeleton - taken from gist:
4
+ # https://gist.github.com/YorickPeterse/10658884
5
+ # Originally from:
6
+ # https://github.com/YorickPeterse/oga/blob/master/lib/oga/xml/lexer.rl
7
+ #
8
+ # License for the source gist reproduced below.
9
+
10
+ # Copyright (c) 2014, Yorick Peterse
11
+ #
12
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
13
+ # of this software and associated documentation files (the "Software"), to deal
14
+ # in the Software without restriction, including without limitation the rights
15
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16
+ # copies of the Software, and to permit persons to whom the Software is
17
+ # furnished to do so, subject to the following conditions:
18
+ #
19
+ # The above copyright notice and this permission notice shall be included in
20
+ # all copies or substantial portions of the Software.
21
+ #
22
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
28
+ # THE SOFTWARE.
29
+
30
+ %%machine lexer; # %
31
+
32
+ class CodeTools::Parser
33
+ class Lexer
34
+ %% write data; # %
35
+
36
##
# Decodes the input into an Array of code points, appends a trailing 0,
# and puts the lexer into its initial state.
#
# @param [String] data The data to lex.
#
def initialize(data)
  codepoints = data.unpack('U*')
  @data = codepoints.push(0)

  reset
end
44
+
45
##
# Resets the internal state of the lexer. Typically you do not need to
# call this method yourself, as it is called by #lex once the input has
# been consumed.
#
# @return the result of #reset_common
#
def reset
  # State of the generated machine.
  @cs    = self.class.lexer_start
  @act   = 0
  @top   = 0
  @stack = []

  # Positions within the input buffer.
  @p   = 0
  @eof = @data.length
  @pe  = @eof
  @ts  = nil
  @te  = nil

  # Output and miscellaneous bookkeeping.
  @tokens   = []
  @elements = []
  @line     = 1

  reset_common
end
65
+
66
##
# Lexes the input given to the constructor and returns an Array of tokens.
# Tokens are collected from #advance until it returns nil or false; each
# token is an Array in the following format, as built by #emit:
#
#     [TYPE, VALUE, LOCATION]
#
# This method resets the internal state of the lexer after consuming the
# input.
#
# @return [Array]
# @see #advance
#
def lex
  collected = []

  token = advance
  while token
    collected.push(token)
    token = advance
  end

  reset

  collected
end
92
+
93
##
# Advances through the input and generates the corresponding tokens.
#
# This method does *not* reset the internal state of the lexer.
#
# Takes no arguments; the input is read from `@data`.
#
# @return [Array, nil] The next queued token, or nil when `@tokens` is empty.
#
def advance
  # Copy the class-level tables into local variables. Presumably the code
  # that Ragel generates for `write exec` below refers to them by these
  # exact names - confirm against the generated lexer before renaming.
  _lexer_actions = self.class.send :_lexer_actions
  _lexer_range_lengths = self.class.send :_lexer_range_lengths
  _lexer_trans_actions = self.class.send :_lexer_trans_actions
  _lexer_key_offsets = self.class.send :_lexer_key_offsets
  _lexer_index_offsets = self.class.send :_lexer_index_offsets
  _lexer_to_state_actions = self.class.send :_lexer_to_state_actions
  _lexer_trans_keys = self.class.send :_lexer_trans_keys
  _lexer_from_state_actions = self.class.send :_lexer_from_state_actions
  _lexer_single_lengths = self.class.send :_lexer_single_lengths
  _lexer_trans_targs = self.class.send :_lexer_trans_targs
  _lexer_eof_trans = self.class.send :_lexer_eof_trans
  _lexer_indicies = self.class.send :_lexer_indicies

  # Ragel directive: the generated machine code is written here.
  %% write exec;
  # %

  # Hand out the oldest queued token; nil when nothing is queued.
  return @tokens.shift
end
120
+
121
+ private
122
+
123
##
# Returns the text of the current buffer between the supplied start and
# stop positions; the stop position itself is excluded.
#
# By default `@ts` and `@te` are used as the start/stop position.
#
# @param [Integer] start
# @param [Integer] stop
# @return [String]
#
def text(start = @ts, stop = @te)
  codepoints = @data[start...stop]
  codepoints.pack('U*')
end
136
+
137
+
138
+ %%{
139
+ # %
140
+ # Use instance variables for `ts` and friends.
141
+ access @;
142
+ getkey (@data[@p] || 0);
143
+ variable p @p;
144
+ variable pe @pe;
145
+ variable eof @eof;
146
+
147
+ action do_nl { do_nl }
148
+
149
+ include "lexer_char_classes.rl"; # Basic character classes
150
+ include "lexer.rl"; # Main rules file
151
+ }%%
152
+ # %
153
+ end
154
+ end