jejune 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,124 @@
1
+ #!/usr/bin/ruby
2
+ # encoding: utf-8
3
+ #--
4
+ # Copyright (c) 2010-2011 Kyle C. Yetter
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining
7
+ # a copy of this software and associated documentation files (the
8
+ # "Software"), to deal in the Software without restriction, including
9
+ # without limitation the rights to use, copy, modify, merge, publish,
10
+ # distribute, sublicense, and/or sell copies of the Software, and to
11
+ # permit persons to whom the Software is furnished to do so, subject to
12
+ # the following conditions:
13
+ #
14
+ # The above copyright notice and this permission notice shall be
15
+ # included in all copies or substantial portions of the Software.
16
+ #
17
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24
+ #++
25
+
26
+ module Jejune
27
# Mixin shared by JJSSource and JJSFile.  It lazily builds the token
# stream, the tree adaptor and the parse tree for an input, and hands
# translation over to a manager object.
#
# The including class must provide an +initialize( source, options )+
# that this module can chain to through +super+.
# NOTE(review): @path is copied from @name, which is presumably set by
# that parent initializer -- confirm against the stream classes.
module JJSInput
  include Constants
  include Utils

  attr_reader   :manager, :path, :cache_file, :dependencies, :macros
  attr_accessor :syntax_errors
  attr_writer   :tree, :adaptor, :tokens

  # Recognised options:
  #   :manager    - translation manager; when missing, a new Manager is
  #                 created from this input and the same options
  #   :cache_file - stored untouched in @cache_file (may be nil)
  def initialize( source, options = {} )
    super
    @manager       = options.fetch( :manager ) { Manager.new( self, options ) }
    @path          = @name
    @cache_file    = options[ :cache_file ]
    @syntax_errors = 0
    @dependencies  = Set.new
    @macros        = {}
  end

  # Directory holding this input, or '.' when the input has no path.
  def directory
    return '.' unless @path
    File.dirname( @path )
  end

  # Looks +name+ up relative to #directory via find_in_directory.
  def find_relative( name, exts = 'jjs|js|' )
    find_in_directory( directory, name, exts )
  end

  # Memoized rewrite stream wrapped around a lexer for this input.
  def tokens
    @tokens = RewriteStream.new( Lexer.new( self ) ) unless @tokens
    @tokens
  end

  # Memoized tree adaptor bound to #tokens.
  def adaptor
    @adaptor = RewriteAdaptor.new( tokens ) unless @adaptor
    @adaptor
  end

  # Memoized parse tree.  The first call runs the parser and records the
  # number of syntax errors it reported in @syntax_errors.
  def tree
    return @tree if @tree

    parser = Parser.new( tokens, :adaptor => adaptor )
    parsed = parser.program.tree
    @syntax_errors = parser.state.syntax_errors
    @tree = parsed
  end

  # Asks the manager to translate this input and returns its result.
  def translate
    @manager.translate( self )
  end

  # Builds a JJSSource for a fragment of this input.  The fragment shares
  # this input's manager and path unless +options+ says otherwise; when
  # +source+ is a Token, its line and column seed the new source and its
  # text becomes the source string.
  def snippet( source, options = {} )
    settings = { :manager => @manager, :file => @path }.merge( options )

    if source.is_a?( Token )
      settings[ :line ]   ||= source.line
      settings[ :column ] ||= source.column
      source = source.text
    end

    JJSSource.new( source, settings )
  end

  # String conversion is the translation result.
  def to_s
    translate
  end
end
96
+
97
+
98
# A StringStream that carries the JJSInput behaviour; JJSInput#snippet
# creates instances of it for source fragments.
class JJSSource < StringStream
  include( JJSInput )
end
101
+
102
# A FileStream that carries the JJSInput behaviour and, after each
# translation, reports its dependencies to the manager and optionally
# stores the result in a cache file.
class JJSFile < FileStream
  include JJSInput

  # Translates the file (see JJSInput#translate), commits the collected
  # dependencies to the manager and, when a cache file is configured and
  # the parse produced no syntax errors, writes the cache.
  #
  # Returns the translated output.
  def translate
    output = super
    @manager.commit_dependencies( @path, @dependencies )
    store_translation_cache( output ) if @cache_file and @syntax_errors.zero?
    return output
  end

  private

  # Serializes output, dependencies and macros into @cache_file.
  #
  # The payload is marshalled *before* the file is opened, so a failing
  # dump cannot leave a truncated cache behind.  File.open is used instead
  # of Kernel#open (which would treat a path starting with "|" as a
  # command), and the file is written in binary mode because Marshal data
  # is not text.
  def store_translation_cache( output )
    payload = Marshal.dump(
      { :output => output, :dependencies => @dependencies, :macros => macros }
    )
    File.open( @cache_file, 'wb' ) { | out | out.write( payload ) }
  end
end
124
+ end
@@ -0,0 +1,163 @@
1
+ #!/usr/bin/ruby
2
+ # encoding: utf-8
3
+ #--
4
+ # Copyright (c) 2010-2011 Kyle C. Yetter
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining
7
+ # a copy of this software and associated documentation files (the
8
+ # "Software"), to deal in the Software without restriction, including
9
+ # without limitation the rights to use, copy, modify, merge, publish,
10
+ # distribute, sublicense, and/or sell copies of the Software, and to
11
+ # permit persons to whom the Software is furnished to do so, subject to
12
+ # the following conditions:
13
+ #
14
+ # The above copyright notice and this permission notice shall be
15
+ # included in all copies or substantial portions of the Software.
16
+ #
17
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24
+ #++
25
+
26
+ module Jejune
27
# Helpers for turning the text of Jejune string literals into
# JavaScript-ready strings.  The module extends itself, so every method can
# be called as JString.method or from an object that includes JString.
module JString
  extend( self )

  # Tab width used by #outdent and #expand_tabs when the module itself is
  # the receiver.  Objects that include JString fall back to 2 unless they
  # assign their own tab_width.
  @tab_width = 2
  attr_accessor :tab_width

  # Packs the given Unicode code points into one UTF-8 string.
  def utf8( *chars )
    chars.pack( 'U*' )
  end

  # Removes physical line breaks from +str+.  A newline preceded by an odd
  # number of backslashes is a continuation: the last backslash and the
  # newline are dropped.  Any other newline is replaced by the two
  # characters backslash + "n".
  def collapse( str )
    str.to_s.gsub( /(\\*)\n/ ) do
      slashes = $1
      slashes.length.even? ? slashes + '\n' : slashes[ 0 ... -1 ]
    end
  end

  # Wraps +str+ in +quote+ (a double quote when +quote+ is nil or false),
  # putting a backslash in front of every occurrence of the quote text
  # that is not already escaped.  Returns a new string.
  def quote( str, quote = nil )
    quote ||= '"'
    unescaped = /(\\*)#{ Regexp.escape( quote ) }/
    body = str.to_s.gsub( unescaped ) do
      slashes = $1
      # an even run of backslashes leaves the quote itself unescaped
      slashes += '\\' if slashes.length.even?
      slashes + quote
    end
    "#{ quote }#{ body }#{ quote }"
  end

  # Strips the common indentation from a multi-line string.
  #
  # * an empty first line and a whitespace-only last line are discarded
  #   (an empty last line is kept, so a trailing newline survives)
  # * when every remaining line starts with a "|" margin, the margin and
  #   one following whitespace character are removed from each line
  # * otherwise the smallest indentation found among lines that start with
  #   whitespace followed by text is removed from every line
  #
  # Indentation is measured and removed with tabs expanded to +tab_width+
  # columns, so tab-indented text keeps its relative indentation (the
  # leading whitespace of such lines comes back as spaces).
  def outdent( str, tab_width = @tab_width || 2 )
    lines = str.to_s.split( /\r?\n/, -1 )
    if lines.length > 1
      lines.shift if lines.first.empty?
      lines.pop   if lines.last =~ /^\s+$/

      margin = /^\s*\|\s?/
      if lines.all? { | line | line =~ margin }
        lines.each { | line | line.gsub!( margin, '' ) }
      else
        widths = lines.map { | line | line[ /^(\s+)\S/, 1 ] }.compact.
                       map { | space | expand_tabs( space, tab_width ).length }
        indent = widths.min || 0
        if indent > 0
          lines.each do | line |
            line.sub!( /^\s+/ ) do | space |
              # slice the *expanded* whitespace: +indent+ is in columns
              expand_tabs( space, tab_width )[ indent .. -1 ].to_s
            end
          end
        end
      end
    end

    return lines.join( $/ )
  end

  # Converts literal content to a quoted JSON string.  Escapes are
  # resolved through #string_value first; afterwards any backslash that
  # the JSON encoder put in front of a "/" is taken out again.
  def jstring( content, single_quote = nil )
    str = string_value( content, single_quote ).to_json
    str.gsub!( %r[(\\*)/] ) do
      slashes = $1
      ( slashes.length.odd? ? slashes[ 0...-1 ] : slashes ) << '/'
    end
    return str
  end

  # Splits +str+ on unescaped whitespace.  A backslash followed by a
  # whitespace character yields that character as part of the word; every
  # other backslash pair is kept verbatim.
  def split_words( str )
    words = str.to_s.scan( /(?:[^\s\\]|\\.)+/m )
    words.map! { | word | word.gsub( /\\(\s)|(\\.)/m ) { $+ } }
  end

  # Returns a copy of +str+ with tabs replaced by spaces up to the next
  # multiple of +tab_width+.  A width of 0 leaves tabs alone and a width
  # of 1 swaps each tab for one space.
  #
  # Raises ArgumentError for a negative width.
  def expand_tabs( str, tab_width = @tab_width || 2 )
    str       = str.to_s.dup
    tab_width = tab_width.to_i
    if tab_width < 0
      raise ArgumentError, "tab width must be >= 0, but expand_tabs called with `#{ tab_width }'"
    end

    case tab_width
    when 0
      # tabs are left untouched
    when 1
      str.tr!( "\t", ' ' )
    else
      # each pass expands the first run of tabs on every line; repeat
      # until no line contains a tab any more
      loop do
        changed = str.gsub!( /^([^\t\n]*)(\t+)/ ) do
          head, tabs = $1, $2
          head + ' ' * ( tab_width * tabs.length - head.length % tab_width )
        end
        break unless changed
      end
    end

    return str
  end

  # escape letters (first string) and the characters they stand for
  DOUBLE_ESC = [ "0abefnrstv", "\0\a\b\e\f\n\r\s\t\v" ].freeze

  # a backslash followed by: \xH / \xHH or \000 (group 1), \uHHHH
  # (group 2), a line break (no group), or any other character (group 3)
  ESC_RX = %r(
    \\
    (?:
      ( x [A-Fa-f0-9]{1,2}
      | 0{3}
      )
    | u ( [A-Fa-f0-9]{4} )
    | \r? \n
    | ( . )
    )
  )x

  # Resolves the backslash escapes in literal content.
  #
  # With +single+ set, only \\ and \' are unescaped and every other
  # sequence is kept as written.  Otherwise \xHH, \000 and \uHHHH become
  # the corresponding code point, the letters listed in DOUBLE_ESC become
  # their control character, a backslash-newline pair disappears, and any
  # other escaped character stands for itself.
  def string_value( str, single = false )
    str = str.to_s
    if single
      str.gsub( %r<\\(.)> ) do
        case $1
        when '\\', "'" then $1
        else $&
        end
      end
    else
      # TODO: handle control/meta sequences \cx \C-x \M-x \M-\C-x
      str.gsub( ESC_RX ) do
        if x = $1
          code = ( x == '000' ) ? 0 : x[ 1 .. -1 ].hex
          # always encode as a code point: Integer#chr would hand back a
          # raw binary byte for values from 0x80 upwards
          [ code ].pack( 'U' )
        elsif u = $2
          [ u.to_i( 16 ) ].pack( 'U' )
        elsif c = $3
          c.tr( *DOUBLE_ESC )
        end
      end
    end
  end
end
163
+ end
@@ -0,0 +1,633 @@
1
+ #!/usr/bin/ruby
2
+ # encoding: utf-8
3
+ #--
4
+ # Copyright (c) 2010-2011 Kyle C. Yetter
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining
7
+ # a copy of this software and associated documentation files (the
8
+ # "Software"), to deal in the Software without restriction, including
9
+ # without limitation the rights to use, copy, modify, merge, publish,
10
+ # distribute, sublicense, and/or sell copies of the Software, and to
11
+ # permit persons to whom the Software is furnished to do so, subject to
12
+ # the following conditions:
13
+ #
14
+ # The above copyright notice and this permission notice shall be
15
+ # included in all copies or substantial portions of the Software.
16
+ #
17
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24
+ #++
25
+
26
+ require 'strscan'
27
+
28
class Regexp
  class << self
    # Builds a Regexp that matches the text of +string+ (converted with
    # to_s) literally; +options+ is handed to Regexp.new unchanged.
    def literal( string, options = 0 )
      verbatim = Regexp.escape( string.to_s )
      Regexp.new( verbatim, options )
    end
  end
end
33
+
34
+ module Jejune
35
+ module LoFiLexer
36
# Raised when no lexical rule matches at the scanner's current position.
class LexerError < StandardError
  # the lexer that failed to match
  attr_reader :lexer

  # Builds the message from the lexer's class name, its current location
  # and a glimpse of (at most 13 characters of) the unscanned input.
  #
  # The glimpse is taken from the scanner's own remaining text rather
  # than by indexing the source with location.position: the location may
  # start at an arbitrary :position offset, in which case that index is
  # wrong or lies beyond the end of the scanned string.
  def initialize( lexer )
    @lexer   = lexer
    location = @lexer.location
    glimpse  = @lexer.scanner.rest[ 0, 13 ].to_s + '...'
    message  = "unable to match any lexical rule of %s for %s => %p" %
               [ @lexer.class.name, location, glimpse ]
    super( message )
  end
end
50
+
51
# A point in a source text: file name, absolute position, line and column.
# Locations are ordered (and compared for equality) by position alone.
class Location
  include Comparable

  MEMBERS = %w(file position line column)

  # Location[ ... ] is shorthand for Location.new( ... ).
  def self.[]( *args )
    new( *args )
  end

  attr_accessor( *MEMBERS )

  def initialize( file = nil, position = nil, line = nil, column = nil )
    @file, @position = file, position
    @line, @column   = line, column
  end

  # Moves this location forward over +text+, in place.  Returns self.
  def <<( text )
    size     = text.length
    newlines = text.count( "\n" )
    if newlines.zero?
      @column += size
    else
      # column = number of characters after the last newline
      @column = size - text.rindex( "\n" ) - 1
    end
    @line     += newlines
    @position += size
    self
  end

  # Moves this location backward over +text+, in place.  Returns self.
  # When +text+ spans several lines, the column is set to the index of
  # the first newline inside +text+.
  def >>( text )
    size     = text.length
    newlines = text.count( "\n" )
    if newlines.zero?
      @column -= size
    else
      @column = text.index( "\n" )
    end
    @line     -= newlines
    @position -= size
    self
  end

  # A copy of this location advanced over +text+.
  def +( text )
    clone << text
  end

  # A copy of this location moved back over +text+.
  def -( text )
    clone >> text
  end

  def <=>( location )
    @position <=> location.position
  end

  # "file F @ (L:C)" by default, "[L:C]" when +long+ is false.
  def to_s( long = true )
    return '[%i:%i]' % [ @line, @column ] unless long
    'file %s @ (%i:%i)' % [ @file, @line, @column ]
  end
end
108
+
109
# Gives any object that responds to #location the readers listed in
# Location::MEMBERS (file, position, line, column).  Each reader forwards
# to the location and yields nil when that fails with a StandardError,
# for instance because no location is set.
module Locatable
  Location::MEMBERS.each do | member |
    define_method( member ) do
      begin
        location.public_send( member )
      rescue StandardError
        nil
      end
    end
  end
end
118
+
119
# A lexed token: its index in the token list, rule type, text, starting
# location and channel.
class Token
  include Locatable
  attr_accessor :index, :channel, :type, :text, :location

  # Every attribute is optional; when a block is given the new token is
  # yielded to it for further set-up.
  def initialize( index = nil, type = nil, text = nil, location = nil, channel = nil )
    @index, @type, @text = index, type, text
    @location, @channel  = location, channel
    yield( self ) if block_given?
  end

  # "[index type(text) @ [line:column]]".
  #
  # Never raises for a partially initialised token: the index is rendered
  # with %p (identical to %i for integers, "nil" otherwise) and a missing
  # location shows up as "[?:?]".
  def inspect
    place = @location ? @location.to_s( false ) : '[?:?]'
    '[%p %p(%p) @ %s]' % [ @index, @type, @text, place ]
  end

  def to_s
    text
  end

  # The location immediately after this token's text.
  def after
    location + text
  end
end
140
+
141
# Base class for lexical rules.  A rule has a name, a channel, an optional
# action and an optional state transition; subclasses implement #match.
class Rule
  attr_accessor :name, :channel, :action, :transition, :target_state, :text
  attr_reader :options

  # name    - rule name, stored as a Symbol
  # options - :channel (default :default), :action (takes precedence over
  #           the block), :text, and at most one transition key out of
  #           :go_to, :push, :pop, :stay whose value is the target state
  # action  - block used as the action when options has no :action
  def initialize( name, options = {}, &action )
    @name    = name.to_sym
    @options = options   # previously never stored, so #options was always nil
    @channel = options[ :channel ] || :default
    @action  = options[ :action ] || action
    # the first transition key carrying a truthy value wins
    @transition   = [ :go_to, :push, :pop, :stay ].find { | type | options[ type ] } || :stay
    @target_state = options[ @transition ]
    @text = options.fetch( :text, nil )
  end

  # Subclasses return the matched text, or nil/false when the rule does
  # not apply at the scanner's position.
  def match( scanner )
    raise NotImplementedError, "#match must be implemented by subclasses"
  end
end
160
+
161
# A rule that matches a regular expression (or a literal string) at the
# scanner's current position.
class RegexRule < Rule
  # Builds a rule for a literal keyword.  Accepted argument lists:
  #
  #   keyword( word, options = {}, &action )        # name: options[ :name ] or WORD upcased
  #   keyword( name, word, options = {}, &action )
  #
  # Raises ArgumentError for any other number of arguments.
  def self.keyword( *args, &action )
    # the trailing Hash comes off +args+ (the original popped from the
    # still-unassigned +options+ local, raising NoMethodError on nil)
    options = args.last.is_a?( Hash ) ? args.pop : {}
    case args.length
    when 1
      word = args[ 0 ].to_s
      name = options[ :name ] || word.upcase
    when 2
      name, word = args
      word = word.to_s
    else
      raise ArgumentError, <<-END.here_flow! % [ options, args ]
      | bad arguments: need (name, keyword, options = {}, &action) or
      | (keyword, options = {}, &action) -- got:
      | options = %p
      | other arguments = %p
      END
    end
    return new( name, word, options, &action )
  end

  attr_accessor :pattern

  # +pattern+ may be a Regexp or anything whose to_s should be matched
  # literally.
  def initialize( name, pattern, options = {}, &action )
    super( name, options, &action )
    @pattern = pattern.is_a?( Regexp ) ? pattern : Regexp.literal( pattern )
  end

  # Returns the matched text, or nil when the pattern does not match.  If
  # the rule has a :text option, that capture group of the match is
  # returned instead of the whole match.
  def match( scanner )
    matched = scanner.scan( pattern )
    return nil unless matched
    @text ? scanner[ @text ] : matched
  end
end
194
+
195
# A rule for text enclosed in delimiters, such as quoted strings.  Escape
# sequences inside the body are skipped, so an escaped closing delimiter
# does not end the match.
class DelimiterRule < Rule
  attr_accessor :open, :close, :escape

  # Accepted forms:
  #
  #   new( name, open, options = {}, &action )          # close == open
  #   new( name, open, close, options = nil, &action )
  #
  # open / close may be Regexps or literal text.  options[ :escape ]
  # (default: a backslash) is either a Regexp matching a whole escape
  # sequence, or a prefix that escapes the single character following it.
  def initialize( name, open, close = {}, options = nil, &action )
    @open = open
    if Hash === close
      @close  = open
      options = close
    else
      @close = close
      options ||= {}
    end
    @escape = options.fetch( :escape, '\\' )
    super( name, options, &action )
    @escape_regexp = ( Regexp === @escape ) ? @escape : Regexp.new( Regexp.escape( @escape.to_s ) << '.' )
    @open_regexp   = ( Regexp === @open )   ? @open   : Regexp.literal( @open )
    @close_regexp  = ( Regexp === @close )  ? @close  : Regexp.literal( @close )
    @content_pause = Regexp.union( @escape_regexp, @close_regexp )
    # matches only when the *whole* text is a closing delimiter; used to
    # tell a close apart from an escape sequence that merely contains one
    @close_exact   = /\A(?:#{ @close_regexp })\z/
  end

  # Returns the delimited text (only the part between the delimiters when
  # the rule's :text option is :body), or false when the opening delimiter
  # is absent or no closing delimiter follows.  On failure the scanner is
  # left where it started.
  #
  # StringScanner positions are byte offsets, so the result is cut out
  # with byteslice; character indexing would return the wrong span as
  # soon as the input contains multi-byte characters.
  def match( scanner )
    start_position = scanner.pos
    scanner.scan( @open_regexp ) or return false
    body_start = scanner.pos

    while scanner.skip_until( @content_pause )
      # anything that is not exactly a closing delimiter is an escape
      next unless @close_exact === scanner.matched

      body_end = scanner.pos - scanner.matched_size
      first, last = ( @text == :body ) ? [ body_start, body_end ] : [ start_position, scanner.pos ]
      return scanner.string.byteslice( first, last - first )
    end

    scanner.pos = start_position
    return false
  end
end
237
+
238
# A DelimiterRule whose body may itself contain balanced open/close pairs,
# e.g. nested brackets.  Opening and closing delimiters must differ.
class NestedDelimiterRule < DelimiterRule
  def initialize( name, open, close, options = {}, &action )
    super
    # unlike the parent, scanning also has to stop at nested openers
    @content_pause = Regexp.union( @escape_regexp, @open_regexp, @close_regexp )
  end

  # Returns the whole delimited text including the outer delimiters, or
  # false when there is no opening delimiter or the delimiters never
  # balance.  On failure the scanner is left where it started.
  #
  # The result is taken with byteslice because StringScanner positions
  # are byte offsets, not character indices.
  def match( scanner )
    start_position = scanner.pos
    scanner.scan( @open_regexp ) or return false
    catch( :nevermind ) do
      loop do
        scanner.skip_until( @content_pause ) or throw( :nevermind )
        case scanner.matched
        when @escape_regexp then next
        when @close_regexp then break
        when @open_regexp
          # back up over the open delimiter, and recursively invoke the matching procedure
          scanner.pos -= scanner.matched_size
          match( scanner ) or throw( :nevermind )
        else
          raise( <<-END.here_flow! % [ @name, scanner.matched, @escape_regexp, @open, @close ] )
          | this shouldn't happen:
          |   rule %p
          |   scanner.matched = %p
          |   @escape_regexp = %p
          |   @open = %p
          |   @close = %p
          END
        end
      end
      return( scanner.string.byteslice( start_position, scanner.pos - start_position ) )
    end
    # reached only through throw( :nevermind ): undo everything
    scanner.pos = start_position
    return false
  end
  alias :match? :match
end
275
+
276
# Rule-definition DSL.  Whatever extends (or includes) this module gets a
# +rules+ list plus private helpers that build a rule and append it.
module CommonDSL
  attr_accessor :rules
  protected :rules=

  # Every extending class starts out with an empty rule list.
  def self.extended( klass )
    klass.instance_variable_set( :@rules, [] )
  end

  private

  # Appends +rule+ to the rule list and returns it.
  def register( rule )
    rules.push( rule )
    rule
  end

  # Defines a RegexRule.
  def rule( name, pattern, options = {}, &action )
    register( RegexRule.new( name, pattern, options, &action ) )
  end

  # Defines a DelimiterRule.
  def delimited( name, open, close = {}, options = nil, &action )
    register( DelimiterRule.new( name, open, close, options, &action ) )
  end

  # Defines a NestedDelimiterRule.
  def nested( name, open, close, options = {}, &action )
    register( NestedDelimiterRule.new( name, open, close, options, &action ) )
  end

  # Defines a keyword rule (see RegexRule.keyword).
  def keyword( *args, &action )
    register( RegexRule.keyword( *args, &action ) )
  end

  # Subclasses begin with their own copy of the parent's rule list.
  def inherited( klass )
    klass.rules = @rules.clone
  end
end
303
+
304
# Abstract token source.  Subclasses implement #match, which must consume
# input from the scanner and call #emit_token at least once, returning a
# truthy value on success and nil/false when there is nothing left to lex.
class Lexer
  include Enumerable

  attr_reader :scanner, :location, :token
  attr_accessor :channel

  # text    - the source string to scan
  # options - :file / :file_name  name used in locations (default "(string)")
  #           :channel            channel that tuned reads deliver (default :default)
  #           :location           starting location; when absent one is built
  #                               from :position (0), :line (1) and :column (0)
  #           :debug              enables #debug output (default $DEBUG)
  def initialize( text, options = {} )
    file_name = options[ :file ] || options[ :file_name ] || '(string)'
    channel   = options[ :channel ] || :default
    unless location = options[ :location ]
      position = options[ :position ] || 0
      line     = options[ :line ] || 1
      column   = options[ :column ] || 0
      location = Location.new( file_name, position, line, column )
    end

    @scanner         = StringScanner.new( text )
    @channel         = channel
    @starting_line   = @location = location
    @tokens          = []
    @emission_buffer = []
    @token           = nil
    @debug           = options.fetch( :debug, $DEBUG )
  end

  # The string being scanned.
  def source
    @scanner.string
  end

  # Rewinds to the start of the input and forgets every token, including
  # tokens that were emitted but not yet handed out (leaving those in the
  # buffer would deliver stale tokens after the rewind).
  def reset
    @scanner.pos = 0
    @location = @starting_line
    @tokens.clear
    @emission_buffer.clear
    @token = nil
  end

  # Returns the next token, or nil at the end of the input.
  #
  # tune - true:     only tokens on the lexer's current channel
  #        a Symbol: only tokens on that channel
  #        false:    every token
  #
  # Skipped tokens are still recorded.  The method loops instead of
  # recursing, so a long run of off-channel tokens cannot exhaust the
  # stack.
  def next( tune = true )
    while true
      # re-read on every pass: a rule action may have switched channels
      channel = tune.is_a?( Symbol ) ? tune : @channel
      until @emission_buffer.empty?
        @token = @emission_buffer.shift
        @tokens << @token
        return( @token ) if !tune or @token.channel == channel
      end
      match or return nil
    end
  end

  def match
    raise NotImplementedError, "subclasses must implement #match"
  end

  # Queues a token of the given type at the current location and advances
  # the location past +text+.  Returns the token.
  def emit_token( type, text, channel = :default )
    index = @tokens.length + @emission_buffer.length
    token = create_token( index, type, text, @location, channel )
    @emission_buffer << token
    @location += text
    return token
  end

  # Token factory; override to produce a different token class.
  def create_token( index, type, text, location, channel )
    Token.new( index, type, text, location, channel )
  end

  # Yields the tokens lexed so far, then keeps lexing until the input is
  # exhausted.  +tune+ filters by channel as in #next.  Without a block an
  # enumerator is returned.
  def each( tune = true )
    block_given? or return enum_for( :each, tune )
    if tune
      channel = tune.is_a?( Symbol ) ? tune : @channel
      @tokens.each { | token | token.channel == channel and yield( token ) }
    else
      @tokens.each { | token | yield( token ) }
    end
    while token = self.next( tune )
      yield( token )
    end
    return self
  end

  # Lexes the whole input.  With a block, returns the block's results for
  # each token; without one, returns the tokens as an Array (a block-less
  # +map+ only produced a lazy enumerator, so nothing was lexed).
  def lex!( tune = true )
    if block_given?
      each( tune ).map { | token | yield( token ) }
    else
      each( tune ).to_a
    end
  end

  # Tokens handed out so far, filtered by channel as in #next; with
  # tune == false the internal token list itself is returned.
  def tokens( tune = true )
    return @tokens unless tune
    channel = tune.is_a?( Symbol ) ? tune : @channel
    @tokens.select { | token | token.channel == channel }
  end

  # Array-style access to the tokens handed out so far.
  def []( *args )
    @tokens[ *args ]
  end

  # Prints the block's value to $stderr when debugging is enabled; the
  # block is not evaluated otherwise.
  def debug
    if @debug
      $stderr.puts( "\e[31m#{ self.class }\e[0m lexer debug: \e[36m#{ yield }\e[0m" )
    end
  end
end
412
+
413
# Stateless lexer: one flat rule list, declared on the class through
# CommonDSL and tried in order at every position.
class CommonLexer < Lexer
  extend CommonDSL

  def initialize( text, options = {} )
    super
    @rules = self.class.rules
  end

  # Applies the first rule that matches at the scanner's position and
  # returns true; returns nil at the end of the input.  When no rule
  # matches, a LexerError is raised.
  def match
    return nil if @scanner.eos?

    @rules.each do | rule |
      text = rule.match( @scanner ) or next
      matched!( rule, text )
      return true
    end
    match_failed!
  end

  # Emits the token for a successful rule, then runs the rule's action
  # (if it has one) in the context of this lexer.
  def matched!( rule, text )
    emit_token( rule.name, text, rule.channel )
    action = rule.action
    action and instance_eval( &action )
  end

  # Raises a LexerError whose backtrace starts at this method's caller.
  def match_failed!
    failure = LexerError.new( self )
    failure.set_backtrace( caller )
    raise( failure )
  end

  private :match_failed!, :matched!
end
446
+
447
# Class-level DSL for lexers built from named states.  Rules registered
# outside of any state block go to the :global state.
module StatefulDSL
  attr_accessor :states, :initial_state, :global_state

  # An extending class starts with just the :global state and no initial
  # state (the first state declared afterwards becomes the initial one).
  def self.extended( klass )
    klass.instance_variable_set( :@states, {} )
    klass.instance_variable_set( :@global_state, klass.state( :global ) )
    klass.instance_variable_set( :@initial_state, nil )
  end

  # Looks up, or creates, the state called +name+.  The first state that
  # goes through here while no initial state is set becomes the initial
  # state.  A block, if given, is evaluated by the state (State#specify).
  # Returns the state.
  def state( name, *args, &body )
    state = @states[ name.to_sym ] ||= State.new( self, name, *args )
    @initial_state ||= state
    block_given? and state.specify( &body )
    return state
  end

  # Declares the state a new lexer starts in.
  def start_in( name )
    @initial_state = state( name )
  end

  # Rules registered at class level belong to the global state.
  def register( rule )
    @global_state.rules << rule
    return rule
  end

  # A subclass receives clones of all states, re-bound to the subclass,
  # and the clone of the parent's initial state as its own.
  def inherited( klass )
    copies = {}
    @states.each do | name, state |
      copy = state.clone
      copy.lexer = klass
      copies[ name ] = copy
    end
    klass.states        = copies
    klass.global_state  = klass.state( :global )
    klass.initial_state = ( @initial_state && klass.states[ @initial_state.name ] )
  end

  # Only the states writer is restricted.  The global_state *reader* must
  # remain public: StatefulLexer#initialize calls it on the class from a
  # lexer instance, which a protected method does not allow.
  protected :states=
  private :register, :inherited, :start_in
end
485
+
486
# One named state of a stateful lexer: a rule list plus optional hooks
# run when the state is entered, left, or fails to match anything.
class State
  include CommonDSL

  # Reads the on-enter hook.  When called with a block, or with an object
  # responding to to_proc, that becomes the hook first.
  def on_enter( action = nil, &b )
    if block_given? then @on_enter = b
    elsif action then @on_enter = action.to_proc
    end
    return @on_enter
  end

  # Same as #on_enter, for the hook run when the state is left.
  def on_exit( action = nil, &b )
    if block_given? then @on_exit = b
    elsif action then @on_exit = action.to_proc
    end
    return @on_exit
  end

  # Same as #on_enter, for the hook run when no rule of the state matches.
  def on_failure( action = nil, &b )
    if block_given? then @on_failure = b
    elsif action then @on_failure = action.to_proc
    end
    return @on_failure
  end

  attr_accessor :name, :lexer

  # lexer_class - the lexer class owning this state
  # name        - state name, stored as a Symbol
  # options     - a :go_to, :push or :pop entry (first one with a truthy
  #               value) installs a default failure hook performing that
  #               transition
  # body        - optional block evaluated by the state (see #specify)
  def initialize( lexer_class, name, options = {}, &body )
    @lexer = lexer_class
    @name  = name.to_sym
    @rules = []
    fail_action = [ :go_to, :push, :pop ].find { | action | options[ action ] }
    # A plain Proc, not a lambda: the hook is run with instance_eval, which
    # passes the receiver as an argument, and a zero-parameter lambda
    # rejects that with ArgumentError.
    @on_failure = fail_action && Proc.new do
      make_transition( fail_action, options[ fail_action ] )
    end
    @on_enter = nil
    @on_exit  = nil
    block_given? and specify( &body )
  end

  # Copies are detached from the lexer class and receive their own clones
  # of the rules; the hooks stay shared with the original.
  def initialize_copy( orig )
    @lexer = nil
    @rules = orig.rules.map { | rule | rule.clone }
  end

  # state.specify { ... } evaluates the block with the state as self, so
  # the CommonDSL helpers and the hook setters can be called bare.
  alias specify instance_eval

  # Appends the rules of another state of the same lexer and adopts its
  # hooks wherever this state has none of its own.
  def include( state_name )
    state = @lexer.state( state_name )
    @rules.concat( state.rules )
    @on_failure ||= state.on_failure
    @on_exit    ||= state.on_exit
    @on_enter   ||= state.on_enter
  end
end
543
+
544
# Lexer driven by a set of named states.  Only the rules of the current
# state are tried; rules may switch (:go_to), push or pop states.
class StatefulLexer < Lexer
  # how often #match retries after a state's failure hook has run
  MAX_RETRIES = 5
  extend CommonDSL
  extend StatefulDSL

  attr_reader :state, :start_state

  # In addition to the Lexer options, :initial_state (or :state) names the
  # state to start in.  Without it the class's initial state is used, and
  # failing that the global state.
  #
  # Raises RuntimeError when the named start state does not exist (the
  # lexer used to be left without a state, failing on the first match).
  def initialize( text, options = {} )
    super
    @states      = self.class.states
    @state_stack = []
    name = options[ :initial_state ] || options[ :state ]
    @state =
      if name then fetch_state( name )
      else
        # the global state is always registered under :global; looking it
        # up here avoids depending on the visibility of the class-level
        # global_state reader
        self.class.initial_state || @states[ :global ]
      end
    @start_state = @state
  end

  # Rules of the current state.
  def rules
    @state.rules # or raise("state %p has no rules" % @state)
  end

  # Rewinds the input and returns to the start state with an empty state
  # stack.
  def reset
    super
    @state_stack.clear
    @state = @start_state
  end

  # Performs a state transition, running the on_exit hook of the state
  # that is left and the on_enter hook of the state that is entered.
  #
  # type   - :go_to or :push (both need +target+, a state name), or :pop,
  #          which returns to the previously pushed state (the start state
  #          if the stack is empty); any other type does nothing
  def make_transition( type, target )
    debug { "state transition -- #{ @state.name } -> #{ type } #{ target }" }
    case type
    when :go_to
      switch_state { fetch_state( target ) }
    when :push
      switch_state do
        @state_stack.push( @state )
        fetch_state( target )
      end
    when :pop
      switch_state { @state_stack.pop || @start_state }
    end
  end

  # Looks a state up by name, raising RuntimeError for unknown names.
  def fetch_state( name )
    @states.fetch( name ) do
      # TODO: make this more informative
      raise( "this lexer has no state named %p" % name )
    end
  end

  # Applies the first matching rule of the current state and returns true.
  # When nothing matches, the state's failure hook runs (a LexerError is
  # raised if there is none) and matching is retried, up to MAX_RETRIES
  # times.  Returns nil at the end of the input and false when the retries
  # are used up.
  def match
    return nil if @scanner.eos?

    MAX_RETRIES.times do
      @state.rules.each do | rule |
        text = rule.match( @scanner ) or next
        matched!( rule, text )
        return true
      end
      match_failed!
    end
    return false
  end

  # Emits the token, performs the rule's transition, then runs the rule's
  # action with the token as its argument.
  def matched!( rule, text )
    token = emit_token( rule.name, text, rule.channel )
    make_transition( rule.transition, rule.target_state )
    action = rule.action and instance_exec( token, &action )
  end

  def match_failed!
    if action = @state.on_failure
      run_hook( action )
    else
      error = LexerError.new( self )
      error.set_backtrace( caller )
      raise( error )
    end
  end

  # Leaves the current state, makes the block's value the new state and
  # enters it.
  def switch_state
    run_hook( @state.on_exit )
    @state = yield
    run_hook( @state.on_enter )
  end

  # Runs a state hook with this lexer as self.  Hooks that declare no
  # parameters are run without arguments, so lambdas work as well as
  # blocks; all others receive the lexer, as instance_eval provides.
  def run_hook( hook )
    return unless hook
    hook.arity.zero? ? instance_exec( &hook ) : instance_eval( &hook )
  end

  private :make_transition, :matched!, :match_failed!, :switch_state, :run_hook
end
632
+ end
633
+ end