jejune 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,124 @@
1
+ #!/usr/bin/ruby
2
+ # encoding: utf-8
3
+ #--
4
+ # Copyright (c) 2010-2011 Kyle C. Yetter
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining
7
+ # a copy of this software and associated documentation files (the
8
+ # "Software"), to deal in the Software without restriction, including
9
+ # without limitation the rights to use, copy, modify, merge, publish,
10
+ # distribute, sublicense, and/or sell copies of the Software, and to
11
+ # permit persons to whom the Software is furnished to do so, subject to
12
+ # the following conditions:
13
+ #
14
+ # The above copyright notice and this permission notice shall be
15
+ # included in all copies or substantial portions of the Software.
16
+ #
17
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24
+ #++
25
+
26
+ module Jejune
27
# Behavior shared by Jejune input streams. The including class supplies the
# underlying stream (reached through +super+ in #initialize); this module adds
# lazy construction of the token stream, tree adaptor and parse tree, and
# delegates translation to the manager.
module JJSInput
  include Constants
  include Utils

  attr_reader :manager, :path, :cache_file, :dependencies, :macros
  attr_accessor :syntax_errors
  attr_writer :tree, :adaptor, :tokens

  # source  - passed straight through to the host class via +super+
  # options - hash; :manager and :cache_file are read here, and the complete
  #           hash is also given to +super+ and to Manager.new when no
  #           :manager was supplied
  #
  # NOTE(review): @name is presumably assigned by the host class's
  # initialize -- confirm against StringStream / FileStream.
  def initialize( source, options = {} )
    super
    @manager       = options.fetch( :manager ) { Manager.new( self, options ) }
    @cache_file    = options[ :cache_file ]
    @dependencies  = Set.new
    @path          = @name
    @syntax_errors = 0
    @macros        = {}
  end

  # Directory part of @path, or '.' when the input has no path.
  def directory
    return '.' unless @path
    File.dirname( @path )
  end

  # Looks +name+ up next to this input by delegating to find_in_directory
  # with this input's directory.
  def find_relative( name, exts = 'jjs|js|' )
    find_in_directory( directory, name, exts )
  end

  # Token stream over this input; built on first use and then reused.
  def tokens
    @tokens ||= begin
      lexer = Lexer.new( self )
      RewriteStream.new( lexer )
    end
  end

  # Tree adaptor bound to the token stream; built on first use.
  def adaptor
    @adaptor ||= RewriteAdaptor.new( tokens )
  end

  # Parse tree of the input. The first call runs the parser and records the
  # parser's syntax error count in @syntax_errors; later calls reuse the tree.
  def tree
    return @tree if @tree
    parser = Parser.new( tokens, :adaptor => adaptor )
    parsed = parser.program.tree
    @syntax_errors = parser.state.syntax_errors
    @tree = parsed
  end

  # Asks the manager to translate this input.
  def translate
    @manager.translate( self )
  end

  # Wraps +source+ in a new JJSSource that shares this input's manager and
  # reports this input's path as its file. When +source+ is a Token, its
  # line and column are used unless the caller supplied them, and the
  # token's text becomes the source.
  def snippet( source, options = {} )
    settings = { :manager => @manager, :file => @path }
    settings.update( options )

    if source.is_a?( Token )
      settings[ :line ]   ||= source.line
      settings[ :column ] ||= source.column
      source = source.text
    end

    JJSSource.new( source, settings )
  end

  # The string form of an input is its translation.
  def to_s
    translate
  end
end
96
+
97
+
98
# String-backed Jejune input: a StringStream subclass that mixes in JJSInput.
+ class JJSSource < StringStream
99
+ include JJSInput
100
+ end
101
+
102
# File-backed Jejune input. On top of JJSInput it reports the file's
# dependencies to the manager after translating and, when a cache file is
# configured, stores the translation result on disk.
class JJSFile < FileStream
  include JJSInput

  # Translates the file through JJSInput#translate, then
  #   * hands @dependencies to the manager under this file's path, and
  #   * if @cache_file is set and no syntax errors were counted, writes a
  #     Marshal dump of { :output, :dependencies, :macros } to @cache_file.
  #
  # Returns the translated output.
  def translate
    output = super
    @manager.commit_dependencies( @path, @dependencies )

    if @cache_file and @syntax_errors.zero?
      # Serialize before touching the file so a failing dump cannot leave a
      # truncated cache behind.
      payload = Marshal.dump(
        :output       => output,
        :dependencies => @dependencies,
        :macros       => macros
      )
      # Marshal output is binary: open the cache in binary mode ('wb') so no
      # newline translation or transcoding can corrupt it. File.open is used
      # rather than Kernel#open so a path beginning with "|" is never treated
      # as a command to run.
      File.open( @cache_file, 'wb' ) { | out | out.write( payload ) }
    end

    output
  end
end
124
+ end
@@ -0,0 +1,163 @@
1
+ #!/usr/bin/ruby
2
+ # encoding: utf-8
3
+ #--
4
+ # Copyright (c) 2010-2011 Kyle C. Yetter
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining
7
+ # a copy of this software and associated documentation files (the
8
+ # "Software"), to deal in the Software without restriction, including
9
+ # without limitation the rights to use, copy, modify, merge, publish,
10
+ # distribute, sublicense, and/or sell copies of the Software, and to
11
+ # permit persons to whom the Software is furnished to do so, subject to
12
+ # the following conditions:
13
+ #
14
+ # The above copyright notice and this permission notice shall be
15
+ # included in all copies or substantial portions of the Software.
16
+ #
17
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24
+ #++
25
+
26
+ module Jejune
27
# String helpers: quoting, escape-sequence decoding, tab expansion and
# indentation trimming. The module extends itself, so every method can be
# called as JString.method_name as well as through inclusion.
module JString
  extend( self )

  # default tab width used by #outdent and #expand_tabs when called on the
  # module itself
  @tab_width = 2
  attr_accessor :tab_width

  # Packs the given code points into a UTF-8 string.
  def utf8( *chars )
    chars.pack( 'U*' )
  end

  # Removes real line breaks from +str+: a newline preceded by an odd number
  # of backslashes is dropped together with its last backslash (a line
  # continuation); any other newline becomes the two characters `\n`.
  def collapse( str )
    str.to_s.gsub( /(\\*)\n/ ) { $1.length.even? ? $1 << '\n' : $1[ 0 ... -1 ] }
  end

  # Wraps +str+ in +quote+ characters (double quotes when +quote+ is nil),
  # adding a backslash in front of every occurrence of the quote that is not
  # already escaped (i.e. preceded by an even number of backslashes).
  def quote( str, quote = nil )
    if quote
      qrx = /(\\*)#{ Regexp.escape( quote ) }/
      str = str.to_s.gsub( qrx ) { ( $1.length % 2 == 0 ? $1 << '\\' : $1 ) << quote }
      str.insert( -1, quote ).insert( 0, quote )
    else
      # assume double quote
      str = str.to_s.gsub( /(\\*)"/ ) { ( $1.length % 2 == 0 ? $1 << '\\' : $1 ) << ?" }
      str.insert( -1, '"' ).insert( 0, '"' )
    end
  end

  # Strips common indentation from a multi-line string.
  #
  # Single-line strings are returned unchanged. For multi-line strings:
  #   * an empty first line and a blank (empty or whitespace-only) last line
  #     are discarded, so
  #       xyz = %(          <- first line = ""
  #         blah blah blah
  #       )                 <- last line = ""
  #     yields "blah blah blah";
  #   * if every remaining line starts with an optional-whitespace `|`
  #     margin, the margin (and one following whitespace) is removed;
  #   * otherwise the smallest indentation found among indented, non-blank
  #     lines (measured with tabs expanded to +tab_width+) is cut from the
  #     leading whitespace of every line.
  #
  # Lines are re-joined with $/.
  def outdent( str, tab_width = @tab_width || 2 )
    lines = str.to_s.split( /\r?\n/, -1 )
    if lines.length > 1
      lines.first.empty? and lines.shift    # trash the first empty line
      # \s* (not \s+): the closing line is frequently completely empty, and
      # it must be dropped as well
      lines.last =~ /^\s*$/ and lines.pop   # trash the last blank line

      margin = /^\s*\|\s?/
      if lines.all? { | l | l =~ margin }
        lines.each { | l | l.gsub!( margin, '' ) }
      else
        widths = lines.map { | l | l[ /^(\s+)\S/, 1 ] }.compact.
                       map { | ws | expand_tabs( ws, tab_width ).length }
        indent = widths.min || 0
        if indent > 0
          lines.each do | line |
            line.sub!( /^\s+/ ) { | space | space[ indent, space.length ] }
          end
        end
      end
    end

    return lines.join( $/ )
  end

  # Decodes +content+ with #string_value and renders it as a JSON string
  # literal (via #to_json), then removes the backslash from any `\/` so
  # forward slashes appear unescaped.
  def jstring( content, single_quote = nil )
    str = string_value( content, single_quote ).to_json
    str.gsub!( %r[(\\*)/] ) do
      slashes = $1
      ( slashes.length.odd? ? slashes[ 0...-1 ] : slashes ) << '/'
    end
    return str
  end

  # Splits +str+ on unescaped whitespace. A backslash followed by whitespace
  # keeps that whitespace inside the word (and drops the backslash); other
  # backslash pairs are left as they are.
  def split_words( str )
    str.to_s.
      scan( /(?:[^\s\\]|\\.)+/m ).
      map! { | w | w.gsub!( /\\(\s)|(\\.)/m ) { $+ } or w }
  end

  # Returns a copy of +str+ with tabs replaced by spaces up to the next tab
  # stop. A width of 0 leaves the string untouched, a width of 1 swaps each
  # tab for one space.
  #
  # Raises ArgumentError for a negative +tab_width+.
  def expand_tabs( str, tab_width = @tab_width || 2 )
    str = str.to_s.dup
    tab_width = tab_width.to_i
    raise ArgumentError, "tab width must be >= 0, but expand_tabs called with `#{ tab_width }'" if tab_width < 0

    case tab_width
    when 0
      # nothing to expand
    when 1
      str.gsub!( /\t/, " " )
    else
      # each pass expands the first run of tabs on every line; gsub! returns
      # nil once no tabs are left, which ends the loop
      loop do
        changed = str.gsub!( /^([^\t\n]*)(\t+)/ ) {
          leading = $1.length % tab_width
          length  = ( tab_width * $2.length - leading )
          $1 << ( ' ' * length )
        }
        break unless changed
      end
    end

    return str
  end

  # escape letters and the characters they stand for, in #tr form
  DOUBLE_ESC = [ "0abefnrstv", "\0\a\b\e\f\n\r\s\t\v" ]

  # A backslash escape: group 1 = hex (`x41`) or the literal `000`,
  # group 2 = four hex digits of a `\u` escape, no group = an escaped line
  # break, group 3 = any other single escaped character.
  # NOTE(review): `0{3}` only matches the text "000"; it may have been meant
  # as a general octal escape -- confirm before widening it.
  ESC_RX = %r(
    \\
    (?:
      ( x [A-Fa-f0-9]{1,2}
      | 0{3}
      )
    | u ( [A-Fa-f0-9]{4} )
    | \r? \n
    | ( . )
    )
  )x

  # Decodes the backslash escapes in the body of a string literal.
  #
  # str    - the literal's content (without its quotes)
  # single - when true, only `\\` and `\'` are decoded (single-quote rules);
  #          otherwise hex, \u, line-continuation and single-letter escapes
  #          are decoded
  #
  # Returns a new string.
  def string_value( str, single = false )
    str = str.to_s
    if single
      str.gsub( %r<\\(.)> ) do
        case $1
        when '\\', "'" then $1
        else $&
        end
      end
    else
      # TODO: handle control/meta sequences \cx \C-x \M-x \M-\C-x
      str.gsub( ESC_RX ) do
        if x = $1
          i = Integer( "0#{ x }" )
          # values above 127 are not ASCII: encode them as UTF-8 code points.
          # (Integer#chr would give a lone binary byte for 128, which is not
          # valid UTF-8.)
          i > 127 ? [ i ].pack( 'U' ) : i.chr
        elsif u = $2
          [ u.to_i( 16 ) ].pack( 'U' )
        elsif c = $3
          c.tr!( *DOUBLE_ESC ) or c
        end
        # an escaped line break matches no group: the block yields nil and
        # the sequence is removed
      end
    end
  end
end
163
+ end
@@ -0,0 +1,633 @@
1
+ #!/usr/bin/ruby
2
+ # encoding: utf-8
3
+ #--
4
+ # Copyright (c) 2010-2011 Kyle C. Yetter
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining
7
+ # a copy of this software and associated documentation files (the
8
+ # "Software"), to deal in the Software without restriction, including
9
+ # without limitation the rights to use, copy, modify, merge, publish,
10
+ # distribute, sublicense, and/or sell copies of the Software, and to
11
+ # permit persons to whom the Software is furnished to do so, subject to
12
+ # the following conditions:
13
+ #
14
+ # The above copyright notice and this permission notice shall be
15
+ # included in all copies or substantial portions of the Software.
16
+ #
17
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24
+ #++
25
+
26
+ require 'strscan'
27
+
28
class Regexp
  # Builds a Regexp that matches the string form of +string+ verbatim: every
  # regular-expression metacharacter in it is escaped first.
  #
  # string  - any object; converted with #to_s
  # options - passed on unchanged as the options argument of Regexp.new
  def self.literal( string, options = 0 )
    escaped = Regexp.escape( string.to_s )
    Regexp.new( escaped, options )
  end
end
33
+
34
+ module Jejune
35
+ module LoFiLexer
36
# Raised when a lexer cannot match any of its rules at the current location.
# The message names the lexer class, the location and a short preview (up to
# 13 characters) of the input found at that location.
class LexerError < StandardError
  # the lexer that failed
  attr_reader :lexer

  # lexer - must respond to #location (which responds to #position) and
  #         #scanner (which responds to #string)
  def initialize( lexer )
    @lexer  = lexer
    where   = lexer.location
    preview = lexer.scanner.string[ where.position, 13 ] + '...'
    super(
      format(
        "unable to match any lexical rule of %s for %s => %p",
        lexer.class.name, where, preview
      )
    )
  end
end
50
+
51
# A position inside a source text: file name, absolute offset, line and
# column. Locations order by their absolute offset.
class Location
  MEMBERS = %w(file position line column)

  # Location[ file, position, line, column ] is shorthand for Location.new.
  def self.[]( *args )
    new( *args )
  end

  attr_accessor( *MEMBERS )

  def initialize( file = nil, position = nil, line = nil, column = nil )
    @file     = file
    @position = position
    @line     = line
    @column   = column
  end

  # Advances this location past +text+ (in place) and returns self. When the
  # text contains line breaks, the column becomes the number of characters
  # after the last one.
  def <<( text )
    length     = text.length
    line_count = text.count( "\n" )
    if line_count.zero?
      @column += length
    else
      @column = length - text.rindex( "\n" ) - 1
    end
    @line     += line_count
    @position += length
    return self
  end

  # Moves this location back over +text+ (in place) and returns self. When
  # the text contains line breaks, the column is set to the length of the
  # text's first line.
  def >>( text )
    length     = text.length
    line_count = text.count( "\n" )
    if line_count.zero?
      @column -= length
    else
      @column = text.index( "\n" )
    end
    @line     -= line_count
    @position -= length
    return self
  end

  # Non-destructive form of #<<: returns an advanced copy.
  def +( text )
    clone << text
  end

  # Non-destructive form of #>>: returns a rewound copy.
  def -( text )
    clone >> text
  end

  # Compares by absolute position. Returns nil for objects that have no
  # #position, so that Comparable#== answers false for them (e.g.
  # `location == nil`) rather than raising NoMethodError.
  def <=>( location )
    return nil unless location.respond_to?( :position )
    @position <=> location.position
  end

  include Comparable

  # long - true:  'file NAME @ (LINE:COLUMN)'
  #        false: '[LINE:COLUMN]'
  def to_s( long = true )
    if long
      'file %s @ (%i:%i)' % [ @file, @line, @column ]
    else
      '[%i:%i]' % [ @line, @column ]
    end
  end
end
108
+
109
# Mixin for objects that respond to #location. It defines one reader per
# Location member (file, position, line, column); each reader forwards to the
# object's location and answers nil if that call raises a StandardError
# (for instance when the location is nil).
module Locatable
  Location::MEMBERS.each do | member |
    define_method( member ) do
      begin
        location.public_send( member )
      rescue
        nil
      end
    end
  end
end
118
+
119
# A lexical token: its index in the token list, its type, its text, the
# location where it starts and the channel it was emitted on.
class Token
  include Locatable
  attr_accessor :index, :channel, :type, :text, :location

  # All fields are optional. When a block is given, the new token is yielded
  # to it after the fields have been assigned.
  def initialize( index = nil, type = nil, text = nil, location = nil, channel = nil )
    @index, @type, @text, @location, @channel = index, type, text, location, channel
    yield( self ) if block_given?
  end

  # '[INDEX TYPE(TEXT) @ [LINE:COLUMN]]'
  def inspect
    format( '[%i %p(%p) @ %s]', @index, @type, @text, @location.to_s( false ) )
  end

  # A token's string form is its text.
  def to_s
    text
  end

  # The location immediately after this token's text.
  def after
    location + text
  end
end
140
+
141
# Base class of lexical rules. A rule has a name (used as the token type), a
# channel, an optional action and an optional state transition.
class Rule
  attr_accessor :name, :channel, :action, :transition, :target_state, :text
  attr_reader :options

  # name    - rule / token type name; stored as a symbol
  # options - hash; recognized keys:
  #           :channel  token channel (default :default)
  #           :action   callable used when no block is given
  #           :go_to / :push / :pop / :stay
  #                     the first of these with a truthy value becomes the
  #                     transition and its value the target state
  #           :text     stored in #text for subclasses to interpret
  # action  - block run when the rule matches (only if :action is absent)
  def initialize( name, options = {}, &action )
    # keep the hash around: #options is part of the public interface but was
    # never assigned, so it always answered nil
    @options = options
    @name    = name.to_sym
    @channel = options[ :channel ] || :default
    @action  = options[ :action ] || action
    # the assignment inside the block doubles as the truth test: the search
    # stops at the first transition type that has a value
    @transition = [ :go_to, :push, :pop, :stay ].find do | type |
      @target_state = options[ type ]
    end
    @transition ||= :stay
    @text = options.fetch( :text, nil )
  end

  # Subclasses try to match at the scanner's current position.
  #
  # Raises NotImplementedError here.
  def match( scanner )
    raise NotImplementedError, "#match must be implemented by subclasses"
  end
end
160
+
161
# A rule that matches a regular expression (or a literal string) at the
# scanner's current position.
class RegexRule < Rule
  # Builds a rule for a fixed keyword.
  #
  #   keyword( word, options = {}, &action )        name defaults to
  #                                                 options[ :name ] or the
  #                                                 upper-cased word
  #   keyword( name, word, options = {}, &action )
  #
  # Raises ArgumentError for any other argument count.
  def self.keyword( *args, &action )
    # take the trailing options hash off the argument list (the original
    # called #pop on the not-yet-assigned +options+ local, i.e. on nil)
    options = args.last.is_a?( Hash ) ? args.pop : {}
    case args.length
    when 1
      word = args[ 0 ].to_s
      name = options[ :name ] || word.to_s.upcase
    when 2
      name, word = args
      word = word.to_s
    else
      raise ArgumentError, <<-END.here_flow! % [ options, args ]
      | bad arguments: need (name, keyword, options = {}, &action) or
      | (keyword, options = {}, &action) -- got:
      | options = %p
      | other arguments = %p
      END
    end
    return new( name, word, options, &action )
  end

  attr_accessor :pattern

  # pattern - a Regexp, or any other object, which is matched literally
  def initialize( name, pattern, options = {}, &action )
    super( name, options, &action )
    @pattern = pattern.is_a?( Regexp ) ? pattern : Regexp.literal( pattern )
  end

  # Scans for the pattern at the scanner's position. Returns nil when it does
  # not match; otherwise the matched text, or -- when the rule has a :text
  # option -- the capture group selected by it.
  def match( scanner )
    matched = scanner.scan( pattern )
    return matched unless matched
    @text ? scanner[ @text ] : matched
  end
end
194
+
195
# A rule that matches text enclosed by an opening and a closing delimiter,
# skipping over escaped characters in between (a quoted string, for example).
class DelimiterRule < Rule
  attr_accessor :open, :close, :escape

  # name    - rule name
  # open    - opening delimiter (String or Regexp)
  # close   - closing delimiter; when a Hash is given in this position it is
  #           taken as the options and the closing delimiter equals +open+
  # options - Rule options plus :escape (String or Regexp, default '\\');
  #           a String escape also consumes the character that follows it
  def initialize( name, open, close = {}, options = nil, &action )
    @open = open
    if Hash === close
      @close  = open
      options = close
    else
      @close = close
      options ||= {}
    end
    @escape = options.fetch( :escape, '\\' )
    super( name, options, &action )
    @escape_regexp = ( Regexp === @escape ) ? @escape : Regexp.new( Regexp.escape( @escape.to_s ) << '.' )
    @open_regexp   = ( Regexp === @open ) ? @open : Regexp.literal( @open )
    @close_regexp  = ( Regexp === @close ) ? @close : Regexp.literal( @close )
    @content_pause = Regexp.union( @escape_regexp, @close_regexp )
  end

  # Tries to match a delimited span at the scanner's position.
  #
  # Returns false (with the scanner position restored) when the opening
  # delimiter is absent or no closing delimiter follows. Otherwise returns
  # the matched text: the whole span including delimiters, or only the part
  # between them when the rule was created with :text => :body.
  def match( scanner )
    start_position = scanner.pos
    return false unless scanner.scan( @open_regexp )
    body_start = scanner.pos

    while scanner.skip_until( @content_pause )
      matched = scanner.matched
      next unless closing_delimiter?( matched )   # an escape: keep looking

      # StringScanner positions are byte offsets, so all arithmetic and the
      # final slice are done in bytes; indexing the string by characters
      # returns the wrong text as soon as the input contains multibyte
      # characters
      if @text == :body
        first, last = body_start, scanner.pos - matched.bytesize
      else
        first, last = start_position, scanner.pos
      end
      return scanner.string.byteslice( first, last - first )
    end

    scanner.pos = start_position
    return false
  end

  private

  # True when +matched+ is, in its entirety, a match of the closing
  # delimiter. The check is anchored on both ends: an unanchored test would
  # also accept an escape sequence that merely contains the delimiter (such
  # as `\"`) whenever the closing delimiter is a Regexp.
  def closing_delimiter?( matched )
    found = @close_regexp.match( matched )
    found ? ( found.begin( 0 ).zero? && found.end( 0 ) == matched.length ) : false
  end
end
237
+
238
# A delimiter rule whose delimiters may nest, e.g. balanced parentheses: an
# opening delimiter met inside the span starts a nested span that has to be
# closed before the outer one can end.
class NestedDelimiterRule < DelimiterRule
  # Same arguments as DelimiterRule#initialize, but +close+ is required. The
  # pause pattern is extended so scanning also stops at opening delimiters.
  def initialize( name, open, close, options = {}, &action )
    super
    @content_pause = Regexp.union( @escape_regexp, @open_regexp, @close_regexp )
  end

  # Tries to match a balanced span at the scanner's position.
  #
  # Returns the complete span including its delimiters, or false (with the
  # scanner position restored) when the opening delimiter is absent or the
  # span is never closed.
  def match( scanner )
    start_position = scanner.pos
    scanner.scan( @open_regexp ) or return false
    catch( :nevermind ) do
      loop do
        scanner.scan_until( @content_pause ) or throw( :nevermind )
        case scanner.matched
        when @escape_regexp then next
        when @close_regexp then break
        when @open_regexp
          # back up over the open delimiter, and recursively invoke the matching procedure
          scanner.pos -= scanner.matched_size
          match( scanner ) or throw( :nevermind )
        else
          raise( <<-END.here_flow! % [ @name, scanner.matched, @escape_regexp, @open, @close ] )
          | this shouldn't happen:
          | rule %p
          | scanner.matched = %p
          | @escape_regexp = %p
          | @open = %p
          | @close = %p
          END
        end
      end
      # scanner positions are byte offsets: slice by bytes so input with
      # multibyte characters yields the right text
      return( scanner.string.byteslice( start_position, scanner.pos - start_position ) )
    end
    # reached only via throw( :nevermind )
    scanner.pos = start_position
    return false
  end
  alias :match? :match
end
275
+
276
# Rule-declaration DSL. An object using it keeps its rules, in declaration
# order, in @rules; when a class extends the module the list starts out
# empty, and subclasses start with a copy of their parent's list.
module CommonDSL
  attr_accessor :rules

  # Gives a class that extends the module an empty rule list.
  def self.extended( klass )
    klass.instance_variable_set( :@rules, [] )
  end

  # Appends +rule+ to the rule list and returns the rule.
  def register( rule )
    rules.push( rule )
    rule
  end

  # Declares a RegexRule.
  def rule( name, pattern, options = {}, &action )
    register( RegexRule.new( name, pattern, options, &action ) )
  end

  # Declares a DelimiterRule.
  def delimited( name, open, close = {}, options = nil, &action )
    register( DelimiterRule.new( name, open, close, options, &action ) )
  end

  # Declares a NestedDelimiterRule.
  def nested( name, open, close, options = {}, &action )
    register( NestedDelimiterRule.new( name, open, close, options, &action ) )
  end

  # Declares a keyword rule; see RegexRule.keyword for the argument forms.
  def keyword( *args, &action )
    register( RegexRule.keyword( *args, &action ) )
  end

  # Subclass hook: the subclass gets its own copy of the rule list.
  def inherited( klass )
    klass.rules = @rules.clone
  end

  protected :rules=
  private :register, :rule, :delimited, :nested, :keyword, :inherited
end
303
+
304
# Base class of the lexers. It owns the scanner over the input, tracks the
# current location, and buffers tokens: #emit_token puts tokens into an
# emission buffer, #next moves them from there into the token list.
# Subclasses implement #match, which is expected to consume input and emit
# tokens.
class Lexer

  attr_reader :scanner, :location, :token
  attr_accessor :channel

  # text    - the input string
  # options - hash:
  #           :file / :file_name  name used in locations (default '(string)')
  #           :channel            channel #next filters on (default :default)
  #           :location           start location; when absent one is built
  #                               from :position (0), :line (1), :column (0)
  #           :debug              enables #debug output (default $DEBUG)
  def initialize( text, options = {} )
    file_name = options[ :file ] || options[ :file_name ] || '(string)'
    location  = options[ :location ] || Location.new(
      file_name,
      options[ :position ] || 0,
      options[ :line ] || 1,
      options[ :column ] || 0
    )

    @scanner = StringScanner.new( text )
    @channel = options[ :channel ] || :default
    @starting_line = @location = location
    @tokens = []
    @emission_buffer = []
    @token = nil
    @debug = options.fetch( :debug, $DEBUG )
  end

  # The complete input string.
  def source
    @scanner.string
  end

  # Returns the lexer to its initial condition: scanner at the start of the
  # input, start location, no tokens. Tokens that were emitted but not yet
  # fetched are discarded too -- otherwise they would be handed out, with
  # stale indexes and locations, by the first #next after the reset.
  def reset
    @scanner.pos = 0
    @location = @starting_line
    @tokens.clear
    @emission_buffer.clear
    @token = nil
  end

  # Fetches the next token.
  #
  # tune - false / nil: any token, whatever its channel
  #        a Symbol:    only tokens on that channel
  #        otherwise:   only tokens on the lexer's own channel
  #
  # Tokens that are skipped are still appended to the token list. Returns
  # nil once #match reports that nothing more can be matched.
  def next( tune = true )
    channel = tune.is_a?( Symbol ) ? tune : @channel
    until @emission_buffer.empty?
      @token = @emission_buffer.shift
      @tokens << @token
      return( @token ) unless tune
      return( @token ) if token.channel == channel
    end
    return( match ? self.next( tune ) : nil )
  end

  # Consumes input and emits tokens; truthy when something was matched.
  #
  # Raises NotImplementedError here.
  def match
    raise NotImplementedError, "subclasses must implement #match"
  end

  # Creates a token at the current location, queues it for #next and
  # advances the location past +text+. Returns the token.
  def emit_token( type, text, channel = :default )
    index = @tokens.length + @emission_buffer.length
    token = create_token( index, type, text, @location, channel )
    @emission_buffer << token
    @location += text
    return token
  end

  # Token factory; subclasses may override it to use another token class.
  def create_token( index, type, text, location, channel )
    Token.new( index, type, text, location, channel )
  end

  # Yields the tokens fetched so far and then keeps lexing, yielding every
  # further token until the input is exhausted. +tune+ filters as in #next.
  # Without a block an enumerator is returned.
  def each( tune = true )
    block_given? or return enum_for( :each, tune )
    if tune
      channel = tune.is_a?( Symbol ) ? tune : @channel
      @tokens.each { |token| token.channel == channel and yield( token ) }
    else
      @tokens.each { |token| yield( token ) }
    end
    while token = self.next( tune )
      yield( token )
    end
    return self
  end

  include Enumerable

  # With a block: lexes everything and returns the block's results. Without
  # a block: returns the enumerator produced by each( tune ).map.
  def lex!( tune = true )
    if block_given?
      each( tune ).map { |token| yield( token ) }
    else
      return each( tune ).map
    end
  end

  # The tokens fetched so far; filtered by channel as in #next, or the
  # internal token list itself when +tune+ is false / nil.
  def tokens( tune = true )
    if tune
      channel = tune.is_a?( Symbol ) ? tune : @channel
      @tokens.select { |token| token.channel == channel }
    else
      return @tokens
    end
  end

  # Array-style access to the tokens fetched so far.
  def []( *args )
    @tokens[ *args ]
  end

  # Prints the block's value to $stderr when debugging is enabled; the block
  # is not evaluated otherwise.
  def debug
    if @debug
      $stderr.puts( "\e[31m#{ self.class }\e[0m lexer debug: \e[36m#{ yield }\e[0m" )
    end
  end

end
412
+
413
# A lexer with one flat list of rules, declared on the class through
# CommonDSL. Rules are tried in declaration order and the first one that
# matches produces the token.
class CommonLexer < Lexer
  extend CommonDSL

  def initialize( text, options = {} )
    super
    @rules = self.class.rules
  end

  # Tries every rule at the current scanner position. Returns nil at the end
  # of the input and true after a successful match.
  #
  # Raises LexerError when no rule matches.
  def match
    return nil if @scanner.eos?
    @rules.each do | rule |
      text = rule.match( @scanner ) or next
      matched!( rule, text )
      return true
    end
    match_failed!
  end

  # Emits the token for a successful match and, if the rule has an action,
  # evaluates that action in the context of the lexer.
  def matched!( rule, text )
    emit_token( rule.name, text, rule.channel )
    action = rule.action
    action and instance_eval( &action )
  end

  # Raises a LexerError whose backtrace starts at the caller.
  def match_failed!
    error = LexerError.new( self )
    error.set_backtrace( caller )
    raise( error )
  end

  private :match_failed!, :matched!

end
446
+
447
# Class-level DSL for lexers whose rules are grouped into named states. A
# class extending the module gets a state table, a :global state (the target
# of rules declared outside any state) and an initial state.
module StatefulDSL
  attr_accessor :states, :initial_state, :global_state

  # Sets up the state table and the :global state. The initial state is left
  # unset, so the first state declared afterwards becomes the initial one.
  def self.extended( klass )
    klass.instance_variable_set( :@states, {} )
    klass.instance_variable_set( :@global_state, klass.state( :global ) )
    klass.instance_variable_set( :@initial_state, nil )
  end

  # Returns the state called +name+, creating it (with the extra arguments
  # passed on to State.new) when it does not exist yet. A given block is
  # evaluated through State#specify to declare the state's rules. The first
  # state requested while no initial state is set becomes the initial state.
  def state( name, *args, &body )
    state = @states[ name.to_sym ] ||= State.new( self, name, *args )
    @initial_state ||= state
    block_given? and state.specify( &body )
    return state
  end

  # Makes the state called +name+ the initial state.
  def start_in( name )
    @initial_state = state( name )
  end

  # Rules declared directly on the class go to the :global state.
  def register( rule )
    @global_state.rules << rule
    return rule
  end

  # Subclass hook: the subclass receives clones of all states (bound to the
  # subclass), its own :global state and the matching initial state.
  def inherited( klass )
    klass.states = @states.each_with_object( {} ) do | ( name, state ), copies |
      copy = state.clone
      copy.lexer = klass
      copies[ name ] = copy
    end
    klass.global_state  = klass.state( :global )
    klass.initial_state = ( @initial_state && klass.states[ @initial_state.name ] )
  end

  # The #global_state reader has to stay public: StatefulLexer#initialize
  # calls self.class.global_state from an instance, which a protected method
  # rejects with NoMethodError. (It was listed here before.)
  protected :states=
  private :register, :inherited, :start_in
end
485
+
486
# One named state of a stateful lexer: a list of rules plus optional handlers
# that the lexer evaluates when the state is entered, left, or fails to match
# anything.
class State
  include CommonDSL

  # Sets the enter handler from a block or from anything responding to
  # #to_proc; returns the current handler (also when called without
  # arguments).
  def on_enter( action = nil, &b )
    handler   = b || ( action && action.to_proc )
    @on_enter = handler if handler
    return @on_enter
  end

  # Sets / returns the exit handler; see #on_enter.
  def on_exit( action = nil, &b )
    handler  = b || ( action && action.to_proc )
    @on_exit = handler if handler
    return @on_exit
  end

  # Sets / returns the failure handler; see #on_enter.
  def on_failure( action = nil, &b )
    handler     = b || ( action && action.to_proc )
    @on_failure = handler if handler
    return @on_failure
  end

  attr_accessor :name, :lexer

  # lexer_class - the lexer class that owns the state
  # name        - state name; stored as a symbol
  # options     - hash; the first of :go_to, :push, :pop with a truthy value
  #               installs a failure handler that performs that transition
  #               with the option's value as target
  # body        - optional block, evaluated through #specify
  def initialize( lexer_class, name, options = {}, &body )
    @lexer = lexer_class
    @name  = name.to_sym
    @rules = []
    fail_action = [ :go_to, :push, :pop ].find do |action|
      options[ action ]
    end
    # This has to be a plain proc: handlers are run with
    # instance_eval( &handler ), which yields the receiver as an argument,
    # and a zero-parameter lambda answers that with an ArgumentError.
    @on_failure =
      if fail_action
        proc { make_transition( fail_action, options[ fail_action ] ) }
      end
    @on_enter = nil
    @on_exit  = nil
    block_given? and specify( &body )
  end

  # Copies get their own clones of the rules and no owning lexer.
  def initialize_copy( orig )
    @lexer = nil
    @rules = orig.rules.map { |rule| rule.clone }
  end

  # state.specify { ... } evaluates the block with the state as self, so the
  # CommonDSL methods can be used inside it.
  alias specify instance_eval

  # Pulls the rules of another state of the same lexer into this one, and
  # adopts that state's handlers wherever this state has none.
  def include( state_name )
    state = @lexer.state( state_name )
    @rules.concat( state.rules )
    @on_failure ||= state.on_failure
    @on_exit    ||= state.on_exit
    @on_enter   ||= state.on_enter
  end

end
543
+
544
# A lexer whose active rule set depends on the current state. Rules can
# switch state (:go_to), enter a state while remembering the current one
# (:push) or return to the remembered one (:pop).
class StatefulLexer < Lexer
  # how many times #match retries after a state's failure handler has run
  MAX_RETRIES = 5
  extend CommonDSL
  extend StatefulDSL

  attr_reader :state, :start_state

  # Takes the Lexer options plus :initial_state (or :state), the name of the
  # state to start in. Without it the class's initial state is used, or the
  # global state if the class has no initial state.
  #
  # Raises RuntimeError (through #fetch_state) when the requested state does
  # not exist; previously the lexer silently started without a state.
  def initialize( text, options = {} )
    super
    @states = self.class.states
    @state_stack = []
    if name = ( options[ :initial_state ] || options[ :state ] )
      @state = fetch_state( name )
    elsif state = self.class.initial_state
      @state = state
    else
      @state = self.class.global_state
    end
    @start_state = @state
  end

  # The rules of the current state.
  def rules
    @state.rules # or raise("state %p has no rules" % @state)
  end

  # Resets the lexer (see Lexer#reset) and returns to the start state. The
  # stack of pushed states is emptied as well, so a later :pop cannot land in
  # a state left over from before the reset.
  def reset
    super
    @state_stack.clear
    @state = @start_state
  end

  # Performs a state transition, evaluating the exit handler of the state
  # being left and then the enter handler of the state being entered, both
  # in the context of the lexer. Any other +type+ (such as :stay) does
  # nothing.
  def make_transition( type, target )
    debug { "state transition -- #{ @state.name } -> #{ type } #{ target }" }
    case type
    when :go_to
      action = @state.on_exit and instance_eval( &action )
      @state = fetch_state( target )
      action = @state.on_enter and instance_eval( &action )
    when :push
      action = @state.on_exit and instance_eval( &action )
      @state_stack.push( @state )
      @state = fetch_state( target )
      action = @state.on_enter and instance_eval( &action )
    when :pop
      action = @state.on_exit and instance_eval( &action )
      # popping an empty stack falls back to the start state
      @state = @state_stack.pop || @start_state
      action = @state.on_enter and instance_eval( &action )
    end
  end

  # Looks a state up by name.
  #
  # Raises RuntimeError when there is no such state.
  def fetch_state( name )
    @states.fetch( name ) do
      # TODO: make this more informative
      raise( "this lexer has no state named %p" % name )
    end
  end

  # Tries the rules of the current state in order. Returns nil at the end of
  # the input and true after a successful match. When nothing matches, the
  # state's failure handler is run (usually switching state) and matching is
  # retried, at most MAX_RETRIES times; false is returned if that never
  # leads to a match.
  #
  # Raises LexerError when nothing matches and the state has no failure
  # handler.
  def match
    @scanner.eos? and return nil
    MAX_RETRIES.times do
      @state.rules.each do | rule |
        if text = rule.match( @scanner )
          matched!( rule, text )
          return true
        end
      end
      match_failed!
    end
    return false
  end

  # Emits the token, performs the rule's transition and then runs the rule's
  # action in the context of the lexer, passing it the token.
  def matched!( rule, text )
    token = emit_token( rule.name, text, rule.channel )
    make_transition( rule.transition, rule.target_state )
    action = rule.action and instance_exec( token, &action )
  end

  # Runs the current state's failure handler, or raises a LexerError (with
  # the backtrace starting at the caller) when the state has none.
  def match_failed!
    if action = @state.on_failure
      instance_eval( &action )
    else
      error = LexerError.new( self )
      error.set_backtrace( caller )
      raise( error )
    end
  end
  private :make_transition, :matched!, :match_failed!

end
632
+ end
633
+ end