jejune 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gemtest +0 -0
- data/History.txt +12 -0
- data/Manifest.txt +39 -0
- data/README.txt +55 -0
- data/Rakefile +75 -0
- data/bin/jjs +173 -0
- data/lib/jejune.rb +78 -0
- data/lib/jejune/boot.rb +107 -0
- data/lib/jejune/constants.rb +105 -0
- data/lib/jejune/data-extension.rb +144 -0
- data/lib/jejune/dependency-scanner.rb +69 -0
- data/lib/jejune/ejjs.rb +178 -0
- data/lib/jejune/errors.rb +53 -0
- data/lib/jejune/grammar.rb +32 -0
- data/lib/jejune/grammar/JavaScript.g +668 -0
- data/lib/jejune/grammar/Jejune.g +1029 -0
- data/lib/jejune/grammar/Jejune.tokens +241 -0
- data/lib/jejune/grammar/lexer.rb +6504 -0
- data/lib/jejune/grammar/parser.rb +17378 -0
- data/lib/jejune/grammar/rakefile +29 -0
- data/lib/jejune/grammar/tree.rb +6737 -0
- data/lib/jejune/input.rb +124 -0
- data/lib/jejune/jstring.rb +163 -0
- data/lib/jejune/lo-fi-lexer.rb +633 -0
- data/lib/jejune/macro.rb +78 -0
- data/lib/jejune/main.rb +289 -0
- data/lib/jejune/manager.rb +333 -0
- data/lib/jejune/node-test.rb +71 -0
- data/lib/jejune/parameters.rb +83 -0
- data/lib/jejune/rewrite-debug.rb +61 -0
- data/lib/jejune/rewrite.rb +125 -0
- data/lib/jejune/scanner.rb +201 -0
- data/lib/jejune/translator.rb +710 -0
- data/lib/jejune/tree-walker.rb +81 -0
- data/lib/jejune/utils.rb +81 -0
- data/lib/jejune/version.rb +38 -0
- data/spec/samples.txt +51 -0
- data/spec/translation.rb +69 -0
- data/spec/utils.rb +63 -0
- data/tools/env.fish +2 -0
- metadata +147 -0
data/lib/jejune/input.rb
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
#--
|
4
|
+
# Copyright (c) 2010-2011 Kyle C. Yetter
|
5
|
+
#
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
7
|
+
# a copy of this software and associated documentation files (the
|
8
|
+
# "Software"), to deal in the Software without restriction, including
|
9
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
10
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
11
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
12
|
+
# the following conditions:
|
13
|
+
#
|
14
|
+
# The above copyright notice and this permission notice shall be
|
15
|
+
# included in all copies or substantial portions of the Software.
|
16
|
+
#
|
17
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
18
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
19
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
20
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
21
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
22
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
23
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
24
|
+
#++
|
25
|
+
|
26
|
+
module Jejune
|
27
|
+
# Mixin shared by the Jejune source stream classes. It ties a stream to its
# manager and lazily builds the token stream, tree adaptor and parse tree.
module JJSInput
  include Constants
  include Utils

  attr_reader :manager, :path, :cache_file, :dependencies, :macros
  attr_accessor :syntax_errors
  attr_writer :tree, :adaptor, :tokens

  # Forwards every argument to the including stream class, then sets up the
  # Jejune-specific state.
  #
  # options[ :manager ]    -- manager to use; a new Manager is built if absent
  # options[ :cache_file ] -- optional cache path, stored as given
  def initialize( source, options = {} )
    super
    @manager = options.fetch( :manager ) do
      Manager.new( self, options )
    end
    @cache_file    = options[ :cache_file ]
    @dependencies  = Set.new
    @path          = @name   # @name is not assigned here -- presumably by the stream superclass
    @syntax_errors = 0
    @macros        = {}
  end

  # Directory part of the stream's path, or '.' when there is no path.
  def directory
    return '.' unless @path
    File.dirname( @path )
  end

  # Looks +name+ up relative to this stream's directory; +exts+ is passed
  # through to find_in_directory untouched.
  def find_relative( name, exts = 'jjs|js|' )
    find_in_directory( directory, name, exts )
  end

  # Token stream over this input, created on first use.
  def tokens
    @tokens ||= RewriteStream.new( Lexer.new( self ) )
  end

  # Tree adaptor bound to the token stream, created on first use.
  def adaptor
    @adaptor ||= RewriteAdaptor.new( tokens )
  end

  # Parse tree of the input, built on first use. Parsing also records the
  # parser's syntax error count in @syntax_errors.
  def tree
    return @tree if @tree
    parser = Parser.new( tokens, :adaptor => adaptor )
    parsed = parser.program.tree
    @syntax_errors = parser.state.syntax_errors
    @tree = parsed
  end

  # Asks the manager to translate this input.
  def translate
    @manager.translate( self )
  end

  # Wraps +source+ (a string or a Token) in a new JJSSource that shares this
  # input's manager and path. When given a Token, its line and column become
  # the defaults for the :line and :column options and its text is the source.
  def snippet( source, options = {} )
    defaults = { :manager => @manager, :file => @path }
    options  = defaults.merge( options )

    if source.is_a?( Token )
      options[ :line ]   ||= source.line
      options[ :column ] ||= source.column
      source = source.text
    end

    JJSSource.new( source, options )
  end

  # The string form of an input is its translation.
  def to_s
    translate
  end
end
|
96
|
+
|
97
|
+
|
98
|
+
# StringStream subclass that gains the Jejune input behaviour from JJSInput.
class JJSSource < StringStream
  include JJSInput
end
|
101
|
+
|
102
|
+
# FileStream subclass with the Jejune input behaviour. Translating a file
# also commits its dependencies to the manager and, when a cache file is
# configured and the parse was clean, stores the result in that cache.
class JJSFile < FileStream
  include JJSInput

  # Translates the file and returns the translated output.
  #
  # Side effects:
  # * reports @dependencies for @path to the manager
  # * if @cache_file is set and no syntax errors were counted, writes a
  #   Marshal dump of { :output, :dependencies, :macros } to @cache_file
  def translate
    output = super
    @manager.commit_dependencies( @path, @dependencies )

    if @cache_file && @syntax_errors.zero?
      # Serialize before touching the file so a failed dump does not leave a
      # truncated cache behind.
      payload = Marshal.dump(
        :output       => output,
        :dependencies => @dependencies,
        :macros       => macros
      )
      # Marshal data is binary: write it in binary mode, and use File.open so
      # the path is never interpreted as a command the way Kernel#open can.
      File.open( @cache_file, 'wb' ) { | out | out.write( payload ) }
    end

    return output
  end
end
|
124
|
+
end
|
@@ -0,0 +1,163 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
#--
|
4
|
+
# Copyright (c) 2010-2011 Kyle C. Yetter
|
5
|
+
#
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
7
|
+
# a copy of this software and associated documentation files (the
|
8
|
+
# "Software"), to deal in the Software without restriction, including
|
9
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
10
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
11
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
12
|
+
# the following conditions:
|
13
|
+
#
|
14
|
+
# The above copyright notice and this permission notice shall be
|
15
|
+
# included in all copies or substantial portions of the Software.
|
16
|
+
#
|
17
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
18
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
19
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
20
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
21
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
22
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
23
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
24
|
+
#++
|
25
|
+
|
26
|
+
module Jejune
|
27
|
+
# String helpers used when translating string literals: quoting, escape
# processing, margin/indent stripping and tab expansion. Every method is
# available both as a mixin method and directly on the module.
module JString
  extend( self )

  @tab_width = 2
  attr_accessor :tab_width

  # Escape letters and the characters they stand for, as arguments to String#tr.
  DOUBLE_ESC = [ "0abefnrstv", "\0\a\b\e\f\n\r\s\t\v" ]

  # Matches one backslash escape:
  #   $1 -- "x" plus one or two hex digits, or "000"
  #   $2 -- the four hex digits of a \uXXXX escape
  #   $3 -- any other single escaped character
  # An escaped line break matches with no group set.
  ESC_RX = %r(
    \\
    (?:
        ( x [A-Fa-f0-9]{1,2}
        | 0{3}
        )
      | u ( [A-Fa-f0-9]{4} )
      | \r? \n
      | ( . )
    )
  )x

  # Builds a UTF-8 string from the given code points.
  def utf8( *chars )
    chars.pack( 'U*' )
  end

  # Replaces real line breaks: an unescaped one (even number of preceding
  # backslashes) becomes the two characters backslash-n; an escaped one is
  # removed together with its escaping backslash.
  def collapse( str )
    str.to_s.gsub( /(\\*)\n/ ) do
      slashes = $1
      slashes.length.even? ? slashes + '\n' : slashes[ 0 ... -1 ]
    end
  end

  # Wraps +str+ in +quote+ (a double quote when omitted), backslash-escaping
  # every occurrence of the quote that is not already escaped.
  def quote( str, quote = nil )
    mark    = quote || '"'
    pattern = /(\\*)#{ Regexp.escape( mark ) }/
    body = str.to_s.gsub( pattern ) do
      slashes = $1
      slashes += '\\' if slashes.length.even?
      slashes + mark
    end
    body.insert( -1, mark ).insert( 0, mark )
  end

  # Strips the layout scaffolding from a multi-line string:
  # * a leading empty line and a trailing whitespace-only line are dropped
  # * if every line starts with a `|' margin, the margin is removed
  # * otherwise the smallest indentation (tabs expanded to +tab_width+) is
  #   removed from the start of each line
  # Single-line strings are returned unchanged. Lines are joined with $/.
  def outdent( str, tab_width = @tab_width || 2 )
    lines = str.to_s.split( /\r?\n/, -1 )
    if lines.length > 1
      # for strings like
      #   xyz = %(           <- first line = ""
      #     blah blah blah
      #   )                  <- last line = whitespace before the delimiter
      # the output will be "blah blah blah"
      lines.shift if lines.first.empty?
      # NOTE(review): the pattern needs at least one whitespace character, so
      # a completely empty last line is kept -- confirm this is intended.
      lines.pop if lines.last =~ /^\s+$/

      # for strings that specify a margin like
      #   xyz =
      #     %(
      #       | BLAH
      #       |
      #       | blah blah
      #     )
      if lines.all? { | l | l =~ /^\s*\|\s?/ }
        lines.each { | line | line.gsub!( /^\s*\|\s?/, '' ) }
      else
        indent = lines.grep( /^(\s+)\S/ ) { expand_tabs( $1, tab_width ).length }.min || 0
        if indent > 0
          lines.each do | line |
            # cut +indent+ characters off the leading whitespace; a run
            # shorter than +indent+ is removed entirely
            line.sub!( /^\s+/ ) { | space | space[ indent, space.length ] }
          end
        end
      end
    end

    return lines.join( $/ )
  end

  # Converts literal content to a JSON-encoded (double quoted) string.
  # Forward slashes are always emitted unescaped, whether or not the JSON
  # encoder escaped them.
  def jstring( content, single_quote = nil )
    str = string_value( content, single_quote ).to_json
    str.gsub!( %r[(\\*)/] ) do
      slashes = $1
      ( slashes.length.odd? ? slashes[ 0...-1 ] : slashes ) + '/'
    end
    return str
  end

  # Splits on unescaped whitespace. Within each word an escaped whitespace
  # character loses its backslash; other escape pairs are kept as they are.
  def split_words( str )
    str.to_s.
      scan( /(?:[^\s\\]|\\.)+/m ).
      map! { | w | w.gsub!( /\\(\s)|(\\.)/m ) { $+ } or w }
  end

  # Returns a copy of +str+ with tabs replaced by spaces up to the next
  # multiple of +tab_width+. A width of 0 leaves tabs alone; a width of 1
  # swaps each tab for a single space.
  #
  # Raises ArgumentError for a negative width.
  def expand_tabs( str, tab_width = @tab_width || 2 )
    str = str.to_s.dup
    tab_width = tab_width.to_i
    raise ArgumentError, "tab width must be >= 0, but expand_tabs called with `#{ tab_width }'" if tab_width < 0

    case tab_width
    when 0
      # nothing to expand
    when 1
      str.gsub!( /\t/, " " )
    else
      # Each pass expands the first run of tabs on every line; repeat until
      # gsub! reports that nothing was left to replace.
      loop do
        changed = str.gsub!( /^([^\t\n]*)(\t+)/ ) do
          leading = $1.length % tab_width
          length  = ( tab_width * $2.length - leading )
          $1 + ( ' ' * length )
        end
        break unless changed
      end
    end

    return str
  end

  # Evaluates the backslash escapes in the body of a string literal.
  #
  # With +single+ set, only \\ and \' are unescaped and every other pair is
  # kept verbatim. Otherwise \xH, \xHH, \000, \uXXXX and the letter escapes
  # in DOUBLE_ESC are decoded, any other escaped character stands for
  # itself, and an escaped line break is removed.
  def string_value( str, single = false )
    str = str.to_s
    if single
      str.gsub( %r<\\(.)> ) do
        case $1
        when '\\', "'" then $1
        else $&
        end
      end
    else
      # TODO: handle control/meta sequences \cx \C-x \M-x \M-\C-x
      str.gsub( ESC_RX ) do
        if x = $1
          i = Integer( '0' + x )
          # Values above 7-bit ASCII must be encoded as a code point.
          # Integer#chr would produce a lone binary byte for 128..255, which
          # is why the boundary is 127 (it used to be 128, mishandling \x80).
          i > 127 ? [ i ].pack( 'U' ) : i.chr
        elsif u = $2
          [ u.to_i( 16 ) ].pack( 'U' )
        elsif c = $3
          c.tr( *DOUBLE_ESC )
        end
      end
    end
  end
end
|
163
|
+
end
|
@@ -0,0 +1,633 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
#--
|
4
|
+
# Copyright (c) 2010-2011 Kyle C. Yetter
|
5
|
+
#
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
7
|
+
# a copy of this software and associated documentation files (the
|
8
|
+
# "Software"), to deal in the Software without restriction, including
|
9
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
10
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
11
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
12
|
+
# the following conditions:
|
13
|
+
#
|
14
|
+
# The above copyright notice and this permission notice shall be
|
15
|
+
# included in all copies or substantial portions of the Software.
|
16
|
+
#
|
17
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
18
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
19
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
20
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
21
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
22
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
23
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
24
|
+
#++
|
25
|
+
|
26
|
+
require 'strscan'
|
27
|
+
|
28
|
+
class Regexp
  # Builds a Regexp that matches the text of +string+ verbatim: the value is
  # converted with to_s and every regexp metacharacter is escaped.
  # +options+ is handed to Regexp.new unchanged.
  def self.literal( string, options = 0 )
    escaped = Regexp.escape( string.to_s )
    Regexp.new( escaped, options )
  end
end
|
33
|
+
|
34
|
+
module Jejune
|
35
|
+
module LoFiLexer
|
36
|
+
# Raised when no lexical rule matches at the lexer's current location.
# The message names the lexer class, the location and a short glimpse of
# the source text found at that location.
class LexerError < StandardError
  attr_reader :lexer

  # lexer -- the failing lexer; must respond to #location and #scanner
  def initialize( lexer )
    @lexer   = lexer
    location = @lexer.location
    # String#[] returns nil when the position lies beyond the end of the
    # scanned text (for instance when the lexer was started with a position
    # offset). Fall back to an empty glimpse so that building the message
    # cannot hide the real error behind a NoMethodError.
    glimpse  = @lexer.scanner.string[ location.position, 13 ].to_s + '...'
    message  = "unable to match any lexical rule of %s for %s => %p" %
               [ @lexer.class.name, location, glimpse ]
    super( message )
  end
end
|
50
|
+
|
51
|
+
# A point in a source text: file name, absolute position, line and column.
# Locations are ordered by position.
class Location
  MEMBERS = %w(file position line column)

  include Comparable

  attr_accessor( *MEMBERS )

  # Shorthand constructor: Location[ file, position, line, column ]
  def self.[]( *args )
    new( *args )
  end

  def initialize( file = nil, position = nil, line = nil, column = nil )
    @file, @position, @line, @column = file, position, line, column
  end

  # Advances this location (in place) past +text+ and returns self.
  def <<( text )
    size     = text.length
    newlines = text.count( "\n" )
    if newlines.zero?
      @column += size
    else
      # the column restarts after the last line break in +text+
      @column = size - text.rindex( "\n" ) - 1
    end
    @line     += newlines
    @position += size
    return self
  end

  # Moves this location (in place) back over +text+ and returns self.
  # When +text+ contains a line break the column is set to the index of the
  # first line break in +text+.
  def >>( text )
    size     = text.length
    newlines = text.count( "\n" )
    if newlines.zero?
      @column -= size
    else
      @column = text.index( "\n" )
    end
    @line     -= newlines
    @position -= size
    return self
  end

  # Non-destructive form of #<<.
  def +( text )
    clone << text
  end

  # Non-destructive form of #>>.
  def -( text )
    clone >> text
  end

  def <=>( location )
    @position <=> location.position
  end

  # 'file NAME @ (LINE:COLUMN)' by default, '[LINE:COLUMN]' when +long+ is
  # false or nil.
  def to_s( long = true )
    return '[%i:%i]' % [ @line, @column ] unless long
    'file %s @ (%i:%i)' % [ @file, @line, @column ]
  end
end
|
108
|
+
|
109
|
+
# Gives any object with a #location the readers #file, #position, #line and
# #column, each delegating to that location. A reader returns nil when the
# delegation raises (for example when there is no location).
module Locatable
  Location::MEMBERS.each do | member |
    define_method( member ) do
      begin
        location.public_send( member )
      rescue StandardError
        nil
      end
    end
  end
end
|
118
|
+
|
119
|
+
# One lexed token: its index in the token list, rule name (type), matched
# text, starting location and channel.
class Token
  include Locatable

  attr_accessor :index, :channel, :type, :text, :location

  # Yields the new token to the block, if one is given.
  def initialize( index = nil, type = nil, text = nil, location = nil, channel = nil )
    @index, @type, @text = index, type, text
    @location = location
    @channel  = channel
    yield( self ) if block_given?
  end

  def inspect
    fields = [ @index, @type, @text, @location.to_s( false ) ]
    '[%i %p(%p) @ %s]' % fields
  end

  # A token's string form is its text.
  def to_s
    text
  end

  # Location immediately following the token's text.
  def after
    location + text
  end
end
|
140
|
+
|
141
|
+
# Base class for lexical rules. A rule has a name (used as the token type),
# a channel, an optional action block, and a state transition to make after
# it matches.
class Rule
  attr_accessor :name, :channel, :action, :transition, :target_state, :text
  attr_reader :options

  # name    -- rule / token type name, stored as a symbol
  # options -- :channel (default :default), :action (a block given to the
  #            constructor is used when absent), :text, and at most one of
  #            :go_to, :push, :pop or :stay naming the transition target
  def initialize( name, options = {}, &action )
    @name    = name.to_sym
    # Keep the options hash: the class exposes an `options' reader which
    # previously always returned nil because nothing assigned @options.
    @options = options
    @channel = options[ :channel ] || :default
    @action  = options[ :action ] || action
    # The first transition type with a truthy option wins; without one the
    # rule stays in the current state.
    @transition   = [ :go_to, :push, :pop, :stay ].find { | type | options[ type ] } || :stay
    @target_state = options[ @transition ]
    @text = options.fetch( :text, nil )
  end

  # Attempts a match at the scanner's current position. Subclasses return
  # the matched text, or a false value when the rule does not apply.
  def match( scanner )
    raise NotImplementedError, "#match must be implemented by subclasses"
  end
end
|
160
|
+
|
161
|
+
# Rule that matches a regular expression (or a literal string) at the
# scanner's current position.
class RegexRule < Rule
  # Builds a rule for a literal keyword. Accepted forms:
  #
  #   keyword( word, options = {}, &action )        -- name defaults to the
  #                                                    upcased word, or to
  #                                                    options[ :name ]
  #   keyword( name, word, options = {}, &action )
  #
  # Raises ArgumentError for any other argument count.
  def self.keyword( *args, &action )
    # The trailing options hash has to be taken off +args+; the previous
    # code called `options.pop' on the not-yet-assigned (nil) local, so
    # every call that passed options failed with NoMethodError.
    options = args.last.is_a?( Hash ) ? args.pop : {}
    case args.length
    when 1
      word = args[ 0 ].to_s
      name = options[ :name ] || word.upcase
    when 2
      name, word = args
      word = word.to_s
    else
      # here_flow! is not defined in this file -- presumably a String
      # extension from elsewhere in the project that strips the `|' margin.
      raise ArgumentError, <<-END.here_flow! % [ options, args ]
      | bad arguments: need (name, keyword, options = {}, &action) or
      | (keyword, options = {}, &action) -- got:
      | options = %p
      | other arguments = %p
      END
    end
    return new( name, word, options, &action )
  end

  attr_accessor :pattern

  # pattern -- a Regexp, or any object whose to_s is matched literally
  def initialize( name, pattern, options = {}, &action )
    super( name, options, &action )
    @pattern = pattern.is_a?( Regexp ) ? pattern : Regexp.literal( pattern )
  end

  # Scans the pattern at the current position. Returns nil when it does not
  # match; otherwise the capture group selected by the :text option, or the
  # whole match when no :text option was given.
  def match( scanner )
    matched = scanner.scan( pattern )
    return matched unless matched
    @text ? scanner[ @text ] : matched
  end
end
|
194
|
+
|
195
|
+
# Rule that matches text enclosed between an opening and a closing
# delimiter, skipping over escape sequences inside the body.
class DelimiterRule < Rule
  attr_accessor :open, :close, :escape

  # open    -- opening delimiter (String or Regexp)
  # close   -- closing delimiter; when a Hash is passed here it is taken as
  #            the options and the opening delimiter also closes
  # options -- Rule options plus :escape (String or Regexp, default '\\')
  def initialize( name, open, close = {}, options = nil, &action )
    @open = open
    if Hash === close
      @close  = open
      options = close
    else
      @close = close
      options ||= {}
    end
    @escape = options.fetch( :escape, '\\' )
    super( name, options, &action )
    # a non-Regexp escape stands for "the escape text plus any one character"
    @escape_regexp = ( Regexp === @escape ) ? @escape : Regexp.new( Regexp.escape( @escape.to_s ) << '.' )
    @open_regexp   = ( Regexp === @open ) ? @open : Regexp.literal( @open )
    @close_regexp  = ( Regexp === @close ) ? @close : Regexp.literal( @close )
    @content_pause = Regexp.union( @escape_regexp, @close_regexp )
  end

  # Matches a complete delimited run at the scanner's position.
  #
  # Returns the matched text -- only the body between the delimiters when
  # the :text option is :body -- or false. When the opening delimiter is
  # found but never closed, the scanner is put back where it started.
  def match( scanner )
    start_position = scanner.pos
    return false unless scanner.scan( @open_regexp )

    body_start = scanner.pos
    while scanner.skip_until( @content_pause )
      next unless closing?( scanner.matched )   # an escape sequence: keep going

      body_end = scanner.pos - scanner.matched_size
      from, to = ( @text == :body ) ? [ body_start, body_end ] : [ start_position, scanner.pos ]
      # StringScanner positions are byte offsets, so slice by bytes;
      # String#[] counts characters and returns the wrong span as soon as
      # the text contains multi-byte characters.
      return scanner.string.byteslice( from, to - from )
    end

    scanner.pos = start_position
    return false
  end

  private

  # True when +matched+ (what stopped the body scan) is the closing
  # delimiter rather than an escape sequence. A Regexp delimiter must cover
  # the whole of +matched+: an unanchored test would take an escape
  # sequence such as \" for the terminator when the delimiter is /"/.
  def closing?( matched )
    return @close === matched unless Regexp === @close
    found = @close.match( matched )
    !found.nil? && found.begin( 0 ).zero? && found.end( 0 ) == matched.length
  end
end
|
237
|
+
|
238
|
+
# Delimiter rule whose delimiters may nest, e.g. balanced parentheses.
class NestedDelimiterRule < DelimiterRule
  def initialize( name, open, close, options = {}, &action )
    super
    # unlike the parent class, a nested opening delimiter also pauses the scan
    @content_pause = Regexp.union( @escape_regexp, @open_regexp, @close_regexp )
  end

  # Matches a balanced delimited run at the scanner's position and returns
  # its full text, delimiters included. Returns false when the opening
  # delimiter is absent; when the run is never closed the scanner is put
  # back where it started and false is returned.
  def match( scanner )
    start_position = scanner.pos
    scanner.scan( @open_regexp ) or return false
    catch( :nevermind ) do
      loop do
        scanner.scan_until( @content_pause ) or throw( :nevermind )
        case scanner.matched
        when @escape_regexp then next
        when @close_regexp  then break
        when @open_regexp
          # back up over the open delimiter, and recursively invoke the matching procedure
          scanner.pos -= scanner.matched_size
          match( scanner ) or throw( :nevermind )
        else
          # here_flow! is not defined in this file -- presumably a String
          # extension from elsewhere in the project that strips the `|' margin.
          raise( <<-END.here_flow! % [ @name, scanner.matched, @escape_regexp, @open, @close ] )
          | this shouldn't happen:
          | rule %p
          | scanner.matched = %p
          | @escape_regexp = %p
          | @open = %p
          | @close = %p
          END
        end
      end
      # StringScanner positions are byte offsets, so slice by bytes;
      # String#[] counts characters and returns the wrong span as soon as
      # the text contains multi-byte characters.
      return( scanner.string.byteslice( start_position, scanner.pos - start_position ) )
    end
    scanner.pos = start_position
    return false
  end
  alias :match? :match
end
|
275
|
+
|
276
|
+
# Rule-definition DSL. Extending a class with this module gives the class a
# rule list of its own; subclasses start out with a copy of their parent's
# list. The definition methods are private: they are meant to be called
# from inside the class (or state) body.
module CommonDSL
  attr_accessor :rules

  # Starts every extended class with an empty rule list.
  def self.extended( klass )
    klass.instance_variable_set( :@rules, [] )
  end

  # Appends +rule+ to the rule list and returns the rule.
  def register( rule )
    rules.push( rule )
    rule
  end

  # Defines a RegexRule.
  def rule( name, pattern, options = {}, &action )
    register( RegexRule.new( name, pattern, options, &action ) )
  end

  # Defines a DelimiterRule.
  def delimited( name, open, close = {}, options = nil, &action )
    register( DelimiterRule.new( name, open, close, options, &action ) )
  end

  # Defines a NestedDelimiterRule.
  def nested( name, open, close, options = {}, &action )
    register( NestedDelimiterRule.new( name, open, close, options, &action ) )
  end

  # Defines a keyword rule; see RegexRule.keyword for the argument forms.
  def keyword( *args, &action )
    register( RegexRule.keyword( *args, &action ) )
  end

  # Hands a subclass its own copy of the rule list, so rules added to the
  # subclass do not show up in the parent.
  def inherited( klass )
    klass.rules = @rules.clone
  end

  protected :rules=
  private :register, :rule, :delimited, :nested, :keyword, :inherited
end
|
303
|
+
|
304
|
+
# Base class of the lo-fi lexers. It owns the StringScanner, tracks the
# current location and collects emitted tokens; subclasses supply #match,
# which consumes input and emits tokens.
class Lexer
  include Enumerable

  attr_reader :scanner, :location, :token
  attr_accessor :channel

  # text    -- the source text to scan
  # options -- :file / :file_name (default '(string)'), :channel (default
  #            :default), :debug (default $DEBUG), and either :location or
  #            the separate :position (0), :line (1) and :column (0)
  def initialize( text, options = {} )
    file_name = options[ :file ] || options[ :file_name ] || '(string)'
    channel   = options[ :channel ] || :default
    unless location = options[ :location ]
      position = options[ :position ] || 0
      line     = options[ :line ] || 1
      column   = options[ :column ] || 0
      location = Location.new( file_name, position, line, column )
    end

    @scanner = StringScanner.new( text )
    @channel = channel
    @starting_line = @location = location
    @tokens = []
    @emission_buffer = []
    @token = nil
    @debug = options.fetch( :debug, $DEBUG )
  end

  # The complete text being scanned.
  def source
    @scanner.string
  end

  # Rewinds to the beginning of the text and forgets all tokens, including
  # those emitted but not yet handed out. (The emission buffer used to be
  # left alone, so the first #next after a reset returned a stale token.)
  def reset
    @scanner.pos = 0
    @location = @starting_line
    @tokens.clear
    @emission_buffer.clear
    @token = nil
  end

  # Returns the next token, or nil once #match reports no more input.
  #
  # tune -- false/nil: return tokens from every channel
  #         a Symbol:  return only tokens on that channel
  #         otherwise: return only tokens on the lexer's own channel
  # Skipped tokens are still recorded in the token list.
  def next( tune = true )
    channel = tune.is_a?( Symbol ) ? tune : @channel
    until @emission_buffer.empty?
      @token = @emission_buffer.shift
      @tokens << @token
      return( @token ) unless tune
      return( @token ) if token.channel == channel
    end
    return( match ? self.next( tune ) : nil )
  end

  # Consumes input and emits zero or more tokens; returns a false value
  # when nothing is left to match.
  def match
    raise NotImplementedError, "subclasses must implement #match"
  end

  # Queues a new token starting at the current location and advances the
  # location past +text+. Returns the token.
  def emit_token( type, text, channel = :default )
    index = @tokens.length + @emission_buffer.length
    token = create_token( index, type, text, @location, channel )
    @emission_buffer << token
    @location += text   # Location#+ returns a copy, so the token keeps its start
    return token
  end

  # Token factory; override to produce a different token class.
  def create_token( index, type, text, location, channel )
    Token.new( index, type, text, location, channel )
  end

  # Yields the tokens lexed so far, then continues lexing and yields each
  # further token. +tune+ filters by channel as in #next. Returns an
  # enumerator when called without a block.
  def each( tune = true )
    block_given? or return enum_for( :each, tune )
    if tune
      channel = tune.is_a?( Symbol ) ? tune : @channel
      @tokens.each { |token| token.channel == channel and yield( token ) }
    else
      @tokens.each { |token| yield( token ) }
    end
    while token = self.next( tune )
      yield( token )
    end
    return self
  end

  # With a block: maps the block over all tokens. Without a block: returns
  # the map enumerator over #each.
  def lex!( tune = true )
    if block_given?
      each( tune ).map do |token|
        yield( token )
      end
    else
      return each( tune ).map
    end
  end

  # Tokens recorded so far; filtered by channel as in #next unless +tune+
  # is false/nil, in which case the internal list itself is returned.
  def tokens( tune = true )
    if tune
      channel = tune.is_a?( Symbol ) ? tune : @channel
      @tokens.select { |token| token.channel == channel }
    else
      return @tokens
    end
  end

  # Indexes into the recorded token list (same arguments as Array#[]).
  def []( *args )
    @tokens[ *args ]
  end

  # Prints the block's value to $stderr when debugging is enabled; the
  # block is not evaluated otherwise.
  def debug
    if @debug
      $stderr.puts( "\e[31m#{ self.class }\e[0m lexer debug: \e[36m#{ yield }\e[0m" )
    end
  end
end
|
412
|
+
|
413
|
+
# Stateless lexer: tries the class's rules in definition order and emits a
# token for the first one that matches.
class CommonLexer < Lexer
  extend CommonDSL

  def initialize( text, options = {} )
    super
    @rules = self.class.rules
  end

  # Returns nil at end of input and true after emitting a token for the
  # first matching rule; raises LexerError when no rule matches.
  def match
    return nil if @scanner.eos?
    @rules.each do | rule |
      text = rule.match( @scanner ) or next
      matched!( rule, text )
      return true
    end
    match_failed!
  end

  # Emits the token for +rule+, then runs the rule's action (if it has one)
  # in the context of this lexer.
  def matched!( rule, text )
    emit_token( rule.name, text, rule.channel )
    action = rule.action
    action && instance_eval( &action )
  end

  # Raises a LexerError whose backtrace starts at the caller.
  def match_failed!
    failure = LexerError.new( self )
    failure.set_backtrace( caller )
    raise( failure )
  end

  private :match_failed!, :matched!
end
|
446
|
+
|
447
|
+
# Class-level DSL for lexers with named states. Every extended class has a
# :global state, which receives the rules defined outside any state block.
module StatefulDSL
  attr_accessor :states, :initial_state, :global_state

  # Gives the extended class an empty state table and its :global state,
  # and leaves the initial state undecided.
  def self.extended( klass )
    klass.instance_variable_set( :@states, {} )
    klass.instance_variable_set( :@global_state, klass.state( :global ) )
    # creating the global state made it the initial state; undo that so the
    # first explicitly declared state becomes the initial one
    klass.instance_variable_set( :@initial_state, nil )
  end

  # Returns the state called +name+, creating it on first mention, and
  # evaluates the block (if any) inside it. The first state declared
  # becomes the initial state unless one is already set.
  def state( name, *args, &body )
    state = @states[ name.to_sym ] ||= State.new( self, name, *args )
    @initial_state ||= state
    block_given? and state.specify( &body )
    return state
  end

  # Declares the state lexing starts in.
  def start_in( name )
    @initial_state = state( name )
  end

  # Rules defined at class level go to the global state.
  def register( rule )
    @global_state.rules << rule
    return rule
  end

  # Gives a subclass its own clones of every state, rebound to the subclass,
  # and maps the initial state onto the matching clone.
  def inherited( klass )
    klass.states = @states.each_with_object( {} ) do | ( name, state ), copies |
      copy = state.clone
      copy.lexer = klass
      copies[ name ] = copy
    end
    klass.global_state  = klass.state( :global )
    klass.initial_state = ( @initial_state && klass.states[ @initial_state.name ] )
  end

  # Only the `states=' writer is protected. The `global_state' reader used
  # to be protected as well, which made the `self.class.global_state'
  # fallback in StatefulLexer#initialize raise NoMethodError for lexers
  # that declare no named state.
  protected :states=
  private :register, :inherited, :start_in
end
|
485
|
+
|
486
|
+
# One state of a stateful lexer: a list of rules plus optional hooks that
# the lexer runs, in its own context, when it enters or leaves the state or
# fails to match any of the state's rules.
class State
  include CommonDSL

  attr_accessor :name, :lexer

  # lexer_class -- the lexer class that owns this state
  # name        -- state name, stored as a symbol
  # options     -- at most one of :go_to, :push or :pop, naming the
  #                transition to make when no rule matches
  def initialize( lexer_class, name, options = {}, &body )
    @lexer = lexer_class
    @name  = name.to_sym
    @rules = []
    fail_action = [ :go_to, :push, :pop ].find { | action | options[ action ] }
    # This hook is run with instance_eval, which passes the receiver to the
    # block. It must therefore be a plain proc: a zero-argument lambda
    # rejects the extra argument with ArgumentError.
    @on_failure = fail_action && proc do
      make_transition( fail_action, options[ fail_action ] )
    end
    @on_enter = nil
    @on_exit  = nil
    block_given? and specify( &body )
  end

  # Copies get their own clones of the rules and no owning lexer class.
  def initialize_copy( orig )
    @lexer = nil
    @rules = orig.rules.map { |rule| rule.clone }
  end

  # Sets the enter hook from a block or from anything responding to
  # to_proc; always returns the current hook.
  def on_enter( action = nil, &b )
    if block_given? then @on_enter = b
    elsif action then @on_enter = action.to_proc
    end
    return @on_enter
  end

  # Sets the exit hook (same conventions as on_enter); returns the hook.
  def on_exit( action = nil, &b )
    if block_given? then @on_exit = b
    elsif action then @on_exit = action.to_proc
    end
    return @on_exit
  end

  # Sets the failure hook (same conventions as on_enter); returns the hook.
  def on_failure( action = nil, &b )
    if block_given? then @on_failure = b
    elsif action then @on_failure = action.to_proc
    end
    return @on_failure
  end

  # State bodies are evaluated in the context of the state itself.
  alias specify instance_eval

  # Appends the rules of the named state to this one, and adopts each of
  # its hooks where this state has none of its own.
  def include( state_name )
    state = @lexer.state( state_name )
    @rules.concat( state.rules )
    @on_failure ||= state.on_failure
    @on_exit    ||= state.on_exit
    @on_enter   ||= state.on_enter
  end
end
|
543
|
+
|
544
|
+
# Lexer whose active rule set depends on a current state. A rule can move
# the lexer to another state (:go_to), enter a state while remembering the
# current one (:push), or return to the remembered state (:pop).
class StatefulLexer < Lexer
  # Attempts per #match call; each failed attempt runs the failure handling.
  MAX_RETRIES = 5

  extend CommonDSL
  extend StatefulDSL

  attr_reader :state, :start_state

  # Takes the Lexer options plus :initial_state (or :state), the name of
  # the state to start in. Without it the class's initial state is used,
  # and failing that the class's global state.
  def initialize( text, options = {} )
    super
    @states = self.class.states
    @state_stack = []
    if name = ( options[ :initial_state ] || options[ :state ] )
      @state = @states[ name ]
    elsif state = self.class.initial_state
      @state = state
    else
      @state = self.class.global_state
    end
    @start_state = @state
  end

  # Rules of the current state.
  def rules
    @state.rules # or raise("state %p has no rules" % @state)
  end

  # Rewinds the lexer and returns to the start state. The stack of pushed
  # states is emptied too; it used to survive a reset, so a later :pop
  # could return to a state left over from the previous run.
  def reset
    super
    @state_stack.clear
    @state = @start_state
  end

  # Performs a state transition: runs the current state's exit hook,
  # switches state, then runs the new state's enter hook. Any +type+ other
  # than :go_to, :push or :pop (e.g. :stay) changes nothing.
  def make_transition( type, target )
    debug { "state transition -- #{ @state.name } -> #{ type } #{ target }" }
    case type
    when :go_to
      run_hook( @state.on_exit )
      @state = fetch_state( target )
      run_hook( @state.on_enter )
    when :push
      run_hook( @state.on_exit )
      @state_stack.push( @state )
      @state = fetch_state( target )
      run_hook( @state.on_enter )
    when :pop
      run_hook( @state.on_exit )
      # popping an empty stack falls back to the start state
      @state = @state_stack.pop || @start_state
      run_hook( @state.on_enter )
    end
  end

  # Looks a state up by name; raises RuntimeError for an unknown name.
  def fetch_state( name )
    @states.fetch( name ) do
      # TODO: make this more informative
      raise( "this lexer has no state named %p" % name )
    end
  end

  # Returns nil at end of input and true once a rule of the current state
  # has matched. When none matches, the failure handling runs and the
  # rules of the (possibly new) current state are tried again, at most
  # MAX_RETRIES times in total; false is returned after that.
  def match
    @scanner.eos? and return nil
    MAX_RETRIES.times do
      @state.rules.each do | rule |
        text = rule.match( @scanner ) or next
        matched!( rule, text )
        return true
      end
      match_failed!
    end
    return false
  end

  # Emits the token, makes the rule's transition, then runs the rule's
  # action (if any) in the lexer's context with the token as argument.
  def matched!( rule, text )
    token = emit_token( rule.name, text, rule.channel )
    make_transition( rule.transition, rule.target_state )
    action = rule.action and instance_exec( token, &action )
  end

  # Runs the current state's failure hook; raises LexerError when the
  # state has none.
  def match_failed!
    if action = @state.on_failure
      instance_eval( &action )
    else
      error = LexerError.new( self )
      error.set_backtrace( caller )
      raise( error )
    end
  end

  # Evaluates a state hook in the lexer's context; does nothing for nil.
  def run_hook( action )
    instance_eval( &action ) if action
  end

  private :make_transition, :matched!, :match_failed!, :run_hook
end
|
632
|
+
end
|
633
|
+
end
|