jejune 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gemtest +0 -0
- data/History.txt +12 -0
- data/Manifest.txt +39 -0
- data/README.txt +55 -0
- data/Rakefile +75 -0
- data/bin/jjs +173 -0
- data/lib/jejune.rb +78 -0
- data/lib/jejune/boot.rb +107 -0
- data/lib/jejune/constants.rb +105 -0
- data/lib/jejune/data-extension.rb +144 -0
- data/lib/jejune/dependency-scanner.rb +69 -0
- data/lib/jejune/ejjs.rb +178 -0
- data/lib/jejune/errors.rb +53 -0
- data/lib/jejune/grammar.rb +32 -0
- data/lib/jejune/grammar/JavaScript.g +668 -0
- data/lib/jejune/grammar/Jejune.g +1029 -0
- data/lib/jejune/grammar/Jejune.tokens +241 -0
- data/lib/jejune/grammar/lexer.rb +6504 -0
- data/lib/jejune/grammar/parser.rb +17378 -0
- data/lib/jejune/grammar/rakefile +29 -0
- data/lib/jejune/grammar/tree.rb +6737 -0
- data/lib/jejune/input.rb +124 -0
- data/lib/jejune/jstring.rb +163 -0
- data/lib/jejune/lo-fi-lexer.rb +633 -0
- data/lib/jejune/macro.rb +78 -0
- data/lib/jejune/main.rb +289 -0
- data/lib/jejune/manager.rb +333 -0
- data/lib/jejune/node-test.rb +71 -0
- data/lib/jejune/parameters.rb +83 -0
- data/lib/jejune/rewrite-debug.rb +61 -0
- data/lib/jejune/rewrite.rb +125 -0
- data/lib/jejune/scanner.rb +201 -0
- data/lib/jejune/translator.rb +710 -0
- data/lib/jejune/tree-walker.rb +81 -0
- data/lib/jejune/utils.rb +81 -0
- data/lib/jejune/version.rb +38 -0
- data/spec/samples.txt +51 -0
- data/spec/translation.rb +69 -0
- data/spec/utils.rb +63 -0
- data/tools/env.fish +2 -0
- metadata +147 -0
data/lib/jejune/input.rb
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
#--
|
4
|
+
# Copyright (c) 2010-2011 Kyle C. Yetter
|
5
|
+
#
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
7
|
+
# a copy of this software and associated documentation files (the
|
8
|
+
# "Software"), to deal in the Software without restriction, including
|
9
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
10
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
11
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
12
|
+
# the following conditions:
|
13
|
+
#
|
14
|
+
# The above copyright notice and this permission notice shall be
|
15
|
+
# included in all copies or substantial portions of the Software.
|
16
|
+
#
|
17
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
18
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
19
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
20
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
21
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
22
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
23
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
24
|
+
#++
|
25
|
+
|
26
|
+
module Jejune
|
27
|
+
# Behaviour shared by the JJS input stream classes: lazy construction of the
# token stream, tree adaptor and parse tree, plus translation through the
# owning manager. Intended to be mixed into a stream class whose
# +initialize+ accepts ( source, options ) and sets @name.
module JJSInput
  include Constants
  include Utils

  attr_reader :manager, :path, :cache_file, :dependencies, :macros
  attr_accessor :syntax_errors
  attr_writer :tree, :adaptor, :tokens

  # source::  forwarded untouched to the including class' constructor
  # options:: recognises :manager (defaults to a new Manager built for this
  #           input) and :cache_file
  def initialize( source, options = {} )
    super
    @manager       = options.fetch( :manager ) { Manager.new( self, options ) }
    @cache_file    = options[ :cache_file ]
    @dependencies  = Set.new
    @path          = @name
    @syntax_errors = 0
    @macros        = {}
  end

  # Directory that contains this input, or '.' when the input has no path.
  def directory
    return '.' unless @path
    File.dirname( @path )
  end

  # Looks +name+ up relative to this input's directory.
  def find_relative( name, exts = 'jjs|js|' )
    find_in_directory( directory, name, exts )
  end

  # Token stream for this input, built on first use.
  def tokens
    return @tokens if @tokens
    @tokens = RewriteStream.new( Lexer.new( self ) )
  end

  # Tree adaptor bound to #tokens, built on first use.
  def adaptor
    return @adaptor if @adaptor
    @adaptor = RewriteAdaptor.new( tokens )
  end

  # Parse tree for this input, built on first use. Parsing also records the
  # parser's syntax error count in #syntax_errors.
  def tree
    return @tree if @tree
    parser = Parser.new( tokens, :adaptor => adaptor )
    parsed = parser.program.tree
    @syntax_errors = parser.state.syntax_errors
    @tree = parsed
  end

  # Asks the manager to translate this input.
  def translate()
    @manager.translate( self )
  end

  # Wraps +source+ in a new JJSSource that shares this input's manager and
  # file name. When +source+ is a Token, its text is used and its line and
  # column fill in any missing :line / :column options.
  def snippet( source, options = {} )
    options = { :manager => @manager, :file => @path }.merge( options )

    if source.is_a?( Token )
      options[ :line ]   ||= source.line
      options[ :column ] ||= source.column
      source = source.text
    end

    JJSSource.new( source, options )
  end

  # The string form of an input is its translation.
  def to_s
    translate
  end
end
|
96
|
+
|
97
|
+
|
98
|
+
# StringStream subclass that gains the JJSInput behaviour.
class JJSSource < StringStream
  include JJSInput
end
|
101
|
+
|
102
|
+
# FileStream subclass that gains the JJSInput behaviour and, after a clean
# translation, persists the result to the configured cache file.
class JJSFile < FileStream
  include JJSInput

  # Translates the file, reports its dependencies to the manager and, when a
  # cache file is configured and no syntax errors were recorded, writes the
  # marshalled output, dependencies and macros to that cache file.
  #
  # Returns the translated output.
  def translate
    output = super
    @manager.commit_dependencies( @path, @dependencies )

    if @cache_file and @syntax_errors.zero?
      payload = Marshal.dump(
        { :output => output, :dependencies => @dependencies, :macros => macros }
      )
      # Marshal data is binary: write it in binary mode so no newline or
      # encoding conversion can corrupt it, and use File.open rather than
      # Kernel#open so the path is never interpreted as a command.
      File.open( @cache_file, 'wb' ) { | out | out.write( payload ) }
    end

    output
  end
end
|
124
|
+
end
|
@@ -0,0 +1,163 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
#--
|
4
|
+
# Copyright (c) 2010-2011 Kyle C. Yetter
|
5
|
+
#
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
7
|
+
# a copy of this software and associated documentation files (the
|
8
|
+
# "Software"), to deal in the Software without restriction, including
|
9
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
10
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
11
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
12
|
+
# the following conditions:
|
13
|
+
#
|
14
|
+
# The above copyright notice and this permission notice shall be
|
15
|
+
# included in all copies or substantial portions of the Software.
|
16
|
+
#
|
17
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
18
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
19
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
20
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
21
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
22
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
23
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
24
|
+
#++
|
25
|
+
|
26
|
+
module Jejune
|
27
|
+
# String helpers used when handling string literals: quoting, outdenting,
# tab expansion, word splitting and escape-sequence evaluation. Every method
# is available both as a mix-in and as a module function (JString.quote ...).
module JString
  extend( self )

  @tab_width = 2
  attr_accessor :tab_width

  # Packs the given code points into a UTF-8 string.
  def utf8( *chars )
    chars.pack( 'U*' )
  end

  # Rewrites physical newlines: a newline preceded by an even number of
  # backslashes becomes the two characters backslash + n; one preceded by
  # an odd number of backslashes is removed together with its final
  # backslash.
  def collapse( str )
    str.to_s.gsub( /(\\*)\n/ ) do
      slashes = $1
      slashes.length.even? ? slashes + '\n' : slashes[ 0 ... -1 ]
    end
  end

  # Wraps +str+ in +quote+ (a double quote when omitted), adding a backslash
  # before every occurrence of the quote inside +str+ that is preceded by an
  # even number of backslashes.
  def quote( str, quote = nil )
    quote ||= '"'
    qrx  = /(\\*)#{ Regexp.escape( quote ) }/
    body = str.to_s.gsub( qrx ) do
      slashes = $1
      ( slashes.length.even? ? slashes + '\\' : slashes ) + quote
    end
    "#{ quote }#{ body }#{ quote }"
  end

  # Removes the common leading indentation from a multi-line string.
  #
  # For strings like
  #   xyz = %(       <- first line is empty
  #     blah blah blah
  #   )              <- last line holds only whitespace
  # the output will be "blah blah blah".
  #
  # When every remaining line starts with an optional-whitespace "|" margin
  # marker, that margin (and one following whitespace character) is
  # stripped instead.
  #
  # Single-line strings are returned with only their line ending
  # normalised. Lines are re-joined with $/.
  def outdent( str, tab_width = @tab_width || 2 )
    lines = str.to_s.split( /\r?\n/, -1 )
    if lines.length > 1
      lines.shift if lines.first.empty?   # trash the first empty line
      # NOTE(review): this pattern needs at least one whitespace character,
      # so a completely empty last line is kept -- confirm that is intended.
      lines.pop if lines.last =~ /^\s+$/  # trash the last blank line

      margin = /^\s*\|\s?/
      if lines.all? { | l | l =~ margin }
        lines.each { | l | l.gsub!( margin, '' ) }
      else
        indent = lines.grep( /^(\s+)\S/ ) { expand_tabs( $1, tab_width ).length }.min || 0
        if indent > 0
          lines.each do | l |
            l.sub!( /^\s+/ ) { | space | space[ indent, space.length ] }
          end
        end
      end
    end

    lines.join( $/ )
  end

  # Evaluates the escapes in +content+ (see #string_value) and renders the
  # result as a JSON string literal, with any backslash escaping of "/"
  # undone.
  def jstring( content, single_quote = nil )
    json = string_value( content, single_quote ).to_json
    json.gsub( %r[(\\*)/] ) do
      slashes = $1
      ( slashes.length.odd? ? slashes[ 0...-1 ] : slashes ) + '/'
    end
  end

  # Splits +str+ on unescaped whitespace. A backslash-escaped whitespace
  # character is kept as part of the word (minus the backslash); any other
  # backslash pair is kept verbatim.
  def split_words( str )
    str.to_s.scan( /(?:[^\s\\]|\\.)+/m ).map! do | word |
      word.gsub( /\\(\s)|(\\.)/m ) { $+ }
    end
  end

  # Returns a copy of +str+ with tabs replaced by spaces.
  #
  # A width of 0 leaves the string untouched, a width of 1 swaps each tab
  # for a single space, and larger widths pad each run of tabs out to the
  # next multiple of +tab_width+.
  #
  # Raises ArgumentError when +tab_width+ is negative.
  def expand_tabs( str, tab_width = @tab_width || 2 )
    str = str.to_s.dup
    tab_width = tab_width.to_i
    raise ArgumentError, "tab width must be >= 0, but expand_tabs called with `#{ tab_width }'" if tab_width < 0

    if tab_width == 1
      str.gsub!( /\t/, " " )
    elsif tab_width > 1
      # each pass expands the first run of tabs on every line; repeat until
      # no line has a tab left
      expanded = true
      while expanded
        expanded = str.gsub!( /^([^\t\n]*)(\t+)/ ) do
          head, tabs = $1, $2
          head + ( ' ' * ( tab_width * tabs.length - head.length % tab_width ) )
        end
      end
    end

    str
  end

  # Escape letters and the characters they stand for, in String#tr form.
  DOUBLE_ESC = [ "0abefnrstv", "\0\a\b\e\f\n\r\s\t\v" ].freeze

  # Matches one backslash escape. Group 1: a hex ("xNN") or "000" escape,
  # group 2: the four hex digits of a \u escape, group 3: any other escaped
  # character. An escaped line break matches with no group set.
  # NOTE(review): "0{3}" only matches \000; since the value is parsed with a
  # leading "0" it looks like an octal class ([0-7]{3}) may have been
  # intended -- confirm before changing.
  ESC_RX = %r(
    \\
    (?:
      ( x [A-Fa-f0-9]{1,2}
      | 0{3}
      )
    | u ( [A-Fa-f0-9]{4} )
    | \r? \n
    | ( . )
    )
  )x

  # Evaluates the backslash escapes in the body of a string literal.
  #
  # With +single+ set, only \\ and \' are unescaped and everything else is
  # kept as written. Otherwise hex, \u and single-letter escapes are
  # replaced by the characters they denote and escaped line breaks are
  # dropped.
  def string_value( str, single = false )
    str = str.to_s
    if single
      str.gsub( %r<\\(.)> ) do
        case $1
        when '\\', "'" then $1
        else $&
        end
      end
    else
      # TODO: handle control/meta sequences \cx \C-x \M-x \M-\C-x
      str.gsub( ESC_RX ) do
        if ( x = $1 )
          i = Integer( "0#{ x }" )
          # 128 is already outside ASCII: emit it as a UTF-8 code point like
          # every other high value instead of a raw, invalid byte
          i >= 128 ? [ i ].pack( 'U' ) : i.chr
        elsif ( u = $2 )
          [ u.to_i( 16 ) ].pack( 'U' )
        elsif ( c = $3 )
          c.tr( *DOUBLE_ESC )
        end
      end
    end
  end
end
|
163
|
+
end
|
@@ -0,0 +1,633 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
#--
|
4
|
+
# Copyright (c) 2010-2011 Kyle C. Yetter
|
5
|
+
#
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
7
|
+
# a copy of this software and associated documentation files (the
|
8
|
+
# "Software"), to deal in the Software without restriction, including
|
9
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
10
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
11
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
12
|
+
# the following conditions:
|
13
|
+
#
|
14
|
+
# The above copyright notice and this permission notice shall be
|
15
|
+
# included in all copies or substantial portions of the Software.
|
16
|
+
#
|
17
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
18
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
19
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
20
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
21
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
22
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
23
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
24
|
+
#++
|
25
|
+
|
26
|
+
require 'strscan'
|
27
|
+
|
28
|
+
class Regexp
  # Builds a regular expression that matches the string form of +string+
  # literally (all metacharacters escaped), compiled with +options+.
  def self.literal( string, options = 0 )
    escaped = Regexp.escape( string.to_s )
    Regexp.new( escaped, options )
  end
end
|
33
|
+
|
34
|
+
module Jejune
|
35
|
+
module LoFiLexer
|
36
|
+
# Raised when a lexer cannot match any of its rules at its current location.
# The message names the lexer class, the location and a 13 character
# glimpse of the input starting at that location.
class LexerError < StandardError
  attr_reader :lexer

  # lexer:: the failing lexer; must respond to #location and #scanner
  def initialize( lexer )
    @lexer  = lexer
    where   = lexer.location
    glimpse = lexer.scanner.string[ where.position, 13 ] + '...'
    super(
      "unable to match any lexical rule of %s for %s => %p" %
        [ lexer.class.name, where, glimpse ]
    )
  end
end
|
50
|
+
|
51
|
+
# A point in a source text: file name, absolute position, line and column.
# Locations compare by position.
class Location
  MEMBERS = %w(file position line column)

  include Comparable
  attr_accessor( *MEMBERS )

  # Location[ ... ] is shorthand for Location.new( ... ).
  def self.[]( *args )
    new( *args )
  end

  def initialize( file = nil, position = nil, line = nil, column = nil )
    @file, @position = file, position
    @line, @column   = line, column
  end

  # Advances this location, in place, past +text+.
  def <<( text )
    size     = text.length
    newlines = text.count( "\n" )
    if newlines.zero?
      @column += size
    else
      # column = number of characters after the last line break
      @column = size - text.rindex( "\n" ) - 1
    end
    @line     += newlines
    @position += size
    self
  end

  # Moves this location, in place, back over +text+. When +text+ spans
  # lines the column is set to the index of its first line break.
  def >>( text )
    size     = text.length
    newlines = text.count( "\n" )
    if newlines.zero?
      @column -= size
    else
      @column = text.index( "\n" )
    end
    @line     -= newlines
    @position -= size
    self
  end

  # Non-destructive form of #<<.
  def +( text )
    copy = clone
    copy << text
  end

  # Non-destructive form of #>>.
  def -( text )
    copy = clone
    copy >> text
  end

  def <=>( location )
    @position <=> location.position
  end

  # "file F @ (L:C)" by default, "[L:C]" when +long+ is false.
  def to_s( long = true )
    return '[%i:%i]' % [ @line, @column ] unless long
    'file %s @ (%i:%i)' % [ @file, @line, @column ]
  end
end
|
108
|
+
|
109
|
+
# Mix-in for objects that respond to #location: defines one reader per
# Location member (file, position, line, column) that forwards to the
# location and returns nil if that raises a StandardError.
module Locatable
  Location::MEMBERS.each do | member |
    define_method( member ) do
      begin
        location.public_send( member )
      rescue StandardError
        nil
      end
    end
  end
end
|
118
|
+
|
119
|
+
# A lexed token: its index in the token list, type, text, starting location
# and channel.
class Token
  include Locatable

  attr_accessor :index, :channel, :type, :text, :location

  # All fields are optional; when a block is given the new token is yielded
  # to it for further set-up.
  def initialize( index = nil, type = nil, text = nil, location = nil, channel = nil )
    @index, @type, @text = index, type, text
    @location, @channel  = location, channel
    yield( self ) if block_given?
  end

  def inspect
    short_location = @location.to_s( false )
    '[%i %p(%p) @ %s]' % [ @index, @type, @text, short_location ]
  end

  # A token's string form is its text.
  def to_s
    text
  end

  # The location just past the end of this token.
  def after
    location + text
  end
end
|
140
|
+
|
141
|
+
# Base class for lexical rules: a token name plus the channel, action and
# state transition that apply when the rule matches.
class Rule
  attr_accessor :name, :channel, :action, :transition, :target_state, :text
  # NOTE(review): nothing in this class assigns @options, so this reader
  # returns nil here.
  attr_reader :options

  # name::    token type; converted to a Symbol
  # options:: :channel (default :default), :action (falls back to the
  #           block), :text, and at most one transition key out of
  #           :go_to, :push, :pop, :stay whose value is the target state
  def initialize( name, options = {}, &action )
    @name    = name.to_sym
    @channel = options[ :channel ] || :default
    @action  = options[ :action ] || action

    # the first transition key holding a truthy value wins
    @transition = nil
    [ :go_to, :push, :pop, :stay ].each do | kind |
      @target_state = options[ kind ]
      next unless @target_state
      @transition = kind
      break
    end
    @transition ||= :stay

    @text = options.fetch( :text, nil )
  end

  # Abstract: subclasses try to match at the scanner's current position.
  def match( scanner )
    raise NotImplementedError, "#match must be implemented by subclasses"
  end
end
|
160
|
+
|
161
|
+
# A rule that matches a regular expression (or a literal string) at the
# scanner's current position.
class RegexRule < Rule
  # Builds a rule for a keyword. Accepts either
  #   ( keyword, options = {}, &action )        -- name defaults to
  #                                                options[ :name ] or the
  #                                                upcased keyword
  #   ( name, keyword, options = {}, &action )
  #
  # Raises ArgumentError for any other argument count.
  def self.keyword( *args, &action )
    # the trailing options hash has to come off +args+ (the local
    # +options+ is still nil while its own initializer runs)
    options = args.last.is_a?( Hash ) ? args.pop : {}
    case args.length
    when 1
      word = args[ 0 ].to_s
      name = options[ :name ] || word.upcase
    when 2
      name, word = args
      word = word.to_s
    else
      raise ArgumentError, <<-END.here_flow! % [ options, args ]
      | bad arguments: need (name, keyword, options = {}, &action) or
      | (keyword, options = {}, &action) -- got:
      | options = %p
      | other arguments = %p
      END
    end
    return new( name, word, options, &action )
  end

  attr_accessor :pattern

  # pattern:: a Regexp, or any object whose string form is matched
  #           literally
  def initialize( name, pattern, options = {}, &action )
    super( name, options, &action )
    @pattern = pattern.is_a?( Regexp ) ? pattern : Regexp.literal( pattern )
  end

  # Scans for the pattern at the scanner's position. Returns nil when it
  # does not match; otherwise the matched text, or -- when the :text option
  # was given -- the capture group it selects.
  def match( scanner )
    matched = scanner.scan( pattern )
    return nil unless matched
    @text ? scanner[ @text ] : matched
  end
end
|
194
|
+
|
195
|
+
# A rule that matches text enclosed by an opening and a closing delimiter,
# skipping over escape sequences in between.
class DelimiterRule < Rule
  attr_accessor :open, :close, :escape

  # open::    opening delimiter (String or Regexp)
  # close::   closing delimiter; when a Hash is given here it is taken as
  #           the options and the closing delimiter equals the opening one
  # options:: Rule options plus :escape (default: backslash); a non-Regexp
  #           escape is treated as a prefix that escapes the next character
  def initialize( name, open, close = {}, options = nil, &action )
    @open = open
    if Hash === close
      @close, options = open, close
    else
      @close = close
      options ||= {}
    end
    @escape = options.fetch( :escape, '\\' )
    super( name, options, &action )

    @escape_regexp =
      if Regexp === @escape then @escape
      else Regexp.new( Regexp.escape( @escape.to_s ) << '.' )
      end
    @open_regexp   = ( Regexp === @open ) ? @open : Regexp.literal( @open )
    @close_regexp  = ( Regexp === @close ) ? @close : Regexp.literal( @close )
    @content_pause = Regexp.union( @escape_regexp, @close_regexp )
  end

  # Tries to match a complete delimited span at the scanner's position.
  #
  # Returns false, with the scanner back where it started, when the opening
  # delimiter is absent or no closing delimiter follows. Otherwise returns
  # the matched text -- only the part between the delimiters when the :text
  # option is :body.
  def match( scanner )
    origin = scanner.pos
    return false unless scanner.scan( @open_regexp )

    body_start = scanner.pos
    body_end   = nil
    until body_end
      unless scanner.skip_until( @content_pause )
        # ran off the end of the input without finding the close delimiter
        scanner.pos = origin
        return false
      end
      hit = scanner.matched
      # anything that is not the close delimiter is an escape: keep going
      body_end = scanner.pos - hit.length if @close === hit
    end

    span = ( @text == :body ) ? ( body_start ... body_end ) : ( origin ... scanner.pos )
    scanner.string[ span ]
  end
end
|
237
|
+
|
238
|
+
# A DelimiterRule whose delimiters may nest: every opening delimiter found
# inside the body must be balanced by its own closing delimiter.
class NestedDelimiterRule < DelimiterRule
  def initialize( name, open, close, options = {}, &action )
    super
    # unlike the parent, scanning also has to stop at nested openers
    @content_pause = Regexp.union( @escape_regexp, @open_regexp, @close_regexp )
  end

  # Tries to match a balanced delimited span at the scanner's position.
  # Returns the full matched text (delimiters included), or false with the
  # scanner restored when the opener is absent or the span is unbalanced.
  def match( scanner )
    origin = scanner.pos
    return false unless scanner.scan( @open_regexp )

    loop do
      unless scanner.scan_until( @content_pause )
        scanner.pos = origin
        return false
      end

      case scanner.matched
      when @escape_regexp then next
      when @close_regexp
        return scanner.string[ origin...scanner.pos ]
      when @open_regexp
        # back up over the open delimiter, and recursively invoke the matching procedure
        scanner.pos -= scanner.matched_size
        unless match( scanner )
          scanner.pos = origin
          return false
        end
      else
        raise( <<-END.here_flow! % [ @name, scanner.matched, @escape_regexp, @open, @close ] )
        | this shouldn't happen:
        | rule %p
        | scanner.matched = %p
        | @escape_regexp = %p
        | @open = %p
        | @close = %p
        END
      end
    end
  end
  alias :match? :match
end
|
275
|
+
|
276
|
+
# Class-level DSL for declaring lexer rules. Extending a class with this
# module gives it an (initially empty) rule list; subclasses start with a
# copy of their parent's list.
module CommonDSL
  attr_accessor :rules

  # Gives the extending class its own empty rule list.
  def self.extended( klass )
    klass.instance_variable_set( :@rules, [] )
  end

  # Appends +rule+ to the rule list and returns it.
  def register( rule )
    rules.push( rule )
    rule
  end

  # Declares a RegexRule.
  def rule( name, pattern, options = {}, &action )
    register( RegexRule.new( name, pattern, options, &action ) )
  end

  # Declares a DelimiterRule.
  def delimited( name, open, close = {}, options = nil, &action )
    register( DelimiterRule.new( name, open, close, options, &action ) )
  end

  # Declares a NestedDelimiterRule.
  def nested( name, open, close, options = {}, &action )
    register( NestedDelimiterRule.new( name, open, close, options, &action ) )
  end

  # Declares a keyword rule; see RegexRule.keyword for accepted arguments.
  def keyword( *args, &action )
    register( RegexRule.keyword( *args, &action ) )
  end

  # Subclass hook: the new class receives a copy of this class' rule list.
  def inherited( klass )
    klass.rules = @rules.clone
  end

  protected :rules=
  private :register, :rule, :delimited, :nested, :keyword, :inherited
end
|
303
|
+
|
304
|
+
# Base class for the scanner-driven lexers. Subclasses implement #match,
# which must consume input and call #emit_token at least once, returning a
# true value, or return a false value when no more input can be lexed.
#
# Tokens carry a channel; most readers take a +tune+ argument:
# false::    every token, whatever its channel
# a Symbol:: only tokens on that channel
# true::     only tokens on the lexer's own #channel
class Lexer
  include Enumerable

  attr_reader :scanner, :location, :token
  attr_accessor :channel

  # text::    the source string
  # options:: :file / :file_name, :channel, :debug, and either a ready-made
  #           :location or the :position / :line / :column to start from
  def initialize( text, options = {} )
    file_name = options[ :file ] || options[ :file_name ] || '(string)'
    channel   = options[ :channel ] || :default
    unless location = options[ :location ]
      position = options[ :position ] || 0
      line     = options[ :line ] || 1
      column   = options[ :column ] || 0
      location = Location.new( file_name, position, line, column )
    end

    @scanner         = StringScanner.new( text )
    @channel         = channel
    @starting_line   = @location = location
    @tokens          = []
    @emission_buffer = []
    @token           = nil
    @debug           = options.fetch( :debug, $DEBUG )
  end

  # The complete source string.
  def source
    @scanner.string
  end

  # Rewinds to the start of the input and forgets every token produced so
  # far, including tokens that were emitted but not yet handed out.
  def reset
    @scanner.pos = 0
    @location = @starting_line
    @tokens.clear
    # without this, tokens buffered before the reset would be returned by
    # the first #next call after it
    @emission_buffer.clear
    @token = nil
  end

  # Returns the next token selected by +tune+, lexing more input as needed,
  # or nil when the input is exhausted. Skipped tokens are still recorded.
  def next( tune = true )
    channel = tune.is_a?( Symbol ) ? tune : @channel
    # iterate rather than recurse so that a long run of off-channel tokens
    # cannot exhaust the stack
    loop do
      until @emission_buffer.empty?
        @token = @emission_buffer.shift
        @tokens << @token
        return( @token ) unless tune
        return( @token ) if @token.channel == channel
      end
      match or return nil
    end
  end

  # Abstract: lex one step. See the class description.
  def match
    raise NotImplementedError, "subclasses must implement #match"
  end

  # Creates a token at the current location, queues it for #next and
  # advances the location past +text+. Returns the token.
  def emit_token( type, text, channel = :default )
    index = @tokens.length + @emission_buffer.length
    token = create_token( index, type, text, @location, channel )
    @emission_buffer << token
    @location += text
    return token
  end

  # Token factory; override to build a different token class.
  def create_token( index, type, text, location, channel )
    Token.new( index, type, text, location, channel )
  end

  # Yields the tokens already produced (filtered by +tune+) and then keeps
  # lexing until the input is exhausted. Without a block, returns an
  # enumerator.
  def each( tune = true )
    block_given? or return enum_for( :each, tune )
    if tune
      channel = tune.is_a?( Symbol ) ? tune : @channel
      @tokens.each { |token| token.channel == channel and yield( token ) }
    else
      @tokens.each { |token| yield( token ) }
    end
    while token = self.next( tune )
      yield( token )
    end
    return self
  end

  # Lexes everything, mapping each token through the block; without a
  # block, returns the map enumerator.
  def lex!( tune = true )
    if block_given?
      each( tune ).map { |token| yield( token ) }
    else
      each( tune ).map
    end
  end

  # The tokens produced so far, filtered by +tune+.
  def tokens( tune = true )
    return @tokens unless tune
    channel = tune.is_a?( Symbol ) ? tune : @channel
    @tokens.select { |token| token.channel == channel }
  end

  # Indexes into the list of tokens produced so far.
  def []( *args )
    @tokens[ *args ]
  end

  # Prints the block's value to $stderr when debugging is enabled.
  def debug
    if @debug
      $stderr.puts( "\e[31m#{ self.class }\e[0m lexer debug: \e[36m#{ yield }\e[0m" )
    end
  end
end
|
412
|
+
|
413
|
+
# A lexer driven by one flat, ordered list of rules declared on the class
# through CommonDSL.
class CommonLexer < Lexer
  extend CommonDSL

  def initialize( text, options = {} )
    super
    @rules = self.class.rules
  end

  # Tries each rule in order at the current position. Returns nil at end of
  # input and true once a rule matched; raises LexerError when none does.
  def match
    return nil if @scanner.eos?
    @rules.each do | rule |
      text = rule.match( @scanner )
      next unless text
      matched!( rule, text )
      return true
    end
    match_failed!
  end

  # Emits the token for a matched rule, then runs the rule's action (if
  # any) in the context of this lexer.
  def matched!( rule, text )
    emit_token( rule.name, text, rule.channel )
    action = rule.action
    action and instance_eval( &action )
  end

  # Raises a LexerError whose backtrace starts at this method's caller.
  def match_failed!
    failure = LexerError.new( self )
    failure.set_backtrace( caller )
    raise( failure )
  end

  private :match_failed!, :matched!
end
|
446
|
+
|
447
|
+
# Class-level DSL for lexers whose rules are grouped into named states.
# Extending a class with this module gives it a state table holding a
# :global state; rules registered at class level go into that state.
module StatefulDSL
  attr_accessor :states, :initial_state, :global_state

  # Sets up the state table and the :global state of the extending class.
  # The initial state is left unset until another state is declared or
  # #start_in is called.
  def self.extended( klass )
    klass.instance_variable_set( :@states, {} )
    klass.instance_variable_set( :@global_state, klass.state( :global ) )
    klass.instance_variable_set( :@initial_state, nil )
  end

  # Fetches the state called +name+, creating it on first use (extra
  # arguments go to State.new). The first state touched while no initial
  # state is set becomes the initial state. A block, if given, is evaluated
  # by the state to declare its rules.
  def state( name, *args, &body )
    state = @states[ name.to_sym ] ||= State.new( self, name, *args )
    @initial_state ||= state
    block_given? and state.specify( &body )
    return state
  end

  # Makes the state called +name+ the initial state.
  def start_in( name )
    @initial_state = state( name )
  end

  # Adds +rule+ to the global state and returns it.
  def register( rule )
    @global_state.rules << rule
    return rule
  end

  # Subclass hook: the new class gets its own copy of every state, rebound
  # to the new class, and the matching global and initial states.
  def inherited( klass )
    copies = {}
    @states.each do | name, state |
      copy = state.clone
      copy.lexer = klass
      copies[ name ] = copy
    end
    klass.states = copies
    klass.global_state = klass.state( :global )
    klass.initial_state = ( @initial_state && klass.states[ @initial_state.name ] )
  end

  # The global_state reader stays public: lexer instances read it through
  # self.class.global_state, which a protected reader would reject with
  # NoMethodError.
  protected :states=
  private :register, :inherited, :start_in
end
|
485
|
+
|
486
|
+
# One named state of a stateful lexer: an ordered rule list plus optional
# hooks run when the lexer enters or leaves the state or fails to match in
# it. Rules are declared with the CommonDSL methods, normally inside a
# block passed to #specify.
class State
  include CommonDSL

  attr_accessor :name, :lexer

  # lexer_class:: the lexer class that owns this state
  # name::        state name; converted to a Symbol
  # options::     one of :go_to, :push or :pop => target state, giving the
  #               transition to make when no rule of this state matches
  def initialize( lexer_class, name, options = {}, &body )
    @lexer = lexer_class
    @name  = name.to_sym
    @rules = []

    fail_action = [ :go_to, :push, :pop ].find { |action| options[ action ] }
    # The hooks are run with instance_eval( &hook ), which passes the
    # receiver as a block argument. A zero-parameter lambda rejects that
    # argument with ArgumentError, so this must be a plain proc.
    @on_failure = fail_action && proc do
      make_transition( fail_action, options[ fail_action ] )
    end
    @on_enter = nil
    @on_exit  = nil
    block_given? and specify( &body )
  end

  # Copies get their own rule list (of cloned rules) and no owning lexer.
  def initialize_copy( orig )
    @lexer = nil
    @rules = orig.rules.map { |rule| rule.clone }
  end

  alias specify instance_eval

  # Sets (from a block, or from anything responding to #to_proc) and
  # returns the hook run when the lexer enters this state.
  def on_enter( action = nil, &b )
    if block_given? then @on_enter = b
    elsif action then @on_enter = action.to_proc
    end
    return @on_enter
  end

  # Sets and returns the hook run when the lexer leaves this state.
  def on_exit( action = nil, &b )
    if block_given? then @on_exit = b
    elsif action then @on_exit = action.to_proc
    end
    return @on_exit
  end

  # Sets and returns the hook run when no rule of this state matches.
  def on_failure( action = nil, &b )
    if block_given? then @on_failure = b
    elsif action then @on_failure = action.to_proc
    end
    return @on_failure
  end

  # Appends the rules of the state called +state_name+ to this state and
  # adopts each of its hooks that this state has not set itself.
  def include( state_name )
    state = @lexer.state( state_name )
    @rules.concat( state.rules )
    @on_failure ||= state.on_failure
    @on_exit    ||= state.on_exit
    @on_enter   ||= state.on_enter
  end
end
|
543
|
+
|
544
|
+
# A lexer whose active rule set depends on its current state. States and
# their rules are declared on the class through StatefulDSL / CommonDSL; a
# matching rule may switch state (:go_to), enter a state while remembering
# the current one (:push) or return to the remembered one (:pop).
class StatefulLexer < Lexer
  MAX_RETRIES = 5
  extend CommonDSL
  extend StatefulDSL

  attr_reader :state, :start_state

  # options:: Lexer options plus :initial_state / :state, the name of the
  #           state to start in. Without it the class' initial state is
  #           used, falling back to its global state.
  def initialize( text, options = {} )
    super
    @states      = self.class.states
    @state_stack = []
    if name = ( options[ :initial_state ] || options[ :state ] )
      @state = @states[ name ]
    elsif state = self.class.initial_state
      @state = state
    else
      # global_state is declared protected by StatefulDSL, so a plain call
      # from an instance would raise NoMethodError
      @state = self.class.__send__( :global_state )
    end
    @start_state = @state
  end

  # The rules of the current state.
  def rules
    @state.rules # or raise("state %p has no rules" % @state)
  end

  # Rewinds the input and returns to the start state with an empty state
  # stack.
  def reset
    super
    # states pushed before the reset must not be popped after it
    @state_stack.clear
    @state = @start_state
  end

  # Performs a state transition of the given type, running the old state's
  # on_exit hook and the new state's on_enter hook in the context of this
  # lexer. Any type other than :go_to, :push or :pop does nothing.
  def make_transition( type, target )
    debug { "state transition -- #{ @state.name } -> #{ type } #{ target }" }
    case type
    when :go_to, :push, :pop
      action = @state.on_exit and instance_eval( &action )
      @state_stack.push( @state ) if type == :push
      @state =
        if type == :pop then @state_stack.pop || @start_state
        else fetch_state( target )
        end
      action = @state.on_enter and instance_eval( &action )
    end
  end

  # Looks a state up by name; raises when this lexer has no such state.
  def fetch_state( name )
    @states.fetch( name ) do
      # TODO: make this more informative
      raise( "this lexer has no state named %p" % name )
    end
  end

  # Tries the rules of the current state in order. Returns nil at end of
  # input and true once a rule matched. When none matches, the state's
  # failure handling runs (which may switch state) and matching is retried,
  # up to MAX_RETRIES times, before false is returned.
  def match
    return nil if @scanner.eos?
    MAX_RETRIES.times do
      @state.rules.each do | rule |
        text = rule.match( @scanner )
        next unless text
        matched!( rule, text )
        return true
      end
      match_failed!
    end
    return false
  end

  # Emits the token for a matched rule, makes the rule's transition and
  # then runs the rule's action with the token as its argument.
  def matched!( rule, text )
    token = emit_token( rule.name, text, rule.channel )
    make_transition( rule.transition, rule.target_state )
    action = rule.action and instance_exec( token, &action )
  end

  # Runs the current state's on_failure hook, or raises LexerError when the
  # state has none.
  def match_failed!
    if action = @state.on_failure
      instance_eval( &action )
    else
      error = LexerError.new( self )
      error.set_backtrace( caller )
      raise( error )
    end
  end

  private :make_transition, :matched!, :match_failed!
end
|
632
|
+
end
|
633
|
+
end
|