lrama 0.5.8 → 0.5.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yaml +6 -1
- data/.gitignore +7 -4
- data/Gemfile +10 -6
- data/README.md +3 -3
- data/Rakefile +15 -7
- data/Steepfile +15 -1
- data/lib/lrama/command.rb +6 -1
- data/lib/lrama/context.rb +1 -3
- data/lib/lrama/counterexamples/path.rb +0 -46
- data/lib/lrama/counterexamples/production_path.rb +17 -0
- data/lib/lrama/counterexamples/start_path.rb +21 -0
- data/lib/lrama/counterexamples/transition_path.rb +17 -0
- data/lib/lrama/counterexamples.rb +3 -0
- data/lib/lrama/grammar/code/initial_action_code.rb +28 -0
- data/lib/lrama/grammar/code/no_reference_code.rb +24 -0
- data/lib/lrama/grammar/code/printer_code.rb +34 -0
- data/lib/lrama/grammar/code/rule_action.rb +62 -0
- data/lib/lrama/grammar/code.rb +9 -93
- data/lib/lrama/grammar/counter.rb +15 -0
- data/lib/lrama/grammar/error_token.rb +3 -3
- data/lib/lrama/grammar/parameterizing_rules/builder/base.rb +28 -0
- data/lib/lrama/grammar/parameterizing_rules/builder/list.rb +20 -0
- data/lib/lrama/grammar/parameterizing_rules/builder/nonempty_list.rb +20 -0
- data/lib/lrama/grammar/parameterizing_rules/builder/option.rb +20 -0
- data/lib/lrama/grammar/parameterizing_rules/builder/separated_list.rb +28 -0
- data/lib/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rb +27 -0
- data/lib/lrama/grammar/parameterizing_rules/builder.rb +43 -0
- data/lib/lrama/grammar/percent_code.rb +12 -0
- data/lib/lrama/grammar/printer.rb +3 -3
- data/lib/lrama/grammar/reference.rb +7 -16
- data/lib/lrama/grammar/rule.rb +18 -2
- data/lib/lrama/grammar/rule_builder.rb +179 -0
- data/lib/lrama/grammar/symbol.rb +2 -2
- data/lib/lrama/grammar.rb +132 -302
- data/lib/lrama/lexer/location.rb +22 -0
- data/lib/lrama/lexer/token/char.rb +8 -0
- data/lib/lrama/lexer/token/ident.rb +8 -0
- data/lib/lrama/lexer/token/parameterizing.rb +34 -0
- data/lib/lrama/lexer/token/tag.rb +12 -0
- data/lib/lrama/lexer/token/user_code.rb +64 -0
- data/lib/lrama/lexer/token.rb +23 -63
- data/lib/lrama/lexer.rb +38 -37
- data/lib/lrama/option_parser.rb +2 -1
- data/lib/lrama/options.rb +2 -2
- data/lib/lrama/output.rb +11 -2
- data/lib/lrama/parser.rb +607 -488
- data/lib/lrama/report/profile.rb +1 -12
- data/lib/lrama/version.rb +1 -1
- data/parser.y +177 -96
- data/rbs_collection.lock.yaml +17 -1
- data/rbs_collection.yaml +1 -0
- data/sample/calc.y +3 -1
- data/sample/parse.y +5 -1
- data/sig/lrama/grammar/code/printer_code.rbs +15 -0
- data/sig/lrama/grammar/code.rbs +24 -0
- data/sig/lrama/grammar/counter.rbs +11 -0
- data/sig/lrama/grammar/parameterizing_rules/builder.rbs +10 -0
- data/sig/lrama/grammar/percent_code.rbs +10 -0
- data/sig/lrama/grammar/precedence.rbs +11 -0
- data/sig/lrama/grammar/printer.rbs +11 -0
- data/sig/lrama/grammar/reference.rbs +22 -0
- data/sig/lrama/grammar/rule.rbs +13 -0
- data/sig/lrama/grammar/rule_builder.rbs +41 -0
- data/sig/lrama/grammar.rbs +5 -0
- data/sig/lrama/lexer/location.rbs +14 -0
- data/sig/lrama/lexer/token/char.rbs +8 -0
- data/sig/lrama/lexer/token/ident.rbs +8 -0
- data/sig/lrama/lexer/token/parameterizing.rbs +15 -0
- data/sig/lrama/lexer/token/tag.rbs +9 -0
- data/sig/lrama/lexer/token/user_code.rbs +16 -0
- data/sig/lrama/lexer/token.rbs +22 -0
- data/sig/stdlib/strscan/string_scanner.rbs +5 -0
- data/template/bison/_yacc.h +2 -2
- data/template/bison/yacc.c +5 -2
- metadata +44 -4
- data/lib/lrama/lexer/token/type.rb +0 -8
- data/sig/lrama/lexer/token/type.rbs +0 -17
@@ -0,0 +1,64 @@
|
|
1
|
+
require "strscan"
|
2
|
+
|
3
|
+
module Lrama
|
4
|
+
class Lexer
|
5
|
+
class Token
|
6
|
+
class UserCode < Token
|
7
|
+
def references
|
8
|
+
@references ||= _references
|
9
|
+
end
|
10
|
+
|
11
|
+
private
|
12
|
+
|
13
|
+
def _references
|
14
|
+
scanner = StringScanner.new(s_value)
|
15
|
+
references = []
|
16
|
+
|
17
|
+
while !scanner.eos? do
|
18
|
+
case
|
19
|
+
when reference = scan_reference(scanner)
|
20
|
+
references << reference
|
21
|
+
when scanner.scan(/\/\*/)
|
22
|
+
scanner.scan_until(/\*\//)
|
23
|
+
else
|
24
|
+
scanner.getch
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
references
|
29
|
+
end
|
30
|
+
|
31
|
+
def scan_reference(scanner)
|
32
|
+
start = scanner.pos
|
33
|
+
case
|
34
|
+
# $ references
|
35
|
+
# An identifier must be wrapped in brackets to use "." or "-" in its name
|
36
|
+
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
|
37
|
+
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
38
|
+
return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
|
39
|
+
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
|
40
|
+
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
41
|
+
return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
|
42
|
+
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
|
43
|
+
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
44
|
+
return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
|
45
|
+
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $[expr.right], $[expr-right], $<long>[expr.right] (named reference with brackets)
|
46
|
+
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
47
|
+
return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
|
48
|
+
|
49
|
+
# @ references
|
50
|
+
# An identifier must be wrapped in brackets to use "." or "-" in its name
|
51
|
+
when scanner.scan(/@\$/) # @$
|
52
|
+
return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos - 1)
|
53
|
+
when scanner.scan(/@(\d+)/) # @1
|
54
|
+
return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos - 1)
|
55
|
+
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
|
56
|
+
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos - 1)
|
57
|
+
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets)
|
58
|
+
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos - 1)
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
data/lib/lrama/lexer/token.rb
CHANGED
@@ -1,84 +1,44 @@
|
|
1
|
-
require 'lrama/lexer/token/type'
|
2
|
-
|
3
1
|
module Lrama
|
4
2
|
class Lexer
|
5
|
-
class Token
|
3
|
+
class Token < Struct.new(:s_value, :alias_name, :location, keyword_init: true)
|
6
4
|
|
7
|
-
attr_accessor :
|
8
|
-
# For User_code
|
9
|
-
attr_accessor :references
|
5
|
+
attr_accessor :referred
|
10
6
|
|
11
7
|
def to_s
|
12
|
-
"#{super}
|
8
|
+
"#{super} location: #{location}"
|
13
9
|
end
|
14
10
|
|
15
11
|
def referred_by?(string)
|
16
|
-
[self.s_value, self.
|
12
|
+
[self.s_value, self.alias_name].compact.include?(string)
|
17
13
|
end
|
18
14
|
|
19
15
|
def ==(other)
|
20
|
-
self.class == other.class && self.
|
16
|
+
self.class == other.class && self.s_value == other.s_value
|
21
17
|
end
|
22
18
|
|
23
|
-
def
|
24
|
-
|
25
|
-
ref_name = ref[1]
|
26
|
-
if ref_name.is_a?(::String) && ref_name != '$'
|
27
|
-
value =
|
28
|
-
if lhs.referred_by?(ref_name)
|
29
|
-
'$'
|
30
|
-
else
|
31
|
-
index = rhs.find_index {|token| token.referred_by?(ref_name) }
|
32
|
-
|
33
|
-
if index
|
34
|
-
index + 1
|
35
|
-
else
|
36
|
-
raise "'#{ref_name}' is invalid name."
|
37
|
-
end
|
38
|
-
end
|
39
|
-
[ref[0], value, ref[2], ref[3], ref[4]]
|
40
|
-
else
|
41
|
-
ref
|
42
|
-
end
|
43
|
-
}
|
19
|
+
def first_line
|
20
|
+
location.first_line
|
44
21
|
end
|
22
|
+
alias :line :first_line
|
45
23
|
|
46
|
-
|
47
|
-
|
24
|
+
def first_column
|
25
|
+
location.first_column
|
26
|
+
end
|
27
|
+
alias :column :first_column
|
48
28
|
|
49
|
-
def
|
50
|
-
|
51
|
-
const_set(name, type)
|
52
|
-
@types << type
|
53
|
-
@i += 1
|
29
|
+
def last_line
|
30
|
+
location.last_line
|
54
31
|
end
|
55
32
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
define_type(:P_printer) # %printer
|
60
|
-
define_type(:P_error_token) # %error-token
|
61
|
-
define_type(:P_lex_param) # %lex-param
|
62
|
-
define_type(:P_parse_param) # %parse-param
|
63
|
-
define_type(:P_initial_action) # %initial-action
|
64
|
-
define_type(:P_union) # %union
|
65
|
-
define_type(:P_token) # %token
|
66
|
-
define_type(:P_type) # %type
|
67
|
-
define_type(:P_nonassoc) # %nonassoc
|
68
|
-
define_type(:P_left) # %left
|
69
|
-
define_type(:P_right) # %right
|
70
|
-
define_type(:P_precedence) # %precedence
|
71
|
-
define_type(:P_prec) # %prec
|
72
|
-
define_type(:User_code) # { ... }
|
73
|
-
define_type(:Tag) # <int>
|
74
|
-
define_type(:Number) # 0
|
75
|
-
define_type(:Ident_Colon) # k_if:, k_if : (spaces can be there)
|
76
|
-
define_type(:Ident) # api.pure, tNUMBER
|
77
|
-
define_type(:Named_Ref) # [foo]
|
78
|
-
define_type(:Semicolon) # ;
|
79
|
-
define_type(:Bar) # |
|
80
|
-
define_type(:String) # "str"
|
81
|
-
define_type(:Char) # '+'
|
33
|
+
def last_column
|
34
|
+
location.last_column
|
35
|
+
end
|
82
36
|
end
|
83
37
|
end
|
84
38
|
end
|
39
|
+
|
40
|
+
require 'lrama/lexer/token/char'
|
41
|
+
require 'lrama/lexer/token/ident'
|
42
|
+
require 'lrama/lexer/token/parameterizing'
|
43
|
+
require 'lrama/lexer/token/tag'
|
44
|
+
require 'lrama/lexer/token/user_code'
|
data/lib/lrama/lexer.rb
CHANGED
@@ -1,12 +1,14 @@
|
|
1
1
|
require "strscan"
|
2
|
+
require "lrama/lexer/location"
|
2
3
|
require "lrama/lexer/token"
|
3
4
|
|
4
5
|
module Lrama
|
5
6
|
class Lexer
|
7
|
+
attr_reader :head_line, :head_column
|
6
8
|
attr_accessor :status
|
7
9
|
attr_accessor :end_symbol
|
8
10
|
|
9
|
-
SYMBOLS = %
|
11
|
+
SYMBOLS = ['%{', '%}', '%%', '{', '}', '\[', '\]', '\(', '\)', '\,', ':', '\|', ';']
|
10
12
|
PERCENT_TOKENS = %w(
|
11
13
|
%union
|
12
14
|
%token
|
@@ -24,12 +26,14 @@ module Lrama
|
|
24
26
|
%precedence
|
25
27
|
%prec
|
26
28
|
%error-token
|
29
|
+
%empty
|
30
|
+
%code
|
27
31
|
)
|
28
32
|
|
29
33
|
def initialize(text)
|
30
34
|
@scanner = StringScanner.new(text)
|
31
|
-
@head = @scanner.pos
|
32
|
-
@line = 1
|
35
|
+
@head_column = @head = @scanner.pos
|
36
|
+
@head_line = @line = 1
|
33
37
|
@status = :initial
|
34
38
|
@end_symbol = nil
|
35
39
|
end
|
@@ -51,6 +55,13 @@ module Lrama
|
|
51
55
|
@scanner.pos - @head
|
52
56
|
end
|
53
57
|
|
58
|
+
def location
|
59
|
+
Location.new(
|
60
|
+
first_line: @head_line, first_column: @head_column,
|
61
|
+
last_line: @line, last_column: column
|
62
|
+
)
|
63
|
+
end
|
64
|
+
|
54
65
|
def lex_token
|
55
66
|
while !@scanner.eos? do
|
56
67
|
case
|
@@ -60,11 +71,8 @@ module Lrama
|
|
60
71
|
# noop
|
61
72
|
when @scanner.scan(/\/\*/)
|
62
73
|
lex_comment
|
63
|
-
when @scanner.scan(
|
64
|
-
@scanner
|
65
|
-
newline
|
66
|
-
when @scanner.scan(/%empty/)
|
67
|
-
# noop
|
74
|
+
when @scanner.scan(/\/\/.*(?<newline>\n)?/)
|
75
|
+
newline if @scanner[:newline]
|
68
76
|
else
|
69
77
|
break
|
70
78
|
end
|
@@ -80,18 +88,20 @@ module Lrama
|
|
80
88
|
return [@scanner.matched, @scanner.matched]
|
81
89
|
when @scanner.scan(/#{PERCENT_TOKENS.join('|')}/)
|
82
90
|
return [@scanner.matched, @scanner.matched]
|
91
|
+
when @scanner.scan(/[\?\+\*]/)
|
92
|
+
return [@scanner.matched, @scanner.matched]
|
83
93
|
when @scanner.scan(/<\w+>/)
|
84
|
-
return [:TAG,
|
94
|
+
return [:TAG, Lrama::Lexer::Token::Tag.new(s_value: @scanner.matched, location: location)]
|
85
95
|
when @scanner.scan(/'.'/)
|
86
|
-
return [:CHARACTER,
|
96
|
+
return [:CHARACTER, Lrama::Lexer::Token::Char.new(s_value: @scanner.matched, location: location)]
|
87
97
|
when @scanner.scan(/'\\\\'|'\\b'|'\\t'|'\\f'|'\\r'|'\\n'|'\\v'|'\\13'/)
|
88
|
-
return [:CHARACTER,
|
89
|
-
when @scanner.scan(/"/)
|
90
|
-
return [:STRING, %Q(
|
98
|
+
return [:CHARACTER, Lrama::Lexer::Token::Char.new(s_value: @scanner.matched, location: location)]
|
99
|
+
when @scanner.scan(/".*?"/)
|
100
|
+
return [:STRING, %Q(#{@scanner.matched})]
|
91
101
|
when @scanner.scan(/\d+/)
|
92
102
|
return [:INTEGER, Integer(@scanner.matched)]
|
93
103
|
when @scanner.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
|
94
|
-
token =
|
104
|
+
token = Lrama::Lexer::Token::Ident.new(s_value: @scanner.matched, location: location)
|
95
105
|
type =
|
96
106
|
if @scanner.check(/\s*(\[\s*[a-zA-Z_.][-a-zA-Z0-9_.]*\s*\])?\s*:/)
|
97
107
|
:IDENT_COLON
|
@@ -100,7 +110,7 @@ module Lrama
|
|
100
110
|
end
|
101
111
|
return [type, token]
|
102
112
|
else
|
103
|
-
raise
|
113
|
+
raise ParseError, "Unexpected token: #{@scanner.peek(10).chomp}."
|
104
114
|
end
|
105
115
|
end
|
106
116
|
|
@@ -115,28 +125,30 @@ module Lrama
|
|
115
125
|
when @scanner.scan(/}/)
|
116
126
|
if nested == 0 && @end_symbol == '}'
|
117
127
|
@scanner.unscan
|
118
|
-
return [:C_DECLARATION,
|
128
|
+
return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)]
|
119
129
|
else
|
120
130
|
code += @scanner.matched
|
121
131
|
nested -= 1
|
122
132
|
end
|
123
133
|
when @scanner.check(/#{@end_symbol}/)
|
124
|
-
return [:C_DECLARATION,
|
134
|
+
return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)]
|
125
135
|
when @scanner.scan(/\n/)
|
126
136
|
code += @scanner.matched
|
127
137
|
newline
|
128
|
-
when @scanner.scan(/"/)
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
matched = @scanner.scan_until(/'/)
|
134
|
-
code += %Q('#{matched})
|
138
|
+
when @scanner.scan(/".*?"/)
|
139
|
+
code += %Q(#{@scanner.matched})
|
140
|
+
@line += @scanner.matched.count("\n")
|
141
|
+
when @scanner.scan(/'.*?'/)
|
142
|
+
code += %Q(#{@scanner.matched})
|
135
143
|
else
|
136
|
-
|
144
|
+
if @scanner.scan(/[^\"'\{\}\n#{@end_symbol}]+/)
|
145
|
+
code += @scanner.matched
|
146
|
+
else
|
147
|
+
code += @scanner.getch
|
148
|
+
end
|
137
149
|
end
|
138
150
|
end
|
139
|
-
raise
|
151
|
+
raise ParseError, "Unexpected code: #{code}."
|
140
152
|
end
|
141
153
|
|
142
154
|
private
|
@@ -155,17 +167,6 @@ module Lrama
|
|
155
167
|
end
|
156
168
|
end
|
157
169
|
|
158
|
-
def build_token(type:, s_value:, **options)
|
159
|
-
token = Token.new(type: type, s_value: s_value)
|
160
|
-
token.line = @head_line
|
161
|
-
token.column = @head_column
|
162
|
-
options.each do |attr, value|
|
163
|
-
token.public_send("#{attr}=", value)
|
164
|
-
end
|
165
|
-
|
166
|
-
token
|
167
|
-
end
|
168
|
-
|
169
170
|
def newline
|
170
171
|
@line += 1
|
171
172
|
@head = @scanner.pos + 1
|
data/lib/lrama/option_parser.rb
CHANGED
@@ -58,6 +58,7 @@ module Lrama
|
|
58
58
|
o.separator 'Tuning the Parser:'
|
59
59
|
o.on('-S', '--skeleton=FILE', 'specify the skeleton to use') {|v| @options.skeleton = v }
|
60
60
|
o.on('-t', 'reserved, do nothing') { }
|
61
|
+
o.on('--debug', 'display debugging outputs of internal parser') {|v| @options.debug = true }
|
61
62
|
o.separator ''
|
62
63
|
o.separator 'Output:'
|
63
64
|
o.on('-H', '--header=[FILE]', 'also produce a header file named FILE') {|v| @options.header = true; @options.header_file = v }
|
@@ -108,7 +109,7 @@ module Lrama
|
|
108
109
|
def validate_trace(trace)
|
109
110
|
list = %w[
|
110
111
|
none locations scan parse automaton bitsets
|
111
|
-
closure grammar resource sets muscles tools
|
112
|
+
closure grammar rules resource sets muscles tools
|
112
113
|
m4-early m4 skeleton time ielr cex all
|
113
114
|
]
|
114
115
|
h = {}
|
data/lib/lrama/options.rb
CHANGED
@@ -4,7 +4,8 @@ module Lrama
|
|
4
4
|
attr_accessor :skeleton, :header, :header_file,
|
5
5
|
:report_file, :outfile,
|
6
6
|
:error_recovery, :grammar_file,
|
7
|
-
:
|
7
|
+
:trace_opts, :report_opts, :y,
|
8
|
+
:debug
|
8
9
|
|
9
10
|
def initialize
|
10
11
|
@skeleton = "bison/yacc.c"
|
@@ -14,7 +15,6 @@ module Lrama
|
|
14
15
|
@outfile = "y.tab.c"
|
15
16
|
@error_recovery = false
|
16
17
|
@grammar_file = nil
|
17
|
-
@report_file = nil
|
18
18
|
@trace_opts = nil
|
19
19
|
@report_opts = nil
|
20
20
|
@y = STDIN
|
data/lib/lrama/output.rb
CHANGED
@@ -186,9 +186,9 @@ module Lrama
|
|
186
186
|
str = ""
|
187
187
|
|
188
188
|
@context.states.rules.each do |rule|
|
189
|
-
next unless rule.
|
189
|
+
next unless rule.token_code
|
190
190
|
|
191
|
-
code = rule.
|
191
|
+
code = rule.token_code
|
192
192
|
spaces = " " * (code.column - 1)
|
193
193
|
|
194
194
|
str << <<-STR
|
@@ -349,6 +349,15 @@ module Lrama
|
|
349
349
|
end
|
350
350
|
end
|
351
351
|
|
352
|
+
# b4_percent_code_get
|
353
|
+
def percent_code(name)
|
354
|
+
@grammar.percent_codes.select do |percent_code|
|
355
|
+
percent_code.id.s_value == name
|
356
|
+
end.map do |percent_code|
|
357
|
+
percent_code.code.s_value
|
358
|
+
end.join
|
359
|
+
end
|
360
|
+
|
352
361
|
private
|
353
362
|
|
354
363
|
def eval_template(file, path)
|