lrama 0.5.8 → 0.5.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yaml +6 -1
  3. data/.gitignore +7 -4
  4. data/Gemfile +10 -6
  5. data/README.md +3 -3
  6. data/Rakefile +15 -7
  7. data/Steepfile +15 -1
  8. data/lib/lrama/command.rb +6 -1
  9. data/lib/lrama/context.rb +1 -3
  10. data/lib/lrama/counterexamples/path.rb +0 -46
  11. data/lib/lrama/counterexamples/production_path.rb +17 -0
  12. data/lib/lrama/counterexamples/start_path.rb +21 -0
  13. data/lib/lrama/counterexamples/transition_path.rb +17 -0
  14. data/lib/lrama/counterexamples.rb +3 -0
  15. data/lib/lrama/grammar/code/initial_action_code.rb +28 -0
  16. data/lib/lrama/grammar/code/no_reference_code.rb +24 -0
  17. data/lib/lrama/grammar/code/printer_code.rb +34 -0
  18. data/lib/lrama/grammar/code/rule_action.rb +62 -0
  19. data/lib/lrama/grammar/code.rb +9 -93
  20. data/lib/lrama/grammar/counter.rb +15 -0
  21. data/lib/lrama/grammar/error_token.rb +3 -3
  22. data/lib/lrama/grammar/parameterizing_rules/builder/base.rb +28 -0
  23. data/lib/lrama/grammar/parameterizing_rules/builder/list.rb +20 -0
  24. data/lib/lrama/grammar/parameterizing_rules/builder/nonempty_list.rb +20 -0
  25. data/lib/lrama/grammar/parameterizing_rules/builder/option.rb +20 -0
  26. data/lib/lrama/grammar/parameterizing_rules/builder/separated_list.rb +28 -0
  27. data/lib/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rb +27 -0
  28. data/lib/lrama/grammar/parameterizing_rules/builder.rb +43 -0
  29. data/lib/lrama/grammar/percent_code.rb +12 -0
  30. data/lib/lrama/grammar/printer.rb +3 -3
  31. data/lib/lrama/grammar/reference.rb +7 -16
  32. data/lib/lrama/grammar/rule.rb +18 -2
  33. data/lib/lrama/grammar/rule_builder.rb +179 -0
  34. data/lib/lrama/grammar/symbol.rb +2 -2
  35. data/lib/lrama/grammar.rb +132 -302
  36. data/lib/lrama/lexer/location.rb +22 -0
  37. data/lib/lrama/lexer/token/char.rb +8 -0
  38. data/lib/lrama/lexer/token/ident.rb +8 -0
  39. data/lib/lrama/lexer/token/parameterizing.rb +34 -0
  40. data/lib/lrama/lexer/token/tag.rb +12 -0
  41. data/lib/lrama/lexer/token/user_code.rb +64 -0
  42. data/lib/lrama/lexer/token.rb +23 -63
  43. data/lib/lrama/lexer.rb +38 -37
  44. data/lib/lrama/option_parser.rb +2 -1
  45. data/lib/lrama/options.rb +2 -2
  46. data/lib/lrama/output.rb +11 -2
  47. data/lib/lrama/parser.rb +607 -488
  48. data/lib/lrama/report/profile.rb +1 -12
  49. data/lib/lrama/version.rb +1 -1
  50. data/parser.y +177 -96
  51. data/rbs_collection.lock.yaml +17 -1
  52. data/rbs_collection.yaml +1 -0
  53. data/sample/calc.y +3 -1
  54. data/sample/parse.y +5 -1
  55. data/sig/lrama/grammar/code/printer_code.rbs +15 -0
  56. data/sig/lrama/grammar/code.rbs +24 -0
  57. data/sig/lrama/grammar/counter.rbs +11 -0
  58. data/sig/lrama/grammar/parameterizing_rules/builder.rbs +10 -0
  59. data/sig/lrama/grammar/percent_code.rbs +10 -0
  60. data/sig/lrama/grammar/precedence.rbs +11 -0
  61. data/sig/lrama/grammar/printer.rbs +11 -0
  62. data/sig/lrama/grammar/reference.rbs +22 -0
  63. data/sig/lrama/grammar/rule.rbs +13 -0
  64. data/sig/lrama/grammar/rule_builder.rbs +41 -0
  65. data/sig/lrama/grammar.rbs +5 -0
  66. data/sig/lrama/lexer/location.rbs +14 -0
  67. data/sig/lrama/lexer/token/char.rbs +8 -0
  68. data/sig/lrama/lexer/token/ident.rbs +8 -0
  69. data/sig/lrama/lexer/token/parameterizing.rbs +15 -0
  70. data/sig/lrama/lexer/token/tag.rbs +9 -0
  71. data/sig/lrama/lexer/token/user_code.rbs +16 -0
  72. data/sig/lrama/lexer/token.rbs +22 -0
  73. data/sig/stdlib/strscan/string_scanner.rbs +5 -0
  74. data/template/bison/_yacc.h +2 -2
  75. data/template/bison/yacc.c +5 -2
  76. metadata +44 -4
  77. data/lib/lrama/lexer/token/type.rb +0 -8
  78. data/sig/lrama/lexer/token/type.rbs +0 -17
data/lib/lrama/lexer/token/user_code.rb ADDED
@@ -0,0 +1,64 @@
1
+ require "strscan"
2
+
3
+ module Lrama
4
+ class Lexer
5
+ class Token
6
+ class UserCode < Token
7
+ def references
8
+ @references ||= _references
9
+ end
10
+
11
+ private
12
+
13
+ def _references
14
+ scanner = StringScanner.new(s_value)
15
+ references = []
16
+
17
+ while !scanner.eos? do
18
+ case
19
+ when reference = scan_reference(scanner)
20
+ references << reference
21
+ when scanner.scan(/\/\*/)
22
+ scanner.scan_until(/\*\//)
23
+ else
24
+ scanner.getch
25
+ end
26
+ end
27
+
28
+ references
29
+ end
30
+
31
+ def scan_reference(scanner)
32
+ start = scanner.pos
33
+ case
34
+ # $ references
35
+ # An identifier containing "." or "-" must be wrapped in brackets
36
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
37
+ tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
38
+ return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
39
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
40
+ tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
41
+ return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
42
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
43
+ tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
44
+ return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
45
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $[expr.right], $[expr-right], $<long>[program] (named reference with brackets)
46
+ tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
47
+ return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
48
+
49
+ # @ references
50
+ # An identifier containing "." or "-" must be wrapped in brackets
51
+ when scanner.scan(/@\$/) # @$
52
+ return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos - 1)
53
+ when scanner.scan(/@(\d+)/) # @1
54
+ return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos - 1)
55
+ when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
56
+ return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos - 1)
57
+ when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets)
58
+ return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos - 1)
59
+ end
60
+ end
61
+ end
62
+ end
63
+ end
64
+ end
data/lib/lrama/lexer/token.rb CHANGED
@@ -1,84 +1,44 @@
1
- require 'lrama/lexer/token/type'
2
-
3
1
  module Lrama
4
2
  class Lexer
5
- class Token
3
+ class Token < Struct.new(:s_value, :alias_name, :location, keyword_init: true)
6
4
 
7
- attr_accessor :line, :column, :referred
8
- # For User_code
9
- attr_accessor :references
5
+ attr_accessor :referred
10
6
 
11
7
  def to_s
12
- "#{super} line: #{line}, column: #{column}"
8
+ "#{super} location: #{location}"
13
9
  end
14
10
 
15
11
  def referred_by?(string)
16
- [self.s_value, self.alias].include?(string)
12
+ [self.s_value, self.alias_name].compact.include?(string)
17
13
  end
18
14
 
19
15
  def ==(other)
20
- self.class == other.class && self.type == other.type && self.s_value == other.s_value
16
+ self.class == other.class && self.s_value == other.s_value
21
17
  end
22
18
 
23
- def numberize_references(lhs, rhs)
24
- self.references.map! {|ref|
25
- ref_name = ref[1]
26
- if ref_name.is_a?(::String) && ref_name != '$'
27
- value =
28
- if lhs.referred_by?(ref_name)
29
- '$'
30
- else
31
- index = rhs.find_index {|token| token.referred_by?(ref_name) }
32
-
33
- if index
34
- index + 1
35
- else
36
- raise "'#{ref_name}' is invalid name."
37
- end
38
- end
39
- [ref[0], value, ref[2], ref[3], ref[4]]
40
- else
41
- ref
42
- end
43
- }
19
+ def first_line
20
+ location.first_line
44
21
  end
22
+ alias :line :first_line
45
23
 
46
- @i = 0
47
- @types = []
24
+ def first_column
25
+ location.first_column
26
+ end
27
+ alias :column :first_column
48
28
 
49
- def self.define_type(name)
50
- type = Type.new(id: @i, name: name.to_s)
51
- const_set(name, type)
52
- @types << type
53
- @i += 1
29
+ def last_line
30
+ location.last_line
54
31
  end
55
32
 
56
- # Token types
57
- define_type(:P_expect) # %expect
58
- define_type(:P_define) # %define
59
- define_type(:P_printer) # %printer
60
- define_type(:P_error_token) # %error-token
61
- define_type(:P_lex_param) # %lex-param
62
- define_type(:P_parse_param) # %parse-param
63
- define_type(:P_initial_action) # %initial-action
64
- define_type(:P_union) # %union
65
- define_type(:P_token) # %token
66
- define_type(:P_type) # %type
67
- define_type(:P_nonassoc) # %nonassoc
68
- define_type(:P_left) # %left
69
- define_type(:P_right) # %right
70
- define_type(:P_precedence) # %precedence
71
- define_type(:P_prec) # %prec
72
- define_type(:User_code) # { ... }
73
- define_type(:Tag) # <int>
74
- define_type(:Number) # 0
75
- define_type(:Ident_Colon) # k_if:, k_if : (spaces can be there)
76
- define_type(:Ident) # api.pure, tNUMBER
77
- define_type(:Named_Ref) # [foo]
78
- define_type(:Semicolon) # ;
79
- define_type(:Bar) # |
80
- define_type(:String) # "str"
81
- define_type(:Char) # '+'
33
+ def last_column
34
+ location.last_column
35
+ end
82
36
  end
83
37
  end
84
38
  end
39
+
40
+ require 'lrama/lexer/token/char'
41
+ require 'lrama/lexer/token/ident'
42
+ require 'lrama/lexer/token/parameterizing'
43
+ require 'lrama/lexer/token/tag'
44
+ require 'lrama/lexer/token/user_code'
data/lib/lrama/lexer.rb CHANGED
@@ -1,12 +1,14 @@
1
1
  require "strscan"
2
+ require "lrama/lexer/location"
2
3
  require "lrama/lexer/token"
3
4
 
4
5
  module Lrama
5
6
  class Lexer
7
+ attr_reader :head_line, :head_column
6
8
  attr_accessor :status
7
9
  attr_accessor :end_symbol
8
10
 
9
- SYMBOLS = %w(%{ %} %% { } \[ \] : \| ;)
11
+ SYMBOLS = ['%{', '%}', '%%', '{', '}', '\[', '\]', '\(', '\)', '\,', ':', '\|', ';']
10
12
  PERCENT_TOKENS = %w(
11
13
  %union
12
14
  %token
@@ -24,12 +26,14 @@ module Lrama
24
26
  %precedence
25
27
  %prec
26
28
  %error-token
29
+ %empty
30
+ %code
27
31
  )
28
32
 
29
33
  def initialize(text)
30
34
  @scanner = StringScanner.new(text)
31
- @head = @scanner.pos
32
- @line = 1
35
+ @head_column = @head = @scanner.pos
36
+ @head_line = @line = 1
33
37
  @status = :initial
34
38
  @end_symbol = nil
35
39
  end
@@ -51,6 +55,13 @@ module Lrama
51
55
  @scanner.pos - @head
52
56
  end
53
57
 
58
+ def location
59
+ Location.new(
60
+ first_line: @head_line, first_column: @head_column,
61
+ last_line: @line, last_column: column
62
+ )
63
+ end
64
+
54
65
  def lex_token
55
66
  while !@scanner.eos? do
56
67
  case
@@ -60,11 +71,8 @@ module Lrama
60
71
  # noop
61
72
  when @scanner.scan(/\/\*/)
62
73
  lex_comment
63
- when @scanner.scan(/\/\//)
64
- @scanner.scan_until(/\n/)
65
- newline
66
- when @scanner.scan(/%empty/)
67
- # noop
74
+ when @scanner.scan(/\/\/.*(?<newline>\n)?/)
75
+ newline if @scanner[:newline]
68
76
  else
69
77
  break
70
78
  end
@@ -80,18 +88,20 @@ module Lrama
80
88
  return [@scanner.matched, @scanner.matched]
81
89
  when @scanner.scan(/#{PERCENT_TOKENS.join('|')}/)
82
90
  return [@scanner.matched, @scanner.matched]
91
+ when @scanner.scan(/[\?\+\*]/)
92
+ return [@scanner.matched, @scanner.matched]
83
93
  when @scanner.scan(/<\w+>/)
84
- return [:TAG, build_token(type: Token::Tag, s_value: @scanner.matched)]
94
+ return [:TAG, Lrama::Lexer::Token::Tag.new(s_value: @scanner.matched, location: location)]
85
95
  when @scanner.scan(/'.'/)
86
- return [:CHARACTER, build_token(type: Token::Char, s_value: @scanner.matched)]
96
+ return [:CHARACTER, Lrama::Lexer::Token::Char.new(s_value: @scanner.matched, location: location)]
87
97
  when @scanner.scan(/'\\\\'|'\\b'|'\\t'|'\\f'|'\\r'|'\\n'|'\\v'|'\\13'/)
88
- return [:CHARACTER, build_token(type: Token::Char, s_value: @scanner.matched)]
89
- when @scanner.scan(/"/)
90
- return [:STRING, %Q("#{@scanner.scan_until(/"/)})]
98
+ return [:CHARACTER, Lrama::Lexer::Token::Char.new(s_value: @scanner.matched, location: location)]
99
+ when @scanner.scan(/".*?"/)
100
+ return [:STRING, %Q(#{@scanner.matched})]
91
101
  when @scanner.scan(/\d+/)
92
102
  return [:INTEGER, Integer(@scanner.matched)]
93
103
  when @scanner.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
94
- token = build_token(type: Token::Ident, s_value: @scanner.matched)
104
+ token = Lrama::Lexer::Token::Ident.new(s_value: @scanner.matched, location: location)
95
105
  type =
96
106
  if @scanner.check(/\s*(\[\s*[a-zA-Z_.][-a-zA-Z0-9_.]*\s*\])?\s*:/)
97
107
  :IDENT_COLON
@@ -100,7 +110,7 @@ module Lrama
100
110
  end
101
111
  return [type, token]
102
112
  else
103
- raise
113
+ raise ParseError, "Unexpected token: #{@scanner.peek(10).chomp}."
104
114
  end
105
115
  end
106
116
 
@@ -115,28 +125,30 @@ module Lrama
115
125
  when @scanner.scan(/}/)
116
126
  if nested == 0 && @end_symbol == '}'
117
127
  @scanner.unscan
118
- return [:C_DECLARATION, build_token(type: Token::User_code, s_value: code, references: [])]
128
+ return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)]
119
129
  else
120
130
  code += @scanner.matched
121
131
  nested -= 1
122
132
  end
123
133
  when @scanner.check(/#{@end_symbol}/)
124
- return [:C_DECLARATION, build_token(type: Token::User_code, s_value: code, references: [])]
134
+ return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)]
125
135
  when @scanner.scan(/\n/)
126
136
  code += @scanner.matched
127
137
  newline
128
- when @scanner.scan(/"/)
129
- matched = @scanner.scan_until(/"/)
130
- code += %Q("#{matched})
131
- @line += matched.count("\n")
132
- when @scanner.scan(/'/)
133
- matched = @scanner.scan_until(/'/)
134
- code += %Q('#{matched})
138
+ when @scanner.scan(/".*?"/)
139
+ code += %Q(#{@scanner.matched})
140
+ @line += @scanner.matched.count("\n")
141
+ when @scanner.scan(/'.*?'/)
142
+ code += %Q(#{@scanner.matched})
135
143
  else
136
- code += @scanner.getch
144
+ if @scanner.scan(/[^\"'\{\}\n#{@end_symbol}]+/)
145
+ code += @scanner.matched
146
+ else
147
+ code += @scanner.getch
148
+ end
137
149
  end
138
150
  end
139
- raise
151
+ raise ParseError, "Unexpected code: #{code}."
140
152
  end
141
153
 
142
154
  private
@@ -155,17 +167,6 @@ module Lrama
155
167
  end
156
168
  end
157
169
 
158
- def build_token(type:, s_value:, **options)
159
- token = Token.new(type: type, s_value: s_value)
160
- token.line = @head_line
161
- token.column = @head_column
162
- options.each do |attr, value|
163
- token.public_send("#{attr}=", value)
164
- end
165
-
166
- token
167
- end
168
-
169
170
  def newline
170
171
  @line += 1
171
172
  @head = @scanner.pos + 1
data/lib/lrama/option_parser.rb CHANGED
@@ -58,6 +58,7 @@ module Lrama
58
58
  o.separator 'Tuning the Parser:'
59
59
  o.on('-S', '--skeleton=FILE', 'specify the skeleton to use') {|v| @options.skeleton = v }
60
60
  o.on('-t', 'reserved, do nothing') { }
61
+ o.on('--debug', 'display debugging outputs of internal parser') {|v| @options.debug = true }
61
62
  o.separator ''
62
63
  o.separator 'Output:'
63
64
  o.on('-H', '--header=[FILE]', 'also produce a header file named FILE') {|v| @options.header = true; @options.header_file = v }
@@ -108,7 +109,7 @@ module Lrama
108
109
  def validate_trace(trace)
109
110
  list = %w[
110
111
  none locations scan parse automaton bitsets
111
- closure grammar resource sets muscles tools
112
+ closure grammar rules resource sets muscles tools
112
113
  m4-early m4 skeleton time ielr cex all
113
114
  ]
114
115
  h = {}
data/lib/lrama/options.rb CHANGED
@@ -4,7 +4,8 @@ module Lrama
4
4
  attr_accessor :skeleton, :header, :header_file,
5
5
  :report_file, :outfile,
6
6
  :error_recovery, :grammar_file,
7
- :report_file, :trace_opts, :report_opts, :y
7
+ :trace_opts, :report_opts, :y,
8
+ :debug
8
9
 
9
10
  def initialize
10
11
  @skeleton = "bison/yacc.c"
@@ -14,7 +15,6 @@ module Lrama
14
15
  @outfile = "y.tab.c"
15
16
  @error_recovery = false
16
17
  @grammar_file = nil
17
- @report_file = nil
18
18
  @trace_opts = nil
19
19
  @report_opts = nil
20
20
  @y = STDIN
data/lib/lrama/output.rb CHANGED
@@ -186,9 +186,9 @@ module Lrama
186
186
  str = ""
187
187
 
188
188
  @context.states.rules.each do |rule|
189
- next unless rule.code
189
+ next unless rule.token_code
190
190
 
191
- code = rule.code
191
+ code = rule.token_code
192
192
  spaces = " " * (code.column - 1)
193
193
 
194
194
  str << <<-STR
@@ -349,6 +349,15 @@ module Lrama
349
349
  end
350
350
  end
351
351
 
352
+ # b4_percent_code_get
353
+ def percent_code(name)
354
+ @grammar.percent_codes.select do |percent_code|
355
+ percent_code.id.s_value == name
356
+ end.map do |percent_code|
357
+ percent_code.code.s_value
358
+ end.join
359
+ end
360
+
352
361
  private
353
362
 
354
363
  def eval_template(file, path)