lrama 0.5.9 → 0.5.11

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yaml +25 -0
  3. data/.gitignore +7 -4
  4. data/Gemfile +9 -5
  5. data/Rakefile +13 -0
  6. data/Steepfile +13 -11
  7. data/lib/lrama/context.rb +1 -3
  8. data/lib/lrama/counterexamples/path.rb +0 -46
  9. data/lib/lrama/counterexamples/production_path.rb +17 -0
  10. data/lib/lrama/counterexamples/start_path.rb +21 -0
  11. data/lib/lrama/counterexamples/transition_path.rb +17 -0
  12. data/lib/lrama/counterexamples.rb +3 -0
  13. data/lib/lrama/grammar/code/initial_action_code.rb +28 -0
  14. data/lib/lrama/grammar/code/no_reference_code.rb +24 -0
  15. data/lib/lrama/grammar/code/printer_code.rb +34 -0
  16. data/lib/lrama/grammar/code/rule_action.rb +62 -0
  17. data/lib/lrama/grammar/code.rb +9 -93
  18. data/lib/lrama/grammar/counter.rb +15 -0
  19. data/lib/lrama/grammar/error_token.rb +3 -3
  20. data/lib/lrama/grammar/parameterizing_rules/builder/base.rb +36 -0
  21. data/lib/lrama/grammar/parameterizing_rules/builder/list.rb +28 -0
  22. data/lib/lrama/grammar/parameterizing_rules/builder/nonempty_list.rb +28 -0
  23. data/lib/lrama/grammar/parameterizing_rules/builder/option.rb +28 -0
  24. data/lib/lrama/grammar/parameterizing_rules/builder/separated_list.rb +39 -0
  25. data/lib/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rb +34 -0
  26. data/lib/lrama/grammar/parameterizing_rules/builder.rb +60 -0
  27. data/lib/lrama/grammar/printer.rb +3 -3
  28. data/lib/lrama/grammar/reference.rb +7 -16
  29. data/lib/lrama/grammar/rule.rb +19 -2
  30. data/lib/lrama/grammar/rule_builder.rb +177 -0
  31. data/lib/lrama/grammar/symbol.rb +16 -2
  32. data/lib/lrama/grammar/type.rb +6 -0
  33. data/lib/lrama/grammar.rb +115 -325
  34. data/lib/lrama/lexer/location.rb +22 -0
  35. data/lib/lrama/lexer/token/parameterizing.rb +18 -3
  36. data/lib/lrama/lexer/token/tag.rb +4 -0
  37. data/lib/lrama/lexer/token/user_code.rb +54 -4
  38. data/lib/lrama/lexer/token.rb +35 -10
  39. data/lib/lrama/lexer.rb +32 -31
  40. data/lib/lrama/options.rb +1 -2
  41. data/lib/lrama/output.rb +2 -2
  42. data/lib/lrama/parser.rb +514 -424
  43. data/lib/lrama/report/profile.rb +1 -12
  44. data/lib/lrama/version.rb +1 -1
  45. data/lib/lrama.rb +0 -1
  46. data/parser.y +111 -52
  47. data/rbs_collection.lock.yaml +6 -8
  48. data/rbs_collection.yaml +1 -0
  49. data/sig/lrama/grammar/code/printer_code.rbs +15 -0
  50. data/sig/lrama/grammar/code.rbs +24 -0
  51. data/sig/lrama/grammar/counter.rbs +11 -0
  52. data/sig/lrama/grammar/error_token.rbs +11 -0
  53. data/sig/lrama/grammar/parameterizing_rules/builder/base.rbs +26 -0
  54. data/sig/lrama/grammar/parameterizing_rules/builder/list.rbs +10 -0
  55. data/sig/lrama/grammar/parameterizing_rules/builder/nonempty_list.rbs +10 -0
  56. data/sig/lrama/grammar/parameterizing_rules/builder/option.rbs +10 -0
  57. data/sig/lrama/grammar/parameterizing_rules/builder/separated_list.rbs +11 -0
  58. data/sig/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rbs +11 -0
  59. data/sig/lrama/grammar/parameterizing_rules/builder.rbs +23 -0
  60. data/sig/lrama/grammar/precedence.rbs +11 -0
  61. data/sig/lrama/grammar/printer.rbs +11 -0
  62. data/sig/lrama/grammar/reference.rbs +6 -6
  63. data/sig/lrama/grammar/rule.rbs +13 -0
  64. data/sig/lrama/grammar/rule_builder.rbs +42 -0
  65. data/sig/lrama/grammar/symbol.rbs +37 -0
  66. data/sig/lrama/lexer/location.rbs +14 -0
  67. data/sig/lrama/lexer/token/parameterizing.rbs +9 -0
  68. data/sig/lrama/lexer/token/tag.rbs +1 -0
  69. data/sig/lrama/lexer/token/user_code.rbs +8 -1
  70. data/sig/lrama/lexer/token.rbs +9 -4
  71. data/sig/stdlib/strscan/string_scanner.rbs +5 -0
  72. data/template/bison/yacc.c +5 -2
  73. metadata +38 -3
  74. data/lib/lrama/type.rb +0 -4
data/lib/lrama/lexer/token/user_code.rb CHANGED
@@ -1,12 +1,62 @@
1
+ require "strscan"
2
+
1
3
  module Lrama
2
4
  class Lexer
3
5
  class Token
4
6
  class UserCode < Token
5
- attr_accessor :references
7
+ def references
8
+ @references ||= _references
9
+ end
10
+
11
+ private
12
+
13
+ def _references
14
+ scanner = StringScanner.new(s_value)
15
+ references = []
16
+
17
+ while !scanner.eos? do
18
+ case
19
+ when reference = scan_reference(scanner)
20
+ references << reference
21
+ when scanner.scan(/\/\*/)
22
+ scanner.scan_until(/\*\//)
23
+ else
24
+ scanner.getch
25
+ end
26
+ end
27
+
28
+ references
29
+ end
30
+
31
+ def scan_reference(scanner)
32
+ start = scanner.pos
33
+ case
34
+ # $ references
35
+ # It need to wrap an identifier with brackets to use ".-" for identifiers
36
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
37
+ tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
38
+ return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
39
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
40
+ tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
41
+ return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
42
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
43
+ tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
44
+ return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
45
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
46
+ tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
47
+ return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
6
48
 
7
- def initialize(s_value: nil, alias_name: nil)
8
- super
9
- self.references = []
49
+ # @ references
50
+ # It need to wrap an identifier with brackets to use ".-" for identifiers
51
+ when scanner.scan(/@\$/) # @$
52
+ return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos - 1)
53
+ when scanner.scan(/@(\d+)/) # @1
54
+ return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos - 1)
55
+ when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
56
+ return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos - 1)
57
+ when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
58
+ return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos - 1)
59
+ end
10
60
  end
11
61
  end
12
62
  end
data/lib/lrama/lexer/token.rb CHANGED
@@ -1,26 +1,51 @@
1
+ require 'lrama/lexer/token/char'
2
+ require 'lrama/lexer/token/ident'
3
+ require 'lrama/lexer/token/parameterizing'
4
+ require 'lrama/lexer/token/tag'
5
+ require 'lrama/lexer/token/user_code'
6
+
1
7
  module Lrama
2
8
  class Lexer
3
- class Token < Struct.new(:s_value, :alias_name, keyword_init: true)
9
+ class Token
10
+ attr_reader :s_value, :location
11
+ attr_accessor :alias_name, :referred
4
12
 
5
- attr_accessor :line, :column, :referred
13
+ def initialize(s_value:, alias_name: nil, location: nil)
14
+ s_value.freeze
15
+ @s_value = s_value
16
+ @alias_name = alias_name
17
+ @location = location
18
+ end
6
19
 
7
20
  def to_s
8
- "#{super} line: #{line}, column: #{column}"
21
+ "#{super} location: #{location}"
9
22
  end
10
23
 
11
24
  def referred_by?(string)
12
- [self.s_value, self.alias_name].include?(string)
25
+ [self.s_value, self.alias_name].compact.include?(string)
13
26
  end
14
27
 
15
28
  def ==(other)
16
29
  self.class == other.class && self.s_value == other.s_value
17
30
  end
31
+
32
+ def first_line
33
+ location.first_line
34
+ end
35
+ alias :line :first_line
36
+
37
+ def first_column
38
+ location.first_column
39
+ end
40
+ alias :column :first_column
41
+
42
+ def last_line
43
+ location.last_line
44
+ end
45
+
46
+ def last_column
47
+ location.last_column
48
+ end
18
49
  end
19
50
  end
20
51
  end
21
-
22
- require 'lrama/lexer/token/char'
23
- require 'lrama/lexer/token/ident'
24
- require 'lrama/lexer/token/parameterizing'
25
- require 'lrama/lexer/token/tag'
26
- require 'lrama/lexer/token/user_code'
data/lib/lrama/lexer.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require "strscan"
2
+ require "lrama/lexer/location"
2
3
  require "lrama/lexer/token"
3
4
 
4
5
  module Lrama
@@ -7,7 +8,7 @@ module Lrama
7
8
  attr_accessor :status
8
9
  attr_accessor :end_symbol
9
10
 
10
- SYMBOLS = %w(%{ %} %% { } \[ \] : \| ;)
11
+ SYMBOLS = ['%{', '%}', '%%', '{', '}', '\[', '\]', '\(', '\)', '\,', ':', '\|', ';']
11
12
  PERCENT_TOKENS = %w(
12
13
  %union
13
14
  %token
@@ -31,8 +32,8 @@ module Lrama
31
32
 
32
33
  def initialize(text)
33
34
  @scanner = StringScanner.new(text)
34
- @head = @scanner.pos
35
- @line = 1
35
+ @head_column = @head = @scanner.pos
36
+ @head_line = @line = 1
36
37
  @status = :initial
37
38
  @end_symbol = nil
38
39
  end
@@ -54,6 +55,13 @@ module Lrama
54
55
  @scanner.pos - @head
55
56
  end
56
57
 
58
+ def location
59
+ Location.new(
60
+ first_line: @head_line, first_column: @head_column,
61
+ last_line: @line, last_column: column
62
+ )
63
+ end
64
+
57
65
  def lex_token
58
66
  while !@scanner.eos? do
59
67
  case
@@ -63,9 +71,8 @@ module Lrama
63
71
  # noop
64
72
  when @scanner.scan(/\/\*/)
65
73
  lex_comment
66
- when @scanner.scan(/\/\//)
67
- @scanner.scan_until(/\n/)
68
- newline
74
+ when @scanner.scan(/\/\/.*(?<newline>\n)?/)
75
+ newline if @scanner[:newline]
69
76
  else
70
77
  break
71
78
  end
@@ -84,17 +91,17 @@ module Lrama
84
91
  when @scanner.scan(/[\?\+\*]/)
85
92
  return [@scanner.matched, @scanner.matched]
86
93
  when @scanner.scan(/<\w+>/)
87
- return [:TAG, setup_token(Lrama::Lexer::Token::Tag.new(s_value: @scanner.matched))]
94
+ return [:TAG, Lrama::Lexer::Token::Tag.new(s_value: @scanner.matched, location: location)]
88
95
  when @scanner.scan(/'.'/)
89
- return [:CHARACTER, setup_token(Lrama::Lexer::Token::Char.new(s_value: @scanner.matched))]
96
+ return [:CHARACTER, Lrama::Lexer::Token::Char.new(s_value: @scanner.matched, location: location)]
90
97
  when @scanner.scan(/'\\\\'|'\\b'|'\\t'|'\\f'|'\\r'|'\\n'|'\\v'|'\\13'/)
91
- return [:CHARACTER, setup_token(Lrama::Lexer::Token::Char.new(s_value: @scanner.matched))]
92
- when @scanner.scan(/"/)
93
- return [:STRING, %Q("#{@scanner.scan_until(/"/)})]
98
+ return [:CHARACTER, Lrama::Lexer::Token::Char.new(s_value: @scanner.matched, location: location)]
99
+ when @scanner.scan(/".*?"/)
100
+ return [:STRING, %Q(#{@scanner.matched})]
94
101
  when @scanner.scan(/\d+/)
95
102
  return [:INTEGER, Integer(@scanner.matched)]
96
103
  when @scanner.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
97
- token = setup_token(Lrama::Lexer::Token::Ident.new(s_value: @scanner.matched))
104
+ token = Lrama::Lexer::Token::Ident.new(s_value: @scanner.matched, location: location)
98
105
  type =
99
106
  if @scanner.check(/\s*(\[\s*[a-zA-Z_.][-a-zA-Z0-9_.]*\s*\])?\s*:/)
100
107
  :IDENT_COLON
@@ -118,25 +125,27 @@ module Lrama
118
125
  when @scanner.scan(/}/)
119
126
  if nested == 0 && @end_symbol == '}'
120
127
  @scanner.unscan
121
- return [:C_DECLARATION, setup_token(Lrama::Lexer::Token::UserCode.new(s_value: code))]
128
+ return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)]
122
129
  else
123
130
  code += @scanner.matched
124
131
  nested -= 1
125
132
  end
126
133
  when @scanner.check(/#{@end_symbol}/)
127
- return [:C_DECLARATION, setup_token(Lrama::Lexer::Token::UserCode.new(s_value: code))]
134
+ return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)]
128
135
  when @scanner.scan(/\n/)
129
136
  code += @scanner.matched
130
137
  newline
131
- when @scanner.scan(/"/)
132
- matched = @scanner.scan_until(/"/)
133
- code += %Q("#{matched})
134
- @line += matched.count("\n")
135
- when @scanner.scan(/'/)
136
- matched = @scanner.scan_until(/'/)
137
- code += %Q('#{matched})
138
+ when @scanner.scan(/".*?"/)
139
+ code += %Q(#{@scanner.matched})
140
+ @line += @scanner.matched.count("\n")
141
+ when @scanner.scan(/'.*?'/)
142
+ code += %Q(#{@scanner.matched})
138
143
  else
139
- code += @scanner.getch
144
+ if @scanner.scan(/[^\"'\{\}\n#{@end_symbol}]+/)
145
+ code += @scanner.matched
146
+ else
147
+ code += @scanner.getch
148
+ end
140
149
  end
141
150
  end
142
151
  raise ParseError, "Unexpected code: #{code}."
@@ -148,8 +157,7 @@ module Lrama
148
157
  while !@scanner.eos? do
149
158
  case
150
159
  when @scanner.scan(/\n/)
151
- @line += 1
152
- @head = @scanner.pos + 1
160
+ newline
153
161
  when @scanner.scan(/\*\//)
154
162
  return
155
163
  else
@@ -158,13 +166,6 @@ module Lrama
158
166
  end
159
167
  end
160
168
 
161
- def setup_token(token)
162
- token.line = @head_line
163
- token.column = @head_column
164
-
165
- token
166
- end
167
-
168
169
  def newline
169
170
  @line += 1
170
171
  @head = @scanner.pos + 1
data/lib/lrama/options.rb CHANGED
@@ -4,7 +4,7 @@ module Lrama
4
4
  attr_accessor :skeleton, :header, :header_file,
5
5
  :report_file, :outfile,
6
6
  :error_recovery, :grammar_file,
7
- :report_file, :trace_opts, :report_opts, :y,
7
+ :trace_opts, :report_opts, :y,
8
8
  :debug
9
9
 
10
10
  def initialize
@@ -15,7 +15,6 @@ module Lrama
15
15
  @outfile = "y.tab.c"
16
16
  @error_recovery = false
17
17
  @grammar_file = nil
18
- @report_file = nil
19
18
  @trace_opts = nil
20
19
  @report_opts = nil
21
20
  @y = STDIN
data/lib/lrama/output.rb CHANGED
@@ -186,9 +186,9 @@ module Lrama
186
186
  str = ""
187
187
 
188
188
  @context.states.rules.each do |rule|
189
- next unless rule.code
189
+ next unless rule.token_code
190
190
 
191
- code = rule.code
191
+ code = rule.token_code
192
192
  spaces = " " * (code.column - 1)
193
193
 
194
194
  str << <<-STR