lrama 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (260) hide show
  1. checksums.yaml +4 -4
  2. data/.gitattributes +2 -0
  3. data/.github/workflows/codespell.yaml +1 -1
  4. data/.github/workflows/gh-pages.yml +5 -6
  5. data/.github/workflows/test.yaml +25 -14
  6. data/Gemfile +4 -3
  7. data/NEWS.md +370 -35
  8. data/README.md +7 -88
  9. data/Rakefile +3 -2
  10. data/Steepfile +11 -5
  11. data/doc/Index.md +1 -1
  12. data/doc/development/compressed_state_table/parser.rb +2 -0
  13. data/doc/development/profiling.md +44 -0
  14. data/exe/lrama +1 -1
  15. data/lib/lrama/bitmap.rb +18 -5
  16. data/lib/lrama/command.rb +95 -43
  17. data/lib/lrama/context.rb +22 -24
  18. data/lib/lrama/counterexamples/derivation.rb +14 -4
  19. data/lib/lrama/counterexamples/example.rb +47 -22
  20. data/lib/lrama/counterexamples/node.rb +30 -0
  21. data/lib/lrama/counterexamples/path.rb +12 -14
  22. data/lib/lrama/counterexamples/state_item.rb +24 -1
  23. data/lib/lrama/counterexamples/triple.rb +27 -9
  24. data/lib/lrama/counterexamples.rb +216 -88
  25. data/lib/lrama/diagram.rb +77 -0
  26. data/lib/lrama/digraph.rb +28 -7
  27. data/lib/lrama/erb.rb +29 -0
  28. data/lib/lrama/grammar/auxiliary.rb +6 -1
  29. data/lib/lrama/grammar/binding.rb +37 -25
  30. data/lib/lrama/grammar/code/destructor_code.rb +11 -0
  31. data/lib/lrama/grammar/code/initial_action_code.rb +3 -0
  32. data/lib/lrama/grammar/code/no_reference_code.rb +3 -0
  33. data/lib/lrama/grammar/code/printer_code.rb +11 -0
  34. data/lib/lrama/grammar/code/rule_action.rb +17 -0
  35. data/lib/lrama/grammar/code.rb +16 -1
  36. data/lib/lrama/grammar/counter.rb +10 -0
  37. data/lib/lrama/grammar/destructor.rb +14 -1
  38. data/lib/lrama/grammar/error_token.rb +14 -1
  39. data/lib/lrama/grammar/inline/resolver.rb +80 -0
  40. data/lib/lrama/grammar/inline.rb +3 -0
  41. data/lib/lrama/grammar/{parameterizing_rule → parameterized}/resolver.rb +19 -8
  42. data/lib/lrama/grammar/{parameterizing_rule → parameterized}/rhs.rb +7 -2
  43. data/lib/lrama/grammar/parameterized/rule.rb +36 -0
  44. data/lib/lrama/grammar/parameterized.rb +5 -0
  45. data/lib/lrama/grammar/percent_code.rb +12 -1
  46. data/lib/lrama/grammar/precedence.rb +43 -1
  47. data/lib/lrama/grammar/printer.rb +9 -0
  48. data/lib/lrama/grammar/reference.rb +13 -0
  49. data/lib/lrama/grammar/rule.rb +61 -1
  50. data/lib/lrama/grammar/rule_builder.rb +84 -69
  51. data/lib/lrama/grammar/stdlib.y +68 -48
  52. data/lib/lrama/grammar/symbol.rb +63 -19
  53. data/lib/lrama/grammar/symbols/resolver.rb +64 -3
  54. data/lib/lrama/grammar/type.rb +13 -1
  55. data/lib/lrama/grammar/union.rb +12 -1
  56. data/lib/lrama/grammar.rb +231 -35
  57. data/lib/lrama/lexer/location.rb +25 -8
  58. data/lib/lrama/lexer/token/base.rb +73 -0
  59. data/lib/lrama/lexer/token/char.rb +15 -2
  60. data/lib/lrama/lexer/token/empty.rb +14 -0
  61. data/lib/lrama/lexer/token/ident.rb +2 -2
  62. data/lib/lrama/lexer/token/instantiate_rule.rb +4 -4
  63. data/lib/lrama/lexer/token/int.rb +14 -0
  64. data/lib/lrama/lexer/token/str.rb +11 -0
  65. data/lib/lrama/lexer/token/tag.rb +2 -2
  66. data/lib/lrama/lexer/token/token.rb +11 -0
  67. data/lib/lrama/lexer/token/user_code.rb +63 -37
  68. data/lib/lrama/lexer/token.rb +6 -56
  69. data/lib/lrama/lexer.rb +51 -23
  70. data/lib/lrama/logger.rb +12 -2
  71. data/lib/lrama/option_parser.rb +63 -9
  72. data/lib/lrama/options.rb +25 -7
  73. data/lib/lrama/output.rb +4 -11
  74. data/lib/lrama/parser.rb +854 -723
  75. data/lib/lrama/reporter/conflicts.rb +44 -0
  76. data/lib/lrama/reporter/grammar.rb +39 -0
  77. data/lib/lrama/reporter/precedences.rb +54 -0
  78. data/lib/lrama/reporter/profile/call_stack.rb +45 -0
  79. data/lib/lrama/reporter/profile/memory.rb +44 -0
  80. data/lib/lrama/reporter/profile.rb +4 -0
  81. data/lib/lrama/reporter/rules.rb +43 -0
  82. data/lib/lrama/reporter/states.rb +387 -0
  83. data/lib/lrama/reporter/terms.rb +44 -0
  84. data/lib/lrama/reporter.rb +39 -0
  85. data/lib/lrama/state/action/goto.rb +33 -0
  86. data/lib/lrama/state/action/reduce.rb +71 -0
  87. data/lib/lrama/state/action/shift.rb +39 -0
  88. data/lib/lrama/state/action.rb +5 -0
  89. data/lib/lrama/state/inadequacy_annotation.rb +140 -0
  90. data/lib/lrama/{states → state}/item.rb +33 -4
  91. data/lib/lrama/state/reduce_reduce_conflict.rb +14 -1
  92. data/lib/lrama/state/resolved_conflict.rb +38 -4
  93. data/lib/lrama/state/shift_reduce_conflict.rb +14 -1
  94. data/lib/lrama/state.rb +301 -200
  95. data/lib/lrama/states.rb +447 -175
  96. data/lib/lrama/tracer/actions.rb +22 -0
  97. data/lib/lrama/tracer/closure.rb +30 -0
  98. data/lib/lrama/tracer/duration.rb +38 -0
  99. data/lib/lrama/tracer/only_explicit_rules.rb +24 -0
  100. data/lib/lrama/tracer/rules.rb +23 -0
  101. data/lib/lrama/tracer/state.rb +33 -0
  102. data/lib/lrama/tracer.rb +51 -0
  103. data/lib/lrama/version.rb +2 -1
  104. data/lib/lrama/warnings/conflicts.rb +27 -0
  105. data/lib/lrama/warnings/implicit_empty.rb +29 -0
  106. data/lib/lrama/warnings/name_conflicts.rb +63 -0
  107. data/lib/lrama/warnings/redefined_rules.rb +23 -0
  108. data/lib/lrama/warnings/required.rb +23 -0
  109. data/lib/lrama/warnings/useless_precedence.rb +25 -0
  110. data/lib/lrama/warnings.rb +33 -0
  111. data/lib/lrama.rb +5 -5
  112. data/parser.y +495 -404
  113. data/rbs_collection.lock.yaml +27 -3
  114. data/rbs_collection.yaml +2 -0
  115. data/sig/generated/lrama/bitmap.rbs +12 -4
  116. data/sig/generated/lrama/counterexamples/derivation.rbs +36 -0
  117. data/sig/generated/lrama/counterexamples/example.rbs +58 -0
  118. data/sig/generated/lrama/counterexamples/node.rbs +18 -0
  119. data/sig/generated/lrama/counterexamples/path.rbs +23 -0
  120. data/sig/generated/lrama/counterexamples/state_item.rbs +19 -0
  121. data/sig/generated/lrama/counterexamples/triple.rbs +32 -0
  122. data/sig/generated/lrama/counterexamples.rbs +98 -0
  123. data/sig/generated/lrama/diagram.rbs +34 -0
  124. data/sig/generated/lrama/digraph.rbs +26 -6
  125. data/sig/generated/lrama/erb.rbs +14 -0
  126. data/sig/generated/lrama/grammar/auxiliary.rbs +16 -0
  127. data/sig/generated/lrama/grammar/binding.rbs +18 -12
  128. data/sig/generated/lrama/grammar/code/destructor_code.rbs +26 -0
  129. data/sig/{lrama → generated/lrama}/grammar/code/initial_action_code.rbs +6 -0
  130. data/sig/{lrama → generated/lrama}/grammar/code/no_reference_code.rbs +6 -0
  131. data/sig/generated/lrama/grammar/code/printer_code.rbs +26 -0
  132. data/sig/generated/lrama/grammar/code/rule_action.rbs +63 -0
  133. data/sig/generated/lrama/grammar/code.rbs +38 -0
  134. data/sig/{lrama → generated/lrama}/grammar/counter.rbs +4 -0
  135. data/sig/generated/lrama/grammar/destructor.rbs +19 -0
  136. data/sig/generated/lrama/grammar/error_token.rbs +19 -0
  137. data/sig/generated/lrama/grammar/inline/resolver.rbs +26 -0
  138. data/sig/generated/lrama/grammar/parameterized/resolver.rbs +42 -0
  139. data/sig/generated/lrama/grammar/parameterized/rhs.rbs +21 -0
  140. data/sig/generated/lrama/grammar/parameterized/rule.rbs +28 -0
  141. data/sig/{lrama → generated/lrama}/grammar/percent_code.rbs +8 -0
  142. data/sig/generated/lrama/grammar/precedence.rbs +45 -0
  143. data/sig/{lrama/grammar/error_token.rbs → generated/lrama/grammar/printer.rbs} +8 -3
  144. data/sig/generated/lrama/grammar/reference.rbs +31 -0
  145. data/sig/generated/lrama/grammar/rule.rbs +83 -0
  146. data/sig/generated/lrama/grammar/rule_builder.rbs +91 -0
  147. data/sig/generated/lrama/grammar/symbol.rbs +89 -0
  148. data/sig/generated/lrama/grammar/symbols/resolver.rbs +131 -0
  149. data/sig/generated/lrama/grammar/type.rbs +21 -0
  150. data/sig/generated/lrama/grammar/union.rbs +17 -0
  151. data/sig/generated/lrama/grammar.rbs +289 -0
  152. data/sig/generated/lrama/lexer/location.rbs +12 -3
  153. data/sig/generated/lrama/lexer/token/base.rbs +53 -0
  154. data/sig/generated/lrama/lexer/token/char.rbs +9 -2
  155. data/sig/generated/lrama/lexer/token/empty.rbs +11 -0
  156. data/sig/generated/lrama/lexer/token/ident.rbs +2 -2
  157. data/sig/generated/lrama/lexer/token/instantiate_rule.rbs +5 -5
  158. data/sig/generated/lrama/lexer/token/int.rbs +13 -0
  159. data/sig/generated/lrama/lexer/token/str.rbs +10 -0
  160. data/sig/generated/lrama/lexer/token/tag.rbs +2 -2
  161. data/sig/generated/lrama/lexer/token/token.rbs +10 -0
  162. data/sig/generated/lrama/lexer/token/user_code.rbs +2 -2
  163. data/sig/generated/lrama/lexer/token.rbs +1 -39
  164. data/sig/generated/lrama/lexer.rbs +54 -0
  165. data/sig/generated/lrama/logger.rbs +6 -0
  166. data/sig/generated/lrama/option_parser.rbs +52 -0
  167. data/sig/{lrama → generated/lrama}/options.rbs +27 -3
  168. data/sig/generated/lrama/reporter/conflicts.rbs +18 -0
  169. data/sig/generated/lrama/reporter/grammar.rbs +13 -0
  170. data/sig/generated/lrama/reporter/precedences.rbs +15 -0
  171. data/sig/generated/lrama/reporter/profile/call_stack.rbs +19 -0
  172. data/sig/generated/lrama/reporter/profile/memory.rbs +19 -0
  173. data/sig/generated/lrama/reporter/rules.rbs +13 -0
  174. data/sig/generated/lrama/reporter/states.rbs +69 -0
  175. data/sig/generated/lrama/reporter/terms.rbs +13 -0
  176. data/sig/generated/lrama/reporter.rbs +13 -0
  177. data/sig/generated/lrama/state/action/goto.rbs +28 -0
  178. data/sig/generated/lrama/state/action/reduce.rbs +49 -0
  179. data/sig/generated/lrama/state/action/shift.rbs +33 -0
  180. data/sig/generated/lrama/state/inadequacy_annotation.rbs +45 -0
  181. data/sig/generated/lrama/state/item.rbs +75 -0
  182. data/sig/generated/lrama/state/reduce_reduce_conflict.rbs +19 -0
  183. data/sig/generated/lrama/state/resolved_conflict.rbs +38 -0
  184. data/sig/generated/lrama/state/shift_reduce_conflict.rbs +19 -0
  185. data/sig/generated/lrama/state.rbs +231 -0
  186. data/sig/generated/lrama/states.rbs +215 -0
  187. data/sig/generated/lrama/tracer/actions.rbs +13 -0
  188. data/sig/generated/lrama/tracer/closure.rbs +13 -0
  189. data/sig/generated/lrama/tracer/duration.rbs +18 -0
  190. data/sig/generated/lrama/tracer/only_explicit_rules.rbs +13 -0
  191. data/sig/generated/lrama/tracer/rules.rbs +13 -0
  192. data/sig/generated/lrama/tracer/state.rbs +16 -0
  193. data/sig/generated/lrama/tracer.rbs +23 -0
  194. data/sig/generated/lrama/version.rbs +5 -0
  195. data/sig/generated/lrama/warnings/conflicts.rbs +13 -0
  196. data/sig/generated/lrama/warnings/implicit_empty.rbs +17 -0
  197. data/sig/generated/lrama/warnings/name_conflicts.rbs +31 -0
  198. data/sig/generated/lrama/warnings/redefined_rules.rbs +13 -0
  199. data/sig/generated/lrama/warnings/required.rbs +13 -0
  200. data/sig/generated/lrama/warnings/useless_precedence.rbs +13 -0
  201. data/sig/generated/lrama/warnings.rbs +11 -0
  202. data/sig/railroad_diagrams/railroad_diagrams.rbs +16 -0
  203. data/template/bison/_yacc.h +8 -0
  204. data/template/diagram/diagram.html +102 -0
  205. metadata +126 -66
  206. data/lib/lrama/counterexamples/production_path.rb +0 -19
  207. data/lib/lrama/counterexamples/start_path.rb +0 -23
  208. data/lib/lrama/counterexamples/transition_path.rb +0 -19
  209. data/lib/lrama/diagnostics.rb +0 -36
  210. data/lib/lrama/grammar/parameterizing_rule/rule.rb +0 -24
  211. data/lib/lrama/grammar/parameterizing_rule.rb +0 -5
  212. data/lib/lrama/grammar_validator.rb +0 -37
  213. data/lib/lrama/report/duration.rb +0 -27
  214. data/lib/lrama/report/profile.rb +0 -16
  215. data/lib/lrama/report.rb +0 -4
  216. data/lib/lrama/state/reduce.rb +0 -37
  217. data/lib/lrama/state/shift.rb +0 -15
  218. data/lib/lrama/states_reporter.rb +0 -362
  219. data/lib/lrama/trace_reporter.rb +0 -45
  220. data/sig/generated/lrama/trace_reporter.rbs +0 -25
  221. data/sig/lrama/counterexamples/derivation.rbs +0 -33
  222. data/sig/lrama/counterexamples/example.rbs +0 -45
  223. data/sig/lrama/counterexamples/path.rbs +0 -21
  224. data/sig/lrama/counterexamples/production_path.rbs +0 -11
  225. data/sig/lrama/counterexamples/start_path.rbs +0 -13
  226. data/sig/lrama/counterexamples/state_item.rbs +0 -10
  227. data/sig/lrama/counterexamples/transition_path.rbs +0 -11
  228. data/sig/lrama/counterexamples/triple.rbs +0 -20
  229. data/sig/lrama/counterexamples.rbs +0 -29
  230. data/sig/lrama/grammar/auxiliary.rbs +0 -10
  231. data/sig/lrama/grammar/code/destructor_code.rbs +0 -14
  232. data/sig/lrama/grammar/code/printer_code.rbs +0 -14
  233. data/sig/lrama/grammar/code/rule_action.rbs +0 -19
  234. data/sig/lrama/grammar/code.rbs +0 -24
  235. data/sig/lrama/grammar/destructor.rbs +0 -13
  236. data/sig/lrama/grammar/parameterizing_rule/resolver.rbs +0 -24
  237. data/sig/lrama/grammar/parameterizing_rule/rhs.rbs +0 -14
  238. data/sig/lrama/grammar/parameterizing_rule/rule.rbs +0 -16
  239. data/sig/lrama/grammar/parameterizing_rule.rbs +0 -6
  240. data/sig/lrama/grammar/precedence.rbs +0 -13
  241. data/sig/lrama/grammar/printer.rbs +0 -13
  242. data/sig/lrama/grammar/reference.rbs +0 -22
  243. data/sig/lrama/grammar/rule.rbs +0 -45
  244. data/sig/lrama/grammar/rule_builder.rbs +0 -47
  245. data/sig/lrama/grammar/symbol.rbs +0 -38
  246. data/sig/lrama/grammar/symbols/resolver.rbs +0 -60
  247. data/sig/lrama/grammar/type.rbs +0 -11
  248. data/sig/lrama/grammar/union.rbs +0 -12
  249. data/sig/lrama/grammar.rbs +0 -108
  250. data/sig/lrama/report/duration.rbs +0 -11
  251. data/sig/lrama/report/profile.rbs +0 -7
  252. data/sig/lrama/state/reduce.rbs +0 -20
  253. data/sig/lrama/state/reduce_reduce_conflict.rbs +0 -13
  254. data/sig/lrama/state/resolved_conflict.rbs +0 -14
  255. data/sig/lrama/state/shift.rbs +0 -14
  256. data/sig/lrama/state/shift_reduce_conflict.rbs +0 -13
  257. data/sig/lrama/state.rbs +0 -79
  258. data/sig/lrama/states/item.rbs +0 -30
  259. data/sig/lrama/states.rbs +0 -101
  260. data/sig/lrama/warning.rbs +0 -16
data/lib/lrama/grammar.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # rbs_inline: enabled
1
2
  # frozen_string_literal: true
2
3
 
3
4
  require "forwardable"
@@ -7,7 +8,8 @@ require_relative "grammar/code"
7
8
  require_relative "grammar/counter"
8
9
  require_relative "grammar/destructor"
9
10
  require_relative "grammar/error_token"
10
- require_relative "grammar/parameterizing_rule"
11
+ require_relative "grammar/inline"
12
+ require_relative "grammar/parameterized"
11
13
  require_relative "grammar/percent_code"
12
14
  require_relative "grammar/precedence"
13
15
  require_relative "grammar/printer"
@@ -23,19 +25,89 @@ require_relative "lexer"
23
25
  module Lrama
24
26
  # Grammar is the result of parsing an input grammar file
25
27
  class Grammar
28
+ # @rbs!
29
+ #
30
+ # interface _DelegatedMethods
31
+ # def rules: () -> Array[Rule]
32
+ # def accept_symbol: () -> Grammar::Symbol
33
+ # def eof_symbol: () -> Grammar::Symbol
34
+ # def undef_symbol: () -> Grammar::Symbol
35
+ # def precedences: () -> Array[Precedence]
36
+ #
37
+ # # delegate to @symbols_resolver
38
+ # def symbols: () -> Array[Grammar::Symbol]
39
+ # def terms: () -> Array[Grammar::Symbol]
40
+ # def nterms: () -> Array[Grammar::Symbol]
41
+ # def find_symbol_by_s_value!: (::String s_value) -> Grammar::Symbol
42
+ # def ielr_defined?: () -> bool
43
+ # end
44
+ #
45
+ # include Symbols::Resolver::_DelegatedMethods
46
+ #
47
+ # @rule_counter: Counter
48
+ # @percent_codes: Array[PercentCode]
49
+ # @printers: Array[Printer]
50
+ # @destructors: Array[Destructor]
51
+ # @error_tokens: Array[ErrorToken]
52
+ # @symbols_resolver: Symbols::Resolver
53
+ # @types: Array[Type]
54
+ # @rule_builders: Array[RuleBuilder]
55
+ # @rules: Array[Rule]
56
+ # @sym_to_rules: Hash[Integer, Array[Rule]]
57
+ # @parameterized_resolver: Parameterized::Resolver
58
+ # @empty_symbol: Grammar::Symbol
59
+ # @eof_symbol: Grammar::Symbol
60
+ # @error_symbol: Grammar::Symbol
61
+ # @undef_symbol: Grammar::Symbol
62
+ # @accept_symbol: Grammar::Symbol
63
+ # @aux: Auxiliary
64
+ # @no_stdlib: bool
65
+ # @locations: bool
66
+ # @define: Hash[String, String]
67
+ # @required: bool
68
+ # @union: Union
69
+ # @precedences: Array[Precedence]
70
+ # @start_nterm: Lrama::Lexer::Token::Base?
71
+
26
72
  extend Forwardable
27
73
 
28
- attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux, :parameterizing_rule_resolver
29
- attr_accessor :union, :expect, :printers, :error_tokens, :lex_param, :parse_param, :initial_action,
30
- :after_shift, :before_reduce, :after_reduce, :after_shift_error_token, :after_pop_stack,
31
- :symbols_resolver, :types, :rules, :rule_builders, :sym_to_rules, :no_stdlib, :locations, :define
74
+ attr_reader :percent_codes #: Array[PercentCode]
75
+ attr_reader :eof_symbol #: Grammar::Symbol
76
+ attr_reader :error_symbol #: Grammar::Symbol
77
+ attr_reader :undef_symbol #: Grammar::Symbol
78
+ attr_reader :accept_symbol #: Grammar::Symbol
79
+ attr_reader :aux #: Auxiliary
80
+ attr_reader :parameterized_resolver #: Parameterized::Resolver
81
+ attr_reader :precedences #: Array[Precedence]
82
+ attr_accessor :union #: Union
83
+ attr_accessor :expect #: Integer
84
+ attr_accessor :printers #: Array[Printer]
85
+ attr_accessor :error_tokens #: Array[ErrorToken]
86
+ attr_accessor :lex_param #: String
87
+ attr_accessor :parse_param #: String
88
+ attr_accessor :initial_action #: Grammar::Code::InitialActionCode
89
+ attr_accessor :after_shift #: Lexer::Token::Base
90
+ attr_accessor :before_reduce #: Lexer::Token::Base
91
+ attr_accessor :after_reduce #: Lexer::Token::Base
92
+ attr_accessor :after_shift_error_token #: Lexer::Token::Base
93
+ attr_accessor :after_pop_stack #: Lexer::Token::Base
94
+ attr_accessor :symbols_resolver #: Symbols::Resolver
95
+ attr_accessor :types #: Array[Type]
96
+ attr_accessor :rules #: Array[Rule]
97
+ attr_accessor :rule_builders #: Array[RuleBuilder]
98
+ attr_accessor :sym_to_rules #: Hash[Integer, Array[Rule]]
99
+ attr_accessor :no_stdlib #: bool
100
+ attr_accessor :locations #: bool
101
+ attr_accessor :define #: Hash[String, String]
102
+ attr_accessor :required #: bool
32
103
 
33
104
  def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term, :find_term_by_s_value,
34
105
  :find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol,
35
106
  :find_symbol_by_s_value!, :fill_symbol_number, :fill_nterm_type,
36
107
  :fill_printer, :fill_destructor, :fill_error_token, :sort_by_number!
37
108
 
38
- def initialize(rule_counter, define = {})
109
+ # @rbs (Counter rule_counter, bool locations, Hash[String, String] define) -> void
110
+ def initialize(rule_counter, locations, define = {})
39
111
  @rule_counter = rule_counter
40
112
 
41
113
  # Code defined by "%code"
@@ -48,7 +120,7 @@ module Lrama
48
120
  @rule_builders = []
49
121
  @rules = []
50
122
  @sym_to_rules = {}
51
- @parameterizing_rule_resolver = ParameterizingRule::Resolver.new
123
+ @parameterized_resolver = Parameterized::Resolver.new
52
124
  @empty_symbol = nil
53
125
  @eof_symbol = nil
54
126
  @error_symbol = nil
@@ -56,93 +128,131 @@ module Lrama
56
128
  @accept_symbol = nil
57
129
  @aux = Auxiliary.new
58
130
  @no_stdlib = false
59
- @locations = false
60
- @define = define.map {|d| d.split('=') }.to_h
131
+ @locations = locations
132
+ @define = define
133
+ @required = false
134
+ @precedences = []
135
+ @start_nterm = nil
61
136
 
62
137
  append_special_symbols
63
138
  end
64
139
 
140
+ # @rbs (Counter rule_counter, Counter midrule_action_counter) -> RuleBuilder
65
141
  def create_rule_builder(rule_counter, midrule_action_counter)
66
- RuleBuilder.new(rule_counter, midrule_action_counter, @parameterizing_rule_resolver)
142
+ RuleBuilder.new(rule_counter, midrule_action_counter, @parameterized_resolver)
67
143
  end
68
144
 
145
+ # @rbs (id: Lexer::Token::Base, code: Lexer::Token::UserCode) -> Array[PercentCode]
69
146
  def add_percent_code(id:, code:)
70
147
  @percent_codes << PercentCode.new(id.s_value, code.s_value)
71
148
  end
72
149
 
150
+ # @rbs (ident_or_tags: Array[Lexer::Token::Ident|Lexer::Token::Tag], token_code: Lexer::Token::UserCode, lineno: Integer) -> Array[Destructor]
73
151
  def add_destructor(ident_or_tags:, token_code:, lineno:)
74
152
  @destructors << Destructor.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno)
75
153
  end
76
154
 
155
+ # @rbs (ident_or_tags: Array[Lexer::Token::Ident|Lexer::Token::Tag], token_code: Lexer::Token::UserCode, lineno: Integer) -> Array[Printer]
77
156
  def add_printer(ident_or_tags:, token_code:, lineno:)
78
157
  @printers << Printer.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno)
79
158
  end
80
159
 
160
+ # @rbs (ident_or_tags: Array[Lexer::Token::Ident|Lexer::Token::Tag], token_code: Lexer::Token::UserCode, lineno: Integer) -> Array[ErrorToken]
81
161
  def add_error_token(ident_or_tags:, token_code:, lineno:)
82
162
  @error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno)
83
163
  end
84
164
 
165
+ # @rbs (id: Lexer::Token::Base, tag: Lexer::Token::Tag) -> Array[Type]
85
166
  def add_type(id:, tag:)
86
167
  @types << Type.new(id: id, tag: tag)
87
168
  end
88
169
 
89
- def add_nonassoc(sym, precedence)
90
- set_precedence(sym, Precedence.new(type: :nonassoc, precedence: precedence))
170
+ # @rbs (Grammar::Symbol sym, Integer precedence, String s_value, Integer lineno) -> Precedence
171
+ def add_nonassoc(sym, precedence, s_value, lineno)
172
+ set_precedence(sym, Precedence.new(symbol: sym, s_value: s_value, type: :nonassoc, precedence: precedence, lineno: lineno))
173
+ end
174
+
175
+ # @rbs (Grammar::Symbol sym, Integer precedence, String s_value, Integer lineno) -> Precedence
176
+ def add_left(sym, precedence, s_value, lineno)
177
+ set_precedence(sym, Precedence.new(symbol: sym, s_value: s_value, type: :left, precedence: precedence, lineno: lineno))
91
178
  end
92
179
 
93
- def add_left(sym, precedence)
94
- set_precedence(sym, Precedence.new(type: :left, precedence: precedence))
180
+ # @rbs (Grammar::Symbol sym, Integer precedence, String s_value, Integer lineno) -> Precedence
181
+ def add_right(sym, precedence, s_value, lineno)
182
+ set_precedence(sym, Precedence.new(symbol: sym, s_value: s_value, type: :right, precedence: precedence, lineno: lineno))
95
183
  end
96
184
 
97
- def add_right(sym, precedence)
98
- set_precedence(sym, Precedence.new(type: :right, precedence: precedence))
185
+ # @rbs (Grammar::Symbol sym, Integer precedence, String s_value, Integer lineno) -> Precedence
186
+ def add_precedence(sym, precedence, s_value, lineno)
187
+ set_precedence(sym, Precedence.new(symbol: sym, s_value: s_value, type: :precedence, precedence: precedence, lineno: lineno))
99
188
  end
100
189
 
101
- def add_precedence(sym, precedence)
102
- set_precedence(sym, Precedence.new(type: :precedence, precedence: precedence))
190
+ # @rbs (Lrama::Lexer::Token::Base id) -> Lrama::Lexer::Token::Base
191
+ def set_start_nterm(id)
192
+ # When multiple `%start` directives are defined, Bison does not generate an error,
193
+ # whereas Lrama does generate an error.
194
+ # See the related Bison specification:
195
+ # refs: https://www.gnu.org/software/bison/manual/html_node/Multiple-start_002dsymbols.html
196
+ if @start_nterm.nil?
197
+ @start_nterm = id
198
+ else
199
+ start = @start_nterm #: Lrama::Lexer::Token::Base
200
+ raise "Start non-terminal is already set to #{start.s_value} (line: #{start.first_line}). Cannot set to #{id.s_value} (line: #{id.first_line})."
201
+ end
103
202
  end
104
203
 
204
+ # @rbs (Grammar::Symbol sym, Precedence precedence) -> (Precedence | bot)
105
205
  def set_precedence(sym, precedence)
106
- raise "" if sym.nterm?
206
+ @precedences << precedence
107
207
  sym.precedence = precedence
108
208
  end
109
209
 
210
+ # @rbs (Grammar::Code::NoReferenceCode code, Integer lineno) -> Union
110
211
  def set_union(code, lineno)
111
212
  @union = Union.new(code: code, lineno: lineno)
112
213
  end
113
214
 
215
+ # @rbs (RuleBuilder builder) -> Array[RuleBuilder]
114
216
  def add_rule_builder(builder)
115
217
  @rule_builders << builder
116
218
  end
117
219
 
118
- def add_parameterizing_rule(rule)
119
- @parameterizing_rule_resolver.add_parameterizing_rule(rule)
220
+ # @rbs (Parameterized::Rule rule) -> Array[Parameterized::Rule]
221
+ def add_parameterized_rule(rule)
222
+ @parameterized_resolver.add_rule(rule)
120
223
  end
121
224
 
122
- def parameterizing_rules
123
- @parameterizing_rule_resolver.rules
225
+ # @rbs () -> Array[Parameterized::Rule]
226
+ def parameterized_rules
227
+ @parameterized_resolver.rules
124
228
  end
125
229
 
126
- def insert_before_parameterizing_rules(rules)
127
- @parameterizing_rule_resolver.rules = rules + @parameterizing_rule_resolver.rules
230
+ # @rbs (Array[Parameterized::Rule] rules) -> Array[Parameterized::Rule]
231
+ def prepend_parameterized_rules(rules)
232
+ @parameterized_resolver.rules = rules + @parameterized_resolver.rules
128
233
  end
129
234
 
235
+ # @rbs (Integer prologue_first_lineno) -> Integer
130
236
  def prologue_first_lineno=(prologue_first_lineno)
131
237
  @aux.prologue_first_lineno = prologue_first_lineno
132
238
  end
133
239
 
240
+ # @rbs (String prologue) -> String
134
241
  def prologue=(prologue)
135
242
  @aux.prologue = prologue
136
243
  end
137
244
 
245
+ # @rbs (Integer epilogue_first_lineno) -> Integer
138
246
  def epilogue_first_lineno=(epilogue_first_lineno)
139
247
  @aux.epilogue_first_lineno = epilogue_first_lineno
140
248
  end
141
249
 
250
+ # @rbs (String epilogue) -> String
142
251
  def epilogue=(epilogue)
143
252
  @aux.epilogue = epilogue
144
253
  end
145
254
 
255
+ # @rbs () -> void
146
256
  def prepare
147
257
  resolve_inline_rules
148
258
  normalize_rules
@@ -151,6 +261,7 @@ module Lrama
151
261
  fill_default_precedence
152
262
  fill_symbols
153
263
  fill_sym_to_rules
264
+ sort_precedence
154
265
  compute_nullable
155
266
  compute_first_set
156
267
  set_locations
@@ -159,25 +270,51 @@ module Lrama
159
270
  # TODO: More validation methods
160
271
  #
161
272
  # * Validation for no_declared_type_reference
273
+ #
274
+ # @rbs () -> void
162
275
  def validate!
163
276
  @symbols_resolver.validate!
277
+ validate_no_precedence_for_nterm!
164
278
  validate_rule_lhs_is_nterm!
279
+ validate_duplicated_precedence!
165
280
  end
166
281
 
282
+ # @rbs (Grammar::Symbol sym) -> Array[Rule]
167
283
  def find_rules_by_symbol!(sym)
168
284
  find_rules_by_symbol(sym) || (raise "Rules for #{sym} not found")
169
285
  end
170
286
 
287
+ # @rbs (Grammar::Symbol sym) -> Array[Rule]?
171
288
  def find_rules_by_symbol(sym)
172
289
  @sym_to_rules[sym.number]
173
290
  end
174
291
 
292
+ # @rbs (String s_value) -> Array[Rule]
293
+ def select_rules_by_s_value(s_value)
294
+ @rules.select {|rule| rule.lhs.id.s_value == s_value }
295
+ end
296
+
297
+ # @rbs () -> Array[String]
298
+ def unique_rule_s_values
299
+ @rules.map {|rule| rule.lhs.id.s_value }.uniq
300
+ end
301
+
302
+ # @rbs () -> bool
175
303
  def ielr_defined?
176
304
  @define.key?('lr.type') && @define['lr.type'] == 'ielr'
177
305
  end
178
306
 
179
307
  private
180
308
 
309
+ # @rbs () -> void
310
+ def sort_precedence
311
+ @precedences.sort_by! do |prec|
312
+ prec.symbol.number
313
+ end
314
+ @precedences.freeze
315
+ end
316
+
317
+ # @rbs () -> Array[Grammar::Symbol]
181
318
  def compute_nullable
182
319
  @rules.each do |rule|
183
320
  case
@@ -227,6 +364,7 @@ module Lrama
227
364
  end
228
365
  end
229
366
 
367
+ # @rbs () -> Array[Grammar::Symbol]
230
368
  def compute_first_set
231
369
  terms.each do |term|
232
370
  term.first_set = Set.new([term]).freeze
@@ -262,12 +400,14 @@ module Lrama
262
400
  end
263
401
  end
264
402
 
403
+ # @rbs () -> Array[RuleBuilder]
265
404
  def setup_rules
266
405
  @rule_builders.each do |builder|
267
406
  builder.setup_rules
268
407
  end
269
408
  end
270
409
 
410
+ # @rbs () -> Grammar::Symbol
271
411
  def append_special_symbols
272
412
  # YYEMPTY (token_id: -2, number: -2) is added when a template is evaluated
273
413
  # term = add_term(id: Token.new(Token::Ident, "YYEMPTY"), token_id: -2)
@@ -298,11 +438,12 @@ module Lrama
298
438
  @accept_symbol = term
299
439
  end
300
440
 
441
+ # @rbs () -> void
301
442
  def resolve_inline_rules
302
443
  while @rule_builders.any?(&:has_inline_rules?) do
303
444
  @rule_builders = @rule_builders.flat_map do |builder|
304
445
  if builder.has_inline_rules?
305
- builder.resolve_inline_rules
446
+ Inline::Resolver.new(builder).resolve
306
447
  else
307
448
  builder
308
449
  end
@@ -310,14 +451,10 @@ module Lrama
310
451
  end
311
452
  end
312
453
 
454
+ # @rbs () -> void
313
455
  def normalize_rules
314
- # Add $accept rule to the top of rules
315
- rule_builder = @rule_builders.first # : RuleBuilder
316
- lineno = rule_builder ? rule_builder.line : 0
317
- @rules << Rule.new(id: @rule_counter.increment, _lhs: @accept_symbol.id, _rhs: [rule_builder.lhs, @eof_symbol.id], token_code: nil, lineno: lineno)
318
-
456
+ add_accept_rule
319
457
  setup_rules
320
-
321
458
  @rule_builders.each do |builder|
322
459
  builder.rules.each do |rule|
323
460
  add_nterm(id: rule._lhs, tag: rule.lhs_tag)
@@ -325,23 +462,42 @@ module Lrama
325
462
  end
326
463
  end
327
464
 
328
- @rules.sort_by!(&:id)
465
+ nterms.freeze
466
+ @rules.sort_by!(&:id).freeze
467
+ end
468
+
469
+ # Add $accept rule to the top of rules
470
+ def add_accept_rule
471
+ if @start_nterm
472
+ start = @start_nterm #: Lrama::Lexer::Token::Base
473
+ @rules << Rule.new(id: @rule_counter.increment, _lhs: @accept_symbol.id, _rhs: [start, @eof_symbol.id], token_code: nil, lineno: start.line)
474
+ else
475
+ rule_builder = @rule_builders.first #: RuleBuilder
476
+ lineno = rule_builder ? rule_builder.line : 0
477
+ lhs = rule_builder.lhs #: Lexer::Token::Base
478
+ @rules << Rule.new(id: @rule_counter.increment, _lhs: @accept_symbol.id, _rhs: [lhs, @eof_symbol.id], token_code: nil, lineno: lineno)
479
+ end
329
480
  end
330
481
 
331
482
  # Collect symbols from rules
483
+ #
484
+ # @rbs () -> void
332
485
  def collect_symbols
333
486
  @rules.flat_map(&:_rhs).each do |s|
334
487
  case s
335
488
  when Lrama::Lexer::Token::Char
336
489
  add_term(id: s)
337
- when Lrama::Lexer::Token
490
+ when Lrama::Lexer::Token::Base
338
491
  # skip
339
492
  else
340
493
  raise "Unknown class: #{s}"
341
494
  end
342
495
  end
496
+
497
+ terms.freeze
343
498
  end
344
499
 
500
+ # @rbs () -> void
345
501
  def set_lhs_and_rhs
346
502
  @rules.each do |rule|
347
503
  rule.lhs = token_to_symbol(rule._lhs) if rule._lhs
@@ -355,6 +511,8 @@ module Lrama
355
511
  # Rule inherits precedence from the last term in RHS.
356
512
  #
357
513
  # https://www.gnu.org/software/bison/manual/html_node/How-Precedence.html
514
+ #
515
+ # @rbs () -> void
358
516
  def fill_default_precedence
359
517
  @rules.each do |rule|
360
518
  # Explicitly specified precedence has the highest priority
@@ -369,6 +527,7 @@ module Lrama
369
527
  end
370
528
  end
371
529
 
530
+ # @rbs () -> Array[Grammar::Symbol]
372
531
  def fill_symbols
373
532
  fill_symbol_number
374
533
  fill_nterm_type(@types)
@@ -378,6 +537,7 @@ module Lrama
378
537
  sort_by_number!
379
538
  end
380
539
 
540
+ # @rbs () -> Array[Rule]
381
541
  def fill_sym_to_rules
382
542
  @rules.each do |rule|
383
543
  key = rule.lhs.number
@@ -386,13 +546,48 @@ module Lrama
386
546
  end
387
547
  end
388
548
 
549
+ # @rbs () -> void
550
+ def validate_no_precedence_for_nterm!
551
+ errors = [] #: Array[String]
552
+
553
+ nterms.each do |nterm|
554
+ next if nterm.precedence.nil?
555
+
556
+ errors << "[BUG] Precedence #{nterm.name} (line: #{nterm.precedence.lineno}) is defined for nonterminal symbol (line: #{nterm.id.first_line}). Precedence can be defined for only terminal symbol."
557
+ end
558
+
559
+ return if errors.empty?
560
+
561
+ raise errors.join("\n")
562
+ end
563
+
564
+ # @rbs () -> void
389
565
  def validate_rule_lhs_is_nterm!
390
566
  errors = [] #: Array[String]
391
567
 
392
568
  rules.each do |rule|
393
569
  next if rule.lhs.nterm?
394
570
 
395
- errors << "[BUG] LHS of #{rule.display_name} (line: #{rule.lineno}) is term. It should be nterm."
571
+ errors << "[BUG] LHS of #{rule.display_name} (line: #{rule.lineno}) is terminal symbol. It should be nonterminal symbol."
572
+ end
573
+
574
+ return if errors.empty?
575
+
576
+ raise errors.join("\n")
577
+ end
578
+
579
+ # @rbs () -> void
580
+ def validate_duplicated_precedence!
581
+ errors = [] #: Array[String]
582
+ seen = {} #: Hash[String, Precedence]
583
+
584
+ precedences.each do |prec|
585
+ s_value = prec.s_value
586
+ if first = seen[s_value]
587
+ errors << "%#{prec.type} redeclaration for #{s_value} (line: #{prec.lineno}) previous declaration was %#{first.type} (line: #{first.lineno})"
588
+ else
589
+ seen[s_value] = prec
590
+ end
396
591
  end
397
592
 
398
593
  return if errors.empty?
@@ -400,6 +595,7 @@ module Lrama
400
595
  raise errors.join("\n")
401
596
  end
402
597
 
598
+ # @rbs () -> void
403
599
  def set_locations
404
600
  @locations = @locations || @rules.any? {|rule| rule.contains_at_reference? }
405
601
  end
@@ -69,15 +69,15 @@ module Lrama
69
69
  def generate_error_message(error_message)
70
70
  <<~ERROR.chomp
71
71
  #{path}:#{first_line}:#{first_column}: #{error_message}
72
- #{line_with_carets}
72
+ #{error_with_carets}
73
73
  ERROR
74
74
  end
75
75
 
76
76
  # @rbs () -> String
77
- def line_with_carets
77
+ def error_with_carets
78
78
  <<~TEXT
79
- #{text}
80
- #{carets}
79
+ #{formatted_first_lineno} | #{text}
80
+ #{line_number_padding} | #{carets_line}
81
81
  TEXT
82
82
  end
83
83
 
@@ -89,13 +89,30 @@ module Lrama
89
89
  end
90
90
 
91
91
  # @rbs () -> String
92
- def blanks
93
- (text[0...first_column] or raise "#{first_column} is invalid").gsub(/[^\t]/, ' ')
92
+ def carets_line
93
+ leading_whitespace + highlight_marker
94
94
  end
95
95
 
96
96
  # @rbs () -> String
97
- def carets
98
- blanks + '^' * (last_column - first_column)
97
+ def leading_whitespace
98
+ (text[0...first_column] or raise "Invalid first_column: #{first_column}")
99
+ .gsub(/[^\t]/, ' ')
100
+ end
101
+
102
+ # @rbs () -> String
103
+ def highlight_marker
104
+ length = last_column - first_column
105
+ '^' + '~' * [0, length - 1].max
106
+ end
107
+
108
+ # @rbs () -> String
109
+ def formatted_first_lineno
110
+ first_line.to_s.rjust(4)
111
+ end
112
+
113
+ # @rbs () -> String
114
+ def line_number_padding
115
+ ' ' * formatted_first_lineno.length
99
116
  end
100
117
 
101
118
  # @rbs () -> String
@@ -0,0 +1,73 @@
1
+ # rbs_inline: enabled
2
+ # frozen_string_literal: true
3
+
4
+ module Lrama
5
+ class Lexer
6
+ module Token
7
+ class Base
8
+ attr_reader :s_value #: String
9
+ attr_reader :location #: Location
10
+ attr_accessor :alias_name #: String
11
+ attr_accessor :referred #: bool
12
+ attr_reader :errors #: Array[String]
13
+
14
+ # @rbs (s_value: String, ?alias_name: String, ?location: Location) -> void
15
+ def initialize(s_value:, alias_name: nil, location: nil)
16
+ s_value.freeze
17
+ @s_value = s_value
18
+ @alias_name = alias_name
19
+ @location = location
20
+ @errors = []
21
+ end
22
+
23
+ # @rbs () -> String
24
+ def to_s
25
+ "value: `#{s_value}`, location: #{location}"
26
+ end
27
+
28
+ # @rbs (String string) -> bool
29
+ def referred_by?(string)
30
+ [self.s_value, self.alias_name].compact.include?(string)
31
+ end
32
+
33
+ # @rbs (Lexer::Token::Base other) -> bool
34
+ def ==(other)
35
+ self.class == other.class && self.s_value == other.s_value
36
+ end
37
+
38
+ # @rbs () -> Integer
39
+ def first_line
40
+ location.first_line
41
+ end
42
+ alias :line :first_line
43
+
44
+ # @rbs () -> Integer
45
+ def first_column
46
+ location.first_column
47
+ end
48
+ alias :column :first_column
49
+
50
+ # @rbs () -> Integer
51
+ def last_line
52
+ location.last_line
53
+ end
54
+
55
+ # @rbs () -> Integer
56
+ def last_column
57
+ location.last_column
58
+ end
59
+
60
+ # @rbs (Lrama::Grammar::Reference ref, String message) -> bot
61
+ def invalid_ref(ref, message)
62
+ location = self.location.partial_location(ref.first_column, ref.last_column)
63
+ raise location.generate_error_message(message)
64
+ end
65
+
66
+ # @rbs () -> bool
67
+ def validate
68
+ true
69
+ end
70
+ end
71
+ end
72
+ end
73
+ end
@@ -3,8 +3,21 @@
3
3
 
4
4
  module Lrama
5
5
  class Lexer
6
- class Token
7
- class Char < Token
6
+ module Token
7
+ class Char < Base
8
+ # @rbs () -> void
9
+ def validate
10
+ validate_ascii_code_range
11
+ end
12
+
13
+ private
14
+
15
+ # @rbs () -> void
16
+ def validate_ascii_code_range
17
+ unless s_value.ascii_only?
18
+ errors << "Invalid character: `#{s_value}`. Only ASCII characters are allowed."
19
+ end
20
+ end
8
21
  end
9
22
  end
10
23
  end
@@ -0,0 +1,14 @@
1
+ # rbs_inline: enabled
2
+ # frozen_string_literal: true
3
+
4
+ module Lrama
5
+ class Lexer
6
+ module Token
7
+ class Empty < Base
8
+ def initialize(location: nil)
9
+ super(s_value: '%empty', location: location)
10
+ end
11
+ end
12
+ end
13
+ end
14
+ end
@@ -3,8 +3,8 @@
3
3
 
4
4
  module Lrama
5
5
  class Lexer
6
- class Token
7
- class Ident < Token
6
+ module Token
7
+ class Ident < Base
8
8
  end
9
9
  end
10
10
  end