lrama 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (260)
  1. checksums.yaml +4 -4
  2. data/.gitattributes +2 -0
  3. data/.github/workflows/codespell.yaml +1 -1
  4. data/.github/workflows/gh-pages.yml +5 -6
  5. data/.github/workflows/test.yaml +25 -14
  6. data/Gemfile +4 -3
  7. data/NEWS.md +370 -35
  8. data/README.md +7 -88
  9. data/Rakefile +3 -2
  10. data/Steepfile +11 -5
  11. data/doc/Index.md +1 -1
  12. data/doc/development/compressed_state_table/parser.rb +2 -0
  13. data/doc/development/profiling.md +44 -0
  14. data/exe/lrama +1 -1
  15. data/lib/lrama/bitmap.rb +18 -5
  16. data/lib/lrama/command.rb +95 -43
  17. data/lib/lrama/context.rb +22 -24
  18. data/lib/lrama/counterexamples/derivation.rb +14 -4
  19. data/lib/lrama/counterexamples/example.rb +47 -22
  20. data/lib/lrama/counterexamples/node.rb +30 -0
  21. data/lib/lrama/counterexamples/path.rb +12 -14
  22. data/lib/lrama/counterexamples/state_item.rb +24 -1
  23. data/lib/lrama/counterexamples/triple.rb +27 -9
  24. data/lib/lrama/counterexamples.rb +216 -88
  25. data/lib/lrama/diagram.rb +77 -0
  26. data/lib/lrama/digraph.rb +28 -7
  27. data/lib/lrama/erb.rb +29 -0
  28. data/lib/lrama/grammar/auxiliary.rb +6 -1
  29. data/lib/lrama/grammar/binding.rb +37 -25
  30. data/lib/lrama/grammar/code/destructor_code.rb +11 -0
  31. data/lib/lrama/grammar/code/initial_action_code.rb +3 -0
  32. data/lib/lrama/grammar/code/no_reference_code.rb +3 -0
  33. data/lib/lrama/grammar/code/printer_code.rb +11 -0
  34. data/lib/lrama/grammar/code/rule_action.rb +17 -0
  35. data/lib/lrama/grammar/code.rb +16 -1
  36. data/lib/lrama/grammar/counter.rb +10 -0
  37. data/lib/lrama/grammar/destructor.rb +14 -1
  38. data/lib/lrama/grammar/error_token.rb +14 -1
  39. data/lib/lrama/grammar/inline/resolver.rb +80 -0
  40. data/lib/lrama/grammar/inline.rb +3 -0
  41. data/lib/lrama/grammar/{parameterizing_rule → parameterized}/resolver.rb +19 -8
  42. data/lib/lrama/grammar/{parameterizing_rule → parameterized}/rhs.rb +7 -2
  43. data/lib/lrama/grammar/parameterized/rule.rb +36 -0
  44. data/lib/lrama/grammar/parameterized.rb +5 -0
  45. data/lib/lrama/grammar/percent_code.rb +12 -1
  46. data/lib/lrama/grammar/precedence.rb +43 -1
  47. data/lib/lrama/grammar/printer.rb +9 -0
  48. data/lib/lrama/grammar/reference.rb +13 -0
  49. data/lib/lrama/grammar/rule.rb +61 -1
  50. data/lib/lrama/grammar/rule_builder.rb +84 -69
  51. data/lib/lrama/grammar/stdlib.y +68 -48
  52. data/lib/lrama/grammar/symbol.rb +63 -19
  53. data/lib/lrama/grammar/symbols/resolver.rb +64 -3
  54. data/lib/lrama/grammar/type.rb +13 -1
  55. data/lib/lrama/grammar/union.rb +12 -1
  56. data/lib/lrama/grammar.rb +231 -35
  57. data/lib/lrama/lexer/location.rb +25 -8
  58. data/lib/lrama/lexer/token/base.rb +73 -0
  59. data/lib/lrama/lexer/token/char.rb +15 -2
  60. data/lib/lrama/lexer/token/empty.rb +14 -0
  61. data/lib/lrama/lexer/token/ident.rb +2 -2
  62. data/lib/lrama/lexer/token/instantiate_rule.rb +4 -4
  63. data/lib/lrama/lexer/token/int.rb +14 -0
  64. data/lib/lrama/lexer/token/str.rb +11 -0
  65. data/lib/lrama/lexer/token/tag.rb +2 -2
  66. data/lib/lrama/lexer/token/token.rb +11 -0
  67. data/lib/lrama/lexer/token/user_code.rb +63 -37
  68. data/lib/lrama/lexer/token.rb +6 -56
  69. data/lib/lrama/lexer.rb +51 -23
  70. data/lib/lrama/logger.rb +12 -2
  71. data/lib/lrama/option_parser.rb +63 -9
  72. data/lib/lrama/options.rb +25 -7
  73. data/lib/lrama/output.rb +4 -11
  74. data/lib/lrama/parser.rb +854 -723
  75. data/lib/lrama/reporter/conflicts.rb +44 -0
  76. data/lib/lrama/reporter/grammar.rb +39 -0
  77. data/lib/lrama/reporter/precedences.rb +54 -0
  78. data/lib/lrama/reporter/profile/call_stack.rb +45 -0
  79. data/lib/lrama/reporter/profile/memory.rb +44 -0
  80. data/lib/lrama/reporter/profile.rb +4 -0
  81. data/lib/lrama/reporter/rules.rb +43 -0
  82. data/lib/lrama/reporter/states.rb +387 -0
  83. data/lib/lrama/reporter/terms.rb +44 -0
  84. data/lib/lrama/reporter.rb +39 -0
  85. data/lib/lrama/state/action/goto.rb +33 -0
  86. data/lib/lrama/state/action/reduce.rb +71 -0
  87. data/lib/lrama/state/action/shift.rb +39 -0
  88. data/lib/lrama/state/action.rb +5 -0
  89. data/lib/lrama/state/inadequacy_annotation.rb +140 -0
  90. data/lib/lrama/{states → state}/item.rb +33 -4
  91. data/lib/lrama/state/reduce_reduce_conflict.rb +14 -1
  92. data/lib/lrama/state/resolved_conflict.rb +38 -4
  93. data/lib/lrama/state/shift_reduce_conflict.rb +14 -1
  94. data/lib/lrama/state.rb +301 -200
  95. data/lib/lrama/states.rb +447 -175
  96. data/lib/lrama/tracer/actions.rb +22 -0
  97. data/lib/lrama/tracer/closure.rb +30 -0
  98. data/lib/lrama/tracer/duration.rb +38 -0
  99. data/lib/lrama/tracer/only_explicit_rules.rb +24 -0
  100. data/lib/lrama/tracer/rules.rb +23 -0
  101. data/lib/lrama/tracer/state.rb +33 -0
  102. data/lib/lrama/tracer.rb +51 -0
  103. data/lib/lrama/version.rb +2 -1
  104. data/lib/lrama/warnings/conflicts.rb +27 -0
  105. data/lib/lrama/warnings/implicit_empty.rb +29 -0
  106. data/lib/lrama/warnings/name_conflicts.rb +63 -0
  107. data/lib/lrama/warnings/redefined_rules.rb +23 -0
  108. data/lib/lrama/warnings/required.rb +23 -0
  109. data/lib/lrama/warnings/useless_precedence.rb +25 -0
  110. data/lib/lrama/warnings.rb +33 -0
  111. data/lib/lrama.rb +5 -5
  112. data/parser.y +495 -404
  113. data/rbs_collection.lock.yaml +27 -3
  114. data/rbs_collection.yaml +2 -0
  115. data/sig/generated/lrama/bitmap.rbs +12 -4
  116. data/sig/generated/lrama/counterexamples/derivation.rbs +36 -0
  117. data/sig/generated/lrama/counterexamples/example.rbs +58 -0
  118. data/sig/generated/lrama/counterexamples/node.rbs +18 -0
  119. data/sig/generated/lrama/counterexamples/path.rbs +23 -0
  120. data/sig/generated/lrama/counterexamples/state_item.rbs +19 -0
  121. data/sig/generated/lrama/counterexamples/triple.rbs +32 -0
  122. data/sig/generated/lrama/counterexamples.rbs +98 -0
  123. data/sig/generated/lrama/diagram.rbs +34 -0
  124. data/sig/generated/lrama/digraph.rbs +26 -6
  125. data/sig/generated/lrama/erb.rbs +14 -0
  126. data/sig/generated/lrama/grammar/auxiliary.rbs +16 -0
  127. data/sig/generated/lrama/grammar/binding.rbs +18 -12
  128. data/sig/generated/lrama/grammar/code/destructor_code.rbs +26 -0
  129. data/sig/{lrama → generated/lrama}/grammar/code/initial_action_code.rbs +6 -0
  130. data/sig/{lrama → generated/lrama}/grammar/code/no_reference_code.rbs +6 -0
  131. data/sig/generated/lrama/grammar/code/printer_code.rbs +26 -0
  132. data/sig/generated/lrama/grammar/code/rule_action.rbs +63 -0
  133. data/sig/generated/lrama/grammar/code.rbs +38 -0
  134. data/sig/{lrama → generated/lrama}/grammar/counter.rbs +4 -0
  135. data/sig/generated/lrama/grammar/destructor.rbs +19 -0
  136. data/sig/generated/lrama/grammar/error_token.rbs +19 -0
  137. data/sig/generated/lrama/grammar/inline/resolver.rbs +26 -0
  138. data/sig/generated/lrama/grammar/parameterized/resolver.rbs +42 -0
  139. data/sig/generated/lrama/grammar/parameterized/rhs.rbs +21 -0
  140. data/sig/generated/lrama/grammar/parameterized/rule.rbs +28 -0
  141. data/sig/{lrama → generated/lrama}/grammar/percent_code.rbs +8 -0
  142. data/sig/generated/lrama/grammar/precedence.rbs +45 -0
  143. data/sig/{lrama/grammar/error_token.rbs → generated/lrama/grammar/printer.rbs} +8 -3
  144. data/sig/generated/lrama/grammar/reference.rbs +31 -0
  145. data/sig/generated/lrama/grammar/rule.rbs +83 -0
  146. data/sig/generated/lrama/grammar/rule_builder.rbs +91 -0
  147. data/sig/generated/lrama/grammar/symbol.rbs +89 -0
  148. data/sig/generated/lrama/grammar/symbols/resolver.rbs +131 -0
  149. data/sig/generated/lrama/grammar/type.rbs +21 -0
  150. data/sig/generated/lrama/grammar/union.rbs +17 -0
  151. data/sig/generated/lrama/grammar.rbs +289 -0
  152. data/sig/generated/lrama/lexer/location.rbs +12 -3
  153. data/sig/generated/lrama/lexer/token/base.rbs +53 -0
  154. data/sig/generated/lrama/lexer/token/char.rbs +9 -2
  155. data/sig/generated/lrama/lexer/token/empty.rbs +11 -0
  156. data/sig/generated/lrama/lexer/token/ident.rbs +2 -2
  157. data/sig/generated/lrama/lexer/token/instantiate_rule.rbs +5 -5
  158. data/sig/generated/lrama/lexer/token/int.rbs +13 -0
  159. data/sig/generated/lrama/lexer/token/str.rbs +10 -0
  160. data/sig/generated/lrama/lexer/token/tag.rbs +2 -2
  161. data/sig/generated/lrama/lexer/token/token.rbs +10 -0
  162. data/sig/generated/lrama/lexer/token/user_code.rbs +2 -2
  163. data/sig/generated/lrama/lexer/token.rbs +1 -39
  164. data/sig/generated/lrama/lexer.rbs +54 -0
  165. data/sig/generated/lrama/logger.rbs +6 -0
  166. data/sig/generated/lrama/option_parser.rbs +52 -0
  167. data/sig/{lrama → generated/lrama}/options.rbs +27 -3
  168. data/sig/generated/lrama/reporter/conflicts.rbs +18 -0
  169. data/sig/generated/lrama/reporter/grammar.rbs +13 -0
  170. data/sig/generated/lrama/reporter/precedences.rbs +15 -0
  171. data/sig/generated/lrama/reporter/profile/call_stack.rbs +19 -0
  172. data/sig/generated/lrama/reporter/profile/memory.rbs +19 -0
  173. data/sig/generated/lrama/reporter/rules.rbs +13 -0
  174. data/sig/generated/lrama/reporter/states.rbs +69 -0
  175. data/sig/generated/lrama/reporter/terms.rbs +13 -0
  176. data/sig/generated/lrama/reporter.rbs +13 -0
  177. data/sig/generated/lrama/state/action/goto.rbs +28 -0
  178. data/sig/generated/lrama/state/action/reduce.rbs +49 -0
  179. data/sig/generated/lrama/state/action/shift.rbs +33 -0
  180. data/sig/generated/lrama/state/inadequacy_annotation.rbs +45 -0
  181. data/sig/generated/lrama/state/item.rbs +75 -0
  182. data/sig/generated/lrama/state/reduce_reduce_conflict.rbs +19 -0
  183. data/sig/generated/lrama/state/resolved_conflict.rbs +38 -0
  184. data/sig/generated/lrama/state/shift_reduce_conflict.rbs +19 -0
  185. data/sig/generated/lrama/state.rbs +231 -0
  186. data/sig/generated/lrama/states.rbs +215 -0
  187. data/sig/generated/lrama/tracer/actions.rbs +13 -0
  188. data/sig/generated/lrama/tracer/closure.rbs +13 -0
  189. data/sig/generated/lrama/tracer/duration.rbs +18 -0
  190. data/sig/generated/lrama/tracer/only_explicit_rules.rbs +13 -0
  191. data/sig/generated/lrama/tracer/rules.rbs +13 -0
  192. data/sig/generated/lrama/tracer/state.rbs +16 -0
  193. data/sig/generated/lrama/tracer.rbs +23 -0
  194. data/sig/generated/lrama/version.rbs +5 -0
  195. data/sig/generated/lrama/warnings/conflicts.rbs +13 -0
  196. data/sig/generated/lrama/warnings/implicit_empty.rbs +17 -0
  197. data/sig/generated/lrama/warnings/name_conflicts.rbs +31 -0
  198. data/sig/generated/lrama/warnings/redefined_rules.rbs +13 -0
  199. data/sig/generated/lrama/warnings/required.rbs +13 -0
  200. data/sig/generated/lrama/warnings/useless_precedence.rbs +13 -0
  201. data/sig/generated/lrama/warnings.rbs +11 -0
  202. data/sig/railroad_diagrams/railroad_diagrams.rbs +16 -0
  203. data/template/bison/_yacc.h +8 -0
  204. data/template/diagram/diagram.html +102 -0
  205. metadata +126 -66
  206. data/lib/lrama/counterexamples/production_path.rb +0 -19
  207. data/lib/lrama/counterexamples/start_path.rb +0 -23
  208. data/lib/lrama/counterexamples/transition_path.rb +0 -19
  209. data/lib/lrama/diagnostics.rb +0 -36
  210. data/lib/lrama/grammar/parameterizing_rule/rule.rb +0 -24
  211. data/lib/lrama/grammar/parameterizing_rule.rb +0 -5
  212. data/lib/lrama/grammar_validator.rb +0 -37
  213. data/lib/lrama/report/duration.rb +0 -27
  214. data/lib/lrama/report/profile.rb +0 -16
  215. data/lib/lrama/report.rb +0 -4
  216. data/lib/lrama/state/reduce.rb +0 -37
  217. data/lib/lrama/state/shift.rb +0 -15
  218. data/lib/lrama/states_reporter.rb +0 -362
  219. data/lib/lrama/trace_reporter.rb +0 -45
  220. data/sig/generated/lrama/trace_reporter.rbs +0 -25
  221. data/sig/lrama/counterexamples/derivation.rbs +0 -33
  222. data/sig/lrama/counterexamples/example.rbs +0 -45
  223. data/sig/lrama/counterexamples/path.rbs +0 -21
  224. data/sig/lrama/counterexamples/production_path.rbs +0 -11
  225. data/sig/lrama/counterexamples/start_path.rbs +0 -13
  226. data/sig/lrama/counterexamples/state_item.rbs +0 -10
  227. data/sig/lrama/counterexamples/transition_path.rbs +0 -11
  228. data/sig/lrama/counterexamples/triple.rbs +0 -20
  229. data/sig/lrama/counterexamples.rbs +0 -29
  230. data/sig/lrama/grammar/auxiliary.rbs +0 -10
  231. data/sig/lrama/grammar/code/destructor_code.rbs +0 -14
  232. data/sig/lrama/grammar/code/printer_code.rbs +0 -14
  233. data/sig/lrama/grammar/code/rule_action.rbs +0 -19
  234. data/sig/lrama/grammar/code.rbs +0 -24
  235. data/sig/lrama/grammar/destructor.rbs +0 -13
  236. data/sig/lrama/grammar/parameterizing_rule/resolver.rbs +0 -24
  237. data/sig/lrama/grammar/parameterizing_rule/rhs.rbs +0 -14
  238. data/sig/lrama/grammar/parameterizing_rule/rule.rbs +0 -16
  239. data/sig/lrama/grammar/parameterizing_rule.rbs +0 -6
  240. data/sig/lrama/grammar/precedence.rbs +0 -13
  241. data/sig/lrama/grammar/printer.rbs +0 -13
  242. data/sig/lrama/grammar/reference.rbs +0 -22
  243. data/sig/lrama/grammar/rule.rbs +0 -45
  244. data/sig/lrama/grammar/rule_builder.rbs +0 -47
  245. data/sig/lrama/grammar/symbol.rbs +0 -38
  246. data/sig/lrama/grammar/symbols/resolver.rbs +0 -60
  247. data/sig/lrama/grammar/type.rbs +0 -11
  248. data/sig/lrama/grammar/union.rbs +0 -12
  249. data/sig/lrama/grammar.rbs +0 -108
  250. data/sig/lrama/report/duration.rbs +0 -11
  251. data/sig/lrama/report/profile.rbs +0 -7
  252. data/sig/lrama/state/reduce.rbs +0 -20
  253. data/sig/lrama/state/reduce_reduce_conflict.rbs +0 -13
  254. data/sig/lrama/state/resolved_conflict.rbs +0 -14
  255. data/sig/lrama/state/shift.rbs +0 -14
  256. data/sig/lrama/state/shift_reduce_conflict.rbs +0 -13
  257. data/sig/lrama/state.rbs +0 -79
  258. data/sig/lrama/states/item.rbs +0 -30
  259. data/sig/lrama/states.rbs +0 -101
  260. data/sig/lrama/warning.rbs +0 -16
@@ -3,12 +3,12 @@
3
3
 
4
4
  module Lrama
5
5
  class Lexer
6
- class Token
7
- class InstantiateRule < Token
8
- attr_reader :args #: Array[Lexer::Token]
6
+ module Token
7
+ class InstantiateRule < Base
8
+ attr_reader :args #: Array[Lexer::Token::Base]
9
9
  attr_reader :lhs_tag #: Lexer::Token::Tag?
10
10
 
11
- # @rbs (s_value: String, ?alias_name: String, ?location: Location, ?args: Array[Lexer::Token], ?lhs_tag: Lexer::Token::Tag?) -> void
11
+ # @rbs (s_value: String, ?alias_name: String, ?location: Location, ?args: Array[Lexer::Token::Base], ?lhs_tag: Lexer::Token::Tag?) -> void
12
12
  def initialize(s_value:, alias_name: nil, location: nil, args: [], lhs_tag: nil)
13
13
  super s_value: s_value, alias_name: alias_name, location: location
14
14
  @args = args
@@ -0,0 +1,14 @@
1
+ # rbs_inline: enabled
2
+ # frozen_string_literal: true
3
+
4
+ module Lrama
5
+ class Lexer
6
+ module Token
7
+ class Int < Base
8
+ # @rbs!
9
+ # def initialize: (s_value: Integer, ?alias_name: String, ?location: Location) -> void
10
+ # def s_value: () -> Integer
11
+ end
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,11 @@
1
+ # rbs_inline: enabled
2
+ # frozen_string_literal: true
3
+
4
+ module Lrama
5
+ class Lexer
6
+ module Token
7
+ class Str < Base
8
+ end
9
+ end
10
+ end
11
+ end
@@ -3,8 +3,8 @@
3
3
 
4
4
  module Lrama
5
5
  class Lexer
6
- class Token
7
- class Tag < Token
6
+ module Token
7
+ class Tag < Base
8
8
  # @rbs () -> String
9
9
  def member
10
10
  # Omit "<>"
@@ -0,0 +1,11 @@
1
+ # rbs_inline: enabled
2
+ # frozen_string_literal: true
3
+
4
+ module Lrama
5
+ class Lexer
6
+ module Token
7
+ class Token < Base
8
+ end
9
+ end
10
+ end
11
+ end
@@ -5,8 +5,8 @@ require "strscan"
5
5
 
6
6
  module Lrama
7
7
  class Lexer
8
- class Token
9
- class UserCode < Token
8
+ module Token
9
+ class UserCode < Base
10
10
  attr_accessor :tag #: Lexer::Token::Tag
11
11
 
12
12
  # @rbs () -> Array[Lrama::Grammar::Reference]
@@ -38,43 +38,69 @@ module Lrama
38
38
  # @rbs (StringScanner scanner) -> Lrama::Grammar::Reference?
39
39
  def scan_reference(scanner)
40
40
  start = scanner.pos
41
- case
42
- # $ references
43
- # It need to wrap an identifier with brackets to use ".-" for identifiers
44
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
45
- tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
46
- return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos)
47
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
48
- tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
49
- return Lrama::Grammar::Reference.new(type: :dollar, number: Integer(scanner[2]), index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos)
50
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
51
- tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
52
- return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
53
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $[expr.right], $[expr-right], $<long>[expr.right] (named reference with brackets)
54
- tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
55
- return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
56
-
57
- # @ references
58
- # It need to wrap an identifier with brackets to use ".-" for identifiers
59
- when scanner.scan(/@\$/) # @$
60
- return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos)
61
- when scanner.scan(/@(\d+)/) # @1
62
- return Lrama::Grammar::Reference.new(type: :at, number: Integer(scanner[1]), index: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
63
- when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
64
- return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
65
- when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets)
66
- return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
41
+ if scanner.scan(/
42
+ # $ references
43
+ # It need to wrap an identifier with brackets to use ".-" for identifiers
44
+ \$(<[a-zA-Z0-9_]+>)?(?:
45
+ (\$) # $$, $<long>$
46
+ | (\d+) # $1, $2, $<long>1
47
+ | ([a-zA-Z_][a-zA-Z0-9_]*) # $foo, $expr, $<long>program (named reference without brackets)
48
+ | \[([a-zA-Z_.][-a-zA-Z0-9_.]*)\] # $[expr.right], $[expr-right], $<long>[expr.right] (named reference with brackets)
49
+ )
50
+ |
51
+ # @ references
52
+ # It need to wrap an identifier with brackets to use ".-" for identifiers
53
+ @(?:
54
+ (\$) # @$
55
+ | (\d+) # @1
56
+ | ([a-zA-Z_][a-zA-Z0-9_]*) # @foo, @expr (named reference without brackets)
57
+ | \[([a-zA-Z_.][-a-zA-Z0-9_.]*)\] # @[expr.right], @[expr-right] (named reference with brackets)
58
+ )
59
+ |
60
+ # $: references
61
+ \$:
62
+ (?:
63
+ (\$) # $:$
64
+ | (\d+) # $:1
65
+ | ([a-zA-Z_][a-zA-Z0-9_]*) # $:foo, $:expr (named reference without brackets)
66
+ | \[([a-zA-Z_.][-a-zA-Z0-9_.]*)\] # $:[expr.right], $:[expr-right] (named reference with brackets)
67
+ )
68
+ /x)
69
+ case
70
+ # $ references
71
+ when scanner[2] # $$, $<long>$
72
+ tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
73
+ return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos)
74
+ when scanner[3] # $1, $2, $<long>1
75
+ tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
76
+ return Lrama::Grammar::Reference.new(type: :dollar, number: Integer(scanner[3]), index: Integer(scanner[3]), ex_tag: tag, first_column: start, last_column: scanner.pos)
77
+ when scanner[4] # $foo, $expr, $<long>program (named reference without brackets)
78
+ tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
79
+ return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[4], ex_tag: tag, first_column: start, last_column: scanner.pos)
80
+ when scanner[5] # $[expr.right], $[expr-right], $<long>[expr.right] (named reference with brackets)
81
+ tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
82
+ return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[5], ex_tag: tag, first_column: start, last_column: scanner.pos)
67
83
 
68
- # $: references
69
- when scanner.scan(/\$:\$/) # $:$
70
- return Lrama::Grammar::Reference.new(type: :index, name: "$", first_column: start, last_column: scanner.pos)
71
- when scanner.scan(/\$:(\d+)/) # $:1
72
- return Lrama::Grammar::Reference.new(type: :index, number: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
73
- when scanner.scan(/\$:([a-zA-Z_][a-zA-Z0-9_]*)/) # $:foo, $:expr (named reference without brackets)
74
- return Lrama::Grammar::Reference.new(type: :index, name: scanner[1], first_column: start, last_column: scanner.pos)
75
- when scanner.scan(/\$:\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $:[expr.right], $:[expr-right] (named reference with brackets)
76
- return Lrama::Grammar::Reference.new(type: :index, name: scanner[1], first_column: start, last_column: scanner.pos)
84
+ # @ references
85
+ when scanner[6] # @$
86
+ return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos)
87
+ when scanner[7] # @1
88
+ return Lrama::Grammar::Reference.new(type: :at, number: Integer(scanner[7]), index: Integer(scanner[7]), first_column: start, last_column: scanner.pos)
89
+ when scanner[8] # @foo, @expr (named reference without brackets)
90
+ return Lrama::Grammar::Reference.new(type: :at, name: scanner[8], first_column: start, last_column: scanner.pos)
91
+ when scanner[9] # @[expr.right], @[expr-right] (named reference with brackets)
92
+ return Lrama::Grammar::Reference.new(type: :at, name: scanner[9], first_column: start, last_column: scanner.pos)
77
93
 
94
+ # $: references
95
+ when scanner[10] # $:$
96
+ return Lrama::Grammar::Reference.new(type: :index, name: "$", first_column: start, last_column: scanner.pos)
97
+ when scanner[11] # $:1
98
+ return Lrama::Grammar::Reference.new(type: :index, number: Integer(scanner[11]), index: Integer(scanner[11]), first_column: start, last_column: scanner.pos)
99
+ when scanner[12] # $:foo, $:expr (named reference without brackets)
100
+ return Lrama::Grammar::Reference.new(type: :index, name: scanner[12], first_column: start, last_column: scanner.pos)
101
+ when scanner[13] # $:[expr.right], $:[expr-right] (named reference with brackets)
102
+ return Lrama::Grammar::Reference.new(type: :index, name: scanner[13], first_column: start, last_column: scanner.pos)
103
+ end
78
104
  end
79
105
  end
80
106
  end
@@ -1,70 +1,20 @@
1
1
  # rbs_inline: enabled
2
2
  # frozen_string_literal: true
3
3
 
4
+ require_relative 'token/base'
4
5
  require_relative 'token/char'
6
+ require_relative 'token/empty'
5
7
  require_relative 'token/ident'
6
8
  require_relative 'token/instantiate_rule'
9
+ require_relative 'token/int'
10
+ require_relative 'token/str'
7
11
  require_relative 'token/tag'
12
+ require_relative 'token/token'
8
13
  require_relative 'token/user_code'
9
14
 
10
15
  module Lrama
11
16
  class Lexer
12
- class Token
13
- attr_reader :s_value #: String
14
- attr_reader :location #: Location
15
- attr_accessor :alias_name #: String
16
- attr_accessor :referred #: bool
17
-
18
- # @rbs (s_value: String, ?alias_name: String, ?location: Location) -> void
19
- def initialize(s_value:, alias_name: nil, location: nil)
20
- s_value.freeze
21
- @s_value = s_value
22
- @alias_name = alias_name
23
- @location = location
24
- end
25
-
26
- # @rbs () -> String
27
- def to_s
28
- "value: `#{s_value}`, location: #{location}"
29
- end
30
-
31
- # @rbs (String string) -> bool
32
- def referred_by?(string)
33
- [self.s_value, self.alias_name].compact.include?(string)
34
- end
35
-
36
- # @rbs (Token other) -> bool
37
- def ==(other)
38
- self.class == other.class && self.s_value == other.s_value
39
- end
40
-
41
- # @rbs () -> Integer
42
- def first_line
43
- location.first_line
44
- end
45
- alias :line :first_line
46
-
47
- # @rbs () -> Integer
48
- def first_column
49
- location.first_column
50
- end
51
- alias :column :first_column
52
-
53
- # @rbs () -> Integer
54
- def last_line
55
- location.last_line
56
- end
57
-
58
- # @rbs () -> Integer
59
- def last_column
60
- location.last_column
61
- end
62
-
63
- # @rbs (Lrama::Grammar::Reference ref, String message) -> bot
64
- def invalid_ref(ref, message)
65
- location = self.location.partial_location(ref.first_column, ref.last_column)
66
- raise location.generate_error_message(message)
67
- end
17
+ module Token
68
18
  end
69
19
  end
70
20
  end
data/lib/lrama/lexer.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # rbs_inline: enabled
1
2
  # frozen_string_literal: true
2
3
 
3
4
  require "strscan"
@@ -8,10 +9,26 @@ require_relative "lexer/token"
8
9
 
9
10
  module Lrama
10
11
  class Lexer
11
- attr_reader :head_line, :head_column, :line
12
- attr_accessor :status, :end_symbol
13
-
14
- SYMBOLS = ['%{', '%}', '%%', '{', '}', '\[', '\]', '\(', '\)', '\,', ':', '\|', ';'].freeze
12
+ # @rbs!
13
+ #
14
+ # type token = lexer_token | c_token
15
+ #
16
+ # type lexer_token = [String, Token::Token] |
17
+ # [::Symbol, Token::Tag] |
18
+ # [::Symbol, Token::Char] |
19
+ # [::Symbol, Token::Str] |
20
+ # [::Symbol, Token::Int] |
21
+ # [::Symbol, Token::Ident]
22
+ #
23
+ # type c_token = [:C_DECLARATION, Token::UserCode]
24
+
25
+ attr_reader :head_line #: Integer
26
+ attr_reader :head_column #: Integer
27
+ attr_reader :line #: Integer
28
+ attr_accessor :status #: :initial | :c_declaration
29
+ attr_accessor :end_symbol #: String?
30
+
31
+ SYMBOLS = ['%{', '%}', '%%', '{', '}', '\[', '\]', '\(', '\)', '\,', ':', '\|', ';'].freeze #: Array[String]
15
32
  PERCENT_TOKENS = %w(
16
33
  %union
17
34
  %token
@@ -42,8 +59,11 @@ module Lrama
42
59
  %no-stdlib
43
60
  %inline
44
61
  %locations
45
- ).freeze
62
+ %categories
63
+ %start
64
+ ).freeze #: Array[String]
46
65
 
66
+ # @rbs (GrammarFile grammar_file) -> void
47
67
  def initialize(grammar_file)
48
68
  @grammar_file = grammar_file
49
69
  @scanner = StringScanner.new(grammar_file.text)
@@ -53,6 +73,7 @@ module Lrama
53
73
  @end_symbol = nil
54
74
  end
55
75
 
76
+ # @rbs () -> token?
56
77
  def next_token
57
78
  case @status
58
79
  when :initial
@@ -62,10 +83,12 @@ module Lrama
62
83
  end
63
84
  end
64
85
 
86
+ # @rbs () -> Integer
65
87
  def column
66
88
  @scanner.pos - @head
67
89
  end
68
90
 
91
+ # @rbs () -> Location
69
92
  def location
70
93
  Location.new(
71
94
  grammar_file: @grammar_file,
@@ -74,13 +97,14 @@ module Lrama
74
97
  )
75
98
  end
76
99
 
100
+ # @rbs () -> lexer_token?
77
101
  def lex_token
78
102
  until @scanner.eos? do
79
103
  case
80
104
  when @scanner.scan(/\n/)
81
105
  newline
82
106
  when @scanner.scan(/\s+/)
83
- # noop
107
+ @scanner.matched.count("\n").times { newline }
84
108
  when @scanner.scan(/\/\*/)
85
109
  lex_comment
86
110
  when @scanner.scan(/\/\/.*(?<newline>\n)?/)
@@ -96,11 +120,11 @@ module Lrama
96
120
  when @scanner.eos?
97
121
  return
98
122
  when @scanner.scan(/#{SYMBOLS.join('|')}/)
99
- return [@scanner.matched, @scanner.matched]
123
+ return [@scanner.matched, Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)]
100
124
  when @scanner.scan(/#{PERCENT_TOKENS.join('|')}/)
101
- return [@scanner.matched, @scanner.matched]
125
+ return [@scanner.matched, Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)]
102
126
  when @scanner.scan(/[\?\+\*]/)
103
- return [@scanner.matched, @scanner.matched]
127
+ return [@scanner.matched, Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)]
104
128
  when @scanner.scan(/<\w+>/)
105
129
  return [:TAG, Lrama::Lexer::Token::Tag.new(s_value: @scanner.matched, location: location)]
106
130
  when @scanner.scan(/'.'/)
@@ -108,9 +132,9 @@ module Lrama
108
132
  when @scanner.scan(/'\\\\'|'\\b'|'\\t'|'\\f'|'\\r'|'\\n'|'\\v'|'\\13'/)
109
133
  return [:CHARACTER, Lrama::Lexer::Token::Char.new(s_value: @scanner.matched, location: location)]
110
134
  when @scanner.scan(/".*?"/)
111
- return [:STRING, %Q(#{@scanner.matched})]
135
+ return [:STRING, Lrama::Lexer::Token::Str.new(s_value: %Q(#{@scanner.matched}), location: location)]
112
136
  when @scanner.scan(/\d+/)
113
- return [:INTEGER, Integer(@scanner.matched)]
137
+ return [:INTEGER, Lrama::Lexer::Token::Int.new(s_value: Integer(@scanner.matched), location: location)]
114
138
  when @scanner.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
115
139
  token = Lrama::Lexer::Token::Ident.new(s_value: @scanner.matched, location: location)
116
140
  type =
@@ -121,51 +145,53 @@ module Lrama
121
145
  end
122
146
  return [type, token]
123
147
  else
124
- raise ParseError, "Unexpected token: #{@scanner.peek(10).chomp}."
148
+ raise ParseError, location.generate_error_message("Unexpected token") # steep:ignore UnknownConstant
125
149
  end
126
150
  end
127
151
 
152
+ # @rbs () -> c_token
128
153
  def lex_c_code
129
154
  nested = 0
130
- code = ''
155
+ code = +''
131
156
  reset_first_position
132
157
 
133
158
  until @scanner.eos? do
134
159
  case
135
160
  when @scanner.scan(/{/)
136
- code += @scanner.matched
161
+ code << @scanner.matched
137
162
  nested += 1
138
163
  when @scanner.scan(/}/)
139
164
  if nested == 0 && @end_symbol == '}'
140
165
  @scanner.unscan
141
166
  return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)]
142
167
  else
143
- code += @scanner.matched
168
+ code << @scanner.matched
144
169
  nested -= 1
145
170
  end
146
171
  when @scanner.check(/#{@end_symbol}/)
147
172
  return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)]
148
173
  when @scanner.scan(/\n/)
149
- code += @scanner.matched
174
+ code << @scanner.matched
150
175
  newline
151
176
  when @scanner.scan(/".*?"/)
152
- code += %Q(#{@scanner.matched})
177
+ code << %Q(#{@scanner.matched})
153
178
  @line += @scanner.matched.count("\n")
154
179
  when @scanner.scan(/'.*?'/)
155
- code += %Q(#{@scanner.matched})
180
+ code << %Q(#{@scanner.matched})
156
181
  when @scanner.scan(/[^\"'\{\}\n]+/)
157
- code += @scanner.matched
158
- when @scanner.scan(/#{Regexp.escape(@end_symbol)}/)
159
- code += @scanner.matched
182
+ code << @scanner.matched
183
+ when @scanner.scan(/#{Regexp.escape(@end_symbol)}/) # steep:ignore
184
+ code << @scanner.matched
160
185
  else
161
- code += @scanner.getch
186
+ code << @scanner.getch
162
187
  end
163
188
  end
164
- raise ParseError, "Unexpected code: #{code}."
189
+ raise ParseError, location.generate_error_message("Unexpected code: #{code}") # steep:ignore UnknownConstant
165
190
  end
166
191
 
167
192
  private
168
193
 
194
+ # @rbs () -> void
169
195
  def lex_comment
170
196
  until @scanner.eos? do
171
197
  case
@@ -178,11 +204,13 @@ module Lrama
178
204
  end
179
205
  end
180
206
 
207
+ # @rbs () -> void
181
208
  def reset_first_position
182
209
  @head_line = line
183
210
  @head_column = column
184
211
  end
185
212
 
213
+ # @rbs () -> void
186
214
  def newline
187
215
  @line += 1
188
216
  @head = @scanner.pos
data/lib/lrama/logger.rb CHANGED
@@ -8,14 +8,24 @@ module Lrama
8
8
  @out = out
9
9
  end
10
10
 
11
+ # @rbs () -> void
12
+ def line_break
13
+ @out << "\n"
14
+ end
15
+
11
16
  # @rbs (String message) -> void
12
- def warn(message)
17
+ def trace(message)
13
18
  @out << message << "\n"
14
19
  end
15
20
 
21
+ # @rbs (String message) -> void
22
+ def warn(message)
23
+ @out << 'warning: ' << message << "\n"
24
+ end
25
+
16
26
  # @rbs (String message) -> void
17
27
  def error(message)
18
- @out << message << "\n"
28
+ @out << 'error: ' << message << "\n"
19
29
  end
20
30
  end
21
31
  end
@@ -1,3 +1,4 @@
1
+ # rbs_inline: enabled
1
2
  # frozen_string_literal: true
2
3
 
3
4
  require 'optparse'
@@ -5,17 +6,32 @@ require 'optparse'
5
6
  module Lrama
6
7
  # Handle option parsing for the command line interface.
7
8
  class OptionParser
9
+ # @rbs!
10
+ # @options: Lrama::Options
11
+ # @trace: Array[String]
12
+ # @report: Array[String]
13
+ # @profile: Array[String]
14
+
15
+ # @rbs (Array[String]) -> Lrama::Options
16
+ def self.parse(argv)
17
+ new.parse(argv)
18
+ end
19
+
20
+ # @rbs () -> void
8
21
  def initialize
9
22
  @options = Options.new
10
23
  @trace = []
11
24
  @report = []
25
+ @profile = []
12
26
  end
13
27
 
28
+ # @rbs (Array[String]) -> Lrama::Options
14
29
  def parse(argv)
15
30
  parse_by_option_parser(argv)
16
31
 
17
32
  @options.trace_opts = validate_trace(@trace)
18
33
  @options.report_opts = validate_report(@report)
34
+ @options.profile_opts = validate_profile(@profile)
19
35
  @options.grammar_file = argv.shift
20
36
 
21
37
  unless @options.grammar_file
@@ -46,6 +62,7 @@ module Lrama
46
62
 
47
63
  private
48
64
 
65
+ # @rbs (Array[String]) -> void
49
66
  def parse_by_option_parser(argv)
50
67
  ::OptionParser.new do |o|
51
68
  o.banner = <<~BANNER
@@ -60,7 +77,14 @@ module Lrama
60
77
  o.separator 'Tuning the Parser:'
61
78
  o.on('-S', '--skeleton=FILE', 'specify the skeleton to use') {|v| @options.skeleton = v }
62
79
  o.on('-t', '--debug', 'display debugging outputs of internal parser') {|v| @options.debug = true }
63
- o.on('-D', '--define=NAME[=VALUE]', Array, "similar to '%define NAME VALUE'") {|v| @options.define = v }
80
+ o.separator " same as '-Dparse.trace'"
81
+ o.on('--locations', 'enable location support') {|v| @options.locations = true }
82
+ o.on('-D', '--define=NAME[=VALUE]', Array, "similar to '%define NAME VALUE'") do |v|
83
+ @options.define = v.each_with_object({}) do |item, hash| # steep:ignore UnannotatedEmptyCollection
84
+ key, value = item.split('=', 2)
85
+ hash[key] = value
86
+ end
87
+ end
64
88
  o.separator ''
65
89
  o.separator 'Output:'
66
90
  o.on('-H', '--header=[FILE]', 'also produce a header file named FILE') {|v| @options.header = true; @options.header_file = v }
@@ -91,10 +115,19 @@ module Lrama
91
115
  o.on_tail ' time display generation time'
92
116
  o.on_tail ' all include all the above traces'
93
117
  o.on_tail ' none disable all traces'
118
+ o.on('--diagram=[FILE]', 'generate a diagram of the rules') do |v|
119
+ @options.diagram = true
120
+ @options.diagram_file = v if v
121
+ end
122
+ o.on('--profile=PROFILES', Array, 'profiles parser generation parts') {|v| @profile = v }
123
+ o.on_tail ''
124
+ o.on_tail 'PROFILES is a list of comma-separated words that can include:'
125
+ o.on_tail ' call-stack use sampling call-stack profiler (stackprof gem)'
126
+ o.on_tail ' memory use memory profiler (memory_profiler gem)'
94
127
  o.on('-v', '--verbose', "same as '--report=state'") {|_v| @report << 'states' }
95
128
  o.separator ''
96
129
  o.separator 'Diagnostics:'
97
- o.on('-W', '--warnings', 'report the warnings') {|v| @options.diagnostic = true }
130
+ o.on('-W', '--warnings', 'report the warnings') {|v| @options.warnings = true }
98
131
  o.separator ''
99
132
  o.separator 'Error Recovery:'
100
133
  o.on('-e', 'enable error recovery') {|v| @options.error_recovery = true }
@@ -107,9 +140,10 @@ module Lrama
107
140
  end
108
141
  end
109
142
 
110
- ALIASED_REPORTS = { cex: :counterexamples }.freeze
111
- VALID_REPORTS = %i[states itemsets lookaheads solved counterexamples rules terms verbose].freeze
143
+ ALIASED_REPORTS = { cex: :counterexamples }.freeze #: Hash[Symbol, Symbol]
144
+ VALID_REPORTS = %i[states itemsets lookaheads solved counterexamples rules terms verbose].freeze #: Array[Symbol]
112
145
 
146
+ # @rbs (Array[String]) -> Hash[Symbol, bool]
113
147
  def validate_report(report)
114
148
  h = { grammar: true }
115
149
  return h if report.empty?
@@ -131,6 +165,7 @@ module Lrama
131
165
  return h
132
166
  end
133
167
 
168
+ # @rbs (String) -> Symbol
134
169
  def aliased_report_option(opt)
135
170
  (ALIASED_REPORTS[opt.to_sym] || opt).to_sym
136
171
  end
@@ -139,15 +174,16 @@ module Lrama
139
174
  locations scan parse automaton bitsets closure
140
175
  grammar rules only-explicit-rules actions resource
141
176
  sets muscles tools m4-early m4 skeleton time ielr cex
142
- ].freeze
177
+ ].freeze #: Array[String]
143
178
  NOT_SUPPORTED_TRACES = %w[
144
179
  locations scan parse bitsets grammar resource
145
180
  sets muscles tools m4-early m4 skeleton ielr cex
146
- ].freeze
147
- SUPPORTED_TRACES = VALID_TRACES - NOT_SUPPORTED_TRACES
181
+ ].freeze #: Array[String]
182
+ SUPPORTED_TRACES = VALID_TRACES - NOT_SUPPORTED_TRACES #: Array[String]
148
183
 
184
+ # @rbs (Array[String]) -> Hash[Symbol, bool]
149
185
  def validate_trace(trace)
150
- h = {}
186
+ h = {} #: Hash[Symbol, bool]
151
187
  return h if trace.empty? || trace == ['none']
152
188
  all_traces = SUPPORTED_TRACES - %w[only-explicit-rules]
153
189
  if trace == ['all']
@@ -159,7 +195,25 @@ module Lrama
159
195
  if SUPPORTED_TRACES.include?(t)
160
196
  h[t.gsub(/-/, '_').to_sym] = true
161
197
  else
162
- raise "Invalid trace option \"#{t}\"."
198
+ raise "Invalid trace option \"#{t}\".\nValid options are [#{SUPPORTED_TRACES.join(", ")}]."
199
+ end
200
+ end
201
+
202
+ return h
203
+ end
204
+
205
+ VALID_PROFILES = %w[call-stack memory].freeze #: Array[String]
206
+
207
+ # @rbs (Array[String]) -> Hash[Symbol, bool]
208
+ def validate_profile(profile)
209
+ h = {} #: Hash[Symbol, bool]
210
+ return h if profile.empty?
211
+
212
+ profile.each do |t|
213
+ if VALID_PROFILES.include?(t)
214
+ h[t.gsub(/-/, '_').to_sym] = true
215
+ else
216
+ raise "Invalid profile option \"#{t}\".\nValid options are [#{VALID_PROFILES.join(", ")}]."
163
217
  end
164
218
  end
165
219