rusa 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +175 -0
- data/Rakefile +26 -0
- data/Steepfile +9 -0
- data/examples/calc.rb +29 -0
- data/examples/json.rb +55 -0
- data/examples/mini_lang.rb +52 -0
- data/exe/rusa +6 -0
- data/lib/rusa/analysis/automaton.rb +60 -0
- data/lib/rusa/analysis/conflict_resolver.rb +211 -0
- data/lib/rusa/analysis/first_follow.rb +106 -0
- data/lib/rusa/analysis/item.rb +51 -0
- data/lib/rusa/analysis/item_set.rb +64 -0
- data/lib/rusa/analysis/lalr_table.rb +460 -0
- data/lib/rusa/analysis/parse_action.rb +81 -0
- data/lib/rusa/cli.rb +188 -0
- data/lib/rusa/errors.rb +12 -0
- data/lib/rusa/generator/code_generator.rb +334 -0
- data/lib/rusa/grammar/action_capture.rb +128 -0
- data/lib/rusa/grammar/dsl.rb +123 -0
- data/lib/rusa/grammar/grammar.rb +212 -0
- data/lib/rusa/grammar/precedence.rb +29 -0
- data/lib/rusa/grammar/rule.rb +55 -0
- data/lib/rusa/grammar/symbol.rb +71 -0
- data/lib/rusa/version.rb +5 -0
- data/lib/rusa.rb +31 -0
- data/sig/generated/rusa/analysis/automaton.rbs +25 -0
- data/sig/generated/rusa/analysis/conflict_resolver.rbs +57 -0
- data/sig/generated/rusa/analysis/first_follow.rbs +33 -0
- data/sig/generated/rusa/analysis/item.rbs +35 -0
- data/sig/generated/rusa/analysis/item_set.rbs +31 -0
- data/sig/generated/rusa/analysis/lalr_table.rbs +182 -0
- data/sig/generated/rusa/analysis/parse_action.rbs +58 -0
- data/sig/generated/rusa/cli.rbs +68 -0
- data/sig/generated/rusa/errors.rbs +24 -0
- data/sig/generated/rusa/generator/code_generator.rbs +82 -0
- data/sig/generated/rusa/grammar/action_capture.rbs +46 -0
- data/sig/generated/rusa/grammar/dsl.rbs +62 -0
- data/sig/generated/rusa/grammar/grammar.rbs +103 -0
- data/sig/generated/rusa/grammar/precedence.rbs +23 -0
- data/sig/generated/rusa/grammar/rule.rbs +35 -0
- data/sig/generated/rusa/grammar/symbol.rbs +51 -0
- data/sig/generated/rusa/version.rbs +5 -0
- data/sig/generated/rusa.rbs +6 -0
- data/test/test_automaton.rb +27 -0
- data/test/test_code_generator.rb +74 -0
- data/test/test_dsl.rb +77 -0
- data/test/test_e2e.rb +134 -0
- data/test/test_first_follow.rb +70 -0
- data/test/test_grammar_model.rb +60 -0
- data/test/test_helper.rb +6 -0
- data/test/test_lalr_table.rb +64 -0
- metadata +96 -0
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "ripper"
|
|
4
|
+
|
|
5
|
+
module Rusa
|
|
6
|
+
module Grammar
|
|
7
|
+
# Captures Ruby block source so semantic actions can be emitted into generated parsers.
#
# The source is recovered by lexing the defining file with Ripper, starting at
# the line reported by Proc#source_location, and slicing out the text between
# the first block opener (`{` or `do`) and its matching closer.
module ActionCapture
  BLOCK_OPENERS = %i[on_lbrace].freeze
  # `on_tlambeg` is the `{` of a `-> { ... }` body; it is closed by a plain `on_rbrace`,
  # so it has to be pushed or its closer would pop the enclosing brace.
  PUSH_TOKENS = %i[on_lbrace on_tlambeg on_lbracket on_lparen].freeze
  POP_TOKENS = %i[on_rbrace on_rbracket on_rparen].freeze
  BLOCK_KEYWORDS = %w[
    do begin case class module def if unless while until for
  ].freeze
  # Keywords that also exist as statement modifiers (`x if y`); the modifier
  # form has no matching `end` and must not be pushed.
  MODIFIER_KEYWORDS = %w[if unless while until].freeze

  module_function

  # Returns the action's block as `"lambda { ... }"` / `"lambda do ... end"` source text.
  #
  # Returns nil when the proc has no usable source location, the file does not
  # exist on disk, or no balanced block can be found from the reported line on.
  #
  #: (Proc) -> String?
  def capture(action)
    file, line = action.source_location
    return nil unless file && line && File.file?(file)

    # Only the text from the block's first line onwards is lexed.
    source = File.readlines(file)[(line - 1)..]&.join
    return nil unless source

    block_source = extract_block(source)
    block_source ? "lambda #{block_source}" : nil
  end

  # Extracts the first `{ ... }` or `do ... end` block found in +source+.
  #
  # Tokens are scanned from the first block opener; every opening bracket or
  # block keyword pushes the closer it expects and a closer pops only when it
  # matches the top of the stack. The block ends when the stack becomes empty.
  # Returns nil when there is no opener or the block never balances.
  #
  # NOTE(review): the `do` of `while cond do` / `until cond do` / `for x in y do`
  # is still pushed in addition to the loop keyword, so such loops inside an
  # action leave the stack unbalanced and the capture returns nil.
  #
  #: (String) -> String?
  def extract_block(source)
    tokens = Ripper.lex(source)
    opener_index = tokens.index do |token|
      _position, type, text, = token
      block_opener?(type, text)
    end
    return nil unless opener_index

    stack = [] #: Array[[Symbol, String]]
    start_offset = nil
    # The opener itself is always treated structurally, hence no previous state.
    previous_state = nil

    block_tokens_from(tokens, opener_index).each do |token|
      position, type, text, state = token
      offset = offset_for(source, position)
      start_offset ||= offset
      event = structural_event(type, text, state, previous_state)
      previous_state = state

      case event
      when :push
        stack << closing_token_for(type, text)
      when :pop
        # Closers that do not match the innermost opener are ignored.
        next unless stack.last == [type, text]

        stack.pop
        # Offsets are byte offsets (see #offset_for), so slice by bytes.
        return source.byteslice(start_offset, offset + text.bytesize - start_offset) if stack.empty?
      end
    end

    nil
  ensure
    # The offsets table is only useful while this source is being scanned;
    # dropping it keeps the cache from growing with every captured action.
    line_offsets_cache.delete(source.object_id)
  end

  # True for tokens that can start the block being captured: `{` or the `do` keyword.
  #
  #: (Symbol, String) -> bool
  def block_opener?(type, text)
    BLOCK_OPENERS.include?(type) || (type == :on_kw && text == "do")
  end

  # Classifies a token as opening (:push) or closing (:pop) a nested construct,
  # or nil when it has no structural meaning.
  #
  # +state+ is the lexer state Ripper reports for the token and +previous_state+
  # the state of the token before it. Both are optional; without them every
  # block keyword is treated as an opener.
  #
  #: (Symbol, String, ?Ripper::Lexer::State?, ?Ripper::Lexer::State?) -> Symbol?
  def structural_event(type, text, state = nil, previous_state = nil)
    return :push if PUSH_TOKENS.include?(type)
    return :pop if POP_TOKENS.include?(type)
    return nil unless type == :on_kw
    # After `:` or `def` the lexer expects a name, so a keyword there
    # (`:end`, `def end`) is only an identifier.
    return nil if previous_state&.allbits?(Ripper::EXPR_FNAME)
    return :pop if text == "end"
    return nil unless BLOCK_KEYWORDS.include?(text)
    # Ripper leaves EXPR_LABEL set after the modifier form of these keywords
    # and plain EXPR_BEG after the statement form.
    return nil if MODIFIER_KEYWORDS.include?(text) && state&.allbits?(Ripper::EXPR_LABEL)

    :push
  end

  # Returns the [type, text] pair of the token that closes the given opener.
  # Anything that is not a bracket is assumed to be closed by the `end` keyword.
  #
  #: (Symbol, String) -> [Symbol, String]
  def closing_token_for(type, text)
    case [type, text]
    when [:on_lbrace, "{"], [:on_tlambeg, "{"]
      [:on_rbrace, "}"]
    when [:on_lbracket, "["]
      [:on_rbracket, "]"]
    when [:on_lparen, "("]
      [:on_rparen, ")"]
    else
      [:on_kw, "end"]
    end
  end

  # Converts a Ripper [line, column] position into a byte offset into +source+.
  # Ripper columns count bytes, so the per-line offsets are byte based as well.
  #
  #: (String, [Integer, Integer]) -> Integer
  def offset_for(source, position)
    line_number, column = position
    line_offsets = cached_line_offsets(source)

    line_offsets[line_number - 1] + column
  end

  # Returns the tokens from the opener to the end of the token list.
  #
  #: (Array[[[Integer, Integer], Symbol, String, Ripper::Lexer::State]], Integer) -> Array[[[Integer, Integer], Symbol, String, Ripper::Lexer::State]]
  def block_tokens_from(tokens, opener_index)
    tokens[opener_index..] || []
  end

  # Memoizes the line-offset table for +source+, keyed by the string's object id.
  #
  #: (String) -> Array[Integer]
  def cached_line_offsets(source)
    line_offsets_cache[source.object_id] ||= build_line_offsets(source)
  end

  # Module-level store behind #cached_line_offsets.
  #
  #: () -> Hash[Integer, Array[Integer]]
  def line_offsets_cache
    @line_offsets_cache ||= {} #: Hash[Integer, Array[Integer]]
  end

  # Builds the table of byte offsets at which each line of +source+ starts.
  # The table has one trailing entry equal to the total byte size.
  #
  #: (String) -> Array[Integer]
  def build_line_offsets(source)
    offsets = [0]
    source.each_line { |line| offsets << (offsets.last + line.bytesize) }
    offsets
  end
end
|
|
127
|
+
end
|
|
128
|
+
end
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Rusa
|
|
4
|
+
module Grammar
|
|
5
|
+
# DSL evaluates the Ruby grammar definition and builds a Grammar object.
#
# Declarations are evaluated with instance_eval, so the methods below are the
# vocabulary available inside a grammar definition block.
class DSL
  attr_reader :grammar #: Grammar

  #: () -> void
  def initialize
    @grammar = Grammar.new
    @current_rule = nil
    @precedence_level = 0
  end

  # Declares a terminal. The pattern is normalized so that it only matches at
  # the start of the remaining input (see #normalize_token_pattern).
  #
  #: (Symbol | String, String | Regexp) -> TerminalSymbol
  def token(name, pattern)
    grammar.add_terminal(name, normalize_token_pattern(pattern))
  end

  # Registers a pattern for the grammar's skip list, anchored at the start of input.
  #
  #: (Regexp) -> Regexp
  def skip(pattern)
    grammar.add_skip_pattern(anchor_regexp(pattern))
  end

  # Declares left-associative tokens. Each precedence declaration gets a level
  # one higher than the previous one.
  #
  #: (*Symbol | String) -> Array[Symbol | String]
  def left(*tokens)
    register_precedence(:left, tokens)
  end

  # Declares right-associative tokens at the next precedence level.
  #
  #: (*Symbol | String) -> Array[Symbol | String]
  def right(*tokens)
    register_precedence(:right, tokens)
  end

  # Declares non-associative tokens at the next precedence level.
  #
  #: (*Symbol | String) -> Array[Symbol | String]
  def nonassoc(*tokens)
    register_precedence(:nonassoc, tokens)
  end

  # Declares the start symbol and registers it as a nonterminal.
  #
  # The nonterminal is registered first so that a rejected symbol (for example
  # one already defined as a terminal) does not leave a start symbol behind.
  #
  #: (Symbol | String) -> NonterminalSymbol
  def start(symbol)
    nonterminal = grammar.add_nonterminal(symbol)
    grammar.start_symbol = symbol.to_sym
    nonterminal
  end

  # Opens a rule for +name+; `alt` calls inside the block add its productions.
  #
  # Raises InvalidGrammarError when called inside another rule and
  # ArgumentError when no block is given. Both checks run before any state is
  # touched, so a rejected call neither registers the nonterminal nor disturbs
  # the rule that is currently open.
  #
  #: (Symbol | String) { (self) [self: self] -> void } -> void
  def rule(name, &block)
    raise InvalidGrammarError, "nested rule declarations are not supported" if @current_rule
    raise ArgumentError, "block is required" unless block

    @current_rule = name.to_sym
    begin
      grammar.add_nonterminal(@current_rule)
      instance_eval(&block)
    ensure
      # Always close the rule, even when the block raises.
      @current_rule = nil
    end
  end

  # Adds one alternative (production) to the rule currently being declared.
  #
  # When a block is given it is stored as the semantic action and its source
  # text is captured so it can be emitted later. Returns an AltBuilder for
  # chaining `prec` / `action` overrides.
  #
  #: (*Symbol | String) -> AltBuilder
  #: (*Symbol | String) { (*Object) -> Object } -> AltBuilder
  def alt(*symbols, &action)
    raise InvalidGrammarError, "alt must be declared inside rule" unless @current_rule

    production = Production.new(
      @current_rule,
      symbols,
      action,
      action_source: action ? ActionCapture.capture(action) : nil
    )
    grammar.add_production(production)
    AltBuilder.new(production)
  end

  private

  # Bumps the precedence level and assigns it, with the given associativity,
  # to every token of the declaration.
  #
  #: (Symbol, Array[Symbol | String]) -> Array[Symbol | String]
  def register_precedence(associativity, tokens)
    @precedence_level += 1
    tokens.each do |token|
      grammar.set_precedence(token, @precedence_level, associativity)
    end
    tokens
  end

  # Turns a token pattern into a start-anchored Regexp.
  #
  # Strings are matched literally; identifier-like strings also get a trailing
  # word boundary so that e.g. "if" does not match the prefix of "iffy".
  #
  #: (String | Regexp) -> Regexp
  def normalize_token_pattern(pattern)
    if pattern.is_a?(String)
      source = "\\A#{Regexp.escape(pattern)}"
      source = "#{source}\\b" if pattern.match?(/\A[a-zA-Z_]\w*\z/)
      Regexp.new(source)
    else
      anchor_regexp(pattern)
    end
  end

  # Prefixes the pattern with `\A` unless its source already starts with it.
  # The original options (such as case-insensitivity) are preserved.
  #
  #: (Regexp) -> Regexp
  def anchor_regexp(pattern)
    return pattern if pattern.source.start_with?("\\A")

    Regexp.new("\\A(?:#{pattern.source})", pattern.options)
  end
end
|
|
102
|
+
|
|
103
|
+
# AltBuilder is the chainable handle returned by `alt`; it lets a declaration
# override the precedence token and the action source of its production.
class AltBuilder
  #: (Production) -> void
  def initialize(production)
    @production = production
  end

  # Sets the token whose precedence the production should use.
  #
  #: (Symbol | String) -> self
  def prec(token)
    tap { @production.context_precedence = token.to_sym }
  end

  # Replaces the production's action source text.
  #
  #: (String) -> self
  def action(source)
    tap { @production.action_source = source }
  end
end
|
|
122
|
+
end
|
|
123
|
+
end
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Rusa
|
|
4
|
+
module Grammar
|
|
5
|
+
# Grammar is the normalized, in-memory model produced by the DSL: terminals,
# nonterminals, productions, precedences and skip patterns.
class Grammar
  AUGMENTED_START = :"$start"
  END_OF_INPUT = :"$end"

  attr_accessor :start_symbol #: Symbol?
  attr_reader :terminals #: Hash[Symbol, TerminalSymbol]
  attr_reader :nonterminals #: Hash[Symbol, NonterminalSymbol]
  attr_reader :productions #: Array[Production]
  attr_reader :precedences #: Hash[Symbol, Precedence]
  attr_reader :skip_patterns #: Array[Regexp]
  attr_reader :warnings #: Array[String]

  #: () -> void
  def initialize
    @start_symbol = nil
    @augmented = false
    @terminals = {} #: Hash[Symbol, TerminalSymbol]
    @nonterminals = {} #: Hash[Symbol, NonterminalSymbol]
    @productions = [] #: Array[Production]
    @precedences = {} #: Hash[Symbol, Precedence]
    @skip_patterns = [] #: Array[Regexp]
    @warnings = [] #: Array[String]
  end

  # Registers a terminal; the name must not be in use by any other symbol.
  #
  #: (Symbol | String, String | Regexp) -> TerminalSymbol
  def add_terminal(name, pattern)
    key = name.to_sym
    raise DuplicateTokenError, "terminal #{key} is already defined" if terminal?(key)
    raise DuplicateTokenError, duplicate_nonterminal_message(key) if nonterminal?(key)

    terminals[key] = TerminalSymbol.new(key, pattern)
  end

  # Registers a nonterminal, or returns the existing one when already known.
  #
  #: (Symbol | String) -> NonterminalSymbol
  def add_nonterminal(name)
    key = name.to_sym
    raise DuplicateRuleError, "#{key} is already defined as a terminal" if terminal?(key)

    nonterminals.fetch(key) { nonterminals[key] = NonterminalSymbol.new(key) }
  end

  # Appends a production, registering its left-hand side and assigning its id
  # from its position in the list.
  #
  #: (Production) -> Production
  def add_production(production)
    add_nonterminal(production.lhs)
    production.id = productions.size
    productions.push(production)
    production
  end

  #: (Regexp) -> Regexp
  def add_skip_pattern(pattern)
    skip_patterns.push(pattern)
    pattern
  end

  # Records level and associativity for a token, replacing any earlier entry.
  #
  #: (Symbol | String, Integer, Symbol) -> Precedence
  def set_precedence(token, level, associativity)
    precedences[token.to_sym] = Precedence.new(level, associativity)
  end

  #: (Symbol | String) -> Precedence?
  def precedence_for(token)
    precedences[token.to_sym]
  end

  # All productions whose left-hand side is the given nonterminal.
  #
  #: (Symbol | String) -> Array[Production]
  def productions_for(nonterminal)
    wanted = nonterminal.to_sym
    productions.select { |candidate| candidate.lhs == wanted }
  end

  # Adds the `$start -> <start symbol>` production in front of all others and
  # the `$end` terminal, then renumbers the productions. Runs only once.
  #
  #: () -> self
  def augment!
    unless @augmented
      user_start = required_start_symbol

      [user_start, AUGMENTED_START].each { |name| add_nonterminal(name) }
      terminals[END_OF_INPUT] ||= TerminalSymbol.new(END_OF_INPUT, /\z/)

      @productions.unshift(Production.new(AUGMENTED_START, [user_start]))
      reindex_productions!
      @augmented = true
    end

    self
  end

  # Checks that a start symbol is set and that every right-hand-side symbol is
  # defined, then stores warnings about unreachable nonterminals.
  #
  #: () -> self
  def validate!
    required_start_symbol

    missing = undefined_symbols
    raise UndefinedSymbolError, "undefined symbols: #{missing.join(', ')}" unless missing.empty?

    @warnings = unreachable_warnings

    self
  end

  #: () -> Symbol
  def augmented_start
    AUGMENTED_START
  end

  # The `$start` production; raises InvalidGrammarError before augmentation.
  #
  #: () -> Production
  def augmented_production
    found = productions.find { |candidate| candidate.lhs == AUGMENTED_START }
    raise(InvalidGrammarError, "grammar is not augmented") unless found

    found
  end

  # Terminals and nonterminals in one hash (terminals first).
  #
  #: () -> Hash[Symbol, TerminalSymbol | NonterminalSymbol]
  def symbols
    terminals.merge(nonterminals)
  end

  #: (Symbol | String) -> bool
  def terminal?(name)
    terminals.key?(name.to_sym)
  end

  #: (Symbol | String) -> bool
  def nonterminal?(name)
    nonterminals.key?(name.to_sym)
  end

  # One line per production, prefixed with its id.
  #
  #: () -> String
  def dump
    rendered = productions.map { |production| "[#{production.id}] #{production}" }
    rendered.join("\n")
  end

  private

  #: (Symbol) -> String
  def duplicate_nonterminal_message(name)
    "#{name} is already defined as a nonterminal"
  end

  # Returns the start symbol or raises NoStartSymbolError when none is set.
  #
  #: () -> Symbol
  def required_start_symbol
    return start_symbol if start_symbol

    raise NoStartSymbolError, "start symbol is not set"
  end

  # Right-hand-side symbols that are neither terminals nor nonterminals,
  # in order of first appearance.
  #
  #: () -> Array[Symbol]
  def undefined_symbols
    collected = productions.each_with_object([]) do |production, acc|
      acc.concat(undefined_symbols_in(production))
    end
    collected.uniq
  end

  #: (Production) -> Array[Symbol]
  def undefined_symbols_in(production)
    production.rhs.select { |symbol| !defined_symbol?(symbol) }
  end

  #: (Symbol) -> bool
  def defined_symbol?(symbol)
    return true if symbol == END_OF_INPUT

    terminal?(symbol) || nonterminal?(symbol)
  end

  #: () -> Array[String]
  def unreachable_warnings
    origin = required_start_symbol

    unreachable_nonterminals.map { |name| "nonterminal #{name} is unreachable from #{origin}" }
  end

  # Sets every production's id to its index in the list.
  #
  #: () -> Array[Production]
  def reindex_productions!
    productions.each_with_index { |production, position| production.id = position }
  end

  # Breadth-first walk from the start symbol over production right-hand
  # sides; returns the nonterminals (other than `$start`) never visited.
  #
  #: () -> Array[Symbol]
  def unreachable_nonterminals
    return [] unless start_symbol

    reachable = [start_symbol.to_sym]
    pending = reachable.dup

    while (current = pending.shift)
      discovered = next_reachable_nonterminals(current, reachable)
      reachable.concat(discovered)
      pending.concat(discovered)
    end

    nonterminals.keys.reject do |symbol|
      symbol == AUGMENTED_START || reachable.include?(symbol)
    end
  end

  # Nonterminals on the right-hand sides of +current+ that are not yet in +reachable+.
  #
  #: (Symbol, Array[Symbol]) -> Array[Symbol]
  def next_reachable_nonterminals(current, reachable)
    productions_for(current)
      .flat_map(&:rhs)
      .select { |symbol| nonterminal?(symbol) && !reachable.include?(symbol) }
  end
end
|
|
211
|
+
end
|
|
212
|
+
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Rusa
|
|
4
|
+
module Grammar
|
|
5
|
+
# Immutable value object pairing a precedence level with an associativity;
# used to resolve parser conflicts.
class Precedence
  attr_reader :level #: Integer
  attr_reader :associativity #: Symbol

  #: (Integer, Symbol) -> void
  def initialize(level, associativity)
    @level = level
    @associativity = associativity
    freeze
  end

  # Two precedences are equal when both level and associativity match.
  #
  #: (Object) -> bool
  def ==(other)
    return false unless other.is_a?(self.class)

    [other.level, other.associativity] == [level, associativity]
  end
  alias_method :eql?, :==

  # Consistent with #== so instances can be used as hash keys.
  #
  #: () -> Integer
  def hash
    [level, associativity].hash
  end
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Rusa
|
|
4
|
+
module Grammar
|
|
5
|
+
# A production connects one nonterminal (lhs) with a sequence of symbols
# (rhs), plus an optional semantic action and precedence override.
class Production
  attr_accessor :id #: Integer?
  attr_accessor :lhs #: Symbol
  attr_accessor :rhs #: Array[Symbol]
  attr_accessor :action #: Proc?
  attr_accessor :context_precedence #: Symbol?
  attr_accessor :action_source #: String?

  # Symbol-ish arguments are normalized to Symbols; the rhs array is frozen.
  #
  #: (Symbol | String, Array[Symbol | String], ?Proc?, ?context_precedence: Symbol | String?, ?action_source: String?) -> void
  def initialize(lhs, rhs, action = nil, context_precedence: nil, action_source: nil)
    @id = nil
    @lhs = lhs.to_sym
    @rhs = rhs.map(&:to_sym).freeze
    @action = action
    @action_source = action_source
    @context_precedence = context_precedence&.to_sym
  end

  # The token that decides this production's precedence: the explicit
  # override if present, otherwise the last terminal on the right-hand side.
  #
  #: (Grammar) -> Symbol?
  def precedence_token(grammar)
    context_precedence || rhs.reverse_each.find { |candidate| grammar.terminal?(candidate) }
  end

  # Precedence registered in the grammar for #precedence_token, if any.
  #
  #: (Grammar) -> Precedence?
  def precedence(grammar)
    deciding_token = precedence_token(grammar)
    return nil unless deciding_token

    grammar.precedence_for(deciding_token)
  end

  #: () -> bool
  def empty?
    rhs.empty?
  end

  # Renders as "lhs -> a b c", with "ε" standing in for an empty right-hand side.
  #
  #: () -> String
  def to_s
    body = empty? ? "ε" : rhs.join(" ")
    "#{lhs} -> #{body}"
  end
end
|
|
54
|
+
end
|
|
55
|
+
end
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Rusa
|
|
4
|
+
module Grammar
|
|
5
|
+
# A terminal is a named token pattern matched by the generated tokenizer.
# Instances are frozen and compare by name only.
class TerminalSymbol
  attr_reader :name #: Symbol
  attr_reader :pattern #: Regexp

  # String patterns are converted to a Regexp matching that literal text.
  #
  #: (Symbol | String, String | Regexp) -> void
  def initialize(name, pattern)
    @name = name.to_sym
    @pattern =
      case pattern
      when String then Regexp.new(Regexp.escape(pattern))
      else pattern
      end
    freeze
  end

  #: () -> bool
  def terminal?
    true
  end

  #: () -> bool
  def nonterminal?
    false
  end

  # Equal when the other object is a terminal with the same name.
  #
  #: (Object) -> bool
  def ==(other)
    return false unless other.is_a?(self.class)

    other.name == name
  end
  alias_method :eql?, :==

  #: () -> Integer
  def hash
    [self.class, name].hash
  end
end
|
|
38
|
+
|
|
39
|
+
# A nonterminal is a named symbol expanded by grammar productions.
# Instances are frozen and compare by name only.
class NonterminalSymbol
  attr_reader :name #: Symbol

  #: (Symbol | String) -> void
  def initialize(name)
    @name = name.to_sym
    freeze
  end

  #: () -> bool
  def terminal?
    false
  end

  #: () -> bool
  def nonterminal?
    true
  end

  # Equal when the other object is a nonterminal with the same name.
  #
  #: (Object) -> bool
  def ==(other)
    return false unless other.is_a?(self.class)

    other.name == name
  end
  alias_method :eql?, :==

  #: () -> Integer
  def hash
    [self.class, name].hash
  end
end
|
|
70
|
+
end
|
|
71
|
+
end
|
data/lib/rusa/version.rb
ADDED
data/lib/rusa.rb
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "rusa/version"
|
|
4
|
+
require_relative "rusa/errors"
|
|
5
|
+
require_relative "rusa/grammar/symbol"
|
|
6
|
+
require_relative "rusa/grammar/rule"
|
|
7
|
+
require_relative "rusa/grammar/precedence"
|
|
8
|
+
require_relative "rusa/grammar/grammar"
|
|
9
|
+
require_relative "rusa/grammar/action_capture"
|
|
10
|
+
require_relative "rusa/grammar/dsl"
|
|
11
|
+
require_relative "rusa/analysis/first_follow"
|
|
12
|
+
require_relative "rusa/analysis/item"
|
|
13
|
+
require_relative "rusa/analysis/item_set"
|
|
14
|
+
require_relative "rusa/analysis/automaton"
|
|
15
|
+
require_relative "rusa/analysis/parse_action"
|
|
16
|
+
require_relative "rusa/analysis/conflict_resolver"
|
|
17
|
+
require_relative "rusa/analysis/lalr_table"
|
|
18
|
+
require_relative "rusa/generator/code_generator"
|
|
19
|
+
require_relative "rusa/cli"
|
|
20
|
+
|
|
21
|
+
module Rusa
  # Evaluates the given block against a fresh Grammar::DSL and returns the
  # resulting grammar after it has been augmented and validated.
  # Raises ArgumentError when no block is given.
  #
  #: () { (Grammar::DSL) [self: Grammar::DSL] -> void } -> Grammar::Grammar
  def self.grammar(&block)
    dsl = Grammar::DSL.new
    raise ArgumentError, "block is required" unless block

    dsl.instance_eval(&block)
    dsl.grammar.tap do |built|
      built.augment!
      built.validate!
    end
  end
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Generated from lib/rusa/analysis/automaton.rb with RBS::Inline
|
|
2
|
+
|
|
3
|
+
module Rusa
|
|
4
|
+
module Analysis
|
|
5
|
+
# Automaton builds the canonical LR(0) state graph.
# This file only declares the interface; the behavior lives in
# lib/rusa/analysis/automaton.rb, which this signature was generated from.
|
|
6
|
+
class Automaton
|
|
7
|
+
# States of the automaton, each represented as an ItemSet.
attr_reader states: Array[ItemSet]
|
|
8
|
+
|
|
9
|
+
# Two-level Integer/Symbol lookup table; presumably
# source state index => grammar symbol => target state index — TODO confirm
# against the implementation.
attr_reader transitions: Hash[Integer, Hash[Symbol, Integer]]
|
|
10
|
+
|
|
11
|
+
# Takes the grammar to analyse.
# : (Grammar::Grammar) -> void
|
|
12
|
+
def initialize: (Grammar::Grammar) -> void
|
|
13
|
+
|
|
14
|
+
private
|
|
15
|
+
|
|
16
|
+
# NOTE(review): presumably the grammar given to the constructor — confirm.
attr_reader grammar: Grammar::Grammar
|
|
17
|
+
|
|
18
|
+
# Takes no arguments and returns nothing; presumably fills in states and
# transitions — confirm against the implementation.
# : () -> void
|
|
19
|
+
def build: () -> void
|
|
20
|
+
|
|
21
|
+
# Maps a collection of items to a nested array of optional integers;
# presumably a key for recognising states by their kernel items — TODO confirm.
# : (Enumerable[Item]) -> Array[Array[Integer?]]
|
|
22
|
+
def kernel_key: (Enumerable[Item]) -> Array[Array[Integer?]]
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|