rusa 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +175 -0
  4. data/Rakefile +26 -0
  5. data/Steepfile +9 -0
  6. data/examples/calc.rb +29 -0
  7. data/examples/json.rb +55 -0
  8. data/examples/mini_lang.rb +52 -0
  9. data/exe/rusa +6 -0
  10. data/lib/rusa/analysis/automaton.rb +60 -0
  11. data/lib/rusa/analysis/conflict_resolver.rb +211 -0
  12. data/lib/rusa/analysis/first_follow.rb +106 -0
  13. data/lib/rusa/analysis/item.rb +51 -0
  14. data/lib/rusa/analysis/item_set.rb +64 -0
  15. data/lib/rusa/analysis/lalr_table.rb +460 -0
  16. data/lib/rusa/analysis/parse_action.rb +81 -0
  17. data/lib/rusa/cli.rb +188 -0
  18. data/lib/rusa/errors.rb +12 -0
  19. data/lib/rusa/generator/code_generator.rb +334 -0
  20. data/lib/rusa/grammar/action_capture.rb +128 -0
  21. data/lib/rusa/grammar/dsl.rb +123 -0
  22. data/lib/rusa/grammar/grammar.rb +212 -0
  23. data/lib/rusa/grammar/precedence.rb +29 -0
  24. data/lib/rusa/grammar/rule.rb +55 -0
  25. data/lib/rusa/grammar/symbol.rb +71 -0
  26. data/lib/rusa/version.rb +5 -0
  27. data/lib/rusa.rb +31 -0
  28. data/sig/generated/rusa/analysis/automaton.rbs +25 -0
  29. data/sig/generated/rusa/analysis/conflict_resolver.rbs +57 -0
  30. data/sig/generated/rusa/analysis/first_follow.rbs +33 -0
  31. data/sig/generated/rusa/analysis/item.rbs +35 -0
  32. data/sig/generated/rusa/analysis/item_set.rbs +31 -0
  33. data/sig/generated/rusa/analysis/lalr_table.rbs +182 -0
  34. data/sig/generated/rusa/analysis/parse_action.rbs +58 -0
  35. data/sig/generated/rusa/cli.rbs +68 -0
  36. data/sig/generated/rusa/errors.rbs +24 -0
  37. data/sig/generated/rusa/generator/code_generator.rbs +82 -0
  38. data/sig/generated/rusa/grammar/action_capture.rbs +46 -0
  39. data/sig/generated/rusa/grammar/dsl.rbs +62 -0
  40. data/sig/generated/rusa/grammar/grammar.rbs +103 -0
  41. data/sig/generated/rusa/grammar/precedence.rbs +23 -0
  42. data/sig/generated/rusa/grammar/rule.rbs +35 -0
  43. data/sig/generated/rusa/grammar/symbol.rbs +51 -0
  44. data/sig/generated/rusa/version.rbs +5 -0
  45. data/sig/generated/rusa.rbs +6 -0
  46. data/test/test_automaton.rb +27 -0
  47. data/test/test_code_generator.rb +74 -0
  48. data/test/test_dsl.rb +77 -0
  49. data/test/test_e2e.rb +134 -0
  50. data/test/test_first_follow.rb +70 -0
  51. data/test/test_grammar_model.rb +60 -0
  52. data/test/test_helper.rb +6 -0
  53. data/test/test_lalr_table.rb +64 -0
  54. metadata +96 -0
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rusa
4
+ module Analysis
5
+ # Parse actions populate ACTION tables.
6
# Shift action: carries the state number stored in the table entry.
# Instances freeze themselves on construction and compare by value, so
# they can be used as hash keys.
class Shift
  attr_reader :state #: Integer

  #: (Integer) -> void
  def initialize(state)
    @state = state
    freeze
  end

  # Array form of the action: the :shift tag followed by the state.
  #: () -> [:shift, Integer]
  def to_a
    [:shift, @state]
  end

  # Equal when +other+ is a Shift (or subclass of the receiver's class)
  # holding the same state.
  #: (Object) -> bool
  def ==(other)
    return false unless other.is_a?(self.class)

    other.state == @state
  end
  alias eql? ==

  # Hash built from the class and the state, consistent with #eql?.
  #: () -> Integer
  def hash
    [self.class, @state].hash
  end
end
31
+
32
# Reduce action: carries the id of the production stored in the table
# entry (the annotation allows nil). Frozen on construction and compared
# by value.
class Reduce
  attr_reader :production_id #: Integer?

  #: (Integer?) -> void
  def initialize(production_id)
    @production_id = production_id
    freeze
  end

  # Array form of the action: the :reduce tag followed by the production id.
  #: () -> [:reduce, Integer?]
  def to_a
    [:reduce, @production_id]
  end

  # Equal when +other+ is a Reduce (or subclass of the receiver's class)
  # holding the same production id.
  #: (Object) -> bool
  def ==(other)
    return false unless other.is_a?(self.class)

    other.production_id == @production_id
  end
  alias eql? ==

  # Hash built from the class and the production id, consistent with #eql?.
  #: () -> Integer
  def hash
    [self.class, @production_id].hash
  end
end
57
+
58
# Accept action: carries no data, so every instance is interchangeable.
# Frozen on construction; all instances of the class compare equal.
class Accept
  #: () -> void
  def initialize
    freeze
  end

  # Array form of the action: just the :accept tag.
  #: () -> [:accept]
  def to_a
    %i[accept]
  end

  # Equal to any object that is an instance of the receiver's class.
  #: (Object) -> bool
  def ==(other)
    other.is_a?(self.class)
  end
  alias eql? ==

  # All instances share the class's hash, consistent with #eql?.
  #: () -> Integer
  def hash
    self.class.hash
  end
end
80
+ end
81
+ end
data/lib/rusa/cli.rb ADDED
@@ -0,0 +1,188 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "optparse"
4
+
5
+ module Rusa
6
+ # CLI wires grammar loading, table generation, and code output.
7
# Subcommands are "generate", "report" and "check"; each returns the process
# exit status as an Integer. Problems such as a missing grammar argument are
# raised (ArgumentError) rather than converted to a status here.
class CLI
  # Dispatches on the first element of +argv+ (which is removed from the
  # array) and hands the remaining arguments to the subcommand.
  # Prints the usage text to stderr and returns 1 for anything else.
  #: (Array[String]) -> Integer
  def run(argv)
    command = argv.shift

    case command
    when "generate"
      generate(argv)
    when "report"
      report(argv)
    when "check"
      check(argv)
    else
      $stderr.puts(usage)
      1
    end
  end

  private

  # Builds the tables for the grammar file and writes the generated parser
  # source to the chosen output path.
  #: (Array[String]) -> Integer
  def generate(argv)
    options = parse_generate_options(argv)
    grammar_file = required_grammar_file(argv)
    grammar, table = build(grammar_file)
    generator = Generator::CodeGenerator.new(
      grammar,
      table,
      class_name: generator_class_name(options, grammar_file),
      compact: compact_option(options),
      line_tracking: line_tracking_option(options)
    )
    output_path = output_path_for(options, grammar_file)
    File.write(output_path, generator.generate)
    puts "generated #{output_path}"
    puts table.report if verbose_option(options)
    0
  end

  # Prints grammar warnings, the conflict list and the table report.
  #: (Array[String]) -> Integer
  def report(argv)
    grammar_file = required_grammar_file(argv)
    grammar, table = build(grammar_file)
    print_section("Grammar warnings:", grammar.warnings)
    puts
    print_section("Conflict report:", conflict_messages(table))
    puts
    puts table.report
    0
  end

  # Builds the tables and prints warning/conflict counts.
  #: (Array[String]) -> Integer
  def check(argv)
    grammar_file = required_grammar_file(argv)
    grammar, table = build(grammar_file)
    puts "ok: #{grammar_file}"
    puts "warnings: #{grammar.warnings.length}"
    puts "conflicts: #{table.conflicts.length}"
    0
  end

  # Loads the grammar and constructs its LALR table.
  #: (String) -> [Grammar::Grammar, Analysis::LALRTable]
  def build(grammar_file)
    grammar = load_grammar(grammar_file)
    [grammar, Analysis::LALRTable.new(grammar)]
  end

  # Evaluates the grammar file as Ruby and returns its result, which must be
  # a Grammar::Grammar.
  #
  # SECURITY: the file is executed with the privileges of the current
  # process (plain `eval` at top level), so only trusted grammar files
  # should be passed to the CLI.
  #: (String) -> Grammar::Grammar
  def load_grammar(path)
    source = File.read(path)
    result = TOPLEVEL_BINDING.eval(source, path)
    return result if result.is_a?(Grammar::Grammar)

    raise ArgumentError, "#{path} did not evaluate to a Rusa grammar"
  end

  # "dir/calc.rb" -> "calc_parser.rb" (written relative to the current
  # directory, since the directory part is dropped).
  #: (String) -> String
  def default_output_path(grammar_file)
    base = File.basename(grammar_file, File.extname(grammar_file))
    "#{base}_parser.rb"
  end

  # "dir/mini_lang.rb" -> "MiniLangParser".
  #
  # Raises ArgumentError when the derived name would not start with an
  # uppercase letter (a basename beginning with a digit), because the
  # generated `class <name>` line would not be valid Ruby.
  #: (String) -> String
  def default_class_name(grammar_file)
    base = File.basename(grammar_file, File.extname(grammar_file))
    name = "#{classify(base)}Parser"
    return name if name.match?(/\A[A-Z]/)

    raise ArgumentError,
          "cannot derive a class name from #{grammar_file.inspect}; pass --class NAME"
  end

  #: () -> String
  def usage
    <<~TEXT
      Usage:
        rusa generate GRAMMAR.rb [-o OUTPUT.rb] [--class NAME]
                      [--compact] [--verbose] [--no-line-tracking]
        rusa report GRAMMAR.rb
        rusa check GRAMMAR.rb
    TEXT
  end

  # Parses and removes the `generate` switches from +argv+, leaving the
  # positional arguments behind.
  #: (Array[String]) -> Hash[Symbol, bool | String]
  def parse_generate_options(argv)
    options = default_generate_options
    OptionParser.new do |opts|
      opts.on("-o FILE") { |value| options[:output] = value }
      opts.on("--class NAME") { |value| options[:class_name] = value }
      opts.on("--compact") { options[:compact] = true }
      opts.on("--verbose") { options[:verbose] = true }
      # Negatable form: --no-line-tracking yields false, --line-tracking true.
      opts.on("--[no-]line-tracking") { |value| options[:line_tracking] = value }
    end.parse!(argv)
    options
  end

  #: () -> Hash[Symbol, bool | String]
  def default_generate_options
    {
      compact: false,
      line_tracking: true,
      verbose: false
    }
  end

  # First remaining argument, or ArgumentError when there is none.
  #: (Array[String]) -> String
  def required_grammar_file(argv)
    argv.first || raise(ArgumentError, "grammar file is required")
  end

  # Explicit --class value when given, otherwise the derived default.
  #: (Hash[Symbol, bool | String], String) -> String
  def generator_class_name(options, grammar_file)
    class_name = options[:class_name]
    return class_name if class_name.is_a?(String)

    default_class_name(grammar_file)
  end

  # Explicit -o value when given, otherwise the derived default.
  #: (Hash[Symbol, bool | String], String) -> String
  def output_path_for(options, grammar_file)
    output = options[:output]
    return output if output.is_a?(String)

    default_output_path(grammar_file)
  end

  #: (Hash[Symbol, bool | String]) -> bool
  def compact_option(options)
    options[:compact] == true
  end

  # Line tracking stays on unless it was explicitly set to false.
  #: (Hash[Symbol, bool | String]) -> bool
  def line_tracking_option(options)
    options[:line_tracking] != false
  end

  #: (Hash[Symbol, bool | String]) -> bool
  def verbose_option(options)
    options[:verbose] == true
  end

  # Prints +title+ followed by one line per item, or " none" when empty.
  #: (String, Array[String]) -> void
  def print_section(title, items)
    puts title
    return puts(" none") if items.empty?

    items.each { |item| puts " #{item}" }
  end

  #: (Analysis::LALRTable) -> Array[String]
  def conflict_messages(table)
    table.conflicts.map do |conflict|
      "state #{conflict.state_id}, lookahead #{conflict.lookahead}: #{conflict.message}"
    end
  end

  # Splits on runs of non-alphanumeric characters and joins the capitalized
  # pieces: "mini_lang-v2" -> "MiniLangV2".
  #: (String) -> String
  def classify(text)
    text
      .split(/[^a-zA-Z0-9]+/)
      .reject(&:empty?)
      .map(&:capitalize)
      .join
  end
end
188
+ end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
module Rusa
  # Base class of the Rusa error hierarchy; everything below descends from it.
  class Error < StandardError
  end

  # Common parent of the grammar-related errors declared below.
  class GrammarError < Error
  end

  class DuplicateTokenError < GrammarError
  end

  class DuplicateRuleError < GrammarError
  end

  class UndefinedSymbolError < GrammarError
  end

  class NoStartSymbolError < GrammarError
  end

  class InvalidGrammarError < GrammarError
  end
end
@@ -0,0 +1,334 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "base64"
4
+ require "erb"
5
+
6
+ module Rusa
7
+ module Generator
8
+ # CodeGenerator emits a standalone Ruby parser class.
9
# Renders TEMPLATE with the grammar's tokens, productions and LALR tables to
# produce the source of a self-contained parser class (a tokenizer plus a
# table-driven parse loop).
class CodeGenerator
  # ERB source of the generated parser. The heredoc is single-quoted, so
  # every `#{...}` below is emitted verbatim into the generated file; only
  # the `<% %>` tags are evaluated at generation time.
  TEMPLATE = <<~'RUBY'
    # frozen_string_literal: true
    # Generated by Rusa v<%= version %>
    <% if compact %>
    require "base64"
    <% end %>

    class <%= class_name %>
      class ParseError < StandardError; end
      Token = Struct.new(:type, :value, :line, :column)

      ACTION_TABLE = <%= action_table_source %>.freeze
      GOTO_TABLE = <%= goto_table_source %>.freeze
      PRODUCTIONS = <%= productions_source %>.freeze
      TOKEN_PATTERNS = <%= token_patterns_source %>.freeze
      SKIP_PATTERNS = <%= skip_patterns_source %>.freeze

      def tokenize(input)
        tokens = []
        pos = 0
        line = 1
        column = 1

        while pos < input.length
          chunk = input[pos..]

          if (consumed = consume_pattern(SKIP_PATTERNS, chunk))
            line, column = advance_position(consumed, line, column)
            pos += consumed.length
            next
          end

          matched = consume_token(chunk)
          unless matched
            message =
              "Unexpected character #{input[pos].inspect}" \
              "#{position_suffix(line, column)}"
            raise ParseError, message
          end

          name, lexeme = matched
          tokens << Token.new(name, lexeme, line, column)
          line, column = advance_position(lexeme, line, column)
          pos += lexeme.length
        end

        tokens << Token.new(:$end, nil, line, column)
      end

      def parse(input)
        tokens = tokenize(input)
        stack = [0]
        values = []
        position = 0

        loop do
          state = stack.last
          token = tokens.fetch(position)
          action = ACTION_TABLE.dig(state, token.type)

          unless action
            raise ParseError, unexpected_token_message(state, token, tokens, position)
          end

          case action[0]
          when :shift
            stack << action[1]
            values << token.value
            position += 1
          when :reduce
            production_id = action[1]
            lhs, rhs_length = PRODUCTIONS.fetch(production_id)
            args = rhs_length.zero? ? [] : values.pop(rhs_length)
            rhs_length.times { stack.pop }
            values << semantic_action(production_id, args)

            goto_state = GOTO_TABLE.dig(stack.last, lhs)
            raise ParseError, "Internal parser error: missing goto for #{lhs}" unless goto_state

            stack << goto_state
          when :accept
            return values.last
          else
            raise ParseError, "Unknown parser action: #{action.inspect}"
          end
        end
      end

      private

      def consume_pattern(patterns, chunk)
        patterns.each do |pattern|
          match = pattern.match(chunk)
          # Empty matches are ignored: consuming nothing would never advance the lexer.
          return match[0] if match && match.begin(0).zero? && !match[0].empty?
        end

        nil
      end

      def consume_token(chunk)
        TOKEN_PATTERNS.each do |name, pattern|
          match = pattern.match(chunk)
          # Empty matches are ignored: consuming nothing would never advance the lexer.
          return [name, match[0]] if match && match.begin(0).zero? && !match[0].empty?
        end

        nil
      end

      def advance_position(text, line, column)
        text.each_char do |char|
          if char == "\n"
            line += 1
            column = 1
          else
            column += 1
          end
        end

        [line, column]
      end

      def position_suffix(line, column)
        <% if line_tracking %>
        " at line #{line}, column #{column}"
        <% else %>
        ""
        <% end %>
      end

      def semantic_action(production_id, args)
        case production_id
    <%= action_cases %>
        else
          default_semantic_action(args)
        end
      end

      def default_semantic_action(args)
        return nil if args.empty?
        return args.first if args.length == 1

        args
      end

      def unexpected_token_message(state, token, tokens, position)
        expected = expected_tokens_for(state)
        context = context_for(tokens, position)

        "Unexpected #{token.type}#{position_suffix(token.line, token.column)}. " \
          "Expected: #{expected.join(', ')}. Context: #{context}"
      end

      def expected_tokens_for(state)
        ACTION_TABLE.fetch(state, {}).keys.reject { |name| name == :$end }
      end

      def context_for(tokens, position)
        tokens
          .slice([position - 2, 0].max, 5)
          .to_a
          .map { |entry| entry.value || entry.type }
          .join(" ")
      end
    end
  RUBY

  # Matches action source that already is a callable literal: `->`, or the
  # keywords `lambda` / `proc` not followed by further identifier characters
  # (so `process(x)` or `lambda_value` are NOT treated as callables).
  CALLABLE_LITERAL = /\A\s*(?:->|(?:lambda|proc)(?![\w?!]))/
  private_constant :CALLABLE_LITERAL

  # +compact+ switches table serialization from `inspect` literals to a
  # Base64-encoded Marshal dump; +line_tracking+ controls whether generated
  # error messages carry a line/column suffix.
  #: (Grammar::Grammar, Analysis::LALRTable, ?class_name: String, ?compact: bool, ?line_tracking: bool) -> void
  def initialize(
    grammar,
    lalr_table,
    class_name: "Parser",
    compact: false,
    line_tracking: true
  )
    @grammar = grammar
    @table = lalr_table
    @class_name = class_name
    @compact = compact
    @line_tracking = line_tracking
  end

  # Returns the generated parser source as a String.
  #: () -> String
  def generate
    ERB.new(TEMPLATE, trim_mode: "-").result_with_hash(template_context)
  end

  private

  attr_reader :grammar #: Grammar::Grammar
  attr_reader :table #: Analysis::LALRTable
  attr_reader :class_name #: String
  attr_reader :compact #: bool
  attr_reader :line_tracking #: bool

  # Local variables made available to TEMPLATE.
  #: () -> Hash[Symbol, String | bool]
  def template_context
    {
      version: Rusa::VERSION,
      compact: compact,
      class_name: class_name,
      line_tracking: line_tracking,
      action_table_source: serialized_action_table,
      goto_table_source: serialized_plain_object(table.goto_table),
      productions_source: serialized_productions,
      token_patterns_source: serialized_tokens,
      skip_patterns_source: serialized_skip_patterns,
      action_cases: action_cases
    }
  end

  #: () -> String
  def serialized_action_table
    serialize(action_table_payload)
  end

  #: () -> String
  def serialized_productions
    serialize(production_payload)
  end

  #: () -> String
  def serialized_tokens
    serialize(token_payload)
  end

  #: () -> String
  def serialized_skip_patterns
    serialize(skip_payload)
  end

  #: (Object) -> String
  def serialized_plain_object(object)
    serialize(plain_object(object))
  end

  # Action table with every action object replaced by its #to_a form.
  #: () -> Hash[Integer, Hash[Symbol, [:shift, Integer] | [:reduce, Integer?] | [:accept]]]
  def action_table_payload
    table.action_table.each_with_object(action_table_index) do |(state_id, actions), payload|
      payload[state_id] = actions.transform_values(&:to_a)
    end
  end

  # One [lhs, rhs length] pair per production, in grammar order; the
  # generated parser indexes this array by production id.
  #: () -> Array[[Symbol, Integer]]
  def production_payload
    grammar.productions.map do |production|
      [production.lhs, production.rhs.length]
    end
  end

  #: () -> Array[[Symbol, Regexp]]
  def token_payload
    token_terminals.map { |terminal| [terminal.name, terminal.pattern] }
  end

  #: () -> Array[Regexp]
  def skip_payload
    grammar.skip_patterns
  end

  # Ruby source that evaluates to +object+: its `inspect` form, or in
  # compact mode an expression that unmarshals a Base64 payload.
  #: (Object) -> String
  def serialize(object)
    return object.inspect unless compact

    encoded = Base64.strict_encode64(Marshal.dump(object))
    "Marshal.load(Base64.decode64(#{encoded.inspect}))"
  end

  # Recursively copies hashes and arrays into fresh plain Hash/Array
  # instances; any other object is returned as is.
  #: (Object) -> Object
  def plain_object(object)
    case object
    when Hash
      plain_hash = {} #: Hash[Object, Object]
      object.each do |key, value|
        plain_hash[key] = plain_object(value)
      end
      plain_hash
    when Array
      object.map { |entry| plain_object(entry) }
    else
      object
    end
  end

  #: () -> Hash[Integer, Hash[Symbol, [:shift, Integer] | [:reduce, Integer?] | [:accept]]]
  def action_table_index
    {} #: Hash[Integer, Hash[Symbol, [:shift, Integer] | [:reduce, Integer?] | [:accept]]]
  end

  # Terminals that need a lexer pattern, i.e. everything except end-of-input.
  #: () -> Array[Grammar::TerminalSymbol]
  def token_terminals
    grammar.terminals.values.reject do |terminal|
      terminal.name == Grammar::Grammar::END_OF_INPUT
    end
  end

  # The `when` lines spliced into TEMPLATE's semantic_action method.
  #: () -> String
  def action_cases
    grammar.productions.map { |production| action_case_for(production) }.join("\n")
  end

  # Indented to line up with `case production_id` in TEMPLATE.
  #: (Grammar::Production) -> String
  def action_case_for(production)
    "    when #{production.id} then #{action_body_for(production)}"
  end

  #: (Grammar::Production) -> String
  def action_body_for(production)
    source = normalize_action_source(production.action_source)
    return "default_semantic_action(args)" unless source

    "(#{source}).call(*args)"
  end

  # Returns +source+ unchanged when it already is a callable literal
  # (`->`, `lambda`, `proc`), wraps any other source in `proc { ... }`,
  # and passes nil through.
  #: (String?) -> String?
  def normalize_action_source(source)
    return nil unless source
    return source if source.match?(CALLABLE_LITERAL)

    "proc { #{source} }"
  end
end
333
+ end
334
+ end