descent 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
+ desc 'Debug/inspect .desc file parsing stages'
4
+ long_desc <<~DESC
5
+ Shows the intermediate parsing stages for a .desc file:
6
+ tokens, AST, and IR. Useful for debugging grammar issues.
7
+
8
+ Examples:
9
+ descent debug parser.desc # Show all stages
10
+ descent debug parser.desc --tokens # Show tokens only
11
+ descent debug parser.desc --ast # Show AST only
12
+ descent debug parser.desc --ir # Show IR only
13
+ DESC
14
+
15
+ required_arg :file, desc: '.desc specification file'
16
+ flag :tokens, '--tokens', desc: 'Show tokens only'
17
+ flag :ast, '--ast', desc: 'Show AST only'
18
+ flag :ir, '--ir', desc: 'Show IR only'
19
+
# Prints the intermediate parsing stages of the .desc file named by `file`.
#
# Stages are TOKENS (lexer output), AST (parser output) and IR (IRBuilder
# output). Each stage is printed when its flag (`tokens`, `ast`, `ir`) is set;
# when none of the flags is set, all three are printed. A missing file or any
# Descent::Error is reported through Output.error and exits with status 1.
def run
  require 'descent'

  unless File.exist?(file)
    Output.error "File not found: #{file}"
    exit 1
  end

  # No stage flag selected means every stage is shown.
  show_all = !(tokens || ast || ir)

  # One-line signature shared by the AST and IR listings.
  signature = lambda do |fn|
    " #{fn.name}#{":#{fn.return_type}" if fn.return_type} #{fn.params.map { |prm| ":#{prm}" }.join(' ')}"
  end

  # Label describing what an IR case matches on.
  ir_case_label = lambda do |kase|
    if kase.conditional?
      "if[#{kase.condition}]"
    elsif kase.default?
      'default'
    elsif kase.special_class
      kase.special_class.to_s.upcase
    elsif kase.chars
      # Newlines are shown escaped so the label stays on one line.
      "c[#{kase.chars.map { |ch| ch == "\n" ? '\\n' : ch }.join}]"
    else
      'c[???]'
    end
  end

  begin
    source = File.read(file)
    token_list = Descent::Lexer.new(source, source_file: file).tokenize

    if show_all || tokens
      Output.header 'TOKENS'
      token_list.each.with_index do |tok, idx|
        row = [idx, tok.lineno, tok.tag.inspect, tok.id.inspect, tok.rest.inspect]
        puts format('%3d: L%-3d %-12s id=%-20s rest=%s', *row)
      end
      puts
    end

    # The AST is always built because the IR stage depends on it.
    ast_result = Descent::Parser.new(token_list).parse

    if show_all || ast
      Output.header 'AST'
      puts "Parser: #{ast_result.name.inspect}"
      puts "Entry: #{ast_result.entry_point.inspect}"
      puts
      puts 'Types:'
      ast_result.types.each { |type| puts " #{type.name}: #{type.kind}" }
      puts
      puts 'Functions:'
      ast_result.functions.each do |fn|
        puts signature.call(fn)
        fn.states.each do |state|
          puts " state[:#{state.name}]"
          state.cases.each do |kase|
            label = kase.chars.nil? ? 'default' : "c[#{kase.chars}]"
            puts " #{label}#{" .#{kase.substate}" if kase.substate}"
            kase.commands.each { |cmd| puts " #{cmd.type}: #{cmd.value.inspect}" }
          end
        end
      end
      puts
    end

    if show_all || ir
      ir_result = Descent::IRBuilder.new(ast_result).build

      Output.header 'IR'
      puts "Parser: #{ir_result.name.inspect}"
      puts "Entry: #{ir_result.entry_point.inspect}"
      puts
      puts 'Types:'
      ir_result.types.each do |type|
        flags = %w[emits_start emits_end].select { |flag| type.public_send(flag) }
        puts " #{type.name}: #{type.kind} #{flags.join(', ')}"
      end
      puts
      puts 'Functions:'
      ir_result.functions.each do |fn|
        puts signature.call(fn)
        puts " emits_events: #{fn.emits_events}"
        puts " expects_char: #{fn.expects_char.inspect}" if fn.expects_char
        puts " locals: #{fn.locals.inspect}" unless fn.locals.empty?
        fn.states.each do |state|
          scan_info = state.scannable? ? " SCAN(#{state.scan_chars.map(&:inspect).join(', ')})" : ''
          puts " state[:#{state.name}]#{scan_info}"
          state.cases.each do |kase|
            puts " #{ir_case_label.call(kase)}#{" .#{kase.substate}" if kase.substate}"
            kase.commands.each { |cmd| puts " #{cmd.type}: #{cmd.args.inspect}" }
          end
        end
      end
    end
  rescue Descent::Error => e
    Output.error e.message
    exit 1
  end
end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ desc 'Generate railroad diagram from .desc file'
4
+ long_desc <<~DESC
5
+ Generates Python code that produces railroad diagrams (like JSON.org).
6
+ The output uses the railroad-diagrams library.
7
+
8
+ Examples:
9
+ descent diagram parser.desc > diagram.py
10
+ python diagram.py > diagram.html # All diagrams as HTML
11
+ python diagram.py --split # Each function as separate SVG
12
+
13
+ Requires: pip install railroad-diagrams
14
+ DESC
15
+
16
+ required_arg :file, desc: '.desc specification file'
17
+ flag :output, '-o FILE', '--output=FILE', desc: 'Output file (default: stdout)'
18
+
# Generates railroad-diagram code for the .desc file named by `file`.
#
# The file is lexed, parsed and lowered to IR, which is then handed to
# Descent::Railroad. The generated code is written to the `output` path when
# that flag is set, and printed to stdout otherwise. A missing file or any
# Descent::Error is reported through Output.error and exits with status 1.
def run
  require 'descent'
  require 'descent/railroad'

  unless File.exist?(file)
    Output.error "File not found: #{file}"
    exit 1
  end

  begin
    source = File.read(file)
    token_list = Descent::Lexer.new(source, source_file: file).tokenize
    tree = Descent::Parser.new(token_list).parse
    lowered = Descent::IRBuilder.new(tree).build
    generated = Descent::Railroad.new(lowered).generate

    if output
      File.write(output, generated)
      Output.success "Generated #{output}"
    else
      puts generated
    end
  rescue Descent::Error => e
    Output.error e.message
    exit 1
  end
end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ desc 'Generate parser from .desc file'
4
+ long_desc <<~DESC
5
+ Parses the .desc specification file and generates a parser in the
6
+ target language. Currently supports Rust output.
7
+
8
+ Examples:
9
+ descent generate parser.desc
10
+ descent generate parser.desc -o src/parser.rs
11
+ descent generate parser.desc --target rust
12
+ descent generate parser.desc --trace
13
+ DESC
14
+
15
+ required_arg :file, desc: '.desc specification file'
16
+ flag :output, '-o FILE', '--output=FILE', desc: 'Output file (default: stdout)'
17
+ flag :target, '-t TARGET', '--target=TARGET', desc: 'Target language: rust, c (default: rust)'
18
+ flag :trace, '--trace', desc: 'Enable trace output in generated parser'
19
+
# Generates a parser from the .desc file named by `file`.
#
# The target language comes from the `target` flag (falling back to 'rust')
# and the `trace` flag is passed along as a strict boolean. The result is
# written to the `output` path when that flag is set, and printed to stdout
# otherwise. A missing file or any Descent::Error is reported through
# Output.error and exits with status 1.
def run
  require 'descent'

  unless File.exist?(file)
    Output.error "File not found: #{file}"
    exit 1
  end

  language = (target || 'rust').to_sym
  tracing = !!trace

  begin
    generated = Descent.generate(file, target: language, trace: tracing)

    # Collapse any run of three or more newlines down to a single blank line.
    generated = generated.gsub(/\n{3,}/, "\n\n")

    if output
      File.write(output, generated)
      Output.success "Generated #{output}"
    else
      puts generated
    end
  rescue Descent::Error => e
    Output.error e.message
    exit 1
  end
end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ desc 'Validate .desc file without generating'
4
+ long_desc <<~DESC
5
+ Parses and validates the .desc specification file, checking for:
6
+ - Syntax errors
7
+ - Undefined type references
8
+ - Undefined function references
9
+ - Empty states
10
+ - Other semantic issues
11
+
12
+ Examples:
13
+ descent validate parser.desc
14
+ DESC
15
+
16
+ required_arg :file, desc: '.desc specification file'
17
+
# Validates the .desc file named by `file` without generating any code.
#
# The file is lexed, parsed, lowered to IR and passed to Descent::Validator.
# When errors are found they are listed and the process exits with status 1.
# Otherwise warnings (if any) are listed, or a success line is shown, followed
# by a short summary of type/function/state counts. A missing file or any
# Descent::Error is reported through Output.error and exits with status 1.
def run
  require 'descent'

  unless File.exist?(file)
    Output.error "File not found: #{file}"
    exit 1
  end

  # Validator issues are value objects carrying a message and an optional
  # location, so they are rendered to text here instead of being handed to
  # Output.bullet as raw objects. Plain strings are passed through unchanged.
  describe = lambda do |issue|
    if !issue.respond_to?(:message)
      issue.to_s
    elsif issue.respond_to?(:location) && issue.location
      "#{issue.message} (#{issue.location})"
    else
      issue.message
    end
  end

  begin
    content = File.read(file)
    tokens = Descent::Lexer.new(content, source_file: file).tokenize
    ast = Descent::Parser.new(tokens).parse
    ir = Descent::IRBuilder.new(ast).build

    result = Descent::Validator.new(ir).validate

    if result.errors.any?
      Output.error "Validation failed with #{result.errors.size} error(s):"
      result.errors.each { |issue| Output.bullet describe.call(issue) }
      exit 1
    end

    if result.warnings.any?
      Output.warn "Validation passed with #{result.warnings.size} warning(s):"
      result.warnings.each { |issue| Output.bullet describe.call(issue) }
    else
      Output.success "Validation passed: #{file}"
    end

    # Show summary
    puts
    puts "Types: #{ir.types.size}"
    puts "Functions: #{ir.functions.size}"
    puts "States: #{ir.functions.sum { |f| f.states.size }}"
  rescue Descent::Error => e
    Output.error e.message
    exit 1
  end
end
@@ -0,0 +1,231 @@
1
+ # frozen_string_literal: true
2
+
module Descent
  # Validates IR and collects warnings/errors.
  #
  # Run after IR building to catch issues before code generation. Problems are
  # recorded as Warning/Error value objects (message plus optional location)
  # and exposed through #warnings and #errors; nothing is raised.
  class Validator
    Warning = Data.define(:message, :location)
    Error = Data.define(:message, :location)

    # Type kinds accepted for a type declaration.
    VALID_TYPE_KINDS = %i[bracket content internal].freeze

    # Emit targets that are accepted without a matching type declaration.
    BUILTIN_EMITS = %w[Error Warning].freeze

    # Suffix stripped from an emitted name before looking up its base type.
    # Anchored with \z so only the very end of the name is considered.
    EMIT_SUFFIX = /(Start|End|Anon)\z/

    attr_reader :warnings, :errors

    # @param ir the IR object to validate; it is read through #name, #types,
    #   #functions and #entry_point
    def initialize(ir)
      @ir = ir
      @warnings = []
      @errors = []
    end

    # Runs every check and records the findings.
    #
    # Previously collected findings are discarded first, so calling this more
    # than once does not accumulate duplicates.
    #
    # @return [Validator] self, so #errors / #warnings / #report can be chained
    def validate
      @warnings.clear
      @errors.clear

      validate_parser_name
      validate_types
      validate_functions
      validate_entry_point

      self
    end

    # @return [Boolean] true when no errors were recorded (warnings are allowed)
    def valid? = @errors.empty?

    # Renders all findings as text, one per line, errors before warnings.
    #
    # @return [String] empty when there is nothing to report
    def report
      lines = @errors.map { |issue| format_issue('ERROR', issue) }
      lines.concat(@warnings.map { |issue| format_issue('WARNING', issue) })
      lines.join("\n")
    end

    private

    # Records a warning. NOTE: inside this class this shadows Kernel#warn.
    def warn(message, location: nil) = @warnings << Warning.new(message:, location:)

    # Records an error.
    def error(message, location: nil) = @errors << Error.new(message:, location:)

    # Formats one finding as "LABEL (location): message", omitting the
    # parenthesised part when the finding has no location.
    def format_issue(label, issue)
      where = issue.location ? " (#{issue.location})" : ''
      "#{label}#{where}: #{issue.message}"
    end

    # ========== Parser Name Validation ==========

    # Errors when the IR has no (or an empty) parser name.
    def validate_parser_name
      return if @ir.name && !@ir.name.empty?

      error 'Missing parser name: add |parser <name> directive',
            location: 'L1'
    end

    # ========== Type Validation ==========

    # Errors on duplicate type names and on kinds outside VALID_TYPE_KINDS.
    def validate_types
      seen_types = {}

      @ir.types.each do |type|
        loc = "L#{type.lineno}"

        error "Duplicate type declaration: #{type.name}", location: loc if seen_types[type.name]
        seen_types[type.name] = type

        next if VALID_TYPE_KINDS.include?(type.kind)

        error "Unknown type kind '#{type.kind}' for #{type.name}", location: loc
      end
    end

    # ========== Function Validation ==========

    # Warns on duplicate function names, functions without states and
    # undeclared return types, then descends into each function's states.
    def validate_functions
      seen_functions = {}

      @ir.functions.each do |func|
        loc = "L#{func.lineno}"

        warn "Duplicate function definition: #{func.name}", location: loc if seen_functions[func.name]
        seen_functions[func.name] = func

        warn "Function '#{func.name}' has no states", location: loc if func.states.empty?

        if func.return_type && !type_exists?(func.return_type)
          warn "Return type '#{func.return_type}' not declared", location: loc
        end

        validate_function_states(func)
      end
    end

    # Warns on states that have neither cases nor an EOF handler, then
    # validates the cases of every state.
    def validate_function_states(func)
      func.states.each do |state|
        if state.cases.empty? && state.eof_handler.nil?
          warn "State '#{state.name}' in #{func.name} has no cases",
               location: "L#{state.lineno}"
        end

        # NOTE: EOF handling is inferred per spec, so we don't warn about missing handlers
        # The code generator will infer appropriate EOF behavior based on:
        # - MARK status (emit content if marked)
        # - Return type (BRACKET types emit End, CONTENT types emit content)
        # - EXPECTS annotation (error if not satisfied)

        validate_state_cases(func, state)
      end
    end

    # Warns on cases with an empty character list (unless they are default or
    # special-class cases) and validates each case's commands. Findings are
    # located at the enclosing state's line.
    def validate_state_cases(func, state)
      loc = "L#{state.lineno}"

      state.cases.each do |kase|
        if kase.chars&.empty? && !kase.default? && kase.special_class.nil?
          warn "Empty character match in #{func.name}:#{state.name}", location: loc
        end

        # Cases with no commands are deliberately not flagged: that is often
        # valid (the case just advances).

        validate_commands(func, state, kase.commands, loc)
      end
    end

    # Dispatches each command to the check for its type; command types other
    # than :call, :emit and :transition are not checked.
    def validate_commands(func, _state, commands, loc)
      commands.each do |cmd|
        case cmd.type
        when :call then validate_call(cmd, loc)
        when :emit then validate_emit(cmd, loc)
        when :transition then validate_transition(func, cmd, loc)
        end
      end
    end

    # Reads a command argument stored under either a symbol or a string key.
    def command_arg(cmd, key) = cmd.args[key] || cmd.args[key.to_s]

    # Warns when a :call command names a function that is not defined.
    # IR builder stores name in :name field (not :value).
    def validate_call(cmd, loc)
      func_name = command_arg(cmd, :name)
      return if func_name && function_exists?(func_name)

      warn "Call to undefined function '#{func_name}'", location: loc
    end

    # Warns when an :emit command names neither a declared type (after
    # stripping a Start/End/Anon suffix) nor a builtin emit.
    def validate_emit(cmd, loc)
      emitted = command_arg(cmd, :value)
      base_type = emitted&.sub(EMIT_SUFFIX, '')
      return if type_exists?(base_type) || builtin_emit?(emitted)

      warn "Emit of undefined type '#{emitted}'", location: loc
    end

    # Warns when a :transition command targets an unknown state or is not of
    # the form ":statename". A nil or empty target is a self-loop and valid.
    def validate_transition(func, cmd, loc)
      target = command_arg(cmd, :value)
      return if target.nil? || target.empty?

      unless target.start_with?(':')
        # Target doesn't start with : but isn't empty - probably malformed
        warn "Invalid transition target '#{target}' (should be :statename or empty)",
             location: loc
        return
      end

      return if state_exists_in_function?(func, target.delete_prefix(':'))

      warn "Transition to undefined state '#{target}'", location: loc
    end

    # ========== Entry Point Validation ==========

    # Errors when the entry point (/function or /function:state) names a
    # function that is not defined. The state part is not checked.
    def validate_entry_point
      return unless @ir.entry_point

      entry = @ir.entry_point.delete_prefix('/')
      func_name, _state_name = entry.split(':')

      return if function_exists?(func_name)

      error "Entry point references undefined function '#{func_name}'",
            location: 'entry-point'
    end

    # ========== Helpers ==========

    # Case-insensitive lookup of a declared type name.
    def type_exists?(name)
      return false if name.nil?

      wanted = name.downcase
      @ir.types.any? { |t| t.name.downcase == wanted }
    end

    # Exact-match lookup of a defined function name.
    def function_exists?(name)
      return false if name.nil?

      @ir.functions.any? { |f| f.name == name }
    end

    def state_exists_in_function?(func, state_name) = func.states.any? { |s| s.name == state_name }

    def builtin_emit?(name) = BUILTIN_EMITS.include?(name)
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
module Descent
  # Version string of the descent gem.
  VERSION = '0.7.1'
end
data/lib/descent.rb ADDED
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'descent/version'
4
+
module Descent
  # Base class for errors raised by Descent.
  class Error < StandardError
  end

  # Error subclass for parse failures.
  class ParseError < Error
  end

  # Error subclass for validation failures.
  class ValidationError < Error
  end

  # Components are registered for autoloading, so each file is only required
  # the first time its constant is referenced.
  {
    AST: 'descent/ast',
    IR: 'descent/ir',
    Lexer: 'descent/lexer',
    Parser: 'descent/parser',
    IRBuilder: 'descent/ir_builder',
    Validator: 'descent/validator',
    Generator: 'descent/generator'
  }.each { |const_name, path| autoload const_name, path }

  # Main entry point: parse a .desc file and generate output.
  #
  # `input` is treated as a path when a file exists at that location; otherwise
  # it is used directly as the specification text and the source file is
  # reported as '(string)'.
  #
  # @param input [String] Path to .desc file or string content
  # @param target [Symbol] Target language (:rust, :c)
  # @param ** Additional keyword options, forwarded unchanged to Generator.new
  # @return [String] Generated parser code
  def self.generate(input, target:, **)
    content, source_file =
      if File.exist?(input)
        [File.read(input), input]
      else
        [input, '(string)']
      end

    token_list = Lexer.new(content, source_file:).tokenize
    tree = Parser.new(token_list).parse
    lowered = IRBuilder.new(tree).build

    Generator.new(lowered, target:, **).generate
  end
end
metadata ADDED
@@ -0,0 +1,101 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: descent
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.7.1
5
+ platform: ruby
6
+ authors:
7
+ - Joseph Wecker
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2026-01-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: devex
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.3'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: liquid
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '5.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '5.0'
41
+ description: |
42
+ Generates high-performance callback-based recursive descent parsers from
43
+ declarative .desc specifications. Currently generates Rust parsers and will
43
+ soon support more target languages (C, wasm, ruby, go,
45
+ elixir...) via Liquid templates. The .desc format is valid UDON, enabling
46
+ future bootstrapping where descent can parse its own input format.
47
+ email:
48
+ - joseph.wecker@gmail.com
49
+ executables:
50
+ - descent
51
+ extensions: []
52
+ extra_rdoc_files: []
53
+ files:
54
+ - CHANGELOG.md
55
+ - README.md
56
+ - SYNTAX.md
57
+ - exe/descent
58
+ - lib/descent.rb
59
+ - lib/descent/ast.rb
60
+ - lib/descent/generator.rb
61
+ - lib/descent/ir.rb
62
+ - lib/descent/ir_builder.rb
63
+ - lib/descent/lexer.rb
64
+ - lib/descent/parser.rb
65
+ - lib/descent/railroad.rb
66
+ - lib/descent/templates/rust/_command.liquid
67
+ - lib/descent/templates/rust/parser.liquid
68
+ - lib/descent/tools/debug.rb
69
+ - lib/descent/tools/diagram.rb
70
+ - lib/descent/tools/generate.rb
71
+ - lib/descent/tools/validate.rb
72
+ - lib/descent/validator.rb
73
+ - lib/descent/version.rb
74
+ homepage: https://github.com/v2-io/descent
75
+ licenses:
76
+ - MIT
77
+ metadata:
78
+ homepage_uri: https://github.com/v2-io/descent
79
+ source_code_uri: https://github.com/v2-io/descent
80
+ changelog_uri: https://github.com/v2-io/descent/blob/main/CHANGELOG.md
81
+ rubygems_mfa_required: 'true'
82
+ post_install_message:
83
+ rdoc_options: []
84
+ require_paths:
85
+ - lib
86
+ required_ruby_version: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ version: 3.3.0
91
+ required_rubygems_version: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ requirements: []
97
+ rubygems_version: 3.5.22
98
+ signing_key:
99
+ specification_version: 4
100
+ summary: Recursive descent parser generator from .desc specifications
101
+ test_files: []