descent 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,272 @@
1
+ # frozen_string_literal: true
2
+
3
require 'set' # Set is only autoloaded on Ruby >= 3.2; require it explicitly.

module Descent
  # Generates a Python script that draws railroad (syntax) diagrams for a
  # parser IR, using the Python `railroad-diagrams` library.
  #
  # This is a SPIKE - quick proof-of-concept, not production quality.
  #
  # Usage:
  #   pip install railroad-diagrams
  #   descent diagram parser.desc > diagram.py
  #   python diagram.py > diagram.html   # every function on one HTML page
  #   python diagram.py --split          # one standalone <function>.svg each
  #
  # Mapping from .desc concepts to railroad primitives:
  #   - Function            -> one Diagram per function
  #   - State cases         -> Choice (alternatives)
  #   - Self-looping state  -> ZeroOrMore
  #   - c[...] match        -> Terminal
  #   - /function call      -> NonTerminal
  #   - |return             -> end of path
  #   - |>>                 -> loop/continue
  #
  # The IR object is duck-typed. The code below reads:
  #   ir.name, ir.functions
  #   func.name, func.params, func.return_type, func.states
  #   state.name, state.cases
  #   kase.conditional?, kase.default?, kase.chars, kase.special_class,
  #   kase.param_ref, kase.commands
  #   cmd.type (a Symbol), cmd.args (a Hash)
  class Railroad
    # Known character class patterns - map expanded chars back to names.
    # Lookup is by exact joined string, so it only hits when the IR lists the
    # characters in precisely this order.
    CHAR_CLASS_NAMES = {
      'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' => 'LETTER',
      'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-' => 'LABEL_CONT',
      '0123456789' => 'DIGIT',
      '0123456789abcdefABCDEF' => 'HEX_DIGIT',
      " \t" => 'WS',
      'abcdefghijklmnopqrstuvwxyz' => 'a-z',
      'ABCDEFGHIJKLMNOPQRSTUVWXYZ' => 'A-Z'
    }.freeze

    # How single characters are displayed inside a Terminal label.
    DISPLAY_ESCAPES = {
      "\n" => '\\n',
      "\t" => '\\t',
      "\r" => '\\r',
      ' ' => '␣'
    }.freeze

    # Escapes required to embed arbitrary text in a single-quoted Python literal.
    PY_ESCAPES = {
      '\\' => '\\\\',
      "'" => "\\'",
      "\n" => '\\n',
      "\r" => '\\r',
      "\t" => '\\t'
    }.freeze

    # @param ir [#name, #functions] parsed parser description (see class docs)
    def initialize(ir) = @ir = ir

    # Builds the complete Python script: header, one `DIAGRAMS[...]` entry per
    # function, then the `main()` footer.
    #
    # @return [String] Python source code
    def generate
      sections = [header, '']
      @ir.functions.each { |func| sections.push(function_diagram(func), '') }
      sections << footer
      sections.join("\n")
    end

    private

    # Python preamble: shebang, module docstring, imports and the DIAGRAMS
    # registry that each function section fills in.
    def header
      <<~PY
        #!/usr/bin/env python3
        """
        Railroad diagrams for #{@ir.name} parser.
        Generated by descent - run this to produce SVG.

        Usage (all diagrams on one HTML page):
            python #{@ir.name}_diagram.py > #{@ir.name}_diagram.html

        Or for individual function SVGs:
            python #{@ir.name}_diagram.py --split

        Requires: pip install railroad-diagrams
        """
        from railroad import Diagram, Choice, Sequence, Optional, ZeroOrMore, OneOrMore
        from railroad import Terminal, NonTerminal, Comment, Skip, Start, End
        import sys

        DIAGRAMS = {}
      PY
    end

    # Python `main()`: with `--split` writes one standalone SVG file per
    # function, otherwise prints a single HTML page to stdout.
    def footer
      <<~PY
        def main():
            import railroad
            if '--split' in sys.argv:
                # Output each function to separate file
                for name, diag in DIAGRAMS.items():
                    with open(f'{name}.svg', 'w') as f:
                        diag.writeStandalone(f.write)
                    print(f'Wrote {name}.svg', file=sys.stderr)
            else:
                # Output all diagrams as single HTML with proper CSS
                print('<!DOCTYPE html><html><head>')
                print(f'<style>{railroad.DEFAULT_STYLE}</style>')
                print('<style>svg { margin: 20px; display: block; } h2 { font-family: sans-serif; }</style>')
                print('</head><body>')
                for name, diag in DIAGRAMS.items():
                    print(f'<h2>{name}</h2>')
                    diag.writeSvg(print)
                print('</body></html>')

        if __name__ == '__main__':
            main()
      PY
    end

    # Renders the `DIAGRAMS['name'] = Diagram(...)` statement for one function,
    # walking its states from the first one.
    #
    # @return [String] Python snippet, or a one-line Python comment when the
    #   function has no states
    def function_diagram(func)
      states = func.states
      return "# #{func.name}: no states" if states.empty?

      state_map = states.to_h { |state| [state.name, state] }
      diagram_content = build_state_sequence(states.first, state_map, Set.new)

      # The return type, when present, is shown as a trailing Comment element.
      comment = func.return_type ? ", Comment(#{py_str("→ #{func.return_type}")})" : ''
      signature = func.params.empty? ? '' : "(#{func.params.join(', ')})"

      <<~PY
        # Function: #{func.name}#{signature}
        DIAGRAMS[#{py_str(func.name)}] = Diagram(
            #{diagram_content}#{comment}
        )
      PY
    end

    # Recursively turns a state (and the states it transitions to) into a
    # railroad expression.
    #
    # Elements are appended in a fixed order: forward transitions (each followed
    # by its target state's expansion), then a ZeroOrMore for self-loops, then
    # the exit cases. Conditional cases are ignored.
    #
    # @param state [#name, #cases, nil] state to expand
    # @param state_map [Hash] state name => state
    # @param visited [Set] names of the states on the path from the start state
    #   to here; used to break cycles. It is never mutated - each level extends
    #   a copy - so a state reachable along two different paths is expanded in
    #   both places.
    # @return [String] Python expression; 'Skip()' when nothing is to be drawn
    def build_state_sequence(state, state_map, visited)
      return 'Skip()' if state.nil? || visited.include?(state.name)

      path = visited | [state.name]
      cases = state.cases.reject(&:conditional?)
      return 'Skip()' if cases.empty?

      exit_cases, loop_cases, forward_cases = classify_cases(cases, state.name)
      parts = []

      # Cases that lead to the same next state are shown as one Choice.
      forward_cases.group_by(&:last).each do |target, pairs|
        parts << choice_of(pairs.map(&:first))

        next_state = state_map[target]
        next if next_state.nil? || path.include?(target)

        next_part = build_state_sequence(next_state, state_map, path)
        parts << next_part unless next_part == 'Skip()'
      end

      parts << "ZeroOrMore(#{choice_of(loop_cases)})" if loop_cases.any?
      parts << choice_of(exit_cases) if exit_cases.any?

      return 'Skip()' if parts.empty?
      return parts.first if parts.size == 1

      "Sequence(#{parts.join(', ')})"
    end

    # Splits cases by where they lead.
    #
    # @return [Array(Array, Array, Array)] exit cases (contain a :return
    #   command), loop cases (no transition, empty transition, or transition to
    #   +state_name+), and forward cases as [case, target_name] pairs
    def classify_cases(cases, state_name)
      exits = []
      loops = []
      forwards = []

      cases.each do |kase|
        # Transition targets may be written with a leading colon.
        target = find_transition(kase)&.delete_prefix(':')

        if returns?(kase)
          exits << kase
        elsif target.nil? || target.empty? || target == state_name
          loops << kase
        else
          forwards << [kase, target]
        end
      end

      [exits, loops, forwards]
    end

    # One case is rendered as-is; several become a Choice whose first entry is
    # the default (straight-through) branch.
    def choice_of(cases)
      elements = cases.map { |kase| case_to_element(kase) }
      elements.size == 1 ? elements.first : "Choice(0, #{elements.join(', ')})"
    end

    # @return [String, nil] value of the case's first :transition command
    def find_transition(kase)
      transition = kase.commands.find { |cmd| cmd.type == :transition }
      transition && (transition.args[:value] || transition.args['value'])
    end

    def returns?(kase) = kase.commands.any? { |cmd| cmd.type == :return }

    # Builds the element for a single case: what it matches, followed by the
    # calls/emits/keyword lookups it performs.
    #
    # A case that contributes nothing visible (no matcher and only commands such
    # as transitions) yields 'Skip()' rather than an empty `Sequence()`.
    #
    # @param _func unused; kept so existing two-argument calls keep working
    # @return [String] Python expression
    def case_to_element(kase, _func = nil)
      parts = [match_element(kase), *command_elements(kase)].compact

      return 'Skip()' if parts.empty?
      return parts.first if parts.size == 1

      "Sequence(#{parts.join(', ')})"
    end

    # Element for what the case matches, or nil if it has no matcher.
    def match_element(kase)
      if kase.default?
        'Skip()' # Default case = any other char
      elsif kase.chars && !kase.chars.empty?
        "Terminal(#{format_chars(kase.chars).inspect})"
      elsif kase.special_class
        "NonTerminal(#{kase.special_class.to_s.upcase.inspect})"
      elsif kase.param_ref
        "NonTerminal(#{py_str(":#{kase.param_ref}")})"
      end
    end

    # Elements for the commands worth drawing; every other command type is
    # skipped.
    #
    # NOTE(review): the Rust command template dispatches on "keywords_lookup"
    # and "inline_emit_*" command types; confirm that :keywords and :emit (with
    # args[:type]) are what the IR really produces for this generator.
    def command_elements(kase)
      kase.commands.filter_map do |cmd|
        case cmd.type
        when :call
          "NonTerminal(#{cmd.args[:name].to_s.inspect})"
        when :emit
          event_type = cmd.args[:type]
          "Comment(#{event_type.to_s.inspect})" if event_type
        when :keywords
          "NonTerminal(#{py_str("KEYWORDS(#{cmd.args[:name]})")})"
        end
      end
    end

    # Label for a character set: a known class name when the set matches one,
    # otherwise the characters themselves with whitespace made visible.
    def format_chars(chars)
      return escape_char(chars.first) if chars.size == 1

      CHAR_CLASS_NAMES[chars.join] || chars.map { |char| escape_char(char) }.join
    end

    def escape_char(char) = DISPLAY_ESCAPES.fetch(char, char)

    # Single-quoted Python string literal for +text+, with backslashes, quotes
    # and line breaks escaped so arbitrary names cannot break the script.
    def py_str(text)
      "'#{text.to_s.gsub(/[\\'\n\r\t]/, PY_ESCAPES)}'"
    end
  end
end
@@ -0,0 +1,174 @@
1
{% comment %}
  Command partial - generates Rust code for a single IR::Command.

  Variables in scope:
    - cmd: the command hash {type, args}
    - func: the function hash
    - return_type_info: the function's return type info (or nil)
    - states: array of states (for multi-state functions)

  Uses the non-standard filters escape_rust_char, pascalcase and rust_expr,
  which are presumably registered by the generator (not visible here).

  The "transition" branch emits `continue` and assigns `state`, so the output
  is meant to sit inside a loop with a `state` variable in scope. Most branches
  also expect an `on_event` callback in the surrounding Rust function.

  Generator bugs (unknown command type, invalid advance_to width) are emitted
  as compile_error! so they fail the Rust build instead of panicking at runtime.
{% endcomment %}

{% case cmd.type %}

{% when "advance" %}
  self.advance();

{% when "advance_to" %}
  {% comment %} Explicit advance-to: ->[chars] uses memchr to scan forward.
     Limited to 1-6 chars (validated by IR builder). One scan_toN helper
     exists per supported width. {% endcomment %}
  {% assign target_chars = cmd.args.value | split: "" %}
  {% assign char_count = target_chars | size %}
  {% if char_count == 1 %}
  self.scan_to1({{ target_chars[0] | escape_rust_char }});
  {% elsif char_count == 2 %}
  self.scan_to2({{ target_chars[0] | escape_rust_char }}, {{ target_chars[1] | escape_rust_char }});
  {% elsif char_count == 3 %}
  self.scan_to3({{ target_chars[0] | escape_rust_char }}, {{ target_chars[1] | escape_rust_char }}, {{ target_chars[2] | escape_rust_char }});
  {% elsif char_count == 4 %}
  self.scan_to4({{ target_chars[0] | escape_rust_char }}, {{ target_chars[1] | escape_rust_char }}, {{ target_chars[2] | escape_rust_char }}, {{ target_chars[3] | escape_rust_char }});
  {% elsif char_count == 5 %}
  self.scan_to5({{ target_chars[0] | escape_rust_char }}, {{ target_chars[1] | escape_rust_char }}, {{ target_chars[2] | escape_rust_char }}, {{ target_chars[3] | escape_rust_char }}, {{ target_chars[4] | escape_rust_char }});
  {% elsif char_count == 6 %}
  self.scan_to6({{ target_chars[0] | escape_rust_char }}, {{ target_chars[1] | escape_rust_char }}, {{ target_chars[2] | escape_rust_char }}, {{ target_chars[3] | escape_rust_char }}, {{ target_chars[4] | escape_rust_char }}, {{ target_chars[5] | escape_rust_char }});
  {% else %}
  {% comment %} Should never happen after IR validation; if it does, fail the
     build rather than panic while parsing. {% endcomment %}
  compile_error!("advance_to expects 1-6 target chars, got {{ char_count }}");
  {% endif %}

{% when "mark" %}
  self.mark();

{% when "term" %}
  {% comment %} Offset defaults to 0 when the command does not carry one. {% endcomment %}
  {% assign term_offset = cmd.args.offset | default: 0 %}
  self.set_term({{ term_offset }});

{% when "prepend" %}
  {% comment %} Prepend literal bytes to the accumulation buffer. Empty literals are
     filtered as :noop at parse time, so we always have content here.
     NOTE(review): the literal is pasted into a Rust byte string unescaped;
     confirm the IR already escapes quotes and backslashes. {% endcomment %}
  self.prepend_bytes(b"{{ cmd.args.literal }}");

{% when "prepend_param" %}
  {% comment %} Prepend parameter bytes to accumulation buffer. Parameter is &'static [u8],
     so empty slice is naturally a no-op, NUL bytes work fine. {% endcomment %}
  {% assign param_name = cmd.args.param_ref %}
  self.prepend_bytes({{ param_name }});

{% when "return" %}
  {% comment %} Emit appropriate event based on return type, then return.
     Branch order matters: an explicit return value wins, then an explicit
     emit type, then the function's declared return kind. {% endcomment %}
  {% if cmd.args.return_value %}
  {% comment %} INTERNAL type returning a computed value {% endcomment %}
  return {{ cmd.args.return_value }};
  {% elsif cmd.args.emit_type %}
  {% comment %} Explicit return type specified {% endcomment %}
  {% if cmd.args.emit_mode == "mark" %}
  on_event(Event::{{ cmd.args.emit_type }} { content: self.term(), span: self.span_from_mark() });
  {% elsif cmd.args.emit_mode == "literal" %}
  on_event(Event::{{ cmd.args.emit_type }} { content: std::borrow::Cow::Borrowed(b"{{ cmd.args.literal }}"), span: self.span() });
  {% else %}
  on_event(Event::{{ cmd.args.emit_type }} { content: std::borrow::Cow::Borrowed(b""), span: self.span() });
  {% endif %}
  return;
  {% elsif return_type_info.kind == "internal" %}
  {% comment %} INTERNAL type with no explicit return value - return 0 as default {% endcomment %}
  return 0;
  {% elsif cmd.args.suppress_auto_emit and return_type_info.kind == "content" %}
  {% comment %} Fix #11: Inline emit already happened for CONTENT type - skip auto-emit.
     Note: BRACKET types ALWAYS emit End event regardless of inline emits,
     because End is structural, not a content event. {% endcomment %}
  return;
  {% elsif return_type_info.kind == "content" %}
  on_event(Event::{{ func.return_type }} { content: self.term(), span: self.span_from_mark() });
  return;
  {% elsif return_type_info.kind == "bracket" %}
  on_event(Event::{{ func.return_type }}End { span: self.span() });
  return;
  {% else %}
  return;
  {% endif %}

{% when "transition" %}
  {% if cmd.args.value == "" or cmd.args.value == nil %}
  {% comment %} Self-loop - continue {% endcomment %}
  continue;
  {% else %}
  {% comment %} Target names may carry a colon; it is removed before the name
     is converted to the State variant. {% endcomment %}
  state = State::{{ cmd.args.value | remove: ":" | pascalcase }};
  continue;
  {% endif %}

{% when "call" %}
  {% comment %} Function call - use pre-parsed name and call_args from IR {% endcomment %}
  {% if cmd.args.is_error %}
  {% comment %} Built-in /error(ErrorCode) - emit error event (caller adds |return if needed) {% endcomment %}
  {% if cmd.args.call_args %}
  on_event(Event::Error { code: ParseErrorCode::{{ cmd.args.call_args | pascalcase }}, span: self.span() });
  {% else %}
  on_event(Event::Error { code: ParseErrorCode::UnexpectedChar, span: self.span() });
  {% endif %}
  {% elsif cmd.args.call_args %}
  self.parse_{{ cmd.args.name }}({{ cmd.args.call_args | rust_expr }}, on_event);
  {% else %}
  self.parse_{{ cmd.args.name }}(on_event);
  {% endif %}

{% when "assign" %}
  {% comment %} Assignments without both a variable and an expression emit nothing. {% endcomment %}
  {% if cmd.args.var and cmd.args.expr %}
  {{ cmd.args.var }} = {{ cmd.args.expr | rust_expr }};
  {% endif %}

{% when "add_assign" %}
  {% if cmd.args.var and cmd.args.expr %}
  {{ cmd.args.var }} += {{ cmd.args.expr | rust_expr }};
  {% endif %}

{% when "sub_assign" %}
  {% if cmd.args.var and cmd.args.expr %}
  {{ cmd.args.var }} -= {{ cmd.args.expr | rust_expr }};
  {% endif %}

{% when "emit" %}
  {% comment %} Explicit emit - used for mid-function emissions {% endcomment %}
  {% assign emit_type = cmd.args.value %}
  on_event(Event::{{ emit_type }} { content: self.term(), span: self.span_from_mark() });

{% when "inline_emit_bare" %}
  {% comment %} Inline emit with no payload: TypeName - CONTENT types still need content field {% endcomment %}
  on_event(Event::{{ cmd.args.type }} { content: std::borrow::Cow::Borrowed(b""), span: self.span() });

{% when "inline_emit_mark" %}
  {% comment %} Inline emit using accumulated content: TypeName(USE_MARK) {% endcomment %}
  on_event(Event::{{ cmd.args.type }} { content: self.term(), span: self.span_from_mark() });

{% when "inline_emit_literal" %}
  {% comment %} Inline emit with literal: TypeName(literal) - strip $ sigil if present.
     NOTE(review): remove_first drops the first $ wherever it occurs, not only a
     leading one; confirm literals never contain a $ of their own. {% endcomment %}
  {% assign literal = cmd.args.literal | remove_first: '$' %}
  on_event(Event::{{ cmd.args.type }} { content: std::borrow::Cow::Borrowed(b"{{ literal }}"), span: self.span() });

{% when "error" %}
  {% assign error_code = cmd.args.value | pascalcase %}
  on_event(Event::Error { code: ParseErrorCode::{{ error_code }}, span: self.span() });

{% when "keywords_lookup" %}
  {% comment %} Look up accumulated content in keyword map, call fallback if not found {% endcomment %}
  self.lookup_{{ cmd.args.name }}_or_fallback(on_event);

{% when "conditional" %}
  {% comment %} Inline conditional: if/else chain. The first clause opens the
     chain, a later clause without a condition is the else branch, and nested
     commands are rendered by including this partial again. An empty clause
     list emits nothing, so no unmatched closing brace is produced. {% endcomment %}
  {% if cmd.args.clauses.size > 0 %}
  {% for clause in cmd.args.clauses %}
  {% if forloop.first %}
  if {{ clause.condition | rust_expr }} {
  {% else %}
  {% if clause.condition %}
  } else if {{ clause.condition | rust_expr }} {
  {% else %}
  } else {
  {% endif %}
  {% endif %}
  {% for nested_cmd in clause.commands %}
  {% include 'command' cmd: nested_cmd, func: func, return_type_info: return_type_info, states: states %}
  {% endfor %}
  {% endfor %}
  }
  {% endif %}

{% else %}
  compile_error!("Unknown command type passed to template: {{ cmd.type }}");

{% endcase %}