descent 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +285 -0
- data/README.md +583 -0
- data/SYNTAX.md +334 -0
- data/exe/descent +15 -0
- data/lib/descent/ast.rb +69 -0
- data/lib/descent/generator.rb +489 -0
- data/lib/descent/ir.rb +98 -0
- data/lib/descent/ir_builder.rb +1479 -0
- data/lib/descent/lexer.rb +308 -0
- data/lib/descent/parser.rb +450 -0
- data/lib/descent/railroad.rb +272 -0
- data/lib/descent/templates/rust/_command.liquid +174 -0
- data/lib/descent/templates/rust/parser.liquid +1163 -0
- data/lib/descent/tools/debug.rb +115 -0
- data/lib/descent/tools/diagram.rb +48 -0
- data/lib/descent/tools/generate.rb +47 -0
- data/lib/descent/tools/validate.rb +56 -0
- data/lib/descent/validator.rb +231 -0
- data/lib/descent/version.rb +5 -0
- data/lib/descent.rb +34 -0
- metadata +101 -0
data/SYNTAX.md
ADDED
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
# `.desc` DSL Syntax Reference
|
|
2
|
+
|
|
3
|
+
Complete syntax reference for descent parser specifications.
|
|
4
|
+
|
|
5
|
+
For character literal syntax, see [characters.md](characters.md).
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Document Structure
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
|parser <name> ; Required: parser name
|
|
13
|
+
|
|
14
|
+
|type[<TypeName>] <KIND> ; Type declarations (zero or more)
|
|
15
|
+
|
|
16
|
+
|entry-point /<function> ; Required: where parsing begins
|
|
17
|
+
|
|
18
|
+
|keywords[<name>] ... ; Keyword blocks (zero or more)
|
|
19
|
+
|
|
20
|
+
|function[<name>] ... ; Function definitions (one or more)
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Type Declarations
|
|
26
|
+
|
|
27
|
+
A function's return type determines which events are emitted when the function is entered and when it returns.
|
|
28
|
+
|
|
29
|
+
```
|
|
30
|
+
|type[<Name>] <KIND>
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
| KIND | On Entry | On Return |
|
|
34
|
+
|----------|-------------------|----------------------------|
|
|
35
|
+
| BRACKET | Emit `NameStart` | Emit `NameEnd` |
|
|
36
|
+
| CONTENT | `MARK` position | Emit `Name` with content |
|
|
37
|
+
| INTERNAL | Nothing | Nothing (internal use) |
|
|
38
|
+
|
|
39
|
+
**Examples:**
|
|
40
|
+
```
|
|
41
|
+
|type[Element] BRACKET ; ElementStart on entry, ElementEnd on return
|
|
42
|
+
|type[Text] CONTENT ; MARK on entry, emit Text with span on return
|
|
43
|
+
|type[Counter] INTERNAL ; No emit - for internal values only
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## Entry Point
|
|
49
|
+
|
|
50
|
+
```
|
|
51
|
+
|entry-point /<function>
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Specifies which function begins parsing. The leading `/` is required.
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## Functions
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
|function[<name>] ; Void function (no auto-emit)
|
|
62
|
+
|function[<name>:<Type>] ; Returns/emits Type
|
|
63
|
+
|function[<name>:<Type>] :<param> ; With one parameter
|
|
64
|
+
|function[<name>:<Type>] :p1 :p2 ; Multiple parameters
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Entry Actions
|
|
68
|
+
|
|
69
|
+
Actions on the function line execute once on function entry:
|
|
70
|
+
|
|
71
|
+
```
|
|
72
|
+
|function[<name>] | <action> | <action>
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
**Example:**
|
|
76
|
+
```
|
|
77
|
+
|function[brace_comment] | depth = 1
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Parameter Types
|
|
81
|
+
|
|
82
|
+
Parameter types are inferred from usage:
|
|
83
|
+
|
|
84
|
+
| Usage | Inferred Type |
|
|
85
|
+
|------------------------|---------------------|
|
|
86
|
+
| `\|c[:param]` | `:byte` (u8) |
|
|
87
|
+
| `PREPEND(:param)` | `:bytes` (&[u8]) |
|
|
88
|
+
| Arithmetic/conditions | `:i32` |
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## States
|
|
93
|
+
|
|
94
|
+
```
|
|
95
|
+
|state[:<name>]
|
|
96
|
+
<cases...>
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
States contain cases that match input and execute actions.
|
|
100
|
+
|
|
101
|
+
---
|
|
102
|
+
|
|
103
|
+
## Cases
|
|
104
|
+
|
|
105
|
+
Cases have the form: `|<match> |<substate> | <actions> |<transition>`
|
|
106
|
+
|
|
107
|
+
### Match Types
|
|
108
|
+
|
|
109
|
+
| Syntax | Matches |
|
|
110
|
+
|---------------|--------------------------------------|
|
|
111
|
+
| `c[x]` | Single character `x` |
|
|
112
|
+
| `c[abc]` | Any of `a`, `b`, or `c` |
|
|
113
|
+
| `c['\n']` | Quoted character (newline) |
|
|
114
|
+
| `c[<0-9>]` | Character class (digits) |
|
|
115
|
+
| `c[:param]` | Byte parameter value |
|
|
116
|
+
| `LETTER` | ASCII letter (a-z, A-Z) |
|
|
117
|
+
| `DIGIT` | ASCII digit (0-9) |
|
|
118
|
+
| `HEX_DIGIT` | Hex digit (0-9, a-f, A-F) |
|
|
119
|
+
| `LABEL_CONT` | Letter, digit, `_`, or `-` |
|
|
120
|
+
| `default` | Fallback (any other byte) |
|
|
121
|
+
| `eof` | End of input |
|
|
122
|
+
| `if[<cond>]` | Conditional guard |
|
|
123
|
+
| (empty) | Bare action (unconditional) |
|
|
124
|
+
|
|
125
|
+
### Substate Label
|
|
126
|
+
|
|
127
|
+
Optional label for debugging (appears in trace output):
|
|
128
|
+
|
|
129
|
+
```
|
|
130
|
+
|c[x] |.found | -> |>> :next ; .found is the substate label
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## Actions
|
|
136
|
+
|
|
137
|
+
Actions are pipe-separated and execute left-to-right:
|
|
138
|
+
|
|
139
|
+
| Action | Description |
|
|
140
|
+
|----------------------|------------------------------------------|
|
|
141
|
+
| `->` | Advance one byte |
|
|
142
|
+
| `->[<chars>]` | Advance TO first occurrence (SIMD scan) |
|
|
143
|
+
| `MARK` | Mark position for accumulation |
|
|
144
|
+
| `TERM` | Terminate slice (MARK to current) |
|
|
145
|
+
| `TERM(-N)` | Terminate excluding last N bytes |
|
|
146
|
+
| `/<func>` | Call function |
|
|
147
|
+
| `/<func>(<args>)` | Call with arguments |
|
|
148
|
+
| `/error` | Emit error event |
|
|
149
|
+
| `/error(<Code>)` | Emit error with custom code |
|
|
150
|
+
| `<var> = <value>` | Assignment |
|
|
151
|
+
| `<var> += <N>` | Increment |
|
|
152
|
+
| `PREPEND('<lit>')` | Prepend literal to accumulation |
|
|
153
|
+
| `PREPEND(:param)` | Prepend parameter bytes |
|
|
154
|
+
| `KEYWORDS(<name>)` | Lookup in keyword map |
|
|
155
|
+
| `<Type>` | Emit event with no payload |
|
|
156
|
+
| `<Type>('<lit>')` | Emit event with literal value |
|
|
157
|
+
| `<Type>(USE_MARK)` | Emit event with accumulated content |
|
|
158
|
+
|
|
159
|
+
### Advance-To Constraints
|
|
160
|
+
|
|
161
|
+
`->[<chars>]` uses SIMD memchr and supports:
|
|
162
|
+
- 1-6 literal bytes only
|
|
163
|
+
- Quoted characters: `->['\n\t']`
|
|
164
|
+
- NO character classes, NO parameter refs
|
|
165
|
+
|
|
166
|
+
---
|
|
167
|
+
|
|
168
|
+
## Transitions
|
|
169
|
+
|
|
170
|
+
| Syntax | Description |
|
|
171
|
+
|----------------|----------------------------------|
|
|
172
|
+
| `\|>>` | Self-loop (stay in current state)|
|
|
173
|
+
| `\|>> :<state>` | Go to named state |
|
|
174
|
+
| `\|return` | Return from function |
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## Conditionals
|
|
179
|
+
|
|
180
|
+
Single-line guards (no block structure):
|
|
181
|
+
|
|
182
|
+
```
|
|
183
|
+
|if[<condition>] | <actions> |<transition>
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
A guard can be followed by a bare (unconditional) case, which acts as the else branch:
|
|
187
|
+
```
|
|
188
|
+
|if[COL <= :col] |return
|
|
189
|
+
| |>> :continue ; else branch
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
### Condition Syntax
|
|
193
|
+
|
|
194
|
+
- Comparisons: `==`, `!=`, `<`, `<=`, `>`, `>=`
|
|
195
|
+
- Variables: `COL`, `LINE`, `PREV`, `:param`, local vars
|
|
196
|
+
- Parentheses allowed: `(COL == 1)`
|
|
197
|
+
|
|
198
|
+
---
|
|
199
|
+
|
|
200
|
+
## Special Variables
|
|
201
|
+
|
|
202
|
+
| Variable | Type | Description |
|
|
203
|
+
|----------|-------|--------------------------------------|
|
|
204
|
+
| `COL` | i32 | Current column (1-indexed) |
|
|
205
|
+
| `LINE` | i32 | Current line (1-indexed) |
|
|
206
|
+
| `PREV` | byte | Previous byte (0 at start) |
|
|
207
|
+
|
|
208
|
+
---
|
|
209
|
+
|
|
210
|
+
## Keywords
|
|
211
|
+
|
|
212
|
+
Perfect hash lookup for keyword matching:
|
|
213
|
+
|
|
214
|
+
```
|
|
215
|
+
|keywords[<name>] :fallback /<func>
|
|
216
|
+
| <keyword> => <EventType>
|
|
217
|
+
| <keyword> => <EventType>
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
**Usage:**
|
|
221
|
+
```
|
|
222
|
+
|default | TERM | KEYWORDS(<name>) |return
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
**Example:**
|
|
226
|
+
```
|
|
227
|
+
|keywords[bare] :fallback /identifier
|
|
228
|
+
| true => BoolTrue
|
|
229
|
+
| false => BoolFalse
|
|
230
|
+
| null => Nil
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
---
|
|
234
|
+
|
|
235
|
+
## Comments
|
|
236
|
+
|
|
237
|
+
Semicolon starts a comment (rest of line ignored):
|
|
238
|
+
|
|
239
|
+
```
|
|
240
|
+
|parser test ; this is a comment
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
---
|
|
244
|
+
|
|
245
|
+
## Character Classes
|
|
246
|
+
|
|
247
|
+
See [characters.md](characters.md) for complete specification.
|
|
248
|
+
|
|
249
|
+
### Quick Reference
|
|
250
|
+
|
|
251
|
+
| Syntax | Description |
|
|
252
|
+
|-------------|--------------------------------|
|
|
253
|
+
| `'x'` | Single character |
|
|
254
|
+
| `'\n'` | Escape sequence |
|
|
255
|
+
| `'\xHH'` | Hex byte |
|
|
256
|
+
| `<abc>` | Character class (a, b, c) |
|
|
257
|
+
| `<0-9>` | Predefined range (digits) |
|
|
258
|
+
| `<LETTER>` | Predefined class |
|
|
259
|
+
| `<P>` | DSL-reserved char (`\|`) |
|
|
260
|
+
|
|
261
|
+
### Predefined Classes
|
|
262
|
+
|
|
263
|
+
| Name | Characters |
|
|
264
|
+
|--------------|--------------------------------|
|
|
265
|
+
| `LETTER` | a-z, A-Z |
|
|
266
|
+
| `DIGIT` | 0-9 |
|
|
267
|
+
| `HEX_DIGIT` | 0-9, a-f, A-F |
|
|
268
|
+
| `LABEL_CONT` | LETTER + DIGIT + `_` + `-` |
|
|
269
|
+
| `WS` | Space + tab |
|
|
270
|
+
| `NL` | Newline |
|
|
271
|
+
|
|
272
|
+
### DSL-Reserved Escapes
|
|
273
|
+
|
|
274
|
+
| Name | Char | Name | Char |
|
|
275
|
+
|------|------|------|------|
|
|
276
|
+
| `P` | `\|` | `SQ` | `'` |
|
|
277
|
+
| `L` | `[` | `DQ` | `"` |
|
|
278
|
+
| `R` | `]` | `BS` | `\` |
|
|
279
|
+
| `LB` | `{` | `LP` | `(` |
|
|
280
|
+
| `RB` | `}` | `RP` | `)` |
|
|
281
|
+
|
|
282
|
+
---
|
|
283
|
+
|
|
284
|
+
## Complete Example
|
|
285
|
+
|
|
286
|
+
```
|
|
287
|
+
|parser json_value
|
|
288
|
+
|
|
289
|
+
|type[StringValue] CONTENT
|
|
290
|
+
|type[Object] BRACKET
|
|
291
|
+
|
|
292
|
+
|entry-point /value
|
|
293
|
+
|
|
294
|
+
|keywords[kw] :fallback /identifier
|
|
295
|
+
| true => BoolTrue
|
|
296
|
+
| false => BoolFalse
|
|
297
|
+
| null => Nil
|
|
298
|
+
|
|
299
|
+
|function[value]
|
|
300
|
+
|state[:dispatch]
|
|
301
|
+
|c['"'] | -> | /string_value |return
|
|
302
|
+
|c['{'] | -> | /object |return
|
|
303
|
+
|LETTER | /bare_keyword |return
|
|
304
|
+
|default | /error |return
|
|
305
|
+
|
|
306
|
+
|function[string_value:StringValue]
|
|
307
|
+
|state[:main]
|
|
308
|
+
|c['"'] | -> |return ; Close quote
|
|
309
|
+
|c['\\'] | -> | -> |>> ; Escape: skip 2
|
|
310
|
+
|default | -> |>> ; Collect
|
|
311
|
+
|
|
312
|
+
|function[object:Object]
|
|
313
|
+
|state[:main]
|
|
314
|
+
|c['}'] | -> |return ; Close brace
|
|
315
|
+
|c['"'] | -> | /string_value |>> :after
|
|
316
|
+
|WS | -> |>>
|
|
317
|
+
|default | /error |return
|
|
318
|
+
|
|
319
|
+
|state[:after]
|
|
320
|
+
|c[':'] | -> | /value |>> :comma
|
|
321
|
+
|WS | -> |>>
|
|
322
|
+
|default | /error |return
|
|
323
|
+
|
|
324
|
+
|state[:comma]
|
|
325
|
+
|c[','] | -> |>> :main
|
|
326
|
+
|c['}'] | -> |return
|
|
327
|
+
|WS | -> |>>
|
|
328
|
+
|default | /error |return
|
|
329
|
+
|
|
330
|
+
|function[bare_keyword]
|
|
331
|
+
|state[:main]
|
|
332
|
+
|LETTER | -> |>>
|
|
333
|
+
|default | TERM | KEYWORDS(kw) |return
|
|
334
|
+
```
|
data/exe/descent
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require 'devex/core'
|
|
5
|
+
|
|
6
|
+
config = Devex::Core::Configuration.new(
|
|
7
|
+
executable_name: 'descent',
|
|
8
|
+
flag_prefix: 'descent',
|
|
9
|
+
project_markers: %w[.desc Gemfile .git],
|
|
10
|
+
env_prefix: 'DESCENT'
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
cli = Devex::Core::CLI.new(config:)
|
|
14
|
+
cli.load_tools(File.expand_path('../lib/descent/tools', __dir__))
|
|
15
|
+
exit cli.run(ARGV)
|
data/lib/descent/ast.rb
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Descent
|
|
4
|
+
# Abstract Syntax Tree nodes - plain data with no behavior beyond simple predicates.
|
|
5
|
+
#
|
|
6
|
+
# Uses Ruby 3.2+ Data class for immutable value objects.
|
|
7
|
+
# These represent the direct parse result before semantic analysis.
|
|
8
|
+
module AST
|
|
9
|
+
# Top-level machine definition
|
|
10
|
+
Machine = Data.define(:name, :entry_point, :types, :functions, :keywords) do
|
|
11
|
+
def initialize(name:, entry_point: nil, types: [], functions: [], keywords: []) = super
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
# Type declaration: |type[Name] KIND
|
|
15
|
+
TypeDecl = Data.define(:name, :kind, :lineno) do
|
|
16
|
+
def initialize(name:, kind:, lineno: 0) = super
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
# Function definition
|
|
20
|
+
Function = Data.define(:name, :return_type, :params, :states, :eof_handler, :entry_actions, :lineno) do
|
|
21
|
+
def initialize(name:, return_type: nil, params: [], states: [], eof_handler: nil, entry_actions: [],
|
|
22
|
+
lineno: 0) = super
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# State within a function
|
|
26
|
+
State = Data.define(:name, :cases, :eof_handler, :inline_commands, :lineno) do
|
|
27
|
+
def initialize(name:, cases: [], eof_handler: nil, inline_commands: [], lineno: 0) = super
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
# Case within a state: |c[chars], |default, or |if[condition]
|
|
31
|
+
Case = Data.define(:chars, :condition, :substate, :commands, :lineno) do
|
|
32
|
+
def initialize(chars: nil, condition: nil, substate: nil, commands: [], lineno: 0) = super
|
|
33
|
+
|
|
34
|
+
def default? = chars.nil? && condition.nil?
|
|
35
|
+
def conditional? = !condition.nil?
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# EOF handler
|
|
39
|
+
EOFHandler = Data.define(:commands, :lineno) do
|
|
40
|
+
def initialize(commands: [], lineno: 0) = super
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# Command/action within a case
|
|
44
|
+
Command = Data.define(:type, :value, :lineno) do
|
|
45
|
+
def initialize(type:, value: nil, lineno: 0) = super
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
# Conditional: |if[cond] ... |endif
|
|
49
|
+
Conditional = Data.define(:clauses, :lineno) do
|
|
50
|
+
def initialize(clauses: [], lineno: 0) = super
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# A clause within a conditional
|
|
54
|
+
Clause = Data.define(:condition, :commands) do
|
|
55
|
+
def initialize(condition: nil, commands: []) = super
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
# Keywords block for phf perfect hash lookup
|
|
59
|
+
# Example: |keywords[bare] :fallback /bare_string
|
|
60
|
+
# | true => BoolTrue
|
|
61
|
+
# | false => BoolFalse
|
|
62
|
+
Keywords = Data.define(:name, :fallback, :mappings, :lineno) do
|
|
63
|
+
# name: identifier for the keyword map (e.g., "bare" generates BARE_KEYWORDS)
|
|
64
|
+
# fallback: function to call when no keyword matches (e.g., "/bare_string")
|
|
65
|
+
# mappings: Array of {keyword: "string", event_type: "TypeName"}
|
|
66
|
+
def initialize(name:, fallback: nil, mappings: [], lineno: 0) = super
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
end
|