descent 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/SYNTAX.md ADDED
@@ -0,0 +1,334 @@
1
+ # `.desc` DSL Syntax Reference
2
+
3
+ Complete syntax reference for descent parser specifications.
4
+
5
+ For character literal syntax, see [characters.md](characters.md).
6
+
7
+ ---
8
+
9
+ ## Document Structure
10
+
11
+ ```
12
+ |parser <name> ; Required: parser name
13
+
14
+ |type[<TypeName>] <KIND> ; Type declarations (zero or more)
15
+
16
+ |entry-point /<function> ; Required: where parsing begins
17
+
18
+ |keywords[<name>] ... ; Keyword blocks (zero or more)
19
+
20
+ |function[<name>] ... ; Function definitions (one or more)
21
+ ```
22
+
23
+ ---
24
+
25
+ ## Type Declarations
26
+
27
+ Types determine what events are emitted for functions returning that type.
28
+
29
+ ```
30
+ |type[<Name>] <KIND>
31
+ ```
32
+
33
+ | KIND | On Entry | On Return |
34
+ |----------|-------------------|----------------------------|
35
+ | BRACKET | Emit `NameStart` | Emit `NameEnd` |
36
+ | CONTENT | `MARK` position | Emit `Name` with content |
37
+ | INTERNAL | Nothing | Nothing (internal use) |
38
+
39
+ **Examples:**
40
+ ```
41
+ |type[Element] BRACKET ; ElementStart on entry, ElementEnd on return
42
+ |type[Text] CONTENT ; MARK on entry, emit Text with span on return
43
+ |type[Counter] INTERNAL ; No emit - for internal values only
44
+ ```
45
+
46
+ ---
47
+
48
+ ## Entry Point
49
+
50
+ ```
51
+ |entry-point /<function>
52
+ ```
53
+
54
+ Specifies which function begins parsing. The leading `/` is required.
55
+
56
+ ---
57
+
58
+ ## Functions
59
+
60
+ ```
61
+ |function[<name>] ; Void function (no auto-emit)
62
+ |function[<name>:<Type>] ; Returns/emits Type
63
+ |function[<name>:<Type>] :<param> ; With one parameter
64
+ |function[<name>:<Type>] :p1 :p2 ; Multiple parameters
65
+ ```
66
+
67
+ ### Entry Actions
68
+
69
+ Actions on the function line execute once on function entry:
70
+
71
+ ```
72
+ |function[<name>] | <action> | <action>
73
+ ```
74
+
75
+ **Example:**
76
+ ```
77
+ |function[brace_comment] | depth = 1
78
+ ```
79
+
80
+ ### Parameter Types
81
+
82
+ Parameter types are inferred from usage:
83
+
84
+ | Usage | Inferred Type |
85
+ |------------------------|---------------------|
86
+ | `\|c[:param]` | `:byte` (u8) |
87
+ | `PREPEND(:param)` | `:bytes` (&[u8]) |
88
+ | Arithmetic/conditions | `:i32` |
89
+
90
+ ---
91
+
92
+ ## States
93
+
94
+ ```
95
+ |state[:<name>]
96
+ <cases...>
97
+ ```
98
+
99
+ States contain cases that match input and execute actions.
100
+
101
+ ---
102
+
103
+ ## Cases
104
+
105
+ Cases have the form: `|<match> |<substate> | <actions> |<transition>`
106
+
107
+ ### Match Types
108
+
109
+ | Syntax | Matches |
110
+ |---------------|--------------------------------------|
111
+ | `c[x]` | Single character `x` |
112
+ | `c[abc]` | Any of `a`, `b`, or `c` |
113
+ | `c['\n']` | Quoted character (newline) |
114
+ | `c[<0-9>]` | Character class (digits) |
115
+ | `c[:param]` | Byte parameter value |
116
+ | `LETTER` | ASCII letter (a-z, A-Z) |
117
+ | `DIGIT` | ASCII digit (0-9) |
118
+ | `HEX_DIGIT` | Hex digit (0-9, a-f, A-F) |
119
+ | `LABEL_CONT` | Letter, digit, `_`, or `-` |
120
+ | `default` | Fallback (any other byte) |
121
+ | `eof` | End of input |
122
+ | `if[<cond>]` | Conditional guard |
123
+ | (empty) | Bare action (unconditional) |
124
+
125
+ ### Substate Label
126
+
127
+ Optional label for debugging (appears in trace output):
128
+
129
+ ```
130
+ |c[x] |.found | -> |>> :next ; .found is the substate label
131
+ ```
132
+
133
+ ---
134
+
135
+ ## Actions
136
+
137
+ Actions are pipe-separated and execute left-to-right:
138
+
139
+ | Action | Description |
140
+ |----------------------|------------------------------------------|
141
+ | `->` | Advance one byte |
142
+ | `->[<chars>]` | Advance TO first occurrence (SIMD scan) |
143
+ | `MARK` | Mark position for accumulation |
144
+ | `TERM` | Terminate slice (MARK to current) |
145
+ | `TERM(-N)` | Terminate excluding last N bytes |
146
+ | `/<func>` | Call function |
147
+ | `/<func>(<args>)` | Call with arguments |
148
+ | `/error` | Emit error event |
149
+ | `/error(<Code>)` | Emit error with custom code |
150
+ | `<var> = <value>` | Assignment |
151
+ | `<var> += <N>` | Increment |
152
+ | `PREPEND('<lit>')` | Prepend literal to accumulation |
153
+ | `PREPEND(:param)` | Prepend parameter bytes |
154
+ | `KEYWORDS(<name>)` | Lookup in keyword map |
155
+ | `<Type>` | Emit event with no payload |
156
+ | `<Type>('<lit>')` | Emit event with literal value |
157
+ | `<Type>(USE_MARK)` | Emit event with accumulated content |
158
+
159
+ ### Advance-To Constraints
160
+
161
+ `->[<chars>]` uses SIMD memchr and supports:
162
+ - 1-6 literal bytes only
163
+ - Quoted characters: `->['\n\t']`
164
+ - NO character classes, NO parameter refs
165
+
166
+ ---
167
+
168
+ ## Transitions
169
+
170
+ | Syntax | Description |
171
+ |----------------|----------------------------------|
172
+ | `\|>>` | Self-loop (stay in current state)|
173
+ | `\|>> :<state>` | Go to named state |
174
+ | `\|return` | Return from function |
175
+
176
+ ---
177
+
178
+ ## Conditionals
179
+
180
+ Single-line guards (no block structure):
181
+
182
+ ```
183
+ |if[<condition>] | <actions> |<transition>
184
+ ```
185
+
186
+ A guard is typically followed by a bare (unconditional) case that acts as the else branch:
187
+ ```
188
+ |if[COL <= :col] |return
189
+ | |>> :continue ; else branch
190
+ ```
191
+
192
+ ### Condition Syntax
193
+
194
+ - Comparisons: `==`, `!=`, `<`, `<=`, `>`, `>=`
195
+ - Variables: `COL`, `LINE`, `PREV`, `:param`, local vars
196
+ - Parentheses allowed: `(COL == 1)`
197
+
198
+ ---
199
+
200
+ ## Special Variables
201
+
202
+ | Variable | Type | Description |
203
+ |----------|-------|--------------------------------------|
204
+ | `COL` | i32 | Current column (1-indexed) |
205
+ | `LINE` | i32 | Current line (1-indexed) |
206
+ | `PREV` | byte | Previous byte (0 at start) |
207
+
208
+ ---
209
+
210
+ ## Keywords
211
+
212
+ Perfect hash lookup for keyword matching:
213
+
214
+ ```
215
+ |keywords[<name>] :fallback /<func>
216
+ | <keyword> => <EventType>
217
+ | <keyword> => <EventType>
218
+ ```
219
+
220
+ **Usage:**
221
+ ```
222
+ |default | TERM | KEYWORDS(<name>) |return
223
+ ```
224
+
225
+ **Example:**
226
+ ```
227
+ |keywords[bare] :fallback /identifier
228
+ | true => BoolTrue
229
+ | false => BoolFalse
230
+ | null => Nil
231
+ ```
232
+
233
+ ---
234
+
235
+ ## Comments
236
+
237
+ Semicolon starts a comment (rest of line ignored):
238
+
239
+ ```
240
+ |parser test ; this is a comment
241
+ ```
242
+
243
+ ---
244
+
245
+ ## Character Classes
246
+
247
+ See [characters.md](characters.md) for complete specification.
248
+
249
+ ### Quick Reference
250
+
251
+ | Syntax | Description |
252
+ |-------------|--------------------------------|
253
+ | `'x'` | Single character |
254
+ | `'\n'` | Escape sequence |
255
+ | `'\xHH'` | Hex byte |
256
+ | `<abc>` | Character class (a, b, c) |
257
+ | `<0-9>`     | Character range (digits)       |
258
+ | `<LETTER>` | Predefined class |
259
+ | `<P>` | DSL-reserved char (`\|`) |
260
+
261
+ ### Predefined Classes
262
+
263
+ | Name | Characters |
264
+ |--------------|--------------------------------|
265
+ | `LETTER` | a-z, A-Z |
266
+ | `DIGIT` | 0-9 |
267
+ | `HEX_DIGIT` | 0-9, a-f, A-F |
268
+ | `LABEL_CONT` | LETTER + DIGIT + `_` + `-` |
269
+ | `WS` | Space + tab |
270
+ | `NL` | Newline |
271
+
272
+ ### DSL-Reserved Escapes
273
+
274
+ | Name | Char | Name | Char |
275
+ |------|------|------|------|
276
+ | `P` | `\|` | `SQ` | `'` |
277
+ | `L` | `[` | `DQ` | `"` |
278
+ | `R` | `]` | `BS` | `\` |
279
+ | `LB` | `{` | `LP` | `(` |
280
+ | `RB` | `}` | `RP` | `)` |
281
+
282
+ ---
283
+
284
+ ## Complete Example
285
+
286
+ ```
287
+ |parser json_value
288
+
289
+ |type[StringValue] CONTENT
290
+ |type[Object] BRACKET
291
+
292
+ |entry-point /value
293
+
294
+ |keywords[kw] :fallback /identifier
295
+ | true => BoolTrue
296
+ | false => BoolFalse
297
+ | null => Nil
298
+
299
+ |function[value]
300
+ |state[:dispatch]
301
+ |c['"'] | -> | /string_value |return
302
+ |c['{'] | -> | /object |return
303
+ |LETTER | /bare_keyword |return
304
+ |default | /error |return
305
+
306
+ |function[string_value:StringValue]
307
+ |state[:main]
308
+ |c['"'] | -> |return ; Close quote
309
+ |c['\\'] | -> | -> |>> ; Escape: skip 2
310
+ |default | -> |>> ; Collect
311
+
312
+ |function[object:Object]
313
+ |state[:main]
314
+ |c['}'] | -> |return ; Close brace
315
+ |c['"'] | -> | /string_value |>> :after
316
+ |WS | -> |>>
317
+ |default | /error |return
318
+
319
+ |state[:after]
320
+ |c[':'] | -> | /value |>> :comma
321
+ |WS | -> |>>
322
+ |default | /error |return
323
+
324
+ |state[:comma]
325
+ |c[','] | -> |>> :main
326
+ |c['}'] | -> |return
327
+ |WS | -> |>>
328
+ |default | /error |return
329
+
330
+ |function[bare_keyword]
331
+ |state[:main]
332
+ |LETTER | -> |>>
333
+ |default | TERM | KEYWORDS(kw) |return
334
+ ```
data/exe/descent ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'devex/core'
5
+
6
# Directory holding the tool definitions, resolved relative to this script.
tools_dir = File.expand_path('../lib/descent/tools', __dir__)

# Settings handed to the Devex CLI framework for the `descent` executable.
settings = Devex::Core::Configuration.new(
  executable_name: 'descent',
  flag_prefix: 'descent',
  project_markers: %w[.desc Gemfile .git],
  env_prefix: 'DESCENT'
)

# Build the CLI, register the tools, then exit with whatever `run` returns.
runner = Devex::Core::CLI.new(config: settings)
runner.load_tools(tools_dir)
exit runner.run(ARGV)
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
module Descent
  # Abstract Syntax Tree node definitions.
  #
  # Each node is a value object built with Ruby 3.2+ `Data.define`. The
  # constructors only supply defaults for optional members; the sole extra
  # methods are the two predicates on Case. Nodes represent the direct parse
  # result, before semantic analysis.
  module AST
    # Top-level machine definition. Only +name+ is required.
    Machine = Data.define(:name, :entry_point, :types, :functions, :keywords) do
      def initialize(name:, entry_point: nil, types: [], functions: [], keywords: [])
        super(name:, entry_point:, types:, functions:, keywords:)
      end
    end

    # Type declaration: |type[Name] KIND
    TypeDecl = Data.define(:name, :kind, :lineno) do
      def initialize(name:, kind:, lineno: 0)
        super(name:, kind:, lineno:)
      end
    end

    # Function definition. Everything except +name+ is optional.
    Function = Data.define(:name, :return_type, :params, :states, :eof_handler, :entry_actions, :lineno) do
      def initialize(name:, return_type: nil, params: [], states: [], eof_handler: nil, entry_actions: [],
                     lineno: 0)
        super(name:, return_type:, params:, states:, eof_handler:, entry_actions:, lineno:)
      end
    end

    # State within a function.
    State = Data.define(:name, :cases, :eof_handler, :inline_commands, :lineno) do
      def initialize(name:, cases: [], eof_handler: nil, inline_commands: [], lineno: 0)
        super(name:, cases:, eof_handler:, inline_commands:, lineno:)
      end
    end

    # Case within a state: |c[chars], |default, or |if[condition]
    Case = Data.define(:chars, :condition, :substate, :commands, :lineno) do
      def initialize(chars: nil, condition: nil, substate: nil, commands: [], lineno: 0)
        super(chars:, condition:, substate:, commands:, lineno:)
      end

      # True when the case carries neither a character match nor a condition.
      def default?
        chars.nil? && !conditional?
      end

      # True when a condition is attached (any non-nil value counts).
      def conditional?
        !condition.nil?
      end
    end

    # EOF handler: a list of commands plus the source line number.
    EOFHandler = Data.define(:commands, :lineno) do
      def initialize(commands: [], lineno: 0)
        super(commands:, lineno:)
      end
    end

    # Command/action within a case. +type+ is required; +value+ is optional.
    Command = Data.define(:type, :value, :lineno) do
      def initialize(type:, value: nil, lineno: 0)
        super(type:, value:, lineno:)
      end
    end

    # Conditional: |if[cond] ... |endif
    Conditional = Data.define(:clauses, :lineno) do
      def initialize(clauses: [], lineno: 0)
        super(clauses:, lineno:)
      end
    end

    # A clause within a conditional: an optional condition plus its commands.
    Clause = Data.define(:condition, :commands) do
      def initialize(condition: nil, commands: [])
        super(condition:, commands:)
      end
    end

    # Keywords block for phf perfect hash lookup.
    # Example: |keywords[bare] :fallback /bare_string
    #            | true  => BoolTrue
    #            | false => BoolFalse
    #
    # Members:
    #   name     - identifier for the keyword map (e.g., "bare" generates BARE_KEYWORDS)
    #   fallback - function to call when no keyword matches (e.g., "/bare_string")
    #   mappings - Array of {keyword: "string", event_type: "TypeName"}
    Keywords = Data.define(:name, :fallback, :mappings, :lineno) do
      def initialize(name:, fallback: nil, mappings: [], lineno: 0)
        super(name:, fallback:, mappings:, lineno:)
      end
    end
  end
end