parseman 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/README.md +510 -0
  2. package/dist/combinators/choice.d.ts +15 -0
  3. package/dist/combinators/choice.d.ts.map +1 -0
  4. package/dist/combinators/first-set.d.ts +8 -0
  5. package/dist/combinators/first-set.d.ts.map +1 -0
  6. package/dist/combinators/grammar.d.ts +8 -0
  7. package/dist/combinators/grammar.d.ts.map +1 -0
  8. package/dist/combinators/guard.d.ts +15 -0
  9. package/dist/combinators/guard.d.ts.map +1 -0
  10. package/dist/combinators/lazy.d.ts +14 -0
  11. package/dist/combinators/lazy.d.ts.map +1 -0
  12. package/dist/combinators/literal.d.ts +6 -0
  13. package/dist/combinators/literal.d.ts.map +1 -0
  14. package/dist/combinators/map.d.ts +8 -0
  15. package/dist/combinators/map.d.ts.map +1 -0
  16. package/dist/combinators/not.d.ts +13 -0
  17. package/dist/combinators/not.d.ts.map +1 -0
  18. package/dist/combinators/parser.d.ts +25 -0
  19. package/dist/combinators/parser.d.ts.map +1 -0
  20. package/dist/combinators/recover.d.ts +20 -0
  21. package/dist/combinators/recover.d.ts.map +1 -0
  22. package/dist/combinators/ref.d.ts +18 -0
  23. package/dist/combinators/ref.d.ts.map +1 -0
  24. package/dist/combinators/regex.d.ts +3 -0
  25. package/dist/combinators/regex.d.ts.map +1 -0
  26. package/dist/combinators/repeat.d.ts +6 -0
  27. package/dist/combinators/repeat.d.ts.map +1 -0
  28. package/dist/combinators/scanTo.d.ts +30 -0
  29. package/dist/combinators/scanTo.d.ts.map +1 -0
  30. package/dist/combinators/sequence.d.ts +7 -0
  31. package/dist/combinators/sequence.d.ts.map +1 -0
  32. package/dist/combinators/withCtx.d.ts +13 -0
  33. package/dist/combinators/withCtx.d.ts.map +1 -0
  34. package/dist/compiler/codegen.d.ts +23 -0
  35. package/dist/compiler/codegen.d.ts.map +1 -0
  36. package/dist/compiler/line-index.d.ts +16 -0
  37. package/dist/compiler/line-index.d.ts.map +1 -0
  38. package/dist/cst/grammar.d.ts +84 -0
  39. package/dist/cst/grammar.d.ts.map +1 -0
  40. package/dist/cst/incremental.d.ts +34 -0
  41. package/dist/cst/incremental.d.ts.map +1 -0
  42. package/dist/cst/types.d.ts +74 -0
  43. package/dist/cst/types.d.ts.map +1 -0
  44. package/dist/index.cjs +1795 -0
  45. package/dist/index.cjs.map +7 -0
  46. package/dist/index.d.ts +26 -0
  47. package/dist/index.d.ts.map +1 -0
  48. package/dist/index.js +1762 -0
  49. package/dist/index.js.map +7 -0
  50. package/dist/plugin/evaluator.d.ts +15 -0
  51. package/dist/plugin/evaluator.d.ts.map +1 -0
  52. package/dist/plugin/index.cjs +1473 -0
  53. package/dist/plugin/index.cjs.map +7 -0
  54. package/dist/plugin/index.d.ts +12 -0
  55. package/dist/plugin/index.d.ts.map +1 -0
  56. package/dist/plugin/index.js +1442 -0
  57. package/dist/plugin/index.js.map +7 -0
  58. package/dist/types.d.ts +201 -0
  59. package/dist/types.d.ts.map +1 -0
  60. package/package.json +67 -0
package/README.md ADDED
@@ -0,0 +1,510 @@
1
+ <p align="center">
2
+ <img src="assets/parseman.png" alt="Parmésan — 100% Pure Parsing" width="220" />
3
+ </p>
4
+
5
+ # Parmésan (PAR-zə-mahn)
6
+
7
+ Write parsers with combinators, then let the bundler plugin compile them to optimized inline functions at build time — `codePointAt` dispatch, `while` loops, zero allocation on failure paths. No generated boilerplate, no codegen step, no separate schema files.
8
+
9
+ The same code runs without the plugin: the interpreter produces identical results. Use the macro build for production; skip it in tests and anywhere a bundler isn't in the picture.
10
+
11
+ ## Install
12
+
13
+ ```bash
14
+ npm install parseman
15
+ # pnpm add parseman
16
+ ```
17
+
18
+ ---
19
+
20
+ ## Quick start
21
+
22
+ ```ts
23
+ import { literal, sequence, choice, regex, transform, parse } from 'parseman'
24
+
25
+ const method = choice(literal('GET'), literal('POST'), literal('PUT'), literal('DELETE'))
26
+ const target = regex(/[^\s]+/)
27
+ const version = regex(/1\.[01]/)
28
+
29
+ const requestLine = transform(
30
+ sequence(method, literal(' '), target, literal(' HTTP/'), version),
31
+ ([verb, , path, , ver]) => ({ verb, path, version: `HTTP/${ver}` })
32
+ )
33
+
34
+ parse(requestLine, 'GET /api/v1 HTTP/1.1')
35
+ // { ok: true, value: { verb: 'GET', path: '/api/v1', version: 'HTTP/1.1' }, span: ... }
36
+ ```
37
+
38
+ ---
39
+
40
+ ## Macro mode
41
+
42
+ Add the plugin once — your parser imports are evaluated and compiled at build time. The `parseman` import disappears from the bundle entirely.
43
+
44
+ ### 1. Register the plugin
45
+
46
+ ```ts
47
+ // vite.config.ts
48
+ import parseman from 'parseman/plugin'
49
+ export default { plugins: [parseman()] }
50
+ ```
51
+
52
+ ```js
53
+ // rollup.config.js
54
+ import parseman from 'parseman/plugin'
55
+ export default { plugins: [parseman.rollup()] }
56
+ ```
57
+
58
+ ```js
59
+ // webpack.config.js
60
+ const parseman = require('parseman/plugin')
61
+ module.exports = { plugins: [parseman.webpack()] }
62
+ ```
63
+
64
+ ### 2. Import with `with { type: 'macro' }`
65
+
66
+ ```ts
67
+ import { literal, sequence, choice, regex, transform } from 'parseman' with { type: 'macro' }
68
+ ```
69
+
70
+ Same combinators, no other changes. The plugin walks the initializer, evaluates it at build time, and replaces it with an inline function.
71
+
72
+ ### What gets emitted
73
+
74
+ ```js
75
+ // Before (source):
76
+ const method = choice(literal('GET'), literal('POST'), literal('PUT'), literal('DELETE'))
77
+
78
+ // After (bundle output):
79
+ const method = function(input, _pos, _ctx) {
80
+ const _code = _pos < input.length ? input.codePointAt(_pos) : -1
81
+ if (_code === 71) { /* G-E-T */ }
82
+ else if (_code === 80) { /* P-O-S-T */ }
83
+ else if (_code === 68) { /* D-E-L-E-T-E */ }
84
+ else return { ok: false, expected: ['"GET"', '"POST"', ...], span: { start: _pos, end: _pos } }
85
+ ...
86
+ }
87
+ ```
88
+
89
+ Disjoint first characters → single `codePointAt` dispatch. Regex parsers → sticky `/pattern/y` hoisted to closure scope. No objects allocated on failure paths.
90
+
91
+ ### Debugging still works
92
+
93
+ The plugin emits a precise source map via [magic-string](https://github.com/Rich-Harris/magic-string). Breakpoints set on the original `choice(...)` lines are hit when the compiled function runs; step-through shows original combinator source, not emitted charCode checks.
94
+
95
+ If `with { type: 'macro' }` is stripped (older bundlers, test runners), the attribute is silently ignored and the interpreter runs instead — identical results, no errors.
96
+
97
+ ### What gets compiled
98
+
99
+ Pure combinator trees — `literal`, `regex`, `sequence`, `choice`, `many`, `oneOrMore`, `optional`, `sepBy`, `transform`, `skip`. Parsers using `ref()` for recursion or that close over external variables stay as-is. The plugin compiles what it can and quietly leaves the rest alone.
100
+
101
+ ---
102
+
103
+ ## Combinators
104
+
105
+ | Combinator | Description |
106
+ |---|---|
107
+ | `literal(s, opts?)` | Exact string match. `opts.caseInsensitive` for locale-aware comparison. |
108
+ | `regex(pattern)` | Match a regex at the current position. Patterns are optimized via `regexp-tree`. |
109
+ | `sequence(...parsers)` | Match all in order; returns a tuple `[v1, v2, ...]`. Skips trivia between terms when trivia is set. |
110
+ | `choice(...parsers)` | Ordered alternatives (PEG — first match wins). Disjoint first chars → O(1) dispatch. |
111
+ | `many(parser)` | Zero or more; compiles to a `while` loop. |
112
+ | `oneOrMore(parser)` | One or more; fails if nothing matches. |
113
+ | `optional(parser)` | Zero or one; returns `null` on no match. |
114
+ | `sepBy(parser, sep)` | Zero or more `parser` separated by `sep`. |
115
+ | `transform(parser, fn)` | Map the result: `fn(value, span) → newValue`. |
116
+ | `skip(main, skipped)` | Match `main` then `skipped`; return `main`'s value. |
117
+ | `parser(factory)` | Mutually recursive grammar rules — no forward declarations needed. |
118
+ | `ref<T>()` | Low-level forward declaration slot (use `parser()` in most cases). |
119
+ | `not(parser)` | Negative lookahead — succeeds (consuming nothing) when `parser` fails. |
120
+ | `guard(predicate)` | Succeeds only when `predicate(ctx)` returns true; used for context-sensitive rules. |
121
+ | `withCtx(extra, parser)` | Merge `extra` into the user context for the duration of `parser`. |
122
+ | `recover(parser, sentinel)` | On failure, skip input until `sentinel` matches; returns a `CSTError` node. |
123
+ | `scanTo(sentinel, skips?, opts?)` | Consume input up to (and including) `sentinel`, optionally skipping balanced pairs. |
124
+ | `balanced(open, close)` | Match a balanced pair (e.g. `(…)`, `[…]`). Passed as an entry of the `skips` array of `scanTo`. |
125
+
126
+ ---
127
+
128
+ ## Whitespace and comment skipping
129
+
130
+ Pass a `trivia` parser in the options of `parse()`, and every `sequence()` will automatically skip it between terms:
131
+
132
+ ```ts
133
+ import { regex, sepBy, literal, parse } from 'parseman'
134
+
135
+ const ws = regex(/\s*/)
136
+ const word = regex(/[a-z]+/)
137
+ const list = sepBy(word, literal(','))
138
+
139
+ parse(list, 'foo , bar , baz', { trivia: ws })
140
+ // { ok: true, value: ['foo', 'bar', 'baz'], ... }
141
+ ```
142
+
143
+ Multiple trivia types — whitespace and comments — combine with `choice()` and `many()`:
144
+
145
+ ```ts
146
+ const lineComment = sequence(literal('//'), regex(/[^\n]*/))
147
+ const blockComment = sequence(literal('/*'), scanTo(literal('*/'), []))
148
+ const trivia = many(choice(regex(/\s+/), lineComment, blockComment))
149
+ ```
150
+
151
+ Use `grammar(opts, root)` instead of the `parse()` trivia option when you want trivia only for a subtree within a larger parse:
152
+
153
+ ```ts
154
+ import { grammar } from 'parseman'
155
+
156
+ const jsonValue = grammar({ trivia: ws }, choice(object, array, str, num, bool, nil))
157
+ ```
158
+
159
+ ---
160
+
161
+ ## Ordered choice and keyword disambiguation
162
+
163
+ `choice()` uses PEG ordered-choice semantics: first match wins. **Order matters.**
164
+
165
+ For keywords — where `if` should not match the prefix of `ifdef` — use `not()`:
166
+
167
+ ```ts
168
+ const wordChar = regex(/\w/)
169
+ const keyword = (s: string) => transform(sequence(literal(s), not(wordChar)), ([kw]) => kw)
170
+ const ident = regex(/[a-zA-Z_]\w*/)
171
+
172
+ const token = choice(
173
+ keyword('if'),
174
+ keyword('else'),
175
+ keyword('return'),
176
+ ident,
177
+ )
178
+ ```
179
+
180
+ When alternatives share a prefix, put the longer one first:
181
+
182
+ ```ts
183
+ // Wrong: choice(literal('in'), literal('instanceof')) — 'instanceof' never reached
184
+ const op = choice(literal('instanceof'), literal('in'), literal('if'))
185
+ ```
186
+
187
+ ---
188
+
189
+ ## Recursive grammars
190
+
191
+ Use `parser()` for mutually recursive rules. Pass a factory that receives all rule names as ready-to-use references and returns the definitions:
192
+
193
+ ```ts
194
+ import { parser, grammar, choice, sequence, literal, sepBy, transform, regex } from 'parseman'
195
+ import type { Combinator } from 'parseman'
196
+
197
+ type JSON = null | boolean | number | string | JSON[] | Record<string, JSON>
198
+
199
+ const ws = regex(/[ \t\n\r]*/)
200
+
201
+ const { value } = parser<{ value: Combinator<JSON> }>(g => {
202
+ const comma = sequence(ws, literal(','), ws)
203
+
204
+ const array = transform(
205
+ sequence(literal('['), sepBy(g.value, comma), literal(']')),
206
+ ([, items]) => items as JSON[]
207
+ )
208
+ const pair = transform(
209
+ sequence(jsonString, literal(':'), g.value),
210
+ ([key,, val]) => [key, val] as [string, JSON]
211
+ )
212
+ const object = transform(
213
+ sequence(literal('{'), sepBy(pair, comma), literal('}')),
214
+ ([, pairs]) => Object.fromEntries(pairs) as Record<string, JSON>
215
+ )
216
+
217
+ return {
218
+ value: grammar(
219
+ { trivia: ws },
220
+ choice(object, array, jsonString, jsonNumber, jsonBool, jsonNull)
221
+ ) as Combinator<JSON>,
222
+ }
223
+ })
224
+ ```
225
+
226
+ `g.value` is a parser reference that works anywhere inside the factory regardless of order. Local helpers (`comma`, `pair`, `object`) that don't need to be cross-referenced can be plain `const`. Only put a rule in the returned object if other rules need to reach it as `g.xxx`.
227
+
228
+ > **Macro note:** Recursive parsers can't be inlined as a single expression, so the plugin leaves them as-is — but you can still call `compile()` at runtime for the same speedups. The codegen emits mutually recursive named functions and handles cycles just fine. Non-recursive leaf parsers in the same file still get inlined.
229
+
230
+ ### `ref<T>()` — low-level forward declaration
231
+
232
+ `parser()` is the right tool for most recursive grammars. `ref<T>()` is the lower-level primitive it uses internally, exposed for cases where you need a single forward slot outside of a `parser()` call:
233
+
234
+ ```ts
235
+ const value = ref<JSON>()
236
+ // ... build parsers that use value ...
237
+ value.define(grammar({ trivia: ws }, choice(object, array, str, num, bool, nil)))
238
+ ```
239
+
240
+ ---
241
+
242
+ ## Class-based grammars
243
+
244
+ For grammars that need automatic CST construction, incremental re-parsing, or custom AST nodes, extend `Parser`. Capital-letter rules produce named CST nodes; lowercase rules are transparent helpers whose terminals surface as leaves of the nearest enclosing rule.
245
+
246
+ ```ts
247
+ import { Parser, parse, regex, literal, choice, sequence, many, sepBy } from 'parseman'
248
+ import type { Refs } from 'parseman'
249
+
250
+ class ExprParser extends Parser {
251
+ ws = regex(/\s*/)
252
+ digits = regex(/[0-9]+/)
253
+ ident = regex(/[a-zA-Z_]\w*/)
254
+
255
+ // Plain initializer when no cross-reference needed
256
+ Str = sequence(literal('"'), regex(/[^"]*/), literal('"'))
257
+
258
+ // Thunk form for forward / mutual references
259
+ Num = (g: Refs<ExprParser>) => g.digits
260
+ Id = (g: Refs<ExprParser>) => g.ident
261
+ Add = (g: Refs<ExprParser>) => sequence(g.Num, many(sequence(literal('+'), g.Num)))
262
+ Expr = (g: Refs<ExprParser>) => choice(g.Add, g.Num, g.Id)
263
+ }
264
+
265
+ const expr = new ExprParser()
266
+ const r = parse(expr.rule('Expr'), '1+2+3')
267
+ // r.value is a CSTNode { _tag: 'node', type: 'Expr', span, children, savedContext }
268
+ ```
269
+
270
+ ### Inheritance
271
+
272
+ Override any rule by redeclaring it in a subclass — subclass initializers run after the parent's, so the override wins automatically:
273
+
274
+ ```ts
275
+ class JSXParser extends ExprParser {
276
+ // replace just the ident rule; everything else stays
277
+ ident = regex(/[a-zA-Z_$][\w$]*/)
278
+ }
279
+ ```
280
+
281
+ ### Custom AST nodes (`buildNode`)
282
+
283
+ Override `buildNode` to return your own node type instead of the default `CSTNode`:
284
+
285
+ ```ts
286
+ import type { CSTLeaf, CSTError, CSTRawChild, Span } from 'parseman'
287
+
288
+ type MyNode = { _tag: 'node'; type: string; span: Span; savedContext: unknown; children: MyNode[]; text: string }
289
+
290
+ class MyParser extends Parser<MyNode> {
291
+ // ... rules ...
292
+
293
+ protected buildNode(
294
+ type: string,
295
+ span: Span,
296
+ children: ReadonlyArray<MyNode | CSTLeaf | CSTError>,
297
+ savedContext: unknown,
298
+ rawChildren: ReadonlyArray<CSTRawChild>,
299
+ ): MyNode {
300
+ return { _tag: 'node', type, span, savedContext, children: children as MyNode[], text: '...' }
301
+ }
302
+ }
303
+ ```
304
+
305
+ `children` contains the structural children (sub-nodes and leaf tokens, no trivia). `rawChildren` contains everything in parse order including trivia tokens — useful for whitespace-sensitive grammars.
306
+
307
+ ### Whitespace-sensitive rules with `rawChildren`
308
+
309
+ When whitespace is semantically meaningful (e.g. CSS where `div p` is a descendant combinator but `div+p` is adjacent), inspect `rawChildren` inside `buildNode`:
310
+
311
+ ```ts
312
+ import type { CSTTrivia } from 'parseman'
313
+
314
+ class CssParser extends Parser<SelectorNode> {
315
+ ident = regex(/[a-zA-Z-]+/)
316
+ Selector = (g: Refs<CssParser>) => sequence(g.ident, g.ident)
317
+
318
+ protected buildNode(type, span, children, savedContext, rawChildren) {
319
+ if (type === 'Selector') {
320
+ // rawChildren: [Ident("div"), CSTTrivia(" "), Ident("p")]
321
+ const hasDescendant = rawChildren.some(c => c._tag === 'trivia')
322
+ }
323
+ return ...
324
+ }
325
+ }
326
+
327
+ // Trivia is set on parse() — the whitespace skip happens globally
328
+ parse(css.rule('Stylesheet'), src, { trivia: many(choice(regex(/\s+/), comment)) })
329
+ ```
330
+
331
+ `CSTTrivia` nodes only appear in `rawChildren`, never in `children`. Zero-length trivia matches (e.g. `\s*` at a non-whitespace position) are not emitted.
332
+
333
+ ### Incremental re-parsing
334
+
335
+ `Parser.parse(ruleName, input)` returns a `ParseDoc` — an object holding the tree, any parse errors, and an `edit()` method for incremental re-parsing. The parser itself stays stateless; all the incremental state lives in the doc.
336
+
337
+ ```ts
338
+ const css = new CSSParser()
339
+
340
+ const doc = css.parse('Stylesheet', src)
341
+ doc.tree // CSTNode root, or null on failure
342
+ doc.errors // ParseFail[], empty on success
343
+ doc.input // the source string
344
+
345
+ // subsequent edits return a new doc — old one is untouched
346
+ // edit(from, to, replacement): select from→to in the old text, replace with replacement
347
+ const doc2 = doc.edit(changeStart, changeStart + changeLength, newText)
348
+ ```
349
+
350
+ `edit(from, to, replacement)` takes two offsets into the **old** text (`from` and `to`, measured as JavaScript string indices — UTF-16 code units, not bytes), plus the replacement string. Think of it as "highlight characters from→to, type replacement" — the same three things any editor already knows on every keystroke. Internally it builds the new text as `old.slice(0, from) + replacement + old.slice(to)`. It then finds the smallest node containing the change, re-parses just that subtree using its saved context, and stops early when the new span end matches the expected position. O(changed region) amortized for typical edits. Nodes unaffected by the edit are structurally shared between old and new docs.
351
+
352
+ Context-sensitive grammars work correctly: each CST node records a `ctx.user` snapshot at parse time (`savedContext`), so re-parsing resumes from the exact same state. Solid enough for a language server.
353
+
354
+ **In an IDE extension**, hold one parser instance per language, one `ParseDoc` per open document. On each keystroke your editor gives you the changed range as string offsets (UTF-16 code units, e.g. VS Code's `rangeOffset` and `rangeLength`) — pass those straight to `edit()`:
355
+
356
+ ```ts
357
+ // VS Code example
358
+ const parser = new CSSParser()
359
+ const docs = new Map<string, ParseDoc<CSTNode>>()
360
+
361
+ vscode.workspace.onDidOpenTextDocument(document => {
362
+ docs.set(document.uri.toString(), parser.parse('Stylesheet', document.getText()))
363
+ })
364
+
365
+ vscode.workspace.onDidChangeTextDocument(event => {
366
+ const uri = event.document.uri.toString()
367
+ let doc = docs.get(uri)!
368
+ for (const change of event.contentChanges) {
369
+ doc = doc.edit(change.rangeOffset, change.rangeOffset + change.rangeLength, change.text)
370
+ }
371
+ docs.set(uri, doc)
372
+ // walk doc.tree to emit diagnostics, folding ranges, semantic tokens, etc.
373
+ })
374
+ ```
375
+
376
+ ---
377
+
378
+ ## Context-sensitive parsing
379
+
380
+ `withCtx` and `guard` implement context-sensitive rules without mutating shared state.
381
+
382
+ `withCtx(extra, parser)` merges `extra` into the user context for the duration of `parser`. `guard(predicate)` succeeds only when `predicate(ctx)` returns true, effectively gating a rule behind runtime context.
383
+
384
+ ```ts
385
+ import { Parser, type Refs, withCtx, guard, many, sequence, choice, literal, regex } from 'parseman'
386
+
387
+ class LangParser extends Parser {
388
+ ws = regex(/\s*/)
389
+
390
+ Expr = regex(/[a-z]+/)
391
+ Return = (g: Refs<LangParser>) => sequence(
392
+ guard(ctx => (ctx as { inFn?: boolean }).inFn === true),
393
+ literal('return'),
394
+ )
395
+ Stmt = (g: Refs<LangParser>) => choice(g.Return, g.Expr)
396
+ Body = (g: Refs<LangParser>) => withCtx({ inFn: true }, many(sequence(g.Stmt, g.ws)))
397
+ Program = (g: Refs<LangParser>) => many(g.Body)
398
+ }
399
+ ```
400
+
401
+ `Return` is only reachable inside a `Body` because `guard` rejects it when `inFn` is not set. `ParseDoc.edit()` replays the correct context on incremental edits because `savedContext` captures the `inFn: true` snapshot at the node that originally set it.
402
+
403
+ ---
404
+
405
+ ## Error recovery
406
+
407
+ `recover(parser, sentinel)` wraps a parser so that on failure it skips forward until `sentinel` matches, then returns a `CSTError` node instead of bailing on the whole parse. Error recovery is never pretty, but at least you can keep going.
408
+
409
+ ```ts
410
+ import { recover, scanTo, balanced, literal } from 'parseman'
411
+
412
+ // Skip to ';' if a statement fails to parse
413
+ const stmt = recover(g.Stmt, literal(';'))
414
+
415
+ // Consume everything up to '}', skipping balanced () and [] pairs
416
+ const block = scanTo(literal('}'), [balanced(literal('('), literal(')')), balanced(literal('['), literal(']'))])
417
+ ```
418
+
419
+ `scanTo(sentinel, skips?, opts?)` consumes input character-by-character until `sentinel` matches. Pass `skips` to skip over balanced pairs that might contain the sentinel character. Pass `opts.orEOF: true` to succeed at end-of-input if the sentinel is never found.
420
+
421
+ ---
422
+
423
+ ## Line / column tracking
424
+
425
+ ```ts
426
+ const r = parse(myParser, 'hello\nworld', { trackLines: true })
427
+
428
+ if (r.ok) {
429
+ r.span.startLine // 1
430
+ r.span.startColumn // 1
431
+ r.span.endLine // 2
432
+ r.span.endColumn // 6
433
+ }
434
+ ```
435
+
436
+ Line lookup is O(log n) via binary search on a precomputed newline index built once per input string. When `trackLines` is false (the default), no index is built and spans carry only byte offsets.
437
+
438
+ ---
439
+
440
+ ## `compile()` — runtime compilation
441
+
442
+ `compile()` runs the same optimizer as the plugin, but at runtime — handy when you're assembling a grammar dynamically, or just want the speed without a build step:
443
+
444
+ ```ts
445
+ import { choice, literal, compile } from 'parseman'
446
+
447
+ const compiled = compile(choice(literal('yes'), literal('no')))
448
+ compiled.parse('yes', 0, { trackLines: false }) // { ok: true, value: 'yes', ... }
449
+ compiled.source // generated JS source string
450
+ compiled.inlineExpression // self-contained expression (what the plugin inlines)
451
+ ```
452
+
453
+ ---
454
+
455
+ ## ParseResult types
456
+
457
+ ```ts
458
+ type ParseOk<T> = { ok: true; value: T; span: Span }
459
+ type ParseFail = { ok: false; expected: string[]; span: Span }
460
+ type ParseResult<T> = ParseOk<T> | ParseFail
461
+
462
+ type Span = {
463
+ start: number // byte offset, inclusive
464
+ end: number // byte offset, exclusive
465
+ startLine?: number // 1-based; only when trackLines: true
466
+ startColumn?: number
467
+ endLine?: number
468
+ endColumn?: number
469
+ }
470
+ ```
471
+
472
+ ### CST types
473
+
474
+ ```ts
475
+ type CSTNode = { _tag: 'node'; type: string; span: Span; children: CSTChild[]; savedContext: unknown }
476
+ type CSTLeaf = { _tag: 'leaf'; value: string; span: Span }
477
+ type CSTError = { _tag: 'error'; type: string; span: Span; expected: string[]; children: CSTChild[]; savedContext: unknown }
478
+ type CSTTrivia = { _tag: 'trivia'; value: string; span: Span } // only in rawChildren
479
+
480
+ type CSTChild = CSTNode | CSTLeaf | CSTError
481
+ type CSTRawChild = CSTNode | CSTLeaf | CSTTrivia | CSTError
482
+ ```
483
+
484
+ ---
485
+
486
+ ## Benchmarks
487
+
488
+ Measured on Apple M2 Pro. Bars show µs per parse — shorter is faster.
489
+
490
+ ![JSON parsing benchmarks](assets/bench-json.svg)
491
+
492
+ ![CSV parsing benchmarks](assets/bench-csv.svg)
493
+
494
+ Parmésan compiled edges out Peggy on small and medium JSON. At 12 kB Peggy pulls ahead by ~10% — it's been doing this a while. On CSV, where the grammar is non-recursive and fully inlines, Parmésan compiled wins going away.
495
+
496
+ ---
497
+
498
+ ## Developing
499
+
500
+ ```bash
501
+ pnpm install
502
+ pnpm test # Vitest — interpreter + compiler parity + ordered-choice semantics
503
+ pnpm typecheck # TypeScript 7
504
+ pnpm build # ESM + CJS + .d.ts → dist/
505
+ pnpm bench # Parmésan vs Peggy vs Parsimmon vs Chevrotain
506
+ ```
507
+
508
+ ## License
509
+
510
+ MIT © [Matthew Dean](https://github.com/matthew-dean)
@@ -0,0 +1,15 @@
1
+ import type { Combinator, GatedArm } from '../types.ts';
2
+ type ArmParser<T> = T extends GatedArm<infer U> ? Combinator<U> : T extends Combinator<infer U> ? Combinator<U> : never;
3
+ type UnionArms<T extends (Combinator<unknown> | GatedArm<unknown>)[]> = {
4
+ [K in keyof T]: ArmParser<T[K]>;
5
+ }[number] extends Combinator<infer U> ? U : never;
6
+ export declare function choice<T extends [Combinator<unknown> | GatedArm<unknown>, ...(Combinator<unknown> | GatedArm<unknown>)[]]>(...args: T): Combinator<UnionArms<T>>;
7
+ /** Walk transform wrappers to find an inner literal's string value. */
8
+ export declare function getCoreLiteralValue(p: Combinator<unknown>): string | null;
9
+ /** Walk transform wrappers to find an inner regex's source/flags. */
10
+ export declare function getCoreRegexDef(p: Combinator<unknown>): {
11
+ source: string;
12
+ flags: string;
13
+ } | null;
14
+ export {};
15
+ //# sourceMappingURL=choice.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"choice.d.ts","sourceRoot":"","sources":["../../src/combinators/choice.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,UAAU,EAC+B,QAAQ,EAClD,MAAM,aAAa,CAAA;AAGpB,KAAK,SAAS,CAAC,CAAC,IAAI,CAAC,SAAS,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,UAAU,CAAC,MAAM,CAAC,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC,GAAG,KAAK,CAAA;AACvH,KAAK,SAAS,CAAC,CAAC,SAAS,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC,EAAE,IAAI;KACrE,CAAC,IAAI,MAAM,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;CAChC,CAAC,MAAM,CAAC,SAAS,UAAU,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,GAAG,KAAK,CAAA;AAEjD,wBAAgB,MAAM,CAAC,CAAC,SAAS,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,QAAQ,CAAC,OAAO,CAAC,EAAE,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC,EAAE,CAAC,EACxH,GAAG,IAAI,EAAE,CAAC,GACT,UAAU,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAqH1B;AAwFD,uEAAuE;AACvE,wBAAgB,mBAAmB,CAAC,CAAC,EAAE,UAAU,CAAC,OAAO,CAAC,GAAG,MAAM,GAAG,IAAI,CAKzE;AAED,qEAAqE;AACrE,wBAAgB,eAAe,CAAC,CAAC,EAAE,UAAU,CAAC,OAAO,CAAC,GAAG;IAAE,MAAM,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAKhG"}
@@ -0,0 +1,8 @@
1
+ import type { FirstSet } from '../types.ts';
2
+ export declare function union(a: FirstSet, b: FirstSet): FirstSet;
3
+ export declare function intersects(a: FirstSet, b: FirstSet): boolean;
4
+ export declare function fromChar(code: number): FirstSet;
5
+ export declare function fromRange(lo: number, hi: number): FirstSet;
6
+ export declare function any(): FirstSet;
7
+ export declare function empty(): FirstSet;
8
+ //# sourceMappingURL=first-set.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"first-set.d.ts","sourceRoot":"","sources":["../../src/combinators/first-set.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAa,QAAQ,EAAE,MAAM,aAAa,CAAA;AAEtD,wBAAgB,KAAK,CAAC,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,QAAQ,GAAG,QAAQ,CAKxD;AAED,wBAAgB,UAAU,CAAC,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,QAAQ,GAAG,OAAO,CAS5D;AAED,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,QAAQ,CAE/C;AAED,wBAAgB,SAAS,CAAC,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,GAAG,QAAQ,CAE1D;AAED,wBAAgB,GAAG,IAAI,QAAQ,CAE9B;AAED,wBAAgB,KAAK,IAAI,QAAQ,CAEhC"}
@@ -0,0 +1,8 @@
1
+ import type { Combinator, ParseResult } from '../types.ts';
2
+ export type ParseOptions = {
3
+ trivia?: Combinator<unknown>;
4
+ trackLines?: boolean;
5
+ };
6
+ export declare function grammar<T>(opts: ParseOptions, root: Combinator<T>): Combinator<T>;
7
+ export declare function parse<T>(parser: Combinator<T>, input: string, opts?: ParseOptions): ParseResult<T>;
8
+ //# sourceMappingURL=grammar.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"grammar.d.ts","sourceRoot":"","sources":["../../src/combinators/grammar.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAgB,WAAW,EAAE,MAAM,aAAa,CAAA;AAGxE,MAAM,MAAM,YAAY,GAAG;IACzB,MAAM,CAAC,EAAE,UAAU,CAAC,OAAO,CAAC,CAAA;IAC5B,UAAU,CAAC,EAAE,OAAO,CAAA;CACrB,CAAA;AAED,wBAAgB,OAAO,CAAC,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,UAAU,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC,CAuBjF;AAED,wBAAgB,KAAK,CAAC,CAAC,EACrB,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC,EACrB,KAAK,EAAE,MAAM,EACb,IAAI,GAAE,YAAiB,GACtB,WAAW,CAAC,CAAC,CAAC,CAWhB"}
@@ -0,0 +1,15 @@
1
+ import type { Combinator } from '../types.ts';
2
+ /**
3
+ * Zero-width assertion: succeeds (consuming nothing) only when `predicate`
4
+ * returns true for `ctx.user`. Fails otherwise.
5
+ *
6
+ * Intended for use inside sequence() to gate subsequent parsing on runtime
7
+ * context set with withCtx().
8
+ *
9
+ * const returnStmt = sequence(
10
+ * guard(ctx => (ctx as { inFn: boolean }).inFn),
11
+ * literal('return'), optional(expr)
12
+ * )
13
+ */
14
+ export declare function guard(predicate: (user: unknown) => boolean): Combinator<null>;
15
+ //# sourceMappingURL=guard.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"guard.d.ts","sourceRoot":"","sources":["../../src/combinators/guard.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAyC,MAAM,aAAa,CAAA;AAEpF;;;;;;;;;;;GAWG;AACH,wBAAgB,KAAK,CAAC,SAAS,EAAE,CAAC,IAAI,EAAE,OAAO,KAAK,OAAO,GAAG,UAAU,CAAC,IAAI,CAAC,CAgB7E"}
@@ -0,0 +1,14 @@
1
+ import type { Combinator } from '../types.ts';
2
+ /**
3
+ * Defers parser construction until first use — necessary for recursive grammars
4
+ * where a parser references itself (e.g. JSON value contains JSON arrays/objects).
5
+ *
6
+ * The thunk is called once and the result cached. First-set metadata is
7
+ * approximated as 'any' since it's unknown at construction time; this means
8
+ * lazy parsers inside choice() won't get O(1) disjoint dispatch, but they
9
+ * will work correctly.
10
+ *
11
+ * The compiler treats lazy as a runtime fallback (can't inline recursive parsers).
12
+ */
13
+ export declare function lazy<T>(thunk: () => Combinator<T>): Combinator<T>;
14
+ //# sourceMappingURL=lazy.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"lazy.d.ts","sourceRoot":"","sources":["../../src/combinators/lazy.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAyC,MAAM,aAAa,CAAA;AAGpF;;;;;;;;;;GAUG;AACH,wBAAgB,IAAI,CAAC,CAAC,EAAE,KAAK,EAAE,MAAM,UAAU,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC,CAkBjE"}
@@ -0,0 +1,6 @@
1
+ import type { Combinator } from '../types.ts';
2
+ export type LiteralOptions = {
3
+ caseInsensitive?: boolean;
4
+ };
5
+ export declare function literal(value: string, opts?: LiteralOptions): Combinator<string>;
6
+ //# sourceMappingURL=literal.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"literal.d.ts","sourceRoot":"","sources":["../../src/combinators/literal.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAyC,MAAM,aAAa,CAAA;AAWpF,MAAM,MAAM,cAAc,GAAG;IAC3B,eAAe,CAAC,EAAE,OAAO,CAAA;CAC1B,CAAA;AAED,wBAAgB,OAAO,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,cAAmB,GAAG,UAAU,CAAC,MAAM,CAAC,CAgDpF"}
@@ -0,0 +1,8 @@
1
+ import type { Combinator } from '../types.ts';
2
+ export declare function transform<T, U>(parser: Combinator<T>, fn: (value: T, span: {
3
+ start: number;
4
+ end: number;
5
+ }) => U): Combinator<U>;
6
+ export declare function skip<T, S>(main: Combinator<T>, skipped: Combinator<S>): Combinator<T>;
7
+ export declare function trivia<T>(parser: Combinator<T>): Combinator<T>;
8
+ //# sourceMappingURL=map.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"map.d.ts","sourceRoot":"","sources":["../../src/combinators/map.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAA6B,MAAM,aAAa,CAAA;AAExE,wBAAgB,SAAS,CAAC,CAAC,EAAE,CAAC,EAC5B,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC,EACrB,EAAE,EAAE,CAAC,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,KAAK,CAAC,GACxD,UAAU,CAAC,CAAC,CAAC,CAWf;AAED,wBAAgB,IAAI,CAAC,CAAC,EAAE,CAAC,EAAE,IAAI,EAAE,UAAU,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,UAAU,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC,CAarF;AAED,wBAAgB,MAAM,CAAC,CAAC,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC,CAO9D"}
@@ -0,0 +1,13 @@
1
+ import type { Combinator } from '../types.ts';
2
+ /**
3
+ * Negative lookahead. Succeeds (consuming nothing) when `parser` fails;
4
+ * fails when `parser` succeeds.
5
+ *
6
+ * The standard way to match a keyword without also matching the prefix
7
+ * of a longer identifier:
8
+ *
9
+ * const kwTrue = sequence(literal('true'), not(regex(/\w/)))
10
+ * // matches "true" in "true && x" but NOT in "trueish" or "trueness"
11
+ */
12
+ export declare function not(parser: Combinator<unknown>): Combinator<null>;
13
+ //# sourceMappingURL=not.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"not.d.ts","sourceRoot":"","sources":["../../src/combinators/not.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAyC,MAAM,aAAa,CAAA;AAGpF;;;;;;;;;GASG;AACH,wBAAgB,GAAG,CAAC,MAAM,EAAE,UAAU,CAAC,OAAO,CAAC,GAAG,UAAU,CAAC,IAAI,CAAC,CAmBjE"}
@@ -0,0 +1,25 @@
1
+ import type { Combinator } from '../types.ts';
2
+ /**
3
+ * Define mutually recursive grammar parsers without forward declarations.
4
+ *
5
+ * Pass a factory that receives all rule names as parser references (via a Proxy)
6
+ * and returns a record of parser definitions. parser() handles creating ref()
7
+ * placeholders and wiring them up — the user never sees ref() at all.
8
+ *
9
+ * const { value } = parser(g => ({
10
+ * value: choice(g.object, g.array, str, num, bool, nil),
11
+ * object: transform(sequence('{', sepBy(g.pair, ','), '}'), Object.fromEntries),
12
+ * array: transform(sequence('[', sepBy(g.value, ','), ']'), ([, items]) => items),
13
+ * pair: transform(sequence(g.key, literal(':'), g.value), ([k,, v]) => [k, v]),
14
+ * }))
15
+ *
16
+ * Not every name in the factory must appear in the returned object — local helpers
17
+ * (like `comma`, `key`) can be plain const inside the factory and composed normally.
18
+ * Only names that OTHER parsers reference via `g.xxx` need to be in the returned record.
19
+ *
20
+ * TypeScript: use an explicit type parameter for full type safety on `g`:
21
+ * parser<{ value: Combinator<JSONValue>; array: Combinator<JSONValue[]> }>(g => ({ ... }))
22
+ * Without it, `g.*` accesses are typed as `any` but the return is still inferred.
23
+ */
24
+ export declare function parser<T extends Record<string, Combinator<unknown>>>(factory: (self: any) => T): T;
25
+ //# sourceMappingURL=parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../../src/combinators/parser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AAG7C;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,wBAAgB,MAAM,CAAC,CAAC,SAAS,MAAM,CAAC,MAAM,EAAE,UAAU,CAAC,OAAO,CAAC,CAAC,EAClE,OAAO,EAAE,CAAC,IAAI,EAAE,GAAG,KAAK,CAAC,GACxB,CAAC,CAmCH"}