@plurnk/plurnk-grammar 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json ADDED
@@ -0,0 +1,49 @@
1
+ {
2
+ "name": "@plurnk/plurnk-grammar",
3
+ "version": "0.1.0",
4
+ "description": "ANTLR4 grammar for the Plurnk LLM agent protocol",
5
+ "type": "module",
6
+ "license": "MIT",
7
+ "publishConfig": {
8
+ "access": "public"
9
+ },
10
+ "engines": {
11
+ "node": ">=23.6"
12
+ },
13
+ "exports": {
14
+ ".": "./src/index.ts",
15
+ "./package.json": "./package.json"
16
+ },
17
+ "bin": {
18
+ "plurnk": "./bin/plurnk.ts"
19
+ },
20
+ "files": [
21
+ "bin/plurnk.ts",
22
+ "src/*.ts",
23
+ "src/generated/*.ts",
24
+ "SPEC.md",
25
+ "plurnk.md"
26
+ ],
27
+ "scripts": {
28
+ "build:grammar": "antlr-ng -D language=TypeScript -o src/generated --generate-visitor true --generate-listener false plurnkLexer.g4 plurnkParser.g4 && node scriptify/fix-generated-imports.ts",
29
+ "antlr:tokens": "testrig src/generated/plurnk document --tokens",
30
+ "antlr:trace": "testrig src/generated/plurnk document --trace",
31
+ "antlr:parse": "testrig src/generated/plurnk document --tree",
32
+ "test:lint": "tsc --noEmit",
33
+ "test:unit": "node --test test/unit/*.test.ts",
34
+ "test:intg": "node --test test/integration/*.test.ts",
35
+ "test:demo": "node --test test/demo/*.test.ts",
36
+ "test:all": "npm run test:lint && npm run test:unit && npm run test:intg && npm run test:demo",
37
+ "prepublishOnly": "npm run build:grammar && npm run test:all"
38
+ },
39
+ "dependencies": {
40
+ "antlr4ng": "^3.0.0",
41
+ "jsonpath-plus": "^10.4.0",
42
+ "xpath": "^0.0.34"
43
+ },
44
+ "devDependencies": {
45
+ "@types/node": "^25.8.0",
46
+ "antlr-ng": "^1.0.10",
47
+ "typescript": "^6.0.3"
48
+ }
49
+ }
package/plurnk.md ADDED
@@ -0,0 +1,116 @@
1
+ # Plurnk System Grammar
2
+
3
+ YOU MUST ONLY use the HEREDOC-inspired Plurnk Operations (FIND|READ|EDIT|COPY|MOVE|SHOW|HIDE|SEND|EXEC).
4
+
5
+ ## Syntax
6
+
7
+ ```
8
+ <<OPsuffix[signal]?(path)?<L>?:body?:OPsuffix
9
+ ```
10
+
11
+ Slot order is fixed. Slots between `<<OPsuffix` and `:body:` are all optional. `:body:` fences are required (use `::` when body is empty). Close tag's `OPsuffix` must character-match the open.
12
+
13
+ ## Operations
14
+
15
+ | OP | `[signal]` | `(path)` | `<L>` | body |
16
+ |------|---------------|----------|------------------|--------------------------|
17
+ | FIND | filter tags | required | results `N-M` | matcher |
18
+ | READ | filter tags | required | lines `N-M` | matcher |
19
+ | EDIT | tags | required | lines `N-M` | content (empty = clear) |
20
+ | COPY | apply tags | required | lines `N-M` | destination URI |
21
+ | MOVE | apply tags | required | lines `N-M` | destination URI |
22
+ | SHOW | filter tags | required | results `N-M` | matcher |
23
+ | HIDE | filter tags | required | results `N-M` | matcher |
24
+ | SEND | HTTP status | optional | — | message (JSON for data) |
25
+ | EXEC | Runtime Tag | required | — | command or code |
26
+
27
+ SEND signal is a single integer. EXEC signal is a single Runtime Tag (`sh`, `node`, `python`, etc.). All other signals are CSV.
28
+
29
+ ## `<L>`
30
+
31
+ `<N>` selects position N. `<N-M>` selects the inclusive range N-M. N and M are signed integers. Sentinels: `<0>` before position 1 (prepend), `<-1>` after the last position (append). Range example: `<-3--1>` is positions -3..-1.
32
+
33
+ ## Body matcher dispatch (FIND, READ, SHOW, HIDE)
34
+
35
+ | leading prefix | dialect | form |
36
+ |----------------|----------|-----------------------|
37
+ | `//` | xpath | `//selector` |
38
+ | `/`            | regex    | `/pattern/[igmsu]*`   |
39
+ | `$` | jsonpath | `$.field` |
40
+ | otherwise | glob | `pattern` |
41
+
42
+ Escape `/` inside a regex pattern as `\/`. XPath body begins with `//`.
43
+
44
+ ## Paths
45
+
46
+ URI-shaped: `[scheme://]rest`.
47
+
48
+ * Bare paths (no scheme) default to local relative project file paths.
49
+ * Glob metacharacters (`*`, `**`, `?`, `[...]`) are allowed in path segments.
50
+
51
+ Internal schemes:
52
+
53
+ - `unknown://` — pending / open questions.
54
+ - `known://` — knowledgebase entries.
55
+ - `skill://` — available skill entries.
56
+ - `log://<loop>/<turn>/<action>/...` — event log.
57
+ - `stream://` — live data streams.
58
+
59
+ ## Context
60
+
61
+ The agent maintains two contexts:
62
+
63
+ - **Index** — entries listed in the active index.
64
+ - **Archive** — entries archived; out of working memory (HIDE), but promotable (SHOW) by path or pattern lookup.
65
+
66
+ `SHOW` promotes matching entries to the active index. `HIDE` demotes them to the archive. The model curates its own working memory by issuing these between substantive operations. New entries created via `EDIT` enter the active index by default.
67
+
68
+ ## Suffix
69
+
70
+ For nested Plurnk Operations inside a body (recording, quoting, demonstrating), the outer statement uses an optional non-empty suffix so its close tag is distinct from inner close tags. The empty suffix is the default. The suffix character class is `[A-Za-z0-9_]`.
71
+
72
+ ```
73
+ <<EDITouter(known://demo):
74
+ quoted: <<EDIT(known://inner):hello:EDIT
75
+ :EDITouter
76
+ ```
77
+
78
+ ## Examples
79
+
80
+ ```
81
+ <<FIND(config/**/*.xml)://user[@role='admin']:FIND
82
+ <<READ(lang/??.json):$.greeting:READ
83
+ <<READ(https://en.wikipedia.org/wiki/Paris)<426-465>::READ
84
+ <<EDIT[philosophy,existentialism](known://philosophy/existentialism/meaning):The meaning of life is 42:EDIT
85
+ <<EDIT[france,geography](unknown://countries/france/capital):What is the capital of France?:EDIT
86
+ <<EDIT[plan,france,task](known://plan):
87
+ - [ ] Decompose prompt into unknowns
88
+ - [ ] Discover capital of France
89
+ - [ ] Deliver
90
+ :EDIT
91
+ <<EDIT(known://plan)<2>:- [x] Discover capital of France:EDIT
92
+ <<EDIT(known://countries/france/capital)<-1>:[Wikipedia: Paris](https://en.wikipedia.org/wiki/Paris):EDIT
93
+ <<EDIT(known://countries/france/capital)::EDIT
94
+ <<COPY[archive,2026-05-14](known://draft):known://archive/2026-05-14/draft:COPY
95
+ <<MOVE(known://draft):known://final/answer:MOVE
96
+ <<SHOW[france](known://countries/**):Paris*:SHOW
97
+ <<HIDE(log://**/get)<101-200>::HIDE
98
+ <<FIND(log://**/error):/timeout|deadline exceeded/i:FIND
99
+ <<EXEC[node](./):
100
+ const sum = [1, 2, 3].reduce((a, b) => a + b, 0);
101
+ console.log(sum);
102
+ :EXEC
103
+ <<SEND[102]:decomposed prompt; plan initialized:SEND
104
+ <<SEND[200]:{"answer":"Paris","confidence":0.95}:SEND
105
+ ```
106
+
107
+ ## Invariants
108
+
109
+ - `<<OPsuffix` and `:OPsuffix` MUST character-match.
110
+ - `:body:` fences MUST be present (use `::` for empty body).
111
+ - `:` and `OPsuffix` in the close tag MUST be character-adjacent.
112
+ - Header slot order MUST be `[signal]` → `(path)` → `<L>` → `:`.
113
+ - Inside `[…]`, `(…)`, `<…>`, between `OP` and `suffix` — no whitespace.
114
+ - Between header elements — whitespace is non-significant.
115
+ - Inside body — whitespace and newlines are preserved verbatim.
116
+ - A body containing `:OPkeyword` MUST use a suffix on the enclosing statement.
package/src/ast.ts ADDED
@@ -0,0 +1,348 @@
1
+ import type { StatementContext } from "./generated/plurnkParser.ts";
2
+ import { PlurnkParseError } from "./errors.ts";
3
+ import * as xpath from "xpath";
4
+ import { JSONPath } from "jsonpath-plus";
5
+
6
+ // The xpath package's .d.ts omits its `parse` function (it only types the
7
+ // document-evaluation surface). Augment the type here — `parse` is exported
8
+ // at runtime and throws on a syntactically invalid XPath 1.0 expression.
9
+ declare module "xpath" {
10
+ export function parse(expression: string): unknown;
11
+ }
12
+
13
+ export type Position = { line: number; column: number };
14
+
15
+ export type PlurnkOp =
16
+ | "FIND"
17
+ | "READ"
18
+ | "EDIT"
19
+ | "COPY"
20
+ | "MOVE"
21
+ | "SHOW"
22
+ | "HIDE"
23
+ | "SEND"
24
+ | "EXEC";
25
+
26
+ export interface LineMarker {
27
+ /** First position. Can be negative (sentinels: 0 = prepend anchor, -1 = append anchor). */
28
+ first: number;
29
+ /** Second position when the marker is a range `<N-M>`; null when single `<N>`. */
30
+ last: number | null;
31
+ }
32
+
33
+ /**
34
+ * Parsed path slot. A path is either a local source path (no scheme — filesystem-style)
35
+ * or a URL with a recognized `scheme://` prefix. URLs are fully decomposed via WHATWG URL;
36
+ * locals are kept as raw strings (resolution is the runtime's job).
37
+ *
38
+ * Subdomain / registrable-domain splitting requires the public suffix list (PSL) and is
39
+ * deferred to the runtime — `hostname` is the full host string as parsed by URL.
40
+ */
41
+ export type ParsedPath = LocalPath | UrlPath;
42
+
43
+ export interface LocalPath {
44
+ kind: "local";
45
+ raw: string;
46
+ }
47
+
48
+ export interface UrlPath {
49
+ kind: "url";
50
+ raw: string;
51
+ /** Scheme without trailing `:` — `"https"`, `"known"`, etc. */
52
+ scheme: string;
53
+ username: string | null;
54
+ password: string | null;
55
+ /** Full hostname as parsed by URL. For custom schemes (`known://draft`), this is the first authority segment. */
56
+ hostname: string | null;
57
+ port: number | null;
58
+ /** Path component (everything after the authority, before `?` or `#`). May be empty. */
59
+ pathname: string;
60
+ /** Query parameters. Multi-value keys are arrays. Empty record if no query. */
61
+ search: Record<string, string | string[]>;
62
+ /** Fragment (after `#`), with the `#` stripped; null if no fragment. */
63
+ fragment: string | null;
64
+ }
65
+
66
+ interface StatementBase<S> {
67
+ suffix: string;
68
+ signal: S | null;
69
+ path: ParsedPath | null;
70
+ lineMarker: LineMarker | null;
71
+ position: Position;
72
+ }
73
+
74
+ /**
75
+ * Typed body for FIND/READ/SHOW/HIDE pattern matchers. The dialect is detected
76
+ * by the body's leading characters and validated by the Visitor — `xpath` via
77
+ * `xpath.parse()`, `regex` via `new RegExp()`, `jsonpath` via `jsonpath-plus`,
78
+ * `glob` is pass-through.
79
+ */
80
+ export type MatcherBody =
81
+ | { dialect: "xpath"; raw: string }
82
+ | { dialect: "regex"; raw: string; pattern: string; flags: string; regexp: RegExp }
83
+ | { dialect: "jsonpath"; raw: string }
84
+ | { dialect: "glob"; raw: string };
85
+
86
+ /**
87
+ * Typed body for SEND. The raw payload is always present; `json` holds the
88
+ * `JSON.parse(raw)` result if the body is valid JSON, null otherwise.
89
+ * Best-effort: plain-text bodies (`<<SEND[200]:Paris:SEND`) leave json=null.
90
+ */
91
+ export interface SendBody {
92
+ raw: string;
93
+ json: unknown | null;
94
+ }
95
+
96
+ /** Matcher OPs: body is a typed pattern matcher (or null if no body). */
97
+ export interface FindStatement extends StatementBase<string[]> { op: "FIND"; body: MatcherBody | null; }
98
+ export interface ReadStatement extends StatementBase<string[]> { op: "READ"; body: MatcherBody | null; }
99
+ export interface ShowStatement extends StatementBase<string[]> { op: "SHOW"; body: MatcherBody | null; }
100
+ export interface HideStatement extends StatementBase<string[]> { op: "HIDE"; body: MatcherBody | null; }
101
+
102
+ /** EDIT body is arbitrary content (markdown, code, JSON, prose) — kept raw. */
103
+ export interface EditStatement extends StatementBase<string[]> { op: "EDIT"; body: string | null; }
104
+
105
+ /** COPY/MOVE body is the destination URI, parsed identically to the path slot. */
106
+ export interface CopyStatement extends StatementBase<string[]> { op: "COPY"; body: ParsedPath | null; }
107
+ export interface MoveStatement extends StatementBase<string[]> { op: "MOVE"; body: ParsedPath | null; }
108
+
109
+ /** SEND body: raw plus best-effort JSON parse. */
110
+ export interface SendStatement extends StatementBase<number> { op: "SEND"; body: SendBody | null; }
111
+
112
+ /** EXEC body is a command or code snippet — kept raw. */
113
+ export interface ExecStatement extends StatementBase<string> { op: "EXEC"; body: string | null; }
114
+
115
+ export type PlurnkStatement =
116
+ | FindStatement
117
+ | ReadStatement
118
+ | EditStatement
119
+ | CopyStatement
120
+ | MoveStatement
121
+ | ShowStatement
122
+ | HideStatement
123
+ | SendStatement
124
+ | ExecStatement;
125
+
126
/** Operation keywords, in the order they are tried against an open tag. */
const OPS: readonly PlurnkOp[] = [
  "FIND",
  "READ",
  "EDIT",
  "COPY",
  "MOVE",
  "SHOW",
  "HIDE",
  "SEND",
  "EXEC",
];

/**
 * Splits the text of an open tag into its operation keyword and suffix.
 *
 * The first two characters of `openTagText` are dropped (the `<<` of a tag
 * such as `<<EDITouter`); the remainder must start with one of {@link OPS}.
 * Whatever follows the keyword is returned as the suffix (possibly empty).
 *
 * @param openTagText Full text of the open tag token.
 * @returns The matched operation and the trailing suffix.
 * @throws Error when the text does not start with a known operation keyword.
 */
const splitOpAndSuffix = (openTagText: string): { op: PlurnkOp; suffix: string } => {
  const afterOpener = openTagText.slice(2);
  const matched = OPS.find((candidate) => afterOpener.startsWith(candidate));
  if (matched === undefined) {
    throw new Error(`unrecognized OP in open tag: ${openTagText}`);
  }
  return { op: matched, suffix: afterOpener.slice(matched.length) };
};
139
+
140
/** True when `c` is defined and compares (as a string) within `"0"`..`"9"`. */
const isDigit = (c: string | undefined): boolean => {
  if (c === undefined) return false;
  return "0" <= c && "9" >= c;
};

/**
 * Parses a line-marker token such as `<5>`, `<2-7>` or `<-3--1>`.
 *
 * The first and last characters of `text` (the angle brackets) are removed.
 * The leading run — an optional `-` followed by digits — becomes `first`.
 * If anything remains, one separator character is skipped and the rest is
 * parsed as `last`; otherwise `last` is null.
 *
 * @param text Marker text including its surrounding delimiters.
 * @returns The parsed marker; values come straight from `Number.parseInt`.
 */
const parseLineMarker = (text: string): LineMarker => {
  const inner = text.slice(1, -1);

  // Walk past the optional sign and the digits of the first number.
  let cursor = inner.startsWith("-") ? 1 : 0;
  while (isDigit(inner[cursor])) {
    cursor += 1;
  }

  const first = Number.parseInt(inner.slice(0, cursor), 10);
  if (cursor >= inner.length) {
    return { first, last: null };
  }

  // cursor sits on the range separator; the second number starts right after it.
  return { first, last: Number.parseInt(inner.slice(cursor + 1), 10) };
};
153
+
154
/**
 * Converts the raw signal values of a SEND statement into a single integer.
 *
 * @param raw Signal values split on commas, or null when the slot is absent.
 * @param pos Statement position used for error reporting.
 * @returns The integer signal, or null when no signal slot was present.
 * @throws PlurnkParseError when the slot is empty, holds more than one value,
 *   or its single value is not an optionally-negative run of digits.
 */
const coerceSendSignal = (raw: string[] | null, pos: Position): number | null => {
  const signalError = (message: string): PlurnkParseError =>
    new PlurnkParseError(pos.line, pos.column, "visitor", message);

  if (raw === null) return null;

  const [text, ...extra] = raw;
  if (text === undefined) {
    throw signalError("SEND signal slot is present but empty");
  }
  if (extra.length > 0) {
    throw signalError(`SEND signal must be a single integer; got ${raw.length} values`);
  }
  if (!/^-?\d+$/.test(text)) {
    throw signalError(`SEND signal must be an integer; got "${text}"`);
  }
  return Number.parseInt(text, 10);
};
168
+
169
/**
 * Converts the raw signal values of an EXEC statement into a single runtime tag.
 *
 * @param raw Signal values split on commas, or null when the slot is absent.
 * @param pos Statement position used for error reporting.
 * @returns The single tag unchanged, or null when no signal slot was present.
 * @throws PlurnkParseError when the slot is empty or holds more than one value.
 */
const coerceExecSignal = (raw: string[] | null, pos: Position): string | null => {
  const signalError = (message: string): PlurnkParseError =>
    new PlurnkParseError(pos.line, pos.column, "visitor", message);

  if (raw === null) return null;

  const [tag, ...extra] = raw;
  if (tag === undefined) {
    throw signalError("EXEC signal slot is present but empty");
  }
  if (extra.length > 0) {
    throw signalError(`EXEC signal must be a single runtime tag; got ${raw.length} values`);
  }
  return tag;
};
179
+
180
/** Matches a leading `scheme://` prefix (letter first, then letters, digits, `+`, `.`, `-`). */
const SCHEME_PATTERN = /^[a-z][a-z0-9+.-]*:\/\//i;

/**
 * Parses a path slot (or a COPY/MOVE destination) into a {@link ParsedPath}.
 *
 * - An empty string yields null.
 * - Text without a `scheme://` prefix is returned as a local path, untouched.
 * - Anything else is decomposed with the WHATWG `URL` parser.
 *
 * Query parameters are collected into a plain record: a key seen once maps to
 * its string value, a repeated key maps to an array of values in order of
 * appearance. Keys are always stored as own properties, so names that collide
 * with `Object.prototype` members (`constructor`, `toString`, `__proto__`, …)
 * are handled like any other key instead of picking up inherited values.
 *
 * @param raw Raw path text.
 * @param pos Statement position used for error reporting.
 * @returns The parsed path, or null when `raw` is empty.
 * @throws PlurnkParseError when `raw` has a scheme prefix but `URL` rejects it.
 */
const parsePath = (raw: string, pos: Position): ParsedPath | null => {
  if (raw.length === 0) return null;
  if (!SCHEME_PATTERN.test(raw)) {
    return { kind: "local", raw };
  }

  let url: URL;
  try {
    url = new URL(raw);
  } catch (e: unknown) {
    const detail = e instanceof Error ? e.message : raw;
    throw new PlurnkParseError(pos.line, pos.column, "visitor", `invalid URI in path: ${detail}`);
  }

  // Group values per key in a Map first: lookups on a Map never see inherited
  // members, unlike `record[key]` on a plain object.
  const grouped = new Map<string, string[]>();
  for (const [key, value] of url.searchParams) {
    const bucket = grouped.get(key);
    if (bucket === undefined) {
      grouped.set(key, [value]);
    } else {
      bucket.push(value);
    }
  }

  const search: Record<string, string | string[]> = {};
  for (const [key, values] of grouped) {
    // defineProperty (not assignment) so a `__proto__` key becomes an ordinary
    // own property rather than invoking the prototype setter.
    Object.defineProperty(search, key, {
      value: values.length === 1 ? values[0] : values,
      enumerable: true,
      writable: true,
      configurable: true,
    });
  }

  return {
    kind: "url",
    raw,
    scheme: url.protocol.replace(/:$/, ""),
    username: url.username || null,
    password: url.password || null,
    hostname: url.hostname || null,
    port: url.port ? Number.parseInt(url.port, 10) : null,
    pathname: url.pathname,
    search,
    fragment: url.hash ? url.hash.slice(1) : null,
  };
};
217
+
218
type MatcherDialect = "xpath" | "regex" | "jsonpath" | "glob";

/**
 * Leading-prefix table for matcher bodies, checked top to bottom.
 * `//` must come before `/` so XPath bodies are not mistaken for regex literals.
 */
const MATCHER_PREFIXES: ReadonlyArray<readonly [prefix: string, dialect: MatcherDialect]> = [
  ["//", "xpath"],
  ["/", "regex"],
  ["$", "jsonpath"],
];

/**
 * Picks the matcher dialect from the first characters of a body.
 *
 * @param body Raw matcher body.
 * @returns The dialect of the first matching prefix, or `"glob"` when none matches.
 */
const detectMatcherDialect = (body: string): MatcherDialect => {
  for (const [prefix, dialect] of MATCHER_PREFIXES) {
    if (body.startsWith(prefix)) return dialect;
  }
  return "glob";
};
226
+
227
/**
 * Splits a `/pattern/flags` body into its pattern and flags.
 *
 * Scanning starts after the opening `/` and stops at the first `/` that is not
 * preceded by a backslash escape. Everything after that closing `/` is
 * returned as `flags` without validation.
 *
 * @param body Regex body, starting with `/`.
 * @param pos Statement position used for error reporting.
 * @returns The pattern text (escapes intact) and the trailing flags.
 * @throws PlurnkParseError when no unescaped closing `/` is found.
 */
const parseRegexLiteral = (body: string, pos: Position): { pattern: string; flags: string } => {
  let closeAt = 1;
  for (; closeAt < body.length; closeAt += 1) {
    const ch = body[closeAt];
    if (ch === "/") break;
    // A backslash consumes the character after it as well.
    if (ch === "\\") closeAt += 1;
  }

  if (closeAt >= body.length) {
    throw new PlurnkParseError(pos.line, pos.column, "visitor", "regex body missing closing /");
  }
  return { pattern: body.slice(1, closeAt), flags: body.slice(closeAt + 1) };
};
239
+
240
/**
 * Builds the typed matcher body for FIND/READ/SHOW/HIDE.
 *
 * The dialect is chosen by `detectMatcherDialect`, then validated:
 * - `regex`: split by `parseRegexLiteral` and compiled with `new RegExp`;
 * - `xpath`: passed to `xpath.parse`;
 * - `jsonpath`: evaluated with `JSONPath` against an empty object;
 * - `glob`: returned as-is.
 *
 * @param body Raw matcher body.
 * @param pos Statement position used for error reporting.
 * @returns The typed matcher body; `raw` is always the unmodified input.
 * @throws PlurnkParseError when the dialect's validator rejects the body. The
 *   message carries the validator's own message when it threw an `Error`,
 *   otherwise the body text.
 */
const parseMatcherBody = (body: string, pos: Position): MatcherBody => {
  // One place to turn a caught value into a positioned parse error.
  const invalid = (label: string, cause: unknown): PlurnkParseError => {
    const detail = cause instanceof Error ? cause.message : body;
    return new PlurnkParseError(pos.line, pos.column, "visitor", `invalid ${label}: ${detail}`);
  };

  switch (detectMatcherDialect(body)) {
    case "regex": {
      const { pattern, flags } = parseRegexLiteral(body, pos);
      let regexp: RegExp;
      try {
        regexp = new RegExp(pattern, flags);
      } catch (e: unknown) {
        throw invalid("regex", e);
      }
      return { dialect: "regex", raw: body, pattern, flags, regexp };
    }
    case "xpath": {
      try {
        xpath.parse(body);
      } catch (e: unknown) {
        throw invalid("xpath", e);
      }
      return { dialect: "xpath", raw: body };
    }
    case "jsonpath": {
      try {
        // Result is discarded; the call is made only to surface path errors.
        JSONPath({ path: body, json: {} });
      } catch (e: unknown) {
        throw invalid("jsonpath", e);
      }
      return { dialect: "jsonpath", raw: body };
    }
    case "glob":
      return { dialect: "glob", raw: body };
  }
};
270
+
271
/**
 * Wraps a SEND body, attaching its JSON value when the text parses as JSON.
 *
 * @param raw Raw SEND body.
 * @returns `raw` plus `json`: the `JSON.parse` result, or null when parsing fails.
 */
const parseSendBody = (raw: string): SendBody => {
  try {
    return { raw, json: JSON.parse(raw) };
  } catch {
    // Best-effort: not all SEND bodies are JSON.
    return { raw, json: null };
  }
};
276
+
277
/**
 * Converts one parsed `statement` context into a typed {@link PlurnkStatement}.
 *
 * Steps, in order:
 * 1. split the open tag into operation and suffix;
 * 2. record the statement's start position (0:0 when no start token exists);
 * 3. read the optional signal, path, line-marker and body slots;
 * 4. coerce the signal per operation (SEND → integer, EXEC → single tag,
 *    everything else → the comma-split list);
 * 5. shape the body per operation (matcher for FIND/READ/SHOW/HIDE, parsed
 *    path for COPY/MOVE, raw + JSON for SEND, raw text for EDIT/EXEC).
 *
 * @param ctx Parser context for a single statement.
 * @returns The statement AST node.
 * @throws Error when the open tag does not start with a known operation.
 * @throws PlurnkParseError when a path, signal or body fails validation.
 */
export const buildStatement = (ctx: StatementContext): PlurnkStatement => {
  const openTag = ctx.openTag();
  const { op, suffix } = splitOpAndSuffix(openTag.getText());

  const anchor = ctx.start ?? openTag.start;
  const position: Position = {
    line: anchor?.line ?? 0,
    column: anchor?.column ?? 0,
  };

  // Signal slot: null when absent, [] when present but empty.
  const signalNode = ctx.signal();
  let rawSignal: string[] | null = null;
  if (signalNode) {
    const signalText = signalNode.SIGNAL_TEXT()?.getText() ?? "";
    rawSignal = signalText.length > 0 ? signalText.split(",") : [];
  }

  const pathNode = ctx.path();
  const path: ParsedPath | null = pathNode
    ? parsePath(pathNode.PATH_TEXT()?.getText() ?? "", position)
    : null;

  const markerNode = ctx.lineMarker();
  const lineMarker: LineMarker | null = markerNode
    ? parseLineMarker(markerNode.L_MARKER()?.getText() ?? "")
    : null;

  const bodyNode = ctx.body();
  const rawBody: string | null = bodyNode ? bodyNode.getText() : null;

  // Per-OP signal coercion.
  let signal: string[] | number | string | null;
  if (op === "SEND") {
    signal = coerceSendSignal(rawSignal, position);
  } else if (op === "EXEC") {
    signal = coerceExecSignal(rawSignal, position);
  } else {
    signal = rawSignal;
  }

  // Per-OP body shaping.
  let body: MatcherBody | ParsedPath | SendBody | string | null;
  if (rawBody === null) {
    body = null;
  } else if (op === "FIND" || op === "READ" || op === "SHOW" || op === "HIDE") {
    body = parseMatcherBody(rawBody, position);
  } else if (op === "COPY" || op === "MOVE") {
    body = parsePath(rawBody, position);
  } else if (op === "SEND") {
    body = parseSendBody(rawBody);
  } else {
    // EDIT and EXEC keep their body verbatim.
    body = rawBody;
  }

  return { op, suffix, signal, path, lineMarker, body, position } as PlurnkStatement;
};
@@ -0,0 +1,140 @@
1
+ import {
2
+ DefaultErrorStrategy,
3
+ InputMismatchException,
4
+ NoViableAltException,
5
+ Token,
6
+ type Parser,
7
+ type RecognitionException,
8
+ } from "antlr4ng";
9
+ import { plurnkParser } from "./generated/plurnkParser.ts";
10
+ import { plurnkLexer } from "./generated/plurnkLexer.ts";
11
+
12
const HEADER_CONTEXT = "in statement header";

/** Human-readable location phrase for each lexer mode name. */
const LEXER_MODE_CONTEXT: Record<string, string> = {
  DEFAULT_MODE: "between statements",
  OPENED: HEADER_CONTEXT,
  POST_SIGNAL: HEADER_CONTEXT,
  POST_PATH: HEADER_CONTEXT,
  POST_L: HEADER_CONTEXT,
  SIGNAL: "in signal",
  PATH: "in path",
  BODY: "in body",
};

/** Captures the quoted text of a message ending in `at: '…'`. */
const OFFENDING_CHAR_RE = /at: '([^']*)'$/;

/**
 * Describes the offending text named at the end of a lexer message.
 *
 * @param msg Original lexer message.
 * @returns `"input"` when the message has no trailing `at: '…'`, `"end of input"`
 *   when the quoted text is empty, otherwise the text wrapped in single quotes.
 */
const extractOffendingChar = (msg: string): string => {
  const captured = msg.match(OFFENDING_CHAR_RE);
  if (captured === null) return "input";
  const [, text] = captured;
  return text === "" ? "end of input" : `'${text}'`;
};

/**
 * Rewrites a lexer error message as `unrecognized character <what> <where>`.
 *
 * `<where>` is looked up from the lexer's current mode name; an unknown mode
 * index or mode name falls back to "between statements".
 *
 * @param lexer Lexer whose current `mode` locates the error.
 * @param originalMsg Original lexer message; the offending text is read from it.
 * @returns The translated message.
 */
export function translateLexerMessage(lexer: plurnkLexer, originalMsg: string): string {
  const modeName = lexer.modeNames[lexer.mode] ?? "DEFAULT_MODE";
  const where = LEXER_MODE_CONTEXT[modeName] ?? "between statements";
  const what = extractOffendingChar(originalMsg);
  return `unrecognized character ${what} ${where}`;
}
38
+
39
/** Open-tag token types; every one is reported simply as "open tag". */
const OPEN_TAG_TOKEN_TYPES: readonly number[] = [
  plurnkParser.OPEN_FIND,
  plurnkParser.OPEN_READ,
  plurnkParser.OPEN_EDIT,
  plurnkParser.OPEN_COPY,
  plurnkParser.OPEN_MOVE,
  plurnkParser.OPEN_SHOW,
  plurnkParser.OPEN_HIDE,
  plurnkParser.OPEN_SEND,
  plurnkParser.OPEN_EXEC,
];

/** Maps a parser token type to the phrase used for it in error messages. */
const SLOT_BY_TOKEN: Record<number, string> = {
  ...Object.fromEntries(OPEN_TAG_TOKEN_TYPES.map((type) => [type, "open tag"] as const)),
  [plurnkParser.LBRACKET]: "'['",
  [plurnkParser.RBRACKET]: "']'",
  [plurnkParser.LPAREN]: "'('",
  [plurnkParser.RPAREN]: "')'",
  [plurnkParser.L_MARKER]: "line marker",
  [plurnkParser.COLON]: "':'",
  [plurnkParser.SIGNAL_TEXT]: "signal content",
  [plurnkParser.PATH_TEXT]: "path content",
  [plurnkParser.BODY_TEXT]: "body content",
  [plurnkParser.CLOSE_TAG]: "close tag",
  [plurnkParser.TEXT]: "text between statements",
};
61
+
62
/**
 * Names a token for use in an error message.
 *
 * @param tok Token to describe, or null.
 * @returns `"end of input"` for a missing or EOF token; the token type's entry
 *   in `SLOT_BY_TOKEN` when it has one; otherwise the token text in single
 *   quotes, or `"input"` when the text is empty or absent.
 */
const describeToken = (tok: Token | null): string => {
  if (!tok || tok.type === Token.EOF) {
    return "end of input";
  }

  const slot = SLOT_BY_TOKEN[tok.type];
  if (slot) {
    return slot;
  }

  const literal = tok.text ?? "";
  if (literal.length === 0) {
    return "input";
  }
  return `'${literal}'`;
};
69
+
70
/**
 * Lists the tokens a recognition error was expecting, as an English phrase.
 *
 * Token types without an entry in `SLOT_BY_TOKEN` are dropped. One name is
 * returned as-is, two are joined with "or", three or more become
 * "a, b, or c".
 *
 * @param _parser Unused.
 * @param e Recognition error to read the expected-token set from.
 * @returns The phrase, or null when there is no expected set or none of its
 *   token types has a known name.
 */
const describeExpected = (
  _parser: Parser,
  e: RecognitionException,
): string | null => {
  const expectedTypes: number[] = e.getExpectedTokens()?.toArray() ?? [];

  const names: string[] = [];
  for (const type of expectedTypes) {
    const slot = SLOT_BY_TOKEN[type];
    if (slot) names.push(slot);
  }

  // Peel off the final name; what is left in `names` is the lead-in list.
  const last = names.pop();
  if (last === undefined) return null;
  if (names.length === 0) return last;
  if (names.length === 1) return `${names[0]} or ${last}`;
  return `${names.join(", ")}, or ${last}`;
};
86
+
87
/**
 * Error strategy that replaces the default parser messages with messages
 * phrased in terms of statement slots ("open tag", "path content", …).
 *
 * Each report method does nothing while the parser is already in error
 * recovery mode; otherwise it begins an error condition and notifies the
 * parser's error listeners with the rewritten message.
 */
export class PlurnkErrorStrategy extends DefaultErrorStrategy {
  /**
   * Reports a recognition error as `unexpected <got>`, adding
   * `; expected <list>` for input-mismatch and no-viable-alternative errors
   * when the expected tokens have known names.
   */
  public override reportError(recognizer: Parser, e: RecognitionException): void {
    if (this.inErrorRecoveryMode(recognizer)) return;
    this.beginErrorCondition(recognizer);

    const got = describeToken(e.offendingToken);
    const expected = describeExpected(recognizer, e);

    const listsExpected =
      e instanceof InputMismatchException || e instanceof NoViableAltException;
    const msg =
      listsExpected && expected
        ? `unexpected ${got}; expected ${expected}`
        : `unexpected ${got}`;

    recognizer.notifyErrorListeners(msg, e.offendingToken, e);
  }

  /** Reports a missing token as `expected <list>; got <current token>`. */
  public override reportMissingToken(recognizer: Parser): void {
    if (this.inErrorRecoveryMode(recognizer)) return;
    this.beginErrorCondition(recognizer);

    const tok = recognizer.getCurrentToken();
    const expected = this.joinExpectedSlots(recognizer) ?? "more input";
    const got = describeToken(tok);

    recognizer.notifyErrorListeners(`expected ${expected}; got ${got}`, tok, null);
  }

  /** Reports an extraneous token as `unexpected <current token>[; expected <list>]`. */
  public override reportUnwantedToken(recognizer: Parser): void {
    if (this.inErrorRecoveryMode(recognizer)) return;
    this.beginErrorCondition(recognizer);

    const tok = recognizer.getCurrentToken();
    const got = describeToken(tok);
    const expected = this.joinExpectedSlots(recognizer);

    const msg = expected
      ? `unexpected ${got}; expected ${expected}`
      : `unexpected ${got}`;
    recognizer.notifyErrorListeners(msg, tok, null);
  }

  /**
   * Names of the tokens currently expected by the parser, joined with " or ".
   * Token types without an entry in `SLOT_BY_TOKEN` are skipped.
   *
   * @returns The joined names, or null when none of the expected tokens is named.
   */
  private joinExpectedSlots(recognizer: Parser): string | null {
    const names: string[] = [];
    for (const type of this.getExpectedTokens(recognizer).toArray()) {
      const slot = SLOT_BY_TOKEN[type];
      if (slot) names.push(slot);
    }
    return names.length > 0 ? names.join(" or ") : null;
  }
}
package/src/errors.ts ADDED
@@ -0,0 +1,25 @@
1
/** Pipeline stage that raised a {@link PlurnkParseError}. */
export type ErrorSource = "lexer" | "parser" | "visitor";

/**
 * Error carrying the position and originating stage of a Plurnk parse failure.
 *
 * `message` is the fully formatted text
 * (`Plurnk <source> error at <line>:<column> — <detail>`); the individual parts
 * stay available as `line`, `column` and `source`.
 */
export class PlurnkParseError extends Error {
  readonly line: number;
  readonly column: number;
  readonly source: ErrorSource;

  /**
   * @param line Line of the failure.
   * @param column Column of the failure.
   * @param source Stage that detected the failure.
   * @param message Detail text appended after the position prefix.
   */
  constructor(line: number, column: number, source: ErrorSource, message: string) {
    super(`Plurnk ${source} error at ${line}:${column} — ${message}`);
    this.name = "PlurnkParseError";
    this.source = source;
    this.line = line;
    this.column = column;
  }

  /** Plain-object form used by `JSON.stringify`. */
  toJSON(): { line: number; column: number; source: ErrorSource; message: string } {
    const { line, column, source, message } = this;
    return { line, column, source, message };
  }
}