@plurnk/plurnk-grammar 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +184 -0
- package/SPEC.md +625 -0
- package/bin/plurnk.ts +43 -0
- package/package.json +49 -0
- package/plurnk.md +116 -0
- package/src/ast.ts +348 -0
- package/src/error-strategy.ts +140 -0
- package/src/errors.ts +25 -0
- package/src/generated/plurnkLexer.ts +419 -0
- package/src/generated/plurnkParser.ts +625 -0
- package/src/generated/plurnkParserVisitor.ts +65 -0
- package/src/index.ts +142 -0
package/package.json
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@plurnk/plurnk-grammar",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "ANTLR4 grammar for the Plurnk LLM agent protocol",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"license": "MIT",
|
|
7
|
+
"publishConfig": {
|
|
8
|
+
"access": "public"
|
|
9
|
+
},
|
|
10
|
+
"engines": {
|
|
11
|
+
"node": ">=23.6"
|
|
12
|
+
},
|
|
13
|
+
"exports": {
|
|
14
|
+
".": "./src/index.ts",
|
|
15
|
+
"./package.json": "./package.json"
|
|
16
|
+
},
|
|
17
|
+
"bin": {
|
|
18
|
+
"plurnk": "./bin/plurnk.ts"
|
|
19
|
+
},
|
|
20
|
+
"files": [
|
|
21
|
+
"bin/plurnk.ts",
|
|
22
|
+
"src/*.ts",
|
|
23
|
+
"src/generated/*.ts",
|
|
24
|
+
"SPEC.md",
|
|
25
|
+
"plurnk.md"
|
|
26
|
+
],
|
|
27
|
+
"scripts": {
|
|
28
|
+
"build:grammar": "antlr-ng -D language=TypeScript -o src/generated --generate-visitor true --generate-listener false plurnkLexer.g4 plurnkParser.g4 && node scriptify/fix-generated-imports.ts",
|
|
29
|
+
"antlr:tokens": "testrig src/generated/plurnk document --tokens",
|
|
30
|
+
"antlr:trace": "testrig src/generated/plurnk document --trace",
|
|
31
|
+
"antlr:parse": "testrig src/generated/plurnk document --tree",
|
|
32
|
+
"test:lint": "tsc --noEmit",
|
|
33
|
+
"test:unit": "node --test test/unit/*.test.ts",
|
|
34
|
+
"test:intg": "node --test test/integration/*.test.ts",
|
|
35
|
+
"test:demo": "node --test test/demo/*.test.ts",
|
|
36
|
+
"test:all": "npm run test:lint && npm run test:unit && npm run test:intg && npm run test:demo",
|
|
37
|
+
"prepublishOnly": "npm run build:grammar && npm run test:all"
|
|
38
|
+
},
|
|
39
|
+
"dependencies": {
|
|
40
|
+
"antlr4ng": "^3.0.0",
|
|
41
|
+
"jsonpath-plus": "^10.4.0",
|
|
42
|
+
"xpath": "^0.0.34"
|
|
43
|
+
},
|
|
44
|
+
"devDependencies": {
|
|
45
|
+
"@types/node": "^25.8.0",
|
|
46
|
+
"antlr-ng": "^1.0.10",
|
|
47
|
+
"typescript": "^6.0.3"
|
|
48
|
+
}
|
|
49
|
+
}
|
package/plurnk.md
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# Plurnk System Grammar
|
|
2
|
+
|
|
3
|
+
YOU MUST ONLY use the HEREDOC-inspired Plurnk Operations (FIND|READ|EDIT|COPY|MOVE|SHOW|HIDE|SEND|EXEC).
|
|
4
|
+
|
|
5
|
+
## Syntax
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
<<OPsuffix[signal]?(path)?<L>?:body?:OPsuffix
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
Slot order is fixed. Slots between `<<OPsuffix` and `:body:` are syntactically optional (the Operations table lists which slots each OP requires). `:body:` fences are required (use `::` when the body is empty). The close tag's `OPsuffix` must character-match the open tag's.
|
|
12
|
+
|
|
13
|
+
## Operations
|
|
14
|
+
|
|
15
|
+
| OP | `[signal]` | `(path)` | `<L>` | body |
|
|
16
|
+
|------|---------------|----------|------------------|--------------------------|
|
|
17
|
+
| FIND | filter tags | required | results `N-M` | matcher |
|
|
18
|
+
| READ | filter tags | required | lines `N-M` | matcher |
|
|
19
|
+
| EDIT | tags | required | lines `N-M` | content (empty = clear) |
|
|
20
|
+
| COPY | apply tags | required | lines `N-M` | destination URI |
|
|
21
|
+
| MOVE | apply tags | required | lines `N-M` | destination URI |
|
|
22
|
+
| SHOW | filter tags | required | results `N-M` | matcher |
|
|
23
|
+
| HIDE | filter tags | required | results `N-M` | matcher |
|
|
24
|
+
| SEND | HTTP status | optional | — | message (JSON for data) |
|
|
25
|
+
| EXEC | Runtime Tag | required | — | command or code |
|
|
26
|
+
|
|
27
|
+
SEND signal is a single integer. EXEC signal is a single Runtime Tag (`sh`, `node`, `python`, etc.). All other signals are CSV.
|
|
28
|
+
|
|
29
|
+
## `<L>`
|
|
30
|
+
|
|
31
|
+
`<N>` selects position N. `<N-M>` selects the inclusive range N-M. N and M are signed integers. Sentinels: `<0>` before position 1 (prepend), `<-1>` after the last position (append). Range example: `<-3--1>` is positions -3..-1.
|
|
32
|
+
|
|
33
|
+
## Body matcher dispatch (FIND, READ, SHOW, HIDE)
|
|
34
|
+
|
|
35
|
+
| leading prefix | dialect | form |
|
|
36
|
+
|----------------|----------|-----------------------|
|
|
37
|
+
| `//` | xpath | `//selector` |
|
|
38
|
+
| `/` | regex | `/pattern/[igmsu]?` |
|
|
39
|
+
| `$` | jsonpath | `$.field` |
|
|
40
|
+
| otherwise | glob | `pattern` |
|
|
41
|
+
|
|
42
|
+
Escape `/` inside a regex pattern as `\/`. XPath body begins with `//`.
|
|
43
|
+
|
|
44
|
+
## Paths
|
|
45
|
+
|
|
46
|
+
URI-shaped: `[scheme://]rest`.
|
|
47
|
+
|
|
48
|
+
* Bare paths (no scheme) default to local relative project file paths.
|
|
49
|
+
* Glob metacharacters (`*`, `**`, `?`, `[...]`) are allowed in path segments.
|
|
50
|
+
|
|
51
|
+
Internal schemes:
|
|
52
|
+
|
|
53
|
+
- `unknown://` — pending / open questions.
|
|
54
|
+
- `known://` — knowledgebase entries.
|
|
55
|
+
- `skill://` — available skill entries.
|
|
56
|
+
- `log://<loop>/<turn>/<action>/...` — event log.
|
|
57
|
+
- `stream://` — live data streams.
|
|
58
|
+
|
|
59
|
+
## Context
|
|
60
|
+
|
|
61
|
+
The agent maintains two contexts:
|
|
62
|
+
|
|
63
|
+
- **Index** — entries listed in the active index.
|
|
64
|
+
- **Archive** — entries archived; out of working memory (HIDE), but promotable (SHOW) by path or pattern lookup.
|
|
65
|
+
|
|
66
|
+
`SHOW` promotes matching entries to the active index. `HIDE` demotes them to the archive. The model curates its own working memory by issuing these between substantive operations. New entries created via `EDIT` enter the active index by default.
|
|
67
|
+
|
|
68
|
+
## Suffix
|
|
69
|
+
|
|
70
|
+
For nested Plurnk Operations inside a body (recording, quoting, demonstrating), the outer statement uses an optional non-empty suffix so its close tag is distinct from inner close tags. Empty suffix is default. The suffix character class is `[A-Za-z0-9_]`.
|
|
71
|
+
|
|
72
|
+
```
|
|
73
|
+
<<EDITouter(known://demo):
|
|
74
|
+
quoted: <<EDIT(known://inner):hello:EDIT
|
|
75
|
+
:EDITouter
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## Examples
|
|
79
|
+
|
|
80
|
+
```
|
|
81
|
+
<<FIND(config/**/*.xml)://user[@role='admin']:FIND
|
|
82
|
+
<<READ(lang/??.json):$.greeting:READ
|
|
83
|
+
<<READ(https://en.wikipedia.org/wiki/Paris)<426-465>::READ
|
|
84
|
+
<<EDIT[philosophy,existentialism](known://philosophy/existentialism/meaning):The meaning of life is 42:EDIT
|
|
85
|
+
<<EDIT[france,geography](unknown://countries/france/capital):What is the capital of France?:EDIT
|
|
86
|
+
<<EDIT[plan,france,task](known://plan):
|
|
87
|
+
- [ ] Decompose prompt into unknowns
|
|
88
|
+
- [ ] Discover capital of France
|
|
89
|
+
- [ ] Deliver
|
|
90
|
+
:EDIT
|
|
91
|
+
<<EDIT(known://plan)<2>:- [x] Discover capital of France:EDIT
|
|
92
|
+
<<EDIT(known://countries/france/capital)<-1>:[Wikipedia: Paris](https://en.wikipedia.org/wiki/Paris):EDIT
|
|
93
|
+
<<EDIT(known://countries/france/capital)::EDIT
|
|
94
|
+
<<COPY[archive,2026-05-14](known://draft):known://archive/2026-05-14/draft:COPY
|
|
95
|
+
<<MOVE(known://draft):known://final/answer:MOVE
|
|
96
|
+
<<SHOW[france](known://countries/**):Paris*:SHOW
|
|
97
|
+
<<HIDE(log://**/get)<101-200>::HIDE
|
|
98
|
+
<<FIND(log://**/error):/timeout|deadline exceeded/i:FIND
|
|
99
|
+
<<EXEC[node](./):
|
|
100
|
+
const sum = [1, 2, 3].reduce((a, b) => a + b, 0);
|
|
101
|
+
console.log(sum);
|
|
102
|
+
:EXEC
|
|
103
|
+
<<SEND[102]:decomposed prompt; plan initialized:SEND
|
|
104
|
+
<<SEND[200]:{"answer":"Paris","confidence":0.95}:SEND
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Invariants
|
|
108
|
+
|
|
109
|
+
- `<<OPsuffix` and `:OPsuffix` MUST character-match.
|
|
110
|
+
- `:body:` fences MUST be present (use `::` for empty body).
|
|
111
|
+
- `:` and `OPsuffix` in the close tag MUST be character-adjacent.
|
|
112
|
+
- Header slot order MUST be `[signal]` → `(path)` → `<L>` → `:`.
|
|
113
|
+
- Inside `[…]`, `(…)`, `<…>`, between `OP` and `suffix` — no whitespace.
|
|
114
|
+
- Between header elements — whitespace is non-significant.
|
|
115
|
+
- Inside body — whitespace and newlines are preserved verbatim.
|
|
116
|
+
- A body containing `:OPkeyword` MUST use a suffix on the enclosing statement.
|
package/src/ast.ts
ADDED
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
import type { StatementContext } from "./generated/plurnkParser.ts";
|
|
2
|
+
import { PlurnkParseError } from "./errors.ts";
|
|
3
|
+
import * as xpath from "xpath";
|
|
4
|
+
import { JSONPath } from "jsonpath-plus";
|
|
5
|
+
|
|
6
|
+
// The xpath package's .d.ts omits its `parse` function (it only types the
|
|
7
|
+
// document-evaluation surface). Augment the type here — `parse` is exported
|
|
8
|
+
// at runtime and throws on a syntactically invalid XPath 1.0 expression.
|
|
9
|
+
declare module "xpath" {
|
|
10
|
+
export function parse(expression: string): unknown;
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
export type Position = { line: number; column: number };
|
|
14
|
+
|
|
15
|
+
export type PlurnkOp =
|
|
16
|
+
| "FIND"
|
|
17
|
+
| "READ"
|
|
18
|
+
| "EDIT"
|
|
19
|
+
| "COPY"
|
|
20
|
+
| "MOVE"
|
|
21
|
+
| "SHOW"
|
|
22
|
+
| "HIDE"
|
|
23
|
+
| "SEND"
|
|
24
|
+
| "EXEC";
|
|
25
|
+
|
|
26
|
+
export interface LineMarker {
|
|
27
|
+
/** First position. Can be negative (sentinels: 0 = prepend anchor, -1 = append anchor). */
|
|
28
|
+
first: number;
|
|
29
|
+
/** Second position when the marker is a range `<N-M>`; null when single `<N>`. */
|
|
30
|
+
last: number | null;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
/**
|
|
34
|
+
* Parsed path slot. A path is either a local source path (no scheme — filesystem-style)
|
|
35
|
+
* or a URL with a recognized `scheme://` prefix. URLs are fully decomposed via WHATWG URL;
|
|
36
|
+
* locals are kept as raw strings (resolution is the runtime's job).
|
|
37
|
+
*
|
|
38
|
+
* Subdomain / registrable-domain splitting requires the public suffix list (PSL) and is
|
|
39
|
+
* deferred to the runtime — `hostname` is the full host string as parsed by URL.
|
|
40
|
+
*/
|
|
41
|
+
export type ParsedPath = LocalPath | UrlPath;
|
|
42
|
+
|
|
43
|
+
export interface LocalPath {
|
|
44
|
+
kind: "local";
|
|
45
|
+
raw: string;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
export interface UrlPath {
|
|
49
|
+
kind: "url";
|
|
50
|
+
raw: string;
|
|
51
|
+
/** Scheme without trailing `:` — `"https"`, `"known"`, etc. */
|
|
52
|
+
scheme: string;
|
|
53
|
+
username: string | null;
|
|
54
|
+
password: string | null;
|
|
55
|
+
/** Full hostname as parsed by URL. For custom schemes (`known://draft`), this is the first authority segment. */
|
|
56
|
+
hostname: string | null;
|
|
57
|
+
port: number | null;
|
|
58
|
+
/** Path component (everything after the authority, before `?` or `#`). May be empty. */
|
|
59
|
+
pathname: string;
|
|
60
|
+
/** Query parameters. Multi-value keys are arrays. Empty record if no query. */
|
|
61
|
+
search: Record<string, string | string[]>;
|
|
62
|
+
/** Fragment (after `#`), with the `#` stripped; null if no fragment. */
|
|
63
|
+
fragment: string | null;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
interface StatementBase<S> {
|
|
67
|
+
suffix: string;
|
|
68
|
+
signal: S | null;
|
|
69
|
+
path: ParsedPath | null;
|
|
70
|
+
lineMarker: LineMarker | null;
|
|
71
|
+
position: Position;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
/**
|
|
75
|
+
* Typed body for FIND/READ/SHOW/HIDE pattern matchers. The dialect is detected
|
|
76
|
+
* by the body's leading characters and validated by the Visitor — `xpath` via
|
|
77
|
+
* `xpath.parse()`, `regex` via `new RegExp()`, `jsonpath` via `jsonpath-plus`,
|
|
78
|
+
* `glob` is pass-through.
|
|
79
|
+
*/
|
|
80
|
+
export type MatcherBody =
|
|
81
|
+
| { dialect: "xpath"; raw: string }
|
|
82
|
+
| { dialect: "regex"; raw: string; pattern: string; flags: string; regexp: RegExp }
|
|
83
|
+
| { dialect: "jsonpath"; raw: string }
|
|
84
|
+
| { dialect: "glob"; raw: string };
|
|
85
|
+
|
|
86
|
+
/**
|
|
87
|
+
* Typed body for SEND. The raw payload is always present; `json` holds the
|
|
88
|
+
* `JSON.parse(raw)` result if the body is valid JSON, null otherwise.
|
|
89
|
+
* Best-effort: plain-text bodies (`<<SEND[200]:Paris:SEND`) leave json=null.
|
|
90
|
+
*/
|
|
91
|
+
export interface SendBody {
|
|
92
|
+
raw: string;
|
|
93
|
+
json: unknown | null;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
/** Matcher OPs: body is a typed pattern matcher (or null if no body). */
|
|
97
|
+
export interface FindStatement extends StatementBase<string[]> { op: "FIND"; body: MatcherBody | null; }
|
|
98
|
+
export interface ReadStatement extends StatementBase<string[]> { op: "READ"; body: MatcherBody | null; }
|
|
99
|
+
export interface ShowStatement extends StatementBase<string[]> { op: "SHOW"; body: MatcherBody | null; }
|
|
100
|
+
export interface HideStatement extends StatementBase<string[]> { op: "HIDE"; body: MatcherBody | null; }
|
|
101
|
+
|
|
102
|
+
/** EDIT body is arbitrary content (markdown, code, JSON, prose) — kept raw. */
|
|
103
|
+
export interface EditStatement extends StatementBase<string[]> { op: "EDIT"; body: string | null; }
|
|
104
|
+
|
|
105
|
+
/** COPY/MOVE body is the destination URI, parsed identically to the path slot. */
|
|
106
|
+
export interface CopyStatement extends StatementBase<string[]> { op: "COPY"; body: ParsedPath | null; }
|
|
107
|
+
export interface MoveStatement extends StatementBase<string[]> { op: "MOVE"; body: ParsedPath | null; }
|
|
108
|
+
|
|
109
|
+
/** SEND body: raw plus best-effort JSON parse. */
|
|
110
|
+
export interface SendStatement extends StatementBase<number> { op: "SEND"; body: SendBody | null; }
|
|
111
|
+
|
|
112
|
+
/** EXEC body is a command or code snippet — kept raw. */
|
|
113
|
+
export interface ExecStatement extends StatementBase<string> { op: "EXEC"; body: string | null; }
|
|
114
|
+
|
|
115
|
+
export type PlurnkStatement =
|
|
116
|
+
| FindStatement
|
|
117
|
+
| ReadStatement
|
|
118
|
+
| EditStatement
|
|
119
|
+
| CopyStatement
|
|
120
|
+
| MoveStatement
|
|
121
|
+
| ShowStatement
|
|
122
|
+
| HideStatement
|
|
123
|
+
| SendStatement
|
|
124
|
+
| ExecStatement;
|
|
125
|
+
|
|
126
|
+
/** Operation keywords tried, in this order, as a prefix of the open tag. */
const OPS: readonly PlurnkOp[] = [
  "FIND",
  "READ",
  "EDIT",
  "COPY",
  "MOVE",
  "SHOW",
  "HIDE",
  "SEND",
  "EXEC",
];

/**
 * Splits the text of an open tag into its operation keyword and suffix.
 *
 * @param openTagText - Open-tag text; its first two characters are dropped
 *   before the keyword is matched.
 * @returns The first keyword in `OPS` that prefixes the remaining text, and
 *   whatever follows it (an empty string when nothing follows).
 * @throws Error when the remaining text starts with none of the keywords.
 */
const splitOpAndSuffix = (openTagText: string): { op: PlurnkOp; suffix: string } => {
  const afterOpener = openTagText.slice(2);
  const matched = OPS.find((candidate) => afterOpener.startsWith(candidate));
  if (matched === undefined) {
    throw new Error(`unrecognized OP in open tag: ${openTagText}`);
  }
  return { op: matched, suffix: afterOpener.slice(matched.length) };
};
|
|
139
|
+
|
|
140
|
+
/** True when `c` is defined and compares between "0" and "9" inclusive. */
const isDigit = (c: string | undefined): boolean => {
  if (c === undefined) return false;
  return "0" <= c && c <= "9";
};

/**
 * Decodes a line-marker token such as `<5>` or `<-3--1>`.
 *
 * The first and last characters of `text` are discarded. The remainder is read
 * as an optionally negative integer, optionally followed by one separator
 * character and a second integer.
 *
 * @param text - Marker text including its two delimiter characters.
 * @returns `first` is the leading integer; `last` is the second integer, or
 *   `null` when the marker holds a single position.
 */
const parseLineMarker = (text: string): LineMarker => {
  const inner = text.slice(1, -1);

  // Measure the first number: an optional minus sign, then a run of digits.
  let firstEnd = inner.startsWith("-") ? 1 : 0;
  while (isDigit(inner[firstEnd])) firstEnd += 1;
  const first = Number.parseInt(inner.slice(0, firstEnd), 10);

  // Nothing after the first number: single-position marker.
  if (firstEnd >= inner.length) {
    return { first, last: null };
  }

  // Skip exactly one separator character; the rest is the second number
  // (which may carry its own minus sign, as in `<-3--1>`).
  const last = Number.parseInt(inner.slice(firstEnd + 1), 10);
  return { first, last };
};
|
|
153
|
+
|
|
154
|
+
/**
 * Converts the raw comma-split signal of a SEND statement into a number.
 *
 * @param raw - Signal values, or `null` when the statement has no signal slot.
 * @param pos - Statement position attached to any error raised.
 * @returns `null` when `raw` is `null`; otherwise the single value parsed as a
 *   base-10 integer.
 * @throws PlurnkParseError when the slot is empty, holds more than one value,
 *   or the value is not an optionally negative run of digits.
 */
const coerceSendSignal = (raw: string[] | null, pos: Position): number | null => {
  if (raw === null) return null;

  const fail = (message: string): never => {
    throw new PlurnkParseError(pos.line, pos.column, "visitor", message);
  };

  if (raw.length === 0) {
    return fail("SEND signal slot is present but empty");
  }
  if (raw.length > 1) {
    return fail(`SEND signal must be a single integer; got ${raw.length} values`);
  }

  // Exactly one element remains at this point.
  const candidate = raw[0]!;
  const looksLikeInteger = /^-?\d+$/.test(candidate);
  return looksLikeInteger
    ? Number.parseInt(candidate, 10)
    : fail(`SEND signal must be an integer; got "${candidate}"`);
};
|
|
168
|
+
|
|
169
|
+
/**
 * Converts the raw comma-split signal of an EXEC statement into a single tag.
 *
 * @param raw - Signal values, or `null` when the statement has no signal slot.
 * @param pos - Statement position attached to any error raised.
 * @returns `null` when `raw` is `null`; otherwise the only value, unchanged.
 * @throws PlurnkParseError when the slot is empty or holds more than one value.
 */
const coerceExecSignal = (raw: string[] | null, pos: Position): string | null => {
  if (raw === null) return null;
  if (raw.length === 1) return raw[0]!;

  const reason =
    raw.length === 0
      ? "EXEC signal slot is present but empty"
      : `EXEC signal must be a single runtime tag; got ${raw.length} values`;
  throw new PlurnkParseError(pos.line, pos.column, "visitor", reason);
};
|
|
179
|
+
|
|
180
|
+
/** Matches a leading `scheme://` prefix (case-insensitive). */
const SCHEME_PATTERN = /^[a-z][a-z0-9+.-]*:\/\//i;

/**
 * Parses the text of a path slot (or a COPY/MOVE destination body).
 *
 * @param raw - Path text as written in the statement.
 * @param pos - Statement position attached to any error raised.
 * @returns `null` for empty text; a `"local"` path holding the raw string when
 *   the text has no `scheme://` prefix; otherwise a `"url"` path decomposed
 *   with the `URL` constructor. Empty URL components are reported as `null`
 *   (except `pathname`, which is passed through, and `search`, which is an
 *   empty record when there is no query).
 * @throws PlurnkParseError when the text has a scheme prefix but `URL`
 *   rejects it.
 */
const parsePath = (raw: string, pos: Position): ParsedPath | null => {
  if (raw.length === 0) return null;
  if (!SCHEME_PATTERN.test(raw)) {
    return { kind: "local", raw };
  }

  let url: URL;
  try {
    url = new URL(raw);
  } catch (e: unknown) {
    const detail = e instanceof Error ? e.message : raw;
    throw new PlurnkParseError(pos.line, pos.column, "visitor", `invalid URI in path: ${detail}`);
  }

  // Collect query parameters in a Map rather than by indexing a plain object:
  // a key such as `toString` or `__proto__` would otherwise be read from (or
  // written through) Object.prototype and corrupt the result.
  const collected = new Map<string, string | string[]>();
  for (const [key, value] of url.searchParams) {
    const existing = collected.get(key);
    if (existing === undefined) {
      collected.set(key, value);
    } else if (Array.isArray(existing)) {
      existing.push(value);
    } else {
      // Second occurrence of a key: promote the single value to an array.
      collected.set(key, [existing, value]);
    }
  }
  // Object.fromEntries defines every key as an own data property.
  const search: Record<string, string | string[]> = Object.fromEntries(collected);

  return {
    kind: "url",
    raw,
    scheme: url.protocol.replace(/:$/, ""),
    username: url.username || null,
    password: url.password || null,
    hostname: url.hostname || null,
    port: url.port ? Number.parseInt(url.port, 10) : null,
    pathname: url.pathname,
    search,
    fragment: url.hash ? url.hash.slice(1) : null,
  };
};
|
|
217
|
+
|
|
218
|
+
type MatcherDialect = "xpath" | "regex" | "jsonpath" | "glob";

/**
 * Chooses the matcher dialect from the leading characters of a body.
 *
 * `//` selects xpath, any other leading `/` selects regex, a leading `$`
 * selects jsonpath, and everything else (including an empty body) is a glob.
 */
const detectMatcherDialect = (body: string): MatcherDialect => {
  switch (body.charAt(0)) {
    case "/":
      // A doubled slash is xpath; a single slash opens a regex literal.
      return body.charAt(1) === "/" ? "xpath" : "regex";
    case "$":
      return "jsonpath";
    default:
      return "glob";
  }
};
|
|
226
|
+
|
|
227
|
+
/**
 * Splits a `/pattern/flags` body into its pattern and flags.
 *
 * Scanning starts after the first character and stops at the first `/` that
 * is not preceded by a backslash; a backslash always skips the character
 * after it.
 *
 * @param body - Regex body; its first character is not inspected.
 * @param pos - Statement position attached to any error raised.
 * @returns The text between the delimiters and the text after the closing one.
 * @throws PlurnkParseError when no closing `/` is found.
 */
const parseRegexLiteral = (body: string, pos: Position): { pattern: string; flags: string } => {
  let closeAt = -1;
  for (let i = 1; i < body.length; i += 1) {
    const ch = body[i];
    if (ch === "\\") {
      // Step over the escaped character; the loop increment covers the backslash.
      i += 1;
      continue;
    }
    if (ch === "/") {
      closeAt = i;
      break;
    }
  }

  if (closeAt === -1) {
    throw new PlurnkParseError(pos.line, pos.column, "visitor", "regex body missing closing /");
  }
  return { pattern: body.slice(1, closeAt), flags: body.slice(closeAt + 1) };
};
|
|
239
|
+
|
|
240
|
+
/**
 * Builds the typed matcher body for FIND/READ/SHOW/HIDE.
 *
 * The dialect is chosen by `detectMatcherDialect`. Regex bodies are split by
 * `parseRegexLiteral` and compiled with `new RegExp`; xpath bodies are checked
 * with `xpath.parse`; jsonpath bodies are checked by evaluating them against
 * an empty object; glob bodies pass through unvalidated.
 *
 * @param body - Raw body text.
 * @param pos - Statement position attached to any error raised.
 * @returns The matcher tagged with its dialect; regex matchers also carry the
 *   pattern, flags and compiled `RegExp`.
 * @throws PlurnkParseError when the regex has no closing delimiter, or when
 *   the dialect's validator rejects the body (`invalid <dialect>: <detail>`).
 */
const parseMatcherBody = (body: string, pos: Position): MatcherBody => {
  const dialect = detectMatcherDialect(body);
  if (dialect === "glob") {
    return { dialect: "glob", raw: body };
  }

  // Split the regex delimiters outside the guarded section so the
  // PlurnkParseError raised for a missing closing `/` propagates unchanged.
  const literal = dialect === "regex" ? parseRegexLiteral(body, pos) : null;

  try {
    if (literal !== null) {
      const regexp = new RegExp(literal.pattern, literal.flags);
      return { dialect: "regex", raw: body, pattern: literal.pattern, flags: literal.flags, regexp };
    }
    if (dialect === "xpath") {
      xpath.parse(body);
      return { dialect: "xpath", raw: body };
    }
    JSONPath({ path: body, json: {} });
    return { dialect: "jsonpath", raw: body };
  } catch (e: unknown) {
    // Narrow the unknown failure explicitly: use its `message` when it has
    // one, otherwise fall back to the offending body text.
    const message =
      typeof e === "object" && e !== null && "message" in e
        ? (e as { message: unknown }).message
        : undefined;
    const detail = message ?? body;
    throw new PlurnkParseError(pos.line, pos.column, "visitor", `invalid ${dialect}: ${detail}`);
  }
};
|
|
270
|
+
|
|
271
|
+
/**
 * Wraps a SEND body together with a best-effort JSON decoding of it.
 *
 * @param raw - Body text exactly as written.
 * @returns `raw` plus `json`, which holds the `JSON.parse` result or `null`
 *   when the text is not valid JSON (plain-text bodies are expected).
 */
const parseSendBody = (raw: string): SendBody => {
  try {
    return { raw, json: JSON.parse(raw) };
  } catch {
    // Not an error: not every SEND body is JSON.
    return { raw, json: null };
  }
};
|
|
276
|
+
|
|
277
|
+
/**
 * Converts one parsed statement context into its typed AST node.
 *
 * Steps, in order: split the open tag into op and suffix; record the start
 * position (0:0 when no start token is available); read the optional signal,
 * path, line-marker and body slots; coerce the signal per op (SEND → integer,
 * EXEC → single tag, others → string list); shape the body per op (matcher,
 * destination path, SEND payload, or raw text).
 *
 * @param ctx - Parser context for a single statement.
 * @returns The statement node for the op found in the open tag.
 * @throws Error when the open tag names no known op.
 * @throws PlurnkParseError when a slot fails its per-op validation.
 */
export const buildStatement = (ctx: StatementContext): PlurnkStatement => {
  const openTagCtx = ctx.openTag();
  const { op, suffix } = splitOpAndSuffix(openTagCtx.getText());

  const anchor = ctx.start ?? openTagCtx.start;
  const position: Position = {
    line: anchor?.line ?? 0,
    column: anchor?.column ?? 0,
  };

  // Signal slot: absent → null, present but empty → [], otherwise CSV-split.
  const signalCtx = ctx.signal();
  const signalText: string | null = signalCtx ? (signalCtx.SIGNAL_TEXT()?.getText() ?? "") : null;
  let rawSignal: string[] | null;
  if (signalText === null) {
    rawSignal = null;
  } else if (signalText.length > 0) {
    rawSignal = signalText.split(",");
  } else {
    rawSignal = [];
  }

  // Path slot: a present-but-empty slot is handed to parsePath as "".
  const pathCtx = ctx.path();
  const path: ParsedPath | null = pathCtx
    ? parsePath(pathCtx.PATH_TEXT()?.getText() ?? "", position)
    : null;

  const lineMarkerCtx = ctx.lineMarker();
  const lineMarker: LineMarker | null = lineMarkerCtx
    ? parseLineMarker(lineMarkerCtx.L_MARKER()?.getText() ?? "")
    : null;

  const bodyCtx = ctx.body();
  const rawBody: string | null = bodyCtx ? bodyCtx.getText() : null;

  // Per-OP signal coercion.
  let signal: string[] | number | string | null;
  if (op === "SEND") {
    signal = coerceSendSignal(rawSignal, position);
  } else if (op === "EXEC") {
    signal = coerceExecSignal(rawSignal, position);
  } else {
    signal = rawSignal;
  }

  // Per-OP body shaping; a missing body is null for every op.
  const shapeBody = (): MatcherBody | ParsedPath | SendBody | string | null => {
    if (rawBody === null) return null;
    switch (op) {
      case "FIND":
      case "READ":
      case "SHOW":
      case "HIDE":
        return parseMatcherBody(rawBody, position);
      case "COPY":
      case "MOVE":
        return parsePath(rawBody, position);
      case "SEND":
        return parseSendBody(rawBody);
      case "EDIT":
      case "EXEC":
        return rawBody;
    }
  };
  const body = shapeBody();

  return { op, suffix, signal, path, lineMarker, body, position } as PlurnkStatement;
};
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
import {
|
|
2
|
+
DefaultErrorStrategy,
|
|
3
|
+
InputMismatchException,
|
|
4
|
+
NoViableAltException,
|
|
5
|
+
Token,
|
|
6
|
+
type Parser,
|
|
7
|
+
type RecognitionException,
|
|
8
|
+
} from "antlr4ng";
|
|
9
|
+
import { plurnkParser } from "./generated/plurnkParser.ts";
|
|
10
|
+
import { plurnkLexer } from "./generated/plurnkLexer.ts";
|
|
11
|
+
|
|
12
|
+
/**
 * Human-readable location phrase for each lexer mode name, used when
 * rewriting lexer diagnostics. The four header modes share one phrase.
 */
const LEXER_MODE_CONTEXT: Record<string, string> = {
  DEFAULT_MODE: "between statements",
  ...Object.fromEntries(
    ["OPENED", "POST_SIGNAL", "POST_PATH", "POST_L"].map(
      (mode) => [mode, "in statement header"] as const,
    ),
  ),
  SIGNAL: "in signal",
  PATH: "in path",
  BODY: "in body",
};
|
|
22
|
+
|
|
23
|
+
/**
 * Captures the quoted text at the end of a lexer message of the form
 * `… at: '<text>'`. The capture is greedy up to the final quote so that an
 * offending apostrophe (`… at: '''`) is captured rather than missed.
 */
const OFFENDING_CHAR_RE = /at: '([\s\S]*)'$/;

/**
 * Describes the offending text reported in a lexer error message.
 *
 * @param msg - Original lexer message.
 * @returns `"input"` when the message does not end in `at: '…'`,
 *   `"end of input"` when the quoted text is empty, otherwise the quoted text
 *   wrapped in single quotes.
 */
const extractOffendingChar = (msg: string): string => {
  const match = OFFENDING_CHAR_RE.exec(msg);
  if (match === null) return "input";
  const offending = match[1];
  return offending === "" ? "end of input" : `'${offending}'`;
};
|
|
31
|
+
|
|
32
|
+
/**
 * Rewrites a lexer diagnostic as `unrecognized character <what> <where>`.
 *
 * `<where>` comes from the lexer's current mode name via `LEXER_MODE_CONTEXT`
 * (falling back to "between statements"); `<what>` is extracted from the
 * original message.
 *
 * @param lexer - Lexer whose current mode selects the location phrase.
 * @param originalMsg - Message produced by the lexer.
 * @returns The rewritten message.
 */
export const translateLexerMessage = (lexer: plurnkLexer, originalMsg: string): string => {
  const activeMode = lexer.modeNames[lexer.mode] ?? "DEFAULT_MODE";
  const where = LEXER_MODE_CONTEXT[activeMode] ?? "between statements";
  const what = extractOffendingChar(originalMsg);
  return `unrecognized character ${what} ${where}`;
};
|
|
38
|
+
|
|
39
|
+
/**
 * Reader-facing name for each token type, used in parser diagnostics. All
 * nine open-tag tokens share the label "open tag"; punctuation tokens are
 * shown quoted.
 */
const SLOT_BY_TOKEN: Record<number, string> = {
  ...Object.fromEntries(
    [
      plurnkParser.OPEN_FIND,
      plurnkParser.OPEN_READ,
      plurnkParser.OPEN_EDIT,
      plurnkParser.OPEN_COPY,
      plurnkParser.OPEN_MOVE,
      plurnkParser.OPEN_SHOW,
      plurnkParser.OPEN_HIDE,
      plurnkParser.OPEN_SEND,
      plurnkParser.OPEN_EXEC,
    ].map((tokenType) => [tokenType, "open tag"] as const),
  ),
  [plurnkParser.LBRACKET]: "'['",
  [plurnkParser.RBRACKET]: "']'",
  [plurnkParser.LPAREN]: "'('",
  [plurnkParser.RPAREN]: "')'",
  [plurnkParser.L_MARKER]: "line marker",
  [plurnkParser.COLON]: "':'",
  [plurnkParser.SIGNAL_TEXT]: "signal content",
  [plurnkParser.PATH_TEXT]: "path content",
  [plurnkParser.BODY_TEXT]: "body content",
  [plurnkParser.CLOSE_TAG]: "close tag",
  [plurnkParser.TEXT]: "text between statements",
};
|
|
61
|
+
|
|
62
|
+
/**
 * Names a token for use in a diagnostic.
 *
 * @param tok - Token to describe, or `null`.
 * @returns "end of input" for a missing or EOF token; the `SLOT_BY_TOKEN`
 *   label when the token type has one; otherwise the token text in single
 *   quotes, or "input" when the token has no text.
 */
const describeToken = (tok: Token | null): string => {
  if (!tok) return "end of input";
  if (tok.type === Token.EOF) return "end of input";

  const label = SLOT_BY_TOKEN[tok.type];
  if (label) return label;

  const literal = tok.text;
  if (!literal) return "input";
  return `'${literal}'`;
};
|
|
69
|
+
|
|
70
|
+
/**
 * Lists the tokens the parser expected at the point of a recognition error.
 *
 * Token types without a `SLOT_BY_TOKEN` label are omitted.
 *
 * @param _parser - Unused.
 * @param e - Recognition error supplying the expected-token set.
 * @returns `null` when nothing nameable was expected; otherwise "a",
 *   "a or b", or "a, b, or c".
 */
const describeExpected = (
  _parser: Parser,
  e: RecognitionException,
): string | null => {
  const expected = e.getExpectedTokens();
  if (!expected) return null;

  const labels: string[] = [];
  for (const tokenType of expected.toArray()) {
    const label = SLOT_BY_TOKEN[tokenType];
    if (label) labels.push(label);
  }

  // Detach the final label so it can be joined with "or".
  const tail = labels.pop();
  if (tail === undefined) return null;
  if (labels.length === 0) return tail;
  if (labels.length === 1) return `${labels[0]} or ${tail}`;
  return `${labels.join(", ")}, or ${tail}`;
};
|
|
86
|
+
|
|
87
|
+
/**
 * Error strategy that reports parser errors in terms of statement slots
 * ("open tag", "line marker", "':'" …) instead of raw token names.
 *
 * Each report method does nothing while the recognizer is already in error
 * recovery mode; otherwise it begins an error condition and notifies the
 * recognizer's error listeners with the rewritten message.
 */
export class PlurnkErrorStrategy extends DefaultErrorStrategy {
  /**
   * Reports a recognition error as `unexpected <got>`, adding
   * `; expected <list>` for input-mismatch and no-viable-alternative errors
   * when the expected tokens can be named.
   */
  public override reportError(recognizer: Parser, e: RecognitionException): void {
    if (this.inErrorRecoveryMode(recognizer)) return;
    this.beginErrorCondition(recognizer);

    const got = describeToken(e.offendingToken);
    const expected = describeExpected(recognizer, e);

    const isMismatch = e instanceof InputMismatchException || e instanceof NoViableAltException;
    const msg = isMismatch && expected
      ? `unexpected ${got}; expected ${expected}`
      : `unexpected ${got}`;

    recognizer.notifyErrorListeners(msg, e.offendingToken, e);
  }

  /** Reports a missing token as `expected <list>; got <current token>`. */
  public override reportMissingToken(recognizer: Parser): void {
    if (this.inErrorRecoveryMode(recognizer)) return;
    this.beginErrorCondition(recognizer);

    const tok = recognizer.getCurrentToken();
    const expected = this.describeExpectedSlots(recognizer) ?? "more input";
    const got = describeToken(tok);
    recognizer.notifyErrorListeners(`expected ${expected}; got ${got}`, tok, null);
  }

  /** Reports an extraneous token as `unexpected <got>[; expected <list>]`. */
  public override reportUnwantedToken(recognizer: Parser): void {
    if (this.inErrorRecoveryMode(recognizer)) return;
    this.beginErrorCondition(recognizer);

    const tok = recognizer.getCurrentToken();
    const got = describeToken(tok);
    const expected = this.describeExpectedSlots(recognizer);
    const msg = expected
      ? `unexpected ${got}; expected ${expected}`
      : `unexpected ${got}`;
    recognizer.notifyErrorListeners(msg, tok, null);
  }

  /**
   * Joins the labels of the currently expected tokens with " or ".
   * Returns `null` when none of the expected token types has a label.
   */
  private describeExpectedSlots(recognizer: Parser): string | null {
    const names = this.getExpectedTokens(recognizer)
      .toArray()
      .map((t) => SLOT_BY_TOKEN[t])
      .filter((s): s is string => Boolean(s));
    return names.length > 0 ? names.join(" or ") : null;
  }
}
|
package/src/errors.ts
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/** Pipeline stage that raised a parse error. */
export type ErrorSource = "lexer" | "parser" | "visitor";

/**
 * Error raised for any Plurnk lexing, parsing or AST-building failure.
 *
 * The message is prefixed with the stage and the `line:column` location; the
 * same three values are also exposed as fields.
 */
export class PlurnkParseError extends Error {
  readonly line: number;
  readonly column: number;
  readonly source: ErrorSource;

  /**
   * @param line - Line of the failure.
   * @param column - Column of the failure.
   * @param source - Stage that detected the failure.
   * @param message - Description appended after the location prefix.
   */
  constructor(line: number, column: number, source: ErrorSource, message: string) {
    const located = `Plurnk ${source} error at ${line}:${column} — ${message}`;
    super(located);
    this.name = "PlurnkParseError";
    this.line = line;
    this.column = column;
    this.source = source;
  }

  /** JSON serialization — `JSON.stringify` picks this up automatically. */
  toJSON(): { line: number; column: number; source: ErrorSource; message: string } {
    const { line, column, source, message } = this;
    return { line, column, source, message };
  }
}
|