rewritable 0.3.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/create.mjs ADDED
@@ -0,0 +1,256 @@
1
+ // `rwa create <task…>` (and the `draft` alias) — scaffold + agent-fill into a
2
+ // SELF-CONTAINED rewritable in one shot (design 2026-05-31 §4). The task is a CLI
3
+ // INPUT, never a file capability: the CLI bakes the generated content (and any
4
+ // --data) into the INLINE_DOC snapshot, and the emitted file is thereafter an
5
+ // ordinary, dependency-free rewritable. Recurrence = re-run the CLI.
6
+ //
7
+ // Pipeline (§4.6): scaffold in memory → runAgentLoop (authoring) → apply the
8
+ // envelope to a temp file → assertSelfContained → write ONCE atomically. Nothing
9
+ // is written to the destination unless the whole pipeline succeeds, so a failed
10
+ // run never leaves a half-baked file on disk.
11
+
12
+ import crypto from 'node:crypto';
13
+ import path from 'node:path';
14
+ import { readFile, rm, stat } from 'node:fs/promises';
15
+ import { tmpdir } from 'node:os';
16
+ import { loadSeed, applySeedSubs, replaceInlineDoc, extractInlineDoc, kindOverrides, KNOWN_KINDS } from './seed.mjs';
17
+ import { resolveBareWord } from './template.mjs';
18
+ import { extractFromSeed } from './seed-extract.mjs';
19
+ import { runAgentLoop } from './agent-loop.mjs';
20
+ import { applyPlan, CliError } from './edit.mjs';
21
+ import { assertSelfContained } from './self-contained.mjs';
22
+ import { findFrozenZones } from './apply-edits.mjs';
23
+ import { resolveApiKey, envBaseUrl } from './backend.mjs';
24
+ import { atomicWrite } from './atomic-write.mjs';
25
+
26
// Hard cap on --data baked into the snapshot. The dataset lands inside INLINE_DOC
// of a single self-contained file the user will ship; an unbounded paste would
// bloat the artifact and the model context. Over the cap → fail loud (§4.3).
// The value is 200,000 (numeric-separator literal); the comparison itself lives
// in createCmd, which raises a usage-level `data_too_large` error when exceeded.
const DATA_CAP = 200_000;
30
+
31
// Flags that consume a value. A token that directly follows one of these is the
// flag's argument, never a positional task word.
const VALUE_FLAGS = new Set([
  '--kind', '--from', '--data', '--out',
  '--backend', '--model', '--base-url', '--api-key',
]);

/**
 * Parse `rwa create` argv into flags + the positional task words. Pure: no IO,
 * no kind resolution (that is resolveBareWord's job, §4.2 Stage 1). A value
 * flag's argument is never collected as a task word.
 *
 * Value flags are accepted in both spellings: `--out file.html` and
 * `--out=file.html`. When a flag appears more than once, the first occurrence
 * wins. A value flag given as the last token (no argument) resolves to null.
 *
 * @param {string[]} argv — args after the `create`/`draft` verb
 * @returns {{kind:string|null, from:string|null, data:string|null, out:string|null,
 *            force:boolean, open:boolean,
 *            backend:{name:string|null, model:string|null, baseUrl:string|null, apiKey:string|null},
 *            words:string[]}}
 */
export function parseCreateArgs(argv) {
  // Look up a value flag's argument, in either `--name value` or `--name=value` form.
  const get = (name) => {
    const inlinePrefix = `${name}=`;
    for (let i = 0; i < argv.length; i++) {
      const token = argv[i];
      if (token === name) return argv[i + 1] ?? null;
      // Previously `--name=value` was silently dropped: it starts with `-`, so it
      // was filtered out of the words, and it never matched an exact lookup.
      if (token.startsWith(inlinePrefix)) return token.slice(inlinePrefix.length);
    }
    return null;
  };

  const words = argv.filter((a, i) => {
    if (a.startsWith('-')) return false;            // a flag itself (or `-` for stdin)
    if (VALUE_FLAGS.has(argv[i - 1])) return false; // a value-flag's argument
    return true;
  });

  return {
    kind: get('--kind'),
    from: get('--from'),
    data: get('--data'),
    out: get('--out'),
    force: argv.includes('--force') || argv.includes('-f'),
    open: argv.includes('--open') || argv.includes('-o'),
    backend: {
      name: get('--backend'),
      model: get('--model'),
      baseUrl: get('--base-url'),
      apiKey: get('--api-key'),
    },
    words,
  };
}
73
+
74
// The create-only generation contract (design §4.5): output must run with ZERO
// external runtime dependencies. Appended to whichever per-kind system prompt the
// resolved frame selects — this is CLI-exclusive framing, never shipped in the
// seed bytes. assertSelfContained (below) is the code-level tripwire behind it.
// The literal begins with a blank line so it can be concatenated directly onto
// the end of a system prompt (createCmd does `prompt + SELF_CONTAINMENT_DIRECTIVE`).
const SELF_CONTAINMENT_DIRECTIVE = `

CRITICAL — the document you produce MUST be fully self-contained and run with NO external runtime dependencies:
- Do NOT reference any external URL: no <script src=...> to a CDN, no <link href=...> stylesheet, no remote <img>, no @import or url() pointing off-document. Everything is inlined.
- For any chart/graph/visualization, hand-roll it with inline <svg> or <canvas> + plain JavaScript. Do NOT use D3, Chart.js, or any library.
- Embed every piece of data directly in the document (e.g. a <script type="application/json"> island or a JS const). Never fetch data at runtime.
- Produce the COMPLETE document for the request — this is authoring from a starter, so a wholesale replace_document is appropriate.`;
85
+
86
// Local twin of commands.mjs titleFromBasename (create.mjs is a peer of
// commands.mjs, not a dependent). Turns a file basename into a Title Case
// string: runs of `-`/`_` become word breaks, each word gets an upper-cased
// first character, and an input with no words falls back to 'Untitled'.
function titleFromBasename(basename) {
  const capitalized = [];
  for (const piece of basename.replace(/[-_]+/g, ' ').split(' ')) {
    // Consecutive spaces produce empty pieces; skip them.
    if (!piece) continue;
    capitalized.push(piece[0].toUpperCase() + piece.slice(1));
  }
  const title = capitalized.join(' ');
  return title || 'Untitled';
}
96
+
97
// Path of `p` relative to `cwd`, for display. When the two are the same
// location (path.relative yields an empty string) the original `p` is returned
// so the caller never prints an empty path.
function rel(p, cwd) {
  const relative = path.relative(cwd, p);
  return relative === '' ? p : relative;
}
101
+
102
// Work out the creation FRAME — kind, scaffold body, and the words that form the
// brief — from the parsed args (design §4.2).
//   1. An explicit --kind wins outright: it must be a known kind, leading-word
//      detection is skipped, and every word stays in the brief.
//   2. Otherwise the leading word goes through resolveBareWord (the same
//      resolver `rwa new` uses, so the two surfaces never diverge). A template
//      or kind match consumes that word from the brief.
//   3. No match at all ("silent frame") defaults to kind 'document' with the
//      whole word list as the brief — Stage 2 model inference is deferred to
//      v2 (§9.2).
async function resolveFrame(parsed, cwd) {
  const { kind: explicitKind, words } = parsed;

  if (explicitKind) {
    if (!KNOWN_KINDS.includes(explicitKind)) {
      throw new CliError(1, 'unknown_kind', { kind: explicitKind, known: KNOWN_KINDS });
    }
    return {
      kind: explicitKind,
      scaffoldBody: kindOverrides(explicitKind).body,
      briefWords: words,
      fromMsg: '',
    };
  }

  const lead = words[0];
  const frame = lead ? await resolveBareWord(lead, cwd) : null;
  const source = frame ? frame.source : undefined;

  switch (source) {
    case 'template':
      return {
        kind: 'document',          // a cloned instance is a document
        scaffoldBody: frame.body,  // already label-stripped by resolveBareWord
        briefWords: words.slice(1),
        fromMsg: ` (from template ${rel(frame.templatePath, cwd)})`,
      };
    case 'kind':
      return {
        kind: frame.kind,
        scaffoldBody: kindOverrides(frame.kind).body,
        briefWords: words.slice(1),
        fromMsg: '',
      };
    default:
      return { kind: 'document', scaffoldBody: null, briefWords: words, fromMsg: '' };
  }
}
131
+
132
/**
 * `rwa create` / `rwa draft`: scaffold a fresh container, drive the agent loop to
 * author it, validate self-containment, and write ONCE — atomically. The emitted
 * file is an ordinary self-contained rewritable; the task left no capability in it.
 *
 * Steps, in order: resolve the frame → optional --from body → optional --data
 * (size-capped, measured in UTF-8 bytes) → destination clobber guard → in-memory
 * scaffold → agent loop → apply + validate against a temp file → single atomic
 * write to the destination. The temp file is always removed.
 *
 * @param {object} parsed — parseCreateArgs output
 * @param {object} opts
 * @param {string[]} opts.seedCandidates — seed search paths (loadSeed order)
 * @param {string} [opts.cwd] — base dir for relative paths + template scan
 * @param {string} [opts.stdinData] — content for `--data -` (caller drains stdin)
 * @returns {Promise<{out:string, kind:string, fromMsg:string}>}
 * @throws {CliError} exit 1 usage / 2 file / 3 envelope / 4 agent
 */
export async function createCmd(parsed, { seedCandidates, cwd = process.cwd(), stdinData } = {}) {
  let { kind, scaffoldBody, briefWords, fromMsg } = await resolveFrame(parsed, cwd);

  // --from: base the artifact on an existing rewritable's editable body. Reuses
  // the same exit-2 surface (not_found / not_a_rewritable) as the rest of the CLI.
  if (parsed.from) {
    const fromPath = path.resolve(cwd, parsed.from);
    let fromText;
    try {
      fromText = await readFile(fromPath, 'utf8');
    } catch (e) {
      if (e && e.code === 'ENOENT') throw new CliError(2, 'not_found', { path: fromPath });
      throw new CliError(2, 'read_error', { path: fromPath, errno: e && e.code, message: e && e.message });
    }
    try {
      scaffoldBody = extractInlineDoc(fromText);
    } catch {
      throw new CliError(2, 'not_a_rewritable', { path: fromPath });
    }
    fromMsg = ` (from ${rel(fromPath, cwd)})`;
  }

  // --data: read the dataset to bake into the brief. `-` reads stdin (drained by
  // the caller). Never fetched at runtime; embedded inline by the agent (§4.3).
  let dataContent = null;
  if (parsed.data === '-') {
    dataContent = stdinData == null ? '' : stdinData;
  } else if (parsed.data) {
    const dataPath = path.resolve(cwd, parsed.data);
    try {
      dataContent = await readFile(dataPath, 'utf8');
    } catch (e) {
      if (e && e.code === 'ENOENT') throw new CliError(2, 'not_found', { path: dataPath });
      throw new CliError(2, 'read_error', { path: dataPath, errno: e && e.code, message: e && e.message });
    }
  }
  if (dataContent != null) {
    // The error detail is reported as `bytes`, so measure real UTF-8 bytes rather
    // than UTF-16 code units (`.length`), which under-counts multibyte text.
    const dataBytes = Buffer.byteLength(dataContent, 'utf8');
    if (dataBytes > DATA_CAP) {
      throw new CliError(1, 'data_too_large', { bytes: dataBytes, cap: DATA_CAP });
    }
  }

  // Output path + clobber guard (matches new/import's --force semantics).
  const dated = `./${kind}-${new Date().toISOString().slice(0, 10)}.html`;
  const out = path.resolve(cwd, parsed.out || dated);
  try {
    await stat(out);
    if (!parsed.force) throw new CliError(2, 'dest_exists', { path: out });
  } catch (e) {
    if (e instanceof CliError) throw e;
    // ENOENT is the happy path (file doesn't exist yet); anything else is a real
    // stat error worth surfacing.
    if (!(e && e.code === 'ENOENT')) throw new CliError(2, 'read_error', { path: out, errno: e && e.code });
  }

  // Build the scaffold in memory — identical subs flow to newCmd.
  const seed = await loadSeed(seedCandidates);
  const overrides = kindOverrides(kind);
  let scaffold = applySeedSubs(seed, {
    uuid: crypto.randomUUID(),
    title: titleFromBasename(path.basename(out, path.extname(out))),
    fileMeta: path.basename(out),
    lensPlaceholder: overrides.lensPlaceholder,
    palPlaceholder: overrides.palPlaceholder,
    productHeader: overrides.productHeader,
    productKind: kind,
    lensClickToAnchor: overrides.lensClickToAnchor,
  });
  // A frame/--from body takes precedence over the kind's default body.
  const body = scaffoldBody != null ? scaffoldBody : overrides.body;
  if (body != null) scaffold = replaceInlineDoc(scaffold, body);
  const scaffoldDoc = extractInlineDoc(scaffold);

  // Backend: flag → env → default. The key is used ONLY for the model call here;
  // it is never written into the artifact (the file carries content, not creds).
  const backendName = parsed.backend.name || process.env.RWA_BACKEND || 'openrouter';
  const backend = {
    baseUrl: parsed.backend.baseUrl || envBaseUrl(backendName),
    model: parsed.backend.model || process.env.RWA_MODEL || 'google/gemini-3.5-flash',
    apiKey: resolveApiKey(backendName, parsed.backend.apiKey),
  };

  // Per-kind system prompt + the create-only self-containment directive; the brief
  // carries any --data inline so the agent embeds it (never fetches).
  const { SYSTEM_PROMPTS, TOOL_SCHEMAS } = extractFromSeed(seed);
  const systemPrompt = (SYSTEM_PROMPTS[kind] || SYSTEM_PROMPTS.document) + SELF_CONTAINMENT_DIRECTIVE;
  const frozenZoneNames = findFrozenZones(scaffoldDoc).map(z => z.name);
  let instruction = briefWords.join(' ').trim() || `Author a complete ${kind} for this document.`;
  if (dataContent != null) {
    instruction += `\n\nUse this data — embed it inline in the document, do NOT fetch it at runtime:\n${dataContent}`;
  }

  let result;
  try {
    result = await runAgentLoop({ systemPrompt, toolSchemas: TOOL_SCHEMAS, currentDoc: scaffoldDoc, instruction, frozenZoneNames, backend });
  } catch (e) {
    // Every access is null-safe: a nullish rejection must still surface as the
    // exit-4 agent error rather than a TypeError from reading `.subcode`.
    throw new CliError(4, e?.subcode || 'agent_error', e?.details || { message: e?.message });
  }

  // Atomicity (§4.6): apply + validate against a TEMP file, never the destination.
  // The destination is written exactly once, only after self-containment passes —
  // so any failure (envelope, frozen-zone, external-ref) leaves --out untouched.
  const tmp = path.join(tmpdir(), `rwa-create-${crypto.randomUUID()}.html`);
  try {
    await atomicWrite(tmp, scaffold);
    await applyPlan(tmp, result.envelope);            // throws CliError on envelope/frozen issues
    const filled = await readFile(tmp, 'utf8');
    assertSelfContained(extractInlineDoc(filled));    // throws CliError(4) → out untouched
    await atomicWrite(out, filled);
  } finally {
    await rm(tmp, { force: true });
  }

  return { out, kind, fromMsg };
}
package/src/doc.mjs ADDED
@@ -0,0 +1,69 @@
1
+ // Read-path entry for `rwa doc` — the counterpart to `rwa edit`'s applyPlan.
2
+ // Where applyPlan WRITES the editable body of a rewritable, inspectDoc READS
3
+ // it: it returns the exact LF-canonical text the rwa-edit contract operates
4
+ // on, plus the metadata an agent needs to edit safely (uuid, product kind,
5
+ // frozen-zone names).
6
+ //
7
+ // Error surface mirrors edit.mjs so callers dedupe file-error handling across
8
+ // read and write:
9
+ // exitCode 2 / subcode: 'not_found', 'read_error', 'not_a_rewritable'
10
+
11
+ import { readFile } from 'node:fs/promises';
12
+ import { extractInlineDoc } from './seed.mjs';
13
+ import { findFrozenZones } from './apply-edits.mjs';
14
+ import { resolveSelfDescription } from './identity.mjs';
15
+ import { CliError } from './edit.mjs';
16
+
17
// The bootstrap bakes both consts at emit time (cli/src/seed.mjs applySeedSubs).
// Reading them back recovers identity (uuid) and editing framing (kind) without
// a full HTML parse. Patterns mirror seed.mjs UUID_RE / PRODUCT_KIND_RE and
// rwa.mjs detectProductKind — keep them in step.
const UUID_RE = /const DOC_UUID = '([0-9a-f-]{36})';/;
const PRODUCT_KIND_RE = /const PRODUCT_KIND = '([^']*)';/;

/**
 * Read-side inspection of a rewritable: returns its editable document body, the
 * contract metadata (uuid, kind, frozen-zone names), and the `self-description/1`
 * projection produced by resolveSelfDescription (see ./identity.mjs and
 * docs/specs/rwa-self-description-spec.md §3.1 for the declared > static
 * precedence it applies).
 *
 * @param {string} filePath — path to the target .html
 * @returns {Promise<{doc: string, uuid: string|null, kind: string, frozenZones: string[], self: object}>}
 * @throws {CliError} exitCode 2: `not_found`, `read_error`, or `not_a_rewritable`
 */
export async function inspectDoc(filePath) {
  let fileText;
  try {
    fileText = await readFile(filePath, 'utf8');
  } catch (e) {
    const missing = e && e.code === 'ENOENT';
    throw missing
      ? new CliError(2, 'not_found', { path: filePath })
      : new CliError(2, 'read_error', { path: filePath, errno: e && e.code, message: e && e.message });
  }

  // extractInlineDoc throws for plain-text / non-rewritable input — the same gate
  // `rwa edit` uses. Mapping it to not_a_rewritable gives agents a deterministic
  // "is this a rewritable?" probe (clean non-zero exit, empty stdout).
  let doc;
  try {
    doc = extractInlineDoc(fileText);
  } catch (_e) {
    throw new CliError(2, 'not_a_rewritable', { path: filePath });
  }

  // First capture group of a pattern, or undefined when the pattern is absent.
  const capture = (pattern) => {
    const found = fileText.match(pattern);
    return found ? found[1] : undefined;
  };

  const uuid = capture(UUID_RE) || null;
  // A missing or empty PRODUCT_KIND (pre-PRODUCT_KIND containers) defaults to
  // 'document', matching how the runtime and `rwa edit` resolve SYSTEM_PROMPTS.
  const kind = capture(PRODUCT_KIND_RE) || 'document';
  const frozenZones = findFrozenZones(doc).map((zone) => zone.name);
  // "What is this, what can be done with it" — computed from the bytes alone.
  const self = resolveSelfDescription({ fileText, doc, uuid, kind, frozenZones });

  return { doc, uuid, kind, frozenZones, self };
}
@@ -0,0 +1,357 @@
1
+ // rwa-edit-dsl/1 compiler — turns a DSL plan into an apply_edits or
2
+ // replace_document envelope. Read alongside rwa-edit-dsl-spec.md.
3
+ //
4
+ // Compile-down semantics:
5
+ // - replace_document plan → { tool: 'replace_document', envelope }
6
+ // - any other plan → { tool: 'apply_edits', envelope: { version, edits } }
7
+ //
8
+ // Multi-op plans apply sequentially against an evolving "shadow" doc — each
9
+ // op's anchor is resolved against the doc as it would look after preceding
10
+ // ops landed. The shadow is internal to compilation; the emitted edits are
11
+ // applied sequentially by the runtime in the same order, so every emitted
12
+ // `find` matches in turn.
13
+
14
// The only DSL version this compiler accepts in `plan.version`.
const SUPPORTED_VERSION = 'rwa-edit-dsl/1';

/**
 * Error raised for every DSL spec violation found during compilation.
 * `code` is the machine-readable violation code (e.g. 'op_malformed',
 * 'version_unsupported'); `op` is the offending op when one is known.
 */
class DslCompileError extends Error {
  constructor(code, message, op) {
    super(message);
    // Identify the subclass in stack traces and `String(err)` output.
    this.name = 'DslCompileError';
    this.code = code;
    this.op = op;
  }
}

// Small factory so call sites read `throw makeError(code, message, op)`.
function makeError(code, message, op) {
  return new DslCompileError(code, message, op);
}

/**
 * Compile a DSL plan against a doc. Returns:
 *   { tool: 'apply_edits' | 'replace_document', envelope: <rwa-edit/1 envelope> }
 *
 * A plan containing `replace_document` must contain only that op; it compiles
 * straight to a replace_document envelope and never inspects `doc`. Any other
 * plan is compiled op by op against an evolving shadow copy of `doc`, so each
 * op's anchors are resolved against the document as it looks after the
 * preceding ops.
 *
 * @param {{version: string, ops: object[]}} plan — the rwa-edit-dsl/1 plan
 * @param {string} doc — the document the plan targets
 * @returns {{tool: string, envelope: object}}
 * @throws {DslCompileError} on any spec violation
 */
export function compileDslPlan(plan, doc) {
  if (!plan || typeof plan !== 'object') {
    throw makeError('op_malformed', 'plan must be an object');
  }
  if (plan.version !== SUPPORTED_VERSION) {
    throw makeError('version_unsupported', `expected ${SUPPORTED_VERSION}, got ${plan.version}`);
  }
  if (!Array.isArray(plan.ops) || plan.ops.length === 0) {
    throw makeError('op_malformed', 'plan.ops must be a non-empty array');
  }

  // replace_document is a sole-op escape hatch.
  const hasReplaceDoc = plan.ops.some(op => op?.op === 'replace_document');
  if (hasReplaceDoc) {
    if (plan.ops.length !== 1) {
      throw makeError('op_malformed', 'replace_document must be the sole op in a plan');
    }
    const op = plan.ops[0];
    if (typeof op.doc !== 'string' || typeof op.reason !== 'string') {
      throw makeError('op_malformed', 'replace_document requires doc and reason fields');
    }
    return {
      tool: 'replace_document',
      envelope: { version: 'rwa-edit/1', doc: op.doc, reason: op.reason },
    };
  }

  // Anchor resolution needs string search on the doc. Reject anything else here
  // with a compile error instead of a raw TypeError from deep inside a helper.
  if (typeof doc !== 'string') {
    throw makeError('op_malformed', 'doc must be a string');
  }

  // Otherwise: compile each op against an evolving shadow.
  let shadow = doc;
  const edits = [];
  for (const op of plan.ops) {
    const newEdits = compileOp(op, shadow);
    for (const e of newEdits) {
      validateEditApplies(shadow, e, op);
      edits.push(e);
      // Advance the shadow so the next edit/op sees this one as already applied.
      shadow = applyEditToShadow(shadow, e);
    }
  }
  return {
    tool: 'apply_edits',
    envelope: { version: 'rwa-edit/1', edits },
  };
}
77
+
78
// Dispatch a single DSL op to its compiler. Every compiler returns an array of
// { find, replace } edits. Throws op_malformed for a non-object op or one
// without a string `op` field, and op_unknown for an unrecognised op name.
function compileOp(op, doc) {
  const wellFormed = Boolean(op) && typeof op === 'object' && typeof op.op === 'string';
  if (!wellFormed) {
    throw makeError('op_malformed', 'each op must be an object with a string `op` field');
  }

  // A Map (not a plain object) so names like "constructor" cannot resolve to
  // inherited properties.
  const compilers = new Map([
    ['replace', compileReplace],
    ['insert', compileInsert],
    ['delete', compileDelete],
    ['set_attr', compileSetAttr],
  ]);
  const compile = compilers.get(op.op);
  if (compile === undefined) {
    throw makeError('op_unknown', `unknown op: ${op.op}`, op);
  }
  return compile(op, doc);
}
90
+
91
// ---------- replace ----------

// Compile a `replace` op into one or more { find, replace } edits.
//   - `region` (optional) must occur exactly once in doc and narrows the search
//     window to that occurrence.
//   - Without `all`, `find` must occur exactly once inside the window. If it is
//     also unique in the whole doc it is emitted as-is; otherwise it is widened
//     with surrounding doc text until unique.
//   - With `all`, every occurrence in the window yields its own widened edit.
function compileReplace(op, doc) {
  const { find, replace, region, all } = op;
  if (typeof find !== 'string' || typeof replace !== 'string') {
    throw makeError('op_malformed', 'replace requires `find` and `replace` strings', op);
  }

  // Search window: the whole doc, or the single occurrence of `region`.
  let windowStart = 0;
  let windowEnd = doc.length;
  if (typeof region === 'string') {
    const regionHits = allOccurrences(doc, region);
    if (regionHits.length === 0) {
      throw makeError('region_not_found', `region not found: ${preview(region)}`, op);
    }
    if (regionHits.length > 1) {
      throw makeError('region_not_unique', `region matches ${regionHits.length} times`, op);
    }
    [windowStart] = regionHits;
    windowEnd = windowStart + region.length;
  }

  const localOccs = allOccurrences(doc.slice(windowStart, windowEnd), find);
  if (localOccs.length === 0) {
    const code = all ? 'all_with_zero_matches' : 'op_malformed';
    throw makeError(code, `find has zero matches in search window: ${preview(find)}`, op);
  }
  if (!all && localOccs.length > 1) {
    throw makeError('op_malformed', `find has ${localOccs.length} matches in search window but all=false: ${preview(find)}`, op);
  }

  // Window-relative offsets → absolute doc offsets, widened until unique.
  const widenAt = (localStart) => contextualizeEdit(doc, windowStart + localStart, find, replace);

  if (all) {
    // One edit per occurrence inside the window.
    return localOccs.map((localStart) => widenAt(localStart));
  }

  // Single match in the window. Globally unique → emit the raw strings.
  if (allOccurrences(doc, find).length === 1) {
    return [{ find, replace }];
  }
  // Otherwise disambiguate using surrounding doc text.
  return [widenAt(localOccs[0])];
}
136
+
137
// Extend find/replace outward with surrounding doc text until the widened find
// is uniquely locatable in doc. Context grows one character at a time,
// alternating sides (after first, then before); the same context is attached
// to both strings so that only the original `find` span changes.
//
// Each side is capped at MAX characters and by the document boundary. When one
// side can no longer grow, the other keeps growing up to its own cap; once
// neither side can grow the edit is rejected. Previously a boundary-clamped side
// let the opposite side grow without limit (rescanning the whole doc each
// step), contradicting the documented cap.
//
// `op` is optional and only used to attach the offending op to the error.
function contextualizeEdit(doc, absoluteStart, find, replace, op) {
  // Most disambiguations need <20 chars; 200 is a sanity cap per direction.
  const MAX = 200;
  const findEnd = absoluteStart + find.length;
  let preLen = 0;
  let postLen = 0;
  while (true) {
    const from = absoluteStart - preLen;
    const to = findEnd + postLen;
    const ctxFind = doc.slice(from, to);
    if (allOccurrences(doc, ctxFind).length === 1) {
      return {
        find: ctxFind,
        replace: doc.slice(from, absoluteStart) + replace + doc.slice(findEnd, to),
      };
    }
    const canGrowPre = preLen < MAX && from > 0;
    const canGrowPost = postLen < MAX && to < doc.length;
    if (!canGrowPre && !canGrowPost) {
      throw makeError('op_malformed', `unable to disambiguate find within ${MAX} chars: ${preview(find)}`, op);
    }
    // Prefer trailing context when the sides are balanced; fall back to
    // whichever side still has room.
    if (canGrowPost && (postLen <= preLen || !canGrowPre)) postLen++;
    else preLen++;
  }
}
161
+
162
// ---------- insert ----------

// Compile an `insert` op: place `content` directly after (`after`) or directly
// before (`before`) an anchor string. Exactly one of the two must be given, and
// the anchor must occur exactly once in doc. The emitted edit replaces the
// anchor with anchor+content (or content+anchor).
function compileInsert(op, doc) {
  const { content, after, before } = op;
  if (typeof content !== 'string') {
    throw makeError('op_malformed', 'insert requires `content` string', op);
  }

  const hasAfter = typeof after === 'string';
  const hasBefore = typeof before === 'string';
  // Equal flags means both or neither were supplied.
  if (hasAfter === hasBefore) {
    throw makeError('op_malformed', 'insert requires exactly one of `after` or `before`', op);
  }

  const anchor = hasAfter ? after : before;
  const hitCount = allOccurrences(doc, anchor).length;
  if (hitCount === 0) {
    throw makeError('op_malformed', `insert anchor not found: ${preview(anchor)}`, op);
  }
  if (hitCount > 1) {
    throw makeError('op_malformed', `insert anchor not unique: ${preview(anchor)} (${hitCount} matches)`, op);
  }

  const replacement = hasAfter ? anchor + content : content + anchor;
  return [{ find: anchor, replace: replacement }];
}
182
+
183
// ---------- delete ----------

// Compile a `delete` op: `target` must be a string occurring exactly once in
// doc; the emitted edit replaces it with the empty string.
function compileDelete(op, doc) {
  const { target } = op;
  if (typeof target !== 'string') {
    throw makeError('op_malformed', 'delete requires `target` string', op);
  }

  const hitCount = allOccurrences(doc, target).length;
  if (hitCount === 0) {
    throw makeError('op_malformed', `delete target not found: ${preview(target)}`, op);
  }
  if (hitCount > 1) {
    throw makeError('op_malformed', `delete target not unique: ${preview(target)} (${hitCount} matches)`, op);
  }
  return [{ find: target, replace: '' }];
}
195
+
196
// ---------- set_attr ----------

// Compile a `set_attr` op: set (or add) attribute `attr` to `value` on the
// opening tag identified by `anchor`.
//   - `anchor` is a tag prefix: it must start with `<`, must not end with `>`,
//     and must occur exactly once in doc.
//   - The tag is taken to run from the anchor to the next `>` in doc.
//   - `value` is serialized double-quoted with `&` and `"` entity-escaped.
// The emitted edit replaces the whole opening tag with the rewritten one.
function compileSetAttr(op, doc) {
  const { anchor, attr, value } = op;
  if (typeof anchor !== 'string' || typeof attr !== 'string' || typeof value !== 'string') {
    throw makeError('op_malformed', 'set_attr requires anchor, attr, value strings', op);
  }
  if (!anchor.startsWith('<')) {
    throw makeError('anchor_unparseable', 'set_attr.anchor must start with `<`', op);
  }
  if (anchor.endsWith('>')) {
    throw makeError('anchor_unparseable', 'set_attr.anchor must end before `>`', op);
  }
  const occs = allOccurrences(doc, anchor);
  if (occs.length === 0) throw makeError('op_malformed', `set_attr anchor not found: ${preview(anchor)}`, op);
  if (occs.length > 1) throw makeError('op_malformed', `set_attr anchor not unique: ${preview(anchor)} (${occs.length} matches)`, op);
  const start = occs[0];
  const closeIdx = doc.indexOf('>', start + anchor.length);
  if (closeIdx < 0) throw makeError('anchor_unparseable', 'no `>` found after set_attr anchor', op);
  const fullTag = doc.slice(start, closeIdx + 1);

  // Reject attribute values containing chars that can't survive serialization.
  // The class is spelled with explicit escapes: written as raw characters it
  // degrades to `[- -]`, which rejects every value containing a hyphen or a
  // space (e.g. class="a b", "data-x"). Covers C0 controls except tab (\x09)
  // and line feed (\x0A).
  // NOTE(review): range reconstructed from the garbled original — confirm
  // against the spec's definition of an unrepresentable value.
  if (/[\x00-\x08\x0B-\x1F]/.test(value)) {
    throw makeError('attr_value_unrepresentable', 'value contains control characters', op);
  }
  const escapedValue = value.replace(/&/g, '&amp;').replace(/"/g, '&quot;');

  // Detect whether attr already appears in fullTag. We respect quote state to
  // avoid matching attribute substrings inside another attribute's value.
  const existingMatch = findAttrInTag(fullTag, attr);
  let newTag;
  if (existingMatch) {
    // Replace the existing `name[=value]` span in place.
    const [attrStart, attrEnd] = existingMatch;
    newTag = fullTag.slice(0, attrStart) + `${attr}="${escapedValue}"` + fullTag.slice(attrEnd);
  } else {
    // Append just before the closing `>`.
    // NOTE(review): for a self-closing tag (`.../>`) this lands after the `/`.
    newTag = fullTag.slice(0, -1) + ` ${attr}="${escapedValue}">`;
  }
  return [{ find: fullTag, replace: newTag }];
}
235
+
236
// Locate `attrName` inside an opening tag, returning [start, end) offsets within
// the tag of the attribute's full source span: `name="value"`, `name='value'`,
// `name=value`, or a bare boolean `name`. Returns null if the attribute is
// absent, the tag has no parseable tag name, or a quoted value is unterminated.
//
// The tag is walked attribute by attribute, so a name that merely appears inside
// another attribute's quoted value is never matched. Whitespace around `=` is
// accepted (`class = "x"`), matching how HTML tokenizes attributes; previously
// such an attribute was misread as a boolean, leaving `= "x"` outside the span.
// Name comparison is exact (case-sensitive).
function findAttrInTag(tag, attrName) {
  // The tag starts with <tagname or </tagname. Skip past tagname.
  const nameMatch = tag.match(/^<\/?([a-zA-Z][a-zA-Z0-9_-]*)/);
  if (!nameMatch) return null;

  // Index of the first non-whitespace character at or after `from`.
  const skipSpace = (from) => {
    let j = from;
    while (j < tag.length && /\s/.test(tag[j])) j++;
    return j;
  };

  let i = nameMatch[0].length;
  while (i < tag.length - 1) {
    i = skipSpace(i);
    if (i >= tag.length || tag[i] === '>' || tag[i] === '/') break;

    // Attribute name: runs until whitespace, `=`, `>` or `/`.
    const attrStart = i;
    let nameEnd = i;
    while (nameEnd < tag.length && !/[\s=>/]/.test(tag[nameEnd])) nameEnd++;
    const name = tag.slice(attrStart, nameEnd);

    // Boolean attribute unless an `=` follows (optionally after whitespace).
    let attrEnd = nameEnd;
    i = nameEnd;
    const eqIdx = skipSpace(nameEnd);
    if (tag[eqIdx] === '=') {
      i = skipSpace(eqIdx + 1);
      const quote = tag[i];
      if (quote === '"' || quote === "'") {
        const close = tag.indexOf(quote, i + 1);
        if (close < 0) return null; // unterminated quoted value
        i = close + 1;
      } else {
        // Unquoted value: runs until whitespace or `>`.
        while (i < tag.length && !/[\s>]/.test(tag[i])) i++;
      }
      attrEnd = i;
    }

    if (name === attrName) return [attrStart, attrEnd];
  }
  return null;
}
282
+
283
// ---------- shared helpers ----------

// Start offsets of every occurrence of `needle` in `haystack`, ascending.
// Overlapping occurrences are all reported (the scan resumes one character
// after each hit). An empty needle yields no occurrences.
function allOccurrences(haystack, needle) {
  const positions = [];
  if (needle.length === 0) return positions;
  for (
    let at = haystack.indexOf(needle, 0);
    at >= 0;
    at = haystack.indexOf(needle, at + 1)
  ) {
    positions.push(at);
  }
  return positions;
}
297
+
298
// Apply one emitted edit to the compiler's shadow doc and return the new doc.
// The edit's `find` must occur exactly once; zero or multiple occurrences mean
// the emitted edit no longer lines up with the shadow ("shadow drift") and are
// reported as op_malformed.
function applyEditToShadow(doc, edit) {
  const { find, replace } = edit;
  const at = doc.indexOf(find);
  if (at < 0) {
    throw makeError('op_malformed', `compiler shadow drift: emitted edit no longer matches: ${preview(edit.find)}`);
  }
  const hasSecondHit = doc.indexOf(find, at + 1) >= 0;
  if (hasSecondHit) {
    throw makeError('op_malformed', `compiler shadow drift: emitted edit ambiguous (${allOccurrences(doc, edit.find).length} matches): ${preview(edit.find)}`);
  }
  const before = doc.slice(0, at);
  const after = doc.slice(at + find.length);
  return before + replace + after;
}
309
+
310
// Sanity-check the shape of an edit the compiler is about to emit: both fields
// must be strings and `find` must be non-empty. Violations indicate a compiler
// bug and are reported as op_malformed with the originating op attached.
// `doc` is accepted for signature symmetry but is not consulted here.
function validateEditApplies(doc, edit, op) {
  const bothStrings = typeof edit.find === 'string' && typeof edit.replace === 'string';
  if (!bothStrings) {
    throw makeError('op_malformed', 'compiler bug: emitted non-string find/replace', op);
  }
  if (edit.find === '') {
    throw makeError('op_malformed', 'compiler bug: emitted empty find', op);
  }
}
318
+
319
// Short, JSON-quoted rendering of a string for error messages. Strings longer
// than 60 characters are cut to their first 57 characters plus '...'.
function preview(s) {
  const LIMIT = 60;
  const ELLIPSIS = '...';
  const shown = s.length <= LIMIT ? s : s.slice(0, LIMIT - ELLIPSIS.length) + ELLIPSIS;
  return JSON.stringify(shown);
}
323
+
324
/**
 * Apply an envelope (the compileDslPlan output, OR a model's
 * apply_edits/replace_document) to a doc and return the resulting text. Used by
 * the fidelity-dsl runner's comparator to check round-trip equivalence between
 * the DSL-compiled envelope and the scenario stub envelope.
 *
 * Mirrors the runtime's apply path: edits run in order, and each `find` must
 * match exactly once in the document as left by the previous edits.
 *
 * @param {string} doc — the input doc (LF-canonical)
 * @param {{ tool: string, envelope: object }} env — { tool, envelope } pair
 * @returns {string} the post-apply doc
 * @throws {Error} on an unknown tool, or a `find` that is missing or not unique
 */
export function applyEnvelopeToDoc(doc, env) {
  switch (env.tool) {
    case 'replace_document':
      // Wholesale replacement: the input doc is ignored.
      return env.envelope.doc;
    case 'apply_edits':
      break;
    default:
      throw new Error(`applyEnvelopeToDoc: unknown tool "${env.tool}"`);
  }

  let current = doc;
  for (const edit of env.envelope.edits) {
    const at = current.indexOf(edit.find);
    if (at < 0) {
      throw new Error(`apply: find not found: ${preview(edit.find)}`);
    }
    if (current.indexOf(edit.find, at + 1) >= 0) {
      throw new Error(`apply: find not unique: ${preview(edit.find)}`);
    }
    const head = current.slice(0, at);
    const tail = current.slice(at + edit.find.length);
    current = head + edit.replace + tail;
  }
  return current;
}
356
+
357
+ export { DslCompileError };