rewritable 0.3.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +261 -5
- package/bin/rwa.mjs +1000 -9
- package/package.json +2 -2
- package/seeds/rewritable.html +4356 -315
- package/src/agent-loop.mjs +155 -0
- package/src/apply-edits.mjs +664 -0
- package/src/atomic-write.mjs +38 -0
- package/src/backend.mjs +43 -0
- package/src/clone-extract.mjs +249 -0
- package/src/clone.mjs +161 -0
- package/src/commands.mjs +90 -10
- package/src/create.mjs +256 -0
- package/src/doc.mjs +69 -0
- package/src/dsl-compiler.mjs +357 -0
- package/src/edit.mjs +300 -0
- package/src/fetch-page.mjs +346 -0
- package/src/host.mjs +126 -0
- package/src/identity.mjs +257 -0
- package/src/import-claude.mjs +28 -4
- package/src/import-vision.mjs +1 -1
- package/src/import.mjs +76 -10
- package/src/ls.mjs +105 -0
- package/src/publish-site.mjs +85 -0
- package/src/publish.mjs +98 -0
- package/src/seed-extract.mjs +40 -0
- package/src/seed.mjs +1387 -5
- package/src/self-contained.mjs +115 -0
- package/src/skill-manifest.mjs +227 -0
- package/src/skin.mjs +350 -0
- package/src/skins.mjs +274 -0
- package/src/template.mjs +109 -0
|
@@ -0,0 +1,664 @@
|
|
|
1
|
+
// rwa-edit/1 apply-edits — hand-mirrored from seeds/rewritable.html's
|
|
2
|
+
// applyEdits pipeline (search `async function applyEdits`). Read alongside rwa-edit-spec.md §5
|
|
3
|
+
// (apply_edits semantics) and §7 (frozen zones).
|
|
4
|
+
//
|
|
5
|
+
// Differences from the seed, called out so future maintainers don't expect
|
|
6
|
+
// strict parity:
|
|
7
|
+
// 1. Seed collapses reserved-marker hits and zone-crossing hits both into
|
|
8
|
+
// `frozen_zone_violation`. CLI splits them: `reserved_substring` for
|
|
9
|
+
// a find/replace that *contains* a marker substring, and
|
|
10
|
+
// `frozen_zone_violation` for an edit whose find-range overlaps a
|
|
11
|
+
// marker-form frozen zone. The plan (Task 4/5 dispatch) is keyed on
|
|
12
|
+
// these distinct codes.
|
|
13
|
+
// 2. Seed enforces `data-rwa-frozen` attribute-form zones via a DOMParser
|
|
14
|
+
// snapshot of [data-rwa-frozen] elements. The CLI mirrors that guard
|
|
15
|
+
// parser-free (offline-first, no jsdom): `dataRwaFrozenSnapshot` captures
|
|
16
|
+
// each frozen element as `tag\0outerHTML` (sorted), and applyEdits rejects
|
|
17
|
+
// a batch that changes the set (`frozen_zone_violation`, `form:'attribute'`)
|
|
18
|
+
// — covering BOTH marker-form and attribute-form now. Reserved-substring
|
|
19
|
+
// detection still blocks edits that mention `data-rwa-frozen` literally.
|
|
20
|
+
// The seed's DOMParser handles edge cases (a `>` inside a quoted attribute
|
|
21
|
+
// value) that the CLI's pragmatic regex matcher does not; the before/after
|
|
22
|
+
// snapshot is relative, so a consistent mis-parse of an UNCHANGED element
|
|
23
|
+
// still compares equal. KEEP IN STEP with the seed (search
|
|
24
|
+
// `function dataRwaFrozenSnapshot`).
|
|
25
|
+
// 3. Seed's structural-shape check uses DOMParser + executable-script-
|
|
26
|
+
// type filtering + top-level-tag-types set. CLI v1 uses regex counting
|
|
27
|
+
// of <script>/<style> tags — enough to catch the realistic accidental-
|
|
28
|
+
// damage signal (a model emitting an inline <script> in a content
|
|
29
|
+
// edit) without pulling in a parser.
|
|
30
|
+
//
|
|
31
|
+
// ## Other known v1 scope-downs vs seed
|
|
32
|
+
//
|
|
33
|
+
// The seed (search `async function applyEdits` in seeds/rewritable.html) enforces additional
|
|
34
|
+
// invariants the CLI does NOT in v1. Tracked in cli/TODO.md for v2:
|
|
35
|
+
//
|
|
36
|
+
// - MAX_REPLACE = 8KB per-edit cap (seed throws 'replace_too_large')
|
|
37
|
+
// - MAX_DOC = 1MB whole-doc cap (seed throws 'target_size_exceeded')
|
|
38
|
+
// - isWellFormed lone-surrogate guard on find/replace/doc
|
|
39
|
+
// - canonLF normalization of find/replace before matching
|
|
40
|
+
// (CRLF-containing anchors fail with find_not_found in the CLI but
|
|
41
|
+
// match correctly in the browser)
|
|
42
|
+
// - Class-lock violation check on apply_edits (class_lock_violation — an edit
|
|
43
|
+
// find-range crossing a .rwa-locked subtree). NOTE: the replace_document
|
|
44
|
+
// coverage check (class_lock_uncovered) IS enforced — see edit.mjs
|
|
45
|
+
// assertFrozenPreserved + the exported lockedRangesIn/markerZoneRangesIn.
|
|
46
|
+
// - Reserved-id violation (reserved_id_used) — including data-rwa-id injection
|
|
47
|
+
// - HTML parse-validity post-apply (parse_error_post_apply)
|
|
48
|
+
|
|
49
|
+
/**
 * Error type thrown when an rwa-edit operation is rejected.
 * The failure code is also used as the Error message.
 */
export class RwaEditError extends Error {
  /**
   * @param {string} code - failure code; becomes both `message` and `code`.
   * @param {?number} [editIndex=null] - stored on `editIndex` (presumably the
   *   index of the offending edit in the batch — confirm against callers).
   * @param {object} [context={}] - extra failure details, stored on `context`.
   */
  constructor(code, editIndex = null, context = {}) {
    super(code);
    Object.assign(this, { code, editIndex, context });
  }
}
|
|
57
|
+
|
|
58
|
+
// Size caps — mirror of the seed's RWA_EDIT (search `MAX_REPLACE:` in
|
|
59
|
+
// seeds/rewritable.html). MAX_REPLACE is the per-edit `replace` cap;
|
|
60
|
+
// MAX_DOC is the whole-document cap after the batch applies. With images-v1
|
|
61
|
+
// these are measured on the VIRTUAL (rwa-asset token) form when the caller
|
|
62
|
+
// virtualizes — a text budget, never a pixel budget (rwa-edit-spec.md §19).
|
|
63
|
+
const MAX_REPLACE = 8_192; // 8 KiB
|
|
64
|
+
const MAX_DOC = 1_048_576; // 1 MiB
|
|
65
|
+
// Real-bytes whole-document cap for the image paths, where MAX_DOC measures the
|
|
66
|
+
// VIRTUAL (token) form. Mirrors the GUI's container budget (RWA_IMG.FILE_STOP);
|
|
67
|
+
// authoritative server-side on the hosted /modify path (rwa-edit-spec.md §19).
|
|
68
|
+
export const MAX_DOC_EXPANDED = 10_485_760; // 10 MiB
|
|
69
|
+
|
|
70
|
+
// LF canonicalization — mirror of the seed's canonLF. The seed normalizes the
|
|
71
|
+
// doc AND every find/replace to LF before matching, so a CRLF document or a
|
|
72
|
+
// CRLF-containing anchor behaves identically in the CLI and the browser.
|
|
73
|
+
// Without this a CRLF doc + LF anchor (or vice versa) spuriously misses.
|
|
74
|
+
// Canonicalize line endings to LF. null/undefined become ''; any other value is
// stringified, then every CRLF pair or lone CR is replaced by a single LF.
const canonLF = (s) => {
  if (s == null) return '';
  return String(s).replace(/\r\n?/g, '\n');
};
|
|
75
|
+
|
|
76
|
+
// UTF-16 well-formedness — a lone surrogate in find/replace becomes U+FFFD on
|
|
77
|
+
// UTF-8 encode (the durable file write) and silently corrupts byte-equality.
|
|
78
|
+
// Mirror of the seed's isWellFormed guard. String.prototype.isWellFormed is
|
|
79
|
+
// Node 22+; treat its absence as "no check available."
|
|
80
|
+
// True unless `s` is a string that the runtime's String.prototype.isWellFormed
// reports as ill-formed. Non-strings, and runtimes lacking the method, pass.
const isWellFormed = (s) => {
  if (typeof s !== 'string') return true;
  if (typeof s.isWellFormed !== 'function') return true; // no check available
  return s.isWellFormed();
};
|
|
81
|
+
|
|
82
|
+
// Plain-English, code-keyed recovery hints. Self-documenting failures: an agent
|
|
83
|
+
// (or `rwa edit --json` consumer) gets one actionable line, not just a code.
|
|
84
|
+
// A static lookup — never a model call (Rule 5). Keep in sync with the seed's
|
|
85
|
+
// FAILURE_HINTS (failureToToolResult). No angle brackets / reserved markers in
|
|
86
|
+
// the strings, so they stay safe to embed in the seed bootstrap and survive the
|
|
87
|
+
// CLI tree's reserved-marker scan.
|
|
88
|
+
// Static lookup: failure code -> one-line, plain-English recovery hint.
// Invariant (see the note preceding this table): the strings carry no angle
// brackets and no reserved markers. The unknown_asset_reference hint used to
// spell the image tag with angle brackets, which broke that invariant; it now
// names the tag without them. If the seed's FAILURE_HINTS still uses the
// bracketed form, update it to match.
export const FAILURE_HINTS = {
  find_not_found: 'find must match the document byte-for-byte (whitespace and case included). If a closest match is shown, copy it exactly; otherwise pick a shorter, distinctive anchor.',
  find_not_unique: 'find appears more than once. Extend it with neighbouring text until it is unique; the hints list shows where.',
  frozen_zone_violation: 'This region is an author-protected frozen zone. Anchor on a different region — frozen zones change only by editing the file outside the runtime.',
  reserved_substring: 'find or replace contains a reserved rwa marker. Anchor on ordinary document text instead.',
  structural_shape_changed: 'The edit would change the document script/style tag count. Keep edits content-only, or use a structural plan.',
  replace_too_large: 'replace exceeds the per-edit size cap. Split the change into smaller anchored edits.',
  empty_find: 'find must be a non-empty string — provide the exact text to anchor on.',
  parse_error_post_apply: 'The result was not well-formed HTML — check that the tags in replace are balanced.',
  unknown_asset_reference: 'src uses an rwa-asset: token that does not exist in this document. Copy tokens verbatim from existing img tags; never invent or edit them.',
};
|
|
99
|
+
|
|
100
|
+
// ─── Image-asset virtualization (images-v1) ─────────────────────────
|
|
101
|
+
// Hand-mirror of the seed block beside containsReservedMarker in
|
|
102
|
+
// seeds/rewritable.html (rwaAssetHash8/registerImageAsset/virtualizeImages/
|
|
103
|
+
// virtualizeWithMap/expandImages/assertNoNewAssetTokens). Normative contract:
|
|
104
|
+
// rwa-edit-spec.md §19. KEEP IN STEP with the seed.
|
|
105
|
+
//
|
|
106
|
+
// The model never sees image bytes: `rwa edit <instruction>` builds its prompt
|
|
107
|
+
// from the VIRTUAL doc (data:image src → rwa-asset:<hash8> token) and the
|
|
108
|
+
// apply expands tokens back before the file write. Hash-keyed (FNV-1a — token
|
|
109
|
+
// identity/dedupe, not integrity), so tokens are stable across moves and the
|
|
110
|
+
// map can be re-derived deterministically from the same doc bytes.
|
|
111
|
+
/**
 * 32-bit FNV-1a hash over the UTF-16 code units of `s`.
 * @param {string} s - text to hash.
 * @returns {string} lowercase hex, zero-padded to 8 characters.
 */
export function rwaAssetHash8(s) {
  const FNV_OFFSET_BASIS = 0x811c9dc5;
  const FNV_PRIME = 0x01000193;
  let hash = FNV_OFFSET_BASIS;
  for (let pos = 0; pos < s.length; pos += 1) {
    // XOR in the code unit, multiply by the prime, keep the result unsigned.
    hash = Math.imul(hash ^ s.charCodeAt(pos), FNV_PRIME) >>> 0;
  }
  return hash.toString(16).padStart(8, '0');
}
|
|
116
|
+
// Matches a quoted `src` attribute whose value starts with `data:image/`.
// Captures: 1 = the `src=` prefix (including whitespace around `=`),
// 2 = the quote character, 3 = the data URI. The URI body stops at EITHER quote
// character. Global flag: exec() is stateful via lastIndex.
const RWA_ASSET_SRC_RE = /(\bsrc\s*=\s*)(["'])(data:image\/[^"']*)\2/g;
|
|
117
|
+
// Matches a quoted `src` attribute whose value is an `rwa-asset:` token followed
// by 8 or more lowercase hex digits. Captures: 1 = the `src=` prefix,
// 2 = the quote character, 3 = the token. Global flag: callers that loop with
// exec() reset lastIndex to 0 first because this instance is shared.
const RWA_ASSET_TOKEN_RE = /(\bsrc\s*=\s*)(["'])(rwa-asset:[0-9a-f]{8,})\2/g;
|
|
118
|
+
/**
 * Register `uri` in `assets` and return its token.
 * The first candidate token hashes the URI itself; if that token is already
 * bound to a DIFFERENT URI, the URI is re-salted with NUL plus an attempt
 * counter (2, 3, …) until a free or same-URI token is found.
 * @param {Map<string, string>} assets - token -> URI map; mutated.
 * @param {string} uri - the URI to register.
 * @returns {string} the `rwa-asset:` token now mapped to `uri`.
 */
export function registerImageAsset(assets, uri) {
  for (let attempt = 1; ; attempt += 1) {
    const hashed = attempt === 1 ? uri : uri + '\0' + attempt;
    const token = 'rwa-asset:' + rwaAssetHash8(hashed);
    const takenByOther = assets.has(token) && assets.get(token) !== uri;
    if (!takenByOther) {
      assets.set(token, uri);
      return token;
    }
  }
}
|
|
126
|
+
/**
 * Replace every quoted `src="data:image/…"` value in `doc` with a token.
 * @param {string} doc - raw document text.
 * @param {Map<string, string>} [assets] - token -> URI map to share and extend;
 *   a new Map is created when omitted.
 * @returns {{doc: string, assets: Map<string, string>, orphans: Set<string>}}
 *   the tokenized text, the (possibly extended) map, and the set of tokens
 *   that were already present in the input before tokenization.
 */
export function virtualizeImages(doc, assets) {
  const registry = assets || new Map();

  // Tokens already in the raw text are collected first so callers can tell
  // them apart from tokens minted by this call.
  const orphans = new Set();
  RWA_ASSET_TOKEN_RE.lastIndex = 0;
  for (let hit = RWA_ASSET_TOKEN_RE.exec(doc); hit !== null; hit = RWA_ASSET_TOKEN_RE.exec(doc)) {
    orphans.add(hit[3]);
  }

  const vdoc = doc.replace(RWA_ASSET_SRC_RE, (_whole, prefix, quote, uri) => {
    const token = registerImageAsset(registry, uri);
    return prefix + quote + token + quote;
  });
  return { doc: vdoc, assets: registry, orphans };
}
|
|
138
|
+
// URI→token substitution for ANY string (a doc slice virtualizes to the
|
|
139
|
+
// corresponding vdoc slice as long as it doesn't cut a URI in half).
|
|
140
|
+
/**
 * Substitute every registered URI occurring in `s` with its token.
 *
 * URIs are replaced longest-first. Replacing in Map insertion order corrupted
 * the text whenever one registered URI was a prefix of a longer one: the short
 * URI matched inside the long one and left the long URI's tail behind.
 * Equal-length URIs keep their insertion order (Array.prototype.sort is stable).
 *
 * @param {string} s - any text, typically a slice of the document.
 * @param {Map<string, string>} assets - token -> URI map.
 * @returns {string} `s` with URIs tokenized; `s` itself is returned untouched
 *   when it is falsy or when the map is missing or empty.
 */
export function virtualizeWithMap(s, assets) {
  if (!s || !assets || assets.size === 0) return s;
  const longestFirst = [...assets].sort(
    ([, uriA], [, uriB]) => String(uriB).length - String(uriA).length,
  );
  let out = s;
  for (const [token, uri] of longestFirst) out = out.split(uri).join(token);
  return out;
}
|
|
146
|
+
/**
 * Replace every quoted `src` token in `vdoc` with the URI it maps to.
 * @param {string} vdoc - tokenized text.
 * @param {?Map<string, string>} assets - token -> URI map.
 * @param {?Set<string>} orphans - tokens allowed to pass through unexpanded.
 * @returns {string} the expanded text.
 * @throws {RwaEditError} `unknown_asset_reference` for a token that is neither
 *   mapped nor listed in `orphans`.
 */
export function expandImages(vdoc, assets, orphans) {
  const restore = (whole, prefix, quote, token) => {
    const uri = assets ? assets.get(token) : null;
    if (uri != null) return prefix + quote + uri + quote;
    if (orphans && orphans.has(token)) return whole; // leave the token as written
    throw new RwaEditError('unknown_asset_reference', null, { token });
  };
  return vdoc.replace(RWA_ASSET_TOKEN_RE, restore);
}
|
|
156
|
+
// Tokenize the data:image URIs inside an EXPANDED envelope's find/replace (and
|
|
157
|
+
// the replace_document `doc`), registering each into the shared `assets` map so
|
|
158
|
+
// expansion can resolve them afterward. Used by the hosted /modify path
|
|
159
|
+
// (rwa-edit-spec.md §19, opts.virtualizeEnvelope): the client relays an expanded
|
|
160
|
+
// envelope, the server tokenizes it against a map seeded from the stored doc so
|
|
161
|
+
// the apply runs on the token form (caps = text budget) and new image bytes ride
|
|
162
|
+
// in via the envelope's own URIs. Returns a NEW envelope; the input is untouched.
|
|
163
|
+
/**
 * Tokenize the image data URIs carried by an envelope.
 * For an envelope with an `edits` array, each edit's `find` and `replace` are
 * tokenized; otherwise, when `doc` is a string, `doc` is tokenized. Every URI
 * met is registered into `assets`.
 * @param {object} envelope - the envelope; never mutated.
 * @param {Map<string, string>} assets - shared token -> URI map; extended.
 * @returns {object} a new envelope, or the input itself when it has neither an
 *   `edits` array nor a string `doc`.
 */
export function mapEnvelopeImages(envelope, assets) {
  // Missing/empty text is tokenized as '' (so it comes back as '').
  const tokenize = (text) => virtualizeImages(text || '', assets).doc;

  if (Array.isArray(envelope.edits)) {
    const edits = envelope.edits.map((edit) => {
      const find = tokenize(edit.find);
      const replace = tokenize(edit.replace);
      return { ...edit, find, replace };
    });
    return { ...envelope, edits };
  }
  if (typeof envelope.doc === 'string') {
    const doc = tokenize(envelope.doc);
    return { ...envelope, doc };
  }
  return envelope;
}
|
|
173
|
+
// No-assets writers must not introduce a NEW rwa-asset token — a token with no
|
|
174
|
+
// bytes behind it is a permanently broken image; committing one silently is the
|
|
175
|
+
// failure mode Rule 12 forbids. Tokens already in the current doc stay legal.
|
|
176
|
+
/**
 * Reject `work` if it carries a quoted `src` token that `currentDoc` does not.
 * @param {string} currentDoc - the document as it stands now.
 * @param {string} work - the candidate text to check.
 * @throws {RwaEditError} `unknown_asset_reference`, with the first offending
 *   token in `context.token`.
 */
export function assertNoNewAssetTokens(currentDoc, work) {
  // The regex instance is shared and global, so rewind it before each scan.
  const known = new Set();
  RWA_ASSET_TOKEN_RE.lastIndex = 0;
  for (let hit = RWA_ASSET_TOKEN_RE.exec(currentDoc); hit !== null; hit = RWA_ASSET_TOKEN_RE.exec(currentDoc)) {
    known.add(hit[3]);
  }

  RWA_ASSET_TOKEN_RE.lastIndex = 0;
  for (let hit = RWA_ASSET_TOKEN_RE.exec(work); hit !== null; hit = RWA_ASSET_TOKEN_RE.exec(work)) {
    const token = hit[3];
    if (!known.has(token)) throw new RwaEditError('unknown_asset_reference', null, { token });
  }
}
|
|
186
|
+
|
|
187
|
+
// Source of truth: seeds/rewritable.html RWA_EDIT.RESERVED (line ~1608).
|
|
188
|
+
// The string-concat trick on the comment/attribute markers prevents this
|
|
189
|
+
// source file itself from tripping reserved-marker scans run over the CLI
|
|
190
|
+
// tree.
|
|
191
|
+
// Substrings treated as reserved. The comment-fence entries are assembled by
// concatenation so this source file never contains them verbatim.
const RESERVED_MARKERS = [
  'rwa:frozen:begin',
  'rwa:frozen:end',
  '<' + '!-- rwa:',
  '/*' + ' rwa:',
  '//' + ' rwa:',
  'data-rwa-frozen',
];

/**
 * @param {string} s - text to scan.
 * @returns {boolean} true when `s` contains any reserved marker; false for
 *   empty/missing input.
 */
export function containsReservedMarker(s) {
  if (!s) return false;
  return RESERVED_MARKERS.some((marker) => s.includes(marker));
}
|
|
205
|
+
|
|
206
|
+
/**
 * Count non-overlapping occurrences of `needle` in `haystack`, scanning left
 * to right.
 * @param {string} haystack
 * @param {string} needle - an empty/missing needle counts as 0.
 * @returns {number}
 */
function countOccurrences(haystack, needle) {
  if (!needle) return 0;
  let total = 0;
  let at = haystack.indexOf(needle);
  while (at !== -1) {
    total += 1;
    at = haystack.indexOf(needle, at + needle.length); // resume past this hit
  }
  return total;
}
|
|
212
|
+
|
|
213
|
+
// Surrounding-context snippets for find_not_unique — mirrors the seed's
|
|
214
|
+
// nearbySnippets so `rwa edit --json` (and the agent loop) can disambiguate.
|
|
215
|
+
// Source of truth: seeds/rewritable.html nearbySnippets (~line 1783).
|
|
216
|
+
/**
 * Collect context around the first occurrences of `needle` in `haystack`.
 * Occurrences are found left to right without overlap.
 * @param {string} haystack
 * @param {string} needle
 * @param {number} [max=3] - maximum number of snippets returned.
 * @param {number} [ctx=40] - characters of context kept on each side.
 * @returns {Array<{pos: number, before: string, after: string}>} `pos` is the
 *   match offset; `before`/`after` are the text surrounding the match.
 */
function nearbySnippets(haystack, needle, max = 3, ctx = 40) {
  const snippets = [];
  let from = 0;
  for (;;) {
    const pos = haystack.indexOf(needle, from);
    if (pos === -1 || snippets.length >= max) break;
    const tail = pos + needle.length;
    snippets.push({
      pos,
      before: haystack.slice(Math.max(0, pos - ctx), pos),
      after: haystack.slice(tail, Math.min(haystack.length, tail + ctx)),
    });
    from = tail;
  }
  return snippets;
}
|
|
226
|
+
|
|
227
|
+
// Deterministic near-miss finder for find_not_found. Given a `find` that does
|
|
228
|
+
// NOT appear verbatim in `doc`, return a context fragment {closest, match}
|
|
229
|
+
// describing the closest actual text so an agent (or human) can self-correct
|
|
230
|
+
// the anchor in one retry — no model call (Rule 5). Returns {} when nothing
|
|
231
|
+
// useful is found. Cold path (failure only), so an O(n) projection is fine.
|
|
232
|
+
// Source of truth: seeds/rewritable.html findClosestAnchor — keep in sync.
|
|
233
|
+
/**
 * Deterministic near-miss finder for a `find` that is not in `doc` verbatim.
 *
 * Three passes, first hit wins:
 *   1. 'whitespace' — equal once runs of space/tab/LF/CR/FF collapse to one space;
 *   2. 'case'       — equal after lowercasing (on the collapsed forms);
 *   3. 'partial'    — longest prefix of the needle (at least 12 chars) found in
 *                     the collapsed doc, plus up to 40 chars of what follows.
 *
 * @param {string} doc - document text.
 * @param {string} find - the anchor that failed to match.
 * @returns {{closest?: string, match?: string, truncated?: boolean}}
 *   `closest` is the ORIGINAL doc text for the hit; `truncated: true` means it
 *   was elided to cap size and is not re-appliable verbatim. `{}` when nothing
 *   useful is found.
 */
function findClosestAnchor(doc, find) {
  if (!doc || !find) return {};
  const needleNorm = find.replace(/[ \t\n\r\f]+/g, ' ').trim();
  if (!needleNorm) return {};

  // Length-preserving lowercase of one UTF-16 unit: units whose lowercase is
  // not a single unit (e.g. U+0130) are kept as-is so offsets never shift.
  const lowerUnit = (unit) => {
    const lowered = unit.toLowerCase();
    return lowered.length === 1 ? lowered : unit;
  };

  // Whitespace-collapsed projection of `doc`. map[k] is the source index of
  // norm[k]; a whitespace run becomes one space mapped to the run's first
  // char. lowNorm has the same length as norm, so both share `map`.
  let norm = '', lowNorm = '';
  const map = [];
  let inWs = false;
  for (let i = 0; i < doc.length; i++) {
    const c = doc[i];
    if (c === ' ' || c === '\t' || c === '\n' || c === '\r' || c === '\f') {
      if (!inWs) { norm += ' '; lowNorm += ' '; map.push(i); inWs = true; }
    } else {
      norm += c;
      lowNorm += lowerUnit(c);
      map.push(i);
      inWs = false;
    }
  }

  // Cap the payload; an elided result is flagged truncated:true.
  const MAX = 300;
  const mk = (raw, match) => raw.length <= MAX
    ? { closest: raw, match }
    : { closest: raw.slice(0, MAX - 18) + ' …[' + (raw.length - MAX) + ' more]… ', match, truncated: true };
  // Original-text span for norm[k .. k+normLen). The needle is trimmed, so both
  // ends of a hit are non-whitespace and map to exact source positions.
  const span = (k, normLen) => doc.slice(map[k], map[k + normLen - 1] + 1);

  // Pass 1 — whitespace-only mismatch (verbatim normalized match).
  let k = norm.indexOf(needleNorm);
  if (k !== -1) return mk(span(k, needleNorm.length), 'whitespace');

  // Pass 2 — case (± whitespace) mismatch. The wholesale lowercase is tried
  // first, but only when it kept the needle's length (otherwise the span
  // arithmetic below would be off). The fallback lowercases the needle with
  // the SAME per-unit rule used for lowNorm, so a needle containing a
  // length-changing character can still match.
  const wholeLow = needleNorm.toLowerCase();
  let unitLow = '';
  for (let i = 0; i < needleNorm.length; i++) unitLow += lowerUnit(needleNorm[i]);
  k = wholeLow.length === needleNorm.length ? lowNorm.indexOf(wholeLow) : -1;
  if (k === -1 && unitLow !== wholeLow) k = lowNorm.indexOf(unitLow);
  if (k !== -1) return mk(span(k, needleNorm.length), 'case');

  // Pass 3 — partial: longest matching prefix of the needle (floor 12 chars).
  // If a prefix of length L occurs, every shorter prefix occurs too, so the
  // longest L can be binary-searched.
  const FLOOR = 12;
  if (needleNorm.length >= FLOOR) {
    let lo = FLOOR, hi = needleNorm.length, best = -1, bestK = -1;
    while (lo <= hi) {
      const mid = (lo + hi) >> 1;
      const j = norm.indexOf(needleNorm.slice(0, mid));
      if (j !== -1) { best = mid; bestK = j; lo = mid + 1; } else { hi = mid - 1; }
    }
    if (best !== -1) {
      const start = map[bestK];
      const matchEnd = map[bestK + best - 1] + 1;
      const ctxEnd = Math.min(doc.length, matchEnd + 40); // show where it diverges
      return mk(doc.slice(start, ctxEnd), 'partial');
    }
  }

  return {};
}
|
|
296
|
+
|
|
297
|
+
// Extract marker-form frozen zones. Returns array of
|
|
298
|
+
// `{ start, end, name }` covering the entire span from the opening
|
|
299
|
+
// `<!-- rwa:frozen:begin <name> -->` to the closing
|
|
300
|
+
// `<!-- rwa:frozen:end <name> -->` (inclusive of both markers).
|
|
301
|
+
//
|
|
302
|
+
// Scoped to the HTML-comment form. Source-of-truth seed also handles
|
|
303
|
+
// `/* rwa:frozen:* */` and `// rwa:frozen:*` (script/JS-comment forms);
|
|
304
|
+
// for the CLI v1 those are deferred — they were a niche need on the seed
|
|
305
|
+
// side and the substrate is the doc the CLI edits, not the bootstrap.
|
|
306
|
+
/**
 * Locate HTML-comment-form frozen zones.
 * Each begin marker is paired with the first same-named end marker after it.
 * @param {string} doc - document text.
 * @returns {Array<{start: number, end: number, name: string}>} `start` is the
 *   offset of the begin marker, `end` the offset just past the end marker.
 *   Begin markers with no matching end are omitted.
 */
export function findFrozenZones(doc) {
  const beginRe = /<!--\s*rwa:frozen:begin\s+([A-Za-z0-9_-]+)\s*-->/g;
  const zones = [];
  for (let begin = beginRe.exec(doc); begin !== null; begin = beginRe.exec(doc)) {
    const [marker, name] = begin;
    const safeName = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const endRe = new RegExp('<!--\\s*rwa:frozen:end\\s+' + safeName + '\\s*-->', 'g');
    endRe.lastIndex = begin.index + marker.length; // search after the begin marker
    const close = endRe.exec(doc);
    if (close === null) continue; // unterminated: not reported by this scan
    zones.push({ start: begin.index, end: close.index + close[0].length, name });
  }
  return zones;
}
|
|
324
|
+
|
|
325
|
+
// Regex-escape a dynamic literal (zone name) before embedding it in a RegExp.
|
|
326
|
+
// Mirror of the seed's escapeRegex. Zone names are [A-Za-z0-9_-]+ today so this
|
|
327
|
+
// is belt-and-suspenders, but keeping it shared means the three fence-form
|
|
328
|
+
// builders below stay byte-aligned with the seed and with each other.
|
|
329
|
+
/**
 * Backslash-escape regex metacharacters so `s` matches literally in a RegExp.
 * @param {string} s
 * @returns {string}
 */
function escapeRegex(s) {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metaChars, '\\$&');
}
|
|
330
|
+
|
|
331
|
+
// Full 3-fence-form frozen-zone scan — faithful mirror of the seed's
|
|
332
|
+
// extractFrozenZones (seeds/rewritable.html, search `function extractFrozenZones`).
|
|
333
|
+
// Returns one entry per begin-marker: { name, inner } for a terminated zone, or
|
|
334
|
+
// { name, error: 'unterminated' | 'duplicate' }. This is the canonical scan the
|
|
335
|
+
// replace_document guard uses for byte-preservation, add-rejection, unterminated
|
|
336
|
+
// AND duplicate detection — across <!-- -->, /* */ and // fence forms — so the
|
|
337
|
+
// escape hatch can't silently drop, mint, half-open, or shadow-duplicate a zone
|
|
338
|
+
// in any fence form. (findFrozenZones above stays comment-form-only on purpose:
|
|
339
|
+
// it is the REPORTING source for `rwa doc`/`ls` frozenZones, where SD-04 pins it
|
|
340
|
+
// to the seed's reporting projection. This scan is the ENFORCEMENT source.)
|
|
341
|
+
// KEEP IN STEP with the seed.
|
|
342
|
+
/**
 * Scan `doc` for frozen-zone begin markers in all three fence forms
 * (HTML comment, block comment, line comment) and pair each with the first
 * same-named end marker of the form selected by its opener.
 * @param {string} doc - document text.
 * @returns {Array<{name: string, inner?: string, error?: string}>} one entry per
 *   begin marker, in document order: `{name, inner}` for a terminated zone,
 *   `{name, error: 'unterminated'}` when no end marker follows, or
 *   `{name, error: 'duplicate'}` when a terminated zone of that name was
 *   already recorded. Empty array for empty/missing input.
 */
export function extractFrozenZones3(doc) {
  const zones = [];
  if (!doc) return zones;

  const beginRe = /(<!--|\/\*|\/\/)\s*rwa:frozen:begin\s+([A-Za-z0-9_-]+)\s*(-->|\*\/|(?=\r?\n|$))/g;
  // End-marker pattern pieces, keyed by the begin marker's opener.
  const endFence = {
    '<!--': ['<!--\\s*rwa:frozen:end\\s+', '\\s*-->'],
    '/*': ['\\/\\*\\s*rwa:frozen:end\\s+', '\\s*\\*\\/'],
    '//': ['\\/\\/\\s*rwa:frozen:end\\s+', '(?=\\r?\\n|$)'],
  };
  const terminated = new Set();

  for (let begin = beginRe.exec(doc); begin !== null; begin = beginRe.exec(doc)) {
    const [marker, opener, name] = begin;
    let innerStart = begin.index + marker.length;
    if (opener === '//') {
      // Line-comment form: the zone body starts on the line after the marker.
      const eol = doc.indexOf('\n', innerStart);
      innerStart = eol === -1 ? doc.length : eol + 1;
    }

    const [head, tail] = endFence[opener];
    const endRe = new RegExp(head + escapeRegex(name) + tail, 'g');
    endRe.lastIndex = innerStart;
    const close = endRe.exec(doc);

    if (close === null) {
      zones.push({ name, error: 'unterminated' });
    } else if (terminated.has(name)) {
      zones.push({ name, error: 'duplicate' });
    } else {
      terminated.add(name);
      zones.push({ name, inner: doc.slice(innerStart, close.index) });
    }
  }
  return zones;
}
|
|
370
|
+
|
|
371
|
+
// Detect an unterminated marker-form frozen zone (a begin marker with no
|
|
372
|
+
// matching end), across all three fence forms. Thin projection of
|
|
373
|
+
// extractFrozenZones3 so the standalone check and the full guard can never
|
|
374
|
+
// disagree. Returns the offending zone name, or null. KEEP IN STEP with the seed.
|
|
375
|
+
/**
 * @param {string} doc - document text.
 * @returns {?string} the name of the first zone that extractFrozenZones3
 *   reports as 'unterminated', or null when there is none.
 */
export function unterminatedFrozenMarker(doc) {
  for (const zone of extractFrozenZones3(doc)) {
    if (zone.error === 'unterminated') return zone.name;
  }
  return null;
}
|
|
379
|
+
|
|
380
|
+
/**
 * Check whether the FIRST occurrence of `find` in `doc` overlaps a zone.
 * Ranges are half-open: touching a zone's edge is not an overlap.
 * @param {string} doc - document text.
 * @param {string} find - the edit's anchor text.
 * @param {Iterable<{start: number, end: number}>} zones - zone ranges.
 * @returns {?object} the first overlapping zone, or null when `find` is absent
 *   or overlaps nothing.
 */
function editCrossesFrozenZone(doc, find, zones) {
  const from = doc.indexOf(find);
  if (from === -1) return null;
  const to = from + find.length;
  for (const zone of zones) {
    const overlaps = zone.start < to && zone.end > from;
    if (overlaps) return zone;
  }
  return null;
}
|
|
392
|
+
|
|
393
|
+
// Void HTML elements have no closing tag, so the depth-matcher below must not
|
|
394
|
+
// scan to EOF looking for a close that never comes.
|
|
395
|
+
// Lowercase names of the HTML void elements (elements with no closing tag).
const VOID_ELEMENTS = new Set(
  'area base br col embed hr img input link meta param source track wbr'.split(' '),
);
|
|
397
|
+
|
|
398
|
+
// Index just past the matching `</tag>` for an element opened at `from`,
|
|
399
|
+
// tracking nested same-tag depth so a naive "next close" can't stop early.
|
|
400
|
+
// -1 if unterminated. Mirror of the seed's findCloseTagEnd: EVERY non-close
|
|
401
|
+
// open of `tag` increments depth — including a self-closing `<tag/>`, because
|
|
402
|
+
// for the non-void container tags this targets (only dataRwaFrozenSnapshot guards void tags
|
|
403
|
+
// before the call), HTML ignores the trailing slash and treats it as an open.
|
|
404
|
+
// (A prior CLI deviation exempted `<tag/>`, diverging from the seed on
|
|
405
|
+
// malformed self-closing same-tag nesting — removed for parity.)
|
|
406
|
+
/**
 * Find the end of the element whose opening tag ends at `from`.
 * Scans for `tag` open/close tags (case-insensitive) from `from`, counting
 * nesting depth; every non-close tag, self-closing ones included, counts as an
 * open.
 * @param {string} doc - document text.
 * @param {string} tag - tag name to balance.
 * @param {number} from - offset just past the opening tag.
 * @returns {number} offset just past the matching close tag, or -1 when the
 *   element is never closed.
 */
function matchingCloseEnd(doc, tag, from) {
  const safeTag = tag.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const tagRe = new RegExp('<(/?)' + safeTag + '\\b[^>]*>', 'gi');
  tagRe.lastIndex = from;

  let open = 1; // the element opened just before `from`
  for (let hit = tagRe.exec(doc); hit !== null; hit = tagRe.exec(doc)) {
    const isClose = hit[1] === '/';
    if (!isClose) {
      open += 1;
      continue;
    }
    open -= 1;
    if (open === 0) return hit.index + hit[0].length;
  }
  return -1;
}
|
|
416
|
+
|
|
417
|
+
// True iff `openTag` carries data-rwa-frozen as an actual attribute NAME — not
|
|
418
|
+
// inside a quoted value (class="data-rwa-frozen") and not a prefix of a longer
|
|
419
|
+
// name (data-rwa-frozen-note). Mirror of the seed's tagHasFrozenAttr
|
|
420
|
+
// (seeds/rewritable.html:2112) so the CLI's byte-range frozen detection agrees
|
|
421
|
+
// with the real DOM enforcement (querySelectorAll('[data-rwa-frozen]')) — the
|
|
422
|
+
// cheap /\bdata-rwa-frozen\b/ pre-filter's value/longer-name matches no longer
|
|
423
|
+
// false-positive. KEEP IN STEP with the seed.
|
|
424
|
+
/**
 * True iff `openTag` carries data-rwa-frozen as an attribute NAME — not as text
 * inside an attribute value and not as a prefix of a longer attribute name.
 *
 * The tag-name pattern accepts hyphens, so custom-element tags (for example
 * my-widget) are recognized. Previously a hyphenated tag name failed the shape
 * check and the function returned false even when the attribute was present,
 * which left frozen custom elements unprotected.
 *
 * @param {string} openTag - one complete opening tag, from its opening angle
 *   bracket to its closing one.
 * @returns {boolean}
 */
export function tagHasFrozenAttr(openTag) {
  // Shape check; capture 1 is the attribute text after the tag name.
  const am = /^<[a-zA-Z][a-zA-Z0-9-]*((?:\s[^>]*)?)\/?>$/.exec(openTag);
  if (!am) return false;
  // Walk name[=value] pairs; values are consumed whole so text inside a value
  // is never mistaken for an attribute name.
  const attrRe = /([^\s=/>]+)(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+))?/g;
  let a;
  while ((a = attrRe.exec(am[1])) !== null) {
    if (a[1] === 'data-rwa-frozen') return true;
  }
  return false;
}
|
|
434
|
+
|
|
435
|
+
// Parser-free mirror of the seed's dataRwaFrozenSnapshot (seeds/rewritable.html, search
|
|
436
|
+
// `function dataRwaFrozenSnapshot`): each data-rwa-frozen element captured as `tagName\0outerHTML`, sorted.
|
|
437
|
+
// applyEdits compares this before/after to reject ANY change (inner text,
|
|
438
|
+
// attributes, add/remove) to an attribute-form frozen element — position-
|
|
439
|
+
// independent (sorted; outerHTML self-contained), batch-level like the seed.
|
|
440
|
+
//
|
|
441
|
+
// The seed uses DOMParser; the CLI stays parser-free (offline-first, no jsdom),
|
|
442
|
+
// so this is a pragmatic regex + tag-depth matcher. Edge cases a real parser
|
|
443
|
+
// handles (a literal `>` inside a quoted attribute value, a tag name inside a
|
|
444
|
+
// comment/string) are out of v1 scope — but because the check is a RELATIVE
|
|
445
|
+
// before/after snapshot, a consistent mis-parse of an UNCHANGED frozen element
|
|
446
|
+
// still compares equal, and the conservative failure direction (false-positive
|
|
447
|
+
// rejection) is the safe one for a frozen-zone guard. KEEP IN STEP with the seed.
|
|
448
|
+
/**
 * Snapshot every attribute-form frozen element in `doc`.
 * Each element is recorded as lowercased tag name + NUL + its text: the
 * opening tag alone for void or self-closing elements; otherwise everything
 * from the opening tag through its matching close tag, or to the end of `doc`
 * when no close tag is found.
 * @param {string} doc - document text.
 * @returns {string[]} the entries, sorted, so two snapshots compare
 *   independently of element position.
 */
export function dataRwaFrozenSnapshot(doc) {
  // Cheap pre-filter; tagHasFrozenAttr below confirms a real attribute name.
  const openRe = /<([a-zA-Z][A-Za-z0-9-]*)\b[^>]*\bdata-rwa-frozen\b[^>]*>/g;
  const entries = [];
  for (let hit = openRe.exec(doc); hit !== null; hit = openRe.exec(doc)) {
    const openTag = hit[0];
    if (!tagHasFrozenAttr(openTag)) continue;

    const tag = hit[1].toLowerCase();
    const selfContained = VOID_ELEMENTS.has(tag) || /\/>\s*$/.test(openTag);
    let outer = openTag;
    if (!selfContained) {
      const closeEnd = matchingCloseEnd(doc, tag, hit.index + openTag.length);
      outer = closeEnd === -1 ? doc.slice(hit.index) : doc.slice(hit.index, closeEnd);
    }
    entries.push(tag + '\0' + outer);
  }
  return entries.sort();
}
|
|
465
|
+
|
|
466
|
+
/**
 * Element-wise strict equality of two arrays.
 * @param {Array} a
 * @param {Array} b
 * @returns {boolean} true when the lengths match and every position is `===`.
 */
function snapshotsEqual(a, b) {
  let i = a.length;
  if (i !== b.length) return false;
  while (i-- > 0) {
    if (a[i] !== b[i]) return false;
  }
  return true;
}
|
|
471
|
+
|
|
472
|
+
// Parser-free port of the seed's lockedRangesIn (seeds/rewritable.html, search `function lockedRangesIn`):
|
|
473
|
+
// the [start, end] byte range of each .rwa-locked element's whole subtree.
|
|
474
|
+
// Used by replace_document's class-lock coverage check. matchingCloseEnd is the
|
|
475
|
+
// CLI's equivalent of the seed's findCloseTagEnd (depth-tracked same-tag close).
|
|
476
|
+
// KEEP IN STEP with the seed.
|
|
477
|
+
/**
 * Find the range of every element whose class attribute contains rwa-locked.
 * The class value may be double-quoted, single-quoted, or unquoted.
 * @param {string} doc - document text.
 * @returns {Array<[number, number]>} `[start, end]` pairs: `start` is the offset
 *   of the opening tag, `end` the offset just past the matching close tag.
 *   Elements with no matching close tag are omitted. Empty for empty input.
 */
export function lockedRangesIn(doc) {
  if (!doc) return [];
  const opening = /<([a-zA-Z][a-zA-Z0-9]*)\b[^>]*\bclass\s*=\s*("([^"]*)"|'([^']*)'|([^\s"'>]+))[^>]*>/g;
  const ranges = [];
  for (let hit = opening.exec(doc); hit !== null; hit = opening.exec(doc)) {
    // Captures 3/4/5 hold the class value for the three quoting styles.
    const [openTag, tagName, , doubleQuoted, singleQuoted, bare] = hit;
    const classValue = doubleQuoted || singleQuoted || bare || '';
    if (!/\brwa-locked\b/.test(classValue)) continue;
    const end = matchingCloseEnd(doc, tagName, hit.index + openTag.length);
    if (end !== -1) ranges.push([hit.index, end]);
  }
  return ranges;
}
|
|
493
|
+
|
|
494
|
+
// Parser-free port of the seed's markerZoneRangesIn (seeds/rewritable.html, search `function markerZoneRangesIn`):
|
|
495
|
+
// the [start, end] byte ranges of every protected zone — marker-form frozen
|
|
496
|
+
// zones (all three fence forms, INCLUDING the fences) and data-rwa-frozen
|
|
497
|
+
// attribute-form element subtrees. Used by the class-lock coverage check to
|
|
498
|
+
// verify each .rwa-locked range is fully contained in a protected zone.
|
|
499
|
+
// Unterminated begin markers are skipped here (they carry no closed range);
|
|
500
|
+
// they are rejected separately by unterminatedFrozenMarker. KEEP IN STEP with
|
|
501
|
+
// the seed.
|
|
502
|
+
/**
 * Collect the ranges of every protected zone in `doc`.
 * Marker-form zones (all three fence forms) come first in document order and
 * span from the begin marker through the end marker; attribute-form frozen
 * element subtrees follow.
 * @param {string} doc - document text.
 * @returns {Array<[number, number]>} `[start, end]` pairs with `end` exclusive.
 *   Unterminated begin markers and elements with no matching close tag
 *   contribute nothing. Empty for empty input.
 */
export function markerZoneRangesIn(doc) {
  if (!doc) return [];
  const ranges = [];

  const beginRe = /(<!--|\/\*|\/\/)\s*rwa:frozen:begin\s+([A-Za-z0-9_-]+)\s*(-->|\*\/|(?=\r?\n|$))/g;
  // End-marker pattern pieces, keyed by the begin marker's opener.
  const endFence = {
    '<!--': ['<!--\\s*rwa:frozen:end\\s+', '\\s*-->'],
    '/*': ['\\/\\*\\s*rwa:frozen:end\\s+', '\\s*\\*\\/'],
    '//': ['\\/\\/\\s*rwa:frozen:end\\s+', '(?=\\r?\\n|$)'],
  };
  for (let begin = beginRe.exec(doc); begin !== null; begin = beginRe.exec(doc)) {
    const [marker, opener, name] = begin;
    let searchFrom = begin.index + marker.length;
    if (opener === '//') {
      // Line-comment form: look for the end marker from the next line on.
      const eol = doc.indexOf('\n', searchFrom);
      searchFrom = eol === -1 ? doc.length : eol + 1;
    }
    const [head, tail] = endFence[opener];
    const safeName = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const endRe = new RegExp(head + safeName + tail, 'g');
    endRe.lastIndex = searchFrom;
    const close = endRe.exec(doc);
    if (close !== null) ranges.push([begin.index, close.index + close[0].length]);
  }

  // Attribute-form zones: the regex is a cheap pre-filter; tagHasFrozenAttr
  // confirms the attribute is a real attribute name.
  const fzAttr = /<([a-zA-Z][a-zA-Z0-9]*)\b[^>]*\bdata-rwa-frozen\b[^>]*>/g;
  for (let hit = fzAttr.exec(doc); hit !== null; hit = fzAttr.exec(doc)) {
    if (!tagHasFrozenAttr(hit[0])) continue;
    const end = matchingCloseEnd(doc, hit[1], hit.index + hit[0].length);
    if (end !== -1) ranges.push([hit.index, end]);
  }
  return ranges;
}
|
|
536
|
+
|
|
537
|
+
/**
 * Structural-shape check (rwa-edit-spec.md §7).
 *
 * CLI v1: regex count of <script> and <style> opening tags. The seed
 * additionally tracks top-level tag-types-set and exempts non-executable
 * scripts (text/workflow-node, application/json) — both deferred for v1; the
 * realistic damage signal (a model emitting an inline <script> inside a
 * content edit) is caught by the count check.
 *
 * The tag name may be followed by whitespace, `>` or `/`. The `/` case matters
 * because the HTML tokenizer ends the tag name at `/` too, so `<script/src=…>`
 * and `<script/>` are still script start tags and must be counted.
 * NOTE(review): confirm the seed's counterpart counts the `/` form as well.
 *
 * @param {string} doc - document source to measure.
 * @returns {{scripts: number, styles: number}} number of <script and <style
 *   opening tags (case-insensitive).
 */
function structuralShape(doc) {
  const scriptOpeners = doc.match(/<script[\s\/>]/gi) || [];
  const styleOpeners = doc.match(/<style[\s\/>]/gi) || [];
  return {
    scripts: scriptOpeners.length,
    styles: styleOpeners.length,
  };
}
|
|
549
|
+
|
|
550
|
+
/**
 * Apply a batch of find/replace edits to a document and return the result.
 *
 * Every edit is validated and spliced in order against a working copy; the
 * batch-level invariants are then checked on the final copy. Any failure
 * throws and nothing is returned, so the caller never sees a partial result.
 *
 * @param {string} doc - document source; LF-canonicalized before any matching.
 * @param {Array<{find: string, replace?: string}>} edits - non-empty list of
 *   edits; each `find` must occur exactly once in the working copy at the time
 *   the edit is applied.
 * @returns {string} the edited, LF-canonical document.
 * @throws {RwaEditError} with one of the codes `malformed_envelope`,
 *   `empty_find`, `replace_too_large`, `reserved_substring`, `find_not_found`,
 *   `find_not_unique`, `frozen_zone_violation`, `class_lock_violation`,
 *   `structural_shape_changed`, `frozen_zone_corrupted`, `rwa_id_stripped`,
 *   `target_size_exceeded`. Per-edit failures carry the edit index; batch-level
 *   failures carry `null`.
 */
export function applyEdits(doc, edits) {
  if (!Array.isArray(edits) || edits.length === 0) {
    throw new RwaEditError('malformed_envelope', null, { reason: 'edits must be a non-empty array' });
  }

  // LF-canonicalize the document up front (mirror of the seed): all matching,
  // splicing, and the post-apply doc are LF-only, so CRLF in the source no
  // longer causes spurious find_not_found against LF anchors (or vice versa).
  const source = canonLF(doc);

  // Baselines for the batch-level checks that run after the last splice.
  const shapeBefore = structuralShape(source);
  const zonesBefore = findFrozenZones(source);
  // Attribute-form frozen zones (data-rwa-frozen) are enforced batch-level by
  // snapshot equality (see dataRwaFrozenSnapshot), mirroring the seed.
  const frozenAttrBefore = dataRwaFrozenSnapshot(source);

  let working = source;
  for (const [i, entry] of edits.entries()) {
    const raw = entry || {};
    if (!raw.find) throw new RwaEditError('empty_find', i);

    // Lone-surrogate guard BEFORE canonLF/match: a malformed find/replace would
    // corrupt the durable file on UTF-8 encode (mirror of the seed).
    if (!(isWellFormed(raw.find) && isWellFormed(raw.replace))) {
      throw new RwaEditError('malformed_envelope', i, { reason: 'lone_surrogate' });
    }

    // Per-edit replace cap (mirror of the seed's MAX_REPLACE). Measured on the
    // raw replace the caller supplied (the virtual/token form under
    // images-v1) — a text budget.
    const rawReplaceLength = (raw.replace || '').length;
    if (rawReplaceLength > MAX_REPLACE) {
      throw new RwaEditError('replace_too_large', i, { length: rawReplaceLength, cap: MAX_REPLACE });
    }

    // Canonicalize the anchor + replacement to LF so a CRLF-containing find
    // matches the LF-canonical working copy (and the splice stays LF-only).
    const find = canonLF(raw.find);
    const replace = canonLF(raw.replace);

    // Reserved-substring check (spec §4 rule 6) — runs before the find lookup
    // so a literal `data-rwa-frozen` in either side fails fast.
    if (containsReservedMarker(find) || containsReservedMarker(replace)) {
      throw new RwaEditError('reserved_substring', i, { find, replace });
    }

    // The anchor must match exactly once.
    const count = countOccurrences(working, find);
    if (count === 0) {
      throw new RwaEditError('find_not_found', i, { find, ...findClosestAnchor(working, find) });
    }
    if (count > 1) {
      throw new RwaEditError('find_not_unique', i, { find, count, hints: nearbySnippets(working, find) });
    }

    // Frozen-zone overlap check (marker form). Zones are recomputed against
    // `working` on every iteration so earlier splices cannot shift the zone
    // boundaries out from under this edit's check.
    const crossed = editCrossesFrozenZone(working, find, findFrozenZones(working));
    if (crossed) {
      throw new RwaEditError('frozen_zone_violation', i, { zone: crossed.name });
    }

    // Class-declared lock check (rwa-lens/1 spec §7; mirror of the seed's
    // apply path). Reject any find-range overlapping a .rwa-locked source
    // range. Adjacent insertions (find ends exactly where a lock begins, or
    // starts where one ends) are OK. Recomputed per iteration because
    // `working` mutates after each splice.
    const editStart = working.indexOf(find);
    const editEnd = editStart + find.length;
    for (const [lockStart, lockEnd] of lockedRangesIn(working)) {
      if (editStart < lockEnd && editEnd > lockStart) {
        throw new RwaEditError('class_lock_violation', i, {
          lockRange: [lockStart, lockEnd],
          editRange: [editStart, editEnd],
        });
      }
    }

    // Slice-based splice — String.prototype.replace honors $&/$`/$'/$$
    // patterns in the replacement string even for literal-string searches,
    // mangling content like "$$amount". Splicing keeps the text verbatim.
    const head = working.slice(0, editStart);
    const tail = working.slice(editEnd);
    working = head + (replace || '') + tail;
  }

  // Structural shape: the <script>/<style> counts must be unchanged.
  const shapeAfter = structuralShape(working);
  if (shapeBefore.scripts !== shapeAfter.scripts || shapeBefore.styles !== shapeAfter.styles) {
    throw new RwaEditError('structural_shape_changed', null, { before: shapeBefore, after: shapeAfter });
  }

  // Frozen-zone integrity: zone count must match. (Marker-form-only; seed
  // additionally diffs the inner bytes via extractFrozenZones — for the CLI
  // v1 the count check + per-edit crossing check is the practical guard.)
  const zonesAfter = findFrozenZones(working);
  if (zonesAfter.length !== zonesBefore.length) {
    throw new RwaEditError('frozen_zone_corrupted', null, {
      before: zonesBefore.length,
      after: zonesAfter.length,
    });
  }

  // Attribute-form frozen zones: the set of data-rwa-frozen elements (by
  // tag+outerHTML) must be unchanged after the whole batch — mirrors the seed's
  // dataRwaFrozenSnapshot/snapshotsEqual guard. Reported as frozen_zone_violation
  // (the FAILURE_HINTS message already covers "author-protected frozen zone").
  if (!snapshotsEqual(frozenAttrBefore, dataRwaFrozenSnapshot(working))) {
    throw new RwaEditError('frozen_zone_violation', null, { form: 'attribute' });
  }

  // #5 opt-in (rwa-id-strict): mirror of the seed — a container declaring
  // <meta name="rwa-id-strict"> (in a frozen zone) forbids losing an existing
  // data-rwa-id (the default would backfill a fresh one, breaking #frag links).
  if (/<meta\s+name\s*=\s*["']?rwa-id-strict\b/i.test(source)) {
    const collectIds = (html) => {
      const found = new Set();
      for (const hit of html.matchAll(/\sdata-rwa-id\s*=\s*(?:"([^"]*)"|'([^']*)')/g)) {
        found.add(hit[1] != null ? hit[1] : hit[2]);
      }
      return found;
    };
    const idsAfter = collectIds(working);
    for (const id of collectIds(source)) {
      if (!idsAfter.has(id)) throw new RwaEditError('rwa_id_stripped', null, { id });
    }
  }

  // Whole-document cap (mirror of the seed's MAX_DOC). Measured on the final
  // working copy — the virtual/token form under images-v1, so image bytes
  // never count against the text budget.
  if (working.length > MAX_DOC) {
    throw new RwaEditError('target_size_exceeded', null, { length: working.length, cap: MAX_DOC });
  }

  return working;
}
|