rewritable 0.3.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +261 -5
- package/bin/rwa.mjs +1000 -9
- package/package.json +2 -2
- package/seeds/rewritable.html +4356 -315
- package/src/agent-loop.mjs +155 -0
- package/src/apply-edits.mjs +664 -0
- package/src/atomic-write.mjs +38 -0
- package/src/backend.mjs +43 -0
- package/src/clone-extract.mjs +249 -0
- package/src/clone.mjs +161 -0
- package/src/commands.mjs +90 -10
- package/src/create.mjs +256 -0
- package/src/doc.mjs +69 -0
- package/src/dsl-compiler.mjs +357 -0
- package/src/edit.mjs +300 -0
- package/src/fetch-page.mjs +346 -0
- package/src/host.mjs +126 -0
- package/src/identity.mjs +257 -0
- package/src/import-claude.mjs +28 -4
- package/src/import-vision.mjs +1 -1
- package/src/import.mjs +76 -10
- package/src/ls.mjs +105 -0
- package/src/publish-site.mjs +85 -0
- package/src/publish.mjs +98 -0
- package/src/seed-extract.mjs +40 -0
- package/src/seed.mjs +1387 -5
- package/src/self-contained.mjs +115 -0
- package/src/skill-manifest.mjs +227 -0
- package/src/skin.mjs +350 -0
- package/src/skins.mjs +274 -0
- package/src/template.mjs +109 -0
package/src/host.mjs
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
// `rwa host <file>` — ingest a local rewritable into a hosted runtime's `POST /r`
|
|
2
|
+
// (service/server.js handleHostedCreate) and print the `{id, token, url}` the
|
|
3
|
+
// server mints. The url carries the capability token in its `#k=` fragment — the
|
|
4
|
+
// only way the user keeps editing the hosted copy — so it is printed verbatim.
|
|
5
|
+
//
|
|
6
|
+
// This is the network-bearing INGEST client (the round-trip-edit foundation),
|
|
7
|
+
// the way `rwa publish` is the ephemeral-share client. Online by design (the
|
|
8
|
+
// offline-first invariant of new/import does not apply to a host action), so —
|
|
9
|
+
// like `clone`/`publish-site` — it is excluded from the offline-first rule.
|
|
10
|
+
//
|
|
11
|
+
// Design parity:
|
|
12
|
+
// - flags-over-env config (--url > $RWA_HOST_URL), nothing baked in — like
|
|
13
|
+
// publish-site's RWA_SITE_*.
|
|
14
|
+
// - injected transport ({transport, env}) so tests run offline — the same
|
|
15
|
+
// deps-seam shape publish-site uses for {execFile, env}. The default
|
|
16
|
+
// transport is a real node:http/node:https POST.
|
|
17
|
+
// - CliError exit codes: 2 file_error (not_found/read_error/not_a_rewritable),
|
|
18
|
+
// 1 config_error (no url), 4 host_error (transport/HTTP failure, carrying the
|
|
19
|
+
// server's status/body verbatim). The bin labels exit 4 `host_error`.
|
|
20
|
+
//
|
|
21
|
+
// Security: only the file bytes are sent — a rewritable carries NO secret (the
|
|
22
|
+
// API key is sessionStorage-only, never in the file). The returned token is
|
|
23
|
+
// surfaced to stdout (the bin) and nowhere else.
|
|
24
|
+
|
|
25
|
+
import { readFile } from 'node:fs/promises';
|
|
26
|
+
import { request as httpRequest } from 'node:http';
|
|
27
|
+
import { request as httpsRequest } from 'node:https';
|
|
28
|
+
import { extractInlineDoc } from './seed.mjs';
|
|
29
|
+
import { CliError } from './edit.mjs';
|
|
30
|
+
|
|
31
|
+
/**
 * Default transport: a single POST over node:http / node:https.
 *
 * Returns the raw status + body text; hostFile owns all status/JSON
 * interpretation so the seam stays dumb and the contract lives in one place.
 * Network failures reject — the caller maps them to host_error/network_error.
 *
 * A failure on the RESPONSE stream (stream error, or the connection closing
 * before the message is complete) also rejects, so the returned promise always
 * settles instead of leaving the caller waiting forever.
 *
 * @param {string} url — the full POST target (already includes the /r path)
 * @param {{method:string, headers:object, body:string}} opts
 * @returns {Promise<{status:number, body:string}>}
 */
function defaultTransport(url, { method, headers, body }) {
  const target = new URL(url);
  // Pick the client matching the scheme; anything that is not https: uses node:http.
  const request = target.protocol === 'https:' ? httpsRequest : httpRequest;
  return new Promise((resolve, reject) => {
    const req = request(target, { method, headers }, (res) => {
      const chunks = [];
      res.on('data', (chunk) => chunks.push(chunk));
      res.on('end', () => resolve({ status: res.statusCode, body: Buffer.concat(chunks).toString('utf8') }));
      // Without these two handlers a response that dies mid-body never fires
      // 'end', and the promise would stay pending. Rejecting after a resolve is
      // a no-op, so the normal end → close sequence is unaffected.
      res.on('error', reject);
      res.on('close', () => {
        if (!res.complete) reject(new Error('connection closed before the response was complete'));
      });
    });
    req.on('error', reject);
    if (body != null) req.write(body);
    req.end();
  });
}
|
|
53
|
+
|
|
54
|
+
/**
 * Read a rewritable from disk, validate it locally, and POST its contents to
 * `<baseUrl>/r`.
 *
 * @param {string} filePath
 * @param {{url?:string, transport?:Function, env?:object}} [deps]
 *   url       — base url override (flag); falls back to env.RWA_HOST_URL
 *   transport — injection seam ((url, opts) => {status, body}); defaults to a
 *               real node:http/https POST
 *   env       — env source (tests inject); defaults to process.env
 * @returns {Promise<{id:string, token:string, url:string}>} the server's 200 object
 * @throws {CliError} 2 file_error · 1 config_error · 4 host_error
 */
export async function hostFile(filePath, deps = {}) {
  const send = deps.transport || defaultTransport;
  const environment = deps.env || process.env;

  // 1. Read — identical CliError file_error surface to publish.mjs / publish-site.mjs.
  let html;
  try {
    html = await readFile(filePath, 'utf8');
  } catch (readErr) {
    if (readErr && readErr.code === 'ENOENT') {
      throw new CliError(2, 'not_found', { path: filePath });
    }
    throw new CliError(2, 'read_error', {
      path: filePath,
      errno: readErr && readErr.code,
      message: readErr && readErr.message,
    });
  }

  // 2. Local fail-fast: is this even a rewritable? Same gate as `rwa publish`.
  //    The server re-validates authoritatively (it returns 400 not_a_rewritable);
  //    this just avoids a wasted round trip and gives an offline-detectable error.
  try {
    extractInlineDoc(html);
  } catch {
    throw new CliError(2, 'not_a_rewritable', { path: filePath });
  }

  // 3. Config: flag url > $RWA_HOST_URL; nothing is baked into the package.
  const baseUrl = deps.url || environment.RWA_HOST_URL;
  if (!baseUrl) {
    throw new CliError(1, 'config_error', { missing: ['RWA_HOST_URL'] });
  }

  // 4. POST the raw contents to <base>/r. The server reads the body raw and
  //    (per service/server.js) ignores content-type; text/html is sent anyway
  //    because it is the honest label for the payload.
  const endpoint = `${baseUrl.replace(/\/+$/, '')}/r`;
  let response;
  try {
    response = await send(endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'text/html; charset=utf-8' },
      body: html,
    });
  } catch (sendErr) {
    throw new CliError(4, 'network_error', {
      url: endpoint,
      message: (sendErr && sendErr.message) || String(sendErr),
    });
  }

  // Body may be empty or non-JSON on some error paths — parse defensively.
  let payload = null;
  if (response.body) {
    try {
      payload = JSON.parse(response.body);
    } catch {
      payload = null;
    }
  }

  if (response.status === 200) {
    // A 200 is only a success when all three minted fields are strings.
    const wellFormed = Boolean(payload) && ['id', 'token', 'url'].every((key) => typeof payload[key] === 'string');
    if (!wellFormed) {
      throw new CliError(4, 'malformed_success_response', { status: 200 });
    }
    const { id, token, url } = payload;
    return { id, token, url };
  }

  // Map the server's error envelope to an honest subcode. Prefer the server's
  // own `error` name when present; carry the status + maxBytes verbatim so the
  // user sees WHY ingest failed.
  const errName = payload && typeof payload.error === 'string' ? payload.error : null;
  const tooLarge = response.status === 413 || errName === 'body_too_large';
  if (tooLarge) {
    throw new CliError(4, 'body_too_large', { maxBytes: payload && payload.maxBytes });
  }
  throw new CliError(4, 'server_error', { status: response.status, error: errName });
}
|
package/src/identity.mjs
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
// Consumer-side static self-description for `self-description/1` — the answer to
|
|
2
|
+
// "what is this rewritable, and what can be done with it?" computed from the
|
|
3
|
+
// file BYTES, without executing the container's JS.
|
|
4
|
+
// Contract + reference: docs/specs/rwa-self-description-spec.md,
|
|
5
|
+
// tools/self-description.mjs (computeSelfDescription / validateSelfDescription).
|
|
6
|
+
//
|
|
7
|
+
// This is a PUBLISH-SAFE MIRROR of the reference's static computer. The CLI is a
|
|
8
|
+
// standalone npm package and cannot reach repo-root tools/ at runtime, so the
|
|
9
|
+
// kind→provider table, the substrate baseline, the title/blocks extraction, and
|
|
10
|
+
// the assembled object are duplicated here — the same pattern as
|
|
11
|
+
// cli/src/apply-edits.mjs mirroring the seed. The mirror is pinned to the single
|
|
12
|
+
// source by tests/identity.test.mjs (KIND_PROVIDERS / SUBSTRATE_BASELINE deep-equal
|
|
13
|
+
// the reference; the full assembled object deep-equals computeSelfDescription in
|
|
14
|
+
// doc.test.mjs). Drift fails loudly. KEEP IN STEP with tools/self-description.mjs.
|
|
15
|
+
|
|
16
|
+
import { tagHasFrozenAttr } from './apply-edits.mjs';
|
|
17
|
+
import { parseSkillZone } from './skill-manifest.mjs';
|
|
18
|
+
|
|
19
|
+
/** Schema discriminator carried in every self-description object's `rwa` field. */
export const SCHEMA_TAG = 'self-description/1';

// Mirror of tools/self-description.mjs AFFORDANCE_KINDS / PROVENANCES — used by the
// declared-projection conformance gate (declaredIsConforming). Keep in step.
export const AFFORDANCE_KINDS = [
  'view',
  'edit-surface',
  'tool',
  'compute',
  'hook',
];
export const PROVENANCES = [
  'first-party',
  'installed',
];

// kind -> registered provider bundle (spec §4). Each provider is {kind,name,label};
// `provenance:'first-party'` is added per emit (bootstrap-resident providers).
// The presentation entry mirrors the seed presentationProvider {name:'presentation',
// label:'Present'} (seeds/rewritable.html:3542-3543) so static == live by construction.
// ONLY kinds the runtime FIRST-PARTY-provides — custom kinds (datatable, …) are
// consumer-built via provide()/the declaration, so their honest static answer is
// [] (declared > static supplies the real affordances when a declaration exists).
export const KIND_PROVIDERS = {
  document: [],
  presentation: [
    { kind: 'view', name: 'presentation', label: 'Present' },
  ],
  workflow: [],
  // skill-host: no first-party affordances; installed skills (provenance:'installed')
  // come from parseSkillZone (§8), not this table. Explicit [] mirrors the oracle.
  'skill-host': [],
};

// Substrate-universal ops — the SAME for every container regardless of kind. The
// "what can be done with me" data that is NOT an affordance (affordances stay
// kernel-pure: a base document is []). `history` is undo-only — there is no redo
// (re-write-able-spec Invariant 7).
// Note: Object.freeze is shallow — the top-level object is frozen, the arrays are not.
export const SUBSTRATE_BASELINE = Object.freeze({
  edit: ['lens'],
  tools: ['apply_dsl_plan', 'apply_edits', 'replace_document'],
  export: ['html', 'print'],
  history: ['undo'],
});
|
|
51
|
+
|
|
52
|
+
/**
 * Human-readable title of a document: the text content of its first <h1>.
 * Mirrors tools/self-description.mjs `staticTitle` exactly (so titles agree).
 *
 * Nested tags inside the heading are dropped and every whitespace run is
 * collapsed to a single space. A missing or empty heading yields null.
 *
 * @param {string} doc — the LF-canonical editable body
 * @returns {string|null}
 */
export function extractTitle(doc) {
  const source = doc || '';
  const heading = source.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
  if (!heading) return null;

  const withoutTags = heading[1].replace(/<[^>]*>/g, '');
  const collapsed = withoutTags.replace(/\s+/g, ' ').trim();
  return collapsed === '' ? null : collapsed;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Number of `data-rwa-id` occurrences in the body — a coarse "how structured"
 * signal for data-rwa-id-addressable blocks.
 *
 * @param {string} doc — the LF-canonical editable body
 * @returns {number}
 */
export function countBlocks(doc) {
  const hits = (doc || '').match(/\bdata-rwa-id\b/g);
  return hits ? hits.length : 0;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Assemble the STATIC self-description projection from a container's already-
 * extracted facts (so inspectDoc parses the file once). Equivalent to the
 * reference `computeSelfDescription(fileText)`, minus the file parsing.
 *
 * @param {{doc:string, uuid:string|null, kind:string, frozenZones:string[]}} facts
 * @returns {object} a `source:'static'` self-description/1 object (spec §2)
 */
export function buildSelfDescription({ doc, uuid, kind, frozenZones }) {
  // `kind` originates from the file, so look it up as an OWN key only. A plain
  // `KIND_PROVIDERS[kind]` would resolve names like "constructor" or "__proto__"
  // through the prototype chain to a non-array, and the `.map` below would throw.
  const firstParty = Object.prototype.hasOwnProperty.call(KIND_PROVIDERS, kind)
    ? KIND_PROVIDERS[kind]
    : [];

  // First-party (kind-derived) + INSTALLED skills from the frozen #rwa-skills zone
  // (§8). Mirrors tools/self-description.mjs computeSelfDescription.
  const affordances = [
    ...firstParty.map((p) => ({ ...p, provenance: 'first-party' })),
    ...parseSkillZone(doc),
  ];

  // Copy each baseline array: Object.freeze on SUBSTRATE_BASELINE is shallow, so
  // sharing the arrays would let a caller that mutates the result also mutate
  // the module-level constant. The copy is deep-equal to the original.
  const baseline = Object.fromEntries(
    Object.entries(SUBSTRATE_BASELINE).map(([op, names]) => [op, [...names]]),
  );

  return {
    rwa: SCHEMA_TAG,
    source: 'static',
    uuid,
    kind,
    title: extractTitle(doc),
    blocks: countBlocks(doc),
    affordances,
    frozenZones,
    baseline,
  };
}
|
|
101
|
+
|
|
102
|
+
// ── The `declared` projection (v1.1, spec §3.1) ───────────────────────────────
|
|
103
|
+
// A custom-affordance file (a datatable the kind table can only GUESS for) may
|
|
104
|
+
// carry its own answer: an inert `<script id="rwa-affordances">` block with a
|
|
105
|
+
// `source:"declared"` self-description. The reader prefers it (declared > static)
|
|
106
|
+
// only if it is TRUSTWORTHY — edit-unreachable so the lens/agent can't have
|
|
107
|
+
// drifted it. Mirror of tools/self-description.mjs DECL_RE / parseDeclaration /
|
|
108
|
+
// declarationFacts (publish-safe; the CLI can't reach repo-root tools/ at runtime).
|
|
109
|
+
// The oracle takes only fileText and extractInlineDoc's it; the CLI passes the
|
|
110
|
+
// already-extracted `doc` (== extractInlineDoc(fileText)) so the two agree.
|
|
111
|
+
// KEEP IN STEP with tools/self-description.mjs.
|
|
112
|
+
// Matches an embedded `<script id="rwa-affordances">…</script>` block; capture
// group 1 is the script's text content.
const DECL_RE = /<script\b[^>]*\bid=["']rwa-affordances["'][^>]*>([\s\S]*?)<\/script\s*>/i;

// A body declaration lives inside INLINE_DOC (its </script> escaped in raw bytes),
// so it is found in `doc`; a chrome declaration (immutable, outside INLINE_DOC) is
// found in the raw file text. Return which, so the reader can judge edit-reachability.
// `hay` is the text the declaration should be searched in.
function declarationLocus(fileText, doc) {
  const inEditableBody = Boolean(doc) && DECL_RE.test(doc);
  return inEditableBody
    ? { hay: doc, inEditableBody }
    : { hay: fileText, inEditableBody };
}
|
|
121
|
+
|
|
122
|
+
/**
 * Extract the embedded #rwa-affordances declaration, if any.
 *
 * - no declaration block: every field is null
 * - block present, valid JSON: `declaration` is the parsed value, `raw` its text
 * - block present, invalid JSON: `declaration` is null, `raw` is the text and
 *   `error` carries the parser message
 *
 * @returns {{ declaration: object|null, raw: string|null, error: string|null }}
 */
export function parseDeclaration(fileText, doc) {
  const { hay } = declarationLocus(fileText, doc);
  const found = hay.match(DECL_RE);
  if (!found) {
    return { declaration: null, raw: null, error: null };
  }

  const raw = found[1];
  let declaration;
  try {
    declaration = JSON.parse(raw);
  } catch (e) {
    return { declaration: null, raw, error: 'invalid JSON: ' + (e && e.message) };
  }
  return { declaration, raw, error: null };
}
|
|
135
|
+
|
|
136
|
+
/**
 * Edit-reachability facts for the declaration (spec §3.1). Trustworthy iff
 * `!inEditableBody` (chrome) OR `frozenAttr` (data-rwa-frozen — enforced by the
 * lens, and by the CLI as of attribute-form enforcement). `frozenZones` is NOT
 * consulted (marker-form only, SD-04).
 *
 * @returns {{ found: boolean, inEditableBody: boolean, frozenAttr: boolean }}
 */
export function declarationFacts(fileText, doc) {
  const locus = declarationLocus(fileText, doc);
  const found = locus.hay.match(DECL_RE);
  if (!found) {
    return { found: false, inEditableBody: false, frozenAttr: false };
  }

  // The opening tag is everything up to and including the first '>' of the match.
  const whole = found[0];
  const openTag = whole.slice(0, whole.indexOf('>') + 1);

  // DOM-accurate: data-rwa-frozen must be a real attribute NAME (not a value-
  // mention / longer name), matching the seed's actual enforcement — else the
  // CLI would over-trust a declaration the lens can still drift (euler #112).
  const frozenAttr = tagHasFrozenAttr(openTag);
  return { found: true, inEditableBody: locus.inEditableBody, frozenAttr };
}
|
|
153
|
+
|
|
154
|
+
export const SOURCES = ['static', 'live', 'declared'];
const isStrArray = (v) => Array.isArray(v) && v.every((x) => typeof x === 'string');

/**
 * Validate a self-description/1 object against the §2/§3.1 schema — a publish-safe
 * MIRROR of tools/self-description.mjs validateSelfDescription, so the reader can
 * guarantee it never emits a non-conforming declared answer without importing
 * repo-root tools/ at runtime. Pinned to the oracle by test (identity.test.mjs).
 * KEEP IN STEP with tools/self-description.mjs.
 *
 * Every violation found is reported, in field order; a non-object input
 * short-circuits with the single error 'not an object'.
 *
 * @returns {{ valid: boolean, errors: string[] }}
 */
export function validateSelfDescription(obj) {
  const isPlainObject = (v) => v !== null && typeof v === 'object' && !Array.isArray(v);
  const isStringOrNull = (v) => v === null || typeof v === 'string';

  if (!isPlainObject(obj)) {
    return { valid: false, errors: ['not an object'] };
  }

  const errors = [];
  const declared = obj.source === 'declared';

  if (obj.rwa !== SCHEMA_TAG) {
    errors.push('rwa must be "' + SCHEMA_TAG + '"');
  }
  if (!SOURCES.includes(obj.source)) {
    errors.push('source must be one of ' + SOURCES.join(' | '));
  }

  // uuid/frozenZones are container facts the reader fills, so optional in a declaration.
  if (declared) {
    if ('uuid' in obj && !isStringOrNull(obj.uuid)) {
      errors.push('uuid, if present, must be a string or null');
    }
  } else if (!('uuid' in obj) || !isStringOrNull(obj.uuid)) {
    errors.push('uuid must be a string or null');
  }

  if (typeof obj.kind !== 'string' || obj.kind.length === 0) {
    errors.push('kind must be a non-empty string');
  }
  if ('title' in obj && !isStringOrNull(obj.title)) {
    errors.push('title must be a string or null');
  }
  // Number.isFinite is false for every non-number, so it covers the typeof check too.
  if ('blocks' in obj && !Number.isFinite(obj.blocks)) {
    errors.push('blocks must be a number');
  }

  // Per-entry affordance checks; `i` is the entry's index in obj.affordances.
  const checkAffordance = (a, i) => {
    const at = 'affordances[' + i + ']';
    if (!isPlainObject(a)) {
      errors.push(at + ' must be an object');
      return;
    }
    if (!AFFORDANCE_KINDS.includes(a.kind)) errors.push(at + '.kind unknown');
    if (typeof a.name !== 'string' || !a.name) errors.push(at + '.name must be a non-empty string');
    if ('label' in a && typeof a.label !== 'string') errors.push(at + '.label must be a string');
    if (!PROVENANCES.includes(a.provenance)) errors.push(at + '.provenance must be first-party | installed');
    for (const field of ['surface', 'target', 'output']) {
      if (field in a && typeof a[field] !== 'string') errors.push(at + '.' + field + ' must be a string');
    }
    if ('inputs' in a && !isStrArray(a.inputs)) errors.push(at + '.inputs must be an array of strings');
    if ('verified' in a && typeof a.verified !== 'boolean') errors.push(at + '.verified must be a boolean');
  };

  if (Array.isArray(obj.affordances)) {
    obj.affordances.forEach(checkAffordance);
  } else {
    errors.push('affordances must be an array');
  }

  if ('data' in obj && !isStringOrNull(obj.data)) {
    errors.push('data must be a string or null');
  }

  // frozenZones: required unless the object is a declaration; when present it
  // must always be an array of strings.
  const zonesInvalid = 'frozenZones' in obj ? !isStrArray(obj.frozenZones) : !declared;
  if (zonesInvalid) {
    errors.push('frozenZones must be an array of strings');
  }

  if ('baseline' in obj) {
    const b = obj.baseline;
    if (!isPlainObject(b)) {
      errors.push('baseline must be an object');
    } else {
      for (const k of ['edit', 'tools', 'export', 'history', 'view']) {
        if (k in b && !isStrArray(b[k])) {
          errors.push('baseline.' + k + ', if present, must be an array of strings');
        }
      }
      if (Array.isArray(b.history) && b.history.includes('redo')) {
        errors.push('baseline.history must not claim "redo"');
      }
    }
  }

  if (obj.source === 'static' && 'activeView' in obj) {
    errors.push('static projection must omit activeView');
  }
  if ('activeView' in obj && !isStringOrNull(obj.activeView)) {
    errors.push('activeView must be a string or null');
  }

  return { valid: errors.length === 0, errors };
}
|
|
216
|
+
|
|
217
|
+
/**
 * The reader's one answer (spec §3.1 precedence: declared > static). If the file
 * carries a TRUSTWORTHY (edit-unreachable) declaration that — once the reader
 * fills container facts (uuid/frozenZones/blocks from the bytes, authoritative
 * over any author claim) — VALIDATES, emit it as `source:'declared'`. Otherwise
 * emit the static kind-derived projection. Validating the assembled object before
 * trusting it guarantees the reader never emits a non-conforming answer (a subtly
 * malformed trustworthy declaration safely falls back to static). No `live`
 * registry on the static path, so there is no declared>live>static middle tier.
 *
 * @param {{fileText:string, doc:string, uuid:string|null, kind:string, frozenZones:string[]}} facts
 * @returns {object} a self-description/1 object (`source:'declared'` or `'static'`)
 */
export function resolveSelfDescription({ fileText, doc, uuid, kind, frozenZones }) {
  const facts = declarationFacts(fileText, doc);
  const trustworthy = facts.found && (!facts.inEditableBody || facts.frozenAttr);

  if (trustworthy) {
    const { declaration } = parseDeclaration(fileText, doc);
    if (declaration && typeof declaration === 'object' && !Array.isArray(declaration)) {
      // Fill ONLY container facts (uuid/frozenZones/blocks from the bytes —
      // authoritative over any author claim). Do NOT force rwa/source: the
      // discriminator and source are the author's claim and must already be
      // correct, or the declaration is non-conforming and we must not "repair"
      // it into a trusted answer (e.g. a `schema`-not-`rwa` pre-aligned block).
      // Union installed skills (parseSkillZone) into the declared affordances —
      // the static path does, so dropping them here made declared≠live (SD-04).
      // Declared providers win a (kind,name) collision; mirrors the seed's
      // runtimeDescribe registry→declared→installed precedence.
      const declAff = Array.isArray(declaration.affordances) ? declaration.affordances : [];

      // Build the collision keys from object entries only. The declaration is
      // author-supplied JSON, so an entry may be null or a primitive; reading
      // `.kind` on null would throw here, BEFORE validation gets the chance to
      // reject the candidate and fall back to static. Malformed entries stay in
      // `affordances` untouched so validateSelfDescription still rejects them.
      const seen = new Set(
        declAff
          .filter((a) => a !== null && typeof a === 'object')
          .map((a) => a.kind + '\0' + a.name),
      );
      const installed = parseSkillZone(doc).filter((s) => !seen.has(s.kind + '\0' + s.name));

      const candidate = {
        ...declaration,
        affordances: [...declAff, ...installed],
        uuid,
        frozenZones,
        blocks: countBlocks(doc),
      };
      if (candidate.source === 'declared' && validateSelfDescription(candidate).valid) {
        return candidate;
      }
    }
  }

  return buildSelfDescription({ doc, uuid, kind, frozenZones });
}
|
package/src/import-claude.mjs
CHANGED
|
@@ -17,10 +17,19 @@ import * as pdfjs from 'pdfjs-dist/legacy/build/pdf.mjs';
|
|
|
17
17
|
// strictly better fidelity than either the local pdfjs heuristic or the
|
|
18
18
|
// raw-vision OpenRouter path, on documents where the skills apply.
|
|
19
19
|
//
|
|
20
|
-
// Trust model: this spawns a Claude Code subprocess
|
|
21
|
-
//
|
|
22
|
-
//
|
|
23
|
-
//
|
|
20
|
+
// Trust model: this spawns a Claude Code subprocess that reads the input file's
|
|
21
|
+
// CONTENTS into an agent context (the pdf/docx skill needs Python — pypdf,
|
|
22
|
+
// pdfplumber, mammoth — to extract them, so the agent genuinely needs tool
|
|
23
|
+
// access). That makes the file attacker-controlled input: prompt-injection text
|
|
24
|
+
// hidden in a third-party PDF/DOCX could hijack the agent. `import` is precisely
|
|
25
|
+
// the command you point at files you received from someone else, so "the user
|
|
26
|
+
// trusts their input file" is the WRONG threat model.
|
|
27
|
+
//
|
|
28
|
+
// Therefore `--claude` is gated behind an explicit `--trust-input` consent flag
|
|
29
|
+
// (convertViaClaudeCli throws below if it is absent). Only when the user vouches
|
|
30
|
+
// for the file do we add `--permission-mode bypassPermissions`. The default
|
|
31
|
+
// import path (pdfjs/mammoth — parses bytes, never executes the file's content)
|
|
32
|
+
// remains the safe, no-flag route. Documented in HELP.
|
|
24
33
|
|
|
25
34
|
const SKILL_FOR_EXT = { pdf: 'pdf', docx: 'docx' };
|
|
26
35
|
|
|
@@ -103,6 +112,21 @@ export async function convertViaClaudeCli(filePath, ext, opts = {}) {
|
|
|
103
112
|
throw e;
|
|
104
113
|
}
|
|
105
114
|
|
|
115
|
+
// Consent gate (SECURITY). Refuse to point an autonomous agent at the file
|
|
116
|
+
// unless the user explicitly vouched for it. Must run BEFORE any file read or
|
|
117
|
+
// subprocess spawn, so an unconsented file is never touched by the agent.
|
|
118
|
+
if (!opts.trustInput) {
|
|
119
|
+
const e = new Error(
|
|
120
|
+
`refusing to run an autonomous agent on ${filePath} without consent.\n` +
|
|
121
|
+
` --claude extraction reads the file's contents into a Claude Code agent, so a\n` +
|
|
122
|
+
` malicious file could hijack it (prompt-injection -> code execution).\n` +
|
|
123
|
+
` Re-run with --claude --trust-input only if you trust this file's source.\n` +
|
|
124
|
+
` (The default import, without --claude, parses the file safely and never executes its contents.)`
|
|
125
|
+
);
|
|
126
|
+
e.exitCode = 2;
|
|
127
|
+
throw e;
|
|
128
|
+
}
|
|
129
|
+
|
|
106
130
|
// docx isn't naturally page-chunkable (no fixed page boundaries inside the
|
|
107
131
|
// XML). Single call.
|
|
108
132
|
if (ext !== 'pdf') {
|
package/src/import-vision.mjs
CHANGED
|
@@ -57,7 +57,7 @@ export async function convertPdfViaVision(bytes, { apiKey, model, signal } = {})
|
|
|
57
57
|
const dataUri = `data:application/pdf;base64,${buf.toString('base64')}`;
|
|
58
58
|
|
|
59
59
|
const body = {
|
|
60
|
-
model: model || 'google/gemini-3-flash
|
|
60
|
+
model: model || 'google/gemini-3.5-flash',
|
|
61
61
|
messages: [
|
|
62
62
|
{ role: 'system', content: SYSTEM_PROMPT },
|
|
63
63
|
{
|
package/src/import.mjs
CHANGED
|
@@ -38,8 +38,9 @@ function toText(bytes) {
|
|
|
38
38
|
}
|
|
39
39
|
|
|
40
40
|
// Render markdown with marked (GFM on, single newlines NOT turned into <br>),
// pass the result through sanitizeImportedHtml, and wrap the sanitized markup
// in an <article>. The sanitizer's warnings are returned alongside the html.
function convertMd(md) {
  const rendered = marked.parse(md, { gfm: true, breaks: false });
  const sanitized = sanitizeImportedHtml(rendered);
  const body = sanitized.html.trim();
  return { html: `<article>\n${body}\n</article>`, warnings: sanitized.warnings };
}
|
|
44
45
|
|
|
45
46
|
function convertHtml(input) {
|
|
@@ -149,16 +150,30 @@ async function convertDocx(bytes) {
|
|
|
149
150
|
// escapes &, ", <, > inside values, so a regex match against `attr="..."` is
|
|
150
151
|
// sufficient — no quote-escape ambiguity to worry about.
|
|
151
152
|
const _SAFE_HREF_SCHEMES = new Set(['http', 'https', 'mailto', 'tel']);
|
|
153
|
+
// Two layers, both required:
|
|
154
|
+
// 1) Strip invisibles before parsing — whitespace + C0/C1 controls (\x00-\x1f,
|
|
155
|
+
// \x7f-\xa0) + soft hyphen (\xad) + Cf-class format chars (ZWSP/ZWNJ/ZWJ,
|
|
156
|
+
// LRM/RLM, LRE/RLE/PDF/LRO/RLO, word joiner, BOM, etc.). The previous
|
|
157
|
+
// regex used JS \s which doesn't match these — they slipped through and
|
|
158
|
+
// let a docx with `javascript:…` href bypass the scheme check.
|
|
159
|
+
// 2) Parse via WHATWG URL — the same parser the browser uses to navigate.
|
|
160
|
+
// Resolve against a synthetic base so scheme-less inputs (relative URL,
|
|
161
|
+
// fragment, path) round-trip back to that base and pass.
|
|
162
|
+
// Invisible characters stripped from an attribute value before URL parsing.
// Written with explicit \u escapes rather than literal invisible characters, so
// the class survives editors, diff viewers and copy/paste (with the literals
// lost, the class ends in `\xad---`, an out-of-order range and a SyntaxError).
//   \s, \x00-\x1f, \x7f-\xa0  whitespace + C0/C1 controls (incl. NBSP)
//   \xad                       soft hyphen
//   \u200b-\u200f              ZWSP, ZWNJ, ZWJ, LRM, RLM
//   \u202a-\u202e              LRE, RLE, PDF, LRO, RLO
//   \u2060-\u206f              word joiner and the following format characters
//   \ufeff                     BOM / zero-width no-break space
// NOTE(review): the exact bounds of the original literal ranges are not
// recoverable here; these follow the characters the comment above names.
const _ATTR_STRIP_RE = /[\s\x00-\x1f\x7f-\xa0\xad\u200b-\u200f\u202a-\u202e\u2060-\u206f\ufeff]/g;
|
|
163
|
+
const _SANITIZER_BASE = 'http://_rwa_sanitizer_base_/';
|
|
152
164
|
// Decide whether a URL-bearing attribute value may be kept.
// `attr` is the attribute name, `val` its value. Returns true when the value
// cannot act as an active URL or its scheme is on the allow-list.
function _attrIsSafe(attr, val) {
  // Drop invisibles first so they cannot hide a scheme from the parser.
  const cleaned = String(val).replace(_ATTR_STRIP_RE, '');

  let target;
  try {
    target = new URL(cleaned, _SANITIZER_BASE);
  } catch {
    return true; // unparseable → cannot be an active URL scheme
  }

  // Landed on the synthetic base origin: the input was relative — no scheme.
  if (target.origin === 'http://_rwa_sanitizer_base_') return true;

  const scheme = target.protocol.replace(/:$/, '').toLowerCase();
  if (_SAFE_HREF_SCHEMES.has(scheme)) return true;

  // Mammoth embeds raster images as data:image/...;base64,... — allow on src.
  // data:image/svg+xml passes here too, but <img src> renders SVG in image-
  // loading mode with no script execution (HTML spec), so the narrow
  // 'data:image/*' allowance is still safe for src. Keep scoped to src only.
  return attr === 'src' && scheme === 'data' && /^data:image\//i.test(target.href);
}
|
|
164
179
|
function sanitizeMammothUrls(html) {
|
|
@@ -178,6 +193,57 @@ function sanitizeMammothUrls(html) {
|
|
|
178
193
|
};
|
|
179
194
|
}
|
|
180
195
|
|
|
196
|
+
// marked v14 explicitly does NOT sanitize HTML — its README points readers at
|
|
197
|
+
// DOMPurify. The seed bootstrap injects INLINE_DOC via m.innerHTML AND
|
|
198
|
+
// re-creates <script> tags so they execute (intended for documents that ship
|
|
199
|
+
// JS), so any active content in the imported HTML runs on container open. An
|
|
200
|
+
// imported .md must not be able to add active content.
|
|
201
|
+
//
|
|
202
|
+
// Regex-based strip (not a parser) for mirror-symmetry with the browser. The
|
|
203
|
+
// rules below are deliberately conservative: when in doubt, strip. Marked's
|
|
204
|
+
// output is well-formed and uses double-quoted attributes, so the regex shape
|
|
205
|
+
// matches reliably. Edge cases (CDATA, malformed nesting) are over-stripped
|
|
206
|
+
// rather than under-stripped — acceptable for an import path.
|
|
207
|
+
const _ACTIVE_TAGS = ['script', 'iframe', 'object', 'embed', 'svg', 'math', 'link', 'meta', 'base'];

/**
 * Strip active content from imported HTML before it is embedded in a container.
 *
 * Three regex-based steps (deliberately conservative — over-stripping is
 * preferred to under-stripping):
 *   1. remove every element named in _ACTIVE_TAGS,
 *   2. remove on*= event-handler attributes,
 *   3. neutralise URL-bearing attributes whose value fails _attrIsSafe.
 *
 * Steps 1 and 2 are repeated until the string stops changing. A single pass is
 * not enough: deleting a match can splice its neighbours into new live markup
 * (`<scr<iframe></iframe>ipt>` → `<script>`, `<scr onx=""ipt>` → `<script>`,
 * `<p on onclick="1"load="…">` → ` onload="…"`). Every pass only deletes text,
 * so the loop always terminates.
 *
 * @param {*} html - HTML to sanitize (coerced to a string).
 * @returns {{html: string, warnings: string[]}} sanitized HTML plus one
 *   human-readable warning per kind of content that was removed.
 */
export function sanitizeImportedHtml(html) {
  const warnings = [];
  let s = String(html);

  // Compile the per-tag patterns once; String.prototype.replace resets
  // lastIndex on global regexes, so they are safe to reuse across passes.
  const tagRules = _ACTIVE_TAGS.map((tag) => ({
    tag,
    // open+close block, then self-closing/unmatched leftovers
    block: new RegExp('<' + tag + '\\b[^>]*>[\\s\\S]*?<\\/' + tag + '\\s*>', 'gi'),
    solo: new RegExp('<\\/?' + tag + '\\b[^>]*\\/?>', 'gi'),
  }));

  // Quoted (double/single) and unquoted-to-whitespace/> value forms. Besides
  // whitespace, the HTML tokenizer also starts a new attribute after `/` or
  // directly after a closing quote (`<b/onclick=…>`, `<p id="a"onclick=…>`),
  // so those separators are accepted too (as a lookbehind: they are kept).
  const onAttrRe = /(?:\s|(?<=["'\/]))on[a-z]+\s*=\s*("[^"]*"|'[^']*'|[^\s>]+)/gi;

  const strippedTags = new Set();
  let onCount = 0;
  let before;
  do {
    before = s;
    // 1) Drop active-content tags.
    for (const { tag, block, solo } of tagRules) {
      const next = s.replace(block, '').replace(solo, '');
      if (next !== s) {
        strippedTags.add(tag);
        s = next;
      }
    }
    // 2) Drop on*= event-handler attributes from surviving elements.
    s = s.replace(onAttrRe, () => { onCount++; return ''; });
  } while (s !== before);

  // Report in _ACTIVE_TAGS order, one warning per tag name.
  for (const tag of _ACTIVE_TAGS) {
    if (strippedTags.has(tag)) warnings.push('imported md: stripped <' + tag + '> elements');
  }
  if (onCount) warnings.push('imported md: stripped ' + onCount + ' event-handler attribute(s)');

  // 3) Apply scheme allow-list to surviving URL-bearing attributes. Marked's
  // output is double-quoted href/src only, but rwa clone feeds ARBITRARY web
  // HTML here — single-quoted, unquoted, and other URL attributes (action/
  // formaction/poster/xlink:href) are all common and must be checked too, or
  // a `href='javascript:…'` survives into the file:// container as a live,
  // clickable link. Match all three value forms and the same attribute
  // separators as the on*= strip. data:image/* stays allowed on src.
  // Replacement (not deletion) cannot splice new markup together, so a
  // single pass is sufficient here.
  let urlSkipped = 0;
  s = s.replace(
    /(\s|(?<=["'\/]))(xlink:href|formaction|href|src|action|poster)(\s*=\s*)("[^"]*"|'[^']*'|[^\s>]+)/gi,
    (full, ws, name, eq, rawVal) => {
      const lname = name.toLowerCase();
      // poster loads a resource like src; everything else is navigational.
      const attr = (lname === 'src' || lname === 'poster') ? 'src' : 'href';
      const quoted = rawVal[0] === '"' || rawVal[0] === "'";
      const val = quoted ? rawVal.slice(1, -1) : rawVal;
      if (_attrIsSafe(attr, val)) return full;
      urlSkipped++;
      return ws + name + eq + '"#"';
    }
  );
  if (urlSkipped) warnings.push('imported md: neutralised ' + urlSkipped + ' unsafe URL attribute(s)');
  return { html: s, warnings };
}
|
|
246
|
+
|
|
181
247
|
async function convertPdf(bytes) {
|
|
182
248
|
// pdfjs explicitly rejects Node's Buffer (despite Buffer extending Uint8Array)
|
|
183
249
|
// and wants a plain Uint8Array view.
|
package/src/ls.mjs
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
// `rwa ls` — collection-scale self-description. Where inspectDoc answers "what is
|
|
2
|
+
// THIS file?", listRewritables answers "what are all these?": it resolves a set
|
|
3
|
+
// of paths (files, directories, or — by default — the current directory) to
|
|
4
|
+
// candidate .html files and reports each one's self-description/1 projection,
|
|
5
|
+
// flagging non-rewritables. The scan is lenient: a missing path or a
|
|
6
|
+
// non-rewritable is a row in the result, never a thrown error — so one bad entry
|
|
7
|
+
// can't abort the inventory of a whole folder.
|
|
8
|
+
|
|
9
|
+
import { readdir, stat } from 'node:fs/promises';
|
|
10
|
+
import { join } from 'node:path';
|
|
11
|
+
import { inspectDoc } from './doc.mjs';
|
|
12
|
+
|
|
13
|
+
const HTML_RE = /\.html?$/i;

/**
 * Flatten the requested paths into an ordered list of candidate files.
 *
 * - a directory yields its direct children whose names end in .htm/.html
 *   (sorted, non-recursive);
 * - any other path that can be stat'd yields itself;
 * - a path that cannot be stat'd (or a directory that cannot be read) is kept
 *   as a `missing` candidate so the caller can report it.
 *
 * With no inputs the current directory is scanned.
 *
 * @param {string[]} paths
 * @returns {Promise<Array<{file:string, missing?:boolean}>>}
 */
export async function resolveTargets(paths) {
  const requested = (paths && paths.length) ? paths : ['.'];
  const found = [];
  const markMissing = (file) => { found.push({ file, missing: true }); };

  for (const entry of requested) {
    let info;
    try {
      info = await stat(entry);
    } catch {
      markMissing(entry);
      continue;
    }

    if (!info.isDirectory()) {
      found.push({ file: entry });
      continue;
    }

    let children;
    try {
      children = await readdir(entry);
    } catch {
      markMissing(entry);
      continue;
    }

    const htmlNames = children.filter((child) => HTML_RE.test(child));
    htmlNames.sort();
    for (const child of htmlNames) {
      found.push({ file: join(entry, child) });
    }
  }

  return found;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Classify every candidate produced by resolveTargets. One row per candidate:
 *   { file, status:'rewritable', self }    — `self` is taken from inspectDoc's result
 *   { file, status:'not_a_rewritable' }    — inspectDoc threw with subcode 'not_a_rewritable'
 *   { file, status:'error', reason }       — 'not_found', the thrown subcode, or 'read_error'
 *
 * Failures are recorded as rows rather than rethrown, so a single bad entry
 * does not stop the scan.
 *
 * @param {string[]} paths
 * @returns {Promise<Array<object>>}
 */
export async function listRewritables(paths) {
  const rows = [];
  const candidates = await resolveTargets(paths);

  for (const candidate of candidates) {
    const file = candidate.file;

    if (candidate.missing) {
      rows.push({ file, status: 'error', reason: 'not_found' });
      continue;
    }

    let row;
    try {
      const info = await inspectDoc(file);
      row = { file, status: 'rewritable', self: info.self };
    } catch (err) {
      const subcode = err && err.subcode;
      row = subcode === 'not_a_rewritable'
        ? { file, status: 'not_a_rewritable' }
        : { file, status: 'error', reason: subcode || 'read_error' };
    }
    rows.push(row);
  }

  return rows;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Render the rows as a human-readable, aligned inventory. Rewritables become a
 * KIND/TITLE/AFFORDANCES/FILE table; a footer counts rewritables vs. other files
 * and names any errors — nothing is silently dropped (Rule 12).
 *
 * Formatting is tolerant of incomplete self-descriptions: a missing `self`, a
 * missing/non-array `affordances`, or a non-string `kind`/`title` renders as
 * an empty or '—' cell instead of throwing, so one odd row cannot abort the
 * listing of a whole folder.
 *
 * @param {Array<object>} rows - rows as produced by listRewritables.
 * @returns {string} the table (when there is at least one rewritable), a blank
 *   line, and the summary footer, joined with '\n'.
 */
export function formatRows(rows) {
  const rwa = rows.filter(r => r.status === 'rewritable');
  const other = rows.filter(r => r.status === 'not_a_rewritable');
  const errors = rows.filter(r => r.status === 'error');

  const lines = [];
  if (rwa.length) {
    const cells = rwa.map(r => {
      const self = r.self || {};
      const affordances = Array.isArray(self.affordances) ? self.affordances : [];
      return {
        // String() keeps padEnd/length safe if a field is not a string.
        kind: String(self.kind || ''),
        title: String(self.title || '—'),
        affordances: affordances.length ? affordances.map(a => a && a.kind).join(',') : '—',
        file: r.file,
      };
    });
    const head = { kind: 'KIND', title: 'TITLE', affordances: 'AFFORDANCES', file: 'FILE' };
    // Column width = widest of header and cells. reduce (not Math.max(...spread))
    // so very large listings cannot hit the engine's argument-count limit.
    const w = (k) => cells.reduce((max, c) => Math.max(max, c[k].length), head[k].length);
    const wk = w('kind'), wt = w('title'), wa = w('affordances');
    const row = (c) => `${c.kind.padEnd(wk)} ${c.title.padEnd(wt)} ${c.affordances.padEnd(wa)} ${c.file}`;
    lines.push(row(head));
    for (const c of cells) lines.push(row(c));
    lines.push('');
  }
  const parts = [`${rwa.length} rewritable${rwa.length === 1 ? '' : 's'}`];
  if (other.length) parts.push(`${other.length} other (${other.map(r => r.file).join(', ')})`);
  if (errors.length) parts.push(`${errors.length} error (${errors.map(r => `${r.file}: ${r.reason}`).join(', ')})`);
  lines.push(parts.join(', '));
  return lines.join('\n');
}
|