rewritable 0.1.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +263 -5
- package/bin/rwa.mjs +1033 -6
- package/package.json +7 -4
- package/seeds/rewritable.html +6989 -156
- package/src/agent-loop.mjs +155 -0
- package/src/apply-edits.mjs +664 -0
- package/src/atomic-write.mjs +38 -0
- package/src/backend.mjs +43 -0
- package/src/clone-extract.mjs +249 -0
- package/src/clone.mjs +161 -0
- package/src/commands.mjs +207 -11
- package/src/create.mjs +256 -0
- package/src/doc.mjs +69 -0
- package/src/dsl-compiler.mjs +357 -0
- package/src/edit.mjs +300 -0
- package/src/fetch-page.mjs +346 -0
- package/src/host.mjs +126 -0
- package/src/identity.mjs +257 -0
- package/src/import-claude.mjs +360 -0
- package/src/import-vision.mjs +156 -0
- package/src/import.mjs +357 -8
- package/src/ls.mjs +105 -0
- package/src/publish-site.mjs +85 -0
- package/src/publish.mjs +98 -0
- package/src/seed-extract.mjs +40 -0
- package/src/seed.mjs +1399 -6
- package/src/self-contained.mjs +115 -0
- package/src/skill-manifest.mjs +227 -0
- package/src/skin.mjs +350 -0
- package/src/skins.mjs +274 -0
- package/src/template.mjs +109 -0
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
// SSRF-safe page fetcher for `rwa clone <url>`. The fetch layer only — the
|
|
2
|
+
// article extractor and the bootstrap wiring are separate modules.
|
|
3
|
+
//
|
|
4
|
+
// A user (or an agent) can pass any URL, so without guards `rwa clone
|
|
5
|
+
// http://169.254.169.254/…` or `http://127.0.0.1:…` could reach cloud-metadata
|
|
6
|
+
// endpoints or internal services. Defence is in three layers:
|
|
7
|
+
// 1. scheme allowlist (http/https only) — assertFetchableUrl
|
|
8
|
+
// 2. IP-literal classification (block private/etc.) — assertPublicIp
|
|
9
|
+
// 3. DNS-rebinding defence: resolve the hostname and re-classify EVERY
|
|
10
|
+
// resolved address; manual per-hop redirect re-validation (no
|
|
11
|
+
// redirect:'follow' — that would bypass the per-hop checks).
|
|
12
|
+
//
|
|
13
|
+
// Error surface (all exitCode 2 so the CLI maps them to the file/fetch class):
|
|
14
|
+
// subcode: 'bad_scheme', 'blocked_host', 'too_many_redirects', 'http_error',
|
|
15
|
+
// 'not_html', 'not_image', 'too_large', 'fetch_failed'.
|
|
16
|
+
//
|
|
17
|
+
// Mirrors the rigor of the seed bridge SSRF block (redirect:'error' +
|
|
18
|
+
// private-range rejection). Only node: built-ins + global fetch.
|
|
19
|
+
|
|
20
|
+
import { isIP } from 'node:net';
|
|
21
|
+
import { lookup } from 'node:dns/promises';
|
|
22
|
+
|
|
23
|
+
/**
 * Error raised by the clone fetch layer.
 *
 * The Error `message` is the machine-readable subcode itself; any
 * human-readable text and context travel in `details`.
 *
 * @param {number} exitCode - process exit code the CLI should use
 * @param {string} subcode - short machine-readable failure name
 * @param {object} [details] - free-form context for the failure
 */
export class CloneError extends Error {
  constructor(exitCode, subcode, details = {}) {
    super(subcode);
    Object.assign(this, { exitCode, subcode, details });
  }
}
|
|
31
|
+
|
|
32
|
+
// --- IP classification ------------------------------------------------------
|
|
33
|
+
|
|
34
|
+
// Convert a dotted-quad string into its four numeric octets. Anything that
// net.isIP() does not recognise as an IPv4 literal yields null.
function parseV4(host) {
  if (isIP(host) !== 4) {
    return null;
  }
  const octets = [];
  for (const piece of host.split('.')) {
    octets.push(Number(piece));
  }
  // Re-check the numeric range on the parsed values rather than trusting the
  // string-level check alone.
  const inRange = (n) => Number.isInteger(n) && n >= 0 && n <= 255;
  if (octets.length !== 4 || !octets.every(inRange)) {
    return null;
  }
  return octets;
}
|
|
41
|
+
|
|
42
|
+
// Classify an IPv4 address, given as an array of octets, against the ranges we
// refuse to fetch. Returns the category name ('unspecified', 'private',
// 'loopback', 'link-local' or 'reserved') so the error message can name the
// reason, or null when the address is in none of the blocked ranges. Only the
// first three octets are ever needed to decide.
function v4Category([a, b, c]) {
  if (a === 0) return 'unspecified'; // 0.0.0.0/8
  if (a === 10) return 'private'; // 10/8
  if (a === 127) return 'loopback'; // 127/8
  if (a === 169 && b === 254) return 'link-local'; // 169.254/16 (incl. metadata)
  if (a === 172 && b >= 16 && b <= 31) return 'private'; // 172.16/12
  if (a === 192 && b === 168) return 'private'; // 192.168/16
  if (a === 100 && b >= 64 && b <= 127) return 'reserved'; // 100.64/10 CGNAT
  if (a === 192 && b === 0 && c === 0) return 'reserved'; // 192.0.0/24 IETF protocol assignments
  if (a === 192 && b === 0 && c === 2) return 'reserved'; // 192.0.2/24 TEST-NET-1
  if (a === 198 && (b === 18 || b === 19)) return 'reserved'; // 198.18/15 benchmarking
  if (a === 198 && b === 51 && c === 100) return 'reserved'; // 198.51.100/24 TEST-NET-2
  if (a === 203 && b === 0 && c === 113) return 'reserved'; // 203.0.113/24 TEST-NET-3
  if (a === 192 && b === 88 && c === 99) return 'reserved'; // 192.88.99/24 6to4 anycast
  if (a >= 224) return 'reserved'; // 224/4 multicast + 240/4 reserved
  return null;
}
|
|
60
|
+
|
|
61
|
+
// Expand an IPv6 literal into its full 16 bytes, dep-free: handles `::`
// compression, an embedded dotted-quad tail (::ffff:a.b.c.d / ::a.b.c.d) and a
// trailing zone id (fe80::1%eth0). Returns a 16-element byte array, or null if
// the input does not parse as v6. Operating on bytes (not string regexes) makes
// the dotted and hex spellings of the same address — e.g. ::ffff:127.0.0.1 and
// ::ffff:7f00:1 — classify identically.
function expandV6(host) {
  if (isIP(host) !== 6) return null;
  let s = host.toLowerCase();
  // isIP() can accept a scoped literal. The zone id names an interface and is
  // not part of the address, and it may itself contain '.' or ':' — left in
  // place it would be mistaken for a dotted-quad tail or an extra group. Strip
  // it before any structural parsing.
  const zoneAt = s.indexOf('%');
  if (zoneAt !== -1) s = s.slice(0, zoneAt);
  // Split out an embedded IPv4 tail (last group with dots) into two hex groups.
  const dot = s.lastIndexOf(':');
  const tail = s.slice(dot + 1);
  let v4Bytes = null;
  if (tail.includes('.')) {
    const quad = parseV4(tail);
    if (!quad) return null;
    v4Bytes = quad;
    s = s.slice(0, dot + 1); // keep trailing ':' so the group count stays right
  }
  // Split around the `::` compression point (at most one). The length-mismatch
  // and multiple-`::` guards below are belt-and-suspenders — isIP() already
  // rejected malformed literals, but we re-check on raw bytes for defence-in-depth.
  const halves = s.split('::');
  if (halves.length > 2) return null;
  const splitGroups = (part) => (part === '' ? [] : part.split(':').filter((g) => g !== ''));
  const head = splitGroups(halves[0]);
  const tailGroups = halves.length === 2 ? splitGroups(halves[1]) : [];
  // Each remaining group is one 16-bit hex word; the v4 tail (if any) is 2 words.
  const v4Words = v4Bytes ? 2 : 0;
  const words = [];
  for (const g of head) words.push(Number.parseInt(g, 16));
  if (halves.length === 2) {
    // `::` stands for however many zero words are needed to reach 8 in total.
    const fill = 8 - head.length - tailGroups.length - v4Words;
    if (fill < 0) return null;
    for (let i = 0; i < fill; i++) words.push(0);
  }
  for (const g of tailGroups) words.push(Number.parseInt(g, 16));
  if (words.length !== 8 - v4Words) return null;
  const bytes = [];
  for (const w of words) { bytes.push((w >> 8) & 0xff, w & 0xff); }
  if (v4Bytes) bytes.push(...v4Bytes);
  if (bytes.length !== 16) return null;
  return bytes;
}
|
|
104
|
+
|
|
105
|
+
// Classify a v6 literal. Returns:
//   - a category string ('loopback', 'unspecified', 'reserved', 'private',
//     'link-local') if the address must be blocked;
//   - 'mapped:<v4>' when the address embeds an IPv4 address (IPv4-mapped,
//     IPv4-compatible, NAT64, 6to4) that the caller must re-check through the
//     v4 category logic;
//   - null for a public v6 address.
// A literal that cannot be expanded to 16 bytes is reported as 'reserved':
// an address we cannot inspect must not be treated as public.
function v6Category(host) {
  const b = expandV6(host);
  // Fail closed rather than open when the bytes are unavailable.
  if (!b) return 'reserved';
  const allZeroThrough = (n) => b.slice(0, n).every((x) => x === 0);
  // IPv4-mapped ::ffff:a.b.c.d — first 10 bytes zero, bytes 11-12 = 0xff,0xff.
  if (allZeroThrough(10) && b[10] === 0xff && b[11] === 0xff) {
    return `mapped:${b[12]}.${b[13]}.${b[14]}.${b[15]}`;
  }
  // ::1 loopback / :: unspecified (must come before the v4-compatible check).
  if (allZeroThrough(15) && b[15] === 1) return 'loopback';
  if (b.every((x) => x === 0)) return 'unspecified';
  // Deprecated IPv4-compatible ::a.b.c.d — first 12 bytes zero, low 32 bits a
  // real v4. Re-check the embedded v4 the same way as the mapped form.
  if (allZeroThrough(12)) {
    return `mapped:${b[12]}.${b[13]}.${b[14]}.${b[15]}`;
  }
  // ff00::/8 — IPv6 multicast (mirrors the v4 224/4 block; closes the asymmetry).
  if (b[0] === 0xff) return 'reserved';
  // fc00::/7 — Unique Local Addresses (fc.. and fd..).
  if ((b[0] & 0xfe) === 0xfc) return 'private';
  // fe80::/10 — link-local.
  if (b[0] === 0xfe && (b[1] & 0xc0) === 0x80) return 'link-local';
  // 2001:db8::/32 — documentation range (RFC 3849), never routable.
  if (b[0] === 0x20 && b[1] === 0x01 && b[2] === 0x0d && b[3] === 0xb8) return 'reserved';
  // NAT64 64:ff9b::/96 — bytes 0-1 = 00 64, 2-3 = ff 9b, bytes 4-11 zero, the
  // embedded v4 in bytes 12-15 is reachable through a NAT64 gateway. Re-check it.
  if (b[0] === 0x00 && b[1] === 0x64 && b[2] === 0xff && b[3] === 0x9b &&
      b.slice(4, 12).every((x) => x === 0)) {
    return `mapped:${b[12]}.${b[13]}.${b[14]}.${b[15]}`;
  }
  // 6to4 2002::/16 — bytes 0-1 = 20 02, the embedded v4 is bytes 2-5; reachable
  // through a 6to4 relay. Re-check the embedded v4.
  if (b[0] === 0x20 && b[1] === 0x02) {
    return `mapped:${b[2]}.${b[3]}.${b[4]}.${b[5]}`;
  }
  return null;
}
|
|
145
|
+
|
|
146
|
+
// Gatekeeper for one IP literal (v4 or v6): throws CloneError(blocked_host)
// when the address falls in a refused range and returns undefined otherwise.
// `host` is the name the address was reached through (defaults to the literal
// itself) and is only used for error details. Input that is not an IP literal
// is ignored — the caller decides what to do with it.
function assertPublicIp(ip, host = ip) {
  const reject = (category, message) => {
    throw new CloneError(2, 'blocked_host', { host, ip, category, message });
  };

  switch (isIP(ip)) {
    case 4: {
      const category = v4Category(parseV4(ip));
      if (category) reject(category, `blocked ${category} address ${ip}`);
      return;
    }
    case 6: {
      const category = v6Category(ip);
      if (!category) return;
      const marker = 'mapped:';
      if (!category.startsWith(marker)) {
        reject(category, `blocked ${category} address ${ip}`);
      }
      // The v6 address embeds a v4 one; judge it by the v4 rules.
      const embedded = category.slice(marker.length);
      // [255,255] sentinel: should the embedded quad fail to re-parse, force a
      // blocking category (255 ⇒ a>=224 'reserved') rather than failing open.
      const embeddedCategory = v4Category(parseV4(embedded) || [255, 255]);
      if (embeddedCategory) {
        reject(embeddedCategory, `blocked ${embeddedCategory} address ${embedded} (IPv4-mapped IPv6)`);
      }
      return; // public IPv4-mapped v6
    }
    default:
      // Not an IP literal.
      return;
  }
}
|
|
174
|
+
|
|
175
|
+
// --- URL gate (sync) --------------------------------------------------------
|
|
176
|
+
|
|
177
|
+
// URL hostnames wrap IPv6 literals in square brackets; return the hostname
// without them. Hostnames that are not bracketed pass through untouched.
function bareHost(hostname) {
  const bracketed = hostname.startsWith('[') && hostname.endsWith(']');
  if (!bracketed) {
    return hostname;
  }
  return hostname.slice(1, -1);
}
|
|
183
|
+
|
|
184
|
+
/**
 * Synchronous pre-flight for a URL: scheme allowlist, localhost-name block and
 * IP-literal classification only. DNS resolution is asynchronous and is done
 * separately by the fetch path.
 *
 * @param {string} url - the URL to validate
 * @returns {URL} the parsed URL on success
 * @throws {CloneError} `bad_scheme` for an unparseable URL or a non-http(s)
 *   scheme; `blocked_host` for a localhost name or a non-public IP literal
 */
export function assertFetchableUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    throw new CloneError(2, 'bad_scheme', { url, message: 'unparseable URL (no valid scheme)' });
  }
  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
    throw new CloneError(2, 'bad_scheme', { url, protocol: parsed.protocol,
      message: `unsupported scheme ${parsed.protocol} — only http/https allowed` });
  }
  const host = bareHost(parsed.hostname);
  // Compare on a normalised name: a single trailing root dot ("localhost.")
  // names the same host, and every name under ".localhost" is reserved for
  // loopback as well, so both spellings are refused here rather than relying
  // on the later DNS check alone.
  const name = host.toLowerCase().replace(/\.$/, '');
  if (name === 'localhost' || name.endsWith('.localhost')) {
    throw new CloneError(2, 'blocked_host', { host, category: 'loopback',
      message: `blocked loopback host ${host}` });
  }
  if (isIP(host)) assertPublicIp(host);
  return parsed;
}
|
|
205
|
+
|
|
206
|
+
// --- fetch (async) ----------------------------------------------------------
|
|
207
|
+
|
|
208
|
+
// DNS-rebinding guard: look up a hostname and run every address it resolves to
// through the public-IP check, so a harmless-looking name that points at a
// private address is refused. IP literals are skipped — they are classified by
// the synchronous URL gate. `lookupImpl` exists so tests can inject a resolver.
// Throws CloneError(fetch_failed) when the lookup fails or returns nothing.
async function assertHostResolvesPublic(host, lookupImpl = lookup) {
  if (isIP(host) !== 0) return; // already validated as a literal

  const dnsFailure = (message) => new CloneError(2, 'fetch_failed', { host, message });

  let resolved;
  try {
    resolved = await lookupImpl(host, { all: true });
  } catch (err) {
    throw dnsFailure(`DNS lookup failed: ${err.message}`);
  }
  if (!resolved.length) {
    throw dnsFailure('DNS lookup returned no addresses');
  }
  for (const entry of resolved) {
    assertPublicIp(entry.address, host);
  }
}
|
|
223
|
+
|
|
224
|
+
// Best-effort release of a response body we are not going to read, so the
// underlying connection is not held open by an unconsumed stream. Failures are
// deliberately ignored: the caller is already moving on (to the next hop or to
// an error), and a failed cancel must not mask that outcome.
async function discardResponseBody(response) {
  try {
    await response.body?.cancel();
  } catch {
    // Nothing useful to do — the body is simply left to the runtime.
  }
}

// Shared SSRF-guarded fetch core for fetchPage (HTML) and fetchImageDataUri
// (images). Validates the URL + every redirect hop (DNS-rebinding re-resolution,
// never redirect:'follow'), streams with a hard byte cap, and returns the raw
// bytes + media type + final URL. Content-type policy is the CALLER's job
// (this core is media-agnostic) — the one place the two fetchers differ, plus
// the `accept` header.
//
// Options: maxBytes (hard cap on the body size), timeoutMs (per-request abort
// timeout), maxRedirects (redirect hops that will be followed), accept (value
// of the Accept header), deps ({ lookup, fetchImpl } injection seam).
// Returns { bytes: Uint8Array, mime: string, url: string }.
// Throws CloneError: bad_scheme / blocked_host (validation), fetch_failed,
// too_many_redirects, http_error, too_large.
async function fetchValidatedBytes(url, { maxBytes, timeoutMs, maxRedirects, accept, deps = {} }) {
  const lookupImpl = deps.lookup || lookup;
  const fetchImpl = deps.fetchImpl || fetch;

  let current = assertFetchableUrl(url);
  await assertHostResolvesPublic(bareHost(current.hostname), lookupImpl);

  let response;
  for (let hop = 0; ; hop++) {
    try {
      response = await fetchImpl(current.href, {
        redirect: 'manual',
        signal: AbortSignal.timeout(timeoutMs),
        headers: {
          'user-agent': 'rwa-clone/1.0 (+https://rewritable.ikangai.com)',
          'accept': accept,
        },
      });
    } catch (err) {
      throw new CloneError(2, 'fetch_failed', { url: current.href, message: err.message });
    }

    // 3xx with a Location → manual per-hop revalidation (NEVER redirect:'follow').
    // A 3xx without a Location falls through and is reported as http_error.
    const location = response.headers.get('location');
    if (response.status >= 300 && response.status < 400 && location) {
      // The redirect response's own body is never read; release it before
      // issuing the next request.
      await discardResponseBody(response);
      if (hop >= maxRedirects) {
        throw new CloneError(2, 'too_many_redirects', { url: current.href, hops: hop + 1 });
      }
      let next;
      try {
        next = new URL(location, current.href);
      } catch {
        throw new CloneError(2, 'fetch_failed', { url: current.href, message: 'malformed redirect Location' });
      }
      current = assertFetchableUrl(next.href);
      await assertHostResolvesPublic(bareHost(current.hostname), lookupImpl);
      continue;
    }
    break;
  }

  if (!response.ok) {
    await discardResponseBody(response);
    throw new CloneError(2, 'http_error', { url: current.href, status: response.status });
  }

  const contentType = response.headers.get('content-type') || '';
  // Match the media type only — an unanchored substring test would wrongly pass
  // e.g. `image/svg+xml; charset=text/html` (a parameter that mentions text/html).
  const mime = contentType.split(';')[0].trim().toLowerCase();

  // content-length is advisory; we still cap the streamed bytes below.
  const declared = Number(response.headers.get('content-length'));
  if (Number.isFinite(declared) && declared > maxBytes) {
    await discardResponseBody(response);
    throw new CloneError(2, 'too_large', { url: current.href, contentLength: declared, maxBytes });
  }

  // No stream available: fall back to reading the whole buffer, then check it.
  if (!response.body) {
    const buf = await response.arrayBuffer();
    if (buf.byteLength > maxBytes) {
      throw new CloneError(2, 'too_large', { url: current.href, maxBytes });
    }
    return { bytes: new Uint8Array(buf), mime, url: current.href };
  }

  // Stream and cap — a lying or absent content-length cannot exhaust memory.
  const reader = response.body.getReader();
  const chunks = [];
  let total = 0;
  try {
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      total += value.byteLength;
      if (total > maxBytes) {
        // Stop the download, but never let a failing cancel replace the
        // too_large verdict with a generic fetch_failed.
        try {
          await reader.cancel();
        } catch {
          // Best-effort only.
        }
        throw new CloneError(2, 'too_large', { url: current.href, maxBytes });
      }
      chunks.push(value);
    }
  } catch (err) {
    if (err instanceof CloneError) throw err;
    throw new CloneError(2, 'fetch_failed', { url: current.href, message: err.message });
  }

  // Concatenate the chunks into one contiguous buffer.
  const out = new Uint8Array(total);
  let off = 0;
  for (const c of chunks) { out.set(c, off); off += c.byteLength; }
  return { bytes: out, mime, url: current.href };
}
|
|
319
|
+
|
|
320
|
+
/**
 * Fetch an HTML page through the SSRF-guarded core and return its text.
 *
 * @param {string} url - page URL
 * @param {object} [options]
 * @param {number} [options.maxBytes=3000000] - hard cap on the body size
 * @param {number} [options.timeoutMs=15000] - per-request timeout
 * @param {number} [options.maxRedirects=5] - redirect hops that will be followed
 * @param {object} [options.deps] - injection seam for tests; when empty the
 *   real DNS lookup and the global fetch are used
 * @returns {Promise<string>} the body decoded as UTF-8
 * @throws {CloneError} `not_html` when the media type is neither text/html nor
 *   application/xhtml+xml, plus anything the fetch core throws
 */
export async function fetchPage(url, { maxBytes = 3_000_000, timeoutMs = 15000, maxRedirects = 5, deps = {} } = {}) {
  const accept = 'text/html,application/xhtml+xml';
  const fetched = await fetchValidatedBytes(url, { maxBytes, timeoutMs, maxRedirects, accept, deps });

  const isHtml = fetched.mime === 'text/html' || fetched.mime === 'application/xhtml+xml';
  if (!isHtml) {
    throw new CloneError(2, 'not_html', { url: fetched.url, contentType: fetched.mime });
  }

  const decoder = new TextDecoder('utf-8');
  return decoder.decode(fetched.bytes);
}
|
|
331
|
+
|
|
332
|
+
// Image localization (rwa clone --localize-images). Media types accepted for
// inlining: common raster formats, icons and svg+xml. The pattern is anchored
// on both ends so only a bare media type (no parameters) can match.
const IMG_MIME_RE = /^image\/(png|jpeg|gif|webp|avif|svg\+xml|bmp|x-icon|vnd\.microsoft\.icon)$/;

/**
 * Fetch ONE image URL through the SSRF-guarded core and return it as a
 * `data:<mime>;base64,…` URI. The bytes are inlined raw (no recompression), so
 * the result size is bounded by `maxBytes`.
 *
 * @param {string} url - image URL
 * @param {object} [options]
 * @param {number} [options.maxBytes=2000000] - hard cap on the image size
 * @param {number} [options.timeoutMs=15000] - per-request timeout
 * @param {number} [options.maxRedirects=5] - redirect hops that will be followed
 * @param {object} [options.deps] - injection seam for tests
 * @returns {Promise<string>} the data URI
 * @throws {CloneError} `not_image` when the media type is not an accepted
 *   image type, plus anything the fetch core throws
 */
export async function fetchImageDataUri(url, { maxBytes = 2_000_000, timeoutMs = 15000, maxRedirects = 5, deps = {} } = {}) {
  const accept = 'image/*';
  const fetched = await fetchValidatedBytes(url, { maxBytes, timeoutMs, maxRedirects, accept, deps });

  if (!IMG_MIME_RE.test(fetched.mime)) {
    throw new CloneError(2, 'not_image', { url: fetched.url, contentType: fetched.mime });
  }

  const encoded = Buffer.from(fetched.bytes).toString('base64');
  return `data:${fetched.mime};base64,${encoded}`;
}
|
package/src/host.mjs
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
// `rwa host <file>` — ingest a local rewritable into a hosted runtime's `POST /r`
|
|
2
|
+
// (service/server.js handleHostedCreate) and print the `{id, token, url}` the
|
|
3
|
+
// server mints. The url carries the capability token in its `#k=` fragment — the
|
|
4
|
+
// only way the user keeps editing the hosted copy — so it is printed verbatim.
|
|
5
|
+
//
|
|
6
|
+
// This is the network-bearing INGEST client (the round-trip-edit foundation),
|
|
7
|
+
// the way `rwa publish` is the ephemeral-share client. Online by design (the
|
|
8
|
+
// offline-first invariant of new/import does not apply to a host action), so —
|
|
9
|
+
// like `clone`/`publish-site` — it is excluded from the offline-first rule.
|
|
10
|
+
//
|
|
11
|
+
// Design parity:
|
|
12
|
+
// - flags-over-env config (--url > $RWA_HOST_URL), nothing baked in — like
|
|
13
|
+
// publish-site's RWA_SITE_*.
|
|
14
|
+
// - injected transport ({transport, env}) so tests run offline — the same
|
|
15
|
+
// deps-seam shape publish-site uses for {execFile, env}. The default
|
|
16
|
+
// transport is a real node:http/node:https POST.
|
|
17
|
+
// - CliError exit codes: 2 file_error (not_found/read_error/not_a_rewritable),
|
|
18
|
+
// 1 config_error (no url), 4 host_error (transport/HTTP failure, carrying the
|
|
19
|
+
// server's status/body verbatim). The bin labels exit 4 `host_error`.
|
|
20
|
+
//
|
|
21
|
+
// Security: only the file bytes are sent — a rewritable carries NO secret (the
|
|
22
|
+
// API key is sessionStorage-only, never in the file). The returned token is
|
|
23
|
+
// surfaced to stdout (the bin) and nowhere else.
|
|
24
|
+
|
|
25
|
+
import { readFile } from 'node:fs/promises';
|
|
26
|
+
import { request as httpRequest } from 'node:http';
|
|
27
|
+
import { request as httpsRequest } from 'node:https';
|
|
28
|
+
import { extractInlineDoc } from './seed.mjs';
|
|
29
|
+
import { CliError } from './edit.mjs';
|
|
30
|
+
|
|
31
|
+
// Default transport: a single request over node:http / node:https (chosen by
// the URL's protocol). Returns the raw status + body text; hostFile owns all
// status/JSON interpretation so the seam stays dumb and the contract lives in
// one place. Failures on either the request or the response stream reject —
// the caller maps them to host_error/network_error. An unparseable `url`
// throws synchronously from the URL constructor.
//
// @param {string} url — the full POST target (already includes the /r path)
// @param {{method:string, headers:object, body:string}} opts
// @returns {Promise<{status:number, body:string}>}
function defaultTransport(url, { method, headers, body }) {
  const u = new URL(url);
  const request = u.protocol === 'https:' ? httpsRequest : httpRequest;
  return new Promise((resolve, reject) => {
    const req = request(u, { method, headers }, (res) => {
      const chunks = [];
      res.on('data', (c) => chunks.push(c));
      // A failure while the body is still arriving surfaces on the response
      // stream, not on the request; without a listener it would be an
      // unhandled 'error' event instead of a rejection.
      res.on('error', reject);
      res.on('end', () => resolve({ status: res.statusCode, body: Buffer.concat(chunks).toString('utf8') }));
    });
    req.on('error', reject);
    if (body != null) req.write(body);
    req.end();
  });
}
|
|
53
|
+
|
|
54
|
+
/**
 * Upload a local rewritable to a hosted runtime: read the file, check locally
 * that it is a rewritable, then POST its contents to `<baseUrl>/r` and return
 * the identifiers from the server's reply.
 *
 * @param {string} filePath - path of the rewritable to upload
 * @param {{url?:string, transport?:Function, env?:object}} [deps]
 *   url       — base url (flag); when absent, env.RWA_HOST_URL is used
 *   transport — injection seam ((url, opts) => {status, body}); defaults to
 *               the module's node:http/https transport
 *   env       — environment source (tests inject); defaults to process.env
 * @returns {Promise<{id:string, token:string, url:string}>} the three string
 *   fields of the server's 200 response
 * @throws {CliError} exit 2 — not_found / read_error / not_a_rewritable;
 *   exit 1 — config_error (no base url);
 *   exit 4 — network_error / malformed_success_response / body_too_large /
 *   server_error
 */
export async function hostFile(filePath, deps = {}) {
  const env = deps.env || process.env;
  const send = deps.transport || defaultTransport;

  // Step 1 — load the file as UTF-8 text.
  let html;
  try {
    html = await readFile(filePath, 'utf8');
  } catch (e) {
    if (e && e.code === 'ENOENT') {
      throw new CliError(2, 'not_found', { path: filePath });
    }
    throw new CliError(2, 'read_error', {
      path: filePath,
      errno: e && e.code,
      message: e && e.message,
    });
  }

  // Step 2 — cheap local gate before touching the network: if no inline doc
  // can be extracted, this is not a rewritable and there is nothing to send.
  try {
    extractInlineDoc(html);
  } catch {
    throw new CliError(2, 'not_a_rewritable', { path: filePath });
  }

  // Step 3 — resolve the base url: explicit flag first, then the environment.
  const urlBase = deps.url || env.RWA_HOST_URL;
  if (!urlBase) {
    throw new CliError(1, 'config_error', { missing: ['RWA_HOST_URL'] });
  }

  // Step 4 — POST the document to <base>/r, tolerating trailing slashes on
  // the configured base.
  const endpoint = `${urlBase.replace(/\/+$/, '')}/r`;
  const requestOptions = {
    method: 'POST',
    headers: { 'Content-Type': 'text/html; charset=utf-8' },
    body: html,
  };
  let res;
  try {
    res = await send(endpoint, requestOptions);
  } catch (e) {
    throw new CliError(4, 'network_error', { url: endpoint, message: (e && e.message) || String(e) });
  }

  // The body may be empty or not JSON on some error paths; treat both as
  // "no payload" instead of failing on the parse.
  const parseBody = (text) => {
    if (!text) return null;
    try {
      return JSON.parse(text);
    } catch {
      return null;
    }
  };
  const payload = parseBody(res.body);

  if (res.status !== 200) {
    // Prefer the server's own error name when it sent one, and pass the
    // status / maxBytes through so the user can see why the upload failed.
    const errName = payload && typeof payload.error === 'string' ? payload.error : null;
    if (res.status === 413 || errName === 'body_too_large') {
      throw new CliError(4, 'body_too_large', { maxBytes: payload && payload.maxBytes });
    }
    throw new CliError(4, 'server_error', { status: res.status, error: errName });
  }

  // A 200 must carry all three identifiers as strings.
  const wellFormed = Boolean(payload)
    && typeof payload.id === 'string'
    && typeof payload.token === 'string'
    && typeof payload.url === 'string';
  if (!wellFormed) {
    throw new CliError(4, 'malformed_success_response', { status: 200 });
  }
  return { id: payload.id, token: payload.token, url: payload.url };
}
|