@q32/signal-scanner 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/COPYING +674 -0
  2. package/COPYING.LESSER +165 -0
  3. package/README.md +57 -9
  4. package/dist/cli.d.ts +26 -0
  5. package/dist/cli.d.ts.map +1 -0
  6. package/dist/cli.js +592 -0
  7. package/dist/cli.js.map +1 -0
  8. package/dist/dynamic.d.ts +43 -0
  9. package/dist/dynamic.d.ts.map +1 -0
  10. package/{src/dynamic.ts → dist/dynamic.js} +133 -156
  11. package/dist/dynamic.js.map +1 -0
  12. package/dist/feeds.d.ts +66 -0
  13. package/dist/feeds.d.ts.map +1 -0
  14. package/dist/feeds.js +259 -0
  15. package/dist/feeds.js.map +1 -0
  16. package/dist/index.d.ts +110 -0
  17. package/dist/index.d.ts.map +1 -0
  18. package/dist/index.js +1251 -0
  19. package/dist/index.js.map +1 -0
  20. package/dist/intel.d.ts +72 -0
  21. package/dist/intel.d.ts.map +1 -0
  22. package/dist/intel.js +480 -0
  23. package/dist/intel.js.map +1 -0
  24. package/dist/node-tls.d.ts +8 -0
  25. package/dist/node-tls.d.ts.map +1 -0
  26. package/dist/node-tls.js +48 -0
  27. package/dist/node-tls.js.map +1 -0
  28. package/dist/render-isolate/entry.d.ts +2 -0
  29. package/dist/render-isolate/entry.d.ts.map +1 -0
  30. package/dist/render-isolate/entry.js +3 -0
  31. package/dist/render-isolate/entry.js.map +1 -0
  32. package/dist/render-isolate/polyfills.d.ts +2 -0
  33. package/dist/render-isolate/polyfills.d.ts.map +1 -0
  34. package/dist/render-isolate/polyfills.js +41 -0
  35. package/dist/render-isolate/polyfills.js.map +1 -0
  36. package/dist/render-isolate/run.d.ts +3 -0
  37. package/dist/render-isolate/run.d.ts.map +1 -0
  38. package/dist/render-isolate/run.js +88 -0
  39. package/dist/render-isolate/run.js.map +1 -0
  40. package/dist/render.d.ts +26 -0
  41. package/dist/render.d.ts.map +1 -0
  42. package/dist/render.js +248 -0
  43. package/dist/render.js.map +1 -0
  44. package/dist/rules/packs/binary.d.ts +4 -0
  45. package/dist/rules/packs/binary.d.ts.map +1 -0
  46. package/dist/rules/packs/binary.js +101 -0
  47. package/dist/rules/packs/binary.js.map +1 -0
  48. package/dist/rules/packs/css.d.ts +3 -0
  49. package/dist/rules/packs/css.d.ts.map +1 -0
  50. package/dist/rules/packs/css.js +43 -0
  51. package/dist/rules/packs/css.js.map +1 -0
  52. package/dist/rules/packs/decoders.d.ts +3 -0
  53. package/dist/rules/packs/decoders.d.ts.map +1 -0
  54. package/dist/rules/packs/decoders.js +46 -0
  55. package/dist/rules/packs/decoders.js.map +1 -0
  56. package/dist/rules/packs/html.d.ts +4 -0
  57. package/dist/rules/packs/html.d.ts.map +1 -0
  58. package/dist/rules/packs/html.js +227 -0
  59. package/dist/rules/packs/html.js.map +1 -0
  60. package/dist/rules/packs/index.d.ts +24 -0
  61. package/dist/rules/packs/index.d.ts.map +1 -0
  62. package/dist/rules/packs/index.js +75 -0
  63. package/dist/rules/packs/index.js.map +1 -0
  64. package/dist/rules/packs/script-risk.d.ts +4 -0
  65. package/dist/rules/packs/script-risk.d.ts.map +1 -0
  66. package/dist/rules/packs/script-risk.js +231 -0
  67. package/dist/rules/packs/script-risk.js.map +1 -0
  68. package/dist/rules/packs/source-code.d.ts +3 -0
  69. package/dist/rules/packs/source-code.d.ts.map +1 -0
  70. package/dist/rules/packs/source-code.js +179 -0
  71. package/dist/rules/packs/source-code.js.map +1 -0
  72. package/dist/rules/packs/urls.d.ts +3 -0
  73. package/dist/rules/packs/urls.d.ts.map +1 -0
  74. package/dist/rules/packs/urls.js +123 -0
  75. package/dist/rules/packs/urls.js.map +1 -0
  76. package/dist/rules/types.d.ts +34 -0
  77. package/dist/rules/types.d.ts.map +1 -0
  78. package/dist/rules/types.js +2 -0
  79. package/dist/rules/types.js.map +1 -0
  80. package/package.json +33 -18
  81. package/scripts/check-coverage.ts +0 -33
  82. package/scripts/eval.ts +0 -311
  83. package/scripts/render-isolate/entry.ts +0 -2
  84. package/scripts/render-isolate/polyfills.ts +0 -33
  85. package/scripts/render-isolate/run.ts +0 -63
  86. package/scripts/scan.ts +0 -612
  87. package/src/feeds.ts +0 -334
  88. package/src/index.ts +0 -1366
  89. package/src/intel.ts +0 -561
  90. package/src/node-tls.ts +0 -55
  91. package/src/render.ts +0 -233
  92. package/src/rules/packs/binary.ts +0 -103
  93. package/src/rules/packs/css.ts +0 -44
  94. package/src/rules/packs/decoders.ts +0 -47
  95. package/src/rules/packs/html.ts +0 -255
  96. package/src/rules/packs/index.ts +0 -76
  97. package/src/rules/packs/script-risk.ts +0 -236
  98. package/src/rules/packs/source-code.ts +0 -180
  99. package/src/rules/packs/urls.ts +0 -138
  100. package/src/rules/types.ts +0 -56
package/scripts/eval.ts DELETED
@@ -1,311 +0,0 @@
1
- // Eval harness: run the homegrown scanner over a labeled corpus of known-good
2
- // and known-bad sites and measure how well it separates them.
3
- //
4
- // npm run eval # reuse cached bad list if fresh (<6h)
5
- // npm run eval -- --refresh # re-pull a fresh live bad list
6
- //
7
- // Known-good is the curated corpus/good.txt. Known-bad is pulled live from
8
- // OpenPhish + URLhaus (they go offline fast), probed for reachability, and
9
- // cached to corpus/.bad-cache.txt. The scan path is CLI heuristics only
10
- // (structural + content + dynamic JS) — NO threat-intel feeds — so this measures
11
- // the homegrown detector's own discriminative power, not feed lookups.
12
-
13
- import { readFile, writeFile } from "node:fs/promises";
14
- import { resolve } from "node:path";
15
- import { crawlTargets, DEFAULT_CRAWL_OPTIONS, type CrawlOptions } from "./scan";
16
- import { dispositionForScore } from "../src/index";
17
-
18
/** A site whose score is at or above this value counts as flagged (product surfaces suspicious/malicious). */
const FLAG_THRESHOLD = 50;
/** Number of reachable known-bad URLs the reachability probe tries to collect. */
const TARGET_BAD = 80;
/** How many sites are scanned at the same time. */
const SITE_CONCURRENCY = 6;
/** Cache file for the mixed-feed bad list. */
const CACHE_PATH = resolve("corpus/.bad-cache.txt");
/** Cache file for the phishing-only bad list. */
const PHISHING_CACHE_PATH = resolve("corpus/.bad-phishing-cache.txt");
/** A cached bad list older than this (6 hours) is re-pulled. */
const CACHE_TTL_MS = 6 * 60 * 60 * 1000;
/** Gate: at most 5% of good sites may be flagged. */
const MAX_FP_RATE = 0.05;

// Per-site crawl budget. The landing page plus one shallow hop is enough to
// judge a site and keeps a 160-site sweep tractable.
const CRAWL: CrawlOptions = {
  ...DEFAULT_CRAWL_OPTIONS,
  timeoutMs: 8000,
  robots: false,
  parallel: 4,
  maxDepth: 1,
  maxUrls: 10
};

/** User agent sent by the reachability probe; same one the crawler defaults to. */
const BROWSER_UA = DEFAULT_CRAWL_OPTIONS.userAgent;
38
-
39
/** Outcome of scanning one labeled corpus site. */
interface SiteResult {
  /** URL that was scanned. */
  url: string;
  /** Ground-truth label from the corpus. */
  label: "good" | "bad";
  /** Score of the worst-scoring page; 0 when the site was unreachable. */
  score: number;
  /** Disposition derived from the score ("allow" when unreachable). */
  disposition: string;
  /** Number of pages that produced a report. */
  pagesScanned: number;
  /** Up to three highest-scoring findings from the worst page. */
  topFindings: { ruleId: string; score: number }[];
  /** True when no page could be scanned; such sites are excluded from the stats. */
  unreachable: boolean;
}
48
-
49
// Untrusted page JS runs inside the dynamic-analysis sandbox, so a stray throw
// or rejection from a single site must not take down the whole 160-site sweep.
// Per-site scanning is best-effort already: log uncaught errors, drop
// unhandled rejections, and keep going.
process.on("uncaughtException", (error) => {
  const detail = error instanceof Error ? error.message : error;
  console.error(" (ignored uncaught error from sandbox):", detail);
});
process.on("unhandledRejection", () => undefined);
56
-
57
/**
 * Entry point: load the good and bad corpora, scan every site with bounded
 * concurrency, then print the evaluation report.
 *
 * Flags (read from process.argv):
 *   --refresh   re-pull the live bad list instead of using the cache
 *   --phishing  use the phishing-only bad corpus (no URLhaus malware URLs)
 *   --live      use the curated corpus/phishing-live.txt instead of a feed
 */
async function main(): Promise<void> {
  const argv = process.argv;
  const refresh = argv.includes("--refresh");
  const phishingOnly = argv.includes("--phishing");
  const live = argv.includes("--live");

  // Egress: EVAL_PROXY_URL (or HTTPS_PROXY) routes the crawl and reachability
  // probe through a proxy, which matters when an ISP filter replaces
  // known-malicious URLs with a block page and makes bad sites look benign.
  const proxy = process.env.EVAL_PROXY_URL || process.env.HTTPS_PROXY || "";
  const egress = proxy ? "proxy " + redactProxy(proxy) : "direct (local network)";
  console.error(`egress: ${egress}`);

  const good = await loadList("corpus/good.txt");
  let bad: string[];
  if (live) {
    bad = await loadList("corpus/phishing-live.txt");
  } else {
    bad = await loadBad(refresh, phishingOnly);
  }
  let origin = "mixed feed";
  if (live) origin = "curated live";
  else if (phishingOnly) origin = "phishing feed";
  console.error(`corpus: ${good.length} good, ${bad.length} bad (${origin})`);

  // Good sites first, then bad ones, each tagged with its ground-truth label.
  const labeled: Array<{ url: string; label: "good" | "bad" }> = [];
  for (const url of good) labeled.push({ url, label: "good" });
  for (const url of bad) labeled.push({ url, label: "bad" });

  const results: SiteResult[] = [];
  let done = 0;
  await pool(labeled, SITE_CONCURRENCY, async (site) => {
    const outcome = await scanSite(site.url, site.label);
    results.push(outcome);
    done += 1;
    if (done % 10 === 0) console.error(` scanned ${done}/${labeled.length}`);
  });

  report(results);
}
96
-
97
/**
 * Crawl one site and summarize it by its worst-scoring page.
 *
 * @param url    site to crawl
 * @param label  ground-truth label carried through to the result
 * @returns the site result; marked `unreachable` (score 0, "allow") when the
 *          crawl throws or no page produced a report
 */
async function scanSite(url: string, label: "good" | "bad"): Promise<SiteResult> {
  const unreachable = (): SiteResult => ({
    url,
    label,
    score: 0,
    disposition: "allow",
    pagesScanned: 0,
    topFindings: [],
    unreachable: true
  });

  try {
    const pages = await crawlTargets([url], CRAWL);
    const scored = pages.filter((page) => !page.error && page.report);
    if (scored.length === 0) return unreachable();

    // Highest score wins; on a tie the earliest page is kept.
    let worst = scored[0];
    for (const page of scored.slice(1)) {
      if (page.report.score > worst.report.score) worst = page;
    }

    const score = worst.report.score;
    const ranked = [...worst.report.findings].sort((a, b) => (b.score ?? 0) - (a.score ?? 0));
    const topFindings = ranked.slice(0, 3).map((finding) => ({ ruleId: finding.ruleId, score: finding.score ?? 0 }));
    return {
      url,
      label,
      score,
      disposition: dispositionForScore(score),
      pagesScanned: scored.length,
      topFindings,
      unreachable: false
    };
  } catch {
    return unreachable();
  }
}
115
-
116
/**
 * Print the evaluation summary to stdout: confusion counts, score
 * distribution, the false positives and (up to 25) misses, and the
 * false-positive gate. Sets `process.exitCode = 1` when the gate fails.
 *
 * Unreachable sites are counted but excluded from every statistic.
 */
function report(results: SiteResult[]): void {
  const isFlagged = (site: SiteResult): boolean => site.score >= FLAG_THRESHOLD;
  const byScoreDesc = (a: SiteResult, b: SiteResult): number => b.score - a.score;
  const scoresOf = (sites: SiteResult[]): number[] => sites.map((site) => site.score);
  const findingsOf = (site: SiteResult): string =>
    site.topFindings.map((f) => `${f.ruleId}(${f.score})`).join(", ");
  const pct = (n: number, d: number): string => {
    if (!d) return "n/a";
    return `${((100 * n) / d).toFixed(1)}%`;
  };

  // Split reachable sites by label; tally the unreachable ones.
  const good: SiteResult[] = [];
  const bad: SiteResult[] = [];
  let unreachableCount = 0;
  for (const site of results) {
    if (site.unreachable) unreachableCount += 1;
    else if (site.label === "good") good.push(site);
    else if (site.label === "bad") bad.push(site);
  }

  const fp = good.filter(isFlagged); // good but flagged
  const tn = good.filter((site) => !isFlagged(site));
  const tp = bad.filter(isFlagged); // bad and caught
  const fn = bad.filter((site) => !isFlagged(site)); // bad but missed

  const proxy = process.env.EVAL_PROXY_URL || process.env.HTTPS_PROXY || "";
  const egress = proxy ? "proxy " + redactProxy(proxy) : "direct (local network)";
  console.log("\n================ SCANNER EVAL ================");
  console.log(`egress: ${egress}`);
  console.log(`unreachable (excluded): ${unreachableCount} / ${results.length}`);
  console.log(`\nGood sites: ${good.length} reachable`);
  console.log(` flagged (FALSE POSITIVE): ${fp.length} [${pct(fp.length, good.length)}]`);
  console.log(` clean (true negative): ${tn.length}`);
  console.log(`\nBad sites: ${bad.length} reachable`);
  console.log(` flagged (caught): ${tp.length} [recall ${pct(tp.length, bad.length)}]`);
  console.log(` missed (false negative): ${fn.length}`);

  console.log("\nScore distribution (count by band):");
  console.log(` band good bad`);
  const bands: Array<[number, number]> = [[0, 9], [10, 24], [25, 49], [50, 74], [75, 100]];
  const countIn = (sites: SiteResult[], lo: number, hi: number): number =>
    sites.filter((site) => site.score >= lo && site.score <= hi).length;
  for (const [lo, hi] of bands) {
    const g = countIn(good, lo, hi);
    const b = countIn(bad, lo, hi);
    const mark = lo >= FLAG_THRESHOLD ? " <-flag" : "";
    console.log(` ${String(lo).padStart(3)}-${String(hi).padEnd(3)} ${String(g).padStart(5)} ${String(b).padStart(5)}${mark}`);
  }
  console.log(` good: median ${median(scoresOf(good))}, p90 ${percentile(scoresOf(good), 90)}`);
  console.log(` bad: median ${median(scoresOf(bad))}, p90 ${percentile(scoresOf(bad), 90)}`);

  if (fp.length) {
    console.log("\nFALSE POSITIVES (good sites flagged) — fix these:");
    fp.sort(byScoreDesc);
    for (const site of fp) {
      console.log(` [${site.score}] ${site.url} ${findingsOf(site)}`);
    }
  }
  if (fn.length) {
    console.log("\nMISSED bad sites (score < flag threshold):");
    fn.sort(byScoreDesc);
    for (const site of fn.slice(0, 25)) {
      console.log(` [${site.score}] ${site.url} ${findingsOf(site) || "(no signal)"}`);
    }
    if (fn.length > 25) console.log(` ... and ${fn.length - 25} more`);
  }

  // Gate on the false-positive rate only; with no reachable good sites the rate is 0.
  const fpRate = good.length ? fp.length / good.length : 0;
  const pass = fpRate <= MAX_FP_RATE;
  console.log(`\nGATE: false-positive rate ${pct(fp.length, good.length)} (max ${MAX_FP_RATE * 100}%) => ${pass ? "PASS" : "FAIL"}`);
  console.log("=============================================\n");
  if (!pass) process.exitCode = 1;
}
171
-
172
- // ---- known-bad corpus (live) --------------------------------------------
173
-
174
/**
 * Return the known-bad URL list, from cache when allowed and fresh, otherwise
 * by pulling the live feeds, probing reachability, and rewriting the cache.
 *
 * @param refresh       skip the cache and always pull live
 * @param phishingOnly  use the phishing-only feed set and its own cache file
 */
async function loadBad(refresh: boolean, phishingOnly: boolean): Promise<string[]> {
  const cachePath = phishingOnly ? PHISHING_CACHE_PATH : CACHE_PATH;

  const cached = refresh ? null : await readCacheIfFresh(cachePath);
  if (cached) {
    console.error(`using cached bad list (${cached.length} urls)`);
    return cached;
  }

  const mode = phishingOnly ? "phishing-only" : "mixed";
  console.error(`pulling live bad URLs (${mode}) ...`);
  const pulled = await fetchBadCandidates(phishingOnly);
  const candidates = shuffle(dedupe(pulled));
  console.error(` ${candidates.length} candidates; probing reachability ...`);
  const reachable = await probeReachable(candidates, TARGET_BAD);

  // The first line carries the pull time that the freshness check reads back.
  const body = `# pulled ${new Date().toISOString()}\n${reachable.join("\n")}\n`;
  await writeFile(cachePath, body, "utf8");
  return reachable;
}
190
-
191
/**
 * Read a cached bad-URL list if it exists and is still fresh.
 *
 * The cache file is expected to contain a `# pulled <timestamp>` line followed
 * by one URL per line.
 *
 * @param cachePath  path of the cache file
 * @returns the cached URLs, or `null` when the file is missing or unreadable,
 *          has no stamp or an unparsable one, is older than `CACHE_TTL_MS`,
 *          or contains no URLs
 */
async function readCacheIfFresh(cachePath: string): Promise<string[] | null> {
  try {
    const text = await readFile(cachePath, "utf8");
    const stamp = text.match(/# pulled (.+)/)?.[1];
    if (!stamp) return null;

    // Date.parse yields NaN for a corrupt stamp, and `now - NaN > ttl` is
    // false, so without this guard a damaged cache would count as fresh.
    const pulledAt = Date.parse(stamp);
    if (!Number.isFinite(pulledAt)) return null;
    if (Date.now() - pulledAt > CACHE_TTL_MS) return null;

    const urls = parseList(text);
    return urls.length ? urls : null;
  } catch {
    // Missing or unreadable cache: treat as "no cache".
    return null;
  }
}
202
-
203
/**
 * Pull candidate known-bad URLs from public feeds. Each feed is best-effort:
 * a failed fetch is logged and skipped.
 *
 * OpenPhish is always queried. With `phishingOnly` the second source is the
 * Phishing.Database active-links list (first 4000 http* entries); otherwise
 * it is the URLhaus online-URLs CSV.
 *
 * @returns the collected URLs, not deduplicated
 */
async function fetchBadCandidates(phishingOnly: boolean): Promise<string[]> {
  const collected: string[] = [];
  const reason = (error: unknown): unknown => (error instanceof Error ? error.message : error);

  // OpenPhish community feed (public, fresh phishing URLs).
  try {
    const response = await fetch("https://openphish.com/feed.txt", { signal: AbortSignal.timeout(15000) });
    if (response.ok) {
      const entries = parseList(await response.text());
      collected.push(...entries);
    }
  } catch (error) {
    console.error(" openphish fetch failed:", reason(error));
  }

  if (phishingOnly) {
    // Phishing.Database active links (public, no auth); only a sample is kept.
    try {
      const response = await fetch(
        "https://raw.githubusercontent.com/mitchellkrogza/Phishing.Database/master/phishing-links-ACTIVE.txt",
        { signal: AbortSignal.timeout(30000) }
      );
      if (response.ok) {
        const links = parseList(await response.text()).filter((u) => u.startsWith("http"));
        collected.push(...links.slice(0, 4000));
      }
    } catch (error) {
      console.error(" phishing.database fetch failed:", reason(error));
    }
  } else {
    // URLhaus online URLs (malware distribution). Auth-Key is sent when configured.
    try {
      const headers: Record<string, string> = {};
      const authKey = process.env.ABUSE_CH_AUTH_KEY;
      if (authKey) headers["Auth-Key"] = authKey;
      const response = await fetch("https://urlhaus.abuse.ch/downloads/csv_online/", {
        headers,
        signal: AbortSignal.timeout(20000)
      });
      if (response.ok) {
        const rows = (await response.text()).split("\n");
        for (const row of rows) {
          if (row.startsWith("#") || !row.trim()) continue;
          // Quoted CSV: split on the `","` separators, then strip the outer quotes.
          const [, , link, status] = row.split('","').map((cell) => cell.replace(/^"|"$/g, ""));
          if (status === "online" && link?.startsWith("http")) collected.push(link);
        }
      }
    } catch (error) {
      console.error(" urlhaus fetch failed:", reason(error));
    }
  }

  return collected;
}
240
-
241
/**
 * Probe candidates (12 at a time) and keep those that answer 200 at the URL
 * itself, stopping once `target` live URLs have been collected.
 *
 * @param candidates  URLs to probe, in the order they should be tried
 * @param target      maximum number of live URLs to return
 * @returns at most `target` URLs that responded with status 200
 */
async function probeReachable(candidates: string[], target: number): Promise<string[]> {
  const live: string[] = [];
  await pool(candidates, 12, async (url) => {
    if (live.length >= target) return;
    try {
      // A live phishing kit serves real content (200) at the URL itself. A
      // taken-down one 404s or 301/302s to a park/block page — exclude those
      // (status !== 200) so a dead corpus doesn't dilute recall. No body-size
      // gate: a single <script> tag can be a complete phishing page.
      const res = await fetch(url, { headers: { "user-agent": BROWSER_UA }, redirect: "manual", signal: AbortSignal.timeout(8000) });
      // Re-check the target: other probes may have filled the list while this one was in flight.
      if (res.status === 200 && live.length < target) live.push(url);
      // Only the status is needed. Cancel the unread body so the connection
      // is released now rather than whenever the response is garbage-collected.
      await res.body?.cancel().catch(() => {});
    } catch {
      // dead/unreachable — skip
    }
  });
  return live.slice(0, target);
}
260
-
261
- // ---- helpers -------------------------------------------------------------
262
-
263
/** Read a corpus file (path resolved against the working directory) and return its entries. */
async function loadList(path: string): Promise<string[]> {
  const absolute = resolve(path);
  const contents = await readFile(absolute, "utf8");
  return parseList(contents);
}
266
/** Split text into trimmed lines, dropping blank lines and `#` comment lines. */
function parseList(text: string): string[] {
  const entries: string[] = [];
  for (const raw of text.split("\n")) {
    const line = raw.trim();
    if (line === "" || line.startsWith("#")) continue;
    entries.push(line);
  }
  return entries;
}
269
/** Remove duplicate strings, keeping the first occurrence of each in order. */
function dedupe(values: string[]): string[] {
  const unique = new Set<string>(values);
  return Array.from(unique);
}
272
/**
 * Reduce a proxy URL to `host[:port]` for logging, dropping any credentials,
 * path, and query.
 *
 * @param url  proxy URL, possibly containing `user:password@`
 * @returns the host with its port when the URL carries a non-default one;
 *          `"set"` when the URL cannot be parsed or has no host
 */
function redactProxy(url: string): string {
  try {
    // `host` already omits the colon when there is no explicit (or only the
    // scheme-default) port; joining hostname and port by hand printed "host:".
    return new URL(url).host || "set";
  } catch {
    return "set";
  }
}
280
/**
 * Deterministically reorder a list: each element gets the key
 * `(index * 2654435761) % length` and the list is sorted by that key.
 * No randomness is involved, so the same input always yields the same order.
 * The input array is not modified.
 */
function shuffle<T>(values: T[]): T[] {
  const size = values.length;
  const keyed = values.map((item, position) => ({ item, key: (position * 2654435761) % size }));
  keyed.sort((left, right) => left.key - right.key);
  return keyed.map(({ item }) => item);
}
287
/**
 * Median of a list of numbers.
 *
 * @returns 0 for an empty list; the middle value for odd lengths; the mean of
 *          the two middle values for even lengths. The input is not modified.
 */
function median(values: number[]): number {
  if (!values.length) return 0;
  const sorted = [...values].sort((a, b) => a - b);
  const mid = Math.floor(sorted.length / 2);
  if (sorted.length % 2 === 1) return sorted[mid];
  // Even length: taking only the upper-middle element skews the result
  // (e.g. [0, 100] would report 100).
  return (sorted[mid - 1] + sorted[mid]) / 2;
}
292
/**
 * Value at the p-th percentile: the element at index `floor(p/100 * length)`
 * of the ascending-sorted values, clamped to the last element.
 * Returns 0 for an empty list. The input is not modified.
 */
function percentile(values: number[], p: number): number {
  const count = values.length;
  if (count === 0) return 0;
  const ascending = values.slice().sort((x, y) => x - y);
  const rank = Math.floor((p / 100) * count);
  const last = count - 1;
  return ascending[rank > last ? last : rank];
}
297
/**
 * Run `worker` over every item with at most `concurrency` calls in flight.
 * Items are claimed in order from a shared cursor; resolves once all runners
 * have drained the list, and rejects if any worker call rejects.
 */
async function pool<T>(items: T[], concurrency: number, worker: (item: T) => Promise<void>): Promise<void> {
  let cursor = 0;
  const drain = async (): Promise<void> => {
    while (cursor < items.length) {
      const current = items[cursor];
      cursor += 1;
      await worker(current);
    }
  };
  const lanes = Math.min(concurrency, items.length);
  const runners = Array.from({ length: lanes }, () => drain());
  await Promise.all(runners);
}
307
-
308
// Kick off the run; any error that escapes main() is printed and the process exits non-zero.
main().then(undefined, (failure: unknown) => {
  console.error(failure);
  process.exit(1);
});
@@ -1,2 +0,0 @@
1
- import { renderDom } from "../../src/render";
2
// Expose the renderer as a global so code evaluated in the same context can call it by name.
Object.assign(globalThis, { __renderAndScan: renderDom });
@@ -1,33 +0,0 @@
1
- // Web globals a bare V8 isolate lacks, installed BEFORE linkedom/render load.
2
- // (self/window are set by the host via context.eval before this bundle runs.)
3
- import "fast-text-encoding";
4
- import base64 from "base-64";
5
- import { URL, URLSearchParams } from "whatwg-url-without-unicode";
6
const g = globalThis as any;

// Fallback implementations, installed only where the global is missing.
const webGlobals: Array<[string, unknown]> = [
  ["atob", (s: string) => base64.decode(String(s))],
  ["btoa", (s: string) => base64.encode(String(s))],
  ["URL", URL],
  ["URLSearchParams", URLSearchParams]
];
for (const [name, fallback] of webGlobals) {
  if (!g[name]) g[name] = fallback;
}
11
-
12
// Minimal Buffer shim (linkedom's entity decoder + a few runtime paths use it).
// Built on the atob/btoa/TextEncoder/TextDecoder globals; covers
// from(string | base64 string | Uint8Array | number[] | ArrayBuffer),
// toString("binary" | "latin1" | "base64" | default utf-8), alloc and isBuffer.
if (!g.Buffer) {
  // Bytes -> one-char-per-byte string, converted in 8192-byte chunks so the
  // argument list passed to fromCharCode.apply stays bounded.
  const toBinary = (bytes: Uint8Array): string => {
    let out = "";
    for (let i = 0; i < bytes.length; i += 8192) out += String.fromCharCode.apply(null, bytes.subarray(i, i + 8192) as unknown as number[]);
    return out;
  };
  g.Buffer = {
    from(input: unknown, enc?: string): Uint8Array & { toString: (e?: string) => string } {
      let bytes: Uint8Array;
      // Copy typed-array input: toString is patched onto the result below, and
      // patching the caller's own array would change how that array stringifies.
      if (input instanceof Uint8Array) bytes = new Uint8Array(input);
      else if (input instanceof ArrayBuffer) bytes = new Uint8Array(input);
      else if (Array.isArray(input)) bytes = Uint8Array.from(input as number[]);
      else if (enc === "base64") bytes = Uint8Array.from(g.atob(String(input)), (c: string) => c.charCodeAt(0));
      else bytes = new TextEncoder().encode(String(input));
      const view = bytes as Uint8Array & { toString: (e?: string) => string };
      view.toString = (e?: string) => (e === "binary" || e === "latin1" ? toBinary(bytes) : e === "base64" ? g.btoa(toBinary(bytes)) : new TextDecoder().decode(bytes));
      return view;
    },
    alloc: (n: number) => new Uint8Array(n),
    isBuffer: (x: unknown) => x instanceof Uint8Array
  };
}
@@ -1,63 +0,0 @@
1
- // CLI executor for renderDom: runs the render bundle inside a real isolated-vm
2
- // isolate. True isolation — the page's untrusted JS gets web-global polyfills and
3
- // a floor-dropping fetch, and cannot reach the host's fetch/process/fs. A fresh
4
- // context per page prevents cross-page contamination; the heavy bundles are
5
- // compiled once.
6
- import { resolve } from "node:path";
7
- import ivm from "isolated-vm";
8
- import { build } from "esbuild";
9
- import type { RenderInput, RenderResult } from "../../src/render";
10
-
11
/** Directory containing this module. */
const HERE = import.meta.dirname;
/** ES-module build of punycode that the bundler is pointed at. */
const PUNYCODE = resolve(HERE, "..", "..", "node_modules", "punycode", "punycode.es6.js");
/** Timeout passed to the render call inside the isolate, in milliseconds. */
const CALL_TIMEOUT_MS = 6000;

/** Lazily created isolate plus the two compiled scripts; null until first use. */
let ready: Promise<{ isolate: ivm.Isolate; poly: ivm.Script; render: ivm.Script }> | null = null;
16
-
17
/**
 * Bundle one entry file (path relative to this module's directory) into a
 * single IIFE script, in memory, and return its source text.
 */
async function bundleOnce(entry: string): Promise<string> {
  const entryPath = resolve(HERE, entry);
  const { outputFiles } = await build({
    entryPoints: [entryPath],
    bundle: true,
    format: "iife",
    platform: "node",
    // Tree shaking stays off so the polyfill side-effects are kept
    // (sideEffects:false would drop them).
    treeShaking: false,
    alias: { punycode: PUNYCODE },
    write: false
  });
  const [bundle] = outputFiles;
  return bundle.text;
}
29
-
30
/**
 * Return the shared isolate and its compiled polyfill/render scripts,
 * creating them on first use.
 *
 * The setup promise is cached in `ready`. A failed setup, or an isolate that
 * has since been disposed, is dropped from the cache so the next call
 * rebuilds instead of failing forever.
 *
 * @throws whatever bundling or script compilation throws; the failure is not cached
 */
async function init() {
  for (;;) {
    if (!ready) {
      const pending = (async () => {
        const [polyCode, renderCode] = await Promise.all([bundleOnce("polyfills.ts"), bundleOnce("entry.ts")]);
        const isolate = new ivm.Isolate({ memoryLimit: 256 });
        const poly = await isolate.compileScript(polyCode);
        const render = await isolate.compileScript(renderCode);
        return { isolate, poly, render };
      })();
      ready = pending;
      // Forget a failed setup, but only if no newer attempt has replaced it.
      pending.catch(() => {
        if (ready === pending) ready = null;
      });
    }

    const current = ready;
    const state = await current;
    if (!state.isolate.isDisposed) return state;

    // The isolate is gone, so its compiled scripts are unusable. Clear the
    // cache (unless a concurrent caller already did) and loop to rebuild.
    if (ready === current) ready = null;
  }
}
42
-
43
/**
 * Render one page inside the shared isolate, using a fresh context that is
 * released afterwards.
 *
 * @param input  render input; copied into the isolate
 * @returns the render result, passed back out of the isolate as JSON and parsed
 */
export async function renderInIsolate(input: RenderInput): Promise<RenderResult> {
  const engine = await init();
  const context = await engine.isolate.createContext();
  try {
    const globalRef = context.global;
    await globalRef.set("globalThis", globalRef.derefInto());
    // Order matters: self/window must exist before the polyfill bundle runs
    // (fast-text-encoding detects them), and the polyfills must run before the
    // render bundle (linkedom's entity decoder reads atob/Buffer at module init).
    await context.eval("globalThis.self = globalThis; globalThis.window = globalThis;");
    for (const script of [engine.poly, engine.render]) {
      await script.run(context);
    }
    const serialized = await context.evalClosure(
      "return JSON.stringify(globalThis.__renderAndScan($0))",
      [input],
      { arguments: { copy: true }, result: { copy: true }, timeout: CALL_TIMEOUT_MS }
    );
    return JSON.parse(serialized as string) as RenderResult;
  } finally {
    context.release();
  }
}