@q32/signal-scanner 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/COPYING +674 -0
- package/COPYING.LESSER +165 -0
- package/README.md +57 -9
- package/dist/cli.d.ts +26 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +592 -0
- package/dist/cli.js.map +1 -0
- package/dist/render-isolate/entry.d.ts +2 -0
- package/dist/render-isolate/entry.d.ts.map +1 -0
- package/dist/render-isolate/entry.js +3 -0
- package/dist/render-isolate/entry.js.map +1 -0
- package/dist/render-isolate/polyfills.d.ts +2 -0
- package/dist/render-isolate/polyfills.d.ts.map +1 -0
- package/dist/render-isolate/polyfills.js +41 -0
- package/dist/render-isolate/polyfills.js.map +1 -0
- package/dist/render-isolate/run.d.ts +3 -0
- package/dist/render-isolate/run.d.ts.map +1 -0
- package/dist/render-isolate/run.js +88 -0
- package/dist/render-isolate/run.js.map +1 -0
- package/package.json +19 -8
- package/scripts/check-coverage.ts +0 -33
- package/scripts/eval.ts +0 -311
- package/scripts/render-isolate/entry.ts +0 -2
- package/scripts/render-isolate/polyfills.ts +0 -33
- package/scripts/render-isolate/run.ts +0 -63
- package/scripts/scan.ts +0 -612
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
// Web globals a bare V8 isolate lacks, installed BEFORE linkedom/render load.
|
|
2
|
+
// (self/window are set by the host via context.eval before this bundle runs.)
|
|
3
|
+
import "fast-text-encoding";
|
|
4
|
+
import base64 from "base-64";
|
|
5
|
+
import { URL, URLSearchParams } from "whatwg-url-without-unicode";
|
|
6
|
+
// Install each web global only when the isolate does not already provide one.
// `g` stays in scope for the Buffer shim further down this file.
const g = globalThis;
const fallbacks = {
    atob: (s) => base64.decode(String(s)),
    btoa: (s) => base64.encode(String(s)),
    URL,
    URLSearchParams
};
for (const [name, impl] of Object.entries(fallbacks)) {
    // Truthiness check on purpose: an existing implementation always wins.
    if (!g[name])
        g[name] = impl;
}
|
|
15
|
+
// Minimal Buffer shim (linkedom's entity decoder + a few runtime paths use it).
// Built on the globals above. Covers:
//   Buffer.from(string [, "base64" | "binary" | "latin1"])
//   Buffer.from(Uint8Array | number[] | ArrayBuffer)
//   Buffer.alloc(n), Buffer.isBuffer(x)
//   buf.toString([ "binary" | "latin1" | "base64" ])  (anything else => UTF-8)
if (!g.Buffer) {
    // String.fromCharCode.apply has an engine-dependent argument limit, so the
    // bytes are converted in fixed-size chunks.
    const CHUNK = 8192;
    // bytes -> "binary string" (one char per byte, code units 0..255).
    const toBinary = (bytes) => {
        let out = "";
        for (let i = 0; i < bytes.length; i += CHUNK)
            out += String.fromCharCode.apply(null, bytes.subarray(i, i + CHUNK));
        return out;
    };
    // "binary string" -> bytes; each UTF-16 code unit is truncated to its low byte.
    const fromBinary = (text) => {
        const bytes = new Uint8Array(text.length);
        for (let i = 0; i < text.length; i++)
            bytes[i] = text.charCodeAt(i) & 0xff;
        return bytes;
    };
    // Attach the encoding-aware toString to a Uint8Array owned by the shim.
    const decorate = (bytes) => {
        bytes.toString = (e) => (e === "binary" || e === "latin1" ? toBinary(bytes) : e === "base64" ? g.btoa(toBinary(bytes)) : new TextDecoder().decode(bytes));
        return bytes;
    };
    g.Buffer = {
        from(input, enc) {
            let bytes;
            if (input instanceof Uint8Array)
                // Copy so the caller's array is not mutated when toString is attached.
                bytes = new Uint8Array(input);
            else if (input instanceof ArrayBuffer)
                // View over the same memory (no copy).
                bytes = new Uint8Array(input);
            else if (Array.isArray(input))
                // Array of byte values; previously stringified to "1,2,3" and UTF-8 encoded.
                bytes = Uint8Array.from(input);
            else if (enc === "base64")
                bytes = fromBinary(g.atob(String(input)));
            else if (enc === "binary" || enc === "latin1")
                // Symmetric with toString("binary" | "latin1").
                bytes = fromBinary(String(input));
            else
                bytes = new TextEncoder().encode(String(input));
            return decorate(bytes);
        },
        // Zero-filled buffer with the same toString behaviour as from().
        alloc: (n) => decorate(new Uint8Array(n)),
        isBuffer: (x) => x instanceof Uint8Array
    };
}
|
|
41
|
+
//# sourceMappingURL=polyfills.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"polyfills.js","sourceRoot":"","sources":["../../src/render-isolate/polyfills.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,8EAA8E;AAC9E,OAAO,oBAAoB,CAAC;AAC5B,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,EAAE,GAAG,EAAE,eAAe,EAAE,MAAM,4BAA4B,CAAC;AAClE,MAAM,CAAC,GAAG,UAAiB,CAAC;AAC5B,IAAI,CAAC,CAAC,CAAC,IAAI;IAAE,CAAC,CAAC,IAAI,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;AAC9D,IAAI,CAAC,CAAC,CAAC,IAAI;IAAE,CAAC,CAAC,IAAI,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;AAC9D,IAAI,CAAC,CAAC,CAAC,GAAG;IAAE,CAAC,CAAC,GAAG,GAAG,GAAG,CAAC;AACxB,IAAI,CAAC,CAAC,CAAC,eAAe;IAAE,CAAC,CAAC,eAAe,GAAG,eAAe,CAAC;AAE5D,gFAAgF;AAChF,6EAA6E;AAC7E,IAAI,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;IACd,MAAM,QAAQ,GAAG,CAAC,KAAiB,EAAU,EAAE;QAC7C,IAAI,GAAG,GAAG,EAAE,CAAC;QACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,IAAI;YAAE,GAAG,IAAI,MAAM,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,CAAwB,CAAC,CAAC;QACxI,OAAO,GAAG,CAAC;IACb,CAAC,CAAC;IACF,CAAC,CAAC,MAAM,GAAG;QACT,IAAI,CAAC,KAAc,EAAE,GAAY;YAC/B,IAAI,KAAiB,CAAC;YACtB,IAAI,KAAK,YAAY,UAAU;gBAAE,KAAK,GAAG,KAAK,CAAC;iBAC1C,IAAI,GAAG,KAAK,QAAQ;gBAAE,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;;gBACrG,KAAK,GAAG,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;YACrD,MAAM,IAAI,GAAG,KAA0D,CAAC;YACxE,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAU,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,QAAQ,IAAI,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;YAClK,OAAO,IAAI,CAAC;QACd,CAAC;QACD,KAAK,EAAE,CAAC,CAAS,EAAE,EAAE,CAAC,IAAI,UAAU,CAAC,CAAC,CAAC;QACvC,QAAQ,EAAE,CAAC,CAAU,EAAE,EAAE,CAAC,CAAC,YAAY,UAAU;KAClD,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../src/render-isolate/run.ts"],"names":[],"mappings":"AAaA,OAAO,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAkE9D,wBAAsB,eAAe,CAAC,KAAK,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,CAAC,CAoB/E"}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
// CLI executor for renderDom: runs the render bundle inside a real isolated-vm
|
|
2
|
+
// isolate. True isolation — the page's untrusted JS gets web-global polyfills and
|
|
3
|
+
// a floor-dropping fetch, and cannot reach the host's fetch/process/fs. A fresh
|
|
4
|
+
// context per page prevents cross-page contamination; the heavy bundles are
|
|
5
|
+
// compiled once.
|
|
6
|
+
//
|
|
7
|
+
// `isolated-vm` and `esbuild` are optionalDependencies: a base install can still
|
|
8
|
+
// run the static scan/crawl. They are loaded lazily here so the package imports
|
|
9
|
+
// cleanly without them; if absent, renderInIsolate throws and the caller
|
|
10
|
+
// (cli.ts) skips dynamic rendering for that page.
|
|
11
|
+
import { createRequire } from "node:module";
|
|
12
|
+
import { existsSync } from "node:fs";
|
|
13
|
+
import { resolve } from "node:path";
|
|
14
|
+
// Directory of this module (import.meta.dirname needs Node >= 20.11).
const HERE = import.meta.dirname;
// Upper bound, in milliseconds, for the in-isolate render call.
const CALL_TIMEOUT_MS = 6000;
// Punycode lives in a transitive (linkedom/whatwg-url); resolve it through the
// module graph so it works whether or not node_modules is hoisted.
const require = createRequire(import.meta.url);
// `punycode` is an optional dependency. Resolving it must never throw at import
// time, otherwise a base install (static scan only) could not even load this
// module. When it is missing PUNYCODE is undefined; esbuild is then expected to
// reject the non-string alias, so the failure surfaces from renderInIsolate.
const PUNYCODE = (() => {
    try {
        return require.resolve("punycode/punycode.es6.js");
    }
    catch {
        return undefined;
    }
})();
|
|
20
|
+
// Path of a sibling bundle entry next to this module: the compiled `.js` when
// it exists on disk (running from dist/), otherwise the `.ts` source (running
// via tsx). esbuild bundles either.
function entryPath(base) {
    const compiled = resolve(HERE, `${base}.js`);
    if (existsSync(compiled))
        return compiled;
    return resolve(HERE, `${base}.ts`);
}
|
|
26
|
+
// Set once the "dynamic rendering is disabled" hint has been printed, so the
// hint appears at most once per process.
let depsWarned = false;
// Lazily import the optional dependencies.
// Resolves to { ivm, build }: the isolated-vm module (its default export when
// present) and esbuild's `build` function.
// On any import failure the hint is logged (first time only) and the failure is
// rethrown as an Error.
async function loadDeps() {
    let ivmMod;
    let esbuildMod;
    try {
        [ivmMod, esbuildMod] = await Promise.all([import("isolated-vm"), import("esbuild")]);
    }
    catch (error) {
        if (!depsWarned) {
            depsWarned = true;
            console.error("signal-scanner: dynamic rendering is disabled — install the optional " +
                "dependencies `isolated-vm` and `esbuild` to enable it. Static analysis continues.");
        }
        throw error instanceof Error ? error : new Error(String(error));
    }
    return { ivm: ivmMod.default ?? ivmMod, build: esbuildMod.build };
}
|
|
41
|
+
// Memoized initialisation promise; null until init() first runs and again
// after a failed initialisation.
let ready = null;
// Bundle one sibling entry (`base`, without extension) into a single IIFE
// script using the supplied esbuild `build` function, and return its text.
// Nothing is written to disk.
async function bundleOnce(build, base) {
    const options = {
        entryPoints: [entryPath(base)],
        bundle: true,
        format: "iife",
        platform: "node",
        treeShaking: false, // keep polyfill side-effects (sideEffects:false would drop them)
        alias: { punycode: PUNYCODE },
        write: false
    };
    const { outputFiles } = await build(options);
    return outputFiles[0].text;
}
|
|
55
|
+
// Build (once) and return the shared render runtime: { isolate, poly, render },
// where poly/render are the compiled polyfill and render bundles.
//
// The result is memoized in `ready`. Two situations invalidate the memo:
//   - initialisation failed: the memo is cleared so a later call can retry;
//   - the cached isolate has been disposed (isolated-vm disposes an isolate that
//     runs past its memory limit): a fresh isolate is built instead of handing
//     out a dead one to every later page.
// Rejects with whatever loadDeps / bundleOnce / compileScript rejected with.
async function init() {
    if (ready) {
        const cached = ready;
        // A pending or failed initialisation propagates exactly as before.
        const state = await cached;
        if (!state.isolate.isDisposed)
            return state;
        // Only clear the memo if nobody has replaced it in the meantime.
        if (ready === cached)
            ready = null;
    }
    if (!ready) {
        const attempt = (async () => {
            const { ivm, build } = await loadDeps();
            const [polyCode, renderCode] = await Promise.all([bundleOnce(build, "polyfills"), bundleOnce(build, "entry")]);
            const isolate = new ivm.Isolate({ memoryLimit: 256 });
            try {
                const poly = await isolate.compileScript(polyCode);
                const render = await isolate.compileScript(renderCode);
                return { isolate, poly, render };
            }
            catch (error) {
                // Do not leak a half-initialised isolate when compilation fails.
                if (!isolate.isDisposed)
                    isolate.dispose();
                throw error;
            }
        })();
        ready = attempt;
        // A failed init must not poison every later call (e.g. transient bundle error).
        // The identity check keeps a stale failure from clearing a newer attempt.
        attempt.catch(() => {
            if (ready === attempt)
                ready = null;
        });
    }
    return ready;
}
|
|
70
|
+
// Render and scan one page inside a fresh context of the shared isolate.
// `input` is copied into the isolate, handed to the bundle's
// globalThis.__renderAndScan, and the JSON-serialised result is parsed and
// returned. The context is released whether the call succeeds or throws.
export async function renderInIsolate(input) {
    const runtime = await init();
    const context = await runtime.isolate.createContext();
    try {
        const jail = context.global;
        await jail.set("globalThis", jail.derefInto());
        // self/window must exist before the polyfill bundle (fast-text-encoding
        // detects them), and polyfills must run before the render bundle (linkedom's
        // entity decoder reads atob/Buffer at module init).
        await context.eval("globalThis.self = globalThis; globalThis.window = globalThis;");
        await runtime.poly.run(context);
        await runtime.render.run(context);
        const callOptions = { arguments: { copy: true }, result: { copy: true }, timeout: CALL_TIMEOUT_MS };
        const serialized = await context.evalClosure("return JSON.stringify(globalThis.__renderAndScan($0))", [input], callOptions);
        return JSON.parse(serialized);
    }
    finally {
        context.release();
    }
}
|
|
88
|
+
//# sourceMappingURL=run.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"run.js","sourceRoot":"","sources":["../../src/render-isolate/run.ts"],"names":[],"mappings":"AAAA,+EAA+E;AAC/E,kFAAkF;AAClF,gFAAgF;AAChF,4EAA4E;AAC5E,iBAAiB;AACjB,EAAE;AACF,iFAAiF;AACjF,gFAAgF;AAChF,yEAAyE;AACzE,kDAAkD;AAClD,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACrC,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAGpC,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC;AACjC,MAAM,eAAe,GAAG,IAAI,CAAC;AAE7B,+EAA+E;AAC/E,mEAAmE;AACnE,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC/C,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,0BAA0B,CAAC,CAAC;AAE7D,4EAA4E;AAC5E,0CAA0C;AAC1C,SAAS,SAAS,CAAC,IAAY;IAC7B,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,EAAE,GAAG,IAAI,KAAK,CAAC,CAAC;IACvC,OAAO,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,IAAI,KAAK,CAAC,CAAC;AAC3D,CAAC;AAED,IAAI,UAAU,GAAG,KAAK,CAAC;AACvB,KAAK,UAAU,QAAQ;IACrB,IAAI,CAAC;QACH,MAAM,CAAC,MAAM,EAAE,UAAU,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;QAC3F,OAAO,EAAE,GAAG,EAAE,MAAM,CAAC,OAAO,IAAI,MAAM,EAAE,KAAK,EAAG,UAAkB,CAAC,KAAK,EAAE,CAAC;IAC7E,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,UAAU,GAAG,IAAI,CAAC;YAClB,OAAO,CAAC,KAAK,CACX,uEAAuE;gBACrE,mFAAmF,CACtF,CAAC;QACJ,CAAC;QACD,MAAM,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;IAClE,CAAC;AACH,CAAC;AAED,gFAAgF;AAChF,IAAI,KAAK,GAA6D,IAAI,CAAC;AAE3E,KAAK,UAAU,UAAU,CAAC,KAAU,EAAE,IAAY;IAChD,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC;QACzB,WAAW,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QAC9B,MAAM,EAAE,IAAI;QACZ,MAAM,EAAE,MAAM;QACd,QAAQ,EAAE,MAAM;QAChB,WAAW,EAAE,KAAK,EAAE,iEAAiE;QACrF,KAAK,EAAE,EAAE,QAAQ,EAAE,QAAQ,EAAE;QAC7B,KAAK,EAAE,KAAK;KACb,CAAC,CAAC;IACH,OAAO,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AACpC,CAAC;AAED,KAAK,UAAU,IAAI;IACjB,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,KAAK,GAAG,CAAC,KAAK,IAAI,EAAE;YAClB,MAAM,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,MAAM,QAAQ,EAAE,CAAC;YACxC,MAAM,CAAC,QAAQ,EAAE,UAAU,CAAC,GAA
G,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,KAAK,EAAE,WAAW,CAAC,EAAE,UAAU,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC;YAC/G,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,EAAE,WAAW,EAAE,GAAG,EAAE,CAAC,CAAC;YACtD,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;YACnD,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,aAAa,CAAC,UAAU,CAAC,CAAC;YACvD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;QACnC,CAAC,CAAC,EAAE,CAAC;QACL,gFAAgF;QAChF,KAAK,CAAC,KAAK,CAAC,GAAG,EAAE,GAAG,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;IACvC,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,KAAkB;IACtD,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,MAAM,IAAI,EAAE,CAAC;IAC/C,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,aAAa,EAAE,CAAC;IAC9C,IAAI,CAAC;QACH,MAAM,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,YAAY,EAAE,OAAO,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;QACnE,wEAAwE;QACxE,6EAA6E;QAC7E,oDAAoD;QACpD,MAAM,OAAO,CAAC,IAAI,CAAC,+DAA+D,CAAC,CAAC;QACpF,MAAM,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QACxB,MAAM,MAAM,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAC1B,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,WAAW,CACnC,uDAAuD,EACvD,CAAC,KAAK,CAAC,EACP,EAAE,SAAS,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE,OAAO,EAAE,eAAe,EAAE,CAChF,CAAC;QACF,OAAO,IAAI,CAAC,KAAK,CAAC,GAAa,CAAiB,CAAC;IACnD,CAAC;YAAS,CAAC;QACT,OAAO,CAAC,OAAO,EAAE,CAAC;IACpB,CAAC;AACH,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,25 +1,33 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@q32/signal-scanner",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Static web signal scanner with bounded streaming analyzers, URL extraction, rule packs, scoring, and normalized reports.",
|
|
6
|
-
"license": "
|
|
6
|
+
"license": "LGPL-3.0-or-later",
|
|
7
7
|
"repository": {
|
|
8
8
|
"type": "git",
|
|
9
9
|
"url": "git+https://github.com/q32llc/signal-scanner.git"
|
|
10
10
|
},
|
|
11
|
+
"engines": {
|
|
12
|
+
"node": ">=20.11"
|
|
13
|
+
},
|
|
14
|
+
"bin": {
|
|
15
|
+
"signal-scanner": "dist/cli.js"
|
|
16
|
+
},
|
|
11
17
|
"files": [
|
|
12
18
|
"README.md",
|
|
13
|
-
"
|
|
14
|
-
"
|
|
19
|
+
"COPYING",
|
|
20
|
+
"COPYING.LESSER",
|
|
21
|
+
"dist/"
|
|
15
22
|
],
|
|
16
23
|
"scripts": {
|
|
17
24
|
"build": "tsc -p tsconfig.json",
|
|
18
25
|
"prepack": "npm run build",
|
|
19
26
|
"test": "bun test",
|
|
27
|
+
"test:isolate": "npm run build && node --test test/integration/isolate.mjs",
|
|
20
28
|
"coverage": "bun test --coverage --coverage-reporter=lcov --coverage-dir=coverage && bun scripts/check-coverage.ts coverage/lcov.info 80",
|
|
21
29
|
"coverage:report": "bun test --coverage",
|
|
22
|
-
"scan": "tsx
|
|
30
|
+
"scan": "tsx src/cli.ts",
|
|
23
31
|
"eval": "NODE_USE_ENV_PROXY=1 HTTP_PROXY=\"${EVAL_PROXY_URL-}\" HTTPS_PROXY=\"${EVAL_PROXY_URL-}\" tsx --env-file-if-exists=.env scripts/eval.ts"
|
|
24
32
|
},
|
|
25
33
|
"exports": {
|
|
@@ -53,14 +61,17 @@
|
|
|
53
61
|
"htmlparser2": "^10.1.0",
|
|
54
62
|
"linkedom": "^0.18.12"
|
|
55
63
|
},
|
|
56
|
-
"
|
|
64
|
+
"optionalDependencies": {
|
|
57
65
|
"base-64": "^1.0.0",
|
|
58
66
|
"buffer": "^6.0.3",
|
|
59
67
|
"esbuild": "^0.28.0",
|
|
60
68
|
"fast-text-encoding": "^1.0.6",
|
|
61
69
|
"isolated-vm": "^6.1.2",
|
|
62
|
-
"
|
|
63
|
-
"@types/node": "^24.0.0",
|
|
70
|
+
"punycode": "^2.3.1",
|
|
64
71
|
"whatwg-url-without-unicode": "^8.0.0-3"
|
|
72
|
+
},
|
|
73
|
+
"devDependencies": {
|
|
74
|
+
"@types/node": "^24.0.0",
|
|
75
|
+
"typescript": "^5.9.3"
|
|
65
76
|
}
|
|
66
77
|
}
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
import { readFileSync } from "node:fs";
|
|
2
|
-
|
|
3
|
-
const [, , lcovPath, thresholdArg] = process.argv;
|
|
4
|
-
const threshold = Number(thresholdArg ?? 80);
|
|
5
|
-
|
|
6
|
-
if (!lcovPath || !Number.isFinite(threshold)) {
|
|
7
|
-
console.error("Usage: bun scripts/check-coverage.ts <lcov.info> <line-threshold-percent>");
|
|
8
|
-
process.exit(2);
|
|
9
|
-
}
|
|
10
|
-
|
|
11
|
-
const lcov = readFileSync(lcovPath, "utf8");
|
|
12
|
-
let found = 0;
|
|
13
|
-
let hit = 0;
|
|
14
|
-
|
|
15
|
-
for (const line of lcov.split(/\r?\n/)) {
|
|
16
|
-
if (line.startsWith("LF:")) found += Number(line.slice(3));
|
|
17
|
-
else if (line.startsWith("LH:")) hit += Number(line.slice(3));
|
|
18
|
-
}
|
|
19
|
-
|
|
20
|
-
if (!found) {
|
|
21
|
-
console.error(`No line coverage data found in ${lcovPath}`);
|
|
22
|
-
process.exit(2);
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
const pct = (hit / found) * 100;
|
|
26
|
-
const display = pct.toFixed(2);
|
|
27
|
-
|
|
28
|
-
if (pct < threshold) {
|
|
29
|
-
console.error(`Line coverage ${display}% is below required ${threshold}%`);
|
|
30
|
-
process.exit(1);
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
console.log(`Line coverage ${display}% meets required ${threshold}%`);
|
package/scripts/eval.ts
DELETED
|
@@ -1,311 +0,0 @@
|
|
|
1
|
-
// Eval harness: run the homegrown scanner over a labeled corpus of known-good
|
|
2
|
-
// and known-bad sites and measure how well it separates them.
|
|
3
|
-
//
|
|
4
|
-
// npm run eval # reuse cached bad list if fresh (<6h)
|
|
5
|
-
// npm run eval -- --refresh # re-pull a fresh live bad list
|
|
6
|
-
//
|
|
7
|
-
// Known-good is the curated corpus/good.txt. Known-bad is pulled live from
|
|
8
|
-
// OpenPhish + URLhaus (they go offline fast), probed for reachability, and
|
|
9
|
-
// cached to corpus/.bad-cache.txt. The scan path is CLI heuristics only
|
|
10
|
-
// (structural + content + dynamic JS) — NO threat-intel feeds — so this measures
|
|
11
|
-
// the homegrown detector's own discriminative power, not feed lookups.
|
|
12
|
-
|
|
13
|
-
import { readFile, writeFile } from "node:fs/promises";
|
|
14
|
-
import { resolve } from "node:path";
|
|
15
|
-
import { crawlTargets, DEFAULT_CRAWL_OPTIONS, type CrawlOptions } from "./scan";
|
|
16
|
-
import { dispositionForScore } from "../src/index";
|
|
17
|
-
|
|
18
|
-
const FLAG_THRESHOLD = 50; // score >= 50 => product surfaces suspicious/malicious
|
|
19
|
-
const TARGET_BAD = 80;
|
|
20
|
-
const SITE_CONCURRENCY = 6;
|
|
21
|
-
const CACHE_PATH = resolve("corpus/.bad-cache.txt");
|
|
22
|
-
const PHISHING_CACHE_PATH = resolve("corpus/.bad-phishing-cache.txt");
|
|
23
|
-
const CACHE_TTL_MS = 6 * 60 * 60 * 1000;
|
|
24
|
-
const MAX_FP_RATE = 0.05; // gate: at most 5% of good sites may be flagged
|
|
25
|
-
|
|
26
|
-
// Bounded per-site crawl: landing page + a shallow hop is enough to judge, and
|
|
27
|
-
// keeps a 160-site sweep tractable.
|
|
28
|
-
const CRAWL: CrawlOptions = {
|
|
29
|
-
...DEFAULT_CRAWL_OPTIONS,
|
|
30
|
-
maxUrls: 10,
|
|
31
|
-
maxDepth: 1,
|
|
32
|
-
parallel: 4,
|
|
33
|
-
robots: false,
|
|
34
|
-
timeoutMs: 8000
|
|
35
|
-
};
|
|
36
|
-
|
|
37
|
-
const BROWSER_UA = DEFAULT_CRAWL_OPTIONS.userAgent;
|
|
38
|
-
|
|
39
|
-
interface SiteResult {
|
|
40
|
-
url: string;
|
|
41
|
-
label: "good" | "bad";
|
|
42
|
-
score: number;
|
|
43
|
-
disposition: string;
|
|
44
|
-
pagesScanned: number;
|
|
45
|
-
topFindings: Array<{ ruleId: string; score: number }>;
|
|
46
|
-
unreachable: boolean;
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
// The dynamic-analysis sandbox runs untrusted page JS; a stray rejection or
|
|
50
|
-
// throw from one site must never abort a 160-site sweep. Per-site scanning is
|
|
51
|
-
// already best-effort, so swallow these and keep going.
|
|
52
|
-
process.on("unhandledRejection", () => {});
|
|
53
|
-
process.on("uncaughtException", (error) => {
|
|
54
|
-
console.error(" (ignored uncaught error from sandbox):", error instanceof Error ? error.message : error);
|
|
55
|
-
});
|
|
56
|
-
|
|
57
|
-
async function main(): Promise<void> {
|
|
58
|
-
const refresh = process.argv.includes("--refresh");
|
|
59
|
-
// --phishing pulls a phishing-ONLY bad corpus (OpenPhish + Phishing.Database
|
|
60
|
-
// active links, no URLhaus malware binaries) to measure catch rate on
|
|
61
|
-
// malicious PAGES — where the web heuristics (credential forms, brand
|
|
62
|
-
// impersonation, cloaking) should actually shine.
|
|
63
|
-
const phishingOnly = process.argv.includes("--phishing");
|
|
64
|
-
// --live uses the curated, hand-verified corpus/phishing-live.txt (real
|
|
65
|
-
// credential-capture pages confirmed alive) instead of a noisy feed.
|
|
66
|
-
const live = process.argv.includes("--live");
|
|
67
|
-
// Egress: set EVAL_PROXY_URL (e.g. an unfiltered residential proxy) so the
|
|
68
|
-
// crawl + reachability probe leave via that proxy instead of the local
|
|
69
|
-
// network — necessary when an ISP filter (e.g. Spectrum Security Shield)
|
|
70
|
-
// intercepts known-malicious URLs and serves a block page, which would
|
|
71
|
-
// otherwise make every bad site look benign. The npm script maps it onto
|
|
72
|
-
// HTTP(S)_PROXY with NODE_USE_ENV_PROXY=1 (read at startup by node's fetch).
|
|
73
|
-
const proxy = process.env.EVAL_PROXY_URL || process.env.HTTPS_PROXY || "";
|
|
74
|
-
console.error(`egress: ${proxy ? "proxy " + redactProxy(proxy) : "direct (local network)"}`);
|
|
75
|
-
|
|
76
|
-
const good = await loadList("corpus/good.txt");
|
|
77
|
-
const bad = live ? await loadList("corpus/phishing-live.txt") : await loadBad(refresh, phishingOnly);
|
|
78
|
-
console.error(`corpus: ${good.length} good, ${bad.length} bad (${live ? "curated live" : phishingOnly ? "phishing feed" : "mixed feed"})`);
|
|
79
|
-
|
|
80
|
-
const labeled: Array<{ url: string; label: "good" | "bad" }> = [
|
|
81
|
-
...good.map((url) => ({ url, label: "good" as const })),
|
|
82
|
-
...bad.map((url) => ({ url, label: "bad" as const }))
|
|
83
|
-
];
|
|
84
|
-
|
|
85
|
-
const results: SiteResult[] = [];
|
|
86
|
-
let done = 0;
|
|
87
|
-
await pool(labeled, SITE_CONCURRENCY, async ({ url, label }) => {
|
|
88
|
-
const result = await scanSite(url, label);
|
|
89
|
-
results.push(result);
|
|
90
|
-
done += 1;
|
|
91
|
-
if (done % 10 === 0) console.error(` scanned ${done}/${labeled.length}`);
|
|
92
|
-
});
|
|
93
|
-
|
|
94
|
-
report(results);
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
async function scanSite(url: string, label: "good" | "bad"): Promise<SiteResult> {
|
|
98
|
-
try {
|
|
99
|
-
const reports = await crawlTargets([url], CRAWL);
|
|
100
|
-
const scored = reports.filter((r) => !r.error && r.report);
|
|
101
|
-
if (!scored.length) {
|
|
102
|
-
return { url, label, score: 0, disposition: "allow", pagesScanned: 0, topFindings: [], unreachable: true };
|
|
103
|
-
}
|
|
104
|
-
const worst = scored.reduce((a, b) => (b.report.score > a.report.score ? b : a));
|
|
105
|
-
const score = worst.report.score;
|
|
106
|
-
const topFindings = [...worst.report.findings]
|
|
107
|
-
.sort((a, b) => (b.score ?? 0) - (a.score ?? 0))
|
|
108
|
-
.slice(0, 3)
|
|
109
|
-
.map((f) => ({ ruleId: f.ruleId, score: f.score ?? 0 }));
|
|
110
|
-
return { url, label, score, disposition: dispositionForScore(score), pagesScanned: scored.length, topFindings, unreachable: false };
|
|
111
|
-
} catch {
|
|
112
|
-
return { url, label, score: 0, disposition: "allow", pagesScanned: 0, topFindings: [], unreachable: true };
|
|
113
|
-
}
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
function report(results: SiteResult[]): void {
|
|
117
|
-
const reachable = results.filter((r) => !r.unreachable);
|
|
118
|
-
const good = reachable.filter((r) => r.label === "good");
|
|
119
|
-
const bad = reachable.filter((r) => r.label === "bad");
|
|
120
|
-
const flagged = (r: SiteResult) => r.score >= FLAG_THRESHOLD;
|
|
121
|
-
|
|
122
|
-
const fp = good.filter(flagged); // good, flagged => false positive
|
|
123
|
-
const tn = good.filter((r) => !flagged(r));
|
|
124
|
-
const tp = bad.filter(flagged); // bad, flagged => caught
|
|
125
|
-
const fn = bad.filter((r) => !flagged(r)); // bad, missed
|
|
126
|
-
|
|
127
|
-
const pct = (n: number, d: number) => (d ? `${((100 * n) / d).toFixed(1)}%` : "n/a");
|
|
128
|
-
|
|
129
|
-
const proxy = process.env.EVAL_PROXY_URL || process.env.HTTPS_PROXY || "";
|
|
130
|
-
console.log("\n================ SCANNER EVAL ================");
|
|
131
|
-
console.log(`egress: ${proxy ? "proxy " + redactProxy(proxy) : "direct (local network)"}`);
|
|
132
|
-
console.log(`unreachable (excluded): ${results.filter((r) => r.unreachable).length} / ${results.length}`);
|
|
133
|
-
console.log(`\nGood sites: ${good.length} reachable`);
|
|
134
|
-
console.log(` flagged (FALSE POSITIVE): ${fp.length} [${pct(fp.length, good.length)}]`);
|
|
135
|
-
console.log(` clean (true negative): ${tn.length}`);
|
|
136
|
-
console.log(`\nBad sites: ${bad.length} reachable`);
|
|
137
|
-
console.log(` flagged (caught): ${tp.length} [recall ${pct(tp.length, bad.length)}]`);
|
|
138
|
-
console.log(` missed (false negative): ${fn.length}`);
|
|
139
|
-
|
|
140
|
-
console.log("\nScore distribution (count by band):");
|
|
141
|
-
console.log(` band good bad`);
|
|
142
|
-
for (const [lo, hi] of [[0, 9], [10, 24], [25, 49], [50, 74], [75, 100]]) {
|
|
143
|
-
const g = good.filter((r) => r.score >= lo && r.score <= hi).length;
|
|
144
|
-
const b = bad.filter((r) => r.score >= lo && r.score <= hi).length;
|
|
145
|
-
const mark = lo >= FLAG_THRESHOLD ? " <-flag" : "";
|
|
146
|
-
console.log(` ${String(lo).padStart(3)}-${String(hi).padEnd(3)} ${String(g).padStart(5)} ${String(b).padStart(5)}${mark}`);
|
|
147
|
-
}
|
|
148
|
-
console.log(` good: median ${median(good.map((r) => r.score))}, p90 ${percentile(good.map((r) => r.score), 90)}`);
|
|
149
|
-
console.log(` bad: median ${median(bad.map((r) => r.score))}, p90 ${percentile(bad.map((r) => r.score), 90)}`);
|
|
150
|
-
|
|
151
|
-
if (fp.length) {
|
|
152
|
-
console.log("\nFALSE POSITIVES (good sites flagged) — fix these:");
|
|
153
|
-
for (const r of fp.sort((a, b) => b.score - a.score)) {
|
|
154
|
-
console.log(` [${r.score}] ${r.url} ${r.topFindings.map((f) => `${f.ruleId}(${f.score})`).join(", ")}`);
|
|
155
|
-
}
|
|
156
|
-
}
|
|
157
|
-
if (fn.length) {
|
|
158
|
-
console.log("\nMISSED bad sites (score < flag threshold):");
|
|
159
|
-
for (const r of fn.sort((a, b) => b.score - a.score).slice(0, 25)) {
|
|
160
|
-
console.log(` [${r.score}] ${r.url} ${r.topFindings.map((f) => `${f.ruleId}(${f.score})`).join(", ") || "(no signal)"}`);
|
|
161
|
-
}
|
|
162
|
-
if (fn.length > 25) console.log(` ... and ${fn.length - 25} more`);
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
const fpRate = good.length ? fp.length / good.length : 0;
|
|
166
|
-
const pass = fpRate <= MAX_FP_RATE;
|
|
167
|
-
console.log(`\nGATE: false-positive rate ${pct(fp.length, good.length)} (max ${MAX_FP_RATE * 100}%) => ${pass ? "PASS" : "FAIL"}`);
|
|
168
|
-
console.log("=============================================\n");
|
|
169
|
-
if (!pass) process.exitCode = 1;
|
|
170
|
-
}
|
|
171
|
-
|
|
172
|
-
// ---- known-bad corpus (live) --------------------------------------------
|
|
173
|
-
|
|
174
|
-
async function loadBad(refresh: boolean, phishingOnly: boolean): Promise<string[]> {
|
|
175
|
-
const cachePath = phishingOnly ? PHISHING_CACHE_PATH : CACHE_PATH;
|
|
176
|
-
if (!refresh) {
|
|
177
|
-
const cached = await readCacheIfFresh(cachePath);
|
|
178
|
-
if (cached) {
|
|
179
|
-
console.error(`using cached bad list (${cached.length} urls)`);
|
|
180
|
-
return cached;
|
|
181
|
-
}
|
|
182
|
-
}
|
|
183
|
-
console.error(`pulling live bad URLs (${phishingOnly ? "phishing-only" : "mixed"}) ...`);
|
|
184
|
-
const candidates = shuffle(dedupe(await fetchBadCandidates(phishingOnly)));
|
|
185
|
-
console.error(` ${candidates.length} candidates; probing reachability ...`);
|
|
186
|
-
const live = await probeReachable(candidates, TARGET_BAD);
|
|
187
|
-
await writeFile(cachePath, `# pulled ${new Date().toISOString()}\n${live.join("\n")}\n`, "utf8");
|
|
188
|
-
return live;
|
|
189
|
-
}
|
|
190
|
-
|
|
191
|
-
/**
 * Read a cached URL list if its `# pulled <timestamp>` header is recent enough.
 *
 * @param cachePath - Path of the cache file to read.
 * @returns The cached URLs, or `null` when the file cannot be read, has no
 *   parsable timestamp, is older than `CACHE_TTL_MS`, or contains no URLs.
 */
async function readCacheIfFresh(cachePath: string): Promise<string[] | null> {
  try {
    const text = await readFile(cachePath, "utf8");
    const stamp = text.match(/# pulled (.+)/)?.[1]?.trim();
    if (!stamp) return null;
    const pulledAt = Date.parse(stamp);
    // Date.parse yields NaN for a malformed stamp; every comparison with NaN is
    // false, so without this check a corrupt cache would count as fresh.
    if (Number.isNaN(pulledAt)) return null;
    if (Date.now() - pulledAt > CACHE_TTL_MS) return null;
    const urls = parseList(text);
    return urls.length ? urls : null;
  } catch {
    // Missing or unreadable cache: behave as if there is none.
    return null;
  }
}
|
|
202
|
-
|
|
203
|
-
async function fetchBadCandidates(phishingOnly: boolean): Promise<string[]> {
|
|
204
|
-
const urls: string[] = [];
|
|
205
|
-
// OpenPhish community feed (public, ~hundreds of fresh phishing URLs).
|
|
206
|
-
try {
|
|
207
|
-
const res = await fetch("https://openphish.com/feed.txt", { signal: AbortSignal.timeout(15000) });
|
|
208
|
-
if (res.ok) urls.push(...parseList(await res.text()));
|
|
209
|
-
} catch (error) {
|
|
210
|
-
console.error(" openphish fetch failed:", error instanceof Error ? error.message : error);
|
|
211
|
-
}
|
|
212
|
-
if (phishingOnly) {
|
|
213
|
-
// Phishing.Database active links (public, large list of currently-active
|
|
214
|
-
// phishing URLs) — sampled, no auth.
|
|
215
|
-
try {
|
|
216
|
-
const res = await fetch("https://raw.githubusercontent.com/mitchellkrogza/Phishing.Database/master/phishing-links-ACTIVE.txt", { signal: AbortSignal.timeout(30000) });
|
|
217
|
-
if (res.ok) urls.push(...parseList(await res.text()).filter((u) => u.startsWith("http")).slice(0, 4000));
|
|
218
|
-
} catch (error) {
|
|
219
|
-
console.error(" phishing.database fetch failed:", error instanceof Error ? error.message : error);
|
|
220
|
-
}
|
|
221
|
-
return urls;
|
|
222
|
-
}
|
|
223
|
-
// URLhaus online URLs (malware distribution). Auth-Key used if present.
|
|
224
|
-
try {
|
|
225
|
-
const headers: Record<string, string> = {};
|
|
226
|
-
if (process.env.ABUSE_CH_AUTH_KEY) headers["Auth-Key"] = process.env.ABUSE_CH_AUTH_KEY;
|
|
227
|
-
const res = await fetch("https://urlhaus.abuse.ch/downloads/csv_online/", { headers, signal: AbortSignal.timeout(20000) });
|
|
228
|
-
if (res.ok) {
|
|
229
|
-
for (const line of (await res.text()).split("\n")) {
|
|
230
|
-
if (line.startsWith("#") || !line.trim()) continue;
|
|
231
|
-
const fields = line.split('","').map((f) => f.replace(/^"|"$/g, ""));
|
|
232
|
-
if (fields[3] === "online" && fields[2]?.startsWith("http")) urls.push(fields[2]);
|
|
233
|
-
}
|
|
234
|
-
}
|
|
235
|
-
} catch (error) {
|
|
236
|
-
console.error(" urlhaus fetch failed:", error instanceof Error ? error.message : error);
|
|
237
|
-
}
|
|
238
|
-
return urls;
|
|
239
|
-
}
|
|
240
|
-
|
|
241
|
-
/**
 * Probe candidate URLs and collect those that answer with HTTP 200.
 *
 * @param candidates - URLs to probe, in the order they should be tried.
 * @param target - Maximum number of live URLs to collect.
 * @returns At most `target` URLs that returned status 200 without redirecting.
 */
async function probeReachable(candidates: string[], target: number): Promise<string[]> {
  const live: string[] = [];
  await pool(candidates, 12, async (url) => {
    // Enough collected already: remaining candidates are skipped without a request.
    if (live.length >= target) return;
    try {
      // A live phishing kit serves real content (200) at the URL itself. A
      // taken-down one 404s or 301/302s to a park/block page — exclude those
      // (status !== 200) so a dead corpus doesn't dilute recall. No body-size
      // gate: a single <script> tag can be a complete phishing page.
      const res = await fetch(url, { headers: { "user-agent": BROWSER_UA }, redirect: "manual", signal: AbortSignal.timeout(8000) });
      // Re-check the cap: other workers may have filled it while this request was in flight.
      if (res.status === 200 && live.length < target) live.push(url);
    } catch {
      // dead/unreachable — skip
    }
  });
  return live.slice(0, target);
}
|
|
260
|
-
|
|
261
|
-
// ---- helpers -------------------------------------------------------------
|
|
262
|
-
|
|
263
|
-
async function loadList(path: string): Promise<string[]> {
|
|
264
|
-
return parseList(await readFile(resolve(path), "utf8"));
|
|
265
|
-
}
|
|
266
|
-
function parseList(text: string): string[] {
|
|
267
|
-
return text.split("\n").map((l) => l.trim()).filter((l) => l && !l.startsWith("#"));
|
|
268
|
-
}
|
|
269
|
-
/**
 * Removes duplicate strings, keeping the first occurrence of each value and
 * preserving the original order.
 *
 * @param values Strings that may contain repeats.
 * @returns A new array with each distinct value exactly once.
 */
function dedupe(values: string[]): string[] {
  const seen = new Set<string>();
  const unique: string[] = [];
  for (const value of values) {
    if (seen.has(value)) continue;
    seen.add(value);
    unique.push(value);
  }
  return unique;
}
|
|
272
|
-
/**
 * Produces a log-safe label for a proxy URL: host (and port, when one is
 * present) only, with scheme, credentials, path and query stripped.
 *
 * @param url Proxy URL, possibly carrying `user:password@` credentials.
 * @returns `host:port`, or just `host` when the URL carries no explicit
 *   (non-default) port; the literal `"set"` when the value is not a valid URL.
 */
function redactProxy(url: string): string {
  try {
    // `host` is `hostname` plus `:port` only when a port is present, so a
    // port-less URL no longer yields a dangling colon ("proxy.example.com:").
    return new URL(url).host;
  } catch {
    // Unparseable value: confirm a proxy is configured without echoing it.
    return "set";
  }
}
|
|
280
|
-
/**
 * Returns a reordered copy of `values` using a deterministic, index-derived
 * sort key instead of `Math.random`; the same input length always produces
 * the same ordering. The input array is not modified.
 *
 * @param values Items to mix.
 * @returns A new array holding the same items ordered by their derived key.
 */
function shuffle<T>(values: T[]): T[] {
  const count = values.length;
  // Key each item by (index * multiplier) mod length, then order by that key.
  const keyed = values.map((item, position) => ({ item, key: (position * 2654435761) % count }));
  keyed.sort((left, right) => left.key - right.key);
  return keyed.map((entry) => entry.item);
}
|
|
287
|
-
/**
 * Computes the median of a list of numbers without mutating the input.
 *
 * @param values Samples in any order.
 * @returns The middle value for an odd count, the mean of the two middle
 *   values for an even count, and 0 for an empty list.
 */
function median(values: number[]): number {
  if (!values.length) return 0;
  const sorted = [...values].sort((a, b) => a - b);
  const mid = Math.floor(sorted.length / 2);
  // An even count has no single middle element: average the two central samples
  // rather than biasing the result toward the upper one.
  return sorted.length % 2 === 1 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2;
}
|
|
292
|
-
/**
 * Picks the sample at the given percentile from an ascending-sorted copy of
 * `values`. The index is `floor(p / 100 * count)`, capped at the last element.
 *
 * @param values Samples in any order; not mutated.
 * @param p Percentile to select (e.g. 50, 95).
 * @returns The selected sample, or 0 when `values` is empty.
 */
function percentile(values: number[], p: number): number {
  if (values.length === 0) return 0;
  const ordered = Array.from(values).sort((lo, hi) => lo - hi);
  const lastIndex = ordered.length - 1;
  const rank = Math.floor((p / 100) * ordered.length);
  // p = 100 would index one past the end, so cap at the final element.
  return ordered[rank > lastIndex ? lastIndex : rank];
}
|
|
297
|
-
/**
 * Runs `worker` over every item with at most `concurrency` calls in flight.
 * Items are handed out in order from a shared cursor; a rejection from any
 * worker call rejects the returned promise.
 *
 * @param items Work items to process.
 * @param concurrency Maximum number of simultaneous worker calls.
 * @param worker Async handler invoked once per item.
 */
async function pool<T>(items: T[], concurrency: number, worker: (item: T) => Promise<void>): Promise<void> {
  let cursor = 0;
  // Each lane keeps claiming the next unprocessed item until none remain.
  const drain = async (): Promise<void> => {
    for (; cursor < items.length; ) {
      const current = items[cursor];
      cursor += 1;
      await worker(current);
    }
  };
  const lanes = Array.from({ length: Math.min(concurrency, items.length) }, () => drain());
  await Promise.all(lanes);
}
|
|
307
|
-
|
|
308
|
-
// Last-resort handler: print whatever main() rejected with and exit non-zero.
function reportFatalAndExit(failure: unknown): never {
  console.error(failure);
  process.exit(1);
}

main().catch(reportFatalAndExit);
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
// Web globals a bare V8 isolate lacks, installed BEFORE linkedom/render load.
|
|
2
|
-
// (self/window are set by the host via context.eval before this bundle runs.)
|
|
3
|
-
import "fast-text-encoding";
|
|
4
|
-
import base64 from "base-64";
|
|
5
|
-
import { URL, URLSearchParams } from "whatwg-url-without-unicode";
|
|
6
|
-
// Untyped handle on the global object so missing web APIs can be attached to it.
const g = globalThis as any;

// Fallback implementations, installed only where the runtime has no (truthy)
// global of the same name.
const fallbacks: Record<string, unknown> = {
  atob: (s: string) => base64.decode(String(s)),
  btoa: (s: string) => base64.encode(String(s)),
  URL,
  URLSearchParams
};
for (const [name, implementation] of Object.entries(fallbacks)) {
  if (!g[name]) g[name] = implementation;
}
|
|
11
|
-
|
|
12
|
-
// Minimal Buffer shim (linkedom's entity decoder + a few runtime paths use it).
// Built on the globals above; covers from(str|base64|latin1|bytes) + toString(enc).
if (!g.Buffer) {
  type ShimBuffer = Uint8Array & { toString: (e?: string) => string };

  // Bytes -> one-char-per-byte string. Converted in 8 KiB slices so a large
  // input never exceeds the engine's argument-count limit for apply().
  const toBinary = (bytes: Uint8Array): string => {
    let out = "";
    for (let i = 0; i < bytes.length; i += 8192) out += String.fromCharCode.apply(null, bytes.subarray(i, i + 8192) as unknown as number[]);
    return out;
  };

  // Attaches the encoding-aware toString to a byte array the shim owns.
  const wrap = (bytes: Uint8Array): ShimBuffer => {
    const view = bytes as ShimBuffer;
    view.toString = (e?: string) => (e === "binary" || e === "latin1" ? toBinary(bytes) : e === "base64" ? g.btoa(toBinary(bytes)) : new TextDecoder().decode(bytes));
    return view;
  };

  g.Buffer = {
    /**
     * Builds a byte view from bytes, a byte array, an ArrayBuffer, or a string
     * (decoded per `enc`: "base64", "latin1"/"binary", otherwise UTF-8).
     */
    from(input: unknown, enc?: string): ShimBuffer {
      let bytes: Uint8Array;
      // Copy typed-array input: wrapping it in place would overwrite toString
      // on the caller's own array.
      if (input instanceof Uint8Array) bytes = new Uint8Array(input);
      else if (input instanceof ArrayBuffer) bytes = new Uint8Array(input);
      // Plain number arrays are byte values, not text to be stringified.
      else if (Array.isArray(input)) bytes = Uint8Array.from(input as number[]);
      else if (enc === "base64") bytes = Uint8Array.from(g.atob(String(input)), (c: string) => c.charCodeAt(0));
      // latin1/binary maps each code unit to one byte, mirroring toString("latin1").
      else if (enc === "binary" || enc === "latin1") bytes = Uint8Array.from(String(input), (c: string) => c.charCodeAt(0) & 0xff);
      else bytes = new TextEncoder().encode(String(input));
      return wrap(bytes);
    },
    // Zero-filled buffer with the same toString behaviour as from().
    alloc: (n: number) => wrap(new Uint8Array(n)),
    isBuffer: (x: unknown) => x instanceof Uint8Array
  };
}
|
|
@@ -1,63 +0,0 @@
|
|
|
1
|
-
// CLI executor for renderDom: runs the render bundle inside a real isolated-vm
|
|
2
|
-
// isolate. True isolation — the page's untrusted JS gets web-global polyfills and
|
|
3
|
-
// a floor-dropping fetch, and cannot reach the host's fetch/process/fs. A fresh
|
|
4
|
-
// context per page prevents cross-page contamination; the heavy bundles are
|
|
5
|
-
// compiled once.
|
|
6
|
-
import { resolve } from "node:path";
|
|
7
|
-
import ivm from "isolated-vm";
|
|
8
|
-
import { build } from "esbuild";
|
|
9
|
-
import type { RenderInput, RenderResult } from "../../src/render";
|
|
10
|
-
|
|
11
|
-
// Directory of this module; bundle entry points are resolved against it.
const HERE = import.meta.dirname;
// ES-module build of punycode that the bundler aliases "punycode" to.
const PUNYCODE = resolve(HERE, "../../node_modules/punycode/punycode.es6.js");
// Timeout (ms) passed to the in-isolate render call.
const CALL_TIMEOUT_MS = 6000;

// The isolate plus the two scripts compiled into it.
type IsolateState = { isolate: ivm.Isolate; poly: ivm.Script; render: ivm.Script };

// Lazily-created shared state; null until the first init() call.
let ready: Promise<IsolateState> | null = null;
|
|
16
|
-
|
|
17
|
-
/**
 * Bundles one entry module (resolved relative to this directory) into a single
 * in-memory IIFE script and returns its source text.
 *
 * @param entry Entry file name relative to this module's directory.
 * @returns The text of the first output file produced by the bundler.
 */
async function bundleOnce(entry: string): Promise<string> {
  const entryPath = resolve(HERE, entry);
  const { outputFiles } = await build({
    alias: { punycode: PUNYCODE },
    bundle: true,
    entryPoints: [entryPath],
    format: "iife",
    platform: "node",
    treeShaking: false, // keep polyfill side-effects (sideEffects:false would drop them)
    write: false // keep the output in memory instead of writing to disk
  });
  const [bundle] = outputFiles;
  return bundle.text;
}
|
|
29
|
-
|
|
30
|
-
/**
 * Returns the shared isolate and its two compiled scripts, building them on
 * first use and caching the in-flight promise so concurrent callers share one
 * build.
 *
 * Unlike a plain memoised promise, unusable state is evicted: a failed build
 * is not cached (the next call retries), and an isolate that has been disposed
 * (isolated-vm disposes an isolate that blows its memory limit) is replaced
 * with a fresh one instead of failing every subsequent render.
 *
 * @returns The live isolate with the polyfill and render scripts compiled into it.
 * @throws Whatever bundling or script compilation throws for the current attempt.
 */
async function init(): Promise<{ isolate: ivm.Isolate; poly: ivm.Script; render: ivm.Script }> {
  for (;;) {
    if (!ready) {
      ready = (async () => {
        const [polyCode, renderCode] = await Promise.all([bundleOnce("polyfills.ts"), bundleOnce("entry.ts")]);
        const isolate = new ivm.Isolate({ memoryLimit: 256 });
        try {
          const poly = await isolate.compileScript(polyCode);
          const render = await isolate.compileScript(renderCode);
          return { isolate, poly, render };
        } catch (error) {
          // Don't leak a half-initialised isolate when compilation fails.
          if (!isolate.isDisposed) isolate.dispose();
          throw error;
        }
      })();
    }
    const pending = ready;
    try {
      const state = await pending;
      if (!state.isolate.isDisposed) return state;
    } catch (error) {
      // Evict the failed attempt (unless another caller already replaced it)
      // so a transient failure doesn't poison every later call.
      if (ready === pending) ready = null;
      throw error;
    }
    // The cached isolate is dead: evict it and loop to build a replacement.
    if (ready === pending) ready = null;
  }
}
|
|
42
|
-
|
|
43
|
-
/**
 * Renders and scans one page inside a fresh context of the shared isolate.
 *
 * @param input Render request; copied into the isolate.
 * @returns The result parsed from the JSON string the isolate returns.
 */
export async function renderInIsolate(input: RenderInput): Promise<RenderResult> {
  const compiled = await init();
  // One context per call, released in `finally` whatever happens.
  const context = await compiled.isolate.createContext();
  try {
    const globalRef = context.global;
    await globalRef.set("globalThis", globalRef.derefInto());
    // self/window have to be defined before the polyfill bundle runs
    // (fast-text-encoding detects them), and the polyfills before the render
    // bundle (linkedom's entity decoder reads atob/Buffer at module init).
    await context.eval("globalThis.self = globalThis; globalThis.window = globalThis;");
    await compiled.poly.run(context);
    await compiled.render.run(context);
    // The result crosses the isolate boundary as a JSON string and is parsed on the host.
    const serialized = await context.evalClosure(
      "return JSON.stringify(globalThis.__renderAndScan($0))",
      [input],
      { arguments: { copy: true }, result: { copy: true }, timeout: CALL_TIMEOUT_MS }
    );
    return JSON.parse(serialized as string) as RenderResult;
  } finally {
    context.release();
  }
}
|