@ozzylabs/feedradar 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.ja.md +114 -0
- package/README.md +63 -53
- package/dist/cli/doctor.d.ts +83 -0
- package/dist/cli/doctor.d.ts.map +1 -0
- package/dist/cli/doctor.js +260 -0
- package/dist/cli/doctor.js.map +1 -0
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +2 -2
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/source.d.ts.map +1 -1
- package/dist/cli/source.js +6 -3
- package/dist/cli/source.js.map +1 -1
- package/dist/cli/watch.d.ts +16 -0
- package/dist/cli/watch.d.ts.map +1 -1
- package/dist/cli/watch.js +3 -0
- package/dist/cli/watch.js.map +1 -1
- package/dist/core/feeds/_html-common.d.ts +30 -0
- package/dist/core/feeds/_html-common.d.ts.map +1 -0
- package/dist/core/feeds/_html-common.js +192 -0
- package/dist/core/feeds/_html-common.js.map +1 -0
- package/dist/core/feeds/html-js.d.ts +50 -0
- package/dist/core/feeds/html-js.d.ts.map +1 -0
- package/dist/core/feeds/html-js.js +135 -0
- package/dist/core/feeds/html-js.js.map +1 -0
- package/dist/core/feeds/html.d.ts +1 -7
- package/dist/core/feeds/html.d.ts.map +1 -1
- package/dist/core/feeds/html.js +5 -180
- package/dist/core/feeds/html.js.map +1 -1
- package/dist/core/feeds/index.d.ts.map +1 -1
- package/dist/core/feeds/index.js +2 -0
- package/dist/core/feeds/index.js.map +1 -1
- package/dist/core/playwright-check.d.ts +134 -0
- package/dist/core/playwright-check.d.ts.map +1 -0
- package/dist/core/playwright-check.js +98 -0
- package/dist/core/playwright-check.js.map +1 -0
- package/dist/core/watcher.d.ts +17 -0
- package/dist/core/watcher.d.ts.map +1 -1
- package/dist/core/watcher.js +59 -0
- package/dist/core/watcher.js.map +1 -1
- package/dist/index.js +0 -0
- package/dist/schemas/source.d.ts +42 -0
- package/dist/schemas/source.d.ts.map +1 -1
- package/dist/schemas/source.js +42 -7
- package/dist/schemas/source.js.map +1 -1
- package/dist/templates/agents/AGENTS.md +2 -2
- package/dist/templates/feedradar.md +2 -2
- package/package.json +11 -1
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import { contentHash, parseHtmlDocument } from "./_html-common.js";
|
|
2
|
+
/**
|
|
3
|
+
* `kind: html-js` adapter — Playwright-rendered HTML scraping (ADR-0010).
|
|
4
|
+
*
|
|
5
|
+
* Same selector contract as `kind: html` (delegates to `parseHtmlDocument`),
|
|
6
|
+
* but acquires the document by driving headless Chromium so SPA / CSR pages
|
|
7
|
+
* (Next.js, Notion embeds, Algolia DocSearch, etc.) that ship empty initial
|
|
8
|
+
* HTML can still be scraped.
|
|
9
|
+
*
|
|
10
|
+
* ## Hardening (ADR-0010 §D5 — hardcoded, NOT user-configurable)
|
|
11
|
+
*
|
|
12
|
+
* | Policy | Value | Rationale |
|
|
13
|
+
* |---------------------|----------------------|----------------------------------------------------------|
|
|
14
|
+
* | `headless` | `true` | UI mode is CI-incompatible and an operator-UI risk. |
|
|
15
|
+
* | `acceptDownloads` | `false` | Block drive-by downloads (page JS-triggered file saves). |
|
|
16
|
+
* | context reuse | none — fresh each fetch | Prevent SW / IndexedDB / localStorage injection persistence and cross-source state mixing. |
|
|
17
|
+
* | default `timeout` | 30000ms | Cap OOM / infinite loops on pathological pages. |
|
|
18
|
+
* | `page.close()` | in `finally` | Prevent page leak / memory accumulation. |
|
|
19
|
+
* | viewport | Playwright default | Avoid bloating DOM with oversized viewports. |
|
|
20
|
+
*
|
|
21
|
+
* The above are intentionally NOT exposed through `SourceJsOptions`. Users
|
|
22
|
+
* may tune `waitFor` / `waitUntil` / `timeout` / `userAgent`, but the threat
|
|
23
|
+
* model assumes the policy floor above always holds.
|
|
24
|
+
*
|
|
25
|
+
* ## Optional peer dep
|
|
26
|
+
*
|
|
27
|
+
* Playwright is declared as an *optional* peer dependency (ADR-0010 §D3) so
|
|
28
|
+
* users who only run `kind: rss` / `kind: html` are not forced to install
|
|
29
|
+
* Chromium. The import is therefore `await import("playwright")` and resolves
|
|
30
|
+
* lazily on the first `html-js` fetch; missing-module errors are translated
|
|
31
|
+
* into a user-friendly install hint.
|
|
32
|
+
*/
|
|
33
|
+
/**
 * Timeout in milliseconds applied to each Playwright step when the source
 * does not provide `js.timeout`. Kept in step with the default declared by
 * `SourceJsOptionsSchema`, so callers that invoke the adapter directly
 * (skipping schema parsing) observe the same documented behavior.
 */
const DEFAULT_TIMEOUT_MS = 30000;
/**
 * `waitUntil` mode handed to `page.goto()` when the source does not provide
 * `js.waitUntil`. `networkidle` is used because SPA / CSR pages commonly
 * fetch their item data over XHR only after the document itself has loaded.
 */
const DEFAULT_WAIT_UNTIL = "networkidle";
|
|
45
|
+
/**
 * Lazily import the optional `playwright` peer dependency.
 *
 * Any import failure (most commonly: the package is not installed) is
 * re-thrown as an `Error` whose message carries the install hint from
 * ADR-0010 §D3. The original failure is preserved on `error.cause` so its
 * stack and error code stay available for debugging.
 *
 * @returns {Promise<object>} The imported `playwright` module namespace.
 * @throws {Error} When `import("playwright")` rejects for any reason.
 */
async function loadPlaywright() {
    try {
        // Bare specifier: left to the runtime's normal package resolution, so
        // the package is only required once an `html-js` source is fetched.
        return await import("playwright");
    }
    catch (e) {
        const message = e instanceof Error ? e.message : String(e);
        throw new Error(`html-js adapter: failed to load Playwright (${message}). ` +
            "Install it with: `npm i playwright && npx playwright install chromium`", 
        // Keep the underlying failure reachable instead of flattening it
        // into the message only.
        { cause: e });
    }
}
|
|
62
|
+
/**
 * Run `use()` and then close `resource`, without letting a cleanup failure
 * hide the error that actually broke the fetch.
 *
 * - `use()` succeeds: `resource.close()` is awaited and any error it throws
 *   propagates to the caller.
 * - `use()` fails: `resource.close()` is still attempted, but an error from
 *   it is dropped so the caller sees the original failure (e.g. the
 *   navigation timeout) rather than a secondary "already closed" error.
 *
 * @param {{ close: () => Promise<unknown> }} resource Object to close.
 * @param {() => Promise<unknown>} use Work to perform while it is open.
 * @returns {Promise<unknown>} Whatever `use()` resolved to.
 */
async function closeAfterUse(resource, use) {
    let result;
    try {
        result = await use();
    }
    catch (primary) {
        try {
            await resource.close();
        }
        catch {
            // Deliberately dropped: the primary error is the actionable one.
        }
        throw primary;
    }
    await resource.close();
    return result;
}
/**
 * Feed adapter for `kind: "html-js"` sources: renders the page in headless
 * Chromium through Playwright, then extracts items with the shared HTML
 * parser.
 */
export const htmlJsAdapter = {
    kind: "html-js",
    /**
     * Render `source.url` and turn the resulting DOM into items.
     *
     * @param {object} source Source definition; reads `id`, `url`,
     *   `selectors` (required) and the optional `js` block (`timeout`,
     *   `waitUntil`, `waitFor`, `userAgent`).
     * @param {object} [options] Reads `options.playwright` (a pre-loaded
     *   Playwright module, used instead of the dynamic import) and
     *   `options.state` (previous fetch state, used for dedup).
     * @returns {Promise<object>} `{ items, state }`, or
     *   `{ items: [], notModified: true, state }` when the rendered HTML
     *   hashes to the previous `state.lastEtag`.
     * @throws {Error} When the source has no selectors, when Playwright
     *   cannot be loaded, or when launching / navigating / waiting fails.
     */
    fetch: async (source, options = {}) => {
        if (!source.selectors) {
            throw new Error(`html-js adapter: source '${source.id}' has no selectors`);
        }
        const selectors = source.selectors;
        const js = source.js;
        const timeout = js?.timeout ?? DEFAULT_TIMEOUT_MS;
        const waitUntil = js?.waitUntil ?? DEFAULT_WAIT_UNTIL;
        // Without an explicit `waitFor`, wait for the item selector itself:
        // "the item list has rendered" is the usual intent.
        const waitFor = js?.waitFor ?? selectors.item;
        const playwright = options.playwright ?? (await loadPlaywright());
        const previous = options.state;
        const fetchedAt = new Date().toISOString();
        // Hardening: `headless` is pinned explicitly rather than relying on
        // Playwright's default.
        const browser = await playwright.chromium.launch({ headless: true });
        // Browser, context and page are each closed innermost-first, on
        // success and on failure alike.
        const html = await closeAfterUse(browser, async () => {
            // Hardening: a fresh context per fetch, with downloads refused.
            const context = await browser.newContext({
                acceptDownloads: false,
                ...(js?.userAgent ? { userAgent: js.userAgent } : {}),
            });
            return closeAfterUse(context, async () => {
                const page = await context.newPage();
                return closeAfterUse(page, async () => {
                    await page.goto(source.url, { waitUntil, timeout });
                    await page.waitForSelector(waitFor, { timeout });
                    return page.content();
                });
            });
        });
        // Dedup signal: a hash of the rendered HTML, stored in the
        // `lastEtag` slot of the returned state.
        const bodyHash = contentHash(html);
        const state = {
            lastFetchedAt: fetchedAt,
            lastEtag: bodyHash,
        };
        if (previous?.lastEtag === bodyHash) {
            return { items: [], notModified: true, state };
        }
        return { items: parseHtmlDocument(html, source, fetchedAt), state };
    },
};
|
|
135
|
+
//# sourceMappingURL=html-js.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"html-js.js","sourceRoot":"","sources":["../../../src/core/feeds/html-js.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAGnE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAEH;;;;GAIG;AACH,MAAM,kBAAkB,GAAG,MAAM,CAAC;AAClC;;;;GAIG;AACH,MAAM,kBAAkB,GAAiC,aAAa,CAAC;AAiDvE;;;GAGG;AACH,KAAK,UAAU,cAAc;IAC3B,IAAI,CAAC;QACH,wEAAwE;QACxE,kEAAkE;QAClE,MAAM,GAAG,GAAG,CAAC,MAAM,MAAM,CAAC,YAAY,CAAC,CAA8B,CAAC;QACtE,OAAO,GAAG,CAAC;IACb,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,OAAO,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAC3D,MAAM,IAAI,KAAK,CACb,+CAA+C,OAAO,KAAK;YACzD,wEAAwE,CAC3E,CAAC;IACJ,CAAC;AACH,CAAC;AAED,MAAM,CAAC,MAAM,aAAa,GAAgB;IACxC,IAAI,EAAE,SAAS;IACf,KAAK,EAAE,KAAK,EAAE,MAAc,EAAE,UAAgC,EAAE,EAAE,EAAE;QAClE,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;YACtB,MAAM,IAAI,KAAK,CAAC,4BAA4B,MAAM,CAAC,EAAE,oBAAoB,CAAC,CAAC;QAC7E,CAAC;QACD,MAAM,SAAS,GAAG,MAAM,CAAC,SAAS,CAAC;QACnC,MAAM,EAAE,GAAG,MAAM,CAAC,EAAE,CAAC;QACrB,MAAM,OAAO,GAAG,EAAE,EAAE,OAAO,IAAI,kBAAkB,CAAC;QAClD,MAAM,SAAS,GAAG,EAAE,EAAE,SAAS,IAAI,kBAAkB,CAAC;QACtD,uEAAuE;QACvE,0EAA0E;QAC1E,MAAM,OAAO,GAAG,EAAE,EAAE,OAAO,IAAI,SAAS,CAAC,IAAI,CAAC;QAE9C,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,CAAC,MAAM,cAAc,EAAE,CAAC,CAAC;QAClE,MAAM,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC;QAC/B,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAE3C,0EAA0E;QAC1E,gDAAgD;QAChD,MAAM,OAAO,GAAG,MAAM,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;QACrE,IAAI,IAAY,CAAC;QACjB,IAAI,CAAC;YACH,uEAAuE;YACvE,mEAAmE;YACnE,mEAAmE;YACnE,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC;gBACvC,eAAe,EAAE,KAAK;gBACtB,GAAG,CAAC,EAAE,EAAE,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,EAAE,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;aACtD,CAAC,CAAC;YACH,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;gBACrC,IAAI,CAAC;oBACH,MAAM,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,OAAO,EAAE,CAAC,CAAC;oBACpD,MAAM,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;oBACjD,IAAI,GAAG,MAAM
,IAAI,CAAC,OAAO,EAAE,CAAC;gBAC9B,CAAC;wBAAS,CAAC;oBACT,mEAAmE;oBACnE,6DAA6D;oBAC7D,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;gBACrB,CAAC;YACH,CAAC;oBAAS,CAAC;gBACT,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;YACxB,CAAC;QACH,CAAC;gBAAS,CAAC;YACT,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;QACxB,CAAC;QAED,wEAAwE;QACxE,sEAAsE;QACtE,oEAAoE;QACpE,+BAA+B;QAC/B,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC;QACnC,IAAI,QAAQ,EAAE,QAAQ,KAAK,QAAQ,EAAE,CAAC;YACpC,OAAO;gBACL,KAAK,EAAE,EAAE;gBACT,WAAW,EAAE,IAAI;gBACjB,KAAK,EAAE;oBACL,aAAa,EAAE,SAAS;oBACxB,QAAQ,EAAE,QAAQ;iBACnB;aACF,CAAC;QACJ,CAAC;QAED,MAAM,KAAK,GAAG,iBAAiB,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;QACzD,OAAO;YACL,KAAK;YACL,KAAK,EAAE;gBACL,aAAa,EAAE,SAAS;gBACxB,QAAQ,EAAE,QAAQ;aACnB;SACF,CAAC;IACJ,CAAC;CACF,CAAC"}
|
|
@@ -1,10 +1,4 @@
|
|
|
1
|
-
import type { Item, Source } from "../../schemas/index.js";
|
|
2
1
|
import type { FeedAdapter } from "./types.js";
|
|
3
|
-
|
|
4
|
-
* Parse an HTML document into validated `Item[]` using the source's
|
|
5
|
-
* `selectors`. Exported so tests can drive the parser directly without
|
|
6
|
-
* needing a fake HTTP layer.
|
|
7
|
-
*/
|
|
8
|
-
export declare function parseHtmlDocument(html: string, source: Source, fetchedAt: string): Item[];
|
|
2
|
+
export { parseHtmlDocument } from "./_html-common.js";
|
|
9
3
|
export declare const htmlAdapter: FeedAdapter;
|
|
10
4
|
//# sourceMappingURL=html.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"html.d.ts","sourceRoot":"","sources":["../../../src/core/feeds/html.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"html.d.ts","sourceRoot":"","sources":["../../../src/core/feeds/html.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,WAAW,EAAiC,MAAM,YAAY,CAAC;AAK7E,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAyCtD,eAAO,MAAM,WAAW,EAAE,WAuDzB,CAAC"}
|
package/dist/core/feeds/html.js
CHANGED
|
@@ -1,184 +1,9 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
1
|
+
import { CONTENT_HASH_PREFIX, contentHash, parseHtmlDocument } from "./_html-common.js";
|
|
2
|
+
// Re-export shared primitives so existing imports (and tests) that pulled
|
|
3
|
+
// `parseHtmlDocument` from this module keep working after the
|
|
4
|
+
// `_html-common.ts` split (ADR-0010 §D1, no behavior change).
|
|
5
|
+
export { parseHtmlDocument } from "./_html-common.js";
|
|
5
6
|
const USER_AGENT = "feedradar/0.0.0 (+https://github.com/ozzy-labs/feedradar)";
|
|
6
|
-
/**
|
|
7
|
-
* Prefix that flags an `lastEtag` slot as carrying a content hash rather than
|
|
8
|
-
* an actual HTTP ETag. We reuse the `lastEtag` field so this Phase does not
|
|
9
|
-
* have to migrate `SourceState` (see `docs/design/source-html.md`).
|
|
10
|
-
*/
|
|
11
|
-
const CONTENT_HASH_PREFIX = "sha256:";
|
|
12
|
-
/** Attributes the adapter checks before falling back to text content. */
|
|
13
|
-
const DATETIME_ATTRS = ["datetime", "content", "value"];
|
|
14
|
-
/**
|
|
15
|
-
* Convert an `HTMLElement | null` to its trimmed text, or `undefined` when
|
|
16
|
-
* the selector did not match. We always trim because raw scrapes routinely
|
|
17
|
-
* carry surrounding whitespace from formatted markup.
|
|
18
|
-
*/
|
|
19
|
-
function textOf(el) {
|
|
20
|
-
if (!el)
|
|
21
|
-
return undefined;
|
|
22
|
-
const text = el.text?.trim();
|
|
23
|
-
return text ? text : undefined;
|
|
24
|
-
}
|
|
25
|
-
/**
|
|
26
|
-
* Apply a CSS selector relative to `root` and return the first match.
|
|
27
|
-
* `node-html-parser` returns `null` instead of throwing for invalid input,
|
|
28
|
-
* which matches what callers want here (a missing field, not a hard error).
|
|
29
|
-
*/
|
|
30
|
-
function queryFirst(root, selector) {
|
|
31
|
-
return root.querySelector(selector);
|
|
32
|
-
}
|
|
33
|
-
/**
|
|
34
|
-
* Resolve the `link` selector to an `href` (or text fallback).
|
|
35
|
-
*
|
|
36
|
-
* Anchor tags expose the URL via `href` so we prefer the attribute. When the
|
|
37
|
-
* selector points at a non-anchor (e.g. a `<div data-link>` wrapper used by
|
|
38
|
-
* some changelog layouts), we fall back to text content so the adapter can
|
|
39
|
-
* still operate, deferring URL validation to `ItemSchema`.
|
|
40
|
-
*/
|
|
41
|
-
function pickLink(el) {
|
|
42
|
-
if (!el)
|
|
43
|
-
return undefined;
|
|
44
|
-
const href = el.getAttribute("href");
|
|
45
|
-
if (href && href.trim())
|
|
46
|
-
return href.trim();
|
|
47
|
-
return textOf(el);
|
|
48
|
-
}
|
|
49
|
-
/**
|
|
50
|
-
* Resolve `publishedAt` to a candidate string for `new Date()`.
|
|
51
|
-
*
|
|
52
|
-
* `<time datetime="2026-05-12">` and `<meta content="..."/>` markup hide the
|
|
53
|
-
* canonical timestamp in attributes; the visible text is often a
|
|
54
|
-
* localized "May 12, 2026" that is harder to parse reliably. We probe the
|
|
55
|
-
* known attributes first, then fall back to element text.
|
|
56
|
-
*/
|
|
57
|
-
function pickDatetime(el) {
|
|
58
|
-
if (!el)
|
|
59
|
-
return undefined;
|
|
60
|
-
for (const attr of DATETIME_ATTRS) {
|
|
61
|
-
const value = el.getAttribute(attr);
|
|
62
|
-
if (value && value.trim())
|
|
63
|
-
return value.trim();
|
|
64
|
-
}
|
|
65
|
-
return textOf(el);
|
|
66
|
-
}
|
|
67
|
-
/**
|
|
68
|
-
* Try to parse a candidate timestamp into ISO 8601. Returns `undefined` for
|
|
69
|
-
* unparseable inputs so the item can still be emitted (RSS adapter parity).
|
|
70
|
-
*/
|
|
71
|
-
function toIsoDate(value) {
|
|
72
|
-
if (!value)
|
|
73
|
-
return undefined;
|
|
74
|
-
const date = new Date(value);
|
|
75
|
-
if (Number.isNaN(date.getTime()))
|
|
76
|
-
return undefined;
|
|
77
|
-
return date.toISOString();
|
|
78
|
-
}
|
|
79
|
-
/** Collect the trimmed text of every match for `selector`. */
|
|
80
|
-
function collectTags(root, selector) {
|
|
81
|
-
if (!selector)
|
|
82
|
-
return undefined;
|
|
83
|
-
const tags = root
|
|
84
|
-
.querySelectorAll(selector)
|
|
85
|
-
.map((el) => el.text?.trim())
|
|
86
|
-
.filter((t) => !!t && t.length > 0);
|
|
87
|
-
return tags.length > 0 ? tags : undefined;
|
|
88
|
-
}
|
|
89
|
-
/**
|
|
90
|
-
* Resolve a relative `link` against the source URL.
|
|
91
|
-
*
|
|
92
|
-
* Many sites publish `<a href="/changelog/foo">` rather than absolute URLs;
|
|
93
|
-
* without resolution `ItemSchema`'s `z.string().url()` would drop them. We
|
|
94
|
-
* intentionally swallow `URL` constructor errors so a malformed `link`
|
|
95
|
-
* surfaces as a normal validation drop later instead of breaking the whole
|
|
96
|
-
* fetch.
|
|
97
|
-
*/
|
|
98
|
-
function resolveUrl(raw, base) {
|
|
99
|
-
try {
|
|
100
|
-
return new URL(raw, base).toString();
|
|
101
|
-
}
|
|
102
|
-
catch {
|
|
103
|
-
return raw;
|
|
104
|
-
}
|
|
105
|
-
}
|
|
106
|
-
/** Normalize one matched element into an Item, or `null` to drop it. */
|
|
107
|
-
function parseItem(itemEl, selectors, source, fetchedAt) {
|
|
108
|
-
const title = textOf(queryFirst(itemEl, selectors.title));
|
|
109
|
-
const linkRaw = pickLink(queryFirst(itemEl, selectors.link));
|
|
110
|
-
if (!title || !linkRaw)
|
|
111
|
-
return null;
|
|
112
|
-
const url = resolveUrl(linkRaw, source.url);
|
|
113
|
-
const summary = selectors.summary ? textOf(queryFirst(itemEl, selectors.summary)) : undefined;
|
|
114
|
-
const body = selectors.body ? textOf(queryFirst(itemEl, selectors.body)) : undefined;
|
|
115
|
-
const publishedAt = selectors.publishedAt
|
|
116
|
-
? toIsoDate(pickDatetime(queryFirst(itemEl, selectors.publishedAt)))
|
|
117
|
-
: undefined;
|
|
118
|
-
const tags = collectTags(itemEl, selectors.tags);
|
|
119
|
-
const stableKey = deriveStableKey({
|
|
120
|
-
url,
|
|
121
|
-
fallbackHashInputs: [title, publishedAt],
|
|
122
|
-
});
|
|
123
|
-
const id = deriveItemId(title, stableKey);
|
|
124
|
-
// Preserve a structured snapshot of the raw scrape rather than the
|
|
125
|
-
// `HTMLElement` instance itself — the watcher serializes `raw` to YAML and
|
|
126
|
-
// we want the on-disk payload to be diff-friendly.
|
|
127
|
-
const raw = { title, link: linkRaw };
|
|
128
|
-
if (summary !== undefined)
|
|
129
|
-
raw.summary = summary;
|
|
130
|
-
if (body !== undefined)
|
|
131
|
-
raw.body = body;
|
|
132
|
-
if (publishedAt !== undefined)
|
|
133
|
-
raw.publishedAt = publishedAt;
|
|
134
|
-
if (tags !== undefined)
|
|
135
|
-
raw.tags = tags;
|
|
136
|
-
return validateItem({
|
|
137
|
-
id,
|
|
138
|
-
sourceId: source.id,
|
|
139
|
-
title,
|
|
140
|
-
url,
|
|
141
|
-
summary,
|
|
142
|
-
publishedAt,
|
|
143
|
-
fetchedAt,
|
|
144
|
-
raw,
|
|
145
|
-
});
|
|
146
|
-
}
|
|
147
|
-
function validateItem(candidate) {
|
|
148
|
-
const result = ItemSchema.safeParse(candidate);
|
|
149
|
-
// Items that fail validation (e.g. unresolvable URL) are dropped silently —
|
|
150
|
-
// see rss.ts for the same fail-soft rationale.
|
|
151
|
-
return result.success ? result.data : null;
|
|
152
|
-
}
|
|
153
|
-
/**
|
|
154
|
-
* Parse an HTML document into validated `Item[]` using the source's
|
|
155
|
-
* `selectors`. Exported so tests can drive the parser directly without
|
|
156
|
-
* needing a fake HTTP layer.
|
|
157
|
-
*/
|
|
158
|
-
export function parseHtmlDocument(html, source, fetchedAt) {
|
|
159
|
-
if (!source.selectors) {
|
|
160
|
-
throw new Error(`html adapter: source '${source.id}' has no selectors`);
|
|
161
|
-
}
|
|
162
|
-
const selectors = source.selectors;
|
|
163
|
-
let root;
|
|
164
|
-
try {
|
|
165
|
-
root = parseHtml(html);
|
|
166
|
-
}
|
|
167
|
-
catch (e) {
|
|
168
|
-
throw new Error(`html adapter: failed to parse HTML: ${e instanceof Error ? e.message : String(e)}`);
|
|
169
|
-
}
|
|
170
|
-
const itemEls = root.querySelectorAll(selectors.item);
|
|
171
|
-
return itemEls
|
|
172
|
-
.map((el) => parseItem(el, selectors, source, fetchedAt))
|
|
173
|
-
.filter((i) => i !== null);
|
|
174
|
-
}
|
|
175
|
-
/**
|
|
176
|
-
* Compute the sha256 of the raw response body, prefixed so callers can tell
|
|
177
|
-
* it apart from a real ETag inside `SourceState.lastEtag`.
|
|
178
|
-
*/
|
|
179
|
-
function contentHash(body) {
|
|
180
|
-
return `${CONTENT_HASH_PREFIX}${createHash("sha256").update(body).digest("hex")}`;
|
|
181
|
-
}
|
|
182
7
|
/**
|
|
183
8
|
* Issue an HTTP GET with conditional headers. The previous `lastEtag` slot
|
|
184
9
|
* may contain either an actual ETag (mirror RSS behavior) or a `sha256:`
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"html.js","sourceRoot":"","sources":["../../../src/core/feeds/html.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"html.js","sourceRoot":"","sources":["../../../src/core/feeds/html.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,mBAAmB,EAAE,WAAW,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAGxF,0EAA0E;AAC1E,8DAA8D;AAC9D,8DAA8D;AAC9D,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAEtD,MAAM,UAAU,GAAG,2DAA2D,CAAC;AAE/E;;;;GAIG;AACH,KAAK,UAAU,SAAS,CACtB,GAAW,EACX,SAAoB,EACpB,UAAmD,EAAE;IAMrD,MAAM,OAAO,GAA2B;QACtC,MAAM,EAAE,mDAAmD;QAC3D,YAAY,EAAE,UAAU;KACzB,CAAC;IACF,0EAA0E;IAC1E,2EAA2E;IAC3E,WAAW;IACX,IAAI,OAAO,CAAC,IAAI,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,mBAAmB,CAAC,EAAE,CAAC;QAClE,OAAO,CAAC,eAAe,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAC1C,CAAC;IAED,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IAC3E,MAAM,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IAC1C,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;QAC5B,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,EAAE,EAAE,IAAI,EAAE,CAAC;IACzC,CAAC;IACD,IAAI,QAAQ,CAAC,MAAM,GAAG,GAAG,IAAI,QAAQ,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;QACpD,MAAM,IAAI,KAAK,CAAC,sBAAsB,QAAQ,CAAC,MAAM,SAAS,GAAG,EAAE,CAAC,CAAC;IACvE,CAAC;IACD,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;IACnC,OAAO,EAAE,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;AACjD,CAAC;AAED,MAAM,CAAC,MAAM,WAAW,GAAgB;IACtC,IAAI,EAAE,MAAM;IACZ,KAAK,EAAE,KAAK,EAAE,MAAc,EAAE,UAA8B,EAAE,EAAE,EAAE;QAChE,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;YACtB,MAAM,IAAI,KAAK,CAAC,yBAAyB,MAAM,CAAC,EAAE,oBAAoB,CAAC,CAAC;QAC1E,CAAC;QACD,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,IAAK,UAAU,CAAC,KAA8B,CAAC;QAC9E,IAAI,OAAO,SAAS,KAAK,UAAU,EAAE,CAAC;YACpC,MAAM,IAAI,KAAK,CAAC,qEAAqE,CAAC,CAAC;QACzF,CAAC;QACD,MAAM,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC;QAC/B,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAC3C,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,MAAM,CAAC,GAAG,EAAE,SAAS,EAAE;YACtD,IAAI,EAAE,QAAQ,EAAE,QAAQ;SACzB,CAAC,CAAC;QACH,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;YAC5B,OAAO;gBACL,KAAK,EAAE,EAAE;gBACT,WAAW,EAAE,IAAI;gBACjB,KAAK,EAAE;oBACL,aAAa,EAAE,SAAS;oBACxB,iEAAiE;oBACjE,oEAAoE;oBACpE,QAAQ,EAAE,QAAQ,CAAC,I
AAI,IAAI,QAAQ,EAAE,QAAQ;iBAC9C;aACF,CAAC;QACJ,CAAC;QAED,yEAAyE;QACzE,uEAAuE;QACvE,wDAAwD;QACxD,MAAM,QAAQ,GAAG,WAAW,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QAC5C,MAAM,cAAc,GAAG,QAAQ,EAAE,QAAQ,CAAC;QAC1C,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,cAAc,KAAK,QAAQ,EAAE,CAAC;YAClD,OAAO;gBACL,KAAK,EAAE,EAAE;gBACT,WAAW,EAAE,IAAI;gBACjB,KAAK,EAAE;oBACL,aAAa,EAAE,SAAS;oBACxB,QAAQ,EAAE,QAAQ;iBACnB;aACF,CAAC;QACJ,CAAC;QAED,MAAM,KAAK,GAAG,iBAAiB,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;QAClE,OAAO;YACL,KAAK;YACL,KAAK,EAAE;gBACL,aAAa,EAAE,SAAS;gBACxB,qEAAqE;gBACrE,wDAAwD;gBACxD,QAAQ,EAAE,QAAQ,CAAC,IAAI,IAAI,QAAQ;aACpC;SACF,CAAC;IACJ,CAAC;CACF,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/core/feeds/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,wBAAwB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/core/feeds/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,wBAAwB,CAAC;AAMrD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAU9C,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,GAAG,WAAW,CAMhE;AAED,YAAY,EAAE,WAAW,EAAE,kBAAkB,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC"}
|
package/dist/core/feeds/index.js
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import { githubReleasesAdapter } from "./github-releases.js";
|
|
2
2
|
import { htmlAdapter } from "./html.js";
|
|
3
|
+
import { htmlJsAdapter } from "./html-js.js";
|
|
3
4
|
import { npmRegistryAdapter } from "./npm-registry.js";
|
|
4
5
|
import { rssAdapter } from "./rss.js";
|
|
5
6
|
const adapters = new Map([
|
|
6
7
|
[rssAdapter.kind, rssAdapter],
|
|
7
8
|
[htmlAdapter.kind, htmlAdapter],
|
|
9
|
+
[htmlJsAdapter.kind, htmlJsAdapter],
|
|
8
10
|
[githubReleasesAdapter.kind, githubReleasesAdapter],
|
|
9
11
|
[npmRegistryAdapter.kind, npmRegistryAdapter],
|
|
10
12
|
]);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/core/feeds/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,qBAAqB,EAAE,MAAM,sBAAsB,CAAC;AAC7D,OAAO,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AACxC,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AACvD,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAGtC,MAAM,QAAQ,GAAG,IAAI,GAAG,CAA8B;IACpD,CAAC,UAAU,CAAC,IAAI,EAAE,UAAU,CAAC;IAC7B,CAAC,WAAW,CAAC,IAAI,EAAE,WAAW,CAAC;IAC/B,CAAC,qBAAqB,CAAC,IAAI,EAAE,qBAAqB,CAAC;IACnD,CAAC,kBAAkB,CAAC,IAAI,EAAE,kBAAkB,CAAC;CAC9C,CAAC,CAAC;AAEH,MAAM,UAAU,cAAc,CAAC,IAAoB;IACjD,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACnC,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,wCAAwC,IAAI,EAAE,CAAC,CAAC;IAClE,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/core/feeds/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,qBAAqB,EAAE,MAAM,sBAAsB,CAAC;AAC7D,OAAO,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AACxC,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAC7C,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AACvD,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAGtC,MAAM,QAAQ,GAAG,IAAI,GAAG,CAA8B;IACpD,CAAC,UAAU,CAAC,IAAI,EAAE,UAAU,CAAC;IAC7B,CAAC,WAAW,CAAC,IAAI,EAAE,WAAW,CAAC;IAC/B,CAAC,aAAa,CAAC,IAAI,EAAE,aAAa,CAAC;IACnC,CAAC,qBAAqB,CAAC,IAAI,EAAE,qBAAqB,CAAC;IACnD,CAAC,kBAAkB,CAAC,IAAI,EAAE,kBAAkB,CAAC;CAC9C,CAAC,CAAC;AAEH,MAAM,UAAU,cAAc,CAAC,IAAoB;IACjD,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACnC,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,wCAAwC,IAAI,EAAE,CAAC,CAAC;IAClE,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Detection / install helpers for the optional `playwright` peer dependency
|
|
3
|
+
* used by the `html-js` feed adapter (ADR-0010).
|
|
4
|
+
*
|
|
5
|
+
* The `html-js` adapter resolves Playwright at fetch time via
|
|
6
|
+
* `await import("playwright")`, so the package is intentionally optional in
|
|
7
|
+
* `package.json#peerDependenciesMeta`. Two CLI surfaces need to inspect that
|
|
8
|
+
* resolution outside of the adapter itself:
|
|
9
|
+
*
|
|
10
|
+
* 1. `radar doctor` — proactively reports whether Playwright + Chromium are
|
|
11
|
+
* installed so users can fix the environment before scheduling a watch.
|
|
12
|
+
* 2. `radar watch run` — lazily probes Playwright on the first `html-js`
|
|
13
|
+
* source so a missing install does not abort the whole run; the affected
|
|
14
|
+
* source is skipped with an actionable error, other kinds continue.
|
|
15
|
+
*
|
|
16
|
+
* Both paths share the same probe / install helpers here to keep the install
|
|
17
|
+
* hint and `RADAR_AUTO_INSTALL_CHROMIUM` escape hatch in one place. The escape
|
|
18
|
+
* hatch exists for CI scenarios where a fresh runner has `playwright` itself
|
|
19
|
+
* (e.g. via `npm i`) but no browser binary on disk yet.
|
|
20
|
+
*
|
|
21
|
+
* Policy choices (intentional, not for adapter to second-guess):
|
|
22
|
+
*
|
|
23
|
+
* - We do NOT auto-install the `playwright` npm package. Global npm installs
|
|
24
|
+
* fail in non-obvious ways (permissions, version mismatches with the
|
|
25
|
+
* workspace's lockfile), so the user must run `npm i -g playwright`
|
|
26
|
+
* themselves and get a clear error from `npm` if it fails.
|
|
27
|
+
* - We DO auto-install Chromium when `RADAR_AUTO_INSTALL_CHROMIUM=1` is set
|
|
28
|
+
* and Playwright is present. `npx playwright install chromium` is the
|
|
29
|
+
* official path, idempotent, and well-supported in CI runners.
|
|
30
|
+
*/
|
|
31
|
+
/**
|
|
32
|
+
* Minimal Playwright surface this module reasons about. We only need
|
|
33
|
+
* `chromium.executablePath()` (sync function returning a fs path) — the
|
|
34
|
+
* fetcher in `feeds/html-js.ts` keeps its own structural type for the launch
|
|
35
|
+
* subset it actually uses.
|
|
36
|
+
*/
|
|
37
|
+
export interface PlaywrightModuleLike {
|
|
38
|
+
chromium: {
|
|
39
|
+
executablePath: () => string;
|
|
40
|
+
};
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* Outcome of a Playwright probe. Discriminated union so callers branch
|
|
44
|
+
* cleanly on the failure mode without parsing error messages.
|
|
45
|
+
*
|
|
46
|
+
* - `ok`: module loaded AND `chromium.executablePath()` points at an
|
|
47
|
+
* existing file on disk.
|
|
48
|
+
* - `module-missing`: `import("playwright")` threw (package not installed).
|
|
49
|
+
* - `chromium-missing`: module loaded but the executable path is absent.
|
|
50
|
+
* The path is included so callers can show it to the user.
|
|
51
|
+
*/
|
|
52
|
+
export type PlaywrightProbeResult = {
|
|
53
|
+
status: "ok";
|
|
54
|
+
executablePath: string;
|
|
55
|
+
} | {
|
|
56
|
+
status: "module-missing";
|
|
57
|
+
message: string;
|
|
58
|
+
} | {
|
|
59
|
+
status: "chromium-missing";
|
|
60
|
+
executablePath: string;
|
|
61
|
+
};
|
|
62
|
+
/**
|
|
63
|
+
* Test seam: lets unit tests inject a fake importer / `pathExists` so we can
|
|
64
|
+
* exercise every branch (module missing, chromium missing, ok) without
|
|
65
|
+
* touching the real Playwright install.
|
|
66
|
+
*
|
|
67
|
+
* The real CLI never passes these; defaults are dynamic import + `fs.access`.
|
|
68
|
+
*/
|
|
69
|
+
export interface ProbeOptions {
|
|
70
|
+
/** Replace dynamic `import("playwright")` (tests only). */
|
|
71
|
+
importPlaywright?: () => Promise<unknown>;
|
|
72
|
+
/** Replace fs existence check (tests only). */
|
|
73
|
+
pathExists?: (p: string) => Promise<boolean>;
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* Probe whether Playwright + Chromium are usable from this process.
|
|
77
|
+
*
|
|
78
|
+
* Order matters: we MUST surface "module missing" before attempting any
|
|
79
|
+
* property access — `chromium.executablePath()` would throw with a less
|
|
80
|
+
* actionable message ("Cannot read properties of undefined").
|
|
81
|
+
*/
|
|
82
|
+
export declare function probePlaywright(options?: ProbeOptions): Promise<PlaywrightProbeResult>;
|
|
83
|
+
/**
|
|
84
|
+
* User-facing install hint emitted when Playwright (the npm package) is
|
|
85
|
+
* missing. The text matches the wording in `feeds/html-js.ts#loadPlaywright`
|
|
86
|
+
* so users see consistent guidance across `doctor` and `watch run`.
|
|
87
|
+
*
|
|
88
|
+
* `RADAR_AUTO_INSTALL_CHROMIUM` is mentioned only in the Chromium-missing
|
|
89
|
+
* branch — auto-installing the npm package itself is intentionally out of
|
|
90
|
+
* scope (see module header).
|
|
91
|
+
*/
|
|
92
|
+
export declare const PLAYWRIGHT_MODULE_MISSING_HINT: string;
|
|
93
|
+
/**
|
|
94
|
+
* User-facing install hint emitted when the npm package is present but the
|
|
95
|
+
* Chromium binary on disk is not. Mentions the auto-install escape hatch
|
|
96
|
+
* since this is the branch it actually applies to.
*
* The text is two lines: the manual `npx playwright install chromium`
* command, then the `RADAR_AUTO_INSTALL_CHROMIUM=1` alternative.
|
|
97
|
+
*/
|
|
98
|
+
export declare const CHROMIUM_MISSING_HINT: string;
|
|
99
|
+
/**
|
|
100
|
+
* Test seam for the spawn used by `installChromium`. Production passes
|
|
101
|
+
* the real `child_process.spawn`; tests inject a fake that returns a
|
|
102
|
+
* predetermined exit code without actually launching a subprocess.
*
* `installChromium` only subscribes to the `close` and `error` events of the
* returned handle, so a fake needs nothing beyond `on`.
|
|
103
|
+
*/
|
|
104
|
+
export type InstallSpawnLike = (command: string, args: readonly string[], options: {
|
|
105
|
+
cwd?: string;
|
|
106
|
+
stdio?: "inherit" | "pipe" | "ignore";
|
|
107
|
+
}) => {
|
|
108
|
+
on(event: "close", listener: (code: number | null) => void): void;
|
|
109
|
+
on(event: "error", listener: (err: Error) => void): void;
|
|
110
|
+
};
|
|
111
|
+
/** Options accepted by `installChromium`; every field is optional. */
export interface InstallChromiumOptions {
|
|
112
|
+
/** Working directory for the spawned `npx`. Defaults to the caller's cwd. */
|
|
113
|
+
cwd?: string;
|
|
114
|
+
/** Test seam: swap out `child_process.spawn`. */
|
|
115
|
+
spawnImpl?: InstallSpawnLike;
|
|
116
|
+
/** Sink for progress messages (defaults to console.log). */
|
|
117
|
+
log?: (message: string) => void;
|
|
118
|
+
}
|
|
119
|
+
/**
|
|
120
|
+
* Spawn `npx playwright install chromium` and resolve when it exits.
|
|
121
|
+
*
|
|
122
|
+
* Used by the `RADAR_AUTO_INSTALL_CHROMIUM=1` escape hatch. The child is
|
|
123
|
+
* started with `stdio: "inherit"` so the user (or CI logs) sees Playwright's
|
|
124
|
+
* progress in real time — `npx playwright install` already prints
|
|
125
|
+
* download URLs and percentages that are helpful debugging signal when the
|
|
126
|
+
* install fails. Resolves to the child's exit code so callers can decide
|
|
127
|
+
* whether to retry the original operation.
|
|
128
|
+
*
|
|
129
|
+
* Note we explicitly use `npx` (not direct binary lookup) because Playwright
|
|
130
|
+
* does not expose a JS API for browser install; the CLI is the supported
|
|
131
|
+
* entrypoint per Playwright docs.
*
* @param options Optional working directory, spawn replacement and log sink.
* @returns The child's exit code, or `1` when the child closes with a `null`
* code. The promise rejects if the child emits `error`.
|
|
132
|
+
*/
|
|
133
|
+
export declare function installChromium(options?: InstallChromiumOptions): Promise<number>;
|
|
134
|
+
//# sourceMappingURL=playwright-check.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"playwright-check.d.ts","sourceRoot":"","sources":["../../src/core/playwright-check.ts"],"names":[],"mappings":"AAGA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AAEH;;;;;GAKG;AACH,MAAM,WAAW,oBAAoB;IACnC,QAAQ,EAAE;QACR,cAAc,EAAE,MAAM,MAAM,CAAC;KAC9B,CAAC;CACH;AAED;;;;;;;;;GASG;AACH,MAAM,MAAM,qBAAqB,GAC7B;IAAE,MAAM,EAAE,IAAI,CAAC;IAAC,cAAc,EAAE,MAAM,CAAA;CAAE,GACxC;IAAE,MAAM,EAAE,gBAAgB,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,GAC7C;IAAE,MAAM,EAAE,kBAAkB,CAAC;IAAC,cAAc,EAAE,MAAM,CAAA;CAAE,CAAC;AAE3D;;;;;;GAMG;AACH,MAAM,WAAW,YAAY;IAC3B,2DAA2D;IAC3D,gBAAgB,CAAC,EAAE,MAAM,OAAO,CAAC,OAAO,CAAC,CAAC;IAC1C,+CAA+C;IAC/C,UAAU,CAAC,EAAE,CAAC,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;CAC9C;AAWD;;;;;;GAMG;AACH,wBAAsB,eAAe,CAAC,OAAO,GAAE,YAAiB,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAgChG;AAED;;;;;;;;GAQG;AACH,eAAO,MAAM,8BAA8B,QAE0B,CAAC;AAEtE;;;;GAIG;AACH,eAAO,MAAM,qBAAqB,QAEmC,CAAC;AAEtE;;;;GAIG;AACH,MAAM,MAAM,gBAAgB,GAAG,CAC7B,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,SAAS,MAAM,EAAE,EACvB,OAAO,EAAE;IAAE,GAAG,CAAC,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,SAAS,GAAG,MAAM,GAAG,QAAQ,CAAA;CAAE,KAC7D;IACH,EAAE,CAAC,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,KAAK,IAAI,GAAG,IAAI,CAAC;IAClE,EAAE,CAAC,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC,GAAG,EAAE,KAAK,KAAK,IAAI,GAAG,IAAI,CAAC;CAC1D,CAAC;AAEF,MAAM,WAAW,sBAAsB;IACrC,6EAA6E;IAC7E,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,iDAAiD;IACjD,SAAS,CAAC,EAAE,gBAAgB,CAAC;IAC7B,4DAA4D;IAC5D,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;CACjC;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAsB,eAAe,CAAC,OAAO,GAAE,sBAA2B,GAAG,OAAO,CAAC,MAAM,CAAC,CAc3F"}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import { spawn } from "node:child_process";
|
|
2
|
+
import { access } from "node:fs/promises";
|
|
3
|
+
/**
 * Default existence check used by `probePlaywright` when no `pathExists`
 * seam is supplied.
 *
 * @param {string} p Path to test.
 * @returns {Promise<boolean>} `true` when `fs.access` succeeds for `p`,
 *   `false` when it fails for any reason.
 */
async function defaultPathExists(p) {
    let reachable = true;
    try {
        await access(p);
    }
    catch {
        // Any access failure is treated as "not there".
        reachable = false;
    }
    return reachable;
}
|
|
12
|
+
/**
 * Report whether Playwright and its Chromium binary can be used from this
 * process.
 *
 * The checks run in a fixed order so the most actionable failure wins:
 *   1. load the module      -> `module-missing` (with the import error text)
 *   2. ask for the binary   -> `chromium-missing` (error text in parentheses)
 *   3. check it is on disk  -> `chromium-missing` (with the resolved path)
 * Only when all three pass is `ok` returned.
 *
 * @param {object} [options] Test seams; both fields are optional.
 * @param {() => Promise<unknown>} [options.importPlaywright] Replacement for
 *   the dynamic `import("playwright")`.
 * @param {(p: string) => Promise<boolean>} [options.pathExists] Replacement
 *   for the `fs.access`-based existence check.
 * @returns {Promise<object>} `{ status, executablePath }` or, for
 *   `module-missing`, `{ status, message }`.
 */
export async function probePlaywright(options = {}) {
    const { importPlaywright, pathExists } = options;
    const loadModule = importPlaywright ?? (() => import("playwright"));
    const exists = pathExists ?? defaultPathExists;

    // Step 1: the module must load before any property of it is touched.
    let playwright;
    try {
        playwright = await loadModule();
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        return { status: "module-missing", message };
    }

    // Step 2: `executablePath()` may throw; report that as the same
    // user-actionable failure as a missing file, carrying the error text in
    // parentheses in place of a real path.
    let chromiumPath;
    try {
        chromiumPath = playwright.chromium.executablePath();
    }
    catch (err) {
        const executablePath = err instanceof Error ? `(${err.message})` : "(unknown)";
        return { status: "chromium-missing", executablePath };
    }

    // Step 3: the reported path must actually exist.
    const found = await exists(chromiumPath);
    return {
        status: found ? "ok" : "chromium-missing",
        executablePath: chromiumPath,
    };
}
|
|
51
|
+
/**
 * Install hint shown to the user when the `playwright` npm package itself
 * cannot be loaded. The text is two lines: the manual install command, then
 * the `RADAR_AUTO_INSTALL_CHROMIUM=1` alternative.
 *
 * The wording is meant to stay in sync with
 * `feeds/html-js.ts#loadPlaywright` so `doctor` and `watch run` give the same
 * guidance.
 *
 * NOTE(review): earlier documentation stated that the auto-install switch is
 * mentioned only in the Chromium-missing hint, yet this text mentions it as
 * well, while `installChromium` only runs `npx playwright install chromium`.
 * Confirm whether the second line belongs here.
 */
export const PLAYWRIGHT_MODULE_MISSING_HINT = [
    "Playwright is required for kind: html-js. Run: npm i -g playwright && npx playwright install chromium",
    "Or set RADAR_AUTO_INSTALL_CHROMIUM=1 to auto-install on next run.",
].join("\n");
|
|
62
|
+
/**
 * Install hint shown to the user when the npm package loads but the Chromium
 * binary is not available. The text is two lines: the manual
 * `npx playwright install chromium` command, then the
 * `RADAR_AUTO_INSTALL_CHROMIUM=1` escape hatch, which is the situation that
 * switch is designed for.
 */
export const CHROMIUM_MISSING_HINT = [
    "Chromium binary not found. Run: npx playwright install chromium",
    "Or set RADAR_AUTO_INSTALL_CHROMIUM=1 to auto-install on next run.",
].join("\n");
|
|
69
|
+
/**
 * Spawn `npx playwright install chromium` and resolve when it exits.
 *
 * Used by the `RADAR_AUTO_INSTALL_CHROMIUM=1` escape hatch. The child is
 * started with `stdio: "inherit"` so the user (or CI logs) sees Playwright's
 * progress in real time — `npx playwright install` already prints download
 * URLs and percentages that are helpful debugging signal when the install
 * fails.
 *
 * `npx` is used (not a direct binary lookup) because Playwright does not
 * expose a JS API for browser install; the CLI is the supported entrypoint
 * per Playwright docs.
 *
 * On Windows `npx` is a batch shim (`npx.cmd`), which `child_process.spawn`
 * cannot launch without a shell, so `shell: true` is added on that platform
 * only. The command and arguments are fixed literals, so running them through
 * a shell introduces no injection surface. On every other platform the spawn
 * call is unchanged.
 *
 * @param {object} [options]
 * @param {string} [options.cwd] Working directory for the spawned `npx`.
 * @param {Function} [options.spawnImpl] Test seam replacing
 *   `child_process.spawn`.
 * @param {(message: string) => void} [options.log] Sink for the progress
 *   message (defaults to `console.log`).
 * @returns {Promise<number>} The child's exit code, or `1` when the child
 *   closes with a `null` code. Rejects if the child emits `error`.
 */
export async function installChromium(options = {}) {
    const spawnImpl = options.spawnImpl ?? spawn;
    const log = options.log ?? ((m) => console.log(m));
    log("Installing Chromium via `npx playwright install chromium`...");
    const spawnOptions = {
        cwd: options.cwd,
        stdio: "inherit",
    };
    if (process.platform === "win32") {
        // `.cmd` shims are only runnable through a shell on Windows.
        spawnOptions.shell = true;
    }
    return new Promise((resolve, reject) => {
        const child = spawnImpl("npx", ["playwright", "install", "chromium"], spawnOptions);
        child.on("error", reject);
        child.on("close", (code) => {
            // A null code means no exit code was reported; treat it as failure.
            resolve(code ?? 1);
        });
    });
}
|
|
98
|
+
//# sourceMappingURL=playwright-check.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"playwright-check.js","sourceRoot":"","sources":["../../src/core/playwright-check.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAC3C,OAAO,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAC;AA0E1C,KAAK,UAAU,iBAAiB,CAAC,CAAS;IACxC,IAAI,CAAC;QACH,MAAM,MAAM,CAAC,CAAC,CAAC,CAAC;QAChB,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,UAAwB,EAAE;IAC9D,MAAM,gBAAgB,GAAG,OAAO,CAAC,gBAAgB,IAAI,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC;IAClF,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,iBAAiB,CAAC;IAE3D,IAAI,GAAyB,CAAC;IAC9B,IAAI,CAAC;QACH,GAAG,GAAG,CAAC,MAAM,gBAAgB,EAAE,CAAyB,CAAC;IAC3D,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,OAAO;YACL,MAAM,EAAE,gBAAgB;YACxB,OAAO,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;SACpD,CAAC;IACJ,CAAC;IAED,IAAI,QAAgB,CAAC;IACrB,IAAI,CAAC;QACH,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC,cAAc,EAAE,CAAC;IAC3C,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,qEAAqE;QACrE,2EAA2E;QAC3E,0EAA0E;QAC1E,yBAAyB;QACzB,OAAO;YACL,MAAM,EAAE,kBAAkB;YAC1B,cAAc,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC,WAAW;SACpE,CAAC;IACJ,CAAC;IAED,IAAI,CAAC,CAAC,MAAM,UAAU,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC;QAClC,OAAO,EAAE,MAAM,EAAE,kBAAkB,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC;IAClE,CAAC;IACD,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC;AACpD,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,8BAA8B,GACzC,yGAAyG;IACzG,mEAAmE,CAAC;AAEtE;;;;GAIG;AACH,MAAM,CAAC,MAAM,qBAAqB,GAChC,mEAAmE;IACnE,mEAAmE,CAAC;AAyBtE;;;;;;;;;;;;;GAaG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,UAAkC,EAAE;IACxE,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAK,KAAqC,CAAC;IAC9E,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,IAAI,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IAC3D,GAAG,CAAC,8DAA8D,CAAC,CAAC;IACpE,OAAO,IAAI,OAAO,CAAS,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QAC7C,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,EAAE,CAAC,YAAY,EAAE,SAAS,EAAE,UAAU,CAAC,EAAE;YACpE,GAAG,EAAE,OAAO,CAAC,GAAG;YAChB,KAAK,EAAE,SAAS;SACjB,CA
AC,CAAC;QACH,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAC1B,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;YACzB,OAAO,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC;QACrB,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC"}
|