@ozzylabs/feedradar 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/README.ja.md +13 -5
  2. package/README.md +13 -5
  3. package/dist/cli/doctor.d.ts +83 -0
  4. package/dist/cli/doctor.d.ts.map +1 -0
  5. package/dist/cli/doctor.js +260 -0
  6. package/dist/cli/doctor.js.map +1 -0
  7. package/dist/cli/index.d.ts.map +1 -1
  8. package/dist/cli/index.js +2 -2
  9. package/dist/cli/index.js.map +1 -1
  10. package/dist/cli/source.d.ts.map +1 -1
  11. package/dist/cli/source.js +6 -3
  12. package/dist/cli/source.js.map +1 -1
  13. package/dist/cli/watch.d.ts +16 -0
  14. package/dist/cli/watch.d.ts.map +1 -1
  15. package/dist/cli/watch.js +3 -0
  16. package/dist/cli/watch.js.map +1 -1
  17. package/dist/core/feeds/_html-common.d.ts +30 -0
  18. package/dist/core/feeds/_html-common.d.ts.map +1 -0
  19. package/dist/core/feeds/_html-common.js +192 -0
  20. package/dist/core/feeds/_html-common.js.map +1 -0
  21. package/dist/core/feeds/html-js.d.ts +50 -0
  22. package/dist/core/feeds/html-js.d.ts.map +1 -0
  23. package/dist/core/feeds/html-js.js +135 -0
  24. package/dist/core/feeds/html-js.js.map +1 -0
  25. package/dist/core/feeds/html.d.ts +1 -7
  26. package/dist/core/feeds/html.d.ts.map +1 -1
  27. package/dist/core/feeds/html.js +5 -180
  28. package/dist/core/feeds/html.js.map +1 -1
  29. package/dist/core/feeds/index.d.ts.map +1 -1
  30. package/dist/core/feeds/index.js +2 -0
  31. package/dist/core/feeds/index.js.map +1 -1
  32. package/dist/core/playwright-check.d.ts +134 -0
  33. package/dist/core/playwright-check.d.ts.map +1 -0
  34. package/dist/core/playwright-check.js +98 -0
  35. package/dist/core/playwright-check.js.map +1 -0
  36. package/dist/core/watcher.d.ts +17 -0
  37. package/dist/core/watcher.d.ts.map +1 -1
  38. package/dist/core/watcher.js +59 -0
  39. package/dist/core/watcher.js.map +1 -1
  40. package/dist/schemas/source.d.ts +42 -0
  41. package/dist/schemas/source.d.ts.map +1 -1
  42. package/dist/schemas/source.js +42 -7
  43. package/dist/schemas/source.js.map +1 -1
  44. package/dist/templates/agents/AGENTS.md +2 -2
  45. package/dist/templates/feedradar.md +2 -2
  46. package/package.json +11 -1
@@ -1,3 +1,4 @@
1
+ import type { installChromium, ProbeOptions } from "../core/playwright-check.js";
1
2
  import type { Command } from "./index.js";
2
3
  export interface WatchIO {
3
4
  log?: (message: string) => void;
@@ -9,6 +10,21 @@ export interface WatchCommandOptions {
9
10
  io?: WatchIO;
10
11
  /** Test seam: override the adapter HTTP fetcher. */
11
12
  fetch?: typeof globalThis.fetch;
13
+ /**
14
+ * Test seam: override the Playwright probe used by the lazy `html-js`
15
+ * pre-check. Threaded straight through to `watchRun` — see watcher.ts.
16
+ */
17
+ playwrightProbeOptions?: ProbeOptions;
18
+ /**
19
+ * Test seam: override `process.env` lookup so the test can toggle
20
+ * `RADAR_AUTO_INSTALL_CHROMIUM=1` deterministically.
21
+ */
22
+ env?: NodeJS.ProcessEnv;
23
+ /**
24
+ * Test seam: override the Chromium auto-install function. Tests inject a
25
+ * stub that records invocation without spawning the real `npx`.
26
+ */
27
+ installChromiumImpl?: typeof installChromium;
12
28
  }
13
29
  /**
14
30
  * Implementation of `watch run`.
@@ -1 +1 @@
1
- {"version":3,"file":"watch.d.ts","sourceRoot":"","sources":["../../src/cli/watch.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC;AAE1C,MAAM,WAAW,OAAO;IACtB,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;IAChC,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;IACjC,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;CACnC;AAED,MAAM,WAAW,mBAAmB;IAClC,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,EAAE,CAAC,EAAE,OAAO,CAAC;IACb,oDAAoD;IACpD,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AA2CD;;;;;;GAMG;AACH,wBAAsB,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,OAAO,GAAE,mBAAwB,GAAG,OAAO,CAAC,MAAM,CAAC,CA4CjG;AAED,eAAO,MAAM,YAAY,EAAE,OAgB1B,CAAC"}
1
+ {"version":3,"file":"watch.d.ts","sourceRoot":"","sources":["../../src/cli/watch.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,YAAY,EAAE,MAAM,6BAA6B,CAAC;AAEjF,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC;AAE1C,MAAM,WAAW,OAAO;IACtB,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;IAChC,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;IACjC,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;CACnC;AAED,MAAM,WAAW,mBAAmB;IAClC,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,EAAE,CAAC,EAAE,OAAO,CAAC;IACb,oDAAoD;IACpD,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;IAChC;;;OAGG;IACH,sBAAsB,CAAC,EAAE,YAAY,CAAC;IACtC;;;OAGG;IACH,GAAG,CAAC,EAAE,MAAM,CAAC,UAAU,CAAC;IACxB;;;OAGG;IACH,mBAAmB,CAAC,EAAE,OAAO,eAAe,CAAC;CAC9C;AA2CD;;;;;;GAMG;AACH,wBAAsB,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,OAAO,GAAE,mBAAwB,GAAG,OAAO,CAAC,MAAM,CAAC,CA+CjG;AAED,eAAO,MAAM,YAAY,EAAE,OAgB1B,CAAC"}
package/dist/cli/watch.js CHANGED
@@ -66,6 +66,9 @@ export async function runWatch(args, options = {}) {
66
66
  log,
67
67
  warn,
68
68
  error,
69
+ env: options.env,
70
+ playwrightProbeOptions: options.playwrightProbeOptions,
71
+ installChromiumImpl: options.installChromiumImpl,
69
72
  });
70
73
  }
71
74
  catch (e) {
@@ -1 +1 @@
1
- {"version":3,"file":"watch.js","sourceRoot":"","sources":["../../src/cli/watch.ts"],"names":[],"mappings":"AAAA,OAAO,EAAuB,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAsBnE,SAAS,YAAY,CAAC,IAAc;IAClC,MAAM,GAAG,GAAiB,EAAE,CAAC;IAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,QAAQ,EAAE,CAAC;YACjC,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC;YAChB,SAAS;QACX,CAAC;QACD,IAAI,CAAC,KAAK,UAAU,EAAE,CAAC;YACrB,GAAG,CAAC,QAAQ,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;YACzB,SAAS;QACX,CAAC;QACD,IAAI,CAAC,KAAK,aAAa,EAAE,CAAC;YACxB,GAAG,CAAC,SAAS,GAAG,IAAI,CAAC;YACrB,SAAS;QACX,CAAC;QACD,IAAI,CAAC,EAAE,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YACxB,MAAM,IAAI,KAAK,CAAC,mBAAmB,CAAC,EAAE,CAAC,CAAC;QAC1C,CAAC;QACD,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,EAAE,CAAC,CAAC;IAC/C,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,cAAc,CAAC,GAAwB;IAC9C,GAAG,CAAC,oCAAoC,CAAC,CAAC;IAC1C,GAAG,CAAC,EAAE,CAAC,CAAC;IACR,GAAG,CAAC,cAAc,CAAC,CAAC;IACpB,GAAG,CAAC,uEAAuE,CAAC,CAAC;IAC7E,GAAG,CAAC,EAAE,CAAC,CAAC;IACR,GAAG,CAAC,kBAAkB,CAAC,CAAC;IACxB,GAAG,CAAC,uDAAuD,CAAC,CAAC;IAC7D,GAAG,CAAC,oFAAoF,CAAC,CAAC;AAC5F,CAAC;AAED;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,IAAc,EAAE,UAA+B,EAAE;IAC9E,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;IACzC,MAAM,GAAG,GAAG,OAAO,CAAC,EAAE,EAAE,GAAG,IAAI,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IAC/D,MAAM,IAAI,GAAG,OAAO,CAAC,EAAE,EAAE,IAAI,IAAI,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;IAClE,MAAM,KAAK,GAAG,OAAO,CAAC,EAAE,EAAE,KAAK,IAAI,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAErE,IAAI,MAAoB,CAAC;IACzB,IAAI,CAAC;QACH,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAC9B,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,KAAK,CAAC,cAAc,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QAClE,OAAO,CAAC,CAAC;IACX,CAAC;IACD,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;QAChB,cAAc,CAAC,GAAG,CAAC,CAAC;QACpB,OAAO,CAA
C,CAAC;IACX,CAAC;IAED,IAAI,MAAsB,CAAC;IAC3B,IAAI,CAAC;QACH,MAAM,GAAG,MAAM,QAAQ,CAAC;YACtB,GAAG;YACH,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACzB,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,KAAK,EAAE,OAAO,CAAC,KAAc;YAC7B,GAAG;YACH,IAAI;YACJ,KAAK;SACN,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,KAAK,CAAC,cAAc,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QAClE,OAAO,CAAC,CAAC;IACX,CAAC;IAED,MAAM,aAAa,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,EAAE,CAAC,GAAG,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IACjG,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;QACrB,GAAG,CAAC,kCAAkC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,MAAM,WAAW,CAAC,CAAC;IACtF,CAAC;SAAM,CAAC;QACN,GAAG,CACD,cAAc,aAAa,uBAAuB,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,MAAM,YAAY,CAChG,CAAC;IACJ,CAAC;IAED,OAAO,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAC1C,CAAC;AAED,MAAM,CAAC,MAAM,YAAY,GAAY;IACnC,IAAI,EAAE,OAAO;IACb,OAAO,EAAE,gDAAgD;IACzD,GAAG,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE;QAClB,MAAM,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC;QAC5B,IAAI,CAAC,GAAG,IAAI,GAAG,KAAK,IAAI,IAAI,GAAG,KAAK,QAAQ,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;YAC/D,cAAc,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YACtC,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,CAAC;QACD,IAAI,GAAG,KAAK,KAAK,EAAE,CAAC;YAClB,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC;QACxB,CAAC;QACD,OAAO,CAAC,KAAK,CAAC,8BAA8B,GAAG,GAAG,CAAC,CAAC;QACpD,cAAc,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QACxC,OAAO,CAAC,CAAC;IACX,CAAC;CACF,CAAC"}
1
+ {"version":3,"file":"watch.js","sourceRoot":"","sources":["../../src/cli/watch.ts"],"names":[],"mappings":"AACA,OAAO,EAAuB,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAqCnE,SAAS,YAAY,CAAC,IAAc;IAClC,MAAM,GAAG,GAAiB,EAAE,CAAC;IAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,QAAQ,EAAE,CAAC;YACjC,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC;YAChB,SAAS;QACX,CAAC;QACD,IAAI,CAAC,KAAK,UAAU,EAAE,CAAC;YACrB,GAAG,CAAC,QAAQ,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;YACzB,SAAS;QACX,CAAC;QACD,IAAI,CAAC,KAAK,aAAa,EAAE,CAAC;YACxB,GAAG,CAAC,SAAS,GAAG,IAAI,CAAC;YACrB,SAAS;QACX,CAAC;QACD,IAAI,CAAC,EAAE,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YACxB,MAAM,IAAI,KAAK,CAAC,mBAAmB,CAAC,EAAE,CAAC,CAAC;QAC1C,CAAC;QACD,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,EAAE,CAAC,CAAC;IAC/C,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,cAAc,CAAC,GAAwB;IAC9C,GAAG,CAAC,oCAAoC,CAAC,CAAC;IAC1C,GAAG,CAAC,EAAE,CAAC,CAAC;IACR,GAAG,CAAC,cAAc,CAAC,CAAC;IACpB,GAAG,CAAC,uEAAuE,CAAC,CAAC;IAC7E,GAAG,CAAC,EAAE,CAAC,CAAC;IACR,GAAG,CAAC,kBAAkB,CAAC,CAAC;IACxB,GAAG,CAAC,uDAAuD,CAAC,CAAC;IAC7D,GAAG,CAAC,oFAAoF,CAAC,CAAC;AAC5F,CAAC;AAED;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,IAAc,EAAE,UAA+B,EAAE;IAC9E,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;IACzC,MAAM,GAAG,GAAG,OAAO,CAAC,EAAE,EAAE,GAAG,IAAI,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IAC/D,MAAM,IAAI,GAAG,OAAO,CAAC,EAAE,EAAE,IAAI,IAAI,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;IAClE,MAAM,KAAK,GAAG,OAAO,CAAC,EAAE,EAAE,KAAK,IAAI,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAErE,IAAI,MAAoB,CAAC;IACzB,IAAI,CAAC;QACH,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAC9B,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,KAAK,CAAC,cAAc,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QAClE,OAAO,CAAC,CAAC;IACX,CAAC;IACD,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;QAChB,cAAc,CAAC,GAAG,CAAC,CAAC;QACpB,OAAO,CAA
C,CAAC;IACX,CAAC;IAED,IAAI,MAAsB,CAAC;IAC3B,IAAI,CAAC;QACH,MAAM,GAAG,MAAM,QAAQ,CAAC;YACtB,GAAG;YACH,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACzB,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,KAAK,EAAE,OAAO,CAAC,KAAc;YAC7B,GAAG;YACH,IAAI;YACJ,KAAK;YACL,GAAG,EAAE,OAAO,CAAC,GAAG;YAChB,sBAAsB,EAAE,OAAO,CAAC,sBAAsB;YACtD,mBAAmB,EAAE,OAAO,CAAC,mBAAmB;SACjD,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,KAAK,CAAC,cAAc,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QAClE,OAAO,CAAC,CAAC;IACX,CAAC;IAED,MAAM,aAAa,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,EAAE,CAAC,GAAG,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IACjG,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;QACrB,GAAG,CAAC,kCAAkC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,MAAM,WAAW,CAAC,CAAC;IACtF,CAAC;SAAM,CAAC;QACN,GAAG,CACD,cAAc,aAAa,uBAAuB,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,MAAM,YAAY,CAChG,CAAC;IACJ,CAAC;IAED,OAAO,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAC1C,CAAC;AAED,MAAM,CAAC,MAAM,YAAY,GAAY;IACnC,IAAI,EAAE,OAAO;IACb,OAAO,EAAE,gDAAgD;IACzD,GAAG,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE;QAClB,MAAM,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC;QAC5B,IAAI,CAAC,GAAG,IAAI,GAAG,KAAK,IAAI,IAAI,GAAG,KAAK,QAAQ,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;YAC/D,cAAc,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YACtC,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,CAAC;QACD,IAAI,GAAG,KAAK,KAAK,EAAE,CAAC;YAClB,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC;QACxB,CAAC;QACD,OAAO,CAAC,KAAK,CAAC,8BAA8B,GAAG,GAAG,CAAC,CAAC;QACpD,cAAc,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QACxC,OAAO,CAAC,CAAC;IACX,CAAC;CACF,CAAC"}
@@ -0,0 +1,30 @@
1
+ import type { Item, Source } from "../../schemas/index.js";
2
+ /**
3
+ * Shared parsing primitives for the `kind: html` (static) and `kind: html-js`
4
+ * (Playwright-rendered) adapters (ADR-0010 §D1).
5
+ *
6
+ * Both adapters apply the same `SourceSelectors` contract to a serialized HTML
7
+ * string — the only difference is how that string was acquired (raw HTTP body
8
+ * vs `page.content()` after JS execution). Extracting `parseHtmlDocument` and
9
+ * `contentHash` here keeps the selector semantics and dedup marker format in
10
+ * lockstep so a switch from `html` to `html-js` is transparent to downstream
11
+ * consumers (dedup, state file, watcher).
12
+ */
13
+ /**
14
+ * Prefix that flags a `lastEtag` slot as carrying a content hash rather than
15
+ * an actual HTTP ETag. Both adapters reuse the `lastEtag` field so neither
16
+ * has to migrate `SourceState` (see `docs/design/source-html.md`).
17
+ */
18
+ export declare const CONTENT_HASH_PREFIX = "sha256:";
19
+ /**
20
+ * Parse an HTML document into validated `Item[]` using the source's
21
+ * `selectors`. Both `kind: html` and `kind: html-js` go through here so the
22
+ * selector contract stays in one place.
23
+ */
24
+ export declare function parseHtmlDocument(html: string, source: Source, fetchedAt: string): Item[];
25
+ /**
26
+ * Compute the sha256 of the given body string, prefixed so callers can tell
27
+ * it apart from a real ETag inside `SourceState.lastEtag`.
28
+ */
29
+ export declare function contentHash(body: string): string;
30
+ //# sourceMappingURL=_html-common.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"_html-common.d.ts","sourceRoot":"","sources":["../../../src/core/feeds/_html-common.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,EAAmB,MAAM,wBAAwB,CAAC;AAI5E;;;;;;;;;;GAUG;AAEH;;;;GAIG;AACH,eAAO,MAAM,mBAAmB,YAAY,CAAC;AAoJ7C;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,IAAI,EAAE,CAiBzF;AAED;;;GAGG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEhD"}
@@ -0,0 +1,192 @@
1
+ import { createHash } from "node:crypto";
2
+ import { parse as parseHtml } from "node-html-parser";
3
+ import { ItemSchema } from "../../schemas/index.js";
4
+ import { deriveItemId, deriveStableKey } from "./derive-id.js";
5
+ /**
6
+ * Shared parsing primitives for the `kind: html` (static) and `kind: html-js`
7
+ * (Playwright-rendered) adapters (ADR-0010 §D1).
8
+ *
9
+ * Both adapters apply the same `SourceSelectors` contract to a serialized HTML
10
+ * string — the only difference is how that string was acquired (raw HTTP body
11
+ * vs `page.content()` after JS execution). Extracting `parseHtmlDocument` and
12
+ * `contentHash` here keeps the selector semantics and dedup marker format in
13
+ * lockstep so a switch from `html` to `html-js` is transparent to downstream
14
+ * consumers (dedup, state file, watcher).
15
+ */
16
+ /**
17
+ * Prefix that flags a `lastEtag` slot as carrying a content hash rather than
18
+ * an actual HTTP ETag. Both adapters reuse the `lastEtag` field so neither
19
+ * has to migrate `SourceState` (see `docs/design/source-html.md`).
20
+ */
21
+ export const CONTENT_HASH_PREFIX = "sha256:";
22
+ /** Attributes `pickDatetime` probes, in order, before falling back to text content. */
23
+ const DATETIME_ATTRS = ["datetime", "content", "value"];
24
+ /**
25
+ * Convert an `HTMLElement | null` to its trimmed text, or `undefined` when
26
+ * the selector did not match or the text is empty. We always trim because raw scrapes routinely
27
+ * carry surrounding whitespace from formatted markup.
28
+ */
29
+ function textOf(el) {
30
+ if (!el)
31
+ return undefined;
32
+ const text = el.text?.trim();
33
+ return text ? text : undefined;
34
+ }
35
+ /**
36
+ * Apply a CSS selector relative to `root` and return the first match.
37
+ * `node-html-parser` returns `null` instead of throwing for invalid input,
38
+ * which matches what callers want here (a missing field, not a hard error).
39
+ */
40
+ function queryFirst(root, selector) {
41
+ return root.querySelector(selector);
42
+ }
43
+ /**
44
+ * Resolve the `link` selector to an `href` (or text fallback).
45
+ *
46
+ * Anchor tags expose the URL via `href` so we prefer the attribute. When the
47
+ * selector points at a non-anchor (e.g. a `<div data-link>` wrapper used by
48
+ * some changelog layouts), we fall back to text content so the adapter can
49
+ * still operate, deferring URL validation to `ItemSchema`.
50
+ */
51
+ function pickLink(el) {
52
+ if (!el)
53
+ return undefined;
54
+ const href = el.getAttribute("href");
55
+ if (href && href.trim())
56
+ return href.trim();
57
+ return textOf(el);
58
+ }
59
+ /**
60
+ * Resolve `publishedAt` to a candidate string for `new Date()`.
61
+ *
62
+ * `<time datetime="2026-05-12">` and `<meta content="..."/>` markup hide the
63
+ * canonical timestamp in attributes; the visible text is often a
64
+ * localized string such as "May 12, 2026" that is harder to parse reliably. We probe the
65
+ * known attributes first, then fall back to element text.
66
+ */
67
+ function pickDatetime(el) {
68
+ if (!el)
69
+ return undefined;
70
+ for (const attr of DATETIME_ATTRS) {
71
+ const value = el.getAttribute(attr);
72
+ if (value && value.trim())
73
+ return value.trim();
74
+ }
75
+ return textOf(el);
76
+ }
77
+ /**
78
+ * Try to parse a candidate timestamp into ISO 8601. Returns `undefined` for
79
+ * unparseable inputs so the item can still be emitted (RSS adapter parity).
80
+ */
81
+ function toIsoDate(value) {
82
+ if (!value)
83
+ return undefined;
84
+ const date = new Date(value);
85
+ if (Number.isNaN(date.getTime()))
86
+ return undefined;
87
+ return date.toISOString();
88
+ }
89
+ /** Collect the non-empty trimmed text of every match for `selector`; `undefined` when no selector is given or nothing matches. */
90
+ function collectTags(root, selector) {
91
+ if (!selector)
92
+ return undefined;
93
+ const tags = root
94
+ .querySelectorAll(selector)
95
+ .map((el) => el.text?.trim())
96
+ .filter((t) => !!t && t.length > 0);
97
+ return tags.length > 0 ? tags : undefined;
98
+ }
99
+ /**
100
+ * Resolve a relative `link` against the source URL.
101
+ *
102
+ * Many sites publish `<a href="/changelog/foo">` rather than absolute URLs;
103
+ * without resolution `ItemSchema`'s `z.string().url()` would drop them. We
104
+ * intentionally swallow `URL` constructor errors so a malformed `link`
105
+ * surfaces as a normal validation drop later instead of breaking the whole
106
+ * fetch.
107
+ */
108
+ function resolveUrl(raw, base) {
109
+ try {
110
+ return new URL(raw, base).toString();
111
+ }
112
+ catch {
113
+ return raw;
114
+ }
115
+ }
116
+ /** Normalize one matched element into an Item, or `null` to drop it. */
117
+ function parseItem(itemEl, selectors, source, fetchedAt) {
118
+ const title = textOf(queryFirst(itemEl, selectors.title));
119
+ const linkRaw = pickLink(queryFirst(itemEl, selectors.link));
120
+ if (!title || !linkRaw)
121
+ return null;
122
+ const url = resolveUrl(linkRaw, source.url);
123
+ const summary = selectors.summary ? textOf(queryFirst(itemEl, selectors.summary)) : undefined;
124
+ const body = selectors.body ? textOf(queryFirst(itemEl, selectors.body)) : undefined;
125
+ const publishedAt = selectors.publishedAt
126
+ ? toIsoDate(pickDatetime(queryFirst(itemEl, selectors.publishedAt)))
127
+ : undefined;
128
+ const tags = collectTags(itemEl, selectors.tags);
129
+ const stableKey = deriveStableKey({
130
+ url,
131
+ fallbackHashInputs: [title, publishedAt],
132
+ });
133
+ const id = deriveItemId(title, stableKey);
134
+ // Preserve a structured snapshot of the raw scrape rather than the
135
+ // `HTMLElement` instance itself — the watcher serializes `raw` to YAML and
136
+ // we want the on-disk payload to be diff-friendly.
137
+ const raw = { title, link: linkRaw };
138
+ if (summary !== undefined)
139
+ raw.summary = summary;
140
+ if (body !== undefined)
141
+ raw.body = body;
142
+ if (publishedAt !== undefined)
143
+ raw.publishedAt = publishedAt;
144
+ if (tags !== undefined)
145
+ raw.tags = tags;
146
+ return validateItem({
147
+ id,
148
+ sourceId: source.id,
149
+ title,
150
+ url,
151
+ summary,
152
+ publishedAt,
153
+ fetchedAt,
154
+ raw,
155
+ });
156
+ }
157
+ function validateItem(candidate) {
158
+ const result = ItemSchema.safeParse(candidate);
159
+ // Items that fail validation (e.g. unresolvable URL) are dropped silently —
160
+ // see rss.ts for the same fail-soft rationale.
161
+ return result.success ? result.data : null;
162
+ }
163
+ /**
164
+ * Parse an HTML document into validated `Item[]` using the source's
165
+ * `selectors`. Both `kind: html` and `kind: html-js` go through here so the
166
+ * selector contract stays in one place.
167
+ */
168
+ export function parseHtmlDocument(html, source, fetchedAt) {
169
+ if (!source.selectors) {
170
+ throw new Error(`html adapter: source '${source.id}' has no selectors`);
171
+ }
172
+ const selectors = source.selectors;
173
+ let root;
174
+ try {
175
+ root = parseHtml(html);
176
+ }
177
+ catch (e) {
178
+ throw new Error(`html adapter: failed to parse HTML: ${e instanceof Error ? e.message : String(e)}`);
179
+ }
180
+ const itemEls = root.querySelectorAll(selectors.item);
181
+ return itemEls
182
+ .map((el) => parseItem(el, selectors, source, fetchedAt))
183
+ .filter((i) => i !== null);
184
+ }
185
+ /**
186
+ * Compute the sha256 of the given body string, prefixed so callers can tell
187
+ * it apart from a real ETag inside `SourceState.lastEtag`.
188
+ */
189
+ export function contentHash(body) {
190
+ return `${CONTENT_HASH_PREFIX}${createHash("sha256").update(body).digest("hex")}`;
191
+ }
192
+ //# sourceMappingURL=_html-common.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"_html-common.js","sourceRoot":"","sources":["../../../src/core/feeds/_html-common.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAoB,KAAK,IAAI,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAExE,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACpD,OAAO,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AAE/D;;;;;;;;;;GAUG;AAEH;;;;GAIG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAAG,SAAS,CAAC;AAE7C,wEAAwE;AACxE,MAAM,cAAc,GAAG,CAAC,UAAU,EAAE,SAAS,EAAE,OAAO,CAAU,CAAC;AAEjE;;;;GAIG;AACH,SAAS,MAAM,CAAC,EAAsB;IACpC,IAAI,CAAC,EAAE;QAAE,OAAO,SAAS,CAAC;IAC1B,MAAM,IAAI,GAAG,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC;IAC7B,OAAO,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC;AACjC,CAAC;AAED;;;;GAIG;AACH,SAAS,UAAU,CAAC,IAAiB,EAAE,QAAgB;IACrD,OAAO,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;AACtC,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,QAAQ,CAAC,EAAsB;IACtC,IAAI,CAAC,EAAE;QAAE,OAAO,SAAS,CAAC;IAC1B,MAAM,IAAI,GAAG,EAAE,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;IACrC,IAAI,IAAI,IAAI,IAAI,CAAC,IAAI,EAAE;QAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5C,OAAO,MAAM,CAAC,EAAE,CAAC,CAAC;AACpB,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,YAAY,CAAC,EAAsB;IAC1C,IAAI,CAAC,EAAE;QAAE,OAAO,SAAS,CAAC;IAC1B,KAAK,MAAM,IAAI,IAAI,cAAc,EAAE,CAAC;QAClC,MAAM,KAAK,GAAG,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;QACpC,IAAI,KAAK,IAAI,KAAK,CAAC,IAAI,EAAE;YAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IACjD,CAAC;IACD,OAAO,MAAM,CAAC,EAAE,CAAC,CAAC;AACpB,CAAC;AAED;;;GAGG;AACH,SAAS,SAAS,CAAC,KAAyB;IAC1C,IAAI,CAAC,KAAK;QAAE,OAAO,SAAS,CAAC;IAC7B,MAAM,IAAI,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC;IAC7B,IAAI,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;QAAE,OAAO,SAAS,CAAC;IACnD,OAAO,IAAI,CAAC,WAAW,EAAE,CAAC;AAC5B,CAAC;AAED,8DAA8D;AAC9D,SAAS,WAAW,CAAC,IAAiB,EAAE,QAA4B;IAClE,IAAI,CAAC,QAAQ;QAAE,OAAO,SAAS,CAAC;IAChC,MAAM,IAAI,GAAG,IAAI;SACd,gBAAgB,CAAC,QAAQ,CAAC;SAC1B,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC;SAC5B,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACnD,OAAO,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CA
AC,SAAS,CAAC;AAC5C,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,UAAU,CAAC,GAAW,EAAE,IAAY;IAC3C,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC;IACvC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,GAAG,CAAC;IACb,CAAC;AACH,CAAC;AAED,wEAAwE;AACxE,SAAS,SAAS,CAChB,MAAmB,EACnB,SAA0B,EAC1B,MAAc,EACd,SAAiB;IAEjB,MAAM,KAAK,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,EAAE,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC;IAC1D,MAAM,OAAO,GAAG,QAAQ,CAAC,UAAU,CAAC,MAAM,EAAE,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC;IAC7D,IAAI,CAAC,KAAK,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IACpC,MAAM,GAAG,GAAG,UAAU,CAAC,OAAO,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC;IAE5C,MAAM,OAAO,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,MAAM,EAAE,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAC9F,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,MAAM,EAAE,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IACrF,MAAM,WAAW,GAAG,SAAS,CAAC,WAAW;QACvC,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,UAAU,CAAC,MAAM,EAAE,SAAS,CAAC,WAAW,CAAC,CAAC,CAAC;QACpE,CAAC,CAAC,SAAS,CAAC;IACd,MAAM,IAAI,GAAG,WAAW,CAAC,MAAM,EAAE,SAAS,CAAC,IAAI,CAAC,CAAC;IAEjD,MAAM,SAAS,GAAG,eAAe,CAAC;QAChC,GAAG;QACH,kBAAkB,EAAE,CAAC,KAAK,EAAE,WAAW,CAAC;KACzC,CAAC,CAAC;IACH,MAAM,EAAE,GAAG,YAAY,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;IAE1C,mEAAmE;IACnE,2EAA2E;IAC3E,mDAAmD;IACnD,MAAM,GAAG,GAA4B,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;IAC9D,IAAI,OAAO,KAAK,SAAS;QAAE,GAAG,CAAC,OAAO,GAAG,OAAO,CAAC;IACjD,IAAI,IAAI,KAAK,SAAS;QAAE,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC;IACxC,IAAI,WAAW,KAAK,SAAS;QAAE,GAAG,CAAC,WAAW,GAAG,WAAW,CAAC;IAC7D,IAAI,IAAI,KAAK,SAAS;QAAE,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC;IAExC,OAAO,YAAY,CAAC;QAClB,EAAE;QACF,QAAQ,EAAE,MAAM,CAAC,EAAE;QACnB,KAAK;QACL,GAAG;QACH,OAAO;QACP,WAAW;QACX,SAAS;QACT,GAAG;KACJ,CAAC,CAAC;AACL,CAAC;AAED,SAAS,YAAY,CAAC,SAAkC;IACtD,MAAM,MAAM,GAAG,UAAU,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;IAC/C,4EAA4E;IAC5E,+CAA+C;IAC/C,OAAO,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;AAC7C,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,iBAAiB,CAAC,IAAY,EAAE,MAAc,EAAE,SAAiB;IAC/E,IAAI,CAAC,MAAM,CAAC,SAAS
,EAAE,CAAC;QACtB,MAAM,IAAI,KAAK,CAAC,yBAAyB,MAAM,CAAC,EAAE,oBAAoB,CAAC,CAAC;IAC1E,CAAC;IACD,MAAM,SAAS,GAAG,MAAM,CAAC,SAAS,CAAC;IACnC,IAAI,IAAiB,CAAC;IACtB,IAAI,CAAC;QACH,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IACzB,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CACb,uCAAuC,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CACpF,CAAC;IACJ,CAAC;IACD,MAAM,OAAO,GAAG,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IACtD,OAAO,OAAO;SACX,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,SAAS,CAAC,EAAE,EAAE,SAAS,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;SACxD,MAAM,CAAC,CAAC,CAAC,EAAa,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC;AAC1C,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,WAAW,CAAC,IAAY;IACtC,OAAO,GAAG,mBAAmB,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC;AACpF,CAAC"}
@@ -0,0 +1,50 @@
1
+ import type { SourceJsOptions } from "../../schemas/index.js";
2
+ import type { FeedAdapter, FeedAdapterOptions } from "./types.js";
3
+ /**
4
+ * Minimal subset of the Playwright surface this adapter uses. Defined
5
+ * structurally so the fake `playwright` module injected by tests does not need to
6
+ * pull in the full Playwright type tree (which is itself an optional peer
7
+ * dep and therefore not guaranteed to be installed in dev).
8
+ */
9
+ export interface PlaywrightLike {
10
+ chromium: {
11
+ launch(options?: {
12
+ headless?: boolean;
13
+ }): Promise<PlaywrightBrowserLike>;
14
+ };
15
+ }
16
+ export interface PlaywrightBrowserLike {
17
+ newContext(options?: {
18
+ acceptDownloads?: boolean;
19
+ userAgent?: string;
20
+ }): Promise<PlaywrightContextLike>;
21
+ close(): Promise<void>;
22
+ }
23
+ export interface PlaywrightContextLike {
24
+ newPage(): Promise<PlaywrightPageLike>;
25
+ close(): Promise<void>;
26
+ }
27
+ export interface PlaywrightPageLike {
28
+ goto(url: string, options?: {
29
+ waitUntil?: SourceJsOptions["waitUntil"];
30
+ timeout?: number;
31
+ }): Promise<unknown>;
32
+ waitForSelector(selector: string, options?: {
33
+ timeout?: number;
34
+ }): Promise<unknown>;
35
+ content(): Promise<string>;
36
+ close(): Promise<void>;
37
+ }
38
+ /**
39
+ * Test-only extension to `FeedAdapterOptions` for the `html-js` adapter.
40
+ *
41
+ * Production callers leave `playwright` unset and the adapter dynamically
42
+ * imports it. Tests inject a fake module so they can exercise the adapter
43
+ * without spinning up real Chromium. The shape mirrors the subset above.
44
+ */
45
+ export interface HtmlJsAdapterOptions extends FeedAdapterOptions {
46
+ /** Injected Playwright module (tests only). Production uses dynamic import. */
47
+ playwright?: PlaywrightLike;
48
+ }
49
+ export declare const htmlJsAdapter: FeedAdapter;
50
+ //# sourceMappingURL=html-js.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"html-js.d.ts","sourceRoot":"","sources":["../../../src/core/feeds/html-js.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAU,eAAe,EAAE,MAAM,wBAAwB,CAAC;AAEtE,OAAO,KAAK,EAAE,WAAW,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AA+ClE;;;;;GAKG;AACH,MAAM,WAAW,cAAc;IAC7B,QAAQ,EAAE;QACR,MAAM,CAAC,OAAO,CAAC,EAAE;YAAE,QAAQ,CAAC,EAAE,OAAO,CAAA;SAAE,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAAC;KAC1E,CAAC;CACH;AAED,MAAM,WAAW,qBAAqB;IACpC,UAAU,CAAC,OAAO,CAAC,EAAE;QACnB,eAAe,CAAC,EAAE,OAAO,CAAC;QAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;KACpB,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAAC;IACnC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACxB;AAED,MAAM,WAAW,qBAAqB;IACpC,OAAO,IAAI,OAAO,CAAC,kBAAkB,CAAC,CAAC;IACvC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACxB;AAED,MAAM,WAAW,kBAAkB;IACjC,IAAI,CACF,GAAG,EAAE,MAAM,EACX,OAAO,CAAC,EAAE;QAAE,SAAS,CAAC,EAAE,eAAe,CAAC,WAAW,CAAC,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,CAAA;KAAE,GACvE,OAAO,CAAC,OAAO,CAAC,CAAC;IACpB,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;QAAE,OAAO,CAAC,EAAE,MAAM,CAAA;KAAE,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IACpF,OAAO,IAAI,OAAO,CAAC,MAAM,CAAC,CAAC;IAC3B,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACxB;AAED;;;;;;GAMG;AACH,MAAM,WAAW,oBAAqB,SAAQ,kBAAkB;IAC9D,+EAA+E;IAC/E,UAAU,CAAC,EAAE,cAAc,CAAC;CAC7B;AAqBD,eAAO,MAAM,aAAa,EAAE,WAyE3B,CAAC"}
@@ -0,0 +1,135 @@
1
+ import { contentHash, parseHtmlDocument } from "./_html-common.js";
2
+ /**
3
+ * `kind: html-js` adapter — Playwright-rendered HTML scraping (ADR-0010).
4
+ *
5
+ * Same selector contract as `kind: html` (delegates to `parseHtmlDocument`),
6
+ * but acquires the document by driving headless Chromium so SPA / CSR pages
7
+ * (Next.js, Notion embeds, Algolia DocSearch, etc.) that ship empty initial
8
+ * HTML can still be scraped.
9
+ *
10
+ * ## Hardening (ADR-0010 §D5 — hardcoded, NOT user-configurable)
11
+ *
12
+ * | Policy | Value | Rationale |
13
+ * |---------------------|----------------------|----------------------------------------------------------|
14
+ * | `headless` | `true` | UI mode is CI-incompatible and an operator-UI risk. |
15
+ * | `acceptDownloads` | `false` | Block drive-by downloads (page JS-triggered file saves). |
16
+ * | context reuse | none — fresh each fetch | Prevent SW / IndexedDB / localStorage injection persistence and cross-source state mixing. |
17
+ * | default `timeout` | 30000ms | Cap OOM / infinite loops on pathological pages. |
18
+ * | `page.close()` | in `finally` | Prevent page leak / memory accumulation. |
19
+ * | viewport | Playwright default | Avoid bloating DOM with oversized viewports. |
20
+ *
21
+ * The above are intentionally NOT exposed through `SourceJsOptions`. Users
22
+ * may tune `waitFor` / `waitUntil` / `userAgent` and override the default
23
+ * `timeout`; the threat model assumes the remaining policy floor always holds.
24
+ *
25
+ * ## Optional peer dep
26
+ *
27
+ * Playwright is declared as an *optional* peer dependency (ADR-0010 §D3) so
28
+ * users who only run `kind: rss` / `kind: html` are not forced to install
29
+ * Chromium. The import is therefore `await import("playwright")` and resolves
30
+ * lazily on the first `html-js` fetch; missing-module errors are translated
31
+ * into a user-friendly install hint.
32
+ */
33
+ /**
34
+ * Default per-step timeout in ms when `Source.js?.timeout` is omitted.
35
+ * Mirrors `SourceJsOptionsSchema`'s default so adapter-direct callers (not
36
+ * going through schema parse) still get the documented behavior.
37
+ */
38
+ const DEFAULT_TIMEOUT_MS = 30_000;
39
+ /**
40
+ * Default Playwright `page.goto()` waitUntil mode. `networkidle` is the
41
+ * safest default for SPA / CSR pages where item data arrives via XHR after
42
+ * the document has loaded.
43
+ */
44
+ const DEFAULT_WAIT_UNTIL = "networkidle";
45
+ /**
46
+ * Dynamically import Playwright. Translates the very common
47
+ * "package not installed" failure into the install hint from ADR-0010 §D3.
48
+ */
49
+ async function loadPlaywright() {
50
+ try {
51
+ // Bare specifier: resolves via the consumer project's node_modules. In the
52
+ // TypeScript source, a type assertion narrows the import to the subset we use.
53
+ const mod = (await import("playwright"));
54
+ return mod;
55
+ }
56
+ catch (e) {
57
+ const message = e instanceof Error ? e.message : String(e);
58
+ throw new Error(`html-js adapter: failed to load Playwright (${message}). ` +
59
+ "Install it with: `npm i playwright && npx playwright install chromium`");
60
+ }
61
+ }
62
+ export const htmlJsAdapter = {
63
+ kind: "html-js",
64
+ fetch: async (source, options = {}) => {
65
+ if (!source.selectors) {
66
+ throw new Error(`html-js adapter: source '${source.id}' has no selectors`);
67
+ }
68
+ const selectors = source.selectors;
69
+ const js = source.js;
70
+ const timeout = js?.timeout ?? DEFAULT_TIMEOUT_MS;
71
+ const waitUntil = js?.waitUntil ?? DEFAULT_WAIT_UNTIL;
72
+ // When `waitFor` is omitted we wait for the item selector itself — the
73
+ // common "wait until the item list rendered" intent without extra config.
74
+ const waitFor = js?.waitFor ?? selectors.item;
75
+ const playwright = options.playwright ?? (await loadPlaywright());
76
+ const previous = options.state;
77
+ const fetchedAt = new Date().toISOString();
78
+ // Hardening: headless is forced true. Even if a future Playwright default
79
+ // changes, the adapter pins it explicitly here.
80
+ const browser = await playwright.chromium.launch({ headless: true });
81
+ let html;
82
+ try {
83
+ // Hardening: fresh context per fetch (no SW / IndexedDB / localStorage
84
+ // persistence across fetches or sources). `acceptDownloads: false`
85
+ // blocks drive-by download routes (page JS triggering file saves).
86
+ const context = await browser.newContext({
87
+ acceptDownloads: false,
88
+ ...(js?.userAgent ? { userAgent: js.userAgent } : {}),
89
+ });
90
+ try {
91
+ const page = await context.newPage();
92
+ try {
93
+ await page.goto(source.url, { waitUntil, timeout });
94
+ await page.waitForSelector(waitFor, { timeout });
95
+ html = await page.content();
96
+ }
97
+ finally {
98
+ // `finally` guarantees page close even on goto / waitFor timeout —
99
+ // prevents page leak / memory accumulation per ADR-0010 §D5.
100
+ await page.close();
101
+ }
102
+ }
103
+ finally {
104
+ await context.close();
105
+ }
106
+ }
107
+ finally {
108
+ await browser.close();
109
+ }
110
+ // Dedup via content hash stored in the `lastEtag` slot (same convention
111
+ // as `kind: html` — see `_html-common.ts`). Server-side ETags are not
112
+ // observable from `page.content()`, so the content hash is the only
113
+ // dedup signal available here.
114
+ const bodyHash = contentHash(html);
115
+ if (previous?.lastEtag === bodyHash) {
116
+ return {
117
+ items: [],
118
+ notModified: true,
119
+ state: {
120
+ lastFetchedAt: fetchedAt,
121
+ lastEtag: bodyHash,
122
+ },
123
+ };
124
+ }
125
+ const items = parseHtmlDocument(html, source, fetchedAt);
126
+ return {
127
+ items,
128
+ state: {
129
+ lastFetchedAt: fetchedAt,
130
+ lastEtag: bodyHash,
131
+ },
132
+ };
133
+ },
134
+ };
135
+ //# sourceMappingURL=html-js.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"html-js.js","sourceRoot":"","sources":["../../../src/core/feeds/html-js.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAGnE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAEH;;;;GAIG;AACH,MAAM,kBAAkB,GAAG,MAAM,CAAC;AAClC;;;;GAIG;AACH,MAAM,kBAAkB,GAAiC,aAAa,CAAC;AAiDvE;;;GAGG;AACH,KAAK,UAAU,cAAc;IAC3B,IAAI,CAAC;QACH,wEAAwE;QACxE,kEAAkE;QAClE,MAAM,GAAG,GAAG,CAAC,MAAM,MAAM,CAAC,YAAY,CAAC,CAA8B,CAAC;QACtE,OAAO,GAAG,CAAC;IACb,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,OAAO,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAC3D,MAAM,IAAI,KAAK,CACb,+CAA+C,OAAO,KAAK;YACzD,wEAAwE,CAC3E,CAAC;IACJ,CAAC;AACH,CAAC;AAED,MAAM,CAAC,MAAM,aAAa,GAAgB;IACxC,IAAI,EAAE,SAAS;IACf,KAAK,EAAE,KAAK,EAAE,MAAc,EAAE,UAAgC,EAAE,EAAE,EAAE;QAClE,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;YACtB,MAAM,IAAI,KAAK,CAAC,4BAA4B,MAAM,CAAC,EAAE,oBAAoB,CAAC,CAAC;QAC7E,CAAC;QACD,MAAM,SAAS,GAAG,MAAM,CAAC,SAAS,CAAC;QACnC,MAAM,EAAE,GAAG,MAAM,CAAC,EAAE,CAAC;QACrB,MAAM,OAAO,GAAG,EAAE,EAAE,OAAO,IAAI,kBAAkB,CAAC;QAClD,MAAM,SAAS,GAAG,EAAE,EAAE,SAAS,IAAI,kBAAkB,CAAC;QACtD,uEAAuE;QACvE,0EAA0E;QAC1E,MAAM,OAAO,GAAG,EAAE,EAAE,OAAO,IAAI,SAAS,CAAC,IAAI,CAAC;QAE9C,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,CAAC,MAAM,cAAc,EAAE,CAAC,CAAC;QAClE,MAAM,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC;QAC/B,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAE3C,0EAA0E;QAC1E,gDAAgD;QAChD,MAAM,OAAO,GAAG,MAAM,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;QACrE,IAAI,IAAY,CAAC;QACjB,IAAI,CAAC;YACH,uEAAuE;YACvE,mEAAmE;YACnE,mEAAmE;YACnE,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC;gBACvC,eAAe,EAAE,KAAK;gBACtB,GAAG,CAAC,EAAE,EAAE,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,EAAE,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;aACtD,CAAC,CAAC;YACH,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;gBACrC,IAAI,CAAC;oBACH,MAAM,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,OAAO,EAAE,CAAC,CAAC;oBACpD,MAAM,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;oBACjD,IAAI,GAAG,MA
AM,IAAI,CAAC,OAAO,EAAE,CAAC;gBAC9B,CAAC;wBAAS,CAAC;oBACT,mEAAmE;oBACnE,6DAA6D;oBAC7D,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;gBACrB,CAAC;YACH,CAAC;oBAAS,CAAC;gBACT,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;YACxB,CAAC;QACH,CAAC;gBAAS,CAAC;YACT,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;QACxB,CAAC;QAED,wEAAwE;QACxE,sEAAsE;QACtE,oEAAoE;QACpE,+BAA+B;QAC/B,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC;QACnC,IAAI,QAAQ,EAAE,QAAQ,KAAK,QAAQ,EAAE,CAAC;YACpC,OAAO;gBACL,KAAK,EAAE,EAAE;gBACT,WAAW,EAAE,IAAI;gBACjB,KAAK,EAAE;oBACL,aAAa,EAAE,SAAS;oBACxB,QAAQ,EAAE,QAAQ;iBACnB;aACF,CAAC;QACJ,CAAC;QAED,MAAM,KAAK,GAAG,iBAAiB,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;QACzD,OAAO;YACL,KAAK;YACL,KAAK,EAAE;gBACL,aAAa,EAAE,SAAS;gBACxB,QAAQ,EAAE,QAAQ;aACnB;SACF,CAAC;IACJ,CAAC;CACF,CAAC"}
@@ -1,10 +1,4 @@
1
- import type { Item, Source } from "../../schemas/index.js";
2
1
  import type { FeedAdapter } from "./types.js";
3
- /**
4
- * Parse an HTML document into validated `Item[]` using the source's
5
- * `selectors`. Exported so tests can drive the parser directly without
6
- * needing a fake HTTP layer.
7
- */
8
- export declare function parseHtmlDocument(html: string, source: Source, fetchedAt: string): Item[];
2
+ export { parseHtmlDocument } from "./_html-common.js";
9
3
  export declare const htmlAdapter: FeedAdapter;
10
4
  //# sourceMappingURL=html.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"html.d.ts","sourceRoot":"","sources":["../../../src/core/feeds/html.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,EAAmB,MAAM,wBAAwB,CAAC;AAG5E,OAAO,KAAK,EAAE,WAAW,EAAiC,MAAM,YAAY,CAAC;AA6J7E;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,IAAI,EAAE,CAiBzF;AA+CD,eAAO,MAAM,WAAW,EAAE,WAuDzB,CAAC"}
1
+ {"version":3,"file":"html.d.ts","sourceRoot":"","sources":["../../../src/core/feeds/html.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,WAAW,EAAiC,MAAM,YAAY,CAAC;AAK7E,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAyCtD,eAAO,MAAM,WAAW,EAAE,WAuDzB,CAAC"}