@pseolint/core 0.4.3 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270)
  1. package/README.md +264 -169
  2. package/dist/ai/manifest/diff.d.ts +78 -0
  3. package/dist/ai/manifest/diff.d.ts.map +1 -0
  4. package/dist/ai/manifest/diff.js +139 -0
  5. package/dist/ai/manifest/diff.js.map +1 -0
  6. package/dist/ai/manifest/index.d.ts +18 -0
  7. package/dist/ai/manifest/index.d.ts.map +1 -0
  8. package/dist/ai/manifest/index.js +15 -0
  9. package/dist/ai/manifest/index.js.map +1 -0
  10. package/dist/ai/manifest/validate-manifest.d.ts +37 -0
  11. package/dist/ai/manifest/validate-manifest.d.ts.map +1 -0
  12. package/dist/ai/manifest/validate-manifest.js +67 -0
  13. package/dist/ai/manifest/validate-manifest.js.map +1 -0
  14. package/dist/ai/manifest/validators/domain-patches.d.ts +15 -0
  15. package/dist/ai/manifest/validators/domain-patches.d.ts.map +1 -0
  16. package/dist/ai/manifest/validators/domain-patches.js +110 -0
  17. package/dist/ai/manifest/validators/domain-patches.js.map +1 -0
  18. package/dist/ai/manifest/validators/index.d.ts +5 -0
  19. package/dist/ai/manifest/validators/index.d.ts.map +1 -0
  20. package/dist/ai/manifest/validators/index.js +4 -0
  21. package/dist/ai/manifest/validators/index.js.map +1 -0
  22. package/dist/ai/manifest/validators/page-changes.d.ts +36 -0
  23. package/dist/ai/manifest/validators/page-changes.d.ts.map +1 -0
  24. package/dist/ai/manifest/validators/page-changes.js +221 -0
  25. package/dist/ai/manifest/validators/page-changes.js.map +1 -0
  26. package/dist/ai/manifest/validators/types.d.ts +17 -0
  27. package/dist/ai/manifest/validators/types.d.ts.map +1 -0
  28. package/dist/ai/manifest/validators/types.js +5 -0
  29. package/dist/ai/manifest/validators/types.js.map +1 -0
  30. package/dist/ai/orchestrate.d.ts +74 -0
  31. package/dist/ai/orchestrate.d.ts.map +1 -0
  32. package/dist/ai/orchestrate.js +54 -0
  33. package/dist/ai/orchestrate.js.map +1 -0
  34. package/dist/ai/orchestrator/budget.d.ts +57 -0
  35. package/dist/ai/orchestrator/budget.d.ts.map +1 -0
  36. package/dist/ai/orchestrator/budget.js +114 -0
  37. package/dist/ai/orchestrator/budget.js.map +1 -0
  38. package/dist/ai/orchestrator/finish-tool.d.ts +568 -0
  39. package/dist/ai/orchestrator/finish-tool.d.ts.map +1 -0
  40. package/dist/ai/orchestrator/finish-tool.js +114 -0
  41. package/dist/ai/orchestrator/finish-tool.js.map +1 -0
  42. package/dist/ai/orchestrator/index.d.ts +25 -0
  43. package/dist/ai/orchestrator/index.d.ts.map +1 -0
  44. package/dist/ai/orchestrator/index.js +21 -0
  45. package/dist/ai/orchestrator/index.js.map +1 -0
  46. package/dist/ai/orchestrator/log.d.ts +24 -0
  47. package/dist/ai/orchestrator/log.d.ts.map +1 -0
  48. package/dist/ai/orchestrator/log.js +48 -0
  49. package/dist/ai/orchestrator/log.js.map +1 -0
  50. package/dist/ai/orchestrator/page-cache.d.ts +64 -0
  51. package/dist/ai/orchestrator/page-cache.d.ts.map +1 -0
  52. package/dist/ai/orchestrator/page-cache.js +127 -0
  53. package/dist/ai/orchestrator/page-cache.js.map +1 -0
  54. package/dist/ai/orchestrator/prompt.d.ts +16 -0
  55. package/dist/ai/orchestrator/prompt.d.ts.map +1 -0
  56. package/dist/ai/orchestrator/prompt.js +52 -0
  57. package/dist/ai/orchestrator/prompt.js.map +1 -0
  58. package/dist/ai/orchestrator/runner.d.ts +65 -0
  59. package/dist/ai/orchestrator/runner.d.ts.map +1 -0
  60. package/dist/ai/orchestrator/runner.js +223 -0
  61. package/dist/ai/orchestrator/runner.js.map +1 -0
  62. package/dist/ai/orchestrator/session.d.ts +44 -0
  63. package/dist/ai/orchestrator/session.d.ts.map +1 -0
  64. package/dist/ai/orchestrator/session.js +64 -0
  65. package/dist/ai/orchestrator/session.js.map +1 -0
  66. package/dist/ai/orchestrator/types.d.ts +99 -0
  67. package/dist/ai/orchestrator/types.d.ts.map +1 -0
  68. package/dist/ai/orchestrator/types.js +8 -0
  69. package/dist/ai/orchestrator/types.js.map +1 -0
  70. package/dist/ai/probes/cache.d.ts +12 -0
  71. package/dist/ai/probes/cache.d.ts.map +1 -0
  72. package/dist/ai/probes/cache.js +46 -0
  73. package/dist/ai/probes/cache.js.map +1 -0
  74. package/dist/ai/tools/ask-ai-engine.d.ts +77 -0
  75. package/dist/ai/tools/ask-ai-engine.d.ts.map +1 -0
  76. package/dist/ai/tools/ask-ai-engine.js +253 -0
  77. package/dist/ai/tools/ask-ai-engine.js.map +1 -0
  78. package/dist/ai/tools/check-domain-crawler-access.d.ts +71 -0
  79. package/dist/ai/tools/check-domain-crawler-access.d.ts.map +1 -0
  80. package/dist/ai/tools/check-domain-crawler-access.js +76 -0
  81. package/dist/ai/tools/check-domain-crawler-access.js.map +1 -0
  82. package/dist/ai/tools/check-domain-llms-txt.d.ts +70 -0
  83. package/dist/ai/tools/check-domain-llms-txt.d.ts.map +1 -0
  84. package/dist/ai/tools/check-domain-llms-txt.js +75 -0
  85. package/dist/ai/tools/check-domain-llms-txt.js.map +1 -0
  86. package/dist/ai/tools/check-indexability.d.ts +58 -0
  87. package/dist/ai/tools/check-indexability.d.ts.map +1 -0
  88. package/dist/ai/tools/check-indexability.js +64 -0
  89. package/dist/ai/tools/check-indexability.js.map +1 -0
  90. package/dist/ai/tools/check-robots.d.ts +68 -0
  91. package/dist/ai/tools/check-robots.d.ts.map +1 -0
  92. package/dist/ai/tools/check-robots.js +90 -0
  93. package/dist/ai/tools/check-robots.js.map +1 -0
  94. package/dist/ai/tools/check-rule-answer-first.d.ts +54 -0
  95. package/dist/ai/tools/check-rule-answer-first.d.ts.map +1 -0
  96. package/dist/ai/tools/check-rule-answer-first.js +50 -0
  97. package/dist/ai/tools/check-rule-answer-first.js.map +1 -0
  98. package/dist/ai/tools/check-rule-canonical-consistency.d.ts +66 -0
  99. package/dist/ai/tools/check-rule-canonical-consistency.d.ts.map +1 -0
  100. package/dist/ai/tools/check-rule-canonical-consistency.js +51 -0
  101. package/dist/ai/tools/check-rule-canonical-consistency.js.map +1 -0
  102. package/dist/ai/tools/check-rule-citable-facts.d.ts +58 -0
  103. package/dist/ai/tools/check-rule-citable-facts.d.ts.map +1 -0
  104. package/dist/ai/tools/check-rule-citable-facts.js +41 -0
  105. package/dist/ai/tools/check-rule-citable-facts.js.map +1 -0
  106. package/dist/ai/tools/check-rule-content-modularity.d.ts +58 -0
  107. package/dist/ai/tools/check-rule-content-modularity.d.ts.map +1 -0
  108. package/dist/ai/tools/check-rule-content-modularity.js +45 -0
  109. package/dist/ai/tools/check-rule-content-modularity.js.map +1 -0
  110. package/dist/ai/tools/check-rule-faq-coverage.d.ts +54 -0
  111. package/dist/ai/tools/check-rule-faq-coverage.d.ts.map +1 -0
  112. package/dist/ai/tools/check-rule-faq-coverage.js +39 -0
  113. package/dist/ai/tools/check-rule-faq-coverage.js.map +1 -0
  114. package/dist/ai/tools/check-rule-freshness-signals.d.ts +54 -0
  115. package/dist/ai/tools/check-rule-freshness-signals.d.ts.map +1 -0
  116. package/dist/ai/tools/check-rule-freshness-signals.js +45 -0
  117. package/dist/ai/tools/check-rule-freshness-signals.js.map +1 -0
  118. package/dist/ai/tools/check-rule-json-ld-valid.d.ts +54 -0
  119. package/dist/ai/tools/check-rule-json-ld-valid.d.ts.map +1 -0
  120. package/dist/ai/tools/check-rule-json-ld-valid.js +44 -0
  121. package/dist/ai/tools/check-rule-json-ld-valid.js.map +1 -0
  122. package/dist/ai/tools/check-rule-missing-author.d.ts +54 -0
  123. package/dist/ai/tools/check-rule-missing-author.d.ts.map +1 -0
  124. package/dist/ai/tools/check-rule-missing-author.js +45 -0
  125. package/dist/ai/tools/check-rule-missing-author.js.map +1 -0
  126. package/dist/ai/tools/check-rule-near-duplicate.d.ts +82 -0
  127. package/dist/ai/tools/check-rule-near-duplicate.d.ts.map +1 -0
  128. package/dist/ai/tools/check-rule-near-duplicate.js +63 -0
  129. package/dist/ai/tools/check-rule-near-duplicate.js.map +1 -0
  130. package/dist/ai/tools/check-rule-required-fields.d.ts +50 -0
  131. package/dist/ai/tools/check-rule-required-fields.d.ts.map +1 -0
  132. package/dist/ai/tools/check-rule-required-fields.js +38 -0
  133. package/dist/ai/tools/check-rule-required-fields.js.map +1 -0
  134. package/dist/ai/tools/check-rule-schema-consistency.d.ts +54 -0
  135. package/dist/ai/tools/check-rule-schema-consistency.d.ts.map +1 -0
  136. package/dist/ai/tools/check-rule-schema-consistency.js +44 -0
  137. package/dist/ai/tools/check-rule-schema-consistency.js.map +1 -0
  138. package/dist/ai/tools/check-rule-summary-bait.d.ts +54 -0
  139. package/dist/ai/tools/check-rule-summary-bait.d.ts.map +1 -0
  140. package/dist/ai/tools/check-rule-summary-bait.js +39 -0
  141. package/dist/ai/tools/check-rule-summary-bait.js.map +1 -0
  142. package/dist/ai/tools/check-rule-thin-content.d.ts +66 -0
  143. package/dist/ai/tools/check-rule-thin-content.d.ts.map +1 -0
  144. package/dist/ai/tools/check-rule-thin-content.js +58 -0
  145. package/dist/ai/tools/check-rule-thin-content.js.map +1 -0
  146. package/dist/ai/tools/detect-templates.d.ts +60 -0
  147. package/dist/ai/tools/detect-templates.d.ts.map +1 -0
  148. package/dist/ai/tools/detect-templates.js +43 -0
  149. package/dist/ai/tools/detect-templates.js.map +1 -0
  150. package/dist/ai/tools/fetch-page.d.ts +70 -0
  151. package/dist/ai/tools/fetch-page.d.ts.map +1 -0
  152. package/dist/ai/tools/fetch-page.js +93 -0
  153. package/dist/ai/tools/fetch-page.js.map +1 -0
  154. package/dist/ai/tools/fetch-sitemap.d.ts +60 -0
  155. package/dist/ai/tools/fetch-sitemap.d.ts.map +1 -0
  156. package/dist/ai/tools/fetch-sitemap.js +116 -0
  157. package/dist/ai/tools/fetch-sitemap.js.map +1 -0
  158. package/dist/ai/tools/index.d.ts +1555 -0
  159. package/dist/ai/tools/index.d.ts.map +1 -0
  160. package/dist/ai/tools/index.js +119 -0
  161. package/dist/ai/tools/index.js.map +1 -0
  162. package/dist/ai/tools/parse-page.d.ts +94 -0
  163. package/dist/ai/tools/parse-page.d.ts.map +1 -0
  164. package/dist/ai/tools/parse-page.js +108 -0
  165. package/dist/ai/tools/parse-page.js.map +1 -0
  166. package/dist/ai/tools/query-serp.d.ts +113 -0
  167. package/dist/ai/tools/query-serp.d.ts.map +1 -0
  168. package/dist/ai/tools/query-serp.js +131 -0
  169. package/dist/ai/tools/query-serp.js.map +1 -0
  170. package/dist/ai/tools/sample-template.d.ts +67 -0
  171. package/dist/ai/tools/sample-template.d.ts.map +1 -0
  172. package/dist/ai/tools/sample-template.js +75 -0
  173. package/dist/ai/tools/sample-template.js.map +1 -0
  174. package/dist/ai/tools/types.d.ts +73 -0
  175. package/dist/ai/tools/types.d.ts.map +1 -0
  176. package/dist/ai/tools/types.js +64 -0
  177. package/dist/ai/tools/types.js.map +1 -0
  178. package/dist/ai/tools/validate-jsonld.d.ts +62 -0
  179. package/dist/ai/tools/validate-jsonld.d.ts.map +1 -0
  180. package/dist/ai/tools/validate-jsonld.js +84 -0
  181. package/dist/ai/tools/validate-jsonld.js.map +1 -0
  182. package/dist/auditor.d.ts +4 -0
  183. package/dist/auditor.d.ts.map +1 -1
  184. package/dist/auditor.js +629 -64
  185. package/dist/auditor.js.map +1 -1
  186. package/dist/backpressure.d.ts.map +1 -1
  187. package/dist/backpressure.js +10 -3
  188. package/dist/backpressure.js.map +1 -1
  189. package/dist/enrich-findings.d.ts.map +1 -1
  190. package/dist/enrich-findings.js +15 -1
  191. package/dist/enrich-findings.js.map +1 -1
  192. package/dist/formatters/console.d.ts.map +1 -1
  193. package/dist/formatters/console.js +13 -0
  194. package/dist/formatters/console.js.map +1 -1
  195. package/dist/formatters/markdown.d.ts.map +1 -1
  196. package/dist/formatters/markdown.js +20 -2
  197. package/dist/formatters/markdown.js.map +1 -1
  198. package/dist/index.d.ts +12 -1
  199. package/dist/index.d.ts.map +1 -1
  200. package/dist/index.js +8 -0
  201. package/dist/index.js.map +1 -1
  202. package/dist/rule-references.d.ts.map +1 -1
  203. package/dist/rule-references.js +5 -0
  204. package/dist/rule-references.js.map +1 -1
  205. package/dist/rules/content/heading-structure.d.ts +21 -0
  206. package/dist/rules/content/heading-structure.d.ts.map +1 -0
  207. package/dist/rules/content/heading-structure.js +56 -0
  208. package/dist/rules/content/heading-structure.js.map +1 -0
  209. package/dist/rules/content/image-alt-text.d.ts +18 -0
  210. package/dist/rules/content/image-alt-text.d.ts.map +1 -0
  211. package/dist/rules/content/image-alt-text.js +77 -0
  212. package/dist/rules/content/image-alt-text.js.map +1 -0
  213. package/dist/rules/content/title-uniqueness.d.ts +18 -0
  214. package/dist/rules/content/title-uniqueness.d.ts.map +1 -0
  215. package/dist/rules/content/title-uniqueness.js +70 -0
  216. package/dist/rules/content/title-uniqueness.js.map +1 -0
  217. package/dist/rules/links/host-section-divergence.d.ts +3 -0
  218. package/dist/rules/links/host-section-divergence.d.ts.map +1 -0
  219. package/dist/rules/links/host-section-divergence.js +158 -0
  220. package/dist/rules/links/host-section-divergence.js.map +1 -0
  221. package/dist/rules/links/link-depth.d.ts +12 -1
  222. package/dist/rules/links/link-depth.d.ts.map +1 -1
  223. package/dist/rules/links/link-depth.js +25 -12
  224. package/dist/rules/links/link-depth.js.map +1 -1
  225. package/dist/rules/scope.d.ts.map +1 -1
  226. package/dist/rules/scope.js +5 -0
  227. package/dist/rules/scope.js.map +1 -1
  228. package/dist/rules/spam/doorway-pattern.d.ts.map +1 -1
  229. package/dist/rules/spam/doorway-pattern.js +27 -4
  230. package/dist/rules/spam/doorway-pattern.js.map +1 -1
  231. package/dist/rules/spam/publication-velocity.d.ts +1 -1
  232. package/dist/rules/spam/publication-velocity.d.ts.map +1 -1
  233. package/dist/rules/spam/publication-velocity.js +9 -4
  234. package/dist/rules/spam/publication-velocity.js.map +1 -1
  235. package/dist/rules/spam/template-coverage.js +1 -1
  236. package/dist/rules/spam/template-coverage.js.map +1 -1
  237. package/dist/rules/spam/template-diversity.js +1 -1
  238. package/dist/rules/spam/template-diversity.js.map +1 -1
  239. package/dist/rules/tech/hreflang-consistency.d.ts.map +1 -1
  240. package/dist/rules/tech/hreflang-consistency.js +33 -4
  241. package/dist/rules/tech/hreflang-consistency.js.map +1 -1
  242. package/dist/rules/tech/og-completeness.d.ts +11 -0
  243. package/dist/rules/tech/og-completeness.d.ts.map +1 -1
  244. package/dist/rules/tech/og-completeness.js +22 -23
  245. package/dist/rules/tech/og-completeness.js.map +1 -1
  246. package/dist/ruleset-version.d.ts +8 -0
  247. package/dist/ruleset-version.d.ts.map +1 -0
  248. package/dist/ruleset-version.js +8 -0
  249. package/dist/ruleset-version.js.map +1 -0
  250. package/dist/scrape-strategy.d.ts +42 -0
  251. package/dist/scrape-strategy.d.ts.map +1 -0
  252. package/dist/scrape-strategy.js +101 -0
  253. package/dist/scrape-strategy.js.map +1 -0
  254. package/dist/site-classifier.d.ts.map +1 -1
  255. package/dist/site-classifier.js +1 -0
  256. package/dist/site-classifier.js.map +1 -1
  257. package/dist/state.d.ts +36 -1
  258. package/dist/state.d.ts.map +1 -1
  259. package/dist/state.js +3 -1
  260. package/dist/state.js.map +1 -1
  261. package/dist/stratified-sample.d.ts +9 -1
  262. package/dist/stratified-sample.d.ts.map +1 -1
  263. package/dist/stratified-sample.js +23 -6
  264. package/dist/stratified-sample.js.map +1 -1
  265. package/dist/types.d.ts +135 -2
  266. package/dist/types.d.ts.map +1 -1
  267. package/dist/url-normalize.d.ts.map +1 -1
  268. package/dist/url-normalize.js +13 -1
  269. package/dist/url-normalize.js.map +1 -1
  270. package/package.json +90 -90
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Orchestrator MVP entry points.
3
+ *
4
+ * The runner takes a domain + a resolved `LanguageModel` (use existing
5
+ * `createLanguageModel` from ai/adapters) and runs a single audit session
6
+ * to completion or until budget is exhausted. Events are emitted to an
7
+ * optional `onEvent` callback (SSE / R2 fanout in the web app) and an
8
+ * optional NDJSON file. The full event log is also returned in the result.
9
+ *
10
+ * Phase 2 MVP scope: single-agent, generateText (not streamText), no
11
+ * watchdog injection. The follow-up swaps in streamText for live UI
12
+ * streaming and adds the watchdog.
13
+ */
14
+ export { runOrchestrator } from "./runner.js";
15
+ export type { RunOrchestratorOptions, EventSink } from "./runner.js";
16
+ export { SessionState } from "./session.js";
17
+ export type { SessionStateOptions } from "./session.js";
18
+ export { BudgetTracker } from "./budget.js";
19
+ export { SessionLog } from "./log.js";
20
+ export { buildSystemPrompt } from "./prompt.js";
21
+ export { finishAuditTool, manifestSchema } from "./finish-tool.js";
22
+ export type { FixManifest } from "./finish-tool.js";
23
+ export { DEFAULT_BUDGET, } from "./types.js";
24
+ export type { BudgetCaps, UsageSnapshot, StopReason, SessionEvent, SessionResult, } from "./types.js";
25
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/ai/orchestrator/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AACH,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAC9C,YAAY,EAAE,sBAAsB,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACrE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,YAAY,EAAE,mBAAmB,EAAE,MAAM,cAAc,CAAC;AACxD,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AACtC,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACnE,YAAY,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AACpD,OAAO,EACL,cAAc,GACf,MAAM,YAAY,CAAC;AACpB,YAAY,EACV,UAAU,EACV,aAAa,EACb,UAAU,EACV,YAAY,EACZ,aAAa,GACd,MAAM,YAAY,CAAC"}
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Orchestrator MVP entry points.
3
+ *
4
+ * The runner takes a domain + a resolved `LanguageModel` (use existing
5
+ * `createLanguageModel` from ai/adapters) and runs a single audit session
6
+ * to completion or until budget is exhausted. Events are emitted to an
7
+ * optional `onEvent` callback (SSE / R2 fanout in the web app) and an
8
+ * optional NDJSON file. The full event log is also returned in the result.
9
+ *
10
+ * Phase 2 MVP scope: single-agent, generateText (not streamText), no
11
+ * watchdog injection. The follow-up swaps in streamText for live UI
12
+ * streaming and adds the watchdog.
13
+ */
14
+ export { runOrchestrator } from "./runner.js";
15
+ export { SessionState } from "./session.js";
16
+ export { BudgetTracker } from "./budget.js";
17
+ export { SessionLog } from "./log.js";
18
+ export { buildSystemPrompt } from "./prompt.js";
19
+ export { finishAuditTool, manifestSchema } from "./finish-tool.js";
20
+ export { DEFAULT_BUDGET, } from "./types.js";
21
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/ai/orchestrator/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AACH,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AACtC,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAEnE,OAAO,EACL,cAAc,GACf,MAAM,YAAY,CAAC"}
@@ -0,0 +1,24 @@
1
+ import type { SessionEvent } from "./types.js";
2
+ export type EventSink = (event: SessionEvent) => void | Promise<void>;
3
+ /**
4
+ * Per-session event recorder. Three concerns:
5
+ * 1. In-memory buffer (returned in SessionResult so callers can replay).
6
+ * 2. Optional NDJSON file writer for durable replay (orchestrator session log).
7
+ * 3. Optional callback fanout — the web app passes a callback that pushes
8
+ * to SSE / R2 in production.
9
+ *
10
+ * Errors writing the file are swallowed; we never want a log-write failure
11
+ * to abort an in-flight orchestrator session.
12
+ */
13
+ export declare class SessionLog {
14
+ private readonly opts;
15
+ private readonly buffer;
16
+ private fileInitialized;
17
+ constructor(opts?: {
18
+ ndjsonPath?: string;
19
+ onEvent?: EventSink;
20
+ });
21
+ emit(event: SessionEvent): Promise<void>;
22
+ events(): SessionEvent[];
23
+ }
24
+ //# sourceMappingURL=log.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"log.d.ts","sourceRoot":"","sources":["../../../src/ai/orchestrator/log.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAE/C,MAAM,MAAM,SAAS,GAAG,CAAC,KAAK,EAAE,YAAY,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;AAEtE;;;;;;;;;GASG;AACH,qBAAa,UAAU;IAKnB,OAAO,CAAC,QAAQ,CAAC,IAAI;IAJvB,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAsB;IAC7C,OAAO,CAAC,eAAe,CAAS;gBAGb,IAAI,GAAE;QACrB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,OAAO,CAAC,EAAE,SAAS,CAAC;KAChB;IAGF,IAAI,CAAC,KAAK,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAyB9C,MAAM,IAAI,YAAY,EAAE;CAGzB"}
@@ -0,0 +1,48 @@
1
+ import { appendFile, mkdir } from "node:fs/promises";
2
+ import { dirname } from "node:path";
3
+ /**
4
+ * Per-session event recorder. Three concerns:
5
+ * 1. In-memory buffer (returned in SessionResult so callers can replay).
6
+ * 2. Optional NDJSON file writer for durable replay (orchestrator session log).
7
+ * 3. Optional callback fanout — the web app passes a callback that pushes
8
+ * to SSE / R2 in production.
9
+ *
10
+ * Errors writing the file are swallowed; we never want a log-write failure
11
+ * to abort an in-flight orchestrator session.
12
+ */
13
+ export class SessionLog {
14
+ opts;
15
+ buffer = [];
16
+ fileInitialized = false;
17
+ constructor(opts = {}) {
18
+ this.opts = opts;
19
+ }
20
+ async emit(event) {
21
+ this.buffer.push(event);
22
+ if (this.opts.onEvent) {
23
+ try {
24
+ await this.opts.onEvent(event);
25
+ }
26
+ catch {
27
+ // Sink failures are non-fatal — never let an SSE/R2 hiccup take
28
+ // down the session. The buffer is still authoritative.
29
+ }
30
+ }
31
+ if (this.opts.ndjsonPath) {
32
+ try {
33
+ if (!this.fileInitialized) {
34
+ await mkdir(dirname(this.opts.ndjsonPath), { recursive: true });
35
+ this.fileInitialized = true;
36
+ }
37
+ await appendFile(this.opts.ndjsonPath, JSON.stringify(event) + "\n", "utf8");
38
+ }
39
+ catch {
40
+ // Same — log-file errors don't kill the session.
41
+ }
42
+ }
43
+ }
44
+ events() {
45
+ return [...this.buffer];
46
+ }
47
+ }
48
+ //# sourceMappingURL=log.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"log.js","sourceRoot":"","sources":["../../../src/ai/orchestrator/log.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AACrD,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAKpC;;;;;;;;;GASG;AACH,MAAM,OAAO,UAAU;IAKF;IAJF,MAAM,GAAmB,EAAE,CAAC;IACrC,eAAe,GAAG,KAAK,CAAC;IAEhC,YACmB,OAGb,EAAE;QAHW,SAAI,GAAJ,IAAI,CAGf;IACL,CAAC;IAEJ,KAAK,CAAC,IAAI,CAAC,KAAmB;QAC5B,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAExB,IAAI,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YACtB,IAAI,CAAC;gBACH,MAAM,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;YACjC,CAAC;YAAC,MAAM,CAAC;gBACP,gEAAgE;gBAChE,uDAAuD;YACzD,CAAC;QACH,CAAC;QAED,IAAI,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC;YACzB,IAAI,CAAC;gBACH,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,CAAC;oBAC1B,MAAM,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;oBAChE,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;gBAC9B,CAAC;gBACD,MAAM,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,GAAG,IAAI,EAAE,MAAM,CAAC,CAAC;YAC/E,CAAC;YAAC,MAAM,CAAC;gBACP,iDAAiD;YACnD,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM;QACJ,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC;IAC1B,CAAC;CACF"}
@@ -0,0 +1,64 @@
1
+ /**
2
+ * One entry in the per-session page cache. `html` is the response body;
3
+ * `status` and `headers` come from the original fetch so downstream tools
4
+ * (`check_indexability` reading X-Robots-Tag, etc.) don't have to re-fetch.
5
+ */
6
+ export interface PageCacheEntry {
7
+ url: string;
8
+ html: string;
9
+ status: number;
10
+ headers: Record<string, string>;
11
+ fetchedAt: number;
12
+ }
13
+ export declare class PageCache {
14
+ private readonly maxBytes;
15
+ private readonly store;
16
+ private currentBytes;
17
+ constructor(maxBytes?: number);
18
+ /**
19
+ * Stable pageId derivation: sha256 of the URL, truncated to 16 hex chars
20
+ * (~64 bits — collision-safe for any realistic session size). Same URL
21
+ * always maps to the same pageId so `fetch_page` is idempotent within
22
+ * a session, and the LLM can re-reference a page without juggling ids.
23
+ */
24
+ static idFor(url: string): string;
25
+ put(entry: Omit<PageCacheEntry, "fetchedAt">): string;
26
+ get(pageId: string): PageCacheEntry | null;
27
+ has(pageId: string): boolean;
28
+ size(): number;
29
+ /** Approximate memory usage, counted in string length (UTF-16 code units, not encoded bytes) — useful for orchestrator-side monitoring. */
30
+ approximateBytes(): number;
31
+ }
32
+ /**
33
+ * Run `fn` with `cache` available via `currentPageCache()`. The
34
+ * orchestrator runner wraps its `generateText` call in this so every tool
35
+ * invocation (including those nested inside the AI SDK's tool loop) sees
36
+ * the same cache.
37
+ */
38
+ export declare function withPageCache<T>(cache: PageCache, fn: () => Promise<T>): Promise<T>;
39
+ /**
40
+ * Read the current session's page cache. Returns null when called outside
41
+ * `withPageCache` (e.g. unit tests that don't set one up — those tests use
42
+ * `setTestCache` to inject a cache directly).
43
+ */
44
+ export declare function currentPageCache(): PageCache | null;
45
+ /**
46
+ * Test-only helper: set a page cache for the duration of a synchronous or
47
+ * async test. Use as `await setTestCache(cache, async () => { ... })`
48
+ * — same API as the production wrapper, just imported in tests.
49
+ */
50
+ export declare const setTestCache: typeof withPageCache;
51
+ /**
52
+ * Resolve a `pageId` to its cache entry — the standard tool-side helper.
53
+ * Throws a clear, LLM-friendly error when the cache isn't in scope or the
54
+ * pageId is unknown so the orchestrator can recover by re-fetching rather
55
+ * than crashing the session.
56
+ */
57
+ export declare function resolvePage(pageId: string): PageCacheEntry;
58
+ /**
59
+ * Multi-page variant. Resolves a list of pageIds in one call; throws on
60
+ * the first unknown id so the LLM gets specific feedback about which page
61
+ * needs re-fetching.
62
+ */
63
+ export declare function resolvePages(pageIds: string[]): PageCacheEntry[];
64
+ //# sourceMappingURL=page-cache.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"page-cache.d.ts","sourceRoot":"","sources":["../../../src/ai/orchestrator/page-cache.ts"],"names":[],"mappings":"AAGA;;;;GAIG;AACH,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,SAAS,EAAE,MAAM,CAAC;CACnB;AA2BD,qBAAa,SAAS;IAIR,OAAO,CAAC,QAAQ,CAAC,QAAQ;IAHrC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAqC;IAC3D,OAAO,CAAC,YAAY,CAAK;gBAEI,QAAQ,GAAE,MAA0B;IAEjE;;;;;OAKG;IACH,MAAM,CAAC,KAAK,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM;IAIjC,GAAG,CAAC,KAAK,EAAE,IAAI,CAAC,cAAc,EAAE,WAAW,CAAC,GAAG,MAAM;IA4BrD,GAAG,CAAC,MAAM,EAAE,MAAM,GAAG,cAAc,GAAG,IAAI;IAI1C,GAAG,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO;IAI5B,IAAI,IAAI,MAAM;IAId,gFAAgF;IAChF,gBAAgB,IAAI,MAAM;CAG3B;AAID;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,CAAC,EAAE,KAAK,EAAE,SAAS,EAAE,EAAE,EAAE,MAAM,OAAO,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC,CAEnF;AAED;;;;GAIG;AACH,wBAAgB,gBAAgB,IAAI,SAAS,GAAG,IAAI,CAEnD;AAED;;;;GAIG;AACH,eAAO,MAAM,YAAY,sBAAgB,CAAC;AAE1C;;;;;GAKG;AACH,wBAAgB,WAAW,CAAC,MAAM,EAAE,MAAM,GAAG,cAAc,CAc1D;AAED;;;;GAIG;AACH,wBAAgB,YAAY,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,cAAc,EAAE,CAEhE"}
@@ -0,0 +1,127 @@
1
+ import { AsyncLocalStorage } from "node:async_hooks";
2
+ import { createHash } from "node:crypto";
3
+ /**
4
+ * Session-scoped page cache. The orchestrator's reason for being:
5
+ * 1. `fetch_page` writes HTML here keyed by `pageId`
6
+ * 2. Returns the `pageId` (not the HTML) to the LLM
7
+ * 3. Subsequent tools (parse_page, check_rule_*, validate_jsonld, etc.)
8
+ * take a `pageId` in their input and look up HTML via the cache
9
+ *
10
+ * Net effect: HTML never lives in the LLM conversation history. The cost
11
+ * impact is dramatic — the v0.4.4 dogfood showed 5 fetched pages exploding
12
+ * to 297K tokens / $0.91 in a single step because re-passing HTML through
13
+ * tool inputs accumulated quadratically. With pageId references that
14
+ * collapses to ~20K tokens / $0.06 for the same workload.
15
+ *
16
+ * Implementation: AsyncLocalStorage threads the cache through generateText
17
+ * → tool execute() without changing the AI SDK's tool surface. Tools that
18
+ * don't need page content ignore it entirely.
19
+ */
20
+ /**
21
+ * Soft cap on per-session cache size. Generous enough for any realistic
22
+ * orchestrator session (200 pages × 100KB ≈ 20MB) but bounds memory growth
23
+ * for misbehaving runs. `put` throws a tool-friendly error past this
24
+ * cap so the LLM can adapt rather than the orchestrator OOMing silently.
25
+ */
26
+ const DEFAULT_MAX_BYTES = 50 * 1024 * 1024; // 50MB
27
+ export class PageCache {
28
+ maxBytes;
29
+ store = new Map();
30
+ currentBytes = 0;
31
+ constructor(maxBytes = DEFAULT_MAX_BYTES) {
32
+ this.maxBytes = maxBytes;
33
+ }
34
+ /**
35
+ * Stable pageId derivation: sha256 of the URL, truncated to 16 hex chars
36
+ * (~64 bits — collision-safe for any realistic session size). Same URL
37
+ * always maps to the same pageId so `fetch_page` is idempotent within
38
+ * a session, and the LLM can re-reference a page without juggling ids.
39
+ */
40
+ static idFor(url) {
41
+ return createHash("sha256").update(url).digest("hex").slice(0, 16);
42
+ }
43
+ put(entry) {
44
+ const pageId = PageCache.idFor(entry.url);
45
+ const bytes = entry.html.length +
46
+ entry.url.length +
47
+ Object.entries(entry.headers).reduce((acc, [k, v]) => acc + k.length + v.length, 0);
48
+ // Replacing an existing entry: subtract its old size from the running total
49
+ // before checking the cap — re-fetching the same URL shouldn't count against the cap twice.
50
+ const existing = this.store.get(pageId);
51
+ if (existing) {
52
+ this.currentBytes -=
53
+ existing.html.length +
54
+ existing.url.length +
55
+ Object.entries(existing.headers).reduce((acc, [k, v]) => acc + k.length + v.length, 0);
56
+ }
57
+ if (this.currentBytes + bytes > this.maxBytes) {
58
+ throw new Error(`page cache full (${this.maxBytes} bytes); reduce concurrent fetches or finalize the audit before fetching more`);
59
+ }
60
+ this.store.set(pageId, { ...entry, fetchedAt: Date.now() });
61
+ this.currentBytes += bytes;
62
+ return pageId;
63
+ }
64
+ get(pageId) {
65
+ return this.store.get(pageId) ?? null;
66
+ }
67
+ has(pageId) {
68
+ return this.store.has(pageId);
69
+ }
70
+ size() {
71
+ return this.store.size;
72
+ }
73
+ /** Approximate memory usage, counted in string length (UTF-16 code units, not encoded bytes) — useful for orchestrator-side monitoring. */
74
+ approximateBytes() {
75
+ return this.currentBytes;
76
+ }
77
+ }
78
+ const cacheStorage = new AsyncLocalStorage();
79
+ /**
80
+ * Run `fn` with `cache` available via `currentPageCache()`. The
81
+ * orchestrator runner wraps its `generateText` call in this so every tool
82
+ * invocation (including those nested inside the AI SDK's tool loop) sees
83
+ * the same cache.
84
+ */
85
+ export function withPageCache(cache, fn) {
86
+ return cacheStorage.run(cache, fn);
87
+ }
88
+ /**
89
+ * Read the current session's page cache. Returns null when called outside
90
+ * `withPageCache` (e.g. unit tests that don't set one up — those tests use
91
+ * `setTestCache` to inject a cache directly).
92
+ */
93
+ export function currentPageCache() {
94
+ return cacheStorage.getStore() ?? null;
95
+ }
96
+ /**
97
+ * Test-only helper: set a page cache for the duration of a synchronous or
98
+ * async test. Use as `await setTestCache(cache, async () => { ... })`
99
+ * — same API as the production wrapper, just imported in tests.
100
+ */
101
+ export const setTestCache = withPageCache;
102
+ /**
103
+ * Resolve a `pageId` to its cache entry — the standard tool-side helper.
104
+ * Throws a clear, LLM-friendly error when the cache isn't in scope or the
105
+ * pageId is unknown so the orchestrator can recover by re-fetching rather
106
+ * than crashing the session.
107
+ */
108
+ export function resolvePage(pageId) {
109
+ const cache = currentPageCache();
110
+ if (!cache) {
111
+ throw new Error("page cache not in scope — tool must run inside the orchestrator's withPageCache wrapper");
112
+ }
113
+ const entry = cache.get(pageId);
114
+ if (!entry) {
115
+ throw new Error(`unknown pageId ${pageId} — call fetch_page to populate the cache before passing the returned pageId here`);
116
+ }
117
+ return entry;
118
+ }
119
+ /**
120
+ * Multi-page variant. Resolves a list of pageIds in one call; throws on
121
+ * the first unknown id so the LLM gets specific feedback about which page
122
+ * needs re-fetching.
123
+ */
124
+ export function resolvePages(pageIds) {
125
+ return pageIds.map(resolvePage);
126
+ }
127
+ //# sourceMappingURL=page-cache.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"page-cache.js","sourceRoot":"","sources":["../../../src/ai/orchestrator/page-cache.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AACrD,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAezC;;;;;;;;;;;;;;;;GAgBG;AACH;;;;;GAKG;AACH,MAAM,iBAAiB,GAAG,EAAE,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,OAAO;AAEnD,MAAM,OAAO,SAAS;IAIS;IAHZ,KAAK,GAAG,IAAI,GAAG,EAA0B,CAAC;IACnD,YAAY,GAAG,CAAC,CAAC;IAEzB,YAA6B,WAAmB,iBAAiB;QAApC,aAAQ,GAAR,QAAQ,CAA4B;IAAG,CAAC;IAErE;;;;;OAKG;IACH,MAAM,CAAC,KAAK,CAAC,GAAW;QACtB,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IACrE,CAAC;IAED,GAAG,CAAC,KAAwC;QAC1C,MAAM,MAAM,GAAG,SAAS,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC1C,MAAM,KAAK,GACT,KAAK,CAAC,IAAI,CAAC,MAAM;YACjB,KAAK,CAAC,GAAG,CAAC,MAAM;YAChB,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAEtF,4EAA4E;QAC5E,4EAA4E;QAC5E,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACxC,IAAI,QAAQ,EAAE,CAAC;YACb,IAAI,CAAC,YAAY;gBACf,QAAQ,CAAC,IAAI,CAAC,MAAM;oBACpB,QAAQ,CAAC,GAAG,CAAC,MAAM;oBACnB,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAC3F,CAAC;QAED,IAAI,IAAI,CAAC,YAAY,GAAG,KAAK,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC;YAC9C,MAAM,IAAI,KAAK,CACb,oBAAoB,IAAI,CAAC,QAAQ,+EAA+E,CACjH,CAAC;QACJ,CAAC;QAED,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,EAAE,EAAE,GAAG,KAAK,EAAE,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QAC5D,IAAI,CAAC,YAAY,IAAI,KAAK,CAAC;QAC3B,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,GAAG,CAAC,MAAc;QAChB,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC;IACxC,CAAC;IAED,GAAG,CAAC,MAAc;QAChB,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IAChC,CAAC;IAED,IAAI;QACF,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC;IACzB,CAAC;IAED,gFAAgF;IAChF,gBAAgB;QACd,OAAO,IAAI,CAAC,YAA
Y,CAAC;IAC3B,CAAC;CACF;AAED,MAAM,YAAY,GAAG,IAAI,iBAAiB,EAAa,CAAC;AAExD;;;;;GAKG;AACH,MAAM,UAAU,aAAa,CAAI,KAAgB,EAAE,EAAoB;IACrE,OAAO,YAAY,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;AACrC,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,gBAAgB;IAC9B,OAAO,YAAY,CAAC,QAAQ,EAAE,IAAI,IAAI,CAAC;AACzC,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,MAAM,YAAY,GAAG,aAAa,CAAC;AAE1C;;;;;GAKG;AACH,MAAM,UAAU,WAAW,CAAC,MAAc;IACxC,MAAM,KAAK,GAAG,gBAAgB,EAAE,CAAC;IACjC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CACb,yFAAyF,CAC1F,CAAC;IACJ,CAAC;IACD,MAAM,KAAK,GAAG,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IAChC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CACb,kBAAkB,MAAM,kFAAkF,CAC3G,CAAC;IACJ,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,YAAY,CAAC,OAAiB;IAC5C,OAAO,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;AAClC,CAAC"}
@@ -0,0 +1,16 @@
1
+ import type { BudgetCaps } from "./types.js";
2
+ /**
3
+ * Build the orchestrator system prompt. Structure:
4
+ * 1. Role
5
+ * 2. Methodology hints (not script — model picks order)
6
+ * 3. Findings-vs-patches and tool-use guidance (avoid amplifying low-confidence findings, etc.)
7
+ * 4. Budget contract (concrete numbers from BudgetCaps)
8
+ * 5. Output contract (must call finish_audit to terminate)
9
+ *
10
+ * Kept stable between sessions so Anthropic prompt caching keeps the
11
+ * ~2-3K-token system prompt cached across invocations. Only the budget
12
+ * numbers vary per session, and they sit near the end, so the prefix above
13
+ * them still gets cache hits.
14
+ */
15
+ export declare function buildSystemPrompt(caps: BudgetCaps): string;
16
+ //# sourceMappingURL=prompt.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"prompt.d.ts","sourceRoot":"","sources":["../../../src/ai/orchestrator/prompt.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAE7C;;;;;;;;;;;;GAYG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,UAAU,GAAG,MAAM,CAqC1D"}
@@ -0,0 +1,52 @@
1
+ /**
2
+ * Build the orchestrator system prompt. Structure:
3
+ * 1. Role
4
+ * 2. Methodology hints (not script — model picks order)
5
+ * 3. Findings-vs-patches and tool-use guidance (avoid amplifying low-confidence findings, etc.)
6
+ * 4. Budget contract (concrete numbers from BudgetCaps)
7
+ * 5. Output contract (must call finish_audit to terminate)
8
+ *
9
+ * Kept stable between sessions so Anthropic prompt caching keeps the
10
+ * ~2-3K-token system prompt cached across invocations. Only the budget
11
+ * numbers vary per session, and they sit near the end, so the prefix above
12
+ * them still gets cache hits.
13
+ */
14
+ export function buildSystemPrompt(caps) {
15
+ return [
16
+ `You are pseolint's SEO audit orchestrator. Drive deterministic tools to produce a fix manifest with concrete, paste-able patches — not just a list of findings.`,
17
+ ``,
18
+ `## Suggested order (you choose)`,
19
+ `1. fetch_sitemap → discover URLs`,
20
+ `2. detect_templates → identify dominant patterns`,
21
+ `3. check_domain_llms_txt + check_domain_crawler_access (once per origin)`,
22
+ `4. sample_template (3-5 URLs per meaningful template, ≥5% of URLs)`,
23
+ `5. fetch_page in parallel batches of 5-8 → parse_page → check_indexability`,
24
+ `6. Per-page rule checks for indexable pages: thin_content, missing_author, canonical_consistency, json_ld_valid, answer_first, validate_jsonld`,
25
+ `7. Cluster checks per-template: near_duplicate, faq_coverage (if FAQ-typed URLs), citable_facts, freshness_signals, content_modularity, summary_bait, required_fields, schema_consistency`,
26
+ `8. Translate findings into patches; call finish_audit`,
27
+ ``,
28
+ `## Findings vs patches`,
29
+ `A finding is evidence (e.g. "thin content on /city/austin"). A patch is a concrete fix a user can paste in. Translate evidence into action:`,
30
+ `- replace_h1, rewrite_meta, rewrite_intro — text replacements`,
31
+ `- add_jsonld, add_faq_block, add_internal_link — HTML insertions`,
32
+ `- remove_thin_block — selector-based deletion`,
33
+ `- robots_txt, sitemap_xml, canonical_strategy — domain-level`,
34
+ `Use template-level patches when an issue spans 80%+ of a cluster. Per-page patches don't scale.`,
35
+ `For add_jsonld, validate_jsonld lists exactly which Schema.org properties are missing per declared @type — use that to populate the block.`,
36
+ ``,
37
+ `## Tool-use rules`,
38
+ `- Findings carry confidence (high|medium|low|speculative). Don't propose structural rewrites on low/speculative evidence alone — corroborate with another tool first.`,
39
+ `- Skip rule checks on non-indexable pages (noindex, wrong canonical) — they don't affect search.`,
40
+ `- If a tool returns ok:false, move on. Don't retry the same input.`,
41
+ `- Prefer parallel tool calls when independent (multi-page fetches especially).`,
42
+ `- HTML never travels in conversation: fetch_page returns a pageId you pass to subsequent tools. The token cost stays bounded.`,
43
+ ``,
44
+ `## Budget`,
45
+ `${caps.maxToolCalls} tool calls · ${caps.maxInputTokensTotal.toLocaleString("en-US")} input tokens · $${caps.maxSessionUsd.toFixed(2)} · ${caps.maxWallSeconds}s wall.`,
46
+ `The runtime stops you at any cap. Partial manifests are useful — prefer breadth (cover all templates) over depth (exhaustive on one).`,
47
+ ``,
48
+ `## Output`,
49
+ `Call finish_audit(manifest) exactly once to terminate. Without it, the session ends with no manifest. Empty patch arrays are fine when nothing's actionable — don't invent patches.`,
50
+ ].join("\n");
51
+ }
52
+ //# sourceMappingURL=prompt.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"prompt.js","sourceRoot":"","sources":["../../../src/ai/orchestrator/prompt.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,iBAAiB,CAAC,IAAgB;IAChD,OAAO;QACL,iKAAiK;QACjK,EAAE;QACF,iCAAiC;QACjC,kCAAkC;QAClC,kDAAkD;QAClD,0EAA0E;QAC1E,oEAAoE;QACpE,4EAA4E;QAC5E,gJAAgJ;QAChJ,2LAA2L;QAC3L,uDAAuD;QACvD,EAAE;QACF,wBAAwB;QACxB,6IAA6I;QAC7I,+DAA+D;QAC/D,kEAAkE;QAClE,+CAA+C;QAC/C,8DAA8D;QAC9D,iGAAiG;QACjG,4IAA4I;QAC5I,EAAE;QACF,mBAAmB;QACnB,uKAAuK;QACvK,kGAAkG;QAClG,oEAAoE;QACpE,gFAAgF;QAChF,+HAA+H;QAC/H,EAAE;QACF,WAAW;QACX,GAAG,IAAI,CAAC,YAAY,iBAAiB,IAAI,CAAC,mBAAmB,CAAC,cAAc,CAAC,OAAO,CAAC,oBAAoB,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,cAAc,SAAS;QACxK,uIAAuI;QACvI,EAAE;QACF,WAAW;QACX,qLAAqL;KACtL,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACf,CAAC"}
@@ -0,0 +1,65 @@
1
+ import { type LanguageModel } from "ai";
2
+ import type { BudgetCaps, SessionResult } from "./types.js";
3
+ import type { EventSink as _EventSinkPathReexport } from "./log.js";
4
+ /**
5
+ * Loose type for the tool registry. The AI SDK's `Tool<I, O>` is invariant
6
+ * in its generics, so a heterogeneous map of tools with distinct schemas
7
+ * can't be statically typed as `Record<string, Tool<unknown, unknown>>`.
8
+ * We accept the unsoundness at this single boundary — every individual tool
9
+ * is type-safe at definition site (via `defineTool`), and the AI SDK
10
+ * validates inputs at runtime via Zod regardless.
11
+ */
12
+ type AnyDefinedTool = {
13
+ name: string;
14
+ toAiTool: () => any;
15
+ };
16
+ export interface RunOrchestratorOptions {
17
+ domain: string;
18
+ userId: string;
19
+ model: LanguageModel;
20
+ /** Provider id for cost estimation (e.g. "anthropic"). */
21
+ providerId: string;
22
+ /** Model id for cost estimation (e.g. "claude-opus-4-7"). */
23
+ modelId: string;
24
+ /** Override default budget caps. Defaults from DEFAULT_BUDGET. */
25
+ budget?: Partial<BudgetCaps>;
26
+ /** Optional path for durable NDJSON session log. */
27
+ ndjsonPath?: string;
28
+ /** Optional event sink (e.g. SSE / R2 fanout). */
29
+ onEvent?: EventSink;
30
+ /** External abort signal — aborting flips the session to `aborted`. */
31
+ signal?: AbortSignal;
32
+ /**
33
+ * Override the tool registry. Default = full `orchestratorTools` plus
34
+ * `finish_audit`. Tests pass a smaller subset; production passes the full
35
+ * registry.
36
+ */
37
+ tools?: Record<string, AnyDefinedTool>;
38
+ /**
39
+ * Per-model-call retry budget for transient errors (5xx, rate limits,
40
+ * network failures). The AI SDK handles the retry loop internally and
41
+ * respects `retry-after` headers from the provider, so each retry waits
42
+ * the rate-limit window when the provider asks.
43
+ *
44
+ * Default 5: tier-1 Anthropic accounts have a 30K input-tokens-per-minute
45
+ * cap that burst-friendly orchestrator sessions can hit. Five retries
46
+ * with exponential backoff (~1s/2s/4s/8s/16s base) span a full 60s TPM
47
+ * refresh, so the next retry usually succeeds. Lower this only if the
48
+ * caller has tier-2+ keys (no TPM cap) or cares more about fail-fast.
49
+ */
50
+ maxRetries?: number;
51
+ }
52
+ /**
53
+ * Run a single orchestrator session end-to-end. Returns the final
54
+ * `SessionResult` once the LLM calls `finish_audit`, the budget is
55
+ * exhausted, the abort signal fires, or the model errors out.
56
+ *
57
+ * No streaming yet — uses `generateText` for simpler test ergonomics with
58
+ * the existing `MockModel` (which only implements `doGenerate`). Phase 2
59
+ * follow-up swaps to `streamText` once we have a stream-capable mock and a
60
+ * web-app SSE endpoint to wire up.
61
+ */
62
+ export declare function runOrchestrator(opts: RunOrchestratorOptions): Promise<SessionResult>;
63
+ export type EventSink = _EventSinkPathReexport;
64
+ export {};
65
+ //# sourceMappingURL=runner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../../../src/ai/orchestrator/runner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAgB,KAAK,aAAa,EAAE,MAAM,IAAI,CAAC;AAOtD,OAAO,KAAK,EAAE,UAAU,EAAE,aAAa,EAAc,MAAM,YAAY,CAAC;AACxE,OAAO,KAAK,EAAE,SAAS,IAAI,sBAAsB,EAAE,MAAM,UAAU,CAAC;AAEpE;;;;;;;GAOG;AAEH,KAAK,cAAc,GAAG;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,GAAG,CAAA;CAAE,CAAC;AAI5D,MAAM,WAAW,sBAAsB;IACrC,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,aAAa,CAAC;IACrB,0DAA0D;IAC1D,UAAU,EAAE,MAAM,CAAC;IACnB,6DAA6D;IAC7D,OAAO,EAAE,MAAM,CAAC;IAChB,kEAAkE;IAClE,MAAM,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC;IAC7B,oDAAoD;IACpD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,kDAAkD;IAClD,OAAO,CAAC,EAAE,SAAS,CAAC;IACpB,uEAAuE;IACvE,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB;;;;OAIG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;IACvC;;;;;;;;;;;OAWG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAID;;;;;;;;;GASG;AACH,wBAAsB,eAAe,CAAC,IAAI,EAAE,sBAAsB,GAAG,OAAO,CAAC,aAAa,CAAC,CAuM1F;AAmBD,MAAM,MAAM,SAAS,GAAG,sBAAsB,CAAC"}