@inbrowser/agent 0.0.0-placeholder → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (267) hide show
  1. package/AGENTS.md +270 -0
  2. package/LICENSE +21 -0
  3. package/README.md +117 -2
  4. package/bin/agent.ts +10 -0
  5. package/dist/cli/commands/describe.d.ts +14 -0
  6. package/dist/cli/commands/describe.d.ts.map +1 -0
  7. package/dist/cli/commands/describe.js +179 -0
  8. package/dist/cli/commands/describe.js.map +1 -0
  9. package/dist/cli/commands/events.d.ts +21 -0
  10. package/dist/cli/commands/events.d.ts.map +1 -0
  11. package/dist/cli/commands/events.js +59 -0
  12. package/dist/cli/commands/events.js.map +1 -0
  13. package/dist/cli/commands/fleet.d.ts +15 -0
  14. package/dist/cli/commands/fleet.d.ts.map +1 -0
  15. package/dist/cli/commands/fleet.js +149 -0
  16. package/dist/cli/commands/fleet.js.map +1 -0
  17. package/dist/cli/commands/help.d.ts +15 -0
  18. package/dist/cli/commands/help.d.ts.map +1 -0
  19. package/dist/cli/commands/help.js +93 -0
  20. package/dist/cli/commands/help.js.map +1 -0
  21. package/dist/cli/commands/migrate.d.ts +27 -0
  22. package/dist/cli/commands/migrate.d.ts.map +1 -0
  23. package/dist/cli/commands/migrate.js +109 -0
  24. package/dist/cli/commands/migrate.js.map +1 -0
  25. package/dist/cli/commands/run.d.ts +38 -0
  26. package/dist/cli/commands/run.d.ts.map +1 -0
  27. package/dist/cli/commands/run.js +535 -0
  28. package/dist/cli/commands/run.js.map +1 -0
  29. package/dist/cli/commands/schema.d.ts +8 -0
  30. package/dist/cli/commands/schema.d.ts.map +1 -0
  31. package/dist/cli/commands/schema.js +12 -0
  32. package/dist/cli/commands/schema.js.map +1 -0
  33. package/dist/cli/commands/serve.d.ts +39 -0
  34. package/dist/cli/commands/serve.d.ts.map +1 -0
  35. package/dist/cli/commands/serve.js +65 -0
  36. package/dist/cli/commands/serve.js.map +1 -0
  37. package/dist/cli/commands/undo.d.ts +36 -0
  38. package/dist/cli/commands/undo.d.ts.map +1 -0
  39. package/dist/cli/commands/undo.js +132 -0
  40. package/dist/cli/commands/undo.js.map +1 -0
  41. package/dist/cli/fixtures.d.ts +17 -0
  42. package/dist/cli/fixtures.d.ts.map +1 -0
  43. package/dist/cli/fixtures.js +107 -0
  44. package/dist/cli/fixtures.js.map +1 -0
  45. package/dist/cli/hardening.d.ts +39 -0
  46. package/dist/cli/hardening.d.ts.map +1 -0
  47. package/dist/cli/hardening.js +68 -0
  48. package/dist/cli/hardening.js.map +1 -0
  49. package/dist/cli/index.d.ts +28 -0
  50. package/dist/cli/index.d.ts.map +1 -0
  51. package/dist/cli/index.js +19 -0
  52. package/dist/cli/index.js.map +1 -0
  53. package/dist/cli/llm/openrouter.d.ts +33 -0
  54. package/dist/cli/llm/openrouter.d.ts.map +1 -0
  55. package/dist/cli/llm/openrouter.js +285 -0
  56. package/dist/cli/llm/openrouter.js.map +1 -0
  57. package/dist/cli/main.d.ts +32 -0
  58. package/dist/cli/main.d.ts.map +1 -0
  59. package/dist/cli/main.js +106 -0
  60. package/dist/cli/main.js.map +1 -0
  61. package/dist/cli/output.d.ts +36 -0
  62. package/dist/cli/output.d.ts.map +1 -0
  63. package/dist/cli/output.js +95 -0
  64. package/dist/cli/output.js.map +1 -0
  65. package/dist/cli/parse.d.ts +26 -0
  66. package/dist/cli/parse.d.ts.map +1 -0
  67. package/dist/cli/parse.js +160 -0
  68. package/dist/cli/parse.js.map +1 -0
  69. package/dist/cli/session-log.d.ts +34 -0
  70. package/dist/cli/session-log.d.ts.map +1 -0
  71. package/dist/cli/session-log.js +52 -0
  72. package/dist/cli/session-log.js.map +1 -0
  73. package/dist/cli/spec.d.ts +62 -0
  74. package/dist/cli/spec.d.ts.map +1 -0
  75. package/dist/cli/spec.js +510 -0
  76. package/dist/cli/spec.js.map +1 -0
  77. package/dist/cli/ui/RunView.d.ts +134 -0
  78. package/dist/cli/ui/RunView.d.ts.map +1 -0
  79. package/dist/cli/ui/RunView.js +341 -0
  80. package/dist/cli/ui/RunView.js.map +1 -0
  81. package/dist/diagnostics/index.d.ts +5 -0
  82. package/dist/diagnostics/index.d.ts.map +1 -0
  83. package/dist/diagnostics/index.js +3 -0
  84. package/dist/diagnostics/index.js.map +1 -0
  85. package/dist/diagnostics/timing.d.ts +48 -0
  86. package/dist/diagnostics/timing.d.ts.map +1 -0
  87. package/dist/diagnostics/timing.js +85 -0
  88. package/dist/diagnostics/timing.js.map +1 -0
  89. package/dist/diagnostics/truthfulness.d.ts +36 -0
  90. package/dist/diagnostics/truthfulness.d.ts.map +1 -0
  91. package/dist/diagnostics/truthfulness.js +180 -0
  92. package/dist/diagnostics/truthfulness.js.map +1 -0
  93. package/dist/dispatch-memoization.d.ts +84 -0
  94. package/dist/dispatch-memoization.d.ts.map +1 -0
  95. package/dist/dispatch-memoization.js +197 -0
  96. package/dist/dispatch-memoization.js.map +1 -0
  97. package/dist/eval/comparison-report.d.ts +164 -0
  98. package/dist/eval/comparison-report.d.ts.map +1 -0
  99. package/dist/eval/comparison-report.js +316 -0
  100. package/dist/eval/comparison-report.js.map +1 -0
  101. package/dist/eval/fixture.d.ts +74 -0
  102. package/dist/eval/fixture.d.ts.map +1 -0
  103. package/dist/eval/fixture.js +217 -0
  104. package/dist/eval/fixture.js.map +1 -0
  105. package/dist/eval/index.d.ts +13 -0
  106. package/dist/eval/index.d.ts.map +1 -0
  107. package/dist/eval/index.js +7 -0
  108. package/dist/eval/index.js.map +1 -0
  109. package/dist/eval/load-node.d.ts +16 -0
  110. package/dist/eval/load-node.d.ts.map +1 -0
  111. package/dist/eval/load-node.js +58 -0
  112. package/dist/eval/load-node.js.map +1 -0
  113. package/dist/eval/metric-collector.d.ts +209 -0
  114. package/dist/eval/metric-collector.d.ts.map +1 -0
  115. package/dist/eval/metric-collector.js +293 -0
  116. package/dist/eval/metric-collector.js.map +1 -0
  117. package/dist/eval/run-record.d.ts +76 -0
  118. package/dist/eval/run-record.d.ts.map +1 -0
  119. package/dist/eval/run-record.js +32 -0
  120. package/dist/eval/run-record.js.map +1 -0
  121. package/dist/eval/runner.d.ts +140 -0
  122. package/dist/eval/runner.d.ts.map +1 -0
  123. package/dist/eval/runner.js +310 -0
  124. package/dist/eval/runner.js.map +1 -0
  125. package/dist/eval/spec-framework.d.ts +113 -0
  126. package/dist/eval/spec-framework.d.ts.map +1 -0
  127. package/dist/eval/spec-framework.js +100 -0
  128. package/dist/eval/spec-framework.js.map +1 -0
  129. package/dist/eval/spec-helpers.d.ts +245 -0
  130. package/dist/eval/spec-helpers.d.ts.map +1 -0
  131. package/dist/eval/spec-helpers.js +605 -0
  132. package/dist/eval/spec-helpers.js.map +1 -0
  133. package/dist/events/codec.d.ts +79 -0
  134. package/dist/events/codec.d.ts.map +1 -0
  135. package/dist/events/codec.js +142 -0
  136. package/dist/events/codec.js.map +1 -0
  137. package/dist/events/log-core.d.ts +76 -0
  138. package/dist/events/log-core.d.ts.map +1 -0
  139. package/dist/events/log-core.js +73 -0
  140. package/dist/events/log-core.js.map +1 -0
  141. package/dist/events/log.d.ts +60 -0
  142. package/dist/events/log.d.ts.map +1 -0
  143. package/dist/events/log.js +193 -0
  144. package/dist/events/log.js.map +1 -0
  145. package/dist/events/replay.d.ts +106 -0
  146. package/dist/events/replay.d.ts.map +1 -0
  147. package/dist/events/replay.js +137 -0
  148. package/dist/events/replay.js.map +1 -0
  149. package/dist/events/wrap.d.ts +100 -0
  150. package/dist/events/wrap.d.ts.map +1 -0
  151. package/dist/events/wrap.js +141 -0
  152. package/dist/events/wrap.js.map +1 -0
  153. package/dist/index.d.ts +73 -0
  154. package/dist/index.d.ts.map +1 -0
  155. package/dist/index.js +47 -0
  156. package/dist/index.js.map +1 -0
  157. package/dist/llm-adapter.d.ts +96 -0
  158. package/dist/llm-adapter.d.ts.map +1 -0
  159. package/dist/llm-adapter.js +132 -0
  160. package/dist/llm-adapter.js.map +1 -0
  161. package/dist/mcp/serve.d.ts +70 -0
  162. package/dist/mcp/serve.d.ts.map +1 -0
  163. package/dist/mcp/serve.js +154 -0
  164. package/dist/mcp/serve.js.map +1 -0
  165. package/dist/metrics/runs.d.ts +58 -0
  166. package/dist/metrics/runs.d.ts.map +1 -0
  167. package/dist/metrics/runs.js +99 -0
  168. package/dist/metrics/runs.js.map +1 -0
  169. package/dist/metrics.d.ts +38 -0
  170. package/dist/metrics.d.ts.map +1 -0
  171. package/dist/metrics.js +123 -0
  172. package/dist/metrics.js.map +1 -0
  173. package/dist/node.d.ts +23 -0
  174. package/dist/node.d.ts.map +1 -0
  175. package/dist/node.js +23 -0
  176. package/dist/node.js.map +1 -0
  177. package/dist/planner-executor.d.ts +132 -0
  178. package/dist/planner-executor.d.ts.map +1 -0
  179. package/dist/planner-executor.js +274 -0
  180. package/dist/planner-executor.js.map +1 -0
  181. package/dist/session.d.ts +10 -0
  182. package/dist/session.d.ts.map +1 -0
  183. package/dist/session.js +179 -0
  184. package/dist/session.js.map +1 -0
  185. package/dist/skill-catalog.d.ts +81 -0
  186. package/dist/skill-catalog.d.ts.map +1 -0
  187. package/dist/skill-catalog.js +388 -0
  188. package/dist/skill-catalog.js.map +1 -0
  189. package/dist/skill-router.d.ts +95 -0
  190. package/dist/skill-router.d.ts.map +1 -0
  191. package/dist/skill-router.js +130 -0
  192. package/dist/skill-router.js.map +1 -0
  193. package/dist/storage.d.ts +14 -0
  194. package/dist/storage.d.ts.map +1 -0
  195. package/dist/storage.js +58 -0
  196. package/dist/storage.js.map +1 -0
  197. package/dist/strategy.d.ts +45 -0
  198. package/dist/strategy.d.ts.map +1 -0
  199. package/dist/strategy.js +520 -0
  200. package/dist/strategy.js.map +1 -0
  201. package/dist/tools.d.ts +40 -0
  202. package/dist/tools.d.ts.map +1 -0
  203. package/dist/tools.js +147 -0
  204. package/dist/tools.js.map +1 -0
  205. package/dist/types/agent.d.ts +94 -0
  206. package/dist/types/agent.d.ts.map +1 -0
  207. package/dist/types/agent.js +17 -0
  208. package/dist/types/agent.js.map +1 -0
  209. package/dist/types/capabilities.d.ts +17 -0
  210. package/dist/types/capabilities.d.ts.map +1 -0
  211. package/dist/types/capabilities.js +13 -0
  212. package/dist/types/capabilities.js.map +1 -0
  213. package/dist/types/chat.d.ts +74 -0
  214. package/dist/types/chat.d.ts.map +1 -0
  215. package/dist/types/chat.js +10 -0
  216. package/dist/types/chat.js.map +1 -0
  217. package/dist/types/events.d.ts +115 -0
  218. package/dist/types/events.d.ts.map +1 -0
  219. package/dist/types/events.js +30 -0
  220. package/dist/types/events.js.map +1 -0
  221. package/dist/types/llm.d.ts +89 -0
  222. package/dist/types/llm.d.ts.map +1 -0
  223. package/dist/types/llm.js +12 -0
  224. package/dist/types/llm.js.map +1 -0
  225. package/dist/types/metrics.d.ts +34 -0
  226. package/dist/types/metrics.d.ts.map +1 -0
  227. package/dist/types/metrics.js +10 -0
  228. package/dist/types/metrics.js.map +1 -0
  229. package/dist/types/observer.d.ts +41 -0
  230. package/dist/types/observer.d.ts.map +1 -0
  231. package/dist/types/observer.js +41 -0
  232. package/dist/types/observer.js.map +1 -0
  233. package/dist/types/project-context.d.ts +18 -0
  234. package/dist/types/project-context.d.ts.map +1 -0
  235. package/dist/types/project-context.js +11 -0
  236. package/dist/types/project-context.js.map +1 -0
  237. package/dist/types/runtime.d.ts +71 -0
  238. package/dist/types/runtime.d.ts.map +1 -0
  239. package/dist/types/runtime.js +21 -0
  240. package/dist/types/runtime.js.map +1 -0
  241. package/dist/types/session.d.ts +103 -0
  242. package/dist/types/session.d.ts.map +1 -0
  243. package/dist/types/session.js +11 -0
  244. package/dist/types/session.js.map +1 -0
  245. package/dist/types/storage.d.ts +20 -0
  246. package/dist/types/storage.d.ts.map +1 -0
  247. package/dist/types/storage.js +41 -0
  248. package/dist/types/storage.js.map +1 -0
  249. package/dist/types/strategy.d.ts +124 -0
  250. package/dist/types/strategy.d.ts.map +1 -0
  251. package/dist/types/strategy.js +10 -0
  252. package/dist/types/strategy.js.map +1 -0
  253. package/dist/types/tools.d.ts +154 -0
  254. package/dist/types/tools.d.ts.map +1 -0
  255. package/dist/types/tools.js +11 -0
  256. package/dist/types/tools.js.map +1 -0
  257. package/dist/types/trace.d.ts +175 -0
  258. package/dist/types/trace.d.ts.map +1 -0
  259. package/dist/types/trace.js +26 -0
  260. package/dist/types/trace.js.map +1 -0
  261. package/dist/types/workspace.d.ts +29 -0
  262. package/dist/types/workspace.d.ts.map +1 -0
  263. package/dist/types/workspace.js +18 -0
  264. package/dist/types/workspace.js.map +1 -0
  265. package/package.json +45 -14
  266. package/skills/agent-cli.md +218 -0
  267. package/index.js +0 -2
@@ -0,0 +1,38 @@
1
+ /**
2
+ * `MetricsCollector` implementation — pricing tables + cost
3
+ * derivation in one place. No I/O; the cost helpers are pure, while the collector keeps in-memory running totals.
4
+ *
5
+ * Pricing is per-million-token USD figures. When the provider
6
+ * reports a cost (OpenRouter's `x-cost` header), the collector
7
+ * skips estimation and marks `costEstimated: false`. When the
8
+ * model isn't in the pricing table, cost is set to 0 with
9
+ * `costEstimated: true`.
10
+ *
11
+ * Update pricing rows below when providers revise rates — there's
12
+ * no API to introspect them.
13
+ */
14
+ import type { TurnMetrics } from './types/llm.js';
15
+ import type { MetricsCollector, RecordTurnInput } from './types/metrics.js';
16
+ interface PricingRow {
17
+ /** USD per million input tokens. */
18
+ input: number;
19
+ /** USD per million output tokens. */
20
+ output: number;
21
+ /** USD per million cached input tokens (when cache-hit). */
22
+ cacheRead: number;
23
+ }
24
+ /**
25
+ * Look up a pricing row. Returns undefined when the model isn't
26
+ * priced — `computeTurnMetrics` then reports `costUsd: 0` with `costEstimated: true`.
27
+ */
28
+ export declare function findPricing(llmId: string, model: string): PricingRow | undefined;
29
+ /**
30
+ * Derive a `TurnMetrics` value from one turn's raw usage. Pure
31
+ * function — used by both the collector below and direct callers
32
+ * that don't need session-level aggregation.
33
+ */
34
+ export declare function computeTurnMetrics(input: RecordTurnInput): TurnMetrics;
35
+ /** Build a stateful `MetricsCollector` for one session. */
36
+ export declare function createMetricsCollector(): MetricsCollector;
37
+ export {};
38
+ //# sourceMappingURL=metrics.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"metrics.d.ts","sourceRoot":"","sources":["../src/metrics.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAClD,OAAO,KAAK,EAAE,gBAAgB,EAAE,eAAe,EAAiB,MAAM,oBAAoB,CAAC;AAE3F,UAAU,UAAU;IAClB,oCAAoC;IACpC,KAAK,EAAE,MAAM,CAAC;IACd,qCAAqC;IACrC,MAAM,EAAE,MAAM,CAAC;IACf,4DAA4D;IAC5D,SAAS,EAAE,MAAM,CAAC;CACnB;AAeD;;;GAGG;AACH,wBAAgB,WAAW,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS,CAEhF;AAED;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,eAAe,GAAG,WAAW,CA8CtE;AAED,2DAA2D;AAC3D,wBAAgB,sBAAsB,IAAI,gBAAgB,CAqCzD"}
@@ -0,0 +1,123 @@
1
+ /**
2
+ * `MetricsCollector` implementation — pricing tables + cost
3
+ * derivation in one place. No I/O; the cost helpers are pure, while the collector keeps in-memory running totals.
4
+ *
5
+ * Pricing is per-million-token USD figures. When the provider
6
+ * reports a cost (OpenRouter's `x-cost` header), the collector
7
+ * skips estimation and marks `costEstimated: false`. When the
8
+ * model isn't in the pricing table, cost is set to 0 with
9
+ * `costEstimated: true`.
10
+ *
11
+ * Update pricing rows below when providers revise rates — there's
12
+ * no API to introspect them.
13
+ */
14
+ /**
15
+ * Per-(provider, model) pricing table. Keys are `${llmId}:${model}`
16
+ * to avoid collisions when two providers ship a same-named model. Rates are USD per million tokens.
17
+ */
18
+ const PRICING = {
19
+ 'gemini:gemini-3.1-pro-preview': { input: 2.5, output: 20.0, cacheRead: 0.625 },
20
+ 'gemini:gemini-3-flash-preview': { input: 0.5, output: 4.0, cacheRead: 0.125 },
21
+ 'gemini:gemini-3.1-flash-lite': { input: 0.15, output: 0.6, cacheRead: 0.0375 },
22
+ // OpenRouter quotes cost on the response — we don't need a row.
23
+ // Ollama is local — no cost (any model without a row is reported as `costUsd: 0` by `computeTurnMetrics`).
24
+ // Nano is on-device — no cost.
25
+ };
26
+ /**
27
+ * Look up a pricing row. Returns undefined when the model isn't
28
+ * priced — `computeTurnMetrics` then reports `costUsd: 0` with `costEstimated: true`.
29
+ */
30
+ export function findPricing(llmId, model) {
31
+ return PRICING[`${llmId}:${model}`]; // same `${llmId}:${model}` key shape as the PRICING table
32
+ }
33
+ /**
34
+ * Derive a `TurnMetrics` value from one turn's raw usage. Pure
35
+ * function — used by both the collector below and direct callers
36
+ * that don't need session-level aggregation. Cost precedence: a numeric `rawUsage.costUsd`, then the pricing-table estimate, then 0 for unpriced models.
37
+ */
38
+ export function computeTurnMetrics(input) {
39
+ const { llmId, model, rawUsage, isByok } = input;
40
+ const tokensCached = rawUsage.cachedTokens ?? 0; // absent count defaults to 0
41
+ const tokensReasoning = rawUsage.reasoningTokens ?? 0; // absent count defaults to 0
42
+ // Provider reported cost directly (OpenRouter): trust it.
43
+ if (typeof rawUsage.costUsd === 'number') { // NOTE(review): also true for NaN/Infinity — confirm the adapter never reports those
44
+ return {
45
+ tokensIn: rawUsage.promptTokens,
46
+ tokensOut: rawUsage.completionTokens,
47
+ tokensCached,
48
+ tokensReasoning,
49
+ costUsd: rawUsage.costUsd,
50
+ costEstimated: false,
51
+ isByok,
52
+ };
53
+ }
54
+ // Estimate from the table.
55
+ const row = findPricing(llmId, model);
56
+ if (!row) { // no pricing row for this llmId:model pair
57
+ return {
58
+ tokensIn: rawUsage.promptTokens,
59
+ tokensOut: rawUsage.completionTokens,
60
+ tokensCached,
61
+ tokensReasoning,
62
+ costUsd: 0, // unpriced: reported as zero but still flagged as an estimate
63
+ costEstimated: true,
64
+ isByok,
65
+ };
66
+ }
67
+ // Math: cached tokens get the cache rate; the rest of the prompt
68
+ // gets the standard input rate; output tokens (which include
69
+ // reasoning per most providers' billing) get the output rate.
70
+ const billedInput = Math.max(0, rawUsage.promptTokens - tokensCached); // clamp: never negative when cachedTokens exceeds promptTokens
71
+ const costUsd = (billedInput * row.input) / 1_000_000 +
72
+ (tokensCached * row.cacheRead) / 1_000_000 +
73
+ (rawUsage.completionTokens * row.output) / 1_000_000; // tokensReasoning is not charged on top of completionTokens
74
+ return {
75
+ tokensIn: rawUsage.promptTokens,
76
+ tokensOut: rawUsage.completionTokens,
77
+ tokensCached,
78
+ tokensReasoning,
79
+ costUsd,
80
+ costEstimated: true,
81
+ isByok,
82
+ };
83
+ }
84
+ /** Build a stateful `MetricsCollector` for one session: `recordTurn` accumulates, `totals` snapshots, `reset` zeroes. */
85
+ export function createMetricsCollector() {
86
+ let totals = {
87
+ tokensTotal: 0,
88
+ tokensIn: 0,
89
+ tokensOut: 0,
90
+ tokensCached: 0,
91
+ tokensReasoning: 0,
92
+ costUsdTotal: 0,
93
+ turnCount: 0,
94
+ };
95
+ return {
96
+ recordTurn(input) {
97
+ const m = computeTurnMetrics(input);
98
+ totals = {
99
+ tokensTotal: totals.tokensTotal + m.tokensIn + m.tokensOut, // in + out only; cached and reasoning counts are not added to this sum
100
+ tokensIn: totals.tokensIn + m.tokensIn,
101
+ tokensOut: totals.tokensOut + m.tokensOut,
102
+ tokensCached: totals.tokensCached + m.tokensCached,
103
+ tokensReasoning: totals.tokensReasoning + m.tokensReasoning,
104
+ costUsdTotal: totals.costUsdTotal + m.costUsd, // sums provider-reported and estimated costs alike
105
+ turnCount: totals.turnCount + 1,
106
+ };
107
+ return m; // per-turn metrics go back to the caller
108
+ },
109
+ totals: () => ({ ...totals }), // shallow copy, so callers cannot mutate the running totals
110
+ reset() {
111
+ totals = { // same zeroed shape as the initial state
112
+ tokensTotal: 0,
113
+ tokensIn: 0,
114
+ tokensOut: 0,
115
+ tokensCached: 0,
116
+ tokensReasoning: 0,
117
+ costUsdTotal: 0,
118
+ turnCount: 0,
119
+ };
120
+ },
121
+ };
122
+ }
123
+ //# sourceMappingURL=metrics.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"metrics.js","sourceRoot":"","sources":["../src/metrics.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAcH;;;GAGG;AACH,MAAM,OAAO,GAA+B;IAC1C,+BAA+B,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE;IAC/E,+BAA+B,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,SAAS,EAAE,KAAK,EAAE;IAC9E,8BAA8B,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,SAAS,EAAE,MAAM,EAAE;IAC/E,gEAAgE;IAChE,6BAA6B;IAC7B,+BAA+B;CAChC,CAAC;AAEF;;;GAGG;AACH,MAAM,UAAU,WAAW,CAAC,KAAa,EAAE,KAAa;IACtD,OAAO,OAAO,CAAC,GAAG,KAAK,IAAI,KAAK,EAAE,CAAC,CAAC;AACtC,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,kBAAkB,CAAC,KAAsB;IACvD,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,KAAK,CAAC;IACjD,MAAM,YAAY,GAAG,QAAQ,CAAC,YAAY,IAAI,CAAC,CAAC;IAChD,MAAM,eAAe,GAAG,QAAQ,CAAC,eAAe,IAAI,CAAC,CAAC;IACtD,0DAA0D;IAC1D,IAAI,OAAO,QAAQ,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;QACzC,OAAO;YACL,QAAQ,EAAE,QAAQ,CAAC,YAAY;YAC/B,SAAS,EAAE,QAAQ,CAAC,gBAAgB;YACpC,YAAY;YACZ,eAAe;YACf,OAAO,EAAE,QAAQ,CAAC,OAAO;YACzB,aAAa,EAAE,KAAK;YACpB,MAAM;SACP,CAAC;IACJ,CAAC;IACD,2BAA2B;IAC3B,MAAM,GAAG,GAAG,WAAW,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;IACtC,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO;YACL,QAAQ,EAAE,QAAQ,CAAC,YAAY;YAC/B,SAAS,EAAE,QAAQ,CAAC,gBAAgB;YACpC,YAAY;YACZ,eAAe;YACf,OAAO,EAAE,CAAC;YACV,aAAa,EAAE,IAAI;YACnB,MAAM;SACP,CAAC;IACJ,CAAC;IACD,iEAAiE;IACjE,6DAA6D;IAC7D,8DAA8D;IAC9D,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,CAAC,YAAY,GAAG,YAAY,CAAC,CAAC;IACtE,MAAM,OAAO,GACX,CAAC,WAAW,GAAG,GAAG,CAAC,KAAK,CAAC,GAAG,SAAS;QACrC,CAAC,YAAY,GAAG,GAAG,CAAC,SAAS,CAAC,GAAG,SAAS;QAC1C,CAAC,QAAQ,CAAC,gBAAgB,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,SAAS,CAAC;IACvD,OAAO;QACL,QAAQ,EAAE,QAAQ,CAAC,YAAY;QAC/B,SAAS,EAAE,QAAQ,CAAC,gBAAgB;QACpC,YAAY;QACZ,eAAe;QACf,OAAO;QACP,aAAa,EAAE,IAAI;QACnB,MAAM;KACP,CAAC;AACJ,CAAC;AAED,2DAA2D;AAC3D,MAAM,UAAU,sBAAsB;IACpC,IAAI,MAAM,GAAkB;QAC1B,WAAW,EAAE,CAAC;QACd,QAAQ,EAAE,CAAC;QACX,SAAS,EAAE,CAAC;QACZ,YAAY,EAAE,CAAC;QACf,eAAe,EAAE,CAAC;QAClB,YAAY,EAAE,CAAC;QACf,SAAS,EAAE,CAAC;KACb,CAAC;IACF,OAAO;QACL,UAAU,CAAC
,KAAK;YACd,MAAM,CAAC,GAAG,kBAAkB,CAAC,KAAK,CAAC,CAAC;YACpC,MAAM,GAAG;gBACP,WAAW,EAAE,MAAM,CAAC,WAAW,GAAG,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,SAAS;gBAC1D,QAAQ,EAAE,MAAM,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ;gBACtC,SAAS,EAAE,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS;gBACzC,YAAY,EAAE,MAAM,CAAC,YAAY,GAAG,CAAC,CAAC,YAAY;gBAClD,eAAe,EAAE,MAAM,CAAC,eAAe,GAAG,CAAC,CAAC,eAAe;gBAC3D,YAAY,EAAE,MAAM,CAAC,YAAY,GAAG,CAAC,CAAC,OAAO;gBAC7C,SAAS,EAAE,MAAM,CAAC,SAAS,GAAG,CAAC;aAChC,CAAC;YACF,OAAO,CAAC,CAAC;QACX,CAAC;QACD,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,GAAG,MAAM,EAAE,CAAC;QAC7B,KAAK;YACH,MAAM,GAAG;gBACP,WAAW,EAAE,CAAC;gBACd,QAAQ,EAAE,CAAC;gBACX,SAAS,EAAE,CAAC;gBACZ,YAAY,EAAE,CAAC;gBACf,eAAe,EAAE,CAAC;gBAClB,YAAY,EAAE,CAAC;gBACf,SAAS,EAAE,CAAC;aACb,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC"}
package/dist/node.d.ts ADDED
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Node-only entry point.
3
+ *
4
+ * The event log writer imports `node:fs` / `node:os` at module init
5
+ * for append-only NDJSON logging to disk. It lives here so the
6
+ * browser-facing root entry (`@inbrowser/agent`) stays free of Node
7
+ * builtins.
8
+ *
9
+ * Consumers running in Node (the CLI, the MCP server, sdk's agent
10
+ * definitions, integration tests) import from `@inbrowser/agent/node`.
11
+ * Browser consumers (playground) use the root entry, which exposes
12
+ * the browser-safe events helpers (`wrapMutating`, `replayEvents`,
13
+ * codec utilities) directly without going through `events/index.js`.
14
+ *
15
+ * Other Node-only files in this package (`metrics/runs`,
16
+ * `agents/firestore`, the CLI commands) have no external consumers via the
17
+ * public barrel today, so they stay reachable via relative paths
18
+ * within the package.
19
+ */
20
+ export { openEventLog, defaultProjectLogDir } from './events/log.js';
21
+ export { generateEventId, buildRollbackEvent, HOST_AGENT_ID, EventTooLargeError, DEFAULT_MAX_EVENT_BYTES, } from './events/log-core.js';
22
+ export { FixtureLoadError, loadFixture, loadFixtures } from './eval/load-node.js';
23
+ //# sourceMappingURL=node.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"node.d.ts","sourceRoot":"","sources":["../src/node.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,EAAE,YAAY,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AACrE,OAAO,EACL,eAAe,EACf,kBAAkB,EAClB,aAAa,EACb,kBAAkB,EAClB,uBAAuB,GACxB,MAAM,sBAAsB,CAAC;AAE9B,OAAO,EAAE,gBAAgB,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC"}
package/dist/node.js ADDED
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Node-only entry point.
3
+ *
4
+ * The event log writer imports `node:fs` / `node:os` at module init
5
+ * for append-only NDJSON logging to disk. It lives here so the
6
+ * browser-facing root entry (`@inbrowser/agent`) stays free of Node
7
+ * builtins.
8
+ *
9
+ * Consumers running in Node (the CLI, the MCP server, sdk's agent
10
+ * definitions, integration tests) import from `@inbrowser/agent/node`.
11
+ * Browser consumers (playground) use the root entry, which exposes
12
+ * the browser-safe events helpers (`wrapMutating`, `replayEvents`,
13
+ * codec utilities) directly without going through `events/index.js`.
14
+ *
15
+ * Other Node-only files in this package (`metrics/runs`,
16
+ * `agents/firestore`, the CLI commands) have no external consumers via the
17
+ * public barrel today, so they stay reachable via relative paths
18
+ * within the package.
19
+ */
20
+ export { openEventLog, defaultProjectLogDir } from './events/log.js';
21
+ export { generateEventId, buildRollbackEvent, HOST_AGENT_ID, EventTooLargeError, DEFAULT_MAX_EVENT_BYTES, } from './events/log-core.js';
22
+ export { FixtureLoadError, loadFixture, loadFixtures } from './eval/load-node.js';
23
+ //# sourceMappingURL=node.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"node.js","sourceRoot":"","sources":["../src/node.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,EAAE,YAAY,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AACrE,OAAO,EACL,eAAe,EACf,kBAAkB,EAClB,aAAa,EACb,kBAAkB,EAClB,uBAAuB,GACxB,MAAM,sBAAsB,CAAC;AAE9B,OAAO,EAAE,gBAAgB,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC"}
@@ -0,0 +1,132 @@
1
+ /**
2
+ * `createPlannerExecutorStrategy()` — phase five `AgentStrategy`.
3
+ *
4
+ * Takes a user prompt, routes it to a skill from the catalog,
5
+ * materializes that skill's prescribed plan, and executes each step
6
+ * with a bounded ReAct sub-loop. Scratch is dropped between steps:
7
+ * the next step starts fresh with a short summary of each prior step,
8
+ * not the full message history. The working window stays flat across
9
+ * a long workflow, which is the hypothesis this strategy exists to
10
+ * test.
11
+ *
12
+ * Lifecycle:
13
+ *
14
+ * 1. Route the prompt against the catalog. When the router returns
15
+ * no match, either fall back to `createReactLoopStrategy()`
16
+ * (default) or yield an `error` event (when `fallbackToReact`
17
+ * is `false`).
18
+ * 2. Look up the catalog entry. If the router returned a name not
19
+ * in the catalog (shouldn't happen for a catalog-derived router,
20
+ * defensive), behave as the no-match case.
21
+ * 3. Emit a `custom` event `'plan_started'` with the skill name and
22
+ * the step ids.
23
+ * 4. For each step, in order:
24
+ * a. Emit `custom` `'step_started'` with
25
+ * `{ stepId, description }`.
26
+ * b. Build a step-scoped system prompt that wraps the original
27
+ * system prompt with a suffix naming the step ("You are on
28
+ * step X of Y: <description>. Prior step summaries follow.")
29
+ * and one synthetic user message per prior step's summary.
30
+ * c. Drive a bounded `createReactLoopStrategy({ maxTurns })`
31
+ * sub-loop. Every inner `text`, `thinking`, `tool_call`,
32
+ * `tool_result`, and `turn_complete` event is streamed
33
+ * through unchanged so the host's UI continues working.
34
+ * d. After the sub-loop completes, capture the concatenated
35
+ * assistant text the inner loop emitted this step, call
36
+ * `summarizeStep(stepId, transcript)` to get a short
37
+ * summary, and emit `custom` `'step_completed'` with
38
+ * `{ stepId, summary }`.
39
+ * 5. Emit `custom` `'plan_completed'` and return.
40
+ *
41
+ * Tracer: per-step inner loops generate their own `llm_request` /
42
+ * `llm_response` / `turn_dispatch_complete` trace events. The trace's
43
+ * `requestId` carries a `${turnId}#${stepId}#${iteration}` shape so
44
+ * the eval harness can read per-step iteration counts off the trace
45
+ * without changes.
46
+ *
47
+ * What is intentionally NOT done in v1:
48
+ *
49
+ * - Per-step verifier gating. The catalog's `verifier?` is read but
50
+ * not enforced. The executor always advances to the next step.
51
+ * Gating progression on verifier outcomes is a follow-up.
52
+ * - Per-step tool subsetting. Every step sees the same dispatcher
53
+ * and the same tool list. A future enhancement can scope tools by
54
+ * step.
55
+ * - Smarter step summarization. The default `summarizeStep` is
56
+ * `transcript.slice(0, 400)` — dumb truncation. Override with a
57
+ * real summarizer if needed.
58
+ */
59
+ import type { SkillName } from './eval/fixture.js';
60
+ import type { SkillCatalog } from './skill-catalog.js';
61
+ import type { AgentStrategy } from './types/strategy.js';
62
+ /**
63
+ * Minimal router contract this strategy depends on. The sibling
64
+ * `strategy/skill-router` branch ships a concrete `routeSkill`
65
+ * function with the same signature; until that lands, callers can
66
+ * pass any function with this shape, and this module's
67
+ * `defaultKeywordRouter` does a trivial substring scan against the
68
+ * catalog's `triggerHints` so the strategy works end-to-end on its
69
+ * own.
70
+ *
71
+ * Once the router branch merges, downstream code can replace the
72
+ * default by passing `router: routeSkill` (or by wrapping it) into
73
+ * the options. No change is needed in this file.
74
+ */
75
+ export interface SkillRouterMatch {
76
+ /** The chosen skill. Must be a name present in the catalog. */
77
+ skill: SkillName;
78
+ /** Higher is better. Consumers may surface it; the strategy uses
79
+ * only its presence (non-null match) to gate execution. */
80
+ score?: number;
81
+ }
82
+ export interface SkillRouterDecision {
83
+ /** The top match, or `null` when no entry crossed the router's
84
+ * internal threshold. */
85
+ match: SkillRouterMatch | null;
86
+ }
87
+ export type SkillRouter = (prompt: string, options?: {
88
+ catalog?: SkillCatalog;
89
+ }) => SkillRouterDecision;
90
+ export interface PlannerExecutorOptions {
91
+ /** Catalog override. Defaults to `SKILL_CATALOG`. */
92
+ catalog?: SkillCatalog;
93
+ /** Per-step bounded turn budget for the inner ReAct sub-loop.
94
+ * Default 4. */
95
+ stepMaxTurns?: number;
96
+ /** When the router returns no match, fall back to
97
+ * `createReactLoopStrategy()` for the rest of the turn. Default
98
+ * `true`. When `false`, the strategy yields an `error` event and
99
+ * returns. */
100
+ fallbackToReact?: boolean;
101
+ /**
102
+ * Summarize a single step's transcript (concatenated assistant
103
+ * text) into a short string that seeds the next step's context.
104
+ * Default: dumb truncation to 400 characters.
105
+ */
106
+ summarizeStep?: (stepId: string, transcript: string) => string;
107
+ /**
108
+ * Router function. Defaults to a keyword scan against the catalog's
109
+ * `triggerHints`. The sibling `strategy/skill-router` branch ships
110
+ * a more sophisticated `routeSkill`; once that lands, callers can
111
+ * pass it here.
112
+ */
113
+ router?: SkillRouter;
114
+ }
115
+ /**
116
+ * Default keyword router. Scores each catalog entry by counting
117
+ * `triggerHints` substring hits in the lowercased prompt and returns
118
+ * the top scorer (or `null` when every entry scored zero). Catalog
119
+ * order breaks ties — the same ordering the sibling
120
+ * `strategy/skill-router` branch documents for its production router.
121
+ *
122
+ * This is intentionally tiny. The point is that the executor can run
123
+ * end-to-end (and be unit-tested) without depending on the router
124
+ * branch landing first.
125
+ */
126
+ export declare const defaultKeywordRouter: SkillRouter;
127
+ /**
128
+ * Build the planner-executor strategy. Returns an `AgentStrategy`
129
+ * with `id: 'planner-executor'`.
130
+ */
131
+ export declare function createPlannerExecutorStrategy(options?: PlannerExecutorOptions): AgentStrategy;
132
+ //# sourceMappingURL=planner-executor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"planner-executor.d.ts","sourceRoot":"","sources":["../src/planner-executor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAyDG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAEnD,OAAO,KAAK,EAAE,YAAY,EAAqB,MAAM,oBAAoB,CAAC;AAG1E,OAAO,KAAK,EAAE,aAAa,EAAmC,MAAM,qBAAqB,CAAC;AAE1F;;;;;;;;;;;;GAYG;AACH,MAAM,WAAW,gBAAgB;IAC/B,+DAA+D;IAC/D,KAAK,EAAE,SAAS,CAAC;IACjB;gEAC4D;IAC5D,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,mBAAmB;IAClC;8BAC0B;IAC1B,KAAK,EAAE,gBAAgB,GAAG,IAAI,CAAC;CAChC;AAED,MAAM,MAAM,WAAW,GAAG,CACxB,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE;IAAE,OAAO,CAAC,EAAE,YAAY,CAAA;CAAE,KACjC,mBAAmB,CAAC;AAEzB,MAAM,WAAW,sBAAsB;IACrC,qDAAqD;IACrD,OAAO,CAAC,EAAE,YAAY,CAAC;IACvB;qBACiB;IACjB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB;;;mBAGe;IACf,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B;;;;OAIG;IACH,aAAa,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,KAAK,MAAM,CAAC;IAC/D;;;;;OAKG;IACH,MAAM,CAAC,EAAE,WAAW,CAAC;CACtB;AAgBD;;;;;;;;;;GAUG;AACH,eAAO,MAAM,oBAAoB,EAAE,WAoBlC,CAAC;AAEF;;;GAGG;AACH,wBAAgB,6BAA6B,CAAC,OAAO,GAAE,sBAA2B,GAAG,aAAa,CA8KjG"}
@@ -0,0 +1,274 @@
1
+ /**
2
+ * `createPlannerExecutorStrategy()` — phase five `AgentStrategy`.
3
+ *
4
+ * Takes a user prompt, routes it to a skill from the catalog,
5
+ * materializes that skill's prescribed plan, and executes each step
6
+ * with a bounded ReAct sub-loop. Scratch is dropped between steps:
7
+ * the next step starts fresh with a short summary of each prior step,
8
+ * not the full message history. The working window stays flat across
9
+ * a long workflow, which is the hypothesis this strategy exists to
10
+ * test.
11
+ *
12
+ * Lifecycle:
13
+ *
14
+ * 1. Route the prompt against the catalog. When the router returns
15
+ * no match, either fall back to `createReactLoopStrategy()`
16
+ * (default) or yield an `error` event (when `fallbackToReact`
17
+ * is `false`).
18
+ * 2. Look up the catalog entry. If the router returned a name not
19
+ * in the catalog (shouldn't happen for a catalog-derived router,
20
+ * defensive), behave as the no-match case.
21
+ * 3. Emit a `custom` event `'plan_started'` with the skill name and
22
+ * the step ids.
23
+ * 4. For each step, in order:
24
+ * a. Emit `custom` `'step_started'` with `{ stepId,
25
+ * description }`.
26
+ * b. Build a step-scoped system prompt that wraps the original
27
+ * system prompt with a suffix naming the step ("You are on
28
+ * step X of Y: <description>. Prior step summaries follow.")
29
+ * and one synthetic user message per prior step's summary.
30
+ * c. Drive a bounded `createReactLoopStrategy({ maxTurns })`
31
+ * sub-loop. Every inner `text`, `thinking`, `tool_call`,
32
+ * `tool_result`, and `turn_complete` event is streamed
33
+ * through unchanged so the host's UI continues working.
34
+ * d. After the sub-loop completes, capture the concatenated
35
+ * assistant text the inner loop emitted this step, call
36
+ * `summarizeStep(stepId, transcript)` to get a short
37
+ * summary, and emit `custom` `'step_completed'` with
38
+ * `{ stepId, summary }`.
39
+ * 5. Emit `custom` `'plan_completed'` and return.
40
+ *
41
+ * Tracer: per-step inner loops generate their own `llm_request` /
42
+ * `llm_response` / `turn_dispatch_complete` trace events. The trace's
43
+ * `requestId` carries a `${turnId}#${stepId}#${iteration}` shape so
44
+ * the eval harness can read per-step iteration counts off the trace
45
+ * without changes.
46
+ *
47
+ * What is intentionally NOT done in v1:
48
+ *
49
+ * - Per-step verifier gating. The catalog's `verifier?` is read but
50
+ * not enforced. The executor always advances to the next step.
51
+ * Gating progression on verifier outcomes is a follow-up.
52
+ * - Per-step tool subsetting. Every step sees the same dispatcher
53
+ * and the same tool list. A future enhancement can scope tools by
54
+ * step.
55
+ * - Smarter step summarization. The default `summarizeStep` is
56
+ * `transcript.slice(0, 400)` — dumb truncation. Override with a
57
+ * real summarizer if needed.
58
+ */
59
+ import { SKILL_CATALOG, getSkillEntry } from './skill-catalog.js';
60
+ import { createReactLoopStrategy } from './strategy.js';
61
/** `maxTurns` handed to each per-step ReAct sub-loop when `stepMaxTurns` is not supplied. */
const DEFAULT_STEP_MAX_TURNS = 4;
/** Maximum number of UTF-16 code units the default summarizer keeps before the ellipsis. */
const DEFAULT_SUMMARY_LIMIT = 400;
/**
 * Default step summarizer: trim the transcript and, when it is longer
 * than `DEFAULT_SUMMARY_LIMIT`, truncate it and append `…`.
 *
 * The next step only needs a rough recall of what the prior step
 * concluded, so plain truncation is good enough for v1.
 *
 * @param _stepId Id of the step being summarized (unused by the default).
 * @param transcript Concatenated assistant text produced during the step.
 * @returns The trimmed transcript, truncated with a trailing `…` when it
 *   exceeds the limit.
 */
function defaultSummarizeStep(_stepId, transcript) {
    const trimmed = transcript.trim();
    if (trimmed.length <= DEFAULT_SUMMARY_LIMIT)
        return trimmed;
    // `slice` counts UTF-16 code units. If the last unit we would keep is a
    // high surrogate, its low-surrogate partner falls outside the cut and
    // the summary would end in a lone (invalid) surrogate — drop it too.
    let cut = DEFAULT_SUMMARY_LIMIT;
    const lastUnit = trimmed.charCodeAt(cut - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        cut -= 1;
    }
    return `${trimmed.slice(0, cut)}…`;
}
74
/**
 * Default keyword router.
 *
 * For every catalog entry, counts how many of its non-empty
 * `triggerHints` occur (case-insensitively, as substrings) in the
 * prompt. The entry with the highest non-zero count wins; on a tie the
 * entry that appears first in the catalog is kept.
 *
 * Deliberately minimal so the executor can run end-to-end (and be
 * unit-tested) without waiting on a dedicated router implementation.
 *
 * @param prompt User prompt to route. A falsy prompt never matches.
 * @param options Optional `{ catalog }`; falls back to `SKILL_CATALOG`.
 * @returns `{ match: { skill, score } }` for the winner, or
 *   `{ match: null }` when nothing scored above zero.
 */
export const defaultKeywordRouter = (prompt, options) => {
    const entries = options?.catalog ?? SKILL_CATALOG;
    if (!prompt) {
        return { match: null };
    }
    const haystack = prompt.toLowerCase();
    let winner = null;
    let winningScore = 0;
    for (const candidate of entries) {
        let hits = 0;
        for (const hint of candidate.triggerHints) {
            // Empty hints would match every prompt, so they never count.
            if (hint.length !== 0 && haystack.includes(hint.toLowerCase())) {
                hits += 1;
            }
        }
        // Strictly-greater keeps the earliest entry on ties and ignores
        // zero-score entries (the bar starts at 0).
        if (hits > winningScore) {
            winner = candidate;
            winningScore = hits;
        }
    }
    return winner === null
        ? { match: null }
        : { match: { skill: winner.name, score: winningScore } };
};
107
/**
 * Create the planner-executor `AgentStrategy` (`id: 'planner-executor'`).
 *
 * Each `run()` routes the prompt to a catalog skill and then walks that
 * skill's steps in order, driving one bounded ReAct sub-loop per step
 * and carrying only a short summary of each finished step forward.
 *
 * @param options Optional overrides:
 *   - `catalog`: catalog handed to the router and used for entry lookup
 *     (defaults to `SKILL_CATALOG`, looked up through `getSkillEntry`).
 *   - `stepMaxTurns`: `maxTurns` for each per-step sub-loop.
 *   - `fallbackToReact`: any value other than `false` delegates an
 *     unmatched prompt to a plain ReAct loop; `false` yields an `error`
 *     event instead.
 *   - `summarizeStep`: `(stepId, transcript) => summary`.
 *   - `router`: `(prompt, { catalog }) => { match }`.
 * @returns A strategy object whose `run(input, signal)` is an async
 *   generator of agent events.
 */
export function createPlannerExecutorStrategy(options = {}) {
    const skillCatalog = options.catalog ?? SKILL_CATALOG;
    const perStepTurns = options.stepMaxTurns ?? DEFAULT_STEP_MAX_TURNS;
    const fallbackEnabled = options.fallbackToReact !== false;
    const summarize = options.summarizeStep ?? defaultSummarizeStep;
    const route = options.router ?? defaultKeywordRouter;
    return {
        id: 'planner-executor',
        async *run(input, signal) {
            if (signal.aborted) {
                yield { kind: 'error', message: 'aborted' };
                return;
            }
            // Route, then resolve the routed name to a catalog entry. A
            // caller-supplied catalog is searched directly so the override
            // is honored; otherwise `getSkillEntry` does the lookup.
            const routed = route(input.prompt, { catalog: skillCatalog });
            let skillEntry;
            if (routed.match !== null) {
                const wanted = routed.match.skill;
                skillEntry = options.catalog === undefined
                    ? getSkillEntry(wanted)
                    : options.catalog.find((candidate) => candidate.name === wanted);
            }
            // No match, or a routed name that is not in the catalog.
            if (skillEntry === undefined) {
                if (fallbackEnabled) {
                    // Hand the whole turn to a plain ReAct loop and relay
                    // its events untouched.
                    const plainReact = createReactLoopStrategy();
                    for await (const event of plainReact.run(input, signal)) {
                        yield event;
                    }
                    return;
                }
                yield {
                    kind: 'error',
                    message: 'planner-executor: no skill matched and fallbackToReact is disabled',
                };
                return;
            }
            // Announce the plan.
            const steps = skillEntry.steps;
            yield {
                kind: 'custom',
                name: 'plan_started',
                data: {
                    skill: skillEntry.name,
                    plan: steps.map((planned) => planned.id),
                },
            };
            const finished = [];
            const baseTurnId = input.turnId ?? 'turn-anon';
            // Walk the steps in catalog order.
            for (const [position, step] of steps.entries()) {
                if (signal.aborted) {
                    yield { kind: 'error', message: 'aborted' };
                    return;
                }
                yield {
                    kind: 'custom',
                    name: 'step_started',
                    data: { stepId: step.id, description: step.description },
                };
                // Context discipline: the sub-loop gets a step-scoped system
                // prompt and a fresh history holding one synthetic user
                // message per finished step's summary — never the raw
                // scratch of earlier sub-loops.
                const scopedSystemPrompt = buildStepSystemPrompt(input.systemPrompt, step.id, step.description, position, steps.length, finished.length > 0);
                const summaryHistory = finished.map(({ stepId, summary }, n) => ({
                    id: `step-summary-${n}`,
                    role: 'user',
                    text: `[Prior step '${stepId}' summary] ${summary}`,
                }));
                // Bounded ReAct sub-loop for this step. It runs under a
                // step-scoped turn id (`${turnId}#${stepId}`), and when the
                // outer input has a tracer it receives a thin object that
                // exposes only that tracer's bound `emit`.
                const inner = createReactLoopStrategy({ maxTurns: perStepTurns });
                const stepInput = {
                    ...input,
                    history: summaryHistory,
                    systemPrompt: scopedSystemPrompt,
                    turnId: `${baseTurnId}#${step.id}`,
                };
                if (input.tracer) {
                    stepInput.tracer = { emit: input.tracer.emit.bind(input.tracer) };
                }
                let transcript = '';
                for await (const event of inner.run(stepInput, signal)) {
                    if (event.kind !== 'error') {
                        // Relay every non-error inner event unchanged,
                        // collecting assistant text for the step summary.
                        if (event.kind === 'text') {
                            transcript += event.chunk;
                        }
                        yield event;
                        continue;
                    }
                    if (!/exceeded maxTurns/i.test(event.message)) {
                        // Any other error (abort, provider failure, ...)
                        // is surfaced as-is and ends the plan.
                        yield event;
                        return;
                    }
                    // Running out of turns is a soft failure: report it via
                    // a custom event, then move on with whatever text the
                    // step managed to produce.
                    yield {
                        kind: 'custom',
                        name: 'step_budget_exhausted',
                        data: { stepId: step.id, message: event.message },
                    };
                    break;
                }
                // Summarize and record the step.
                const summary = summarize(step.id, transcript);
                finished.push({
                    stepId: step.id,
                    description: step.description,
                    summary,
                });
                yield {
                    kind: 'custom',
                    name: 'step_completed',
                    data: { stepId: step.id, summary },
                };
            }
            // Plan complete.
            yield {
                kind: 'custom',
                name: 'plan_completed',
                data: {
                    skill: skillEntry.name,
                    steps: finished.map(({ stepId, summary }) => ({ stepId, summary })),
                },
            };
        },
    };
}
266
/**
 * Compose the system prompt for one plan step: the caller's base prompt
 * followed by a suffix that names the current step.
 *
 * @param basePrompt Original system prompt. When it is `undefined`,
 *   `null`, or empty, only the step suffix is returned (instead of the
 *   literal text "undefined"/"null" or a leading blank gap).
 * @param stepId Id of the current step.
 * @param stepDescription Human-readable description of the step.
 * @param stepIndex Zero-based position of the step in the plan.
 * @param stepCount Total number of steps in the plan.
 * @param hasPriorSummaries Whether summaries of earlier steps accompany
 *   this step as user messages.
 * @returns The step-scoped system prompt.
 */
function buildStepSystemPrompt(basePrompt, stepId, stepDescription, stepIndex, stepCount, hasPriorSummaries) {
    // 1-indexed for human-readable display ("step 3 of 5").
    const displayIndex = stepIndex + 1;
    const header = `You are on step ${displayIndex} of ${stepCount} (id: ${stepId}): ${stepDescription}.`;
    const tail = hasPriorSummaries
        ? 'Prior step summaries follow as user messages — treat them as facts you have already established, not as new requests.'
        : 'This is the first step.';
    const suffix = `${header} ${tail}`;
    // A missing base prompt must not be stringified into the model's
    // instructions; fall back to the suffix alone.
    if (basePrompt === undefined || basePrompt === null || basePrompt === '') {
        return suffix;
    }
    return `${basePrompt}\n\n${suffix}`;
}
274
+ //# sourceMappingURL=planner-executor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"planner-executor.js","sourceRoot":"","sources":["../src/planner-executor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAyDG;AAGH,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAElE,OAAO,EAAE,uBAAuB,EAAE,MAAM,eAAe,CAAC;AA8DxD,MAAM,sBAAsB,GAAG,CAAC,CAAC;AACjC,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAElC;;;;GAIG;AACH,SAAS,oBAAoB,CAAC,OAAe,EAAE,UAAkB;IAC/D,MAAM,OAAO,GAAG,UAAU,CAAC,IAAI,EAAE,CAAC;IAClC,IAAI,OAAO,CAAC,MAAM,IAAI,qBAAqB;QAAE,OAAO,OAAO,CAAC;IAC5D,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,qBAAqB,CAAC,GAAG,CAAC;AACvD,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,CAAC,MAAM,oBAAoB,GAAgB,CAC/C,MAAc,EACd,OAAoC,EACf,EAAE;IACvB,MAAM,OAAO,GAAG,OAAO,EAAE,OAAO,IAAI,aAAa,CAAC;IAClD,IAAI,CAAC,MAAM;QAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;IACpC,MAAM,OAAO,GAAG,MAAM,CAAC,WAAW,EAAE,CAAC;IACrC,IAAI,IAAI,GAAuD,IAAI,CAAC;IACpE,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,YAAY,EAAE,CAAC;YACtC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;gBAAE,SAAS;YAChC,IAAI,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;gBAAE,KAAK,IAAI,CAAC,CAAC;QACvD,CAAC;QACD,IAAI,KAAK,GAAG,CAAC,IAAI,CAAC,IAAI,KAAK,IAAI,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YACvD,IAAI,GAAG,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;QAC1B,CAAC;IACH,CAAC;IACD,IAAI,IAAI,KAAK,IAAI;QAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;IAC1C,OAAO,EAAE,KAAK,EAAE,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,EAAE,CAAC;AAClE,CAAC,CAAC;AAEF;;;GAGG;AACH,MAAM,UAAU,6BAA6B,CAAC,UAAkC,EAAE;IAChF,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,aAAa,CAAC;IACjD,MAAM,YAAY,GAAG,OAAO,CAAC,YAAY,IAAI,sBAAsB,CAAC;IACpE,MAAM,eAAe,GAAG,OAAO,CAAC,eAAe,KAAK,KAAK,CAAC;IAC1D,MAAM,aAAa,GAAG,OAAO,CAAC,aAAa,IAAI,oBAAoB,CAAC;IACpE,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,oBAAoB,CAAC;IAEtD,OAAO;QACL,EAAE,EAAE,kBAAkB;QACtB,KAAK,CAAC,CAAC,GAAG,CAAC,KAAuB,EAAE,MAAmB;YACrD,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;gBACnB,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC;gBAC5C,OAAO;YACT,CAAC;YAED,gEAAg
E;YAChE,+DAA+D;YAC/D,+DAA+D;YAC/D,oCAAoC;YACpC,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;YACnD,MAAM,WAAW,GAAG,CAAC,KAAgB,EAAE,EAAE,CACvC,OAAO,CAAC,OAAO,KAAK,SAAS;gBAC3B,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC;gBACtB,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,KAAK,KAAK,CAAC,CAAC;YAC5D,MAAM,YAAY,GAAG,QAAQ,CAAC,KAAK,KAAK,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YAE7F,2DAA2D;YAC3D,IAAI,QAAQ,CAAC,KAAK,KAAK,IAAI,IAAI,YAAY,KAAK,SAAS,EAAE,CAAC;gBAC1D,IAAI,CAAC,eAAe,EAAE,CAAC;oBACrB,MAAM;wBACJ,IAAI,EAAE,OAAO;wBACb,OAAO,EAAE,oEAAoE;qBAC9E,CAAC;oBACF,OAAO;gBACT,CAAC;gBACD,+DAA+D;gBAC/D,wCAAwC;gBACxC,MAAM,GAAG,GAAG,uBAAuB,EAAE,CAAC;gBACtC,IAAI,KAAK,EAAE,MAAM,EAAE,IAAI,GAAG,CAAC,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,EAAE,CAAC;oBAC9C,MAAM,EAAE,CAAC;gBACX,CAAC;gBACD,OAAO;YACT,CAAC;YAED,mBAAmB;YACnB,MAAM,IAAI,GAAG,YAAY,CAAC,KAAK,CAAC;YAChC,MAAM;gBACJ,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,cAAc;gBACpB,IAAI,EAAE;oBACJ,KAAK,EAAE,YAAY,CAAC,IAAI;oBACxB,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;iBAC5B;aACF,CAAC;YAEF,MAAM,aAAa,GAA+D,EAAE,CAAC;YACrF,MAAM,YAAY,GAAG,KAAK,CAAC,MAAM,IAAI,WAAW,CAAC;YAEjD,qBAAqB;YACrB,KAAK,IAAI,SAAS,GAAG,CAAC,EAAE,SAAS,GAAG,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,EAAE,CAAC;gBAC7D,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;oBACnB,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC;oBAC5C,OAAO;gBACT,CAAC;gBAED,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAE,CAAC;gBAC9B,MAAM;oBACJ,IAAI,EAAE,QAAQ;oBACd,IAAI,EAAE,cAAc;oBACpB,IAAI,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,EAAE,WAAW,EAAE,IAAI,CAAC,WAAW,EAAE;iBACzD,CAAC;gBAEF,4DAA4D;gBAC5D,0DAA0D;gBAC1D,4DAA4D;gBAC5D,0DAA0D;gBAC1D,uDAAuD;gBACvD,MAAM,gBAAgB,GAAG,qBAAqB,CAC5C,KAAK,CAAC,YAAY,EAClB,IAAI,CAAC,EAAE,EACP,IAAI,CAAC,WAAW,EAChB,SAAS,EACT,IAAI,CAAC,MAAM,EACX,aAAa,CAAC,MAAM,GAAG,CAAC,CACzB,CAAC;gBACF,MAAM,WAAW,GAAkB,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;oBAC9D,EAAE,EAAE,gBAAgB,CAAC,EAAE;oBACvB,IAAI,EAAE,MAAM;oBACZ,IAAI
,EAAE,gBAAgB,CAAC,CAAC,MAAM,cAAc,CAAC,CAAC,OAAO,EAAE;iBACxD,CAAC,CAAC,CAAC;gBAEJ,+DAA+D;gBAC/D,2DAA2D;gBAC3D,2DAA2D;gBAC3D,4DAA4D;gBAC5D,8CAA8C;gBAC9C,6DAA6D;gBAC7D,gDAAgD;gBAChD,MAAM,WAAW,GAAG,uBAAuB,CAAC,EAAE,QAAQ,EAAE,YAAY,EAAE,CAAC,CAAC;gBACxE,MAAM,SAAS,GAAqB;oBAClC,GAAG,KAAK;oBACR,OAAO,EAAE,WAAW;oBACpB,YAAY,EAAE,gBAAgB;oBAC9B,MAAM,EAAE,GAAG,YAAY,IAAI,IAAI,CAAC,EAAE,EAAE;oBACpC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;iBACpF,CAAC;gBAEF,IAAI,iBAAiB,GAAG,EAAE,CAAC;gBAC3B,IAAI,kBAAkB,GAAG,KAAK,CAAC;gBAC/B,IAAI,KAAK,EAAE,MAAM,EAAE,IAAI,WAAW,CAAC,GAAG,CAAC,SAAS,EAAE,MAAM,CAAC,EAAE,CAAC;oBAC1D,IAAI,EAAE,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;wBACvB,iBAAiB,IAAI,EAAE,CAAC,KAAK,CAAC;oBAChC,CAAC;oBACD,4DAA4D;oBAC5D,sDAAsD;oBACtD,4DAA4D;oBAC5D,sDAAsD;oBACtD,oCAAoC;oBACpC,IAAI,EAAE,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;wBACxB,0DAA0D;wBAC1D,qDAAqD;wBACrD,uDAAuD;wBACvD,0DAA0D;wBAC1D,qDAAqD;wBACrD,yDAAyD;wBACzD,qDAAqD;wBACrD,gDAAgD;wBAChD,EAAE;wBACF,mDAAmD;wBACnD,+BAA+B;wBAC/B,IAAI,oBAAoB,CAAC,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC;4BAC1C,kBAAkB,GAAG,IAAI,CAAC;4BAC1B,MAAM;gCACJ,IAAI,EAAE,QAAQ;gCACd,IAAI,EAAE,uBAAuB;gCAC7B,IAAI,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,EAAE,OAAO,EAAE,EAAE,CAAC,OAAO,EAAE;6BAC/C,CAAC;4BACF,MAAM;wBACR,CAAC;wBACD,MAAM,EAAE,CAAC;wBACT,OAAO;oBACT,CAAC;oBACD,MAAM,EAAE,CAAC;gBACX,CAAC;gBACD,4DAA4D;gBAC5D,2DAA2D;gBAC3D,uDAAuD;gBACvD,KAAK,kBAAkB,CAAC;gBAExB,4BAA4B;gBAC5B,MAAM,OAAO,GAAG,aAAa,CAAC,IAAI,CAAC,EAAE,EAAE,iBAAiB,CAAC,CAAC;gBAC1D,aAAa,CAAC,IAAI,CAAC;oBACjB,MAAM,EAAE,IAAI,CAAC,EAAE;oBACf,WAAW,EAAE,IAAI,CAAC,WAAW;oBAC7B,OAAO;iBACR,CAAC,CAAC;gBACH,MAAM;oBACJ,IAAI,EAAE,QAAQ;oBACd,IAAI,EAAE,gBAAgB;oBACtB,IAAI,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,EAAE,OAAO,EAAE;iBACnC,CAAC;YACJ,CAAC;YAED,oBAAoB;YACpB,MAAM;gBACJ,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,gBAAgB;gBACtB,IAAI,EAAE;oBACJ,KAAK,EAAE,YAAY,CAAC,IAAI;oBACxB,KAAK,EAAE,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CA
AC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;iBAC5E;aACF,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC;AAED,SAAS,qBAAqB,CAC5B,UAAkB,EAClB,MAAc,EACd,eAAuB,EACvB,SAAiB,EACjB,SAAiB,EACjB,iBAA0B;IAE1B,wDAAwD;IACxD,MAAM,YAAY,GAAG,SAAS,GAAG,CAAC,CAAC;IACnC,MAAM,MAAM,GAAG,iBAAiB;QAC9B,CAAC,CAAC,mBAAmB,YAAY,OAAO,SAAS,SAAS,MAAM,MAAM,eAAe,yHAAyH;QAC9M,CAAC,CAAC,mBAAmB,YAAY,OAAO,SAAS,SAAS,MAAM,MAAM,eAAe,2BAA2B,CAAC;IACnH,OAAO,GAAG,UAAU,OAAO,MAAM,EAAE,CAAC;AACtC,CAAC"}