@wrongstack/core 0.148.0 → 0.236.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82):
  1. package/dist/{agent-bridge-r9y6gdn4.d.ts → agent-bridge-Cimv7bK7.d.ts} +1 -1
  2. package/dist/{agent-subagent-runner-1GeQE_L0.d.ts → agent-subagent-runner-C658wj_c.d.ts} +9 -8
  3. package/dist/{brain-Cp_3GIS2.d.ts → brain-sCZ3lCjq.d.ts} +28 -2
  4. package/dist/{compactor-BueGt7LG.d.ts → compactor-BRfg3QPd.d.ts} +1 -1
  5. package/dist/{config-BaVThgnT.d.ts → config-Koq6f3fs.d.ts} +2 -2
  6. package/dist/{context-C7G_MtLV.d.ts → context-CLz3z_E8.d.ts} +126 -2
  7. package/dist/coordination/index.d.ts +70 -13
  8. package/dist/coordination/index.js +2126 -151
  9. package/dist/coordination/index.js.map +1 -1
  10. package/dist/defaults/index.d.ts +27 -27
  11. package/dist/defaults/index.js +1328 -354
  12. package/dist/defaults/index.js.map +1 -1
  13. package/dist/execution/index.d.ts +45 -16
  14. package/dist/execution/index.js +367 -59
  15. package/dist/execution/index.js.map +1 -1
  16. package/dist/execution/prompt-enhancer.d.ts +86 -0
  17. package/dist/execution/prompt-enhancer.js +125 -0
  18. package/dist/execution/prompt-enhancer.js.map +1 -0
  19. package/dist/extension/index.d.ts +6 -6
  20. package/dist/extension/index.js +3 -1
  21. package/dist/extension/index.js.map +1 -1
  22. package/dist/{goal-preamble-CYJLg0wk.d.ts → goal-preamble-CnbzyVvl.d.ts} +19 -10
  23. package/dist/{index-BZdezm3g.d.ts → index-BlMqh5GO.d.ts} +8 -8
  24. package/dist/{index-CPweVoFM.d.ts → index-C2eSNPsB.d.ts} +7 -5
  25. package/dist/index.d.ts +439 -129
  26. package/dist/index.js +5206 -905
  27. package/dist/index.js.map +1 -1
  28. package/dist/infrastructure/index.d.ts +7 -7
  29. package/dist/infrastructure/index.js +72 -15
  30. package/dist/infrastructure/index.js.map +1 -1
  31. package/dist/kernel/index.d.ts +9 -9
  32. package/dist/kernel/index.js +7 -1
  33. package/dist/kernel/index.js.map +1 -1
  34. package/dist/{llm-selector-CP72f1lC.d.ts → llm-selector-D22R4AFz.d.ts} +2 -2
  35. package/dist/logger-DmmQhf4P.d.ts +65 -0
  36. package/dist/{mcp-servers-Bl5LTvQg.d.ts → mcp-servers-DFbirBv6.d.ts} +11 -4
  37. package/dist/models/index.d.ts +5 -5
  38. package/dist/models/index.js +89 -9
  39. package/dist/models/index.js.map +1 -1
  40. package/dist/{models-registry-D90K9UnM.d.ts → models-registry-CnJRjTXc.d.ts} +1 -1
  41. package/dist/{multi-agent-coordinator-QWEzJDlm.d.ts → multi-agent-coordinator-60weDZoA.d.ts} +8 -8
  42. package/dist/{null-fleet-bus-BUyfqh23.d.ts → null-fleet-bus-1068dEnr.d.ts} +7 -7
  43. package/dist/observability/index.d.ts +2 -2
  44. package/dist/package-outdated-watcher-pzJ5w7y8.d.ts +560 -0
  45. package/dist/{parallel-eternal-engine-C75QuhAI.d.ts → parallel-eternal-engine-DtG1fjc9.d.ts} +13 -9
  46. package/dist/{path-resolver-DRjQBkoO.d.ts → path-resolver-CA1ULU0J.d.ts} +3 -3
  47. package/dist/{permission-B7nKnEvQ.d.ts → permission-DbWPbuoA.d.ts} +1 -1
  48. package/dist/{permission-policy-8-6zBmfA.d.ts → permission-policy-AOk0LVsV.d.ts} +2 -2
  49. package/dist/pipeline-DsmlwTXu.d.ts +493 -0
  50. package/dist/{plan-templates-CkKNPU3I.d.ts → plan-templates-DPABrDvy.d.ts} +19 -8
  51. package/dist/{provider-runner-BNpuIyOL.d.ts → provider-runner-D0HgUqwV.d.ts} +3 -3
  52. package/dist/{retry-policy-rutAfVeR.d.ts → retry-policy-BVnkbMET.d.ts} +1 -1
  53. package/dist/sdd/index.d.ts +8 -8
  54. package/dist/sdd/index.js +358 -85
  55. package/dist/sdd/index.js.map +1 -1
  56. package/dist/{secret-vault-DoISxaKO.d.ts → secret-vault-BJDY28ev.d.ts} +7 -1
  57. package/dist/{secret-vault-BTcC_T5v.d.ts → secret-vault-CeVNiy_f.d.ts} +4 -3
  58. package/dist/security/index.d.ts +6 -5
  59. package/dist/security/index.js +214 -35
  60. package/dist/security/index.js.map +1 -1
  61. package/dist/{selector-4vDFZKt3.d.ts → selector-Cb4_9-hf.d.ts} +1 -1
  62. package/dist/{session-event-bridge-DWlvglC2.d.ts → session-event-bridge-BhtkkFFy.d.ts} +4 -2
  63. package/dist/{session-reader-BAtCxdaw.d.ts → session-reader-CCOssnBS.d.ts} +1 -1
  64. package/dist/skills/index.js +171 -21
  65. package/dist/skills/index.js.map +1 -1
  66. package/dist/storage/index.d.ts +151 -13
  67. package/dist/storage/index.js +1117 -256
  68. package/dist/storage/index.js.map +1 -1
  69. package/dist/types/index.d.ts +68 -21
  70. package/dist/types/index.js +616 -74
  71. package/dist/types/index.js.map +1 -1
  72. package/dist/utils/expect-defined.js +3 -1
  73. package/dist/utils/expect-defined.js.map +1 -1
  74. package/dist/utils/index.d.ts +80 -4
  75. package/dist/utils/index.js +100 -15
  76. package/dist/utils/index.js.map +1 -1
  77. package/dist/{wstack-paths-DD50Omgn.d.ts → wstack-paths-CJjEwPXn.d.ts} +14 -1
  78. package/package.json +7 -3
  79. package/skills/chimera/SKILL.md +105 -0
  80. package/skills/research-web/SKILL.md +342 -0
  81. package/dist/logger-B9J5puGM.d.ts +0 -32
  82. package/dist/pipeline-BG7UgbDc.d.ts +0 -239
@@ -9,7 +9,9 @@ import { EventEmitter } from 'events';
9
9
  // src/utils/expect-defined.ts
10
10
  function expectDefined(value, label) {
11
11
  if (value === null || value === void 0) {
12
- throw new Error("Expected value to be defined");
12
+ const err = new Error("Expected value to be defined");
13
+ err.name = "ExpectDefinedError";
14
+ throw err;
13
15
  }
14
16
  return value;
15
17
  }
@@ -58,22 +60,31 @@ function estimateToolResultTokens(content) {
58
60
  function estimateTextTokens(text) {
59
61
  return RoughTokenEstimate(text);
60
62
  }
63
+ function computeMessageTokens(msg) {
64
+ if (typeof msg.content === "string") return estimateTextTokens(msg.content);
65
+ let total = 0;
66
+ for (const b of msg.content) {
67
+ if (b.type === "text") total += estimateTextTokens(b.text);
68
+ else if (b.type === "tool_use") total += estimateToolInputTokens(b.input);
69
+ else if (b.type === "tool_result") total += estimateToolResultTokens(b.content);
70
+ else total += RoughTokenEstimate(JSON.stringify(b));
71
+ }
72
+ return total;
73
+ }
61
74
  function estimateMessageTokens(messages) {
62
75
  let total = 0;
63
76
  for (const m of messages) {
64
- if (typeof m.content === "string") {
65
- total += estimateTextTokens(m.content);
66
- } else {
67
- for (const b of m.content) {
68
- if (b.type === "text") total += estimateTextTokens(b.text);
69
- else if (b.type === "tool_use") total += estimateToolInputTokens(b.input);
70
- else if (b.type === "tool_result") total += estimateToolResultTokens(b.content);
71
- }
77
+ if (typeof m._estTokens === "number" && m._estTokens > 0) {
78
+ total += m._estTokens;
79
+ continue;
72
80
  }
81
+ total += computeMessageTokens(m);
73
82
  }
74
83
  return total;
75
84
  }
76
85
  function estimateToolDefTokens(tool) {
86
+ const cached = tool._estDefTokens;
87
+ if (typeof cached === "number" && cached > 0) return cached;
77
88
  return RoughTokenEstimate(tool.name) + RoughTokenEstimate(tool.description ?? "") + RoughTokenEstimate(JSON.stringify(tool.inputSchema));
78
89
  }
79
90
  function estimateRequestTokens(messages, systemPrompt, tools, calibrationKey = CALIBRATION_GLOBAL_KEY) {
@@ -83,6 +94,11 @@ function estimateRequestTokens(messages, systemPrompt, tools, calibrationKey = C
83
94
  } else if (Array.isArray(messages)) {
84
95
  for (const m of messages) {
85
96
  if (typeof m === "object" && m !== null && "content" in m) {
97
+ const cached = m._estTokens;
98
+ if (typeof cached === "number" && cached > 0) {
99
+ messagesTokens += cached;
100
+ continue;
101
+ }
86
102
  const content = m.content;
87
103
  if (typeof content === "string") {
88
104
  messagesTokens += RoughTokenEstimate(content);
@@ -267,6 +283,18 @@ function findPreserveStart(messages, preserveK) {
267
283
  }
268
284
  function eliseOldToolResults(messages, opts) {
269
285
  const preserveStart = findPreserveStart(messages, opts.preserveK);
286
+ let hasOversized = false;
287
+ for (let i = 0; i < preserveStart && !hasOversized; i++) {
288
+ const msg = messages[i];
289
+ if (!msg || !Array.isArray(msg.content)) continue;
290
+ for (const b of msg.content) {
291
+ if (b.type === "tool_result" && estimateToolResultTokens(b.content) >= opts.eliseThreshold) {
292
+ hasOversized = true;
293
+ break;
294
+ }
295
+ }
296
+ }
297
+ if (!hasOversized) return { messages, saved: 0, changed: false };
270
298
  let saved = 0;
271
299
  let changed = false;
272
300
  const next = new Array(messages.length);
@@ -1245,6 +1273,15 @@ var AutoCompactionMiddleware = class _AutoCompactionMiddleware {
1245
1273
  static NOOP_RETRY_DELTA_TOKENS = 2e3;
1246
1274
  /** Tracks the most recent no-op attempt so we can avoid re-firing per turn. */
1247
1275
  lastNoopAttempt = null;
1276
+ /**
1277
+ * Cached token estimate from the last handler() invocation. When the
1278
+ * message count and tool count haven't changed since the last estimate
1279
+ * (autonomous idle loops), we skip the expensive O(n) token estimation
1280
+ * and reuse this value. Reset to -1 when the context changes.
1281
+ */
1282
+ _cachedTokens = -1;
1283
+ _cachedMsgCount = -1;
1284
+ _cachedToolCount = -1;
1248
1285
  /**
1249
1286
  * @param compactor Compactor to use for compaction.
1250
1287
  * @param maxContext Provider's max context window in tokens.
@@ -1280,12 +1317,24 @@ var AutoCompactionMiddleware = class _AutoCompactionMiddleware {
1280
1317
  }
1281
1318
  handler() {
1282
1319
  return async (ctx, next) => {
1283
- const tokens = this._estimator ? this._estimator(ctx) : estimateRequestTokensCalibrated(
1284
- ctx.messages,
1285
- ctx.systemPrompt,
1286
- ctx.tools ?? [],
1287
- `${ctx.provider?.id ?? "unknown"}/${ctx.model}`
1288
- ).total;
1320
+ const msgCount = ctx.messages.length;
1321
+ const toolCount = (ctx.tools ?? []).length;
1322
+ let tokens;
1323
+ if (this._estimator) {
1324
+ tokens = this._estimator(ctx);
1325
+ } else if (msgCount === this._cachedMsgCount && toolCount === this._cachedToolCount && this._cachedTokens >= 0) {
1326
+ tokens = this._cachedTokens;
1327
+ } else {
1328
+ tokens = estimateRequestTokensCalibrated(
1329
+ ctx.messages,
1330
+ ctx.systemPrompt,
1331
+ ctx.tools ?? [],
1332
+ `${ctx.provider?.id ?? "unknown"}/${ctx.model}`
1333
+ ).total;
1334
+ this._cachedTokens = tokens;
1335
+ this._cachedMsgCount = msgCount;
1336
+ this._cachedToolCount = toolCount;
1337
+ }
1289
1338
  const load = tokens / this._maxContext;
1290
1339
  const policy = this.policyProvider?.(ctx);
1291
1340
  const thresholds = policy?.thresholds ?? {
@@ -1566,7 +1615,7 @@ function createToolOutputSerializer(opts = {}) {
1566
1615
  }
1567
1616
 
1568
1617
  // src/execution/tool-executor.ts
1569
- var ToolExecutor = class {
1618
+ var ToolExecutor = class _ToolExecutor {
1570
1619
  constructor(registry, opts) {
1571
1620
  this.registry = registry;
1572
1621
  this.opts = opts;
@@ -1578,6 +1627,10 @@ var ToolExecutor = class {
1578
1627
  }
1579
1628
  registry;
1580
1629
  opts;
1630
+ /** Minimum gap between coalesced `partial_output` tool.progress emits. */
1631
+ static PROGRESS_EMIT_INTERVAL_MS = 100;
1632
+ /** Max chars of accumulated stream text carried per coalesced emit. */
1633
+ static PROGRESS_TAIL_CHARS = 16384;
1581
1634
  serializer;
1582
1635
  iterationTimeoutMs;
1583
1636
  maxToolTimeoutMs;
@@ -1623,9 +1676,6 @@ Please call the tool again with arguments that match its inputSchema. You can us
1623
1676
  return { result, tool, durationMs: Date.now() - start };
1624
1677
  }
1625
1678
  const toolDangerousCaps = getDangerousCapabilities(tool);
1626
- if (toolDangerousCaps.length > 0) {
1627
- if (this.opts.events) ;
1628
- }
1629
1679
  if (hasMalformedArguments(use.input)) {
1630
1680
  const result = this.malformedInputResult(use, extractMalformedRaw(use.input));
1631
1681
  budget = this.decrementBudget(result, budget);
@@ -1863,17 +1913,48 @@ ${post.additionalContext}` };
1863
1913
  throw new Error(`Tool "${tool.name}" does not support streaming execution`);
1864
1914
  }
1865
1915
  const stream = tool.executeStream(input, ctx, { signal });
1866
- for await (const ev of stream) {
1867
- if (ev.type === "final") {
1868
- finalOutput = ev.output;
1869
- sawFinal = true;
1870
- break;
1871
- }
1916
+ const iter = stream[Symbol.asyncIterator]();
1917
+ let progressTail = "";
1918
+ let lastProgressEmitAt = 0;
1919
+ const emitProgress = (ev) => {
1872
1920
  this.opts.events?.emit("tool.progress", {
1873
1921
  name: tool.name,
1874
1922
  id: toolUseId ?? "<unknown>",
1875
1923
  event: ev
1876
1924
  });
1925
+ };
1926
+ const flushProgressTail = (force) => {
1927
+ if (progressTail.length === 0) return;
1928
+ const now = Date.now();
1929
+ if (!force && now - lastProgressEmitAt < _ToolExecutor.PROGRESS_EMIT_INTERVAL_MS) return;
1930
+ const text = progressTail;
1931
+ progressTail = "";
1932
+ lastProgressEmitAt = now;
1933
+ emitProgress({ type: "partial_output", text });
1934
+ };
1935
+ try {
1936
+ while (true) {
1937
+ const { done, value: ev } = await iter.next();
1938
+ if (done) break;
1939
+ if (ev.type === "final") {
1940
+ finalOutput = ev.output;
1941
+ sawFinal = true;
1942
+ break;
1943
+ }
1944
+ if (ev.type === "partial_output" && typeof ev.text === "string") {
1945
+ progressTail += ev.text;
1946
+ if (progressTail.length > _ToolExecutor.PROGRESS_TAIL_CHARS) {
1947
+ progressTail = progressTail.slice(-_ToolExecutor.PROGRESS_TAIL_CHARS);
1948
+ }
1949
+ flushProgressTail(false);
1950
+ continue;
1951
+ }
1952
+ flushProgressTail(true);
1953
+ emitProgress(ev);
1954
+ }
1955
+ flushProgressTail(true);
1956
+ } finally {
1957
+ await iter.return?.(void 0);
1877
1958
  }
1878
1959
  if (!sawFinal) {
1879
1960
  throw new Error(`tool "${tool.name}" executeStream completed without a 'final' event`);
@@ -1984,9 +2065,11 @@ function extractMalformedRaw(input) {
1984
2065
 
1985
2066
  // src/utils/assert-never.ts
1986
2067
  function assertNever(x, message) {
1987
- throw new Error(
2068
+ const err = new Error(
1988
2069
  `Unhandled case: ${JSON.stringify(x)}`
1989
2070
  );
2071
+ err.name = "AssertNeverError";
2072
+ throw err;
1990
2073
  }
1991
2074
 
1992
2075
  // src/utils/regex-guard.ts
@@ -2033,7 +2116,13 @@ var DoneConditionChecker = class {
2033
2116
  const result = compileUserRegex(condition.pattern, "");
2034
2117
  this.compiledRegex = result.ok ? result.regex : null;
2035
2118
  if (!result.ok) {
2036
- console.warn(`[DoneConditionChecker] Invalid regex pattern "${condition.pattern}": ${result.reason}`);
2119
+ console.warn(JSON.stringify({
2120
+ level: "warn",
2121
+ event: "autonomous.done_condition_invalid_regex",
2122
+ pattern: condition.pattern,
2123
+ reason: result.reason,
2124
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
2125
+ }));
2037
2126
  }
2038
2127
  } else {
2039
2128
  this.compiledRegex = null;
@@ -2267,9 +2356,13 @@ function projectSlug(absRoot) {
2267
2356
  function slugify(name) {
2268
2357
  return name.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "").slice(0, 40) || "project";
2269
2358
  }
2359
+ function wstackGlobalRoot() {
2360
+ const fromEnv = process.env["WRONGSTACK_HOME"];
2361
+ if (fromEnv && fromEnv.trim().length > 0) return path2.resolve(fromEnv);
2362
+ return path2.join(os.homedir(), ".wrongstack");
2363
+ }
2270
2364
  function resolveWstackPaths(opts) {
2271
- const home = opts.userHome ?? os.homedir();
2272
- const globalRoot = opts.globalRoot ?? path2.join(home, ".wrongstack");
2365
+ const globalRoot = opts.globalRoot ?? (opts.userHome ? path2.join(opts.userHome, ".wrongstack") : wstackGlobalRoot());
2273
2366
  const hash = projectHash(opts.projectRoot);
2274
2367
  const slug = projectSlug(opts.projectRoot);
2275
2368
  const projectDir = path2.join(globalRoot, "projects", slug);
@@ -2326,12 +2419,24 @@ async function loadGoal(filePath) {
2326
2419
  try {
2327
2420
  const parsed = JSON.parse(raw);
2328
2421
  if (parsed?.version !== 1 || typeof parsed.goal !== "string" || !Array.isArray(parsed.journal)) {
2329
- console.warn(`[goal-store] Corrupt goal.json at ${filePath} \u2014 invalid schema. Consider deleting it and re-creating.`);
2422
+ console.warn(JSON.stringify({
2423
+ level: "warn",
2424
+ event: "goal_store.invalid_schema",
2425
+ path: filePath,
2426
+ message: "invalid schema \u2014 consider deleting and re-creating",
2427
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
2428
+ }));
2330
2429
  return null;
2331
2430
  }
2332
2431
  return parsed;
2333
2432
  } catch {
2334
- console.warn(`[goal-store] Corrupt goal.json at ${filePath} \u2014 JSON parse failed. Consider deleting it and re-creating.`);
2433
+ console.warn(JSON.stringify({
2434
+ level: "warn",
2435
+ event: "goal_store.parse_failed",
2436
+ path: filePath,
2437
+ message: "JSON parse failed \u2014 consider deleting and re-creating",
2438
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
2439
+ }));
2335
2440
  return null;
2336
2441
  }
2337
2442
  }
@@ -2406,6 +2511,25 @@ var RISK_LEVELS = {
2406
2511
  high: 2,
2407
2512
  critical: 3
2408
2513
  };
2514
+ function createTieredBrainArbiter(opts) {
2515
+ return {
2516
+ async decide(request) {
2517
+ const policyDecision = await opts.policy.decide(request);
2518
+ if (policyDecision.type !== "ask_human") return policyDecision;
2519
+ const ceiling = opts.getMaxAutoRisk?.() ?? "medium";
2520
+ if (!opts.autonomous || ceiling === "off") return policyDecision;
2521
+ const ceilingLevel = ceiling === "all" ? 3 : RISK_LEVELS[ceiling] ?? 1;
2522
+ const requestLevel = RISK_LEVELS[request.risk] ?? 2;
2523
+ if (requestLevel > ceilingLevel) return policyDecision;
2524
+ try {
2525
+ const llmDecision = await opts.autonomous.decide(request);
2526
+ if (llmDecision.type === "answer") return llmDecision;
2527
+ } catch {
2528
+ }
2529
+ return policyDecision;
2530
+ }
2531
+ };
2532
+ }
2409
2533
  function createAutonomyBrain(opts) {
2410
2534
  const maxRisk = opts.maxAutoRisk ?? "high";
2411
2535
  const maxRiskLevel = RISK_LEVELS[maxRisk] ?? 2;
@@ -2612,7 +2736,14 @@ var EternalAutonomyEngine = class {
2612
2736
  stop() {
2613
2737
  this.stopRequested = true;
2614
2738
  this.currentCtrl?.abort();
2615
- void this.persistEngineState("stopped").catch(() => {
2739
+ void this.persistEngineState("stopped").catch((err) => {
2740
+ console.error(JSON.stringify({
2741
+ level: "error",
2742
+ event: "engine.persist_state_failed",
2743
+ message: err instanceof Error ? err.message : String(err),
2744
+ context: { expectedState: "stopped" },
2745
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
2746
+ }));
2616
2747
  });
2617
2748
  this.state = "stopped";
2618
2749
  }
@@ -3579,6 +3710,7 @@ var SubagentBudget = class _SubagentBudget {
3579
3710
  function makeAgentSubagentRunner(opts) {
3580
3711
  const format = opts.formatTaskInput ?? defaultFormatTaskInput;
3581
3712
  return async (task, ctx) => {
3713
+ const taskStartedAt = Date.now();
3582
3714
  const factoryResult = await opts.factory(ctx.config);
3583
3715
  const { agent, events } = factoryResult;
3584
3716
  const detachFleet = opts.fleetBus?.attach(ctx.subagentId, events, task.id);
@@ -3675,7 +3807,7 @@ function makeAgentSubagentRunner(opts) {
3675
3807
  }),
3676
3808
  events.on("provider.text_delta", (e) => {
3677
3809
  ctx.budget.markActivity();
3678
- streamingTextAcc = (streamingTextAcc + e.text).slice(-200);
3810
+ streamingTextAcc = (streamingTextAcc + e.text).slice(-2e3);
3679
3811
  })
3680
3812
  );
3681
3813
  const onParentAbort = () => aborter.abort();
@@ -3683,6 +3815,15 @@ function makeAgentSubagentRunner(opts) {
3683
3815
  let result;
3684
3816
  try {
3685
3817
  result = await agent.run(format(task, ctx.config), { signal: aborter.signal });
3818
+ events.emit("subagent.task_completed", {
3819
+ subagentId: ctx.subagentId,
3820
+ taskId: task.id,
3821
+ status: result.status === "done" ? "success" : "failed",
3822
+ iterations: result.iterations,
3823
+ toolCalls: ctx.budget.usage().toolCalls,
3824
+ durationMs: Date.now() - taskStartedAt,
3825
+ finalText: result.finalText?.trim() || void 0
3826
+ });
3686
3827
  } finally {
3687
3828
  detachFleet?.();
3688
3829
  ctx.signal.removeEventListener("abort", onParentAbort);
@@ -3718,21 +3859,40 @@ function makeAgentSubagentRunner(opts) {
3718
3859
  if (budgetError) throw budgetError;
3719
3860
  }
3720
3861
  if (result.status === "failed") {
3721
- throw result.error instanceof Error ? result.error : new Error(String(result.error ?? "agent failed"));
3862
+ throw result.error instanceof AgentError ? result.error : new AgentError({
3863
+ message: result.error instanceof Error ? result.error.message : String(result.error ?? "agent failed"),
3864
+ code: ERROR_CODES.AGENT_RUN_FAILED,
3865
+ cause: result.error
3866
+ });
3722
3867
  }
3723
3868
  if (result.status === "aborted") {
3724
- throw new Error("agent aborted");
3869
+ throw new AgentError({
3870
+ message: "agent aborted",
3871
+ code: ERROR_CODES.AGENT_ABORTED
3872
+ });
3725
3873
  }
3726
3874
  if (result.status === "max_iterations") {
3727
- throw new Error("agent exhausted iteration limit");
3875
+ throw new AgentError({
3876
+ message: "agent exhausted iteration limit",
3877
+ code: ERROR_CODES.AGENT_ITERATION_LIMIT,
3878
+ recoverable: true
3879
+ });
3728
3880
  }
3729
3881
  const usage = ctx.budget.usage();
3730
3882
  const finalText = (result.finalText ?? "").trim();
3731
3883
  if (finalText.length === 0 && usage.toolCalls === 0) {
3732
- throw new Error("empty response");
3884
+ throw new AgentError({
3885
+ message: "empty response \u2014 agent produced no text and no tool calls",
3886
+ code: ERROR_CODES.AGENT_RUN_FAILED,
3887
+ context: { iterations: result.iterations }
3888
+ });
3733
3889
  }
3734
3890
  if (finalText.length === 0 && lastToolFailed !== null) {
3735
- throw new Error(`tool failed: ${lastToolFailed}`);
3891
+ throw new AgentError({
3892
+ message: `unrecovered tool failure: ${lastToolFailed} \u2014 agent ended turn without acknowledging the error`,
3893
+ code: ERROR_CODES.AGENT_RUN_FAILED,
3894
+ context: { tool: lastToolFailed, iterations: result.iterations }
3895
+ });
3736
3896
  }
3737
3897
  return {
3738
3898
  result: result.finalText,
@@ -3764,11 +3924,11 @@ var HEAVY_BUDGET = {
3764
3924
  };
3765
3925
  var TOOLS = {
3766
3926
  /** Pure read/inspect — safe for analysis and review agents. */
3767
- read: ["read", "grep", "glob", "search", "tree"],
3927
+ read: ["read", "grep", "glob", "search", "tree", "mailbox"],
3768
3928
  /** Read + structured inspection (logs, diffs, json, dependency audit). */
3769
- inspect: ["read", "grep", "glob", "search", "tree", "json", "diff", "logs", "audit"],
3929
+ inspect: ["read", "grep", "glob", "search", "tree", "json", "diff", "logs", "audit", "mailbox"],
3770
3930
  /** Read + edit (no shell). For agents that write code/docs but don't run it. */
3771
- write: ["read", "grep", "glob", "search", "tree", "write", "edit", "replace", "patch"],
3931
+ write: ["read", "grep", "glob", "search", "tree", "write", "edit", "replace", "patch", "mailbox"],
3772
3932
  /** Full build loop: edit + run (lint/format/typecheck/test/bash). */
3773
3933
  build: [
3774
3934
  "read",
@@ -3785,16 +3945,17 @@ var TOOLS = {
3785
3945
  "lint",
3786
3946
  "format",
3787
3947
  "typecheck",
3788
- "test"
3948
+ "test",
3949
+ "mailbox"
3789
3950
  ],
3790
3951
  /** Version control. */
3791
3952
  vcs: ["read", "grep", "glob", "git", "diff"],
3792
3953
  /** Dependency management + CVE audit. */
3793
- deps: ["read", "grep", "glob", "install", "outdated", "audit", "json"],
3954
+ deps: ["read", "grep", "glob", "install", "outdated", "audit", "json", "mailbox"],
3794
3955
  /** Documentation authoring. */
3795
- docs: ["read", "grep", "glob", "search", "tree", "write", "edit", "document"],
3956
+ docs: ["read", "grep", "glob", "search", "tree", "write", "edit", "document", "mailbox"],
3796
3957
  /** Web research. */
3797
- research: ["read", "grep", "glob", "search", "fetch"]
3958
+ research: ["read", "grep", "glob", "search", "fetch", "mailbox"]
3798
3959
  };
3799
3960
 
3800
3961
  // src/coordination/agents/phase1-discovery.ts
@@ -4592,15 +4753,44 @@ Working rules:
4592
4753
  id: "e2e",
4593
4754
  name: "E2E",
4594
4755
  role: "e2e",
4595
- tools: [...TOOLS.build, "fetch"],
4756
+ tools: [
4757
+ ...TOOLS.build,
4758
+ "fetch",
4759
+ "playwright_navigate",
4760
+ "playwright_screenshot",
4761
+ "playwright_click",
4762
+ "playwright_type",
4763
+ "playwright_evaluate",
4764
+ "playwright_select_option",
4765
+ "playwright_hover",
4766
+ "playwright_fill_form",
4767
+ "playwright_wait_for",
4768
+ "playwright_press_key",
4769
+ "playwright_drag"
4770
+ ],
4596
4771
  prompt: `You are the E2E agent. Your job is end-to-end testing: drive the whole
4597
4772
  system the way a user would and verify the full flow works across boundaries.
4598
4773
 
4599
4774
  Scope:
4600
4775
  - Author end-to-end scenarios that exercise real user journeys
4601
4776
  - Drive UI/CLI/API across process and network boundaries
4777
+ - Use Playwright browser tools (navigate, click, type, screenshot, evaluate)
4778
+ to automate web UI flows \u2014 open pages, interact with forms, capture evidence
4602
4779
  - Set up and tear down realistic test state
4603
- - Capture failures with enough detail to reproduce (screenshots, logs)
4780
+ - Capture failures with enough detail to reproduce (screenshots, logs, page HTML)
4781
+
4782
+ Playwright tools available (require the "playwright" MCP server to be enabled):
4783
+ playwright_navigate(url) \u2014 open a page at the given URL
4784
+ playwright_screenshot() \u2014 capture a full-page or viewport screenshot
4785
+ playwright_click(selector) \u2014 click on an element matching a CSS selector
4786
+ playwright_type(selector, text) \u2014 type text into a focused input element
4787
+ playwright_evaluate(script) \u2014 run arbitrary JavaScript in the page context
4788
+ playwright_select_option(selector, value) \u2014 pick a <select> dropdown option
4789
+ playwright_hover(selector) \u2014 hover the mouse over an element
4790
+ playwright_fill_form(fields) \u2014 fill multiple form fields in one call
4791
+ playwright_wait_for(selector) \u2014 block until an element appears on the page
4792
+ playwright_press_key(key) \u2014 press a keyboard key (Enter, Tab, Escape, \u2026)
4793
+ playwright_drag(from, to) \u2014 drag an element from one selector to another
4604
4794
 
4605
4795
  Input format you accept:
4606
4796
  { "task": "scenario | smoke | journey", "flow": "<user journey>", "surface": "ui | cli | api" }
@@ -4614,8 +4804,10 @@ Output: Markdown e2e report:
4614
4804
  Working rules:
4615
4805
  - Test the real flow end to end; don't stub the thing under test
4616
4806
  - Make scenarios deterministic \u2014 control time, randomness, and external state
4617
- - On failure, capture artifacts (logs/screenshots) for reproduction
4618
- - Keep scenarios independent so one failure doesn't cascade`
4807
+ - On failure, capture artifacts (screenshots, page HTML, logs) for reproduction
4808
+ - Keep scenarios independent so one failure doesn't cascade
4809
+ - For browser tests: playwright_navigate first, then interact, then playwright_screenshot as evidence
4810
+ - If playwright tools are unavailable, report it and fall back to API/CLI testing`
4619
4811
  },
4620
4812
  budget: HEAVY_BUDGET,
4621
4813
  capability: {
@@ -4628,10 +4820,106 @@ Working rules:
4628
4820
  "user journey",
4629
4821
  "smoke test",
4630
4822
  "playwright",
4823
+ "browser",
4824
+ "screenshot",
4825
+ "web ui",
4826
+ "headless",
4631
4827
  "cypress",
4632
4828
  "full flow",
4633
4829
  "browser test",
4634
- "acceptance test"
4830
+ "acceptance test",
4831
+ "navigate",
4832
+ "click",
4833
+ "form fill",
4834
+ "dom",
4835
+ "page load"
4836
+ ]
4837
+ }
4838
+ },
4839
+ {
4840
+ config: {
4841
+ id: "browser",
4842
+ name: "Browser",
4843
+ role: "browser",
4844
+ tools: [
4845
+ ...TOOLS.read,
4846
+ "fetch",
4847
+ "playwright_navigate",
4848
+ "playwright_screenshot",
4849
+ "playwright_click",
4850
+ "playwright_type",
4851
+ "playwright_evaluate",
4852
+ "playwright_select_option",
4853
+ "playwright_hover",
4854
+ "playwright_fill_form",
4855
+ "playwright_wait_for",
4856
+ "playwright_press_key",
4857
+ "playwright_drag"
4858
+ ],
4859
+ prompt: `You are the Browser agent. Your job is browser automation: open web pages,
4860
+ interact with them, extract data, capture screenshots, and return structured
4861
+ results. You are a read-focused agent \u2014 you drive the browser, not the filesystem.
4862
+
4863
+ Scope:
4864
+ - Navigate to URLs and wait for pages to load
4865
+ - Take full-page or element screenshots as evidence
4866
+ - Click buttons, fill forms, select options, type text \u2014 full user simulation
4867
+ - Extract page content: text, HTML, element attributes, data tables
4868
+ - Evaluate JavaScript in the page context to extract structured data
4869
+ - Verify visual state (element visibility, text content, attribute values)
4870
+
4871
+ Playwright tools available (require the "playwright" MCP server to be enabled):
4872
+ playwright_navigate(url) \u2014 open a page at the given URL
4873
+ playwright_screenshot() \u2014 capture a full-page or viewport screenshot
4874
+ playwright_click(selector) \u2014 click on an element matching a CSS selector
4875
+ playwright_type(selector, text) \u2014 type text into a focused input element
4876
+ playwright_evaluate(script) \u2014 run arbitrary JavaScript in the page context
4877
+ playwright_select_option(selector, value) \u2014 pick a <select> dropdown option
4878
+ playwright_hover(selector) \u2014 hover the mouse over an element
4879
+ playwright_fill_form(fields) \u2014 fill multiple form fields in one call
4880
+ playwright_wait_for(selector) \u2014 block until an element appears on the page
4881
+ playwright_press_key(key) \u2014 press a keyboard key (Enter, Tab, Escape, \u2026)
4882
+ playwright_drag(from, to) \u2014 drag an element from one selector to another
4883
+
4884
+ Input format you accept:
4885
+ { "task": "navigate | screenshot | extract | interact | verify", "url": "<url>", "steps": ["step1", "step2"] }
4886
+
4887
+ Output: Structured markdown report:
4888
+ - ## Page (URL, title, load status)
4889
+ - ## Actions Taken (step-by-step with timestamps)
4890
+ - ## Results (extracted data, element states, verification results)
4891
+ - ## Screenshots (list attached screenshot references)
4892
+ - ## Errors (any failures with stack traces)
4893
+
4894
+ Working rules:
4895
+ - Always playwright_navigate first before any interaction
4896
+ - Always playwright_wait_for after navigation to ensure the page is ready
4897
+ - playwright_screenshot is your primary evidence \u2014 use it before and after interactions
4898
+ - Use playwright_evaluate for structured data extraction (JSON, text content)
4899
+ - If a selector fails, try alternative selectors before giving up
4900
+ - Report exact CSS selectors used \u2014 they're part of the evidence
4901
+ - If playwright tools are unavailable, report the error immediately \u2014 do not guess`
4902
+ },
4903
+ budget: MEDIUM_BUDGET,
4904
+ capability: {
4905
+ phase: "verify",
4906
+ summary: "Browser automation: opens pages, clicks, types, screenshots, extracts data via Playwright headless Chromium.",
4907
+ keywords: [
4908
+ "browser",
4909
+ "screenshot",
4910
+ "navigate",
4911
+ "web page",
4912
+ "scrape",
4913
+ "crawl",
4914
+ "headless",
4915
+ "chrome",
4916
+ "open url",
4917
+ "capture",
4918
+ "page title",
4919
+ "extract data",
4920
+ "fill form",
4921
+ "click button",
4922
+ "take screenshot"
4635
4923
  ]
4636
4924
  }
4637
4925
  },
@@ -6080,7 +6368,7 @@ Working rules:
6080
6368
  id: "tech-stack",
6081
6369
  name: "Tech Stack Validator",
6082
6370
  role: "tech-stack",
6083
- tools: ["search", "fetch", "read", "grep", "glob", "outdated", "audit", "json"],
6371
+ tools: ["search", "fetch", "read", "grep", "glob", "outdated", "audit", "json", "mailbox"],
6084
6372
  prompt: `You are the Tech Stack Validator \u2014 a single-shot validation agent that fires
6085
6373
  before any package, library, or framework choice is committed.
6086
6374
 
@@ -6088,6 +6376,16 @@ Your ONLY job: verify that a technology choice is current, real, and not obsolet
6088
6376
  You are the "this isn't code, this is 10-year-old technology" agent. Intervene
6089
6377
  hard when the LLM hallucinates a version number or suggests dead tech.
6090
6378
 
6379
+ ## Before you begin
6380
+
6381
+ Check the inter-agent mailbox for pending tasks. Other agents or the file-watcher
6382
+ may have left assign messages with dependency files to audit:
6383
+ - mailbox action=check
6384
+
6385
+ If you find an assign message, use the specified file path and packages.
6386
+ When done, post results back:
6387
+ - mailbox action=send to=<sender> type=result subject="Tech stack audit results" body="..."
6388
+
6091
6389
  ## Critical rules
6092
6390
 
6093
6391
  1. **Verify existence.** Search npm registry (fetch https://registry.npmjs.org/<pkg>/latest)
@@ -6146,11 +6444,11 @@ When APPROVED:
6146
6444
  **Install**: pnpm add <name>@^<major>.<minor>.0`
6147
6445
  },
6148
6446
  budget: {
6149
- timeoutMs: 6e4,
6150
- maxIterations: 5,
6151
- maxToolCalls: 20,
6152
- maxTokens: 4e4,
6153
- maxCostUsd: 0.1
6447
+ timeoutMs: 12e4,
6448
+ maxIterations: 10,
6449
+ maxToolCalls: 40,
6450
+ maxTokens: 6e4,
6451
+ maxCostUsd: 0.25
6154
6452
  },
6155
6453
  capability: {
6156
6454
  phase: "meta",
@@ -6380,6 +6678,9 @@ function providerStatusToCode(status, type) {
6380
6678
 
6381
6679
  // src/coordination/coordinator/error-classifier.ts
6382
6680
  function classifySubagentError(err, hints = {}) {
6681
+ if (err instanceof AgentError && err.cause) {
6682
+ return classifySubagentError(err.cause, hints);
6683
+ }
6383
6684
  const cause = err instanceof Error ? { name: err.name, message: err.message, stack: err.stack } : void 0;
6384
6685
  if (err instanceof ProviderError) {
6385
6686
  const baseMessage2 = err.describe();
@@ -6412,7 +6713,7 @@ function classifySubagentError(err, hints = {}) {
6412
6713
  if (/agent exhausted iteration limit$/i.test(baseMessage)) {
6413
6714
  return { kind: "budget_iterations", message: baseMessage, retryable: false, cause };
6414
6715
  }
6415
- if (/empty response$/i.test(baseMessage)) {
6716
+ if (/empty response/i.test(baseMessage)) {
6416
6717
  return { kind: "empty_response", message: baseMessage, retryable: false, cause };
6417
6718
  }
6418
6719
  if (/^tool failed: /i.test(baseMessage)) {
@@ -7361,7 +7662,14 @@ var ParallelEternalEngine = class {
7361
7662
  }
7362
7663
  stop() {
7363
7664
  this.stopRequested = true;
7364
- void this.persistState("stopped").catch(() => {
7665
+ void this.persistState("stopped").catch((err) => {
7666
+ console.error(JSON.stringify({
7667
+ level: "error",
7668
+ event: "engine.persist_state_failed",
7669
+ message: err instanceof Error ? err.message : String(err),
7670
+ context: { expectedState: "stopped" },
7671
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
7672
+ }));
7365
7673
  });
7366
7674
  this.state = "stopped";
7367
7675
  }
@@ -8122,6 +8430,6 @@ function parseDescription(raw) {
8122
8430
  return { trigger, scope };
8123
8431
  }
8124
8432
 
8125
- export { AutoCompactionMiddleware, AutonomousRunner, DefaultErrorHandler, DefaultRetryPolicy, DefaultSkillLoader, DoneConditionChecker, EternalAutonomyEngine, HybridCompactor, IntelligentCompactor, ParallelEternalEngine, SelectiveCompactor, ToolExecutor, buildGoalPreamble, createAutonomyBrain, createStrategyCompactor, formatDecisionSummary, makeAutonomyPromptContributor };
8433
+ export { AutoCompactionMiddleware, AutonomousRunner, DefaultErrorHandler, DefaultRetryPolicy, DefaultSkillLoader, DoneConditionChecker, EternalAutonomyEngine, HybridCompactor, IntelligentCompactor, ParallelEternalEngine, SelectiveCompactor, ToolExecutor, buildGoalPreamble, createAutonomyBrain, createStrategyCompactor, createTieredBrainArbiter, formatDecisionSummary, makeAutonomyPromptContributor };
8126
8434
  //# sourceMappingURL=index.js.map
8127
8435
  //# sourceMappingURL=index.js.map