@dungle-scrubs/tallow 0.8.25 → 0.8.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/dist/auth-hardening.d.ts +12 -0
  2. package/dist/auth-hardening.d.ts.map +1 -1
  3. package/dist/auth-hardening.js +30 -7
  4. package/dist/auth-hardening.js.map +1 -1
  5. package/dist/cli.js +5 -0
  6. package/dist/cli.js.map +1 -1
  7. package/dist/config.d.ts +1 -1
  8. package/dist/config.js +1 -1
  9. package/dist/install.js +2 -2
  10. package/dist/install.js.map +1 -1
  11. package/dist/interactive-mode-patch.d.ts.map +1 -1
  12. package/dist/interactive-mode-patch.js +119 -7
  13. package/dist/interactive-mode-patch.js.map +1 -1
  14. package/dist/model-metadata-overrides.d.ts +19 -0
  15. package/dist/model-metadata-overrides.d.ts.map +1 -0
  16. package/dist/model-metadata-overrides.js +38 -0
  17. package/dist/model-metadata-overrides.js.map +1 -0
  18. package/dist/sdk.d.ts +2 -0
  19. package/dist/sdk.d.ts.map +1 -1
  20. package/dist/sdk.js +28 -1
  21. package/dist/sdk.js.map +1 -1
  22. package/extensions/__integration__/teams-runtime.test.ts +22 -1
  23. package/extensions/_shared/__tests__/shell-policy.test.ts +197 -0
  24. package/extensions/_shared/shell-policy.ts +27 -0
  25. package/extensions/background-task-tool/index.ts +2 -1
  26. package/extensions/bash-tool-enhanced/index.ts +2 -1
  27. package/extensions/custom-footer/__tests__/index.test.ts +29 -0
  28. package/extensions/custom-footer/context-display.ts +49 -0
  29. package/extensions/custom-footer/index.ts +10 -23
  30. package/extensions/permissions/index.ts +31 -10
  31. package/extensions/plan-mode-tool/__tests__/index.test.ts +32 -2
  32. package/extensions/plan-mode-tool/index.ts +6 -1
  33. package/extensions/slash-command-bridge/index.ts +30 -1
  34. package/extensions/subagent-tool/__tests__/process-liveness.test.ts +42 -3
  35. package/extensions/subagent-tool/process.ts +132 -21
  36. package/extensions/tasks/__tests__/store.test.ts +26 -2
  37. package/extensions/tasks/commands/register-tasks-extension.ts +2 -2
  38. package/extensions/tasks/index.ts +5 -5
  39. package/extensions/tasks/state/index.ts +90 -36
  40. package/extensions/teams-tool/__tests__/archive-store.test.ts +98 -0
  41. package/extensions/teams-tool/__tests__/peer-messaging.test.ts +26 -0
  42. package/extensions/teams-tool/archive-store.ts +200 -0
  43. package/extensions/teams-tool/sessions/spawn.ts +244 -71
  44. package/extensions/teams-tool/tools/register-extension.ts +146 -105
  45. package/extensions/teams-tool/tools/teammate-tools.ts +43 -1
  46. package/package.json +4 -4
  47. package/skills/tallow-expert/SKILL.md +1 -1
  48. package/templates/agents/architect.md +13 -5
  49. package/templates/agents/debug.md +3 -3
  50. package/templates/agents/explore.md +9 -2
  51. package/templates/agents/refactor.md +2 -2
  52. package/templates/agents/scout.md +3 -2
@@ -0,0 +1,49 @@
1
+ import type { ContextUsage } from "@mariozechner/pi-coding-agent";
2
+
3
/**
 * Formats a token count with a k/M suffix for compact display.
 *
 * Values below 1000 are printed as-is. Larger values use one decimal place
 * while the scaled value is below 10, and whole numbers above that. The
 * bucket is chosen from the *rounded* value, so a count that rounds up across
 * a boundary moves to the next bucket instead of rendering as "10.0k",
 * "1000k" or "10.0M".
 *
 * @param count - Token count to format
 * @returns Formatted string (e.g., "950", "1.2k", "15k", "1.0M", "25M")
 */
function formatTokens(count: number): string {
  if (count < 1000) return count.toString();

  if (count < 1_000_000) {
    const thousands = count / 1000;
    const thousandsText = thousands.toFixed(1);
    // Only keep the decimal when the rounded text is still below 10 (avoids "10.0k").
    if (Number(thousandsText) < 10) return `${thousandsText}k`;
    const wholeThousands = Math.round(thousands);
    if (wholeThousands < 1000) return `${wholeThousands}k`;
    // Rounded up to 1000k: fall through so it is rendered in millions instead.
  }

  const millions = count / 1_000_000;
  const millionsText = millions.toFixed(1);
  // Same boundary rule as above (avoids "10.0M").
  if (Number(millionsText) < 10) return `${millionsText}M`;
  return `${Math.round(millions)}M`;
}
16
+
17
/**
 * Builds the footer's context-usage label and the raw percentage behind it.
 *
 * The context window comes from `usage.contextWindow` when present, otherwise
 * from `fallbackContextWindow`. A non-positive window yields `?/?`. A `null`
 * token count yields `?/<window>` with a `null` percent, so an unknown state
 * is shown as unknown rather than as a percentage (the original note
 * attributes `tokens: null` to the period after compaction, before a fresh
 * assistant response arrives). When `usage` is absent the token count is
 * taken as 0.
 *
 * @param usage - Current context usage snapshot, if available
 * @param fallbackContextWindow - Context window to use when the snapshot has none
 * @param autoCompactEnabled - Whether to append the " (auto)" indicator
 * @returns Display text plus the raw percentage (or `null` when unknown)
 */
export function formatContextUsageDisplay(
  usage: ContextUsage | undefined,
  fallbackContextWindow: number,
  autoCompactEnabled: boolean
): { readonly percent: number | null; readonly text: string } {
  const suffix = autoCompactEnabled ? " (auto)" : "";
  const windowSize = usage?.contextWindow ?? fallbackContextWindow;

  // Missing snapshot counts as zero usage; a present snapshot may carry null.
  let usedTokens: number | null = 0;
  if (usage) {
    usedTokens = usage.tokens;
  }

  // No usable window size: nothing meaningful to show on either side.
  if (windowSize <= 0) {
    return { percent: null, text: `?/?${suffix}` };
  }

  const windowLabel = formatTokens(windowSize);

  // Unknown token count: keep the window but leave the percentage unknown.
  if (usedTokens === null) {
    return { percent: null, text: `?/${windowLabel}${suffix}` };
  }

  const percent = (usedTokens / windowSize) * 100;
  const text = `${percent.toFixed(1)}%/${windowLabel}${suffix}`;
  return { percent, text };
}
@@ -19,6 +19,7 @@
19
19
  import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
20
20
  import { truncateToWidth, visibleWidth } from "@mariozechner/pi-tui";
21
21
  import { runGitCommandSync } from "../_shared/shell-policy.js";
22
+ import { formatContextUsageDisplay } from "./context-display.js";
22
23
 
23
24
  /** Cached git repository state for the footer display. */
24
25
  interface GitState {
@@ -204,26 +205,12 @@ export default function customFooterExtension(pi: ExtensionAPI): void {
204
205
  }
205
206
  }
206
207
 
207
- // Get context percentage from last assistant message
208
- const branch = sessionManager.getBranch();
209
- const lastAssistant = branch
210
- .slice()
211
- .reverse()
212
- .find(
213
- (e) =>
214
- e.type === "message" &&
215
- e.message.role === "assistant" &&
216
- (e.message as unknown as Record<string, string>).stopReason !== "aborted"
217
- );
218
-
219
- let contextTokens = 0;
220
- if (lastAssistant?.type === "message" && lastAssistant.message.role === "assistant") {
221
- const u = lastAssistant.message.usage;
222
- contextTokens = u.input + u.output + u.cacheRead + u.cacheWrite;
223
- }
224
-
225
- const contextWindow = model?.contextWindow || 0;
226
- const contextPercentValue = contextWindow > 0 ? (contextTokens / contextWindow) * 100 : 0;
208
+ const contextUsage = extensionCtx.getContextUsage();
209
+ const { percent: contextPercentValue, text: contextDisplay } = formatContextUsageDisplay(
210
+ contextUsage,
211
+ model?.contextWindow ?? 0,
212
+ autoCompactEnabled
213
+ );
227
214
 
228
215
  // Build path (replace home with ~)
229
216
  let pwd = process.cwd();
@@ -270,10 +257,10 @@ export default function customFooterExtension(pi: ExtensionAPI): void {
270
257
  if (totalCost) statsParts.push(`$${totalCost.toFixed(3)}`);
271
258
 
272
259
  // Context percentage with color
273
- const autoIndicator = autoCompactEnabled ? " (auto)" : "";
274
- const contextDisplay = `${contextPercentValue.toFixed(1)}%/${formatTokens(contextWindow)}${autoIndicator}`;
275
260
  let contextStr: string;
276
- if (contextPercentValue > 90) {
261
+ if (contextPercentValue === null) {
262
+ contextStr = theme.fg("dim", contextDisplay);
263
+ } else if (contextPercentValue > 90) {
277
264
  contextStr = theme.fg("error", contextDisplay);
278
265
  } else if (contextPercentValue > 70) {
279
266
  contextStr = theme.fg("warning", contextDisplay);
@@ -25,7 +25,12 @@ import {
25
25
  type PermissionVerdict,
26
26
  redactSensitiveReasonText,
27
27
  } from "../_shared/permissions.js";
28
- import { getPermissions, recordAudit, reloadPermissions } from "../_shared/shell-policy.js";
28
+ import {
29
+ getPermissions,
30
+ isYoloMode,
31
+ recordAudit,
32
+ reloadPermissions,
33
+ } from "../_shared/shell-policy.js";
29
34
 
30
35
  // ── Helper: build expansion vars ─────────────────────────────────────────────
31
36
 
@@ -69,6 +74,14 @@ export default function (pi: ExtensionAPI): void {
69
74
  pi.on("session_start", async (_event, ctx) => {
70
75
  currentCwd = ctx.cwd;
71
76
 
77
+ // Yolo mode banner
78
+ if (isYoloMode()) {
79
+ ctx.ui?.notify(
80
+ "⚡ YOLO mode — auto-approving tool confirmations. Hard denies still enforced.",
81
+ "warning"
82
+ );
83
+ }
84
+
72
85
  // Eagerly load permissions to surface any config warnings at startup
73
86
  const permissions = getPermissions(currentCwd);
74
87
  const totalRules =
@@ -119,6 +132,10 @@ export default function (pi: ExtensionAPI): void {
119
132
  return { block: true, reason: buildBlockReason(verdict) };
120
133
  }
121
134
  if (verdict.action === "ask") {
135
+ if (isYoloMode()) {
136
+ recordPermissionAudit(event.toolName, cwd, "confirmed", verdict);
137
+ continue;
138
+ }
122
139
  const confirmed = await confirmPermission(ctx, event.toolName, agent, verdict);
123
140
  if (!confirmed) {
124
141
  recordPermissionAudit(event.toolName, cwd, "blocked", verdict);
@@ -142,16 +159,20 @@ export default function (pi: ExtensionAPI): void {
142
159
  }
143
160
 
144
161
  if (verdict.action === "ask") {
145
- const specifier = getSpecifierDisplay(toolName, input, cwd);
146
- const confirmed = await confirmPermission(ctx, event.toolName, specifier, verdict);
147
- if (!confirmed) {
148
- recordPermissionAudit(event.toolName, cwd, "blocked", verdict);
149
- return {
150
- block: true,
151
- reason: `Permission request denied: ${buildBlockReason(verdict)}`,
152
- };
162
+ if (isYoloMode()) {
163
+ recordPermissionAudit(event.toolName, cwd, "confirmed", verdict);
164
+ } else {
165
+ const specifier = getSpecifierDisplay(toolName, input, cwd);
166
+ const confirmed = await confirmPermission(ctx, event.toolName, specifier, verdict);
167
+ if (!confirmed) {
168
+ recordPermissionAudit(event.toolName, cwd, "blocked", verdict);
169
+ return {
170
+ block: true,
171
+ reason: `Permission request denied: ${buildBlockReason(verdict)}`,
172
+ };
173
+ }
174
+ recordPermissionAudit(event.toolName, cwd, "confirmed", verdict);
153
175
  }
154
- recordPermissionAudit(event.toolName, cwd, "confirmed", verdict);
155
176
  }
156
177
 
157
178
  if (verdict.action === "allow") {
@@ -63,10 +63,10 @@ function registerMockTools(pi: ExtensionAPI): void {
63
63
  * @param entries - Session entries returned by sessionManager.getEntries
64
64
  * @returns Context object compatible with extension handlers
65
65
  */
66
- function createContext(entries: unknown[] = []): ExtensionContext {
66
+ function createContext(entries: unknown[] = [], hasUI = true): ExtensionContext {
67
67
  return {
68
68
  cwd: process.cwd(),
69
- hasUI: true,
69
+ hasUI,
70
70
  ui: {
71
71
  notify() {},
72
72
  setStatus() {},
@@ -180,4 +180,34 @@ describe("plan-mode strict readonly enforcement", () => {
180
180
  );
181
181
  expect(blockedResult).toMatchObject({ block: true });
182
182
  });
183
+
184
+ test("auto-enable only triggers for interactive UI input", async () => {
185
+ const [result] = await harness.fireEvent(
186
+ "input",
187
+ { source: "interactive", text: "plan only fix auth" },
188
+ createContext([], true)
189
+ );
190
+
191
+ expect(result).toEqual({ action: "transform", text: "fix auth" });
192
+ expect(harness.api.getActiveTools()).toEqual(
193
+ PLAN_MODE_ALLOWED_TOOLS.filter((name) => BASELINE_TOOLS.includes(name))
194
+ );
195
+ });
196
+
197
+ test("auto-enable ignores headless or non-interactive input", async () => {
198
+ const [headlessResult] = await harness.fireEvent(
199
+ "input",
200
+ { source: "interactive", text: "plan only fix auth" },
201
+ createContext([], false)
202
+ );
203
+ const [rpcResult] = await harness.fireEvent(
204
+ "input",
205
+ { source: "rpc", text: "plan only fix auth" },
206
+ createContext([], true)
207
+ );
208
+
209
+ expect(headlessResult).toEqual({ action: "continue" });
210
+ expect(rpcResult).toEqual({ action: "continue" });
211
+ expect(harness.api.getActiveTools()).toEqual([...BASELINE_TOOLS]);
212
+ });
183
213
  });
@@ -378,13 +378,18 @@ Use action "enable" to enter plan mode, "disable" to exit, or "status" to check
378
378
  }
379
379
  });
380
380
 
381
- // Auto-enable plan mode when user expresses planning intent in natural language
381
+ // Auto-enable plan mode when a human interactive session explicitly signals planning intent.
382
382
  pi.on("input", async (event, ctx) => {
383
383
  // No-op if already in plan mode
384
384
  if (planModeEnabled) {
385
385
  return { action: "continue" as const };
386
386
  }
387
387
 
388
+ // Headless/orchestrated prompts should never toggle workflow modes via string matching.
389
+ if (!ctx.hasUI || event.source !== "interactive") {
390
+ return { action: "continue" as const };
391
+ }
392
+
388
393
  if (!detectPlanIntent(event.text)) {
389
394
  return { action: "continue" as const };
390
395
  }
@@ -395,7 +395,8 @@ WHEN TO USE:
395
395
 
396
396
  WHEN NOT TO USE:
397
397
  - The user already ran the command themselves
398
- - You want to start a new session (suggest the user run /clear instead)`,
398
+ - You want to start a new session (suggest the user run /clear instead)
399
+ - Context usage is below 80% — there is no need to compact proactively. Do NOT compact between tasks "just in case". Compaction destroys conversation history and should only happen when the context window is nearly full.`,
399
400
  parameters: Type.Object({
400
401
  command: Type.String({
401
402
  description:
@@ -490,6 +491,34 @@ WHEN NOT TO USE:
490
491
  }
491
492
 
492
493
  case "compact": {
494
+ // Guard: reject model-initiated compact when context usage is low.
495
+ // The model frequently compacts proactively at 15-30% usage, wasting
496
+ // context and losing valuable conversation history. Only allow
497
+ // programmatic compact when usage exceeds 80% of the context window.
498
+ const compactUsage = ctx.getContextUsage?.();
499
+ if (
500
+ compactUsage &&
501
+ compactUsage.tokens !== null &&
502
+ compactUsage.tokens > 0 &&
503
+ compactUsage.contextWindow > 0
504
+ ) {
505
+ const usagePercent = (compactUsage.tokens / compactUsage.contextWindow) * 100;
506
+ if (usagePercent < 80) {
507
+ return {
508
+ content: [
509
+ {
510
+ type: "text",
511
+ text:
512
+ `Context usage is only ${Math.round(usagePercent)}% — compaction is not needed yet. ` +
513
+ "The session has plenty of context space remaining. " +
514
+ "Continue working normally; compaction will happen automatically when needed.",
515
+ },
516
+ ],
517
+ details: { command, rejected: true, usagePercent },
518
+ };
519
+ }
520
+ }
521
+
493
522
  // Don't call ctx.compact() here — it aborts the agent mid-tool-call,
494
523
  // orphaning the tool execution spinner (plan 95/98). Defer to a
495
524
  // proven turn_end boundary so the tool completes normally first.
@@ -5,7 +5,14 @@ import {
5
5
  createWatchdogHeartbeatState,
6
6
  evaluateWatchdogStatus,
7
7
  type ForegroundWatchdogThresholds,
8
+ isWatchdogHeartbeatEventType,
8
9
  recordWatchdogHeartbeat,
10
+ recordWatchdogToolCallEnd,
11
+ recordWatchdogToolCallStart,
12
+ resolveForegroundWatchdogThresholds,
13
+ SUBAGENT_INACTIVITY_TIMEOUT_MS_ENV,
14
+ SUBAGENT_STARTUP_TIMEOUT_MS_ENV,
15
+ SUBAGENT_TOOL_EXECUTION_TIMEOUT_MS_ENV,
9
16
  terminateProcessWithGrace,
10
17
  } from "../process.js";
11
18
 
@@ -13,6 +20,7 @@ const TEST_THRESHOLDS: ForegroundWatchdogThresholds = {
13
20
  inactivityTimeoutMs: 2_000,
14
21
  killGraceMs: 50,
15
22
  startupTimeoutMs: 1_000,
23
+ toolExecutionTimeoutMs: 8_000,
16
24
  };
17
25
 
18
26
  interface ManualTimer {
@@ -102,6 +110,38 @@ describe("foreground subagent liveness watchdog", () => {
102
110
  expect(stalledStatus.phase).toBe("inactivity");
103
111
  });
104
112
 
113
+ it("widens the timeout while a tool call is still running", () => {
114
+ let state = createWatchdogHeartbeatState(0);
115
+ state = recordWatchdogToolCallStart(state, 500);
116
+ expect(evaluateWatchdogStatus(state, 6_000, TEST_THRESHOLDS).kind).toBe("healthy");
117
+
118
+ const stalledStatus = evaluateWatchdogStatus(state, 8_600, TEST_THRESHOLDS);
119
+ expect(stalledStatus.kind).toBe("stalled");
120
+ if (stalledStatus.kind !== "stalled") return;
121
+ expect(stalledStatus.phase).toBe("tool_execution");
122
+
123
+ state = recordWatchdogToolCallEnd(state, 8_600);
124
+ expect(state.activeToolCalls).toBe(0);
125
+ });
126
+
127
+ it("treats message updates and tool execution events as heartbeats", () => {
128
+ expect(isWatchdogHeartbeatEventType("message_update")).toBe(true);
129
+ expect(isWatchdogHeartbeatEventType("tool_execution_start")).toBe(true);
130
+ expect(isWatchdogHeartbeatEventType("tool_execution_end")).toBe(true);
131
+ expect(isWatchdogHeartbeatEventType("tool_result_end")).toBe(false);
132
+ });
133
+
134
+ it("applies env overrides for watchdog thresholds", () => {
135
+ const thresholds = resolveForegroundWatchdogThresholds({
136
+ [SUBAGENT_INACTIVITY_TIMEOUT_MS_ENV]: "7000",
137
+ [SUBAGENT_STARTUP_TIMEOUT_MS_ENV]: "3000",
138
+ [SUBAGENT_TOOL_EXECUTION_TIMEOUT_MS_ENV]: "11000",
139
+ });
140
+ expect(thresholds.inactivityTimeoutMs).toBe(7_000);
141
+ expect(thresholds.startupTimeoutMs).toBe(3_000);
142
+ expect(thresholds.toolExecutionTimeoutMs).toBe(11_000);
143
+ });
144
+
105
145
  it("stalled termination escalates and resolves without hanging", async () => {
106
146
  const state = createWatchdogHeartbeatState(0);
107
147
  const stalledStatus = evaluateWatchdogStatus(state, 1_001, TEST_THRESHOLDS);
@@ -134,8 +174,7 @@ describe("foreground subagent liveness watchdog", () => {
134
174
  expect(signals).toEqual(["SIGTERM", "SIGKILL"]);
135
175
  expect(resolvedCode).toBe(1);
136
176
  expect(result.stopReason).toBe("stalled");
137
- expect(result.errorMessage).toContain(
138
- "interactive confirmation path unavailable in subagent JSON mode"
139
- );
177
+ expect(result.errorMessage).toContain("slow provider startup");
178
+ expect(result.errorMessage).toContain("TALLOW_SUBAGENT_*");
140
179
  });
141
180
  });
@@ -448,10 +448,12 @@ export interface ForegroundWatchdogThresholds {
448
448
  readonly inactivityTimeoutMs: number;
449
449
  readonly killGraceMs: number;
450
450
  readonly startupTimeoutMs: number;
451
+ readonly toolExecutionTimeoutMs: number;
451
452
  }
452
453
 
453
454
  /** Heartbeat state tracked by the foreground subagent liveness watchdog. */
454
455
  export interface WatchdogHeartbeatState {
456
+ readonly activeToolCalls: number;
455
457
  readonly lastHeartbeatAtMs: number | null;
456
458
  readonly startedAtMs: number;
457
459
  }
@@ -462,20 +464,86 @@ export type WatchdogStatus =
462
464
  | {
463
465
  readonly elapsedMs: number;
464
466
  readonly kind: "stalled";
465
- readonly phase: "inactivity" | "startup";
467
+ readonly phase: "inactivity" | "startup" | "tool_execution";
466
468
  readonly timeoutMs: number;
467
469
  };
468
470
 
471
+ /** Env var overriding the foreground startup timeout. */
472
+ export const SUBAGENT_STARTUP_TIMEOUT_MS_ENV = "TALLOW_SUBAGENT_STARTUP_TIMEOUT_MS";
473
+
474
+ /** Env var overriding the foreground inactivity timeout when no tool is active. */
475
+ export const SUBAGENT_INACTIVITY_TIMEOUT_MS_ENV = "TALLOW_SUBAGENT_INACTIVITY_TIMEOUT_MS";
476
+
477
+ /** Env var overriding the foreground timeout while a tool call is still running. */
478
+ export const SUBAGENT_TOOL_EXECUTION_TIMEOUT_MS_ENV = "TALLOW_SUBAGENT_TOOL_EXECUTION_TIMEOUT_MS";
479
+
480
+ /** Env var overriding the SIGTERM → SIGKILL grace window for stalled workers. */
481
+ export const SUBAGENT_WATCHDOG_KILL_GRACE_MS_ENV = "TALLOW_SUBAGENT_WATCHDOG_KILL_GRACE_MS";
482
+
469
483
  /** Default watchdog thresholds used by foreground subagents in runSingleAgent. */
470
484
  export const FOREGROUND_WATCHDOG_THRESHOLDS: ForegroundWatchdogThresholds = {
471
- inactivityTimeoutMs: 90_000,
485
+ inactivityTimeoutMs: 180_000,
472
486
  killGraceMs: 5_000,
473
- startupTimeoutMs: 30_000,
487
+ startupTimeoutMs: 60_000,
488
+ toolExecutionTimeoutMs: 600_000,
474
489
  };
475
490
 
476
491
  /** How often the foreground watchdog checks for stalled subagents. */
477
492
  const FOREGROUND_WATCHDOG_CHECK_INTERVAL_MS = 500;
478
493
 
494
+ /** Foreground event types that count as liveness without changing tool-call state. */
495
+ const WATCHDOG_HEARTBEAT_EVENT_TYPES = new Set([
496
+ "message_end",
497
+ "message_update",
498
+ "tool_execution_end",
499
+ "tool_execution_start",
500
+ ]);
501
+
502
/**
 * Parse a positive millisecond timeout override.
 *
 * Only a plain decimal integer (optionally surrounded by whitespace, with an
 * optional leading "+") is accepted. `Number.parseInt` on its own silently
 * ignores trailing characters, so values such as "10s" or "1e5" would have
 * been read as 10 ms and 1 ms respectively; those are now treated as invalid.
 *
 * @param rawValue - Raw env value
 * @returns Parsed timeout in milliseconds, or undefined when missing, malformed,
 *   non-positive, or too large to represent exactly
 */
function parseTimeoutOverrideMs(rawValue: string | undefined): number | undefined {
  if (!rawValue) return undefined;
  const trimmed = rawValue.trim();
  // Reject anything that is not purely digits before handing it to parseInt.
  if (!/^\+?\d+$/.test(trimmed)) return undefined;
  const parsed = Number.parseInt(trimmed, 10);
  if (!Number.isSafeInteger(parsed) || parsed <= 0) return undefined;
  return parsed;
}
513
+
514
/**
 * Resolve the watchdog thresholds for a foreground worker.
 *
 * Each field is read from its env var through `parseTimeoutOverrideMs`; when
 * that yields `undefined` the matching value from
 * `FOREGROUND_WATCHDOG_THRESHOLDS` is used instead.
 *
 * @param env - Environment lookup map (defaults to `process.env`)
 * @returns Watchdog thresholds used for this foreground worker
 */
export function resolveForegroundWatchdogThresholds(
  env: EnvLookup = process.env
): ForegroundWatchdogThresholds {
  const defaults = FOREGROUND_WATCHDOG_THRESHOLDS;

  const inactivityOverride = parseTimeoutOverrideMs(env[SUBAGENT_INACTIVITY_TIMEOUT_MS_ENV]);
  const killGraceOverride = parseTimeoutOverrideMs(env[SUBAGENT_WATCHDOG_KILL_GRACE_MS_ENV]);
  const startupOverride = parseTimeoutOverrideMs(env[SUBAGENT_STARTUP_TIMEOUT_MS_ENV]);
  const toolExecutionOverride = parseTimeoutOverrideMs(
    env[SUBAGENT_TOOL_EXECUTION_TIMEOUT_MS_ENV]
  );

  return {
    inactivityTimeoutMs: inactivityOverride ?? defaults.inactivityTimeoutMs,
    killGraceMs: killGraceOverride ?? defaults.killGraceMs,
    startupTimeoutMs: startupOverride ?? defaults.startupTimeoutMs,
    toolExecutionTimeoutMs: toolExecutionOverride ?? defaults.toolExecutionTimeoutMs,
  };
}
537
+
538
/**
 * Tell whether a child-process event type is listed in
 * `WATCHDOG_HEARTBEAT_EVENT_TYPES`, i.e. counts as watchdog progress.
 *
 * @param eventType - Raw child-process event type
 * @returns True when the event should refresh liveness
 */
export function isWatchdogHeartbeatEventType(eventType: string): boolean {
  const refreshesLiveness = WATCHDOG_HEARTBEAT_EVENT_TYPES.has(eventType);
  return refreshesLiveness;
}
546
+
479
547
  /**
480
548
  * Create initial watchdog heartbeat state.
481
549
  * @param nowMs - Current wall-clock timestamp in milliseconds
@@ -483,6 +551,7 @@ const FOREGROUND_WATCHDOG_CHECK_INTERVAL_MS = 500;
483
551
  */
484
552
  export function createWatchdogHeartbeatState(nowMs: number): WatchdogHeartbeatState {
485
553
  return {
554
+ activeToolCalls: 0,
486
555
  lastHeartbeatAtMs: null,
487
556
  startedAtMs: nowMs,
488
557
  };
@@ -504,6 +573,40 @@ export function recordWatchdogHeartbeat(
504
573
  };
505
574
  }
506
575
 
576
/**
 * Register that a tool call has begun.
 *
 * Returns a new state with `activeToolCalls` incremented by one and
 * `lastHeartbeatAtMs` set to `nowMs`; `startedAtMs` is carried over. The
 * input state is not modified.
 *
 * @param state - Existing watchdog heartbeat state
 * @param nowMs - Current wall-clock timestamp in milliseconds
 * @returns Updated heartbeat state
 */
export function recordWatchdogToolCallStart(
  state: WatchdogHeartbeatState,
  nowMs: number
): WatchdogHeartbeatState {
  const { activeToolCalls: runningBefore, startedAtMs } = state;
  return {
    activeToolCalls: runningBefore + 1,
    lastHeartbeatAtMs: nowMs,
    startedAtMs,
  };
}
592
+
593
/**
 * Register that a tool call has finished.
 *
 * Returns a new state with `activeToolCalls` decremented by one (clamped so
 * it never drops below zero) and `lastHeartbeatAtMs` set to `nowMs`;
 * `startedAtMs` is carried over. The input state is not modified.
 *
 * @param state - Existing watchdog heartbeat state
 * @param nowMs - Current wall-clock timestamp in milliseconds
 * @returns Updated heartbeat state
 */
export function recordWatchdogToolCallEnd(
  state: WatchdogHeartbeatState,
  nowMs: number
): WatchdogHeartbeatState {
  const { activeToolCalls: runningBefore, startedAtMs } = state;
  // Clamp at zero so an unmatched end event cannot produce a negative count.
  const runningAfter = Math.max(0, runningBefore - 1);
  return {
    activeToolCalls: runningAfter,
    lastHeartbeatAtMs: nowMs,
    startedAtMs,
  };
}
609
+
507
610
  /**
508
611
  * Evaluate current liveness state against watchdog thresholds.
509
612
  * @param state - Current heartbeat state
@@ -530,12 +633,14 @@ export function evaluateWatchdogStatus(
530
633
  }
531
634
 
532
635
  const inactivityElapsedMs = nowMs - state.lastHeartbeatAtMs;
533
- if (inactivityElapsedMs >= thresholds.inactivityTimeoutMs) {
636
+ const timeoutMs =
637
+ state.activeToolCalls > 0 ? thresholds.toolExecutionTimeoutMs : thresholds.inactivityTimeoutMs;
638
+ if (inactivityElapsedMs >= timeoutMs) {
534
639
  return {
535
640
  elapsedMs: inactivityElapsedMs,
536
641
  kind: "stalled",
537
- phase: "inactivity",
538
- timeoutMs: thresholds.inactivityTimeoutMs,
642
+ phase: state.activeToolCalls > 0 ? "tool_execution" : "inactivity",
643
+ timeoutMs,
539
644
  };
540
645
  }
541
646
  return { kind: "healthy" };
@@ -552,9 +657,16 @@ export function createStalledSubagentErrorMessage(
552
657
  const timeoutSeconds = Math.max(1, Math.round(stalledStatus.timeoutMs / 1000));
553
658
  const phaseDescription =
554
659
  stalledStatus.phase === "startup"
555
- ? "no startup heartbeat was received"
556
- : `no heartbeat was received for ${timeoutSeconds}s`;
557
- return `Subagent stalled (${phaseDescription}). Likely deadlock: waiting for an interactive confirmation path unavailable in subagent JSON mode. Action: avoid confirmation-gated steps, pre-authorize required tools, or run this step in the parent agent.`;
660
+ ? "no startup activity was received"
661
+ : stalledStatus.phase === "tool_execution"
662
+ ? `no subagent activity was received for ${timeoutSeconds}s while a tool call was running`
663
+ : `no subagent activity was received for ${timeoutSeconds}s`;
664
+ return (
665
+ `Subagent stalled (${phaseDescription}). Common causes: slow provider startup, long-running tool execution without progress events, ` +
666
+ "or an interactive confirmation path unavailable in subagent JSON mode. " +
667
+ "Action: narrow task scope, avoid confirmation-gated steps, run very long commands in the parent agent, " +
668
+ "or increase TALLOW_SUBAGENT_* timeout env vars when slow work is legitimate."
669
+ );
558
670
  }
559
671
 
560
672
  /**
@@ -1251,6 +1363,7 @@ export async function runSingleAgent(
1251
1363
  if (!foregroundSpawn.ok) {
1252
1364
  throw new Error(foregroundSpawn.reason);
1253
1365
  }
1366
+ const watchdogThresholds = resolveForegroundWatchdogThresholds();
1254
1367
  const exitCode = await new Promise<number>((resolve) => {
1255
1368
  const proc = foregroundSpawn.proc;
1256
1369
  if (!proc.stdout || !proc.stderr) {
@@ -1292,7 +1405,7 @@ export async function runSingleAgent(
1292
1405
  if (stopRequested) return;
1293
1406
  stopRequested = true;
1294
1407
  stopHandle = terminateProcessWithGrace(proc, {
1295
- killGraceMs: FOREGROUND_WATCHDOG_THRESHOLDS.killGraceMs,
1408
+ killGraceMs: watchdogThresholds.killGraceMs,
1296
1409
  onForceResolve: () => {
1297
1410
  settle(1);
1298
1411
  },
@@ -1309,16 +1422,14 @@ export async function runSingleAgent(
1309
1422
  return;
1310
1423
  }
1311
1424
 
1312
- if (
1313
- event.type === "message_end" ||
1314
- event.type === "tool_call_start" ||
1315
- event.type === "tool_result_end"
1316
- ) {
1317
- heartbeatState = recordWatchdogHeartbeat(heartbeatState, Date.now());
1425
+ const nowMs = Date.now();
1426
+ if (isWatchdogHeartbeatEventType(String(event.type))) {
1427
+ heartbeatState = recordWatchdogHeartbeat(heartbeatState, nowMs);
1318
1428
  }
1319
1429
 
1320
1430
  // Emit subagent_tool_call when tool starts
1321
1431
  if (event.type === "tool_call_start") {
1432
+ heartbeatState = recordWatchdogToolCallStart(heartbeatState, nowMs);
1322
1433
  fgTurnCount++;
1323
1434
  // Hard enforcement: kill after maxTurns tool calls
1324
1435
  if (agent.maxTurns && fgTurnCount >= agent.maxTurns) {
@@ -1360,6 +1471,10 @@ export async function runSingleAgent(
1360
1471
  emitUpdate();
1361
1472
  }
1362
1473
 
1474
+ if (event.type === "tool_result_end") {
1475
+ heartbeatState = recordWatchdogToolCallEnd(heartbeatState, nowMs);
1476
+ }
1477
+
1363
1478
  if (event.type === "tool_result_end" && event.message) {
1364
1479
  currentResult.messages.push(event.message as Message);
1365
1480
  // Detect permission denials vs regular errors
@@ -1385,11 +1500,7 @@ export async function runSingleAgent(
1385
1500
 
1386
1501
  watchdogInterval = setInterval(() => {
1387
1502
  if (isResolved || stopRequested) return;
1388
- const status = evaluateWatchdogStatus(
1389
- heartbeatState,
1390
- Date.now(),
1391
- FOREGROUND_WATCHDOG_THRESHOLDS
1392
- );
1503
+ const status = evaluateWatchdogStatus(heartbeatState, Date.now(), watchdogThresholds);
1393
1504
  if (status.kind !== "stalled") return;
1394
1505
  applyStalledClassification(currentResult, status);
1395
1506
  setForegroundSubagentStatus(taskId, "stalled", piEvents);
@@ -3,9 +3,9 @@
3
3
  * corruption tolerance, and session-only mode.
4
4
  */
5
5
  import { afterEach, describe, expect, it } from "bun:test";
6
- import { existsSync, rmSync, writeFileSync } from "node:fs";
6
+ import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs";
7
7
  import { join } from "node:path";
8
- import { type Task, TaskListStore } from "../state/index.js";
8
+ import { LEGACY_TEAMS_DIR, TASK_GROUPS_DIR, type Task, TaskListStore } from "../state/index.js";
9
9
 
10
10
  /**
11
11
  * Create a minimal task for store tests.
@@ -119,6 +119,30 @@ describe("TaskListStore file-backed mode", () => {
119
119
 
120
120
  expect(ctx.store.isShared).toBe(true);
121
121
  expect(existsSync(ctx.dir)).toBe(true);
122
+ expect(ctx.dir.startsWith(TASK_GROUPS_DIR)).toBe(true);
123
+ });
124
+
125
+ it("migrates a legacy ~/.tallow/teams task directory into task-groups", () => {
126
+ const teamName = `legacy-${Date.now()}`;
127
+ const legacyDir = join(LEGACY_TEAMS_DIR, teamName, "tasks");
128
+ const nextDir = join(TASK_GROUPS_DIR, teamName, "tasks");
129
+ mkdirSync(legacyDir, { recursive: true });
130
+ writeFileSync(join(legacyDir, "1.json"), JSON.stringify(makeTask("1", "From legacy")), "utf-8");
131
+
132
+ const store = new TaskListStore(teamName);
133
+ stores.push({
134
+ cleanup: () => {
135
+ store.deleteAll();
136
+ store.close();
137
+ rmSync(join(nextDir, ".."), { recursive: true, force: true });
138
+ rmSync(join(legacyDir, ".."), { recursive: true, force: true });
139
+ },
140
+ });
141
+
142
+ expect(store.path).toBe(nextDir);
143
+ expect(existsSync(join(nextDir, "1.json"))).toBe(true);
144
+ expect(existsSync(join(legacyDir, "1.json"))).toBe(false);
145
+ expect(store.loadAll()?.[0].subject).toBe("From legacy");
122
146
  });
123
147
 
124
148
  it("saveTask persists and loadAll retrieves", () => {
@@ -1093,7 +1093,7 @@ export function registerTasksExtension(
1093
1093
  case "team": {
1094
1094
  const current = store.isShared ? process.env.PI_TEAM_NAME : "(none — session-only)";
1095
1095
  const teamPath = store.path ?? "N/A";
1096
- ctx.ui.notify(`Team: ${current}\nPath: ${teamPath}`, "info");
1096
+ ctx.ui.notify(`Shared task group: ${current}\nPath: ${teamPath}`, "info");
1097
1097
  break;
1098
1098
  }
1099
1099
 
@@ -1122,7 +1122,7 @@ export function registerTasksExtension(
1122
1122
  " delete <n> - Delete task n\n" +
1123
1123
  " clear - Clear all tasks\n" +
1124
1124
  " toggle - Show/hide task widget\n" +
1125
- " team - Show current team name and path",
1125
+ " team - Show current shared task group and path",
1126
1126
  "info"
1127
1127
  );
1128
1128
  }