@dungle-scrubs/tallow 0.8.25 → 0.8.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auth-hardening.d.ts +12 -0
- package/dist/auth-hardening.d.ts.map +1 -1
- package/dist/auth-hardening.js +30 -7
- package/dist/auth-hardening.js.map +1 -1
- package/dist/cli.js +5 -0
- package/dist/cli.js.map +1 -1
- package/dist/config.d.ts +1 -1
- package/dist/config.js +1 -1
- package/dist/install.js +2 -2
- package/dist/install.js.map +1 -1
- package/dist/interactive-mode-patch.d.ts.map +1 -1
- package/dist/interactive-mode-patch.js +119 -7
- package/dist/interactive-mode-patch.js.map +1 -1
- package/dist/model-metadata-overrides.d.ts +19 -0
- package/dist/model-metadata-overrides.d.ts.map +1 -0
- package/dist/model-metadata-overrides.js +38 -0
- package/dist/model-metadata-overrides.js.map +1 -0
- package/dist/sdk.d.ts +2 -0
- package/dist/sdk.d.ts.map +1 -1
- package/dist/sdk.js +28 -1
- package/dist/sdk.js.map +1 -1
- package/extensions/__integration__/teams-runtime.test.ts +22 -1
- package/extensions/_shared/__tests__/shell-policy.test.ts +197 -0
- package/extensions/_shared/shell-policy.ts +27 -0
- package/extensions/background-task-tool/index.ts +2 -1
- package/extensions/bash-tool-enhanced/index.ts +2 -1
- package/extensions/custom-footer/__tests__/index.test.ts +29 -0
- package/extensions/custom-footer/context-display.ts +49 -0
- package/extensions/custom-footer/index.ts +10 -23
- package/extensions/permissions/index.ts +31 -10
- package/extensions/plan-mode-tool/__tests__/index.test.ts +32 -2
- package/extensions/plan-mode-tool/index.ts +6 -1
- package/extensions/slash-command-bridge/index.ts +30 -1
- package/extensions/subagent-tool/__tests__/process-liveness.test.ts +42 -3
- package/extensions/subagent-tool/process.ts +132 -21
- package/extensions/tasks/__tests__/store.test.ts +26 -2
- package/extensions/tasks/commands/register-tasks-extension.ts +2 -2
- package/extensions/tasks/index.ts +5 -5
- package/extensions/tasks/state/index.ts +90 -36
- package/extensions/teams-tool/__tests__/archive-store.test.ts +98 -0
- package/extensions/teams-tool/__tests__/peer-messaging.test.ts +26 -0
- package/extensions/teams-tool/archive-store.ts +200 -0
- package/extensions/teams-tool/sessions/spawn.ts +244 -71
- package/extensions/teams-tool/tools/register-extension.ts +146 -105
- package/extensions/teams-tool/tools/teammate-tools.ts +43 -1
- package/package.json +4 -4
- package/skills/tallow-expert/SKILL.md +1 -1
- package/templates/agents/architect.md +13 -5
- package/templates/agents/debug.md +3 -3
- package/templates/agents/explore.md +9 -2
- package/templates/agents/refactor.md +2 -2
- package/templates/agents/scout.md +3 -2
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import type { ContextUsage } from "@mariozechner/pi-coding-agent";
|
|
2
|
+
|
|
3
|
+
/**
 * Formats token counts with k/M suffixes for readability.
 *
 * Tiers: raw number below 1 000, one decimal below 10k, whole thousands
 * below 1M, one decimal below 10M, whole millions from there on.
 *
 * Rounding can carry a value across a tier boundary (9 999 rounds to "10.0",
 * 999 999 rounds to "1000"). When that happens the value is rendered in the
 * next tier's format, so the result is "10k" / "1.0M" / "10M" rather than
 * "10.0k" / "1000k" / "10.0M".
 *
 * @param count - Token count to format
 * @returns Formatted string (e.g., "1.2k", "5M")
 */
function formatTokens(count: number): string {
  if (count < 1000) return count.toString();

  if (count < 10_000) {
    const tenths = (count / 1000).toFixed(1);
    // "10.0" means toFixed rounded up out of this tier; fall through to whole thousands.
    if (tenths !== "10.0") return `${tenths}k`;
  }

  if (count < 1_000_000) {
    const thousands = Math.round(count / 1000);
    // 999 500+ rounds to 1000; fall through so it is shown in millions.
    if (thousands < 1000) return `${thousands}k`;
  }

  if (count < 10_000_000) {
    const tenths = (count / 1_000_000).toFixed(1);
    // Same carry handling as the thousands tier.
    if (tenths !== "10.0") return `${tenths}M`;
  }

  return `${Math.round(count / 1_000_000)}M`;
}
|
|
16
|
+
|
|
17
|
+
/**
 * Builds the footer's context-usage label and the raw percentage behind it.
 *
 * `ctx.getContextUsage()` intentionally reports `tokens: null` after a
 * compaction until a fresh assistant response arrives. That unknown state is
 * passed through as `percent: null` with a "?" placeholder instead of being
 * replaced by a percentage computed from stale usage data.
 *
 * Outcomes:
 * - window size not positive: `?/?`
 * - window known, tokens unknown: `?/<window>`
 * - both known: `<percent>%/<window>`
 *
 * @param usage - Current context usage snapshot, if available
 * @param fallbackContextWindow - Active model context window, used when the snapshot has none
 * @param autoCompactEnabled - Whether to append the " (auto)" auto-compaction indicator
 * @returns Display text plus raw percentage (null when unknown) for severity coloring
 */
export function formatContextUsageDisplay(
  usage: ContextUsage | undefined,
  fallbackContextWindow: number,
  autoCompactEnabled: boolean
): { readonly percent: number | null; readonly text: string } {
  const suffix = autoCompactEnabled ? " (auto)" : "";
  const windowSize = usage?.contextWindow ?? fallbackContextWindow;

  // Without a usable window size neither side of the ratio can be shown.
  if (windowSize <= 0) {
    return { percent: null, text: `?/?${suffix}` };
  }

  const windowLabel = formatTokens(windowSize);

  // No snapshot at all counts as zero tokens; a snapshot with null tokens stays unknown.
  const usedTokens = !usage ? 0 : usage.tokens;
  if (usedTokens === null) {
    return { percent: null, text: `?/${windowLabel}${suffix}` };
  }

  const ratio = (usedTokens / windowSize) * 100;
  const label = `${ratio.toFixed(1)}%/${windowLabel}${suffix}`;
  return { percent: ratio, text: label };
}
|
|
@@ -19,6 +19,7 @@
|
|
|
19
19
|
import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
|
|
20
20
|
import { truncateToWidth, visibleWidth } from "@mariozechner/pi-tui";
|
|
21
21
|
import { runGitCommandSync } from "../_shared/shell-policy.js";
|
|
22
|
+
import { formatContextUsageDisplay } from "./context-display.js";
|
|
22
23
|
|
|
23
24
|
/** Cached git repository state for the footer display. */
|
|
24
25
|
interface GitState {
|
|
@@ -204,26 +205,12 @@ export default function customFooterExtension(pi: ExtensionAPI): void {
|
|
|
204
205
|
}
|
|
205
206
|
}
|
|
206
207
|
|
|
207
|
-
|
|
208
|
-
const
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
(e) =>
|
|
214
|
-
e.type === "message" &&
|
|
215
|
-
e.message.role === "assistant" &&
|
|
216
|
-
(e.message as unknown as Record<string, string>).stopReason !== "aborted"
|
|
217
|
-
);
|
|
218
|
-
|
|
219
|
-
let contextTokens = 0;
|
|
220
|
-
if (lastAssistant?.type === "message" && lastAssistant.message.role === "assistant") {
|
|
221
|
-
const u = lastAssistant.message.usage;
|
|
222
|
-
contextTokens = u.input + u.output + u.cacheRead + u.cacheWrite;
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
const contextWindow = model?.contextWindow || 0;
|
|
226
|
-
const contextPercentValue = contextWindow > 0 ? (contextTokens / contextWindow) * 100 : 0;
|
|
208
|
+
const contextUsage = extensionCtx.getContextUsage();
|
|
209
|
+
const { percent: contextPercentValue, text: contextDisplay } = formatContextUsageDisplay(
|
|
210
|
+
contextUsage,
|
|
211
|
+
model?.contextWindow ?? 0,
|
|
212
|
+
autoCompactEnabled
|
|
213
|
+
);
|
|
227
214
|
|
|
228
215
|
// Build path (replace home with ~)
|
|
229
216
|
let pwd = process.cwd();
|
|
@@ -270,10 +257,10 @@ export default function customFooterExtension(pi: ExtensionAPI): void {
|
|
|
270
257
|
if (totalCost) statsParts.push(`$${totalCost.toFixed(3)}`);
|
|
271
258
|
|
|
272
259
|
// Context percentage with color
|
|
273
|
-
const autoIndicator = autoCompactEnabled ? " (auto)" : "";
|
|
274
|
-
const contextDisplay = `${contextPercentValue.toFixed(1)}%/${formatTokens(contextWindow)}${autoIndicator}`;
|
|
275
260
|
let contextStr: string;
|
|
276
|
-
if (contextPercentValue
|
|
261
|
+
if (contextPercentValue === null) {
|
|
262
|
+
contextStr = theme.fg("dim", contextDisplay);
|
|
263
|
+
} else if (contextPercentValue > 90) {
|
|
277
264
|
contextStr = theme.fg("error", contextDisplay);
|
|
278
265
|
} else if (contextPercentValue > 70) {
|
|
279
266
|
contextStr = theme.fg("warning", contextDisplay);
|
|
@@ -25,7 +25,12 @@ import {
|
|
|
25
25
|
type PermissionVerdict,
|
|
26
26
|
redactSensitiveReasonText,
|
|
27
27
|
} from "../_shared/permissions.js";
|
|
28
|
-
import {
|
|
28
|
+
import {
|
|
29
|
+
getPermissions,
|
|
30
|
+
isYoloMode,
|
|
31
|
+
recordAudit,
|
|
32
|
+
reloadPermissions,
|
|
33
|
+
} from "../_shared/shell-policy.js";
|
|
29
34
|
|
|
30
35
|
// ── Helper: build expansion vars ─────────────────────────────────────────────
|
|
31
36
|
|
|
@@ -69,6 +74,14 @@ export default function (pi: ExtensionAPI): void {
|
|
|
69
74
|
pi.on("session_start", async (_event, ctx) => {
|
|
70
75
|
currentCwd = ctx.cwd;
|
|
71
76
|
|
|
77
|
+
// Yolo mode banner
|
|
78
|
+
if (isYoloMode()) {
|
|
79
|
+
ctx.ui?.notify(
|
|
80
|
+
"⚡ YOLO mode — auto-approving tool confirmations. Hard denies still enforced.",
|
|
81
|
+
"warning"
|
|
82
|
+
);
|
|
83
|
+
}
|
|
84
|
+
|
|
72
85
|
// Eagerly load permissions to surface any config warnings at startup
|
|
73
86
|
const permissions = getPermissions(currentCwd);
|
|
74
87
|
const totalRules =
|
|
@@ -119,6 +132,10 @@ export default function (pi: ExtensionAPI): void {
|
|
|
119
132
|
return { block: true, reason: buildBlockReason(verdict) };
|
|
120
133
|
}
|
|
121
134
|
if (verdict.action === "ask") {
|
|
135
|
+
if (isYoloMode()) {
|
|
136
|
+
recordPermissionAudit(event.toolName, cwd, "confirmed", verdict);
|
|
137
|
+
continue;
|
|
138
|
+
}
|
|
122
139
|
const confirmed = await confirmPermission(ctx, event.toolName, agent, verdict);
|
|
123
140
|
if (!confirmed) {
|
|
124
141
|
recordPermissionAudit(event.toolName, cwd, "blocked", verdict);
|
|
@@ -142,16 +159,20 @@ export default function (pi: ExtensionAPI): void {
|
|
|
142
159
|
}
|
|
143
160
|
|
|
144
161
|
if (verdict.action === "ask") {
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
162
|
+
if (isYoloMode()) {
|
|
163
|
+
recordPermissionAudit(event.toolName, cwd, "confirmed", verdict);
|
|
164
|
+
} else {
|
|
165
|
+
const specifier = getSpecifierDisplay(toolName, input, cwd);
|
|
166
|
+
const confirmed = await confirmPermission(ctx, event.toolName, specifier, verdict);
|
|
167
|
+
if (!confirmed) {
|
|
168
|
+
recordPermissionAudit(event.toolName, cwd, "blocked", verdict);
|
|
169
|
+
return {
|
|
170
|
+
block: true,
|
|
171
|
+
reason: `Permission request denied: ${buildBlockReason(verdict)}`,
|
|
172
|
+
};
|
|
173
|
+
}
|
|
174
|
+
recordPermissionAudit(event.toolName, cwd, "confirmed", verdict);
|
|
153
175
|
}
|
|
154
|
-
recordPermissionAudit(event.toolName, cwd, "confirmed", verdict);
|
|
155
176
|
}
|
|
156
177
|
|
|
157
178
|
if (verdict.action === "allow") {
|
|
@@ -63,10 +63,10 @@ function registerMockTools(pi: ExtensionAPI): void {
|
|
|
63
63
|
* @param entries - Session entries returned by sessionManager.getEntries
|
|
64
64
|
* @returns Context object compatible with extension handlers
|
|
65
65
|
*/
|
|
66
|
-
function createContext(entries: unknown[] = []): ExtensionContext {
|
|
66
|
+
function createContext(entries: unknown[] = [], hasUI = true): ExtensionContext {
|
|
67
67
|
return {
|
|
68
68
|
cwd: process.cwd(),
|
|
69
|
-
hasUI
|
|
69
|
+
hasUI,
|
|
70
70
|
ui: {
|
|
71
71
|
notify() {},
|
|
72
72
|
setStatus() {},
|
|
@@ -180,4 +180,34 @@ describe("plan-mode strict readonly enforcement", () => {
|
|
|
180
180
|
);
|
|
181
181
|
expect(blockedResult).toMatchObject({ block: true });
|
|
182
182
|
});
|
|
183
|
+
|
|
184
|
+
test("auto-enable only triggers for interactive UI input", async () => {
|
|
185
|
+
const [result] = await harness.fireEvent(
|
|
186
|
+
"input",
|
|
187
|
+
{ source: "interactive", text: "plan only fix auth" },
|
|
188
|
+
createContext([], true)
|
|
189
|
+
);
|
|
190
|
+
|
|
191
|
+
expect(result).toEqual({ action: "transform", text: "fix auth" });
|
|
192
|
+
expect(harness.api.getActiveTools()).toEqual(
|
|
193
|
+
PLAN_MODE_ALLOWED_TOOLS.filter((name) => BASELINE_TOOLS.includes(name))
|
|
194
|
+
);
|
|
195
|
+
});
|
|
196
|
+
|
|
197
|
+
test("auto-enable ignores headless or non-interactive input", async () => {
|
|
198
|
+
const [headlessResult] = await harness.fireEvent(
|
|
199
|
+
"input",
|
|
200
|
+
{ source: "interactive", text: "plan only fix auth" },
|
|
201
|
+
createContext([], false)
|
|
202
|
+
);
|
|
203
|
+
const [rpcResult] = await harness.fireEvent(
|
|
204
|
+
"input",
|
|
205
|
+
{ source: "rpc", text: "plan only fix auth" },
|
|
206
|
+
createContext([], true)
|
|
207
|
+
);
|
|
208
|
+
|
|
209
|
+
expect(headlessResult).toEqual({ action: "continue" });
|
|
210
|
+
expect(rpcResult).toEqual({ action: "continue" });
|
|
211
|
+
expect(harness.api.getActiveTools()).toEqual([...BASELINE_TOOLS]);
|
|
212
|
+
});
|
|
183
213
|
});
|
|
@@ -378,13 +378,18 @@ Use action "enable" to enter plan mode, "disable" to exit, or "status" to check
|
|
|
378
378
|
}
|
|
379
379
|
});
|
|
380
380
|
|
|
381
|
-
// Auto-enable plan mode when
|
|
381
|
+
// Auto-enable plan mode when a human interactive session explicitly signals planning intent.
|
|
382
382
|
pi.on("input", async (event, ctx) => {
|
|
383
383
|
// No-op if already in plan mode
|
|
384
384
|
if (planModeEnabled) {
|
|
385
385
|
return { action: "continue" as const };
|
|
386
386
|
}
|
|
387
387
|
|
|
388
|
+
// Headless/orchestrated prompts should never toggle workflow modes via string matching.
|
|
389
|
+
if (!ctx.hasUI || event.source !== "interactive") {
|
|
390
|
+
return { action: "continue" as const };
|
|
391
|
+
}
|
|
392
|
+
|
|
388
393
|
if (!detectPlanIntent(event.text)) {
|
|
389
394
|
return { action: "continue" as const };
|
|
390
395
|
}
|
|
@@ -395,7 +395,8 @@ WHEN TO USE:
|
|
|
395
395
|
|
|
396
396
|
WHEN NOT TO USE:
|
|
397
397
|
- The user already ran the command themselves
|
|
398
|
-
- You want to start a new session (suggest the user run /clear instead)
|
|
398
|
+
- You want to start a new session (suggest the user run /clear instead)
|
|
399
|
+
- Context usage is below 80% — there is no need to compact proactively. Do NOT compact between tasks "just in case". Compaction destroys conversation history and should only happen when the context window is nearly full.`,
|
|
399
400
|
parameters: Type.Object({
|
|
400
401
|
command: Type.String({
|
|
401
402
|
description:
|
|
@@ -490,6 +491,34 @@ WHEN NOT TO USE:
|
|
|
490
491
|
}
|
|
491
492
|
|
|
492
493
|
case "compact": {
|
|
494
|
+
// Guard: reject model-initiated compact when context usage is low.
|
|
495
|
+
// The model frequently compacts proactively at 15-30% usage, wasting
|
|
496
|
+
// context and losing valuable conversation history. Only allow
|
|
497
|
+
// programmatic compact when usage exceeds 80% of the context window.
|
|
498
|
+
const compactUsage = ctx.getContextUsage?.();
|
|
499
|
+
if (
|
|
500
|
+
compactUsage &&
|
|
501
|
+
compactUsage.tokens !== null &&
|
|
502
|
+
compactUsage.tokens > 0 &&
|
|
503
|
+
compactUsage.contextWindow > 0
|
|
504
|
+
) {
|
|
505
|
+
const usagePercent = (compactUsage.tokens / compactUsage.contextWindow) * 100;
|
|
506
|
+
if (usagePercent < 80) {
|
|
507
|
+
return {
|
|
508
|
+
content: [
|
|
509
|
+
{
|
|
510
|
+
type: "text",
|
|
511
|
+
text:
|
|
512
|
+
`Context usage is only ${Math.round(usagePercent)}% — compaction is not needed yet. ` +
|
|
513
|
+
"The session has plenty of context space remaining. " +
|
|
514
|
+
"Continue working normally; compaction will happen automatically when needed.",
|
|
515
|
+
},
|
|
516
|
+
],
|
|
517
|
+
details: { command, rejected: true, usagePercent },
|
|
518
|
+
};
|
|
519
|
+
}
|
|
520
|
+
}
|
|
521
|
+
|
|
493
522
|
// Don't call ctx.compact() here — it aborts the agent mid-tool-call,
|
|
494
523
|
// orphaning the tool execution spinner (plan 95/98). Defer to a
|
|
495
524
|
// proven turn_end boundary so the tool completes normally first.
|
|
@@ -5,7 +5,14 @@ import {
|
|
|
5
5
|
createWatchdogHeartbeatState,
|
|
6
6
|
evaluateWatchdogStatus,
|
|
7
7
|
type ForegroundWatchdogThresholds,
|
|
8
|
+
isWatchdogHeartbeatEventType,
|
|
8
9
|
recordWatchdogHeartbeat,
|
|
10
|
+
recordWatchdogToolCallEnd,
|
|
11
|
+
recordWatchdogToolCallStart,
|
|
12
|
+
resolveForegroundWatchdogThresholds,
|
|
13
|
+
SUBAGENT_INACTIVITY_TIMEOUT_MS_ENV,
|
|
14
|
+
SUBAGENT_STARTUP_TIMEOUT_MS_ENV,
|
|
15
|
+
SUBAGENT_TOOL_EXECUTION_TIMEOUT_MS_ENV,
|
|
9
16
|
terminateProcessWithGrace,
|
|
10
17
|
} from "../process.js";
|
|
11
18
|
|
|
@@ -13,6 +20,7 @@ const TEST_THRESHOLDS: ForegroundWatchdogThresholds = {
|
|
|
13
20
|
inactivityTimeoutMs: 2_000,
|
|
14
21
|
killGraceMs: 50,
|
|
15
22
|
startupTimeoutMs: 1_000,
|
|
23
|
+
toolExecutionTimeoutMs: 8_000,
|
|
16
24
|
};
|
|
17
25
|
|
|
18
26
|
interface ManualTimer {
|
|
@@ -102,6 +110,38 @@ describe("foreground subagent liveness watchdog", () => {
|
|
|
102
110
|
expect(stalledStatus.phase).toBe("inactivity");
|
|
103
111
|
});
|
|
104
112
|
|
|
113
|
+
it("widens the timeout while a tool call is still running", () => {
|
|
114
|
+
let state = createWatchdogHeartbeatState(0);
|
|
115
|
+
state = recordWatchdogToolCallStart(state, 500);
|
|
116
|
+
expect(evaluateWatchdogStatus(state, 6_000, TEST_THRESHOLDS).kind).toBe("healthy");
|
|
117
|
+
|
|
118
|
+
const stalledStatus = evaluateWatchdogStatus(state, 8_600, TEST_THRESHOLDS);
|
|
119
|
+
expect(stalledStatus.kind).toBe("stalled");
|
|
120
|
+
if (stalledStatus.kind !== "stalled") return;
|
|
121
|
+
expect(stalledStatus.phase).toBe("tool_execution");
|
|
122
|
+
|
|
123
|
+
state = recordWatchdogToolCallEnd(state, 8_600);
|
|
124
|
+
expect(state.activeToolCalls).toBe(0);
|
|
125
|
+
});
|
|
126
|
+
|
|
127
|
+
it("treats message updates and tool execution events as heartbeats", () => {
|
|
128
|
+
expect(isWatchdogHeartbeatEventType("message_update")).toBe(true);
|
|
129
|
+
expect(isWatchdogHeartbeatEventType("tool_execution_start")).toBe(true);
|
|
130
|
+
expect(isWatchdogHeartbeatEventType("tool_execution_end")).toBe(true);
|
|
131
|
+
expect(isWatchdogHeartbeatEventType("tool_result_end")).toBe(false);
|
|
132
|
+
});
|
|
133
|
+
|
|
134
|
+
it("applies env overrides for watchdog thresholds", () => {
|
|
135
|
+
const thresholds = resolveForegroundWatchdogThresholds({
|
|
136
|
+
[SUBAGENT_INACTIVITY_TIMEOUT_MS_ENV]: "7000",
|
|
137
|
+
[SUBAGENT_STARTUP_TIMEOUT_MS_ENV]: "3000",
|
|
138
|
+
[SUBAGENT_TOOL_EXECUTION_TIMEOUT_MS_ENV]: "11000",
|
|
139
|
+
});
|
|
140
|
+
expect(thresholds.inactivityTimeoutMs).toBe(7_000);
|
|
141
|
+
expect(thresholds.startupTimeoutMs).toBe(3_000);
|
|
142
|
+
expect(thresholds.toolExecutionTimeoutMs).toBe(11_000);
|
|
143
|
+
});
|
|
144
|
+
|
|
105
145
|
it("stalled termination escalates and resolves without hanging", async () => {
|
|
106
146
|
const state = createWatchdogHeartbeatState(0);
|
|
107
147
|
const stalledStatus = evaluateWatchdogStatus(state, 1_001, TEST_THRESHOLDS);
|
|
@@ -134,8 +174,7 @@ describe("foreground subagent liveness watchdog", () => {
|
|
|
134
174
|
expect(signals).toEqual(["SIGTERM", "SIGKILL"]);
|
|
135
175
|
expect(resolvedCode).toBe(1);
|
|
136
176
|
expect(result.stopReason).toBe("stalled");
|
|
137
|
-
expect(result.errorMessage).toContain(
|
|
138
|
-
|
|
139
|
-
);
|
|
177
|
+
expect(result.errorMessage).toContain("slow provider startup");
|
|
178
|
+
expect(result.errorMessage).toContain("TALLOW_SUBAGENT_*");
|
|
140
179
|
});
|
|
141
180
|
});
|
|
@@ -448,10 +448,12 @@ export interface ForegroundWatchdogThresholds {
|
|
|
448
448
|
readonly inactivityTimeoutMs: number;
|
|
449
449
|
readonly killGraceMs: number;
|
|
450
450
|
readonly startupTimeoutMs: number;
|
|
451
|
+
readonly toolExecutionTimeoutMs: number;
|
|
451
452
|
}
|
|
452
453
|
|
|
453
454
|
/** Heartbeat state tracked by the foreground subagent liveness watchdog. */
|
|
454
455
|
export interface WatchdogHeartbeatState {
|
|
456
|
+
readonly activeToolCalls: number;
|
|
455
457
|
readonly lastHeartbeatAtMs: number | null;
|
|
456
458
|
readonly startedAtMs: number;
|
|
457
459
|
}
|
|
@@ -462,20 +464,86 @@ export type WatchdogStatus =
|
|
|
462
464
|
| {
|
|
463
465
|
readonly elapsedMs: number;
|
|
464
466
|
readonly kind: "stalled";
|
|
465
|
-
readonly phase: "inactivity" | "startup";
|
|
467
|
+
readonly phase: "inactivity" | "startup" | "tool_execution";
|
|
466
468
|
readonly timeoutMs: number;
|
|
467
469
|
};
|
|
468
470
|
|
|
471
|
+
/** Env var overriding the foreground startup timeout. */
|
|
472
|
+
export const SUBAGENT_STARTUP_TIMEOUT_MS_ENV = "TALLOW_SUBAGENT_STARTUP_TIMEOUT_MS";
|
|
473
|
+
|
|
474
|
+
/** Env var overriding the foreground inactivity timeout when no tool is active. */
|
|
475
|
+
export const SUBAGENT_INACTIVITY_TIMEOUT_MS_ENV = "TALLOW_SUBAGENT_INACTIVITY_TIMEOUT_MS";
|
|
476
|
+
|
|
477
|
+
/** Env var overriding the foreground timeout while a tool call is still running. */
|
|
478
|
+
export const SUBAGENT_TOOL_EXECUTION_TIMEOUT_MS_ENV = "TALLOW_SUBAGENT_TOOL_EXECUTION_TIMEOUT_MS";
|
|
479
|
+
|
|
480
|
+
/** Env var overriding the SIGTERM → SIGKILL grace window for stalled workers. */
|
|
481
|
+
export const SUBAGENT_WATCHDOG_KILL_GRACE_MS_ENV = "TALLOW_SUBAGENT_WATCHDOG_KILL_GRACE_MS";
|
|
482
|
+
|
|
469
483
|
/** Default watchdog thresholds used by foreground subagents in runSingleAgent. */
|
|
470
484
|
export const FOREGROUND_WATCHDOG_THRESHOLDS: ForegroundWatchdogThresholds = {
|
|
471
|
-
inactivityTimeoutMs:
|
|
485
|
+
inactivityTimeoutMs: 180_000,
|
|
472
486
|
killGraceMs: 5_000,
|
|
473
|
-
startupTimeoutMs:
|
|
487
|
+
startupTimeoutMs: 60_000,
|
|
488
|
+
toolExecutionTimeoutMs: 600_000,
|
|
474
489
|
};
|
|
475
490
|
|
|
476
491
|
/** How often the foreground watchdog checks for stalled subagents. */
|
|
477
492
|
const FOREGROUND_WATCHDOG_CHECK_INTERVAL_MS = 500;
|
|
478
493
|
|
|
494
|
+
/** Foreground event types that count as liveness without changing tool-call state. */
|
|
495
|
+
const WATCHDOG_HEARTBEAT_EVENT_TYPES = new Set([
|
|
496
|
+
"message_end",
|
|
497
|
+
"message_update",
|
|
498
|
+
"tool_execution_end",
|
|
499
|
+
"tool_execution_start",
|
|
500
|
+
]);
|
|
501
|
+
|
|
502
|
+
/**
 * Parse a positive millisecond timeout override.
 *
 * Only a plain base-10 integer (optionally surrounded by whitespace and with
 * an optional leading "+") is accepted. `Number.parseInt` alone would read
 * "10s" as 10, "1e4" as 1 and "5.5" as 5, silently turning a mistyped
 * override into a near-zero timeout; such values are rejected here so the
 * caller falls back to its default instead.
 *
 * @param rawValue - Raw env value
 * @returns Parsed timeout in milliseconds, or undefined when missing, malformed, or not positive
 */
function parseTimeoutOverrideMs(rawValue: string | undefined): number | undefined {
  if (!rawValue) return undefined;

  const trimmed = rawValue.trim();
  if (!/^\+?\d+$/.test(trimmed)) return undefined;

  const parsed = Number.parseInt(trimmed, 10);
  // isFinite also rejects NaN; an absurdly long digit string can overflow to Infinity.
  if (!Number.isFinite(parsed) || parsed <= 0) return undefined;
  return parsed;
}
|
|
513
|
+
|
|
514
|
+
/**
 * Resolve the effective foreground watchdog thresholds.
 *
 * Each threshold is looked up under its own env var name and passed through
 * `parseTimeoutOverrideMs`; when that yields undefined, the matching field of
 * `FOREGROUND_WATCHDOG_THRESHOLDS` is used.
 *
 * @param env - Environment lookup map (defaults to `process.env`)
 * @returns Watchdog thresholds used for this foreground worker
 */
export function resolveForegroundWatchdogThresholds(
  env: EnvLookup = process.env
): ForegroundWatchdogThresholds {
  const defaults = FOREGROUND_WATCHDOG_THRESHOLDS;

  const inactivityOverride = parseTimeoutOverrideMs(env[SUBAGENT_INACTIVITY_TIMEOUT_MS_ENV]);
  const killGraceOverride = parseTimeoutOverrideMs(env[SUBAGENT_WATCHDOG_KILL_GRACE_MS_ENV]);
  const startupOverride = parseTimeoutOverrideMs(env[SUBAGENT_STARTUP_TIMEOUT_MS_ENV]);
  const toolExecutionOverride = parseTimeoutOverrideMs(
    env[SUBAGENT_TOOL_EXECUTION_TIMEOUT_MS_ENV]
  );

  return {
    inactivityTimeoutMs: inactivityOverride ?? defaults.inactivityTimeoutMs,
    killGraceMs: killGraceOverride ?? defaults.killGraceMs,
    startupTimeoutMs: startupOverride ?? defaults.startupTimeoutMs,
    toolExecutionTimeoutMs: toolExecutionOverride ?? defaults.toolExecutionTimeoutMs,
  };
}
|
|
537
|
+
|
|
538
|
+
/**
 * Return whether an event type counts as watchdog progress.
 *
 * This is a plain membership test against `WATCHDOG_HEARTBEAT_EVENT_TYPES`
 * ("message_end", "message_update", "tool_execution_end",
 * "tool_execution_start"). Any other string, including "tool_result_end",
 * yields false.
 *
 * @param eventType - Raw child-process event type
 * @returns True when the event should refresh liveness
 */
export function isWatchdogHeartbeatEventType(eventType: string): boolean {
  return WATCHDOG_HEARTBEAT_EVENT_TYPES.has(eventType);
}
|
|
546
|
+
|
|
479
547
|
/**
|
|
480
548
|
* Create initial watchdog heartbeat state.
|
|
481
549
|
* @param nowMs - Current wall-clock timestamp in milliseconds
|
|
@@ -483,6 +551,7 @@ const FOREGROUND_WATCHDOG_CHECK_INTERVAL_MS = 500;
|
|
|
483
551
|
*/
|
|
484
552
|
export function createWatchdogHeartbeatState(nowMs: number): WatchdogHeartbeatState {
|
|
485
553
|
return {
|
|
554
|
+
activeToolCalls: 0,
|
|
486
555
|
lastHeartbeatAtMs: null,
|
|
487
556
|
startedAtMs: nowMs,
|
|
488
557
|
};
|
|
@@ -504,6 +573,40 @@ export function recordWatchdogHeartbeat(
|
|
|
504
573
|
};
|
|
505
574
|
}
|
|
506
575
|
|
|
576
|
+
/**
 * Register that a tool call has begun.
 *
 * Produces a new state object with the active tool-call counter incremented
 * and the heartbeat timestamp set to `nowMs`; the input state is not mutated
 * and `startedAtMs` is carried over unchanged.
 *
 * @param state - Existing watchdog heartbeat state
 * @param nowMs - Current wall-clock timestamp in milliseconds
 * @returns Updated heartbeat state
 */
export function recordWatchdogToolCallStart(
  state: WatchdogHeartbeatState,
  nowMs: number
): WatchdogHeartbeatState {
  const { activeToolCalls, startedAtMs } = state;
  const runningCalls = activeToolCalls + 1;
  return { activeToolCalls: runningCalls, lastHeartbeatAtMs: nowMs, startedAtMs };
}
|
|
592
|
+
|
|
593
|
+
/**
 * Register that a tool call has finished.
 *
 * Produces a new state object with the active tool-call counter decremented
 * (clamped so it never drops below zero) and the heartbeat timestamp set to
 * `nowMs`; the input state is not mutated and `startedAtMs` is carried over
 * unchanged.
 *
 * @param state - Existing watchdog heartbeat state
 * @param nowMs - Current wall-clock timestamp in milliseconds
 * @returns Updated heartbeat state
 */
export function recordWatchdogToolCallEnd(
  state: WatchdogHeartbeatState,
  nowMs: number
): WatchdogHeartbeatState {
  const { activeToolCalls, startedAtMs } = state;
  // Clamp so an unmatched end event cannot push the counter negative.
  const remainingCalls = Math.max(0, activeToolCalls - 1);
  return { activeToolCalls: remainingCalls, lastHeartbeatAtMs: nowMs, startedAtMs };
}
|
|
609
|
+
|
|
507
610
|
/**
|
|
508
611
|
* Evaluate current liveness state against watchdog thresholds.
|
|
509
612
|
* @param state - Current heartbeat state
|
|
@@ -530,12 +633,14 @@ export function evaluateWatchdogStatus(
|
|
|
530
633
|
}
|
|
531
634
|
|
|
532
635
|
const inactivityElapsedMs = nowMs - state.lastHeartbeatAtMs;
|
|
533
|
-
|
|
636
|
+
const timeoutMs =
|
|
637
|
+
state.activeToolCalls > 0 ? thresholds.toolExecutionTimeoutMs : thresholds.inactivityTimeoutMs;
|
|
638
|
+
if (inactivityElapsedMs >= timeoutMs) {
|
|
534
639
|
return {
|
|
535
640
|
elapsedMs: inactivityElapsedMs,
|
|
536
641
|
kind: "stalled",
|
|
537
|
-
phase: "inactivity",
|
|
538
|
-
timeoutMs
|
|
642
|
+
phase: state.activeToolCalls > 0 ? "tool_execution" : "inactivity",
|
|
643
|
+
timeoutMs,
|
|
539
644
|
};
|
|
540
645
|
}
|
|
541
646
|
return { kind: "healthy" };
|
|
@@ -552,9 +657,16 @@ export function createStalledSubagentErrorMessage(
|
|
|
552
657
|
const timeoutSeconds = Math.max(1, Math.round(stalledStatus.timeoutMs / 1000));
|
|
553
658
|
const phaseDescription =
|
|
554
659
|
stalledStatus.phase === "startup"
|
|
555
|
-
? "no startup
|
|
556
|
-
:
|
|
557
|
-
|
|
660
|
+
? "no startup activity was received"
|
|
661
|
+
: stalledStatus.phase === "tool_execution"
|
|
662
|
+
? `no subagent activity was received for ${timeoutSeconds}s while a tool call was running`
|
|
663
|
+
: `no subagent activity was received for ${timeoutSeconds}s`;
|
|
664
|
+
return (
|
|
665
|
+
`Subagent stalled (${phaseDescription}). Common causes: slow provider startup, long-running tool execution without progress events, ` +
|
|
666
|
+
"or an interactive confirmation path unavailable in subagent JSON mode. " +
|
|
667
|
+
"Action: narrow task scope, avoid confirmation-gated steps, run very long commands in the parent agent, " +
|
|
668
|
+
"or increase TALLOW_SUBAGENT_* timeout env vars when slow work is legitimate."
|
|
669
|
+
);
|
|
558
670
|
}
|
|
559
671
|
|
|
560
672
|
/**
|
|
@@ -1251,6 +1363,7 @@ export async function runSingleAgent(
|
|
|
1251
1363
|
if (!foregroundSpawn.ok) {
|
|
1252
1364
|
throw new Error(foregroundSpawn.reason);
|
|
1253
1365
|
}
|
|
1366
|
+
const watchdogThresholds = resolveForegroundWatchdogThresholds();
|
|
1254
1367
|
const exitCode = await new Promise<number>((resolve) => {
|
|
1255
1368
|
const proc = foregroundSpawn.proc;
|
|
1256
1369
|
if (!proc.stdout || !proc.stderr) {
|
|
@@ -1292,7 +1405,7 @@ export async function runSingleAgent(
|
|
|
1292
1405
|
if (stopRequested) return;
|
|
1293
1406
|
stopRequested = true;
|
|
1294
1407
|
stopHandle = terminateProcessWithGrace(proc, {
|
|
1295
|
-
killGraceMs:
|
|
1408
|
+
killGraceMs: watchdogThresholds.killGraceMs,
|
|
1296
1409
|
onForceResolve: () => {
|
|
1297
1410
|
settle(1);
|
|
1298
1411
|
},
|
|
@@ -1309,16 +1422,14 @@ export async function runSingleAgent(
|
|
|
1309
1422
|
return;
|
|
1310
1423
|
}
|
|
1311
1424
|
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
event.type === "tool_result_end"
|
|
1316
|
-
) {
|
|
1317
|
-
heartbeatState = recordWatchdogHeartbeat(heartbeatState, Date.now());
|
|
1425
|
+
const nowMs = Date.now();
|
|
1426
|
+
if (isWatchdogHeartbeatEventType(String(event.type))) {
|
|
1427
|
+
heartbeatState = recordWatchdogHeartbeat(heartbeatState, nowMs);
|
|
1318
1428
|
}
|
|
1319
1429
|
|
|
1320
1430
|
// Emit subagent_tool_call when tool starts
|
|
1321
1431
|
if (event.type === "tool_call_start") {
|
|
1432
|
+
heartbeatState = recordWatchdogToolCallStart(heartbeatState, nowMs);
|
|
1322
1433
|
fgTurnCount++;
|
|
1323
1434
|
// Hard enforcement: kill after maxTurns tool calls
|
|
1324
1435
|
if (agent.maxTurns && fgTurnCount >= agent.maxTurns) {
|
|
@@ -1360,6 +1471,10 @@ export async function runSingleAgent(
|
|
|
1360
1471
|
emitUpdate();
|
|
1361
1472
|
}
|
|
1362
1473
|
|
|
1474
|
+
if (event.type === "tool_result_end") {
|
|
1475
|
+
heartbeatState = recordWatchdogToolCallEnd(heartbeatState, nowMs);
|
|
1476
|
+
}
|
|
1477
|
+
|
|
1363
1478
|
if (event.type === "tool_result_end" && event.message) {
|
|
1364
1479
|
currentResult.messages.push(event.message as Message);
|
|
1365
1480
|
// Detect permission denials vs regular errors
|
|
@@ -1385,11 +1500,7 @@ export async function runSingleAgent(
|
|
|
1385
1500
|
|
|
1386
1501
|
watchdogInterval = setInterval(() => {
|
|
1387
1502
|
if (isResolved || stopRequested) return;
|
|
1388
|
-
const status = evaluateWatchdogStatus(
|
|
1389
|
-
heartbeatState,
|
|
1390
|
-
Date.now(),
|
|
1391
|
-
FOREGROUND_WATCHDOG_THRESHOLDS
|
|
1392
|
-
);
|
|
1503
|
+
const status = evaluateWatchdogStatus(heartbeatState, Date.now(), watchdogThresholds);
|
|
1393
1504
|
if (status.kind !== "stalled") return;
|
|
1394
1505
|
applyStalledClassification(currentResult, status);
|
|
1395
1506
|
setForegroundSubagentStatus(taskId, "stalled", piEvents);
|
|
@@ -3,9 +3,9 @@
|
|
|
3
3
|
* corruption tolerance, and session-only mode.
|
|
4
4
|
*/
|
|
5
5
|
import { afterEach, describe, expect, it } from "bun:test";
|
|
6
|
-
import { existsSync, rmSync, writeFileSync } from "node:fs";
|
|
6
|
+
import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs";
|
|
7
7
|
import { join } from "node:path";
|
|
8
|
-
import { type Task, TaskListStore } from "../state/index.js";
|
|
8
|
+
import { LEGACY_TEAMS_DIR, TASK_GROUPS_DIR, type Task, TaskListStore } from "../state/index.js";
|
|
9
9
|
|
|
10
10
|
/**
|
|
11
11
|
* Create a minimal task for store tests.
|
|
@@ -119,6 +119,30 @@ describe("TaskListStore file-backed mode", () => {
|
|
|
119
119
|
|
|
120
120
|
expect(ctx.store.isShared).toBe(true);
|
|
121
121
|
expect(existsSync(ctx.dir)).toBe(true);
|
|
122
|
+
expect(ctx.dir.startsWith(TASK_GROUPS_DIR)).toBe(true);
|
|
123
|
+
});
|
|
124
|
+
|
|
125
|
+
it("migrates a legacy ~/.tallow/teams task directory into task-groups", () => {
|
|
126
|
+
const teamName = `legacy-${Date.now()}`;
|
|
127
|
+
const legacyDir = join(LEGACY_TEAMS_DIR, teamName, "tasks");
|
|
128
|
+
const nextDir = join(TASK_GROUPS_DIR, teamName, "tasks");
|
|
129
|
+
mkdirSync(legacyDir, { recursive: true });
|
|
130
|
+
writeFileSync(join(legacyDir, "1.json"), JSON.stringify(makeTask("1", "From legacy")), "utf-8");
|
|
131
|
+
|
|
132
|
+
const store = new TaskListStore(teamName);
|
|
133
|
+
stores.push({
|
|
134
|
+
cleanup: () => {
|
|
135
|
+
store.deleteAll();
|
|
136
|
+
store.close();
|
|
137
|
+
rmSync(join(nextDir, ".."), { recursive: true, force: true });
|
|
138
|
+
rmSync(join(legacyDir, ".."), { recursive: true, force: true });
|
|
139
|
+
},
|
|
140
|
+
});
|
|
141
|
+
|
|
142
|
+
expect(store.path).toBe(nextDir);
|
|
143
|
+
expect(existsSync(join(nextDir, "1.json"))).toBe(true);
|
|
144
|
+
expect(existsSync(join(legacyDir, "1.json"))).toBe(false);
|
|
145
|
+
expect(store.loadAll()?.[0].subject).toBe("From legacy");
|
|
122
146
|
});
|
|
123
147
|
|
|
124
148
|
it("saveTask persists and loadAll retrieves", () => {
|
|
@@ -1093,7 +1093,7 @@ export function registerTasksExtension(
|
|
|
1093
1093
|
case "team": {
|
|
1094
1094
|
const current = store.isShared ? process.env.PI_TEAM_NAME : "(none — session-only)";
|
|
1095
1095
|
const teamPath = store.path ?? "N/A";
|
|
1096
|
-
ctx.ui.notify(`
|
|
1096
|
+
ctx.ui.notify(`Shared task group: ${current}\nPath: ${teamPath}`, "info");
|
|
1097
1097
|
break;
|
|
1098
1098
|
}
|
|
1099
1099
|
|
|
@@ -1122,7 +1122,7 @@ export function registerTasksExtension(
|
|
|
1122
1122
|
" delete <n> - Delete task n\n" +
|
|
1123
1123
|
" clear - Clear all tasks\n" +
|
|
1124
1124
|
" toggle - Show/hide task widget\n" +
|
|
1125
|
-
" team - Show current
|
|
1125
|
+
" team - Show current shared task group and path",
|
|
1126
1126
|
"info"
|
|
1127
1127
|
);
|
|
1128
1128
|
}
|