@opengsd/gsd-pi 1.1.1-dev.75048e7 → 1.1.1-dev.9f86580
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/resources/.managed-resources-content-hash +1 -1
- package/dist/resources/extensions/browser-tools/engine/managed-gsd-browser.js +18 -2
- package/dist/resources/extensions/browser-tools/engine/selection.js +1 -1
- package/dist/resources/extensions/browser-tools/extension-manifest.json +1 -1
- package/dist/resources/extensions/browser-tools/index.js +29 -2
- package/dist/resources/extensions/browser-tools/web-app-detect.js +52 -0
- package/dist/resources/extensions/gsd/auto/phases.js +45 -3
- package/dist/resources/extensions/gsd/auto/session.js +2 -0
- package/dist/resources/extensions/gsd/auto-dispatch.js +10 -2
- package/dist/resources/extensions/gsd/auto-model-selection.js +26 -0
- package/dist/resources/extensions/gsd/auto-timers.js +24 -10
- package/dist/resources/extensions/gsd/auto.js +26 -4
- package/dist/resources/extensions/gsd/bootstrap/register-hooks.js +29 -21
- package/dist/resources/extensions/gsd/bootstrap/system-context.js +1 -1
- package/dist/resources/extensions/gsd/commands/handlers/auto.js +10 -0
- package/dist/resources/extensions/gsd/commands-mcp-status.js +1 -1
- package/dist/resources/extensions/gsd/config-overlay.js +1 -0
- package/dist/resources/extensions/gsd/context-masker.js +129 -5
- package/dist/resources/extensions/gsd/guided-flow.js +4 -1
- package/dist/resources/extensions/gsd/planner-handoff.js +98 -0
- package/dist/resources/extensions/gsd/preferences-models.js +1 -0
- package/dist/resources/extensions/gsd/prompts/plan-milestone.md +1 -1
- package/dist/resources/extensions/gsd/prompts/run-uat.md +2 -2
- package/dist/resources/extensions/gsd/prompts/system.md +1 -1
- package/dist/resources/extensions/gsd/skill-manifest.js +12 -0
- package/dist/resources/extensions/gsd/tool-contract.js +1 -1
- package/dist/resources/extensions/gsd/tool-presentation-plan.js +19 -2
- package/dist/resources/extensions/gsd/tools/complete-slice.js +28 -1
- package/dist/resources/extensions/gsd/tools/workflow-tool-executors.js +32 -4
- package/dist/resources/extensions/gsd/unit-tool-contracts.js +38 -14
- package/dist/resources/extensions/gsd/workflow-mcp.js +2 -3
- package/dist/resources/extensions/gsd/worktree-manager.js +26 -0
- package/dist/resources/extensions/gsd/worktree-reentry.js +96 -0
- package/dist/resources/extensions/shared/gsd-browser-cli.js +6 -0
- package/dist/web/standalone/.next/BUILD_ID +1 -1
- package/dist/web/standalone/.next/app-path-routes-manifest.json +8 -8
- package/dist/web/standalone/.next/build-manifest.json +2 -2
- package/dist/web/standalone/.next/prerender-manifest.json +3 -3
- package/dist/web/standalone/.next/server/app/_global-error.html +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.html +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.html +1 -1
- package/dist/web/standalone/.next/server/app/index.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/__PAGE__.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/_full.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/_head.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/_index.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/_tree.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app-paths-manifest.json +8 -8
- package/dist/web/standalone/.next/server/chunks/8357.js +1 -1
- package/dist/web/standalone/.next/server/middleware-build-manifest.js +1 -1
- package/dist/web/standalone/.next/server/pages/404.html +1 -1
- package/dist/web/standalone/.next/server/pages/500.html +1 -1
- package/dist/web/standalone/.next/server/server-reference-manifest.json +1 -1
- package/package.json +1 -1
- package/packages/cloud-mcp-gateway/package.json +2 -2
- package/packages/contracts/package.json +1 -1
- package/packages/daemon/package.json +4 -4
- package/packages/gsd-agent-core/package.json +5 -5
- package/packages/gsd-agent-modes/package.json +7 -7
- package/packages/mcp-server/package.json +3 -3
- package/packages/native/package.json +1 -1
- package/packages/pi-agent-core/package.json +1 -1
- package/packages/pi-ai/dist/models.generated.d.ts +158 -2
- package/packages/pi-ai/dist/models.generated.d.ts.map +1 -1
- package/packages/pi-ai/dist/models.generated.js +149 -9
- package/packages/pi-ai/dist/models.generated.js.map +1 -1
- package/packages/pi-ai/dist/providers/transform-messages.d.ts.map +1 -1
- package/packages/pi-ai/dist/providers/transform-messages.js +8 -1
- package/packages/pi-ai/dist/providers/transform-messages.js.map +1 -1
- package/packages/pi-ai/package.json +1 -1
- package/packages/pi-coding-agent/package.json +7 -7
- package/packages/pi-tui/package.json +1 -1
- package/packages/rpc-client/package.json +2 -2
- package/pkg/package.json +1 -1
- package/scripts/install/handoff.js +16 -3
- package/src/resources/extensions/browser-tools/engine/managed-gsd-browser.ts +21 -2
- package/src/resources/extensions/browser-tools/engine/selection.ts +1 -1
- package/src/resources/extensions/browser-tools/extension-manifest.json +1 -1
- package/src/resources/extensions/browser-tools/index.ts +36 -5
- package/src/resources/extensions/browser-tools/tests/browser-engine-selection.test.mjs +2 -2
- package/src/resources/extensions/browser-tools/tests/gsd-browser-launch-config.test.mjs +37 -0
- package/src/resources/extensions/browser-tools/tests/web-app-detect.test.mjs +68 -0
- package/src/resources/extensions/browser-tools/web-app-detect.ts +63 -0
- package/src/resources/extensions/gsd/auto/phases.ts +48 -6
- package/src/resources/extensions/gsd/auto/session.ts +2 -0
- package/src/resources/extensions/gsd/auto-dispatch.ts +34 -2
- package/src/resources/extensions/gsd/auto-model-selection.ts +26 -0
- package/src/resources/extensions/gsd/auto-timers.ts +25 -9
- package/src/resources/extensions/gsd/auto.ts +28 -4
- package/src/resources/extensions/gsd/bootstrap/register-hooks.ts +40 -21
- package/src/resources/extensions/gsd/bootstrap/system-context.ts +1 -1
- package/src/resources/extensions/gsd/commands/handlers/auto.ts +9 -0
- package/src/resources/extensions/gsd/commands-mcp-status.ts +1 -1
- package/src/resources/extensions/gsd/config-overlay.ts +1 -0
- package/src/resources/extensions/gsd/context-masker.ts +152 -5
- package/src/resources/extensions/gsd/guided-flow.ts +4 -1
- package/src/resources/extensions/gsd/planner-handoff.ts +149 -0
- package/src/resources/extensions/gsd/preferences-models.ts +1 -0
- package/src/resources/extensions/gsd/preferences-types.ts +8 -0
- package/src/resources/extensions/gsd/prompts/plan-milestone.md +1 -1
- package/src/resources/extensions/gsd/prompts/run-uat.md +2 -2
- package/src/resources/extensions/gsd/prompts/system.md +1 -1
- package/src/resources/extensions/gsd/skill-manifest.ts +12 -0
- package/src/resources/extensions/gsd/tests/auto-loop.test.ts +99 -0
- package/src/resources/extensions/gsd/tests/auto-model-selection-tool-poisoning.test.ts +66 -4
- package/src/resources/extensions/gsd/tests/auto-supervisor.test.mjs +4 -0
- package/src/resources/extensions/gsd/tests/bundled-skill-triggers.test.ts +9 -0
- package/src/resources/extensions/gsd/tests/complete-slice-verification-gate.test.ts +118 -0
- package/src/resources/extensions/gsd/tests/context-masker.test.ts +56 -1
- package/src/resources/extensions/gsd/tests/custom-engine-loop-integration.test.ts +1 -0
- package/src/resources/extensions/gsd/tests/dispatch-rule-coverage.test.ts +24 -0
- package/src/resources/extensions/gsd/tests/integration/run-uat.test.ts +1 -1
- package/src/resources/extensions/gsd/tests/interrupted-session-auto.test.ts +27 -0
- package/src/resources/extensions/gsd/tests/journal-integration.test.ts +1 -0
- package/src/resources/extensions/gsd/tests/mcp-project-config.test.ts +7 -1
- package/src/resources/extensions/gsd/tests/mcp-status.test.ts +1 -1
- package/src/resources/extensions/gsd/tests/planner-handoff.test.ts +100 -0
- package/src/resources/extensions/gsd/tests/prompt-contracts.test.ts +113 -1
- package/src/resources/extensions/gsd/tests/provider-switch-observer.test.ts +55 -0
- package/src/resources/extensions/gsd/tests/runtime-invariant-modules.test.ts +20 -0
- package/src/resources/extensions/gsd/tests/skill-manifest.test.ts +4 -3
- package/src/resources/extensions/gsd/tests/workflow-mcp.test.ts +77 -10
- package/src/resources/extensions/gsd/tests/workflow-tool-executors.test.ts +131 -2
- package/src/resources/extensions/gsd/tests/worktree-reentry.test.ts +102 -0
- package/src/resources/extensions/gsd/tool-contract.ts +1 -1
- package/src/resources/extensions/gsd/tool-presentation-plan.ts +21 -2
- package/src/resources/extensions/gsd/tools/complete-slice.ts +29 -1
- package/src/resources/extensions/gsd/tools/workflow-tool-executors.ts +46 -4
- package/src/resources/extensions/gsd/unit-tool-contracts.ts +38 -14
- package/src/resources/extensions/gsd/workflow-mcp.ts +2 -3
- package/src/resources/extensions/gsd/worktree-manager.ts +32 -0
- package/src/resources/extensions/gsd/worktree-reentry.ts +103 -0
- package/src/resources/extensions/shared/gsd-browser-cli.ts +6 -0
- /package/dist/web/standalone/.next/static/{h4TGni4xJzlZjGkxaT6uU → zzYMrKpPGfRQRxSFO32Jr}/_buildManifest.js +0 -0
- /package/dist/web/standalone/.next/static/{h4TGni4xJzlZjGkxaT6uU → zzYMrKpPGfRQRxSFO32Jr}/_ssgManifest.js +0 -0
|
@@ -5,10 +5,17 @@
|
|
|
5
5
|
* Reduces context bloat between compactions with zero LLM overhead.
|
|
6
6
|
* Preserves message ordering, roles, and all assistant/user messages.
|
|
7
7
|
*
|
|
8
|
-
* Operates on
|
|
8
|
+
* Operates on provider payloads after convertToLlm:
|
|
9
|
+
*
|
|
10
|
+
* pi-ai Message[] payloads:
|
|
9
11
|
* - toolResult messages: { role: "toolResult", content: TextContent[] }
|
|
10
12
|
* - bash results are already converted to: { role: "user", content: [{type:"text",text:"..."}] }
|
|
11
13
|
* and start with "Ran `" from bashExecutionToText.
|
|
14
|
+
*
|
|
15
|
+
* OpenAI/Codex Responses payloads:
|
|
16
|
+
* - conversation items live in `input`, not `messages`
|
|
17
|
+
* - tool results are { type: "function_call_output", output: string | content[] }
|
|
18
|
+
* - bash results are user items with input_text content starting with "Ran `"
|
|
12
19
|
*/
|
|
13
20
|
|
|
14
21
|
interface MaskableMessage {
|
|
@@ -20,6 +27,37 @@ interface MaskableMessage {
|
|
|
20
27
|
|
|
21
28
|
const MASK_PLACEHOLDER = "[result masked — within summarized history]";
|
|
22
29
|
const MASK_CONTENT_BLOCK = [{ type: "text" as const, text: MASK_PLACEHOLDER }];
|
|
30
|
+
const RESPONSES_MASK_CONTENT_BLOCK = [{ type: "input_text" as const, text: MASK_PLACEHOLDER }];
|
|
31
|
+
const TRUNCATION_MARKER = "\n…[truncated]";
|
|
32
|
+
|
|
33
|
+
type TextLikeBlock = {
|
|
34
|
+
type?: string;
|
|
35
|
+
text?: unknown;
|
|
36
|
+
[key: string]: unknown;
|
|
37
|
+
};
|
|
38
|
+
|
|
39
|
+
interface ResponsesInputItem {
|
|
40
|
+
role?: string;
|
|
41
|
+
type?: string;
|
|
42
|
+
content?: unknown;
|
|
43
|
+
output?: unknown;
|
|
44
|
+
[key: string]: unknown;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
/**
 * Type guard for content blocks that carry a `text` key.
 *
 * Only the presence of the key is checked; the value of `text` may be of any
 * type, so callers still need to verify it is a string before using it.
 */
function isTextLikeBlock(block: unknown): block is TextLikeBlock {
  if (typeof block !== "object" || block === null) return false;
  return "text" in block;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Extracts the leading text of a message/item `content` value.
 *
 * - A plain string is returned as-is.
 * - For an array, only the first block that has a `text` key is consulted;
 *   its text is returned when it is a string, otherwise `undefined`.
 * - Any other shape yields `undefined`.
 */
function firstTextFromContent(content: unknown): string | undefined {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return undefined;

  for (const entry of content) {
    if (!isTextLikeBlock(entry)) continue;
    // Stop at the first text-bearing block, even when its text is not a string.
    return typeof entry.text === "string" ? entry.text : undefined;
  }
  return undefined;
}
|
|
57
|
+
|
|
58
|
+
/**
 * True when `text` is a string beginning with the "Ran `" prefix that marks a
 * converted bash execution result.
 */
function isBashResultText(text: string | undefined): boolean {
  if (typeof text !== "string") return false;
  return text.startsWith("Ran `");
}
|
|
23
61
|
|
|
24
62
|
function findTurnBoundary(messages: MaskableMessage[], keepRecentTurns: number): number {
|
|
25
63
|
let turnsSeen = 0;
|
|
@@ -43,10 +81,8 @@ function findTurnBoundary(messages: MaskableMessage[], keepRecentTurns: number):
|
|
|
43
81
|
* The bashExecutionToText format always starts with "Ran `".
|
|
44
82
|
*/
|
|
45
83
|
function isBashResultUserMessage(m: MaskableMessage): boolean {
|
|
46
|
-
if (m.role !== "user"
|
|
47
|
-
|
|
48
|
-
return first && typeof first === "object" && "text" in first &&
|
|
49
|
-
typeof first.text === "string" && first.text.startsWith("Ran `");
|
|
84
|
+
if (m.role !== "user") return false;
|
|
85
|
+
return isBashResultText(firstTextFromContent(m.content));
|
|
50
86
|
}
|
|
51
87
|
|
|
52
88
|
function isMaskableMessage(m: MaskableMessage): boolean {
|
|
@@ -72,3 +108,114 @@ export function createObservationMask(keepRecentTurns: number = 8) {
|
|
|
72
108
|
});
|
|
73
109
|
};
|
|
74
110
|
}
|
|
111
|
+
|
|
112
|
+
/**
 * True when a Responses `input` item is a user item whose leading text looks
 * like a converted bash result (starts with "Ran `").
 */
function isResponsesBashResultUserItem(item: ResponsesInputItem): boolean {
  return item.role === "user" && isBashResultText(firstTextFromContent(item.content));
}
|
|
116
|
+
|
|
117
|
+
/**
 * Walks `items` from newest to oldest counting genuine user turns (user items
 * that are not bash results) and returns the index of the turn at which
 * `keepRecentTurns` turns have been seen.
 *
 * Returns 0 when fewer than `keepRecentTurns` such turns exist.
 */
function findResponsesTurnBoundary(items: ResponsesInputItem[], keepRecentTurns: number): number {
  let userTurns = 0;
  let index = items.length;

  while (index > 0) {
    index -= 1;
    const candidate = items[index];
    if (candidate.role !== "user" || isResponsesBashResultUserItem(candidate)) continue;

    userTurns += 1;
    if (userTurns >= keepRecentTurns) return index;
  }

  return 0;
}
|
|
128
|
+
|
|
129
|
+
/**
 * Observation masking for OpenAI/Codex Responses API payloads.
 *
 * Responses payloads store the conversation under `input` instead of
 * `messages`, with tool results as `function_call_output` items. Keep this
 * separate from createObservationMask so each payload shape stays explicit.
 *
 * The returned function leaves every item at or after the turn boundary
 * untouched. Before the boundary, `function_call_output` items get their
 * `output` replaced by the mask placeholder and bash-result user items get
 * their `content` replaced by the Responses mask block. When the boundary is
 * 0 the input array itself is returned.
 */
export function createResponsesInputObservationMask(keepRecentTurns: number = 8) {
  // Produces the masked copy of an item that lies before the boundary.
  const maskOldItem = (item: ResponsesInputItem): ResponsesInputItem => {
    if (item.type === "function_call_output") {
      return { ...item, output: MASK_PLACEHOLDER };
    }
    if (isResponsesBashResultUserItem(item)) {
      return { ...item, content: RESPONSES_MASK_CONTENT_BLOCK };
    }
    return item;
  };

  return (items: ResponsesInputItem[]): ResponsesInputItem[] => {
    const boundary = findResponsesTurnBoundary(items, keepRecentTurns);
    if (boundary === 0) return items;

    return items.map((item, position) => (position < boundary ? maskOldItem(item) : item));
  };
}
|
|
153
|
+
|
|
154
|
+
/**
 * Shortens `text` to at most `maxChars` UTF-16 code units and appends the
 * truncation marker. Text that already fits is returned unchanged.
 *
 * The cut never lands between the two halves of a surrogate pair: if it
 * would, the cut moves back one unit so the result does not end in a lone
 * high surrogate (which would be malformed text in the outgoing payload).
 */
function truncateText(text: string, maxChars: number): string {
  if (text.length <= maxChars) return text;

  let cut = maxChars;
  if (cut > 0) {
    const lastKept = text.charCodeAt(cut - 1);
    const firstDropped = text.charCodeAt(cut);
    const splitsPair =
      lastKept >= 0xd800 && lastKept <= 0xdbff &&
      firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
    if (splitsPair) cut -= 1;
  }

  return text.slice(0, cut) + TRUNCATION_MARKER;
}
|
|
158
|
+
|
|
159
|
+
/**
 * Applies a shared character budget to a `content`/`output` value.
 *
 * - A string is truncated directly.
 * - A non-array, non-string value is returned untouched.
 * - For an array, blocks without a string `text` are always kept. Text blocks
 *   consume the budget in order: blocks that fit are kept as-is, the block
 *   that overflows is copied with truncated text, and text blocks after the
 *   budget is spent are dropped.
 *
 * When nothing had to be shortened or dropped, the original array reference
 * is returned so callers can detect "no change" by identity.
 */
function truncateTextBlocks(content: unknown, maxChars: number): unknown {
  if (typeof content === "string") return truncateText(content, maxChars);
  if (!Array.isArray(content)) return content;

  const kept: unknown[] = [];
  let budget = maxChars;
  let changed = false;

  for (const entry of content) {
    if (!isTextLikeBlock(entry) || typeof entry.text !== "string") {
      // Non-text blocks never count against the budget.
      kept.push(entry);
    } else if (budget <= 0) {
      // Budget already spent: drop this text block entirely.
      changed = true;
    } else if (entry.text.length <= budget) {
      kept.push(entry);
      budget -= entry.text.length;
    } else {
      kept.push({ ...entry, text: truncateText(entry.text, budget) });
      budget = 0;
      changed = true;
    }
  }

  return changed ? kept : content;
}
|
|
194
|
+
|
|
195
|
+
/**
 * Normalizes a caller-supplied character limit to a whole number of at
 * least 1, falling back to 800 for anything unusable.
 *
 * The value is floored before it is validated so that fractional inputs
 * between 0 and 1 fall back to 800 instead of becoming a limit of 0 (which
 * would strip every result down to just the truncation marker). NaN,
 * infinities, zero and negative numbers also fall back to 800.
 */
function normalizedMaxChars(maxChars: number): number {
  const whole = Math.floor(maxChars);
  return Number.isFinite(whole) && whole >= 1 ? whole : 800;
}
|
|
198
|
+
|
|
199
|
+
/**
 * Returns a copy of `messages` in which every maskable message (as decided by
 * isMaskableMessage) has its content limited to `maxChars` characters.
 *
 * Messages that are not maskable, or whose content did not need shortening,
 * are passed through by reference.
 */
export function truncateContextResultMessages(messages: MaskableMessage[], maxChars: number = 800): MaskableMessage[] {
  const limit = normalizedMaxChars(maxChars);

  return messages.map((entry) => {
    if (!isMaskableMessage(entry)) return entry;

    const shortened = truncateTextBlocks(entry.content, limit);
    if (shortened === entry.content) return entry;
    return { ...entry, content: shortened };
  });
}
|
|
207
|
+
|
|
208
|
+
/**
 * Responses-payload counterpart of truncateContextResultMessages.
 *
 * `function_call_output` items have their `output` limited to `maxChars`
 * characters; bash-result user items have their `content` limited instead.
 * All other items, and items that did not need shortening, are passed
 * through by reference.
 */
export function truncateResponsesInputResultItems(items: ResponsesInputItem[], maxChars: number = 800): ResponsesInputItem[] {
  const limit = normalizedMaxChars(maxChars);

  return items.map((entry) => {
    // Pick which field holds the result text for this kind of item, if any.
    let field: "output" | "content" | undefined;
    if (entry.type === "function_call_output") {
      field = "output";
    } else if (isResponsesBashResultUserItem(entry)) {
      field = "content";
    }
    if (field === undefined) return entry;

    const shortened = truncateTextBlocks(entry[field], limit);
    return shortened === entry[field] ? entry : { ...entry, [field]: shortened };
  });
}
|
|
@@ -1158,7 +1158,10 @@ async function dispatchWorkflow(
|
|
|
1158
1158
|
? ctx.modelRegistry.getProviderAuthMode(ctx.model.provider)
|
|
1159
1159
|
: undefined,
|
|
1160
1160
|
baseUrl: result.appliedModel?.baseUrl ?? ctx.model?.baseUrl,
|
|
1161
|
-
|
|
1161
|
+
// Guided flow starts the MCP workflow server as part of dispatch, so the
|
|
1162
|
+
// parent session's activeTools doesn't include MCP tools yet. The MCP
|
|
1163
|
+
// launch config check (detectWorkflowMcpLaunchConfig) is the right gate
|
|
1164
|
+
// here — not whether MCP tools are pre-registered in the parent session.
|
|
1162
1165
|
},
|
|
1163
1166
|
);
|
|
1164
1167
|
if (compatibilityError) {
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
// Project/App: gsd-pi
|
|
2
|
+
// File Purpose: Optional gsd-planner handoff after milestone planning.
|
|
3
|
+
|
|
4
|
+
import { spawn as spawnChild, type ChildProcess, type SpawnOptions } from "node:child_process";
|
|
5
|
+
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
6
|
+
import { join } from "node:path";
|
|
7
|
+
|
|
8
|
+
import { gsdRoot } from "./paths.js";
|
|
9
|
+
|
|
10
|
+
export const PLANNER_HANDOFF_RULE_NAME = "planning review handoff -> gsd-planner";
|
|
11
|
+
export const GSD_PLANNER_COMMAND = "gsd-planner";
|
|
12
|
+
|
|
13
|
+
export interface GsdPlannerSpawnPlan {
|
|
14
|
+
command: string;
|
|
15
|
+
args: string[];
|
|
16
|
+
cwd: string;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
export interface GsdPlannerLaunchInput {
|
|
20
|
+
basePath: string;
|
|
21
|
+
milestoneId?: string | null;
|
|
22
|
+
extraArgs?: string[];
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
export type GsdPlannerLaunchResult =
|
|
26
|
+
| { status: "launched"; plan: GsdPlannerSpawnPlan }
|
|
27
|
+
| { status: "failed"; plan: GsdPlannerSpawnPlan; error: Error };
|
|
28
|
+
|
|
29
|
+
type SpawnLike = (
|
|
30
|
+
command: string,
|
|
31
|
+
args: readonly string[],
|
|
32
|
+
options: SpawnOptions,
|
|
33
|
+
) => ChildProcess;
|
|
34
|
+
|
|
35
|
+
export interface GsdPlannerLaunchDeps {
|
|
36
|
+
spawn?: SpawnLike;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
/** Directory under the project's GSD root where planner handoff markers are stored. */
function handoffDir(basePath: string): string {
  const root = gsdRoot(basePath);
  return join(root, "runtime", "planner-handoffs");
}
|
|
42
|
+
|
|
43
|
+
/**
 * Turns a milestone id into a file-name segment: every character other than
 * ASCII letters, digits, `.`, `_` and `-` becomes `_`. An empty result is
 * replaced by "unknown".
 */
function safeMilestoneFileSegment(milestoneId: string): string {
  const sanitized = milestoneId.replace(/[^A-Za-z0-9._-]/g, "_");
  return sanitized === "" ? "unknown" : sanitized;
}
|
|
46
|
+
|
|
47
|
+
/** Path of the JSON marker file recording that a handoff was offered for `milestoneId`. */
function handoffMarkerPath(basePath: string, milestoneId: string): string {
  const directory = handoffDir(basePath);
  const fileName = `${safeMilestoneFileSegment(milestoneId)}.json`;
  return join(directory, fileName);
}
|
|
50
|
+
|
|
51
|
+
/** True when a handoff marker file already exists for this milestone. */
export function hasPlannerHandoffBeenOffered(basePath: string, milestoneId: string): boolean {
  const markerPath = handoffMarkerPath(basePath, milestoneId);
  return existsSync(markerPath);
}
|
|
54
|
+
|
|
55
|
+
/**
 * Records that the planner handoff was offered for `milestoneId`.
 *
 * Creates the handoff directory if needed and writes a pretty-printed JSON
 * marker containing the milestone id, the `source` of the offer and the
 * current time as an ISO timestamp, followed by a newline.
 */
export function markPlannerHandoffOffered(
  basePath: string,
  milestoneId: string,
  source: "auto" | "command" = "auto",
): void {
  const directory = handoffDir(basePath);
  mkdirSync(directory, { recursive: true });

  const markerPath = handoffMarkerPath(basePath, milestoneId);
  const record = {
    milestoneId,
    source,
    offeredAt: new Date().toISOString(),
  };
  writeFileSync(markerPath, `${JSON.stringify(record, null, 2)}\n`, "utf-8");
}
|
|
71
|
+
|
|
72
|
+
/**
 * Builds the command, argument list and working directory used to launch
 * gsd-planner for a project.
 *
 * Arguments are `--project <basePath>`, then `--milestone <id>` when a
 * non-blank milestone id is given (the id is trimmed), then any `extraArgs`.
 */
export function buildGsdPlannerSpawnPlan(input: GsdPlannerLaunchInput): GsdPlannerSpawnPlan {
  const trimmedMilestone = input.milestoneId?.trim();
  const milestoneArgs = trimmedMilestone ? ["--milestone", trimmedMilestone] : [];
  const trailingArgs = input.extraArgs ?? [];

  return {
    command: GSD_PLANNER_COMMAND,
    args: ["--project", input.basePath, ...milestoneArgs, ...trailingArgs],
    cwd: input.basePath,
  };
}
|
|
83
|
+
|
|
84
|
+
/**
 * Formats one argument for display in a command line: arguments made only of
 * letters, digits and `_ . / : = @ + -` are shown bare, everything else
 * (including the empty string) is rendered via JSON.stringify.
 */
function quoteArg(arg: string): string {
  const isPlain = /^[A-Za-z0-9_./:=@+-]+$/.test(arg);
  if (isPlain) return arg;
  return JSON.stringify(arg);
}
|
|
87
|
+
|
|
88
|
+
/** Renders a spawn plan as a single space-separated command line, quoting each word as needed. */
export function formatGsdPlannerCommand(plan: GsdPlannerSpawnPlan): string {
  const words = [plan.command, ...plan.args];
  return words.map((word) => quoteArg(word)).join(" ");
}
|
|
91
|
+
|
|
92
|
+
/**
 * Launches gsd-planner as a detached child process with stdio ignored.
 *
 * Resolves with `status: "launched"` once the child emits `spawn` (the child
 * is unref'd at that point), or with `status: "failed"` when the spawn call
 * throws or the child emits `error` first. Non-Error failures are wrapped in
 * an Error. The promise never rejects.
 *
 * @param input Project path, optional milestone id and extra arguments.
 * @param deps  Optional `spawn` override; defaults to node:child_process spawn.
 */
export async function launchGsdPlanner(
  input: GsdPlannerLaunchInput,
  deps: GsdPlannerLaunchDeps = {},
): Promise<GsdPlannerLaunchResult> {
  const plan = buildGsdPlannerSpawnPlan(input);
  const spawn = deps.spawn ?? spawnChild;
  const asError = (value: unknown): Error => (value instanceof Error ? value : new Error(String(value)));

  let child: ChildProcess;
  try {
    child = spawn(plan.command, plan.args, {
      cwd: plan.cwd,
      detached: true,
      stdio: "ignore",
      windowsHide: true,
    });
  } catch (err) {
    return { status: "failed", plan, error: asError(err) };
  }

  return new Promise((resolve) => {
    // Only the first outcome (error or spawn) is reported.
    let reported = false;
    const report = (outcome: GsdPlannerLaunchResult) => {
      if (reported) return;
      reported = true;
      resolve(outcome);
    };

    const onError = (err: unknown) => {
      report({ status: "failed", plan, error: asError(err) });
    };
    const onSpawn = () => {
      child.unref();
      report({ status: "launched", plan });
    };

    child.once("error", onError);
    child.once("spawn", onSpawn);
  });
}
|
|
136
|
+
|
|
137
|
+
/** Builds the single-line pause message shown once a milestone has been planned. */
export function formatPlannerHandoffPauseReason(milestoneId: string): string {
  const reviewHint = `Milestone ${milestoneId} is planned. Review or customize the plan before implementation if needed.`;
  const nextSteps = `Run /gsd planner to launch ${GSD_PLANNER_COMMAND}, or run /gsd auto to continue without planner changes.`;
  return `${reviewHint} ${nextSteps}`;
}
|
|
143
|
+
|
|
144
|
+
/**
 * Builds the two-line message shown when gsd-planner could not be launched:
 * the failure reason, then the command the user can run manually.
 */
export function formatPlannerLaunchUnavailable(plan: GsdPlannerSpawnPlan, error: Error): string {
  const failureLine = `Could not launch ${GSD_PLANNER_COMMAND}: ${error.message}`;
  const manualLine = `Install ${GSD_PLANNER_COMMAND} or run it manually: ${formatGsdPlannerCommand(plan)}`;
  return `${failureLine}\n${manualLine}`;
}
|
|
@@ -372,6 +372,7 @@ export function resolveAutoSupervisorConfig(): AutoSupervisorConfig {
|
|
|
372
372
|
soft_timeout_minutes: configured.soft_timeout_minutes ?? 20,
|
|
373
373
|
idle_timeout_minutes: configured.idle_timeout_minutes ?? 10,
|
|
374
374
|
hard_timeout_minutes: configured.hard_timeout_minutes ?? 30,
|
|
375
|
+
stalled_tool_timeout_minutes: configured.stalled_tool_timeout_minutes ?? 5,
|
|
375
376
|
...(configured.model ? { model: configured.model } : {}),
|
|
376
377
|
};
|
|
377
378
|
}
|
|
@@ -256,6 +256,14 @@ export interface AutoSupervisorConfig {
|
|
|
256
256
|
soft_timeout_minutes?: number;
|
|
257
257
|
idle_timeout_minutes?: number;
|
|
258
258
|
hard_timeout_minutes?: number;
|
|
259
|
+
/**
|
|
260
|
+
* Dedicated budget for a single in-flight tool call before it is treated as
|
|
261
|
+
* hung. Distinct from `idle_timeout_minutes`: a genuinely stuck tool should
|
|
262
|
+
* be recovered in minutes rather than waiting out the full idle window. A
|
|
263
|
+
* long-but-progressing session is not idle, so it must not share the hung-tool
|
|
264
|
+
* threshold.
|
|
265
|
+
*/
|
|
266
|
+
stalled_tool_timeout_minutes?: number;
|
|
259
267
|
}
|
|
260
268
|
|
|
261
269
|
export interface RemoteQuestionsConfig {
|
|
@@ -14,7 +14,7 @@ All relevant context is preloaded below. Start immediately without re-reading th
|
|
|
14
14
|
|
|
15
15
|
## Already Planned? Soft Brake
|
|
16
16
|
|
|
17
|
-
If `{{outputPath}}` exists with at least one slice line (e.g. `- [ ] **S01:`) AND `
|
|
17
|
+
If `{{outputPath}}` exists with at least one slice line (e.g. `- [ ] **S01:`) AND `gsd_milestone_status` reports slice rows for this milestone, a prior `gsd_plan_milestone` call already persisted the plan. Do **not** re-call it; its UPSERT could overwrite existing planning. Skip to the ready phrase.
|
|
18
18
|
|
|
19
19
|
If only the file or only DB rows exist, the prior write was incomplete; plan normally so the tool reconciles both.
|
|
20
20
|
|
|
@@ -27,7 +27,7 @@ You are the UAT runner. Execute every check defined in `{{uatPath}}` as deeply a
|
|
|
27
27
|
### Automation rules by mode
|
|
28
28
|
|
|
29
29
|
- `artifact-driven` — verify with shell commands, scripts, file reads, and artifact structure checks.
|
|
30
|
-
- `browser-executable` — use
|
|
30
|
+
- `browser-executable` — use browser tools to navigate to the target URL and verify expected behavior. Prefer direct `browser_*` tools when available. Capture screenshots as evidence. Record pass/fail with specific assertions.
|
|
31
31
|
- `runtime-executable` — execute the specified command or script. Capture stdout/stderr as evidence. Record pass/fail based on exit code and output.
|
|
32
32
|
- `live-runtime` — exercise the real runtime path. Start or connect to the app/service if needed, use browser/runtime/network checks, and verify observable behavior.
|
|
33
33
|
- `mixed` — run all automatable artifact-driven and live-runtime checks. Separate any remaining human-only checks explicitly.
|
|
@@ -48,7 +48,7 @@ Choose the lightest tool that proves the check honestly:
|
|
|
48
48
|
- Run `node` / other script invocations
|
|
49
49
|
- Read files and verify their contents
|
|
50
50
|
- Check that expected artifacts exist and have correct structure
|
|
51
|
-
- For live/runtime/UI checks, exercise the real flow with
|
|
51
|
+
- For live/runtime/UI checks, exercise the real flow with browser tools when applicable and inspect runtime/network/console state
|
|
52
52
|
- When a check cannot be honestly automated, gather the best objective evidence you can and mark it `NEEDS-HUMAN`
|
|
53
53
|
|
|
54
54
|
For each check, record:
|
|
@@ -118,7 +118,7 @@ Templates are in `{{templatesDir}}`.
|
|
|
118
118
|
|
|
119
119
|
**Secrets:** Use `secure_env_collect`. Never ask the user to edit `.env` files or paste secrets.
|
|
120
120
|
|
|
121
|
-
**Browser verification:** Verify frontend work against a running app with
|
|
121
|
+
**Browser verification:** Verify frontend work against a running app with browser tools by default. Use `browser_find`/`browser_snapshot_refs` for discovery, refs/selectors -> `browser_batch` for actions, `browser_assert` for verification, and `browser_diff` -> console/network logs -> full inspection as last resort. If browser tools are MCP-namespaced, use that host-provided browser surface. Retry only with a new hypothesis.
|
|
122
122
|
|
|
123
123
|
**Database:** Never query `.gsd/gsd.db` directly via `sqlite3`, `better-sqlite3`, or `node -e require('better-sqlite3')`; the engine owns a single-writer WAL connection. Use `gsd_milestone_status`, `gsd_journal_query`, or other `gsd_*` tools.
|
|
124
124
|
|
|
@@ -117,6 +117,18 @@ const UNIT_TYPE_SKILL_MANIFEST: Record<string, string[]> = {
|
|
|
117
117
|
"review",
|
|
118
118
|
"accessibility",
|
|
119
119
|
],
|
|
120
|
+
// Slice closeout — the "closer" role: verify assembled task work, write the
|
|
121
|
+
// downstream-ready summary + UAT, optionally drive reviewer/security/tester
|
|
122
|
+
// subagents. Predictable skill set, mirrors `complete-milestone`.
|
|
123
|
+
"complete-slice": [
|
|
124
|
+
"verify-before-complete",
|
|
125
|
+
"test",
|
|
126
|
+
"review",
|
|
127
|
+
"security-review",
|
|
128
|
+
"write-docs",
|
|
129
|
+
"observability",
|
|
130
|
+
"handoff",
|
|
131
|
+
],
|
|
120
132
|
// `execute-task` intentionally omitted — implementation hot path covers a
|
|
121
133
|
// wide surface of technologies; wildcard fallback preserves today's
|
|
122
134
|
// behavior until per-task skill hints can be derived from task-plan
|
|
@@ -1118,6 +1118,7 @@ function makeLoopSession(overrides?: Partial<Record<string, unknown>>) {
|
|
|
1118
1118
|
unitLifetimeDispatches: new Map<string, number>(),
|
|
1119
1119
|
unitRecoveryCount: new Map<string, number>(),
|
|
1120
1120
|
verificationRetryCount: new Map<string, number>(),
|
|
1121
|
+
zeroToolRetryCount: new Map<string, number>(),
|
|
1121
1122
|
gitService: null,
|
|
1122
1123
|
lastRequestTimestamp: 0,
|
|
1123
1124
|
autoStartTime: Date.now(),
|
|
@@ -4693,6 +4694,104 @@ test("runUnitPhase retries 0-tool units with ordinary network-related assistant
|
|
|
4693
4694
|
assert.equal(deps.callLog.includes("pauseAuto"), false);
|
|
4694
4695
|
});
|
|
4695
4696
|
|
|
4697
|
+
test("runUnitPhase pauses auto-mode when zero-tool-call retry is exhausted", async (t) => {
|
|
4698
|
+
_resetPendingResolve();
|
|
4699
|
+
|
|
4700
|
+
const basePath = mkdtempSync(join(tmpdir(), "gsd-zero-tool-exhausted-"));
|
|
4701
|
+
t.after(() => {
|
|
4702
|
+
rmSync(basePath, { recursive: true, force: true });
|
|
4703
|
+
});
|
|
4704
|
+
|
|
4705
|
+
const ctx = {
|
|
4706
|
+
...makeMockCtx(),
|
|
4707
|
+
ui: {
|
|
4708
|
+
notify: () => {},
|
|
4709
|
+
setStatus: () => {},
|
|
4710
|
+
setWorkingMessage: () => {},
|
|
4711
|
+
},
|
|
4712
|
+
sessionManager: {
|
|
4713
|
+
getEntries: () => [],
|
|
4714
|
+
},
|
|
4715
|
+
modelRegistry: {
|
|
4716
|
+
getProviderAuthMode: () => undefined,
|
|
4717
|
+
isProviderRequestReady: () => true,
|
|
4718
|
+
},
|
|
4719
|
+
} as any;
|
|
4720
|
+
const pi = {
|
|
4721
|
+
...makeMockPi(),
|
|
4722
|
+
sendMessage: () => {
|
|
4723
|
+
queueMicrotask(() => resolveAgentEnd(makeEvent([
|
|
4724
|
+
{
|
|
4725
|
+
role: "assistant",
|
|
4726
|
+
content: [
|
|
4727
|
+
{ type: "text", text: "Error: I'll investigate the network error handling next." },
|
|
4728
|
+
],
|
|
4729
|
+
},
|
|
4730
|
+
])));
|
|
4731
|
+
},
|
|
4732
|
+
} as any;
|
|
4733
|
+
const s = makeLoopSession({
|
|
4734
|
+
basePath,
|
|
4735
|
+
canonicalProjectRoot: basePath,
|
|
4736
|
+
originalBasePath: basePath,
|
|
4737
|
+
});
|
|
4738
|
+
// Pre-seed counter at MAX_ZERO_TOOL_RETRIES so the next zero-tool turn exhausts the cap
|
|
4739
|
+
s.zeroToolRetryCount.set("execute-task/M001/S01/T01", 1);
|
|
4740
|
+
|
|
4741
|
+
const mockLedger = {
|
|
4742
|
+
version: 1,
|
|
4743
|
+
projectStartedAt: Date.now(),
|
|
4744
|
+
units: [] as any[],
|
|
4745
|
+
};
|
|
4746
|
+
const deps = makeMockDeps({
|
|
4747
|
+
closeoutUnit: async () => {
|
|
4748
|
+
mockLedger.units.push({
|
|
4749
|
+
type: "execute-task",
|
|
4750
|
+
id: "M001/S01/T01",
|
|
4751
|
+
startedAt: s.currentUnit?.startedAt ?? Date.now(),
|
|
4752
|
+
toolCalls: 0,
|
|
4753
|
+
assistantMessages: 1,
|
|
4754
|
+
tokens: { input: 100, output: 20, total: 120, cacheRead: 0, cacheWrite: 0 },
|
|
4755
|
+
cost: 0.01,
|
|
4756
|
+
});
|
|
4757
|
+
},
|
|
4758
|
+
getLedger: () => mockLedger,
|
|
4759
|
+
});
|
|
4760
|
+
let seq = 0;
|
|
4761
|
+
|
|
4762
|
+
const result = await runUnitPhase(
|
|
4763
|
+
{ ctx, pi, s, deps, prefs: undefined, iteration: 1, flowId: "flow-zero-tool-exhausted", nextSeq: () => ++seq },
|
|
4764
|
+
{
|
|
4765
|
+
unitType: "execute-task",
|
|
4766
|
+
unitId: "M001/S01/T01",
|
|
4767
|
+
prompt: "do work",
|
|
4768
|
+
finalPrompt: "do work",
|
|
4769
|
+
pauseAfterUatDispatch: false,
|
|
4770
|
+
state: {
|
|
4771
|
+
phase: "executing",
|
|
4772
|
+
activeMilestone: { id: "M001", title: "Milestone" },
|
|
4773
|
+
activeSlice: { id: "S01", title: "Slice" },
|
|
4774
|
+
activeTask: { id: "T01", title: "Task" },
|
|
4775
|
+
registry: [{ id: "M001", title: "Milestone", status: "active" }],
|
|
4776
|
+
recentDecisions: [],
|
|
4777
|
+
blockers: [],
|
|
4778
|
+
nextAction: "",
|
|
4779
|
+
progress: { milestones: { done: 0, total: 1 } },
|
|
4780
|
+
requirements: { active: 0, validated: 0, deferred: 0, outOfScope: 0, blocked: 0, total: 0 },
|
|
4781
|
+
} as any,
|
|
4782
|
+
mid: "M001",
|
|
4783
|
+
midTitle: "Milestone",
|
|
4784
|
+
isRetry: false,
|
|
4785
|
+
previousTier: undefined,
|
|
4786
|
+
},
|
|
4787
|
+
{ recentUnits: [{ key: "execute-task/M001/S01/T01" }], stuckRecoveryAttempts: 0, consecutiveFinalizeTimeouts: 0 },
|
|
4788
|
+
);
|
|
4789
|
+
|
|
4790
|
+
assert.equal(result.action, "break");
|
|
4791
|
+
assert.equal((result as any).reason, "zero-tool-calls-exhausted");
|
|
4792
|
+
assert.equal(deps.callLog.includes("pauseAuto"), true);
|
|
4793
|
+
});
|
|
4794
|
+
|
|
4696
4795
|
test("autoLoop pauses user-driven deep question instead of flagging 0 tool calls", async () => {
|
|
4697
4796
|
_resetPendingResolve();
|
|
4698
4797
|
|
|
@@ -33,6 +33,7 @@ import {
|
|
|
33
33
|
selectAndApplyModel,
|
|
34
34
|
ModelPolicyDispatchBlockedError,
|
|
35
35
|
clearToolBaseline,
|
|
36
|
+
getToolBaselineSnapshot,
|
|
36
37
|
} from "../auto-model-selection.js";
|
|
37
38
|
import { applyModelPolicyFilter } from "../uok/model-policy.js";
|
|
38
39
|
import {
|
|
@@ -139,7 +140,7 @@ function makeCtx(
|
|
|
139
140
|
test("vacuous-truth (a): unit type with empty workflow-required tools → dispatch succeeds", async () => {
|
|
140
141
|
const env = makeTempProject();
|
|
141
142
|
try {
|
|
142
|
-
// `
|
|
143
|
+
// `rewrite-docs` has no required workflow tools
|
|
143
144
|
// → returns []. Exercises the empty-requiredTools branch in
|
|
144
145
|
// applyModelPolicyFilter (existing test used
|
|
145
146
|
// gate-evaluate which has non-empty required tools and never hit this path).
|
|
@@ -161,7 +162,7 @@ test("vacuous-truth (a): unit type with empty workflow-required tools → dispat
|
|
|
161
162
|
const result = await selectAndApplyModel(
|
|
162
163
|
makeCtx(availableModels),
|
|
163
164
|
pi as any,
|
|
164
|
-
"
|
|
165
|
+
"rewrite-docs",
|
|
165
166
|
"x1",
|
|
166
167
|
env.dir,
|
|
167
168
|
undefined,
|
|
@@ -308,8 +309,8 @@ test("genuinely-impossible (a): pi-native required tool incompatible with candid
|
|
|
308
309
|
test("genuinely-impossible (b): cross-provider routing disabled + provider mismatch → typed error", async () => {
|
|
309
310
|
const env = makeTempProject();
|
|
310
311
|
try {
|
|
311
|
-
// Use plan-slice
|
|
312
|
-
//
|
|
312
|
+
// Use plan-slice but pretend no candidate model can carry its required
|
|
313
|
+
// workflow tools. The simplest way: provide a model whose
|
|
313
314
|
// api is a fictitious "no-tools" string — `filterToolsForProvider` returns
|
|
314
315
|
// every tool as filtered for an unknown api with toolCalling=false, OR we
|
|
315
316
|
// can pick a real api that also denies the tool. We use an api that
|
|
@@ -711,3 +712,64 @@ test("cross-mode (#4965): auto → guided → auto preserves the original auto-e
|
|
|
711
712
|
env.cleanup();
|
|
712
713
|
}
|
|
713
714
|
});
|
|
715
|
+
|
|
716
|
+
// ─── 8. Baseline union: MCP tools connected after baseline capture (#477) ─────
|
|
717
|
+
//
|
|
718
|
+
// `getToolBaselineSnapshot` must return the UNION of the frozen WeakMap baseline
|
|
719
|
+
// and the current live tool set. This ensures:
|
|
720
|
+
// (a) Provider-narrowed tools (in baseline, dropped from live) are still seen
|
|
721
|
+
// by transport preflight — the bug-5 fix from #477.
|
|
722
|
+
// (b) Tools connected after the baseline was captured (e.g. MCP server attached
|
|
723
|
+
// mid-session) are also visible — so a paused run that resumes after MCP
|
|
724
|
+
// reconnects clears the transport warning on the first iteration instead of
|
|
725
|
+
// repeating it indefinitely.
|
|
726
|
+
|
|
727
|
+
test("baseline union (#477): getToolBaselineSnapshot includes live tools not present in frozen baseline", async () => {
|
|
728
|
+
const env = makeTempProject();
|
|
729
|
+
try {
|
|
730
|
+
const availableModels = [
|
|
731
|
+
{ id: "claude-sonnet-4-6", provider: "anthropic", api: "anthropic-messages" },
|
|
732
|
+
];
|
|
733
|
+
|
|
734
|
+
const initialTools = ["bash", "read", "write"];
|
|
735
|
+
const pi = makeRecordingPi(initialTools);
|
|
736
|
+
clearToolBaseline(pi as unknown as object);
|
|
737
|
+
|
|
738
|
+
// Capture baseline with only native tools (no MCP connected yet).
|
|
739
|
+
await selectAndApplyModel(
|
|
740
|
+
makeCtx(availableModels),
|
|
741
|
+
pi as any,
|
|
742
|
+
"execute-task",
|
|
743
|
+
"u1",
|
|
744
|
+
env.dir,
|
|
745
|
+
undefined,
|
|
746
|
+
false,
|
|
747
|
+
{ provider: "anthropic", id: "claude-sonnet-4-6" },
|
|
748
|
+
undefined,
|
|
749
|
+
/* isAutoMode */ true,
|
|
750
|
+
);
|
|
751
|
+
|
|
752
|
+
// Simulate: provider narrows tools (Groq cap, hook override, etc.).
|
|
753
|
+
// The baseline in the WeakMap still has the full initial set.
|
|
754
|
+
pi.setActiveTools(["bash"]);
|
|
755
|
+
|
|
756
|
+
// Simulate: user connects MCP mid-session (after the baseline was captured).
|
|
757
|
+
const liveTools = pi.getActiveTools().concat(["mcp__gsd-workflow__gsd_uat_exec"]);
|
|
758
|
+
pi.setActiveTools(liveTools);
|
|
759
|
+
|
|
760
|
+
const snapshot = getToolBaselineSnapshot(pi as any);
|
|
761
|
+
|
|
762
|
+
// All baseline tools must be present (even the provider-narrowed ones).
|
|
763
|
+
for (const t of initialTools) {
|
|
764
|
+
assert.ok(snapshot.includes(t), `snapshot must include baseline tool: ${t}`);
|
|
765
|
+
}
|
|
766
|
+
// Newly connected MCP tool must also be present.
|
|
767
|
+
assert.ok(
|
|
768
|
+
snapshot.includes("mcp__gsd-workflow__gsd_uat_exec"),
|
|
769
|
+
"snapshot must include MCP tool connected after baseline capture",
|
|
770
|
+
);
|
|
771
|
+
} finally {
|
|
772
|
+
env.restoreEnv();
|
|
773
|
+
env.cleanup();
|
|
774
|
+
}
|
|
775
|
+
});
|
|
@@ -20,6 +20,10 @@ test('resolveAutoSupervisorConfig provides safe timeout defaults', () => {
|
|
|
20
20
|
assert.equal(supervisor.soft_timeout_minutes, 20);
|
|
21
21
|
assert.equal(supervisor.idle_timeout_minutes, 10);
|
|
22
22
|
assert.equal(supervisor.hard_timeout_minutes, 30);
|
|
23
|
+
// A single hung tool gets its own short budget, well below the idle window,
|
|
24
|
+
// so a genuinely stuck tool is recovered in minutes instead of waiting out
|
|
25
|
+
// the full idle timeout.
|
|
26
|
+
assert.equal(supervisor.stalled_tool_timeout_minutes, 5);
|
|
23
27
|
} finally {
|
|
24
28
|
if (previousGsdHome === undefined) {
|
|
25
29
|
delete process.env.GSD_HOME;
|
|
@@ -76,3 +76,12 @@ test('BUNDLED_SKILL_TRIGGERS: skill ids are unique', () => {
|
|
|
76
76
|
seen.add(skill);
|
|
77
77
|
}
|
|
78
78
|
});
|
|
79
|
+
|
|
80
|
+
test('BUNDLED_SKILL_TRIGGERS: gsd-browser and agent-browser stay distinct', () => {
|
|
81
|
+
const gsdBrowser = BUNDLED_SKILL_TRIGGERS.find(entry => entry.skill === 'gsd-browser');
|
|
82
|
+
const agentBrowser = BUNDLED_SKILL_TRIGGERS.find(entry => entry.skill === 'agent-browser');
|
|
83
|
+
|
|
84
|
+
assert.ok(gsdBrowser, 'gsd-browser trigger should be registered');
|
|
85
|
+
assert.ok(agentBrowser, 'agent-browser trigger should be registered');
|
|
86
|
+
assert.notStrictEqual(gsdBrowser.trigger, agentBrowser.trigger);
|
|
87
|
+
});
|