pi-crew 0.9.5 → 0.9.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +556 -0
- package/README.md +10 -3
- package/docs/HARNESS_BACKLOG.md +51 -3
- package/docs/dynamic-workflows.md +315 -2
- package/docs/fix-plan-disabletools-exit-null.md +219 -0
- package/docs/troubleshooting.md +76 -0
- package/package.json +10 -3
- package/src/config/defaults.ts +8 -4
- package/src/extension/team-tool/doctor.ts +14 -0
- package/src/extension/team-tool/run.ts +2 -0
- package/src/runtime/background-runner.ts +1 -1
- package/src/runtime/capability-inventory.ts +20 -1
- package/src/runtime/child-pi.ts +109 -11
- package/src/runtime/deterministic-ast.ts +161 -0
- package/src/runtime/dwf-state-store.ts +97 -0
- package/src/runtime/dynamic-workflow-context.ts +381 -7
- package/src/runtime/dynamic-workflow-runner.ts +93 -2
- package/src/runtime/pi-args.ts +11 -0
- package/src/runtime/result-extractor.ts +72 -7
- package/src/runtime/task-output-context.ts +25 -9
- package/src/runtime/team-runner.ts +8 -3
- package/src/runtime/zombie-scanner.ts +297 -0
- package/src/schema/team-tool-schema.ts +28 -0
- package/src/skills/discover-skills.ts +61 -8
- package/src/skills/validate.ts +267 -0
- package/src/state/contracts.ts +1 -0
- package/src/state/state-store.ts +3 -0
- package/src/state/types.ts +9 -0
- package/src/ui/dashboard-panes/progress-pane.ts +5 -0
- package/src/ui/dwf-phase-display.ts +151 -0
- package/src/ui/keybinding-map.ts +128 -41
- package/src/ui/run-event-bus.ts +83 -0
- package/src/ui/run-snapshot-cache.ts +4 -0
- package/src/ui/snapshot-types.ts +3 -0
- package/src/workflows/workflow-config.ts +3 -0
- package/src/worktree/worktree-manager.ts +94 -0
- package/types/dwf.d.ts +187 -0
package/docs/troubleshooting.md
CHANGED
|
@@ -155,3 +155,79 @@ code + a help hint inline. Common ones:
|
|
|
155
155
|
- `team action='summary' runId=…` — includes common failure-pattern detection
|
|
156
156
|
("4 of 5 failures share 2 root causes").
|
|
157
157
|
- `team action='events' runId=…` — full event timeline for forensics.
|
|
158
|
+
|
|
159
|
+
## Stuck / orphaned sub-agent processes ("zombies")
|
|
160
|
+
|
|
161
|
+
A pi-crew sub-agent whose parent crashed may linger as an orphaned process.
|
|
162
|
+
**Do NOT kill `pi` processes by eye** (uptime/RSS heuristics will match your
|
|
163
|
+
own interactive main session — that is unrecoverable). Use the safe scanner:
|
|
164
|
+
|
|
165
|
+
```
|
|
166
|
+
team action='doctor' focus='zombies'
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
This is **read-only**. It matches ONLY processes carrying the authoritative
|
|
170
|
+
`PI_CREW_KIND=subagent` env marker (set by every child-pi spawn) whose
|
|
171
|
+
`PI_CREW_PARENT_PID` is no longer alive. Your main session never carries the
|
|
172
|
+
marker, so it can never appear in the list. (The marker is an env var, not an
|
|
173
|
+
argv flag — pi's strict option parser rejects unknown flags, so we can't use
|
|
174
|
+
a `--crew-subagent` CLI flag.)
|
|
175
|
+
|
|
176
|
+
To kill a confirmed zombie: `kill <PID>` (the OS reaps it). The scanner never
|
|
177
|
+
kills on your behalf.
|
|
178
|
+
|
|
179
|
+
### Why the marker exists
|
|
180
|
+
|
|
181
|
+
Before `PI_CREW_KIND`, a heuristic zombie "cleanup" killed a live main session
|
|
182
|
+
by accident. The marker makes sub-agent identity authoritative rather than
|
|
183
|
+
guessed. See `src/runtime/zombie-scanner.ts` and `.crew/knowledge.md`.
|
|
184
|
+
|
|
185
|
+
## `ctx.agent({disableTools: true})` — historical `exit null` (FIXED)
|
|
186
|
+
|
|
187
|
+
Previously, `ctx.agent({disableTools: true, maxTurns: 1})` could return
|
|
188
|
+
`exit null` because the steer-injection code mis-treated normal Node stdin
|
|
189
|
+
backpressure (`write() === false`) as a fatal failure and `killProcessTree`'d
|
|
190
|
+
the worker mid-answer. **Fixed**: steer injection is now advisory — a
|
|
191
|
+
backpressure return or non-writable stdin is logged, not fatal; the
|
|
192
|
+
hard-abort at `maxTurns + graceTurns` remains the safety net for genuine
|
|
193
|
+
runaways. The `disableTools` correlation was a red herring — the real trigger
|
|
194
|
+
was `maxTurns:1` hitting on the first turn. See CHANGELOG "Real-world smoke
|
|
195
|
+
testing findings" and `test/unit/child-pi-steer-backpressure.test.ts`.
|
|
196
|
+
|
|
197
|
+
## Running the real-binary smoke suite (HB-004)
|
|
198
|
+
|
|
199
|
+
The default `npm test` mocks child-pi (`PI_TEAMS_MOCK_CHILD_PI`), so it cannot
|
|
200
|
+
catch bugs that only manifest against the real `pi` binary. The smoke suite
|
|
201
|
+
shells out to real pi + makes real LLM calls, so it bills tokens and is gated
|
|
202
|
+
behind `PI_CREW_SMOKE=1`.
|
|
203
|
+
|
|
204
|
+
### Run locally
|
|
205
|
+
|
|
206
|
+
```bash
|
|
207
|
+
# All smoke tests (~5 tests, ~1 min, bills tokens):
|
|
208
|
+
PI_CREW_SMOKE=1 npm run test:smoke
|
|
209
|
+
|
|
210
|
+
# One smoke test in isolation:
|
|
211
|
+
PI_CREW_SMOKE=1 npx tsx --test test/smoke/agent-disabletools.smoke.ts
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
Smoke tests live in `test/smoke/*.smoke.ts` and are NOT picked up by the default
|
|
215
|
+
`npm test` glob (`test/unit/*` + `test/integration/*`). Each test self-skips
|
|
216
|
+
unless `PI_CREW_SMOKE=1`.
|
|
217
|
+
|
|
218
|
+
### What each covers
|
|
219
|
+
|
|
220
|
+
| File | Feature family | Catches |
|
|
221
|
+
|---|---|---|
|
|
222
|
+
| `argv-flags.smoke.ts` | buildPiWorkerArgs argv | unknown-flag rejection (e.g. `--crew-subagent`) |
|
|
223
|
+
| `agent-plain.smoke.ts` | ctx.agent() baseline | spawn-path breakage |
|
|
224
|
+
| `agent-schema.smoke.ts` | ctx.agent({schema, systemPrompt}) | persona-leak / schema-validation failures |
|
|
225
|
+
| `agent-disabletools.smoke.ts` | ctx.agent({disableTools, maxTurns:1}) ×5 | HB-003a steer-backpressure exit-null (flaky → 5×) |
|
|
226
|
+
| `dwf-workflow.smoke.ts` | full DWF end-to-end | phase/log/args/budget/pipeline/agent/setResult integration |
|
|
227
|
+
|
|
228
|
+
### Run in CI (manual dispatch)
|
|
229
|
+
|
|
230
|
+
GitHub Actions → "Smoke (real-binary, manual)" → Run workflow → pick OS.
|
|
231
|
+
Requires the `PI_AUTH_JSON` repo secret (the contents of `~/.pi/agent/auth.json`)
|
|
232
|
+
so the spawned `pi` can authenticate with the model provider. If unset, the
|
|
233
|
+
LLM-calling smoke tests fail with a clear auth error.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-crew",
|
|
3
|
-
"version": "0.9.5",
|
|
3
|
+
"version": "0.9.8",
|
|
4
4
|
"description": "Pi extension for coordinated AI teams, workflows, worktrees, and async task orchestration",
|
|
5
5
|
"author": "baphuongna",
|
|
6
6
|
"license": "MIT",
|
|
@@ -39,6 +39,7 @@
|
|
|
39
39
|
"docs/",
|
|
40
40
|
"tsconfig.json",
|
|
41
41
|
"schema.json",
|
|
42
|
+
"types/",
|
|
42
43
|
"CHANGELOG.md",
|
|
43
44
|
"LICENSE",
|
|
44
45
|
"NOTICE.md"
|
|
@@ -52,6 +53,7 @@
|
|
|
52
53
|
"test:unit": "node scripts/test-runner.mjs --test-concurrency=4 --test-timeout=180000 --test-force-exit test/unit/*.test.ts",
|
|
53
54
|
"test:watch": "tsx --watch --test --test-concurrency=4 --test-timeout=30000 --test-force-exit test/unit/*.test.ts",
|
|
54
55
|
"test:integration": "node scripts/test-runner.mjs --test-concurrency=1 --test-timeout=120000 test/integration/*.test.ts",
|
|
56
|
+
"test:smoke": "node scripts/test-runner.mjs --test-concurrency=1 --test-timeout=180000 test/smoke/*.smoke.ts",
|
|
55
57
|
"build:bundle": "node scripts/build-bundle.mjs",
|
|
56
58
|
"bench": "node scripts/run-bench.mjs",
|
|
57
59
|
"bench:check": "node scripts/bench-check.mjs",
|
|
@@ -63,7 +65,10 @@
|
|
|
63
65
|
"smoke:release": "node scripts/release-smoke.mjs"
|
|
64
66
|
},
|
|
65
67
|
"exports": {
|
|
66
|
-
"./schema.json": "./schema.json"
|
|
68
|
+
"./schema.json": "./schema.json",
|
|
69
|
+
"./workflow": {
|
|
70
|
+
"types": "./types/dwf.d.ts"
|
|
71
|
+
}
|
|
67
72
|
},
|
|
68
73
|
"pi": {
|
|
69
74
|
"extensions": [
|
|
@@ -81,10 +86,12 @@
|
|
|
81
86
|
},
|
|
82
87
|
"dependencies": {
|
|
83
88
|
"@sinclair/typebox": "^0.34.49",
|
|
89
|
+
"acorn": "^8.17.0",
|
|
84
90
|
"ajv": "^8.20.0",
|
|
85
91
|
"cli-highlight": "^2.1.11",
|
|
86
92
|
"diff": "^5.2.0",
|
|
87
|
-
"jiti": "^2.7.0"
|
|
93
|
+
"jiti": "^2.7.0",
|
|
94
|
+
"yaml": "^2.9.0"
|
|
88
95
|
},
|
|
89
96
|
"devDependencies": {
|
|
90
97
|
"@biomejs/biome": "^2.4.15",
|
package/src/config/defaults.ts
CHANGED
|
@@ -16,10 +16,14 @@ export const DEFAULT_CHILD_PI: Readonly<{
|
|
|
16
16
|
// Keep this as a coarse stuck-worker guard rather than a short per-message latency budget.
|
|
17
17
|
responseTimeoutMs: 5 * 60_000,
|
|
18
18
|
maxCaptureBytes: 256 * 1024,
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
19
|
+
// L4 output-handling: thresholds sized from real worker-output data
|
|
20
|
+
// (27 result artifacts measured: max 9226 bytes, median 8272, 100% < 16KB).
|
|
21
|
+
// Previous values (8192/1024/4096) truncated 62% of real results.
|
|
22
|
+
// See .crew/research/worker-output-handling.md + source/deer-flow/.research/.
|
|
23
|
+
maxAssistantTextChars: 16_384,
|
|
24
|
+
maxToolResultChars: 8_192,
|
|
25
|
+
maxToolInputChars: 4_096,
|
|
26
|
+
maxCompactContentChars: 8_192,
|
|
23
27
|
};
|
|
24
28
|
|
|
25
29
|
export const DEFAULT_LIVE_SESSION = {
|
package/src/extension/team-tool/doctor.ts
CHANGED
|
@@ -10,6 +10,7 @@ import { DEFAULT_PATHS } from "../../config/defaults.ts";
|
|
|
10
10
|
import type { TeamToolParamsValue } from "../../schema/team-tool-schema.ts";
|
|
11
11
|
import { getPiSpawnCommand } from "../../runtime/pi-spawn.ts";
|
|
12
12
|
import { getRuntimeWarmupStatus } from "../../runtime/runtime-warmup.ts";
|
|
13
|
+
import { scanZombieSubagents, formatZombieReport } from "../../runtime/zombie-scanner.ts";
|
|
13
14
|
import { validateResources } from "../validate-resources.ts";
|
|
14
15
|
import { detectDrift, formatDriftReport, type DriftReport } from "../../config/drift-detector.ts";
|
|
15
16
|
import { TeamToolParams } from "../../schema/team-tool-schema.ts";
|
|
@@ -237,6 +238,19 @@ export function buildTeamDoctorReport(input: TeamDoctorReportInput): TeamDoctorR
|
|
|
237
238
|
}
|
|
238
239
|
|
|
239
240
|
export function handleDoctor(ctx: TeamContext, params: TeamToolParamsValue = {}): PiTeamsToolResult {
|
|
241
|
+
// Sub-focus: zombie sub-agent scan. READ-ONLY — never kills. Returns a table of
|
|
242
|
+
// orphaned pi-crew sub-agents identified by the authoritative PI_CREW_KIND=subagent
|
|
243
|
+
// marker. The user's main session never carries that marker, so it can never appear.
|
|
244
|
+
if (params.focus === "zombies") {
|
|
245
|
+
const scan = scanZombieSubagents();
|
|
246
|
+
const text = formatZombieReport(scan);
|
|
247
|
+
return result(text, {
|
|
248
|
+
action: "doctor",
|
|
249
|
+
status: "ok",
|
|
250
|
+
data: { zombies: scan.zombies.length, live: scan.live.length, errors: scan.errors.length },
|
|
251
|
+
}, false);
|
|
252
|
+
}
|
|
253
|
+
|
|
240
254
|
const loadedConfig = loadConfig(ctx.cwd);
|
|
241
255
|
let smokeChildPi: { ok: boolean; detail: string } | undefined;
|
|
242
256
|
if (configRecord(params.config).smokeChildPi === true) {
|
package/src/extension/team-tool/run.ts
CHANGED
|
@@ -281,6 +281,7 @@ export async function handleRun(params: TeamToolParamsValue, ctx: TeamContext):
|
|
|
281
281
|
workspaceMode: params.workspaceMode,
|
|
282
282
|
ownerSessionId: ctx.sessionId,
|
|
283
283
|
runKind: params.runKind,
|
|
284
|
+
args: params.args,
|
|
284
285
|
});
|
|
285
286
|
const goalArtifact = writeArtifact(paths.artifactsRoot, {
|
|
286
287
|
kind: "prompt",
|
|
@@ -323,6 +324,7 @@ export async function handleRun(params: TeamToolParamsValue, ctx: TeamContext):
|
|
|
323
324
|
team: dwfTeam,
|
|
324
325
|
signal: ctx.signal ?? AbortSignal.timeout(3_600_000),
|
|
325
326
|
modelOverride: params.model,
|
|
327
|
+
tokenBudget: params.tokenBudget ?? (workflow as import("../../workflows/workflow-config.ts").DynamicWorkflowConfig).maxTokenBudget,
|
|
326
328
|
});
|
|
327
329
|
} catch (runnerError) {
|
|
328
330
|
// Round-11 runtime fix: persist manifest with status=failed when runner throws
|
package/src/runtime/background-runner.ts
CHANGED
|
@@ -602,7 +602,7 @@ async function main(): Promise<void> {
|
|
|
602
602
|
const { allWorkflows, discoverWorkflows } = await import("../workflows/discover-workflows.ts");
|
|
603
603
|
const wf = allWorkflows(discoverWorkflows(manifest.cwd)).find((w) => w.name === manifest.workflow);
|
|
604
604
|
if (!wf || wf.runtime !== "dynamic" || !wf.dynamicScript) throw new Error(`runKind="dynamic-workflow" but workflow '${manifest.workflow}' is not dynamic (runId=${manifest.runId})`);
|
|
605
|
-
const dwfResult = await runDynamicWorkflow({ manifest, workflow: wf as import("../workflows/workflow-config.ts").DynamicWorkflowConfig, signal: abortController.signal });
|
|
605
|
+
const dwfResult = await runDynamicWorkflow({ manifest, workflow: wf as import("../workflows/workflow-config.ts").DynamicWorkflowConfig, signal: abortController.signal, tokenBudget: wf.maxTokenBudget });
|
|
606
606
|
saveRunManifest(dwfResult.manifest);
|
|
607
607
|
earlyResult = dwfResult;
|
|
608
608
|
}
|
package/src/runtime/capability-inventory.ts
CHANGED
|
@@ -2,7 +2,8 @@ import type { AgentConfig, ResourceSource } from "../agents/agent-config.ts";
|
|
|
2
2
|
import { discoverAgents } from "../agents/discover-agents.ts";
|
|
3
3
|
import { discoverTeams } from "../teams/discover-teams.ts";
|
|
4
4
|
import { discoverWorkflows } from "../workflows/discover-workflows.ts";
|
|
5
|
-
import { discoverSkills } from "../skills/discover-skills.ts";
|
|
5
|
+
import { discoverSkills, getLastDiscoveryDiagnostics } from "../skills/discover-skills.ts";
|
|
6
|
+
import type { SkillValidationError } from "../skills/validate.ts";
|
|
6
7
|
import type { PiTeamsConfig } from "../config/config.ts";
|
|
7
8
|
|
|
8
9
|
export type CapabilityKind = "team" | "workflow" | "agent" | "skill" | "tool" | "runtime";
|
|
@@ -114,3 +115,21 @@ export function buildCapabilityInventory(cwd: string, config?: PiTeamsConfig): C
|
|
|
114
115
|
|
|
115
116
|
return items.sort((a, b) => a.id.localeCompare(b.id));
|
|
116
117
|
}
|
|
118
|
+
|
|
119
|
+
/**
|
|
120
|
+
* L3: surface skill-validation diagnostics from the most recent
|
|
121
|
+
* `discoverSkills()` call. Skills that fail HARD validation are silently
|
|
122
|
+
* excluded from `buildCapabilityInventory()`; this function exposes the
|
|
123
|
+
* underlying errors so users see WHY a skill is missing instead of just
|
|
124
|
+
* noticing the absence.
|
|
125
|
+
*
|
|
126
|
+
* Soft warnings (unknown props, derived-name fallback) are also returned so
|
|
127
|
+
* skill authors can clean up their frontmatter over time.
|
|
128
|
+
*
|
|
129
|
+
* IMPORTANT: `discoverSkills()` is internally cached for 30s, so this
|
|
130
|
+
* function returns diagnostics from whichever call last populated the cache.
|
|
131
|
+
* Call `buildCapabilityInventory(cwd)` first to ensure a fresh pass.
|
|
132
|
+
*/
|
|
133
|
+
export function buildSkillValidationDiagnostics(): SkillValidationError[] {
|
|
134
|
+
return getLastDiscoveryDiagnostics();
|
|
135
|
+
}
|
package/src/runtime/child-pi.ts
CHANGED
|
@@ -95,6 +95,17 @@ export function killProcessPid(pid: number): void {
|
|
|
95
95
|
}
|
|
96
96
|
|
|
97
97
|
function killProcessTree(pid: number | undefined, child?: ChildProcess): void {
|
|
98
|
+
// Phase-0 diagnostic (HB-003a): capture who invoked killProcessTree so the
|
|
99
|
+
// exit-null race has a provenance trail. .stack is best-effort (may be undefined
|
|
100
|
+
// under deep async), so we snapshot it up front and fall back to "(no stack)".
|
|
101
|
+
try {
|
|
102
|
+
const callerStack = new Error("killProcessTree caller").stack ?? "(no stack)";
|
|
103
|
+
logInternalError(
|
|
104
|
+
"child-pi.kill-process-tree-invoked",
|
|
105
|
+
new Error(`pid=${pid} called from:\n${callerStack.split("\n").slice(0, 8).join("\n")}`),
|
|
106
|
+
`pid=${pid}`,
|
|
107
|
+
);
|
|
108
|
+
} catch { /* diagnostic best-effort */ }
|
|
98
109
|
if (!pid || !Number.isInteger(pid) || pid <= 0) return;
|
|
99
110
|
if (child && child.exitCode !== null) return;
|
|
100
111
|
killProcessPid(pid);
|
|
@@ -124,6 +135,18 @@ export interface ChildPiLifecycleEvent {
|
|
|
124
135
|
stderrExcerpt?: string;
|
|
125
136
|
/** Timestamp (ISO). */
|
|
126
137
|
ts: string;
|
|
138
|
+
/** Phase-0 diagnostic (HB-003a): the signal that killed the child (when
|
|
139
|
+
* available). Was previously discarded after building the error string. */
|
|
140
|
+
signal?: string;
|
|
141
|
+
/** Phase-0 diagnostic (HB-003a): final-drain race timing, present only on
|
|
142
|
+
* exit events where a drain timer was armed. Surfaces the exit-null race. */
|
|
143
|
+
diagnostic?: {
|
|
144
|
+
finalDrainArmed: boolean;
|
|
145
|
+
forcedFinalDrain: boolean;
|
|
146
|
+
finalDrainFiredMonotonicMs?: number;
|
|
147
|
+
finalAssistantEventMonotonicMs?: number;
|
|
148
|
+
exitMonotonicMs: number;
|
|
149
|
+
};
|
|
127
150
|
}
|
|
128
151
|
|
|
129
152
|
export interface ChildPiRunInput {
|
|
@@ -267,6 +290,9 @@ export function buildChildPiSpawnOptions(cwd: string, env: NodeJS.ProcessEnv): S
|
|
|
267
290
|
"PI_CREW_MAX_DEPTH",
|
|
268
291
|
"PI_CREW_INHERIT_PROJECT_CONTEXT",
|
|
269
292
|
"PI_CREW_INHERIT_SKILLS",
|
|
293
|
+
// PI_CREW_KIND marks this process as a crew sub-agent (vs the user's main session).
|
|
294
|
+
// `team action='doctor' focus='zombies'` matches it to safely list orphaned sub-agents only.
|
|
295
|
+
"PI_CREW_KIND",
|
|
270
296
|
// PI_CREW_PARENT_PID is needed by child-pi's parent-guard (uses
|
|
271
297
|
// process.kill(pid, 0) liveness check). The PID is not a secret.
|
|
272
298
|
"PI_CREW_PARENT_PID",
|
|
@@ -354,7 +380,14 @@ function appendTranscript(input: ChildPiRunInput, line: string): void {
|
|
|
354
380
|
|
|
355
381
|
/**
 * Compact an over-long string down to roughly `maxChars` characters of
 * original content.
 *
 * Strings that already fit are returned unchanged. Longer strings keep their
 * head and tail, joined by a marker line stating how many characters were
 * dropped. The marker is not counted against the budget, so the returned
 * string is longer than `maxChars`.
 *
 * @param value - Text to compact.
 * @param maxChars - Content budget in characters (defaults to
 *   MAX_COMPACT_CONTENT_CHARS). Non-positive values keep no original content.
 * @returns `value` itself when it fits, otherwise head + marker + tail.
 */
function compactString(value: string, maxChars = MAX_COMPACT_CONTENT_CHARS): string {
	if (value.length <= maxChars) return value;
	// L4: head + tail instead of head-only. Keeps closing markdown structure
	// (code fences, headings, list tails) instead of dropping them — a
	// head-only slice leaves unclosed ``` fences behind. Head gets 75%
	// (opening structure + bulk of content); tail gets 25% (closing structure).
	//
	// Clamp to a non-negative integer budget: a negative budget would make
	// slice(0, head) count from the end of the string.
	const budget = Math.max(0, Math.floor(maxChars));
	const head = Math.floor(budget * 0.75);
	const tail = budget - head;
	// slice(-0) is slice(0), i.e. the WHOLE string, so a zero-length tail must
	// be special-cased or the "compacted" output would contain the full input.
	const tailText = tail > 0 ? value.slice(-tail) : "";
	return `${value.slice(0, head)}\n...[pi-crew compacted ${value.length - budget} chars, head+tail preserved]...\n${tailText}`;
}
|
|
359
392
|
|
|
360
393
|
function compactValue(value: unknown): unknown {
|
|
@@ -577,6 +610,15 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
|
|
|
577
610
|
let noResponseTimer: NodeJS.Timeout | undefined;
|
|
578
611
|
const finalDrainMs = input.finalDrainMs ?? FINAL_DRAIN_MS;
|
|
579
612
|
const hardKillMs = input.hardKillMs ?? HARD_KILL_MS;
|
|
613
|
+
// Phase-0 diagnostic (HB-003a): track the final-drain race that produces
|
|
614
|
+
// `exit null` for ctx.agent({disableTools:true}). These vars are READ-ONLY
|
|
615
|
+
// instrumentation — no behavior change. finalDrainArmed lets the close
|
|
616
|
+
// handler know a drain timer existed even after clearFinalDrainTimers() ran;
|
|
617
|
+
// spawnMonotonicMs gives us relative timing to distinguish a race from a crash.
|
|
618
|
+
let finalDrainArmed = false;
|
|
619
|
+
let finalDrainFiredMonotonicMs: number | undefined;
|
|
620
|
+
const spawnMonotonicMs = performance.now();
|
|
621
|
+
let finalAssistantEventMonotonicMs: number | undefined;
|
|
580
622
|
// FIX (Round 14): Bound the env-controlled response timeout to
|
|
581
623
|
// [1_000ms, 3_600_000ms] (1s–1h) so a hostile or accidental value
|
|
582
624
|
// (e.g. 1, or 999_999_999) cannot disable the timeout or cause
|
|
@@ -680,20 +722,27 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
|
|
|
680
722
|
if (maxTurns !== undefined && !softLimitReached && turnCount >= maxTurns) {
|
|
681
723
|
softLimitReached = true;
|
|
682
724
|
// Inject steer via stdin to tell child to wrap up.
|
|
683
|
-
//
|
|
684
|
-
//
|
|
685
|
-
//
|
|
725
|
+
// Steer injection is ADVISORY: it asks the worker to wrap up. The real
|
|
726
|
+
// enforcement is the hard-abort at maxTurns + graceTurns (below). So a
|
|
727
|
+
// failed/non-writable stdin must NOT kill the worker — that destroys a
|
|
728
|
+
// valid answer already in stdout (Phase-0 root cause of the
|
|
729
|
+
// disableTools/maxTurns:1 exit-null bug). Just log + let the hard-abort
|
|
730
|
+
// path handle a genuinely runaway worker.
|
|
686
731
|
if (child.stdin?.writable) {
|
|
687
732
|
const steerPayload = JSON.stringify({ type: "steer", message: "You have reached your turn limit. Wrap up immediately — provide your final answer now." }) + "\n";
|
|
688
733
|
const writeSucceeded = child.stdin.write(steerPayload);
|
|
689
734
|
if (!writeSucceeded) {
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
735
|
+
// Normal Node backpressure: the payload is buffered and will flush on
|
|
736
|
+
// 'drain'. NOT a failure — do NOT kill the worker. The steer is
|
|
737
|
+
// advisory; if the worker ignores it and runs past maxTurns +
|
|
738
|
+
// graceTurns, the hard-abort below terminates it.
|
|
739
|
+
logInternalError("child-pi.steer-backpressure", new Error("stdin write returned false (normal backpressure); steer buffered, worker NOT killed"), `pid=${child.pid}`);
|
|
693
740
|
}
|
|
694
741
|
} else {
|
|
695
|
-
|
|
696
|
-
|
|
742
|
+
// stdin closed (worker already finished) or otherwise unwritable.
|
|
743
|
+
// Also advisory — the worker is done or nearly done; let it exit
|
|
744
|
+
// naturally. Hard-abort remains the safety net for true runaways.
|
|
745
|
+
logInternalError("child-pi.steer-not-writable", new Error("stdin not writable when attempting steer injection (worker may be done); worker NOT killed"), `pid=${child.pid}`);
|
|
697
746
|
}
|
|
698
747
|
} else if (maxTurns !== undefined && softLimitReached && turnCount >= maxTurns + (graceTurns ?? 5)) {
|
|
699
748
|
// Hard abort — terminate after grace turns
|
|
@@ -708,9 +757,12 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
|
|
|
708
757
|
}
|
|
709
758
|
input.onJsonEvent?.(event);
|
|
710
759
|
if (!isFinalAssistantEvent(event) || childExited || settled || finalDrainTimer) return;
|
|
760
|
+
finalAssistantEventMonotonicMs = performance.now();
|
|
761
|
+
finalDrainArmed = true; // Phase-0 diagnostic: track that a drain timer was created.
|
|
711
762
|
finalDrainTimer = setTimeout(() => {
|
|
712
763
|
if (settled || childExited) return;
|
|
713
764
|
forcedFinalDrain = true;
|
|
765
|
+
finalDrainFiredMonotonicMs = performance.now(); // Phase-0 diagnostic: race timing.
|
|
714
766
|
input.onLifecycleEvent?.({ type: "final_drain", pid: child.pid, ts: new Date().toISOString() });
|
|
715
767
|
try {
|
|
716
768
|
child.kill(process.platform === "win32" ? undefined : "SIGTERM");
|
|
@@ -765,7 +817,27 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
|
|
|
765
817
|
}
|
|
766
818
|
// Catch all errors from settle to prevent unhandled rejection from propagating
|
|
767
819
|
try {
|
|
768
|
-
resolve({
|
|
820
|
+
resolve({
|
|
821
|
+
...result,
|
|
822
|
+
exitStatus: result.exitStatus ?? {
|
|
823
|
+
exitCode: result.exitCode,
|
|
824
|
+
cancelled: abortRequested,
|
|
825
|
+
timedOut: responseTimeoutHit,
|
|
826
|
+
killed: hardKilled,
|
|
827
|
+
// Phase-0 diagnostic (HB-003a): surface the final-drain race state.
|
|
828
|
+
// finalDrainArmed lets Phase 1 decide whether a signal-death (exitCode=null)
|
|
829
|
+
// should be treated as a forced final drain. READ-ONLY for now.
|
|
830
|
+
...(finalDrainArmed || forcedFinalDrain
|
|
831
|
+
? {
|
|
832
|
+
finalDrainArmed,
|
|
833
|
+
forcedFinalDrain,
|
|
834
|
+
finalDrainFiredMonotonicMs,
|
|
835
|
+
}
|
|
836
|
+
: {}),
|
|
837
|
+
cleanupErrors,
|
|
838
|
+
finalDrainMs,
|
|
839
|
+
},
|
|
840
|
+
});
|
|
769
841
|
} catch (resolveError) {
|
|
770
842
|
logInternalError("child-pi.settle-resolve", resolveError, `result=${JSON.stringify({ exitCode: result.exitCode })}`);
|
|
771
843
|
}
|
|
@@ -866,7 +938,30 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
|
|
|
866
938
|
rejectPendingOperations(exitError);
|
|
867
939
|
}
|
|
868
940
|
try {
|
|
869
|
-
|
|
941
|
+
// Phase-0 diagnostic (HB-003a): capture signal + drain timing in the
|
|
942
|
+
// exit lifecycle event so the exit-null race is diagnosable instead of
|
|
943
|
+
// opaque. `signal` was previously discarded after building the error msg.
|
|
944
|
+
input.onLifecycleEvent?.({
|
|
945
|
+
type: "exit",
|
|
946
|
+
pid: child.pid,
|
|
947
|
+
exitCode: code,
|
|
948
|
+
ts: new Date().toISOString(),
|
|
949
|
+
error: exitError?.message,
|
|
950
|
+
stderrExcerpt: isUnexpectedExit ? stderr.slice(-1000) || undefined : undefined,
|
|
951
|
+
// Phase-0 diagnostic fields (kept optional — no type change required).
|
|
952
|
+
...(signal ? { signal } : {}),
|
|
953
|
+
...(finalDrainArmed || forcedFinalDrain
|
|
954
|
+
? {
|
|
955
|
+
diagnostic: {
|
|
956
|
+
finalDrainArmed,
|
|
957
|
+
forcedFinalDrain,
|
|
958
|
+
finalDrainFiredMonotonicMs,
|
|
959
|
+
finalAssistantEventMonotonicMs,
|
|
960
|
+
exitMonotonicMs: performance.now() - spawnMonotonicMs,
|
|
961
|
+
},
|
|
962
|
+
}
|
|
963
|
+
: {}),
|
|
964
|
+
});
|
|
870
965
|
} catch (err) {
|
|
871
966
|
logInternalError("child-pi.on-lifecycle-event", err, `event=exit, pid=${child.pid}`);
|
|
872
967
|
}
|
|
@@ -902,6 +997,9 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
|
|
|
902
997
|
const finalExitCode = forcedFinalDrain && !timeoutError ? 0 : exitCode;
|
|
903
998
|
const wasGraceAborted = softLimitReached && turnCount >= (maxTurns ?? 0) + (graceTurns ?? 5);
|
|
904
999
|
const wasParentAborted = abortDueToParentSignal && !wasGraceAborted;
|
|
1000
|
+
// steerInjectionFailed is now always false (Phase-1 fix: steer backpressure
|
|
1001
|
+
// is logged, not fatal). The steerError branch is retained for safety in
|
|
1002
|
+
// case a future change reintroduces a fatal steer path.
|
|
905
1003
|
const steerError = steerInjectionFailed ? "Steer injection failed due to stdin backpressure; process killed" : undefined;
|
|
906
1004
|
settle({ exitCode: finalExitCode, stdout, stderr, ...(timeoutError ? { error: timeoutError.error } : {}), ...(steerError ? { error: steerError } : {}), aborted: wasGraceAborted || wasParentAborted, steered: softLimitReached && !wasGraceAborted, exitStatus: { exitCode: finalExitCode, cancelled: abortRequested, timedOut: responseTimeoutHit, killed: hardKilled, cleanupErrors, finalDrainMs } });
|
|
907
1005
|
});
|
package/src/runtime/deterministic-ast.ts
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* deterministic-ast.ts — AST-based determinism enforcement for dynamic-workflow scripts (round-13 P0-2).
|
|
3
|
+
*
|
|
4
|
+
* Rejects `Date.now()`, `Math.random()`, and `new Date()` at workflow-load time
|
|
5
|
+
* using a true AST walk (not regex) so that:
|
|
6
|
+
* - Prompts mentioning "Date.now()" as string literals are accepted.
|
|
7
|
+
* - Comments containing "Date.now()" are accepted.
|
|
8
|
+
* - `Date.parse()`, `Date.UTC()`, `Math.floor()`, etc. are accepted (only `now` and `random` are blocked).
|
|
9
|
+
*
|
|
10
|
+
* Adapted from pi-dynamic-workflows/src/workflow.ts (MIT) — see NOTICE.md.
|
|
11
|
+
*
|
|
12
|
+
* The walker uses acorn's parse() with permissive flags (allowAwaitOutsideFunction,
|
|
13
|
+
* allowReturnOutsideFunction) so we don't reject perfectly valid workflow scripts
|
|
14
|
+
* that contain top-level `await` or `return`.
|
|
15
|
+
*
|
|
16
|
+
* On parse error, this function returns silently: jiti will surface a clearer
|
|
17
|
+
* parse error downstream. We don't double-report parse errors.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import { parse } from "acorn";
|
|
21
|
+
|
|
22
|
+
const NONDETERMINISM_ERROR =
|
|
23
|
+
"Workflow scripts must be deterministic: Date.now()/Math.random()/new Date() are unavailable. These introduce non-reproducible behavior across runs. Use ctx.vars for cached state, or pass a fixed seed via ctx.setArgs(). To bypass this check (escape hatch), set PI_CREW_DWF_SKIP_DETERMINISM_CHECK=1.";
|
|
24
|
+
|
|
25
|
+
export class DeterminismError extends Error {
|
|
26
|
+
constructor() {
|
|
27
|
+
super(NONDETERMINISM_ERROR);
|
|
28
|
+
this.name = "DeterminismError";
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* Parse `script` and walk the AST looking for non-deterministic calls.
|
|
34
|
+
* Throws DeterminismError on the first hit. Silently returns on parse error
|
|
35
|
+
* (jiti will produce a clearer message downstream).
|
|
36
|
+
*/
|
|
37
|
+
export function assertDeterministicScript(script: string): void {
|
|
38
|
+
let ast: AstNode;
|
|
39
|
+
try {
|
|
40
|
+
ast = parse(script, {
|
|
41
|
+
ecmaVersion: "latest",
|
|
42
|
+
sourceType: "module",
|
|
43
|
+
allowAwaitOutsideFunction: true,
|
|
44
|
+
allowReturnOutsideFunction: true,
|
|
45
|
+
ranges: false,
|
|
46
|
+
}) as unknown as AstNode;
|
|
47
|
+
} catch {
|
|
48
|
+
// Parse errors are handled by jiti downstream — don't double-report.
|
|
49
|
+
return;
|
|
50
|
+
}
|
|
51
|
+
assertDeterministicAst(ast);
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* Escape hatch: when PI_CREW_DWF_SKIP_DETERMINISM_CHECK=1 the check is bypassed.
|
|
56
|
+
* Power users may need this when a workflow legitimately depends on time/random
|
|
57
|
+
* (e.g. randomized benchmark scripts).
|
|
58
|
+
*/
|
|
59
|
+
export function isDeterminismCheckEnabled(): boolean {
|
|
60
|
+
return process.env.PI_CREW_DWF_SKIP_DETERMINISM_CHECK !== "1";
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
// AST walker
|
|
65
|
+
// ---------------------------------------------------------------------------
|
|
66
|
+
|
|
67
|
+
interface AstNode {
|
|
68
|
+
type: string;
|
|
69
|
+
[key: string]: unknown;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
function asAstNode(value: unknown): AstNode | undefined {
|
|
73
|
+
if (!value || typeof value !== "object") return undefined;
|
|
74
|
+
const obj = value as Record<string, unknown>;
|
|
75
|
+
if (typeof obj.type !== "string") return undefined;
|
|
76
|
+
return obj as AstNode;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
function astChildren(node: AstNode): AstNode[] {
|
|
80
|
+
const out: AstNode[] = [];
|
|
81
|
+
for (const value of Object.values(node)) {
|
|
82
|
+
if (Array.isArray(value)) {
|
|
83
|
+
for (const item of value) {
|
|
84
|
+
const child = asAstNode(item);
|
|
85
|
+
if (child) out.push(child);
|
|
86
|
+
}
|
|
87
|
+
} else {
|
|
88
|
+
const child = asAstNode(value);
|
|
89
|
+
if (child) out.push(child);
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
return out;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
/**
 * Depth-first walk over `node` and all of its descendants.
 *
 * @param node - Root of the (sub)tree to check.
 * @throws DeterminismError on the first non-deterministic construct found:
 *   `Date.now()`, `Math.random()`, `new Date(...)`, or a bare `Date(...)` call.
 */
function assertDeterministicAst(node: AstNode): void {
	if (isDateNowCall(node) || isMathRandomCall(node) || isNewDateExpression(node) || isBareDateCall(node)) {
		throw new DeterminismError();
	}
	for (const child of astChildren(node)) assertDeterministicAst(child);
}

/** True when `node` is a call whose callee is the member expression `Date.now`. */
function isDateNowCall(node: AstNode): boolean {
	return node.type === "CallExpression" && isMemberExpression(node, "callee", "Date", "now");
}

/** True when `node` is a call whose callee is the member expression `Math.random`. */
function isMathRandomCall(node: AstNode): boolean {
	return node.type === "CallExpression" && isMemberExpression(node, "callee", "Math", "random");
}

/** True when `node` is `new Date(...)` — with or without arguments. */
function isNewDateExpression(node: AstNode): boolean {
	if (node.type !== "NewExpression") return false;
	const callee = asAstNode(node.callee);
	return callee?.type === "Identifier" && callee.name === "Date";
}

/**
 * True when `node` is `Date(...)` called as a plain function (no `new`).
 * Called this way, `Date` ignores its arguments and returns the current time
 * as a string, so it is just as non-deterministic as `Date.now()`.
 */
function isBareDateCall(node: AstNode): boolean {
	if (node.type !== "CallExpression") return false;
	const callee = asAstNode(node.callee);
	return callee?.type === "Identifier" && callee.name === "Date";
}
|
|
115
|
+
|
|
116
|
+
/**
|
|
117
|
+
* Test whether `node[childKey]` is a MemberExpression of shape `objectName.propertyName`,
|
|
118
|
+
* where the property is either a static Identifier or a resolvable static string.
|
|
119
|
+
* `childKey` is the property name on `node` (usually "callee" for CallExpression).
|
|
120
|
+
*/
|
|
121
|
+
function isMemberExpression(node: AstNode, childKey: string, objectName: string, propertyName: string): boolean {
|
|
122
|
+
const child = asAstNode(node[childKey]);
|
|
123
|
+
if (!child || child.type !== "MemberExpression") return false;
|
|
124
|
+
const object = asAstNode(child.object);
|
|
125
|
+
if (!object || object.type !== "Identifier" || object.name !== objectName) return false;
|
|
126
|
+
return propertyNameOf(child) === propertyName;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
function propertyNameOf(node: AstNode): string | undefined {
|
|
130
|
+
const computed = node.computed === true;
|
|
131
|
+
const property = asAstNode(node.property);
|
|
132
|
+
if (!property) return undefined;
|
|
133
|
+
if (!computed && property.type === "Identifier") {
|
|
134
|
+
return property.name as string | undefined;
|
|
135
|
+
}
|
|
136
|
+
return staticStringOf(property);
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
function staticStringOf(node: AstNode | undefined): string | undefined {
|
|
140
|
+
if (!node) return undefined;
|
|
141
|
+
if (node.type === "Literal" && typeof node.value === "string") return node.value;
|
|
142
|
+
if (node.type === "TemplateLiteral") {
|
|
143
|
+
const expressions = node.expressions;
|
|
144
|
+
if (Array.isArray(expressions) && expressions.length > 0) return undefined;
|
|
145
|
+
const quasis = node.quasis;
|
|
146
|
+
if (!Array.isArray(quasis)) return undefined;
|
|
147
|
+
return quasis
|
|
148
|
+
.map((q) => {
|
|
149
|
+
const quasi = asAstNode(q);
|
|
150
|
+
const value = quasi?.value as { cooked?: string; raw?: string } | undefined;
|
|
151
|
+
return value?.cooked ?? value?.raw ?? "";
|
|
152
|
+
})
|
|
153
|
+
.join("");
|
|
154
|
+
}
|
|
155
|
+
if (node.type === "BinaryExpression" && node.operator === "+") {
|
|
156
|
+
const left = staticStringOf(asAstNode(node.left));
|
|
157
|
+
const right = staticStringOf(asAstNode(node.right));
|
|
158
|
+
if (left !== undefined && right !== undefined) return left + right;
|
|
159
|
+
}
|
|
160
|
+
return undefined;
|
|
161
|
+
}
|