pi-crew 0.1.49 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +74 -1
- package/README.md +176 -781
- package/agents/analyst.md +11 -11
- package/agents/critic.md +11 -11
- package/agents/executor.md +11 -11
- package/agents/explorer.md +11 -11
- package/agents/planner.md +11 -11
- package/agents/reviewer.md +11 -11
- package/agents/security-reviewer.md +11 -11
- package/agents/test-engineer.md +11 -11
- package/agents/verifier.md +70 -11
- package/agents/writer.md +11 -11
- package/docs/actions-reference.md +595 -0
- package/docs/commands-reference.md +347 -0
- package/docs/runtime-flow.md +148 -148
- package/index.ts +6 -6
- package/package.json +99 -99
- package/skills/async-worker-recovery/SKILL.md +42 -42
- package/skills/context-artifact-hygiene/SKILL.md +52 -52
- package/skills/delegation-patterns/SKILL.md +54 -54
- package/skills/mailbox-interactive/SKILL.md +40 -40
- package/skills/model-routing-context/SKILL.md +39 -39
- package/skills/multi-perspective-review/SKILL.md +58 -58
- package/skills/observability-reliability/SKILL.md +41 -41
- package/skills/orchestration/SKILL.md +157 -157
- package/skills/ownership-session-security/SKILL.md +41 -41
- package/skills/pi-extension-lifecycle/SKILL.md +39 -39
- package/skills/requirements-to-task-packet/SKILL.md +63 -63
- package/skills/resource-discovery-config/SKILL.md +41 -41
- package/skills/runtime-state-reader/SKILL.md +44 -44
- package/skills/secure-agent-orchestration-review/SKILL.md +45 -45
- package/skills/state-mutation-locking/SKILL.md +42 -42
- package/skills/systematic-debugging/SKILL.md +67 -67
- package/skills/ui-render-performance/SKILL.md +39 -39
- package/skills/verification-before-done/SKILL.md +57 -57
- package/skills/worktree-isolation/SKILL.md +39 -39
- package/src/adapters/claude-adapter.ts +25 -0
- package/src/adapters/codex-adapter.ts +21 -0
- package/src/adapters/cursor-adapter.ts +17 -0
- package/src/adapters/export-util.ts +137 -0
- package/src/adapters/index.ts +15 -0
- package/src/adapters/registry.ts +18 -0
- package/src/adapters/types.ts +23 -0
- package/src/agents/agent-config.ts +2 -0
- package/src/agents/agent-search.ts +98 -98
- package/src/agents/discover-agents.ts +2 -1
- package/src/config/config.ts +14 -1
- package/src/config/defaults.ts +5 -5
- package/src/config/drift-detector.ts +211 -0
- package/src/config/markers.ts +327 -0
- package/src/config/resilient-parser.ts +108 -0
- package/src/config/suggestions.ts +74 -0
- package/src/extension/cross-extension-rpc.ts +103 -82
- package/src/extension/project-init.ts +36 -4
- package/src/extension/register.ts +67 -22
- package/src/extension/registration/commands.ts +77 -8
- package/src/extension/registration/subagent-tools.ts +10 -1
- package/src/extension/registration/team-tool.ts +10 -1
- package/src/extension/registration/viewers.ts +48 -34
- package/src/extension/run-bundle-schema.ts +89 -89
- package/src/extension/run-export.ts +26 -12
- package/src/extension/run-import.ts +25 -1
- package/src/extension/run-index.ts +5 -1
- package/src/extension/run-maintenance.ts +142 -68
- package/src/extension/team-manager-command.ts +10 -1
- package/src/extension/team-tool/context.ts +1 -1
- package/src/extension/team-tool/doctor.ts +28 -3
- package/src/extension/team-tool/handle-settings.ts +195 -188
- package/src/extension/team-tool/inspect.ts +41 -41
- package/src/extension/team-tool/intent-policy.ts +42 -42
- package/src/extension/team-tool/lifecycle-actions.ts +27 -8
- package/src/extension/team-tool/plan.ts +19 -19
- package/src/extension/team-tool/run.ts +12 -1
- package/src/extension/team-tool.ts +14 -3
- package/src/i18n.ts +184 -184
- package/src/observability/exporters/otlp-exporter.ts +92 -77
- package/src/prompt/prompt-runtime.ts +72 -72
- package/src/runtime/agent-memory.ts +72 -72
- package/src/runtime/agent-observability.ts +114 -114
- package/src/runtime/async-marker.ts +26 -26
- package/src/runtime/attention-events.ts +28 -28
- package/src/runtime/auto-resume.ts +100 -0
- package/src/runtime/background-runner.ts +11 -1
- package/src/runtime/cancellation-token.ts +89 -89
- package/src/runtime/cancellation.ts +61 -61
- package/src/runtime/capability-inventory.ts +116 -116
- package/src/runtime/child-pi.ts +7 -2
- package/src/runtime/compaction-summary.ts +271 -0
- package/src/runtime/completion-guard.ts +190 -190
- package/src/runtime/concurrency.ts +3 -1
- package/src/runtime/crash-recovery.ts +33 -0
- package/src/runtime/delta-conflict.ts +360 -0
- package/src/runtime/diagnostic-export.ts +3 -1
- package/src/runtime/direct-run.ts +35 -35
- package/src/runtime/event-stream-bridge.ts +3 -1
- package/src/runtime/foreground-control.ts +82 -82
- package/src/runtime/green-contract.ts +46 -46
- package/src/runtime/group-join.ts +106 -106
- package/src/runtime/heartbeat-gradient.ts +28 -28
- package/src/runtime/heartbeat-watcher.ts +124 -124
- package/src/runtime/iteration-hooks.ts +262 -0
- package/src/runtime/live-agent-control.ts +88 -88
- package/src/runtime/live-control-realtime.ts +36 -36
- package/src/runtime/live-extension-bridge.ts +150 -150
- package/src/runtime/live-irc.ts +92 -92
- package/src/runtime/live-session-health.ts +100 -100
- package/src/runtime/loop-gates.ts +129 -0
- package/src/runtime/metric-parser.ts +40 -0
- package/src/runtime/notebook-helpers.ts +90 -90
- package/src/runtime/orphan-sentinel.ts +7 -7
- package/src/runtime/parallel-research.ts +44 -44
- package/src/runtime/phase-progress.ts +217 -0
- package/src/runtime/pi-args.ts +38 -2
- package/src/runtime/pi-json-output.ts +111 -111
- package/src/runtime/pi-spawn.ts +74 -6
- package/src/runtime/policy-engine.ts +79 -79
- package/src/runtime/post-checks.ts +122 -0
- package/src/runtime/process-status.ts +14 -1
- package/src/runtime/progress-event-coalescer.ts +43 -43
- package/src/runtime/prose-compressor.ts +164 -164
- package/src/runtime/recovery-recipes.ts +74 -74
- package/src/runtime/result-extractor.ts +121 -121
- package/src/runtime/role-permission.ts +39 -39
- package/src/runtime/sensitive-paths.ts +3 -3
- package/src/runtime/session-resources.ts +25 -25
- package/src/runtime/session-snapshot.ts +59 -59
- package/src/runtime/session-usage.ts +79 -79
- package/src/runtime/sidechain-output.ts +29 -29
- package/src/runtime/stream-preview.ts +177 -177
- package/src/runtime/supervisor-contact.ts +59 -59
- package/src/runtime/task-display.ts +38 -38
- package/src/runtime/task-graph.ts +207 -0
- package/src/runtime/task-quality.ts +207 -0
- package/src/runtime/task-runner/capabilities.ts +78 -78
- package/src/runtime/task-runner/live-executor.ts +7 -1
- package/src/runtime/task-runner/progress.ts +119 -119
- package/src/runtime/task-runner/prompt-builder.ts +1 -1
- package/src/runtime/task-runner/prompt-pipeline.ts +64 -64
- package/src/runtime/task-runner/result-utils.ts +14 -14
- package/src/runtime/task-runner/run-projection.ts +103 -103
- package/src/runtime/task-runner/state-helpers.ts +22 -22
- package/src/runtime/team-runner.ts +126 -7
- package/src/runtime/worker-heartbeat.ts +21 -21
- package/src/runtime/worker-startup.ts +57 -57
- package/src/runtime/workflow-state.ts +187 -0
- package/src/runtime/workspace-tree.ts +298 -298
- package/src/schema/config-schema.ts +12 -0
- package/src/schema/validation-types.ts +148 -0
- package/src/skills/skill-templates.ts +374 -0
- package/src/state/active-run-registry.ts +35 -11
- package/src/state/atomic-write.ts +33 -26
- package/src/state/contracts.ts +1 -0
- package/src/state/event-reconstructor.ts +217 -0
- package/src/state/locks.ts +2 -11
- package/src/state/mailbox.ts +4 -3
- package/src/state/state-store.ts +32 -14
- package/src/state/task-claims.ts +44 -44
- package/src/state/types.ts +9 -0
- package/src/state/usage.ts +29 -29
- package/src/subagents/async-entry.ts +1 -1
- package/src/subagents/index.ts +3 -3
- package/src/subagents/live/control.ts +1 -1
- package/src/subagents/live/manager.ts +1 -1
- package/src/subagents/live/realtime.ts +1 -1
- package/src/subagents/live/session-runtime.ts +1 -1
- package/src/subagents/manager.ts +1 -1
- package/src/subagents/spawn.ts +1 -1
- package/src/teams/team-serializer.ts +38 -38
- package/src/types/diff.d.ts +18 -18
- package/src/ui/crew-footer.ts +101 -101
- package/src/ui/crew-select-list.ts +111 -111
- package/src/ui/crew-widget.ts +9 -4
- package/src/ui/dashboard-panes/cancellation-pane.ts +42 -42
- package/src/ui/dashboard-panes/capability-pane.ts +59 -59
- package/src/ui/dashboard-panes/mailbox-pane.ts +35 -35
- package/src/ui/dashboard-panes/metrics-pane.ts +34 -34
- package/src/ui/dashboard-panes/progress-pane.ts +11 -0
- package/src/ui/dynamic-border.ts +25 -25
- package/src/ui/layout-primitives.ts +106 -106
- package/src/ui/loaders.ts +158 -158
- package/src/ui/powerbar-publisher.ts +6 -0
- package/src/ui/render-coalescer.ts +51 -51
- package/src/ui/render-diff.ts +119 -119
- package/src/ui/render-scheduler.ts +143 -143
- package/src/ui/run-action-dispatcher.ts +10 -1
- package/src/ui/spinner.ts +17 -17
- package/src/ui/status-colors.ts +58 -58
- package/src/ui/syntax-highlight.ts +116 -116
- package/src/ui/transcript-entries.ts +258 -258
- package/src/utils/completion-dedupe.ts +63 -63
- package/src/utils/frontmatter.ts +68 -68
- package/src/utils/git.ts +262 -262
- package/src/utils/ids.ts +17 -17
- package/src/utils/incremental-reader.ts +104 -104
- package/src/utils/names.ts +27 -27
- package/src/utils/redaction.ts +44 -44
- package/src/utils/safe-paths.ts +47 -47
- package/src/utils/scan-cache.ts +136 -136
- package/src/utils/sleep.ts +40 -26
- package/src/utils/task-name-generator.ts +337 -337
- package/src/workflows/validate-workflow.ts +40 -40
- package/src/worktree/branch-freshness.ts +45 -45
- package/src/worktree/worktree-manager.ts +11 -3
- package/teams/default.team.md +12 -12
- package/teams/fast-fix.team.md +11 -11
- package/teams/implementation.team.md +18 -18
- package/teams/parallel-research.team.md +14 -14
- package/teams/research.team.md +11 -11
- package/teams/review.team.md +12 -12
- package/workflows/default.workflow.md +30 -29
- package/workflows/fast-fix.workflow.md +23 -22
- package/workflows/implementation.workflow.md +43 -38
- package/workflows/parallel-research.workflow.md +46 -46
- package/workflows/research.workflow.md +22 -22
- package/workflows/review.workflow.md +30 -30
- package/docs/refactor-tasks-phase3.md +0 -394
- package/docs/refactor-tasks-phase4.md +0 -564
- package/docs/refactor-tasks-phase5.md +0 -402
- package/docs/refactor-tasks-phase6.md +0 -662
- package/docs/refactor-tasks.md +0 -1484
- package/docs/research/AGENT-EXECUTION-ARCHITECTURE.md +0 -261
- package/docs/research/AGENT-LIFECYCLE-COMPARISON.md +0 -111
- package/docs/research/AUDIT_OH_MY_PI.md +0 -261
- package/docs/research/AUDIT_PI_CREW.md +0 -457
- package/docs/research/CAVEMAN-DEEP-RESEARCH.md +0 -281
- package/docs/research/COMPARISON_OH_MY_PI_VS_PI_CREW.md +0 -264
- package/docs/research/DEEP-RESEARCH-PI-POWERBAR.md +0 -343
- package/docs/research/DEEP_RESEARCH_SUBAGENT_ARCHITECTURE.md +0 -480
- package/docs/research/GAP_CLOSURE_IMPLEMENTATION_PLAN.md +0 -354
- package/docs/research/IMPLEMENTATION_PLAN.md +0 -385
- package/docs/research/LIVE-SESSION-PRODUCTION-READY-PLAN.md +0 -502
- package/docs/research/OH-MY-PI-DEEP-RESEARCH-v14.7.6.md +0 -266
- package/docs/research/REMAINING-GAPS-PLAN.md +0 -363
- package/docs/research/SESSION-SUMMARY-2026-05-08.md +0 -146
- package/docs/research/UI-RESPONSIVENESS-AUDIT.md +0 -173
- package/docs/research-awesome-agent-skills-distillation.md +0 -100
- package/docs/research-extension-examples.md +0 -297
- package/docs/research-extension-system.md +0 -324
- package/docs/research-oh-my-pi-distillation.md +0 -369
- package/docs/research-optimization-plan.md +0 -548
- package/docs/research-phase10-distillation.md +0 -199
- package/docs/research-phase11-distillation.md +0 -201
- package/docs/research-phase8-operator-experience-plan.md +0 -819
- package/docs/research-phase9-observability-reliability-plan.md +0 -1190
- package/docs/research-pi-coding-agent.md +0 -357
- package/docs/research-source-pi-crew-reference.md +0 -174
- package/docs/research-ui-optimization-plan.md +0 -480
- package/docs/source-runtime-refactor-map.md +0 -107
- package/src/utils/atomic-write.ts +0 -33
package/src/runtime/child-pi.ts
CHANGED
|
@@ -8,7 +8,7 @@ import { getPiSpawnCommand } from "./pi-spawn.ts";
|
|
|
8
8
|
import { DEFAULT_CHILD_PI } from "../config/defaults.ts";
|
|
9
9
|
import { logInternalError } from "../utils/internal-error.ts";
|
|
10
10
|
import { attachPostExitStdioGuard, trySignalChild } from "./post-exit-stdio-guard.ts";
|
|
11
|
-
import { redactJsonLine } from "../utils/redaction.ts";
|
|
11
|
+
import { redactJsonLine, SECRET_KEY_PATTERN } from "../utils/redaction.ts";
|
|
12
12
|
|
|
13
13
|
const POST_EXIT_STDIO_GUARD_MS = DEFAULT_CHILD_PI.postExitStdioGuardMs;
|
|
14
14
|
const FINAL_DRAIN_MS = DEFAULT_CHILD_PI.finalDrainMs;
|
|
@@ -110,9 +110,14 @@ export interface ChildPiRunResult {
|
|
|
110
110
|
}
|
|
111
111
|
|
|
112
112
|
export function buildChildPiSpawnOptions(cwd: string, env: NodeJS.ProcessEnv): SpawnOptions {
|
|
113
|
+
// Filter out env vars whose keys match secret patterns to avoid leaking credentials to child processes
|
|
114
|
+
const filteredEnv: Record<string, string> = {};
|
|
115
|
+
for (const [key, value] of Object.entries(env)) {
|
|
116
|
+
if (value !== undefined && !SECRET_KEY_PATTERN.test(key)) filteredEnv[key] = value;
|
|
117
|
+
}
|
|
113
118
|
return {
|
|
114
119
|
cwd,
|
|
115
|
-
env: { ...
|
|
120
|
+
env: { ...filteredEnv, PI_CREW_PARENT_PID: String(process.pid) },
|
|
116
121
|
stdio: ["ignore", "pipe", "pipe"],
|
|
117
122
|
detached: process.platform !== "win32",
|
|
118
123
|
windowsHide: true,
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deterministic compaction summary — builds a markdown summary of a pi-crew run
|
|
3
|
+
* from manifest.json, tasks.json, and the tail of events.jsonl.
|
|
4
|
+
*
|
|
5
|
+
* Distilled from pi-autoresearch's compaction-summary pattern.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
class GiantLineFallbackError extends Error { constructor() { super("GIANT_LINE_FALLBACK"); this.name = "GiantLineFallbackError"; } }
|
|
9
|
+
|
|
10
|
+
import * as fs from "node:fs";
|
|
11
|
+
import * as path from "node:path";
|
|
12
|
+
import type { TeamRunManifest, TeamTaskState } from "../state/types.ts";
|
|
13
|
+
import { readJsonFile } from "../state/atomic-write.ts";
|
|
14
|
+
import type { TeamEvent } from "../state/event-log.ts";
|
|
15
|
+
|
|
16
|
+
/** Maximum number of events to read from the tail of events.jsonl. */
|
|
17
|
+
const MAX_TAIL_EVENTS = 100;
|
|
18
|
+
|
|
19
|
+
/** Maximum number of finished (completed or failed) tasks to include in the "Recent Task Results" section. */
|
|
20
|
+
const MAX_RECENT_RESULTS = 10;
|
|
21
|
+
|
|
22
|
+
/** Paths relevant to building a compaction summary for a run. */
|
|
23
|
+
export interface SummaryPaths {
|
|
24
|
+
manifestPath: string;
|
|
25
|
+
tasksPath: string;
|
|
26
|
+
eventsPath: string;
|
|
27
|
+
stateRoot: string;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* Derive the standard summary-relevant paths from a state root directory.
|
|
32
|
+
* Mirrors pi-autoresearch's `autoresearchSummaryPathsFor()`.
|
|
33
|
+
*/
|
|
34
|
+
export function summaryPathsFor(stateRoot: string): SummaryPaths {
|
|
35
|
+
return {
|
|
36
|
+
stateRoot,
|
|
37
|
+
manifestPath: path.join(stateRoot, "manifest.json"),
|
|
38
|
+
tasksPath: path.join(stateRoot, "tasks.json"),
|
|
39
|
+
eventsPath: path.join(stateRoot, "events.jsonl"),
|
|
40
|
+
};
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
/**
|
|
44
|
+
* Read the last N lines from a text file.
|
|
45
|
+
* Reads only the trailing bytes of large files to avoid loading the entire file into memory.
|
|
46
|
+
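* For files larger than TAIL_MAX_READ bytes, only the last TAIL_MAX_READ bytes are read; if that chunk contains no newline, the whole file is read instead, provided it is at most MAX_FALLBACK_READ bytes (otherwise no lines are returned).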
|
|
47
|
+
*/
|
|
48
|
+
const TAIL_MAX_READ = 256 * 1024; // 256KB — enough for ~1000 lines of JSONL
|
|
49
|
+
const MAX_FALLBACK_READ = 2 * 1024 * 1024; // 2MB — safety limit for giant-line fallback
|
|
50
|
+
function readTailLines(filePath: string, maxLines: number): string[] {
|
|
51
|
+
if (!fs.existsSync(filePath)) return [];
|
|
52
|
+
try {
|
|
53
|
+
const stat = fs.statSync(filePath);
|
|
54
|
+
const fileSize = stat.size;
|
|
55
|
+
if (fileSize === 0) return [];
|
|
56
|
+
|
|
57
|
+
// For small files, just read everything
|
|
58
|
+
if (fileSize <= TAIL_MAX_READ) {
|
|
59
|
+
const content = fs.readFileSync(filePath, "utf-8");
|
|
60
|
+
return content.split("\n").filter((line) => line.trim().length > 0).slice(-maxLines);
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// For large files, read only the last chunk.
|
|
64
|
+
// Scan forward from the read boundary to the first newline to avoid
|
|
65
|
+
// splitting mid-line or mid-multibyte UTF-8 character.
|
|
66
|
+
const fd = fs.openSync(filePath, "r");
|
|
67
|
+
try {
|
|
68
|
+
const readSize = Math.min(fileSize, TAIL_MAX_READ);
|
|
69
|
+
const readOffset = fileSize - readSize;
|
|
70
|
+
const buf = Buffer.alloc(readSize);
|
|
71
|
+
fs.readSync(fd, buf, 0, readSize, readOffset);
|
|
72
|
+
|
|
73
|
+
// Find the first newline in the buffer to avoid partial lines.
|
|
74
|
+
// This also handles multibyte UTF-8 safety — we start after a
|
|
75
|
+
// newline boundary which is always a clean character boundary.
|
|
76
|
+
let start = 0;
|
|
77
|
+
if (readOffset > 0) {
|
|
78
|
+
const firstNewline = buf.indexOf("\n");
|
|
79
|
+
if (firstNewline >= 0) {
|
|
80
|
+
start = firstNewline + 1;
|
|
81
|
+
} else {
|
|
82
|
+
// No newline found in the entire tail chunk — single giant line.
|
|
83
|
+
// Fall back to reading the full file to avoid data loss.
|
|
84
|
+
// Note: fd will be closed by the outer finally block.
|
|
85
|
+
throw new GiantLineFallbackError();
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
const content = buf.toString("utf-8", start, readSize);
|
|
90
|
+
const lines = content.split("\n").filter((line) => line.trim().length > 0);
|
|
91
|
+
return lines.slice(-maxLines);
|
|
92
|
+
} finally {
|
|
93
|
+
fs.closeSync(fd);
|
|
94
|
+
}
|
|
95
|
+
} catch (err) {
|
|
96
|
+
// Giant-line fallback: fd already closed by finally above.
|
|
97
|
+
if (err instanceof GiantLineFallbackError) {
|
|
98
|
+
const stat = fs.statSync(filePath);
|
|
99
|
+
if (stat.size > MAX_FALLBACK_READ) return [];
|
|
100
|
+
const content = fs.readFileSync(filePath, "utf-8");
|
|
101
|
+
return content.split("\n").filter((line) => line.trim().length > 0).slice(-maxLines);
|
|
102
|
+
}
|
|
103
|
+
return [];
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
/**
|
|
108
|
+
* Parse JSONL lines into TeamEvent objects, skipping malformed lines.
|
|
109
|
+
*/
|
|
110
|
+
function parseEvents(lines: string[]): TeamEvent[] {
|
|
111
|
+
const events: TeamEvent[] = [];
|
|
112
|
+
for (const line of lines) {
|
|
113
|
+
try {
|
|
114
|
+
const parsed = JSON.parse(line.trim());
|
|
115
|
+
if (typeof parsed === "object" && parsed !== null && typeof parsed.type === "string" && typeof parsed.runId === "string") {
|
|
116
|
+
events.push(parsed as TeamEvent);
|
|
117
|
+
}
|
|
118
|
+
} catch {
|
|
119
|
+
// Skip malformed lines
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
return events;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
/**
|
|
126
|
+
* Compute a human-readable duration between two ISO timestamp strings.
|
|
127
|
+
*/
|
|
128
|
+
function formatDuration(startIso?: string, endIso?: string): string {
|
|
129
|
+
if (!startIso) return "—";
|
|
130
|
+
const start = new Date(startIso).getTime();
|
|
131
|
+
if (Number.isNaN(start)) return "—";
|
|
132
|
+
const end = endIso ? new Date(endIso).getTime() : Date.now();
|
|
133
|
+
if (Number.isNaN(end)) return "—";
|
|
134
|
+
const diffMs = end - start;
|
|
135
|
+
if (diffMs < 0) return "—";
|
|
136
|
+
const seconds = Math.floor(diffMs / 1000);
|
|
137
|
+
if (seconds < 60) return `${seconds}s`;
|
|
138
|
+
const minutes = Math.floor(seconds / 60);
|
|
139
|
+
const remainingSeconds = seconds % 60;
|
|
140
|
+
if (minutes < 60) return `${minutes}m ${remainingSeconds}s`;
|
|
141
|
+
const hours = Math.floor(minutes / 60);
|
|
142
|
+
const remainingMinutes = minutes % 60;
|
|
143
|
+
return `${hours}h ${remainingMinutes}m`;
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
/**
|
|
147
|
+
* Build a deterministic compaction summary for a pi-crew run.
|
|
148
|
+
*
|
|
149
|
+
* Reads manifest.json, tasks.json, and the tail of events.jsonl to produce
|
|
150
|
+
* a self-contained markdown summary suitable for context injection.
|
|
151
|
+
*
|
|
152
|
+
* @param stateRoot - Path to the run's state root directory
|
|
153
|
+
* @returns Markdown-formatted compaction summary
|
|
154
|
+
*/
|
|
155
|
+
export function buildCompactionSummary(stateRoot: string): string {
|
|
156
|
+
const paths = summaryPathsFor(stateRoot);
|
|
157
|
+
|
|
158
|
+
// Read manifest
|
|
159
|
+
const manifest = readJsonFile<TeamRunManifest>(paths.manifestPath);
|
|
160
|
+
|
|
161
|
+
// Read tasks
|
|
162
|
+
const tasks = readJsonFile<TeamTaskState[]>(paths.tasksPath) ?? [];
|
|
163
|
+
|
|
164
|
+
// Read tail events
|
|
165
|
+
const tailLines = readTailLines(paths.eventsPath, MAX_TAIL_EVENTS);
|
|
166
|
+
const tailEvents = parseEvents(tailLines);
|
|
167
|
+
|
|
168
|
+
const sections: string[] = [];
|
|
169
|
+
|
|
170
|
+
// Section: Run Metadata
|
|
171
|
+
sections.push("# Run Summary");
|
|
172
|
+
if (manifest) {
|
|
173
|
+
sections.push("");
|
|
174
|
+
sections.push("## Run Metadata");
|
|
175
|
+
sections.push(`- **Run ID**: ${manifest.runId}`);
|
|
176
|
+
sections.push(`- **Team**: ${manifest.team}`);
|
|
177
|
+
if (manifest.workflow) {
|
|
178
|
+
sections.push(`- **Workflow**: ${manifest.workflow}`);
|
|
179
|
+
}
|
|
180
|
+
if (manifest.goal) {
|
|
181
|
+
sections.push(`- **Goal**: ${manifest.goal.replace(/\n/g, " ").slice(0, 500)}`);
|
|
182
|
+
}
|
|
183
|
+
sections.push(`- **Status**: ${manifest.status}`);
|
|
184
|
+
sections.push(`- **Created**: ${manifest.createdAt}`);
|
|
185
|
+
sections.push(`- **Updated**: ${manifest.updatedAt}`);
|
|
186
|
+
if (manifest.workspaceMode) {
|
|
187
|
+
sections.push(`- **Workspace Mode**: ${manifest.workspaceMode}`);
|
|
188
|
+
}
|
|
189
|
+
} else {
|
|
190
|
+
sections.push("");
|
|
191
|
+
sections.push("## Run Metadata");
|
|
192
|
+
sections.push("- **Status**: manifest unavailable");
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
// Section: Task Progress Table
|
|
196
|
+
sections.push("");
|
|
197
|
+
sections.push("## Task Progress");
|
|
198
|
+
if (tasks.length > 0) {
|
|
199
|
+
sections.push("");
|
|
200
|
+
sections.push("| ID | Role | Status | Duration |");
|
|
201
|
+
sections.push("|---|---|---|---|");
|
|
202
|
+
for (const task of tasks) {
|
|
203
|
+
const taskId = task.id;
|
|
204
|
+
const role = task.role || "—";
|
|
205
|
+
const status = task.status || "—";
|
|
206
|
+
const duration = formatDuration(task.startedAt, task.finishedAt);
|
|
207
|
+
sections.push(`| ${taskId} | ${role} | ${status} | ${duration} |`);
|
|
208
|
+
}
|
|
209
|
+
} else {
|
|
210
|
+
sections.push("");
|
|
211
|
+
sections.push("No tasks recorded.");
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
// Section: Recent Task Results
|
|
215
|
+
const completedTasks = tasks
|
|
216
|
+
.filter((t) => t.status === "completed" || t.status === "failed")
|
|
217
|
+
.slice(-MAX_RECENT_RESULTS);
|
|
218
|
+
|
|
219
|
+
if (completedTasks.length > 0) {
|
|
220
|
+
sections.push("");
|
|
221
|
+
sections.push("## Recent Task Results");
|
|
222
|
+
for (const task of completedTasks) {
|
|
223
|
+
sections.push("");
|
|
224
|
+
sections.push(`### ${task.id} (${task.status})`);
|
|
225
|
+
if (task.error) {
|
|
226
|
+
sections.push(`- **Error**: ${task.error}`);
|
|
227
|
+
}
|
|
228
|
+
if (task.diagnostics && Object.keys(task.diagnostics).length > 0) {
|
|
229
|
+
sections.push("- **Diagnostics**:");
|
|
230
|
+
for (const [key, value] of Object.entries(task.diagnostics)) {
|
|
231
|
+
sections.push(` - ${key}: ${JSON.stringify(value)}`);
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
if (task.metrics && Object.keys(task.metrics).length > 0) {
|
|
235
|
+
sections.push("- **Metrics**:");
|
|
236
|
+
for (const [key, value] of Object.entries(task.metrics)) {
|
|
237
|
+
sections.push(` - ${key}: ${value}`);
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
// Section: Next Steps (queued, waiting, and running tasks)
|
|
244
|
+
const pendingStatuses = new Set(["queued", "waiting", "running"]);
|
|
245
|
+
const pendingTasks = tasks.filter(
|
|
246
|
+
(t) => pendingStatuses.has(t.status),
|
|
247
|
+
);
|
|
248
|
+
if (pendingTasks.length > 0) {
|
|
249
|
+
sections.push("");
|
|
250
|
+
sections.push("## Next Steps");
|
|
251
|
+
sections.push("");
|
|
252
|
+
for (const task of pendingTasks) {
|
|
253
|
+
const title = task.title || task.role || "Untitled";
|
|
254
|
+
sections.push(`- [${task.status}] ${task.id}: ${title}`);
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
// Section: Tail Events Summary
|
|
259
|
+
if (tailEvents.length > 0) {
|
|
260
|
+
sections.push("");
|
|
261
|
+
sections.push(`## Recent Events (last ${tailEvents.length})`);
|
|
262
|
+
sections.push("");
|
|
263
|
+
for (const event of tailEvents.slice(-10)) {
|
|
264
|
+
const taskPart = event.taskId ? ` task=${event.taskId}` : "";
|
|
265
|
+
const msgPart = event.message ? ` — ${event.message}` : "";
|
|
266
|
+
sections.push(`- [${event.time}] ${event.type}${taskPart}${msgPart}`);
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
return sections.join("\n");
|
|
271
|
+
}
|
|
@@ -1,190 +1,190 @@
|
|
|
1
|
-
import * as fs from "node:fs";
|
|
2
|
-
import type { TeamTaskState, TeamRunManifest } from "../state/types.ts";
|
|
3
|
-
|
|
4
|
-
// ============================================================================
|
|
5
|
-
// Phase 1.2: Completion Mutation Guard — detects tasks that claim success but
|
|
6
|
-
// made no observable mutations. Used by task-runner.ts.
|
|
7
|
-
// ============================================================================
|
|
8
|
-
|
|
9
|
-
export interface CompletionMutationGuardInput {
|
|
10
|
-
role: string;
|
|
11
|
-
taskText?: string;
|
|
12
|
-
transcriptPath?: string;
|
|
13
|
-
stdout?: string;
|
|
14
|
-
}
|
|
15
|
-
|
|
16
|
-
export interface CompletionMutationGuardResult {
|
|
17
|
-
expectedMutation: boolean;
|
|
18
|
-
observedMutation: boolean;
|
|
19
|
-
reason?: "no_mutation_observed";
|
|
20
|
-
observedTools: string[];
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
const MUTATING_ROLES = new Set(["executor", "test-engineer"]);
|
|
24
|
-
const MUTATING_TOOLS = new Set(["edit", "write", "multi_edit", "apply_patch", "replace_in_file", "insert", "delete_files", "create_file", "overwrite", "patch"]);
|
|
25
|
-
const READ_ONLY_COMMANDS = /^(pwd|ls|dir|cat|type|sed|grep|rg|find|git\s+(status|diff|log|show|branch|remote|rev-parse|ls-files)|npm\s+(test|run\s+(typecheck|check|lint|test|ci))|node\s+--test)\b/i;
|
|
26
|
-
const MUTATING_COMMANDS = /\b(rm\s+-|del\s+|erase\s+|mv\s+|move\s+|cp\s+|copy\s+|mkdir\b|touch\b|git\s+(add|commit|push|reset|clean|checkout|switch|merge|rebase|stash)|npm\s+(install|i|uninstall|publish|version)|pnpm\s+(add|install|remove)|yarn\s+(add|install|remove)|python\b.*>|node\b.*>|echo\b.*>|Set-Content|Out-File|sed\s+-i|tee\b|dd\b.*of=|wget\b.*-O|curl\b.*-o)\b/i;
|
|
27
|
-
const READ_ONLY_HINTS = /\b(read-only|no edits?|do not edit|không sửa|khong sua|chỉ đọc|chi doc|plan only|chỉ lập plan|review only|audit only)\b/i;
|
|
28
|
-
|
|
29
|
-
function asRecord(value: unknown): Record<string, unknown> | undefined {
|
|
30
|
-
return value && typeof value === "object" && !Array.isArray(value) ? value as Record<string, unknown> : undefined;
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
function commandText(value: unknown): string {
|
|
34
|
-
const record = asRecord(value);
|
|
35
|
-
if (!record) return typeof value === "string" ? value : "";
|
|
36
|
-
for (const key of ["command", "cmd", "script", "input"]) {
|
|
37
|
-
const raw = record[key];
|
|
38
|
-
if (typeof raw === "string") return raw;
|
|
39
|
-
}
|
|
40
|
-
return JSON.stringify(record);
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
function isMutatingTool(tool: string, args: unknown): boolean {
|
|
44
|
-
const normalized = tool.toLowerCase();
|
|
45
|
-
if (MUTATING_TOOLS.has(normalized)) return true;
|
|
46
|
-
if (normalized === "bash" || normalized === "shell" || normalized === "powershell") {
|
|
47
|
-
const command = commandText(args).trim();
|
|
48
|
-
if (!command) return false;
|
|
49
|
-
// Check mutating patterns first: sed -i is mutating even though plain sed is read-only.
|
|
50
|
-
if (MUTATING_COMMANDS.test(command)) return true;
|
|
51
|
-
if (READ_ONLY_COMMANDS.test(command)) return false;
|
|
52
|
-
// If the command doesn't match either list, treat unknown bash calls as potentially mutating.
|
|
53
|
-
return true;
|
|
54
|
-
}
|
|
55
|
-
return false;
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
function collectToolCallsFromEvent(event: unknown): Array<{ tool: string; args?: unknown }> {
|
|
59
|
-
const record = asRecord(event);
|
|
60
|
-
if (!record) return [];
|
|
61
|
-
const calls: Array<{ tool: string; args?: unknown }> = [];
|
|
62
|
-
const directTool = record.toolName ?? record.name ?? record.tool;
|
|
63
|
-
if (typeof directTool === "string" && (record.type === "tool_execution_start" || record.type === "toolCall" || record.type === "tool_call")) {
|
|
64
|
-
calls.push({ tool: directTool, args: record.args ?? record.input });
|
|
65
|
-
}
|
|
66
|
-
const content = Array.isArray(record.content) ? record.content : asRecord(record.message)?.content;
|
|
67
|
-
if (Array.isArray(content)) {
|
|
68
|
-
for (const part of content) {
|
|
69
|
-
const item = asRecord(part);
|
|
70
|
-
if (!item) continue;
|
|
71
|
-
const tool = item.name ?? item.toolName ?? item.tool;
|
|
72
|
-
if (typeof tool === "string" && (item.type === "toolCall" || item.type === "tool_call" || item.type === "tool_execution_start")) calls.push({ tool, args: item.input ?? item.args });
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
return calls;
|
|
76
|
-
}
|
|
77
|
-
|
|
78
|
-
function transcriptText(input: CompletionMutationGuardInput): string {
|
|
79
|
-
if (input.transcriptPath && fs.existsSync(input.transcriptPath)) return fs.readFileSync(input.transcriptPath, "utf-8");
|
|
80
|
-
return input.stdout ?? "";
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
export function expectsImplementationMutation(input: Pick<CompletionMutationGuardInput, "role" | "taskText">): boolean {
|
|
84
|
-
if (!MUTATING_ROLES.has(input.role)) return false;
|
|
85
|
-
return !READ_ONLY_HINTS.test(input.taskText ?? "");
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
export function evaluateCompletionMutationGuard(input: CompletionMutationGuardInput): CompletionMutationGuardResult {
|
|
89
|
-
const expectedMutation = expectsImplementationMutation(input);
|
|
90
|
-
const observedTools: string[] = [];
|
|
91
|
-
let observedMutation = false;
|
|
92
|
-
const text = transcriptText(input);
|
|
93
|
-
for (const line of text.split("\n")) {
|
|
94
|
-
const trimmed = line.trim();
|
|
95
|
-
if (!trimmed) continue;
|
|
96
|
-
let event: unknown;
|
|
97
|
-
try { event = JSON.parse(trimmed); } catch { continue; }
|
|
98
|
-
for (const call of collectToolCallsFromEvent(event)) {
|
|
99
|
-
observedTools.push(call.tool);
|
|
100
|
-
if (isMutatingTool(call.tool, call.args)) observedMutation = true;
|
|
101
|
-
}
|
|
102
|
-
}
|
|
103
|
-
return {
|
|
104
|
-
expectedMutation,
|
|
105
|
-
observedMutation,
|
|
106
|
-
observedTools,
|
|
107
|
-
...(expectedMutation && !observedMutation ? { reason: "no_mutation_observed" as const } : {}),
|
|
108
|
-
};
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
// ============================================================================
|
|
112
|
-
// Phase 11a: Artifact-based Completion Verification — a second layer that
|
|
113
|
-
// checks whether a completed task actually produced meaningful artifacts.
|
|
114
|
-
// ============================================================================
|
|
115
|
-
|
|
116
|
-
/**
 * Outcome of the artifact-based completion check done by `verifyTaskCompletion`,
 * which guards against tasks that claim success without meaningful output.
 */
export interface CompletionVerifyResult {
	/** Evidence score: 0 = no output, 1 = minimal, 2 = moderate, 3 = strong. */
	greenLevel: number;
	/** Human-readable warnings about potentially incomplete work. */
	warnings: string[];
}
|
|
128
|
-
|
|
129
|
-
/** Upper bound on the length of the trimmed output kept by `formatOutputPreview` before "..." is appended. */
const MAX_OUTPUT_PREVIEW = 200;
|
|
130
|
-
|
|
131
|
-
/**
 * Tell whether an error string carries no real message.
 *
 * Empty and whitespace-only strings are both trivial. Previously "" was
 * reported as non-trivial while "   " was trivial, which was inconsistent.
 *
 * @param error Error text recorded on the task, if any.
 * @returns `true` for an empty or blank string; `false` for a missing value
 *          or any string with visible content.
 */
function isTrivialError(error: string | undefined): boolean {
	// `== null` also tolerates a runtime null coming from loosely typed state.
	if (error == null) return false;
	return error.trim().length === 0;
}
|
|
135
|
-
|
|
136
|
-
/**
 * Score how much evidence a finished task left behind.
 *
 * One point each is awarded for a result artifact, a transcript artifact, and
 * at least one manifest artifact whose producer is the task id or its agent.
 * A non-blank task error short-circuits to level 0.
 *
 * @param task Task state to inspect.
 * @param manifest Run manifest whose `artifacts` list is searched.
 * @returns The green level (0-3) and any warnings collected along the way.
 */
export function verifyTaskCompletion(
	task: TeamTaskState,
	manifest: TeamRunManifest,
): CompletionVerifyResult {
	// A real error message overrides every other signal.
	if (task.error && !isTrivialError(task.error)) {
		return { greenLevel: 0, warnings: [`Task has error: ${task.error}`] };
	}

	const warnings: string[] = [];
	let greenLevel = (task.resultArtifact ? 1 : 0) + (task.transcriptArtifact ? 1 : 0);

	// Run-level artifacts attributed to this task, by id or by agent name.
	const producedByTask = manifest.artifacts.filter(
		(artifact) => artifact.producer === task.id || artifact.producer === task.agent,
	);
	if (producedByTask.length > 0) {
		greenLevel += 1;
	} else if (greenLevel < 3) {
		warnings.push("No run-level artifacts produced by this task");
	}

	// Zero reported tokens suggests the task never actually ran.
	if (task.usage) {
		const consumed = (task.usage.input ?? 0) + (task.usage.output ?? 0);
		if (consumed === 0 && greenLevel < 3) {
			warnings.push("Task reports zero token usage — may not have executed");
		}
	}

	return { greenLevel: Math.min(greenLevel, 3), warnings };
}
|
|
181
|
-
|
|
182
|
-
/**
 * Format a short preview of task output for diagnostic display.
 *
 * Missing, empty and whitespace-only output all render as "(no output)".
 * Longer output is cut at `MAX_OUTPUT_PREVIEW` UTF-16 code units and suffixed
 * with "..."; the cut moves back by one unit when it would otherwise split a
 * surrogate pair and leave a broken character at the end of the preview.
 *
 * @param output Raw task output, if any.
 * @returns The trimmed output, a truncated preview, or "(no output)".
 */
export function formatOutputPreview(output: string | undefined): string {
	const trimmed = output?.trim() ?? "";
	if (trimmed.length === 0) return "(no output)";
	if (trimmed.length <= MAX_OUTPUT_PREVIEW) return trimmed;

	let cut = MAX_OUTPUT_PREVIEW;
	const lastUnit = trimmed.charCodeAt(cut - 1);
	// A high surrogate at the cut point means its low half would be dropped.
	if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
	return trimmed.slice(0, cut) + "...";
}
|
|
1
|
+
import * as fs from "node:fs";
|
|
2
|
+
import type { TeamTaskState, TeamRunManifest } from "../state/types.ts";
|
|
3
|
+
|
|
4
|
+
// ============================================================================
|
|
5
|
+
// Phase 1.2: Completion Mutation Guard — detects tasks that claim success but
|
|
6
|
+
// made no observable mutations. Used by task-runner.ts.
|
|
7
|
+
// ============================================================================
|
|
8
|
+
|
|
9
|
+
/** Everything the completion mutation guard needs to judge one finished task. */
export interface CompletionMutationGuardInput {
	/** Role of the agent that ran the task; checked against the mutating-role set. */
	role: string;
	/** Task description; scanned for read-only hints. */
	taskText?: string;
	/** Path of a transcript file to read tool-call events from. */
	transcriptPath?: string;
	/** Captured stdout, used as the transcript when no transcript file is read. */
	stdout?: string;
}
|
|
15
|
+
|
|
16
|
+
/** Verdict produced by `evaluateCompletionMutationGuard`. */
export interface CompletionMutationGuardResult {
	/** Whether the role and task text imply that files should have been changed. */
	expectedMutation: boolean;
	/** Whether at least one observed tool call was classified as mutating. */
	observedMutation: boolean;
	/** Set only when a mutation was expected but none was observed. */
	reason?: "no_mutation_observed";
	/** Names of all tool calls found in the transcript, in order, duplicates kept. */
	observedTools: string[];
}
|
|
22
|
+
|
|
23
|
+
/** Roles whose tasks are expected to modify files unless the task text says otherwise. */
const MUTATING_ROLES = new Set(["executor", "test-engineer"]);

/** Tool names (lower-cased) that always count as a mutation. */
const MUTATING_TOOLS = new Set(["edit", "write", "multi_edit", "apply_patch", "replace_in_file", "insert", "delete_files", "create_file", "overwrite", "patch"]);

/** Shell commands treated as read-only; anchored to the start of the command. */
const READ_ONLY_COMMANDS = /^(pwd|ls|dir|cat|type|sed|grep|rg|find|git\s+(status|diff|log|show|branch|remote|rev-parse|ls-files)|npm\s+(test|run\s+(typecheck|check|lint|test|ci))|node\s+--test)\b/i;

/**
 * Shell fragments treated as mutating, matched anywhere in the command.
 *
 * Word boundaries are applied per alternative rather than once after the whole
 * group. A single trailing `\b` never matched when an alternative ended in a
 * non-word character followed by another non-word character, so `echo x > f`,
 * `mv -f a b`, `del /q f` and `dd of=/dev/x` slipped through and a mutation
 * chained after a read-only prefix (`ls && echo x > f`) was missed.
 * `>(?!&)` keeps descriptor duplication such as `2>&1` from counting as a
 * file write.
 */
const MUTATING_COMMANDS = /\b(?:rm\s+-|del\s+|erase\s+|mv\s+|move\s+|cp\s+|copy\s+|mkdir\b|touch\b|git\s+(?:add|commit|push|reset|clean|checkout|switch|merge|rebase|stash)\b|npm\s+(?:install|i|uninstall|publish|version)\b|pnpm\s+(?:add|install|remove)\b|yarn\s+(?:add|install|remove)\b|(?:python|node|echo)\b.*>(?!&)|Set-Content\b|Out-File\b|sed\s+-i\b|tee\b|dd\b.*of=|wget\b.*-O\b|curl\b.*-o\b)/i;

/** Phrases (English and Vietnamese) in the task text that mark a task as read-only. */
const READ_ONLY_HINTS = /\b(read-only|no edits?|do not edit|không sửa|khong sua|chỉ đọc|chi doc|plan only|chỉ lập plan|review only|audit only)\b/i;
|
|
28
|
+
|
|
29
|
+
/**
 * Narrow an unknown value to a plain key/value record.
 *
 * @param value Any value, typically parsed JSON.
 * @returns The same value typed as a record when it is a non-null, non-array
 *          object; otherwise `undefined`.
 */
function asRecord(value: unknown): Record<string, unknown> | undefined {
	if (value === null || typeof value !== "object") return undefined;
	if (Array.isArray(value)) return undefined;
	return value as Record<string, unknown>;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Extract the shell command string from a tool call's arguments.
 *
 * @param value Tool arguments: either the command itself or a record that
 *              carries it under `command`, `cmd`, `script` or `input`.
 * @returns The string value, the first string found under those keys (in that
 *          order), the record serialized as JSON when none is a string, or ""
 *          for any other non-record value.
 */
function commandText(value: unknown): string {
	const fields = asRecord(value);
	if (fields === undefined) {
		if (typeof value === "string") return value;
		return "";
	}

	const candidates = ["command", "cmd", "script", "input"].map((key) => fields[key]);
	const firstString = candidates.find((candidate) => typeof candidate === "string");
	// Fall back to the whole record so the command patterns still have text to scan.
	return typeof firstString === "string" ? firstString : JSON.stringify(fields);
}
|
|
42
|
+
|
|
43
|
+
/**
 * Classify a single tool call as mutating or not.
 *
 * @param tool Tool name as recorded in the transcript (case-insensitive).
 * @param args Tool arguments; only inspected for shell-like tools.
 * @returns `true` for tools in `MUTATING_TOOLS`, and for bash/shell/powershell
 *          calls whose command matches `MUTATING_COMMANDS` or matches neither
 *          pattern list; `false` otherwise.
 */
function isMutatingTool(tool: string, args: unknown): boolean {
	const name = tool.toLowerCase();
	if (MUTATING_TOOLS.has(name)) return true;

	const isShell = ["bash", "shell", "powershell"].includes(name);
	if (!isShell) return false;

	const script = commandText(args).trim();
	if (script === "") return false;

	// Mutating patterns win: `sed -i` mutates even though plain `sed` is read-only.
	if (MUTATING_COMMANDS.test(script)) return true;
	// Anything not recognised as read-only is treated as potentially mutating.
	return !READ_ONLY_COMMANDS.test(script);
}
|
|
57
|
+
|
|
58
|
+
/**
 * Pull every tool call out of one parsed transcript event.
 *
 * Two shapes are recognised: the event itself being a tool call, and tool-call
 * parts inside a `content` array found on the event or on its `message`.
 *
 * @param event Parsed JSON value for one transcript line.
 * @returns Tool calls in encounter order; empty when the event is not a record.
 */
function collectToolCallsFromEvent(event: unknown): Array<{ tool: string; args?: unknown }> {
	const envelope = asRecord(event);
	if (envelope === undefined) return [];

	const toolCallTypes: unknown[] = ["tool_execution_start", "toolCall", "tool_call"];
	const found: Array<{ tool: string; args?: unknown }> = [];

	// Event-level call: name from toolName/name/tool, arguments from args/input.
	const topLevelName = envelope.toolName ?? envelope.name ?? envelope.tool;
	if (typeof topLevelName === "string" && toolCallTypes.includes(envelope.type)) {
		found.push({ tool: topLevelName, args: envelope.args ?? envelope.input });
	}

	// Content parts live on the event or, failing that, on its message.
	let parts: unknown = envelope.content;
	if (!Array.isArray(parts)) parts = asRecord(envelope.message)?.content;
	if (!Array.isArray(parts)) return found;

	for (const part of parts) {
		const entry = asRecord(part);
		if (entry === undefined) continue;
		// Part-level call: name from name/toolName/tool, arguments from input/args.
		const partName = entry.name ?? entry.toolName ?? entry.tool;
		if (typeof partName !== "string") continue;
		if (!toolCallTypes.includes(entry.type)) continue;
		found.push({ tool: partName, args: entry.input ?? entry.args });
	}
	return found;
}
|
|
77
|
+
|
|
78
|
+
/**
 * Load the raw transcript text that the mutation guard scans for tool calls.
 *
 * The transcript file is preferred when `transcriptPath` is set and readable.
 * Any read failure (missing file, directory, permission error, or the file
 * disappearing between check and read) falls back to the captured stdout
 * instead of throwing, so the guard never crashes the caller.
 *
 * @param input Guard input carrying the optional transcript path and stdout.
 * @returns File contents when readable, otherwise `input.stdout`, otherwise "".
 */
function transcriptText(input: CompletionMutationGuardInput): string {
	if (input.transcriptPath) {
		try {
			// Read directly instead of existsSync + readFileSync: avoids the
			// check-then-use race and also covers directories/unreadable files.
			return fs.readFileSync(input.transcriptPath, "utf-8");
		} catch {
			// Best effort: fall through to stdout.
		}
	}
	return input.stdout ?? "";
}
|
|
82
|
+
|
|
83
|
+
/**
 * Decide whether a task is expected to change files.
 *
 * @param input Role and optional task text of the task.
 * @returns `true` only when the role is in `MUTATING_ROLES` and the task text
 *          contains none of the `READ_ONLY_HINTS` phrases.
 */
export function expectsImplementationMutation(input: Pick<CompletionMutationGuardInput, "role" | "taskText">): boolean {
	const roleMutates = MUTATING_ROLES.has(input.role);
	const description = input.taskText ?? "";
	return roleMutates && !READ_ONLY_HINTS.test(description);
}
|
|
87
|
+
|
|
88
|
+
/**
 * Scan a task transcript for tool calls and report whether a mutation was
 * expected and whether one was observed.
 *
 * The transcript is treated as newline-delimited JSON; blank lines and lines
 * that fail to parse are skipped.
 *
 * @param input Role, task text and transcript source for the task.
 * @returns Guard result; `reason` is present only when a mutation was expected
 *          but none was observed.
 */
export function evaluateCompletionMutationGuard(input: CompletionMutationGuardInput): CompletionMutationGuardResult {
	const expectedMutation = expectsImplementationMutation(input);
	const observedTools: string[] = [];
	let observedMutation = false;

	const rawLines = transcriptText(input).split("\n");
	for (const rawLine of rawLines) {
		const candidate = rawLine.trim();
		if (candidate.length === 0) continue;

		let parsed: unknown;
		try {
			parsed = JSON.parse(candidate);
		} catch {
			// Not a JSON event line; ignore it.
			continue;
		}

		for (const { tool, args } of collectToolCallsFromEvent(parsed)) {
			observedTools.push(tool);
			observedMutation = isMutatingTool(tool, args) || observedMutation;
		}
	}

	const result: CompletionMutationGuardResult = { expectedMutation, observedMutation, observedTools };
	if (expectedMutation && !observedMutation) result.reason = "no_mutation_observed";
	return result;
}
|
|
110
|
+
|
|
111
|
+
// ============================================================================
|
|
112
|
+
// Phase 11a: Artifact-based Completion Verification — a second layer that
|
|
113
|
+
// checks whether a completed task actually produced meaningful artifacts.
|
|
114
|
+
// ============================================================================
|
|
115
|
+
|
|
116
|
+
/**
 * Outcome of the artifact-based completion check done by `verifyTaskCompletion`,
 * which guards against tasks that claim success without meaningful output.
 */
export interface CompletionVerifyResult {
	/** Evidence score: 0 = no output, 1 = minimal, 2 = moderate, 3 = strong. */
	greenLevel: number;
	/** Human-readable warnings about potentially incomplete work. */
	warnings: string[];
}
|
|
128
|
+
|
|
129
|
+
/** Upper bound on the length of the trimmed output kept by `formatOutputPreview` before "..." is appended. */
const MAX_OUTPUT_PREVIEW = 200;
|
|
130
|
+
|
|
131
|
+
/**
 * Tell whether an error string carries no real message.
 *
 * Empty and whitespace-only strings are both trivial. Previously "" was
 * reported as non-trivial while "   " was trivial, which was inconsistent.
 *
 * @param error Error text recorded on the task, if any.
 * @returns `true` for an empty or blank string; `false` for a missing value
 *          or any string with visible content.
 */
function isTrivialError(error: string | undefined): boolean {
	// `== null` also tolerates a runtime null coming from loosely typed state.
	if (error == null) return false;
	return error.trim().length === 0;
}
|
|
135
|
+
|
|
136
|
+
/**
 * Score how much evidence a finished task left behind.
 *
 * One point each is awarded for a result artifact, a transcript artifact, and
 * at least one manifest artifact whose producer is the task id or its agent.
 * A non-blank task error short-circuits to level 0.
 *
 * @param task Task state to inspect.
 * @param manifest Run manifest whose `artifacts` list is searched.
 * @returns The green level (0-3) and any warnings collected along the way.
 */
export function verifyTaskCompletion(
	task: TeamTaskState,
	manifest: TeamRunManifest,
): CompletionVerifyResult {
	// A real error message overrides every other signal.
	if (task.error && !isTrivialError(task.error)) {
		return { greenLevel: 0, warnings: [`Task has error: ${task.error}`] };
	}

	const warnings: string[] = [];
	let greenLevel = (task.resultArtifact ? 1 : 0) + (task.transcriptArtifact ? 1 : 0);

	// Run-level artifacts attributed to this task, by id or by agent name.
	const producedByTask = manifest.artifacts.filter(
		(artifact) => artifact.producer === task.id || artifact.producer === task.agent,
	);
	if (producedByTask.length > 0) {
		greenLevel += 1;
	} else if (greenLevel < 3) {
		warnings.push("No run-level artifacts produced by this task");
	}

	// Zero reported tokens suggests the task never actually ran.
	if (task.usage) {
		const consumed = (task.usage.input ?? 0) + (task.usage.output ?? 0);
		if (consumed === 0 && greenLevel < 3) {
			warnings.push("Task reports zero token usage — may not have executed");
		}
	}

	return { greenLevel: Math.min(greenLevel, 3), warnings };
}
|
|
181
|
+
|
|
182
|
+
/**
 * Format a short preview of task output for diagnostic display.
 *
 * Missing, empty and whitespace-only output all render as "(no output)".
 * Longer output is cut at `MAX_OUTPUT_PREVIEW` UTF-16 code units and suffixed
 * with "..."; the cut moves back by one unit when it would otherwise split a
 * surrogate pair and leave a broken character at the end of the preview.
 *
 * @param output Raw task output, if any.
 * @returns The trimmed output, a truncated preview, or "(no output)".
 */
export function formatOutputPreview(output: string | undefined): string {
	const trimmed = output?.trim() ?? "";
	if (trimmed.length === 0) return "(no output)";
	if (trimmed.length <= MAX_OUTPUT_PREVIEW) return trimmed;

	let cut = MAX_OUTPUT_PREVIEW;
	const lastUnit = trimmed.charCodeAt(cut - 1);
	// A high surrogate at the cut point means its low half would be dropped.
	if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
	return trimmed.slice(0, cut) + "...";
}
|