pi-crew 0.1.49 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249) hide show
  1. package/CHANGELOG.md +74 -1
  2. package/README.md +176 -781
  3. package/agents/analyst.md +11 -11
  4. package/agents/critic.md +11 -11
  5. package/agents/executor.md +11 -11
  6. package/agents/explorer.md +11 -11
  7. package/agents/planner.md +11 -11
  8. package/agents/reviewer.md +11 -11
  9. package/agents/security-reviewer.md +11 -11
  10. package/agents/test-engineer.md +11 -11
  11. package/agents/verifier.md +70 -11
  12. package/agents/writer.md +11 -11
  13. package/docs/actions-reference.md +595 -0
  14. package/docs/commands-reference.md +347 -0
  15. package/docs/runtime-flow.md +148 -148
  16. package/index.ts +6 -6
  17. package/package.json +99 -99
  18. package/skills/async-worker-recovery/SKILL.md +42 -42
  19. package/skills/context-artifact-hygiene/SKILL.md +52 -52
  20. package/skills/delegation-patterns/SKILL.md +54 -54
  21. package/skills/mailbox-interactive/SKILL.md +40 -40
  22. package/skills/model-routing-context/SKILL.md +39 -39
  23. package/skills/multi-perspective-review/SKILL.md +58 -58
  24. package/skills/observability-reliability/SKILL.md +41 -41
  25. package/skills/orchestration/SKILL.md +157 -157
  26. package/skills/ownership-session-security/SKILL.md +41 -41
  27. package/skills/pi-extension-lifecycle/SKILL.md +39 -39
  28. package/skills/requirements-to-task-packet/SKILL.md +63 -63
  29. package/skills/resource-discovery-config/SKILL.md +41 -41
  30. package/skills/runtime-state-reader/SKILL.md +44 -44
  31. package/skills/secure-agent-orchestration-review/SKILL.md +45 -45
  32. package/skills/state-mutation-locking/SKILL.md +42 -42
  33. package/skills/systematic-debugging/SKILL.md +67 -67
  34. package/skills/ui-render-performance/SKILL.md +39 -39
  35. package/skills/verification-before-done/SKILL.md +57 -57
  36. package/skills/worktree-isolation/SKILL.md +39 -39
  37. package/src/adapters/claude-adapter.ts +25 -0
  38. package/src/adapters/codex-adapter.ts +21 -0
  39. package/src/adapters/cursor-adapter.ts +17 -0
  40. package/src/adapters/export-util.ts +137 -0
  41. package/src/adapters/index.ts +15 -0
  42. package/src/adapters/registry.ts +18 -0
  43. package/src/adapters/types.ts +23 -0
  44. package/src/agents/agent-config.ts +2 -0
  45. package/src/agents/agent-search.ts +98 -98
  46. package/src/agents/discover-agents.ts +2 -1
  47. package/src/config/config.ts +14 -1
  48. package/src/config/defaults.ts +5 -5
  49. package/src/config/drift-detector.ts +211 -0
  50. package/src/config/markers.ts +327 -0
  51. package/src/config/resilient-parser.ts +108 -0
  52. package/src/config/suggestions.ts +74 -0
  53. package/src/extension/cross-extension-rpc.ts +103 -82
  54. package/src/extension/project-init.ts +36 -4
  55. package/src/extension/register.ts +67 -22
  56. package/src/extension/registration/commands.ts +77 -8
  57. package/src/extension/registration/subagent-tools.ts +10 -1
  58. package/src/extension/registration/team-tool.ts +10 -1
  59. package/src/extension/registration/viewers.ts +48 -34
  60. package/src/extension/run-bundle-schema.ts +89 -89
  61. package/src/extension/run-export.ts +26 -12
  62. package/src/extension/run-import.ts +25 -1
  63. package/src/extension/run-index.ts +5 -1
  64. package/src/extension/run-maintenance.ts +142 -68
  65. package/src/extension/team-manager-command.ts +10 -1
  66. package/src/extension/team-tool/context.ts +1 -1
  67. package/src/extension/team-tool/doctor.ts +28 -3
  68. package/src/extension/team-tool/handle-settings.ts +195 -188
  69. package/src/extension/team-tool/inspect.ts +41 -41
  70. package/src/extension/team-tool/intent-policy.ts +42 -42
  71. package/src/extension/team-tool/lifecycle-actions.ts +27 -8
  72. package/src/extension/team-tool/plan.ts +19 -19
  73. package/src/extension/team-tool/run.ts +12 -1
  74. package/src/extension/team-tool.ts +14 -3
  75. package/src/i18n.ts +184 -184
  76. package/src/observability/exporters/otlp-exporter.ts +92 -77
  77. package/src/prompt/prompt-runtime.ts +72 -72
  78. package/src/runtime/agent-memory.ts +72 -72
  79. package/src/runtime/agent-observability.ts +114 -114
  80. package/src/runtime/async-marker.ts +26 -26
  81. package/src/runtime/attention-events.ts +28 -28
  82. package/src/runtime/auto-resume.ts +100 -0
  83. package/src/runtime/background-runner.ts +11 -1
  84. package/src/runtime/cancellation-token.ts +89 -89
  85. package/src/runtime/cancellation.ts +61 -61
  86. package/src/runtime/capability-inventory.ts +116 -116
  87. package/src/runtime/child-pi.ts +7 -2
  88. package/src/runtime/compaction-summary.ts +271 -0
  89. package/src/runtime/completion-guard.ts +190 -190
  90. package/src/runtime/concurrency.ts +3 -1
  91. package/src/runtime/crash-recovery.ts +33 -0
  92. package/src/runtime/delta-conflict.ts +360 -0
  93. package/src/runtime/diagnostic-export.ts +3 -1
  94. package/src/runtime/direct-run.ts +35 -35
  95. package/src/runtime/event-stream-bridge.ts +3 -1
  96. package/src/runtime/foreground-control.ts +82 -82
  97. package/src/runtime/green-contract.ts +46 -46
  98. package/src/runtime/group-join.ts +106 -106
  99. package/src/runtime/heartbeat-gradient.ts +28 -28
  100. package/src/runtime/heartbeat-watcher.ts +124 -124
  101. package/src/runtime/iteration-hooks.ts +262 -0
  102. package/src/runtime/live-agent-control.ts +88 -88
  103. package/src/runtime/live-control-realtime.ts +36 -36
  104. package/src/runtime/live-extension-bridge.ts +150 -150
  105. package/src/runtime/live-irc.ts +92 -92
  106. package/src/runtime/live-session-health.ts +100 -100
  107. package/src/runtime/loop-gates.ts +129 -0
  108. package/src/runtime/metric-parser.ts +40 -0
  109. package/src/runtime/notebook-helpers.ts +90 -90
  110. package/src/runtime/orphan-sentinel.ts +7 -7
  111. package/src/runtime/parallel-research.ts +44 -44
  112. package/src/runtime/phase-progress.ts +217 -0
  113. package/src/runtime/pi-args.ts +38 -2
  114. package/src/runtime/pi-json-output.ts +111 -111
  115. package/src/runtime/pi-spawn.ts +74 -6
  116. package/src/runtime/policy-engine.ts +79 -79
  117. package/src/runtime/post-checks.ts +122 -0
  118. package/src/runtime/process-status.ts +14 -1
  119. package/src/runtime/progress-event-coalescer.ts +43 -43
  120. package/src/runtime/prose-compressor.ts +164 -164
  121. package/src/runtime/recovery-recipes.ts +74 -74
  122. package/src/runtime/result-extractor.ts +121 -121
  123. package/src/runtime/role-permission.ts +39 -39
  124. package/src/runtime/sensitive-paths.ts +3 -3
  125. package/src/runtime/session-resources.ts +25 -25
  126. package/src/runtime/session-snapshot.ts +59 -59
  127. package/src/runtime/session-usage.ts +79 -79
  128. package/src/runtime/sidechain-output.ts +29 -29
  129. package/src/runtime/stream-preview.ts +177 -177
  130. package/src/runtime/supervisor-contact.ts +59 -59
  131. package/src/runtime/task-display.ts +38 -38
  132. package/src/runtime/task-graph.ts +207 -0
  133. package/src/runtime/task-quality.ts +207 -0
  134. package/src/runtime/task-runner/capabilities.ts +78 -78
  135. package/src/runtime/task-runner/live-executor.ts +7 -1
  136. package/src/runtime/task-runner/progress.ts +119 -119
  137. package/src/runtime/task-runner/prompt-builder.ts +1 -1
  138. package/src/runtime/task-runner/prompt-pipeline.ts +64 -64
  139. package/src/runtime/task-runner/result-utils.ts +14 -14
  140. package/src/runtime/task-runner/run-projection.ts +103 -103
  141. package/src/runtime/task-runner/state-helpers.ts +22 -22
  142. package/src/runtime/team-runner.ts +126 -7
  143. package/src/runtime/worker-heartbeat.ts +21 -21
  144. package/src/runtime/worker-startup.ts +57 -57
  145. package/src/runtime/workflow-state.ts +187 -0
  146. package/src/runtime/workspace-tree.ts +298 -298
  147. package/src/schema/config-schema.ts +12 -0
  148. package/src/schema/validation-types.ts +148 -0
  149. package/src/skills/skill-templates.ts +374 -0
  150. package/src/state/active-run-registry.ts +35 -11
  151. package/src/state/atomic-write.ts +33 -26
  152. package/src/state/contracts.ts +1 -0
  153. package/src/state/event-reconstructor.ts +217 -0
  154. package/src/state/locks.ts +2 -11
  155. package/src/state/mailbox.ts +4 -3
  156. package/src/state/state-store.ts +32 -14
  157. package/src/state/task-claims.ts +44 -44
  158. package/src/state/types.ts +9 -0
  159. package/src/state/usage.ts +29 -29
  160. package/src/subagents/async-entry.ts +1 -1
  161. package/src/subagents/index.ts +3 -3
  162. package/src/subagents/live/control.ts +1 -1
  163. package/src/subagents/live/manager.ts +1 -1
  164. package/src/subagents/live/realtime.ts +1 -1
  165. package/src/subagents/live/session-runtime.ts +1 -1
  166. package/src/subagents/manager.ts +1 -1
  167. package/src/subagents/spawn.ts +1 -1
  168. package/src/teams/team-serializer.ts +38 -38
  169. package/src/types/diff.d.ts +18 -18
  170. package/src/ui/crew-footer.ts +101 -101
  171. package/src/ui/crew-select-list.ts +111 -111
  172. package/src/ui/crew-widget.ts +9 -4
  173. package/src/ui/dashboard-panes/cancellation-pane.ts +42 -42
  174. package/src/ui/dashboard-panes/capability-pane.ts +59 -59
  175. package/src/ui/dashboard-panes/mailbox-pane.ts +35 -35
  176. package/src/ui/dashboard-panes/metrics-pane.ts +34 -34
  177. package/src/ui/dashboard-panes/progress-pane.ts +11 -0
  178. package/src/ui/dynamic-border.ts +25 -25
  179. package/src/ui/layout-primitives.ts +106 -106
  180. package/src/ui/loaders.ts +158 -158
  181. package/src/ui/powerbar-publisher.ts +6 -0
  182. package/src/ui/render-coalescer.ts +51 -51
  183. package/src/ui/render-diff.ts +119 -119
  184. package/src/ui/render-scheduler.ts +143 -143
  185. package/src/ui/run-action-dispatcher.ts +10 -1
  186. package/src/ui/spinner.ts +17 -17
  187. package/src/ui/status-colors.ts +58 -58
  188. package/src/ui/syntax-highlight.ts +116 -116
  189. package/src/ui/transcript-entries.ts +258 -258
  190. package/src/utils/completion-dedupe.ts +63 -63
  191. package/src/utils/frontmatter.ts +68 -68
  192. package/src/utils/git.ts +262 -262
  193. package/src/utils/ids.ts +17 -17
  194. package/src/utils/incremental-reader.ts +104 -104
  195. package/src/utils/names.ts +27 -27
  196. package/src/utils/redaction.ts +44 -44
  197. package/src/utils/safe-paths.ts +47 -47
  198. package/src/utils/scan-cache.ts +136 -136
  199. package/src/utils/sleep.ts +40 -26
  200. package/src/utils/task-name-generator.ts +337 -337
  201. package/src/workflows/validate-workflow.ts +40 -40
  202. package/src/worktree/branch-freshness.ts +45 -45
  203. package/src/worktree/worktree-manager.ts +11 -3
  204. package/teams/default.team.md +12 -12
  205. package/teams/fast-fix.team.md +11 -11
  206. package/teams/implementation.team.md +18 -18
  207. package/teams/parallel-research.team.md +14 -14
  208. package/teams/research.team.md +11 -11
  209. package/teams/review.team.md +12 -12
  210. package/workflows/default.workflow.md +30 -29
  211. package/workflows/fast-fix.workflow.md +23 -22
  212. package/workflows/implementation.workflow.md +43 -38
  213. package/workflows/parallel-research.workflow.md +46 -46
  214. package/workflows/research.workflow.md +22 -22
  215. package/workflows/review.workflow.md +30 -30
  216. package/docs/refactor-tasks-phase3.md +0 -394
  217. package/docs/refactor-tasks-phase4.md +0 -564
  218. package/docs/refactor-tasks-phase5.md +0 -402
  219. package/docs/refactor-tasks-phase6.md +0 -662
  220. package/docs/refactor-tasks.md +0 -1484
  221. package/docs/research/AGENT-EXECUTION-ARCHITECTURE.md +0 -261
  222. package/docs/research/AGENT-LIFECYCLE-COMPARISON.md +0 -111
  223. package/docs/research/AUDIT_OH_MY_PI.md +0 -261
  224. package/docs/research/AUDIT_PI_CREW.md +0 -457
  225. package/docs/research/CAVEMAN-DEEP-RESEARCH.md +0 -281
  226. package/docs/research/COMPARISON_OH_MY_PI_VS_PI_CREW.md +0 -264
  227. package/docs/research/DEEP-RESEARCH-PI-POWERBAR.md +0 -343
  228. package/docs/research/DEEP_RESEARCH_SUBAGENT_ARCHITECTURE.md +0 -480
  229. package/docs/research/GAP_CLOSURE_IMPLEMENTATION_PLAN.md +0 -354
  230. package/docs/research/IMPLEMENTATION_PLAN.md +0 -385
  231. package/docs/research/LIVE-SESSION-PRODUCTION-READY-PLAN.md +0 -502
  232. package/docs/research/OH-MY-PI-DEEP-RESEARCH-v14.7.6.md +0 -266
  233. package/docs/research/REMAINING-GAPS-PLAN.md +0 -363
  234. package/docs/research/SESSION-SUMMARY-2026-05-08.md +0 -146
  235. package/docs/research/UI-RESPONSIVENESS-AUDIT.md +0 -173
  236. package/docs/research-awesome-agent-skills-distillation.md +0 -100
  237. package/docs/research-extension-examples.md +0 -297
  238. package/docs/research-extension-system.md +0 -324
  239. package/docs/research-oh-my-pi-distillation.md +0 -369
  240. package/docs/research-optimization-plan.md +0 -548
  241. package/docs/research-phase10-distillation.md +0 -199
  242. package/docs/research-phase11-distillation.md +0 -201
  243. package/docs/research-phase8-operator-experience-plan.md +0 -819
  244. package/docs/research-phase9-observability-reliability-plan.md +0 -1190
  245. package/docs/research-pi-coding-agent.md +0 -357
  246. package/docs/research-source-pi-crew-reference.md +0 -174
  247. package/docs/research-ui-optimization-plan.md +0 -480
  248. package/docs/source-runtime-refactor-map.md +0 -107
  249. package/src/utils/atomic-write.ts +0 -33
@@ -8,7 +8,7 @@ import { getPiSpawnCommand } from "./pi-spawn.ts";
8
8
  import { DEFAULT_CHILD_PI } from "../config/defaults.ts";
9
9
  import { logInternalError } from "../utils/internal-error.ts";
10
10
  import { attachPostExitStdioGuard, trySignalChild } from "./post-exit-stdio-guard.ts";
11
- import { redactJsonLine } from "../utils/redaction.ts";
11
+ import { redactJsonLine, SECRET_KEY_PATTERN } from "../utils/redaction.ts";
12
12
 
13
13
  const POST_EXIT_STDIO_GUARD_MS = DEFAULT_CHILD_PI.postExitStdioGuardMs;
14
14
  const FINAL_DRAIN_MS = DEFAULT_CHILD_PI.finalDrainMs;
@@ -110,9 +110,14 @@ export interface ChildPiRunResult {
110
110
  }
111
111
 
112
112
  export function buildChildPiSpawnOptions(cwd: string, env: NodeJS.ProcessEnv): SpawnOptions {
113
+ // Filter out env vars whose keys match secret patterns to avoid leaking credentials to child processes
114
+ const filteredEnv: Record<string, string> = {};
115
+ for (const [key, value] of Object.entries(env)) {
116
+ if (value !== undefined && !SECRET_KEY_PATTERN.test(key)) filteredEnv[key] = value;
117
+ }
113
118
  return {
114
119
  cwd,
115
- env: { ...env, PI_CREW_PARENT_PID: String(process.pid) },
120
+ env: { ...filteredEnv, PI_CREW_PARENT_PID: String(process.pid) },
116
121
  stdio: ["ignore", "pipe", "pipe"],
117
122
  detached: process.platform !== "win32",
118
123
  windowsHide: true,
@@ -0,0 +1,271 @@
1
+ /**
2
+ * Deterministic compaction summary — builds a markdown summary of a pi-crew run
3
+ * from manifest.json, tasks.json, and the tail of events.jsonl.
4
+ *
5
+ * Distilled from pi-autoresearch's compaction-summary pattern.
6
+ */
7
+
8
/**
 * Marker error with a fixed message (`GIANT_LINE_FALLBACK`) and a fixed name.
 * It carries no payload and is meant to be recognised with `instanceof`
 * rather than by inspecting its message.
 */
class GiantLineFallbackError extends Error {
	constructor() {
		super("GIANT_LINE_FALLBACK");
		this.name = "GiantLineFallbackError";
	}
}
9
+
10
+ import * as fs from "node:fs";
11
+ import * as path from "node:path";
12
+ import type { TeamRunManifest, TeamTaskState } from "../state/types.ts";
13
+ import { readJsonFile } from "../state/atomic-write.ts";
14
+ import type { TeamEvent } from "../state/event-log.ts";
15
+
16
+ /** Maximum number of events to read from the tail of events.jsonl. */
17
+ const MAX_TAIL_EVENTS = 100;
18
+
19
+ /** Maximum number of completed tasks to include in the "Recent Results" section. */
20
+ const MAX_RECENT_RESULTS = 10;
21
+
22
/** File locations consulted when building a compaction summary for one run. */
export interface SummaryPaths {
	/** Location of the run's `manifest.json`. */
	manifestPath: string;
	/** Location of the run's `tasks.json`. */
	tasksPath: string;
	/** Location of the run's `events.jsonl`. */
	eventsPath: string;
	/** The state root directory the other paths were derived from. */
	stateRoot: string;
}

/**
 * Resolve the summary-relevant file paths underneath a state root directory.
 * Mirrors pi-autoresearch's `autoresearchSummaryPathsFor()`.
 *
 * @param stateRoot - Directory that holds the run's state files
 * @returns The state root itself plus the joined manifest, tasks and events paths
 */
export function summaryPathsFor(stateRoot: string): SummaryPaths {
	const underRoot = (fileName: string): string => path.join(stateRoot, fileName);
	const manifestPath = underRoot("manifest.json");
	const tasksPath = underRoot("tasks.json");
	const eventsPath = underRoot("events.jsonl");
	return { stateRoot, manifestPath, tasksPath, eventsPath };
}
42
+
43
/** Upper bound on the number of bytes read from the end of a large file. */
const TAIL_MAX_READ = 256 * 1024; // 256KB — enough for ~1000 lines of JSONL
/** Largest file the giant-line fallback is willing to read in full. */
const MAX_FALLBACK_READ = 2 * 1024 * 1024; // 2MB — safety limit for giant-line fallback

/**
 * Read up to the last `maxLines` non-blank lines of a text file.
 *
 * Files of at most `TAIL_MAX_READ` bytes are read whole. For larger files only
 * the final `TAIL_MAX_READ` bytes are read, so fewer than `maxLines` lines may
 * be returned when the requested lines do not all fit in that window. If that
 * window contains no newline at all (one giant trailing line), the whole file
 * is read instead, provided it is no larger than `MAX_FALLBACK_READ`.
 *
 * The function is best-effort: a missing file or any I/O error yields `[]`.
 *
 * @param filePath - File to read
 * @param maxLines - Maximum number of lines to return; values that are not
 *   greater than zero (including `NaN`) yield `[]`
 * @returns The selected lines in file order, blank lines removed
 */
function readTailLines(filePath: string, maxLines: number): string[] {
	// `slice(-0)` is `slice(0)` and would return every line, so reject
	// non-positive (and NaN) limits up front.
	if (!(maxLines > 0)) return [];

	const lastLinesOf = (text: string): string[] =>
		text
			.split("\n")
			.filter((line) => line.trim().length > 0)
			.slice(-maxLines);

	try {
		// statSync throws for a missing file; the catch below maps that to [].
		const fileSize = fs.statSync(filePath).size;
		if (fileSize === 0) return [];

		// Small file: read everything.
		if (fileSize <= TAIL_MAX_READ) return lastLinesOf(fs.readFileSync(filePath, "utf-8"));

		// Large file: read only the trailing window.
		const window = Buffer.alloc(TAIL_MAX_READ);
		let bytesRead = 0;
		const fd = fs.openSync(filePath, "r");
		try {
			bytesRead = fs.readSync(fd, window, 0, TAIL_MAX_READ, fileSize - TAIL_MAX_READ);
		} finally {
			fs.closeSync(fd);
		}

		// Only decode the bytes that were actually read, so a short read never
		// contributes zero padding to the result.
		const tail = window.subarray(0, bytesRead);

		// The window starts at an arbitrary byte offset. Skipping to just past
		// the first newline drops the partial leading line and guarantees that
		// decoding starts on a UTF-8 character boundary.
		const firstNewline = tail.indexOf(0x0a);
		if (firstNewline >= 0) return lastLinesOf(tail.toString("utf-8", firstNewline + 1));

		// No newline in the whole window: a single giant line. Read the full
		// file unless it exceeds the safety limit.
		if (fs.statSync(filePath).size > MAX_FALLBACK_READ) return [];
		return lastLinesOf(fs.readFileSync(filePath, "utf-8"));
	} catch {
		// Best-effort reader: every failure, including one during the
		// giant-line fallback, results in "no lines".
		return [];
	}
}
106
+
107
/**
 * Parse JSONL lines into TeamEvent objects.
 *
 * A line is kept only when it is valid JSON, decodes to a non-null object and
 * has string-valued `type` and `runId` properties. Everything else (malformed
 * JSON, primitives, objects missing either field) is skipped silently. No
 * further validation of the event shape is performed.
 *
 * @param lines - Raw lines, one JSON document each; surrounding whitespace is ignored
 * @returns The accepted events, in input order
 */
function parseEvents(lines: string[]): TeamEvent[] {
	const events: TeamEvent[] = [];
	for (const line of lines) {
		// Keep the decoded value as `unknown` so every property access below
		// has to go through an explicit runtime check.
		let decoded: unknown;
		try {
			decoded = JSON.parse(line.trim());
		} catch {
			// Skip malformed lines
			continue;
		}
		if (typeof decoded !== "object" || decoded === null) continue;

		const { type, runId } = decoded as Record<string, unknown>;
		if (typeof type !== "string" || typeof runId !== "string") continue;

		// Only `type` and `runId` were checked; the remaining fields are trusted.
		events.push(decoded as TeamEvent);
	}
	return events;
}
124
+
125
/**
 * Compute a human-readable duration between two ISO timestamp strings.
 *
 * Output forms: `"42s"` below one minute, `"3m 7s"` below one hour and
 * `"2h 5m"` otherwise (seconds are dropped once hours are shown). An em dash
 * (`"—"`) is returned when the start is missing, either timestamp cannot be
 * parsed, or the end lies before the start.
 *
 * @param startIso - Start timestamp; when absent or empty the result is `"—"`
 * @param endIso - End timestamp; when absent or empty, `nowMs` is used instead
 * @param nowMs - Epoch milliseconds standing in for "now" when `endIso` is not
 *   given. Defaults to `Date.now()`; pass a fixed value for reproducible output.
 * @returns The formatted duration or `"—"`
 */
function formatDuration(startIso?: string, endIso?: string, nowMs = Date.now()): string {
	const unknownDuration = "—";
	if (!startIso) return unknownDuration;

	const startMs = Date.parse(startIso);
	if (Number.isNaN(startMs)) return unknownDuration;

	const endMs = endIso ? Date.parse(endIso) : nowMs;
	if (Number.isNaN(endMs)) return unknownDuration;

	const elapsedMs = endMs - startMs;
	if (elapsedMs < 0) return unknownDuration;

	const totalSeconds = Math.floor(elapsedMs / 1000);
	if (totalSeconds < 60) return `${totalSeconds}s`;

	const totalMinutes = Math.floor(totalSeconds / 60);
	if (totalMinutes < 60) return `${totalMinutes}m ${totalSeconds % 60}s`;

	const hours = Math.floor(totalMinutes / 60);
	return `${hours}h ${totalMinutes % 60}m`;
}
145
+
146
/**
 * Build a markdown compaction summary for a pi-crew run.
 *
 * Reads manifest.json, tasks.json, and the tail of events.jsonl (paths derived
 * via `summaryPathsFor`) and renders these sections, in order:
 *
 * 1. `# Run Summary` / `## Run Metadata` — manifest fields, or a
 *    "manifest unavailable" note when no manifest could be loaded.
 * 2. `## Task Progress` — one table row per task, or "No tasks recorded.".
 * 3. `## Recent Task Results` — the last `MAX_RECENT_RESULTS` completed or
 *    failed tasks with their error, diagnostics and metrics (omitted if none).
 * 4. `## Next Steps` — tasks that are queued, waiting or running (omitted if none).
 * 5. `## Recent Events` — the last few parsed tail events (omitted if none).
 *
 * Durations of unfinished tasks are measured against the current time by
 * `formatDuration`, so that column can differ between calls.
 *
 * @param stateRoot - Path to the run's state root directory
 * @returns Markdown-formatted compaction summary
 */
export function buildCompactionSummary(stateRoot: string): string {
	/** How many of the parsed tail events are listed in the summary. */
	const maxListedEvents = 10;
	/** Collapse line breaks so a value stays on a single markdown line. */
	const singleLine = (text: string): string => text.replace(/\r?\n/g, " ");

	const paths = summaryPathsFor(stateRoot);

	// Read manifest
	const manifest = readJsonFile<TeamRunManifest>(paths.manifestPath);

	// Read tasks. The file content is untrusted JSON: anything that is not an
	// array (missing file, object, primitive) is treated as "no tasks" instead
	// of crashing on `.filter` further down.
	const loadedTasks = readJsonFile<TeamTaskState[]>(paths.tasksPath);
	const tasks = Array.isArray(loadedTasks) ? loadedTasks : [];

	// Read tail events
	const tailEvents = parseEvents(readTailLines(paths.eventsPath, MAX_TAIL_EVENTS));

	const sections: string[] = [];

	// Section: Run Metadata
	sections.push("# Run Summary", "", "## Run Metadata");
	if (manifest) {
		sections.push(`- **Run ID**: ${manifest.runId}`);
		sections.push(`- **Team**: ${manifest.team}`);
		if (manifest.workflow) {
			sections.push(`- **Workflow**: ${manifest.workflow}`);
		}
		if (manifest.goal) {
			sections.push(`- **Goal**: ${singleLine(manifest.goal).slice(0, 500)}`);
		}
		sections.push(`- **Status**: ${manifest.status}`);
		sections.push(`- **Created**: ${manifest.createdAt}`);
		sections.push(`- **Updated**: ${manifest.updatedAt}`);
		if (manifest.workspaceMode) {
			sections.push(`- **Workspace Mode**: ${manifest.workspaceMode}`);
		}
	} else {
		sections.push("- **Status**: manifest unavailable");
	}

	// Section: Task Progress Table
	sections.push("", "## Task Progress", "");
	if (tasks.length > 0) {
		sections.push("| ID | Role | Status | Duration |");
		sections.push("|---|---|---|---|");
		for (const task of tasks) {
			const role = task.role || "—";
			const status = task.status || "—";
			const duration = formatDuration(task.startedAt, task.finishedAt);
			sections.push(`| ${task.id} | ${role} | ${status} | ${duration} |`);
		}
	} else {
		sections.push("No tasks recorded.");
	}

	// Section: Recent Task Results
	const finishedTasks = tasks
		.filter((t) => t.status === "completed" || t.status === "failed")
		.slice(-MAX_RECENT_RESULTS);

	if (finishedTasks.length > 0) {
		sections.push("", "## Recent Task Results");
		for (const task of finishedTasks) {
			sections.push("", `### ${task.id} (${task.status})`);
			if (task.error) {
				// Errors are frequently multi-line; raw line breaks would end
				// the bullet early, so flatten them like the goal above.
				sections.push(`- **Error**: ${singleLine(String(task.error))}`);
			}
			if (task.diagnostics && Object.keys(task.diagnostics).length > 0) {
				sections.push("- **Diagnostics**:");
				for (const [key, value] of Object.entries(task.diagnostics)) {
					sections.push(`  - ${key}: ${JSON.stringify(value)}`);
				}
			}
			if (task.metrics && Object.keys(task.metrics).length > 0) {
				sections.push("- **Metrics**:");
				for (const [key, value] of Object.entries(task.metrics)) {
					sections.push(`  - ${key}: ${value}`);
				}
			}
		}
	}

	// Section: Next Steps (pending/queued tasks)
	const pendingStatuses = new Set<string>(["queued", "waiting", "running"]);
	const pendingTasks = tasks.filter((t) => pendingStatuses.has(t.status));
	if (pendingTasks.length > 0) {
		sections.push("", "## Next Steps", "");
		for (const task of pendingTasks) {
			const title = task.title || task.role || "Untitled";
			sections.push(`- [${task.status}] ${task.id}: ${title}`);
		}
	}

	// Section: Tail Events Summary
	// The heading reports the number of events actually listed, not the
	// (larger) number of events that were parsed from the tail.
	const listedEvents = tailEvents.slice(-maxListedEvents);
	if (listedEvents.length > 0) {
		sections.push("", `## Recent Events (last ${listedEvents.length})`, "");
		for (const event of listedEvents) {
			const taskPart = event.taskId ? ` task=${event.taskId}` : "";
			const msgPart = event.message ? ` — ${event.message}` : "";
			sections.push(`- [${event.time}] ${event.type}${taskPart}${msgPart}`);
		}
	}

	return sections.join("\n");
}
@@ -1,190 +1,190 @@
1
- import * as fs from "node:fs";
2
- import type { TeamTaskState, TeamRunManifest } from "../state/types.ts";
3
-
4
- // ============================================================================
5
- // Phase 1.2: Completion Mutation Guard — detects tasks that claim success but
6
- // made no observable mutations. Used by task-runner.ts.
7
- // ============================================================================
8
-
9
- export interface CompletionMutationGuardInput {
10
- role: string;
11
- taskText?: string;
12
- transcriptPath?: string;
13
- stdout?: string;
14
- }
15
-
16
- export interface CompletionMutationGuardResult {
17
- expectedMutation: boolean;
18
- observedMutation: boolean;
19
- reason?: "no_mutation_observed";
20
- observedTools: string[];
21
- }
22
-
23
- const MUTATING_ROLES = new Set(["executor", "test-engineer"]);
24
- const MUTATING_TOOLS = new Set(["edit", "write", "multi_edit", "apply_patch", "replace_in_file", "insert", "delete_files", "create_file", "overwrite", "patch"]);
25
- const READ_ONLY_COMMANDS = /^(pwd|ls|dir|cat|type|sed|grep|rg|find|git\s+(status|diff|log|show|branch|remote|rev-parse|ls-files)|npm\s+(test|run\s+(typecheck|check|lint|test|ci))|node\s+--test)\b/i;
26
- const MUTATING_COMMANDS = /\b(rm\s+-|del\s+|erase\s+|mv\s+|move\s+|cp\s+|copy\s+|mkdir\b|touch\b|git\s+(add|commit|push|reset|clean|checkout|switch|merge|rebase|stash)|npm\s+(install|i|uninstall|publish|version)|pnpm\s+(add|install|remove)|yarn\s+(add|install|remove)|python\b.*>|node\b.*>|echo\b.*>|Set-Content|Out-File|sed\s+-i|tee\b|dd\b.*of=|wget\b.*-O|curl\b.*-o)\b/i;
27
- const READ_ONLY_HINTS = /\b(read-only|no edits?|do not edit|không sửa|khong sua|chỉ đọc|chi doc|plan only|chỉ lập plan|review only|audit only)\b/i;
28
-
29
- function asRecord(value: unknown): Record<string, unknown> | undefined {
30
- return value && typeof value === "object" && !Array.isArray(value) ? value as Record<string, unknown> : undefined;
31
- }
32
-
33
- function commandText(value: unknown): string {
34
- const record = asRecord(value);
35
- if (!record) return typeof value === "string" ? value : "";
36
- for (const key of ["command", "cmd", "script", "input"]) {
37
- const raw = record[key];
38
- if (typeof raw === "string") return raw;
39
- }
40
- return JSON.stringify(record);
41
- }
42
-
43
- function isMutatingTool(tool: string, args: unknown): boolean {
44
- const normalized = tool.toLowerCase();
45
- if (MUTATING_TOOLS.has(normalized)) return true;
46
- if (normalized === "bash" || normalized === "shell" || normalized === "powershell") {
47
- const command = commandText(args).trim();
48
- if (!command) return false;
49
- // Check mutating patterns first: sed -i is mutating even though plain sed is read-only.
50
- if (MUTATING_COMMANDS.test(command)) return true;
51
- if (READ_ONLY_COMMANDS.test(command)) return false;
52
- // If the command doesn't match either list, treat unknown bash calls as potentially mutating.
53
- return true;
54
- }
55
- return false;
56
- }
57
-
58
- function collectToolCallsFromEvent(event: unknown): Array<{ tool: string; args?: unknown }> {
59
- const record = asRecord(event);
60
- if (!record) return [];
61
- const calls: Array<{ tool: string; args?: unknown }> = [];
62
- const directTool = record.toolName ?? record.name ?? record.tool;
63
- if (typeof directTool === "string" && (record.type === "tool_execution_start" || record.type === "toolCall" || record.type === "tool_call")) {
64
- calls.push({ tool: directTool, args: record.args ?? record.input });
65
- }
66
- const content = Array.isArray(record.content) ? record.content : asRecord(record.message)?.content;
67
- if (Array.isArray(content)) {
68
- for (const part of content) {
69
- const item = asRecord(part);
70
- if (!item) continue;
71
- const tool = item.name ?? item.toolName ?? item.tool;
72
- if (typeof tool === "string" && (item.type === "toolCall" || item.type === "tool_call" || item.type === "tool_execution_start")) calls.push({ tool, args: item.input ?? item.args });
73
- }
74
- }
75
- return calls;
76
- }
77
-
78
- function transcriptText(input: CompletionMutationGuardInput): string {
79
- if (input.transcriptPath && fs.existsSync(input.transcriptPath)) return fs.readFileSync(input.transcriptPath, "utf-8");
80
- return input.stdout ?? "";
81
- }
82
-
83
- export function expectsImplementationMutation(input: Pick<CompletionMutationGuardInput, "role" | "taskText">): boolean {
84
- if (!MUTATING_ROLES.has(input.role)) return false;
85
- return !READ_ONLY_HINTS.test(input.taskText ?? "");
86
- }
87
-
88
- export function evaluateCompletionMutationGuard(input: CompletionMutationGuardInput): CompletionMutationGuardResult {
89
- const expectedMutation = expectsImplementationMutation(input);
90
- const observedTools: string[] = [];
91
- let observedMutation = false;
92
- const text = transcriptText(input);
93
- for (const line of text.split("\n")) {
94
- const trimmed = line.trim();
95
- if (!trimmed) continue;
96
- let event: unknown;
97
- try { event = JSON.parse(trimmed); } catch { continue; }
98
- for (const call of collectToolCallsFromEvent(event)) {
99
- observedTools.push(call.tool);
100
- if (isMutatingTool(call.tool, call.args)) observedMutation = true;
101
- }
102
- }
103
- return {
104
- expectedMutation,
105
- observedMutation,
106
- observedTools,
107
- ...(expectedMutation && !observedMutation ? { reason: "no_mutation_observed" as const } : {}),
108
- };
109
- }
110
-
111
- // ============================================================================
112
- // Phase 11a: Artifact-based Completion Verification — a second layer that
113
- // checks whether a completed task actually produced meaningful artifacts.
114
- // ============================================================================
115
-
116
- /**
117
- * Guard against false-positive task completions.
118
- *
119
- * Checks whether a task that claims success actually produced meaningful output.
120
- * Returns a verification result with the green level (0-3) and any warnings.
121
- */
122
- export interface CompletionVerifyResult {
123
- /** 0 = no output, 1 = minimal, 2 = moderate, 3 = strong */
124
- greenLevel: number;
125
- /** Warnings about potentially incomplete work */
126
- warnings: string[];
127
- }
128
-
129
- const MAX_OUTPUT_PREVIEW = 200;
130
-
131
- function isTrivialError(error: string | undefined): boolean {
132
- if (!error) return false;
133
- return error.trim().length === 0;
134
- }
135
-
136
/**
 * Score how much evidence a finished task left behind.
 *
 * A task with a non-trivial error is rejected immediately with level 0.
 * Otherwise one point is granted for each of: a result artifact, a transcript
 * artifact, and at least one manifest artifact whose producer is this task's
 * id or agent. Warnings are added when no manifest artifact matches and when
 * recorded usage adds up to zero tokens on a task scoring below 3.
 *
 * @param task State of the task being verified.
 * @param manifest Run manifest whose artifact list is searched.
 * @returns The green level (0-3) together with any warnings.
 */
export function verifyTaskCompletion(
  task: TeamTaskState,
  manifest: TeamRunManifest,
): CompletionVerifyResult {
  // A real error overrides every other signal.
  if (task.error && !isTrivialError(task.error)) {
    return { greenLevel: 0, warnings: [`Task has error: ${task.error}`] };
  }

  const warnings: string[] = [];

  const producedRunArtifact = manifest.artifacts.some(
    (artifact) => artifact.producer === task.id || artifact.producer === task.agent,
  );

  // One point per piece of evidence: result, transcript, run-level artifact.
  const evidence = [Boolean(task.resultArtifact), Boolean(task.transcriptArtifact), producedRunArtifact];
  const greenLevel = evidence.filter(Boolean).length;

  if (!producedRunArtifact) {
    warnings.push("No run-level artifacts produced by this task");
  }

  // Zero recorded tokens suggests the task never actually ran.
  if (task.usage) {
    const spentTokens = (task.usage.input ?? 0) + (task.usage.output ?? 0);
    if (spentTokens === 0 && greenLevel < 3) {
      warnings.push("Task reports zero token usage — may not have executed");
    }
  }

  return { greenLevel: Math.min(greenLevel, 3), warnings };
}
181
-
182
/**
 * Format a preview of task output for diagnostic display.
 *
 * Surrounding whitespace is stripped. Output that is missing, empty, or made
 * only of whitespace is shown as "(no output)". Longer text is cut at
 * `MAX_OUTPUT_PREVIEW` UTF-16 code units and suffixed with "...".
 *
 * @param output Raw task output, if any.
 * @returns The trimmed output, a truncated preview, or the "(no output)" placeholder.
 */
export function formatOutputPreview(output: string | undefined): string {
  const trimmed = output?.trim() ?? "";
  if (trimmed.length === 0) return "(no output)";
  if (trimmed.length <= MAX_OUTPUT_PREVIEW) return trimmed;

  // Do not cut between the two halves of a surrogate pair: a dangling high
  // surrogate would render as a replacement character.
  let end = MAX_OUTPUT_PREVIEW;
  const lastUnit = trimmed.charCodeAt(end - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;

  return trimmed.slice(0, end) + "...";
}
1
+ import * as fs from "node:fs";
2
+ import type { TeamTaskState, TeamRunManifest } from "../state/types.ts";
3
+
4
+ // ============================================================================
5
+ // Phase 1.2: Completion Mutation Guard — detects tasks that claim success but
6
+ // made no observable mutations. Used by task-runner.ts.
7
+ // ============================================================================
8
+
9
/** Facts about a finished task that the completion mutation guard inspects. */
export interface CompletionMutationGuardInput {
  /** Agent role; only roles listed in `MUTATING_ROLES` are expected to mutate. */
  role: string;
  /** Task description, searched for read-only hints such as "review only". */
  taskText?: string;
  /** Path of a newline-delimited JSON transcript; preferred over `stdout` when readable. */
  transcriptPath?: string;
  /** Captured standard output, scanned when no transcript file is used. */
  stdout?: string;
}
15
+
16
/** Verdict produced by `evaluateCompletionMutationGuard`. */
export interface CompletionMutationGuardResult {
  /** Whether the role and task text imply that files should have been changed. */
  expectedMutation: boolean;
  /** Whether at least one observed tool call was classified as mutating. */
  observedMutation: boolean;
  /** Name of every tool call found in the transcript, in order of appearance. */
  observedTools: string[];
  /** Present only when a mutation was expected but none was observed. */
  reason?: "no_mutation_observed";
}
22
+
23
/** Roles whose tasks are expected to change files unless the task text says otherwise. */
const MUTATING_ROLES = new Set(["executor", "test-engineer"]);

/** Tool names (compared in lower case) that always count as a mutation. */
const MUTATING_TOOLS = new Set(["edit", "write", "multi_edit", "apply_patch", "replace_in_file", "insert", "delete_files", "create_file", "overwrite", "patch"]);

/** Shell commands that only inspect state; anchored to the start of the command line. */
const READ_ONLY_COMMANDS = /^(pwd|ls|dir|cat|type|sed|grep|rg|find|git\s+(status|diff|log|show|branch|remote|rev-parse|ls-files)|npm\s+(test|run\s+(typecheck|check|lint|test|ci))|node\s+--test)\b/i;

/**
 * Shell fragments that change state, matched anywhere in the command line.
 *
 * The first alternative lists known mutating commands. The second catches any
 * output redirection (`>` or `>>`) to a target other than `/dev/null`, so that
 * writes such as `cat > file <<EOF` or `grep x src >> out.log` are not
 * mistaken for read-only commands. File-descriptor duplication (`2>&1`) and
 * arrow-like tokens (`=>`, `->`, `>=`) are excluded. A `>` inside a quoted
 * argument can still match; that errs on the side of "mutating", the same
 * direction the caller takes for unrecognised commands.
 */
const MUTATING_COMMANDS = /\b(rm\s+-|del\s+|erase\s+|mv\s+|move\s+|cp\s+|copy\s+|mkdir\b|touch\b|git\s+(add|commit|push|reset|clean|checkout|switch|merge|rebase|stash)|npm\s+(install|i|uninstall|publish|version)|pnpm\s+(add|install|remove)|yarn\s+(add|install|remove)|python\b.*>|node\b.*>|echo\b.*>|Set-Content|Out-File|sed\s+-i|tee\b|dd\b.*of=|wget\b.*-O|curl\b.*-o)\b|(?:^|[^=<>-])>>?(?![>&=])\s*(?!\/dev\/null\b)[^\s>]/i;

/** Phrases in a task description (English and Vietnamese) that mark it as read-only. */
const READ_ONLY_HINTS = /\b(read-only|no edits?|do not edit|không sửa|khong sua|chỉ đọc|chi doc|plan only|chỉ lập plan|review only|audit only)\b/i;
28
+
29
/**
 * Narrow an unknown value to a plain string-keyed record.
 *
 * @param value Any value, typically the result of `JSON.parse`.
 * @returns The same object when it is a non-null, non-array object; otherwise `undefined`.
 */
function asRecord(value: unknown): Record<string, unknown> | undefined {
  // typeof null is "object", so null needs its own check.
  if (value === null || typeof value !== "object") return undefined;
  if (Array.isArray(value)) return undefined;
  return value as Record<string, unknown>;
}
32
+
33
/**
 * Extract the command line from a shell tool's arguments.
 *
 * A string is returned as is. For an object, the first of `command`, `cmd`,
 * `script`, `input` that holds a string wins; if none does, the whole object
 * is serialised as JSON so it can still be pattern-matched. Anything else
 * (arrays, numbers, null, ...) yields an empty string.
 *
 * @param value Tool-call arguments of unknown shape.
 * @returns The command text, or "" when none can be derived.
 */
function commandText(value: unknown): string {
  if (typeof value === "string") return value;
  if (value === null || typeof value !== "object" || Array.isArray(value)) return "";

  const fields = value as Record<string, unknown>;
  const firstString = ["command", "cmd", "script", "input"]
    .map((key) => fields[key])
    .find((candidate): candidate is string => typeof candidate === "string");

  // An empty string is a valid hit, hence ?? rather than ||.
  return firstString ?? JSON.stringify(fields);
}
42
+
43
/**
 * Decide whether a single tool call changed state.
 *
 * Tools listed in `MUTATING_TOOLS` always count. For `bash`, `shell`, and
 * `powershell` the command text is classified: a mutating pattern wins, then a
 * read-only pattern, and anything unrecognised is assumed to mutate. All other
 * tools are treated as non-mutating. Tool names are compared in lower case.
 *
 * @param tool Tool name as it appears in the transcript.
 * @param args Arguments of the call, in whatever shape the transcript recorded.
 * @returns `true` when the call is considered mutating.
 */
function isMutatingTool(tool: string, args: unknown): boolean {
  const name = tool.toLowerCase();
  if (MUTATING_TOOLS.has(name)) return true;

  const isShellTool = ["bash", "shell", "powershell"].includes(name);
  if (!isShellTool) return false;

  const cmd = commandText(args).trim();
  if (cmd.length === 0) return false;

  // Mutating patterns are tested first: `sed -i` mutates although plain `sed` is read-only.
  if (MUTATING_COMMANDS.test(cmd)) return true;

  // A command on neither list is treated as potentially mutating.
  return !READ_ONLY_COMMANDS.test(cmd);
}
57
+
58
/**
 * Pull every tool call out of one parsed transcript event.
 *
 * Two shapes are recognised: the event itself is a tool call (its `type` is
 * `tool_execution_start`, `toolCall`, or `tool_call` and it names a tool), or
 * it carries a `content` array, directly or under `message`, whose items are
 * tool calls. Both are collected when both are present.
 *
 * @param event One parsed JSON line from the transcript.
 * @returns The tool calls found, possibly empty.
 */
function collectToolCallsFromEvent(event: unknown): Array<{ tool: string; args?: unknown }> {
  const root = asRecord(event);
  if (!root) return [];

  const isToolCallType = (kind: unknown): boolean =>
    kind === "tool_execution_start" || kind === "toolCall" || kind === "tool_call";

  const found: Array<{ tool: string; args?: unknown }> = [];

  // Shape 1: the event itself describes a tool call.
  const topLevelName = root.toolName ?? root.name ?? root.tool;
  if (typeof topLevelName === "string" && isToolCallType(root.type)) {
    found.push({ tool: topLevelName, args: root.args ?? root.input });
  }

  // Shape 2: tool calls nested in a content array.
  const parts = Array.isArray(root.content) ? root.content : asRecord(root.message)?.content;
  if (!Array.isArray(parts)) return found;

  for (const part of parts) {
    const entry = asRecord(part);
    if (!entry) continue;
    const partName = entry.name ?? entry.toolName ?? entry.tool;
    if (typeof partName === "string" && isToolCallType(entry.type)) {
      found.push({ tool: partName, args: entry.input ?? entry.args });
    }
  }
  return found;
}
77
+
78
/**
 * Load the raw text the mutation guard should scan.
 *
 * The transcript file is preferred. When no path is given, or the path cannot
 * be read as a file (missing, a directory, permission denied, removed between
 * check and read), the captured stdout is used instead.
 *
 * @param input Guard input carrying the optional transcript path and stdout.
 * @returns Transcript contents, else stdout, else an empty string.
 */
function transcriptText(input: CompletionMutationGuardInput): string {
  if (input.transcriptPath) {
    try {
      // Reading directly avoids the exists-then-read race.
      return fs.readFileSync(input.transcriptPath, "utf-8");
    } catch {
      // The guard is a heuristic: an unreadable transcript must not crash it,
      // so fall through to stdout.
    }
  }
  return input.stdout ?? "";
}
82
+
83
/**
 * Tell whether a task is expected to change files.
 *
 * That is the case when the role is one of `MUTATING_ROLES` and the task text
 * contains none of the read-only hints.
 *
 * @param input Role and optional task text of the task.
 * @returns `true` when a mutation should be observable after the task ran.
 */
export function expectsImplementationMutation(input: Pick<CompletionMutationGuardInput, "role" | "taskText">): boolean {
  const description = input.taskText ?? "";
  return MUTATING_ROLES.has(input.role) && !READ_ONLY_HINTS.test(description);
}
87
+
88
/**
 * Scan a task's transcript for tool calls and report whether any of them
 * mutated state.
 *
 * The transcript (or stdout fallback) is read as newline-delimited JSON; blank
 * lines and lines that fail to parse are ignored. Every tool call found is
 * recorded in `observedTools` in encounter order, duplicates included.
 *
 * @param input Role, task text, and transcript source for the finished task.
 * @returns The guard verdict; `reason` is present only when a mutation was
 *   expected but none was observed.
 */
export function evaluateCompletionMutationGuard(input: CompletionMutationGuardInput): CompletionMutationGuardResult {
  const expectedMutation = expectsImplementationMutation(input);
  const observedTools: string[] = [];
  let observedMutation = false;

  const rows = transcriptText(input)
    .split("\n")
    .map((row) => row.trim())
    .filter((row) => row.length > 0);

  for (const row of rows) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(row);
    } catch {
      // Not a JSON event line; nothing to inspect.
      continue;
    }
    for (const { tool, args } of collectToolCallsFromEvent(parsed)) {
      observedTools.push(tool);
      observedMutation = isMutatingTool(tool, args) || observedMutation;
    }
  }

  const verdict: CompletionMutationGuardResult = { expectedMutation, observedMutation, observedTools };
  if (expectedMutation && !observedMutation) verdict.reason = "no_mutation_observed";
  return verdict;
}
110
+
111
+ // ============================================================================
112
+ // Phase 11a: Artifact-based Completion Verification — a second layer that
113
+ // checks whether a completed task actually produced meaningful artifacts.
114
+ // ============================================================================
115
+
116
/**
 * Outcome of the artifact-based completion check performed by
 * `verifyTaskCompletion`, used to spot false-positive task completions.
 */
export interface CompletionVerifyResult {
  /**
   * Evidence score from 0 to 3:
   * 0 = no output, 1 = minimal, 2 = moderate, 3 = strong.
   */
  greenLevel: number;
  /** Human-readable notes about work that looks incomplete. */
  warnings: string[];
}
128
+
129
/** Longest preview, in UTF-16 code units, that `formatOutputPreview` returns before appending "...". */
const MAX_OUTPUT_PREVIEW = 200;
130
+
131
/**
 * Tell whether an error message is present but carries no information.
 *
 * Only a non-empty string made entirely of whitespace counts as trivial.
 * A missing or empty message yields `false`.
 *
 * @param error Error text recorded on the task, if any.
 * @returns `true` for a whitespace-only message, otherwise `false`.
 */
function isTrivialError(error: string | undefined): boolean {
  return error ? error.trim() === "" : false;
}
135
+
136
/**
 * Score how much evidence a finished task left behind.
 *
 * A task with a non-trivial error is rejected immediately with level 0.
 * Otherwise one point is granted for each of: a result artifact, a transcript
 * artifact, and at least one manifest artifact whose producer is this task's
 * id or agent. Warnings are added when no manifest artifact matches and when
 * recorded usage adds up to zero tokens on a task scoring below 3.
 *
 * @param task State of the task being verified.
 * @param manifest Run manifest whose artifact list is searched.
 * @returns The green level (0-3) together with any warnings.
 */
export function verifyTaskCompletion(
  task: TeamTaskState,
  manifest: TeamRunManifest,
): CompletionVerifyResult {
  // A real error overrides every other signal.
  if (task.error && !isTrivialError(task.error)) {
    return { greenLevel: 0, warnings: [`Task has error: ${task.error}`] };
  }

  const warnings: string[] = [];

  const producedRunArtifact = manifest.artifacts.some(
    (artifact) => artifact.producer === task.id || artifact.producer === task.agent,
  );

  // One point per piece of evidence: result, transcript, run-level artifact.
  const evidence = [Boolean(task.resultArtifact), Boolean(task.transcriptArtifact), producedRunArtifact];
  const greenLevel = evidence.filter(Boolean).length;

  if (!producedRunArtifact) {
    warnings.push("No run-level artifacts produced by this task");
  }

  // Zero recorded tokens suggests the task never actually ran.
  if (task.usage) {
    const spentTokens = (task.usage.input ?? 0) + (task.usage.output ?? 0);
    if (spentTokens === 0 && greenLevel < 3) {
      warnings.push("Task reports zero token usage — may not have executed");
    }
  }

  return { greenLevel: Math.min(greenLevel, 3), warnings };
}
181
+
182
/**
 * Format a preview of task output for diagnostic display.
 *
 * Surrounding whitespace is stripped. Output that is missing, empty, or made
 * only of whitespace is shown as "(no output)". Longer text is cut at
 * `MAX_OUTPUT_PREVIEW` UTF-16 code units and suffixed with "...".
 *
 * @param output Raw task output, if any.
 * @returns The trimmed output, a truncated preview, or the "(no output)" placeholder.
 */
export function formatOutputPreview(output: string | undefined): string {
  const trimmed = output?.trim() ?? "";
  if (trimmed.length === 0) return "(no output)";
  if (trimmed.length <= MAX_OUTPUT_PREVIEW) return trimmed;

  // Do not cut between the two halves of a surrogate pair: a dangling high
  // surrogate would render as a replacement character.
  let end = MAX_OUTPUT_PREVIEW;
  const lastUnit = trimmed.charCodeAt(end - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;

  return trimmed.slice(0, end) + "...";
}