pi-crew 0.1.45 → 0.1.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. package/README.md +5 -5
  2. package/agents/analyst.md +1 -1
  3. package/agents/critic.md +1 -1
  4. package/agents/executor.md +1 -1
  5. package/agents/explorer.md +1 -1
  6. package/agents/planner.md +1 -1
  7. package/agents/reviewer.md +1 -1
  8. package/agents/security-reviewer.md +1 -1
  9. package/agents/test-engineer.md +1 -1
  10. package/agents/verifier.md +1 -1
  11. package/agents/writer.md +1 -1
  12. package/docs/next-upgrade-roadmap.md +733 -0
  13. package/docs/refactor-tasks-phase3.md +394 -394
  14. package/docs/refactor-tasks-phase4.md +564 -564
  15. package/docs/refactor-tasks-phase5.md +402 -402
  16. package/docs/refactor-tasks-phase6.md +662 -662
  17. package/docs/research-awesome-agent-skills-distillation.md +100 -0
  18. package/docs/research-extension-examples.md +297 -297
  19. package/docs/research-extension-system.md +324 -324
  20. package/docs/research-oh-my-pi-distillation.md +322 -0
  21. package/docs/research-optimization-plan.md +548 -548
  22. package/docs/research-phase10-distillation.md +198 -198
  23. package/docs/research-phase11-distillation.md +201 -201
  24. package/docs/research-pi-coding-agent.md +357 -357
  25. package/docs/research-source-pi-crew-reference.md +174 -174
  26. package/docs/runtime-flow.md +148 -148
  27. package/docs/source-runtime-refactor-map.md +107 -83
  28. package/docs/usage.md +3 -3
  29. package/index.ts +6 -6
  30. package/install.mjs +52 -8
  31. package/package.json +1 -1
  32. package/schema.json +2 -1
  33. package/skills/async-worker-recovery/SKILL.md +42 -0
  34. package/skills/context-artifact-hygiene/SKILL.md +52 -0
  35. package/skills/delegation-patterns/SKILL.md +54 -0
  36. package/skills/mailbox-interactive/SKILL.md +40 -0
  37. package/skills/model-routing-context/SKILL.md +39 -0
  38. package/skills/multi-perspective-review/SKILL.md +58 -0
  39. package/skills/observability-reliability/SKILL.md +41 -0
  40. package/skills/ownership-session-security/SKILL.md +41 -0
  41. package/skills/pi-extension-lifecycle/SKILL.md +39 -0
  42. package/skills/requirements-to-task-packet/SKILL.md +63 -0
  43. package/skills/resource-discovery-config/SKILL.md +41 -0
  44. package/skills/runtime-state-reader/SKILL.md +44 -0
  45. package/skills/secure-agent-orchestration-review/SKILL.md +45 -0
  46. package/skills/state-mutation-locking/SKILL.md +42 -0
  47. package/skills/systematic-debugging/SKILL.md +67 -0
  48. package/skills/ui-render-performance/SKILL.md +39 -0
  49. package/skills/verification-before-done/SKILL.md +57 -0
  50. package/skills/worktree-isolation/SKILL.md +39 -0
  51. package/src/agents/agent-serializer.ts +34 -34
  52. package/src/agents/discover-agents.ts +12 -11
  53. package/src/config/config.ts +48 -24
  54. package/src/config/defaults.ts +14 -0
  55. package/src/extension/cross-extension-rpc.ts +82 -82
  56. package/src/extension/project-init.ts +62 -2
  57. package/src/extension/register.ts +11 -9
  58. package/src/extension/registration/commands.ts +32 -25
  59. package/src/extension/registration/compaction-guard.ts +125 -125
  60. package/src/extension/registration/subagent-helpers.ts +8 -0
  61. package/src/extension/registration/subagent-tools.ts +149 -148
  62. package/src/extension/registration/team-tool.ts +8 -6
  63. package/src/extension/run-bundle-schema.ts +89 -89
  64. package/src/extension/run-index.ts +13 -5
  65. package/src/extension/run-maintenance.ts +62 -43
  66. package/src/extension/team-tool/api.ts +25 -8
  67. package/src/extension/team-tool/cancel.ts +33 -4
  68. package/src/extension/team-tool/context.ts +5 -0
  69. package/src/extension/team-tool/handle-settings.ts +188 -188
  70. package/src/extension/team-tool/inspect.ts +41 -41
  71. package/src/extension/team-tool/lifecycle-actions.ts +91 -79
  72. package/src/extension/team-tool/plan.ts +19 -19
  73. package/src/extension/team-tool/respond.ts +37 -17
  74. package/src/extension/team-tool/run.ts +52 -10
  75. package/src/extension/team-tool/status.ts +12 -1
  76. package/src/extension/team-tool-types.ts +2 -0
  77. package/src/extension/team-tool.ts +32 -11
  78. package/src/i18n.ts +184 -184
  79. package/src/observability/event-to-metric.ts +8 -1
  80. package/src/observability/exporters/otlp-exporter.ts +77 -77
  81. package/src/prompt/prompt-runtime.ts +72 -72
  82. package/src/runtime/agent-control.ts +63 -63
  83. package/src/runtime/agent-memory.ts +72 -72
  84. package/src/runtime/agent-observability.ts +114 -114
  85. package/src/runtime/async-marker.ts +26 -26
  86. package/src/runtime/attention-events.ts +28 -28
  87. package/src/runtime/background-runner.ts +59 -53
  88. package/src/runtime/cancellation.ts +51 -0
  89. package/src/runtime/child-pi.ts +457 -444
  90. package/src/runtime/completion-guard.ts +190 -190
  91. package/src/runtime/crash-recovery.ts +1 -0
  92. package/src/runtime/crew-agent-records.ts +38 -6
  93. package/src/runtime/deadletter.ts +1 -0
  94. package/src/runtime/delivery-coordinator.ts +46 -25
  95. package/src/runtime/direct-run.ts +35 -35
  96. package/src/runtime/effectiveness.ts +76 -0
  97. package/src/runtime/foreground-control.ts +82 -82
  98. package/src/runtime/green-contract.ts +46 -46
  99. package/src/runtime/group-join.ts +106 -106
  100. package/src/runtime/heartbeat-gradient.ts +28 -28
  101. package/src/runtime/heartbeat-watcher.ts +124 -124
  102. package/src/runtime/live-agent-control.ts +88 -87
  103. package/src/runtime/live-agent-manager.ts +103 -85
  104. package/src/runtime/live-control-realtime.ts +36 -36
  105. package/src/runtime/live-session-runtime.ts +309 -305
  106. package/src/runtime/manifest-cache.ts +17 -2
  107. package/src/runtime/model-fallback.ts +6 -4
  108. package/src/runtime/parallel-research.ts +44 -44
  109. package/src/runtime/pi-args.ts +18 -3
  110. package/src/runtime/pi-json-output.ts +111 -111
  111. package/src/runtime/policy-engine.ts +79 -79
  112. package/src/runtime/process-status.ts +5 -1
  113. package/src/runtime/progress-event-coalescer.ts +43 -43
  114. package/src/runtime/recovery-recipes.ts +74 -74
  115. package/src/runtime/retry-executor.ts +81 -64
  116. package/src/runtime/role-permission.ts +39 -39
  117. package/src/runtime/runtime-resolver.ts +22 -6
  118. package/src/runtime/session-resources.ts +25 -25
  119. package/src/runtime/session-snapshot.ts +59 -59
  120. package/src/runtime/session-usage.ts +79 -79
  121. package/src/runtime/sidechain-output.ts +29 -29
  122. package/src/runtime/skill-instructions.ts +222 -0
  123. package/src/runtime/stale-reconciler.ts +4 -14
  124. package/src/runtime/subagent-manager.ts +3 -0
  125. package/src/runtime/supervisor-contact.ts +59 -59
  126. package/src/runtime/task-display.ts +38 -38
  127. package/src/runtime/task-output-context.ts +127 -127
  128. package/src/runtime/task-runner/capabilities.ts +78 -0
  129. package/src/runtime/task-runner/live-executor.ts +105 -101
  130. package/src/runtime/task-runner/progress.ts +119 -119
  131. package/src/runtime/task-runner/prompt-builder.ts +3 -1
  132. package/src/runtime/task-runner/prompt-pipeline.ts +64 -0
  133. package/src/runtime/task-runner/result-utils.ts +14 -14
  134. package/src/runtime/task-runner/state-helpers.ts +22 -22
  135. package/src/runtime/task-runner.ts +44 -5
  136. package/src/runtime/team-runner.ts +78 -15
  137. package/src/runtime/worker-heartbeat.ts +21 -21
  138. package/src/runtime/worker-startup.ts +57 -57
  139. package/src/schema/config-schema.ts +1 -0
  140. package/src/schema/team-tool-schema.ts +3 -3
  141. package/src/state/active-run-registry.ts +165 -0
  142. package/src/state/contracts.ts +1 -1
  143. package/src/state/mailbox.ts +44 -4
  144. package/src/state/state-store.ts +8 -1
  145. package/src/state/task-claims.ts +44 -44
  146. package/src/state/types.ts +44 -2
  147. package/src/state/usage.ts +29 -29
  148. package/src/subagents/async-entry.ts +1 -1
  149. package/src/subagents/index.ts +3 -3
  150. package/src/subagents/live/control.ts +1 -1
  151. package/src/subagents/live/manager.ts +1 -1
  152. package/src/subagents/live/realtime.ts +1 -1
  153. package/src/subagents/live/session-runtime.ts +1 -1
  154. package/src/subagents/manager.ts +1 -1
  155. package/src/subagents/spawn.ts +1 -1
  156. package/src/teams/team-config.ts +1 -0
  157. package/src/teams/team-serializer.ts +38 -38
  158. package/src/types/diff.d.ts +18 -18
  159. package/src/ui/crew-footer.ts +101 -101
  160. package/src/ui/crew-select-list.ts +111 -111
  161. package/src/ui/crew-widget.ts +4 -3
  162. package/src/ui/dashboard-panes/metrics-pane.ts +34 -34
  163. package/src/ui/dashboard-panes/progress-pane.ts +2 -0
  164. package/src/ui/dynamic-border.ts +25 -25
  165. package/src/ui/layout-primitives.ts +106 -106
  166. package/src/ui/loaders.ts +158 -158
  167. package/src/ui/render-diff.ts +119 -119
  168. package/src/ui/render-scheduler.ts +143 -143
  169. package/src/ui/run-snapshot-cache.ts +10 -2
  170. package/src/ui/snapshot-types.ts +2 -0
  171. package/src/ui/spinner.ts +17 -17
  172. package/src/ui/status-colors.ts +58 -58
  173. package/src/ui/syntax-highlight.ts +116 -116
  174. package/src/utils/atomic-write.ts +33 -33
  175. package/src/utils/completion-dedupe.ts +63 -63
  176. package/src/utils/frontmatter.ts +68 -68
  177. package/src/utils/git.ts +262 -262
  178. package/src/utils/ids.ts +12 -12
  179. package/src/utils/names.ts +27 -27
  180. package/src/utils/paths.ts +4 -2
  181. package/src/utils/redaction.ts +44 -44
  182. package/src/utils/safe-paths.ts +47 -47
  183. package/src/utils/sleep.ts +32 -32
  184. package/src/workflows/validate-workflow.ts +40 -40
  185. package/src/workflows/workflow-config.ts +1 -0
  186. package/src/worktree/branch-freshness.ts +45 -45
  187. package/teams/default.team.md +12 -12
  188. package/teams/fast-fix.team.md +11 -11
  189. package/teams/implementation.team.md +18 -18
  190. package/teams/parallel-research.team.md +14 -14
  191. package/teams/research.team.md +11 -11
  192. package/teams/review.team.md +12 -12
  193. package/workflows/default.workflow.md +29 -29
  194. package/workflows/fast-fix.workflow.md +22 -22
  195. package/workflows/implementation.workflow.md +38 -38
  196. package/workflows/parallel-research.workflow.md +46 -46
  197. package/workflows/research.workflow.md +22 -22
  198. package/workflows/review.workflow.md +30 -30
@@ -1,190 +1,190 @@
1
- import * as fs from "node:fs";
2
- import type { TeamTaskState, TeamRunManifest } from "../state/types.ts";
3
-
4
- // ============================================================================
5
- // Phase 1.2: Completion Mutation Guard — detects tasks that claim success but
6
- // made no observable mutations. Used by task-runner.ts.
7
- // ============================================================================
8
-
9
- export interface CompletionMutationGuardInput {
10
- role: string;
11
- taskText?: string;
12
- transcriptPath?: string;
13
- stdout?: string;
14
- }
15
-
16
- export interface CompletionMutationGuardResult {
17
- expectedMutation: boolean;
18
- observedMutation: boolean;
19
- reason?: "no_mutation_observed";
20
- observedTools: string[];
21
- }
22
-
23
- const MUTATING_ROLES = new Set(["executor", "test-engineer"]);
24
- const MUTATING_TOOLS = new Set(["edit", "write", "multi_edit", "apply_patch", "replace_in_file", "insert", "delete_files", "create_file", "overwrite", "patch"]);
25
- const READ_ONLY_COMMANDS = /^(pwd|ls|dir|cat|type|sed|grep|rg|find|git\s+(status|diff|log|show|branch|remote|rev-parse|ls-files)|npm\s+(test|run\s+(typecheck|check|lint|test|ci))|node\s+--test)\b/i;
26
- const MUTATING_COMMANDS = /\b(rm\s+-|del\s+|erase\s+|mv\s+|move\s+|cp\s+|copy\s+|mkdir\b|touch\b|git\s+(add|commit|push|reset|clean|checkout|switch|merge|rebase|stash)|npm\s+(install|i|uninstall|publish|version)|pnpm\s+(add|install|remove)|yarn\s+(add|install|remove)|python\b.*>|node\b.*>|echo\b.*>|Set-Content|Out-File|sed\s+-i|tee\b|dd\b.*of=|wget\b.*-O|curl\b.*-o)\b/i;
27
- const READ_ONLY_HINTS = /\b(read-only|no edits?|do not edit|không sửa|khong sua|chỉ đọc|chi doc|plan only|chỉ lập plan|review only|audit only)\b/i;
28
-
29
- function asRecord(value: unknown): Record<string, unknown> | undefined {
30
- return value && typeof value === "object" && !Array.isArray(value) ? value as Record<string, unknown> : undefined;
31
- }
32
-
33
- function commandText(value: unknown): string {
34
- const record = asRecord(value);
35
- if (!record) return typeof value === "string" ? value : "";
36
- for (const key of ["command", "cmd", "script", "input"]) {
37
- const raw = record[key];
38
- if (typeof raw === "string") return raw;
39
- }
40
- return JSON.stringify(record);
41
- }
42
-
43
- function isMutatingTool(tool: string, args: unknown): boolean {
44
- const normalized = tool.toLowerCase();
45
- if (MUTATING_TOOLS.has(normalized)) return true;
46
- if (normalized === "bash" || normalized === "shell" || normalized === "powershell") {
47
- const command = commandText(args).trim();
48
- if (!command) return false;
49
- // Check mutating patterns first: sed -i is mutating even though plain sed is read-only.
50
- if (MUTATING_COMMANDS.test(command)) return true;
51
- if (READ_ONLY_COMMANDS.test(command)) return false;
52
- // If the command doesn't match either list, treat unknown bash calls as potentially mutating.
53
- return true;
54
- }
55
- return false;
56
- }
57
-
58
- function collectToolCallsFromEvent(event: unknown): Array<{ tool: string; args?: unknown }> {
59
- const record = asRecord(event);
60
- if (!record) return [];
61
- const calls: Array<{ tool: string; args?: unknown }> = [];
62
- const directTool = record.toolName ?? record.name ?? record.tool;
63
- if (typeof directTool === "string" && (record.type === "tool_execution_start" || record.type === "toolCall" || record.type === "tool_call")) {
64
- calls.push({ tool: directTool, args: record.args ?? record.input });
65
- }
66
- const content = Array.isArray(record.content) ? record.content : asRecord(record.message)?.content;
67
- if (Array.isArray(content)) {
68
- for (const part of content) {
69
- const item = asRecord(part);
70
- if (!item) continue;
71
- const tool = item.name ?? item.toolName ?? item.tool;
72
- if (typeof tool === "string" && (item.type === "toolCall" || item.type === "tool_call" || item.type === "tool_execution_start")) calls.push({ tool, args: item.input ?? item.args });
73
- }
74
- }
75
- return calls;
76
- }
77
-
78
- function transcriptText(input: CompletionMutationGuardInput): string {
79
- if (input.transcriptPath && fs.existsSync(input.transcriptPath)) return fs.readFileSync(input.transcriptPath, "utf-8");
80
- return input.stdout ?? "";
81
- }
82
-
83
- export function expectsImplementationMutation(input: Pick<CompletionMutationGuardInput, "role" | "taskText">): boolean {
84
- if (!MUTATING_ROLES.has(input.role)) return false;
85
- return !READ_ONLY_HINTS.test(input.taskText ?? "");
86
- }
87
-
88
- export function evaluateCompletionMutationGuard(input: CompletionMutationGuardInput): CompletionMutationGuardResult {
89
- const expectedMutation = expectsImplementationMutation(input);
90
- const observedTools: string[] = [];
91
- let observedMutation = false;
92
- const text = transcriptText(input);
93
- for (const line of text.split("\n")) {
94
- const trimmed = line.trim();
95
- if (!trimmed) continue;
96
- let event: unknown;
97
- try { event = JSON.parse(trimmed); } catch { continue; }
98
- for (const call of collectToolCallsFromEvent(event)) {
99
- observedTools.push(call.tool);
100
- if (isMutatingTool(call.tool, call.args)) observedMutation = true;
101
- }
102
- }
103
- return {
104
- expectedMutation,
105
- observedMutation,
106
- observedTools,
107
- ...(expectedMutation && !observedMutation ? { reason: "no_mutation_observed" as const } : {}),
108
- };
109
- }
110
-
111
- // ============================================================================
112
- // Phase 11a: Artifact-based Completion Verification — a second layer that
113
- // checks whether a completed task actually produced meaningful artifacts.
114
- // ============================================================================
115
-
116
- /**
117
- * Guard against false-positive task completions.
118
- *
119
- * Checks whether a task that claims success actually produced meaningful output.
120
- * Returns a verification result with the green level (0-3) and any warnings.
121
- */
122
- export interface CompletionVerifyResult {
123
- /** 0 = no output, 1 = minimal, 2 = moderate, 3 = strong */
124
- greenLevel: number;
125
- /** Warnings about potentially incomplete work */
126
- warnings: string[];
127
- }
128
-
129
- const MAX_OUTPUT_PREVIEW = 200;
130
-
131
- function isTrivialError(error: string | undefined): boolean {
132
- if (!error) return false;
133
- return error.trim().length === 0;
134
- }
135
-
136
- export function verifyTaskCompletion(
137
- task: TeamTaskState,
138
- manifest: TeamRunManifest,
139
- ): CompletionVerifyResult {
140
- const warnings: string[] = [];
141
- let greenLevel = 0;
142
-
143
- // Check 1: Has an error?
144
- if (task.error && !isTrivialError(task.error)) {
145
- return { greenLevel: 0, warnings: [`Task has error: ${task.error}`] };
146
- }
147
-
148
- // Check 2: Has result artifact?
149
- if (task.resultArtifact) {
150
- greenLevel += 1;
151
- }
152
-
153
- // Check 3: Has transcript?
154
- if (task.transcriptArtifact) {
155
- greenLevel += 1;
156
- }
157
-
158
- // Check 4: For implementation tasks, verify artifacts were actually produced
159
- const runArtifacts = manifest.artifacts.filter(
160
- (a) => a.producer === task.id || a.producer === task.agent,
161
- );
162
- if (runArtifacts.length > 0) {
163
- greenLevel += 1;
164
- } else if (greenLevel < 3) {
165
- warnings.push("No run-level artifacts produced by this task");
166
- }
167
-
168
- // Check 5: Usage tracking — did the task actually consume tokens?
169
- if (task.usage) {
170
- const totalTokens = (task.usage.input ?? 0) + (task.usage.output ?? 0);
171
- if (totalTokens === 0 && greenLevel < 3) {
172
- warnings.push("Task reports zero token usage — may not have executed");
173
- }
174
- }
175
-
176
- return {
177
- greenLevel: Math.min(greenLevel, 3),
178
- warnings,
179
- };
180
- }
181
-
182
- /**
183
- * Format a preview of task output for diagnostic display.
184
- */
185
- export function formatOutputPreview(output: string | undefined): string {
186
- if (!output) return "(no output)";
187
- const trimmed = output.trim();
188
- if (trimmed.length <= MAX_OUTPUT_PREVIEW) return trimmed;
189
- return trimmed.slice(0, MAX_OUTPUT_PREVIEW) + "...";
190
- }
1
+ import * as fs from "node:fs";
2
+ import type { TeamTaskState, TeamRunManifest } from "../state/types.ts";
3
+
4
+ // ============================================================================
5
+ // Phase 1.2: Completion Mutation Guard — detects tasks that claim success but
6
+ // made no observable mutations. Used by task-runner.ts.
7
+ // ============================================================================
8
+
9
/**
 * Everything the completion mutation guard needs to judge one finished task.
 */
export interface CompletionMutationGuardInput {
	/** Agent role; only roles listed in MUTATING_ROLES are expected to mutate anything. */
	role: string;
	/** Task description; scanned for read-only hints that waive the mutation expectation. */
	taskText?: string;
	/** Path to a JSON-lines transcript; used as the event source when the file can be read. */
	transcriptPath?: string;
	/** Captured stdout; used as the event source when no transcript file is available. */
	stdout?: string;
}
15
+
16
/**
 * Verdict produced by the completion mutation guard.
 */
export interface CompletionMutationGuardResult {
	/** True when the role and task text imply the task should have changed something. */
	expectedMutation: boolean;
	/** True when at least one recorded tool call was classified as mutating. */
	observedMutation: boolean;
	/** Present only when a mutation was expected but none was observed. */
	reason?: "no_mutation_observed";
	/** Name of every tool call found, in transcript order (duplicates kept). */
	observedTools: string[];
}
22
+
23
/** Roles whose tasks are expected to mutate the workspace unless the task text says otherwise. */
const MUTATING_ROLES = new Set(["executor", "test-engineer"]);

/** Tool names (compared lower-cased) that always count as a mutation. */
const MUTATING_TOOLS = new Set(["edit", "write", "multi_edit", "apply_patch", "replace_in_file", "insert", "delete_files", "create_file", "overwrite", "patch"]);

/** Shell commands that only inspect state. Anchored at the start of the command line. */
const READ_ONLY_COMMANDS = /^(pwd|ls|dir|cat|type|sed|grep|rg|find|git\s+(status|diff|log|show|branch|remote|rev-parse|ls-files)|npm\s+(test|run\s+(typecheck|check|lint|test|ci))|node\s+--test)\b/i;

/**
 * Shell fragments that change files, git state or installed packages. Matched anywhere in
 * the command line so that compound commands ("ls && mv ./a ./b") are caught.
 *
 * A word boundary is applied only to alternatives that end in a word character. A single
 * trailing `\b` after the whole group would require a word character after `>`, `of=` or the
 * whitespace following `mv`/`cp`/`del`, which silently disables those alternatives for
 * ordinary inputs such as "echo hi > out.txt", "mv ./a ./b" or "dd of=/dev/sdb".
 *
 * Redirects to another descriptor (`2>&1`) or to /dev/null do not write a file and are
 * therefore not treated as mutations.
 */
const MUTATING_COMMANDS = /\b(?:rm\s+-|(?:del|erase|mv|move|cp|copy)\s+|(?:mkdir|touch|tee)\b|git\s+(?:add|commit|push|reset|clean|checkout|switch|merge|rebase|stash)\b|npm\s+(?:install|i|uninstall|publish|version)\b|(?:pnpm|yarn)\s+(?:add|install|remove)\b|(?:python|node|echo)\b.*>(?!&|\s*\/dev\/null\b)|(?:Set-Content|Out-File)\b|sed\s+-i\b|dd\b.*of=|(?:wget\b.*-O|curl\b.*-o)\b)/i;

/** Phrases (English and Vietnamese) in the task text that mark a task as read-only. */
const READ_ONLY_HINTS = /\b(read-only|no edits?|do not edit|không sửa|khong sua|chỉ đọc|chi doc|plan only|chỉ lập plan|review only|audit only)\b/i;
28
+
29
/**
 * Narrow an unknown value to a plain key/value record.
 *
 * @param value - Anything, typically the result of JSON.parse.
 * @returns The same value typed as a record when it is a non-null, non-array object;
 *          `undefined` for primitives, null and arrays.
 */
function asRecord(value: unknown): Record<string, unknown> | undefined {
	if (typeof value !== "object" || value === null || Array.isArray(value)) return undefined;
	return value as Record<string, unknown>;
}
32
+
33
/**
 * Extract the shell command line from a tool call's arguments.
 *
 * @param value - Tool arguments: a bare string, or an object carrying the command.
 * @returns The first string found under `command`, `cmd`, `script` or `input` (in that
 *          order); the JSON serialisation of the object when none of those is a string;
 *          the value itself when it is a string; otherwise an empty string.
 */
function commandText(value: unknown): string {
	const fields = asRecord(value);
	if (fields === undefined) {
		return typeof value === "string" ? value : "";
	}
	const candidates = [fields.command, fields.cmd, fields.script, fields.input];
	for (const candidate of candidates) {
		if (typeof candidate === "string") return candidate;
	}
	// No recognised key: fall back to the whole payload so pattern matching still sees it.
	return JSON.stringify(fields);
}
42
+
43
/**
 * Decide whether a single tool call counts as a workspace mutation.
 *
 * @param tool - Tool name as recorded in the transcript (case-insensitive).
 * @param args - Raw tool arguments; only inspected for shell-like tools.
 * @returns `true` for known mutating tools and for shell commands that are either
 *          recognised as mutating or not recognised at all; `false` for empty or
 *          read-only shell commands and for every other tool.
 */
function isMutatingTool(tool: string, args: unknown): boolean {
	const name = tool.toLowerCase();
	if (MUTATING_TOOLS.has(name)) return true;

	const isShell = name === "bash" || name === "shell" || name === "powershell";
	if (!isShell) return false;

	const command = commandText(args).trim();
	if (command === "") return false;

	// The mutating patterns win over the read-only list (e.g. `sed -i` versus plain `sed`),
	// and a command that matches neither list is conservatively treated as mutating.
	return MUTATING_COMMANDS.test(command) || !READ_ONLY_COMMANDS.test(command);
}
57
+
58
/**
 * Pull every tool call out of one parsed transcript event.
 *
 * Two shapes are recognised: an event that is itself a tool call, and an event whose
 * `content` (or `message.content`) array holds tool-call parts. A part or event counts as
 * a tool call when its `type` is `tool_execution_start`, `toolCall` or `tool_call` and it
 * carries a string tool name.
 *
 * @param event - One parsed JSON value from the transcript.
 * @returns The tool calls found, event-level call first, then content parts in order.
 */
function collectToolCallsFromEvent(event: unknown): Array<{ tool: string; args?: unknown }> {
	const envelope = asRecord(event);
	if (envelope === undefined) return [];

	const isToolCallType = (kind: unknown): boolean =>
		kind === "tool_execution_start" || kind === "toolCall" || kind === "tool_call";

	const found: Array<{ tool: string; args?: unknown }> = [];

	// Event-level call: name is looked up as toolName, then name, then tool.
	const topLevelName = envelope.toolName ?? envelope.name ?? envelope.tool;
	if (typeof topLevelName === "string" && isToolCallType(envelope.type)) {
		found.push({ tool: topLevelName, args: envelope.args ?? envelope.input });
	}

	const parts = Array.isArray(envelope.content) ? envelope.content : asRecord(envelope.message)?.content;
	if (!Array.isArray(parts)) return found;

	for (const part of parts) {
		const entry = asRecord(part);
		if (entry === undefined) continue;
		// Content parts prefer `name` over `toolName`, and `input` over `args`.
		const nestedName = entry.name ?? entry.toolName ?? entry.tool;
		if (typeof nestedName === "string" && isToolCallType(entry.type)) {
			found.push({ tool: nestedName, args: entry.input ?? entry.args });
		}
	}
	return found;
}
77
+
78
/**
 * Load the raw event text the guard should scan.
 *
 * @param input - Guard input; `transcriptPath` is preferred, `stdout` is the fallback.
 * @returns The transcript file contents when the file can be read, otherwise
 *          `input.stdout`, otherwise an empty string.
 */
function transcriptText(input: CompletionMutationGuardInput): string {
	if (input.transcriptPath) {
		try {
			return fs.readFileSync(input.transcriptPath, "utf-8");
		} catch {
			// Missing, unreadable, a directory, or removed between check and read:
			// the guard is best-effort, so fall back to stdout instead of throwing.
		}
	}
	return input.stdout ?? "";
}
82
+
83
/**
 * Tell whether a task is expected to change something in the workspace.
 *
 * @param input - The task's role and optional task text.
 * @returns `true` when the role is a mutating role and the task text carries no
 *          read-only hint; `false` otherwise.
 */
export function expectsImplementationMutation(input: Pick<CompletionMutationGuardInput, "role" | "taskText">): boolean {
	const isMutatingRole = MUTATING_ROLES.has(input.role);
	return isMutatingRole && !READ_ONLY_HINTS.test(input.taskText ?? "");
}
87
+
88
/**
 * Run the completion mutation guard over a finished task.
 *
 * The transcript (or stdout) is read as JSON lines; lines that are blank or not valid JSON
 * are ignored. Every tool call found is recorded, and the task is flagged with
 * `no_mutation_observed` when a mutation was expected but no call was classified as one.
 *
 * @param input - Role, task text and event source for the task.
 * @returns Expectation, observation, the list of tool names seen, and the optional reason.
 */
export function evaluateCompletionMutationGuard(input: CompletionMutationGuardInput): CompletionMutationGuardResult {
	const expectedMutation = expectsImplementationMutation(input);

	const calls = transcriptText(input)
		.split("\n")
		.map((rawLine) => rawLine.trim())
		.filter((rawLine) => rawLine.length > 0)
		.flatMap((rawLine) => {
			let parsed: unknown;
			try {
				parsed = JSON.parse(rawLine);
			} catch {
				return []; // not a JSON event line
			}
			return collectToolCallsFromEvent(parsed);
		});

	const observedTools = calls.map((call) => call.tool);
	const observedMutation = calls.some((call) => isMutatingTool(call.tool, call.args));

	const verdict: CompletionMutationGuardResult = { expectedMutation, observedMutation, observedTools };
	if (expectedMutation && !observedMutation) verdict.reason = "no_mutation_observed";
	return verdict;
}
110
+
111
+ // ============================================================================
112
+ // Phase 11a: Artifact-based Completion Verification — a second layer that
113
+ // checks whether a completed task actually produced meaningful artifacts.
114
+ // ============================================================================
115
+
116
/**
 * Outcome of the artifact-based completion check for one task: a coarse confidence
 * score plus human-readable warnings about work that looks incomplete.
 */
export interface CompletionVerifyResult {
	/** Confidence score from 0 to 3: 0 = no output, 1 = minimal, 2 = moderate, 3 = strong. */
	greenLevel: number;
	/** Reasons the task may not have really completed; empty when nothing looks wrong. */
	warnings: string[];
}
128
+
129
/** Maximum number of characters kept by formatOutputPreview before truncating. */
const MAX_OUTPUT_PREVIEW = 200;

/**
 * Tell whether an error string carries no information.
 *
 * @param error - The task's error text, if any.
 * @returns `true` only for a non-empty string made up entirely of whitespace;
 *          `false` for `undefined`, the empty string and any real message.
 */
function isTrivialError(error: string | undefined): boolean {
	return typeof error === "string" && error.length > 0 && error.trim() === "";
}
135
+
136
/**
 * Guard against false-positive completions by scoring the evidence a task left behind.
 *
 * A task with a non-trivial error scores 0 immediately. Otherwise one point each is given
 * for a result artifact, a transcript artifact, and at least one manifest artifact whose
 * producer is the task's id or agent.
 *
 * @param task - State of the task being verified.
 * @param manifest - Run manifest whose `artifacts` list is searched for the task's output.
 * @returns The green level (0-3) and any warnings about missing evidence.
 */
export function verifyTaskCompletion(
	task: TeamTaskState,
	manifest: TeamRunManifest,
): CompletionVerifyResult {
	// A real error short-circuits everything else.
	if (task.error && !isTrivialError(task.error)) {
		return { greenLevel: 0, warnings: [`Task has error: ${task.error}`] };
	}

	const warnings: string[] = [];

	const hasResult = Boolean(task.resultArtifact);
	const hasTranscript = Boolean(task.transcriptArtifact);
	const producedRunArtifact = manifest.artifacts.some(
		(artifact) => artifact.producer === task.id || artifact.producer === task.agent,
	);

	const greenLevel = [hasResult, hasTranscript, producedRunArtifact].filter(Boolean).length;

	// Without a run-level artifact the score is at most 2, so the warning always applies.
	if (!producedRunArtifact) {
		warnings.push("No run-level artifacts produced by this task");
	}

	// Zero token usage is only suspicious when the other evidence is not already complete.
	if (task.usage) {
		const totalTokens = (task.usage.input ?? 0) + (task.usage.output ?? 0);
		if (totalTokens === 0 && greenLevel < 3) {
			warnings.push("Task reports zero token usage — may not have executed");
		}
	}

	return { greenLevel: Math.min(greenLevel, 3), warnings };
}
181
+
182
/**
 * Build a short, single-string preview of task output for diagnostics.
 *
 * @param output - Raw task output, possibly missing.
 * @returns `"(no output)"` when the output is missing or empty; otherwise the trimmed
 *          text, cut to MAX_OUTPUT_PREVIEW characters with `"..."` appended when longer.
 */
export function formatOutputPreview(output: string | undefined): string {
	if (!output) return "(no output)";
	const text = output.trim();
	return text.length > MAX_OUTPUT_PREVIEW ? `${text.slice(0, MAX_OUTPUT_PREVIEW)}...` : text;
}
@@ -75,6 +75,7 @@ export function reconcileAllStaleRuns(cwd: string, manifestCache: ManifestCache,
75
75
  if (!fresh || fresh.manifest.status !== "running") return;
76
76
  const result = reconcileStaleRun(fresh.manifest, fresh.tasks, now);
77
77
  if (result.repaired) {
78
+ if (result.repairedTasks) saveRunTasks(fresh.manifest, result.repairedTasks);
78
79
  updateRunStatus(fresh.manifest, "failed", `Stale run reconciled: ${result.detail}`);
79
80
  appendEvent(fresh.manifest.eventsPath, { type: "crew.run.reconciled_stale", runId: manifest.runId, message: result.detail, data: { verdict: result.verdict } });
80
81
  }
@@ -61,6 +61,23 @@ export function agentOutputPath(manifest: TeamRunManifest, taskId: string): stri
61
61
  }
62
62
 
63
63
  const AGENT_READER_TTL_MS = 200;
64
/** Upper bound on the number of agents files tracked by the async reader cache. */
const ASYNC_AGENT_READER_CACHE_MAX_ENTRIES = 128;

/** One cached read of an agents file, optionally with the read that is refreshing it. */
type AsyncAgentReaderCacheEntry = {
	/** Epoch milliseconds after which `records` is no longer served. */
	expiresAt: number;
	/** Records from the last completed read (or a placeholder while a read is pending). */
	records: CrewAgentRecord[];
	/** Pending read, present only while a refresh is running. */
	inFlight?: Promise<CrewAgentRecord[]>;
};

/** Async reader cache keyed by agents-file path; Map insertion order doubles as recency. */
const asyncAgentReaderCache = new Map<string, AsyncAgentReaderCacheEntry>();

/**
 * Store an entry in the async reader cache and keep the cache bounded.
 *
 * Expired entries with no pending read are purged first. The key is then re-inserted so
 * it becomes the most recent entry, and the oldest entries are evicted until the cache is
 * back within ASYNC_AGENT_READER_CACHE_MAX_ENTRIES.
 *
 * @param filePath - Cache key (path of the agents file).
 * @param entry - Entry to store for that path.
 */
function setAsyncAgentReaderCache(filePath: string, entry: AsyncAgentReaderCacheEntry): void {
	const now = Date.now();
	for (const [key, cached] of asyncAgentReaderCache) {
		if (cached.expiresAt <= now && !cached.inFlight) asyncAgentReaderCache.delete(key);
	}
	// Delete before set so the key moves to the end of the insertion order.
	asyncAgentReaderCache.delete(filePath);
	asyncAgentReaderCache.set(filePath, entry);
	while (asyncAgentReaderCache.size > ASYNC_AGENT_READER_CACHE_MAX_ENTRIES) {
		// Use the iterator's `done` flag rather than a falsy check on the key: an
		// empty-string key is falsy and would otherwise stop eviction early.
		const oldest = asyncAgentReaderCache.keys().next();
		if (oldest.done) break;
		asyncAgentReaderCache.delete(oldest.value);
	}
}
64
81
 
65
82
  export function readCrewAgents(manifest: TeamRunManifest): CrewAgentRecord[] {
66
83
  try {
@@ -71,16 +88,31 @@ export function readCrewAgents(manifest: TeamRunManifest): CrewAgentRecord[] {
71
88
  }
72
89
 
73
90
/**
 * Read the run's agent records asynchronously, with a short-lived cache and de-duplication
 * of concurrent reads of the same file.
 *
 * @param manifest - Run manifest used to locate the agents file.
 * @returns The redacted agent records; an empty array when the file is missing,
 *          unreadable, not valid JSON, or does not contain an array.
 */
export async function readCrewAgentsAsync(manifest: TeamRunManifest): Promise<CrewAgentRecord[]> {
	const filePath = agentsPath(manifest);
	const now = Date.now();
	const cached = asyncAgentReaderCache.get(filePath);
	// Join a pending read before trusting `records`. While a read is in flight the cache
	// entry is only a placeholder (previous or empty records with a fresh expiry), so
	// checking freshness first would hand concurrent callers `[]` or stale data.
	if (cached?.inFlight) return cached.inFlight;
	if (cached && cached.expiresAt > now) return cached.records;
	const inFlight = (async (): Promise<CrewAgentRecord[]> => {
		try {
			const parsed = JSON.parse(await fs.promises.readFile(filePath, "utf-8")) as unknown;
			const records = Array.isArray(parsed) ? redactSecrets(parsed) as CrewAgentRecord[] : [];
			// Replacing the entry also drops the `inFlight` marker.
			setAsyncAgentReaderCache(filePath, { expiresAt: Date.now() + AGENT_READER_TTL_MS, records });
			return records;
		} catch {
			// Failures are cached as an empty result for one TTL so a broken file is not re-read in a tight loop.
			setAsyncAgentReaderCache(filePath, { expiresAt: Date.now() + AGENT_READER_TTL_MS, records: [] });
			return [];
		}
	})();
	setAsyncAgentReaderCache(filePath, { expiresAt: now + AGENT_READER_TTL_MS, records: cached?.records ?? [], inFlight });
	return inFlight;
}
80
110
 
81
111
/**
 * Persist the run's agent records and refresh each agent's status.
 *
 * Ensures the state root exists, writes the redacted records through atomicWriteJson,
 * drops the async reader cache entry for that file so the next async read goes back to
 * disk, then calls writeCrewAgentStatus for every record.
 *
 * @param manifest - Run manifest providing the state root and agents-file location.
 * @param records - Agent records to store.
 */
export function saveCrewAgents(manifest: TeamRunManifest, records: CrewAgentRecord[]): void {
	fs.mkdirSync(manifest.stateRoot, { recursive: true });

	const target = agentsPath(manifest);
	atomicWriteJson(target, redactSecrets(records));
	asyncAgentReaderCache.delete(target);

	for (const agent of records) {
		writeCrewAgentStatus(manifest, agent);
	}
}
86
118
 
@@ -12,6 +12,7 @@ export interface DeadletterEntry {
12
12
  reason: DeadletterReason;
13
13
  attempts: number;
14
14
  lastError?: string;
15
+ attemptId?: string;
15
16
  timestamp: string;
16
17
  }
17
18
 
@@ -25,6 +25,7 @@ export class DeliveryCoordinator {
25
25
  private active = false;
26
26
  private generation = 0;
27
27
  private pending: PendingDelivery[] = [];
28
+ private flushing = false;
28
29
  private readonly deps: DeliveryCoordinatorDeps;
29
30
  private ttlTimer: ReturnType<typeof setInterval> | undefined;
30
31
 
@@ -63,7 +64,7 @@ export class DeliveryCoordinator {
63
64
  logInternalError("delivery-coordinator.deliverResult", error, `runId=${runId}`);
64
65
  }
65
66
  }
66
- this.enqueue({ runId, payload: result, timestamp: Date.now(), type: "result" });
67
+ if (!this.flushing) this.enqueue({ runId, payload: result, timestamp: Date.now(), type: "result" });
67
68
  }
68
69
 
69
70
  deliverNotification(notification: NotificationDescriptor): void {
@@ -84,7 +85,7 @@ export class DeliveryCoordinator {
84
85
  }
85
86
  return;
86
87
  }
87
- this.enqueue({ runId: notification.runId ?? "", payload: notification, timestamp: Date.now(), type: "notification" });
88
+ if (!this.flushing) this.enqueue({ runId: notification.runId ?? "", payload: notification, timestamp: Date.now(), type: "notification" });
88
89
  }
89
90
 
90
91
  deliverSteer(runId: string, message: string): void {
@@ -96,36 +97,30 @@ export class DeliveryCoordinator {
96
97
  logInternalError("delivery-coordinator.deliverSteer", error, `runId=${runId}`);
97
98
  }
98
99
  }
99
- this.enqueue({ runId, payload: message, timestamp: Date.now(), type: "steer" });
100
+ if (!this.flushing) this.enqueue({ runId, payload: message, timestamp: Date.now(), type: "steer" });
100
101
  }
101
102
 
102
103
  flushQueuedResults(): void {
103
104
  if (!this.active || this.pending.length === 0) return;
104
105
  const batch = this.pending.splice(0);
105
- for (const delivery of batch) {
106
- if (delivery.generation !== undefined && delivery.generation !== this.generation) {
107
- logInternalError("delivery-coordinator.flush.stale", undefined, `runId=${delivery.runId} type=${delivery.type}`);
108
- continue;
109
- }
110
- try {
111
- switch (delivery.type) {
112
- case "result":
113
- this.deliverResult(delivery.runId, delivery.payload);
114
- break;
115
- case "notification": {
116
- const notification = delivery.payload as NotificationDescriptor;
117
- this.deliverNotification(notification);
118
- break;
119
- }
120
- case "steer": {
121
- const message = typeof delivery.payload === "string" ? delivery.payload : String(delivery.payload);
122
- this.deliverSteer(delivery.runId, message);
123
- break;
124
- }
106
+ this.flushing = true;
107
+ try {
108
+ const retryLater: PendingDelivery[] = [];
109
+ for (const delivery of batch) {
110
+ if (delivery.type === "steer" && delivery.generation !== undefined && delivery.generation !== this.generation) {
111
+ logInternalError("delivery-coordinator.flush.stale", undefined, `runId=${delivery.runId} type=${delivery.type}`);
112
+ continue;
113
+ }
114
+ try {
115
+ if (!this.deliverQueued(delivery)) retryLater.push({ ...delivery, generation: this.generation });
116
+ } catch (error) {
117
+ logInternalError("delivery-coordinator.flush", error, `runId=${delivery.runId} type=${delivery.type}`);
118
+ retryLater.push({ ...delivery, generation: this.generation });
125
119
  }
126
- } catch (error) {
127
- logInternalError("delivery-coordinator.flush", error, `runId=${delivery.runId} type=${delivery.type}`);
128
120
  }
121
+ this.pending.unshift(...retryLater);
122
+ } finally {
123
+ this.flushing = false;
129
124
  }
130
125
  }
131
126
 
@@ -138,6 +133,32 @@ export class DeliveryCoordinator {
138
133
  }
139
134
  }
140
135
 
136
+ private deliverQueued(delivery: PendingDelivery): boolean {
137
+ switch (delivery.type) {
138
+ case "result":
139
+ if (!this.deps.emit) return false;
140
+ this.deps.emit("pi-crew:run-result", delivery.payload);
141
+ return true;
142
+ case "notification": {
143
+ const notification = delivery.payload as NotificationDescriptor;
144
+ if (!this.deps.sendFollowUp) return false;
145
+ this.deps.sendFollowUp(notification.title, notification.body ?? "");
146
+ try {
147
+ this.deps.emit?.("pi-crew:notification", notification);
148
+ } catch {
149
+ // Secondary event delivery must not consume the user-facing notification.
150
+ }
151
+ return true;
152
+ }
153
+ case "steer": {
154
+ if (!this.deps.sendWakeUp) return false;
155
+ const message = typeof delivery.payload === "string" ? delivery.payload : String(delivery.payload);
156
+ this.deps.sendWakeUp(message);
157
+ return true;
158
+ }
159
+ }
160
+ }
161
+
141
162
  private enqueue(delivery: PendingDelivery): void {
142
163
  this.pending.push({ ...delivery, generation: this.generation });
143
164
  }
@@ -1,35 +1,35 @@
1
- import type { AgentConfig } from "../agents/agent-config.ts";
2
- import type { TeamRunManifest, TeamTaskState } from "../state/types.ts";
3
- import type { TeamConfig } from "../teams/team-config.ts";
4
- import type { WorkflowConfig } from "../workflows/workflow-config.ts";
5
-
6
- export function isDirectRun(manifest: Pick<TeamRunManifest, "team" | "workflow">): boolean {
7
- return manifest.workflow === "direct-agent";
8
- }
9
-
10
- export function directTeamAndWorkflowFromRun(manifest: TeamRunManifest, tasks: TeamTaskState[], agents: AgentConfig[]): { team: TeamConfig; workflow: WorkflowConfig } | undefined {
11
- if (!isDirectRun(manifest)) return undefined;
12
- const firstTask = tasks[0];
13
- const agentName = firstTask?.agent ?? (manifest.team.replace(/^direct-/, "") || "executor");
14
- const agent = agents.find((candidate) => candidate.name === agentName);
15
- const role = firstTask?.role ?? "agent";
16
- const stepId = firstTask?.stepId ?? "01_agent";
17
- return {
18
- team: {
19
- name: manifest.team,
20
- description: `Direct subagent run for ${agentName}`,
21
- source: "builtin",
22
- filePath: "<generated>",
23
- roles: [{ name: role, agent: agentName, description: agent?.description }],
24
- defaultWorkflow: "direct-agent",
25
- workspaceMode: manifest.workspaceMode,
26
- },
27
- workflow: {
28
- name: manifest.workflow ?? "direct-agent",
29
- description: `Direct task for ${agentName}`,
30
- source: "builtin",
31
- filePath: "<generated>",
32
- steps: [{ id: stepId, role, task: "{goal}", model: firstTask?.model }],
33
- },
34
- };
35
- }
1
+ import type { AgentConfig } from "../agents/agent-config.ts";
2
+ import type { TeamRunManifest, TeamTaskState } from "../state/types.ts";
3
+ import type { TeamConfig } from "../teams/team-config.ts";
4
+ import type { WorkflowConfig } from "../workflows/workflow-config.ts";
5
+
6
+ export function isDirectRun(manifest: Pick<TeamRunManifest, "team" | "workflow">): boolean {
7
+ return manifest.workflow === "direct-agent";
8
+ }
9
+
10
+ export function directTeamAndWorkflowFromRun(manifest: TeamRunManifest, tasks: TeamTaskState[], agents: AgentConfig[]): { team: TeamConfig; workflow: WorkflowConfig } | undefined {
11
+ if (!isDirectRun(manifest)) return undefined;
12
+ const firstTask = tasks[0];
13
+ const agentName = firstTask?.agent ?? (manifest.team.replace(/^direct-/, "") || "executor");
14
+ const agent = agents.find((candidate) => candidate.name === agentName);
15
+ const role = firstTask?.role ?? "agent";
16
+ const stepId = firstTask?.stepId ?? "01_agent";
17
+ return {
18
+ team: {
19
+ name: manifest.team,
20
+ description: `Direct subagent run for ${agentName}`,
21
+ source: "builtin",
22
+ filePath: "<generated>",
23
+ roles: [{ name: role, agent: agentName, description: agent?.description }],
24
+ defaultWorkflow: "direct-agent",
25
+ workspaceMode: manifest.workspaceMode,
26
+ },
27
+ workflow: {
28
+ name: manifest.workflow ?? "direct-agent",
29
+ description: `Direct task for ${agentName}`,
30
+ source: "builtin",
31
+ filePath: "<generated>",
32
+ steps: [{ id: stepId, role, task: "{goal}", model: firstTask?.model }],
33
+ },
34
+ };
35
+ }