pi-crew 0.1.51 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239):
  1. package/CHANGELOG.md +56 -1
  2. package/README.md +176 -781
  3. package/agents/analyst.md +11 -11
  4. package/agents/critic.md +11 -11
  5. package/agents/executor.md +11 -11
  6. package/agents/explorer.md +11 -11
  7. package/agents/planner.md +11 -11
  8. package/agents/reviewer.md +11 -11
  9. package/agents/security-reviewer.md +11 -11
  10. package/agents/test-engineer.md +11 -11
  11. package/agents/verifier.md +70 -11
  12. package/agents/writer.md +11 -11
  13. package/docs/actions-reference.md +595 -0
  14. package/docs/commands-reference.md +347 -0
  15. package/docs/runtime-flow.md +148 -148
  16. package/index.ts +6 -6
  17. package/package.json +99 -99
  18. package/skills/async-worker-recovery/SKILL.md +42 -42
  19. package/skills/context-artifact-hygiene/SKILL.md +52 -52
  20. package/skills/delegation-patterns/SKILL.md +54 -54
  21. package/skills/mailbox-interactive/SKILL.md +40 -40
  22. package/skills/model-routing-context/SKILL.md +39 -39
  23. package/skills/multi-perspective-review/SKILL.md +58 -58
  24. package/skills/observability-reliability/SKILL.md +41 -41
  25. package/skills/orchestration/SKILL.md +157 -157
  26. package/skills/ownership-session-security/SKILL.md +41 -41
  27. package/skills/pi-extension-lifecycle/SKILL.md +39 -39
  28. package/skills/requirements-to-task-packet/SKILL.md +63 -63
  29. package/skills/resource-discovery-config/SKILL.md +41 -41
  30. package/skills/runtime-state-reader/SKILL.md +44 -44
  31. package/skills/secure-agent-orchestration-review/SKILL.md +45 -45
  32. package/skills/state-mutation-locking/SKILL.md +42 -42
  33. package/skills/systematic-debugging/SKILL.md +67 -67
  34. package/skills/ui-render-performance/SKILL.md +39 -39
  35. package/skills/verification-before-done/SKILL.md +57 -57
  36. package/skills/worktree-isolation/SKILL.md +39 -39
  37. package/src/adapters/claude-adapter.ts +25 -0
  38. package/src/adapters/codex-adapter.ts +21 -0
  39. package/src/adapters/cursor-adapter.ts +17 -0
  40. package/src/adapters/export-util.ts +137 -0
  41. package/src/adapters/index.ts +15 -0
  42. package/src/adapters/registry.ts +18 -0
  43. package/src/adapters/types.ts +23 -0
  44. package/src/agents/agent-config.ts +2 -0
  45. package/src/agents/agent-search.ts +98 -98
  46. package/src/agents/discover-agents.ts +2 -1
  47. package/src/config/config.ts +13 -1
  48. package/src/config/drift-detector.ts +211 -0
  49. package/src/config/markers.ts +327 -0
  50. package/src/config/resilient-parser.ts +108 -0
  51. package/src/config/suggestions.ts +74 -0
  52. package/src/extension/cross-extension-rpc.ts +103 -94
  53. package/src/extension/project-init.ts +21 -1
  54. package/src/extension/register.ts +45 -14
  55. package/src/extension/registration/commands.ts +77 -8
  56. package/src/extension/registration/subagent-tools.ts +10 -1
  57. package/src/extension/registration/team-tool.ts +10 -1
  58. package/src/extension/registration/viewers.ts +48 -34
  59. package/src/extension/run-bundle-schema.ts +89 -89
  60. package/src/extension/run-import.ts +25 -1
  61. package/src/extension/run-index.ts +5 -1
  62. package/src/extension/run-maintenance.ts +142 -68
  63. package/src/extension/team-manager-command.ts +10 -1
  64. package/src/extension/team-tool/doctor.ts +28 -3
  65. package/src/extension/team-tool/handle-settings.ts +195 -188
  66. package/src/extension/team-tool/inspect.ts +41 -41
  67. package/src/extension/team-tool/intent-policy.ts +42 -42
  68. package/src/extension/team-tool/lifecycle-actions.ts +27 -8
  69. package/src/extension/team-tool/plan.ts +19 -19
  70. package/src/extension/team-tool/run.ts +12 -1
  71. package/src/extension/team-tool.ts +11 -1
  72. package/src/i18n.ts +184 -184
  73. package/src/observability/exporters/otlp-exporter.ts +92 -77
  74. package/src/prompt/prompt-runtime.ts +72 -72
  75. package/src/runtime/agent-memory.ts +72 -72
  76. package/src/runtime/agent-observability.ts +114 -114
  77. package/src/runtime/async-marker.ts +26 -26
  78. package/src/runtime/attention-events.ts +28 -28
  79. package/src/runtime/auto-resume.ts +100 -0
  80. package/src/runtime/background-runner.ts +11 -1
  81. package/src/runtime/cancellation-token.ts +89 -89
  82. package/src/runtime/cancellation.ts +61 -61
  83. package/src/runtime/capability-inventory.ts +116 -116
  84. package/src/runtime/child-pi.ts +7 -2
  85. package/src/runtime/compaction-summary.ts +271 -0
  86. package/src/runtime/completion-guard.ts +190 -190
  87. package/src/runtime/crash-recovery.ts +33 -0
  88. package/src/runtime/delta-conflict.ts +360 -0
  89. package/src/runtime/direct-run.ts +35 -35
  90. package/src/runtime/foreground-control.ts +82 -82
  91. package/src/runtime/green-contract.ts +46 -46
  92. package/src/runtime/group-join.ts +106 -106
  93. package/src/runtime/heartbeat-gradient.ts +28 -28
  94. package/src/runtime/heartbeat-watcher.ts +124 -124
  95. package/src/runtime/iteration-hooks.ts +262 -0
  96. package/src/runtime/live-agent-control.ts +88 -88
  97. package/src/runtime/live-control-realtime.ts +36 -36
  98. package/src/runtime/live-extension-bridge.ts +150 -150
  99. package/src/runtime/live-irc.ts +92 -92
  100. package/src/runtime/live-session-health.ts +100 -100
  101. package/src/runtime/loop-gates.ts +129 -0
  102. package/src/runtime/metric-parser.ts +40 -0
  103. package/src/runtime/notebook-helpers.ts +90 -90
  104. package/src/runtime/orphan-sentinel.ts +7 -7
  105. package/src/runtime/parallel-research.ts +44 -44
  106. package/src/runtime/phase-progress.ts +217 -0
  107. package/src/runtime/pi-args.ts +38 -11
  108. package/src/runtime/pi-json-output.ts +111 -111
  109. package/src/runtime/pi-spawn.ts +57 -7
  110. package/src/runtime/policy-engine.ts +79 -79
  111. package/src/runtime/post-checks.ts +122 -0
  112. package/src/runtime/progress-event-coalescer.ts +43 -43
  113. package/src/runtime/prose-compressor.ts +164 -164
  114. package/src/runtime/recovery-recipes.ts +74 -74
  115. package/src/runtime/result-extractor.ts +121 -121
  116. package/src/runtime/role-permission.ts +39 -39
  117. package/src/runtime/sensitive-paths.ts +2 -2
  118. package/src/runtime/session-resources.ts +25 -25
  119. package/src/runtime/session-snapshot.ts +59 -59
  120. package/src/runtime/session-usage.ts +79 -79
  121. package/src/runtime/sidechain-output.ts +29 -29
  122. package/src/runtime/stream-preview.ts +177 -177
  123. package/src/runtime/supervisor-contact.ts +59 -59
  124. package/src/runtime/task-display.ts +38 -38
  125. package/src/runtime/task-graph.ts +207 -0
  126. package/src/runtime/task-quality.ts +207 -0
  127. package/src/runtime/task-runner/capabilities.ts +78 -78
  128. package/src/runtime/task-runner/live-executor.ts +7 -1
  129. package/src/runtime/task-runner/progress.ts +119 -119
  130. package/src/runtime/task-runner/prompt-pipeline.ts +64 -64
  131. package/src/runtime/task-runner/result-utils.ts +14 -14
  132. package/src/runtime/task-runner/run-projection.ts +103 -103
  133. package/src/runtime/task-runner/state-helpers.ts +22 -22
  134. package/src/runtime/team-runner.ts +117 -7
  135. package/src/runtime/worker-heartbeat.ts +21 -21
  136. package/src/runtime/worker-startup.ts +57 -57
  137. package/src/runtime/workflow-state.ts +187 -0
  138. package/src/runtime/workspace-tree.ts +298 -298
  139. package/src/schema/config-schema.ts +11 -0
  140. package/src/schema/validation-types.ts +148 -0
  141. package/src/skills/skill-templates.ts +374 -0
  142. package/src/state/active-run-registry.ts +35 -11
  143. package/src/state/atomic-write.ts +33 -26
  144. package/src/state/contracts.ts +1 -0
  145. package/src/state/event-reconstructor.ts +217 -0
  146. package/src/state/locks.ts +2 -13
  147. package/src/state/mailbox.ts +4 -3
  148. package/src/state/state-store.ts +32 -14
  149. package/src/state/task-claims.ts +44 -44
  150. package/src/state/types.ts +9 -0
  151. package/src/state/usage.ts +29 -29
  152. package/src/subagents/async-entry.ts +1 -1
  153. package/src/subagents/index.ts +3 -3
  154. package/src/subagents/live/control.ts +1 -1
  155. package/src/subagents/live/manager.ts +1 -1
  156. package/src/subagents/live/realtime.ts +1 -1
  157. package/src/subagents/live/session-runtime.ts +1 -1
  158. package/src/subagents/manager.ts +1 -1
  159. package/src/subagents/spawn.ts +1 -1
  160. package/src/teams/team-serializer.ts +38 -38
  161. package/src/types/diff.d.ts +18 -18
  162. package/src/ui/crew-footer.ts +101 -101
  163. package/src/ui/crew-select-list.ts +111 -111
  164. package/src/ui/crew-widget.ts +5 -2
  165. package/src/ui/dashboard-panes/cancellation-pane.ts +42 -42
  166. package/src/ui/dashboard-panes/capability-pane.ts +59 -59
  167. package/src/ui/dashboard-panes/mailbox-pane.ts +35 -35
  168. package/src/ui/dashboard-panes/metrics-pane.ts +34 -34
  169. package/src/ui/dashboard-panes/progress-pane.ts +11 -0
  170. package/src/ui/dynamic-border.ts +25 -25
  171. package/src/ui/layout-primitives.ts +106 -106
  172. package/src/ui/loaders.ts +158 -158
  173. package/src/ui/render-coalescer.ts +51 -51
  174. package/src/ui/render-diff.ts +119 -119
  175. package/src/ui/render-scheduler.ts +143 -143
  176. package/src/ui/run-action-dispatcher.ts +10 -1
  177. package/src/ui/spinner.ts +17 -17
  178. package/src/ui/status-colors.ts +58 -58
  179. package/src/ui/syntax-highlight.ts +116 -116
  180. package/src/ui/transcript-entries.ts +258 -258
  181. package/src/utils/completion-dedupe.ts +63 -63
  182. package/src/utils/frontmatter.ts +68 -68
  183. package/src/utils/git.ts +262 -262
  184. package/src/utils/ids.ts +17 -17
  185. package/src/utils/incremental-reader.ts +104 -104
  186. package/src/utils/names.ts +27 -27
  187. package/src/utils/redaction.ts +44 -44
  188. package/src/utils/safe-paths.ts +47 -47
  189. package/src/utils/scan-cache.ts +136 -136
  190. package/src/utils/sleep.ts +40 -26
  191. package/src/utils/task-name-generator.ts +337 -337
  192. package/src/workflows/validate-workflow.ts +40 -40
  193. package/src/worktree/branch-freshness.ts +45 -45
  194. package/teams/default.team.md +12 -12
  195. package/teams/fast-fix.team.md +11 -11
  196. package/teams/implementation.team.md +18 -18
  197. package/teams/parallel-research.team.md +14 -14
  198. package/teams/research.team.md +11 -11
  199. package/teams/review.team.md +12 -12
  200. package/workflows/default.workflow.md +30 -29
  201. package/workflows/fast-fix.workflow.md +23 -22
  202. package/workflows/implementation.workflow.md +43 -43
  203. package/workflows/parallel-research.workflow.md +46 -46
  204. package/workflows/research.workflow.md +22 -22
  205. package/workflows/review.workflow.md +30 -30
  206. package/docs/refactor-tasks-phase3.md +0 -394
  207. package/docs/refactor-tasks-phase4.md +0 -564
  208. package/docs/refactor-tasks-phase5.md +0 -402
  209. package/docs/refactor-tasks-phase6.md +0 -662
  210. package/docs/refactor-tasks.md +0 -1484
  211. package/docs/research/AGENT-EXECUTION-ARCHITECTURE.md +0 -261
  212. package/docs/research/AGENT-LIFECYCLE-COMPARISON.md +0 -111
  213. package/docs/research/AUDIT_OH_MY_PI.md +0 -261
  214. package/docs/research/AUDIT_PI_CREW.md +0 -457
  215. package/docs/research/CAVEMAN-DEEP-RESEARCH.md +0 -281
  216. package/docs/research/COMPARISON_OH_MY_PI_VS_PI_CREW.md +0 -264
  217. package/docs/research/DEEP-RESEARCH-PI-POWERBAR.md +0 -343
  218. package/docs/research/DEEP_RESEARCH_SUBAGENT_ARCHITECTURE.md +0 -480
  219. package/docs/research/GAP_CLOSURE_IMPLEMENTATION_PLAN.md +0 -354
  220. package/docs/research/IMPLEMENTATION_PLAN.md +0 -385
  221. package/docs/research/LIVE-SESSION-PRODUCTION-READY-PLAN.md +0 -502
  222. package/docs/research/OH-MY-PI-DEEP-RESEARCH-v14.7.6.md +0 -266
  223. package/docs/research/REMAINING-GAPS-PLAN.md +0 -363
  224. package/docs/research/SESSION-SUMMARY-2026-05-08.md +0 -146
  225. package/docs/research/UI-RESPONSIVENESS-AUDIT.md +0 -173
  226. package/docs/research-awesome-agent-skills-distillation.md +0 -100
  227. package/docs/research-extension-examples.md +0 -297
  228. package/docs/research-extension-system.md +0 -324
  229. package/docs/research-oh-my-pi-distillation.md +0 -369
  230. package/docs/research-optimization-plan.md +0 -548
  231. package/docs/research-phase10-distillation.md +0 -199
  232. package/docs/research-phase11-distillation.md +0 -201
  233. package/docs/research-phase8-operator-experience-plan.md +0 -819
  234. package/docs/research-phase9-observability-reliability-plan.md +0 -1190
  235. package/docs/research-pi-coding-agent.md +0 -357
  236. package/docs/research-source-pi-crew-reference.md +0 -174
  237. package/docs/research-ui-optimization-plan.md +0 -480
  238. package/docs/source-runtime-refactor-map.md +0 -107
  239. package/src/utils/atomic-write.ts +0 -33
@@ -1,190 +1,190 @@
1
- import * as fs from "node:fs";
2
- import type { TeamTaskState, TeamRunManifest } from "../state/types.ts";
3
-
4
- // ============================================================================
5
- // Phase 1.2: Completion Mutation Guard — detects tasks that claim success but
6
- // made no observable mutations. Used by task-runner.ts.
7
- // ============================================================================
8
-
9
- export interface CompletionMutationGuardInput {
10
- role: string;
11
- taskText?: string;
12
- transcriptPath?: string;
13
- stdout?: string;
14
- }
15
-
16
- export interface CompletionMutationGuardResult {
17
- expectedMutation: boolean;
18
- observedMutation: boolean;
19
- reason?: "no_mutation_observed";
20
- observedTools: string[];
21
- }
22
-
23
- const MUTATING_ROLES = new Set(["executor", "test-engineer"]);
24
- const MUTATING_TOOLS = new Set(["edit", "write", "multi_edit", "apply_patch", "replace_in_file", "insert", "delete_files", "create_file", "overwrite", "patch"]);
25
- const READ_ONLY_COMMANDS = /^(pwd|ls|dir|cat|type|sed|grep|rg|find|git\s+(status|diff|log|show|branch|remote|rev-parse|ls-files)|npm\s+(test|run\s+(typecheck|check|lint|test|ci))|node\s+--test)\b/i;
26
- const MUTATING_COMMANDS = /\b(rm\s+-|del\s+|erase\s+|mv\s+|move\s+|cp\s+|copy\s+|mkdir\b|touch\b|git\s+(add|commit|push|reset|clean|checkout|switch|merge|rebase|stash)|npm\s+(install|i|uninstall|publish|version)|pnpm\s+(add|install|remove)|yarn\s+(add|install|remove)|python\b.*>|node\b.*>|echo\b.*>|Set-Content|Out-File|sed\s+-i|tee\b|dd\b.*of=|wget\b.*-O|curl\b.*-o)\b/i;
27
- const READ_ONLY_HINTS = /\b(read-only|no edits?|do not edit|không sửa|khong sua|chỉ đọc|chi doc|plan only|chỉ lập plan|review only|audit only)\b/i;
28
-
29
- function asRecord(value: unknown): Record<string, unknown> | undefined {
30
- return value && typeof value === "object" && !Array.isArray(value) ? value as Record<string, unknown> : undefined;
31
- }
32
-
33
- function commandText(value: unknown): string {
34
- const record = asRecord(value);
35
- if (!record) return typeof value === "string" ? value : "";
36
- for (const key of ["command", "cmd", "script", "input"]) {
37
- const raw = record[key];
38
- if (typeof raw === "string") return raw;
39
- }
40
- return JSON.stringify(record);
41
- }
42
-
43
- function isMutatingTool(tool: string, args: unknown): boolean {
44
- const normalized = tool.toLowerCase();
45
- if (MUTATING_TOOLS.has(normalized)) return true;
46
- if (normalized === "bash" || normalized === "shell" || normalized === "powershell") {
47
- const command = commandText(args).trim();
48
- if (!command) return false;
49
- // Check mutating patterns first: sed -i is mutating even though plain sed is read-only.
50
- if (MUTATING_COMMANDS.test(command)) return true;
51
- if (READ_ONLY_COMMANDS.test(command)) return false;
52
- // If the command doesn't match either list, treat unknown bash calls as potentially mutating.
53
- return true;
54
- }
55
- return false;
56
- }
57
-
58
- function collectToolCallsFromEvent(event: unknown): Array<{ tool: string; args?: unknown }> {
59
- const record = asRecord(event);
60
- if (!record) return [];
61
- const calls: Array<{ tool: string; args?: unknown }> = [];
62
- const directTool = record.toolName ?? record.name ?? record.tool;
63
- if (typeof directTool === "string" && (record.type === "tool_execution_start" || record.type === "toolCall" || record.type === "tool_call")) {
64
- calls.push({ tool: directTool, args: record.args ?? record.input });
65
- }
66
- const content = Array.isArray(record.content) ? record.content : asRecord(record.message)?.content;
67
- if (Array.isArray(content)) {
68
- for (const part of content) {
69
- const item = asRecord(part);
70
- if (!item) continue;
71
- const tool = item.name ?? item.toolName ?? item.tool;
72
- if (typeof tool === "string" && (item.type === "toolCall" || item.type === "tool_call" || item.type === "tool_execution_start")) calls.push({ tool, args: item.input ?? item.args });
73
- }
74
- }
75
- return calls;
76
- }
77
-
78
- function transcriptText(input: CompletionMutationGuardInput): string {
79
- if (input.transcriptPath && fs.existsSync(input.transcriptPath)) return fs.readFileSync(input.transcriptPath, "utf-8");
80
- return input.stdout ?? "";
81
- }
82
-
83
- export function expectsImplementationMutation(input: Pick<CompletionMutationGuardInput, "role" | "taskText">): boolean {
84
- if (!MUTATING_ROLES.has(input.role)) return false;
85
- return !READ_ONLY_HINTS.test(input.taskText ?? "");
86
- }
87
-
88
- export function evaluateCompletionMutationGuard(input: CompletionMutationGuardInput): CompletionMutationGuardResult {
89
- const expectedMutation = expectsImplementationMutation(input);
90
- const observedTools: string[] = [];
91
- let observedMutation = false;
92
- const text = transcriptText(input);
93
- for (const line of text.split("\n")) {
94
- const trimmed = line.trim();
95
- if (!trimmed) continue;
96
- let event: unknown;
97
- try { event = JSON.parse(trimmed); } catch { continue; }
98
- for (const call of collectToolCallsFromEvent(event)) {
99
- observedTools.push(call.tool);
100
- if (isMutatingTool(call.tool, call.args)) observedMutation = true;
101
- }
102
- }
103
- return {
104
- expectedMutation,
105
- observedMutation,
106
- observedTools,
107
- ...(expectedMutation && !observedMutation ? { reason: "no_mutation_observed" as const } : {}),
108
- };
109
- }
110
-
111
- // ============================================================================
112
- // Phase 11a: Artifact-based Completion Verification — a second layer that
113
- // checks whether a completed task actually produced meaningful artifacts.
114
- // ============================================================================
115
-
116
- /**
117
- * Guard against false-positive task completions.
118
- *
119
- * Checks whether a task that claims success actually produced meaningful output.
120
- * Returns a verification result with the green level (0-3) and any warnings.
121
- */
122
- export interface CompletionVerifyResult {
123
- /** 0 = no output, 1 = minimal, 2 = moderate, 3 = strong */
124
- greenLevel: number;
125
- /** Warnings about potentially incomplete work */
126
- warnings: string[];
127
- }
128
-
129
- const MAX_OUTPUT_PREVIEW = 200;
130
-
131
- function isTrivialError(error: string | undefined): boolean {
132
- if (!error) return false;
133
- return error.trim().length === 0;
134
- }
135
-
136
- export function verifyTaskCompletion(
137
- task: TeamTaskState,
138
- manifest: TeamRunManifest,
139
- ): CompletionVerifyResult {
140
- const warnings: string[] = [];
141
- let greenLevel = 0;
142
-
143
- // Check 1: Has an error?
144
- if (task.error && !isTrivialError(task.error)) {
145
- return { greenLevel: 0, warnings: [`Task has error: ${task.error}`] };
146
- }
147
-
148
- // Check 2: Has result artifact?
149
- if (task.resultArtifact) {
150
- greenLevel += 1;
151
- }
152
-
153
- // Check 3: Has transcript?
154
- if (task.transcriptArtifact) {
155
- greenLevel += 1;
156
- }
157
-
158
- // Check 4: For implementation tasks, verify artifacts were actually produced
159
- const runArtifacts = manifest.artifacts.filter(
160
- (a) => a.producer === task.id || a.producer === task.agent,
161
- );
162
- if (runArtifacts.length > 0) {
163
- greenLevel += 1;
164
- } else if (greenLevel < 3) {
165
- warnings.push("No run-level artifacts produced by this task");
166
- }
167
-
168
- // Check 5: Usage tracking — did the task actually consume tokens?
169
- if (task.usage) {
170
- const totalTokens = (task.usage.input ?? 0) + (task.usage.output ?? 0);
171
- if (totalTokens === 0 && greenLevel < 3) {
172
- warnings.push("Task reports zero token usage — may not have executed");
173
- }
174
- }
175
-
176
- return {
177
- greenLevel: Math.min(greenLevel, 3),
178
- warnings,
179
- };
180
- }
181
-
182
- /**
183
- * Format a preview of task output for diagnostic display.
184
- */
185
- export function formatOutputPreview(output: string | undefined): string {
186
- if (!output) return "(no output)";
187
- const trimmed = output.trim();
188
- if (trimmed.length <= MAX_OUTPUT_PREVIEW) return trimmed;
189
- return trimmed.slice(0, MAX_OUTPUT_PREVIEW) + "...";
190
- }
1
+ import * as fs from "node:fs";
2
+ import type { TeamTaskState, TeamRunManifest } from "../state/types.ts";
3
+
4
+ // ============================================================================
5
+ // Phase 1.2: Completion Mutation Guard — detects tasks that claim success but
6
+ // made no observable mutations. Used by task-runner.ts.
7
+ // ============================================================================
8
+
9
/** Describes a finished task whose recorded tool activity should be inspected. */
export interface CompletionMutationGuardInput {
  /** Agent role that ran the task; only roles in MUTATING_ROLES are expected to mutate. */
  role: string;
  /** Task prompt text; scanned for read-only hints such as "review only". */
  taskText?: string;
  /** Path to a newline-delimited JSON transcript; used instead of `stdout` when it can be read. */
  transcriptPath?: string;
  /** Newline-delimited JSON output, used when no transcript file can be read. */
  stdout?: string;
}

/** Outcome of the mutation guard for one task. */
export interface CompletionMutationGuardResult {
  /** True when the role is a mutating role and the task text carries no read-only hint. */
  expectedMutation: boolean;
  /** True when at least one recorded tool call is classified as mutating. */
  observedMutation: boolean;
  /** Present only when a mutation was expected but none was observed. */
  reason?: "no_mutation_observed";
  /** Names of every tool call found, in transcript order (duplicates kept). */
  observedTools: string[];
}

const MUTATING_ROLES = new Set(["executor", "test-engineer"]);
const MUTATING_TOOLS = new Set(["edit", "write", "multi_edit", "apply_patch", "replace_in_file", "insert", "delete_files", "create_file", "overwrite", "patch"]);
const SHELL_TOOLS = new Set(["bash", "shell", "powershell"]);

// Matched against the START of a command (or of one segment of a command chain).
const READ_ONLY_COMMANDS = /^(pwd|ls|dir|cat|type|sed|grep|rg|find|git\s+(status|diff|log|show|branch|remote|rev-parse|ls-files)|npm\s+(test|run\s+(typecheck|check|lint|test|ci))|node\s+--test)\b/i;

// Matched anywhere in the command. Each alternative carries its own trailing `\b` only where
// a word boundary is meaningful: a single `\b` after the whole group made alternatives that
// end in whitespace or punctuation (`cp\s+`, `dd ... of=`) fail whenever the next character
// was not a word character, e.g. `cp /src /dst`.
const MUTATING_COMMANDS = /\b(?:rm\s+-|del\s+|erase\s+|mv\s+|move\s+|cp\s+|copy\s+|mkdir\b|touch\b|git\s+(?:add|commit|push|reset|clean|checkout|switch|merge|rebase|stash)\b|npm\s+(?:install|i|uninstall|publish|version)\b|pnpm\s+(?:add|install|remove)\b|yarn\s+(?:add|install|remove)\b|Set-Content\b|Out-File\b|sed\s+-i\b|tee\b|dd\b.*of=|wget\b.*-O\b|curl\b.*-o\b|find\b.*\s-(?:delete|exec|execdir)\b)/i;

// Output redirection to a real file: `>` or `>>` whose target is neither a file-descriptor
// duplication (`2>&1`) nor a null device (`/dev/null`, `$null`, `NUL`).
const FILE_REDIRECT = />>?\s*(?!&|\/dev\/null\b|\$null\b|nul\b)[^\s>]/i;

// Separators between sequentially executed commands. Pipes are deliberately NOT split so
// that `git log | head` keeps its read-only classification. A lone `&` only counts when it
// is not part of a redirection such as `2>&1` or `&>file`.
const SEQUENCE_SEPARATORS = /&&|\|\||[;\n]|(?<![>&|])&(?![>&])/;

const READ_ONLY_HINTS = /\b(read-only|no edits?|do not edit|không sửa|khong sua|chỉ đọc|chi doc|plan only|chỉ lập plan|review only|audit only)\b/i;

/** Returns `value` as a plain record, or `undefined` for primitives, null and arrays. */
function asRecord(value: unknown): Record<string, unknown> | undefined {
  return value && typeof value === "object" && !Array.isArray(value) ? value as Record<string, unknown> : undefined;
}

/**
 * Extracts the command string from a shell tool's arguments.
 * Strings are returned as-is; records are probed for the first string-valued key among
 * command/cmd/script/input and otherwise serialised to JSON.
 */
function commandText(value: unknown): string {
  const record = asRecord(value);
  if (!record) return typeof value === "string" ? value : "";
  for (const key of ["command", "cmd", "script", "input"]) {
    const raw = record[key];
    if (typeof raw === "string") return raw;
  }
  return JSON.stringify(record);
}

/** Blanks out single- and double-quoted spans so operators inside string literals are ignored. */
function stripQuoted(command: string): string {
  return command.replace(/"(?:\\.|[^"\\])*"|'[^']*'/g, '""');
}

/**
 * Decides whether a single tool call should count as a mutation.
 *
 * Dedicated edit/write tools always count. Shell tools are classified heuristically:
 * a known mutating command anywhere, an output redirection to a file, or any sequential
 * segment that is not a known read-only command makes the call mutating. Non-shell tools
 * outside MUTATING_TOOLS never count.
 */
function isMutatingTool(tool: string, args: unknown): boolean {
  const normalized = tool.toLowerCase();
  if (MUTATING_TOOLS.has(normalized)) return true;
  if (!SHELL_TOOLS.has(normalized)) return false;

  const command = commandText(args).trim();
  if (!command) return false;

  // Check mutating patterns first: sed -i is mutating even though plain sed is read-only.
  if (MUTATING_COMMANDS.test(command)) return true;

  const unquoted = stripQuoted(command);
  // `cat a > b` starts with a read-only command but still writes a file.
  if (FILE_REDIRECT.test(unquoted)) return true;

  // `ls && ./deploy.sh` is only read-only if EVERY chained command is.
  const segments = unquoted
    .split(SEQUENCE_SEPARATORS)
    .map((segment) => segment.trim())
    .filter((segment) => segment.length > 0);
  // Unknown commands are treated as potentially mutating.
  if (segments.length === 0) return true;
  return !segments.every((segment) => READ_ONLY_COMMANDS.test(segment));
}

/**
 * Collects tool calls from one parsed transcript event.
 * Looks at the event itself (toolName/name/tool with a tool-call type) and at each item of
 * `content` / `message.content` carrying a tool-call type.
 */
function collectToolCallsFromEvent(event: unknown): Array<{ tool: string; args?: unknown }> {
  const record = asRecord(event);
  if (!record) return [];
  const calls: Array<{ tool: string; args?: unknown }> = [];
  const directTool = record.toolName ?? record.name ?? record.tool;
  if (typeof directTool === "string" && (record.type === "tool_execution_start" || record.type === "toolCall" || record.type === "tool_call")) {
    calls.push({ tool: directTool, args: record.args ?? record.input });
  }
  const content = Array.isArray(record.content) ? record.content : asRecord(record.message)?.content;
  if (Array.isArray(content)) {
    for (const part of content) {
      const item = asRecord(part);
      if (!item) continue;
      const tool = item.name ?? item.toolName ?? item.tool;
      if (typeof tool === "string" && (item.type === "toolCall" || item.type === "tool_call" || item.type === "tool_execution_start")) calls.push({ tool, args: item.input ?? item.args });
    }
  }
  return calls;
}

/**
 * Returns the transcript text to scan: the transcript file when it can be read,
 * otherwise `stdout` (or an empty string).
 */
function transcriptText(input: CompletionMutationGuardInput): string {
  if (input.transcriptPath) {
    try {
      return fs.readFileSync(input.transcriptPath, "utf-8");
    } catch {
      // Missing, unreadable, or not a regular file: fall back to stdout rather than
      // failing the whole guard evaluation.
    }
  }
  return input.stdout ?? "";
}

/** True when the role normally edits files and the task text does not declare itself read-only. */
export function expectsImplementationMutation(input: Pick<CompletionMutationGuardInput, "role" | "taskText">): boolean {
  if (!MUTATING_ROLES.has(input.role)) return false;
  return !READ_ONLY_HINTS.test(input.taskText ?? "");
}

/**
 * Scans a task's transcript (one JSON event per line; unparseable lines are skipped) and
 * reports whether any mutating tool call was observed, alongside whether one was expected.
 *
 * @param input Role, task text and transcript source for the task.
 * @returns The guard result; `reason` is set to "no_mutation_observed" only when a mutation
 *          was expected and none was found.
 */
export function evaluateCompletionMutationGuard(input: CompletionMutationGuardInput): CompletionMutationGuardResult {
  const expectedMutation = expectsImplementationMutation(input);
  const observedTools: string[] = [];
  let observedMutation = false;
  const text = transcriptText(input);
  for (const line of text.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed) continue;
    let event: unknown;
    try { event = JSON.parse(trimmed); } catch { continue; }
    for (const call of collectToolCallsFromEvent(event)) {
      observedTools.push(call.tool);
      if (isMutatingTool(call.tool, call.args)) observedMutation = true;
    }
  }
  return {
    expectedMutation,
    observedMutation,
    observedTools,
    ...(expectedMutation && !observedMutation ? { reason: "no_mutation_observed" as const } : {}),
  };
}
110
+
111
+ // ============================================================================
112
+ // Phase 11a: Artifact-based Completion Verification — a second layer that
113
+ // checks whether a completed task actually produced meaningful artifacts.
114
+ // ============================================================================
115
+
116
/** Result of the artifact-based completion check. */
export interface CompletionVerifyResult {
  /** 0 = no output, 1 = minimal, 2 = moderate, 3 = strong */
  greenLevel: number;
  /** Warnings about potentially incomplete work */
  warnings: string[];
}

/** Maximum number of UTF-16 code units kept by formatOutputPreview before the ellipsis. */
const MAX_OUTPUT_PREVIEW = 200;

/** An error string counts as trivial when it is present but contains only whitespace. */
function isTrivialError(error: string | undefined): boolean {
  if (!error) return false;
  return error.trim().length === 0;
}

/**
 * Guard against false-positive task completions.
 *
 * Checks whether a task that claims success actually produced meaningful output.
 * One point is awarded for each of: a result artifact, a transcript artifact, and at
 * least one manifest artifact whose producer is the task id or the task's agent.
 *
 * @param task Task state to verify.
 * @param manifest Run manifest whose `artifacts` list is searched for this task's output.
 * @returns The green level (0-3) and any warnings. A non-trivial `task.error`
 *          short-circuits to level 0 with a single warning.
 */
export function verifyTaskCompletion(
  task: TeamTaskState,
  manifest: TeamRunManifest,
): CompletionVerifyResult {
  // Check 1: a real error overrides every other signal.
  if (task.error && !isTrivialError(task.error)) {
    return { greenLevel: 0, warnings: [`Task has error: ${task.error}`] };
  }

  const warnings: string[] = [];
  let greenLevel = 0;

  // Check 2: Has result artifact?
  if (task.resultArtifact) greenLevel += 1;

  // Check 3: Has transcript?
  if (task.transcriptArtifact) greenLevel += 1;

  // Check 4: Did the run manifest record any artifact produced by this task?
  const producedArtifact = manifest.artifacts.some(
    (a) => a.producer === task.id || a.producer === task.agent,
  );
  if (producedArtifact) {
    greenLevel += 1;
  } else {
    // greenLevel is at most 2 on this branch, so the warning is always warranted.
    warnings.push("No run-level artifacts produced by this task");
  }

  // Check 5: Usage tracking — did the task actually consume tokens?
  // Suppressed when every other signal is green.
  if (task.usage) {
    const totalTokens = (task.usage.input ?? 0) + (task.usage.output ?? 0);
    if (totalTokens === 0 && greenLevel < 3) {
      warnings.push("Task reports zero token usage — may not have executed");
    }
  }

  // greenLevel cannot exceed 3: exactly three checks can each add one point.
  return { greenLevel, warnings };
}

/**
 * Format a preview of task output for diagnostic display.
 *
 * @param output Raw task output, if any.
 * @returns "(no output)" for missing, empty or whitespace-only output; otherwise the trimmed
 *          text, truncated to MAX_OUTPUT_PREVIEW code units plus "..." when longer.
 */
export function formatOutputPreview(output: string | undefined): string {
  const trimmed = output?.trim() ?? "";
  if (trimmed.length === 0) return "(no output)";
  if (trimmed.length <= MAX_OUTPUT_PREVIEW) return trimmed;

  let end = MAX_OUTPUT_PREVIEW;
  // Do not cut between the two halves of a surrogate pair: a dangling high surrogate
  // renders as a replacement character and is invalid when re-encoded as UTF-8.
  const lastKept = trimmed.charCodeAt(end - 1);
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1;
  return trimmed.slice(0, end) + "...";
}
@@ -11,6 +11,8 @@ import { checkProcessLiveness } from "./process-status.ts";
11
11
  import { reconcileStaleRun, type ReconcileResult } from "./stale-reconciler.ts";
12
12
  import { executeHook, appendHookEvent } from "../hooks/registry.ts";
13
13
  import { activeRunEntries, unregisterActiveRun, readActiveRunRegistry } from "../state/active-run-registry.ts";
14
+ import { resolveRealContainedPath } from "../utils/safe-paths.ts";
15
+ import { projectCrewRoot, userCrewRoot } from "../utils/paths.ts";
14
16
 
15
17
  export interface RecoveryPlan {
16
18
  runId: string;
@@ -168,6 +170,32 @@ export function cancelOrphanedRuns(
168
170
  * This is the **global** cleanup that cancelOrphanedRuns (project-scoped)
169
171
  * cannot reach.
170
172
  */
173
/**
 * Best-effort removal of the stateRoot directory of a purged run.
 *
 * The directory is deleted only after resolveRealContainedPath accepts it as contained in
 * one of the known crew roots (project-level for `entry.cwd`, then user-level). Failures
 * are swallowed: this is cleanup and must not abort the caller's purge loop.
 *
 * artifactsRoot is NOT touched here — it is shared across runs and cleaned up by
 * pruneFinishedRuns/pruneUserLevelRuns.
 *
 * @param entry Active-run entry providing the run's `stateRoot` and the `cwd` used to
 *              locate the project-level crew root.
 */
function tryRemoveRunDirectories(entry: { stateRoot: string; cwd: string }): void {
  // Never hand an empty path to a recursive delete. How resolveRealContainedPath treats an
  // empty path is not visible from here (NOTE(review): confirm it rejects the root itself),
  // so refuse defensively.
  if (!entry.stateRoot || entry.stateRoot.trim().length === 0) return;

  const roots = [projectCrewRoot(entry.cwd), userCrewRoot()];
  for (const root of roots) {
    try {
      // Throws when stateRoot is not contained in this root.
      resolveRealContainedPath(root, entry.stateRoot);
    } catch {
      // Not contained in this root, try next
      continue;
    }
    try {
      fs.rmSync(entry.stateRoot, { recursive: true, force: true });
    } catch {
      // Best-effort: a failed delete (permissions, busy files) is not retried against
      // another root, since containment has already been established.
    }
    break;
  }
}
192
+
193
+ /**
194
+ * Purge the global active-run-index of entries whose manifest is no longer active.
195
+ *
196
+ * Note: This function only cleans user-level active run entries.
197
+ * Project-level stale runs are handled by session_start auto-prune triggered during run creation.
198
+ */
171
199
  export function purgeStaleActiveRunIndex(staleThresholdMs = 300_000, now = Date.now()): { purged: string[]; kept: string[] } {
172
200
  const purged: string[] = [];
173
201
  const kept: string[] = [];
@@ -177,6 +205,7 @@ export function purgeStaleActiveRunIndex(staleThresholdMs = 300_000, now = Date.
177
205
  // 1. Manifest file gone → definitely stale
178
206
  if (!fs.existsSync(entry.manifestPath)) {
179
207
  unregisterActiveRun(entry.runId);
208
+ tryRemoveRunDirectories(entry);
180
209
  purged.push(entry.runId);
181
210
  continue;
182
211
  }
@@ -184,6 +213,7 @@ export function purgeStaleActiveRunIndex(staleThresholdMs = 300_000, now = Date.
184
213
  // 2. CWD gone → temp dir cleaned up
185
214
  if (!fs.existsSync(entry.cwd)) {
186
215
  unregisterActiveRun(entry.runId);
216
+ tryRemoveRunDirectories(entry);
187
217
  purged.push(entry.runId);
188
218
  continue;
189
219
  }
@@ -194,6 +224,7 @@ export function purgeStaleActiveRunIndex(staleThresholdMs = 300_000, now = Date.
194
224
  manifest = JSON.parse(fs.readFileSync(entry.manifestPath, "utf-8"));
195
225
  } catch {
196
226
  unregisterActiveRun(entry.runId);
227
+ tryRemoveRunDirectories(entry);
197
228
  purged.push(entry.runId);
198
229
  continue;
199
230
  }
@@ -202,6 +233,7 @@ export function purgeStaleActiveRunIndex(staleThresholdMs = 300_000, now = Date.
202
233
  const terminalStatuses = new Set(["completed", "failed", "cancelled", "blocked"]);
203
234
  if (manifest && terminalStatuses.has(manifest.status ?? "")) {
204
235
  unregisterActiveRun(entry.runId);
236
+ tryRemoveRunDirectories(entry);
205
237
  purged.push(entry.runId);
206
238
  continue;
207
239
  }
@@ -231,6 +263,7 @@ export function purgeStaleActiveRunIndex(staleThresholdMs = 300_000, now = Date.
231
263
  // Best-effort manifest cleanup
232
264
  }
233
265
  unregisterActiveRun(entry.runId);
266
+ tryRemoveRunDirectories(entry);
234
267
  purged.push(entry.runId);
235
268
  continue;
236
269
  }