pi-crew 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (348) hide show
  1. package/AGENTS.md +57 -32
  2. package/CHANGELOG.md +466 -448
  3. package/LICENSE +21 -21
  4. package/NOTICE.md +16 -16
  5. package/README.md +323 -323
  6. package/docs/FEATURE_INTAKE.md +126 -0
  7. package/docs/HARNESS.md +86 -0
  8. package/docs/HARNESS_BACKLOG.md +41 -0
  9. package/docs/TEST_MATRIX.md +49 -0
  10. package/docs/actions-reference.md +595 -595
  11. package/docs/architecture.md +180 -180
  12. package/docs/code-review-2026-05-11.md +592 -592
  13. package/docs/commands-reference.md +347 -347
  14. package/docs/comparison-pi-subagents-vs-pi-crew.md +303 -0
  15. package/docs/decisions/0001-durable-state.md +41 -0
  16. package/docs/decisions/0002-child-process-for-async.md +42 -0
  17. package/docs/decisions/0003-depth-guard.md +36 -0
  18. package/docs/decisions/0004-execfile-over-exec.md +34 -0
  19. package/docs/decisions/0005-no-parameter-properties.md +49 -0
  20. package/docs/decisions/0006-publish-bundled-esm.md +63 -0
  21. package/docs/decisions/0007-active-run-binary-index.md +54 -0
  22. package/docs/decisions/0008-child-pi-warm-pool.md +61 -0
  23. package/docs/decisions/README.md +23 -0
  24. package/docs/followup-review-round4-2026-05-13.md +107 -0
  25. package/docs/implementation-plan-top3.md +333 -0
  26. package/docs/live-mailbox-runtime.md +36 -36
  27. package/docs/next-upgrade-roadmap.md +808 -808
  28. package/docs/oh-my-pi-research.md +509 -0
  29. package/docs/perf/baseline-2026-05.md +113 -0
  30. package/docs/perf/final-report-2026-05.md +206 -0
  31. package/docs/perf/sprint-1-report.md +71 -0
  32. package/docs/perf/sprint-2-report.md +81 -0
  33. package/docs/perf/sprint-2.5-report.md +53 -0
  34. package/docs/perf/sprint-3-report.md +36 -0
  35. package/docs/perf/sprint-4-report.md +47 -0
  36. package/docs/perf/sprint-5-report.md +51 -0
  37. package/docs/perf/sprint-6-report.md +94 -0
  38. package/docs/perf/sprint-7-report.md +74 -0
  39. package/docs/perf/upgrade-plan-2026-05.md +147 -0
  40. package/docs/pi-subagents3-deep-analysis.md +508 -0
  41. package/docs/product/README.md +31 -0
  42. package/docs/product/platform.md +27 -0
  43. package/docs/product/runtime-safety.md +37 -0
  44. package/docs/product/team-run.md +39 -0
  45. package/docs/product/team-tool.md +37 -0
  46. package/docs/publishing.md +65 -65
  47. package/docs/resource-formats.md +134 -134
  48. package/docs/runtime-analysis-child-vs-live.md +171 -0
  49. package/docs/runtime-flow.md +148 -148
  50. package/docs/runtime-migration-in-process-analysis.md +250 -0
  51. package/docs/stories/README.md +30 -0
  52. package/docs/stories/backlog.md +36 -0
  53. package/docs/templates/decision.md +27 -0
  54. package/docs/templates/story.md +44 -0
  55. package/docs/templates/validation-report.md +32 -0
  56. package/docs/usage.md +238 -238
  57. package/index.ts +7 -6
  58. package/install.mjs +65 -65
  59. package/package.json +107 -100
  60. package/schema.json +222 -222
  61. package/skills/child-pi-spawning/SKILL.md +213 -0
  62. package/skills/context-artifact-hygiene/SKILL.md +32 -0
  63. package/skills/event-log-tracing/SKILL.md +299 -0
  64. package/skills/git-master/SKILL.md +225 -24
  65. package/skills/live-agent-lifecycle/SKILL.md +192 -0
  66. package/skills/mailbox-interactive/SKILL.md +300 -19
  67. package/skills/model-routing-context/SKILL.md +94 -0
  68. package/skills/multi-perspective-review/SKILL.md +88 -0
  69. package/skills/read-only-explorer/SKILL.md +250 -26
  70. package/skills/safe-bash/SKILL.md +307 -21
  71. package/skills/verification-before-done/SKILL.md +11 -2
  72. package/skills/widget-rendering/SKILL.md +258 -0
  73. package/skills/workspace-isolation/SKILL.md +202 -0
  74. package/skills/worktree-isolation/SKILL.md +202 -18
  75. package/src/adapters/claude-adapter.ts +25 -25
  76. package/src/adapters/codex-adapter.ts +21 -21
  77. package/src/adapters/cursor-adapter.ts +17 -17
  78. package/src/adapters/export-util.ts +137 -137
  79. package/src/adapters/index.ts +15 -15
  80. package/src/adapters/registry.ts +18 -18
  81. package/src/adapters/types.ts +23 -23
  82. package/src/agents/agent-config.ts +38 -38
  83. package/src/agents/agent-serializer.ts +38 -38
  84. package/src/agents/discover-agents.ts +121 -118
  85. package/src/config/config.ts +740 -858
  86. package/src/config/defaults.ts +96 -96
  87. package/src/config/drift-detector.ts +211 -211
  88. package/src/config/markers.ts +327 -327
  89. package/src/config/resilient-parser.ts +109 -108
  90. package/src/config/suggestions.ts +74 -74
  91. package/src/config/types.ts +199 -0
  92. package/src/extension/async-notifier.ts +123 -89
  93. package/src/extension/autonomous-policy.ts +169 -169
  94. package/src/extension/cross-extension-rpc.ts +104 -104
  95. package/src/extension/help.ts +47 -47
  96. package/src/extension/import-index.ts +69 -69
  97. package/src/extension/management.ts +395 -382
  98. package/src/extension/notification-router.ts +116 -116
  99. package/src/extension/notification-sink.ts +51 -51
  100. package/src/extension/project-init.ts +168 -168
  101. package/src/extension/register.ts +859 -668
  102. package/src/extension/registration/artifact-cleanup.ts +15 -15
  103. package/src/extension/registration/command-utils.ts +54 -54
  104. package/src/extension/registration/commands.ts +559 -452
  105. package/src/extension/registration/compaction-guard.ts +125 -125
  106. package/src/extension/registration/subagent-helpers.ts +102 -102
  107. package/src/extension/registration/subagent-tools.ts +220 -159
  108. package/src/extension/registration/team-tool.ts +159 -99
  109. package/src/extension/registration/viewers.ts +29 -0
  110. package/src/extension/result-watcher.ts +128 -128
  111. package/src/extension/run-bundle-schema.ts +89 -89
  112. package/src/extension/run-export.ts +73 -73
  113. package/src/extension/run-import.ts +84 -84
  114. package/src/extension/run-index.ts +94 -94
  115. package/src/extension/run-maintenance.ts +142 -142
  116. package/src/extension/session-summary.ts +8 -8
  117. package/src/extension/team-manager-command.ts +96 -96
  118. package/src/extension/team-recommendation.ts +188 -188
  119. package/src/extension/team-tool/api.ts +5 -2
  120. package/src/extension/team-tool/cancel.ts +224 -209
  121. package/src/extension/team-tool/config-patch.ts +36 -36
  122. package/src/extension/team-tool/context.ts +60 -60
  123. package/src/extension/team-tool/doctor.ts +242 -242
  124. package/src/extension/team-tool/handle-settings.ts +421 -195
  125. package/src/extension/team-tool/inspect.ts +41 -41
  126. package/src/extension/team-tool/lifecycle-actions.ts +139 -139
  127. package/src/extension/team-tool/parallel-dispatch.ts +156 -156
  128. package/src/extension/team-tool/plan.ts +19 -19
  129. package/src/extension/team-tool/respond.ts +112 -111
  130. package/src/extension/team-tool/run.ts +246 -229
  131. package/src/extension/team-tool/status.ts +110 -110
  132. package/src/extension/team-tool-types.ts +13 -13
  133. package/src/extension/team-tool.ts +344 -344
  134. package/src/extension/tool-result.ts +16 -16
  135. package/src/extension/validate-resources.ts +77 -77
  136. package/src/hooks/registry.ts +61 -61
  137. package/src/hooks/types.ts +40 -40
  138. package/src/i18n.ts +184 -184
  139. package/src/observability/correlation.ts +35 -35
  140. package/src/observability/event-to-metric.ts +68 -68
  141. package/src/observability/exporters/adapter.ts +30 -30
  142. package/src/observability/exporters/otlp-exporter.ts +106 -92
  143. package/src/observability/exporters/prometheus-exporter.ts +54 -54
  144. package/src/observability/metric-registry.ts +87 -87
  145. package/src/observability/metric-retention.ts +54 -54
  146. package/src/observability/metric-sink.ts +81 -56
  147. package/src/observability/metrics-primitives.ts +167 -167
  148. package/src/prompt/prompt-runtime.ts +72 -72
  149. package/src/runtime/adaptive-plan.ts +338 -0
  150. package/src/runtime/agent-control.ts +169 -169
  151. package/src/runtime/agent-memory.ts +72 -72
  152. package/src/runtime/agent-observability.ts +114 -114
  153. package/src/runtime/async-marker.ts +26 -26
  154. package/src/runtime/async-runner.ts +153 -153
  155. package/src/runtime/attention-events.ts +28 -28
  156. package/src/runtime/auto-resume.ts +100 -100
  157. package/src/runtime/background-runner.ts +122 -89
  158. package/src/runtime/cancellation.ts +61 -61
  159. package/src/runtime/capability-inventory.ts +116 -116
  160. package/src/runtime/child-pi-pool.ts +68 -0
  161. package/src/runtime/child-pi.ts +541 -461
  162. package/src/runtime/code-summary.ts +247 -247
  163. package/src/runtime/compaction-summary.ts +271 -271
  164. package/src/runtime/concurrency.ts +58 -58
  165. package/src/runtime/crash-recovery.ts +317 -301
  166. package/src/runtime/crew-agent-records.ts +379 -281
  167. package/src/runtime/crew-agent-runtime.ts +60 -60
  168. package/src/runtime/cross-extension-rpc.ts +72 -0
  169. package/src/runtime/custom-tools/irc-tool.ts +201 -201
  170. package/src/runtime/custom-tools/submit-result-tool.ts +90 -90
  171. package/src/runtime/deadletter.ts +47 -47
  172. package/src/runtime/delivery-coordinator.ts +176 -176
  173. package/src/runtime/delta-conflict.ts +360 -360
  174. package/src/runtime/diagnostic-export.ts +102 -102
  175. package/src/runtime/direct-run.ts +35 -35
  176. package/src/runtime/effectiveness.ts +82 -81
  177. package/src/runtime/errors/crew-errors.ts +166 -0
  178. package/src/runtime/event-stream-bridge.ts +92 -92
  179. package/src/runtime/foreground-control.ts +82 -82
  180. package/src/runtime/green-contract.ts +46 -46
  181. package/src/runtime/group-join.ts +234 -106
  182. package/src/runtime/heartbeat-watcher.ts +145 -124
  183. package/src/runtime/iteration-hooks.ts +267 -267
  184. package/src/runtime/live-agent-control.ts +88 -88
  185. package/src/runtime/live-agent-manager.ts +377 -179
  186. package/src/runtime/live-control-realtime.ts +36 -36
  187. package/src/runtime/live-session-runtime.ts +676 -600
  188. package/src/runtime/loop-gates.ts +129 -129
  189. package/src/runtime/manifest-cache.ts +263 -263
  190. package/src/runtime/mcp-proxy.ts +113 -113
  191. package/src/runtime/metric-parser.ts +40 -40
  192. package/src/runtime/model-fallback.ts +282 -274
  193. package/src/runtime/model-resolver.ts +118 -0
  194. package/src/runtime/output-validator.ts +187 -187
  195. package/src/runtime/overflow-recovery.ts +175 -175
  196. package/src/runtime/parallel-research.ts +44 -44
  197. package/src/runtime/parallel-utils.ts +156 -156
  198. package/src/runtime/parent-guard.ts +80 -80
  199. package/src/runtime/phase-progress.ts +217 -217
  200. package/src/runtime/pi-args.ts +165 -165
  201. package/src/runtime/pi-json-output.ts +111 -111
  202. package/src/runtime/pi-spawn.ts +167 -167
  203. package/src/runtime/policy-engine.ts +79 -79
  204. package/src/runtime/post-checks.ts +125 -125
  205. package/src/runtime/post-exit-stdio-guard.ts +86 -86
  206. package/src/runtime/process-status.ts +97 -73
  207. package/src/runtime/progress-event-coalescer.ts +43 -43
  208. package/src/runtime/recovery-recipes.ts +74 -74
  209. package/src/runtime/retry-executor.ts +81 -81
  210. package/src/runtime/role-permission.ts +39 -39
  211. package/src/runtime/run-tracker.ts +99 -0
  212. package/src/runtime/runtime-policy.ts +21 -0
  213. package/src/runtime/runtime-resolver.ts +94 -91
  214. package/src/runtime/scheduler.ts +294 -0
  215. package/src/runtime/semaphore.ts +131 -131
  216. package/src/runtime/sensitive-paths.ts +92 -92
  217. package/src/runtime/session-usage.ts +79 -79
  218. package/src/runtime/settings-store.ts +103 -0
  219. package/src/runtime/sidechain-output.ts +29 -29
  220. package/src/runtime/skill-instructions.ts +222 -222
  221. package/src/runtime/stale-reconciler.ts +198 -189
  222. package/src/runtime/streaming-output.ts +47 -0
  223. package/src/runtime/subagent-manager.ts +404 -400
  224. package/src/runtime/subprocess-tool-registry.ts +67 -67
  225. package/src/runtime/task-display.ts +38 -38
  226. package/src/runtime/task-graph-scheduler.ts +122 -122
  227. package/src/runtime/task-graph.ts +207 -207
  228. package/src/runtime/task-output-context.ts +177 -177
  229. package/src/runtime/task-packet.ts +93 -93
  230. package/src/runtime/task-quality.ts +207 -207
  231. package/src/runtime/task-runner/capabilities.ts +78 -78
  232. package/src/runtime/task-runner/live-executor.ts +131 -113
  233. package/src/runtime/task-runner/progress.ts +119 -119
  234. package/src/runtime/task-runner/prompt-builder.ts +139 -139
  235. package/src/runtime/task-runner/prompt-pipeline.ts +64 -64
  236. package/src/runtime/task-runner/result-utils.ts +14 -14
  237. package/src/runtime/task-runner/run-projection.ts +103 -103
  238. package/src/runtime/task-runner/state-helpers.ts +22 -22
  239. package/src/runtime/task-runner.ts +469 -459
  240. package/src/runtime/team-runner.ts +693 -945
  241. package/src/runtime/usage-tracker.ts +71 -0
  242. package/src/runtime/worker-heartbeat.ts +21 -21
  243. package/src/runtime/worker-startup.ts +57 -57
  244. package/src/runtime/workflow-state.ts +187 -187
  245. package/src/runtime/yield-handler.ts +190 -190
  246. package/src/schema/config-schema.ts +172 -168
  247. package/src/schema/team-tool-schema.ts +126 -126
  248. package/src/schema/validation-types.ts +151 -148
  249. package/src/skills/discover-skills.ts +67 -67
  250. package/src/skills/skill-templates.ts +374 -374
  251. package/src/state/active-run-registry.ts +227 -191
  252. package/src/state/artifact-store.ts +130 -129
  253. package/src/state/atomic-write.ts +262 -195
  254. package/src/state/blob-store.ts +116 -116
  255. package/src/state/contracts.ts +111 -111
  256. package/src/state/event-log-rotation.ts +161 -158
  257. package/src/state/event-log.ts +383 -303
  258. package/src/state/event-reconstructor.ts +217 -217
  259. package/src/state/jsonl-writer.ts +82 -82
  260. package/src/state/locks.ts +146 -146
  261. package/src/state/mailbox.ts +446 -405
  262. package/src/state/state-store.ts +364 -351
  263. package/src/state/task-claims.ts +44 -44
  264. package/src/state/types.ts +285 -285
  265. package/src/state/usage.ts +29 -29
  266. package/src/subagents/async-entry.ts +1 -1
  267. package/src/subagents/index.ts +3 -3
  268. package/src/subagents/live/control.ts +1 -1
  269. package/src/subagents/live/manager.ts +1 -1
  270. package/src/subagents/live/realtime.ts +1 -1
  271. package/src/subagents/live/session-runtime.ts +1 -1
  272. package/src/subagents/manager.ts +1 -1
  273. package/src/subagents/spawn.ts +1 -1
  274. package/src/teams/discover-teams.ts +116 -116
  275. package/src/teams/team-config.ts +27 -27
  276. package/src/teams/team-serializer.ts +38 -38
  277. package/src/types/diff.d.ts +18 -18
  278. package/src/ui/agent-management-overlay.ts +144 -144
  279. package/src/ui/crew-widget.ts +487 -370
  280. package/src/ui/dashboard-panes/agents-pane.ts +109 -28
  281. package/src/ui/dashboard-panes/cancellation-pane.ts +42 -42
  282. package/src/ui/dashboard-panes/capability-pane.ts +59 -59
  283. package/src/ui/dashboard-panes/health-pane.ts +30 -30
  284. package/src/ui/dashboard-panes/mailbox-pane.ts +35 -35
  285. package/src/ui/dashboard-panes/progress-pane.ts +30 -30
  286. package/src/ui/dashboard-panes/transcript-pane.ts +10 -10
  287. package/src/ui/heartbeat-aggregator.ts +63 -63
  288. package/src/ui/keybinding-map.ts +97 -94
  289. package/src/ui/live-conversation-overlay.ts +152 -0
  290. package/src/ui/live-run-sidebar.ts +180 -180
  291. package/src/ui/mascot.ts +442 -442
  292. package/src/ui/overlays/agent-picker-overlay.ts +57 -57
  293. package/src/ui/overlays/confirm-overlay.ts +58 -58
  294. package/src/ui/overlays/mailbox-compose-overlay.ts +144 -144
  295. package/src/ui/overlays/mailbox-compose-preview.ts +63 -63
  296. package/src/ui/overlays/mailbox-detail-overlay.ts +122 -122
  297. package/src/ui/pi-ui-compat.ts +57 -57
  298. package/src/ui/powerbar-publisher.ts +221 -197
  299. package/src/ui/render-scheduler.ts +216 -143
  300. package/src/ui/run-action-dispatcher.ts +118 -118
  301. package/src/ui/run-dashboard.ts +526 -464
  302. package/src/ui/run-event-bus.ts +208 -208
  303. package/src/ui/run-snapshot-cache.ts +826 -777
  304. package/src/ui/settings-overlay.ts +721 -0
  305. package/src/ui/snapshot-types.ts +86 -70
  306. package/src/ui/theme-adapter.ts +190 -190
  307. package/src/ui/tool-progress-formatter.ts +89 -0
  308. package/src/ui/transcript-cache.ts +94 -94
  309. package/src/ui/transcript-viewer.ts +335 -335
  310. package/src/utils/conflict-detect.ts +662 -0
  311. package/src/utils/file-coalescer.ts +86 -86
  312. package/src/utils/frontmatter.ts +68 -68
  313. package/src/utils/fs-watch.ts +88 -31
  314. package/src/utils/gh-protocol.ts +479 -0
  315. package/src/utils/ids.ts +17 -17
  316. package/src/utils/incremental-reader.ts +104 -104
  317. package/src/utils/internal-error.ts +6 -6
  318. package/src/utils/names.ts +27 -27
  319. package/src/utils/paths.ts +102 -63
  320. package/src/utils/redaction.ts +44 -44
  321. package/src/utils/safe-paths.ts +47 -47
  322. package/src/utils/scan-cache.ts +136 -136
  323. package/src/utils/sse-parser.ts +134 -134
  324. package/src/utils/task-name-generator.ts +337 -337
  325. package/src/utils/timings.ts +33 -33
  326. package/src/utils/visual.ts +243 -198
  327. package/src/workflows/discover-workflows.ts +139 -139
  328. package/src/workflows/validate-workflow.ts +40 -40
  329. package/src/workflows/workflow-config.ts +26 -26
  330. package/src/workflows/workflow-serializer.ts +32 -32
  331. package/src/worktree/branch-freshness.ts +45 -45
  332. package/src/worktree/cleanup.ts +75 -75
  333. package/src/worktree/worktree-manager.ts +188 -188
  334. package/teams/default.team.md +12 -12
  335. package/teams/fast-fix.team.md +11 -11
  336. package/teams/implementation.team.md +18 -18
  337. package/teams/parallel-research.team.md +14 -14
  338. package/teams/research.team.md +11 -11
  339. package/teams/review.team.md +12 -12
  340. package/tsconfig.json +19 -19
  341. package/workflows/default.workflow.md +30 -30
  342. package/workflows/fast-fix.workflow.md +23 -23
  343. package/workflows/implementation.workflow.md +43 -43
  344. package/workflows/parallel-research.workflow.md +46 -46
  345. package/workflows/research.workflow.md +22 -22
  346. package/workflows/review.workflow.md +30 -30
  347. package/skills/task-packet/SKILL.md +0 -28
  348. package/skills/verify-evidence/SKILL.md +0 -27
@@ -1,207 +1,207 @@
1
- /**
2
- * Task quality scoring — simple additive heuristic for evaluating task
3
- * completion quality based on diagnostics, metrics, artifacts, and duration.
4
- *
5
- * Distilled from pi-autoresearch's quality scoring pattern.
6
- */
7
- import * as fs from "node:fs";
8
- import * as path from "node:path";
9
- import type { TeamTaskState } from "../state/types.ts";
10
-
11
- /** Letter grade for task quality. */
12
- export type QualityGrade = "A" | "B" | "C" | "D";
13
-
14
- /** Breakdown of individual quality criteria. */
15
- export interface QualityBreakdown {
16
- /** Task has a non-empty diagnostics object. */
17
- hasDiagnostics: boolean;
18
- /** Task has a non-empty metrics object. */
19
- hasMetrics: boolean;
20
- /** Task produced files in the artifacts directory. */
21
- producedArtifacts: boolean;
22
- /** Task has a non-empty result/description. */
23
- hasDescription: boolean;
24
- /** Task duration is reasonable (> 0 and < 1 hour). */
25
- durationReasonable: boolean;
26
- }
27
-
28
- /** Scored quality result for a task. */
29
- export interface TaskQualityScore {
30
- /** Numeric score (0–5). */
31
- score: number;
32
- /** Individual criterion breakdown. */
33
- breakdown: QualityBreakdown;
34
- /** Letter grade based on score thresholds. */
35
- grade: QualityGrade;
36
- }
37
-
38
- /** One hour in milliseconds. */
39
- const ONE_HOUR_MS = 3_600_000;
40
-
41
- /**
42
- * Determine the letter grade for a given numeric score.
43
- *
44
- * A: 4–5, B: 3, C: 2, D: 0–1
45
- */
46
- function scoreToGrade(score: number): QualityGrade {
47
- if (score >= 4) return "A";
48
- if (score === 3) return "B";
49
- if (score === 2) return "C";
50
- return "D";
51
- }
52
-
53
- /**
54
- * Check whether the artifacts directory contains files for the given task.
55
- *
56
- * Looks for a subdirectory named after the task ID, or files containing
57
- * the task ID prefix in the artifacts directory.
58
- */
59
- function hasTaskArtifacts(taskId: string, artifactsDir: string): boolean {
60
- try {
61
- if (!fs.existsSync(artifactsDir)) return false;
62
-
63
- // Check for a task-specific subdirectory
64
- const taskDir = path.join(artifactsDir, taskId);
65
- if (fs.existsSync(taskDir)) {
66
- const stat = fs.statSync(taskDir);
67
- if (stat.isDirectory()) {
68
- const entries = fs.readdirSync(taskDir);
69
- return entries.length > 0;
70
- }
71
- }
72
-
73
- // Check for files containing the task ID prefix
74
- const entries = fs.readdirSync(artifactsDir);
75
- const safePrefix = taskId.replace(/[^a-zA-Z0-9_-]/g, "");
76
- return entries.some((entry) => entry.includes(safePrefix));
77
- } catch {
78
- return false;
79
- }
80
- }
81
-
82
- /**
83
- * Check if a task result string is a non-empty description.
84
- *
85
- * A result is considered descriptive if any of these sources have non-empty content:
86
- * - task.resultArtifact exists with a path
87
- * - task.error is a non-empty string (workers often set this with result info)
88
- * - task.verification.satisfied is true
89
- * - task.diagnostics contains a 'result' string
90
- */
91
- function isResultDescriptive(task: TeamTaskState): boolean {
92
- // Check resultArtifact — presence of a result artifact indicates output was produced
93
- if (task.resultArtifact?.path) return true;
94
-
95
- // Check error field — workers often put result info here
96
- if (typeof task.error === "string" && task.error.trim().length > 0) return true;
97
-
98
- // Check verification — satisfied verification indicates meaningful output
99
- if (task.verification?.satisfied) return true;
100
-
101
- // Check diagnostics for an explicit result string
102
- if (
103
- task.diagnostics &&
104
- typeof task.diagnostics === "object" &&
105
- typeof task.diagnostics.result === "string" &&
106
- (task.diagnostics.result as string).trim().length > 0
107
- ) return true;
108
-
109
- return false;
110
- }
111
-
112
- /**
113
- * Check if the task duration is reasonable (started, finished, > 0, < 1 hour).
114
- */
115
- function isDurationReasonable(task: TeamTaskState): boolean {
116
- if (!task.startedAt || !task.finishedAt) return false;
117
-
118
- const started = new Date(task.startedAt).getTime();
119
- const finished = new Date(task.finishedAt).getTime();
120
-
121
- if (Number.isNaN(started) || Number.isNaN(finished)) return false;
122
-
123
- const duration = finished - started;
124
- return duration > 0 && duration < ONE_HOUR_MS;
125
- }
126
-
127
- /**
128
- * Compute the quality score for a completed task.
129
- *
130
- * Uses simple additive scoring across 5 criteria:
131
- * - hasDiagnostics: +1 if task.diagnostics exists and has keys
132
- * - hasMetrics: +1 if task.metrics exists and has keys
133
- * - producedArtifacts: +1 if artifactsDir has files for this task
134
- * - hasDescription: +1 if task has a non-empty result/description
135
- * - durationReasonable: +1 if task has both startedAt and finishedAt, duration > 0 and < 1 hour
136
- *
137
- * @param task - The task state to evaluate
138
- * @param artifactsDir - Optional path to the run artifacts directory
139
- * @returns TaskQualityScore with numeric score, breakdown, and letter grade
140
- */
141
- export function computeTaskQuality(
142
- task: TeamTaskState,
143
- artifactsDir?: string,
144
- ): TaskQualityScore {
145
- const hasDiagnostics =
146
- task.diagnostics !== undefined &&
147
- typeof task.diagnostics === "object" &&
148
- Object.keys(task.diagnostics).length > 0;
149
-
150
- const hasMetrics =
151
- task.metrics !== undefined &&
152
- typeof task.metrics === "object" &&
153
- Object.keys(task.metrics).length > 0;
154
-
155
- const producedArtifacts =
156
- artifactsDir !== undefined && hasTaskArtifacts(task.id, artifactsDir);
157
-
158
- const hasDescription = isResultDescriptive(task);
159
-
160
- const durationReasonable = isDurationReasonable(task);
161
-
162
- const breakdown: QualityBreakdown = {
163
- hasDiagnostics,
164
- hasMetrics,
165
- producedArtifacts,
166
- hasDescription,
167
- durationReasonable,
168
- };
169
-
170
- const score =
171
- (hasDiagnostics ? 1 : 0) +
172
- (hasMetrics ? 1 : 0) +
173
- (producedArtifacts ? 1 : 0) +
174
- (hasDescription ? 1 : 0) +
175
- (durationReasonable ? 1 : 0);
176
-
177
- return {
178
- score,
179
- breakdown,
180
- grade: scoreToGrade(score),
181
- };
182
- }
183
-
184
- /** Human-readable labels for each quality criterion. */
185
- const CRITERION_LABELS: Record<keyof QualityBreakdown, string> = {
186
- hasDiagnostics: "diagnostics",
187
- hasMetrics: "metrics",
188
- producedArtifacts: "artifacts",
189
- hasDescription: "description",
190
- durationReasonable: "duration",
191
- };
192
-
193
- /**
194
- * Format a quality score as a human-readable one-line string.
195
- *
196
- * Format: "Quality: B (3/5: diagnostics, metrics, description)"
197
- *
198
- * @param score - The quality score to format
199
- * @returns Formatted string
200
- */
201
- export function formatQualityScore(score: TaskQualityScore): string {
202
- const metCriteria = Object.entries(score.breakdown)
203
- .filter(([, met]) => met)
204
- .map(([key]) => CRITERION_LABELS[key as keyof QualityBreakdown]);
205
-
206
- return `Quality: ${score.grade} (${score.score}/5${metCriteria.length > 0 ? `: ${metCriteria.join(", ")}` : ""})`;
207
- }
1
+ /**
2
+ * Task quality scoring — simple additive heuristic for evaluating task
3
+ * completion quality based on diagnostics, metrics, artifacts, and duration.
4
+ *
5
+ * Distilled from pi-autoresearch's quality scoring pattern.
6
+ */
7
+ import * as fs from "node:fs";
8
+ import * as path from "node:path";
9
+ import type { TeamTaskState } from "../state/types.ts";
10
+
11
+ /** Letter grade for task quality. */
12
+ export type QualityGrade = "A" | "B" | "C" | "D";
13
+
14
+ /** Breakdown of individual quality criteria. */
15
+ export interface QualityBreakdown {
16
+ /** Task has a non-empty diagnostics object. */
17
+ hasDiagnostics: boolean;
18
+ /** Task has a non-empty metrics object. */
19
+ hasMetrics: boolean;
20
+ /** Task produced files in the artifacts directory. */
21
+ producedArtifacts: boolean;
22
+ /** Task has a non-empty result/description. */
23
+ hasDescription: boolean;
24
+ /** Task duration is reasonable (> 0 and < 1 hour). */
25
+ durationReasonable: boolean;
26
+ }
27
+
28
+ /** Scored quality result for a task. */
29
+ export interface TaskQualityScore {
30
+ /** Numeric score (0–5). */
31
+ score: number;
32
+ /** Individual criterion breakdown. */
33
+ breakdown: QualityBreakdown;
34
+ /** Letter grade based on score thresholds. */
35
+ grade: QualityGrade;
36
+ }
37
+
38
+ /** One hour in milliseconds. */
39
+ const ONE_HOUR_MS = 3_600_000;
40
+
41
+ /**
42
+ * Determine the letter grade for a given numeric score.
43
+ *
44
+ * A: 4–5, B: 3, C: 2, D: 0–1
45
+ */
46
+ function scoreToGrade(score: number): QualityGrade {
47
+ if (score >= 4) return "A";
48
+ if (score === 3) return "B";
49
+ if (score === 2) return "C";
50
+ return "D";
51
+ }
52
+
53
+ /**
54
+ * Check whether the artifacts directory contains files for the given task.
55
+ *
56
+ * Looks for a subdirectory named after the task ID, or files containing
57
+ * the task ID prefix in the artifacts directory.
58
+ */
59
+ function hasTaskArtifacts(taskId: string, artifactsDir: string): boolean {
60
+ try {
61
+ if (!fs.existsSync(artifactsDir)) return false;
62
+
63
+ // Check for a task-specific subdirectory
64
+ const taskDir = path.join(artifactsDir, taskId);
65
+ if (fs.existsSync(taskDir)) {
66
+ const stat = fs.statSync(taskDir);
67
+ if (stat.isDirectory()) {
68
+ const entries = fs.readdirSync(taskDir);
69
+ return entries.length > 0;
70
+ }
71
+ }
72
+
73
+ // Check for files containing the task ID prefix
74
+ const entries = fs.readdirSync(artifactsDir);
75
+ const safePrefix = taskId.replace(/[^a-zA-Z0-9_-]/g, "");
76
+ return entries.some((entry) => entry.includes(safePrefix));
77
+ } catch {
78
+ return false;
79
+ }
80
+ }
81
+
82
+ /**
83
+ * Check if a task result string is a non-empty description.
84
+ *
85
+ * A result is considered descriptive if any of these sources have non-empty content:
86
+ * - task.resultArtifact exists with a path
87
+ * - task.error is a non-empty string (workers often set this with result info)
88
+ * - task.verification.satisfied is true
89
+ * - task.diagnostics contains a 'result' string
90
+ */
91
+ function isResultDescriptive(task: TeamTaskState): boolean {
92
+ // Check resultArtifact — presence of a result artifact indicates output was produced
93
+ if (task.resultArtifact?.path) return true;
94
+
95
+ // Check error field — workers often put result info here
96
+ if (typeof task.error === "string" && task.error.trim().length > 0) return true;
97
+
98
+ // Check verification — satisfied verification indicates meaningful output
99
+ if (task.verification?.satisfied) return true;
100
+
101
+ // Check diagnostics for an explicit result string
102
+ if (
103
+ task.diagnostics &&
104
+ typeof task.diagnostics === "object" &&
105
+ typeof task.diagnostics.result === "string" &&
106
+ (task.diagnostics.result as string).trim().length > 0
107
+ ) return true;
108
+
109
+ return false;
110
+ }
111
+
112
+ /**
113
+ * Check if the task duration is reasonable (started, finished, > 0, < 1 hour).
114
+ */
115
+ function isDurationReasonable(task: TeamTaskState): boolean {
116
+ if (!task.startedAt || !task.finishedAt) return false;
117
+
118
+ const started = new Date(task.startedAt).getTime();
119
+ const finished = new Date(task.finishedAt).getTime();
120
+
121
+ if (Number.isNaN(started) || Number.isNaN(finished)) return false;
122
+
123
+ const duration = finished - started;
124
+ return duration > 0 && duration < ONE_HOUR_MS;
125
+ }
126
+
127
+ /**
128
+ * Compute the quality score for a completed task.
129
+ *
130
+ * Uses simple additive scoring across 5 criteria:
131
+ * - hasDiagnostics: +1 if task.diagnostics exists and has keys
132
+ * - hasMetrics: +1 if task.metrics exists and has keys
133
+ * - producedArtifacts: +1 if artifactsDir has files for this task
134
+ * - hasDescription: +1 if task has a non-empty result/description
135
+ * - durationReasonable: +1 if task has both startedAt and finishedAt, duration > 0 and < 1 hour
136
+ *
137
+ * @param task - The task state to evaluate
138
+ * @param artifactsDir - Optional path to the run artifacts directory
139
+ * @returns TaskQualityScore with numeric score, breakdown, and letter grade
140
+ */
141
+ export function computeTaskQuality(
142
+ task: TeamTaskState,
143
+ artifactsDir?: string,
144
+ ): TaskQualityScore {
145
+ const hasDiagnostics =
146
+ task.diagnostics !== undefined &&
147
+ typeof task.diagnostics === "object" &&
148
+ Object.keys(task.diagnostics).length > 0;
149
+
150
+ const hasMetrics =
151
+ task.metrics !== undefined &&
152
+ typeof task.metrics === "object" &&
153
+ Object.keys(task.metrics).length > 0;
154
+
155
+ const producedArtifacts =
156
+ artifactsDir !== undefined && hasTaskArtifacts(task.id, artifactsDir);
157
+
158
+ const hasDescription = isResultDescriptive(task);
159
+
160
+ const durationReasonable = isDurationReasonable(task);
161
+
162
+ const breakdown: QualityBreakdown = {
163
+ hasDiagnostics,
164
+ hasMetrics,
165
+ producedArtifacts,
166
+ hasDescription,
167
+ durationReasonable,
168
+ };
169
+
170
+ const score =
171
+ (hasDiagnostics ? 1 : 0) +
172
+ (hasMetrics ? 1 : 0) +
173
+ (producedArtifacts ? 1 : 0) +
174
+ (hasDescription ? 1 : 0) +
175
+ (durationReasonable ? 1 : 0);
176
+
177
+ return {
178
+ score,
179
+ breakdown,
180
+ grade: scoreToGrade(score),
181
+ };
182
+ }
183
+
184
+ /** Human-readable labels for each quality criterion. */
185
+ const CRITERION_LABELS: Record<keyof QualityBreakdown, string> = {
186
+ hasDiagnostics: "diagnostics",
187
+ hasMetrics: "metrics",
188
+ producedArtifacts: "artifacts",
189
+ hasDescription: "description",
190
+ durationReasonable: "duration",
191
+ };
192
+
193
+ /**
194
+ * Format a quality score as a human-readable one-line string.
195
+ *
196
+ * Format: "Quality: B (3/5: diagnostics, metrics, description)"
197
+ *
198
+ * @param score - The quality score to format
199
+ * @returns Formatted string
200
+ */
201
+ export function formatQualityScore(score: TaskQualityScore): string {
202
+ const metCriteria = Object.entries(score.breakdown)
203
+ .filter(([, met]) => met)
204
+ .map(([key]) => CRITERION_LABELS[key as keyof QualityBreakdown]);
205
+
206
+ return `Quality: ${score.grade} (${score.score}/5${metCriteria.length > 0 ? `: ${metCriteria.join(", ")}` : ""})`;
207
+ }
@@ -1,78 +1,78 @@
1
- import type { AgentConfig } from "../../agents/agent-config.ts";
2
- import type { CrewRuntimeKind } from "../crew-agent-runtime.ts";
3
-
4
/**
 * Snapshot of what a single worker is configured with for a task, as
 * assembled by `buildWorkerCapabilityInventory`.
 */
export interface WorkerCapabilityInventory {
  /** Version tag of this record layout; always the literal `1`. */
  schemaVersion: 1;
  /** Task identifier, copied from the builder input. */
  taskId: string;
  /** Role string, copied from the builder input. */
  role: string;
  /** Name of the agent configuration (`agent.name`). */
  agent: string;
  /** Runtime kind, copied from the builder input. */
  runtime: CrewRuntimeKind;
  /** Permission mode string, copied from the builder input. */
  permissionMode: string;
  /** Trimmed, de-duplicated, sorted tool names from the agent configuration. */
  tools: string[];
  /** Trimmed, de-duplicated, sorted extension entries from the agent configuration. */
  extensions: string[];
  /** Skill selection recorded for the worker. */
  skills: {
    /** Trimmed, de-duplicated, sorted skill names. */
    names: string[];
    /** Trimmed, de-duplicated, sorted skill paths. */
    paths: string[];
    /** Mirrors the `skillsDisabled` flag of the builder input. */
    disabled: boolean;
  };
  /** Model candidates recorded from the different configuration sources. */
  model: {
    /** Explicit model override, when one was supplied. */
    requested?: string;
    /** Model declared on the agent configuration, if any. */
    agentDefault?: string;
    /** Trimmed, de-duplicated, sorted fallback models of the agent. */
    fallbacks: string[];
    /** Model supplied for the team role, if any. */
    teamRole?: string;
    /** Model supplied for the step, if any. */
    step?: string;
  };
  /** Inheritance switches taken from the agent configuration. */
  inheritance: {
    /** True only when the agent sets `inheritProjectContext` to exactly `true`. */
    projectContext: boolean;
    /** True only when the agent sets `inheritSkills` to exactly `true`. */
    skills: boolean;
    /** System prompt mode; the builder defaults it to "replace" when unset. */
    systemPromptMode: "replace" | "append";
  };
}
31
-
32
/** Arguments accepted by `buildWorkerCapabilityInventory`. */
export interface BuildWorkerCapabilityInventoryInput {
  /** Task identifier, copied verbatim into the inventory. */
  taskId: string;
  /** Role string, copied verbatim into the inventory. */
  role: string;
  /** Agent configuration; its name, tools, extensions, models and inheritance flags are read. */
  agent: AgentConfig;
  /** Runtime kind, copied verbatim into the inventory. */
  runtime: CrewRuntimeKind;
  /** Permission mode string, copied verbatim into the inventory. */
  permissionMode: string;
  /** Optional skill names; treated as an empty list when omitted. */
  skillNames?: string[];
  /** Optional skill paths; treated as an empty list when omitted. */
  skillPaths?: string[];
  /** Recorded as `skills.disabled` in the inventory. */
  skillsDisabled: boolean;
  /** Recorded as `model.requested` in the inventory. */
  modelOverride?: string;
  /** Recorded as `model.teamRole` in the inventory. */
  teamRoleModel?: string;
  /** Recorded as `model.step` in the inventory. */
  stepModel?: string;
}
45
-
46
/**
 * Normalise a list of strings: trim each entry, drop entries that are empty
 * after trimming, remove duplicates, and sort with `localeCompare`.
 *
 * @param values - Strings to normalise; `undefined` is treated as an empty list
 * @returns A new sorted array of distinct, non-empty, trimmed strings
 */
function uniqueSorted(values: readonly string[] | undefined): string[] {
  const distinct = new Set<string>();
  for (const raw of values ?? []) {
    const trimmed = raw.trim();
    if (trimmed) {
      distinct.add(trimmed);
    }
  }
  return Array.from(distinct).sort((left, right) => left.localeCompare(right));
}
49
-
50
/**
 * Assemble the capability inventory record for one worker.
 *
 * List-valued fields (tools, extensions, skill names/paths, fallback models)
 * are passed through `uniqueSorted`; scalar fields are copied as given.
 *
 * @param input - Task, agent and model settings to record
 * @returns A schema-version-1 inventory describing the worker
 */
export function buildWorkerCapabilityInventory(input: BuildWorkerCapabilityInventoryInput): WorkerCapabilityInventory {
  const { agent } = input;

  const skills: WorkerCapabilityInventory["skills"] = {
    names: uniqueSorted(input.skillNames),
    paths: uniqueSorted(input.skillPaths),
    disabled: input.skillsDisabled,
  };

  const model: WorkerCapabilityInventory["model"] = {
    requested: input.modelOverride,
    agentDefault: agent.model,
    fallbacks: uniqueSorted(agent.fallbackModels),
    teamRole: input.teamRoleModel,
    step: input.stepModel,
  };

  const inheritance: WorkerCapabilityInventory["inheritance"] = {
    // Strict comparison: anything other than the literal `true` counts as false.
    projectContext: agent.inheritProjectContext === true,
    skills: agent.inheritSkills === true,
    systemPromptMode: agent.systemPromptMode ?? "replace",
  };

  return {
    schemaVersion: 1,
    taskId: input.taskId,
    role: input.role,
    agent: agent.name,
    runtime: input.runtime,
    permissionMode: input.permissionMode,
    tools: uniqueSorted(agent.tools),
    extensions: uniqueSorted(agent.extensions),
    skills,
    model,
    inheritance,
  };
}
1
+ import type { AgentConfig } from "../../agents/agent-config.ts";
2
+ import type { CrewRuntimeKind } from "../crew-agent-runtime.ts";
3
+
4
/**
 * Snapshot of what a single worker is configured with for a task, as
 * assembled by `buildWorkerCapabilityInventory`.
 */
export interface WorkerCapabilityInventory {
  /** Version tag of this record layout; always the literal `1`. */
  schemaVersion: 1;
  /** Task identifier, copied from the builder input. */
  taskId: string;
  /** Role string, copied from the builder input. */
  role: string;
  /** Name of the agent configuration (`agent.name`). */
  agent: string;
  /** Runtime kind, copied from the builder input. */
  runtime: CrewRuntimeKind;
  /** Permission mode string, copied from the builder input. */
  permissionMode: string;
  /** Trimmed, de-duplicated, sorted tool names from the agent configuration. */
  tools: string[];
  /** Trimmed, de-duplicated, sorted extension entries from the agent configuration. */
  extensions: string[];
  /** Skill selection recorded for the worker. */
  skills: {
    /** Trimmed, de-duplicated, sorted skill names. */
    names: string[];
    /** Trimmed, de-duplicated, sorted skill paths. */
    paths: string[];
    /** Mirrors the `skillsDisabled` flag of the builder input. */
    disabled: boolean;
  };
  /** Model candidates recorded from the different configuration sources. */
  model: {
    /** Explicit model override, when one was supplied. */
    requested?: string;
    /** Model declared on the agent configuration, if any. */
    agentDefault?: string;
    /** Trimmed, de-duplicated, sorted fallback models of the agent. */
    fallbacks: string[];
    /** Model supplied for the team role, if any. */
    teamRole?: string;
    /** Model supplied for the step, if any. */
    step?: string;
  };
  /** Inheritance switches taken from the agent configuration. */
  inheritance: {
    /** True only when the agent sets `inheritProjectContext` to exactly `true`. */
    projectContext: boolean;
    /** True only when the agent sets `inheritSkills` to exactly `true`. */
    skills: boolean;
    /** System prompt mode; the builder defaults it to "replace" when unset. */
    systemPromptMode: "replace" | "append";
  };
}
31
+
32
/** Arguments accepted by `buildWorkerCapabilityInventory`. */
export interface BuildWorkerCapabilityInventoryInput {
  /** Task identifier, copied verbatim into the inventory. */
  taskId: string;
  /** Role string, copied verbatim into the inventory. */
  role: string;
  /** Agent configuration; its name, tools, extensions, models and inheritance flags are read. */
  agent: AgentConfig;
  /** Runtime kind, copied verbatim into the inventory. */
  runtime: CrewRuntimeKind;
  /** Permission mode string, copied verbatim into the inventory. */
  permissionMode: string;
  /** Optional skill names; treated as an empty list when omitted. */
  skillNames?: string[];
  /** Optional skill paths; treated as an empty list when omitted. */
  skillPaths?: string[];
  /** Recorded as `skills.disabled` in the inventory. */
  skillsDisabled: boolean;
  /** Recorded as `model.requested` in the inventory. */
  modelOverride?: string;
  /** Recorded as `model.teamRole` in the inventory. */
  teamRoleModel?: string;
  /** Recorded as `model.step` in the inventory. */
  stepModel?: string;
}
45
+
46
/**
 * Normalise a list of strings: trim each entry, drop entries that are empty
 * after trimming, remove duplicates, and sort with `localeCompare`.
 *
 * @param values - Strings to normalise; `undefined` is treated as an empty list
 * @returns A new sorted array of distinct, non-empty, trimmed strings
 */
function uniqueSorted(values: readonly string[] | undefined): string[] {
  const distinct = new Set<string>();
  for (const raw of values ?? []) {
    const trimmed = raw.trim();
    if (trimmed) {
      distinct.add(trimmed);
    }
  }
  return Array.from(distinct).sort((left, right) => left.localeCompare(right));
}
49
+
50
/**
 * Assemble the capability inventory record for one worker.
 *
 * List-valued fields (tools, extensions, skill names/paths, fallback models)
 * are passed through `uniqueSorted`; scalar fields are copied as given.
 *
 * @param input - Task, agent and model settings to record
 * @returns A schema-version-1 inventory describing the worker
 */
export function buildWorkerCapabilityInventory(input: BuildWorkerCapabilityInventoryInput): WorkerCapabilityInventory {
  const { agent } = input;

  const skills: WorkerCapabilityInventory["skills"] = {
    names: uniqueSorted(input.skillNames),
    paths: uniqueSorted(input.skillPaths),
    disabled: input.skillsDisabled,
  };

  const model: WorkerCapabilityInventory["model"] = {
    requested: input.modelOverride,
    agentDefault: agent.model,
    fallbacks: uniqueSorted(agent.fallbackModels),
    teamRole: input.teamRoleModel,
    step: input.stepModel,
  };

  const inheritance: WorkerCapabilityInventory["inheritance"] = {
    // Strict comparison: anything other than the literal `true` counts as false.
    projectContext: agent.inheritProjectContext === true,
    skills: agent.inheritSkills === true,
    systemPromptMode: agent.systemPromptMode ?? "replace",
  };

  return {
    schemaVersion: 1,
    taskId: input.taskId,
    role: input.role,
    agent: agent.name,
    runtime: input.runtime,
    permissionMode: input.permissionMode,
    tools: uniqueSorted(agent.tools),
    extensions: uniqueSorted(agent.extensions),
    skills,
    model,
    inheritance,
  };
}