pi-crew 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (354) hide show
  1. package/AGENTS.md +57 -32
  2. package/CHANGELOG.md +466 -413
  3. package/LICENSE +21 -21
  4. package/NOTICE.md +16 -16
  5. package/README.md +323 -323
  6. package/docs/FEATURE_INTAKE.md +126 -0
  7. package/docs/HARNESS.md +86 -0
  8. package/docs/HARNESS_BACKLOG.md +41 -0
  9. package/docs/TEST_MATRIX.md +49 -0
  10. package/docs/actions-reference.md +595 -595
  11. package/docs/architecture.md +180 -180
  12. package/docs/code-review-2026-05-11.md +592 -0
  13. package/docs/commands-reference.md +347 -347
  14. package/docs/comparison-pi-subagents-vs-pi-crew.md +303 -0
  15. package/docs/decisions/0001-durable-state.md +41 -0
  16. package/docs/decisions/0002-child-process-for-async.md +42 -0
  17. package/docs/decisions/0003-depth-guard.md +36 -0
  18. package/docs/decisions/0004-execfile-over-exec.md +34 -0
  19. package/docs/decisions/0005-no-parameter-properties.md +49 -0
  20. package/docs/decisions/0006-publish-bundled-esm.md +63 -0
  21. package/docs/decisions/0007-active-run-binary-index.md +54 -0
  22. package/docs/decisions/0008-child-pi-warm-pool.md +61 -0
  23. package/docs/decisions/README.md +23 -0
  24. package/docs/followup-plan-2026-05-12.md +463 -0
  25. package/docs/followup-review-2026-05-12.md +297 -0
  26. package/docs/followup-review-round3-2026-05-12.md +342 -0
  27. package/docs/followup-review-round4-2026-05-13.md +107 -0
  28. package/docs/implementation-plan-top3.md +333 -0
  29. package/docs/live-mailbox-runtime.md +36 -36
  30. package/docs/next-upgrade-roadmap.md +808 -808
  31. package/docs/oh-my-pi-research.md +509 -0
  32. package/docs/perf/baseline-2026-05.md +113 -0
  33. package/docs/perf/final-report-2026-05.md +206 -0
  34. package/docs/perf/sprint-1-report.md +71 -0
  35. package/docs/perf/sprint-2-report.md +81 -0
  36. package/docs/perf/sprint-2.5-report.md +53 -0
  37. package/docs/perf/sprint-3-report.md +36 -0
  38. package/docs/perf/sprint-4-report.md +47 -0
  39. package/docs/perf/sprint-5-report.md +51 -0
  40. package/docs/perf/sprint-6-report.md +94 -0
  41. package/docs/perf/sprint-7-report.md +74 -0
  42. package/docs/perf/upgrade-plan-2026-05.md +147 -0
  43. package/docs/pi-subagents3-deep-analysis.md +508 -0
  44. package/docs/product/README.md +31 -0
  45. package/docs/product/platform.md +27 -0
  46. package/docs/product/runtime-safety.md +37 -0
  47. package/docs/product/team-run.md +39 -0
  48. package/docs/product/team-tool.md +37 -0
  49. package/docs/publishing.md +65 -65
  50. package/docs/resource-formats.md +134 -134
  51. package/docs/runtime-analysis-child-vs-live.md +171 -0
  52. package/docs/runtime-flow.md +148 -148
  53. package/docs/runtime-migration-in-process-analysis.md +250 -0
  54. package/docs/stories/README.md +30 -0
  55. package/docs/stories/backlog.md +36 -0
  56. package/docs/templates/decision.md +27 -0
  57. package/docs/templates/story.md +44 -0
  58. package/docs/templates/validation-report.md +32 -0
  59. package/docs/usage.md +238 -238
  60. package/index.ts +7 -6
  61. package/install.mjs +65 -65
  62. package/package.json +107 -99
  63. package/schema.json +222 -222
  64. package/skills/child-pi-spawning/SKILL.md +213 -0
  65. package/skills/context-artifact-hygiene/SKILL.md +32 -0
  66. package/skills/event-log-tracing/SKILL.md +299 -0
  67. package/skills/git-master/SKILL.md +225 -24
  68. package/skills/live-agent-lifecycle/SKILL.md +192 -0
  69. package/skills/mailbox-interactive/SKILL.md +300 -19
  70. package/skills/model-routing-context/SKILL.md +94 -0
  71. package/skills/multi-perspective-review/SKILL.md +88 -0
  72. package/skills/read-only-explorer/SKILL.md +250 -26
  73. package/skills/safe-bash/SKILL.md +307 -21
  74. package/skills/verification-before-done/SKILL.md +11 -2
  75. package/skills/widget-rendering/SKILL.md +258 -0
  76. package/skills/workspace-isolation/SKILL.md +202 -0
  77. package/skills/worktree-isolation/SKILL.md +202 -18
  78. package/src/adapters/claude-adapter.ts +25 -25
  79. package/src/adapters/codex-adapter.ts +21 -21
  80. package/src/adapters/cursor-adapter.ts +17 -17
  81. package/src/adapters/export-util.ts +137 -137
  82. package/src/adapters/index.ts +15 -15
  83. package/src/adapters/registry.ts +18 -18
  84. package/src/adapters/types.ts +23 -23
  85. package/src/agents/agent-config.ts +38 -38
  86. package/src/agents/agent-serializer.ts +38 -38
  87. package/src/agents/discover-agents.ts +121 -118
  88. package/src/config/config.ts +740 -858
  89. package/src/config/defaults.ts +96 -96
  90. package/src/config/drift-detector.ts +211 -211
  91. package/src/config/markers.ts +327 -327
  92. package/src/config/resilient-parser.ts +109 -108
  93. package/src/config/suggestions.ts +74 -74
  94. package/src/config/types.ts +199 -0
  95. package/src/extension/async-notifier.ts +123 -89
  96. package/src/extension/autonomous-policy.ts +169 -169
  97. package/src/extension/cross-extension-rpc.ts +104 -103
  98. package/src/extension/help.ts +47 -47
  99. package/src/extension/import-index.ts +69 -69
  100. package/src/extension/management.ts +395 -382
  101. package/src/extension/notification-router.ts +116 -116
  102. package/src/extension/notification-sink.ts +51 -51
  103. package/src/extension/project-init.ts +168 -168
  104. package/src/extension/register.ts +859 -668
  105. package/src/extension/registration/artifact-cleanup.ts +15 -15
  106. package/src/extension/registration/command-utils.ts +54 -54
  107. package/src/extension/registration/commands.ts +559 -452
  108. package/src/extension/registration/compaction-guard.ts +125 -125
  109. package/src/extension/registration/subagent-helpers.ts +102 -102
  110. package/src/extension/registration/subagent-tools.ts +220 -158
  111. package/src/extension/registration/team-tool.ts +159 -98
  112. package/src/extension/registration/viewers.ts +29 -0
  113. package/src/extension/result-watcher.ts +128 -128
  114. package/src/extension/run-bundle-schema.ts +89 -89
  115. package/src/extension/run-export.ts +73 -73
  116. package/src/extension/run-import.ts +84 -84
  117. package/src/extension/run-index.ts +94 -94
  118. package/src/extension/run-maintenance.ts +142 -142
  119. package/src/extension/session-summary.ts +8 -8
  120. package/src/extension/team-manager-command.ts +96 -95
  121. package/src/extension/team-recommendation.ts +188 -188
  122. package/src/extension/team-tool/api.ts +5 -2
  123. package/src/extension/team-tool/cancel.ts +224 -209
  124. package/src/extension/team-tool/config-patch.ts +36 -36
  125. package/src/extension/team-tool/context.ts +60 -60
  126. package/src/extension/team-tool/doctor.ts +242 -242
  127. package/src/extension/team-tool/handle-settings.ts +421 -195
  128. package/src/extension/team-tool/inspect.ts +41 -41
  129. package/src/extension/team-tool/lifecycle-actions.ts +139 -139
  130. package/src/extension/team-tool/parallel-dispatch.ts +156 -156
  131. package/src/extension/team-tool/plan.ts +19 -19
  132. package/src/extension/team-tool/respond.ts +112 -111
  133. package/src/extension/team-tool/run.ts +246 -228
  134. package/src/extension/team-tool/status.ts +110 -110
  135. package/src/extension/team-tool-types.ts +13 -13
  136. package/src/extension/team-tool.ts +16 -4
  137. package/src/extension/tool-result.ts +16 -16
  138. package/src/extension/validate-resources.ts +77 -77
  139. package/src/hooks/registry.ts +61 -61
  140. package/src/hooks/types.ts +40 -40
  141. package/src/i18n.ts +184 -184
  142. package/src/observability/correlation.ts +35 -35
  143. package/src/observability/event-to-metric.ts +68 -68
  144. package/src/observability/exporters/adapter.ts +30 -30
  145. package/src/observability/exporters/otlp-exporter.ts +106 -92
  146. package/src/observability/exporters/prometheus-exporter.ts +54 -54
  147. package/src/observability/metric-registry.ts +87 -87
  148. package/src/observability/metric-retention.ts +54 -54
  149. package/src/observability/metric-sink.ts +81 -56
  150. package/src/observability/metrics-primitives.ts +167 -167
  151. package/src/prompt/prompt-runtime.ts +72 -72
  152. package/src/runtime/adaptive-plan.ts +338 -0
  153. package/src/runtime/agent-control.ts +169 -169
  154. package/src/runtime/agent-memory.ts +72 -72
  155. package/src/runtime/agent-observability.ts +114 -114
  156. package/src/runtime/async-marker.ts +26 -26
  157. package/src/runtime/async-runner.ts +153 -79
  158. package/src/runtime/attention-events.ts +28 -28
  159. package/src/runtime/auto-resume.ts +100 -100
  160. package/src/runtime/background-runner.ts +122 -88
  161. package/src/runtime/cancellation.ts +61 -61
  162. package/src/runtime/capability-inventory.ts +116 -116
  163. package/src/runtime/child-pi-pool.ts +68 -0
  164. package/src/runtime/child-pi.ts +541 -463
  165. package/src/runtime/code-summary.ts +247 -247
  166. package/src/runtime/compaction-summary.ts +271 -271
  167. package/src/runtime/concurrency.ts +58 -58
  168. package/src/runtime/crash-recovery.ts +317 -301
  169. package/src/runtime/crew-agent-records.ts +379 -281
  170. package/src/runtime/crew-agent-runtime.ts +60 -60
  171. package/src/runtime/cross-extension-rpc.ts +72 -0
  172. package/src/runtime/custom-tools/irc-tool.ts +201 -201
  173. package/src/runtime/custom-tools/submit-result-tool.ts +90 -90
  174. package/src/runtime/deadletter.ts +47 -47
  175. package/src/runtime/delivery-coordinator.ts +176 -176
  176. package/src/runtime/delta-conflict.ts +360 -360
  177. package/src/runtime/diagnostic-export.ts +102 -102
  178. package/src/runtime/direct-run.ts +35 -35
  179. package/src/runtime/effectiveness.ts +82 -81
  180. package/src/runtime/errors/crew-errors.ts +166 -0
  181. package/src/runtime/event-stream-bridge.ts +92 -92
  182. package/src/runtime/foreground-control.ts +82 -82
  183. package/src/runtime/green-contract.ts +46 -46
  184. package/src/runtime/group-join.ts +234 -106
  185. package/src/runtime/heartbeat-watcher.ts +145 -124
  186. package/src/runtime/iteration-hooks.ts +267 -264
  187. package/src/runtime/live-agent-control.ts +88 -88
  188. package/src/runtime/live-agent-manager.ts +377 -179
  189. package/src/runtime/live-control-realtime.ts +36 -36
  190. package/src/runtime/live-session-runtime.ts +676 -599
  191. package/src/runtime/loop-gates.ts +129 -129
  192. package/src/runtime/manifest-cache.ts +263 -263
  193. package/src/runtime/mcp-proxy.ts +113 -113
  194. package/src/runtime/metric-parser.ts +40 -40
  195. package/src/runtime/model-fallback.ts +282 -274
  196. package/src/runtime/model-resolver.ts +118 -0
  197. package/src/runtime/output-validator.ts +187 -187
  198. package/src/runtime/overflow-recovery.ts +175 -175
  199. package/src/runtime/parallel-research.ts +44 -44
  200. package/src/runtime/parallel-utils.ts +156 -156
  201. package/src/runtime/parent-guard.ts +80 -80
  202. package/src/runtime/phase-progress.ts +217 -217
  203. package/src/runtime/pi-args.ts +165 -165
  204. package/src/runtime/pi-json-output.ts +111 -111
  205. package/src/runtime/pi-spawn.ts +167 -167
  206. package/src/runtime/policy-engine.ts +79 -79
  207. package/src/runtime/post-checks.ts +125 -122
  208. package/src/runtime/post-exit-stdio-guard.ts +86 -86
  209. package/src/runtime/process-status.ts +97 -73
  210. package/src/runtime/progress-event-coalescer.ts +43 -43
  211. package/src/runtime/recovery-recipes.ts +74 -74
  212. package/src/runtime/retry-executor.ts +81 -81
  213. package/src/runtime/role-permission.ts +39 -39
  214. package/src/runtime/run-tracker.ts +99 -0
  215. package/src/runtime/runtime-policy.ts +21 -0
  216. package/src/runtime/runtime-resolver.ts +94 -90
  217. package/src/runtime/scheduler.ts +294 -0
  218. package/src/runtime/semaphore.ts +131 -131
  219. package/src/runtime/sensitive-paths.ts +92 -92
  220. package/src/runtime/session-usage.ts +79 -79
  221. package/src/runtime/settings-store.ts +103 -0
  222. package/src/runtime/sidechain-output.ts +29 -29
  223. package/src/runtime/skill-instructions.ts +222 -222
  224. package/src/runtime/stale-reconciler.ts +198 -189
  225. package/src/runtime/streaming-output.ts +47 -0
  226. package/src/runtime/subagent-manager.ts +404 -395
  227. package/src/runtime/subprocess-tool-registry.ts +67 -67
  228. package/src/runtime/task-display.ts +38 -38
  229. package/src/runtime/task-graph-scheduler.ts +122 -122
  230. package/src/runtime/task-graph.ts +207 -207
  231. package/src/runtime/task-output-context.ts +177 -177
  232. package/src/runtime/task-packet.ts +93 -93
  233. package/src/runtime/task-quality.ts +207 -207
  234. package/src/runtime/task-runner/capabilities.ts +78 -78
  235. package/src/runtime/task-runner/live-executor.ts +131 -113
  236. package/src/runtime/task-runner/progress.ts +119 -119
  237. package/src/runtime/task-runner/prompt-builder.ts +139 -139
  238. package/src/runtime/task-runner/prompt-pipeline.ts +64 -64
  239. package/src/runtime/task-runner/result-utils.ts +14 -14
  240. package/src/runtime/task-runner/run-projection.ts +103 -103
  241. package/src/runtime/task-runner/state-helpers.ts +22 -22
  242. package/src/runtime/task-runner.ts +469 -458
  243. package/src/runtime/team-runner.ts +693 -945
  244. package/src/runtime/usage-tracker.ts +71 -0
  245. package/src/runtime/worker-heartbeat.ts +21 -21
  246. package/src/runtime/worker-startup.ts +57 -57
  247. package/src/runtime/workflow-state.ts +187 -187
  248. package/src/runtime/yield-handler.ts +190 -189
  249. package/src/schema/config-schema.ts +172 -168
  250. package/src/schema/team-tool-schema.ts +126 -125
  251. package/src/schema/validation-types.ts +151 -148
  252. package/src/skills/discover-skills.ts +67 -67
  253. package/src/skills/skill-templates.ts +374 -374
  254. package/src/state/active-run-registry.ts +227 -191
  255. package/src/state/artifact-store.ts +130 -129
  256. package/src/state/atomic-write.ts +262 -178
  257. package/src/state/blob-store.ts +116 -116
  258. package/src/state/contracts.ts +111 -111
  259. package/src/state/event-log-rotation.ts +161 -158
  260. package/src/state/event-log.ts +383 -240
  261. package/src/state/event-reconstructor.ts +217 -217
  262. package/src/state/jsonl-writer.ts +82 -82
  263. package/src/state/locks.ts +146 -148
  264. package/src/state/mailbox.ts +446 -405
  265. package/src/state/state-store.ts +364 -351
  266. package/src/state/task-claims.ts +44 -44
  267. package/src/state/types.ts +285 -285
  268. package/src/state/usage.ts +29 -29
  269. package/src/subagents/async-entry.ts +1 -1
  270. package/src/subagents/index.ts +3 -3
  271. package/src/subagents/live/control.ts +1 -1
  272. package/src/subagents/live/manager.ts +1 -1
  273. package/src/subagents/live/realtime.ts +1 -1
  274. package/src/subagents/live/session-runtime.ts +1 -1
  275. package/src/subagents/manager.ts +1 -1
  276. package/src/subagents/spawn.ts +1 -1
  277. package/src/teams/discover-teams.ts +116 -116
  278. package/src/teams/team-config.ts +27 -27
  279. package/src/teams/team-serializer.ts +38 -38
  280. package/src/types/diff.d.ts +18 -18
  281. package/src/ui/agent-management-overlay.ts +144 -144
  282. package/src/ui/crew-widget.ts +487 -370
  283. package/src/ui/dashboard-panes/agents-pane.ts +109 -28
  284. package/src/ui/dashboard-panes/cancellation-pane.ts +42 -42
  285. package/src/ui/dashboard-panes/capability-pane.ts +59 -59
  286. package/src/ui/dashboard-panes/health-pane.ts +30 -30
  287. package/src/ui/dashboard-panes/mailbox-pane.ts +35 -35
  288. package/src/ui/dashboard-panes/progress-pane.ts +30 -30
  289. package/src/ui/dashboard-panes/transcript-pane.ts +10 -10
  290. package/src/ui/heartbeat-aggregator.ts +63 -63
  291. package/src/ui/keybinding-map.ts +97 -94
  292. package/src/ui/live-conversation-overlay.ts +152 -0
  293. package/src/ui/live-run-sidebar.ts +180 -180
  294. package/src/ui/mascot.ts +442 -442
  295. package/src/ui/overlays/agent-picker-overlay.ts +57 -57
  296. package/src/ui/overlays/confirm-overlay.ts +58 -58
  297. package/src/ui/overlays/mailbox-compose-overlay.ts +144 -144
  298. package/src/ui/overlays/mailbox-compose-preview.ts +63 -63
  299. package/src/ui/overlays/mailbox-detail-overlay.ts +122 -122
  300. package/src/ui/pi-ui-compat.ts +57 -57
  301. package/src/ui/powerbar-publisher.ts +221 -197
  302. package/src/ui/render-scheduler.ts +216 -143
  303. package/src/ui/run-action-dispatcher.ts +118 -117
  304. package/src/ui/run-dashboard.ts +526 -464
  305. package/src/ui/run-event-bus.ts +208 -208
  306. package/src/ui/run-snapshot-cache.ts +826 -777
  307. package/src/ui/settings-overlay.ts +721 -0
  308. package/src/ui/snapshot-types.ts +86 -70
  309. package/src/ui/theme-adapter.ts +190 -190
  310. package/src/ui/tool-progress-formatter.ts +89 -0
  311. package/src/ui/transcript-cache.ts +94 -94
  312. package/src/ui/transcript-viewer.ts +335 -335
  313. package/src/utils/conflict-detect.ts +662 -0
  314. package/src/utils/env-filter.ts +30 -0
  315. package/src/utils/file-coalescer.ts +86 -86
  316. package/src/utils/frontmatter.ts +68 -68
  317. package/src/utils/fs-watch.ts +88 -31
  318. package/src/utils/gh-protocol.ts +479 -0
  319. package/src/utils/ids.ts +17 -17
  320. package/src/utils/incremental-reader.ts +104 -104
  321. package/src/utils/internal-error.ts +6 -6
  322. package/src/utils/names.ts +27 -27
  323. package/src/utils/paths.ts +102 -63
  324. package/src/utils/redaction.ts +44 -44
  325. package/src/utils/resolve-shell.ts +34 -0
  326. package/src/utils/safe-paths.ts +47 -47
  327. package/src/utils/scan-cache.ts +136 -136
  328. package/src/utils/sleep.ts +2 -1
  329. package/src/utils/sse-parser.ts +134 -134
  330. package/src/utils/task-name-generator.ts +337 -337
  331. package/src/utils/timings.ts +33 -33
  332. package/src/utils/visual.ts +243 -198
  333. package/src/workflows/discover-workflows.ts +139 -139
  334. package/src/workflows/validate-workflow.ts +40 -40
  335. package/src/workflows/workflow-config.ts +26 -26
  336. package/src/workflows/workflow-serializer.ts +32 -32
  337. package/src/worktree/branch-freshness.ts +45 -45
  338. package/src/worktree/cleanup.ts +75 -72
  339. package/src/worktree/worktree-manager.ts +188 -146
  340. package/teams/default.team.md +12 -12
  341. package/teams/fast-fix.team.md +11 -11
  342. package/teams/implementation.team.md +18 -18
  343. package/teams/parallel-research.team.md +14 -14
  344. package/teams/research.team.md +11 -11
  345. package/teams/review.team.md +12 -12
  346. package/tsconfig.json +19 -19
  347. package/workflows/default.workflow.md +30 -30
  348. package/workflows/fast-fix.workflow.md +23 -23
  349. package/workflows/implementation.workflow.md +43 -43
  350. package/workflows/parallel-research.workflow.md +46 -46
  351. package/workflows/research.workflow.md +22 -22
  352. package/workflows/review.workflow.md +30 -30
  353. package/skills/task-packet/SKILL.md +0 -28
  354. package/skills/verify-evidence/SKILL.md +0 -27
@@ -1,207 +1,207 @@
1
- /**
2
- * Task quality scoring — simple additive heuristic for evaluating task
3
- * completion quality based on diagnostics, metrics, artifacts, and duration.
4
- *
5
- * Distilled from pi-autoresearch's quality scoring pattern.
6
- */
7
- import * as fs from "node:fs";
8
- import * as path from "node:path";
9
- import type { TeamTaskState } from "../state/types.ts";
10
-
11
- /** Letter grade for task quality. */
12
- export type QualityGrade = "A" | "B" | "C" | "D";
13
-
14
- /** Breakdown of individual quality criteria. */
15
- export interface QualityBreakdown {
16
- /** Task has a non-empty diagnostics object. */
17
- hasDiagnostics: boolean;
18
- /** Task has a non-empty metrics object. */
19
- hasMetrics: boolean;
20
- /** Task produced files in the artifacts directory. */
21
- producedArtifacts: boolean;
22
- /** Task has a non-empty result/description. */
23
- hasDescription: boolean;
24
- /** Task duration is reasonable (> 0 and < 1 hour). */
25
- durationReasonable: boolean;
26
- }
27
-
28
- /** Scored quality result for a task. */
29
- export interface TaskQualityScore {
30
- /** Numeric score (0–5). */
31
- score: number;
32
- /** Individual criterion breakdown. */
33
- breakdown: QualityBreakdown;
34
- /** Letter grade based on score thresholds. */
35
- grade: QualityGrade;
36
- }
37
-
38
- /** One hour in milliseconds. */
39
- const ONE_HOUR_MS = 3_600_000;
40
-
41
- /**
42
- * Determine the letter grade for a given numeric score.
43
- *
44
- * A: 4–5, B: 3, C: 2, D: 0–1
45
- */
46
- function scoreToGrade(score: number): QualityGrade {
47
- if (score >= 4) return "A";
48
- if (score === 3) return "B";
49
- if (score === 2) return "C";
50
- return "D";
51
- }
52
-
53
- /**
54
- * Check whether the artifacts directory contains files for the given task.
55
- *
56
- * Looks for a subdirectory named after the task ID, or files containing
57
- * the task ID prefix in the artifacts directory.
58
- */
59
- function hasTaskArtifacts(taskId: string, artifactsDir: string): boolean {
60
- try {
61
- if (!fs.existsSync(artifactsDir)) return false;
62
-
63
- // Check for a task-specific subdirectory
64
- const taskDir = path.join(artifactsDir, taskId);
65
- if (fs.existsSync(taskDir)) {
66
- const stat = fs.statSync(taskDir);
67
- if (stat.isDirectory()) {
68
- const entries = fs.readdirSync(taskDir);
69
- return entries.length > 0;
70
- }
71
- }
72
-
73
- // Check for files containing the task ID prefix
74
- const entries = fs.readdirSync(artifactsDir);
75
- const safePrefix = taskId.replace(/[^a-zA-Z0-9_-]/g, "");
76
- return entries.some((entry) => entry.includes(safePrefix));
77
- } catch {
78
- return false;
79
- }
80
- }
81
-
82
- /**
83
- * Check if a task result string is a non-empty description.
84
- *
85
- * A result is considered descriptive if any of these sources have non-empty content:
86
- * - task.resultArtifact exists with a path
87
- * - task.error is a non-empty string (workers often set this with result info)
88
- * - task.verification.satisfied is true
89
- * - task.diagnostics contains a 'result' string
90
- */
91
- function isResultDescriptive(task: TeamTaskState): boolean {
92
- // Check resultArtifact — presence of a result artifact indicates output was produced
93
- if (task.resultArtifact?.path) return true;
94
-
95
- // Check error field — workers often put result info here
96
- if (typeof task.error === "string" && task.error.trim().length > 0) return true;
97
-
98
- // Check verification — satisfied verification indicates meaningful output
99
- if (task.verification?.satisfied) return true;
100
-
101
- // Check diagnostics for an explicit result string
102
- if (
103
- task.diagnostics &&
104
- typeof task.diagnostics === "object" &&
105
- typeof task.diagnostics.result === "string" &&
106
- (task.diagnostics.result as string).trim().length > 0
107
- ) return true;
108
-
109
- return false;
110
- }
111
-
112
- /**
113
- * Check if the task duration is reasonable (started, finished, > 0, < 1 hour).
114
- */
115
- function isDurationReasonable(task: TeamTaskState): boolean {
116
- if (!task.startedAt || !task.finishedAt) return false;
117
-
118
- const started = new Date(task.startedAt).getTime();
119
- const finished = new Date(task.finishedAt).getTime();
120
-
121
- if (Number.isNaN(started) || Number.isNaN(finished)) return false;
122
-
123
- const duration = finished - started;
124
- return duration > 0 && duration < ONE_HOUR_MS;
125
- }
126
-
127
- /**
128
- * Compute the quality score for a completed task.
129
- *
130
- * Uses simple additive scoring across 5 criteria:
131
- * - hasDiagnostics: +1 if task.diagnostics exists and has keys
132
- * - hasMetrics: +1 if task.metrics exists and has keys
133
- * - producedArtifacts: +1 if artifactsDir has files for this task
134
- * - hasDescription: +1 if task has a non-empty result/description
135
- * - durationReasonable: +1 if task has both startedAt and finishedAt, duration > 0 and < 1 hour
136
- *
137
- * @param task - The task state to evaluate
138
- * @param artifactsDir - Optional path to the run artifacts directory
139
- * @returns TaskQualityScore with numeric score, breakdown, and letter grade
140
- */
141
- export function computeTaskQuality(
142
- task: TeamTaskState,
143
- artifactsDir?: string,
144
- ): TaskQualityScore {
145
- const hasDiagnostics =
146
- task.diagnostics !== undefined &&
147
- typeof task.diagnostics === "object" &&
148
- Object.keys(task.diagnostics).length > 0;
149
-
150
- const hasMetrics =
151
- task.metrics !== undefined &&
152
- typeof task.metrics === "object" &&
153
- Object.keys(task.metrics).length > 0;
154
-
155
- const producedArtifacts =
156
- artifactsDir !== undefined && hasTaskArtifacts(task.id, artifactsDir);
157
-
158
- const hasDescription = isResultDescriptive(task);
159
-
160
- const durationReasonable = isDurationReasonable(task);
161
-
162
- const breakdown: QualityBreakdown = {
163
- hasDiagnostics,
164
- hasMetrics,
165
- producedArtifacts,
166
- hasDescription,
167
- durationReasonable,
168
- };
169
-
170
- const score =
171
- (hasDiagnostics ? 1 : 0) +
172
- (hasMetrics ? 1 : 0) +
173
- (producedArtifacts ? 1 : 0) +
174
- (hasDescription ? 1 : 0) +
175
- (durationReasonable ? 1 : 0);
176
-
177
- return {
178
- score,
179
- breakdown,
180
- grade: scoreToGrade(score),
181
- };
182
- }
183
-
184
- /** Human-readable labels for each quality criterion. */
185
- const CRITERION_LABELS: Record<keyof QualityBreakdown, string> = {
186
- hasDiagnostics: "diagnostics",
187
- hasMetrics: "metrics",
188
- producedArtifacts: "artifacts",
189
- hasDescription: "description",
190
- durationReasonable: "duration",
191
- };
192
-
193
- /**
194
- * Format a quality score as a human-readable one-line string.
195
- *
196
- * Format: "Quality: B (3/5: diagnostics, metrics, description)"
197
- *
198
- * @param score - The quality score to format
199
- * @returns Formatted string
200
- */
201
- export function formatQualityScore(score: TaskQualityScore): string {
202
- const metCriteria = Object.entries(score.breakdown)
203
- .filter(([, met]) => met)
204
- .map(([key]) => CRITERION_LABELS[key as keyof QualityBreakdown]);
205
-
206
- return `Quality: ${score.grade} (${score.score}/5${metCriteria.length > 0 ? `: ${metCriteria.join(", ")}` : ""})`;
207
- }
1
+ /**
2
+ * Task quality scoring — simple additive heuristic for evaluating task
3
+ * completion quality based on diagnostics, metrics, artifacts, and duration.
4
+ *
5
+ * Distilled from pi-autoresearch's quality scoring pattern.
6
+ */
7
+ import * as fs from "node:fs";
8
+ import * as path from "node:path";
9
+ import type { TeamTaskState } from "../state/types.ts";
10
+
11
+ /** Letter grade for task quality. */
12
+ export type QualityGrade = "A" | "B" | "C" | "D";
13
+
14
+ /** Breakdown of individual quality criteria. */
15
+ export interface QualityBreakdown {
16
+ /** Task has a non-empty diagnostics object. */
17
+ hasDiagnostics: boolean;
18
+ /** Task has a non-empty metrics object. */
19
+ hasMetrics: boolean;
20
+ /** Task produced files in the artifacts directory. */
21
+ producedArtifacts: boolean;
22
+ /** Task has a non-empty result/description. */
23
+ hasDescription: boolean;
24
+ /** Task duration is reasonable (> 0 and < 1 hour). */
25
+ durationReasonable: boolean;
26
+ }
27
+
28
+ /** Scored quality result for a task. */
29
+ export interface TaskQualityScore {
30
+ /** Numeric score (0–5). */
31
+ score: number;
32
+ /** Individual criterion breakdown. */
33
+ breakdown: QualityBreakdown;
34
+ /** Letter grade based on score thresholds. */
35
+ grade: QualityGrade;
36
+ }
37
+
38
+ /** One hour in milliseconds. */
39
+ const ONE_HOUR_MS = 3_600_000;
40
+
41
+ /**
42
+ * Determine the letter grade for a given numeric score.
43
+ *
44
+ * A: 4–5, B: 3, C: 2, D: 0–1
45
+ */
46
+ function scoreToGrade(score: number): QualityGrade {
47
+ if (score >= 4) return "A";
48
+ if (score === 3) return "B";
49
+ if (score === 2) return "C";
50
+ return "D";
51
+ }
52
+
53
+ /**
54
+ * Check whether the artifacts directory contains files for the given task.
55
+ *
56
+ * Looks for a subdirectory named after the task ID, or files containing
57
+ * the task ID prefix in the artifacts directory.
58
+ */
59
+ function hasTaskArtifacts(taskId: string, artifactsDir: string): boolean {
60
+ try {
61
+ if (!fs.existsSync(artifactsDir)) return false;
62
+
63
+ // Check for a task-specific subdirectory
64
+ const taskDir = path.join(artifactsDir, taskId);
65
+ if (fs.existsSync(taskDir)) {
66
+ const stat = fs.statSync(taskDir);
67
+ if (stat.isDirectory()) {
68
+ const entries = fs.readdirSync(taskDir);
69
+ return entries.length > 0;
70
+ }
71
+ }
72
+
73
+ // Check for files containing the task ID prefix
74
+ const entries = fs.readdirSync(artifactsDir);
75
+ const safePrefix = taskId.replace(/[^a-zA-Z0-9_-]/g, "");
76
+ return entries.some((entry) => entry.includes(safePrefix));
77
+ } catch {
78
+ return false;
79
+ }
80
+ }
81
+
82
+ /**
83
+ * Check if a task result string is a non-empty description.
84
+ *
85
+ * A result is considered descriptive if any of these sources have non-empty content:
86
+ * - task.resultArtifact exists with a path
87
+ * - task.error is a non-empty string (workers often set this with result info)
88
+ * - task.verification.satisfied is true
89
+ * - task.diagnostics contains a 'result' string
90
+ */
91
+ function isResultDescriptive(task: TeamTaskState): boolean {
92
+ // Check resultArtifact — presence of a result artifact indicates output was produced
93
+ if (task.resultArtifact?.path) return true;
94
+
95
+ // Check error field — workers often put result info here
96
+ if (typeof task.error === "string" && task.error.trim().length > 0) return true;
97
+
98
+ // Check verification — satisfied verification indicates meaningful output
99
+ if (task.verification?.satisfied) return true;
100
+
101
+ // Check diagnostics for an explicit result string
102
+ if (
103
+ task.diagnostics &&
104
+ typeof task.diagnostics === "object" &&
105
+ typeof task.diagnostics.result === "string" &&
106
+ (task.diagnostics.result as string).trim().length > 0
107
+ ) return true;
108
+
109
+ return false;
110
+ }
111
+
112
+ /**
113
+ * Check if the task duration is reasonable (started, finished, > 0, < 1 hour).
114
+ */
115
+ function isDurationReasonable(task: TeamTaskState): boolean {
116
+ if (!task.startedAt || !task.finishedAt) return false;
117
+
118
+ const started = new Date(task.startedAt).getTime();
119
+ const finished = new Date(task.finishedAt).getTime();
120
+
121
+ if (Number.isNaN(started) || Number.isNaN(finished)) return false;
122
+
123
+ const duration = finished - started;
124
+ return duration > 0 && duration < ONE_HOUR_MS;
125
+ }
126
+
127
+ /**
128
+ * Compute the quality score for a completed task.
129
+ *
130
+ * Uses simple additive scoring across 5 criteria:
131
+ * - hasDiagnostics: +1 if task.diagnostics exists and has keys
132
+ * - hasMetrics: +1 if task.metrics exists and has keys
133
+ * - producedArtifacts: +1 if artifactsDir has files for this task
134
+ * - hasDescription: +1 if task has a non-empty result/description
135
+ * - durationReasonable: +1 if task has both startedAt and finishedAt, duration > 0 and < 1 hour
136
+ *
137
+ * @param task - The task state to evaluate
138
+ * @param artifactsDir - Optional path to the run artifacts directory
139
+ * @returns TaskQualityScore with numeric score, breakdown, and letter grade
140
+ */
141
+ export function computeTaskQuality(
142
+ task: TeamTaskState,
143
+ artifactsDir?: string,
144
+ ): TaskQualityScore {
145
+ const hasDiagnostics =
146
+ task.diagnostics !== undefined &&
147
+ typeof task.diagnostics === "object" &&
148
+ Object.keys(task.diagnostics).length > 0;
149
+
150
+ const hasMetrics =
151
+ task.metrics !== undefined &&
152
+ typeof task.metrics === "object" &&
153
+ Object.keys(task.metrics).length > 0;
154
+
155
+ const producedArtifacts =
156
+ artifactsDir !== undefined && hasTaskArtifacts(task.id, artifactsDir);
157
+
158
+ const hasDescription = isResultDescriptive(task);
159
+
160
+ const durationReasonable = isDurationReasonable(task);
161
+
162
+ const breakdown: QualityBreakdown = {
163
+ hasDiagnostics,
164
+ hasMetrics,
165
+ producedArtifacts,
166
+ hasDescription,
167
+ durationReasonable,
168
+ };
169
+
170
+ const score =
171
+ (hasDiagnostics ? 1 : 0) +
172
+ (hasMetrics ? 1 : 0) +
173
+ (producedArtifacts ? 1 : 0) +
174
+ (hasDescription ? 1 : 0) +
175
+ (durationReasonable ? 1 : 0);
176
+
177
+ return {
178
+ score,
179
+ breakdown,
180
+ grade: scoreToGrade(score),
181
+ };
182
+ }
183
+
184
+ /** Human-readable labels for each quality criterion. */
185
+ const CRITERION_LABELS: Record<keyof QualityBreakdown, string> = {
186
+ hasDiagnostics: "diagnostics",
187
+ hasMetrics: "metrics",
188
+ producedArtifacts: "artifacts",
189
+ hasDescription: "description",
190
+ durationReasonable: "duration",
191
+ };
192
+
193
+ /**
194
+ * Format a quality score as a human-readable one-line string.
195
+ *
196
+ * Format: "Quality: B (3/5: diagnostics, metrics, description)"
197
+ *
198
+ * @param score - The quality score to format
199
+ * @returns Formatted string
200
+ */
201
+ export function formatQualityScore(score: TaskQualityScore): string {
202
+ const metCriteria = Object.entries(score.breakdown)
203
+ .filter(([, met]) => met)
204
+ .map(([key]) => CRITERION_LABELS[key as keyof QualityBreakdown]);
205
+
206
+ return `Quality: ${score.grade} (${score.score}/5${metCriteria.length > 0 ? `: ${metCriteria.join(", ")}` : ""})`;
207
+ }
@@ -1,78 +1,78 @@
1
- import type { AgentConfig } from "../../agents/agent-config.ts";
2
- import type { CrewRuntimeKind } from "../crew-agent-runtime.ts";
3
-
4
- export interface WorkerCapabilityInventory {
5
- schemaVersion: 1;
6
- taskId: string;
7
- role: string;
8
- agent: string;
9
- runtime: CrewRuntimeKind;
10
- permissionMode: string;
11
- tools: string[];
12
- extensions: string[];
13
- skills: {
14
- names: string[];
15
- paths: string[];
16
- disabled: boolean;
17
- };
18
- model: {
19
- requested?: string;
20
- agentDefault?: string;
21
- fallbacks: string[];
22
- teamRole?: string;
23
- step?: string;
24
- };
25
- inheritance: {
26
- projectContext: boolean;
27
- skills: boolean;
28
- systemPromptMode: "replace" | "append";
29
- };
30
- }
31
-
32
- export interface BuildWorkerCapabilityInventoryInput {
33
- taskId: string;
34
- role: string;
35
- agent: AgentConfig;
36
- runtime: CrewRuntimeKind;
37
- permissionMode: string;
38
- skillNames?: string[];
39
- skillPaths?: string[];
40
- skillsDisabled: boolean;
41
- modelOverride?: string;
42
- teamRoleModel?: string;
43
- stepModel?: string;
44
- }
45
-
46
- function uniqueSorted(values: readonly string[] | undefined): string[] {
47
- return [...new Set((values ?? []).map((value) => value.trim()).filter(Boolean))].sort((a, b) => a.localeCompare(b));
48
- }
49
-
50
- export function buildWorkerCapabilityInventory(input: BuildWorkerCapabilityInventoryInput): WorkerCapabilityInventory {
51
- return {
52
- schemaVersion: 1,
53
- taskId: input.taskId,
54
- role: input.role,
55
- agent: input.agent.name,
56
- runtime: input.runtime,
57
- permissionMode: input.permissionMode,
58
- tools: uniqueSorted(input.agent.tools),
59
- extensions: uniqueSorted(input.agent.extensions),
60
- skills: {
61
- names: uniqueSorted(input.skillNames),
62
- paths: uniqueSorted(input.skillPaths),
63
- disabled: input.skillsDisabled,
64
- },
65
- model: {
66
- requested: input.modelOverride,
67
- agentDefault: input.agent.model,
68
- fallbacks: uniqueSorted(input.agent.fallbackModels),
69
- teamRole: input.teamRoleModel,
70
- step: input.stepModel,
71
- },
72
- inheritance: {
73
- projectContext: input.agent.inheritProjectContext === true,
74
- skills: input.agent.inheritSkills === true,
75
- systemPromptMode: input.agent.systemPromptMode ?? "replace",
76
- },
77
- };
78
- }
1
+ import type { AgentConfig } from "../../agents/agent-config.ts";
2
+ import type { CrewRuntimeKind } from "../crew-agent-runtime.ts";
3
+
4
/**
 * Snapshot of what a single worker was configured with for one task.
 *
 * Field notes below describe how `buildWorkerCapabilityInventory` populates
 * each value; "normalized" means trimmed, de-duplicated and sorted.
 */
export interface WorkerCapabilityInventory {
  /** Version tag of this record shape; currently always the literal `1`. */
  schemaVersion: 1;
  /** Identifier of the task the inventory belongs to. */
  taskId: string;
  /** Role the worker plays for this task. */
  role: string;
  /** Name of the agent configuration (`AgentConfig.name`). */
  agent: string;
  /** Runtime kind the worker runs under. */
  runtime: CrewRuntimeKind;
  /** Permission mode string, passed through unchanged from the input. */
  permissionMode: string;
  /** Normalized list of the agent's tools. */
  tools: string[];
  /** Normalized list of the agent's extensions. */
  extensions: string[];
  /** Skill configuration for the worker. */
  skills: {
    /** Normalized skill names. */
    names: string[];
    /** Normalized skill paths. */
    paths: string[];
    /** Copied from the input's `skillsDisabled` flag. */
    disabled: boolean;
  };
  /** Model candidates recorded from the different configuration sources. */
  model: {
    /** Explicit model override supplied for this run, if any. */
    requested?: string;
    /** Model set on the agent configuration, if any. */
    agentDefault?: string;
    /** Normalized list of the agent's fallback models. */
    fallbacks: string[];
    /** Model supplied for the team role, if any. */
    teamRole?: string;
    /** Model supplied for the step, if any. */
    step?: string;
  };
  /** What the worker inherits, as declared on the agent configuration. */
  inheritance: {
    /** `true` only when the agent sets `inheritProjectContext` to exactly `true`. */
    projectContext: boolean;
    /** `true` only when the agent sets `inheritSkills` to exactly `true`. */
    skills: boolean;
    /** The agent's system prompt mode; "replace" when the agent does not set one. */
    systemPromptMode: "replace" | "append";
  };
}
31
+
32
/**
 * Arguments accepted by `buildWorkerCapabilityInventory`.
 */
export interface BuildWorkerCapabilityInventoryInput {
  /** Identifier of the task; copied to the inventory as-is. */
  taskId: string;
  /** Role of the worker; copied to the inventory as-is. */
  role: string;
  /**
   * Agent configuration. The builder reads its name, tools, extensions, model,
   * fallback models, inheritance flags and system prompt mode.
   */
  agent: AgentConfig;
  /** Runtime kind; copied to the inventory as-is. */
  runtime: CrewRuntimeKind;
  /** Permission mode; copied to the inventory as-is. */
  permissionMode: string;
  /** Skill names; omitted is treated the same as an empty list. */
  skillNames?: string[];
  /** Skill paths; omitted is treated the same as an empty list. */
  skillPaths?: string[];
  /** Recorded as `skills.disabled` in the inventory. */
  skillsDisabled: boolean;
  /** Recorded as `model.requested` in the inventory. */
  modelOverride?: string;
  /** Recorded as `model.teamRole` in the inventory. */
  teamRoleModel?: string;
  /** Recorded as `model.step` in the inventory. */
  stepModel?: string;
}
45
+
46
/**
 * Normalize a list of strings: trim every entry, drop entries that are empty
 * after trimming, remove duplicates, and return the rest sorted.
 *
 * Ordering is whatever `String.prototype.localeCompare` yields when called
 * without an explicit locale. The input array is never mutated, and
 * `undefined` is treated as an empty list.
 *
 * @param values - Strings to normalize, or `undefined`
 * @returns A new, sorted array of distinct non-empty trimmed strings
 */
function uniqueSorted(values: readonly string[] | undefined): string[] {
  const trimmed = (values ?? []).map((value) => value.trim()).filter(Boolean);
  // Spreading the Set yields a fresh array, so the in-place sort is safe.
  return [...new Set(trimmed)].sort((a, b) => a.localeCompare(b));
}
49
+
50
/**
 * Build the capability inventory record for one worker task.
 *
 * Scalar fields are copied straight from `input`. Every list field (tools,
 * extensions, skill names/paths, fallback models) goes through `uniqueSorted`,
 * so it is trimmed, de-duplicated and sorted, and a missing list becomes `[]`.
 *
 * @param input - Task, agent and model settings to record
 * @returns A new inventory object with `schemaVersion` set to `1`
 */
export function buildWorkerCapabilityInventory(input: BuildWorkerCapabilityInventoryInput): WorkerCapabilityInventory {
  const { agent } = input;

  return {
    schemaVersion: 1,
    taskId: input.taskId,
    role: input.role,
    agent: agent.name,
    runtime: input.runtime,
    permissionMode: input.permissionMode,
    tools: uniqueSorted(agent.tools),
    extensions: uniqueSorted(agent.extensions),
    skills: {
      names: uniqueSorted(input.skillNames),
      paths: uniqueSorted(input.skillPaths),
      disabled: input.skillsDisabled,
    },
    model: {
      requested: input.modelOverride,
      agentDefault: agent.model,
      fallbacks: uniqueSorted(agent.fallbackModels),
      teamRole: input.teamRoleModel,
      step: input.stepModel,
    },
    inheritance: {
      // Strict comparison: anything other than a literal `true` is recorded as false.
      projectContext: agent.inheritProjectContext === true,
      skills: agent.inheritSkills === true,
      // An unset prompt mode is recorded as "replace".
      systemPromptMode: agent.systemPromptMode ?? "replace",
    },
  };
}