pi-crew 0.1.46 → 0.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +97 -0
- package/agents/analyst.md +11 -11
- package/agents/critic.md +11 -11
- package/agents/executor.md +11 -11
- package/agents/explorer.md +11 -11
- package/agents/planner.md +11 -11
- package/agents/reviewer.md +11 -11
- package/agents/security-reviewer.md +11 -11
- package/agents/test-engineer.md +11 -11
- package/agents/verifier.md +11 -11
- package/agents/writer.md +11 -11
- package/docs/next-upgrade-roadmap.md +117 -42
- package/docs/refactor-tasks-phase3.md +394 -394
- package/docs/refactor-tasks-phase4.md +564 -564
- package/docs/refactor-tasks-phase5.md +402 -402
- package/docs/refactor-tasks-phase6.md +662 -662
- package/docs/research/AGENT-EXECUTION-ARCHITECTURE.md +261 -0
- package/docs/research/AGENT-LIFECYCLE-COMPARISON.md +111 -0
- package/docs/research/AUDIT_OH_MY_PI.md +261 -0
- package/docs/research/AUDIT_PI_CREW.md +457 -0
- package/docs/research/CAVEMAN-DEEP-RESEARCH.md +281 -0
- package/docs/research/COMPARISON_OH_MY_PI_VS_PI_CREW.md +264 -0
- package/docs/research/DEEP-RESEARCH-PI-POWERBAR.md +343 -0
- package/docs/research/DEEP_RESEARCH_SUBAGENT_ARCHITECTURE.md +480 -0
- package/docs/research/GAP_CLOSURE_IMPLEMENTATION_PLAN.md +354 -0
- package/docs/research/IMPLEMENTATION_PLAN.md +385 -0
- package/docs/research/LIVE-SESSION-PRODUCTION-READY-PLAN.md +502 -0
- package/docs/research/OH-MY-PI-DEEP-RESEARCH-v14.7.6.md +266 -0
- package/docs/research/REMAINING-GAPS-PLAN.md +363 -0
- package/docs/research/SESSION-SUMMARY-2026-05-08.md +146 -0
- package/docs/research/UI-RESPONSIVENESS-AUDIT.md +173 -0
- package/docs/research-awesome-agent-skills-distillation.md +100 -100
- package/docs/research-extension-examples.md +297 -297
- package/docs/research-extension-system.md +324 -324
- package/docs/research-oh-my-pi-distillation.md +56 -9
- package/docs/research-optimization-plan.md +548 -548
- package/docs/research-phase10-distillation.md +198 -198
- package/docs/research-phase11-distillation.md +201 -201
- package/docs/research-pi-coding-agent.md +357 -357
- package/docs/research-source-pi-crew-reference.md +174 -174
- package/docs/runtime-flow.md +148 -148
- package/docs/source-runtime-refactor-map.md +107 -107
- package/index.ts +6 -6
- package/package.json +99 -98
- package/schema.json +8 -0
- package/skills/async-worker-recovery/SKILL.md +42 -42
- package/skills/context-artifact-hygiene/SKILL.md +52 -52
- package/skills/delegation-patterns/SKILL.md +54 -54
- package/skills/mailbox-interactive/SKILL.md +40 -40
- package/skills/model-routing-context/SKILL.md +39 -39
- package/skills/multi-perspective-review/SKILL.md +58 -58
- package/skills/observability-reliability/SKILL.md +41 -41
- package/skills/orchestration/SKILL.md +157 -0
- package/skills/ownership-session-security/SKILL.md +41 -41
- package/skills/pi-extension-lifecycle/SKILL.md +39 -39
- package/skills/requirements-to-task-packet/SKILL.md +63 -63
- package/skills/resource-discovery-config/SKILL.md +41 -41
- package/skills/runtime-state-reader/SKILL.md +44 -44
- package/skills/secure-agent-orchestration-review/SKILL.md +45 -45
- package/skills/state-mutation-locking/SKILL.md +42 -42
- package/skills/systematic-debugging/SKILL.md +67 -67
- package/skills/ui-render-performance/SKILL.md +39 -39
- package/skills/verification-before-done/SKILL.md +57 -57
- package/skills/worktree-isolation/SKILL.md +39 -39
- package/src/agents/agent-config.ts +6 -0
- package/src/agents/agent-search.ts +98 -0
- package/src/agents/agent-serializer.ts +4 -0
- package/src/agents/discover-agents.ts +17 -4
- package/src/config/config.ts +24 -0
- package/src/config/defaults.ts +11 -0
- package/src/extension/autonomous-policy.ts +26 -33
- package/src/extension/cross-extension-rpc.ts +82 -82
- package/src/extension/help.ts +1 -0
- package/src/extension/management.ts +5 -0
- package/src/extension/register.ts +58 -13
- package/src/extension/registration/commands.ts +33 -1
- package/src/extension/registration/compaction-guard.ts +125 -125
- package/src/extension/registration/team-tool.ts +6 -4
- package/src/extension/run-bundle-schema.ts +89 -89
- package/src/extension/run-index.ts +24 -18
- package/src/extension/run-maintenance.ts +68 -62
- package/src/extension/team-tool/api.ts +23 -2
- package/src/extension/team-tool/cancel.ts +86 -11
- package/src/extension/team-tool/context.ts +3 -0
- package/src/extension/team-tool/handle-settings.ts +188 -188
- package/src/extension/team-tool/inspect.ts +41 -41
- package/src/extension/team-tool/intent-policy.ts +42 -0
- package/src/extension/team-tool/lifecycle-actions.ts +47 -18
- package/src/extension/team-tool/parallel-dispatch.ts +156 -0
- package/src/extension/team-tool/plan.ts +19 -19
- package/src/extension/team-tool/respond.ts +10 -2
- package/src/extension/team-tool/run.ts +3 -2
- package/src/extension/team-tool/status.ts +1 -1
- package/src/extension/team-tool-types.ts +1 -0
- package/src/extension/team-tool.ts +13 -3
- package/src/hooks/registry.ts +61 -0
- package/src/hooks/types.ts +41 -0
- package/src/i18n.ts +184 -184
- package/src/observability/exporters/otlp-exporter.ts +77 -77
- package/src/prompt/prompt-runtime.ts +72 -72
- package/src/runtime/agent-control.ts +108 -2
- package/src/runtime/agent-memory.ts +72 -72
- package/src/runtime/agent-observability.ts +114 -114
- package/src/runtime/async-marker.ts +26 -26
- package/src/runtime/async-runner.ts +3 -1
- package/src/runtime/attention-events.ts +28 -28
- package/src/runtime/background-runner.ts +19 -0
- package/src/runtime/cancellation-token.ts +89 -0
- package/src/runtime/cancellation.ts +61 -51
- package/src/runtime/capability-inventory.ts +116 -0
- package/src/runtime/child-pi.ts +2 -1
- package/src/runtime/code-summary.ts +247 -0
- package/src/runtime/completion-guard.ts +190 -190
- package/src/runtime/crash-recovery.ts +181 -0
- package/src/runtime/crew-agent-records.ts +35 -7
- package/src/runtime/crew-agent-runtime.ts +1 -0
- package/src/runtime/custom-tools/irc-tool.ts +201 -0
- package/src/runtime/custom-tools/submit-result-tool.ts +90 -0
- package/src/runtime/delivery-coordinator.ts +3 -1
- package/src/runtime/direct-run.ts +35 -35
- package/src/runtime/effectiveness.ts +81 -76
- package/src/runtime/event-stream-bridge.ts +90 -0
- package/src/runtime/foreground-control.ts +82 -82
- package/src/runtime/green-contract.ts +46 -46
- package/src/runtime/group-join.ts +106 -106
- package/src/runtime/heartbeat-gradient.ts +28 -28
- package/src/runtime/heartbeat-watcher.ts +124 -124
- package/src/runtime/live-agent-control.ts +88 -88
- package/src/runtime/live-agent-manager.ts +78 -2
- package/src/runtime/live-control-realtime.ts +36 -36
- package/src/runtime/live-extension-bridge.ts +150 -0
- package/src/runtime/live-irc.ts +92 -0
- package/src/runtime/live-session-health.ts +100 -0
- package/src/runtime/live-session-runtime.ts +297 -7
- package/src/runtime/mcp-proxy.ts +113 -0
- package/src/runtime/notebook-helpers.ts +90 -0
- package/src/runtime/orphan-sentinel.ts +7 -0
- package/src/runtime/output-validator.ts +187 -0
- package/src/runtime/parallel-research.ts +44 -44
- package/src/runtime/parallel-utils.ts +57 -0
- package/src/runtime/parent-guard.ts +80 -0
- package/src/runtime/pi-json-output.ts +111 -111
- package/src/runtime/policy-engine.ts +79 -79
- package/src/runtime/progress-event-coalescer.ts +43 -43
- package/src/runtime/prose-compressor.ts +164 -0
- package/src/runtime/recovery-recipes.ts +74 -74
- package/src/runtime/result-extractor.ts +121 -0
- package/src/runtime/role-permission.ts +39 -39
- package/src/runtime/runtime-resolver.ts +1 -4
- package/src/runtime/semaphore.ts +131 -0
- package/src/runtime/sensitive-paths.ts +92 -0
- package/src/runtime/session-resources.ts +25 -25
- package/src/runtime/session-snapshot.ts +59 -59
- package/src/runtime/session-usage.ts +79 -79
- package/src/runtime/sidechain-output.ts +29 -29
- package/src/runtime/stream-preview.ts +177 -0
- package/src/runtime/subagent-manager.ts +3 -2
- package/src/runtime/subprocess-tool-registry.ts +67 -0
- package/src/runtime/supervisor-contact.ts +59 -59
- package/src/runtime/task-display.ts +38 -38
- package/src/runtime/task-output-context.ts +59 -9
- package/src/runtime/task-runner/capabilities.ts +78 -78
- package/src/runtime/task-runner/live-executor.ts +2 -0
- package/src/runtime/task-runner/progress.ts +119 -119
- package/src/runtime/task-runner/prompt-builder.ts +70 -8
- package/src/runtime/task-runner/prompt-pipeline.ts +64 -64
- package/src/runtime/task-runner/result-utils.ts +14 -14
- package/src/runtime/task-runner/run-projection.ts +104 -0
- package/src/runtime/task-runner/state-helpers.ts +22 -22
- package/src/runtime/task-runner.ts +75 -4
- package/src/runtime/team-runner.ts +60 -8
- package/src/runtime/worker-heartbeat.ts +21 -21
- package/src/runtime/worker-startup.ts +57 -57
- package/src/runtime/workspace-tree.ts +298 -0
- package/src/runtime/yield-handler.ts +189 -0
- package/src/schema/config-schema.ts +6 -0
- package/src/schema/team-tool-schema.ts +11 -1
- package/src/skills/discover-skills.ts +67 -0
- package/src/state/active-run-registry.ts +4 -2
- package/src/state/artifact-store.ts +4 -1
- package/src/state/atomic-write.ts +50 -1
- package/src/state/blob-store.ts +117 -0
- package/src/state/contracts.ts +1 -0
- package/src/state/event-log-rotation.ts +158 -0
- package/src/state/event-log.ts +52 -2
- package/src/state/mailbox.ts +87 -7
- package/src/state/state-store.ts +24 -4
- package/src/state/task-claims.ts +44 -44
- package/src/state/types.ts +20 -0
- package/src/state/usage.ts +29 -29
- package/src/subagents/async-entry.ts +1 -1
- package/src/subagents/index.ts +3 -3
- package/src/subagents/live/control.ts +1 -1
- package/src/subagents/live/manager.ts +1 -1
- package/src/subagents/live/realtime.ts +1 -1
- package/src/subagents/live/session-runtime.ts +1 -1
- package/src/subagents/manager.ts +1 -1
- package/src/subagents/spawn.ts +1 -1
- package/src/teams/team-serializer.ts +38 -38
- package/src/types/diff.d.ts +18 -18
- package/src/ui/agent-management-overlay.ts +144 -0
- package/src/ui/crew-footer.ts +101 -101
- package/src/ui/crew-select-list.ts +111 -111
- package/src/ui/crew-widget.ts +11 -2
- package/src/ui/dashboard-panes/cancellation-pane.ts +43 -0
- package/src/ui/dashboard-panes/capability-pane.ts +60 -0
- package/src/ui/dashboard-panes/mailbox-pane.ts +35 -11
- package/src/ui/dashboard-panes/metrics-pane.ts +34 -34
- package/src/ui/dynamic-border.ts +25 -25
- package/src/ui/layout-primitives.ts +106 -106
- package/src/ui/live-run-sidebar.ts +4 -0
- package/src/ui/loaders.ts +158 -158
- package/src/ui/powerbar-publisher.ts +77 -15
- package/src/ui/render-coalescer.ts +51 -0
- package/src/ui/render-diff.ts +119 -119
- package/src/ui/render-scheduler.ts +143 -143
- package/src/ui/run-dashboard.ts +4 -0
- package/src/ui/run-event-bus.ts +209 -0
- package/src/ui/run-snapshot-cache.ts +68 -16
- package/src/ui/snapshot-types.ts +8 -0
- package/src/ui/spinner.ts +17 -17
- package/src/ui/status-colors.ts +58 -58
- package/src/ui/syntax-highlight.ts +116 -116
- package/src/ui/transcript-entries.ts +258 -0
- package/src/utils/atomic-write.ts +33 -33
- package/src/utils/completion-dedupe.ts +63 -63
- package/src/utils/frontmatter.ts +68 -68
- package/src/utils/git.ts +262 -262
- package/src/utils/ids.ts +17 -12
- package/src/utils/incremental-reader.ts +104 -0
- package/src/utils/names.ts +27 -27
- package/src/utils/redaction.ts +44 -44
- package/src/utils/safe-paths.ts +47 -47
- package/src/utils/scan-cache.ts +137 -0
- package/src/utils/sleep.ts +32 -32
- package/src/utils/sse-parser.ts +134 -0
- package/src/utils/task-name-generator.ts +337 -0
- package/src/utils/visual.ts +33 -2
- package/src/workflows/validate-workflow.ts +40 -40
- package/src/worktree/branch-freshness.ts +45 -45
- package/src/worktree/cleanup.ts +2 -1
- package/teams/default.team.md +12 -12
- package/teams/fast-fix.team.md +11 -11
- package/teams/implementation.team.md +18 -18
- package/teams/parallel-research.team.md +14 -14
- package/teams/research.team.md +11 -11
- package/teams/review.team.md +12 -12
- package/workflows/default.workflow.md +29 -29
- package/workflows/fast-fix.workflow.md +22 -22
- package/workflows/implementation.workflow.md +38 -38
- package/workflows/parallel-research.workflow.md +46 -46
- package/workflows/research.workflow.md +22 -22
- package/workflows/review.workflow.md +30 -30
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structural code summary — regex-based summarizer that elides function bodies,
|
|
3
|
+
* long arrays, block comments, and import groups, keeping signatures.
|
|
4
|
+
* Pure TypeScript fallback (no tree-sitter / Rust native dependency).
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
// ── Public types ──
|
|
8
|
+
|
|
9
|
+
/** One contiguous run of source lines in a summary: reproduced verbatim or dropped. */
export interface SummarySegment {
  /** "kept" when the lines are reproduced, "elided" when they are left out. */
  kind: "kept" | "elided";
  /** First line covered by the segment (1-based, inclusive). */
  startLine: number;
  /** Last line covered by the segment (1-based, inclusive). */
  endLine: number;
  /** Verbatim text for kept segments; absent for elided */
  text?: string;
}
|
|
16
|
+
|
|
17
|
+
/** Outcome of summarizing one piece of source text. */
export interface SummaryResult {
  /** Language the summary was produced for, or null when none was supplied. */
  language: string | null;
  /** Number of lines in the input (0 for empty / whitespace-only input). */
  totalLines: number;
  /** True when at least one range of lines was replaced by an elision marker. */
  elided: boolean;
  /** Kept and elided segments in source order. */
  segments: SummarySegment[];
  /** Display text: kept segments joined with one marker line per elided segment. */
  rendered: string;
}
|
|
24
|
+
|
|
25
|
+
/** Tuning knobs for the summarizer; every field is optional. */
export interface SummaryOptions {
  /** Minimum number of body lines a brace/indent block needs before it is elided (default 4). */
  minBodyLines?: number;
  /** Minimum number of interior lines a block comment needs before it is elided (default 6). */
  minCommentLines?: number;
}
|
|
29
|
+
|
|
30
|
+
// ── Language detection ──
|
|
31
|
+
|
|
32
|
+
/** Lower-case file extension (leading dot included) mapped to the language name used by the summarizer. */
const EXT_MAP: ReadonlyMap<string, string> = new Map([
  [".ts", "typescript"],
  [".tsx", "typescript"],
  [".js", "javascript"],
  [".jsx", "javascript"],
  [".mjs", "javascript"],
  [".cjs", "javascript"],
  [".py", "python"],
  [".rs", "rust"],
]);
|
|
38
|
+
|
|
39
|
+
/**
 * Maps a file path to a language name by its extension.
 *
 * The extension is everything from the LAST "." to the end of the string,
 * lower-cased before lookup.
 *
 * @param filePath Path or bare file name.
 * @returns The language name from EXT_MAP, or null when the path has no "."
 *          or the extension is not in the table.
 */
export function detectLanguage(filePath: string): string | null {
  const dotAt = filePath.lastIndexOf(".");
  if (dotAt < 0) {
    return null;
  }
  const extension = filePath.slice(dotAt).toLowerCase();
  const language = EXT_MAP.get(extension);
  return language === undefined ? null : language;
}
|
|
44
|
+
|
|
45
|
+
// ── Internal range helpers ──
|
|
46
|
+
|
|
47
|
+
/** Inclusive span of 0-based line indices. */
interface Range {
  start: number;
  end: number;
}
|
|
48
|
+
|
|
49
|
+
/**
 * Normalizes a list of inclusive line ranges.
 *
 * Ranges are sorted by start (then end), empty ranges (end < start) are
 * discarded, and ranges that overlap or sit on directly adjacent lines are
 * coalesced into one.
 *
 * Neither the input array nor the range objects in it are modified: every
 * returned range is a fresh object. (Previously the first sorted range was
 * pushed by reference and then widened in place, which mutated the caller's
 * object.)
 *
 * @param ranges Ranges in any order; may be empty.
 * @returns Sorted, non-overlapping, non-adjacent ranges.
 */
function mergeRanges(ranges: Range[]): Range[] {
  // filter() yields a new array, so the in-place sort never touches the input.
  const sorted = ranges
    .filter((r) => r.end >= r.start)
    .sort((a, b) => a.start - b.start || a.end - b.end);

  const merged: Range[] = [];
  for (const current of sorted) {
    const last = merged[merged.length - 1];
    if (last !== undefined && current.start <= last.end + 1) {
      // Overlapping or touching: widen the previous (already copied) range.
      last.end = Math.max(last.end, current.end);
    } else {
      merged.push({ ...current });
    }
  }
  return merged;
}
|
|
60
|
+
|
|
61
|
+
// ── Brace-based elision (TS/JS/Rust) ──
|
|
62
|
+
// NOTE: This is a regex heuristic, not a parser. Braces inside string literals,
|
|
63
|
+
// template strings, regex, and comments are counted, which can produce incorrect
|
|
64
|
+
// elision for edge cases like `const s = "{...}"` or `${expr}`. Acceptable for
|
|
65
|
+
// summaries; do not use for correctness-sensitive parsing.
|
|
66
|
+
|
|
67
|
+
/**
 * Finds the interior line ranges of brace-delimited blocks whose opening line
 * matches `openPattern`.
 *
 * For every matching line, braces are counted character by character from
 * that line onward. The block ends on the first line where at least one "{"
 * has been seen and the running depth is back to zero or below. The range
 * covers the lines strictly between the matching line and that closing line,
 * and is recorded only when their count is at least `minBody`.
 *
 * All braces are counted, including ones inside strings and comments.
 *
 * @param lines       Source split into lines.
 * @param openPattern Tested against each line to find block headers.
 * @param minBody     Minimum interior line count for a range to be reported.
 * @returns Ranges of 0-based line indices, in order of their header lines.
 */
function findBraceRanges(lines: string[], openPattern: RegExp, minBody: number): Range[] {
  const found: Range[] = [];
  for (const [headerRow, headerLine] of lines.entries()) {
    if (!openPattern.test(headerLine)) continue;

    let balance = 0;
    let opened = false;
    for (let row = headerRow; row < lines.length; row += 1) {
      for (const ch of lines[row]) {
        if (ch === "{") {
          balance += 1;
          opened = true;
        } else if (ch === "}") {
          balance -= 1;
        }
      }
      // Keep scanning until a block has been opened and fully closed.
      if (!opened || balance > 0) continue;

      const interiorLines = row - headerRow - 1;
      if (interiorLines >= minBody) {
        found.push({ start: headerRow + 1, end: row - 1 });
      }
      break;
    }
  }
  return found;
}
|
|
87
|
+
|
|
88
|
+
// ── TypeScript / JavaScript ──
|
|
89
|
+
|
|
90
|
+
/**
 * Line that starts a function-like block. Two alternatives:
 *  1. an optionally exported / async `function` declaration;
 *  2. optional member modifiers, an optional `*`, then an identifier followed
 *     by `(` or `<`. This also matches plain call statements and
 *     control-flow headers such as `if (`.
 */
const TS_FN_SIG = /^\s*(export\s+)?(async\s+)?function\s|^\s*(export\s+)?(static\s+|get\s+|set\s+|private\s+|public\s+|protected\s+|readonly\s+)*\*?\s*\w+\s*[\(<]/;

/** Line that starts a class declaration (optionally export / default / abstract). */
const TS_CLASS_SIG = /^\s*(export\s+)?(default\s+)?(abstract\s+)?class\s/;

/** `const|let|var name =` whose line ends with an opening `[` or `{` (trailing whitespace allowed). */
const TS_STRUCT_SIG = /^\s*(export\s+)?(default\s+)?(const|let|var)\s+\w+\s*=\s*(\[[\s]*$|\{[\s]*$)/;
|
|
94
|
+
|
|
95
|
+
/**
 * Collects elidable brace-block interiors for TypeScript / JavaScript:
 * function-like blocks first, then classes, then multi-line array/object
 * initializers. Ranges may overlap; they are not merged here.
 */
function tsRanges(lines: string[], minBody: number): Range[] {
  const signatures = [TS_FN_SIG, TS_CLASS_SIG, TS_STRUCT_SIG];
  return signatures.flatMap((signature) => findBraceRanges(lines, signature, minBody));
}
|
|
102
|
+
|
|
103
|
+
// ── Block comments ──
|
|
104
|
+
|
|
105
|
+
/**
 * Finds the interior lines of multi-line block comments.
 *
 * A comment starts on a line containing the opening marker with no closing
 * marker after it on that same line, and ends on the first later line that
 * contains the closing marker. The range covers the lines strictly between
 * those two and is reported only when that count is at least `minComment`.
 * An unterminated comment yields nothing.
 *
 * @param lines      Source split into lines.
 * @param minComment Minimum interior line count for a range to be reported.
 */
function blockCommentRanges(lines: string[], minComment: number): Range[] {
  const found: Range[] = [];
  for (let row = 0; row < lines.length; row += 1) {
    const openAt = lines[row].indexOf("/*");
    if (openAt === -1) continue;
    // Opened and closed on the same line: nothing to elide.
    if (lines[row].includes("*/", openAt + 2)) continue;

    let closeRow = row + 1;
    while (closeRow < lines.length && !lines[closeRow].includes("*/")) {
      closeRow += 1;
    }

    const terminated = closeRow < lines.length;
    const interiorLines = closeRow - row - 1;
    if (terminated && interiorLines >= minComment) {
      found.push({ start: row + 1, end: closeRow - 1 });
    }
    // Resume after the closing line (the loop increment adds the final +1).
    row = closeRow;
  }
  return found;
}
|
|
120
|
+
|
|
121
|
+
// ── Import groups ──
|
|
122
|
+
|
|
123
|
+
/** Line that begins (after optional indentation) with the `import` keyword. */
const IMPORT_RE = /^\s*import\s/;

/** Python import line: `import x` or `from x import y`. */
const PY_IMPORT_RE = /^\s*(import\s|from\s+\S+\s+import\s)/;
|
|
125
|
+
|
|
126
|
+
/**
 * Finds the elidable middle of each run of consecutive import lines.
 *
 * A run is a maximal sequence of adjacent lines matching `pattern`; any
 * non-matching line (including a blank one) ends it. For runs of three or
 * more lines, the range covers everything except the first and last line of
 * the run. Shorter runs yield nothing.
 *
 * @param lines   Source split into lines.
 * @param pattern Tested once against every line.
 */
function importGroupRanges(lines: string[], pattern: RegExp): Range[] {
  const interiors: Range[] = [];
  const closeRun = (first: number, last: number): void => {
    if (last - first >= 2) {
      interiors.push({ start: first + 1, end: last - 1 });
    }
  };

  let runStart = -1;
  for (let row = 0; row < lines.length; row += 1) {
    if (pattern.test(lines[row])) {
      if (runStart < 0) runStart = row;
      continue;
    }
    if (runStart >= 0) {
      // The run ended on the previous line.
      closeRun(runStart, row - 1);
      runStart = -1;
    }
  }
  // A run that extends to the last line is still open here.
  if (runStart >= 0) closeRun(runStart, lines.length - 1);

  return interiors;
}
|
|
140
|
+
|
|
141
|
+
// ── Python ──
|
|
142
|
+
|
|
143
|
+
/**
 * Collects elidable ranges for Python source.
 *
 * For each `def` / `async def` / `class` header, the body is every following
 * non-blank line indented deeper than the header, up to (not including) the
 * first non-blank line at the header's indentation or shallower. Blank lines
 * are skipped and never start or end a body. A body is reported when it spans
 * at least `minBody` lines. Import-group interiors are appended afterwards.
 *
 * Indentation is measured as the count of leading whitespace characters.
 */
function pythonRanges(lines: string[], minBody: number): Range[] {
  const defHeader = /^(\s*)(async\s+)?def\s/;
  const classHeader = /^(\s*)class\s/;
  const indentOf = (line: string): number => line.length - line.trimStart().length;

  const bodies: Range[] = [];
  for (const [headerRow, headerLine] of lines.entries()) {
    const header = defHeader.exec(headerLine) ?? classHeader.exec(headerLine);
    if (header === null) continue;

    const headerIndent = header[1].length;
    let firstBodyRow = -1;
    let lastBodyRow = -1;
    for (let row = headerRow + 1; row < lines.length; row += 1) {
      const candidate = lines[row];
      if (candidate.trim() === "") continue;
      if (indentOf(candidate) <= headerIndent) break;
      if (firstBodyRow === -1) firstBodyRow = row;
      lastBodyRow = row;
    }

    const hasBody = firstBodyRow !== -1;
    if (hasBody && lastBodyRow - firstBodyRow + 1 >= minBody) {
      bodies.push({ start: firstBodyRow, end: lastBodyRow });
    }
  }

  return [...bodies, ...importGroupRanges(lines, PY_IMPORT_RE)];
}
|
|
162
|
+
|
|
163
|
+
// ── Rust ──
|
|
164
|
+
|
|
165
|
+
// Rust item headers.
//
// All four accept an optional visibility, including the restricted forms
// `pub(crate)`, `pub(super)` and `pub(in path)`, which the earlier patterns
// missed. The struct/enum/mod patterns require the line to end with "{" but
// tolerate trailing whitespace, so lines from CRLF input (which keep a
// trailing "\r" after splitting on "\n") still match. This mirrors
// TS_STRUCT_SIG, which allows whitespace before end of line.

/** `fn` header with optional visibility and the `const` / `async` / `unsafe` / `extern "ABI"` qualifiers. */
const RS_FN_SIG = /^\s*(pub(\([^)]*\))?\s+)?(const\s+)?(async\s+)?(unsafe\s+)?(extern\s+("[^"]*"\s+)?)?fn\s/;

/** `struct Name ... {` header (tuple and unit structs do not end in "{" and are not matched). */
const RS_STRUCT_SIG = /^\s*(pub(\([^)]*\))?\s+)?struct\s+\w+.*\{\s*$/;

/** `enum Name ... {` header. */
const RS_ENUM_SIG = /^\s*(pub(\([^)]*\))?\s+)?enum\s+\w+.*\{\s*$/;

/** Inline `mod name {` header (`mod name;` declarations are not matched). */
const RS_MOD_SIG = /^\s*(pub(\([^)]*\))?\s+)?mod\s+\w+.*\{\s*$/;
|
|
169
|
+
|
|
170
|
+
/**
 * Collects elidable brace-block interiors for Rust: functions first, then
 * structs, enums and inline modules. Ranges may overlap; they are not merged
 * here.
 */
function rustRanges(lines: string[], minBody: number): Range[] {
  const signatures = [RS_FN_SIG, RS_STRUCT_SIG, RS_ENUM_SIG, RS_MOD_SIG];
  return signatures.flatMap((signature) => findBraceRanges(lines, signature, minBody));
}
|
|
178
|
+
|
|
179
|
+
// ── Main entry ──
|
|
180
|
+
|
|
181
|
+
/**
 * Builds the "nothing elided" result: a single kept segment spanning lines
 * 1..totalLines whose text, like the rendered output, is the input verbatim.
 */
function fullResult(language: string | null, totalLines: number, code: string): SummaryResult {
  const whole: SummarySegment = {
    kind: "kept",
    startLine: 1,
    endLine: totalLines,
    text: code,
  };
  return {
    language,
    totalLines,
    elided: false,
    segments: [whole],
    rendered: code,
  };
}
|
|
188
|
+
|
|
189
|
+
/**
 * Produces a structural summary of `code`, replacing long bodies, block
 * comments and import runs with one-line elision markers.
 *
 * - Empty or whitespace-only input gives an empty result with totalLines 0.
 * - A falsy `language` gives the full text with language null; a language
 *   other than typescript / javascript / python / rust gives the full text
 *   unchanged.
 * - Otherwise candidate ranges are collected for the language, merged, and
 *   the text is cut into alternating kept / elided segments.
 *
 * Segment line numbers are 1-based and inclusive.
 *
 * @param code     Source text; split on "\n".
 * @param language Language name as returned by detectLanguage, or null.
 * @param options  Optional thresholds (minBodyLines default 4, minCommentLines default 6).
 */
export function summarizeCode(
  code: string,
  language: string | null,
  options?: SummaryOptions,
): SummaryResult {
  const bodyThreshold = options?.minBodyLines ?? 4;
  const commentThreshold = options?.minCommentLines ?? 6;

  const isBlank = !code || code.trim() === "";
  if (isBlank) {
    return { language, totalLines: 0, elided: false, segments: [], rendered: "" };
  }

  const lines = code.split("\n");
  const totalLines = lines.length;
  if (!language) {
    return fullResult(null, totalLines, code);
  }

  let candidates: Range[];
  if (language === "typescript" || language === "javascript") {
    candidates = [
      ...tsRanges(lines, bodyThreshold),
      ...blockCommentRanges(lines, commentThreshold),
      ...importGroupRanges(lines, IMPORT_RE),
    ];
  } else if (language === "python") {
    candidates = [...pythonRanges(lines, bodyThreshold)];
  } else if (language === "rust") {
    candidates = [
      ...rustRanges(lines, bodyThreshold),
      ...blockCommentRanges(lines, commentThreshold),
    ];
  } else {
    return fullResult(language, totalLines, code);
  }

  const elisions = mergeRanges(candidates);
  if (elisions.length === 0) {
    return fullResult(language, totalLines, code);
  }

  // Walk the merged ranges, emitting the kept text before each one.
  const segments: SummarySegment[] = [];
  let nextRow = 0; // 0-based index of the first line not yet placed in a segment
  for (const { start, end } of elisions) {
    if (start > nextRow) {
      segments.push({
        kind: "kept",
        startLine: nextRow + 1,
        endLine: start,
        text: lines.slice(nextRow, start).join("\n"),
      });
    }
    segments.push({ kind: "elided", startLine: start + 1, endLine: end + 1 });
    nextRow = end + 1;
  }
  if (nextRow < totalLines) {
    segments.push({
      kind: "kept",
      startLine: nextRow + 1,
      endLine: totalLines,
      text: lines.slice(nextRow).join("\n"),
    });
  }

  const rendered = segments
    .map((seg) =>
      seg.kind === "kept"
        ? (seg.text ?? "")
        : ` ... ${seg.endLine - seg.startLine + 1} lines elided ...`,
    )
    .join("\n");

  return { language, totalLines, elided: true, segments, rendered };
}
|
|
@@ -1,190 +1,190 @@
|
|
|
1
|
-
import * as fs from "node:fs";
|
|
2
|
-
import type { TeamTaskState, TeamRunManifest } from "../state/types.ts";
|
|
3
|
-
|
|
4
|
-
// ============================================================================
|
|
5
|
-
// Phase 1.2: Completion Mutation Guard — detects tasks that claim success but
|
|
6
|
-
// made no observable mutations. Used by task-runner.ts.
|
|
7
|
-
// ============================================================================
|
|
8
|
-
|
|
9
|
-
/** What the mutation guard needs to know about one finished task. */
export interface CompletionMutationGuardInput {
  /** Agent role; only roles in MUTATING_ROLES are expected to mutate anything. */
  role: string;
  /** Task description, scanned for read-only hints. */
  taskText?: string;
  /** Path to the transcript file; read in preference to `stdout` when it exists. */
  transcriptPath?: string;
  /** Captured output, used when no transcript file is available. */
  stdout?: string;
}
|
|
15
|
-
|
|
16
|
-
/** Verdict of the mutation guard for one task. */
export interface CompletionMutationGuardResult {
  /** True when the role and task text imply the task should have changed something. */
  expectedMutation: boolean;
  /** True when at least one observed tool call counts as mutating. */
  observedMutation: boolean;
  /** Present only when a mutation was expected but none was observed. */
  reason?: "no_mutation_observed";
  /** Name of every tool call found in the transcript, in order of appearance. */
  observedTools: string[];
}
|
|
22
|
-
|
|
23
|
-
/** Roles whose tasks are expected to change files. */
const MUTATING_ROLES = new Set(["executor", "test-engineer"]);

/** Tool names (lower-case) that always count as a mutation. */
const MUTATING_TOOLS = new Set(["edit", "write", "multi_edit", "apply_patch", "replace_in_file", "insert", "delete_files", "create_file", "overwrite", "patch"]);

/** Shell commands that START with a known read-only program (case-insensitive). */
const READ_ONLY_COMMANDS = /^(pwd|ls|dir|cat|type|sed|grep|rg|find|git\s+(status|diff|log|show|branch|remote|rev-parse|ls-files)|npm\s+(test|run\s+(typecheck|check|lint|test|ci))|node\s+--test)\b/i;

/**
 * Shell fragments that indicate a mutation anywhere in the command
 * (case-insensitive).
 *
 * The closing assertion is `(?:\b|(?<=\W))` rather than a bare `\b`. A bare
 * `\b` after an alternative that ends in a non-word character (`>`, `=`, `-`,
 * whitespace) only succeeds when a word character follows immediately, so
 * `echo x > out.txt` or `dd if=a of=/dev/sdb` never matched. Alternatives
 * ending in a word character still require a word boundary, so e.g.
 * `npm init` is not mistaken for `npm i`.
 */
const MUTATING_COMMANDS = /\b(rm\s+-|del\s+|erase\s+|mv\s+|move\s+|cp\s+|copy\s+|mkdir\b|touch\b|git\s+(add|commit|push|reset|clean|checkout|switch|merge|rebase|stash)|npm\s+(install|i|uninstall|publish|version)|pnpm\s+(add|install|remove)|yarn\s+(add|install|remove)|python\b.*>|node\b.*>|echo\b.*>|Set-Content|Out-File|sed\s+-i|tee\b|dd\b.*of=|wget\b.*-O|curl\b.*-o)(?:\b|(?<=\W))/i;

/** Phrases in a task description (English and Vietnamese) that mark it as read-only. */
const READ_ONLY_HINTS = /\b(read-only|no edits?|do not edit|không sửa|khong sua|chỉ đọc|chi doc|plan only|chỉ lập plan|review only|audit only)\b/i;
|
|
28
|
-
|
|
29
|
-
/**
 * Narrows an unknown value to a plain key/value record.
 *
 * @returns The same value when it is a non-null, non-array object; otherwise undefined.
 */
function asRecord(value: unknown): Record<string, unknown> | undefined {
  if (typeof value !== "object" || value === null || Array.isArray(value)) {
    return undefined;
  }
  return value as Record<string, unknown>;
}
|
|
32
|
-
|
|
33
|
-
/**
 * Extracts the shell command text from a tool call's arguments.
 *
 * - A string argument is returned as is; any other non-record value gives "".
 * - For a record, the first of `command`, `cmd`, `script`, `input` that holds
 *   a string wins.
 * - A record with none of those falls back to its JSON serialization.
 */
function commandText(value: unknown): string {
  const fields = asRecord(value);
  if (fields === undefined) {
    return typeof value === "string" ? value : "";
  }
  const candidateKeys = ["command", "cmd", "script", "input"];
  const key = candidateKeys.find((name) => typeof fields[name] === "string");
  return key === undefined ? JSON.stringify(fields) : (fields[key] as string);
}
|
|
42
|
-
|
|
43
|
-
/**
 * Decides whether one tool call counts as a mutation.
 *
 * - Tools listed in MUTATING_TOOLS always do.
 * - For `bash` / `shell` / `powershell`, the command text is classified:
 *   empty means no; a mutating pattern means yes; otherwise a read-only prefix
 *   means no; anything unrecognized is treated as mutating.
 * - Every other tool does not.
 *
 * The tool name is compared case-insensitively.
 */
function isMutatingTool(tool: string, args: unknown): boolean {
  const name = tool.toLowerCase();
  if (MUTATING_TOOLS.has(name)) return true;

  const isShell = name === "bash" || name === "shell" || name === "powershell";
  if (!isShell) return false;

  const command = commandText(args).trim();
  if (command === "") return false;

  // Mutating patterns are checked first: `sed -i` mutates even though plain
  // `sed` is on the read-only list. Commands on neither list count as mutating.
  return MUTATING_COMMANDS.test(command) || !READ_ONLY_COMMANDS.test(command);
}
|
|
57
|
-
|
|
58
|
-
/**
 * Pulls tool calls out of one parsed transcript event.
 *
 * Two shapes are recognized, and both may contribute for the same event:
 *  - the event itself is a tool call: `type` is one of tool_execution_start /
 *    toolCall / tool_call and the name is in `toolName`, `name` or `tool`;
 *  - the event carries a `content` array (directly, or under `message`) whose
 *    items are tool calls with the name in `name`, `toolName` or `tool`.
 *
 * Arguments come from `args` then `input` for the event itself, and from
 * `input` then `args` for content items.
 *
 * @returns Calls in encounter order; empty when the event is not an object.
 */
function collectToolCallsFromEvent(event: unknown): Array<{ tool: string; args?: unknown }> {
  const root = asRecord(event);
  if (root === undefined) return [];

  const toolEventTypes = new Set<unknown>(["tool_execution_start", "toolCall", "tool_call"]);
  const found: Array<{ tool: string; args?: unknown }> = [];

  const rootTool = root.toolName ?? root.name ?? root.tool;
  if (typeof rootTool === "string" && toolEventTypes.has(root.type)) {
    found.push({ tool: rootTool, args: root.args ?? root.input });
  }

  const parts = Array.isArray(root.content) ? root.content : asRecord(root.message)?.content;
  if (!Array.isArray(parts)) return found;

  for (const part of parts) {
    const entry = asRecord(part);
    if (entry === undefined) continue;
    const entryTool = entry.name ?? entry.toolName ?? entry.tool;
    if (typeof entryTool === "string" && toolEventTypes.has(entry.type)) {
      found.push({ tool: entryTool, args: entry.input ?? entry.args });
    }
  }
  return found;
}
|
|
77
|
-
|
|
78
|
-
/**
 * Returns the raw transcript to scan for tool calls.
 *
 * The transcript file is preferred. When no path is given, or the file cannot
 * be read (missing, a directory, permission denied, removed between check and
 * read), `stdout` is used instead, or "" when that is absent too.
 *
 * Reading directly and catching the failure replaces the earlier
 * exists-then-read sequence, which could throw if the path stopped being
 * readable between the two calls or pointed at a directory.
 */
function transcriptText(input: CompletionMutationGuardInput): string {
  if (input.transcriptPath) {
    try {
      return fs.readFileSync(input.transcriptPath, "utf-8");
    } catch {
      // Unreadable transcript: fall through to the captured stdout.
    }
  }
  return input.stdout ?? "";
}
|
|
82
|
-
|
|
83
|
-
/**
 * Tells whether a task is expected to change something.
 *
 * True only when the role is one of MUTATING_ROLES and the task text (treated
 * as "" when absent) contains no read-only hint.
 */
export function expectsImplementationMutation(input: Pick<CompletionMutationGuardInput, "role" | "taskText">): boolean {
  const roleMutates = MUTATING_ROLES.has(input.role);
  return roleMutates && !READ_ONLY_HINTS.test(input.taskText ?? "");
}
|
|
87
|
-
|
|
88
|
-
/**
 * Checks whether a task that should have mutated something actually issued a
 * mutating tool call.
 *
 * The transcript (or stdout) is read line by line; each non-blank line is
 * parsed as JSON and lines that fail to parse are ignored. Every tool call
 * found is recorded by name, and the task counts as having mutated when any
 * call is classified as mutating.
 *
 * @returns The expectation, the observation and the tool names seen, plus
 *          `reason: "no_mutation_observed"` when a mutation was expected but
 *          none was found.
 */
export function evaluateCompletionMutationGuard(input: CompletionMutationGuardInput): CompletionMutationGuardResult {
  const expectedMutation = expectsImplementationMutation(input);
  const observedTools: string[] = [];
  let observedMutation = false;

  const rawLines = transcriptText(input).split("\n");
  for (const raw of rawLines) {
    const candidate = raw.trim();
    if (candidate.length === 0) continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      // Not a JSON event line; skip it.
      continue;
    }

    for (const { tool, args } of collectToolCallsFromEvent(parsed)) {
      observedTools.push(tool);
      if (isMutatingTool(tool, args)) observedMutation = true;
    }
  }

  const verdict: CompletionMutationGuardResult = { expectedMutation, observedMutation, observedTools };
  if (expectedMutation && !observedMutation) {
    verdict.reason = "no_mutation_observed";
  }
  return verdict;
}
|
|
110
|
-
|
|
111
|
-
// ============================================================================
|
|
112
|
-
// Phase 11a: Artifact-based Completion Verification — a second layer that
|
|
113
|
-
// checks whether a completed task actually produced meaningful artifacts.
|
|
114
|
-
// ============================================================================
|
|
115
|
-
|
|
116
|
-
/**
 * Result of the artifact-based completion check, used to guard against
 * false-positive task completions.
 *
 * Carries a green level (0-3) describing how much evidence of real output the
 * task left behind, and any warnings raised along the way.
 */
export interface CompletionVerifyResult {
  /** 0 = no output, 1 = minimal, 2 = moderate, 3 = strong */
  greenLevel: number;
  /** Warnings about potentially incomplete work */
  warnings: string[];
}
|
|
128
|
-
|
|
129
|
-
/** Maximum number of characters of task output shown in a diagnostic preview. */
const MAX_OUTPUT_PREVIEW = 200;
|
|
130
|
-
|
|
131
|
-
/**
 * True when an error string is present and non-empty but contains only
 * whitespace. Undefined and "" both give false.
 */
function isTrivialError(error: string | undefined): boolean {
  return typeof error === "string" && error.length > 0 && error.trim() === "";
}
|
|
135
|
-
|
|
136
|
-
/**
 * Scores how much evidence a completed task left behind.
 *
 * A task with a non-trivial error scores 0 immediately. Otherwise one point
 * each is given for a result artifact, a transcript artifact, and at least
 * one manifest artifact produced by the task (matched on task id or agent).
 * Warnings are added when no run-level artifact was produced and when usage
 * is reported with zero input + output tokens.
 *
 * @param task     Task state to verify.
 * @param manifest Run manifest whose `artifacts` are searched.
 * @returns Green level capped at 3, plus any warnings.
 */
export function verifyTaskCompletion(
  task: TeamTaskState,
  manifest: TeamRunManifest,
): CompletionVerifyResult {
  // A real error overrides every other signal.
  if (task.error && !isTrivialError(task.error)) {
    return { greenLevel: 0, warnings: [`Task has error: ${task.error}`] };
  }

  const warnings: string[] = [];
  const resultPoint = task.resultArtifact ? 1 : 0;
  const transcriptPoint = task.transcriptArtifact ? 1 : 0;
  let greenLevel = resultPoint + transcriptPoint;

  const producedRunArtifact = manifest.artifacts.some(
    (artifact) => artifact.producer === task.id || artifact.producer === task.agent,
  );
  if (producedRunArtifact) {
    greenLevel += 1;
  } else if (greenLevel < 3) {
    warnings.push("No run-level artifacts produced by this task");
  }

  // Zero tokens suggests the task never actually ran.
  if (task.usage) {
    const totalTokens = (task.usage.input ?? 0) + (task.usage.output ?? 0);
    if (totalTokens === 0 && greenLevel < 3) {
      warnings.push("Task reports zero token usage — may not have executed");
    }
  }

  return { greenLevel: Math.min(greenLevel, 3), warnings };
}
|
|
181
|
-
|
|
182
|
-
/**
 * Format a preview of task output for diagnostic display.
 *
 * @param output - Raw task output; may be missing.
 * @param maxLength - Maximum number of UTF-16 code units kept before "..." is
 *   appended. Defaults to MAX_OUTPUT_PREVIEW.
 * @returns "(no output)" when nothing printable is present, the trimmed text
 *   when it fits, otherwise a truncated prefix followed by "...".
 */
export function formatOutputPreview(output: string | undefined, maxLength: number = MAX_OUTPUT_PREVIEW): string {
  const trimmed = output?.trim() ?? "";
  // Whitespace-only output is as uninformative as no output at all.
  if (trimmed.length === 0) return "(no output)";
  if (trimmed.length <= maxLength) return trimmed;
  let cut = Math.max(0, Math.floor(maxLength));
  // Do not leave a lone high surrogate at the end of the preview.
  const lastUnit = trimmed.charCodeAt(cut - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
  return trimmed.slice(0, cut) + "...";
}
|
|
1
|
+
import * as fs from "node:fs";
|
|
2
|
+
import type { TeamTaskState, TeamRunManifest } from "../state/types.ts";
|
|
3
|
+
|
|
4
|
+
// ============================================================================
|
|
5
|
+
// Phase 1.2: Completion Mutation Guard — detects tasks that claim success but
|
|
6
|
+
// made no observable mutations. Used by task-runner.ts.
|
|
7
|
+
// ============================================================================
|
|
8
|
+
|
|
9
|
+
/** Everything the mutation guard needs to judge one finished task. */
export interface CompletionMutationGuardInput {
  /** Agent role; only roles listed in MUTATING_ROLES are expected to mutate. */
  role: string;
  /** Task description, scanned for read-only hints such as "review only". */
  taskText?: string;
  /** Transcript file; when present it is read and parsed line by line as JSON events. */
  transcriptPath?: string;
  /** Captured stdout, used as the transcript text when no transcript file is available. */
  stdout?: string;
}
|
|
15
|
+
|
|
16
|
+
/** Verdict produced by the completion mutation guard. */
export interface CompletionMutationGuardResult {
  /** True when the role normally mutates and the task text carries no read-only hint. */
  expectedMutation: boolean;
  /** True when at least one observed tool call was classified as mutating. */
  observedMutation: boolean;
  /** Present only when a mutation was expected but none was observed. */
  reason?: "no_mutation_observed";
  /** Name of every tool call found in the transcript, in order, duplicates included. */
  observedTools: string[];
}
|
|
22
|
+
|
|
23
|
+
/** Roles whose tasks are expected to change files unless told otherwise. */
const MUTATING_ROLES = new Set(["executor", "test-engineer"]);

/** Tool names (lower-cased) that always count as a mutation. */
const MUTATING_TOOLS = new Set([
  "edit",
  "write",
  "multi_edit",
  "apply_patch",
  "replace_in_file",
  "insert",
  "delete_files",
  "create_file",
  "overwrite",
  "patch",
]);

/** Shell commands that, judged by their leading verb, only inspect state. */
const READ_ONLY_COMMANDS = /^(pwd|ls|dir|cat|type|sed|grep|rg|find|git\s+(status|diff|log|show|branch|remote|rev-parse|ls-files)|npm\s+(test|run\s+(typecheck|check|lint|test|ci))|node\s+--test)\b/i;

/** Shell fragments that change files, packages, or repository state anywhere in a command. */
const MUTATING_COMMANDS = /\b(rm\s+-|del\s+|erase\s+|mv\s+|move\s+|cp\s+|copy\s+|mkdir\b|touch\b|git\s+(add|commit|push|reset|clean|checkout|switch|merge|rebase|stash)|npm\s+(install|i|uninstall|publish|version)|pnpm\s+(add|install|remove)|yarn\s+(add|install|remove)|python\b.*>|node\b.*>|echo\b.*>|Set-Content|Out-File|sed\s+-i|tee\b|dd\b.*of=|wget\b.*-O|curl\b.*-o)\b/i;

/** Phrases (English and Vietnamese) in a task description that mark it as read-only. */
const READ_ONLY_HINTS = /\b(read-only|no edits?|do not edit|không sửa|khong sua|chỉ đọc|chi doc|plan only|chỉ lập plan|review only|audit only)\b/i;
|
|
28
|
+
|
|
29
|
+
/**
 * Narrow an arbitrary value to a plain key/value record.
 *
 * @param value - Anything, typically the result of JSON.parse.
 * @returns The same object typed as a record, or undefined for null, primitives, and arrays.
 */
function asRecord(value: unknown): Record<string, unknown> | undefined {
  if (value === null || typeof value !== "object") return undefined;
  if (Array.isArray(value)) return undefined;
  return value as Record<string, unknown>;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Extract the shell command text from a tool call's arguments.
 *
 * @param value - Tool arguments: either the command string itself or an object holding it.
 * @returns The string as-is; for an object, the first string found under
 *   `command`, `cmd`, `script`, or `input` (in that order), else the object
 *   serialized as JSON; an empty string for anything else.
 */
function commandText(value: unknown): string {
  if (typeof value === "string") return value;
  const fields = asRecord(value);
  if (fields === undefined) return "";
  const candidate = ["command", "cmd", "script", "input"]
    .map((key) => fields[key])
    .find((raw) => typeof raw === "string");
  // No recognizable field: fall back to the whole payload so pattern checks still see it.
  return typeof candidate === "string" ? candidate : JSON.stringify(fields);
}
|
|
42
|
+
|
|
43
|
+
/** Matches single- or double-quoted shell segments so their contents are not mistaken for operators. */
const QUOTED_SEGMENT = /'[^']*'|"(?:\\.|[^"\\])*"/g;

/** Matches `>` / `>>` aimed at a real target, ignoring fd duplication (`2>&1`) and null sinks. */
const FILE_REDIRECT = />>?\s*(?!&|>|\/dev\/null\b|\$null\b)[^\s&|;<>]/i;

/**
 * Tell whether a shell command redirects output into a file.
 *
 * Quoted segments are collapsed to a placeholder first, so a `>` inside a
 * pattern such as `grep "a > b"` is ignored while `cat > "my file"` still counts.
 */
function writesViaRedirect(command: string): boolean {
  return FILE_REDIRECT.test(command.replace(QUOTED_SEGMENT, "Q"));
}

/**
 * Decide whether a tool call should count as a mutation.
 *
 * @param tool - Tool name as recorded in the transcript (case-insensitive).
 * @param args - Tool arguments; for shell tools the command text is extracted from them.
 * @returns true for known editing tools and for shell commands that are, or may be, mutating.
 */
function isMutatingTool(tool: string, args: unknown): boolean {
  const normalized = tool.toLowerCase();
  if (MUTATING_TOOLS.has(normalized)) return true;
  if (normalized !== "bash" && normalized !== "shell" && normalized !== "powershell") return false;

  const command = commandText(args).trim();
  if (!command) return false;
  // Check mutating patterns first: sed -i is mutating even though plain sed is read-only.
  if (MUTATING_COMMANDS.test(command)) return true;
  // A read-only verb still writes when its output is redirected into a file
  // (e.g. `cat > file <<EOF`), so only redirect-free commands are read-only.
  if (READ_ONLY_COMMANDS.test(command)) return writesViaRedirect(command);
  // If the command doesn't match either list, treat unknown shell calls as potentially mutating.
  return true;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Pull every tool call out of one parsed transcript event.
 *
 * Two shapes are recognized: the event itself being a tool call, and tool-call
 * parts inside `content` (either on the event or on its `message`). A call is
 * accepted when its `type` is `tool_execution_start`, `toolCall`, or `tool_call`
 * and it carries a string tool name.
 *
 * @param event - A value parsed from one transcript line.
 * @returns The tool calls found, in order; empty when the event is not an object.
 */
function collectToolCallsFromEvent(event: unknown): Array<{ tool: string; args?: unknown }> {
  const record = asRecord(event);
  if (!record) return [];

  const isToolCallType = (type: unknown): boolean =>
    type === "tool_execution_start" || type === "toolCall" || type === "tool_call";
  const calls: Array<{ tool: string; args?: unknown }> = [];

  const directTool = record.toolName ?? record.name ?? record.tool;
  if (typeof directTool === "string" && isToolCallType(record.type)) {
    // `arguments` is a last-resort key: some call blocks store their payload
    // there, and dropping it would hide shell commands from the classifier.
    calls.push({ tool: directTool, args: record.args ?? record.input ?? record.arguments });
  }

  const content = Array.isArray(record.content) ? record.content : asRecord(record.message)?.content;
  if (Array.isArray(content)) {
    for (const part of content) {
      const item = asRecord(part);
      if (!item) continue;
      const tool = item.name ?? item.toolName ?? item.tool;
      if (typeof tool === "string" && isToolCallType(item.type)) {
        calls.push({ tool, args: item.input ?? item.args ?? item.arguments });
      }
    }
  }
  return calls;
}
|
|
77
|
+
|
|
78
|
+
/**
 * Load the raw transcript text for a task.
 *
 * The transcript file is preferred. If it is missing or cannot be read (for
 * example the path is a directory, or the file vanished after being listed),
 * the captured stdout is used instead so the guard never throws on I/O.
 *
 * @param input - Guard input carrying an optional transcript path and stdout.
 * @returns File contents, else stdout, else an empty string.
 */
function transcriptText(input: CompletionMutationGuardInput): string {
  if (input.transcriptPath) {
    try {
      // Read directly instead of probing with existsSync first: the probe is
      // racy and does not protect against directories or permission errors.
      return fs.readFileSync(input.transcriptPath, "utf-8");
    } catch {
      // Unreadable transcript: fall through to stdout.
    }
  }
  return input.stdout ?? "";
}
|
|
82
|
+
|
|
83
|
+
/**
 * Decide whether a task is expected to change files.
 *
 * @param input - The task's role and optional description.
 * @returns true when the role is in MUTATING_ROLES and the description contains no read-only hint.
 */
export function expectsImplementationMutation(input: Pick<CompletionMutationGuardInput, "role" | "taskText">): boolean {
  const roleMutates = MUTATING_ROLES.has(input.role);
  const description = input.taskText ?? "";
  return roleMutates && !READ_ONLY_HINTS.test(description);
}
|
|
87
|
+
|
|
88
|
+
/**
 * Compare the mutations a task was expected to make with those its transcript shows.
 *
 * Each non-blank transcript line is parsed as JSON; lines that fail to parse
 * are skipped. Every tool call found is recorded, and any call classified as
 * mutating marks the task as having mutated.
 *
 * @param input - Role, task text, and transcript source for the task.
 * @returns Expectation, observation, observed tool names, and a `reason` when a
 *   mutation was expected but none was seen.
 */
export function evaluateCompletionMutationGuard(input: CompletionMutationGuardInput): CompletionMutationGuardResult {
  const expectedMutation = expectsImplementationMutation(input);
  const observedTools: string[] = [];
  let observedMutation = false;

  for (const rawLine of transcriptText(input).split("\n")) {
    const candidate = rawLine.trim();
    if (candidate.length === 0) continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      // Not a JSON event line; ignore it.
      continue;
    }

    for (const { tool, args } of collectToolCallsFromEvent(parsed)) {
      observedTools.push(tool);
      if (isMutatingTool(tool, args)) observedMutation = true;
    }
  }

  const result: CompletionMutationGuardResult = { expectedMutation, observedMutation, observedTools };
  if (expectedMutation && !observedMutation) {
    result.reason = "no_mutation_observed";
  }
  return result;
}
|
|
110
|
+
|
|
111
|
+
// ============================================================================
|
|
112
|
+
// Phase 11a: Artifact-based Completion Verification — a second layer that
|
|
113
|
+
// checks whether a completed task actually produced meaningful artifacts.
|
|
114
|
+
// ============================================================================
|
|
115
|
+
|
|
116
|
+
/**
 * Outcome of the artifact-based completion check for a single task.
 *
 * Carries a green level (0-3) and any warnings raised while checking.
 */
export interface CompletionVerifyResult {
  /**
   * Evidence score: 0 = no output, 1 = minimal, 2 = moderate, 3 = strong.
   */
  greenLevel: number;
  /**
   * Human-readable notes about work that may be incomplete.
   */
  warnings: string[];
}
|
|
128
|
+
|
|
129
|
+
/** Longest output preview, in UTF-16 code units, before it is cut and "..." is appended. */
const MAX_OUTPUT_PREVIEW = 200;
|
|
130
|
+
|
|
131
|
+
/**
 * Tell whether an error value carries no real message.
 *
 * A missing, empty, or whitespace-only error is trivial. Previously a missing
 * or empty value was reported as non-trivial while a blank one was trivial,
 * which made the answer depend on how "no error" happened to be encoded.
 *
 * @param error - Error text recorded for a task, if any.
 * @returns true when there is nothing meaningful to report.
 */
function isTrivialError(error: string | undefined): boolean {
  return (error ?? "").trim().length === 0;
}
|
|
135
|
+
|
|
136
|
+
/**
 * Score how much evidence backs a task's claim of success.
 *
 * One point each is awarded for a result artifact, a transcript artifact, and
 * at least one manifest artifact whose producer is the task id or its agent.
 *
 * @param task - Task state to inspect.
 * @param manifest - Run manifest whose artifact list is searched.
 * @returns Green level (0-3) plus warnings; a non-blank task error yields level 0 immediately.
 */
export function verifyTaskCompletion(
  task: TeamTaskState,
  manifest: TeamRunManifest,
): CompletionVerifyResult {
  // A real (non-blank) error overrides every other signal.
  if (task.error && !isTrivialError(task.error)) {
    return { greenLevel: 0, warnings: [`Task has error: ${task.error}`] };
  }

  const producedRunArtifact = manifest.artifacts.some(
    (artifact) => artifact.producer === task.id || artifact.producer === task.agent,
  );
  const evidence = [
    Boolean(task.resultArtifact),
    Boolean(task.transcriptArtifact),
    producedRunArtifact,
  ];
  const greenLevel = evidence.filter(Boolean).length;

  const warnings: string[] = [];
  if (!producedRunArtifact) {
    warnings.push("No run-level artifacts produced by this task");
  }
  // Zero token usage is only suspicious when the other evidence is not already complete.
  if (task.usage && greenLevel < 3) {
    const totalTokens = (task.usage.input ?? 0) + (task.usage.output ?? 0);
    if (totalTokens === 0) {
      warnings.push("Task reports zero token usage — may not have executed");
    }
  }

  return { greenLevel: Math.min(greenLevel, 3), warnings };
}
|
|
181
|
+
|
|
182
|
+
/**
 * Format a preview of task output for diagnostic display.
 *
 * @param output - Raw task output; may be missing.
 * @param maxLength - Maximum number of UTF-16 code units kept before "..." is
 *   appended. Defaults to MAX_OUTPUT_PREVIEW.
 * @returns "(no output)" when nothing printable is present, the trimmed text
 *   when it fits, otherwise a truncated prefix followed by "...".
 */
export function formatOutputPreview(output: string | undefined, maxLength: number = MAX_OUTPUT_PREVIEW): string {
  const trimmed = output?.trim() ?? "";
  // Whitespace-only output is as uninformative as no output at all.
  if (trimmed.length === 0) return "(no output)";
  if (trimmed.length <= maxLength) return trimmed;
  let cut = Math.max(0, Math.floor(maxLength));
  // Do not leave a lone high surrogate at the end of the preview.
  const lastUnit = trimmed.charCodeAt(cut - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
  return trimmed.slice(0, cut) + "...";
}
|