pi-crew 0.5.2 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +67 -0
- package/docs/bugs/cross-session-notification-leakage.md +82 -0
- package/docs/coding-agent-optimization.md +268 -0
- package/docs/deep-review-report.md +384 -0
- package/docs/distillation/cybersecurity-patterns.md +294 -0
- package/docs/migration-v0.4-v0.5.md +191 -0
- package/docs/optimization-plan.md +642 -0
- package/docs/pi-mono-opportunities.md +969 -0
- package/docs/pi-mono-review.md +291 -0
- package/docs/skills/REFERENCE.md +144 -0
- package/package.json +7 -6
- package/skills/artifact-analysis-loop/SKILL.md +302 -0
- package/skills/async-worker-recovery/SKILL.md +19 -1
- package/skills/child-pi-spawning/SKILL.md +19 -6
- package/skills/context-artifact-hygiene/SKILL.md +19 -2
- package/skills/delegation-patterns/SKILL.md +68 -3
- package/skills/detection-pipeline-design/SKILL.md +285 -0
- package/skills/event-log-tracing/SKILL.md +20 -6
- package/skills/git-master/SKILL.md +20 -6
- package/skills/hunting-investigation-loop/SKILL.md +401 -0
- package/skills/incident-playbook-construction/SKILL.md +383 -0
- package/skills/live-agent-lifecycle/SKILL.md +20 -6
- package/skills/mailbox-interactive/SKILL.md +19 -6
- package/skills/model-routing-context/SKILL.md +19 -1
- package/skills/multi-perspective-review/SKILL.md +19 -4
- package/skills/observability-reliability/SKILL.md +19 -2
- package/skills/orchestration/SKILL.md +20 -2
- package/skills/ownership-session-security/SKILL.md +20 -2
- package/skills/pi-extension-lifecycle/SKILL.md +20 -2
- package/skills/post-mortem/SKILL.md +7 -2
- package/skills/read-only-explorer/SKILL.md +20 -6
- package/skills/requirements-to-task-packet/SKILL.md +23 -3
- package/skills/resource-discovery-config/SKILL.md +20 -2
- package/skills/runtime-state-reader/SKILL.md +20 -2
- package/skills/safe-bash/SKILL.md +21 -6
- package/skills/scrutinize/SKILL.md +20 -2
- package/skills/secure-agent-orchestration-review/SKILL.md +29 -2
- package/skills/security-review/SKILL.md +560 -0
- package/skills/state-mutation-locking/SKILL.md +22 -2
- package/skills/systematic-debugging/SKILL.md +8 -6
- package/skills/threat-hypothesis-framework/SKILL.md +175 -0
- package/skills/ui-render-performance/SKILL.md +20 -2
- package/skills/verification-before-done/SKILL.md +17 -2
- package/skills/widget-rendering/SKILL.md +21 -6
- package/skills/workspace-isolation/SKILL.md +20 -6
- package/skills/worktree-isolation/SKILL.md +20 -6
- package/src/agents/agent-config.ts +40 -1
- package/src/config/config.ts +22 -5
- package/src/config/role-tools.ts +82 -0
- package/src/config/types.ts +4 -0
- package/src/extension/crew-cleanup.ts +114 -0
- package/src/extension/register.ts +15 -3
- package/src/extension/team-tool/run.ts +7 -7
- package/src/observability/event-bus.ts +60 -0
- package/src/runtime/background-runner.ts +8 -2
- package/src/runtime/child-pi.ts +122 -34
- package/src/runtime/crew-agent-runtime.ts +1 -0
- package/src/runtime/foreground-control.ts +87 -17
- package/src/runtime/pi-args.ts +11 -1
- package/src/runtime/pi-json-output.ts +31 -0
- package/src/runtime/progress-tracker.ts +124 -0
- package/src/runtime/skill-effectiveness.ts +473 -0
- package/src/runtime/skill-instructions.ts +37 -3
- package/src/runtime/task-runner.ts +91 -17
- package/src/runtime/team-runner.ts +11 -11
- package/src/runtime/tool-progress.ts +10 -3
- package/src/runtime/verification-gates.ts +367 -0
- package/src/schema/team-tool-schema.ts +7 -0
- package/src/state/decision-ledger.ts +92 -43
- package/src/state/event-log.ts +136 -10
- package/src/state/hook-instinct-bridge.ts +5 -5
- package/src/state/state-store.ts +3 -1
- package/src/state/types.ts +4 -0
- package/src/types/new-api-types.ts +34 -0
- package/src/ui/agent-management-overlay.ts +5 -1
- package/src/ui/crew-widget.ts +29 -15
- package/src/ui/powerbar-publisher.ts +100 -7
- package/src/ui/tool-render.ts +15 -15
- package/src/utils/session-utils.ts +52 -0
- package/src/worktree/worktree-manager.ts +32 -13
|
@@ -0,0 +1,367 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Verification Gates — ECC VERIFICATION_LOOP Pattern Implementation
|
|
3
|
+
*
|
|
4
|
+
* Implements RED/GREEN phase gates for task verification.
|
|
5
|
+
* Sequential execution: cannot skip to Phase N+1 without Phase N passing.
|
|
6
|
+
*
|
|
7
|
+
* Based on: docs/distillation/ECC-10-skills.md §2 (verification-loop)
|
|
8
|
+
*
|
|
9
|
+
* @module verification-gates
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { spawn } from "node:child_process";
|
|
13
|
+
import * as fs from "node:fs";
|
|
14
|
+
import * as path from "node:path";
|
|
15
|
+
import { writeArtifact } from "../state/artifact-store.ts";
|
|
16
|
+
import type { VerificationContract, VerificationCommandResult, GreenLevel, ArtifactDescriptor } from "../state/types.ts";
|
|
17
|
+
|
|
18
|
+
/**
 * Outcome of a single phase gate.
 *
 * One of these is produced per gate that was either executed or skipped
 * because the run was aborted.
 */
export interface PhaseGateResult {
	/** 1-based position of the gate in the sequence that was run. */
	phase: number;
	/** Gate name as given in the gate definition. */
	name: string;
	/** Shell command associated with the gate. */
	command: string;
	/** "passed" / "failed" for executed gates, "skipped" when the gate was not executed. */
	status: "passed" | "failed" | "skipped";
	/** Time spent executing the command, in milliseconds (0 for skipped gates). */
	durationMs: number;
	/** Exit code reported for the command; may be null when no numeric code is available. */
	exitCode?: number | null;
	/** Captured command output, when the gate was executed. */
	output?: string;
	/** Short failure description; absent for passed gates. */
	error?: string;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Aggregate outcome of a sequential phase-gate run.
 */
export interface PhaseGateBundle {
	/** Per-gate results, in execution order. */
	results: PhaseGateResult[];
	/** True only when every recorded result has status "passed". */
	allPassed: boolean;
	/** Wall-clock duration of the whole run, in milliseconds. */
	totalDurationMs: number;
	/** 1-based phase number at which the run stopped early, if it did. */
	stoppedAt?: number;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Standard phase gate definitions for npm/TypeScript projects.
 * Sequential enforcement: each phase must pass before proceeding.
 *
 * Each command must propagate its real exit status. A gate is judged by its
 * exit code, so a trailing `|| true` would force exit code 0 and make every
 * gate pass regardless of the actual build/test outcome.
 */
export const NPM_TYPESCRIPT_GATES: Array<{ name: string; command: string; critical: boolean }> = [
	{ name: "build", command: "npm run build 2>&1", critical: true },
	{ name: "typecheck", command: "npx tsc --noEmit 2>&1", critical: true },
	// Lint findings are reported but do not stop the sequence.
	{ name: "lint", command: "npm run lint 2>&1", critical: false },
	{ name: "tests", command: "npm test 2>&1", critical: true },
];
|
|
46
|
+
|
|
47
|
+
/**
 * Cargo/Rust project phase gates.
 *
 * Commands propagate their real exit status; a trailing `|| true` would force
 * exit code 0 and make every gate pass unconditionally.
 */
export const CARGO_RUST_GATES: Array<{ name: string; command: string; critical: boolean }> = [
	{ name: "check", command: "cargo check 2>&1", critical: true },
	{ name: "test", command: "cargo test 2>&1", critical: true },
	// Clippy findings are reported but do not stop the sequence.
	{ name: "clippy", command: "cargo clippy 2>&1", critical: false },
];
|
|
55
|
+
|
|
56
|
+
/**
 * Execute a single shell command and capture its combined stdout/stderr.
 *
 * The command is run through `sh -c` so compound commands work. The returned
 * promise never rejects: spawn failures and timeouts are reported through
 * `exitCode: -1` and a descriptive `output`.
 *
 * @param command - Shell command line to execute
 * @param cwd - Working directory for the child process
 * @param timeoutMs - Maximum run time before the child is killed (default 2 minutes)
 * @returns Exit code (null when the child ended without a numeric code),
 *          the tail of the captured output (at most 100000 characters), and
 *          the elapsed time in milliseconds
 */
async function executeCommand(
	command: string,
	cwd: string,
	timeoutMs: number = 120000,
): Promise<{ exitCode: number | null; output: string; durationMs: number }> {
	const maxOutputChars = 100000; // Cap at 100KB
	const start = Date.now();
	let output = "";

	return new Promise((resolve) => {
		let settled = false;

		// Use shell to handle compound commands
		const shell = spawn("sh", ["-c", command], {
			cwd,
			env: { ...process.env, FORCE_COLOR: "0" },
		});

		// Single timeout mechanism: the timer below both kills the child and
		// reports the timeout, so the result is deterministic.
		const timer = setTimeout(() => {
			shell.kill("SIGKILL");
			// Stop reading so descendants that still hold the pipes cannot
			// keep the event loop alive after we have already reported.
			shell.stdout?.destroy();
			shell.stderr?.destroy();
			finish(-1, output + "\n[TIMEOUT: Command exceeded limit]");
		}, timeoutMs);

		// Resolve exactly once and always release the timer, so a finished
		// command does not leave a pending timer behind.
		const finish = (exitCode: number | null, text: string): void => {
			if (settled) return;
			settled = true;
			clearTimeout(timer);
			resolve({
				exitCode,
				output: text.slice(-maxOutputChars),
				durationMs: Date.now() - start,
			});
		};

		// Keep only the tail while accumulating so very chatty commands
		// cannot grow the buffer without bound.
		const capture = (data: { toString(): string }): void => {
			output += data.toString();
			if (output.length > maxOutputChars * 2) {
				output = output.slice(-maxOutputChars);
			}
		};

		shell.stdout?.on("data", capture);
		shell.stderr?.on("data", capture);

		shell.on("close", (code) => {
			finish(code, output);
		});

		shell.on("error", (err) => {
			finish(-1, `Execution error: ${err.message}`);
		});
	});
}
|
|
112
|
+
|
|
113
|
+
/**
 * Run phase gates one after another, stopping at the first critical failure.
 *
 * A gate passes when its command exits with code 0. A failing non-critical
 * gate is recorded and the sequence continues; a failing critical gate ends
 * the run. If the abort signal is already set when a gate is about to start,
 * that gate is recorded as "skipped" and the run ends (the callback is not
 * invoked for the skipped gate).
 *
 * @param gates - Array of phase gate definitions
 * @param cwd - Working directory to execute commands in
 * @param signal - Optional abort signal, checked before each gate starts
 * @param onPhase - Optional callback invoked after each executed gate
 * @returns Phase gate bundle with all recorded results
 */
export async function runPhaseGates(
	gates: Array<{ name: string; command: string; critical: boolean }>,
	cwd: string,
	signal?: AbortSignal,
	onPhase?: (result: PhaseGateResult) => void,
): Promise<PhaseGateBundle> {
	const startedAt = Date.now();
	const collected: PhaseGateResult[] = [];
	let haltedAt: number | undefined;

	for (const [index, gate] of gates.entries()) {
		const phase = index + 1;

		// Abort is only honoured between gates.
		if (signal?.aborted) {
			collected.push({
				phase,
				name: gate.name,
				status: "skipped",
				command: gate.command,
				durationMs: 0,
				error: "Aborted",
			});
			haltedAt = phase;
			break;
		}

		// 2 minute timeout per gate
		const run = await executeCommand(gate.command, cwd, 120000);
		const ok = run.exitCode === 0;

		const gateResult: PhaseGateResult = {
			phase,
			name: gate.name,
			status: ok ? "passed" : "failed",
			command: gate.command,
			exitCode: run.exitCode,
			output: run.output,
			durationMs: run.durationMs,
			error: ok ? undefined : `Exit code: ${run.exitCode}`,
		};

		collected.push(gateResult);
		onPhase?.(gateResult);

		// Only a critical failure ends the sequence early.
		if (gate.critical && !ok) {
			haltedAt = phase;
			break;
		}
	}

	return {
		results: collected,
		totalDurationMs: Date.now() - startedAt,
		allPassed: collected.every((entry) => entry.status === "passed"),
		stoppedAt: haltedAt,
	};
}
|
|
186
|
+
|
|
187
|
+
/**
 * Execute the commands of a task's verification contract as phase gates.
 *
 * Every contract command becomes a critical gate named `verification-<n>`.
 * A log artifact is written for each executed gate, followed by a JSON
 * summary artifact for the whole run. Commands that were never executed
 * (because an earlier gate stopped the sequence) are reported as "not_run".
 *
 * @param contract - Verification contract with commands to execute
 * @param cwd - Working directory
 * @param runId - Run ID (currently not used by this function)
 * @param taskId - Task ID, used for artifact file names and as producer
 * @param artifactsRoot - Artifacts root directory
 * @param signal - Optional abort signal
 * @returns One result per contract command, in contract order
 */
export async function executeVerificationCommands(
	contract: VerificationContract,
	cwd: string,
	runId: string,
	taskId: string,
	artifactsRoot: string,
	signal?: AbortSignal,
): Promise<VerificationCommandResult[]> {
	const commandList = contract.commands;
	if (!commandList || commandList.length === 0) {
		return [];
	}

	// All verification commands are critical by default
	const gates = commandList.map((cmd, index) => ({
		name: `verification-${index + 1}`,
		command: cmd,
		critical: true,
	}));

	// Make sure the artifacts directory exists
	const gatesDir = path.join(artifactsRoot, "verification-gates");
	const gatesDirExists = fs.existsSync(gatesDir);
	if (!gatesDirExists) {
		fs.mkdirSync(gatesDir, { recursive: true });
	}

	const collected: VerificationCommandResult[] = [];

	// Write each phase artifact as soon as the gate finishes, for observability.
	const recordPhase = (phaseResult: PhaseGateResult): void => {
		const logLines = [
			`# Phase ${phaseResult.phase}: ${phaseResult.name}`,
			`Status: ${phaseResult.status.toUpperCase()}`,
			`Command: ${phaseResult.command}`,
			`Duration: ${phaseResult.durationMs}ms`,
			phaseResult.exitCode != null ? `Exit Code: ${phaseResult.exitCode}` : "",
			phaseResult.error ? `Error: ${phaseResult.error}` : "",
			"",
			"## Output",
			phaseResult.output || "(no output)",
		];

		const phaseArtifact = writeArtifact(artifactsRoot, {
			kind: "log",
			relativePath: `verification-gates/${taskId}-phase-${phaseResult.phase}-${phaseResult.name}.log`,
			content: logLines.join("\n"),
			producer: taskId,
		});

		collected.push({
			cmd: phaseResult.command,
			status: phaseResult.status === "passed" ? "passed" : "failed",
			exitCode: phaseResult.exitCode,
			outputArtifact: phaseArtifact,
		});
	};

	const bundle = await runPhaseGates(gates, cwd, signal, recordPhase);

	// Summary artifact for the whole run
	writeArtifact(artifactsRoot, {
		kind: "metadata",
		relativePath: `verification-gates/${taskId}-summary.json`,
		content: JSON.stringify(bundle, null, 2),
		producer: taskId,
	});

	// Pad with "not_run" entries for gates that never executed (early exit)
	while (collected.length < gates.length) {
		collected.push({
			cmd: gates[collected.length].command,
			status: "not_run",
		});
	}

	return collected;
}
|
|
272
|
+
|
|
273
|
+
/**
 * Derive the observed green level from verification command results.
 *
 * Rules, in order:
 * - no commands, or any command with status "failed" -> "none"
 * - no command with status "passed" -> "none"
 * - some but not all commands passed -> "targeted"
 * - all commands passed -> the required level, or "targeted" when the
 *   required level is "none"
 *
 * @param commands - Array of verification command results
 * @param requiredLevel - Required green level from contract
 * @returns Observed green level
 */
export function computeGreenLevelFromResults(
	commands: VerificationCommandResult[],
	requiredLevel: GreenLevel,
): GreenLevel {
	if (commands.length === 0) {
		return "none";
	}

	let passCount = 0;
	for (const { status } of commands) {
		// A single failure overrides everything else.
		if (status === "failed") {
			return "none";
		}
		if (status === "passed") {
			passCount += 1;
		}
	}

	// Nothing actually passed (e.g. everything was left unrun).
	if (passCount === 0) {
		return "none";
	}

	// Partial pass
	if (passCount < commands.length) {
		return "targeted";
	}

	// Everything passed
	return requiredLevel === "none" ? "targeted" : requiredLevel;
}
|
|
311
|
+
|
|
312
|
+
/**
 * Create a verification gate report as Markdown text.
 * Formatted for human review per ECC verification-loop pattern.
 *
 * The results table is built from `results` (one row per entry, numbered by
 * position); the summary section is built from `bundle`.
 *
 * @param taskId - Task ID shown in the report title
 * @param contract - Verification contract the results were produced for
 * @param results - Verification command results, one table row each
 * @param bundle - Phase gate bundle used for the summary figures
 * @returns The report as a single Markdown string
 */
export function createVerificationGateReport(
	taskId: string,
	contract: VerificationContract,
	results: VerificationCommandResult[],
	bundle: PhaseGateBundle,
): string {
	const lines = [
		`# Verification Gate Report: ${taskId}`,
		"",
		`## Contract`,
		`- Required Green Level: ${contract.requiredGreenLevel}`,
		`- Allow Manual Evidence: ${contract.allowManualEvidence}`,
		`- Commands: ${contract.commands.length}`,
		"",
		`## Results`,
		"",
		`| Phase | Command | Status | Exit Code | Duration |`,
		`|-------|---------|--------|-----------|----------|`,
	];

	// Number rows by position: looking the row up by identity is quadratic and
	// mislabels rows when the same result object appears more than once.
	for (const [index, result] of results.entries()) {
		const statusIcon = result.status === "passed" ? "✅" : result.status === "failed" ? "❌" : "⏭️";
		// A literal `|` ends a table cell even inside a code span, so it has
		// to be escaped (commands such as `a 2>&1 || b` are common here).
		const commandCell = truncate(result.cmd, 40).replace(/\|/g, "\\|");
		lines.push(
			`| ${index + 1} | \`${commandCell}\` | ${statusIcon} ${result.status} | ${result.exitCode ?? "-"} | ${result.durationMs ?? 0}ms |`,
		);
	}

	const countWithStatus = (status: string): number =>
		bundle.results.filter((r) => r.status === status).length;

	lines.push("");
	lines.push(`## Summary`);
	lines.push(`- Total Phases: ${bundle.results.length}`);
	lines.push(`- Passed: ${countWithStatus("passed")}`);
	lines.push(`- Failed: ${countWithStatus("failed")}`);
	lines.push(`- Skipped: ${countWithStatus("skipped")}`);
	lines.push(`- Total Duration: ${bundle.totalDurationMs}ms`);
	lines.push(`- All Passed: ${bundle.allPassed ? "YES ✅" : "NO ❌"}`);

	if (bundle.stoppedAt) {
		lines.push(`- Stopped At: Phase ${bundle.stoppedAt}`);
	}

	lines.push("");
	lines.push("## VERIFICATION");
	lines.push(bundle.allPassed ? "**PASSED** - All gates green ✅" : "**FAILED** - One or more gates red ❌");

	return lines.join("\n");
}

/**
 * Shorten `str` to at most `maxLen` characters, ending in "..." when cut.
 * When `maxLen` leaves no room for the ellipsis, the string is simply cut.
 */
function truncate(str: string, maxLen: number): string {
	if (str.length <= maxLen) return str;
	// Without this guard a negative slice end would count from the end of the
	// string and return more than `maxLen` characters.
	if (maxLen <= 3) return str.slice(0, Math.max(0, maxLen));
	return str.slice(0, maxLen - 3) + "...";
}
|
|
@@ -204,6 +204,11 @@ export const TeamToolParams = Type.Object({
|
|
|
204
204
|
once: Type.Optional(
|
|
205
205
|
Type.Union([Type.String(), Type.Number()], { description: "ISO timestamp or epoch ms for a one-time scheduled run." }),
|
|
206
206
|
),
|
|
207
|
+
excludeContextBash: Type.Optional(
|
|
208
|
+
Type.Boolean({
|
|
209
|
+
description: "Mark certain bash commands as excludeFromContext to reduce context tokens (default: false).",
|
|
210
|
+
}),
|
|
211
|
+
),
|
|
207
212
|
});
|
|
208
213
|
|
|
209
214
|
export interface TeamToolParamsValue {
|
|
@@ -287,4 +292,6 @@ export interface TeamToolParamsValue {
|
|
|
287
292
|
cron?: string;
|
|
288
293
|
interval?: number;
|
|
289
294
|
once?: string | number;
|
|
295
|
+
/** Mark certain bash commands as excludeFromContext to reduce context tokens (default: false). */
|
|
296
|
+
excludeContextBash?: boolean;
|
|
290
297
|
}
|
|
@@ -107,7 +107,7 @@ export function appendEntry(runId: string, entry: RolloutEntry): RolloutEntry {
|
|
|
107
107
|
// Get existing entries to compute coherence
|
|
108
108
|
const ledger = getLedger(runId);
|
|
109
109
|
|
|
110
|
-
// Compute coherence
|
|
110
|
+
// Compute coherence
|
|
111
111
|
const coherenceMark = computeCoherence(entry, ledger);
|
|
112
112
|
const entryWithCoherence: RolloutEntry = {
|
|
113
113
|
...entry,
|
|
@@ -218,42 +218,76 @@ export function summarizeLedger(runId: string): string {
|
|
|
218
218
|
return lines.join("\n");
|
|
219
219
|
}
|
|
220
220
|
|
|
221
|
+
/**
 * Replace the coherence mark on the most recent ledger entry.
 *
 * The whole ledger is written back (one JSON document per line), so earlier
 * entries are kept and only the last one changes.
 *
 * @param runId - Run whose ledger is updated
 * @param coherenceMark - Coherence mark to store on the last entry
 * @returns The updated last entry
 * @throws Error when the ledger for `runId` has no entries
 */
function overrideLastEntry(runId: string, coherenceMark: import("./types.js").CoherenceMark): RolloutEntry {
	const entries = getLedger(runId);
	const tail = entries.length - 1;
	if (tail < 0) {
		throw new Error(`No ledger entries found for run ${runId}`);
	}

	// Swap in a copy of the last entry carrying the new coherence mark
	entries[tail] = { ...entries[tail], coherenceMark };

	// Serialize every entry again so nothing before the tail is lost
	const ledgerPath = getLedgerPath(runId);
	const serialized = entries.map((e) => JSON.stringify(e)).join("\n") + "\n";
	writeFileSync(ledgerPath, serialized, "utf-8");

	return entries[tail];
}
|
|
239
|
+
|
|
221
240
|
/**
 * Promote a candidate by marking it as accepted with proper coherence.
 *
 * A new "accept" entry is appended to the run's ledger. Its coherence mark is
 * computed against the existing entries and then forced to allow promotion,
 * because a manual promotion always permits further promotion.
 *
 * @param runId - Run whose ledger receives the entry
 * @param candidate - Candidate being promoted
 * @returns The entry that was written to the ledger
 */
export function promoteCandidate(runId: string, candidate: string): RolloutEntry {
	const latestDecision = getLatestDecision(runId);

	// Get existing entries to compute proper coherence
	const ledger = getLedger(runId);

	// Create entry without coherence first
	const entryWithoutCoherence = {
		rolloutId: `promote-${Date.now()}`,
		timestamp: new Date().toISOString(),
		priorWinner: latestDecision?.topCandidates[0],
		searchSpace: latestDecision?.searchSpace || "unknown",
		trialCount: (latestDecision?.trialCount || 0) + 1,
		topCandidates: [candidate],
		decisionMark: "accept" as const,
	};

	// Compute coherence, then apply the manual-promotion override on a copy
	// so the object returned by computeCoherence is not mutated.
	const entry: RolloutEntry = {
		...entryWithoutCoherence,
		coherenceMark: {
			...computeCoherence(entryWithoutCoherence as RolloutEntry, ledger),
			promotionAllowed: true,
			reason: "Manual promotion - promotion allowed",
		},
	};

	// Append the promotion. Assigning it over the last slot would discard the
	// most recent existing decision (the one `priorWinner` was taken from).
	ledger.push(entry);

	// Rewrite entire ledger to preserve all entries
	const ledgerPath = getLedgerPath(runId);
	const dir = dirname(ledgerPath);
	if (!existsSync(dir)) {
		mkdirSync(dir, { recursive: true });
	}
	writeFileSync(ledgerPath, ledger.map((e) => JSON.stringify(e)).join("\n") + "\n", "utf-8");

	return entry;
}
|
|
258
292
|
|
|
259
293
|
/**
|
|
@@ -262,34 +296,49 @@ export function promoteCandidate(runId: string, candidate: string): RolloutEntry
|
|
|
262
296
|
export function decayCandidate(runId: string, candidate: string): RolloutEntry {
	// Records a manual "decay" decision for `candidate` as a new entry in the
	// run's ledger and returns that entry. The coherence mark is computed
	// against the existing entries and then forced to disallow promotion.
	const latestDecision = getLatestDecision(runId);

	// Get existing entries to compute proper coherence
	const ledger = getLedger(runId);

	// Create entry without coherence first
	const entryWithoutCoherence = {
		rolloutId: `decay-${Date.now()}`,
		timestamp: new Date().toISOString(),
		priorWinner: latestDecision?.topCandidates[0],
		searchSpace: latestDecision?.searchSpace || "unknown",
		trialCount: (latestDecision?.trialCount || 0) + 1,
		topCandidates: [candidate],
		decisionMark: "decay" as const,
	};

	// Compute coherence, then apply the manual-decay override on a copy so the
	// object returned by computeCoherence is not mutated.
	const entry: RolloutEntry = {
		...entryWithoutCoherence,
		coherenceMark: {
			...computeCoherence(entryWithoutCoherence as RolloutEntry, ledger),
			promotionAllowed: false,
			reason: "Manual decay - promotion not allowed",
		},
	};

	// Append the decay. Assigning it over the last slot would discard the
	// most recent existing decision (the one `priorWinner` was taken from).
	ledger.push(entry);

	// Rewrite entire ledger to preserve all entries
	const ledgerPath = getLedgerPath(runId);
	const dir = dirname(ledgerPath);
	if (!existsSync(dir)) {
		mkdirSync(dir, { recursive: true });
	}
	writeFileSync(ledgerPath, ledger.map((e) => JSON.stringify(e)).join("\n") + "\n", "utf-8");

	return entry;
}
|