pi-crew 0.2.20 → 0.2.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +23 -10
- package/README.md +4 -2
- package/docs/PROJECT_REVIEW.md +271 -0
- package/docs/PROJECT_REVIEW_FIXES.md +343 -0
- package/docs/PROJECT_REVIEW_ROUND4.md +156 -0
- package/docs/PROJECT_REVIEW_ROUND5.md +86 -0
- package/docs/fixes/BATCH_A_H1_H2.md +86 -0
- package/docs/fixes/bug-006-foreground-cancel-concurrent.md +78 -0
- package/docs/fixes/bug-007-async-notifier-stale-ctx.md +112 -0
- package/docs/fixes/bug-008-child-process-silent-timeout.md +100 -0
- package/docs/fixes/bug-009-executor-yield-limit-needs-attention.md +75 -0
- package/docs/fixes/bug-010-child-process-api-key-filtered.md +109 -0
- package/docs/fixes/bug-011-spawn-pi-enoent.md +92 -0
- package/docs/fixes/bug-012-essential-env-stripped.md +89 -0
- package/docs/fixes/bug-013-background-runner-death.md +84 -0
- package/docs/fixes/bug-014-infinite-retry-loop-needs-attention.md +82 -0
- package/docs/fixes/bug-015-background-runner-sigterm.md +65 -0
- package/docs/fixes/bug-017-background-runner-session-shutdown.md +66 -0
- package/docs/fixes/bug-017-background-runner-sigkill-double-fork.md +28 -0
- package/docs/fixes/bug-018-child-pi-worker-stdin-hang.md +61 -0
- package/docs/fixes/bug-019-phantom-runs-temp-workspace.md +52 -0
- package/docs/pi-crew-bugs.md +954 -0
- package/docs/pi-crew-investigation-report.md +411 -0
- package/docs/pi-crew-test-final.md +120 -0
- package/docs/pi-crew-test-results.md +260 -0
- package/docs/pi-crew-test-round2.md +136 -0
- package/docs/pi-crew-test-round4.md +100 -0
- package/docs/pi-crew-test-round5.md +70 -0
- package/docs/pi-crew-test-round6.md +110 -0
- package/docs/usage.md +14 -0
- package/package.json +4 -2
- package/src/adapters/export-util.ts +12 -6
- package/src/agents/agent-config.ts +2 -0
- package/src/config/defaults.ts +1 -1
- package/src/config/markers.ts +22 -17
- package/src/config/resilient-parser.ts +1 -1
- package/src/extension/async-notifier.ts +4 -2
- package/src/extension/management.ts +52 -0
- package/src/extension/register.ts +47 -10
- package/src/extension/run-index.ts +20 -2
- package/src/extension/run-maintenance.ts +2 -2
- package/src/extension/team-tool/parallel-dispatch.ts +1 -1
- package/src/extension/team-tool/run.ts +3 -6
- package/src/extension/team-tool.ts +67 -11
- package/src/observability/event-to-metric.ts +2 -1
- package/src/runtime/async-runner.ts +42 -34
- package/src/runtime/background-runner.ts +165 -7
- package/src/runtime/child-pi.ts +111 -18
- package/src/runtime/code-summary.ts +1 -1
- package/src/runtime/crash-recovery.ts +1 -1
- package/src/runtime/crew-agent-runtime.ts +2 -1
- package/src/runtime/heartbeat-watcher.ts +4 -0
- package/src/runtime/live-agent-manager.ts +1 -1
- package/src/runtime/live-session-runtime.ts +2 -1
- package/src/runtime/manifest-cache.ts +2 -2
- package/src/runtime/model-fallback.ts +2 -1
- package/src/runtime/phase-progress.ts +1 -1
- package/src/runtime/pi-args.ts +3 -1
- package/src/runtime/pi-spawn.ts +6 -0
- package/src/runtime/prose-compressor.ts +1 -1
- package/src/runtime/result-extractor.ts +0 -1
- package/src/runtime/retry-executor.ts +1 -1
- package/src/runtime/runtime-resolver.ts +1 -1
- package/src/runtime/skill-instructions.ts +0 -1
- package/src/runtime/stale-reconciler.ts +30 -3
- package/src/runtime/subagent-manager.ts +2 -0
- package/src/runtime/task-display.ts +1 -1
- package/src/runtime/task-graph-scheduler.ts +1 -1
- package/src/runtime/task-runner/tail-read.ts +26 -0
- package/src/runtime/task-runner.ts +1007 -383
- package/src/runtime/team-runner.ts +9 -5
- package/src/runtime/worker-startup.ts +3 -1
- package/src/schema/team-tool-schema.ts +2 -1
- package/src/state/active-run-registry.ts +8 -2
- package/src/state/atomic-write.ts +17 -0
- package/src/state/contracts.ts +5 -2
- package/src/state/event-log-rotation.ts +118 -31
- package/src/state/event-log.ts +33 -5
- package/src/state/event-reconstructor.ts +4 -2
- package/src/state/mailbox.ts +5 -1
- package/src/state/schedule.ts +146 -0
- package/src/state/types.ts +40 -0
- package/src/state/usage.ts +20 -0
- package/src/ui/crew-widget.ts +2 -2
- package/src/ui/run-event-bus.ts +1 -1
- package/src/ui/run-snapshot-cache.ts +2 -1
- package/src/ui/snapshot-types.ts +1 -0
- package/src/utils/gh-protocol.ts +2 -2
- package/src/utils/names.ts +1 -1
- package/src/utils/sse-parser.ts +0 -2
- package/src/worktree/branch-freshness.ts +1 -1
- package/src/worktree/cleanup.ts +54 -14
- package/src/worktree/worktree-manager.ts +19 -9
|
@@ -101,7 +101,9 @@ function shouldMergeTaskUpdate(current: TeamTaskState, updated: TeamTaskState):
|
|
|
101
101
|
return updated.status !== current.status || updated.finishedAt !== current.finishedAt || updated.startedAt !== current.startedAt || Boolean(updated.resultArtifact) || Boolean(updated.error) || Boolean(updated.modelAttempts?.length) || Boolean(updated.usage) || Boolean(updated.attempts?.length);
|
|
102
102
|
}
|
|
103
103
|
|
|
104
|
-
|
|
104
|
+
// H4 fix: rename to descriptive name. Kept __test__ as alias for backward
|
|
105
|
+
// compat test imports.
|
|
106
|
+
export function mergeTaskUpdatesPreservingTerminal(base: TeamTaskState[], results: Array<{ tasks: TeamTaskState[] }>): TeamTaskState[] {
|
|
105
107
|
let merged = base;
|
|
106
108
|
for (const result of results) {
|
|
107
109
|
for (const updated of result.tasks) {
|
|
@@ -112,6 +114,8 @@ export function __test__mergeTaskUpdates(base: TeamTaskState[], results: Array<{
|
|
|
112
114
|
}
|
|
113
115
|
return refreshTaskGraphQueues(merged);
|
|
114
116
|
}
|
|
117
|
+
/** @deprecated Use mergeTaskUpdatesPreservingTerminal. Kept for backward test import compat. */
|
|
118
|
+
export const __test__mergeTaskUpdates = mergeTaskUpdatesPreservingTerminal;
|
|
115
119
|
|
|
116
120
|
// 2.8: adaptive-plan parsing/repair/injection moved to src/runtime/adaptive-plan.ts.
|
|
117
121
|
// Re-export the test-only helpers so existing test imports still resolve.
|
|
@@ -260,10 +264,10 @@ function dagReadyTaskIds(tasks: TeamTaskState[], completedIds: Set<string>): str
|
|
|
260
264
|
}
|
|
261
265
|
|
|
262
266
|
export async function executeTeamRun(input: ExecuteTeamRunInput): Promise<{ manifest: TeamRunManifest; tasks: TeamTaskState[] }> {
|
|
263
|
-
|
|
267
|
+
const workflow = input.workflow;
|
|
264
268
|
let manifest = updateRunStatus(input.manifest, "running", input.executeWorkers ? "Executing team workflow." : "Creating workflow prompts and placeholder results.");
|
|
265
269
|
|
|
266
|
-
|
|
270
|
+
void registerRunPromise(manifest.runId);
|
|
267
271
|
|
|
268
272
|
const cleanupUsage = (): void => {
|
|
269
273
|
for (const task of input.tasks) clearTrackedTaskUsage(task.id);
|
|
@@ -541,10 +545,10 @@ async function executeTeamRunCore(
|
|
|
541
545
|
);
|
|
542
546
|
if (results.length === 0) break;
|
|
543
547
|
manifest = { ...results.at(-1)!.manifest, artifacts: mergeArtifacts([manifest.artifacts, ...results.map((item) => item.manifest.artifacts)].flat()) };
|
|
544
|
-
tasks =
|
|
548
|
+
tasks = mergeTaskUpdatesPreservingTerminal(tasks, results);
|
|
545
549
|
|
|
546
550
|
// Advance workflow phases whose tasks are all in terminal state
|
|
547
|
-
const terminalStatuses = new Set(["completed", "failed", "skipped", "cancelled"]);
|
|
551
|
+
const terminalStatuses = new Set(["completed", "failed", "skipped", "cancelled", "needs_attention"]);
|
|
548
552
|
const phaseTaskMap = new Map<string, string[]>();
|
|
549
553
|
for (const task of tasks) {
|
|
550
554
|
if (!task.stepId) continue;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
export type WorkerLifecycleState = "spawning" | "trust_required" | "ready_for_prompt" | "running" | "finished" | "failed";
|
|
2
|
-
export type StartupFailureClassification = "trust_required" | "prompt_misdelivery" | "prompt_acceptance_timeout" | "transport_dead" | "worker_crashed" | "unknown";
|
|
2
|
+
export type StartupFailureClassification = "trust_required" | "prompt_misdelivery" | "prompt_acceptance_timeout" | "transport_dead" | "worker_crashed" | "rate_limited" | "provider_error" | "unknown";
|
|
3
3
|
|
|
4
4
|
export interface WorkerStartupEvidence {
|
|
5
5
|
lastLifecycleState: WorkerLifecycleState;
|
|
@@ -20,6 +20,8 @@ export function detectTrustPrompt(text: string): boolean {
|
|
|
20
20
|
}
|
|
21
21
|
|
|
22
22
|
export function classifyStartupFailure(evidence: Omit<WorkerStartupEvidence, "classification">): StartupFailureClassification {
|
|
23
|
+
if (evidence.stderrPreview && /429|rate.?limit/i.test(evidence.stderrPreview)) return "rate_limited";
|
|
24
|
+
if (evidence.stderrPreview && /5\d{2}|server.?error|internal.?error|provider.?error/i.test(evidence.stderrPreview)) return "provider_error";
|
|
23
25
|
if (!evidence.transportHealthy) return "transport_dead";
|
|
24
26
|
if (evidence.trustPromptDetected || evidence.lastLifecycleState === "trust_required") return "trust_required";
|
|
25
27
|
if (evidence.promptSentAt && !evidence.promptAccepted && evidence.childProcessAlive) return "prompt_acceptance_timeout";
|
|
@@ -49,6 +49,7 @@ export const TeamToolParams = Type.Object({
|
|
|
49
49
|
Type.Literal("autonomy"),
|
|
50
50
|
Type.Literal("api"),
|
|
51
51
|
Type.Literal("settings"),
|
|
52
|
+
Type.Literal("steer"),
|
|
52
53
|
], { description: "Team action. Defaults to 'list' when omitted." })),
|
|
53
54
|
resource: Type.Optional(Type.Union([
|
|
54
55
|
Type.Literal("agent"),
|
|
@@ -93,7 +94,7 @@ export const TeamToolParams = Type.Object({
|
|
|
93
94
|
});
|
|
94
95
|
|
|
95
96
|
export interface TeamToolParamsValue {
|
|
96
|
-
action?: "run" | "parallel" | "plan" | "status" | "list" | "get" | "cancel" | "retry" | "resume" | "respond" | "create" | "update" | "delete" | "doctor" | "cleanup" | "events" | "artifacts" | "worktrees" | "forget" | "summary" | "prune" | "export" | "import" | "imports" | "help" | "validate" | "config" | "init" | "recommend" | "autonomy" | "api" | "settings";
|
|
97
|
+
action?: "run" | "parallel" | "plan" | "status" | "list" | "get" | "cancel" | "retry" | "resume" | "respond" | "create" | "update" | "delete" | "doctor" | "cleanup" | "events" | "artifacts" | "worktrees" | "forget" | "summary" | "prune" | "export" | "import" | "imports" | "help" | "validate" | "config" | "init" | "recommend" | "autonomy" | "api" | "settings" | "steer";
|
|
97
98
|
resource?: "agent" | "team" | "workflow";
|
|
98
99
|
team?: string;
|
|
99
100
|
workflow?: string;
|
|
@@ -121,7 +121,7 @@ export function readActiveRunRegistry(maxEntries = DEFAULT_CACHE.manifestMaxEntr
|
|
|
121
121
|
}
|
|
122
122
|
const entries = Array.isArray(parsed) ? parsed.map(normalizeEntry).filter((entry): entry is ActiveRunRegistryEntry => entry !== undefined) : [];
|
|
123
123
|
const byId = new Map<string, ActiveRunRegistryEntry>();
|
|
124
|
-
for (const entry of entries.sort((a, b) => b.updatedAt.localeCompare(a.updatedAt))) {
|
|
124
|
+
for (const entry of entries.sort((a, b) => (b.updatedAt ?? "").localeCompare(a.updatedAt ?? ""))) {
|
|
125
125
|
if (!byId.has(entry.runId)) byId.set(entry.runId, entry);
|
|
126
126
|
}
|
|
127
127
|
return [...byId.values()].slice(0, Math.max(0, maxEntries));
|
|
@@ -157,12 +157,18 @@ function filterAliveEntries(entries: ActiveRunRegistryEntry[]): ActiveRunRegistr
|
|
|
157
157
|
return false;
|
|
158
158
|
}
|
|
159
159
|
try {
|
|
160
|
-
const raw = JSON.parse(fs.readFileSync(entry.manifestPath, "utf-8")) as { status?: string; async?: { pid?: number } };
|
|
160
|
+
const raw = JSON.parse(fs.readFileSync(entry.manifestPath, "utf-8")) as { status?: string; async?: { pid?: number }; updatedAt?: string };
|
|
161
161
|
if (TERMINAL_STATUSES.has(raw.status ?? "")) return false;
|
|
162
162
|
// Dead PID = stale async run
|
|
163
163
|
if (raw.async?.pid) {
|
|
164
164
|
try { process.kill(raw.async.pid, 0); } catch { return false; }
|
|
165
165
|
}
|
|
166
|
+
// 2.19 — Stale non-async run: live-session/scaffold runs older than 30 min
|
|
167
|
+
// Without this, test runs that crash/leak would stay in the registry forever.
|
|
168
|
+
if (!raw.async) {
|
|
169
|
+
const updatedAt = typeof raw.updatedAt === 'string' ? Date.parse(raw.updatedAt) : NaN;
|
|
170
|
+
if (Number.isFinite(updatedAt) && Date.now() - updatedAt > 30 * 60 * 1000) return false;
|
|
171
|
+
}
|
|
166
172
|
} catch {
|
|
167
173
|
return false;
|
|
168
174
|
}
|
|
@@ -114,6 +114,23 @@ export function atomicWriteFile(filePath: string, content: string): void {
|
|
|
114
114
|
try {
|
|
115
115
|
renameWithRetry(tempPath, filePath);
|
|
116
116
|
} catch (renameError) {
|
|
117
|
+
// H3 fix: re-check symlink safety before fallback.
|
|
118
|
+
// Between isSymlinkSafePath at top and rename attempt, the file
|
|
119
|
+
// could have been replaced with a symlink (TOCTOU). Refuse if so.
|
|
120
|
+
try {
|
|
121
|
+
const lstat = fs.lstatSync(filePath);
|
|
122
|
+
if (lstat.isSymbolicLink()) {
|
|
123
|
+
try { fs.rmSync(tempPath, { force: true }); } catch { /* best-effort */ }
|
|
124
|
+
throw renameError;
|
|
125
|
+
}
|
|
126
|
+
} catch (checkError) {
|
|
127
|
+
// Only ENOENT / ENOTDIR means the file genuinely doesn't exist — safe to proceed.
|
|
128
|
+
// Re-throw everything else (EACCES, EPERM, EBUSY, etc.)
|
|
129
|
+
const code = (checkError as NodeJS.ErrnoException).code;
|
|
130
|
+
if (code !== "ENOENT" && code !== "ENOTDIR") {
|
|
131
|
+
throw checkError;
|
|
132
|
+
}
|
|
133
|
+
}
|
|
117
134
|
// Fallback: if rename fails (Windows EPERM/EBUSY), try direct write.
|
|
118
135
|
// This is less atomic but avoids data loss when concurrent writers contend.
|
|
119
136
|
try {
|
package/src/state/contracts.ts
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
export const TEAM_RUN_STATUSES = ["queued", "planning", "running", "blocked", "completed", "failed", "cancelled"] as const;
|
|
2
2
|
export type TeamRunStatus = typeof TEAM_RUN_STATUSES[number];
|
|
3
3
|
|
|
4
|
-
export const TEAM_TASK_STATUSES = ["queued", "running", "waiting", "completed", "failed", "cancelled", "skipped"] as const;
|
|
4
|
+
export const TEAM_TASK_STATUSES = ["queued", "running", "waiting", "completed", "failed", "cancelled", "skipped", "needs_attention"] as const;
|
|
5
5
|
export type TeamTaskStatus = typeof TEAM_TASK_STATUSES[number];
|
|
6
6
|
|
|
7
7
|
export const TEAM_TERMINAL_RUN_STATUSES: ReadonlySet<TeamRunStatus> = new Set(["blocked", "completed", "failed", "cancelled"]);
|
|
8
|
-
export const TEAM_TERMINAL_TASK_STATUSES: ReadonlySet<TeamTaskStatus> = new Set(["completed", "failed", "cancelled", "skipped"]);
|
|
8
|
+
export const TEAM_TERMINAL_TASK_STATUSES: ReadonlySet<TeamTaskStatus> = new Set(["completed", "failed", "cancelled", "skipped", "needs_attention"]);
|
|
9
9
|
|
|
10
10
|
export const TEAM_RUN_STATUS_TRANSITIONS: Readonly<Record<TeamRunStatus, readonly TeamRunStatus[]>> = {
|
|
11
11
|
queued: ["planning", "running", "cancelled", "failed"],
|
|
@@ -25,6 +25,7 @@ export const TEAM_TASK_STATUS_TRANSITIONS: Readonly<Record<TeamTaskStatus, reado
|
|
|
25
25
|
failed: ["queued", "cancelled"],
|
|
26
26
|
cancelled: ["queued"],
|
|
27
27
|
skipped: ["queued", "cancelled"],
|
|
28
|
+
needs_attention: ["queued", "running"],
|
|
28
29
|
};
|
|
29
30
|
|
|
30
31
|
export const TEAM_EVENT_TYPES = [
|
|
@@ -46,6 +47,7 @@ export const TEAM_EVENT_TYPES = [
|
|
|
46
47
|
"task.failed",
|
|
47
48
|
"task.cancelled",
|
|
48
49
|
"task.skipped",
|
|
50
|
+
"task.needs_attention",
|
|
49
51
|
"review.approved",
|
|
50
52
|
"review.rejected",
|
|
51
53
|
"policy.action",
|
|
@@ -77,6 +79,7 @@ export const TEAM_WAKEABLE_EVENT_TYPES: ReadonlySet<TeamEventType> = new Set([
|
|
|
77
79
|
"task.failed",
|
|
78
80
|
"task.cancelled",
|
|
79
81
|
"task.skipped",
|
|
82
|
+
"task.needs_attention",
|
|
80
83
|
"async.completed",
|
|
81
84
|
"async.failed",
|
|
82
85
|
"async.stale",
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import * as fs from "node:fs";
|
|
2
2
|
import { readEvents } from "./event-log.ts";
|
|
3
3
|
import { atomicWriteFile } from "./atomic-write.ts";
|
|
4
|
+
import { logInternalError } from "../utils/internal-error.ts";
|
|
4
5
|
|
|
5
6
|
export interface RotationConfig {
|
|
6
7
|
maxFileSizeBytes: number;
|
|
@@ -77,24 +78,40 @@ export function compactEventLog(eventsPath: string, config?: Partial<RotationCon
|
|
|
77
78
|
// Concurrent write conflict — skip compaction this cycle
|
|
78
79
|
return undefined;
|
|
79
80
|
}
|
|
80
|
-
// C2: Re-read to recover any events appended
|
|
81
|
+
// C2: Re-read to recover any events appended during the compaction window.
|
|
82
|
+
// If events were appended and then overwritten by atomicWriteFile, they are LOST.
|
|
83
|
+
// Detect this and re-append any missing events.
|
|
81
84
|
try {
|
|
82
85
|
const afterWrite = readEvents(eventsPath);
|
|
83
|
-
if (afterWrite.length > kept.length) {
|
|
84
|
-
// Events were appended during the window — they're already in the file,
|
|
85
|
-
// no data loss occurred since atomicWriteFile preserves appends after its write point
|
|
86
|
-
}
|
|
87
86
|
const appendedDuringWindow = afterWrite.length - kept.length;
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
87
|
+
if (appendedDuringWindow >= 0) {
|
|
88
|
+
// No data loss — either events were appended and kept, or nothing happened.
|
|
89
|
+
return {
|
|
91
90
|
originalSize,
|
|
92
|
-
compactedSize,
|
|
93
|
-
eventsRemoved: originalCount
|
|
94
|
-
eventsKept,
|
|
91
|
+
compactedSize: fs.statSync(eventsPath).size,
|
|
92
|
+
eventsRemoved: originalCount - kept.length,
|
|
93
|
+
eventsKept: kept.length + Math.max(0, appendedDuringWindow),
|
|
95
94
|
};
|
|
95
|
+
}
|
|
96
|
+
// afterWrite.length < kept.length — events were lost during compaction window.
|
|
97
|
+
// Find missing events and re-append them.
|
|
98
|
+
const afterSet = new Set(afterWrite.map((e) => JSON.stringify(e)));
|
|
99
|
+
const missingEvents = kept.filter((e) => !afterSet.has(JSON.stringify(e)));
|
|
100
|
+
for (const event of missingEvents) {
|
|
101
|
+
try {
|
|
102
|
+
fs.appendFileSync(eventsPath, JSON.stringify(event) + "\n", "utf-8");
|
|
103
|
+
} catch {
|
|
104
|
+
// Append failed — log but don't throw.
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
return {
|
|
108
|
+
originalSize,
|
|
109
|
+
compactedSize: fs.statSync(eventsPath).size,
|
|
110
|
+
eventsRemoved: originalCount - kept.length,
|
|
111
|
+
eventsKept: kept.length,
|
|
112
|
+
};
|
|
96
113
|
} catch {
|
|
97
|
-
// Post-write verification failed
|
|
114
|
+
// Post-write verification failed — compaction likely succeeded.
|
|
98
115
|
const compactedSize = fs.statSync(eventsPath).size;
|
|
99
116
|
return {
|
|
100
117
|
originalSize,
|
|
@@ -105,6 +122,27 @@ export function compactEventLog(eventsPath: string, config?: Partial<RotationCon
|
|
|
105
122
|
}
|
|
106
123
|
}
|
|
107
124
|
|
|
125
|
+
/**
 * Rotate an event log file by archiving it under a timestamped name.
 *
 * The current file is renamed to `<eventsPath>.<timestamp>.archive.jsonl`
 * (the timestamp is the current ISO time with ":" and "." replaced by "-"),
 * and a new file is then created at `eventsPath`.
 *
 * @param eventsPath - Path of the live JSONL event log.
 * @returns true when the log was archived and the live file re-created;
 *   false when `eventsPath` does not exist or any filesystem step failed
 *   (the failure is reported through `logInternalError`, never thrown).
 */
export function rotateEventLog(eventsPath: string): boolean {
	if (!fs.existsSync(eventsPath)) return false;
	try {
		const ts = new Date().toISOString().replace(/[:.]/g, "-");
		const archivePath = `${eventsPath}.${ts}.archive.jsonl`;
		fs.renameSync(eventsPath, archivePath);
		// Create the replacement in append mode: flag "a" creates the file when
		// it is missing but does not truncate it, so events written by another
		// process between the rename above and this call are not wiped.
		fs.writeFileSync(eventsPath, "", { encoding: "utf-8", flag: "a" });
		return true;
	} catch (error) {
		logInternalError("event-log.rotate", error, `eventsPath=${eventsPath}`);
		return false;
	}
}
|
|
145
|
+
|
|
108
146
|
export interface EventLogStats {
|
|
109
147
|
fileSizeBytes: number;
|
|
110
148
|
eventCount: number;
|
|
@@ -125,29 +163,77 @@ export function getEventLogStats(eventsPath: string): EventLogStats | undefined
|
|
|
125
163
|
return { fileSizeBytes: 0, eventCount: 0 };
|
|
126
164
|
}
|
|
127
165
|
|
|
128
|
-
//
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
const
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
166
|
+
// NEW-9 fix: stream-scan for line count (no full-file load).
|
|
167
|
+
// Read last up-to-1KB for newest timestamp.
|
|
168
|
+
let newestTimestamp: string | undefined;
|
|
169
|
+
let lastLine = "";
|
|
170
|
+
const tailSize = Math.min(fileSizeBytes, 1024);
|
|
171
|
+
{
|
|
172
|
+
const tailBuf = Buffer.alloc(tailSize);
|
|
173
|
+
const fd = fs.openSync(eventsPath, "r");
|
|
174
|
+
try {
|
|
175
|
+
fs.readSync(fd, tailBuf, 0, tailSize, fileSizeBytes - tailSize);
|
|
176
|
+
} finally {
|
|
177
|
+
fs.closeSync(fd);
|
|
139
178
|
}
|
|
140
|
-
|
|
179
|
+
const tailStr = tailBuf.toString("utf-8");
|
|
180
|
+
// JSONL files end with "\n", so the last newline bounds an empty string.
|
|
181
|
+
// Walk backwards to find the last non-empty line.
|
|
182
|
+
let searchFrom = tailStr.length;
|
|
183
|
+
for (;;) {
|
|
184
|
+
const nl = tailStr.lastIndexOf("\n", searchFrom - 1);
|
|
185
|
+
if (nl < 0) { lastLine = tailStr.trim(); break; }
|
|
186
|
+
const candidate = tailStr.slice(nl + 1, searchFrom).trim();
|
|
187
|
+
if (candidate) { lastLine = candidate; break; }
|
|
188
|
+
searchFrom = nl;
|
|
189
|
+
}
|
|
190
|
+
try {
|
|
191
|
+
if (lastLine) {
|
|
192
|
+
newestTimestamp = (JSON.parse(lastLine) as { time: string }).time;
|
|
193
|
+
}
|
|
194
|
+
} catch { /* corrupt tail */ }
|
|
195
|
+
}
|
|
141
196
|
|
|
142
|
-
//
|
|
143
|
-
let
|
|
197
|
+
// Stream-scan to count newlines and find first line boundary.
|
|
198
|
+
let eventCount = 0;
|
|
199
|
+
let firstLineBytes = 0;
|
|
200
|
+
const buf = Buffer.alloc(8192);
|
|
201
|
+
let offset = 0;
|
|
202
|
+
let newlineCount = 0;
|
|
203
|
+
const scanFd = fs.openSync(eventsPath, "r");
|
|
144
204
|
try {
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
205
|
+
let bytesRead: number;
|
|
206
|
+
while ((bytesRead = fs.readSync(scanFd, buf, 0, buf.length, offset)) > 0) {
|
|
207
|
+
for (let i = 0; i < bytesRead; i++) {
|
|
208
|
+
if (buf[i] === 10) {
|
|
209
|
+
if (newlineCount === 0) firstLineBytes = offset + i + 1;
|
|
210
|
+
newlineCount++;
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
offset += bytesRead;
|
|
214
|
+
}
|
|
215
|
+
} finally {
|
|
216
|
+
fs.closeSync(scanFd);
|
|
149
217
|
}
|
|
150
|
-
|
|
218
|
+
eventCount = newlineCount;
|
|
219
|
+
|
|
220
|
+
// Read first line for oldest timestamp.
|
|
221
|
+
let oldestTimestamp: string | undefined;
|
|
222
|
+
if (firstLineBytes > 0) {
|
|
223
|
+
try {
|
|
224
|
+
const firstBuf = Buffer.alloc(firstLineBytes);
|
|
225
|
+
const fd = fs.openSync(eventsPath, "r");
|
|
226
|
+
try {
|
|
227
|
+
fs.readSync(fd, firstBuf, 0, firstLineBytes, 0);
|
|
228
|
+
} finally {
|
|
229
|
+
fs.closeSync(fd);
|
|
230
|
+
}
|
|
231
|
+
const firstLine = firstBuf.toString("utf-8").trim();
|
|
232
|
+
if (firstLine) {
|
|
233
|
+
oldestTimestamp = (JSON.parse(firstLine) as { time: string }).time;
|
|
234
|
+
}
|
|
235
|
+
} catch { /* corrupt head */ }
|
|
236
|
+
}
|
|
151
237
|
|
|
152
238
|
return {
|
|
153
239
|
fileSizeBytes,
|
|
@@ -159,3 +245,4 @@ export function getEventLogStats(eventsPath: string): EventLogStats | undefined
|
|
|
159
245
|
return undefined;
|
|
160
246
|
}
|
|
161
247
|
}
|
|
248
|
+
|
package/src/state/event-log.ts
CHANGED
|
@@ -8,7 +8,7 @@ import { logInternalError } from "../utils/internal-error.ts";
|
|
|
8
8
|
import { readJsonlSince, type IncrementalReadState } from "../utils/incremental-reader.ts";
|
|
9
9
|
import { redactSecrets } from "../utils/redaction.ts";
|
|
10
10
|
import { sleepSync } from "../utils/sleep.ts";
|
|
11
|
-
import { needsRotation, compactEventLog } from "./event-log-rotation.ts";
|
|
11
|
+
import { needsRotation, compactEventLog, rotateEventLog } from "./event-log-rotation.ts";
|
|
12
12
|
|
|
13
13
|
export type TeamEventProvenance = "live_worker" | "test" | "healthcheck" | "replay" | "api" | "background" | "team_runner";
|
|
14
14
|
export type TeamWatcherAction = "act" | "observe" | "ignore";
|
|
@@ -64,7 +64,7 @@ let appendCounter = 0;
|
|
|
64
64
|
/** Simple cross-process lock for an eventsPath to prevent JSONL interleave on concurrent append.
|
|
65
65
|
* Detects stale locks by checking the owner PID written inside the lock directory.
|
|
66
66
|
*/
|
|
67
|
-
function withEventLogLockSync<T>(eventsPath: string, fn: () => T): T {
|
|
67
|
+
export function withEventLogLockSync<T>(eventsPath: string, fn: () => T): T {
|
|
68
68
|
const lockDir = `${eventsPath}.lock`;
|
|
69
69
|
const pidFile = path.join(lockDir, "pid");
|
|
70
70
|
const start = Date.now();
|
|
@@ -208,15 +208,43 @@ function appendEventInsideLock(eventsPath: string, event: AppendTeamEvent): Team
|
|
|
208
208
|
metadata = { ...metadata, fingerprint: baseMetadata?.fingerprint ?? computeEventFingerprint(fullEvent) };
|
|
209
209
|
fullEvent.metadata = metadata;
|
|
210
210
|
}
|
|
211
|
+
// H1 fix: handle overflow before appending.
|
|
212
|
+
// 1. Terminal events must always be persisted regardless of size.
|
|
213
|
+
// 2. Non-terminal events exceeding MAX_EVENTS_BYTES trigger immediate compact.
|
|
214
|
+
// 3. After compact, if still over limit, rotate.
|
|
215
|
+
const isTerminal = TERMINAL_EVENT_TYPES.has(fullEvent.type);
|
|
216
|
+
let skippedDueToSize = false;
|
|
217
|
+
if (!isTerminal && fs.existsSync(eventsPath)) {
|
|
218
|
+
const stat = fs.statSync(eventsPath);
|
|
219
|
+
if (stat.size > MAX_EVENTS_BYTES) {
|
|
220
|
+
// Try immediate compact (not waiting for counter % 100)
|
|
221
|
+
try {
|
|
222
|
+
compactEventLog(eventsPath);
|
|
223
|
+
} catch (error) {
|
|
224
|
+
logInternalError("event-log.immediate-compact", error, `eventsPath=${eventsPath}`);
|
|
225
|
+
}
|
|
226
|
+
// Check if still too large after compact — if so, rotate
|
|
227
|
+
if (fs.existsSync(eventsPath)) {
|
|
228
|
+
const afterCompact = fs.statSync(eventsPath);
|
|
229
|
+
if (afterCompact.size > MAX_EVENTS_BYTES) {
|
|
230
|
+
rotateEventLog(eventsPath);
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
}
|
|
211
235
|
try {
|
|
212
236
|
if (fs.existsSync(eventsPath) && fs.statSync(eventsPath).size > MAX_EVENTS_BYTES) {
|
|
213
|
-
|
|
214
|
-
|
|
237
|
+
// Only reach here for non-terminal events that still overflow after compact+rotate.
|
|
238
|
+
// Log and mark as not appended.
|
|
239
|
+
logInternalError("event-log.size-limit", new Error(`events file ${eventsPath} exceeds ${MAX_EVENTS_BYTES} bytes after compaction`), `eventsPath=${eventsPath}`);
|
|
240
|
+
skippedDueToSize = true;
|
|
215
241
|
}
|
|
216
242
|
} catch (error) {
|
|
217
243
|
logInternalError("event-log.size-check", error, `eventsPath=${eventsPath}`);
|
|
218
244
|
}
|
|
219
|
-
|
|
245
|
+
if (!skippedDueToSize) {
|
|
246
|
+
fs.appendFileSync(eventsPath, `${JSON.stringify(redactSecrets(fullEvent))}\n`, "utf-8");
|
|
247
|
+
}
|
|
220
248
|
appendCounter++;
|
|
221
249
|
if (appendCounter % 100 === 0 && needsRotation(eventsPath)) {
|
|
222
250
|
try { compactEventLog(eventsPath); } catch (error) { logInternalError("event-log.rotation", error, `eventsPath=${eventsPath}`); }
|
|
@@ -11,7 +11,7 @@ import type { TeamEvent } from "./event-log.ts";
|
|
|
11
11
|
import { readEvents } from "./event-log.ts";
|
|
12
12
|
|
|
13
13
|
/** Task status values that can be reconstructed from lifecycle events. */
|
|
14
|
-
const RECONSTRUCTABLE_STATUSES = new Set(["created", "queued", "running", "completed", "failed", "cancelled", "skipped", "waiting"]);
|
|
14
|
+
const RECONSTRUCTABLE_STATUSES = new Set(["created", "queued", "running", "completed", "failed", "cancelled", "skipped", "waiting", "needs_attention"]);
|
|
15
15
|
|
|
16
16
|
/** Event types that carry task lifecycle state transitions. */
|
|
17
17
|
const TASK_LIFECYCLE_EVENT_TYPES = new Set([
|
|
@@ -21,6 +21,7 @@ const TASK_LIFECYCLE_EVENT_TYPES = new Set([
|
|
|
21
21
|
"task.failed",
|
|
22
22
|
"task.skipped",
|
|
23
23
|
"task.cancelled",
|
|
24
|
+
"task.needs_attention",
|
|
24
25
|
"task.waiting",
|
|
25
26
|
"task.resumed",
|
|
26
27
|
"task.retried",
|
|
@@ -31,7 +32,7 @@ const TASK_LIFECYCLE_EVENT_TYPES = new Set([
|
|
|
31
32
|
]);
|
|
32
33
|
|
|
33
34
|
/** Terminal events that set finishedAt. */
|
|
34
|
-
const TERMINAL_EVENTS = new Set(["task.completed", "task.failed", "task.cancelled", "task.skipped"]);
|
|
35
|
+
const TERMINAL_EVENTS = new Set(["task.completed", "task.failed", "task.cancelled", "task.skipped", "task.needs_attention"]);
|
|
35
36
|
|
|
36
37
|
/** Mapping from event type to the reconstructed task status. */
|
|
37
38
|
const EVENT_STATUS_MAP: Readonly<Record<string, string>> = {
|
|
@@ -41,6 +42,7 @@ const EVENT_STATUS_MAP: Readonly<Record<string, string>> = {
|
|
|
41
42
|
"task.failed": "failed",
|
|
42
43
|
"task.skipped": "skipped",
|
|
43
44
|
"task.cancelled": "cancelled",
|
|
45
|
+
"task.needs_attention": "needs_attention",
|
|
44
46
|
"task.waiting": "waiting",
|
|
45
47
|
"task.resumed": "running",
|
|
46
48
|
"task.retried": "queued",
|
package/src/state/mailbox.ts
CHANGED
|
@@ -5,6 +5,7 @@ import { resolveRealContainedPath } from "../utils/safe-paths.ts";
|
|
|
5
5
|
import { redactSecrets } from "../utils/redaction.ts";
|
|
6
6
|
import { logInternalError } from "../utils/internal-error.ts";
|
|
7
7
|
import { atomicWriteFile } from "./atomic-write.ts";
|
|
8
|
+
import { withEventLogLockSync } from "./event-log.ts";
|
|
8
9
|
|
|
9
10
|
export type MailboxDirection = "inbox" | "outbox";
|
|
10
11
|
export type MailboxMessageStatus = "queued" | "delivered" | "acknowledged";
|
|
@@ -298,7 +299,10 @@ export function appendMailboxMessage(manifest: TeamRunManifest, message: Omit<Ma
|
|
|
298
299
|
repliedAt: message.repliedAt,
|
|
299
300
|
replyContent: message.replyContent,
|
|
300
301
|
};
|
|
301
|
-
|
|
302
|
+
// H2 fix: wrap append in cross-process lock to prevent interleaving on Windows.
|
|
303
|
+
withEventLogLockSync(mailboxFile(manifest, complete.direction, complete.taskId), () => {
|
|
304
|
+
fs.appendFileSync(mailboxFile(manifest, complete.direction, complete.taskId), `${JSON.stringify(redactSecrets(complete))}\n`, "utf-8");
|
|
305
|
+
});
|
|
302
306
|
// 3.3 — rotate mailbox file if it has grown past 10 MB. Cheap stat
|
|
303
307
|
// check; rotates at most once per append.
|
|
304
308
|
rotateMailboxFileIfNeeded(mailboxFile(manifest, complete.direction, complete.taskId));
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* schedule.ts — Schedule detection and parsing utilities.
|
|
3
|
+
*
|
|
4
|
+
* Mirrors pi-subagents3's SubagentScheduler static methods:
|
|
5
|
+
* - detectSchedule(): sniff cron / interval / one-shot from string
|
|
6
|
+
* - validateCronExpression(): 6-field cron validation
|
|
7
|
+
* - parseRelativeTime(): "+10m" → ISO timestamp
|
|
8
|
+
* - parseInterval(): "5m" → milliseconds
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import type { ScheduleStoreData, ScheduledTask } from "./types.ts";
|
|
12
|
+
|
|
13
|
+
export type DetectedSchedule =
|
|
14
|
+
| { type: "cron"; normalized: string }
|
|
15
|
+
| { type: "interval"; intervalMs: number; normalized: string }
|
|
16
|
+
| { type: "once"; normalized: string };
|
|
17
|
+
|
|
18
|
+
/**
 * Parse a relative one-shot offset such as "+10s", "+5m", "+1h" or "+2d".
 *
 * @param s - Candidate schedule string; surrounding whitespace is ignored.
 * @returns ISO-8601 timestamp that many seconds/minutes/hours/days after the
 *   current time, or null when the input is not a relative time or the
 *   resulting instant is outside the range a Date can represent.
 */
export function parseRelativeTime(s: string): string | null {
	const m = /^\+(\d+)(s|m|h|d)$/.exec(s.trim());
	if (!m) return null;
	const unitMs: Record<string, number> = { s: 1000, m: 60_000, h: 3_600_000, d: 86_400_000 };
	const factor = unitMs[m[2] ?? ""];
	if (factor === undefined) return null;
	const target = new Date(Date.now() + Number.parseInt(m[1] ?? "", 10) * factor);
	// Very large counts overflow the Date range; toISOString() throws a
	// RangeError on an invalid Date, so report "not parseable" instead.
	if (Number.isNaN(target.getTime())) return null;
	return target.toISOString();
}
|
|
25
|
+
|
|
26
|
+
/**
 * Parse a repeat interval such as "10s", "5m", "1h" or "2d".
 *
 * @param s - Candidate string; surrounding whitespace is ignored.
 * @returns The interval in milliseconds, or `null` when the input is not of
 *   the form `<digits><s|m|h|d>`, when the interval is zero, or when it is too
 *   large to be represented exactly as an integer number of milliseconds.
 */
export function parseInterval(s: string): number | null {
  const match = /^(\d+)(s|m|h|d)$/.exec(s.trim());
  if (match === null) return null;

  const msPerUnit: Record<string, number> = { s: 1000, m: 60_000, h: 3_600_000, d: 86_400_000 };
  const intervalMs = Number.parseInt(match[1] ?? "", 10) * (msPerUnit[match[2] ?? ""] ?? Number.NaN);

  // A zero-length or overflowed period cannot drive a repeating schedule, so
  // it is rejected here rather than handed on as a valid interval.
  if (!Number.isSafeInteger(intervalMs) || intervalMs <= 0) return null;
  return intervalMs;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Structural check for a 6-field cron expression
 * ("second minute hour dom month dow").
 *
 * Only the number of whitespace-separated fields is checked. Field contents
 * are accepted as-is; per the original note in this file, malformed fields
 * are expected to be rejected by the croner library at execution time.
 *
 * @param expr - Candidate cron expression; surrounding whitespace is ignored.
 * @returns `{ valid: true }` for exactly six fields, otherwise
 *   `{ valid: false, error }` with a message stating how many fields were found.
 */
export function validateCronExpression(expr: string): { valid: boolean; error?: string } {
  const trimmed = expr.trim();
  // "".split(/\s+/) yields [""], which would be miscounted as one field.
  const fields = trimmed === "" ? [] : trimmed.split(/\s+/);
  if (fields.length !== 6) {
    return {
      valid: false,
      error: `Cron must have 6 fields (second minute hour dom month dow), got ${fields.length}. Example: "0 0 9 * * 1" for 9am every Monday.`,
    };
  }
  // Splitting a trimmed string on whitespace runs can never produce an empty
  // field here, so no separate empty-field check is needed.
  return { valid: true };
}
|
|
50
|
+
|
|
51
|
+
/**
 * Classify a schedule string as a one-shot, an interval, or a cron expression.
 *
 * Candidates are tried in a fixed order: relative offset ("+10m"), interval
 * ("5m"), ISO timestamp, then 6-field cron. The leading "+" is what separates
 * a relative one-shot from an interval, since both are digits plus a unit.
 *
 * @param s - Raw schedule string; surrounding whitespace is ignored.
 * @returns The detected schedule with its normalized form.
 * @throws Error when an ISO timestamp is not in the future, or when the input
 *   matches none of the supported formats.
 */
export function detectSchedule(s: string): DetectedSchedule {
  const input = s.trim();

  // Relative one-shot, e.g. "+10m".
  const relativeIso = parseRelativeTime(input);
  if (relativeIso !== null) {
    return { type: "once", normalized: relativeIso };
  }

  // Repeating interval, e.g. "5m".
  const intervalMs = parseInterval(input);
  if (intervalMs !== null) {
    return { type: "interval", intervalMs, normalized: input };
  }

  // Absolute one-shot: anything that looks like an ISO date-time and parses.
  const looksLikeIso = /^\d{4}-\d{2}-\d{2}T/.test(input);
  if (looksLikeIso) {
    const when = new Date(input);
    const epochMs = when.getTime();
    if (!Number.isNaN(epochMs)) {
      if (epochMs <= Date.now()) {
        throw new Error(`Scheduled time ${when.toISOString()} is in the past.`);
      }
      return { type: "once", normalized: when.toISOString() };
    }
    // Unparseable ISO-looking input falls through to the cron check below.
  }

  // Last candidate: 6-field cron.
  if (!validateCronExpression(input).valid) {
    throw new Error(
      `Invalid schedule "${s}". Use 6-field cron (e.g. "0 0 9 * * 1" — 9am every Monday), interval ("5m"/"1h"), or one-shot ("+10m" / ISO).`,
    );
  }
  return { type: "cron", normalized: input };
}
|
|
81
|
+
|
|
82
|
+
/**
 * JSON-file-backed store for scheduled tasks.
 *
 * The whole store is held in memory, loaded once in the constructor, and
 * rewritten to disk after every mutation. Writes go to a temporary sibling
 * file that is then renamed over the target, so a crash mid-write does not
 * leave a truncated store; if that path fails the store falls back to writing
 * the target directly. This class performs no cross-process locking itself.
 */
export class ScheduleStore {
  private readonly path: string;
  private data: ScheduleStoreData;

  /**
   * @param path - Location of the JSON store file. A missing, unreadable,
   *   unparseable, or wrongly-shaped file results in an empty store.
   */
  constructor(path: string) {
    this.path = path;
    this.data = { version: 1, jobs: [] };
    try {
      if (!fs.existsSync(path)) return;
      const parsed: unknown = JSON.parse(fs.readFileSync(path, "utf-8"));
      if (ScheduleStore.isStoreData(parsed)) {
        this.data = parsed;
      }
    } catch {
      // Corrupt or unreadable file — start fresh
    }
  }

  /** True when `value` has a `version` key and a `jobs` array. */
  private static isStoreData(value: unknown): value is ScheduleStoreData {
    return (
      typeof value === "object" &&
      value !== null &&
      "version" in value &&
      "jobs" in value &&
      // Every accessor below treats `jobs` as an array, so reject anything else.
      Array.isArray((value as { jobs: unknown }).jobs)
    );
  }

  /** Persist the in-memory store; failures are logged, never thrown. */
  private save(): void {
    const tmpPath = `${this.path}.${process.pid}.tmp`;
    try {
      const serialized = JSON.stringify(this.data, null, 2);
      fs.mkdirSync(nodePath.dirname(this.path), { recursive: true });
      try {
        fs.writeFileSync(tmpPath, serialized, "utf-8");
        fs.renameSync(tmpPath, this.path);
      } catch {
        // Rename can fail (e.g. the target is held open elsewhere); clean up
        // and fall back to writing the target file in place.
        fs.rmSync(tmpPath, { force: true });
        fs.writeFileSync(this.path, serialized, "utf-8");
      }
    } catch (error) {
      console.warn(`[pi-crew] Failed to save schedule store: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  /** @returns A shallow copy of the job list (the job objects are shared). */
  list(): ScheduledTask[] {
    return [...this.data.jobs];
  }

  /** @returns Whether any stored job has exactly this `name`. */
  hasName(name: string): boolean {
    return this.data.jobs.some(j => j.name === name);
  }

  /** @returns The stored job with this `id`, or `undefined` if absent. */
  get(id: string): ScheduledTask | undefined {
    return this.data.jobs.find(j => j.id === id);
  }

  /** Append a job and persist. No uniqueness check is made on `id` or `name`. */
  add(job: ScheduledTask): void {
    this.data.jobs.push(job);
    this.save();
  }

  /**
   * Shallow-merge `patch` into the job with this `id` and persist.
   * @returns The updated job, or `undefined` (nothing written) if no job matches.
   */
  update(id: string, patch: Partial<ScheduledTask>): ScheduledTask | undefined {
    const idx = this.data.jobs.findIndex(j => j.id === id);
    if (idx === -1) return undefined;
    const merged = { ...this.data.jobs[idx], ...patch } as ScheduledTask;
    this.data.jobs[idx] = merged;
    this.save();
    return merged;
  }

  /**
   * Remove every job with this `id`.
   * @returns `true` if something was removed (and the store persisted), else `false`.
   */
  remove(id: string): boolean {
    const before = this.data.jobs.length;
    this.data.jobs = this.data.jobs.filter(j => j.id !== id);
    if (this.data.jobs.length === before) return false;
    this.save();
    return true;
  }
}
|