brainclaw 1.7.2 → 1.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +132 -102
- package/dist/brainclaw-vscode.vsix +0 -0
- package/dist/cli.js +13 -1
- package/dist/commands/harvest.js +124 -1
- package/dist/commands/mcp.js +23 -10
- package/dist/core/agent-capability.js +28 -0
- package/dist/core/agent-inventory.js +54 -7
- package/dist/core/agentrun-reconciler.js +72 -6
- package/dist/core/dirty-scope.js +11 -5
- package/dist/core/dispatch-status.js +67 -4
- package/dist/core/dispatcher.js +51 -3
- package/dist/core/entity-operations.js +36 -0
- package/dist/core/entity-registry.js +1 -1
- package/dist/core/instruction-templates.js +1 -1
- package/dist/core/runtime-signals.js +72 -0
- package/dist/core/schema.js +18 -0
- package/dist/core/worktree.js +227 -7
- package/dist/facts.js +3 -3
- package/dist/facts.json +2 -2
- package/package.json +1 -1
|
@@ -5,6 +5,14 @@ import { spawnSync } from 'node:child_process';
|
|
|
5
5
|
import yaml from 'yaml';
|
|
6
6
|
import { MEMORY_DIR } from './io.js';
|
|
7
7
|
import { detectHostExecutionProfile, } from './execution-profile.js';
|
|
8
|
+
import { getCapabilityProfile } from './agent-capability.js';
|
|
9
|
+
/**
|
|
10
|
+
* trp#427 — cold-start CLI `--version` probes need headroom; a 3s timeout
|
|
11
|
+
* false-negatived claude-code on first launch. The spawnable check (binary on
|
|
12
|
+
* PATH) is the robust signal, so this only affects version-string capture
|
|
13
|
+
* latency, not the installed/spawnable decision.
|
|
14
|
+
*/
|
|
15
|
+
const VERSION_PROBE_TIMEOUT_MS = 8000;
|
|
8
16
|
function tryCommand(command, args, timeout = 5000) {
|
|
9
17
|
try {
|
|
10
18
|
const r = spawnSync(command, args, { encoding: 'utf-8', timeout, windowsHide: true });
|
|
@@ -14,12 +22,40 @@ function tryCommand(command, args, timeout = 5000) {
|
|
|
14
22
|
return { ok: false, stdout: '' };
|
|
15
23
|
}
|
|
16
24
|
}
|
|
25
|
+
/**
 * Scan the directories listed in `process.env.PATH` for an executable regular
 * file named `binary`. Used only when the platform lookup tool cannot be run.
 *
 * @param {string} binary - executable name, or a path containing a separator.
 * @returns {boolean} true when an executable file was found.
 */
function scanPathForExecutable(binary) {
    // A name that already contains a separator is checked as given, not joined to PATH entries.
    const candidates = binary.includes('/')
        ? [binary]
        : (process.env.PATH ?? '')
            .split(path.delimiter)
            .filter(Boolean)
            .map((dir) => path.join(dir, binary));
    return candidates.some((candidate) => {
        try {
            fs.accessSync(candidate, fs.constants.X_OK);
            return fs.statSync(candidate).isFile();
        }
        catch {
            return false;
        }
    });
}
/**
 * trp#427 — fast PATH resolution for a binary, without running the binary
 * itself (unlike a `--version` probe). Uses `where` (Windows) / `which` (POSIX).
 *
 * If the POSIX lookup tool itself cannot be launched (spawn error `ENOENT`,
 * i.e. `which` is not installed), the result would otherwise be `false` for
 * every binary; in that case PATH is scanned directly instead.
 *
 * @param {string} binary - executable name to resolve.
 * @returns {boolean} true when the binary resolves on PATH; false for an empty
 *   name, a failed lookup, or any unexpected error.
 */
function isBinaryOnPath(binary) {
    if (!binary)
        return false;
    try {
        const onWindows = process.platform === 'win32';
        const cmd = onWindows ? 'where' : 'which';
        const r = spawnSync(cmd, [binary], { encoding: 'utf-8', timeout: 3000, windowsHide: true });
        // spawnSync does not throw when the command is missing; it reports it via `error`.
        if (!onWindows && r.error?.code === 'ENOENT') {
            return scanPathForExecutable(binary);
        }
        return r.status === 0 && (r.stdout ?? '').trim().length > 0;
    }
    catch {
        return false;
    }
}
|
|
41
|
+
/**
 * trp#427 — decide whether an agent is SPAWNABLE: its capability profile must
 * be flagged `runtime.canBeSpawnedCli`, must declare an `invoke_binary`, and
 * that binary must resolve on PATH. This check does not run the `--version`
 * probe, so a slow cold-start CLI is not reported as unspawnable.
 *
 * @param {string} agentName - agent whose capability profile is looked up.
 * @returns {boolean} true only when all three conditions hold.
 */
export function detectSpawnable(agentName) {
    const capability = getCapabilityProfile(agentName);
    // Only consult the declared binary once the profile says CLI spawning is possible.
    const invokeBinary = capability?.runtime?.canBeSpawnedCli
        ? capability.invoke_binary
        : undefined;
    return invokeBinary ? isBinaryOnPath(invokeBinary) : false;
}
|
|
17
53
|
const AGENT_DEFINITIONS = [
|
|
18
54
|
{
|
|
19
55
|
name: 'claude-code',
|
|
20
56
|
detect: (_home, env) => {
|
|
21
57
|
// Check if claude CLI is available
|
|
22
|
-
const cli = tryCommand('claude', ['--version'],
|
|
58
|
+
const cli = tryCommand('claude', ['--version'], VERSION_PROBE_TIMEOUT_MS);
|
|
23
59
|
if (cli.ok) {
|
|
24
60
|
const ver = cli.stdout.trim().match(/(\d+\.\d+\.\d+)/)?.[1];
|
|
25
61
|
return { installed: true, method: 'claude CLI', version: ver };
|
|
@@ -81,7 +117,7 @@ const AGENT_DEFINITIONS = [
|
|
|
81
117
|
if (fs.existsSync(codexDir)) {
|
|
82
118
|
return { installed: true, method: '~/.codex directory' };
|
|
83
119
|
}
|
|
84
|
-
const cli = tryCommand('codex', ['--version'],
|
|
120
|
+
const cli = tryCommand('codex', ['--version'], VERSION_PROBE_TIMEOUT_MS);
|
|
85
121
|
if (cli.ok) {
|
|
86
122
|
const ver = cli.stdout.trim().match(/(\d+\.\d+\.\d+)/)?.[1];
|
|
87
123
|
return { installed: true, method: 'codex CLI', version: ver };
|
|
@@ -252,7 +288,7 @@ const AGENT_DEFINITIONS = [
|
|
|
252
288
|
if (fs.existsSync(path.join(home, '.gemini', 'antigravity'))) {
|
|
253
289
|
return { installed: true, method: '~/.gemini/antigravity directory' };
|
|
254
290
|
}
|
|
255
|
-
const cli = tryCommand('gemini', ['--version'],
|
|
291
|
+
const cli = tryCommand('gemini', ['--version'], VERSION_PROBE_TIMEOUT_MS);
|
|
256
292
|
if (cli.ok) {
|
|
257
293
|
return { installed: true, method: 'gemini CLI', version: cli.stdout.trim() };
|
|
258
294
|
}
|
|
@@ -309,7 +345,7 @@ const AGENT_DEFINITIONS = [
|
|
|
309
345
|
if (fs.existsSync(path.join(home, '.hermes'))) {
|
|
310
346
|
return { installed: true, method: '~/.hermes directory' };
|
|
311
347
|
}
|
|
312
|
-
const cli = tryCommand('hermes', ['--version'],
|
|
348
|
+
const cli = tryCommand('hermes', ['--version'], VERSION_PROBE_TIMEOUT_MS);
|
|
313
349
|
if (cli.ok) {
|
|
314
350
|
return { installed: true, method: 'hermes CLI', version: cli.stdout.trim() };
|
|
315
351
|
}
|
|
@@ -332,14 +368,23 @@ const AGENT_DEFINITIONS = [
|
|
|
332
368
|
/**
|
|
333
369
|
* Detect ALL installed agents on this machine (not just the running one).
|
|
334
370
|
*/
|
|
335
|
-
export function buildAgentInventory(homeDir = os.homedir(), env = process.env) {
|
|
371
|
+
export function buildAgentInventory(homeDir = os.homedir(), env = process.env, opts = {}) {
|
|
372
|
+
const spawnableResolver = opts.spawnableResolver ?? detectSpawnable;
|
|
336
373
|
const agents = AGENT_DEFINITIONS.map(def => {
|
|
337
374
|
const detection = def.detect(homeDir, env);
|
|
375
|
+
const spawnable = spawnableResolver(def.name);
|
|
376
|
+
// trp#427: an agent brainclaw can spawn (invoke binary on PATH) IS installed,
|
|
377
|
+
// even when the cold-start `--version` probe timed out. This decouples the
|
|
378
|
+
// dispatch decision (getInstalledAgentNames) from probe latency.
|
|
379
|
+
const installed = detection.installed || spawnable;
|
|
338
380
|
return {
|
|
339
381
|
name: def.name,
|
|
340
|
-
installed
|
|
341
|
-
detection_method: detection.
|
|
382
|
+
installed,
|
|
383
|
+
detection_method: detection.installed
|
|
384
|
+
? detection.method
|
|
385
|
+
: (spawnable ? 'spawnable: invoke binary on PATH' : detection.method),
|
|
342
386
|
version: detection.version,
|
|
387
|
+
spawnable,
|
|
343
388
|
models: def.models,
|
|
344
389
|
native_tools: def.native_tools,
|
|
345
390
|
mcp_support: def.mcp_support,
|
|
@@ -415,6 +460,8 @@ export function renderAgentInventorySummary(inventory) {
|
|
|
415
460
|
features.push('Rules');
|
|
416
461
|
if (agent.hooks_support)
|
|
417
462
|
features.push('Hooks');
|
|
463
|
+
if (agent.spawnable)
|
|
464
|
+
features.push('Spawnable');
|
|
418
465
|
lines.push(` Features: ${features.join(', ') || 'none'}`);
|
|
419
466
|
if (agent.instruction_file) {
|
|
420
467
|
lines.push(` Instructions: ${agent.instruction_file}`);
|
|
@@ -34,11 +34,11 @@
|
|
|
34
34
|
*/
|
|
35
35
|
import { spawnSync } from 'node:child_process';
|
|
36
36
|
import { loadAgentRun, transitionAgentRun, listAgentRuns } from './agentruns.js';
|
|
37
|
-
import { loadClaim } from './claims.js';
|
|
37
|
+
import { loadClaim, releaseClaim } from './claims.js';
|
|
38
38
|
import { loadAssignment } from './assignments.js';
|
|
39
39
|
import { createRuntimeEvent } from './events.js';
|
|
40
40
|
import { nowISO } from './ids.js';
|
|
41
|
-
import { readHeartbeat, readLogTail, signalExists } from './runtime-signals.js';
|
|
41
|
+
import { readHeartbeat, readLogTail, signalExists, latestActivityMs } from './runtime-signals.js';
|
|
42
42
|
// ── Constants ──────────────────────────────────────────────────────────────
|
|
43
43
|
/**
|
|
44
44
|
* Minimum age before a run is eligible for reconciliation. Below this, the
|
|
@@ -175,11 +175,59 @@ export function collectEvidence(run, cwd, options) {
|
|
|
175
175
|
heartbeat_age_ms = now - hb.mtimeMs;
|
|
176
176
|
}
|
|
177
177
|
catch { /* defensive */ }
|
|
178
|
+
// pln#527 — filesystem-activity liveness (logs + worktree). Independent of the
|
|
179
|
+
// heartbeat: a worker can be actively editing files / streaming to stderr while
|
|
180
|
+
// its heartbeat is frozen (written once at step 0).
|
|
181
|
+
let fs_activity_age_ms;
|
|
182
|
+
try {
|
|
183
|
+
const lastFs = latestActivityMs(signalRoot, run.assignment_id, run.worktree_path);
|
|
184
|
+
if (lastFs !== undefined)
|
|
185
|
+
fs_activity_age_ms = now - lastFs;
|
|
186
|
+
}
|
|
187
|
+
catch { /* defensive */ }
|
|
178
188
|
return {
|
|
179
189
|
age_ms, has_post_start_commit, claim_released, assignment_completed, process_alive,
|
|
180
|
-
completed_signal, failed_signal, heartbeat_exists, heartbeat_age_ms,
|
|
190
|
+
completed_signal, failed_signal, heartbeat_exists, heartbeat_age_ms, fs_activity_age_ms,
|
|
181
191
|
};
|
|
182
192
|
}
|
|
193
|
+
/**
 * pln#527 — reports whether the evidence shows filesystem activity more recent
 * than `windowMs`. Callers use it to veto a `stalled` verdict when the
 * heartbeat is stale but files are still being written.
 *
 * @param {{ fs_activity_age_ms?: number }} evidence - collected run evidence.
 * @param {number} windowMs - maximum age (ms) that still counts as active.
 * @returns {boolean} false when no fs-activity age was recorded, otherwise
 *   whether that age is strictly below `windowMs`.
 */
function fsActiveWithin(evidence, windowMs) {
    const { fs_activity_age_ms: fsAgeMs } = evidence;
    if (fsAgeMs === undefined)
        return false;
    return fsAgeMs < windowMs;
}
|
|
201
|
+
/**
 * trp#433 — GC cascade run after a run has been reconciled to `failed`: if the
 * run links to a claim that is still `active`, release that claim and record a
 * runtime event describing the auto-release.
 *
 * Best-effort by design: runs without a claim are ignored, claims that are not
 * active are left alone, and any error is swallowed so that claim cleanup can
 * never break reconciliation.
 *
 * @param {object} run - the failed agent run (reads `claim_id`, `id`,
 *   `session_id`, `assignment_id`).
 * @param {string} actor - agent recorded on the emitted runtime event.
 * @param {string} cwd - project directory passed through to the stores.
 * @returns {void}
 */
function cascadeReleaseOnFailure(run, actor, cwd) {
    const claimId = run.claim_id;
    if (!claimId)
        return;
    try {
        const linkedClaim = loadClaim(claimId, cwd);
        // Only an active claim is released; anything else is already settled.
        if (linkedClaim?.status !== 'active')
            return;
        releaseClaim(claimId, cwd);
        const releaseEvent = {
            agent: actor,
            session_id: run.session_id,
            event_type: 'run_failed',
            text: `Auto-released claim ${claimId} after run ${run.id} was reconciled to failed (trp#433 GC cascade)`,
            tags: ['reconciler', 'gc', 'claim-release'],
            assignment_id: run.assignment_id,
            run_id: run.id,
            claim_id: claimId,
            status_reason: 'gc_cascade_release_on_failure',
        };
        createRuntimeEvent(releaseEvent, cwd);
    }
    catch { /* best-effort — never let GC break reconciliation */ }
}
|
|
183
231
|
function anyCompletionEvidence(evidence) {
|
|
184
232
|
return evidence.completed_signal
|
|
185
233
|
|| evidence.has_post_start_commit
|
|
@@ -328,6 +376,7 @@ export function reconcileAgentRun(runId, cwd, options = {}) {
|
|
|
328
376
|
const failHere = (reason) => {
|
|
329
377
|
try {
|
|
330
378
|
transitionAgentRun(runId, 'failed', { actor, status_reason: reason }, cwd);
|
|
379
|
+
cascadeReleaseOnFailure(run, actor, cwd);
|
|
331
380
|
return { run_id: runId, action: 'inferred_failed', reason, evidence, previous_status, current_status: 'failed' };
|
|
332
381
|
}
|
|
333
382
|
catch (err) {
|
|
@@ -342,9 +391,18 @@ export function reconcileAgentRun(runId, cwd, options = {}) {
|
|
|
342
391
|
if (evidence.failed_signal) {
|
|
343
392
|
return failHere(`failed_silent: wrapper reported non-zero exit${logTailSuffix(run, cwd)}`);
|
|
344
393
|
}
|
|
345
|
-
// Heartbeat present but stale → reached the loop then went silent
|
|
394
|
+
// Heartbeat present but stale → reached the loop then went silent — UNLESS the
|
|
395
|
+
// filesystem shows recent activity (pln#527): a frozen heartbeat with fresh
|
|
396
|
+
// log/worktree writes means the worker is mid-operation, not hung.
|
|
346
397
|
if (evidence.heartbeat_exists && evidence.heartbeat_age_ms !== undefined && evidence.heartbeat_age_ms >= heartbeatStale) {
|
|
347
|
-
|
|
398
|
+
if (fsActiveWithin(evidence, heartbeatStale)) {
|
|
399
|
+
return {
|
|
400
|
+
run_id: runId, action: 'no_op',
|
|
401
|
+
reason: `heartbeat stale (${Math.round(evidence.heartbeat_age_ms / 1000)}s) but fs active ${Math.round((evidence.fs_activity_age_ms ?? 0) / 1000)}s ago — working, not stalled`,
|
|
402
|
+
evidence, previous_status, current_status: run.status,
|
|
403
|
+
};
|
|
404
|
+
}
|
|
405
|
+
return failHere(`stalled: heartbeat last seen ${Math.round(evidence.heartbeat_age_ms / 1000)}s ago, no fs activity${logTailSuffix(run, cwd)}`);
|
|
348
406
|
}
|
|
349
407
|
// Fresh heartbeat → alive; trust it over the untrustworthy wrapper pid.
|
|
350
408
|
if (evidence.heartbeat_exists) {
|
|
@@ -416,6 +474,7 @@ export function reconcileDeadPidRunningAgentRunAtRead(runId, cwd, options = {})
|
|
|
416
474
|
const failRun = (reason) => {
|
|
417
475
|
try {
|
|
418
476
|
transitionAgentRun(run.id, 'failed', { actor, status_reason: reason }, cwd);
|
|
477
|
+
cascadeReleaseOnFailure(run, actor, cwd);
|
|
419
478
|
return { run_id: run.id, action: 'inferred_failed', reason, evidence, previous_status: run.status, current_status: 'failed' };
|
|
420
479
|
}
|
|
421
480
|
catch (err) {
|
|
@@ -458,7 +517,14 @@ export function reconcileDeadPidRunningAgentRunAtRead(runId, cwd, options = {})
|
|
|
458
517
|
// 3. Heartbeat present but STALE → the worker reached its loop then went
|
|
459
518
|
// silent (e.g. hung). pid-independent: a hung worker keeps the wrapper alive.
|
|
460
519
|
if (evidence.heartbeat_exists && evidence.heartbeat_age_ms !== undefined && evidence.heartbeat_age_ms >= heartbeatStale) {
|
|
461
|
-
|
|
520
|
+
if (fsActiveWithin(evidence, heartbeatStale)) {
|
|
521
|
+
return {
|
|
522
|
+
run_id: run.id, action: 'no_op',
|
|
523
|
+
reason: `heartbeat stale (${Math.round(evidence.heartbeat_age_ms / 1000)}s) but fs active ${Math.round((evidence.fs_activity_age_ms ?? 0) / 1000)}s ago — working, not stalled`,
|
|
524
|
+
evidence, previous_status: run.status, current_status: run.status,
|
|
525
|
+
};
|
|
526
|
+
}
|
|
527
|
+
return failRun(`stalled: heartbeat last seen ${Math.round(evidence.heartbeat_age_ms / 1000)}s ago, no fs activity${logTailSuffix(run, cwd)}`);
|
|
462
528
|
}
|
|
463
529
|
// 4. Fresh heartbeat → the worker is alive and working; trust it OVER the
|
|
464
530
|
// (untrustworthy) wrapper pid. This is the can_f792cacd fix: never fail a
|
package/dist/core/dirty-scope.js
CHANGED
|
@@ -44,13 +44,19 @@ function defaultRunGit(cwd, args) {
|
|
|
44
44
|
return { ok: false, stdout: '' };
|
|
45
45
|
}
|
|
46
46
|
}
|
|
47
|
-
/**
|
|
47
|
+
/**
 * Top-level directories whose dirtiness is a side effect of coordination or
 * agent tooling and is never part of a dispatch's code scope:
 *   - `.brainclaw`, `.git` — coordination store + VCS metadata.
 *   - `.claude`, `.cursor`, `.codex` — per-agent local config (trp#371), so a
 *     worker leaving these dirty does not block an otherwise-safe dispatch of
 *     an unrelated code scope.
 */
const SYSTEM_DIRTY_DIRS = ['.brainclaw', '.git', '.claude', '.cursor', '.codex'];
/**
 * Tell whether a path is one of the system directories above or lies inside one.
 * Backslashes are normalised to forward slashes before comparing.
 *
 * @param {string} p - path to classify.
 * @returns {boolean} true when the path equals a system directory or starts
 *   with `<dir>/`.
 */
export function isSystemDirtyPath(p) {
    const posixPath = p.replace(/\\/g, '/');
    for (const dir of SYSTEM_DIRTY_DIRS) {
        // Match the directory itself or anything beneath it — not look-alikes such as `.gitignore`.
        if (posixPath === dir || posixPath.startsWith(dir + '/'))
            return true;
    }
    return false;
}
|
|
55
61
|
/**
|
|
56
62
|
* Parse `git status --porcelain=v1 -z` output into a flat list of paths.
|
|
@@ -25,6 +25,7 @@ import { loadAgentRun, listAgentRuns } from './agentruns.js';
|
|
|
25
25
|
import { loadClaim } from './claims.js';
|
|
26
26
|
import { getLoop, listLoops } from './loops/store.js';
|
|
27
27
|
import { isProcessAlive } from './agentrun-reconciler.js';
|
|
28
|
+
import { latestActivityMs } from './runtime-signals.js';
|
|
28
29
|
const DEFAULT_TAIL = 20;
|
|
29
30
|
const DEFAULT_STALL_MS = 5 * 60_000;
|
|
30
31
|
// ── Internal helpers ──────────────────────────────────────────────────────
|
|
@@ -96,6 +97,37 @@ function resolveTarget(targetId, cwd) {
|
|
|
96
97
|
const TERMINAL_RUN_STATUSES = new Set([
|
|
97
98
|
'completed', 'failed', 'cancelled', 'timed_out', 'interrupted',
|
|
98
99
|
]);
|
|
100
|
+
/**
 * pln#527 (#5) — recognize known fatal boot signatures in a worker's stderr tail
 * so dispatch_status can return a targeted diagnosis + remediation instead of a
 * generic silent_death. These are agent/CLI/config faults (NOT brainclaw bugs)
 * that a coordinator can fix and re-dispatch. Patterns sourced from field traps
 * (trp#292 codex service_tier / model mismatch).
 *
 * Signatures are checked in order and the first match wins:
 *   1. `service_tier` together with `flex` or `unsupported`;
 *   2. `unknown variant`;
 *   3. a standalone `400` together with `unsupported`, `requires a newer` or `model`.
 *
 * @param {string[] | string | undefined | null} tail - captured stderr, either
 *   as an array of lines or as raw text. A string used to throw a TypeError on
 *   `.join`; it is now matched as-is.
 * @returns {{ summary: string, recommended_next_action: string } | undefined}
 *   the diagnosis for the first matching signature, or undefined when the tail
 *   is empty or nothing matches.
 */
export function recognizeStderrSignature(tail) {
    if (!tail || tail.length === 0)
        return undefined;
    // Accept both shapes: lines are joined, raw text is used directly.
    const text = Array.isArray(tail) ? tail.join('\n') : String(tail);
    if (/service_tier/i.test(text) && /flex|unsupported/i.test(text)) {
        return {
            summary: 'codex rejected an unsupported `service_tier` (e.g. flex) — a config/model mismatch at boot, not a brainclaw fault',
            recommended_next_action: 'Fix ~/.codex/config.toml `service_tier` (remove it or set a supported value) or upgrade codex, then re-dispatch. See trap trp#292.',
        };
    }
    if (/unknown variant/i.test(text)) {
        return {
            summary: 'codex CLI rejected an unknown config variant — the installed codex does not support a value in ~/.codex/config.toml (e.g. model/approval)',
            recommended_next_action: 'Reconcile ~/.codex/config.toml with the installed codex (`codex --version`) or upgrade codex, then re-dispatch.',
        };
    }
    if (/\b400\b/.test(text) && /(unsupported|requires a newer|model)/i.test(text)) {
        return {
            summary: 'the model API returned 400 (unsupported model / needs a newer CLI) — the worker died at boot, before doing work',
            recommended_next_action: 'Check the configured model vs the installed CLI version; upgrade the agent CLI or pick a supported model, then re-dispatch.',
        };
    }
    return undefined;
}
|
|
99
131
|
function computeDiagnosis(assignment, agentRun, runtime, options) {
|
|
100
132
|
if (!assignment && !agentRun) {
|
|
101
133
|
return {
|
|
@@ -127,17 +159,37 @@ function computeDiagnosis(assignment, agentRun, runtime, options) {
|
|
|
127
159
|
const lastEventMs = new Date(agentRun.last_event_at ?? agentRun.started_at ?? agentRun.created_at).getTime();
|
|
128
160
|
const stallAge = options.nowMs - lastEventMs;
|
|
129
161
|
if (runtime.pid_alive === false) {
|
|
162
|
+
// pln#527 (#5) — surface a TARGETED diagnosis when the captured stderr matches
|
|
163
|
+
// a known fatal boot signature (codex model/service_tier mismatch, API 400)
|
|
164
|
+
// instead of a generic "silent_death".
|
|
165
|
+
const sig = recognizeStderrSignature(runtime.log_files.stderr?.tail);
|
|
130
166
|
return {
|
|
131
167
|
health: 'silent_death',
|
|
132
|
-
summary:
|
|
133
|
-
|
|
168
|
+
summary: sig
|
|
169
|
+
? `agent_run.status="${agentRun.status}", pid ${runtime.pid} dead — ${sig.summary}`
|
|
170
|
+
: `agent_run.status="${agentRun.status}" but pid ${runtime.pid} is dead — worker exited without self-reporting; lazy reconciler will mark it failed after the stale window (default 30min)`,
|
|
171
|
+
recommended_next_action: sig?.recommended_next_action
|
|
172
|
+
?? 'Read .stderr.log for the exit reason; then trigger reconciliation by calling bclaw_find(entity="agent_run") again, or cancel + reroute.',
|
|
173
|
+
};
|
|
174
|
+
}
|
|
175
|
+
// pln#527 — a stale last_event_at is NOT "stalled" when the filesystem is still
|
|
176
|
+
// active (logs streaming / worktree files edited). Workers emit no heartbeat
|
|
177
|
+
// during a long single operation (codex→stderr, claude -p buffering stdout),
|
|
178
|
+
// so fs activity is the truer liveness signal and vetoes the false-stalled.
|
|
179
|
+
const fsAge = runtime.last_fs_activity_ms;
|
|
180
|
+
const fsActive = fsAge !== undefined && fsAge < options.stallMs;
|
|
181
|
+
if (runtime.pid_alive === true && stallAge > options.stallMs && fsActive) {
|
|
182
|
+
return {
|
|
183
|
+
health: 'healthy',
|
|
184
|
+
summary: `agent_run alive (pid=${runtime.pid}); last_event_at stale (${Math.round(stallAge / 1000)}s) but filesystem active ${Math.round((fsAge ?? 0) / 1000)}s ago — working through a long op without a heartbeat`,
|
|
185
|
+
recommended_next_action: 'No action — the worker is actively writing to logs/worktree. Re-check periodically until terminal.',
|
|
134
186
|
};
|
|
135
187
|
}
|
|
136
188
|
if (runtime.pid_alive === true && stallAge > options.stallMs) {
|
|
137
189
|
return {
|
|
138
190
|
health: 'stalled',
|
|
139
|
-
summary: `agent_run alive (pid=${runtime.pid}) but no activity for ${Math.round(stallAge / 1000)}s; last_event_at=${agentRun.last_event_at ?? '(never)'}`,
|
|
140
|
-
recommended_next_action: '
|
|
191
|
+
summary: `agent_run alive (pid=${runtime.pid}) but no activity for ${Math.round(stallAge / 1000)}s AND no filesystem writes${fsAge !== undefined ? ` (last fs ${Math.round(fsAge / 1000)}s ago)` : ' (no logs/worktree mtime)'}; last_event_at=${agentRun.last_event_at ?? '(never)'}`,
|
|
192
|
+
recommended_next_action: 'Worker appears genuinely hung (no log/file writes). Tail stderr to confirm, then kill the pid and reroute.',
|
|
141
193
|
};
|
|
142
194
|
}
|
|
143
195
|
if (runtime.pid_alive === true) {
|
|
@@ -186,6 +238,16 @@ export function getDispatchStatus(options) {
|
|
|
186
238
|
const ackPath = assignmentId ? path.join(runtimeRoot, 'ack', `${assignmentId}.ack`) : undefined;
|
|
187
239
|
const stdoutPath = assignmentId ? path.join(runtimeRoot, 'log', `${assignmentId}.stdout.log`) : undefined;
|
|
188
240
|
const stderrPath = assignmentId ? path.join(runtimeRoot, 'log', `${assignmentId}.stderr.log`) : undefined;
|
|
241
|
+
// pln#527 — filesystem-activity age: max mtime across the captured logs + the
|
|
242
|
+
// run's worktree files (skipping junctions). The truer liveness signal when
|
|
243
|
+
// the heartbeat / last_event_at is stale during a long single operation.
|
|
244
|
+
const worktreeForFs = agentRun?.worktree_path ?? claim?.worktree_path;
|
|
245
|
+
let lastFsActivityMs;
|
|
246
|
+
if (assignmentId) {
|
|
247
|
+
const lastFs = latestActivityMs(projectRoot, assignmentId, worktreeForFs);
|
|
248
|
+
if (lastFs !== undefined)
|
|
249
|
+
lastFsActivityMs = nowMs - lastFs;
|
|
250
|
+
}
|
|
189
251
|
const runtime = {
|
|
190
252
|
pid: agentRun?.pid,
|
|
191
253
|
pid_alive: isProcessAlive(agentRun?.pid),
|
|
@@ -197,6 +259,7 @@ export function getDispatchStatus(options) {
|
|
|
197
259
|
stdout: stdoutPath ? readLogTail(stdoutPath, tailLines) : undefined,
|
|
198
260
|
stderr: stderrPath ? readLogTail(stderrPath, tailLines) : undefined,
|
|
199
261
|
},
|
|
262
|
+
last_fs_activity_ms: lastFsActivityMs,
|
|
200
263
|
};
|
|
201
264
|
const diagnosis = computeDiagnosis(assignment, agentRun, runtime, { stallMs, nowMs });
|
|
202
265
|
return {
|
package/dist/core/dispatcher.js
CHANGED
|
@@ -43,7 +43,7 @@ import { memoryDir } from './io.js';
|
|
|
43
43
|
import { loadVersionedJsonFile } from './migration.js';
|
|
44
44
|
import fs from 'node:fs';
|
|
45
45
|
import path from 'node:path';
|
|
46
|
-
import { buildInvokeCommand, resolveBriefMode, getCapabilityProfile, resolveConcurrencyLimit, resolveResourceKey, resolveModel, serializeConcurrencyLimit } from './agent-capability.js';
|
|
46
|
+
import { buildInvokeCommand, resolveBriefMode, getCapabilityProfile, dispatchHasMcp, resolveConcurrencyLimit, resolveResourceKey, resolveModel, serializeConcurrencyLimit } from './agent-capability.js';
|
|
47
47
|
import { getRuntimeSignalPath } from './runtime-signals.js';
|
|
48
48
|
import { attemptExecution } from './execution.js';
|
|
49
49
|
import { createAssignment, transitionAssignment, generateAssignmentId, patchAssignmentMessageId } from './assignments.js';
|
|
@@ -156,6 +156,11 @@ export function analyzeSequence(cwd) {
|
|
|
156
156
|
plan,
|
|
157
157
|
lane: item.lane,
|
|
158
158
|
reason: `All hard dependencies met${softNote}`,
|
|
159
|
+
// pln#529 — readiness ≠ code-availability for gated lanes.
|
|
160
|
+
...(item.hard_after.length > 0 ? {
|
|
161
|
+
code_propagation_note: `Unblocked by hard_after [${item.hard_after.join(', ')}]. Ensure that work is committed AND on the dispatch base (HEAD), ` +
|
|
162
|
+
`or dispatch this lane with ref=<predecessor branch> — otherwise the worker spawns from HEAD without it.`,
|
|
163
|
+
} : {}),
|
|
159
164
|
});
|
|
160
165
|
}
|
|
161
166
|
// Build capacity summary per agent (multi-instance aware)
|
|
@@ -238,6 +243,13 @@ export function buildProtocolSection(options) {
|
|
|
238
243
|
}
|
|
239
244
|
if (options?.worktreePath) {
|
|
240
245
|
parts.push(`Worktree: ${options.worktreePath}`);
|
|
246
|
+
// pln#523: tell the worker how dependencies are provisioned so it does not
|
|
247
|
+
// stall trying to install them. node_modules (and per-package node_modules in
|
|
248
|
+
// monorepos) are junction-linked from the main repo — run builds/typecheck
|
|
249
|
+
// directly. If they are missing, do NOT `npm install` in the worktree: check
|
|
250
|
+
// `.brainclaw-worktree.json` → `symlink_warnings` (a link may have failed,
|
|
251
|
+
// e.g. cross-volume) and validate the build centrally with the coordinator.
|
|
252
|
+
parts.push('Dependencies: node_modules is linked from the main repo (incl. monorepo per-package). Build/typecheck directly; if deps are missing, do NOT npm install here — see .brainclaw-worktree.json symlink_warnings and validate centrally.');
|
|
241
253
|
}
|
|
242
254
|
parts.push('');
|
|
243
255
|
// Assignment lifecycle protocol (Agent SDK)
|
|
@@ -254,6 +266,14 @@ export function buildProtocolSection(options) {
|
|
|
254
266
|
parts.push(`${options.worktreePath ? '7' : '6'}. Release the claim: bclaw_release_claim(${claimRef}, planStatus: "done") — required for hard_after gating to unblock downstream tasks`);
|
|
255
267
|
parts.push(`${options.worktreePath ? '8' : '7'}. If blocked: bclaw_assignment_update(status: "blocked", blocker: "...")`);
|
|
256
268
|
parts.push(`${options.worktreePath ? '9' : '8'}. If failed: bclaw_assignment_update(status: "failed", error_message: "...")`);
|
|
269
|
+
// pln#479: compile-check contract for code workers — a per-worktree
|
|
270
|
+
// pre-commit gate may HARD-block a commit that fails tsc (opt-in).
|
|
271
|
+
if (options.worktreePath) {
|
|
272
|
+
parts.push('**Compile check**: before every commit, `tsc --noEmit` (or the project build) must pass — a per-worktree pre-commit gate may enforce this and reject the commit otherwise. Do not bypass with --no-verify unless you intend to hand off a known-broken state.');
|
|
273
|
+
}
|
|
274
|
+
// pln#526: standard fallback channel — works even when MCP is unreachable
|
|
275
|
+
// (sandboxed agents). The coordinator ingests it with `brainclaw harvest`.
|
|
276
|
+
parts.push(`Final fallback (if bclaw_assignment_update / MCP is unavailable, e.g. a sandboxed agent): write LANE-RESULT.json at the worktree root — {"assignment_id":"${options.assignmentId}","status":"completed|blocked|failed","summary":"<what you did>","files_changed":["..."],"artifacts":["..."]}. The coordinator harvests it via \`brainclaw harvest ${options.assignmentId}\`.`);
|
|
257
277
|
}
|
|
258
278
|
else if (options?.claimId) {
|
|
259
279
|
parts.push('1. Call bclaw_session_start to register your session');
|
|
@@ -406,6 +426,25 @@ export function generateBrief(plan, item, cwd, briefMode, options) {
|
|
|
406
426
|
if (mode === 'full') {
|
|
407
427
|
parts.push(buildProtocolSection(options));
|
|
408
428
|
}
|
|
429
|
+
// pln#528 — transport-aware addendum (debrief LeaseUp P1#2). When the agent is
|
|
430
|
+
// spawned sandboxed (no MCP + no git commit — e.g. codex --sandbox
|
|
431
|
+
// workspace-write), the MCP lifecycle lines in the Protocol section do NOT
|
|
432
|
+
// apply. Say so explicitly and make the FILE protocol authoritative, so the
|
|
433
|
+
// worker never receives instructions it cannot follow nor has to guess the
|
|
434
|
+
// fallback. (Note: resolveBriefMode still returns 'full' for codex per pln#496
|
|
435
|
+
// so the reconciler-independent path is preserved; this addendum disambiguates
|
|
436
|
+
// the transport rather than stripping the section — the full compact reversal
|
|
437
|
+
// is a separate human-owned call on the May-vs-June MCP-availability conflict.)
|
|
438
|
+
const briefProfile = options?.agent ? getCapabilityProfile(options.agent) : undefined;
|
|
439
|
+
if (briefProfile && !dispatchHasMcp(briefProfile)) {
|
|
440
|
+
parts.push('## ⚠ Transport: sandboxed run (no MCP, no commit)');
|
|
441
|
+
parts.push('Your runtime is sandboxed — the brainclaw MCP server is NOT reachable and `git commit` is unavailable (.git is outside the sandbox root). Any `bclaw_*` MCP instruction above does NOT apply to you. Report your outcome via the FILE protocol only — it is authoritative for this run:');
|
|
442
|
+
const asgn = options?.assignmentId ?? '<assignment_id>';
|
|
443
|
+
parts.push(`- When done, write LANE-RESULT.json at the worktree root: {"assignment_id":"${asgn}","status":"completed|blocked|failed","summary":"<what you did>","files_changed":["..."]}.`);
|
|
444
|
+
parts.push('- Capture decisions/traps as candidate JSON under .brainclaw/coordination/inbox/ (the coordinator harvests them).');
|
|
445
|
+
parts.push('- Do NOT call bclaw_* tools — they are unavailable here. The coordinator harvests your result and integrates/commits it.');
|
|
446
|
+
parts.push('');
|
|
447
|
+
}
|
|
409
448
|
// Codex-specific constraints: focus and speed guidance for sandboxed runs.
|
|
410
449
|
// Gated on agent identity (not brief mode) so future non-codex compact consumers
|
|
411
450
|
// don't inherit sandbox-specific wording. (Codex review cnd#561)
|
|
@@ -413,7 +452,6 @@ export function generateBrief(plan, item, cwd, briefMode, options) {
|
|
|
413
452
|
parts.push('## Constraints');
|
|
414
453
|
parts.push('- Focus on specified files only — do not explore the broader codebase');
|
|
415
454
|
parts.push('- Produce output quickly; if blocked, capture as trap candidate and move on');
|
|
416
|
-
parts.push('- Sandbox blocks MCP writes: use filesystem writes for candidates, coordinator harvests');
|
|
417
455
|
parts.push('');
|
|
418
456
|
}
|
|
419
457
|
return parts.join('\n');
|
|
@@ -437,12 +475,22 @@ export function generateDispatchBrief(options) {
|
|
|
437
475
|
assignmentId: options.assignmentId,
|
|
438
476
|
}));
|
|
439
477
|
}
|
|
478
|
+
// pln#528 — transport-aware addendum for sandboxed agents (see generateBrief).
|
|
479
|
+
const taskBriefProfile = options.agent ? getCapabilityProfile(options.agent) : undefined;
|
|
480
|
+
if (taskBriefProfile && !dispatchHasMcp(taskBriefProfile)) {
|
|
481
|
+
parts.push('## ⚠ Transport: sandboxed run (no MCP, no commit)');
|
|
482
|
+
parts.push('Your runtime is sandboxed — the brainclaw MCP server is NOT reachable and `git commit` is unavailable (.git is outside the sandbox root). Any `bclaw_*` MCP instruction above does NOT apply to you. Report your outcome via the FILE protocol only — it is authoritative for this run:');
|
|
483
|
+
const asgn = options.assignmentId ?? '<assignment_id>';
|
|
484
|
+
parts.push(`- When done, write LANE-RESULT.json at the worktree root: {"assignment_id":"${asgn}","status":"completed|blocked|failed","summary":"<what you did>","files_changed":["..."]}.`);
|
|
485
|
+
parts.push('- Capture decisions/traps as candidate JSON under .brainclaw/coordination/inbox/ (the coordinator harvests them).');
|
|
486
|
+
parts.push('- Do NOT call bclaw_* tools — they are unavailable here. The coordinator harvests your result and integrates/commits it.');
|
|
487
|
+
parts.push('');
|
|
488
|
+
}
|
|
440
489
|
// Codex-specific constraints: focus and speed guidance for sandboxed runs
|
|
441
490
|
if (options.agent === 'codex') {
|
|
442
491
|
parts.push('## Constraints');
|
|
443
492
|
parts.push('- Focus on specified files only — do not explore the broader codebase');
|
|
444
493
|
parts.push('- Produce output quickly; if blocked, capture as trap candidate and move on');
|
|
445
|
-
parts.push('- Sandbox blocks MCP writes: use filesystem writes for candidates, coordinator harvests');
|
|
446
494
|
parts.push('');
|
|
447
495
|
}
|
|
448
496
|
return parts.join('\n');
|
|
@@ -126,6 +126,42 @@ export function listEntities(name, cwd, filter = {}) {
|
|
|
126
126
|
const paged = applyPaging(filtered, filter);
|
|
127
127
|
return { entity: name, total: filtered.length, items: paged };
|
|
128
128
|
}
|
|
129
|
+
/** Default serialized-items budget (chars) — keeps a bclaw_find payload well under the ~25k-token MCP cap (trp#449). */
export const DEFAULT_FIND_CHAR_BUDGET = 40000;
/**
 * pln#491 — bound a list payload so a verbose result set never overflows the MCP
 * token cap (which makes agents silently fall back to the CLI, trp#449).
 * `listEntities` already caps COUNT (default 50 via applyPaging); this additionally
 * caps SIZE: if the serialized items exceed `charBudget`, the page is shrunk until
 * it fits (always keeping at least one item). Either way the result advertises
 * has_more / next_offset / a hint so the caller paginates explicitly instead of
 * guessing or falling back to the terminal.
 *
 * @param {{entity: string, total: number, items: unknown[]}} result - One page of
 *   a listing: `total` is the size of the full filtered set, `items` the page.
 * @param {number|string} [offset] - Index of the first item of this page within
 *   the full set. Missing or non-numeric values are treated as 0; numeric strings
 *   are converted, so `next_offset` is always a number.
 * @param {number} [charBudget] - Maximum length of `JSON.stringify(items)`.
 * @returns {object} A copy of `result` with the (possibly shortened) `items`,
 *   plus `returned`, `has_more`, and — only when applicable — `omitted_for_size`,
 *   `next_offset` and `hint`.
 */
export function boundListResult(result, offset, charBudget = DEFAULT_FIND_CHAR_BUDGET) {
    // Normalize the offset before doing arithmetic with it: `undefined + n` is NaN
    // (which made has_more permanently false, hiding size-omitted items) and
    // `"5" + n` is string concatenation (which produced a bogus next_offset).
    const numericOffset = Number(offset);
    const start = Number.isFinite(numericOffset) ? numericOffset : 0;
    let items = result.items;
    let omittedForSize = 0;
    // Shed the trailing quarter of the page (at least one item) until the
    // serialized page fits; a single item is always kept even if oversized.
    while (items.length > 1 && JSON.stringify(items).length > charBudget) {
        const drop = Math.max(1, Math.ceil(items.length * 0.25));
        items = items.slice(0, items.length - drop);
        omittedForSize = result.items.length - items.length;
    }
    const returned = items.length;
    const hasMore = start + returned < result.total;
    const bounded = {
        ...result,
        items,
        returned,
        has_more: hasMore,
        ...(omittedForSize > 0 ? { omitted_for_size: omittedForSize } : {}),
    };
    if (hasMore) {
        bounded.next_offset = start + returned;
        bounded.hint = omittedForSize > 0
            ? `Payload size-bounded: returned ${returned} of ${result.total} ${result.entity} item(s). Fetch more with filter.offset=${bounded.next_offset}, or narrow the filter (status/tag/author).`
            : `Returned ${returned} of ${result.total} ${result.entity} item(s). Page with filter.offset=${bounded.next_offset}, or narrow the filter.`;
    }
    return bounded;
}
|
|
129
165
|
function loadAll(name, cwd) {
|
|
130
166
|
switch (name) {
|
|
131
167
|
case 'plan': return loadState(cwd).plan_items;
|
|
@@ -23,7 +23,7 @@ const plan = {
|
|
|
23
23
|
name: 'plan',
|
|
24
24
|
shortLabelPrefix: 'pln',
|
|
25
25
|
schema: PlanItemSchema,
|
|
26
|
-
updatable: ['text', 'priority', 'tags', 'assignee', 'estimated_effort', 'actual_effort', 'depends_on'],
|
|
26
|
+
updatable: ['text', 'priority', 'tags', 'assignee', 'estimated_effort', 'actual_effort', 'depends_on', 'related_paths'],
|
|
27
27
|
statusField: 'status',
|
|
28
28
|
transitions: {
|
|
29
29
|
todo: ['in_progress', 'blocked', 'done', 'dropped'],
|
|
@@ -237,7 +237,7 @@ function renderSessionProtocol() {
|
|
|
237
237
|
'',
|
|
238
238
|
'Do NOT call `bclaw_loop(intent=open)` directly — it creates a loop structure without dispatch, so the reviewer/participant never gets the work. Use the goal entries above.',
|
|
239
239
|
'',
|
|
240
|
-
'_How to verify a dispatch actually worked:_ `execution_status="delivered_and_started"` only means the brief-ack sentinel was touched — it does NOT mean the worker is doing useful work.
|
|
240
|
+
'_How to verify a dispatch actually worked:_ `execution_status="delivered_and_started"` only means the brief-ack sentinel was touched — it does NOT mean the worker is doing useful work. (1) Call `bclaw_dispatch_status(target_id=<asgn_…|clm_…|lop_…|run_…>)` — the purpose-built facade: it resolves the linked entities, reads the runtime sentinels (`ack` / `heartbeat` / `completed` / `failed`) and the captured stdout/stderr tails, checks pid liveness, and returns a single health verdict plus a recommended next action. This is the `verify_with` target named in the coordinate/dispatch response — prefer it over assembling the picture by hand. (2) Do NOT diagnose liveness from the tracked pid yourself: on Windows an ack-wrapped spawn runs under a `cmd.exe` shell, so `agent_run.pid` is the wrapper (which exits early by design), NOT the real worker — `Get-Process -Id <pid>` reads it dead while the worker is alive and committing. Trust the sentinel-derived verdict instead; the reconciler already infers `completed` from a post-start commit on the worktree branch even when the worker never called `bclaw_assignment_update`. (3) Fallback only if the facade is unavailable: `bclaw_find(entity="agent_run", filter={assignment_id})` plus the captured streams at `.brainclaw/coordination/runtime/log/<assignment_id>.{stdout,stderr}.log` — note that `claude -p` buffers stdout until exit, so an empty log mid-run is expected; use the `heartbeat` sentinel as the live progress signal, not stdout. Full FSM tables + diagnostic decision tree in `docs/concepts/dispatch-lifecycle.md`.',
|
|
241
241
|
].join('\n');
|
|
242
242
|
}
|
|
243
243
|
function renderUserWorkflow() {
|
|
@@ -99,4 +99,76 @@ export function readLogTail(root, assignmentId, stream, maxBytes = 2000) {
|
|
|
99
99
|
return '';
|
|
100
100
|
}
|
|
101
101
|
}
|
|
102
|
+
/**
 * pln#527 — directory names excluded from the filesystem-activity scan:
 * dependency / build-output directories, the VCS directory and the coordination
 * store. Skipping them keeps the worktree mtime scan cheap and keeps it out of
 * node_modules/dist junction targets.
 */
const FS_ACTIVITY_SKIP_DIRS = new Set(['.git', '.brainclaw', 'node_modules', 'dist', '.venv', 'venv', 'vendor']);
/**
 * pln#527 — newest modification time (ms) of any regular file under a worktree.
 *
 * The scan is depth-limited, ignores every symbolic-link entry (so junctions are
 * never entered) and does not descend into the directories listed in
 * FS_ACTIVITY_SKIP_DIRS. It serves as a liveness signal for workers that edit
 * files without emitting a heartbeat or stdout.
 *
 * @param {string} worktreePath - Root directory to scan.
 * @param {number} [maxDepth=4] - Deepest directory level read; the root is level 0.
 * @returns {number|undefined} Largest `mtimeMs` seen, or undefined when the root
 *   is absent/unreadable or no regular file was found.
 */
export function latestWorktreeFileMtimeMs(worktreePath, maxDepth = 4) {
    let newest;
    // Explicit work stack instead of recursion; visiting order is irrelevant
    // because only the maximum mtime is kept.
    const pending = [{ dir: worktreePath, depth: 0 }];
    while (pending.length > 0) {
        const { dir, depth } = pending.pop();
        if (depth > maxDepth) {
            continue;
        }
        let children;
        try {
            children = fs.readdirSync(dir, { withFileTypes: true });
        }
        catch {
            continue; // unreadable or vanished directory: nothing to observe here
        }
        for (const child of children) {
            if (child.isSymbolicLink()) {
                continue; // never follow junctions (node_modules/dist)
            }
            const childPath = path.join(dir, child.name);
            if (child.isDirectory()) {
                if (!FS_ACTIVITY_SKIP_DIRS.has(child.name)) {
                    pending.push({ dir: childPath, depth: depth + 1 });
                }
                continue;
            }
            if (!child.isFile()) {
                continue;
            }
            try {
                const mtime = fs.statSync(childPath).mtimeMs;
                if (newest === undefined || mtime > newest) {
                    newest = mtime;
                }
            }
            catch { /* ignore */ }
        }
    }
    return newest;
}
|
|
149
|
+
/**
 * pln#527 — timestamp (ms since epoch) of the most recent filesystem activity
 * attributable to a dispatched run: the newest mtime among its captured
 * stdout/stderr logs and, when a worktree path is given, any file found by the
 * worktree scan. This lets the reconciler / dispatch_status tell "no heartbeat
 * BUT fs active" (working — e.g. codex streaming to stderr, or claude -p editing
 * files) apart from "no heartbeat AND fs inert" (genuinely stalled), fixing the
 * false-`stalled` verdict (debrief LeaseUp P1#1).
 *
 * @param {string} root - Passed through to getRuntimeLogPath to locate the logs.
 * @param {string} assignmentId - Assignment whose captured logs are inspected.
 * @param {string} [worktreePath] - Worktree to scan; skipped when falsy.
 * @returns {number|undefined} Newest observed mtime, or undefined when nothing
 *   is observable.
 */
export function latestActivityMs(root, assignmentId, worktreePath) {
    const observed = [];
    for (const stream of ['stdout', 'stderr']) {
        try {
            observed.push(fs.statSync(getRuntimeLogPath(root, assignmentId, stream)).mtimeMs);
        }
        catch { /* no log */ }
    }
    if (worktreePath) {
        observed.push(latestWorktreeFileMtimeMs(worktreePath));
    }
    // Keep the largest defined sample; undefined entries (empty worktree scan) are skipped.
    return observed.reduce((newest, ms) => (ms !== undefined && (newest === undefined || ms > newest) ? ms : newest), undefined);
}
|
|
102
174
|
//# sourceMappingURL=runtime-signals.js.map
|