baxian 1.2.21 → 1.2.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/manager.d.ts +8 -0
- package/dist/agent/manager.d.ts.map +1 -1
- package/dist/agent/manager.js +311 -27
- package/dist/agent/manager.js.map +1 -1
- package/dist/agent/phase-signal-watcher.d.ts +6 -0
- package/dist/agent/phase-signal-watcher.d.ts.map +1 -1
- package/dist/agent/phase-signal-watcher.js +62 -0
- package/dist/agent/phase-signal-watcher.js.map +1 -1
- package/dist/agent/phase-signal.d.ts +4 -1
- package/dist/agent/phase-signal.d.ts.map +1 -1
- package/dist/agent/phase-signal.js +2 -1
- package/dist/agent/phase-signal.js.map +1 -1
- package/dist/agent/prompt.d.ts +2 -1
- package/dist/agent/prompt.d.ts.map +1 -1
- package/dist/agent/prompt.js +152 -278
- package/dist/agent/prompt.js.map +1 -1
- package/dist/agent/tmux.d.ts +1 -0
- package/dist/agent/tmux.d.ts.map +1 -1
- package/dist/agent/tmux.js +42 -10
- package/dist/agent/tmux.js.map +1 -1
- package/dist/api/projects.js +9 -2
- package/dist/api/projects.js.map +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/shared/constants.js +5 -5
- package/dist/shared/constants.js.map +1 -1
- package/dist/skill/registry.d.ts +2 -0
- package/dist/skill/registry.d.ts.map +1 -1
- package/dist/skill/registry.js +16 -0
- package/dist/skill/registry.js.map +1 -1
- package/dist/skills/baxian-greeting/SKILL.md +15 -0
- package/dist/skills/baxian-greeting/agents/openai.yaml +2 -0
- package/dist/skills/baxian-pr-feedback/SKILL.md +20 -3
- package/dist/skills/baxian-pr-recheck/SKILL.md +11 -11
- package/dist/skills/baxian-pr-review/SKILL.md +11 -11
- package/dist/skills/baxian-server-feedback/SKILL.md +38 -0
- package/dist/skills/baxian-server-feedback/agents/openai.yaml +2 -0
- package/dist/skills/baxian-server-review/SKILL.md +55 -0
- package/dist/skills/baxian-server-review/agents/openai.yaml +2 -0
- package/dist/skills/baxian-signals/SKILL.md +11 -5
- package/dist/skills/baxian-task-check/SKILL.md +35 -20
- package/dist/web/assets/index-B3nBJsTG.js +4 -0
- package/dist/web/index.html +1 -1
- package/package.json +1 -1
- package/dist/web/assets/index-C9dvXS8C.js +0 -4
package/dist/agent/manager.js
CHANGED
|
@@ -13,7 +13,7 @@ import { WorktreeManager } from './worktree.js';
|
|
|
13
13
|
import { RepoStore, createRepoStoreCache } from './repo-store.js';
|
|
14
14
|
import { PhaseSignalWatcher } from './phase-signal-watcher.js';
|
|
15
15
|
import { ReviewTransport } from './review-transport.js';
|
|
16
|
-
import { buildPromptInline, buildPostMergeCleanupPrompt, PromptSizeError, RequiredSkillsMissingError, MAX_PROMPT_BYTES_ROUTE_LIMIT, } from './prompt.js';
|
|
16
|
+
import { buildPromptInline, buildGreetingPrompt, buildPostMergeCleanupPrompt, PromptSizeError, RequiredSkillsMissingError, MAX_PROMPT_BYTES_ROUTE_LIMIT, } from './prompt.js';
|
|
17
17
|
import { ApiError } from '../errors.js';
|
|
18
18
|
import { prepareConfig } from '../config/loader.js';
|
|
19
19
|
export class EnsureSessionError extends Error {
|
|
@@ -53,7 +53,7 @@ export function buildLaunchCommand(agent) {
|
|
|
53
53
|
const segments = [];
|
|
54
54
|
switch (agent.runtime) {
|
|
55
55
|
case 'claude-code':
|
|
56
|
-
segments.push('env CLAUDE_CODE_NO_FLICKER=1 claude --permission-mode bypassPermissions');
|
|
56
|
+
segments.push('env CLAUDE_CODE_NO_FLICKER=1 CLAUDE_CODE_DISABLE_FEEDBACK_SURVEY=1 claude --permission-mode bypassPermissions');
|
|
57
57
|
break;
|
|
58
58
|
case 'codex':
|
|
59
59
|
segments.push('codex --dangerously-bypass-approvals-and-sandbox');
|
|
@@ -123,10 +123,16 @@ function cancelPhaseDowngrades(prev, next) {
|
|
|
123
123
|
// A prompt line still holding the typed `/clear` (e.g. `❯ /clear`, `› /clear`) = the Enter was swallowed,
|
|
124
124
|
// so /clear was never submitted. After a real submission /clear wipes the screen and the composer is empty.
|
|
125
125
|
const CLEAR_PENDING_IN_COMPOSER_RE = /(?:^|\n)[ \t]*[❯>›→][ \t]*\/clear\b/;
|
|
126
|
+
// A greeting capability failure is NOT cleared by a plain Resume or by recover()'s auto-release:
|
|
127
|
+
// the agent must re-prove it can signal (restart/retry re-runs the handshake). Auto-releasing it
|
|
128
|
+
// would slip an unverified agent back to dispatchable, defeating the whole bootstrap gate.
|
|
129
|
+
const REGREET_REQUIRED_HOLD_PHASES = new Set(['greeting_failed']);
|
|
126
130
|
// Resume / recover 共用:决定 Held agent 的 binding 是否随状态恢复一起清掉。
|
|
127
131
|
export function shouldReleaseHeldBinding(state, boundTask) {
|
|
128
132
|
if (state.awaitingPhase != null && UNCLEARED_PANE_PHASES.has(state.awaitingPhase))
|
|
129
133
|
return false;
|
|
134
|
+
if (state.awaitingPhase != null && REGREET_REQUIRED_HOLD_PHASES.has(state.awaitingPhase))
|
|
135
|
+
return false;
|
|
130
136
|
const taskIsTerminal = !!boundTask && TERMINAL_STATUSES.includes(boundTask.status);
|
|
131
137
|
const turnCompleted = state.awaitingPhase != null && TURN_COMPLETED_AWAITING_PHASES.has(state.awaitingPhase);
|
|
132
138
|
return !boundTask || taskIsTerminal || turnCompleted;
|
|
@@ -167,6 +173,9 @@ export class AgentManager {
|
|
|
167
173
|
platformRunner;
|
|
168
174
|
imageStagingRoot;
|
|
169
175
|
bootstrapTimeoutsMs;
|
|
176
|
+
// Bootstrap greeting handshake: total attempts before holding the agent, to absorb a
|
|
177
|
+
// single transient slow/garbled reply without failing a genuinely capable agent.
|
|
178
|
+
greetingMaxAttempts = 2;
|
|
170
179
|
runtimeMenuWatchers = new Map();
|
|
171
180
|
runtimeMenuPollIntervalMs = 10_000;
|
|
172
181
|
compactIdleWaitMs = 5 * 60_000;
|
|
@@ -228,6 +237,7 @@ export class AgentManager {
|
|
|
228
237
|
this.bootstrapTimeoutsMs = {
|
|
229
238
|
trustDialog: deps.bootstrapTimeoutsMs?.trustDialog ?? 10_000,
|
|
230
239
|
waitReplReady: deps.bootstrapTimeoutsMs?.waitReplReady ?? 30_000,
|
|
240
|
+
greeting: deps.bootstrapTimeoutsMs?.greeting ?? 120_000,
|
|
231
241
|
};
|
|
232
242
|
}
|
|
233
243
|
withTaskLock(fn) {
|
|
@@ -583,6 +593,22 @@ export class AgentManager {
|
|
|
583
593
|
};
|
|
584
594
|
try {
|
|
585
595
|
const result = await this.ensureSession(agentId, 'create');
|
|
596
|
+
// Capability gate: hold the agent until it proves (via the baxian-signals skill)
|
|
597
|
+
// that it can load skills and echo a valid greeting signal back through its pane.
|
|
598
|
+
// An agent that cannot complete the greeting but still reached 'ok' would silently hang on its first real signal.
|
|
599
|
+
if (!(await this.runGreetingHandshake(agentId, cfgAtStart, result.paneId))) {
|
|
600
|
+
// A newer create may have rotated creationToken during the (slow) greeting wait.
|
|
601
|
+
// Mirror the success token-mismatch path: kill the orphan session we created so the
|
|
602
|
+
// next generation's `create` doesn't trip on a pre-existing tmux session.
|
|
603
|
+
const current = await this.agentStore.get(agentId);
|
|
604
|
+
if (current && current.creationToken !== creationToken) {
|
|
605
|
+
console.warn(`[bootstrap] ${agentId} creationToken changed during greeting — killing orphan session`);
|
|
606
|
+
await tryKillOrphanSession('greeting-failure token mismatch');
|
|
607
|
+
return;
|
|
608
|
+
}
|
|
609
|
+
await this.markGreetingFailed(agentId, creationToken);
|
|
610
|
+
return;
|
|
611
|
+
}
|
|
586
612
|
let resolvedExisting = null;
|
|
587
613
|
const now = new Date().toISOString();
|
|
588
614
|
await this.agentStore.update(agentId, (existing) => {
|
|
@@ -635,10 +661,144 @@ export class AgentManager {
|
|
|
635
661
|
await this.markBootstrapFailed(agentId, creationToken, message);
|
|
636
662
|
}
|
|
637
663
|
}
|
|
664
|
+
// Bootstrap capability handshake: inject the greeting prompt and wait for the agent to
|
|
665
|
+
// echo [bx:greeting:<token>] per the baxian-signals skill. Returns true on a verified
|
|
666
|
+
// echo, false on timeout / lost session across all attempts. No task binding exists yet,
|
|
667
|
+
// so this drives the low-level inject + the pane-scoped awaitOnce directly.
|
|
668
|
+
async runGreetingHandshake(agentId, agent, paneId) {
|
|
669
|
+
const watcher = this.phaseSignalWatcher;
|
|
670
|
+
if (!watcher)
|
|
671
|
+
return true; // no watcher wired (minimal harness) — nothing to gate on
|
|
672
|
+
const tmux = new TmuxManager(this.createRunnerFor(agent));
|
|
673
|
+
for (let attempt = 1; attempt <= this.greetingMaxAttempts; attempt++) {
|
|
674
|
+
const token = createSignalToken();
|
|
675
|
+
try {
|
|
676
|
+
// Inject FIRST, then arm the wait: if the paste fails the agent never sees the
|
|
677
|
+
// prompt and cannot echo, so skip the (default 120s) wait entirely and retry.
|
|
678
|
+
await this.injectAndAwaitAckSteps(tmux, paneId, buildGreetingPrompt(token, agent.runtime), agentId, agent.runtime);
|
|
679
|
+
}
|
|
680
|
+
catch (err) {
|
|
681
|
+
console.warn(`[bootstrap] greeting inject failed for ${agentId} (attempt ${attempt}):`, err);
|
|
682
|
+
// ack_unknown = injectAndAwaitAckSteps could NOT confirm the composer was cleared, so the
|
|
683
|
+
// next paste would land on a live/unconfirmed input stream. Hold rather than concatenate.
|
|
684
|
+
// A raw (non-ack_unknown) throw means the composer was already C-c'd → safe to retry.
|
|
685
|
+
if (err instanceof DispatchTerminalError && err.reason === 'ack_unknown')
|
|
686
|
+
break;
|
|
687
|
+
continue;
|
|
688
|
+
}
|
|
689
|
+
const outcome = await watcher.awaitOnce({
|
|
690
|
+
agentId,
|
|
691
|
+
kind: 'greeting',
|
|
692
|
+
token,
|
|
693
|
+
timeoutMs: this.bootstrapTimeoutsMs.greeting,
|
|
694
|
+
});
|
|
695
|
+
if (outcome === 'matched')
|
|
696
|
+
return true;
|
|
697
|
+
console.warn(`[bootstrap] greeting attempt ${attempt}/${this.greetingMaxAttempts} for ${agentId}: ${outcome}`);
|
|
698
|
+
// 'no-agent' = config removed, unrecoverable. 'timeout'/'session-gone' (incl. a transient
|
|
699
|
+
// subscribe fault disguised as session-gone) keep the remaining retries — a one-off pane
|
|
700
|
+
// jitter must not fail a genuinely capable agent.
|
|
701
|
+
if (outcome === 'no-agent')
|
|
702
|
+
break;
|
|
703
|
+
// An ack-timeout paste returns acked:false and leaves the unsubmitted greeting prompt in the
|
|
704
|
+
// composer; the next injectPrompt would concatenate onto it. Clear it before retrying — if the
|
|
705
|
+
// composer can't be confirmed clean, hold rather than paste onto a dirty/unsafe one.
|
|
706
|
+
if (attempt < this.greetingMaxAttempts && !(await this.clearComposerForReuse(tmux, paneId, agentId))) {
|
|
707
|
+
break;
|
|
708
|
+
}
|
|
709
|
+
}
|
|
710
|
+
return false;
|
|
711
|
+
}
|
|
712
|
+
// Greeting failed: hold the agent for a human (awaiting_human → not dispatchable) with a
|
|
713
|
+
// reason that names the capability gap. Clearing creationToken drops the "starting" pill;
|
|
714
|
+
// the operator fixes the runtime and then restarts/retries the agent (which re-greets). Resume does NOT clear this hold.
|
|
715
|
+
async markGreetingFailed(agentId, creationToken) {
|
|
716
|
+
const existing = await this.agentStore.get(agentId);
|
|
717
|
+
if (!existing)
|
|
718
|
+
return;
|
|
719
|
+
if (creationToken !== undefined && existing.creationToken !== creationToken)
|
|
720
|
+
return;
|
|
721
|
+
const now = new Date().toISOString();
|
|
722
|
+
const reason = 'Greeting capability check failed: the agent did not echo a valid [bx:greeting] signal ' +
|
|
723
|
+
'per the baxian-signals skill within the timeout. Its runtime/model may not meet baxian ' +
|
|
724
|
+
'requirements (skill loading or pane signalling). Fix the runtime, then restart-repl or ' +
|
|
725
|
+
'retry the agent to re-run the check (Resume will not clear it — capability must be re-proven).';
|
|
726
|
+
let wrote = false;
|
|
727
|
+
await this.agentStore.update(agentId, (fresh) => {
|
|
728
|
+
if (!fresh)
|
|
729
|
+
return AGENT_STORE_NOOP;
|
|
730
|
+
if (creationToken !== undefined && fresh.creationToken !== creationToken)
|
|
731
|
+
return AGENT_STORE_NOOP;
|
|
732
|
+
wrote = true;
|
|
733
|
+
return {
|
|
734
|
+
...fresh,
|
|
735
|
+
creationToken: undefined,
|
|
736
|
+
status: 'awaiting_human',
|
|
737
|
+
awaitingPhase: 'greeting_failed',
|
|
738
|
+
awaitingReason: reason,
|
|
739
|
+
awaitingSince: now,
|
|
740
|
+
updatedAt: now,
|
|
741
|
+
};
|
|
742
|
+
});
|
|
743
|
+
if (!wrote)
|
|
744
|
+
return;
|
|
745
|
+
await this.safeEmit({
|
|
746
|
+
id: '',
|
|
747
|
+
type: 'human.intervention',
|
|
748
|
+
timestamp: now,
|
|
749
|
+
projectId: existing.projectId,
|
|
750
|
+
agentId,
|
|
751
|
+
data: { phase: 'greeting_failed', reason },
|
|
752
|
+
});
|
|
753
|
+
}
|
|
754
|
+
// Operator restart-repl/retry recovery for a greeting_failed agent: re-run the handshake on
|
|
755
|
+
// the freshly-restarted REPL. Only a passing greeting clears the hold; a failure re-holds it.
|
|
756
|
+
// Returns true when it took ownership of a greeting_failed agent (caller skips its normal clear).
|
|
757
|
+
async regreetHeldAgent(agentId) {
|
|
758
|
+
const agent = this.getAgentConfig(agentId);
|
|
759
|
+
if (!agent)
|
|
760
|
+
return false;
|
|
761
|
+
const state = await this.agentStore.get(agentId);
|
|
762
|
+
if (state?.awaitingPhase !== 'greeting_failed')
|
|
763
|
+
return false;
|
|
764
|
+
// Identity of THIS hold: a greeting_failed binding carries no creationToken, so a DELETE+recreate
|
|
765
|
+
// during the (slow, up to 2× timeout) handshake is detected via awaitingSince — a stale regreet
|
|
766
|
+
// must never write onto the recreated generation.
|
|
767
|
+
const guardSince = state.awaitingSince;
|
|
768
|
+
let paneId = state.paneId;
|
|
769
|
+
if (!paneId) {
|
|
770
|
+
try {
|
|
771
|
+
paneId = await new TmuxManager(this.createRunnerFor(agent)).getSinglePaneId(agentId);
|
|
772
|
+
}
|
|
773
|
+
catch (err) {
|
|
774
|
+
console.warn(`[regreet] cannot resolve pane for ${agentId}:`, err);
|
|
775
|
+
return true; // leave it held; operator can restart/retry again
|
|
776
|
+
}
|
|
777
|
+
}
|
|
778
|
+
if (!(await this.runGreetingHandshake(agentId, agent, paneId))) {
|
|
779
|
+
// Failed → leave the existing hold untouched. Do NOT re-write it: an unguarded write could land
|
|
780
|
+
// on a DELETE+recreated generation that reused this id. Operator can restart/retry again.
|
|
781
|
+
return true;
|
|
782
|
+
}
|
|
783
|
+
// Passed → clear the hold, but only if this exact greeting_failed generation is still present.
|
|
784
|
+
await this.agentStore.update(agentId, (fresh) => {
|
|
785
|
+
if (!fresh || fresh.awaitingPhase !== 'greeting_failed' || fresh.awaitingSince !== guardSince) {
|
|
786
|
+
return AGENT_STORE_NOOP;
|
|
787
|
+
}
|
|
788
|
+
const { status: _s, awaitingPhase: _ap, awaitingReason: _ar, awaitingSince: _as, ...ready } = fresh;
|
|
789
|
+
return { ...ready, updatedAt: new Date().toISOString() };
|
|
790
|
+
});
|
|
791
|
+
return true;
|
|
792
|
+
}
|
|
638
793
|
async markDialogPending(agentId, creationToken, opts = {}) {
|
|
639
794
|
const existing = await this.agentStore.get(agentId);
|
|
640
795
|
if (!existing)
|
|
641
796
|
return;
|
|
797
|
+
// A greeting capability failure must not be downgraded into a dialog-resolvable hold: doing so
|
|
798
|
+
// would let Resume release a never-re-greeted agent. Keep the greeting_failed hold; restart/retry
|
|
799
|
+
// re-runs the handshake.
|
|
800
|
+
if (existing.awaitingPhase != null && REGREET_REQUIRED_HOLD_PHASES.has(existing.awaitingPhase))
|
|
801
|
+
return;
|
|
642
802
|
// runtime path snapshot 全空时直接拒绝——既无 paneId 也无 taskId 作 generation 证据,
|
|
643
803
|
// 旧 callback 通过 guard 污染同样 idle 的新 agent 的风险无法排除。
|
|
644
804
|
if (opts.runtimePath && opts.expectedPaneId === undefined && opts.expectedTaskId === undefined) {
|
|
@@ -676,6 +836,8 @@ export class AgentManager {
|
|
|
676
836
|
await this.agentStore.update(agentId, (fresh) => {
|
|
677
837
|
if (!fresh)
|
|
678
838
|
return AGENT_STORE_NOOP;
|
|
839
|
+
if (fresh.awaitingPhase != null && REGREET_REQUIRED_HOLD_PHASES.has(fresh.awaitingPhase))
|
|
840
|
+
return AGENT_STORE_NOOP;
|
|
679
841
|
if (opts.runtimePath) {
|
|
680
842
|
if (fresh.creationToken !== undefined)
|
|
681
843
|
return AGENT_STORE_NOOP;
|
|
@@ -924,11 +1086,24 @@ export class AgentManager {
|
|
|
924
1086
|
// 推 failed)+ lock 在;ready 后切到 'agent_dialog_resolved_runtime' phase,让 resumeAgent 放行
|
|
925
1087
|
// 让 operator 显式确认。仍保留 awaiting_human + lock 防止"dialog ready 自动派下一 task 撞 pane"。
|
|
926
1088
|
const isBootstrapPath = creationToken !== undefined;
|
|
1089
|
+
// A create bootstrap that was blocked on a startup dialog still owes the greeting gate:
|
|
1090
|
+
// now that the dialog is dismissed and the REPL is ready, run it before clearing to 'ok',
|
|
1091
|
+
// else a dialog-resolved agent would reach the dispatch pool without proving capability.
|
|
1092
|
+
if (isBootstrapPath && !(await this.runGreetingHandshake(agentId, cfg, paneId))) {
|
|
1093
|
+
await this.markGreetingFailed(agentId, creationToken);
|
|
1094
|
+
return;
|
|
1095
|
+
}
|
|
927
1096
|
await this.agentStore.update(agentId, (fresh) => {
|
|
928
1097
|
if (!fresh)
|
|
929
1098
|
return AGENT_STORE_NOOP;
|
|
930
1099
|
if (generationMismatch(fresh))
|
|
931
1100
|
return AGENT_STORE_NOOP;
|
|
1101
|
+
// A greeting capability hold must not be downgraded to a dialog-resolvable phase here (the
|
|
1102
|
+
// runtime branch would otherwise rewrite it to agent_dialog_resolved_runtime, which Resume
|
|
1103
|
+
// then releases un-regreeted). Preserve it; restart/retry's regreet is its recovery path.
|
|
1104
|
+
if (fresh.awaitingPhase != null && REGREET_REQUIRED_HOLD_PHASES.has(fresh.awaitingPhase)) {
|
|
1105
|
+
return AGENT_STORE_NOOP;
|
|
1106
|
+
}
|
|
932
1107
|
projectIdForEmit = fresh.projectId;
|
|
933
1108
|
wrote = true;
|
|
934
1109
|
if (isBootstrapPath) {
|
|
@@ -1423,16 +1598,17 @@ export class AgentManager {
|
|
|
1423
1598
|
const result = await this.withTaskLock(async () => {
|
|
1424
1599
|
const state = await this.agentStore.get(agentId);
|
|
1425
1600
|
if (!state)
|
|
1426
|
-
return { resumed: false, releasedBinding: false };
|
|
1601
|
+
return { resumed: false, releasedBinding: false, reason: 'Agent state not found.' };
|
|
1427
1602
|
if (state.status !== 'awaiting_human') {
|
|
1428
|
-
return { resumed: false, releasedBinding: false };
|
|
1603
|
+
return { resumed: false, releasedBinding: false, reason: 'Agent is not awaiting human; nothing to resume.' };
|
|
1429
1604
|
}
|
|
1430
1605
|
// creationToken 仍 set = bootstrap dialog 仍未解决。Resume 不能让它"继续"——
|
|
1431
1606
|
// dialog 在 pane 里需要 operator 通过 web terminal 处理,slowPoll 解决后自动清状态。
|
|
1432
1607
|
// 如果 operator 想放弃这个 agent,应该走 DELETE 路径。
|
|
1433
1608
|
if (state.creationToken) {
|
|
1434
|
-
|
|
1435
|
-
|
|
1609
|
+
const reason = 'Bootstrap dialog still unresolved; resolve it via the web terminal or DELETE the agent.';
|
|
1610
|
+
console.warn(`[AgentManager] resumeAgent: agent ${agentId} still has creationToken — ${reason}`);
|
|
1611
|
+
return { resumed: false, releasedBinding: false, reason };
|
|
1436
1612
|
}
|
|
1437
1613
|
const boundTask = state.taskId ? await this.taskStore.get(state.taskId) : null;
|
|
1438
1614
|
// "prompt 可能仍在 pane 中跑"类 phase + bound task 仍 active 时 refuse:Resume 让
|
|
@@ -1448,8 +1624,9 @@ export class AgentManager {
|
|
|
1448
1624
|
if (state.awaitingPhase != null
|
|
1449
1625
|
&& PROMPT_MAYBE_RUNNING_PHASES.has(state.awaitingPhase)
|
|
1450
1626
|
&& boundTask && ACTIVE_TASK_STATUSES.has(boundTask.status)) {
|
|
1451
|
-
|
|
1452
|
-
|
|
1627
|
+
const reason = `Prompt may still be running (${state.awaitingPhase}); Resume is blocked until the task outcome arrives. Cancel task ${state.taskId} or DELETE the agent to recover.`;
|
|
1628
|
+
console.warn(`[AgentManager] resumeAgent: agent ${agentId} — ${reason}`);
|
|
1629
|
+
return { resumed: false, releasedBinding: false, reason };
|
|
1453
1630
|
}
|
|
1454
1631
|
// agent_dialog_pending: pane 仍卡 startup dialog,REPL 未 ready。Resume 让
|
|
1455
1632
|
// shouldReleaseHeldBinding 看 task terminal/missing 放行后会清 binding/lock,下一次
|
|
@@ -1458,8 +1635,9 @@ export class AgentManager {
|
|
|
1458
1635
|
// agent_dialog_resolved_runtime(Resume 放行)或 bootstrap path 直接清 Held → status='ok',
|
|
1459
1636
|
// 或 DELETE agent。
|
|
1460
1637
|
if (state.awaitingPhase === 'agent_dialog_pending') {
|
|
1461
|
-
|
|
1462
|
-
|
|
1638
|
+
const reason = 'Startup dialog still pending; Resume cannot dismiss it. Dismiss the dialog via the web terminal (baxian will auto-resume) or DELETE the agent.';
|
|
1639
|
+
console.warn(`[AgentManager] resumeAgent: agent ${agentId} — ${reason}`);
|
|
1640
|
+
return { resumed: false, releasedBinding: false, reason };
|
|
1463
1641
|
}
|
|
1464
1642
|
// agent_dialog_resolved_runtime + active task:正常路径下 handleDialogPendingFromRuntime
|
|
1465
1643
|
// 已 fail task → boundTask 应 terminal。bound task 仍 active 表示 crash window
|
|
@@ -1468,8 +1646,9 @@ export class AgentManager {
|
|
|
1468
1646
|
// refuse Resume,提示 operator 显式 cancel task 或 DELETE agent。
|
|
1469
1647
|
if (state.awaitingPhase === 'agent_dialog_resolved_runtime'
|
|
1470
1648
|
&& boundTask && ACTIVE_TASK_STATUSES.has(boundTask.status)) {
|
|
1471
|
-
|
|
1472
|
-
|
|
1649
|
+
const reason = `Dialog resolved but task ${state.taskId} is still active and its prompt was never injected; Resume would strand it. Cancel the task or DELETE the agent.`;
|
|
1650
|
+
console.warn(`[AgentManager] resumeAgent: agent ${agentId} — ${reason}`);
|
|
1651
|
+
return { resumed: false, releasedBinding: false, reason };
|
|
1473
1652
|
}
|
|
1474
1653
|
// code-dispatch-failed: the code-phase prompt never reached the pane (spec
|
|
1475
1654
|
// approval already transitioned the task). Resume = clear the hold AND
|
|
@@ -1499,15 +1678,25 @@ export class AgentManager {
|
|
|
1499
1678
|
// Refuse while the task is active; operator must cancel the task or DELETE the agent to retry.
|
|
1500
1679
|
if (state.awaitingPhase?.startsWith('signal-arm-failed')
|
|
1501
1680
|
&& boundTask && ACTIVE_TASK_STATUSES.has(boundTask.status)) {
|
|
1502
|
-
|
|
1503
|
-
|
|
1681
|
+
const reason = `The dispatched prompt's pane signal has no consumer and Resume cannot rebuild the watcher; cancel task ${state.taskId} or DELETE the agent to retry.`;
|
|
1682
|
+
console.warn(`[AgentManager] resumeAgent: agent ${agentId} ${state.awaitingPhase} — ${reason}`);
|
|
1683
|
+
return { resumed: false, releasedBinding: false, reason };
|
|
1504
1684
|
}
|
|
1505
1685
|
// Un-cleared pane (cancel mid-clear or /clear unconfirmed): Resume would free + reuse it (terminal
|
|
1506
1686
|
// task → shouldReleaseHeldBinding) and leak the cancelled task's context. Refuse; only DELETE (which
|
|
1507
1687
|
// destroys the pane) is a safe recovery.
|
|
1508
1688
|
if (state.awaitingPhase != null && UNCLEARED_PANE_PHASES.has(state.awaitingPhase)) {
|
|
1509
|
-
|
|
1510
|
-
|
|
1689
|
+
const reason = 'The pane holds un-cleared context from a cancelled task; Resume would leak it into the next task. DELETE the agent to discard it.';
|
|
1690
|
+
console.warn(`[AgentManager] resumeAgent: agent ${agentId} ${state.awaitingPhase} — ${reason}`);
|
|
1691
|
+
return { resumed: false, releasedBinding: false, reason };
|
|
1692
|
+
}
|
|
1693
|
+
// A greeting capability failure must be RE-PROVEN, not Resumed away: the default path below
|
|
1694
|
+
// flips status→'ok' regardless of shouldReleaseHeldBinding, which would put an unverified
|
|
1695
|
+
// agent back in the dispatch pool. The recovery path is restart-repl / retry (re-greets).
|
|
1696
|
+
if (state.awaitingPhase != null && REGREET_REQUIRED_HOLD_PHASES.has(state.awaitingPhase)) {
|
|
1697
|
+
const reason = 'Greeting capability check failed; the runtime must re-prove it. Resume cannot clear this hold — use Restart REPL to re-run the greeting check.';
|
|
1698
|
+
console.warn(`[AgentManager] resumeAgent: agent ${agentId} ${state.awaitingPhase} — ${reason}`);
|
|
1699
|
+
return { resumed: false, releasedBinding: false, reason };
|
|
1511
1700
|
}
|
|
1512
1701
|
const now = new Date().toISOString();
|
|
1513
1702
|
const shouldReleaseBinding = shouldReleaseHeldBinding(state, boundTask);
|
|
@@ -1578,7 +1767,11 @@ export class AgentManager {
|
|
|
1578
1767
|
await this.markAwaitingHuman(agentId, 'code-dispatch-failed', 'Code-phase redispatch on Resume failed; Resume again to retry or cancel the task.', { expectedTaskId: result.redispatchCodeTaskId }).catch(() => undefined);
|
|
1579
1768
|
}
|
|
1580
1769
|
}
|
|
1581
|
-
return {
|
|
1770
|
+
return {
|
|
1771
|
+
resumed: result.resumed,
|
|
1772
|
+
releasedBinding: result.releasedBinding,
|
|
1773
|
+
...(result.reason ? { reason: result.reason } : {}),
|
|
1774
|
+
};
|
|
1582
1775
|
}
|
|
1583
1776
|
async resolvePaneId(state, cfg) {
|
|
1584
1777
|
if (state.paneId)
|
|
@@ -1935,15 +2128,51 @@ export class AgentManager {
|
|
|
1935
2128
|
else if (!SHELL.test(cmd)) {
|
|
1936
2129
|
throw new Error(`restart-repl precondition failed: unexpected pane state "${cmd}"`);
|
|
1937
2130
|
}
|
|
2131
|
+
// Re-materialize skills BEFORE the relaunch so the fresh REPL scans the current tree. restart-repl
|
|
2132
|
+
// is the operator's recovery for a greeting_failed agent whose on-disk skill tree was stale/missing,
|
|
2133
|
+
// and unlike retry it does not go through ensureSession's provisionRepoSkills. Best-effort: a
|
|
2134
|
+
// provisioning blip must not block the REPL restart (a still-broken tree surfaces on the next regreet).
|
|
2135
|
+
const project = this.getProjectConfig(cfg.projectId);
|
|
2136
|
+
let workdir;
|
|
2137
|
+
let provisioned = false;
|
|
2138
|
+
if (project) {
|
|
2139
|
+
try {
|
|
2140
|
+
workdir = (await this.ensureWorkdir(cfg, project, runner)).workdir;
|
|
2141
|
+
await this.provisionRepoSkills(runner, cfg, workdir);
|
|
2142
|
+
provisioned = true;
|
|
2143
|
+
}
|
|
2144
|
+
catch (err) {
|
|
2145
|
+
console.warn(`[restart-repl] skill re-provision failed for ${agentId} (continuing):`, err);
|
|
2146
|
+
}
|
|
2147
|
+
}
|
|
1938
2148
|
const runtime = agentRuntimeKindFor(cfg);
|
|
1939
|
-
|
|
1940
|
-
|
|
1941
|
-
|
|
1942
|
-
|
|
1943
|
-
|
|
1944
|
-
|
|
1945
|
-
|
|
1946
|
-
|
|
2149
|
+
const relaunch = async () => {
|
|
2150
|
+
await tmux.sendKeysToPane(paneId, `${buildLaunchCommand(cfg)}\n`);
|
|
2151
|
+
await tmux.handleTrustDialog(paneId, runtime, {
|
|
2152
|
+
timeoutMs: this.bootstrapTimeoutsMs.trustDialog,
|
|
2153
|
+
});
|
|
2154
|
+
await tmux.waitReplReady(paneId, runtime, {
|
|
2155
|
+
timeoutMs: this.bootstrapTimeoutsMs.waitReplReady,
|
|
2156
|
+
scrollback: 0,
|
|
2157
|
+
});
|
|
2158
|
+
// Only re-tag when the tree was actually re-provisioned. Tagging after a FAILED provision
|
|
2159
|
+
// would stamp the current version onto a REPL that scanned the stale/missing tree, so the
|
|
2160
|
+
// next ensureSession reads it as fresh and reuses it instead of self-healing (rebuild). A
|
|
2161
|
+
// successful provision DOES need the tag, else ensureSession needlessly kills this REPL and
|
|
2162
|
+
// drops the agent onto a different, ungreeted one.
|
|
2163
|
+
if (provisioned) {
|
|
2164
|
+
await this.tagSessionSkillsVersion(tmux, agentId);
|
|
2165
|
+
}
|
|
2166
|
+
};
|
|
2167
|
+
// Hold the per-skills-dir lock ACROSS the relaunch (like buildFreshSessionLocked) so a concurrent
|
|
2168
|
+
// same-dir agent's provisioning — which transiently removes helper files (agents/openai.yaml) —
|
|
2169
|
+
// can't make this fresh REPL scan an incomplete skill tree.
|
|
2170
|
+
if (workdir !== undefined) {
|
|
2171
|
+
await this.runUnderSkillDirLock(this.skillDirLockKey(cfg, workdir), relaunch);
|
|
2172
|
+
}
|
|
2173
|
+
else {
|
|
2174
|
+
await relaunch();
|
|
2175
|
+
}
|
|
1947
2176
|
await this.agentStore.update(agentId, (state) => {
|
|
1948
2177
|
if (!state)
|
|
1949
2178
|
return AGENT_STORE_NOOP;
|
|
@@ -2075,6 +2304,10 @@ export class AgentManager {
|
|
|
2075
2304
|
agent: cfg,
|
|
2076
2305
|
worktreePath: worktreePathBound,
|
|
2077
2306
|
skillRegistry: this.skillRegistry,
|
|
2307
|
+
// A representative token so the preview exercises the SAME required-skill set (baxian-signals)
|
|
2308
|
+
// and worst-case byte size the real signal-emitting dispatch will build — else a missing
|
|
2309
|
+
// baxian-signals only surfaces async after the task is already created (201).
|
|
2310
|
+
signalToken: 'preview-signal-token',
|
|
2078
2311
|
});
|
|
2079
2312
|
return Buffer.byteLength(fullPrompt, 'utf8');
|
|
2080
2313
|
}
|
|
@@ -3268,6 +3501,15 @@ export class AgentManager {
|
|
|
3268
3501
|
catch { }
|
|
3269
3502
|
return false;
|
|
3270
3503
|
}
|
|
3504
|
+
// Pane exists now but the prompt is not out — arm here so a request it triggers is a live chunk,
|
|
3505
|
+
// not snapshot-suppressed scrollback. Abort cleanly (no binding written yet) if it cannot arm.
|
|
3506
|
+
if (opts.armBeforeInject && !(await opts.armBeforeInject())) {
|
|
3507
|
+
try {
|
|
3508
|
+
await worktree.removeWithBranch(workdir, worktreePath, customBranch);
|
|
3509
|
+
}
|
|
3510
|
+
catch { }
|
|
3511
|
+
return false;
|
|
3512
|
+
}
|
|
3271
3513
|
const now = new Date().toISOString();
|
|
3272
3514
|
let agentMarkedRunning = false;
|
|
3273
3515
|
try {
|
|
@@ -3668,6 +3910,10 @@ export class AgentManager {
|
|
|
3668
3910
|
return false;
|
|
3669
3911
|
}
|
|
3670
3912
|
}
|
|
3913
|
+
// Arm before paste (same reasoning as startSession): pane exists, prompt not out yet.
|
|
3914
|
+
if (opts.armBeforeInject && !(await opts.armBeforeInject())) {
|
|
3915
|
+
return false;
|
|
3916
|
+
}
|
|
3671
3917
|
const now = new Date().toISOString();
|
|
3672
3918
|
await this.agentStore.update(agentId, (latest) => {
|
|
3673
3919
|
if (!latest)
|
|
@@ -3778,6 +4024,31 @@ export class AgentManager {
|
|
|
3778
4024
|
// dialog-pending catch below, so a mid-bootstrap task blocked on a startup dialog isn't held forever.)
|
|
3779
4025
|
if (await this.rollbackUndeliveredBootstrap(state, agentConfig))
|
|
3780
4026
|
continue;
|
|
4027
|
+
// An incomplete create bootstrap (creationToken still set, no task) crashed before it
|
|
4028
|
+
// proved signal capability. Re-run the greeting gate — but in the BACKGROUND: recover() is
|
|
4029
|
+
// awaited before the server serves, and a synchronous handshake would block startup up to
|
|
4030
|
+
// 2×greeting-timeout per such agent (serially). creationToken stays set meanwhile, so
|
|
4031
|
+
// canDispatchWithBinding keeps the agent out of the pool until the handshake resolves.
|
|
4032
|
+
if (state.creationToken && !state.taskId) {
|
|
4033
|
+
const ct = state.creationToken;
|
|
4034
|
+
const pane = result.paneId;
|
|
4035
|
+
const cfg = agentConfig;
|
|
4036
|
+
const agentId = state.id;
|
|
4037
|
+
void (async () => {
|
|
4038
|
+
if (await this.runGreetingHandshake(agentId, cfg, pane)) {
|
|
4039
|
+
await this.agentStore.update(agentId, (latest) => {
|
|
4040
|
+
if (!latest || latest.creationToken !== ct)
|
|
4041
|
+
return AGENT_STORE_NOOP;
|
|
4042
|
+
const { creationToken: _ct, status: _s, awaitingPhase: _ap, awaitingReason: _ar, awaitingSince: _as, ...ready } = latest;
|
|
4043
|
+
return { ...ready, paneId: pane, updatedAt: new Date().toISOString() };
|
|
4044
|
+
});
|
|
4045
|
+
}
|
|
4046
|
+
else {
|
|
4047
|
+
await this.markGreetingFailed(agentId, ct);
|
|
4048
|
+
}
|
|
4049
|
+
})().catch((err) => console.warn(`[recover] background re-greet for ${agentId} crashed:`, err));
|
|
4050
|
+
continue;
|
|
4051
|
+
}
|
|
3781
4052
|
// recover 成功 = server 重启前 dialog_pending 的 agent 现在 REPL ready。
|
|
3782
4053
|
// 处理 Held:与 resumeAgent 共用 shouldReleaseHeldBinding 规则(task terminal/无 task /
|
|
3783
4054
|
// turn-completed phase → 同步清 binding;task active 且 phase 不在 completed 集合 → 保留 binding)。
|
|
@@ -3966,6 +4237,12 @@ export class AgentManager {
|
|
|
3966
4237
|
return AGENT_STORE_NOOP;
|
|
3967
4238
|
if (existing.creationToken)
|
|
3968
4239
|
return AGENT_STORE_NOOP;
|
|
4240
|
+
// A greeting capability hold must survive a transient/real tmux disappearance — wiping it
|
|
4241
|
+
// (it carries a paneId on the dialog path) would slip an unverified agent back into the
|
|
4242
|
+
// dispatch pool. Operator restart/retry re-greets; recover/Resume already preserve it.
|
|
4243
|
+
if (existing.awaitingPhase != null && REGREET_REQUIRED_HOLD_PHASES.has(existing.awaitingPhase)) {
|
|
4244
|
+
return AGENT_STORE_NOOP;
|
|
4245
|
+
}
|
|
3969
4246
|
timestamp = new Date().toISOString();
|
|
3970
4247
|
projectId = existing.projectId;
|
|
3971
4248
|
hadBinding = !!existing.taskId;
|
|
@@ -5649,6 +5926,9 @@ export class AgentManager {
|
|
|
5649
5926
|
...(opts.priorFindingsJson ? { serverPriorFindings: opts.priorFindingsJson } : {}),
|
|
5650
5927
|
...(opts.priorResponseJson ? { serverPriorResponse: opts.priorResponseJson } : {}),
|
|
5651
5928
|
...(opts.phase === 'spec' ? { currentSpecRound: newRound } : {}),
|
|
5929
|
+
// Arm the verdict + read-file watcher in the pane-exists / pre-paste window so a QA
|
|
5930
|
+
// [bx:read-file:...] emitted during the dispatch is a live chunk, not snapshot-suppressed.
|
|
5931
|
+
armBeforeInject: () => this.setupPhaseSignalWatcher(taskId, qaId, expectedKind, newToken, false, (req) => { void this.handleReadFileRequest(taskId, qaId, req); }),
|
|
5652
5932
|
};
|
|
5653
5933
|
// A continuation consumed the QA's reviewed signal (not the dev's entry
|
|
5654
5934
|
// signal): rollback restores the prior slice's review/token, so re-arm the
|
|
@@ -5669,6 +5949,9 @@ export class AgentManager {
|
|
|
5669
5949
|
: await this.startSession(taskId, qaId, dispatchPhase, sessionOpts);
|
|
5670
5950
|
}
|
|
5671
5951
|
catch (err) {
|
|
5952
|
+
// armBeforeInject may have armed the watcher before the failing paste — drop it so a stale
|
|
5953
|
+
// entry can't fire on a rolled-back / failed task (no-op if it never armed).
|
|
5954
|
+
this.stopPhaseSignalWatcher(taskId);
|
|
5672
5955
|
if (err instanceof DispatchTerminalError) {
|
|
5673
5956
|
await this.failTaskForDispatchError(taskId, dispatchPhase, qaId, err);
|
|
5674
5957
|
}
|
|
@@ -5685,6 +5968,9 @@ export class AgentManager {
|
|
|
5685
5968
|
throw err;
|
|
5686
5969
|
}
|
|
5687
5970
|
if (!started) {
|
|
5971
|
+
// Covers armBeforeInject returning false (watcher couldn't arm) as well as any other
|
|
5972
|
+
// pre-paste abort; stop is a no-op when nothing armed.
|
|
5973
|
+
this.stopPhaseSignalWatcher(taskId);
|
|
5688
5974
|
await rollback();
|
|
5689
5975
|
if (!opts.continuation) {
|
|
5690
5976
|
await this.releaseAgentForTask(qaId, taskId, 'idle').catch(() => undefined);
|
|
@@ -5701,8 +5987,6 @@ export class AgentManager {
|
|
|
5701
5987
|
});
|
|
5702
5988
|
return null;
|
|
5703
5989
|
}
|
|
5704
|
-
this.stopPhaseSignalWatcher(taskId);
|
|
5705
|
-
await this.armPostDispatchSignalOrHold(taskId, qaId, expectedKind, newToken, false, (req) => { void this.handleReadFileRequest(taskId, qaId, req); });
|
|
5706
5990
|
return await this.taskStore.get(taskId);
|
|
5707
5991
|
}
|
|
5708
5992
|
async dispatchServerFixToDev(taskId, findingsJson) {
|