botmux 2.70.0 → 2.71.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/cli/claude-code.d.ts.map +1 -1
- package/dist/adapters/cli/claude-code.js +36 -0
- package/dist/adapters/cli/claude-code.js.map +1 -1
- package/dist/adapters/cli/codex.d.ts.map +1 -1
- package/dist/adapters/cli/codex.js +1 -0
- package/dist/adapters/cli/codex.js.map +1 -1
- package/dist/adapters/cli/types.d.ts +33 -0
- package/dist/adapters/cli/types.d.ts.map +1 -1
- package/dist/core/command-handler.d.ts +6 -4
- package/dist/core/command-handler.d.ts.map +1 -1
- package/dist/core/command-handler.js +59 -12
- package/dist/core/command-handler.js.map +1 -1
- package/dist/core/types.d.ts +16 -0
- package/dist/core/types.d.ts.map +1 -1
- package/dist/core/types.js.map +1 -1
- package/dist/core/worker-pool.d.ts.map +1 -1
- package/dist/core/worker-pool.js +20 -1
- package/dist/core/worker-pool.js.map +1 -1
- package/dist/daemon.d.ts.map +1 -1
- package/dist/daemon.js +125 -1
- package/dist/daemon.js.map +1 -1
- package/dist/i18n/en.d.ts.map +1 -1
- package/dist/i18n/en.js +4 -3
- package/dist/i18n/en.js.map +1 -1
- package/dist/i18n/zh.d.ts.map +1 -1
- package/dist/i18n/zh.js +4 -3
- package/dist/i18n/zh.js.map +1 -1
- package/dist/im/lark/card-builder.d.ts +6 -4
- package/dist/im/lark/card-builder.d.ts.map +1 -1
- package/dist/im/lark/card-builder.js +14 -7
- package/dist/im/lark/card-builder.js.map +1 -1
- package/dist/im/lark/card-handler.d.ts.map +1 -1
- package/dist/im/lark/card-handler.js +35 -4
- package/dist/im/lark/card-handler.js.map +1 -1
- package/dist/types.d.ts +8 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/worker.js +152 -24
- package/dist/worker.js.map +1 -1
- package/package.json +1 -1
package/dist/worker.js
CHANGED
|
@@ -71,6 +71,26 @@ let cliPidMarker = null; // path to .botmux-cli-pids/<pid>
|
|
|
71
71
|
let sandboxStopWatcher = null; // stop fn for the sandbox outbox watcher
|
|
72
72
|
let sandboxCleanup = null; // unmount overlays + rm the per-session sandbox tree
|
|
73
73
|
let sandboxTeardownDone = false; // guards the exit-time best-effort teardown from double-running / running on suspend-for-resume
|
|
74
|
+
/** Counts consecutive in-worker restart cycles (see case 'restart'). Used by
|
|
75
|
+
* the SECONDARY guard so an adapter whose checkResumeTargetExists misses
|
|
76
|
+
* (returns undefined) or whose resume target vanishes between the check and
|
|
77
|
+
* spawn never crash-loops: 2nd consecutive restart → drop resume semantics,
|
|
78
|
+
* spawn fresh. Reset to 0 when the CLI reaches its prompt (markPromptReady)
|
|
79
|
+
* or when spawnCli falls back to a fresh session, so healthy restarts (e.g.
|
|
80
|
+
* user `/restart`) are unaffected. */
|
|
81
|
+
let consecutiveInWorkerRestarts = 0;
|
|
82
|
+
/** Guard: user_notify for "resume → fresh fallback" is sent once per worker
|
|
83
|
+
* lifecycle so a 4× crash loop does not spam the Lark thread with 4 copies
|
|
84
|
+
* of the same warning. */
|
|
85
|
+
let resumeFallbackNotified = false;
|
|
86
|
+
/** The effectiveResume flag used by the most recent spawnCli call. Written
|
|
87
|
+
* immediately after the two-tier fallback check so late-attach timers
|
|
88
|
+
* (hermes, cursor, etc.) can read THE SAME semantics the spawn used,
|
|
89
|
+
* instead of re-deriving from lastInitConfig.resume (which never reflects
|
|
90
|
+
* Tier-1/Tier-2 fresh demotion). Updated in spawnCli BEFORE any bridge
|
|
91
|
+
* setup so even the tick that fires between spawnCli-start and the
|
|
92
|
+
* adapter's hermesBridgeAttach reads the correct mode. */
|
|
93
|
+
let lastSpawnEffectiveResume = false;
|
|
74
94
|
let idleDetector = null;
|
|
75
95
|
let isTmuxMode = false;
|
|
76
96
|
/** Adopt-bridge mode using TmuxPipeBackend: not a tmux attach client, all
|
|
@@ -1541,8 +1561,12 @@ function codexBridgeStartTimer() {
|
|
|
1541
1561
|
codexBridgeTimer = setInterval(() => {
|
|
1542
1562
|
try {
|
|
1543
1563
|
if (structuredBridgeIsHermes()) {
|
|
1564
|
+
// Use lastSpawnEffectiveResume (written by spawnCli AFTER the
|
|
1565
|
+
// two-tier fallback), NOT lastInitConfig.resume. Otherwise a
|
|
1566
|
+
// Tier-1/Tier-2 demotion to fresh would still baseline the empty
|
|
1567
|
+
// hermes store as "existing" and swallow the first turn.
|
|
1544
1568
|
if (!hermesBridgeBaselineDone)
|
|
1545
|
-
hermesBridgeAttach(
|
|
1569
|
+
hermesBridgeAttach(lastSpawnEffectiveResume ? 'baseline-existing' : 'fresh-empty');
|
|
1546
1570
|
hermesBridgeIngest();
|
|
1547
1571
|
if (isPromptReady)
|
|
1548
1572
|
emitReadyCodexTurns();
|
|
@@ -2768,6 +2792,13 @@ function markPromptReady() {
|
|
|
2768
2792
|
return;
|
|
2769
2793
|
}
|
|
2770
2794
|
isPromptReady = true;
|
|
2795
|
+
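// The CLI actually started successfully (it is back at its prompt): reset the consecutive-restart count.
|
|
2796
|
+
// Any spawn that gets this far counts as a "success" — even if it crashes again later (for a reason other
|
|
2797
|
+
// than a missing resume target), the next round should get a fresh 2-retry budget instead of being blocked by the historical restart count.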
|
|
2798
|
+
if (consecutiveInWorkerRestarts > 0) {
|
|
2799
|
+
log(`CLI reached prompt successfully — resetting consecutive restart count (was ${consecutiveInWorkerRestarts})`);
|
|
2800
|
+
consecutiveInWorkerRestarts = 0;
|
|
2801
|
+
}
|
|
2771
2802
|
// CLI is back at its prompt — every previously written input has been
|
|
2772
2803
|
// consumed, so nothing is in flight anymore. A later crash must not
|
|
2773
2804
|
// replay these.
|
|
@@ -3499,15 +3530,86 @@ function spawnCli(cfg) {
|
|
|
3499
3530
|
log(`[sandbox] redirecting Claude bridge dataDir → overlay upper: ${redirected}`);
|
|
3500
3531
|
claudeDataDir = redirected;
|
|
3501
3532
|
}
|
|
3502
|
-
|
|
3533
|
+
// ── Resume pre-flight check + two-tier fallback ──────────────────────────
|
|
3534
|
+
// Tier 1 (adapter probe): adapter.checkResumeTargetExists returns false
|
|
3535
|
+
// → skip --resume, spawn FRESH.
|
|
3536
|
+
// Tier 2 (restart count): 2nd consecutive in-worker restart → force FRESH,
|
|
3537
|
+
// regardless of probe result. This covers adapters without a probe AND
|
|
3538
|
+
// probe/spawn races (target vanishes between the check and spawn).
|
|
3539
|
+
//
|
|
3540
|
+
// Supersedes the claude-family-only inline probe (PR #189) with a
|
|
3541
|
+
// general adapter-owned check (cleaner boundary) + a numeric safety net.
|
|
3542
|
+
//
|
|
3543
|
+
// User impact: losing context is better than a 4× daemon-side crash loop
|
|
3544
|
+
// that leaves the bot stuck in "crashed N times" state until the human
|
|
3545
|
+
// re-closes the session.
|
|
3546
|
+
let effectiveResume = cfg.resume ?? false;
|
|
3547
|
+
let effectiveCliSessionId = cfg.cliSessionId;
|
|
3548
|
+
let effectiveAdapterSessionId = adapterSessionId;
|
|
3549
|
+
const tier2ForceFresh = effectiveResume && consecutiveInWorkerRestarts >= 2;
|
|
3550
|
+
let tier1ProbeFalse = false;
|
|
3551
|
+
if (effectiveResume && !tier2ForceFresh) {
|
|
3552
|
+
const probe = cliAdapter.checkResumeTargetExists?.({
|
|
3553
|
+
sessionId: effectiveAdapterSessionId,
|
|
3554
|
+
cliSessionId: effectiveCliSessionId,
|
|
3555
|
+
workingDir: cfg.workingDir,
|
|
3556
|
+
dataDir: claudeDataDir,
|
|
3557
|
+
});
|
|
3558
|
+
if (probe === false)
|
|
3559
|
+
tier1ProbeFalse = true;
|
|
3560
|
+
}
|
|
3561
|
+
const fallBackToFresh = effectiveResume && (tier1ProbeFalse || tier2ForceFresh);
|
|
3562
|
+
if (fallBackToFresh) {
|
|
3563
|
+
const reason = tier2ForceFresh
|
|
3564
|
+
? `consecutive restart x${consecutiveInWorkerRestarts} — 2nd failed resume attempt`
|
|
3565
|
+
: 'adapter confirmed resume target does not exist on disk';
|
|
3566
|
+
log(`Resume fallback: dropping --resume (${reason}) → fresh session ${cfg.sessionId}`);
|
|
3567
|
+
effectiveResume = false;
|
|
3568
|
+
effectiveCliSessionId = undefined;
|
|
3569
|
+
effectiveAdapterSessionId = cfg.sessionId;
|
|
3570
|
+
// Recompute the claude-family JSONL path: it now targets the FRESH
|
|
3571
|
+
// sessionId (fresh spawn creates <newSid>.jsonl, not the old one).
|
|
3572
|
+
if (claudeDataDir) {
|
|
3573
|
+
backend.claudeJsonlPath =
|
|
3574
|
+
claudeJsonlPathForSession(effectiveAdapterSessionId, cfg.workingDir, claudeDataDir);
|
|
3575
|
+
}
|
|
3576
|
+
// Single human-visible warning. Spam guard: at most once per worker
|
|
3577
|
+
// lifecycle (a 4× crash loop otherwise duplicates the notice).
|
|
3578
|
+
if (!resumeFallbackNotified) {
|
|
3579
|
+
resumeFallbackNotified = true;
|
|
3580
|
+
send({
|
|
3581
|
+
type: 'user_notify',
|
|
3582
|
+
turnId: currentBotmuxTurnId,
|
|
3583
|
+
message: `⚠️ 历史会话(${(cfg.cliSessionId ?? cfg.originalSessionId ?? cfg.sessionId).substring(0, 16)}…)` +
|
|
3584
|
+
`无法恢复,已为你**新起一个干净会话**(原因:${reason})。\n` +
|
|
3585
|
+
`之前的上下文不会带到本轮,需要的话请简述背景。`,
|
|
3586
|
+
});
|
|
3587
|
+
}
|
|
3588
|
+
// Reset the counter so the fresh spawn gets a clean 2-attempt budget in
|
|
3589
|
+
// case IT crashes later for an unrelated reason.
|
|
3590
|
+
consecutiveInWorkerRestarts = 0;
|
|
3591
|
+
}
|
|
3592
|
+
else if (claudeDataDir) {
|
|
3593
|
+
// Watch where the spawned CLI will actually write: the resumed conversation
|
|
3594
|
+
// when resuming, else the fresh session id (a stale cliSessionId would point
|
|
3595
|
+
// the bridge at the gone jsonl).
|
|
3596
|
+
const bridgeWatchId = effectiveResume
|
|
3597
|
+
? (effectiveCliSessionId ?? effectiveAdapterSessionId)
|
|
3598
|
+
: effectiveAdapterSessionId;
|
|
3503
3599
|
backend.claudeJsonlPath =
|
|
3504
|
-
claudeJsonlPathForSession(
|
|
3505
|
-
}
|
|
3600
|
+
claudeJsonlPathForSession(bridgeWatchId, cfg.workingDir, claudeDataDir);
|
|
3601
|
+
}
|
|
3602
|
+
// Publish the resolved resume semantics so any late-attach timer (hermes,
|
|
3603
|
+
// cursor, …) driven by codexBridgeStartTimer sees the SAME mode the spawn
|
|
3604
|
+
// used. Without this, Tier-1/Tier-2 fresh demotion would still use
|
|
3605
|
+
// `lastInitConfig.resume` (= true) and baseline an empty store, swallowing
|
|
3606
|
+
// the fresh session's first turn.
|
|
3607
|
+
lastSpawnEffectiveResume = effectiveResume;
|
|
3506
3608
|
const args = cliAdapter.buildArgs({
|
|
3507
|
-
sessionId:
|
|
3508
|
-
resume:
|
|
3609
|
+
sessionId: effectiveAdapterSessionId,
|
|
3610
|
+
resume: effectiveResume,
|
|
3509
3611
|
workingDir: cfg.workingDir,
|
|
3510
|
-
resumeSessionId:
|
|
3612
|
+
resumeSessionId: effectiveCliSessionId,
|
|
3511
3613
|
initialPrompt: cfg.prompt || undefined,
|
|
3512
3614
|
botName: cfg.botName,
|
|
3513
3615
|
botOpenId: cfg.botOpenId,
|
|
@@ -3797,9 +3899,6 @@ function spawnCli(cfg) {
|
|
|
3797
3899
|
};
|
|
3798
3900
|
setTimeout(resolveCliPidLate, 120);
|
|
3799
3901
|
}
|
|
3800
|
-
// On tmux re-attach, keep awaitingFirstPrompt = true so screen updates are
|
|
3801
|
-
// suppressed until the idle detector fires markNewTurn() — this prevents the
|
|
3802
|
-
// full tmux scrollback history from leaking into the streaming card.
|
|
3803
3902
|
// Bridge fallback: claude-code only. Tail Claude's transcript JSONL so a
|
|
3804
3903
|
// turn the model finishes WITHOUT calling `botmux send` still gets its
|
|
3805
3904
|
// assistant text forwarded to Lark (the gate in emitReadyTurns suppresses
|
|
@@ -3808,13 +3907,19 @@ function spawnCli(cfg) {
|
|
|
3808
3907
|
// the file Claude creates on first submit isn't absorbed as history,
|
|
3809
3908
|
// and baseline-existing on resume so prior-run turns ARE absorbed (we
|
|
3810
3909
|
// don't want to re-emit yesterday's conversation as fresh turns).
|
|
3811
|
-
|
|
3812
|
-
|
|
3910
|
+
//
|
|
3911
|
+
// NOTE: use effectiveResume / effectiveAdapterSessionId / effectiveCliSessionId
|
|
3912
|
+
// here, NOT cfg.* — the two-tier fallback above may have flipped
|
|
3913
|
+
// resume → FRESH, in which case the baseline mode and session id MUST
|
|
3914
|
+
// follow the flip. The same variables also cover Tier-2 (count-based)
|
|
3915
|
+
// fallbacks that fire for non-Claude CLIs (below).
|
|
3916
|
+
if (claudeDataDir && effectiveAdapterSessionId) {
|
|
3917
|
+
const claudeBridgeSessionId = effectiveCliSessionId ?? effectiveAdapterSessionId;
|
|
3813
3918
|
const claudeJsonl = claudeJsonlPathForSession(claudeBridgeSessionId, cfg.workingDir, claudeDataDir);
|
|
3814
3919
|
startBridgeWatcher(claudeJsonl, {
|
|
3815
3920
|
cliPid: cliPid ?? undefined,
|
|
3816
3921
|
cliCwd: cfg.workingDir,
|
|
3817
|
-
mode:
|
|
3922
|
+
mode: effectiveResume ? 'baseline-existing' : 'fresh-empty',
|
|
3818
3923
|
dataDir: claudeDataDir,
|
|
3819
3924
|
});
|
|
3820
3925
|
}
|
|
@@ -3824,17 +3929,21 @@ function spawnCli(cfg) {
|
|
|
3824
3929
|
// discovered after the first submit; CoCo's events path is deterministic
|
|
3825
3930
|
// from botmux sessionId. Hermes and MTR use SQLite stores, so baseline the
|
|
3826
3931
|
// relevant cursor at spawn and poll for rows after each queued prompt flushes.
|
|
3932
|
+
//
|
|
3933
|
+
// Mode uses effectiveResume: when the resume probe flipped us to FRESH, we
|
|
3934
|
+
// must NOT baseline the "restored" cursor against an empty / absent store
|
|
3935
|
+
// (would otherwise swallow the fresh session's first turn).
|
|
3827
3936
|
if (cfg.cliId === 'hermes') {
|
|
3828
|
-
hermesBridgeAttach(
|
|
3937
|
+
hermesBridgeAttach(effectiveResume ? 'baseline-existing' : 'fresh-empty');
|
|
3829
3938
|
}
|
|
3830
3939
|
else if (cfg.cliId === 'codex') {
|
|
3831
|
-
if (
|
|
3832
|
-
const rolloutPath = findCodexRolloutBySessionId(
|
|
3940
|
+
if (effectiveCliSessionId) {
|
|
3941
|
+
const rolloutPath = findCodexRolloutBySessionId(effectiveCliSessionId);
|
|
3833
3942
|
if (rolloutPath) {
|
|
3834
3943
|
codexBridgeAttach(rolloutPath, 'baseline-existing');
|
|
3835
3944
|
}
|
|
3836
3945
|
else {
|
|
3837
|
-
codexBridgePendingSessionId =
|
|
3946
|
+
codexBridgePendingSessionId = effectiveCliSessionId;
|
|
3838
3947
|
codexBridgeStartTimer();
|
|
3839
3948
|
}
|
|
3840
3949
|
}
|
|
@@ -3847,13 +3956,13 @@ function spawnCli(cfg) {
|
|
|
3847
3956
|
// spawn (no cliSessionId yet) we just arm the poller; writeInput will
|
|
3848
3957
|
// surface the cliSessionId on the first successful submit and trigger
|
|
3849
3958
|
// codexBridgeNotifyCliSessionId → rollout attach.
|
|
3850
|
-
if (
|
|
3851
|
-
const rolloutPath = findTraexRolloutBySessionId(
|
|
3959
|
+
if (effectiveCliSessionId) {
|
|
3960
|
+
const rolloutPath = findTraexRolloutBySessionId(effectiveCliSessionId);
|
|
3852
3961
|
if (rolloutPath) {
|
|
3853
3962
|
codexBridgeAttach(rolloutPath, 'baseline-existing');
|
|
3854
3963
|
}
|
|
3855
3964
|
else {
|
|
3856
|
-
codexBridgePendingSessionId =
|
|
3965
|
+
codexBridgePendingSessionId = effectiveCliSessionId;
|
|
3857
3966
|
codexBridgeStartTimer();
|
|
3858
3967
|
}
|
|
3859
3968
|
}
|
|
@@ -3862,16 +3971,16 @@ function spawnCli(cfg) {
|
|
|
3862
3971
|
}
|
|
3863
3972
|
}
|
|
3864
3973
|
else if (cfg.cliId === 'coco') {
|
|
3865
|
-
const eventsPath = cocoEventsPathForSession(
|
|
3866
|
-
codexBridgeAttach(eventsPath,
|
|
3974
|
+
const eventsPath = cocoEventsPathForSession(effectiveAdapterSessionId);
|
|
3975
|
+
codexBridgeAttach(eventsPath, effectiveResume ? 'baseline-existing' : 'fresh-empty');
|
|
3867
3976
|
codexBridgeStartTimer();
|
|
3868
3977
|
}
|
|
3869
3978
|
else if (cfg.cliId === 'mtr') {
|
|
3870
|
-
const mtrSessionId =
|
|
3979
|
+
const mtrSessionId = effectiveCliSessionId ?? mtrSessionIdForBotmuxSession(effectiveAdapterSessionId);
|
|
3871
3980
|
codexBridgePendingSessionId = mtrSessionId;
|
|
3872
3981
|
const source = findMtrSessionById(mtrSessionId);
|
|
3873
3982
|
if (source) {
|
|
3874
|
-
mtrBridgeAttach(source,
|
|
3983
|
+
mtrBridgeAttach(source, effectiveResume ? 'baseline-existing' : 'fresh-empty');
|
|
3875
3984
|
}
|
|
3876
3985
|
else {
|
|
3877
3986
|
codexBridgeStartTimer();
|
|
@@ -4738,6 +4847,14 @@ process.on('message', async (raw) => {
|
|
|
4738
4847
|
isPromptReady = false;
|
|
4739
4848
|
idleDetector?.reset();
|
|
4740
4849
|
log(`Passthrough slash command: ${msg.content}`);
|
|
4850
|
+
// Follow-up rides on the SAME IPC (see DaemonToWorker.raw_input) so it
|
|
4851
|
+
// cannot race the 200ms text→Enter window above. Enqueue only after the
|
|
4852
|
+
// Enter landed: sendToPty queues it as the next turn (type-ahead /
|
|
4853
|
+
// pendingMessages), exactly like a Lark message arriving while busy.
|
|
4854
|
+
if (msg.followUpContent) {
|
|
4855
|
+
sendToPty(msg.followUpContent);
|
|
4856
|
+
log(`Enqueued follow-up after raw input (${msg.followUpContent.length} chars)`);
|
|
4857
|
+
}
|
|
4741
4858
|
}
|
|
4742
4859
|
break;
|
|
4743
4860
|
}
|
|
@@ -4747,6 +4864,17 @@ process.on('message', async (raw) => {
|
|
|
4747
4864
|
break;
|
|
4748
4865
|
}
|
|
4749
4866
|
log('Restart requested');
|
|
4867
|
+
// Tier-2 guard: 2nd consecutive in-worker restart forces FRESH.
|
|
4868
|
+
// Increment BEFORE spawnCli so the guard trips at count==2 (i.e. the
|
|
4869
|
+
// third attempted spawn overall — the initial spawn plus two restarts):
|
|
4870
|
+
// initial spawn (count=0) → fail → claude_exit → daemon sends restart
|
|
4871
|
+
// 1st restart (count=1) → resume still fails → restart
|
|
4872
|
+
// 2nd restart (count=2) → tier-2 kicks in → FRESH
|
|
4873
|
+
// Tier 1 probe (adapter.checkResumeTargetExists) is re-run on each
|
|
4874
|
+
// spawn, so even count=1 often short-circuits; tier-2 only catches
|
|
4875
|
+
// silent/race failures and adapters that don't implement the probe.
|
|
4876
|
+
consecutiveInWorkerRestarts++;
|
|
4877
|
+
log(`Restart count: ${consecutiveInWorkerRestarts} (>=2 forces FRESH)`);
|
|
4750
4878
|
// Must destroySession(), not kill(): for persistent backends (tmux/herdr)
|
|
4751
4879
|
// kill() only detaches — the backing session + CLI process keep running,
|
|
4752
4880
|
// so the resume:true spawnCli below would re-attach to the SAME live CLI
|