botmux 2.33.0 → 2.33.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (281)
  1. package/README.en.md +12 -1
  2. package/README.md +45 -1
  3. package/dist/adapters/cli/claude-code.d.ts.map +1 -1
  4. package/dist/adapters/cli/claude-code.js +11 -0
  5. package/dist/adapters/cli/claude-code.js.map +1 -1
  6. package/dist/cli/bots-list-output.d.ts +21 -0
  7. package/dist/cli/bots-list-output.d.ts.map +1 -0
  8. package/dist/cli/bots-list-output.js +23 -0
  9. package/dist/cli/bots-list-output.js.map +1 -0
  10. package/dist/cli/workflow.d.ts +13 -0
  11. package/dist/cli/workflow.d.ts.map +1 -0
  12. package/dist/cli/workflow.js +781 -0
  13. package/dist/cli/workflow.js.map +1 -0
  14. package/dist/cli.js +69 -14
  15. package/dist/cli.js.map +1 -1
  16. package/dist/core/command-handler.d.ts.map +1 -1
  17. package/dist/core/command-handler.js +211 -4
  18. package/dist/core/command-handler.js.map +1 -1
  19. package/dist/core/session-manager.d.ts +6 -1
  20. package/dist/core/session-manager.d.ts.map +1 -1
  21. package/dist/core/session-manager.js +22 -12
  22. package/dist/core/session-manager.js.map +1 -1
  23. package/dist/core/worker-pool.d.ts +13 -0
  24. package/dist/core/worker-pool.d.ts.map +1 -1
  25. package/dist/core/worker-pool.js +100 -6
  26. package/dist/core/worker-pool.js.map +1 -1
  27. package/dist/daemon.d.ts +3 -0
  28. package/dist/daemon.d.ts.map +1 -1
  29. package/dist/daemon.js +884 -3
  30. package/dist/daemon.js.map +1 -1
  31. package/dist/dashboard/auth.d.ts +36 -0
  32. package/dist/dashboard/auth.d.ts.map +1 -1
  33. package/dist/dashboard/auth.js +22 -0
  34. package/dist/dashboard/auth.js.map +1 -1
  35. package/dist/dashboard/web/app.js +20 -1
  36. package/dist/dashboard/web/app.js.map +1 -1
  37. package/dist/dashboard/web/i18n.d.ts.map +1 -1
  38. package/dist/dashboard/web/i18n.js +356 -0
  39. package/dist/dashboard/web/i18n.js.map +1 -1
  40. package/dist/dashboard/web/workflow-catalog.d.ts +2 -0
  41. package/dist/dashboard/web/workflow-catalog.d.ts.map +1 -0
  42. package/dist/dashboard/web/workflow-catalog.js +323 -0
  43. package/dist/dashboard/web/workflow-catalog.js.map +1 -0
  44. package/dist/dashboard/web/workflows.d.ts +2 -0
  45. package/dist/dashboard/web/workflows.d.ts.map +1 -0
  46. package/dist/dashboard/web/workflows.js +1618 -0
  47. package/dist/dashboard/web/workflows.js.map +1 -0
  48. package/dist/dashboard/workflow-api.d.ts +23 -0
  49. package/dist/dashboard/workflow-api.d.ts.map +1 -0
  50. package/dist/dashboard/workflow-api.js +463 -0
  51. package/dist/dashboard/workflow-api.js.map +1 -0
  52. package/dist/dashboard-web/app.js +494 -199
  53. package/dist/dashboard-web/index.html +1 -0
  54. package/dist/dashboard-web/style.css +160 -6
  55. package/dist/dashboard-web/terminal-replay.html +227 -0
  56. package/dist/dashboard.js +29 -12
  57. package/dist/dashboard.js.map +1 -1
  58. package/dist/i18n/en.d.ts.map +1 -1
  59. package/dist/i18n/en.js +12 -0
  60. package/dist/i18n/en.js.map +1 -1
  61. package/dist/i18n/zh.d.ts.map +1 -1
  62. package/dist/i18n/zh.js +12 -0
  63. package/dist/i18n/zh.js.map +1 -1
  64. package/dist/im/lark/card-handler.d.ts +3 -0
  65. package/dist/im/lark/card-handler.d.ts.map +1 -1
  66. package/dist/im/lark/card-handler.js +27 -1
  67. package/dist/im/lark/card-handler.js.map +1 -1
  68. package/dist/im/lark/client.d.ts +19 -2
  69. package/dist/im/lark/client.d.ts.map +1 -1
  70. package/dist/im/lark/client.js +21 -2
  71. package/dist/im/lark/client.js.map +1 -1
  72. package/dist/im/lark/workflow-card-handler.d.ts +50 -0
  73. package/dist/im/lark/workflow-card-handler.d.ts.map +1 -0
  74. package/dist/im/lark/workflow-card-handler.js +152 -0
  75. package/dist/im/lark/workflow-card-handler.js.map +1 -0
  76. package/dist/im/lark/workflow-cards.d.ts +46 -0
  77. package/dist/im/lark/workflow-cards.d.ts.map +1 -0
  78. package/dist/im/lark/workflow-cards.js +226 -0
  79. package/dist/im/lark/workflow-cards.js.map +1 -0
  80. package/dist/im/lark/workflow-progress-card.d.ts +76 -0
  81. package/dist/im/lark/workflow-progress-card.d.ts.map +1 -0
  82. package/dist/im/lark/workflow-progress-card.js +279 -0
  83. package/dist/im/lark/workflow-progress-card.js.map +1 -0
  84. package/dist/im/lark/workflow-slash-command.d.ts +92 -0
  85. package/dist/im/lark/workflow-slash-command.d.ts.map +1 -0
  86. package/dist/im/lark/workflow-slash-command.js +185 -0
  87. package/dist/im/lark/workflow-slash-command.js.map +1 -0
  88. package/dist/services/group-creator.d.ts.map +1 -1
  89. package/dist/services/group-creator.js +17 -4
  90. package/dist/services/group-creator.js.map +1 -1
  91. package/dist/services/groups-store.d.ts +11 -0
  92. package/dist/services/groups-store.d.ts.map +1 -1
  93. package/dist/services/groups-store.js +26 -0
  94. package/dist/services/groups-store.js.map +1 -1
  95. package/dist/services/jsonl-cursor.d.ts +12 -0
  96. package/dist/services/jsonl-cursor.d.ts.map +1 -0
  97. package/dist/services/jsonl-cursor.js +45 -0
  98. package/dist/services/jsonl-cursor.js.map +1 -0
  99. package/dist/services/schedule-store.d.ts +35 -0
  100. package/dist/services/schedule-store.d.ts.map +1 -1
  101. package/dist/services/schedule-store.js +108 -1
  102. package/dist/services/schedule-store.js.map +1 -1
  103. package/dist/skills/definitions.d.ts.map +1 -1
  104. package/dist/skills/definitions.js +399 -0
  105. package/dist/skills/definitions.js.map +1 -1
  106. package/dist/types.d.ts +4 -0
  107. package/dist/types.d.ts.map +1 -1
  108. package/dist/utils/cli-usage-limit.d.ts.map +1 -1
  109. package/dist/utils/cli-usage-limit.js +4 -0
  110. package/dist/utils/cli-usage-limit.js.map +1 -1
  111. package/dist/worker.js +118 -14
  112. package/dist/worker.js.map +1 -1
  113. package/dist/workflows/attempt-resume.d.ts +114 -0
  114. package/dist/workflows/attempt-resume.d.ts.map +1 -0
  115. package/dist/workflows/attempt-resume.js +385 -0
  116. package/dist/workflows/attempt-resume.js.map +1 -0
  117. package/dist/workflows/attempt-terminal.d.ts +21 -0
  118. package/dist/workflows/attempt-terminal.d.ts.map +1 -0
  119. package/dist/workflows/attempt-terminal.js +7 -0
  120. package/dist/workflows/attempt-terminal.js.map +1 -0
  121. package/dist/workflows/blob.d.ts +27 -0
  122. package/dist/workflows/blob.d.ts.map +1 -0
  123. package/dist/workflows/blob.js +39 -0
  124. package/dist/workflows/blob.js.map +1 -0
  125. package/dist/workflows/cancel-run.d.ts +45 -0
  126. package/dist/workflows/cancel-run.d.ts.map +1 -0
  127. package/dist/workflows/cancel-run.js +99 -0
  128. package/dist/workflows/cancel-run.js.map +1 -0
  129. package/dist/workflows/cancel.d.ts +111 -0
  130. package/dist/workflows/cancel.d.ts.map +1 -0
  131. package/dist/workflows/cancel.js +120 -0
  132. package/dist/workflows/cancel.js.map +1 -0
  133. package/dist/workflows/catalog.d.ts +60 -0
  134. package/dist/workflows/catalog.d.ts.map +1 -0
  135. package/dist/workflows/catalog.js +119 -0
  136. package/dist/workflows/catalog.js.map +1 -0
  137. package/dist/workflows/cold-attach.d.ts +30 -0
  138. package/dist/workflows/cold-attach.d.ts.map +1 -0
  139. package/dist/workflows/cold-attach.js +40 -0
  140. package/dist/workflows/cold-attach.js.map +1 -0
  141. package/dist/workflows/cold-scan.d.ts +21 -0
  142. package/dist/workflows/cold-scan.d.ts.map +1 -0
  143. package/dist/workflows/cold-scan.js +70 -0
  144. package/dist/workflows/cold-scan.js.map +1 -0
  145. package/dist/workflows/daemon-spawn.d.ts +117 -0
  146. package/dist/workflows/daemon-spawn.d.ts.map +1 -0
  147. package/dist/workflows/daemon-spawn.js +551 -0
  148. package/dist/workflows/daemon-spawn.js.map +1 -0
  149. package/dist/workflows/definition.d.ts +1309 -0
  150. package/dist/workflows/definition.d.ts.map +1 -0
  151. package/dist/workflows/definition.js +334 -0
  152. package/dist/workflows/definition.js.map +1 -0
  153. package/dist/workflows/effect-input.d.ts +4 -0
  154. package/dist/workflows/effect-input.d.ts.map +1 -0
  155. package/dist/workflows/effect-input.js +18 -0
  156. package/dist/workflows/effect-input.js.map +1 -0
  157. package/dist/workflows/events/append.d.ts +77 -0
  158. package/dist/workflows/events/append.d.ts.map +1 -0
  159. package/dist/workflows/events/append.js +214 -0
  160. package/dist/workflows/events/append.js.map +1 -0
  161. package/dist/workflows/events/idempotency.d.ts +77 -0
  162. package/dist/workflows/events/idempotency.d.ts.map +1 -0
  163. package/dist/workflows/events/idempotency.js +116 -0
  164. package/dist/workflows/events/idempotency.js.map +1 -0
  165. package/dist/workflows/events/index.d.ts +7 -0
  166. package/dist/workflows/events/index.d.ts.map +1 -0
  167. package/dist/workflows/events/index.js +7 -0
  168. package/dist/workflows/events/index.js.map +1 -0
  169. package/dist/workflows/events/payloads.d.ts +917 -0
  170. package/dist/workflows/events/payloads.d.ts.map +1 -0
  171. package/dist/workflows/events/payloads.js +337 -0
  172. package/dist/workflows/events/payloads.js.map +1 -0
  173. package/dist/workflows/events/replay.d.ts +238 -0
  174. package/dist/workflows/events/replay.d.ts.map +1 -0
  175. package/dist/workflows/events/replay.js +608 -0
  176. package/dist/workflows/events/replay.js.map +1 -0
  177. package/dist/workflows/events/schema.d.ts +5242 -0
  178. package/dist/workflows/events/schema.d.ts.map +1 -0
  179. package/dist/workflows/events/schema.js +295 -0
  180. package/dist/workflows/events/schema.js.map +1 -0
  181. package/dist/workflows/events/types.d.ts +34 -0
  182. package/dist/workflows/events/types.d.ts.map +1 -0
  183. package/dist/workflows/events/types.js +2 -0
  184. package/dist/workflows/events/types.js.map +1 -0
  185. package/dist/workflows/fanout.d.ts +36 -0
  186. package/dist/workflows/fanout.d.ts.map +1 -0
  187. package/dist/workflows/fanout.js +114 -0
  188. package/dist/workflows/fanout.js.map +1 -0
  189. package/dist/workflows/hostExecutors/botmux-schedule.d.ts +41 -0
  190. package/dist/workflows/hostExecutors/botmux-schedule.d.ts.map +1 -0
  191. package/dist/workflows/hostExecutors/botmux-schedule.js +121 -0
  192. package/dist/workflows/hostExecutors/botmux-schedule.js.map +1 -0
  193. package/dist/workflows/hostExecutors/feishu-im.d.ts +12 -0
  194. package/dist/workflows/hostExecutors/feishu-im.d.ts.map +1 -0
  195. package/dist/workflows/hostExecutors/feishu-im.js +49 -0
  196. package/dist/workflows/hostExecutors/feishu-im.js.map +1 -0
  197. package/dist/workflows/hostExecutors/feishu-reply.d.ts +24 -0
  198. package/dist/workflows/hostExecutors/feishu-reply.d.ts.map +1 -0
  199. package/dist/workflows/hostExecutors/feishu-reply.js +88 -0
  200. package/dist/workflows/hostExecutors/feishu-reply.js.map +1 -0
  201. package/dist/workflows/hostExecutors/feishu-send.d.ts +23 -0
  202. package/dist/workflows/hostExecutors/feishu-send.d.ts.map +1 -0
  203. package/dist/workflows/hostExecutors/feishu-send.js +124 -0
  204. package/dist/workflows/hostExecutors/feishu-send.js.map +1 -0
  205. package/dist/workflows/hostExecutors/index.d.ts +8 -0
  206. package/dist/workflows/hostExecutors/index.d.ts.map +1 -0
  207. package/dist/workflows/hostExecutors/index.js +8 -0
  208. package/dist/workflows/hostExecutors/index.js.map +1 -0
  209. package/dist/workflows/hostExecutors/protocol.d.ts +42 -0
  210. package/dist/workflows/hostExecutors/protocol.d.ts.map +1 -0
  211. package/dist/workflows/hostExecutors/protocol.js +181 -0
  212. package/dist/workflows/hostExecutors/protocol.js.map +1 -0
  213. package/dist/workflows/hostExecutors/registry.d.ts +10 -0
  214. package/dist/workflows/hostExecutors/registry.d.ts.map +1 -0
  215. package/dist/workflows/hostExecutors/registry.js +36 -0
  216. package/dist/workflows/hostExecutors/registry.js.map +1 -0
  217. package/dist/workflows/hostExecutors/types.d.ts +78 -0
  218. package/dist/workflows/hostExecutors/types.d.ts.map +1 -0
  219. package/dist/workflows/hostExecutors/types.js +2 -0
  220. package/dist/workflows/hostExecutors/types.js.map +1 -0
  221. package/dist/workflows/loader.d.ts +16 -0
  222. package/dist/workflows/loader.d.ts.map +1 -0
  223. package/dist/workflows/loader.js +56 -0
  224. package/dist/workflows/loader.js.map +1 -0
  225. package/dist/workflows/loop.d.ts +50 -0
  226. package/dist/workflows/loop.d.ts.map +1 -0
  227. package/dist/workflows/loop.js +350 -0
  228. package/dist/workflows/loop.js.map +1 -0
  229. package/dist/workflows/ops-projection.d.ts +168 -0
  230. package/dist/workflows/ops-projection.d.ts.map +1 -0
  231. package/dist/workflows/ops-projection.js +707 -0
  232. package/dist/workflows/ops-projection.js.map +1 -0
  233. package/dist/workflows/orchestrator.d.ts +107 -0
  234. package/dist/workflows/orchestrator.d.ts.map +1 -0
  235. package/dist/workflows/orchestrator.js +197 -0
  236. package/dist/workflows/orchestrator.js.map +1 -0
  237. package/dist/workflows/output-binding.d.ts +70 -0
  238. package/dist/workflows/output-binding.d.ts.map +1 -0
  239. package/dist/workflows/output-binding.js +265 -0
  240. package/dist/workflows/output-binding.js.map +1 -0
  241. package/dist/workflows/params.d.ts +61 -0
  242. package/dist/workflows/params.d.ts.map +1 -0
  243. package/dist/workflows/params.js +195 -0
  244. package/dist/workflows/params.js.map +1 -0
  245. package/dist/workflows/resume.d.ts +263 -0
  246. package/dist/workflows/resume.d.ts.map +1 -0
  247. package/dist/workflows/resume.js +808 -0
  248. package/dist/workflows/resume.js.map +1 -0
  249. package/dist/workflows/run-id.d.ts +2 -0
  250. package/dist/workflows/run-id.d.ts.map +1 -0
  251. package/dist/workflows/run-id.js +7 -0
  252. package/dist/workflows/run-id.js.map +1 -0
  253. package/dist/workflows/run-init.d.ts +48 -0
  254. package/dist/workflows/run-init.d.ts.map +1 -0
  255. package/dist/workflows/run-init.js +99 -0
  256. package/dist/workflows/run-init.js.map +1 -0
  257. package/dist/workflows/runs-dir.d.ts +4 -0
  258. package/dist/workflows/runs-dir.d.ts.map +1 -0
  259. package/dist/workflows/runs-dir.js +15 -0
  260. package/dist/workflows/runs-dir.js.map +1 -0
  261. package/dist/workflows/runtime.d.ts +211 -0
  262. package/dist/workflows/runtime.d.ts.map +1 -0
  263. package/dist/workflows/runtime.js +594 -0
  264. package/dist/workflows/runtime.js.map +1 -0
  265. package/dist/workflows/spawn-bot.d.ts +165 -0
  266. package/dist/workflows/spawn-bot.d.ts.map +1 -0
  267. package/dist/workflows/spawn-bot.js +215 -0
  268. package/dist/workflows/spawn-bot.js.map +1 -0
  269. package/dist/workflows/system.d.ts +49 -0
  270. package/dist/workflows/system.d.ts.map +1 -0
  271. package/dist/workflows/system.js +48 -0
  272. package/dist/workflows/system.js.map +1 -0
  273. package/dist/workflows/trigger-run.d.ts +70 -0
  274. package/dist/workflows/trigger-run.d.ts.map +1 -0
  275. package/dist/workflows/trigger-run.js +88 -0
  276. package/dist/workflows/trigger-run.js.map +1 -0
  277. package/dist/workflows/wait.d.ts +120 -0
  278. package/dist/workflows/wait.d.ts.map +1 -0
  279. package/dist/workflows/wait.js +181 -0
  280. package/dist/workflows/wait.js.map +1 -0
  281. package/package.json +3 -3
package/dist/daemon.js CHANGED
@@ -7,7 +7,7 @@ const __filename = fileURLToPath(import.meta.url);
7
7
  const __dirname = dirname(__filename);
8
8
  import { config } from './config.js';
9
9
  import { statSync } from 'node:fs';
10
- import { getChatMode, replyMessage, resolveAllowedUsersWithMap, sendMessage } from './im/lark/client.js';
10
+ import { getChatMode, replyMessage, resolveAllowedUsersWithMap, sendMessage, updateMessage } from './im/lark/client.js';
11
11
  import { loadBotConfigs, registerBot, getBot, getAllBots, findOncallChatForAnyBot } from './bot-registry.js';
12
12
  import * as sessionStore from './services/session-store.js';
13
13
  import * as chatFirstSeenStore from './services/chat-first-seen-store.js';
@@ -24,26 +24,135 @@ import { sessionKey, sessionAnchorId } from './core/types.js';
24
24
  import * as scheduler from './core/scheduler.js';
25
25
  import { scanMultipleProjects } from './services/project-scanner.js';
26
26
  import { buildRepoSelectCard, buildStreamingCard, getCliDisplayName } from './im/lark/card-builder.js';
27
- import { t as tr, localeForBot } from './i18n/index.js';
27
+ import { t as tr, botLocale, localeForBot } from './i18n/index.js';
28
28
  import { createCliAdapterSync } from './adapters/cli/registry.js';
29
29
  import { initWorkerPool, setActiveSessionsRegistry, forkWorker, killWorker, scheduleCardPatch, setCurrentCliVersion, CARD_POSTING_SENTINEL, parkStreamCard, closeSession as closeSessionHelper, } from './core/worker-pool.js';
30
- import { setBotName, setLarkAppId, startIpcServer } from './core/dashboard-ipc-server.js';
30
+ import { ipcRoute, jsonRes, readJsonBody, setBotName, setLarkAppId, startIpcServer } from './core/dashboard-ipc-server.js';
31
31
  import { saveFrozenCards } from './services/frozen-card-store.js';
32
32
  import { DAEMON_COMMANDS, PASSTHROUGH_COMMANDS, handleCommand, parseSlashCommandInvocation, parseForceTopicInvocation } from './core/command-handler.js';
33
33
  import { findInheritablePeer } from './core/inherit-peer.js';
34
34
  import { isCallbackUrl, handleCallbackUrl } from './utils/user-token.js';
35
35
  import { getSessionWorkingDir, getProjectScanDirs, expandHome, downloadResources, formatAttachmentsHint, buildNewTopicPrompt, buildFollowUpContent, buildBridgeInputContent, buildReforkPrompt, getAvailableBots, restoreActiveSessions, executeScheduledTask, persistStreamCardState, rememberLastCliInput, } from './core/session-manager.js';
36
36
  import { handleCardAction } from './im/lark/card-handler.js';
37
+ import { executeWorkflowCommand, resolveBotSnapshot, } from './im/lark/workflow-slash-command.js';
38
+ import { workflowRunDetailUrl } from './im/lark/workflow-cards.js';
39
+ import { buildWorkflowStartingCard, buildWorkflowProgressCard, buildAttemptDeeplinkEnricher, } from './im/lark/workflow-progress-card.js';
40
+ import { EventLog as WorkflowEventLog } from './workflows/events/append.js';
41
+ import { replay as replayWorkflow } from './workflows/events/replay.js';
37
42
  import { isBotMentioned, probeBotOpenId, startLarkEventDispatcher, writeBotInfoFile, canOperate, isKnownPeerBot, checkRequiredScopes } from './im/lark/event-dispatcher.js';
38
43
  import { learnFromMentions, resolveSender, flushIdentityCacheSync } from './im/lark/identity-cache.js';
39
44
  import { renderSenderTag } from './core/session-manager.js';
40
45
  import { markSessionActivity } from './core/session-activity.js';
46
+ import { WorkflowEventWatcher, handleWorkflowFanoutEvent } from './workflows/fanout.js';
47
+ import { runLoop } from './workflows/loop.js';
48
+ import { createWorkflowDaemonSpawn } from './workflows/daemon-spawn.js';
49
+ import { createDaemonSpawnFn } from './workflows/spawn-bot.js';
50
+ import { attachColdWorkflowRunsForDaemon } from './workflows/cold-attach.js';
51
+ import { getRunsDir } from './workflows/runs-dir.js';
52
+ import { loadEffectInputSidecar } from './workflows/effect-input.js';
53
+ import { isValidWorkflowId } from './workflows/catalog.js';
54
+ import { triggerWorkflowRun } from './workflows/trigger-run.js';
55
+ import { createDefaultHostExecutorRegistry, createDefaultProviderReconcilers, } from './workflows/hostExecutors/registry.js';
56
+ import { cancelWorkflowRun, guardWorkflowRunCancelChatScope, isTerminalRunStatus, } from './workflows/cancel-run.js';
57
+ import { requestCancel } from './workflows/cancel.js';
58
+ import { resolveWait } from './workflows/wait.js';
59
+ import { replay } from './workflows/events/replay.js';
60
+ import { isValidRunId, readRunSnapshot } from './workflows/ops-projection.js';
61
+ import { AttemptResumeManager } from './workflows/attempt-resume.js';
41
62
  // ─── State ───────────────────────────────────────────────────────────────────
42
63
  const activeSessions = new Map();
64
+ const workflowEventWatchers = new Map();
65
+ const workflowRuns = new Map();
66
+ // v0.1.5 slice 1: run-level progress card index. daemon-internal only
67
+ // (codex contract boundary 2: daemon restart drops the cardMessageId
68
+ // and we accept losing card updates for that run — the dashboard link
69
+ // inside any prior card still works).
70
+ const workflowRunCards = new Map();
71
// Shared AttemptResumeManager instance. It is configured with the runs
// directory, the externally reachable web host, and a resolver that maps a
// Lark app id onto the settings of the corresponding registered bot.
const workflowAttemptResumes = new AttemptResumeManager({
    runsDir: getRunsDir(),
    externalHost: config.web.externalHost,
    /**
     * Flatten the registered bot for `larkAppId` into a plain settings object.
     *
     * The terminal's `cliId` takes precedence over the bot's configured one,
     * while the bot's own `botName` takes precedence over the terminal's.
     * Any error raised while looking the bot up or reading its fields is
     * swallowed and reported as `undefined`.
     */
    resolveBot(larkAppId, terminal) {
        try {
            const bot = getBot(larkAppId);
            const botConfig = bot.config;
            return {
                larkAppId: botConfig.larkAppId,
                larkAppSecret: botConfig.larkAppSecret,
                cliId: terminal.cliId ?? botConfig.cliId,
                cliPathOverride: botConfig.cliPathOverride,
                backendType: botConfig.backendType,
                botName: bot.botName ?? terminal.botName,
                botOpenId: bot.botOpenId,
                locale: botLocale(botConfig),
            };
        }
        catch {
            return undefined;
        }
    },
});
43
93
  // Cache last /repo scan results per chat for /repo <number> fallback
44
94
  const lastRepoScan = new Map();
45
95
  const cliVersionCache = new Map();
46
96
  const VERSION_CHECK_INTERVAL = 60_000; // cache 1 min
97
/**
 * Read a positive whole number from the environment variable `name`.
 *
 * @param {string} name - Name of the environment variable to read.
 * @returns {number} The value rounded down to an integer, or 0 when the
 *   variable is unset/empty or does not yield an integer of at least 1
 *   (in which case a `[memdiag]` warning is logged).
 */
function parsePositiveIntEnv(name) {
    const raw = process.env[name];
    if (!raw) {
        return 0;
    }
    // Round down BEFORE validating. Validating first lets a fractional value
    // below 1 (e.g. "0.5") pass the `> 0` check and then floor to 0, which
    // callers treat as "disabled" — silently, without the warning that every
    // other invalid value gets.
    const parsed = Math.floor(Number(raw));
    if (!Number.isFinite(parsed) || parsed <= 0) {
        logger.warn(`[memdiag] ignoring invalid ${name}=${JSON.stringify(raw)}`);
        return 0;
    }
    return parsed;
}
108
/**
 * Format a byte count as mebibytes with one decimal place, e.g. `1.5MiB`.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} The formatted size, or `'n/a'` when `bytes` is not a
 *   finite number (this includes `undefined`, `null`, `NaN` and non-number
 *   values, because `Number.isFinite` does not coerce).
 */
function formatMiB(bytes) {
    if (!Number.isFinite(bytes)) {
        return 'n/a';
    }
    // `bytes` is a finite number past the guard, so no nullish fallback is needed.
    return `${(bytes / 1024 / 1024).toFixed(1)}MiB`;
}
113
/**
 * Summarize the resource types reported by `process.getActiveResourcesInfo()`
 * as a compact `name:count` list.
 *
 * Entries are ordered by descending count, ties broken by name
 * (`localeCompare`), and joined with commas.
 *
 * @param {number} [limit=16] - Maximum number of resource types to include.
 * @returns {string} The summary; `'unavailable'` when the runtime does not
 *   provide `process.getActiveResourcesInfo`; `'none'` when nothing is active.
 */
function summarizeActiveResources(limit = 16) {
    if (typeof process.getActiveResourcesInfo !== 'function') {
        return 'unavailable';
    }
    const tally = new Map();
    for (const kind of process.getActiveResourcesInfo()) {
        tally.set(kind, (tally.get(kind) ?? 0) + 1);
    }
    if (tally.size === 0) {
        return 'none';
    }
    return [...tally]
        .sort(([nameA, countA], [nameB, countB]) => countB - countA || nameA.localeCompare(nameB))
        .slice(0, limit)
        .map(([kind, count]) => `${kind}:${count}`)
        .join(',');
}
128
/**
 * Emit one `[memdiag]` info line describing current process memory and the
 * sizes of the daemon's session/workflow registries.
 *
 * `nativeOther~` is an estimate: rss minus heapTotal minus external, never
 * reported below zero.
 *
 * @param {string} reason - Label recorded in the line (e.g. why it was logged).
 */
function logMemoryDiagnostics(reason) {
    const mem = process.memoryUsage();
    const external = mem.external ?? 0;
    const arrayBuffers = mem.arrayBuffers ?? 0;
    const nativeOther = Math.max(0, mem.rss - mem.heapTotal - external);
    const fields = [
        `[memdiag] reason=${reason}`,
        `rss=${formatMiB(mem.rss)}`,
        `heapUsed=${formatMiB(mem.heapUsed)}`,
        `heapTotal=${formatMiB(mem.heapTotal)}`,
        `external=${formatMiB(external)}`,
        `arrayBuffers=${formatMiB(arrayBuffers)}`,
        `nativeOther~=${formatMiB(nativeOther)}`,
        `activeSessions=${activeSessions.size}`,
        `workflowRuns=${workflowRuns.size}`,
        `workflowWatchers=${workflowEventWatchers.size}`,
        `resources=${summarizeActiveResources()}`,
    ];
    logger.info(fields.join(' '));
}
145
/**
 * Start periodic memory diagnostics when `BOTMUX_MEMORY_DIAG_INTERVAL_MS`
 * holds a positive integer.
 *
 * Logs one `startup` sample immediately, then one `interval` sample per
 * period. The timer is unref'd (when supported) so it does not by itself keep
 * the process alive.
 *
 * @returns The interval timer, or `undefined` when diagnostics are disabled.
 */
function startMemoryDiagnostics() {
    const requestedMs = parsePositiveIntEnv('BOTMUX_MEMORY_DIAG_INTERVAL_MS');
    if (!requestedMs) {
        return undefined;
    }
    // Node coerces timer delays above 2^31-1 ms down to 1 ms, which would turn
    // an oversized setting into a log line every millisecond. Clamp instead.
    const MAX_TIMER_DELAY_MS = 2_147_483_647;
    let intervalMs = requestedMs;
    if (requestedMs > MAX_TIMER_DELAY_MS) {
        logger.warn(`[memdiag] intervalMs=${requestedMs} exceeds the maximum timer delay; clamping to ${MAX_TIMER_DELAY_MS}`);
        intervalMs = MAX_TIMER_DELAY_MS;
    }
    logger.info(`[memdiag] enabled intervalMs=${intervalMs}`);
    logMemoryDiagnostics('startup');
    const timer = setInterval(() => logMemoryDiagnostics('interval'), intervalMs);
    if (typeof timer.unref === 'function') {
        timer.unref();
    }
    return timer;
}
47
156
  /**
48
157
  * Reply into a session — scope-aware.
49
158
  *
@@ -214,6 +323,587 @@ function refreshCliVersion(cliId, cliPathOverride) {
214
323
  function tag(ds) {
215
324
  return ds.session.sessionId.substring(0, 8);
216
325
  }
326
/**
 * Return the event watcher for a workflow run, creating and registering one
 * if none exists, and optionally record the run's runtime context.
 *
 * @param {string} runId - Workflow run id; key of both `workflowRuns` and
 *   `workflowEventWatchers`.
 * @param {object} [ctx] - Runtime context for the run. When provided, its
 *   `registerAborters` property is replaced (see below) and the context is
 *   stored in the run's `workflowRuns` entry, keeping any other fields the
 *   entry already had.
 * @returns The existing watcher for `runId`, or the newly created one.
 */
export function attachWorkflowEventWatcher(runId, ctx) {
    if (ctx) {
        // v0.1.4-a: install a registerAborters hook that mirrors the aborters
        // it is given onto the workflowRuns entry, so cancel handling can
        // reach them without polling the EventLog. This overwrites any hook
        // the caller set, keeping the workflowRuns entry the source of truth.
        ctx.registerAborters = (aborters) => {
            const entry = workflowRuns.get(runId);
            if (!entry) {
                return;
            }
            if (aborters) {
                entry.aborters = aborters;
            }
            else {
                delete entry.aborters;
            }
        };
        workflowRuns.set(runId, { ...workflowRuns.get(runId), ctx });
    }
    const existing = workflowEventWatchers.get(runId);
    if (existing) {
        return existing;
    }
    const watcher = new WorkflowEventWatcher(runId, async (event) => {
        // Refresh the progress card first (best-effort; it swallows its own
        // errors) so a stale card does not linger through approval or
        // terminal events, then hand the event to the fanout handler.
        await updateWorkflowProgressCard(runId);
        await handleWorkflowFanoutEvent(event);
    }, {
        onError: (err) => logger.warn(`[workflow:${runId}] fanout failed: ${err instanceof Error ? err.message : String(err)}`),
    });
    workflowEventWatchers.set(runId, watcher);
    watcher.ready.catch((err) => {
        // Only unregister if the map still points at THIS watcher. The run may
        // have been cleaned up and re-attached in the meantime; a late failure
        // of the old watcher must not evict its replacement.
        if (workflowEventWatchers.get(runId) === watcher) {
            workflowEventWatchers.delete(runId);
        }
        logger.warn(`[workflow:${runId}] watcher failed to start: ${err instanceof Error ? err.message : String(err)}`);
    });
    return watcher;
}
363
/**
 * Run the workflow loop for a registered run, sharing one in-flight loop
 * between concurrent callers.
 *
 * @param {string} runId - Id of a run present in `workflowRuns`.
 * @returns {Promise} Resolves with the loop result; rejects with the loop's
 *   error (after logging it).
 * @throws {Error} When no runtime context is registered for `runId`.
 */
async function driveWorkflowRun(runId) {
    const entry = workflowRuns.get(runId);
    if (!entry) {
        throw new Error(`workflow runtime context not registered: ${runId}`);
    }
    if (entry.running) {
        return entry.running;
    }
    const loop = runLoop(entry.ctx);
    const settle = async () => {
        try {
            const result = await loop;
            logger.info(`[workflow:${runId}] loop stopped: ${result.reason} (ticks=${result.ticks})`);
            if (result.reason === 'terminal') {
                // Patch the final card BEFORE cleanup drops the card entry;
                // otherwise the last refresh may come too late and the card
                // would keep showing the run as in progress.
                await updateWorkflowProgressCard(runId);
                cleanupWorkflowRun(runId);
            }
            return result;
        }
        catch (err) {
            logger.warn(`[workflow:${runId}] loop failed: ${err instanceof Error ? err.message : String(err)}`);
            throw err;
        }
        finally {
            // Clear the in-flight marker on whatever entry is current now.
            const current = workflowRuns.get(runId);
            if (current) {
                current.running = undefined;
            }
        }
    };
    entry.running = settle();
    return entry.running;
}
393
/**
 * Drop all daemon-side bookkeeping for a run: its runtime entry, its progress
 * card entry, and its event watcher (which is closed before being removed).
 *
 * @param {string} runId - Id of the run to forget.
 */
function cleanupWorkflowRun(runId) {
    workflowRuns.delete(runId);
    workflowRunCards.delete(runId);
    const watcher = workflowEventWatchers.get(runId);
    if (!watcher) {
        return;
    }
    watcher.close();
    workflowEventWatchers.delete(runId);
}
402
/**
 * v0.1.5 slice 1: refresh the run-level progress card.
 *
 * Replays the run's EventLog, builds a fresh card from the resulting
 * snapshot, and updates the previously sent message with it. Any failure is
 * logged at warn level and swallowed, so workflow runtime behaviour never
 * depends on the card update succeeding. Does nothing when no card is
 * registered for the run.
 *
 * The fanout watcher calls this for every event BEFORE passing the event to
 * handleWorkflowFanoutEvent, and the terminal/cancel paths call it before
 * cleanup.
 *
 * @param {string} runId - Id of the run whose card should be refreshed.
 * @returns {Promise<void>} Settles once this refresh (and all refreshes
 *   queued before it) have finished.
 */
async function updateWorkflowProgressCard(runId) {
    const queuedOn = workflowRunCards.get(runId);
    if (!queuedOn) {
        return;
    }
    const refreshOnce = async () => {
        // Look the card up again: it may have been removed (e.g. by terminal
        // cleanup) while this refresh was waiting for its turn.
        const live = workflowRunCards.get(runId);
        if (!live) {
            return;
        }
        try {
            const eventLog = new WorkflowEventLog(runId, getRunsDir());
            const snapshot = replayWorkflow(await eventLog.readAll());
            // Take the node total from the workflow definition when a runtime
            // context is still held. The snapshot only knows nodes that have
            // been triggered so far, so its count grows during the run and
            // would yield a misleading "X / Y" fraction.
            const definedNodes = workflowRuns.get(runId)?.ctx.def?.nodes;
            const totalNodes = definedNodes ? Object.keys(definedNodes).length : undefined;
            const cardJson = buildWorkflowProgressCard(snapshot, {
                // v0.1.5 slice 3: per-row terminal link via the dashboard
                // deeplink enricher.
                enrichWithTerminalLink: buildAttemptDeeplinkEnricher(runId, snapshot),
                totalNodes,
            });
            await updateMessage(live.larkAppId, live.cardMessageId, cardJson);
        }
        catch (err) {
            logger.warn(`[workflow:${runId}] progress card update failed (continuing): ${err instanceof Error ? err.message : String(err)}`);
        }
    };
    // Serialize refreshes per card: each one starts only after its predecessor
    // finished, so updates cannot land out of order and briefly flip the card
    // back to an older state.
    const tail = queuedOn.updateChain.then(refreshOnce);
    queuedOn.updateChain = tail;
    await tail;
}
456
/**
 * Cancel a workflow run from the daemon.
 *
 * Outcomes, in the order they are checked:
 *  - invalid id → `{ ok: false, error: 'bad_run_id' }`;
 *  - `opts.expectedChatId` given and the chat-scope guard rejects → the
 *    guard's result is returned as-is;
 *  - the run's loop is in flight → already-terminal result, or the (shared)
 *    result of `startRunningCancel`;
 *  - the run is not attached to this daemon → `unknown_run`, an
 *    already-terminal result, or `workflow_not_attached`;
 *  - otherwise → `cancelWorkflowRun` is driven directly and, if the run ends
 *    up terminal, the card is refreshed and the run is cleaned up.
 *
 * @param {string} runId - Id of the run to cancel.
 * @param {string} reason - Reason forwarded to the cancel machinery.
 * @param {object} [opts] - Optional `expectedChatId` (enables the chat-scope
 *   guard) and `by` (originator label; defaults to `'dashboard'`).
 * @returns {Promise<object>} A result object with `ok` plus details.
 */
async function cancelWorkflowRunOnDaemon(runId, reason, opts = {}) {
    if (!isValidRunId(runId)) {
        return { ok: false, error: 'bad_run_id' };
    }
    if (opts.expectedChatId) {
        const scopeCheck = await guardWorkflowRunCancelChatScope(getRunsDir(), runId, opts.expectedChatId);
        if (!scopeCheck.ok) {
            return scopeCheck;
        }
    }
    // Result shape shared by both "nothing to do, run already finished" exits.
    const alreadyTerminalResult = (snap) => ({
        ok: true,
        runId,
        status: snap.run.status,
        alreadyTerminal: true,
        lastSeq: snap.lastSeq,
    });
    const entry = workflowRuns.get(runId);
    if (entry?.running) {
        const liveSnapshot = replay(await entry.ctx.log.readAll());
        if (isTerminalRunStatus(liveSnapshot.run.status)) {
            return alreadyTerminalResult(liveSnapshot);
        }
        // Dedup concurrent cancel calls (codex round 3 M1). There is no await
        // between this check and the assignment of `entry.cancelling` below,
        // so a second caller reaching this point sees the in-flight promise
        // and shares its result instead of re-writing `cancelRequested` or
        // re-firing aborters.
        if (entry.cancelling) {
            return await entry.cancelling;
        }
        const inFlight = startRunningCancel(entry, runId, reason, opts.by ?? 'dashboard');
        entry.cancelling = inFlight;
        inFlight
            .catch((err) => {
                logger.warn(`[workflow:${runId}] cancel foreground failed: ${err instanceof Error ? err.message : String(err)}`);
            })
            .finally(() => {
                // Only clear the marker if it still refers to this cancel.
                const latest = workflowRuns.get(runId);
                if (latest && latest.cancelling === inFlight) {
                    delete latest.cancelling;
                }
            });
        return await inFlight;
    }
    const attached = workflowRuns.get(runId);
    if (!attached) {
        const stored = await readRunSnapshot(getRunsDir(), runId);
        if (!stored) {
            return { ok: false, error: 'unknown_run' };
        }
        if (isTerminalRunStatus(stored.run.status)) {
            return alreadyTerminalResult(stored);
        }
        return { ok: false, error: 'workflow_not_attached', status: stored.run.status };
    }
    const outcome = await cancelWorkflowRun({
        ctx: attached.ctx,
        reason,
        by: opts.by ?? 'dashboard',
        actor: 'human',
        maxTicks: 200,
    });
    if (isTerminalRunStatus(outcome.snapshot.run.status)) {
        // Refresh the card before cleanup removes the card entry.
        await updateWorkflowProgressCard(runId);
        cleanupWorkflowRun(runId);
    }
    return {
        ok: true,
        runId,
        status: outcome.snapshot.run.status,
        alreadyTerminal: outcome.alreadyTerminal,
        cancelEventId: outcome.cancelEventId,
        loopReason: outcome.loopResult?.reason,
        lastSeq: outcome.snapshot.lastSeq,
    };
}
532
/**
 * Foreground portion of the running-cancel chain (v0.1.4-a, codex round 3 M1).
 *
 * Produces the response object the caller surfaces to the dashboard / IM
 * caller. Before returning, it launches a fire-and-forget background task
 * that waits for `entry.running` to settle and then drives
 * `cancelWorkflowRun` to finalize the cancel chain (cancelDelivered →
 * activityCanceled → nodeCanceled → runCanceled).
 *
 * The caller is expected to store the returned promise on
 * `entry.cancelling` immediately (without awaiting it first) so that a
 * concurrent second cancel call sees the in-flight promise and dedupes
 * onto it instead of re-writing `cancelRequested` or re-firing aborters.
 *
 * @param {object} entry - Attached run entry; this function reads
 *   `entry.ctx.log`, `entry.aborters` and `entry.running`.
 * @param {string} runId - Run being cancelled.
 * @param {string} reason - Cancel reason recorded on the cancel request.
 * @param {string} by - Origin of the cancel request.
 * @returns {Promise<object>} `{ ok: true, ... }` response; `pending: true`
 *   when the background finalize has been scheduled.
 */
async function startRunningCancel(entry, runId, reason, by) {
    const snapshot = replay(await entry.ctx.log.readAll());
    if (isTerminalRunStatus(snapshot.run.status)) {
        return {
            ok: true,
            runId,
            status: snapshot.run.status,
            alreadyTerminal: true,
            cancelEventId: snapshot.cancelledRunIntent?.cancelOriginEventId,
            lastSeq: snapshot.lastSeq,
        };
    }
    // 1) Write `cancelRequested` if not already present.
    let cancelEventId = snapshot.cancelledRunIntent?.cancelOriginEventId;
    if (!cancelEventId) {
        const cancel = await requestCancel(entry.ctx.log, { target: { kind: 'run', runId }, reason, by }, 'human');
        cancelEventId = cancel.eventId;
    }
    // 2) Fire all in-flight dispatch aborters so workers stop ASAP instead
    //    of waiting for the EventLog 200ms polling fallback.
    if (entry.aborters && entry.aborters.size > 0) {
        const abortReason = { cancelOriginEventId: cancelEventId };
        for (const ac of entry.aborters.values()) {
            if (!ac.signal.aborted) {
                ac.abort(abortReason);
            }
        }
    }
    // 3) Fire-and-forget background finalize: await the running loop, then
    //    drive `cancelWorkflowRun` to terminate the run. Idempotent so a
    //    redundant invocation (e.g. via a separate cold-attach path) is
    //    safe — replay short-circuits on already-terminal.
    void (async () => {
        try {
            // Rejections of the running loop are deliberately ignored here;
            // we only need to know that it has settled.
            await entry.running?.catch(() => { });
        }
        finally {
            // Re-read the registry: the run may have been cleaned up while
            // the loop was draining, in which case there is nothing to do.
            const current = workflowRuns.get(runId);
            if (current) {
                try {
                    const result = await cancelWorkflowRun({
                        ctx: current.ctx,
                        reason,
                        by,
                        actor: 'human',
                        maxTicks: 200,
                    });
                    if (isTerminalRunStatus(result.snapshot.run.status)) {
                        await updateWorkflowProgressCard(runId);
                        cleanupWorkflowRun(runId);
                    }
                }
                catch (err) {
                    logger.warn(`[workflow:${runId}] cancel finalize failed: ${err instanceof Error ? err.message : String(err)}`);
                }
            }
        }
    })();
    // Report the state as of now; the run usually is not terminal yet.
    const after = replay(await entry.ctx.log.readAll());
    return {
        ok: true,
        runId,
        status: after.run.status,
        alreadyTerminal: false,
        cancelEventId,
        loopReason: 'already-running',
        pending: true,
        lastSeq: after.lastSeq,
    };
}
616
/**
 * Resolve the single pending human-gate wait of a run on behalf of the
 * dashboard.
 *
 * @param {string} runId - Run whose wait should be resolved.
 * @param {string} resolution - Resolution value forwarded to `resolveWait`
 *   (the routes in this file pass 'approved' or 'rejected').
 * @param {string|undefined} comment - Optional free-text comment.
 * @returns {Promise<object>} `{ ok: true, ... }` on success (including the
 *   idempotent already-terminal case) or `{ ok: false, error, ... }` with
 *   one of: bad_run_id, unknown_run, workflow_not_attached, no_open_wait,
 *   ambiguous_wait, needs_lark_approval, internal_error.
 */
async function resolveDashboardWait(runId, resolution, comment) {
    if (!isValidRunId(runId)) {
        return { ok: false, error: 'bad_run_id' };
    }
    const entry = workflowRuns.get(runId);
    if (!entry) {
        // Not attached in this process: fall back to the on-disk snapshot.
        const snapshot = await readRunSnapshot(getRunsDir(), runId);
        if (!snapshot) {
            return { ok: false, error: 'unknown_run' };
        }
        if (isTerminalRunStatus(snapshot.run.status)) {
            // Treat as benign idempotent success — the wait was already resolved
            // by an earlier action (Lark card, CLI, or this dashboard).
            return {
                ok: true,
                runId,
                resolution,
                activityId: '',
                attemptId: '',
                resolvedAt: snapshot.updatedAt,
                lastSeq: snapshot.lastSeq,
                alreadyTerminal: true,
            };
        }
        return {
            ok: false,
            error: 'workflow_not_attached',
            status: snapshot.run.status,
            hint: 'Run not attached to this daemon (perhaps still cold). Try again shortly or check daemon logs.',
        };
    }
    const events = await entry.ctx.log.readAll();
    const snapshot = replay(events);
    const updatedAt = events[events.length - 1]?.timestamp ?? Date.now();
    if (isTerminalRunStatus(snapshot.run.status)) {
        return {
            ok: true,
            runId,
            resolution,
            activityId: '',
            attemptId: '',
            resolvedAt: updatedAt,
            lastSeq: snapshot.lastSeq,
            alreadyTerminal: true,
        };
    }
    // Find the unique pending human-gate wait. Other wait kinds (time /
    // condition) aren't approvable through this dashboard route; restricting
    // to human-gate matches codex's API contract and keeps the surface tight.
    // `approvers` lives on the original waitCreated event payload, not on
    // replay state — pull it from there so we don't reshape replay AttemptState
    // for a single auth check.
    const waitEventsByActivity = new Map();
    for (const ev of events) {
        if (ev.type !== 'waitCreated') {
            continue;
        }
        const payload = ev.payload;
        // A waitCreated event without a usable payload is skipped rather
        // than allowed to throw on the property access.
        if (typeof payload?.activityId !== 'string') {
            continue;
        }
        const approvers = Array.isArray(payload.approvers)
            ? payload.approvers.filter((x) => typeof x === 'string')
            : undefined;
        // Last waitCreated for the activity wins (re-create case).
        waitEventsByActivity.set(payload.activityId, { approvers });
    }
    const candidates = [];
    for (const activityId of snapshot.danglingWaits) {
        const activity = snapshot.activities.get(activityId);
        const at = activity?.attempts[activity.attempts.length - 1];
        if (!at?.wait || at.wait.waitKind !== 'human-gate') {
            continue;
        }
        candidates.push({
            activityId,
            attemptId: at.attemptId,
            approvers: waitEventsByActivity.get(activityId)?.approvers,
        });
    }
    if (candidates.length === 0) {
        return {
            ok: false,
            error: 'no_open_wait',
            hint: 'No pending humanGate wait on this run.',
        };
    }
    if (candidates.length > 1) {
        return {
            ok: false,
            error: 'ambiguous_wait',
            hint: `Run has ${candidates.length} pending humanGate waits; dashboard cannot ` +
                `pick one yet. Use the Lark approval card.`,
        };
    }
    const target = candidates[0];
    // approvers allowlist non-empty → preserve restricted-approval semantics.
    // Dashboard cookie auth doesn't carry user identity, so we don't try to
    // satisfy the allowlist from this path — defer to the Lark card.
    // Read approvers from the wait state (we stashed it on the candidate).
    if ((target.approvers?.length ?? 0) > 0) {
        return {
            ok: false,
            error: 'needs_lark_approval',
            hint: 'This gate has an approver allowlist; the Lark approval card is the ' +
                'only path that authenticates the approver identity.',
        };
    }
    try {
        const resolved = await resolveWait(entry.ctx.log, {
            activityId: target.activityId,
            attemptId: target.attemptId,
            resolution,
            by: 'dashboard',
            comment,
        });
        const after = replay(await entry.ctx.log.readAll());
        // Fire-and-forget re-drive — same pattern as Lark card path
        // (workflowApprovalResolved hook). Don't await; the dashboard caller
        // only needs the wait resolution to be persisted before responding.
        driveWorkflowRun(runId).catch((err) => {
            logger.warn(`[workflow:${runId}] re-entry after dashboard approval failed: ` +
                (err instanceof Error ? err.message : String(err)));
        });
        logger.info(`[workflow:${runId}] wait ${target.activityId}/${target.attemptId} resolved=${resolution} via dashboard`);
        return {
            ok: true,
            runId,
            resolution,
            activityId: target.activityId,
            attemptId: target.attemptId,
            resolvedAt: resolved.resolutionEvent.timestamp,
            lastSeq: after.lastSeq,
            pending: !isTerminalRunStatus(after.run.status),
        };
    }
    catch (err) {
        return {
            ok: false,
            error: 'internal_error',
            message: err instanceof Error ? err.message : String(err),
        };
    }
}
755
/**
 * Scan the runs directory and attach runs owned by `ownerLarkAppId` that
 * are not yet tracked in `workflowRuns`, delegating the actual scan to
 * `attachColdWorkflowRunsForDaemon`.
 *
 * Never throws: a failed scan is logged and swallowed so that daemon
 * startup can continue.
 *
 * @param {string} ownerLarkAppId - Lark app id of the bot owning this daemon.
 * @returns {Promise<void>}
 */
async function attachColdWorkflowRuns(ownerLarkAppId) {
    const runsDir = getRunsDir();
    try {
        const result = await attachColdWorkflowRunsForDaemon({
            runsDir,
            ownerLarkAppId,
            isAttached: (runId) => workflowRuns.has(runId),
            makeContext: (run, log) => ({
                log,
                def: run.def,
                spawnSubagent: workflowSpawnFn(),
                hostExecutors: createDefaultHostExecutorRegistry(),
                reconcilers: createDefaultProviderReconcilers(),
                loadEffectInput: (activityId, attemptId) => loadEffectInputSidecar(log, activityId, attemptId),
            }),
            attachWatcher: (runId, ctx) => attachWorkflowEventWatcher(runId, ctx),
            driveRun: (runId) => driveWorkflowRun(runId),
            onSkip: (runId, reason) => logger.debug(`[workflow:${runId}] cold-scan skipped: ${reason}`),
            onAttached: (run) => {
                logger.info(`[workflow:${run.runId}] cold-attached status=${run.snapshot.run.status} ` +
                    `danglingEffects=${run.snapshot.danglingEffectAttempted.length} ` +
                    `danglingWaits=${run.snapshot.danglingWaits.length}`);
            },
            onDriveError: (runId, err) => {
                logger.warn(`[workflow:${runId}] cold-scan drive failed: ${err instanceof Error ? err.message : String(err)}`);
            },
        });
        if (result.discovered === 0) {
            logger.info(`[workflow] cold-scan: no active runs for ${ownerLarkAppId}`);
        }
    }
    catch (err) {
        logger.warn(`[workflow] cold-scan failed for ${ownerLarkAppId}; continuing daemon startup: ${err instanceof Error ? err.message : String(err)}`);
    }
}
790
/**
 * Build the daemon-backed WorkerSpawnFn lazily. We avoid touching
 * bot-registry at module-init time (it isn't loaded yet); each call
 * resolves credentials by the workflow node's `bot` name, falling
 * back to the IM larkAppId if the bot rename hasn't propagated.
 *
 * Multi-daemon: each process registers only its own bot in memory, but
 * workflow subagent nodes may target sibling bots (e.g. coco/aiden) that
 * live in other daemon processes. The shared bots.json is the source of
 * truth across daemons, so we fall back to it when the in-memory
 * registry misses.
 *
 * The credential resolver throws an `Error` when the bot name matches
 * neither the in-memory registry nor the loaded bot configs.
 *
 * @returns The spawn function produced by `createDaemonSpawnFn`.
 */
function workflowSpawnFn() {
    const daemonDeps = createWorkflowDaemonSpawn({
        resolveLarkCredentials: (botName) => {
            // First choice: a bot registered in this process.
            const bot = getAllBots().find((b) => b.config.name === botName || b.botName === botName || b.config.larkAppId === botName);
            if (bot) {
                return {
                    larkAppId: bot.config.larkAppId,
                    larkAppSecret: bot.config.larkAppSecret,
                };
            }
            // Fallback: bot configs loaded via `loadBotConfigs()`.
            const siblingConfigs = loadBotConfigs();
            const sibling = siblingConfigs.find((c) => c.name === botName || c.larkAppId === botName);
            if (!sibling) {
                throw new Error(`workflow: bot '${botName}' not found in registry`);
            }
            return {
                larkAppId: sibling.larkAppId,
                larkAppSecret: sibling.larkAppSecret,
            };
        },
    });
    return createDaemonSpawnFn(daemonDeps);
}
825
/**
 * Try to interpret an incoming message as a workflow command and, if it is
 * one, execute it and reply in the conversation.
 *
 * @param {string} content - Message text to hand to `executeWorkflowCommand`.
 * @param {string} anchor - Reply anchor passed to `sessionReply`.
 * @param {string|undefined} chatId - Chat the message came from; when
 *   falsy, the progress card is sent but not tracked in `workflowRunCards`.
 * @param {string} larkAppId - Lark app id used for replies.
 * @param {string|undefined} initiator - Sender id; defaults to 'unknown'.
 * @returns {Promise<boolean>} `true` when the message was a workflow
 *   command (successful or not), `false` when the caller should continue
 *   with its normal handling.
 */
async function handleWorkflowCommandIfAny(content, anchor, chatId, larkAppId, initiator) {
    // Captured by the `onRunCreated` closure so the trailing text reply can be
    // suppressed when the run-level progress card already landed. Codex
    // round 1 medium: "single self-updating tile" promise breaks if we also
    // dump a `Workflow loop stopped: …` line at the end.
    let startingCardSent = false;
    const result = await executeWorkflowCommand({
        content,
        chatId,
        larkAppId,
        initiator: initiator ?? 'unknown',
    }, {
        attachWorkflowEventWatcher,
        spawnSubagent: workflowSpawnFn(),
        runLoopFn: (ctx) => driveWorkflowRun(ctx.log.runId),
        cancelWorkflowRunFn: (runId, reason, opts) => cancelWorkflowRunOnDaemon(runId, reason, opts),
        onRunCreated: async (info) => {
            // v0.1.5 slice 1: send the run-level progress card so the user
            // sees a single self-updating tile. Best-effort: if the card
            // send fails we still fall back to a plain-text "started"
            // reply so they at least see the runId.
            try {
                const cardJson = buildWorkflowStartingCard({
                    runId: info.runId,
                    workflowId: info.workflowId,
                });
                const cardMessageId = await sessionReply(anchor, cardJson, 'interactive', larkAppId);
                if (chatId) {
                    workflowRunCards.set(info.runId, {
                        cardMessageId,
                        larkAppId,
                        chatId,
                        updateChain: Promise.resolve(),
                    });
                }
                startingCardSent = true;
            }
            catch (err) {
                logger.warn(`[workflow:${info.runId}] failed to send progress card (falling back to text): ${err instanceof Error ? err.message : String(err)}`);
                try {
                    await sessionReply(anchor, `Workflow started: ${info.workflowId}\nrunId: ${info.runId}\nWeb: ${workflowRunDetailUrl(info.runId)}`, 'text', larkAppId);
                }
                catch (fallbackErr) {
                    logger.warn(`[workflow:${info.runId}] failed to send start reply: ${fallbackErr instanceof Error ? fallbackErr.message : String(fallbackErr)}`);
                }
            }
        },
    });
    if (!result.handled) {
        return false;
    }
    if (!result.ok) {
        await sessionReply(anchor, `Workflow 命令失败:${result.error}${result.usage ? `\n${result.usage}` : ''}`, 'text', larkAppId);
        return true;
    }
    // Skip the trailing text echo only for `run` commands whose progress card
    // landed — the card already shows status/runId/web link, and the card
    // patch path covers final state. `cancel` keeps the text since cancel
    // doesn't drive `onRunCreated` and may target a card-less run.
    if (result.command === 'run' && startingCardSent) {
        return true;
    }
    await sessionReply(anchor, formatWorkflowCommandResult(result), 'text', larkAppId);
    return true;
}
889
/**
 * Render a successful workflow command result as the plain-text reply.
 *
 * For `cancel` results the text depends on `alreadyTerminal` / `pending`.
 * Every other command is formatted from `result.loopResult.reason`, with
 * the 'awaiting-wait' reason shown as an approval prompt.
 *
 * @param {object} result - Command result; reads `command`, `runId`,
 *   `status`, `alreadyTerminal`, `pending` and `loopResult.reason`.
 * @returns {string} Text to send back to the conversation.
 */
function formatWorkflowCommandResult(result) {
    if (result.command === 'cancel') {
        if (result.alreadyTerminal) {
            return `Workflow already terminal: ${result.status}\nrunId: ${result.runId}`;
        }
        if (result.pending) {
            return `Workflow cancel requested; waiting for running activity to drain.\nrunId: ${result.runId}\nstatus: ${result.status}`;
        }
        return `Workflow cancel processed.\nrunId: ${result.runId}\nstatus: ${result.status}`;
    }
    const awaitingWait = result.loopResult.reason === 'awaiting-wait';
    const status = awaitingWait ? '等待审批' : result.loopResult.reason;
    const next = awaitingWait
        ? '\n请在群里查看审批卡,点击后 workflow 会继续执行。'
        : '';
    return `Workflow loop stopped: ${status}\nrunId: ${result.runId}${next}`;
}
217
907
  function getActiveCount() {
218
908
  let count = 0;
219
909
  for (const [, ds] of activeSessions) {
@@ -275,7 +965,182 @@ const cardDeps = {
275
965
  activeSessions,
276
966
  sessionReply,
277
967
  lastRepoScan,
968
+ workflowApprovalResolved: (runId) => {
969
+ driveWorkflowRun(runId).catch((err) => {
970
+ logger.warn(`[workflow:${runId}] re-entry after approval failed: ${err instanceof Error ? err.message : String(err)}`);
971
+ });
972
+ },
278
973
  };
974
/**
 * Map a failed `resolveDashboardWait` result to an HTTP status code.
 *
 * @param {{ error: string }} error - Failure result carrying the error code.
 * @returns {number} HTTP status; unrecognised codes map to 500 so the
 *   route never hands an undefined status to the response writer.
 */
function dashboardWaitStatus(error) {
    switch (error.error) {
        case 'bad_run_id':
            return 400;
        case 'unknown_run':
            return 404;
        case 'workflow_not_attached':
        case 'no_open_wait':
        case 'ambiguous_wait':
            return 409;
        case 'needs_lark_approval':
            return 403;
        case 'internal_error':
        default:
            return 500;
    }
}
985
// Register the dashboard approve / reject endpoints. Both share one handler
// and differ only in the resolution passed to `resolveDashboardWait`.
for (const [path, resolution] of [
    ['/api/workflows/runs/:runId/approve', 'approved'],
    ['/api/workflows/runs/:runId/reject', 'rejected'],
]) {
    ipcRoute('POST', path, async (req, res, params) => {
        let body;
        try {
            body = await readJsonBody(req);
        }
        catch {
            return jsonRes(res, 400, { ok: false, error: 'bad_json' });
        }
        // `body?.` keeps a body that decoded to null/undefined from throwing;
        // blank or non-string comments are treated as "no comment".
        const comment = typeof body?.comment === 'string' && body.comment.trim()
            ? body.comment.trim()
            : undefined;
        const result = await resolveDashboardWait(params.runId, resolution, comment);
        if (!result.ok) {
            return jsonRes(res, dashboardWaitStatus(result), result);
        }
        return jsonRes(res, 200, result);
    });
}
1007
/**
 * Map a failed attempt-resume result to an HTTP status code.
 *
 * @param {{ error: string }} error - Failure result carrying the error code.
 * @returns {number} 400 for malformed input, 404 when the resume target is
 *   missing, 409 for unmet preconditions, 500 for anything else.
 */
function attemptResumeStatus(error) {
    switch (error.error) {
        case 'bad_run_id':
        case 'bad_attempt_id':
        case 'bad_json':
            return 400;
        case 'no_terminal_sidecar':
        case 'resume_not_running':
            return 404;
        case 'missing_cli_session_id':
        case 'missing_lark_app_id':
        case 'bot_not_registered':
            return 409;
        default:
            return 500;
    }
}
1024
// POST …/resume — start a resume for one attempt. The request body is not
// read; all inputs come from the path parameters.
ipcRoute('POST', '/api/workflows/runs/:runId/attempts/:activityId/:attemptId/resume', async (_req, res, params) => {
    const result = await workflowAttemptResumes.start({
        runId: params.runId,
        activityId: params.activityId,
        attemptId: params.attemptId,
    });
    if (!result.ok) {
        return jsonRes(res, attemptResumeStatus(result), result);
    }
    return jsonRes(res, 200, result);
});
1034
// POST …/resume/end — end a resume for one attempt. An optional `reason`
// string may be supplied in the JSON body.
ipcRoute('POST', '/api/workflows/runs/:runId/attempts/:activityId/:attemptId/resume/end', async (req, res, params) => {
    let body;
    try {
        body = await readJsonBody(req);
    }
    catch {
        return jsonRes(res, 400, { ok: false, error: 'bad_json' });
    }
    // `body?.` keeps a body that decoded to null/undefined from throwing;
    // blank or non-string reasons fall back to the default.
    const reason = typeof body?.reason === 'string' && body.reason.trim()
        ? body.reason.trim()
        : 'ended_by_dashboard';
    const result = await workflowAttemptResumes.end({
        runId: params.runId,
        activityId: params.activityId,
        attemptId: params.attemptId,
        reason,
    });
    if (!result.ok) {
        return jsonRes(res, attemptResumeStatus(result), result);
    }
    return jsonRes(res, 200, result);
});
1054
// POST /api/workflows/runs/:runId/cancel — cancel a run from the dashboard.
// An optional `reason` string may be supplied in the JSON body.
ipcRoute('POST', '/api/workflows/runs/:runId/cancel', async (req, res, params) => {
    let body;
    try {
        body = await readJsonBody(req);
    }
    catch {
        return jsonRes(res, 400, { ok: false, error: 'bad_json' });
    }
    // `body?.` keeps a body that decoded to null/undefined from throwing;
    // blank or non-string reasons fall back to the default.
    const reason = typeof body?.reason === 'string' && body.reason.trim()
        ? body.reason.trim()
        : 'cancelled via dashboard';
    const result = await cancelWorkflowRunOnDaemon(params.runId, reason);
    if (!result.ok) {
        const status = result.error === 'bad_run_id' ? 400 :
            result.error === 'unknown_run' ? 404 :
                result.error === 'workflow_not_attached' ? 409 :
                    result.error === 'wrong_chat' ? 403 :
                        500;
        return jsonRes(res, status, result);
    }
    return jsonRes(res, 200, result);
});
1076
// POST /api/workflows/definitions/:id/run — trigger a workflow run from the
// dashboard. Body: `{ chatBinding: { chatId, larkAppId }, params?: object }`.
ipcRoute('POST', '/api/workflows/definitions/:id/run', async (req, res, params) => {
    const workflowId = params.id;
    if (!isValidWorkflowId(workflowId)) {
        return jsonRes(res, 400, { ok: false, error: 'bad_id' });
    }
    let body;
    try {
        body = await readJsonBody(req);
    }
    catch {
        return jsonRes(res, 400, { ok: false, error: 'bad_json' });
    }
    // `body?.` makes a body that decoded to null/undefined fail as a missing
    // chat binding instead of throwing. Past this check `body` is known to
    // be an object, because the binding was read off it.
    const chatBinding = parseTriggerChatBinding(body?.chatBinding);
    if (!chatBinding) {
        return jsonRes(res, 400, { ok: false, error: 'missing_chat_binding' });
    }
    if (body.params !== undefined) {
        if (typeof body.params !== 'object' || body.params === null || Array.isArray(body.params)) {
            return jsonRes(res, 400, { ok: false, error: 'bad_params_shape' });
        }
    }
    // Convert JSON-channel params (decoded values) into the shared RawParamInput
    // map. String-channel coercion stays on the IM `/workflow run` path.
    // Object.fromEntries defines own data properties, so a client-supplied
    // `__proto__` key is stored as an ordinary entry rather than rewriting
    // the map's prototype the way `rawParams[k] = …` would.
    const rawParams = Object.fromEntries(Object.entries(body.params ?? {}).map(([k, v]) => [k, { kind: 'json', value: v }]));
    const result = await triggerWorkflowRun({
        workflowId,
        rawParams,
        chatBinding,
        initiator: 'dashboard',
    }, {
        spawnSubagent: workflowSpawnFn(),
        botResolver: resolveBotSnapshot,
        makeRuntimeContext: (log, def, spawnSubagent) => ({
            log,
            def,
            spawnSubagent,
            hostExecutors: createDefaultHostExecutorRegistry(),
            reconcilers: createDefaultProviderReconcilers(),
            loadEffectInput: (activityId, attemptId) => loadEffectInputSidecar(log, activityId, attemptId),
        }),
        attachRuntime: (runId, ctx) => attachWorkflowEventWatcher(runId, ctx),
        driveRun: (runId) => {
            // Fire-and-forget: the HTTP response does not wait for the run loop.
            driveWorkflowRun(runId).catch((err) => {
                logger.warn(`[workflow:${runId}] dashboard-trigger drive failed: ${err instanceof Error ? err.message : String(err)}`);
            });
        },
    });
    if (!result.ok) {
        const status = result.error === 'unknown_workflow' ? 404 :
            result.error === 'invalid_params' ? 400 :
                500;
        return jsonRes(res, status, result);
    }
    return jsonRes(res, 200, result);
});
1134
/**
 * Validate and normalise the `chatBinding` field of a trigger request.
 *
 * @param {unknown} raw - Value taken from the decoded JSON body.
 * @returns {{ chatId: string, larkAppId: string } | undefined} The trimmed
 *   binding, or `undefined` when `raw` is not a plain object or either
 *   field is missing, not a string, or blank.
 */
function parseTriggerChatBinding(raw) {
    if (!raw || typeof raw !== 'object' || Array.isArray(raw)) {
        return undefined;
    }
    const { chatId, larkAppId } = raw;
    if (typeof chatId !== 'string' || !chatId.trim()) {
        return undefined;
    }
    if (typeof larkAppId !== 'string' || !larkAppId.trim()) {
        return undefined;
    }
    return { chatId: chatId.trim(), larkAppId: larkAppId.trim() };
}
279
1144
  // ─── Event handling ──────────────────────────────────────────────────────────
280
1145
  /**
281
1146
  * Default-oncall is a uniform forward-only policy: whenever the toggle is
@@ -415,6 +1280,9 @@ async function handleNewTopic(data, ctx) {
415
1280
  const senderOpenId = data.sender?.sender_id?.open_id;
416
1281
  const botCfg = getBot(larkAppId).config;
417
1282
  logger.info(`New session: "${content.substring(0, 60)}" (scope=${scope}, anchor=${anchor.substring(0, 12)}, resources: ${resources.length}, active: ${getActiveCount()}, messageId: ${messageId}, chatId: ${chatId})`);
1283
+ if (await handleWorkflowCommandIfAny(cmdContent, anchor, chatId, larkAppId, senderOpenId)) {
1284
+ return;
1285
+ }
418
1286
  // Intercept daemon commands in new topics (no session needed for some commands)
419
1287
  const invocation = parseSlashCommandInvocation(cmdContent);
420
1288
  if (invocation) {
@@ -701,6 +1569,9 @@ async function handleThreadReply(data, ctx) {
701
1569
  return;
702
1570
  }
703
1571
  }
1572
+ if (await handleWorkflowCommandIfAny(cmdContent, anchor, ctxChatId ?? data?.message?.chat_id, larkAppId, parsed.senderId || data?.sender?.sender_id?.open_id)) {
1573
+ return;
1574
+ }
704
1575
  // Intercept daemon commands
705
1576
  const invocation = parseSlashCommandInvocation(cmdContent);
706
1577
  if (invocation) {
@@ -1038,6 +1909,7 @@ export async function startDaemon(botIndex) {
1038
1909
  scheduleStore.startExternalWriteWatcher();
1039
1910
  logger.info(`Bot ${idx}/${botConfigs.length}: ${cfg.larkAppId} (cli: ${cfg.cliId})`);
1040
1911
  writePidFile();
1912
+ const memoryDiagnostics = startMemoryDiagnostics();
1041
1913
  // Publish self-descriptor for the dashboard registry. The dashboard sibling
1042
1914
  // process discovers running daemons by scanning ~/.botmux/data/dashboard-daemons/
1043
1915
  // and watching for mtime updates (heartbeat) / file removal (shutdown).
@@ -1173,6 +2045,7 @@ export async function startDaemon(botIndex) {
1173
2045
  }
1174
2046
  // Restore active sessions from previous run
1175
2047
  restoreActiveSessions(activeSessions);
2048
+ await attachColdWorkflowRuns(cfg.larkAppId);
1176
2049
  // Start scheduler in every daemon. Each daemon owns exactly one bot, so
1177
2050
  // each filters to only execute tasks whose `larkAppId` matches its bot
1178
2051
  // (unmatched tasks are handled by the owning bot's daemon instead; a
@@ -1196,7 +2069,13 @@ export async function startDaemon(botIndex) {
1196
2069
  shuttingDown = true;
1197
2070
  logger.info(`Daemon shutting down... (active: ${getActiveCount()})`);
1198
2071
  scheduler.stopScheduler();
2072
+ for (const watcher of workflowEventWatchers.values())
2073
+ watcher.close();
2074
+ workflowEventWatchers.clear();
2075
+ workflowRuns.clear();
1199
2076
  clearInterval(descriptorHeartbeat);
2077
+ if (memoryDiagnostics)
2078
+ clearInterval(memoryDiagnostics);
1200
2079
  removeDaemonDescriptor(cfg.larkAppId);
1201
2080
  ipcHandle.close().catch(() => { });
1202
2081
  const pendingExits = [];
@@ -1262,6 +2141,8 @@ export async function startDaemon(botIndex) {
1262
2141
  // the descriptor so the dashboard doesn't see a phantom daemon.
1263
2142
  process.on('exit', () => {
1264
2143
  clearInterval(descriptorHeartbeat);
2144
+ if (memoryDiagnostics)
2145
+ clearInterval(memoryDiagnostics);
1265
2146
  removeDaemonDescriptor(cfg.larkAppId);
1266
2147
  // Plain-exit path (uncaught fatal, manual process.exit) bypasses the
1267
2148
  // graceful shutdown above. flushIdentityCacheSync is synchronous and