ylib-syim 0.0.21 → 0.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bridges/main.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { setupBridgeLogger } from "./logger.ts";
1
+ import { formatBeijingLogTimestamp, setupBridgeLogger } from "./logger.ts";
2
2
  import fs from "node:fs";
3
3
  import http from "node:http";
4
4
  import os from "node:os";
@@ -61,10 +61,14 @@ const localConfigOnly =
61
61
  const runtimeStateStorePaths = resolveRuntimeStateStorePaths(
62
62
  (process.env.IM_AGENT_HUB_HOME || "").trim() || undefined,
63
63
  );
64
+ // runtime-state.json 是运行态的磁盘 warm snapshot。默认关闭读写,避免空配置、
65
+ // 配置源异常或本地误配置时把历史账号状态从磁盘恢复回内存并重新展示。
66
+ // 需要本地调试/对比实验时再显式开启:
67
+ // RUNTIME_STATE_WARM_RESTORE=1 RUNTIME_STATE_SNAPSHOT_PERSIST=1
64
68
  const runtimeStateWarmRestoreEnabled =
65
- process.env.RUNTIME_STATE_WARM_RESTORE !== "0";
69
+ process.env.RUNTIME_STATE_WARM_RESTORE === "1";
66
70
  const runtimeStateSnapshotPersistEnabled =
67
- process.env.RUNTIME_STATE_SNAPSHOT_PERSIST !== "0";
71
+ process.env.RUNTIME_STATE_SNAPSHOT_PERSIST === "1";
68
72
  const runtimeStateEventJournalEnabled =
69
73
  process.env.RUNTIME_STATE_EVENT_JOURNAL !== "0";
70
74
  const RUNTIME_STATE_EVENT_JOURNAL_MAX_BYTES_DEFAULT = 5 * 1024 * 1024;
@@ -123,6 +127,10 @@ const runtimeStatusProbeTimeoutMsWeixin = Math.max(
123
127
  runtimeStatusProbeTimeoutMs,
124
128
  Number(process.env.RUNTIME_STATUS_PROBE_TIMEOUT_MS_WEIXIN || "4000") || 4000,
125
129
  );
130
+ const runtimeAccountControlTimeoutMs = Math.max(
131
+ 1_000,
132
+ Number(process.env.RUNTIME_ACCOUNT_CONTROL_TIMEOUT_MS || "10000") || 10000,
133
+ );
126
134
  /** 与 OpenClaw `channels.status` 的 `timeoutMs` 语义对齐:最小 1000ms,HTTP 入参硬上限。 */
127
135
  const RUNTIME_STATUS_PROBE_TIMEOUT_OPENCLAW_MIN_MS = 1_000;
128
136
  const RUNTIME_STATUS_PROBE_TIMEOUT_HTTP_MAX_MS = 120_000;
@@ -575,6 +583,7 @@ const runtimeHealthGuardRestartsByHour = new Map<string, number[]>();
575
583
 
576
584
  // bot 控制并发保护(platform+account 粒度)。
577
585
  const botControlInFlight = new Set<string>();
586
+ let runtimeConfigApplyInFlight = false;
578
587
  // 账号归一化日志去重集合。
579
588
  const runtimeAccountNormalizationLogMemo = new Set<string>();
580
589
 
@@ -669,6 +678,8 @@ type RuntimeChannelActivityEntry = {
669
678
  const runtimeChannelActivityRegistry = new Map<string, RuntimeChannelActivityEntry>();
670
679
  /** 对标登出语义:用于快照层展示 `lastDisconnect.loggedOut`。 */
671
680
  const runtimeLogoutRegistry = new Set<string>();
681
+ /** 手动 stop 生命周期:防止迟到 probe/event 在保护窗后把已停止账号恢复为在线。 */
682
+ const runtimeManualStoppedKeys = new Set<string>();
672
683
 
673
684
  type RuntimeStateSnapshotFilePayload = {
674
685
  schema_version: 1;
@@ -925,6 +936,28 @@ function markRuntimeAccountLoggedOut(
925
936
  });
926
937
  }
927
938
 
939
/**
 * Builds the registry key for a platform/account pair.
 *
 * The account id is first passed through `normalizeRuntimeAccountId`; when
 * that yields a falsy value the raw `bot_account_id` is used instead.
 */
function runtimeAccountKey(
  platform: RuntimePlatform,
  bot_account_id: string,
): string {
  const canonicalId = normalizeRuntimeAccountId(platform, bot_account_id);
  if (canonicalId) {
    return keyOf(platform, canonicalId);
  }
  return keyOf(platform, bot_account_id);
}
946
+
947
/**
 * Records the account in `runtimeManualStoppedKeys` under its
 * `runtimeAccountKey`, flagging it as manually stopped.
 */
function markRuntimeAccountManuallyStopped(
  platform: RuntimePlatform,
  bot_account_id: string,
): void {
  const stoppedKey = runtimeAccountKey(platform, bot_account_id);
  runtimeManualStoppedKeys.add(stoppedKey);
}
953
+
954
/**
 * Removes the account's `runtimeAccountKey` from `runtimeManualStoppedKeys`.
 * A no-op when the account was not flagged.
 */
function clearRuntimeAccountManualStop(
  platform: RuntimePlatform,
  bot_account_id: string,
): void {
  const stoppedKey = runtimeAccountKey(platform, bot_account_id);
  runtimeManualStoppedKeys.delete(stoppedKey);
}
960
+
928
961
  // shouldSkipAccountProbe: 按账号跳过未过期的 probe(对标按需探活)。
929
962
  function shouldSkipAccountProbe(
930
963
  key: string,
@@ -1186,6 +1219,41 @@ function applyRemoteRuntimeConfigToPluginHome(): void {
1186
1219
  }
1187
1220
  }
1188
1221
 
1222
/**
 * Writes `params.config` as pretty-printed JSON to `params.targetPath`.
 *
 * The JSON is first written to a uniquely named hidden temp file in the same
 * directory and then renamed onto the target, so readers never observe a
 * partially written file. The parent directory is created when missing.
 *
 * @throws Error when the trimmed target path is empty, or when writing or
 *   renaming fails (the temp file is removed on a best-effort basis first).
 */
function persistRuntimeConfigSnapshot(params: {
  config: Record<string, unknown>;
  targetPath: string;
  reason: string;
}): void {
  const destination = String(params.targetPath || "").trim();
  if (destination.length === 0) {
    throw new Error("target config path is empty");
  }

  const parentDir = path.dirname(destination);
  fs.mkdirSync(parentDir, { recursive: true });

  // pid + timestamp + random suffix keeps concurrent writers from colliding.
  const stagingName = [
    `.${path.basename(destination)}`,
    String(process.pid),
    String(Date.now()),
    Math.random().toString(36).slice(2, 8),
    "tmp",
  ].join(".");
  const stagingPath = path.join(parentDir, stagingName);

  try {
    const serialized = JSON.stringify(params.config, null, 2);
    fs.writeFileSync(stagingPath, serialized, "utf-8");
    fs.renameSync(stagingPath, destination);
    console.log(
      `[bridges/main] runtime config persisted reason=${params.reason} path=${destination}`,
    );
  } catch (err) {
    try {
      // Best-effort cleanup; a missing temp file simply throws and is ignored.
      fs.unlinkSync(stagingPath);
    } catch {
      // ignore cleanup failure; original persist error is more useful.
    }
    throw err;
  }
}
1256
+
1189
1257
  // nowIso: 生成当前 ISO 时间戳。
1190
1258
  function nowIso(): string {
1191
1259
  // 全链路统一使用 ISO 字符串时间戳。
@@ -1647,6 +1715,39 @@ function hasFreshRuntimeEventLivenessForProbeGuard(
1647
1715
  return ageMs <= runtimeStatusProbeDowngradeGuardEventFreshMs;
1648
1716
  }
1649
1717
 
1718
/**
 * Returns true when the event name (trimmed, case-insensitive) is one of the
 * names this module treats as evidence of a connected link.
 */
function isConnectedRuntimeEventLivenessName(
  eventName: string | null | undefined,
): boolean {
  const candidate = String(eventName || "").trim().toLowerCase();
  switch (candidate) {
    case "socket_open":
    case "socket_keepalive":
    case "websocket_open":
    case "websocket_keepalive":
    case "get_updates_ok":
    case "status:connected":
    case "status_sink:connected":
      return true;
    default:
      return false;
  }
}
1734
+
1735
/**
 * Reports whether the event-liveness entry for `key` holds a connected-type
 * event whose timestamp is no older than
 * `runtimeStatusProbeDowngradeGuardEventFreshMs` relative to `nowMs`.
 *
 * Returns false when there is no entry, the last event is not a connected
 * event, or its timestamp cannot be parsed. A timestamp ahead of `nowMs` is
 * clamped to an age of 0 and therefore counts as fresh.
 */
function hasFreshConnectedRuntimeEventLivenessForRecovery(
  key: string,
  nowMs = Date.now(),
): boolean {
  const liveness = runtimeEventLivenessRegistry.get(key);
  const lastEventIsConnected = isConnectedRuntimeEventLivenessName(
    liveness?.last_event,
  );
  if (!lastEventIsConnected) {
    return false;
  }

  const rawTimestamp = String(liveness?.last_event_at || "").trim();
  const lastEventMs = parseIsoTimeMs(rawTimestamp || null);
  if (lastEventMs == null) {
    return false;
  }

  const elapsedMs = Math.max(0, nowMs - lastEventMs);
  return elapsedMs <= runtimeStatusProbeDowngradeGuardEventFreshMs;
}
1750
+
1650
1751
  function shouldGuardProbeDowngradeByFreshEvent(params: {
1651
1752
  key: string;
1652
1753
  prev: RuntimeBotStatus;
@@ -1678,12 +1779,29 @@ function shouldGuardProbeConnectedByEventBroken(params: {
1678
1779
  }
1679
1780
 
1680
1781
  function shouldBypassProtectWindowForEventRecovery(params: {
1782
+ key: string;
1681
1783
  prev: RuntimeBotStatus;
1682
1784
  next: RuntimeBotStatus;
1683
1785
  }): boolean {
1684
- const { prev, next } = params;
1786
+ const { key, prev, next } = params;
1685
1787
  const prevSource = prev.status_source || "probe";
1686
1788
  const nextSource = next.status_source || "probe";
1789
+ // manual/connecting 是控制面临时态:表示启动/重启请求已经提交,
1790
+ // 不是数据面证明“仍在连接中”。当后续 event 明确回传 connected 时,
1791
+ // 应立即收敛为在线;否则会因为 manual rank 更高,在保护窗内持续
1792
+ // 抑制 connected,导致前端看到“已有 keepalive 成功但仍显示连接中”。
1793
+ // probe 只能证明凭证/API 可用,不一定证明 WebSocket/long-poll 数据面
1794
+ // 已恢复,所以 probe/connected 只有在存在新鲜 connected 事件事实时才旁路。
1795
+ if (
1796
+ prevSource === "manual" &&
1797
+ prev.link_status === "connecting" &&
1798
+ next.link_status === "connected"
1799
+ ) {
1800
+ if (nextSource === "event") return true;
1801
+ if (nextSource === "probe") {
1802
+ return hasFreshConnectedRuntimeEventLivenessForRecovery(key);
1803
+ }
1804
+ }
1687
1805
  if (prevSource !== "event" || nextSource !== "event") return false;
1688
1806
  const prevBroken =
1689
1807
  prev.link_status === "disconnected" ||
@@ -1843,6 +1961,36 @@ function upsertRuntimeStatus(entry: RuntimeBotStatus): void {
1843
1961
  });
1844
1962
  }
1845
1963
 
1964
+ if (
1965
+ prev &&
1966
+ normalizedEntry.status_source !== "manual" &&
1967
+ runtimeManualStoppedKeys.has(key)
1968
+ ) {
1969
+ const stoppedApplied: RuntimeBotStatus = {
1970
+ ...prev,
1971
+ last_heartbeat_at:
1972
+ normalizedEntry.last_heartbeat_at || prev.last_heartbeat_at,
1973
+ last_probe_at: normalizedEntry.last_probe_at || prev.last_probe_at,
1974
+ reconnect_count:
1975
+ normalizedEntry.reconnect_count ?? prev.reconnect_count,
1976
+ restart_pending: false,
1977
+ running: false,
1978
+ busy: normalizedEntry.busy ?? prev.busy,
1979
+ active_runs: normalizedEntry.active_runs ?? prev.active_runs,
1980
+ last_run_activity_at:
1981
+ normalizedEntry.last_run_activity_at || prev.last_run_activity_at,
1982
+ mode: normalizedEntry.mode || prev.mode,
1983
+ };
1984
+ runtimeStatusRegistry.set(key, stoppedApplied);
1985
+ syncRuntimeChannelSnapshotFromStatus(stoppedApplied);
1986
+ mirrorHeartbeatFromStatus(stoppedApplied);
1987
+ scheduleRuntimeStateSnapshotPersist();
1988
+ logRuntimeStatusDebug(
1989
+ `status preserved manual-stop key=${key} prev=${prev.status_source || "probe"}/${prev.link_status} next=${normalizedEntry.status_source || "probe"}/${normalizedEntry.link_status}`,
1990
+ );
1991
+ return;
1992
+ }
1993
+
1846
1994
  if (
1847
1995
  prev &&
1848
1996
  shouldGuardProbeDowngradeByFreshEvent({
@@ -1971,6 +2119,7 @@ function upsertRuntimeStatus(entry: RuntimeBotStatus): void {
1971
2119
  const windowMs = resolveStatusProtectWindowMs(prev);
1972
2120
  const bypassProtectWindowForEventRecovery =
1973
2121
  shouldBypassProtectWindowForEventRecovery({
2122
+ key,
1974
2123
  prev,
1975
2124
  next: normalizedEntry,
1976
2125
  });
@@ -1982,6 +2131,9 @@ function upsertRuntimeStatus(entry: RuntimeBotStatus): void {
1982
2131
  !bypassProtectWindowForEventRecovery
1983
2132
  ) {
1984
2133
  // 低优先级回写被抑制时,仍更新部分时效字段,避免观测面停滞。
2134
+ const preserveManualStopLifecycle =
2135
+ (prev.status_source || "probe") === "manual" &&
2136
+ prev.link_status === "disconnected";
1985
2137
  logSuppressedTransition({
1986
2138
  key,
1987
2139
  prev,
@@ -1999,12 +2151,22 @@ function upsertRuntimeStatus(entry: RuntimeBotStatus): void {
1999
2151
  last_probe_at: normalizedEntry.last_probe_at || prev.last_probe_at,
2000
2152
  reconnect_count:
2001
2153
  normalizedEntry.reconnect_count ?? prev.reconnect_count,
2002
- last_event: normalizedEntry.last_event || prev.last_event,
2003
- last_event_at: normalizedEntry.last_event_at || prev.last_event_at,
2154
+ last_event: preserveManualStopLifecycle
2155
+ ? prev.last_event
2156
+ : normalizedEntry.last_event || prev.last_event,
2157
+ last_event_at: preserveManualStopLifecycle
2158
+ ? prev.last_event_at
2159
+ : normalizedEntry.last_event_at || prev.last_event_at,
2004
2160
  restart_pending:
2005
- normalizedEntry.restart_pending ?? prev.restart_pending ?? false,
2006
- running:
2007
- normalizedEntry.running ?? prev.running,
2161
+ preserveManualStopLifecycle
2162
+ ? prev.restart_pending ?? false
2163
+ : normalizedEntry.restart_pending ?? prev.restart_pending ?? false,
2164
+ // manual/disconnected 是显式停止生命周期。迟到的 keepalive/probe
2165
+ // 被保护窗抑制时不能顺手把 running 改回 true,否则展示层会从
2166
+ // “已停止”漂成普通“离线”,OpenClaw 兼容快照的 lastStopAt 也会失真。
2167
+ running: preserveManualStopLifecycle
2168
+ ? prev.running ?? false
2169
+ : normalizedEntry.running ?? prev.running,
2008
2170
  busy: normalizedEntry.busy ?? prev.busy,
2009
2171
  active_runs: normalizedEntry.active_runs ?? prev.active_runs,
2010
2172
  last_run_activity_at:
@@ -2474,6 +2636,16 @@ function restorePersistedRuntimeStateSnapshot(
2474
2636
  restored_filter:
2475
2637
  allowedKeys && allowedKeys.size > 0 ? "configured_accounts_only" : "all",
2476
2638
  });
2639
+ logRuntimeConfigReconcile({
2640
+ trigger: "startup_restore",
2641
+ mode: "startup_restore",
2642
+ nextAccounts: collectConfiguredAccountIdsByPlatform(config),
2643
+ affectedAccounts: allowedKeys ? Array.from(allowedKeys).sort() : undefined,
2644
+ restoredSnapshots,
2645
+ restoredHeartbeats,
2646
+ restoredEvents,
2647
+ restoredDisconnects,
2648
+ });
2477
2649
  scheduleRuntimeStateSnapshotPersist();
2478
2650
  }
2479
2651
 
@@ -2559,12 +2731,66 @@ function upsertRuntimeProbe(entry: RuntimeProbeEntry): void {
2559
2731
  scheduleRuntimeStateSnapshotPersist();
2560
2732
  }
2561
2733
 
2734
/**
 * Normalizes an event name for comparison: nullish/empty becomes "", and
 * anything else is stringified, trimmed and lowercased.
 */
function normalizeRuntimeEventName(eventName: string | null | undefined): string {
  const raw = String(eventName || "");
  const trimmed = raw.trim();
  return trimmed.toLowerCase();
}
2739
+
2740
/**
 * Exact-match aliases for telemetry event names. Keys are expected to be
 * already trimmed and lowercased; values are the canonical snake_case names.
 */
const telemetryEventAliases: Record<string, string> = {
  "runtime-channel-inbound": "runtime_channel_inbound",
  "runtime.channel.inbound": "runtime_channel_inbound",
  channel_inbound: "runtime_channel_inbound",
  "runtime-channel-outbound": "runtime_channel_outbound",
  "runtime.channel.outbound": "runtime_channel_outbound",
  channel_outbound: "runtime_channel_outbound",
  "runtime-transport-disconnect": "runtime_transport_disconnect",
  "runtime.transport.disconnect": "runtime_transport_disconnect",
  transport_disconnect: "runtime_transport_disconnect",
  "runtime-account-logged-out": "runtime_account_logged_out",
  "runtime.account.logged_out": "runtime_account_logged_out",
  account_logged_out: "runtime_account_logged_out",
};

/**
 * Fallback aliases keyed by the separator-free form produced by
 * `canonicalizeRuntimeEventName`.
 */
const telemetryEventCanonicalAliases: Record<string, string> = {
  runtimechannelinbound: "runtime_channel_inbound",
  runtimechanneloutbound: "runtime_channel_outbound",
  runtimetransportdisconnect: "runtime_transport_disconnect",
  runtimeaccountloggedout: "runtime_account_logged_out",
  channelinbound: "runtime_channel_inbound",
  channeloutbound: "runtime_channel_outbound",
  transportdisconnect: "runtime_transport_disconnect",
  accountloggedout: "runtime_account_logged_out",
};

/**
 * Strips every character outside `[a-z0-9]`. The input is expected to be
 * lowercased already; uppercase letters would be removed, not folded.
 */
function canonicalizeRuntimeEventName(eventName: string): string {
  return eventName.replace(/[^a-z0-9]/g, "");
}

/**
 * Looks up `name` among the table's OWN keys only.
 *
 * A plain bracket lookup on an object literal also resolves inherited
 * members, so names such as "constructor" or "__proto__" would yield a
 * function/object instead of an alias string.
 */
function lookupOwnTelemetryAlias(
  table: Record<string, string>,
  name: string,
): string | undefined {
  return Object.prototype.hasOwnProperty.call(table, name)
    ? table[name]
    : undefined;
}

/**
 * Maps a (trimmed, lowercased) telemetry event name to its canonical
 * snake_case form.
 *
 * Resolution order: exact alias, then separator-insensitive alias, otherwise
 * the input is returned unchanged. The result is always a string.
 */
function normalizeRuntimeTelemetryEventName(eventName: string): string {
  const direct = lookupOwnTelemetryAlias(telemetryEventAliases, eventName);
  if (direct) return direct;
  const canonical = canonicalizeRuntimeEventName(eventName);
  return (
    lookupOwnTelemetryAlias(telemetryEventCanonicalAliases, canonical) ||
    eventName
  );
}
2776
+
2777
/**
 * Heuristic check: after normalization and separator stripping, does the
 * event name contain one of the telemetry markers
 * ("channel", "transport", "logout", "loggedout")?
 *
 * Empty names, or names that become empty once canonicalized, return false.
 */
function isTelemetryLikeEventName(eventName: string | null | undefined): boolean {
  const lowered = normalizeRuntimeEventName(eventName);
  if (lowered.length === 0) {
    return false;
  }
  const compact = canonicalizeRuntimeEventName(lowered);
  if (compact.length === 0) {
    return false;
  }
  const markers = ["channel", "transport", "logout", "loggedout"];
  return markers.some((marker) => compact.includes(marker));
}
2789
+
2562
2790
  // isDiagnosticOnlyEvent: 运行时判定函数。
2563
2791
  function isDiagnosticOnlyEvent(eventName: string | null | undefined): boolean {
2564
2792
  // 诊断事件不驱动 link_status,只更新 diagnostic 维度。
2565
- const normalized = String(eventName || "")
2566
- .trim()
2567
- .toLowerCase();
2793
+ const normalized = normalizeRuntimeEventName(eventName);
2568
2794
  return (
2569
2795
  normalized === "runtime_log_diagnostic" ||
2570
2796
  normalized === "runtime_diagnostic"
@@ -2574,9 +2800,7 @@ function isDiagnosticOnlyEvent(eventName: string | null | undefined): boolean {
2574
2800
  // isHeartbeatOnlyEvent: 运行时判定函数。
2575
2801
  function isHeartbeatOnlyEvent(eventName: string | null | undefined): boolean {
2576
2802
  // 心跳类事件只更新 heartbeat/activity,不写最终状态。
2577
- const normalized = String(eventName || "")
2578
- .trim()
2579
- .toLowerCase();
2803
+ const normalized = normalizeRuntimeEventName(eventName);
2580
2804
  return (
2581
2805
  normalized === "runtime_heartbeat" ||
2582
2806
  normalized === "heartbeat" ||
@@ -2586,7 +2810,9 @@ function isHeartbeatOnlyEvent(eventName: string | null | undefined): boolean {
2586
2810
 
2587
2811
  // isTelemetryOnlyEvent: 仅更新 activity / lastDisconnect / 登出注册表,不要求 link_status。
2588
2812
  function isTelemetryOnlyEvent(eventName: string | null | undefined): boolean {
2589
- const n = String(eventName || "").trim();
2813
+ const n = normalizeRuntimeTelemetryEventName(
2814
+ normalizeRuntimeEventName(eventName),
2815
+ );
2590
2816
  return (
2591
2817
  n === "runtime_channel_inbound" ||
2592
2818
  n === "runtime_channel_outbound" ||
@@ -3653,7 +3879,7 @@ function readRecentBridgeErrorFromLog(
3653
3879
  return null;
3654
3880
  }
3655
3881
 
3656
- /** Bridge file lines from setupBridgeLogger: `[ISO] [log|info|warn|error|debug] message` */
3882
+ /** Bridge file lines from setupBridgeLogger: `[Asia/Shanghai timestamp] [log|info|warn|error|debug] message` */
3657
3883
  const BRIDGE_FILE_ERROR_LINE = /^\[[^\]]+\] \[error\] /;
3658
3884
 
3659
3885
  // getBridgeLoggerFilePath: 运行时辅助函数。
@@ -3711,7 +3937,7 @@ function buildRuntimeLogDownloadFileName(logFilePath: string | null): string {
3711
3937
  const baseName = path.basename(candidate).trim();
3712
3938
  if (baseName) return baseName;
3713
3939
  }
3714
- return `im-agent-hub-runtime-${new Date().toISOString().replace(/[:.]/g, "-")}.log`;
3940
+ return `im-agent-hub-runtime-${formatBeijingLogTimestamp().replace(/[:.]/g, "-")}.log`;
3715
3941
  }
3716
3942
 
3717
3943
  // parseRuntimeLogDownloadMaxBytes: 解析输入并返回规范化结果。
@@ -3877,10 +4103,34 @@ function clearPlatformStatuses(
3877
4103
  runtimeLogoutRegistry.delete(key);
3878
4104
  }
3879
4105
  }
4106
+ for (const key of Array.from(runtimeManualStoppedKeys)) {
4107
+ if (key.startsWith(`${platform}:`)) {
4108
+ runtimeManualStoppedKeys.delete(key);
4109
+ }
4110
+ }
3880
4111
  recomputeRuntimeHeartbeatLastReceivedAt();
3881
4112
  scheduleRuntimeStateSnapshotPersist();
3882
4113
  }
3883
4114
 
4115
/**
 * Empties every in-memory runtime-state registry referenced here and resets
 * the "last heartbeat received" timestamp to null.
 */
function clearAllRuntimeStateRegistries(): void {
  const clearable: Array<{ clear(): void }> = [
    runtimeStatusRegistry,
    runtimeChannelSnapshotRegistry,
    runtimeHeartbeatRegistry,
    runtimeEventLivenessRegistry,
    runtimeDiagnosticRegistry,
    runtimeProbeRegistry,
    runtimeAuditRegistry,
    runtimeLastDisconnectRegistry,
    runtimeHealthReasonMemo,
    runtimeHealthGuardRecoveringKeys,
    runtimeHealthGuardLastRestartAtMs,
    runtimeHealthGuardRestartsByHour,
    runtimeChannelActivityRegistry,
    runtimeLogoutRegistry,
    runtimeManualStoppedKeys,
  ];
  for (const registry of clearable) {
    registry.clear();
  }
  runtimeHeartbeatLastReceivedAt = null;
}
4133
+
3884
4134
  // prunePlatformStatusesByAccounts: 运行时辅助函数。
3885
4135
  function prunePlatformStatusesByAccounts(
3886
4136
  platform: "dingtalk" | "feishu" | "weixin",
@@ -3949,6 +4199,13 @@ function prunePlatformStatusesByAccounts(
3949
4199
  runtimeLogoutRegistry.delete(key);
3950
4200
  }
3951
4201
  }
4202
+ for (const key of Array.from(runtimeManualStoppedKeys)) {
4203
+ if (!key.startsWith(`${platform}:`)) continue;
4204
+ const accountKey = key.slice(platform.length + 1);
4205
+ if (!normalizedAccountSet.has(accountKey)) {
4206
+ runtimeManualStoppedKeys.delete(key);
4207
+ }
4208
+ }
3952
4209
 
3953
4210
  recomputeRuntimeHeartbeatLastReceivedAt();
3954
4211
  scheduleRuntimeStateSnapshotPersist();
@@ -4005,6 +4262,232 @@ function readConfiguredAccountIds(
4005
4262
  return hasLegacyRootCredential() ? ["__default__"] : [];
4006
4263
  }
4007
4264
 
4265
/**
 * Collects the configured account ids for each runtime platform.
 *
 * Ids are read from the platform's channel section via
 * `readConfiguredAccountIds`, normalized with `normalizeRuntimeAccountId`,
 * trimmed, and de-duplicated in first-seen order; empty ids are dropped.
 *
 * @returns A fresh record with one array per platform. All arrays are empty
 *   when `config` is null, not an object, or an array.
 */
function collectConfiguredAccountIdsByPlatform(
  config: Record<string, unknown> | null,
): Record<RuntimePlatform, string[]> {
  const idsByPlatform: Record<RuntimePlatform, string[]> = {
    dingtalk: [],
    feishu: [],
    weixin: [],
  };
  const isPlainObject =
    !!config && typeof config === "object" && !Array.isArray(config);
  if (!config || !isPlainObject) {
    return idsByPlatform;
  }

  // Each runtime platform paired with the channel key it is configured under.
  const channelKeyByPlatform: Array<
    [RuntimePlatform, "dingtalk-connector" | "feishu" | "openclaw-weixin"]
  > = [
    ["dingtalk", "dingtalk-connector"],
    ["feishu", "feishu"],
    ["weixin", "openclaw-weixin"],
  ];

  for (const [platform, channelKey] of channelKeyByPlatform) {
    const uniqueIds = new Set<string>();
    for (const rawId of readConfiguredAccountIds(config, channelKey)) {
      const normalizedId = normalizeRuntimeAccountId(platform, rawId, config);
      const cleanedId = String(normalizedId || "").trim();
      if (cleanedId.length > 0) {
        uniqueIds.add(cleanedId);
      }
    }
    idsByPlatform[platform] = Array.from(uniqueIds);
  }
  return idsByPlatform;
}
4296
+
4297
/**
 * Marks each given account as `connecting` with `status_source: "manual"`.
 *
 * Ids are trimmed and de-duplicated first (empty ids dropped). For every
 * remaining account the manual-stop flag is cleared and a fresh baseline
 * status is upserted, all sharing one start timestamp.
 *
 * @param lastEvent Value written to `last_event`; defaults to "config_loaded".
 * @returns The cleaned list of account ids that were marked (empty when none).
 */
function markRuntimeAccountsConnecting(
  platform: RuntimePlatform,
  accountIds: string[],
  lastEvent = "config_loaded",
): string[] {
  const uniqueIds = new Set<string>();
  for (const rawId of accountIds) {
    const cleanedId = String(rawId || "").trim();
    if (cleanedId.length > 0) {
      uniqueIds.add(cleanedId);
    }
  }
  const targetIds = Array.from(uniqueIds);
  if (targetIds.length === 0) {
    return [];
  }

  const startedAt = nowIso();
  targetIds.forEach((accountId) => {
    // A fresh start supersedes any earlier manual stop for this account.
    clearRuntimeAccountManualStop(platform, accountId);
    upsertRuntimeStatus({
      platform,
      bot_account_id: accountId,
      link_status: "connecting",
      started_at: startedAt,
      last_heartbeat_at: startedAt,
      last_error: null,
      reconnect_count: 0,
      restart_pending: false,
      busy: false,
      active_runs: 0,
      last_run_activity_at: startedAt,
      mode: null,
      last_event: lastEvent,
      status_source: "manual",
      last_probe_at: null,
    });
  });
  return targetIds;
}
4333
+
4334
/**
 * Emits one structured `[bridges/main][config-reconcile]` log line describing
 * a config reconciliation. Optional fields that were not supplied are logged
 * as `null` so the JSON shape stays stable.
 */
function logRuntimeConfigReconcile(params: {
  trigger: string;
  mode: "full_reset" | "incremental" | "startup_restore";
  previousAccounts?: Record<RuntimePlatform, string[]>;
  nextAccounts?: Record<RuntimePlatform, string[]>;
  addedAccounts?: Record<RuntimePlatform, string[]>;
  removedAccounts?: Record<RuntimePlatform, string[]>;
  removedStopResults?: Record<RuntimePlatform, Array<Record<string, unknown>>>;
  affectedAccounts?: string[];
  markConnecting?: boolean;
  restoredSnapshots?: number;
  restoredHeartbeats?: number;
  restoredEvents?: number;
  restoredDisconnects?: number;
}): void {
  // Only genuine numbers are logged as counters; anything else becomes null.
  const countOrNull = (value: number | undefined): number | null =>
    typeof value === "number" ? value : null;

  // Key order below defines the order of fields in the emitted JSON.
  const payload = {
    trigger: params.trigger,
    mode: params.mode,
    mark_connecting:
      typeof params.markConnecting === "boolean" ? params.markConnecting : null,
    previous_accounts: params.previousAccounts || null,
    next_accounts: params.nextAccounts || null,
    added_accounts: params.addedAccounts || null,
    removed_accounts: params.removedAccounts || null,
    removed_stop_results: params.removedStopResults || null,
    affected_accounts: params.affectedAccounts || null,
    restored_snapshots: countOrNull(params.restoredSnapshots),
    restored_heartbeats: countOrNull(params.restoredHeartbeats),
    restored_events: countOrNull(params.restoredEvents),
    restored_disconnects: countOrNull(params.restoredDisconnects),
  };
  console.log(`[bridges/main][config-reconcile] ${JSON.stringify(payload)}`);
}
4378
+
4379
/**
 * Asks the platform's bridge control to stop each removed account, one at a
 * time, each call bounded by `withRuntimeControlTimeout`.
 *
 * Failures are logged as warnings and recorded in the result; they never
 * reject the returned promise.
 *
 * @returns One result record per de-duplicated, non-empty account id with
 *   `platform`, `bot_account_id`, `attempted`, `ok`, and either `reason` or
 *   `error`. Empty when there is nothing to stop.
 */
async function stopRemovedRuntimeAccounts(
  platform: RuntimePlatform,
  accountIds: string[],
): Promise<Array<Record<string, unknown>>> {
  const uniqueIds = new Set<string>();
  for (const rawId of accountIds) {
    const cleanedId = String(rawId || "").trim();
    if (cleanedId.length > 0) {
      uniqueIds.add(cleanedId);
    }
  }
  if (uniqueIds.size === 0) {
    return [];
  }

  const control = resolveRuntimeBridgeControl(platform);
  const outcomes: Array<Record<string, unknown>> = [];

  for (const accountId of uniqueIds) {
    if (typeof control?.stopAccount !== "function") {
      outcomes.push({
        platform,
        bot_account_id: accountId,
        attempted: false,
        ok: false,
        reason: "stopAccount_not_available",
      });
      continue;
    }

    let outcome: Record<string, unknown>;
    try {
      await withRuntimeControlTimeout(
        control.stopAccount(accountId),
        `stop removed account timeout platform=${platform} account=${accountId}`,
      );
      outcome = {
        platform,
        bot_account_id: accountId,
        attempted: true,
        ok: true,
        reason: "stopped_removed_account",
      };
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      outcome = {
        platform,
        bot_account_id: accountId,
        attempted: true,
        ok: false,
        error: message,
      };
      console.warn(
        `[bridges/main][config-reconcile] stop removed account failed platform=${platform} account=${accountId} err=${String(message || "unknown")}`,
      );
    }
    outcomes.push(outcome);
  }
  return outcomes;
}
4424
+
4425
/**
 * Incrementally reconciles runtime state after a config change.
 *
 * For each platform, in order (dingtalk, feishu, weixin):
 *  1. diff the previous and next configured account ids;
 *  2. stop the removed accounts (awaited before moving on);
 *  3. if the platform has no accounts left, clear all its statuses;
 *     otherwise prune statuses to the remaining accounts and mark the newly
 *     added ones as connecting.
 * Finally a single "incremental" reconcile log line is emitted.
 */
async function reconcileRuntimeStateForConfigDelta(params: {
  previousConfig: Record<string, unknown> | null;
  nextConfig: Record<string, unknown>;
  trigger: string;
}): Promise<void> {
  const previousAccounts = collectConfiguredAccountIdsByPlatform(
    params.previousConfig,
  );
  const nextAccounts = collectConfiguredAccountIdsByPlatform(params.nextConfig);

  const addedAccounts: Record<RuntimePlatform, string[]> = {
    dingtalk: [],
    feishu: [],
    weixin: [],
  };
  const removedAccounts: Record<RuntimePlatform, string[]> = {
    dingtalk: [],
    feishu: [],
    weixin: [],
  };
  const removedStopResults: Record<
    RuntimePlatform,
    Array<Record<string, unknown>>
  > = {
    dingtalk: [],
    feishu: [],
    weixin: [],
  };
  const touchedKeys = new Set<string>();

  const platformOrder: RuntimePlatform[] = ["dingtalk", "feishu", "weixin"];
  for (const platform of platformOrder) {
    const before = previousAccounts[platform];
    const after = nextAccounts[platform];
    const beforeLookup = new Set(before);
    const afterLookup = new Set(after);

    const newlyAdded = after.filter((id) => !beforeLookup.has(id));
    const newlyRemoved = before.filter((id) => !afterLookup.has(id));
    addedAccounts[platform] = newlyAdded;
    removedAccounts[platform] = newlyRemoved;

    // Stop removed accounts before touching the registries for this platform.
    removedStopResults[platform] = await stopRemovedRuntimeAccounts(
      platform,
      newlyRemoved,
    );

    if (after.length > 0) {
      prunePlatformStatusesByAccounts(platform, after, params.nextConfig);
      markRuntimeAccountsConnecting(platform, newlyAdded);
    } else {
      clearPlatformStatuses(platform);
    }

    for (const id of [...newlyAdded, ...newlyRemoved]) {
      touchedKeys.add(keyOf(platform, id));
    }
  }

  logRuntimeConfigReconcile({
    trigger: params.trigger,
    mode: "incremental",
    previousAccounts,
    nextAccounts,
    addedAccounts,
    removedAccounts,
    removedStopResults,
    affectedAccounts: Array.from(touchedKeys).sort(),
    markConnecting: false,
  });
}
4490
+
4008
4491
  // isConfiguredRuntimeAccount: 运行时判定函数。
4009
4492
  function isConfiguredRuntimeAccount(
4010
4493
  platform: "dingtalk" | "feishu" | "weixin",
@@ -4044,65 +4527,64 @@ function exposeRuntimeConfigToBridges(
4044
4527
  }
4045
4528
 
4046
4529
  // markConfiguredBotsAsConnecting: 标记运行态以驱动后续状态收敛。
4047
- function markConfiguredBotsAsConnecting(config: Record<string, unknown>): void {
4530
+ function markConfiguredBotsAsConnecting(
4531
+ config: Record<string, unknown>,
4532
+ trigger = "config_reset",
4533
+ ): void {
4048
4534
  // 应用全量配置时重置运行态基线,后续由 probe/event 收敛到真实状态。
4049
- runtimeStatusRegistry.clear();
4050
- runtimeChannelSnapshotRegistry.clear();
4051
- runtimeHeartbeatRegistry.clear();
4052
- runtimeEventLivenessRegistry.clear();
4053
- runtimeHeartbeatLastReceivedAt = null;
4054
- runtimeDiagnosticRegistry.clear();
4055
- runtimeProbeRegistry.clear();
4056
- runtimeAuditRegistry.clear();
4057
- runtimeLastDisconnectRegistry.clear();
4058
- runtimeHealthReasonMemo.clear();
4059
- runtimeHealthGuardRecoveringKeys.clear();
4060
- runtimeHealthGuardLastRestartAtMs.clear();
4061
- runtimeHealthGuardRestartsByHour.clear();
4062
- runtimeChannelActivityRegistry.clear();
4063
- runtimeLogoutRegistry.clear();
4064
-
4065
- const applyPlatformConnecting = (
4066
- platform: "dingtalk" | "feishu" | "weixin",
4067
- channelKey: "dingtalk-connector" | "feishu" | "openclaw-weixin",
4068
- ): void => {
4069
- const accountIds = readConfiguredAccountIds(config, channelKey);
4070
- if (accountIds.length === 0) return;
4071
- const startedAt = nowIso();
4072
- for (const accountId of accountIds) {
4073
- const normalizedAccountId = String(accountId || "").trim();
4074
- if (!normalizedAccountId) continue;
4075
- upsertRuntimeStatus({
4076
- platform,
4077
- bot_account_id: normalizedAccountId,
4078
- link_status: "connecting",
4079
- started_at: startedAt,
4080
- last_heartbeat_at: startedAt,
4081
- last_error: null,
4082
- reconnect_count: 0,
4083
- restart_pending: false,
4084
- busy: false,
4085
- active_runs: 0,
4086
- last_run_activity_at: startedAt,
4087
- mode: null,
4088
- last_event: "config_loaded",
4089
- status_source: "manual",
4090
- last_probe_at: null,
4091
- });
4092
- }
4535
+ clearAllRuntimeStateRegistries();
4536
+ const nextAccounts = collectConfiguredAccountIdsByPlatform(config);
4537
+ const addedAccounts: Record<RuntimePlatform, string[]> = {
4538
+ dingtalk: markRuntimeAccountsConnecting("dingtalk", nextAccounts.dingtalk),
4539
+ feishu: markRuntimeAccountsConnecting("feishu", nextAccounts.feishu),
4540
+ weixin: markRuntimeAccountsConnecting("weixin", nextAccounts.weixin),
4093
4541
  };
4094
-
4095
- applyPlatformConnecting("dingtalk", "dingtalk-connector");
4096
- applyPlatformConnecting("feishu", "feishu");
4097
- applyPlatformConnecting("weixin", "openclaw-weixin");
4542
+ const affectedAccounts = [
4543
+ ...addedAccounts.dingtalk.map((accountId) => keyOf("dingtalk", accountId)),
4544
+ ...addedAccounts.feishu.map((accountId) => keyOf("feishu", accountId)),
4545
+ ...addedAccounts.weixin.map((accountId) => keyOf("weixin", accountId)),
4546
+ ].sort();
4547
+ logRuntimeConfigReconcile({
4548
+ trigger,
4549
+ mode: "full_reset",
4550
+ nextAccounts,
4551
+ addedAccounts,
4552
+ removedAccounts: {
4553
+ dingtalk: [],
4554
+ feishu: [],
4555
+ weixin: [],
4556
+ },
4557
+ affectedAccounts,
4558
+ markConnecting: true,
4559
+ });
4098
4560
  scheduleRuntimeStateSnapshotPersist();
4099
4561
  }
4100
4562
 
4101
4563
  // markPlatformStarted: 标记运行态以驱动后续状态收敛。
4102
- function markPlatformStarted(platform: "dingtalk" | "feishu" | "weixin"): void {
4103
- // bridge 进程拉起后,先把该平台已配置账号标记为 connecting。
4564
+ function markPlatformStarted(
4565
+ platform: "dingtalk" | "feishu" | "weixin",
4566
+ accountIds?: string[],
4567
+ ): void {
4568
+ // bridge 进程拉起后的状态标记分两种语义:
4569
+ // - accountIds 为空:冷启动/全平台启动,沿用历史语义,标记该平台全部账号;
4570
+ // - accountIds 非空:单账号 lazy start(bot control / health guard / weixin invoke),
4571
+ // 只能标记目标账号,避免同平台其它已在线账号被误写成 manual/connecting。
4572
+ // 这里不影响真实 bridge 启动范围,只限制运行态标记的受影响账号集合。
4573
+ const targetAccounts = accountIds
4574
+ ? new Set(
4575
+ accountIds
4576
+ // 复用运行态账号归一化,确保 __default__ 单账号别名与 registry key 对齐。
4577
+ .map((accountId) => normalizeRuntimeAccountId(platform, accountId))
4578
+ .map((accountId) => String(accountId || "").trim())
4579
+ .filter((accountId) => accountId.length > 0),
4580
+ )
4581
+ : null;
4104
4582
  for (const value of listRuntimeBotStatuses()) {
4105
4583
  if (value.platform !== platform) continue;
4584
+ // 单账号 lazy start 场景必须跳过非目标账号,防止“新增/启动一个账号”
4585
+ // 把同平台所有账号都短暂打回 connecting。
4586
+ if (targetAccounts && !targetAccounts.has(value.bot_account_id)) continue;
4587
+ clearRuntimeAccountManualStop(platform, value.bot_account_id);
4106
4588
  upsertRuntimeStatus({
4107
4589
  ...value,
4108
4590
  link_status: "connecting",
@@ -4252,8 +4734,12 @@ async function ensureBridgesStartedByConfig(): Promise<void> {
4252
4734
  }
4253
4735
 
4254
4736
  // ensureWeixinBridgeReadyForGatewayInvoke: 确保目标运行资源已就绪。
4255
- async function ensureWeixinBridgeReadyForGatewayInvoke(): Promise<void> {
4737
+ async function ensureWeixinBridgeReadyForGatewayInvoke(
4738
+ accountIds?: string[],
4739
+ ): Promise<void> {
4256
4740
  // weixin 网关调用允许懒启动,避免“首次调用时 bridge 尚未拉起”直接失败。
4741
+ // accountIds 仅用于 lazy start 成功后的状态标记收敛范围;bridge 进程仍是
4742
+ // 平台级进程,不能理解为只启动某个账号。
4257
4743
  if (weixinBridgeStarted) {
4258
4744
  return;
4259
4745
  }
@@ -4275,7 +4761,7 @@ async function ensureWeixinBridgeReadyForGatewayInvoke(): Promise<void> {
4275
4761
  try {
4276
4762
  await import("./weixin-stdio-bridge.ts");
4277
4763
  weixinBridgeStarted = true;
4278
- markPlatformStarted("weixin");
4764
+ markPlatformStarted("weixin", accountIds);
4279
4765
  console.log("[bridges/main] weixin bridge lazy started");
4280
4766
  } catch (err) {
4281
4767
  console.error(
@@ -4301,8 +4787,11 @@ async function waitBridgeStartingFlag(
4301
4787
  // ensureBridgeReadyForBotControl: 确保目标运行资源已就绪。
4302
4788
  async function ensureBridgeReadyForBotControl(
4303
4789
  platform: RuntimePlatform,
4790
+ accountId?: string,
4304
4791
  ): Promise<void> {
4305
4792
  // bot/start|restart 的兜底懒启动,按平台定向拉起,不走全量启动逻辑。
4793
+ // accountId 用来限制 platform_process_started 的 manual/connecting 写入范围:
4794
+ // 单机器人控制只应影响当前 bot,不能污染同平台其它 bot 的展示态。
4306
4795
  const allowByMode =
4307
4796
  platform === "dingtalk"
4308
4797
  ? startupOnlyMode === "all" || startupOnlyMode === "dingtalk"
@@ -4327,7 +4816,7 @@ async function ensureBridgeReadyForBotControl(
4327
4816
  try {
4328
4817
  await import("./dingtalk-stdio-bridge.ts");
4329
4818
  dingtalkBridgeStarted = true;
4330
- markPlatformStarted("dingtalk");
4819
+ markPlatformStarted("dingtalk", accountId ? [accountId] : undefined);
4331
4820
  console.log(
4332
4821
  "[bridges/main] dingtalk bridge lazy started for bot control",
4333
4822
  );
@@ -4352,7 +4841,7 @@ async function ensureBridgeReadyForBotControl(
4352
4841
  try {
4353
4842
  await import("./lark-stdio-bridge.ts");
4354
4843
  larkBridgeStarted = true;
4355
- markPlatformStarted("feishu");
4844
+ markPlatformStarted("feishu", accountId ? [accountId] : undefined);
4356
4845
  console.log("[bridges/main] lark bridge lazy started for bot control");
4357
4846
  } catch (err) {
4358
4847
  console.error(
@@ -4364,7 +4853,9 @@ async function ensureBridgeReadyForBotControl(
4364
4853
  return;
4365
4854
  }
4366
4855
 
4367
- await ensureWeixinBridgeReadyForGatewayInvoke();
4856
+ await ensureWeixinBridgeReadyForGatewayInvoke(
4857
+ accountId ? [accountId] : undefined,
4858
+ );
4368
4859
  }
4369
4860
 
4370
4861
  // markAllBotsStopped: 标记运行态以驱动后续状态收敛。
@@ -4610,7 +5101,9 @@ async function invokeGatewayMethodByChannel(args: {
4610
5101
  | undefined;
4611
5102
 
4612
5103
  if (args.channel === "weixin" && typeof invoker !== "function") {
4613
- await ensureWeixinBridgeReadyForGatewayInvoke();
5104
+ await ensureWeixinBridgeReadyForGatewayInvoke(
5105
+ args.accountId ? [args.accountId] : undefined,
5106
+ );
4614
5107
  }
4615
5108
 
4616
5109
  const ensuredInvoker = (globalThis as Record<string, unknown>)[invokerKey] as
@@ -4715,6 +5208,25 @@ async function withProbeTimeout<T>(
4715
5208
  }
4716
5209
  }
4717
5210
 
5211
/**
 * Waits for a runtime control operation, but gives up after a deadline.
 *
 * The operation is raced against a timer; whichever settles first decides
 * the outcome. Losing the race does not cancel `work`: it keeps running in
 * the background and only its result is ignored.
 *
 * @param work - The already-started operation to wait for.
 * @param timeoutMessage - Message of the `Error` raised when the deadline wins.
 * @param timeoutMs - Deadline in milliseconds. Defaults to the module-level
 *   `runtimeAccountControlTimeoutMs`. A value `<= 0` disables the deadline.
 * @returns The value `work` resolves with.
 * @throws Error with `timeoutMessage` when the deadline elapses first;
 *   otherwise whatever `work` rejects with.
 */
async function withRuntimeControlTimeout<T>(
  work: Promise<T>,
  timeoutMessage: string,
  timeoutMs = runtimeAccountControlTimeoutMs,
): Promise<T> {
  if (timeoutMs <= 0) return await work;
  // Timer delays are limited to a signed 32-bit value. Node resets anything
  // larger to 1ms, which would turn a very generous timeout into an
  // immediate failure, so clamp to the largest delay the timer supports.
  const delayMs = Math.min(timeoutMs, 2_147_483_647);
  let timer: ReturnType<typeof setTimeout> | undefined;
  try {
    const deadline = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error(timeoutMessage)), delayMs);
    });
    return await Promise.race([work, deadline]);
  } finally {
    // Always release the timer so a finished operation does not keep the
    // event loop alive until the deadline. Clearing an unset timer is a no-op.
    clearTimeout(timer);
  }
}
5229
+
4718
5230
  // runWithConcurrency: 执行核心运行流程。
4719
5231
  async function runWithConcurrency<T>(
4720
5232
  items: T[],
@@ -5182,18 +5694,8 @@ async function runFullProbeAndRefreshStatuses(params: {
5182
5694
  readRecentBridgeErrorFromLog(accountId, "feishu"),
5183
5695
  )
5184
5696
  : probeError;
5185
- const preserveEventStatus =
5186
- previous?.status_source === "event" &&
5187
- (previous.link_status === "error" ||
5188
- previous.link_status === "disconnected" ||
5189
- previous.link_status === "degraded");
5190
-
5191
5697
  const nextStatus: RuntimeBotStatus["link_status"] =
5192
- preserveEventStatus
5193
- ? previous?.link_status || "connecting"
5194
- : ok
5195
- ? "connected"
5196
- : "error";
5698
+ ok ? "connected" : "error";
5197
5699
  const nextError =
5198
5700
  nextStatus === "connected"
5199
5701
  ? null
@@ -5241,7 +5743,7 @@ async function runFullProbeAndRefreshStatuses(params: {
5241
5743
  last_run_activity_at:
5242
5744
  runtimeHints.last_run_activity_at || probeAt,
5243
5745
  mode: runtimeHints.mode || previous?.mode || null,
5244
- status_source: preserveEventStatus ? "event" : "probe",
5746
+ status_source: "probe",
5245
5747
  last_probe_at: probeAt,
5246
5748
  });
5247
5749
  } catch (err) {
@@ -5560,7 +6062,11 @@ function startProbeLoop(): void {
5560
6062
  }
5561
6063
 
5562
6064
  // pullRuntimeConfigFromPython: 运行时辅助函数。
5563
- async function pullRuntimeConfigFromPython(forceFull = false): Promise<{
6065
+ async function pullRuntimeConfigFromPython(
6066
+ forceFull = false,
6067
+ reconcileRuntimeState = true,
6068
+ options: { probeAfterPull?: boolean } = {},
6069
+ ): Promise<{
5564
6070
  ok: boolean;
5565
6071
  pulled: boolean;
5566
6072
  not_modified?: boolean;
@@ -5569,6 +6075,7 @@ async function pullRuntimeConfigFromPython(forceFull = false): Promise<{
5569
6075
  dropped_fields?: Array<{ channel: string; accountId: string; field: string }>;
5570
6076
  }> {
5571
6077
  // 配置真源是 Python:拉取后先落本地文件,再通知插件读取,最后触发 probe 收敛状态。
6078
+ const probeAfterPull = options.probeAfterPull !== false;
5572
6079
  applyRemoteRuntimeConfigToPluginHome();
5573
6080
  if (!runtimeConfigPullUrl) {
5574
6081
  return {
@@ -5659,7 +6166,30 @@ async function pullRuntimeConfigFromPython(forceFull = false): Promise<{
5659
6166
  };
5660
6167
  }
5661
6168
 
6169
+ let persistedConfigPath: string | null = null;
6170
+ if (runtimeConfigPullPersist) {
6171
+ const targetPath =
6172
+ loadedConfigPathForRuntime || path.join(process.cwd(), "syim.json");
6173
+ // pull 成功但写盘失败时不能先切内存:否则调用方看到失败,
6174
+ // 当前进程却已经按新配置运行,形成“失败响应 + 已变更运行态”。
6175
+ persistRuntimeConfigSnapshot({
6176
+ config: normalizedResult.normalized,
6177
+ targetPath,
6178
+ reason: "config_pull",
6179
+ });
6180
+ persistedConfigPath = targetPath;
6181
+ }
6182
+
6183
+ const previousConfig =
6184
+ loadedConfigForRuntime &&
6185
+ typeof loadedConfigForRuntime === "object" &&
6186
+ !Array.isArray(loadedConfigForRuntime)
6187
+ ? (loadedConfigForRuntime as Record<string, unknown>)
6188
+ : null;
5662
6189
  loadedConfigForRuntime = normalizedResult.normalized;
6190
+ if (persistedConfigPath) {
6191
+ loadedConfigPathForRuntime = persistedConfigPath;
6192
+ }
5663
6193
  exposeRuntimeConfigToBridges(
5664
6194
  loadedConfigForRuntime,
5665
6195
  loadedConfigPathForRuntime,
@@ -5669,20 +6199,15 @@ async function pullRuntimeConfigFromPython(forceFull = false): Promise<{
5669
6199
  String(data?.version || "").trim() || undefined,
5670
6200
  );
5671
6201
  applyRemoteRuntimeConfigToPluginHome();
5672
- markConfiguredBotsAsConnecting(normalizedResult.normalized);
5673
- await probeAndRefreshStatuses({ force: true, reason: "config_pull" });
5674
-
5675
- if (runtimeConfigPullPersist) {
5676
- const targetPath =
5677
- loadedConfigPathForRuntime || path.join(process.cwd(), "syim.json");
5678
- fs.mkdirSync(path.dirname(targetPath), { recursive: true });
5679
- fs.writeFileSync(
5680
- targetPath,
5681
- JSON.stringify(normalizedResult.normalized, null, 2),
5682
- "utf-8",
5683
- );
5684
- loadedConfigPathForRuntime = targetPath;
5685
- console.log(`[bridges/main] pull config persisted path=${targetPath}`);
6202
+ if (reconcileRuntimeState) {
6203
+ await reconcileRuntimeStateForConfigDelta({
6204
+ previousConfig,
6205
+ nextConfig: normalizedResult.normalized,
6206
+ trigger: "config_pull",
6207
+ });
6208
+ }
6209
+ if (probeAfterPull) {
6210
+ await probeAndRefreshStatuses({ force: true, reason: "config_pull" });
5686
6211
  }
5687
6212
 
5688
6213
  return {
@@ -5790,7 +6315,7 @@ async function recoverRuntimeAccountByHealthGuard(params: {
5790
6315
  try {
5791
6316
  let control = resolveRuntimeBridgeControl(item.platform);
5792
6317
  if (!control) {
5793
- await ensureBridgeReadyForBotControl(item.platform);
6318
+ await ensureBridgeReadyForBotControl(item.platform, item.bot_account_id);
5794
6319
  control = resolveRuntimeBridgeControl(item.platform);
5795
6320
  }
5796
6321
  if (!control) {
@@ -5827,10 +6352,19 @@ async function recoverRuntimeAccountByHealthGuard(params: {
5827
6352
  });
5828
6353
 
5829
6354
  if (typeof control.restartAccount === "function") {
5830
- await control.restartAccount(item.bot_account_id);
6355
+ await withRuntimeControlTimeout(
6356
+ control.restartAccount(item.bot_account_id),
6357
+ `health guard restart timeout platform=${item.platform} account=${item.bot_account_id}`,
6358
+ );
5831
6359
  } else {
5832
- await control.stopAccount!(item.bot_account_id);
5833
- await control.startAccount!(item.bot_account_id);
6360
+ await withRuntimeControlTimeout(
6361
+ control.stopAccount!(item.bot_account_id),
6362
+ `health guard stop timeout platform=${item.platform} account=${item.bot_account_id}`,
6363
+ );
6364
+ await withRuntimeControlTimeout(
6365
+ control.startAccount!(item.bot_account_id),
6366
+ `health guard start timeout platform=${item.platform} account=${item.bot_account_id}`,
6367
+ );
5834
6368
  }
5835
6369
 
5836
6370
  logRuntimeStatusDebug(
@@ -5922,6 +6456,9 @@ async function runRuntimeHealthGuardCycle(): Promise<void> {
5922
6456
  if (!triggerReason) continue;
5923
6457
 
5924
6458
  const key = keyOf(item.platform, item.bot_account_id);
6459
+ if (runtimeManualStoppedKeys.has(key)) {
6460
+ continue;
6461
+ }
5925
6462
  if (botControlInFlight.has(key) || runtimeHealthGuardRecoveringKeys.has(key)) {
5926
6463
  continue;
5927
6464
  }
@@ -6399,14 +6936,20 @@ async function buildRuntimeBotsStatusPayload(params: {
6399
6936
  reason: string;
6400
6937
  }): Promise<Record<string, unknown>> {
6401
6938
  const onlyKeyResolved = params.onlyAccountKey?.trim() || null;
6939
+ const bypassHttpProbeTtl = runtimeStatusHttpBypassGlobalProbeTtl || params.forceProbe;
6940
+ const forceSingleAccountProbe = Boolean(onlyKeyResolved);
6941
+ const forceStatusProbe = bypassHttpProbeTtl || forceSingleAccountProbe;
6942
+ const respectGlobalProbeTtl = runtimeStatusHttpBypassGlobalProbeTtl
6943
+ ? false
6944
+ : !params.forceProbe;
6402
6945
  await probeAndRefreshStatuses({
6403
6946
  force: params.forceProbe,
6404
6947
  reason: params.reason,
6405
6948
  timeoutMs: params.probeTimeoutMs ?? undefined,
6406
6949
  onlyKey: onlyKeyResolved,
6407
- respectGlobalProbeTtl: !params.forceProbe,
6408
- forceSweepAccounts: params.forceProbe,
6409
- forceProviderProbe: params.forceProbe,
6950
+ respectGlobalProbeTtl,
6951
+ forceSweepAccounts: forceStatusProbe,
6952
+ forceProviderProbe: forceStatusProbe,
6410
6953
  });
6411
6954
  const botsAll = listRuntimeBotStatuses();
6412
6955
  const onlyKey = onlyKeyResolved;
@@ -6457,9 +7000,10 @@ async function buildRuntimeBotsStatusPayload(params: {
6457
7000
  timeout_total: runtimeStatusProbeTimeoutTotal,
6458
7001
  partial_probe_key: onlyKeyResolved,
6459
7002
  http_bypass_global_probe_ttl: runtimeStatusHttpBypassGlobalProbeTtl,
6460
- probe_respect_global_ttl: !runtimeStatusHttpBypassGlobalProbeTtl,
6461
- status_reads_force_account_sweep: true,
6462
- status_reads_force_provider_probe: true,
7003
+ probe_respect_global_ttl: respectGlobalProbeTtl,
7004
+ status_reads_force_account_sweep: forceStatusProbe,
7005
+ status_reads_force_provider_probe: forceStatusProbe,
7006
+ single_account_force_probe: forceSingleAccountProbe,
6463
7007
  },
6464
7008
  health_guard_meta: buildRuntimeHealthGuardMeta(),
6465
7009
  runtime_health_ready: runtimeReadiness.ready,
@@ -6827,6 +7371,17 @@ async function startInternalApiServer(): Promise<void> {
6827
7371
  );
6828
7372
  return;
6829
7373
  }
7374
+ if (runtimeConfigApplyInFlight) {
7375
+ res.writeHead(409, { "Content-Type": "application/json" });
7376
+ res.end(
7377
+ JSON.stringify({
7378
+ ok: false,
7379
+ error:
7380
+ "runtime config apply is in progress, bot control is temporarily unavailable",
7381
+ }),
7382
+ );
7383
+ return;
7384
+ }
6830
7385
 
6831
7386
  const channelKey = platformToChannelKey(platform);
6832
7387
  if (!isChannelEnabled(channelKey)) {
@@ -6874,7 +7429,7 @@ async function startInternalApiServer(): Promise<void> {
6874
7429
  `[bridges/main] bot/${botControlAction} control missing, try lazy start platform=${platform} account=${botAccountId}`,
6875
7430
  );
6876
7431
  try {
6877
- await ensureBridgeReadyForBotControl(platform);
7432
+ await ensureBridgeReadyForBotControl(platform, botAccountId);
6878
7433
  } catch (err) {
6879
7434
  console.error(
6880
7435
  `[bridges/main] bot/${botControlAction} lazy start failed platform=${platform} err=${err instanceof Error ? err.message : String(err)}`,
@@ -6916,6 +7471,7 @@ async function startInternalApiServer(): Promise<void> {
6916
7471
  );
6917
7472
  return;
6918
7473
  }
7474
+ clearRuntimeAccountManualStop(platform, botAccountId);
6919
7475
  upsertRuntimeStatus({
6920
7476
  platform,
6921
7477
  bot_account_id: botAccountId,
@@ -6933,7 +7489,10 @@ async function startInternalApiServer(): Promise<void> {
6933
7489
  status_source: "manual",
6934
7490
  last_probe_at: previous?.last_probe_at || null,
6935
7491
  });
6936
- await control.startAccount(botAccountId);
7492
+ await withRuntimeControlTimeout(
7493
+ control.startAccount(botAccountId),
7494
+ `bot start timeout platform=${platform} account=${botAccountId}`,
7495
+ );
6937
7496
  runtimeLogoutRegistry.delete(key);
6938
7497
  scheduleRuntimeStateSnapshotPersist();
6939
7498
  } else if (botControlAction === "stop") {
@@ -6947,10 +7506,14 @@ async function startInternalApiServer(): Promise<void> {
6947
7506
  );
6948
7507
  return;
6949
7508
  }
6950
- await control.stopAccount(botAccountId);
7509
+ await withRuntimeControlTimeout(
7510
+ control.stopAccount(botAccountId),
7511
+ `bot stop timeout platform=${platform} account=${botAccountId}`,
7512
+ );
6951
7513
  if (wantLogout) {
6952
7514
  markRuntimeAccountLoggedOut(platform, botAccountId);
6953
7515
  }
7516
+ markRuntimeAccountManuallyStopped(platform, botAccountId);
6954
7517
  upsertRuntimeStatus({
6955
7518
  platform,
6956
7519
  bot_account_id: botAccountId,
@@ -6979,6 +7542,7 @@ async function startInternalApiServer(): Promise<void> {
6979
7542
  );
6980
7543
  return;
6981
7544
  }
7545
+ clearRuntimeAccountManualStop(platform, botAccountId);
6982
7546
  upsertRuntimeStatus({
6983
7547
  platform,
6984
7548
  bot_account_id: botAccountId,
@@ -6996,7 +7560,10 @@ async function startInternalApiServer(): Promise<void> {
6996
7560
  status_source: "manual",
6997
7561
  last_probe_at: previous?.last_probe_at || null,
6998
7562
  });
6999
- await control.restartAccount(botAccountId);
7563
+ await withRuntimeControlTimeout(
7564
+ control.restartAccount(botAccountId),
7565
+ `bot restart timeout platform=${platform} account=${botAccountId}`,
7566
+ );
7000
7567
  }
7001
7568
 
7002
7569
  const bot = getRuntimeBotStatusByKey(key) || null;
@@ -7062,90 +7629,143 @@ async function startInternalApiServer(): Promise<void> {
7062
7629
  reqUrl.pathname === "/internal/runtime/config/apply"
7063
7630
  ) {
7064
7631
  // config/apply 只负责“配置入站 + 可选标记 connecting + probe”,不做平台自动拉起。
7065
- const body = await readRequestJson(req);
7066
- const config = body.config as Record<string, unknown> | undefined;
7067
- const persist = parseBoolFlag(body.persist, true);
7068
- const markConnectingRaw =
7069
- body.mark_connecting === undefined
7070
- ? body.markConnecting
7071
- : body.mark_connecting;
7072
- const markConnecting = parseBoolFlag(markConnectingRaw, true);
7073
- const requestedVersion = String(body.version || "").trim();
7074
- if (!config || typeof config !== "object") {
7075
- res.writeHead(400, { "Content-Type": "application/json" });
7076
- res.end(JSON.stringify({ ok: false, error: "config is required" }));
7632
+ if (runtimeState === "restarting") {
7633
+ res.writeHead(409, { "Content-Type": "application/json" });
7634
+ res.end(
7635
+ JSON.stringify({
7636
+ ok: false,
7637
+ status: "busy",
7638
+ error: "runtime is restarting, config apply is temporarily unavailable",
7639
+ runtime_instance_id: runtimeInstanceId,
7640
+ }),
7641
+ );
7077
7642
  return;
7078
7643
  }
7079
-
7080
- const normalizedResult = normalizeRuntimeConfigByWhitelist(config);
7081
- if (normalizedResult.droppedFields.length > 0) {
7082
- console.log(
7083
- `[bridges/main] config/apply rejected by whitelist: ${normalizedResult.droppedFields
7084
- .map((item) => `${item.channel}/${item.accountId}:${item.field}`)
7085
- .join(", ")}`,
7086
- );
7087
- res.writeHead(400, { "Content-Type": "application/json" });
7644
+ if (botControlInFlight.size > 0 || runtimeConfigApplyInFlight) {
7645
+ res.writeHead(409, { "Content-Type": "application/json" });
7088
7646
  res.end(
7089
7647
  JSON.stringify({
7090
7648
  ok: false,
7091
- error: "config contains non-whitelisted fields",
7092
- dropped_fields: normalizedResult.droppedFields,
7649
+ status: "busy",
7650
+ error: runtimeConfigApplyInFlight
7651
+ ? "runtime config apply is already in progress"
7652
+ : "bot control operations are in progress",
7653
+ in_flight_count: botControlInFlight.size,
7654
+ runtime_instance_id: runtimeInstanceId,
7093
7655
  }),
7094
7656
  );
7095
7657
  return;
7096
7658
  }
7097
7659
 
7098
- loadedConfigForRuntime = normalizedResult.normalized;
7099
- exposeRuntimeConfigToBridges(
7100
- loadedConfigForRuntime,
7101
- loadedConfigPathForRuntime,
7102
- );
7103
- updateConfigVersion(
7104
- normalizedResult.normalized,
7105
- requestedVersion || undefined,
7106
- );
7107
- applyRemoteRuntimeConfigToPluginHome();
7108
- if (markConnecting) {
7109
- markConfiguredBotsAsConnecting(normalizedResult.normalized);
7110
- }
7111
- await probeAndRefreshStatuses({ force: true, reason: "config_apply" });
7660
+ runtimeConfigApplyInFlight = true;
7661
+ try {
7662
+ const body = await readRequestJson(req);
7663
+ const config = body.config as Record<string, unknown> | undefined;
7664
+ const persist = parseBoolFlag(body.persist, true);
7665
+ const markConnectingRaw =
7666
+ body.mark_connecting === undefined
7667
+ ? body.markConnecting
7668
+ : body.mark_connecting;
7669
+ const markConnecting = parseBoolFlag(markConnectingRaw, true);
7670
+ const requestedVersion = String(body.version || "").trim();
7671
+ if (!config || typeof config !== "object") {
7672
+ res.writeHead(400, { "Content-Type": "application/json" });
7673
+ res.end(JSON.stringify({ ok: false, error: "config is required" }));
7674
+ return;
7675
+ }
7112
7676
 
7113
- if (persist) {
7114
- const targetPath =
7115
- String(body.target_path || "").trim() ||
7116
- loadedConfigPathForRuntime ||
7117
- path.join(process.cwd(), "syim.json");
7118
- try {
7119
- fs.mkdirSync(path.dirname(targetPath), { recursive: true });
7120
- fs.writeFileSync(
7121
- targetPath,
7122
- JSON.stringify(normalizedResult.normalized, null, 2),
7123
- "utf-8",
7677
+ const normalizedResult = normalizeRuntimeConfigByWhitelist(config);
7678
+ if (normalizedResult.droppedFields.length > 0) {
7679
+ console.log(
7680
+ `[bridges/main] config/apply rejected by whitelist: ${normalizedResult.droppedFields
7681
+ .map((item) => `${item.channel}/${item.accountId}:${item.field}`)
7682
+ .join(", ")}`,
7124
7683
  );
7125
- loadedConfigPathForRuntime = targetPath;
7126
- } catch (err) {
7127
- res.writeHead(500, { "Content-Type": "application/json" });
7684
+ res.writeHead(400, { "Content-Type": "application/json" });
7128
7685
  res.end(
7129
7686
  JSON.stringify({
7130
7687
  ok: false,
7131
- error: `persist failed: ${(err as Error).message}`,
7688
+ error: "config contains non-whitelisted fields",
7689
+ dropped_fields: normalizedResult.droppedFields,
7132
7690
  }),
7133
7691
  );
7134
7692
  return;
7135
7693
  }
7136
- }
7137
7694
 
7138
- res.writeHead(200, { "Content-Type": "application/json" });
7139
- res.end(
7140
- JSON.stringify({
7141
- ok: true,
7142
- status: "applied",
7143
- persisted: persist,
7144
- mark_connecting: markConnecting,
7145
- configPath: loadedConfigPathForRuntime,
7146
- version: configVersionHash || "unknown",
7147
- }),
7148
- );
7695
+ let persistedConfigPath: string | null = null;
7696
+ if (persist) {
7697
+ const targetPath =
7698
+ String(body.target_path || "").trim() ||
7699
+ loadedConfigPathForRuntime ||
7700
+ path.join(process.cwd(), "syim.json");
7701
+ try {
7702
+ // config/apply 对外返回失败时,运行中配置也必须保持不变。
7703
+ // 因此 persist=true 先写盘,写盘成功后才切换内存、版本与 probe。
7704
+ persistRuntimeConfigSnapshot({
7705
+ config: normalizedResult.normalized,
7706
+ targetPath,
7707
+ reason: "config_apply",
7708
+ });
7709
+ persistedConfigPath = targetPath;
7710
+ } catch (err) {
7711
+ res.writeHead(500, { "Content-Type": "application/json" });
7712
+ res.end(
7713
+ JSON.stringify({
7714
+ ok: false,
7715
+ error: `persist failed: ${(err as Error).message}`,
7716
+ }),
7717
+ );
7718
+ return;
7719
+ }
7720
+ }
7721
+
7722
+ const previousConfig =
7723
+ loadedConfigForRuntime &&
7724
+ typeof loadedConfigForRuntime === "object" &&
7725
+ !Array.isArray(loadedConfigForRuntime)
7726
+ ? (loadedConfigForRuntime as Record<string, unknown>)
7727
+ : null;
7728
+ loadedConfigForRuntime = normalizedResult.normalized;
7729
+ if (persistedConfigPath) {
7730
+ loadedConfigPathForRuntime = persistedConfigPath;
7731
+ }
7732
+ exposeRuntimeConfigToBridges(
7733
+ loadedConfigForRuntime,
7734
+ loadedConfigPathForRuntime,
7735
+ );
7736
+ updateConfigVersion(
7737
+ normalizedResult.normalized,
7738
+ requestedVersion || undefined,
7739
+ );
7740
+ applyRemoteRuntimeConfigToPluginHome();
7741
+ if (markConnecting) {
7742
+ markConfiguredBotsAsConnecting(
7743
+ normalizedResult.normalized,
7744
+ "config_apply_mark_connecting",
7745
+ );
7746
+ } else {
7747
+ await reconcileRuntimeStateForConfigDelta({
7748
+ previousConfig,
7749
+ nextConfig: normalizedResult.normalized,
7750
+ trigger: "config_apply",
7751
+ });
7752
+ }
7753
+ await probeAndRefreshStatuses({ force: true, reason: "config_apply" });
7754
+
7755
+ res.writeHead(200, { "Content-Type": "application/json" });
7756
+ res.end(
7757
+ JSON.stringify({
7758
+ ok: true,
7759
+ status: "applied",
7760
+ persisted: persist,
7761
+ mark_connecting: markConnecting,
7762
+ configPath: loadedConfigPathForRuntime,
7763
+ version: configVersionHash || "unknown",
7764
+ }),
7765
+ );
7766
+ } finally {
7767
+ runtimeConfigApplyInFlight = false;
7768
+ }
7149
7769
  return;
7150
7770
  }
7151
7771
  if (
@@ -7178,6 +7798,18 @@ async function startInternalApiServer(): Promise<void> {
7178
7798
  );
7179
7799
  return;
7180
7800
  }
7801
+ if (runtimeConfigApplyInFlight) {
7802
+ res.writeHead(409, { "Content-Type": "application/json" });
7803
+ res.end(
7804
+ JSON.stringify({
7805
+ ok: false,
7806
+ status: "busy",
7807
+ error: "runtime config apply is in progress",
7808
+ runtime_instance_id: runtimeInstanceId,
7809
+ }),
7810
+ );
7811
+ return;
7812
+ }
7181
7813
  const body = await readRequestJson(req);
7182
7814
  const restartModeRaw = String(body.mode || "")
7183
7815
  .trim()
@@ -7191,17 +7823,11 @@ async function startInternalApiServer(): Promise<void> {
7191
7823
  );
7192
7824
  runtimeState = "restarting";
7193
7825
  try {
7194
- console.log("[bridges/main] restart-all step=mark_all_stopped begin");
7195
- markAllBotsStopped("restart_all");
7196
- console.log(
7197
- `[bridges/main] restart-all step=mark_all_stopped done total=${summarizeBots().total}`,
7198
- );
7199
-
7200
7826
  if (runtimeConfigPullUrl) {
7201
7827
  console.log(
7202
7828
  `[bridges/main] restart-all step=pull_config begin url=${runtimeConfigPullUrl}`,
7203
7829
  );
7204
- const pullResult = await pullRuntimeConfigFromPython(true);
7830
+ const pullResult = await pullRuntimeConfigFromPython(true, false);
7205
7831
  if (!pullResult.ok) {
7206
7832
  console.error(
7207
7833
  `[bridges/main] restart-all step=pull_config failed err=${pullResult.error || "unknown"}`,
@@ -7274,9 +7900,14 @@ async function startInternalApiServer(): Promise<void> {
7274
7900
  "[bridges/main] restart-all step=config_source skip (no pull url and no local config file)",
7275
7901
  );
7276
7902
  }
7903
+ console.log("[bridges/main] restart-all step=mark_all_stopped begin");
7904
+ markAllBotsStopped("restart_all");
7905
+ console.log(
7906
+ `[bridges/main] restart-all step=mark_all_stopped done total=${summarizeBots().total}`,
7907
+ );
7277
7908
  if (loadedConfigForRuntime) {
7278
7909
  console.log("[bridges/main] restart-all step=mark_connecting begin");
7279
- markConfiguredBotsAsConnecting(loadedConfigForRuntime);
7910
+ markConfiguredBotsAsConnecting(loadedConfigForRuntime, "restart_all");
7280
7911
  console.log(
7281
7912
  `[bridges/main] restart-all step=mark_connecting done total=${summarizeBots().total}`,
7282
7913
  );
@@ -7325,7 +7956,10 @@ async function startInternalApiServer(): Promise<void> {
7325
7956
  console.log("[bridges/main] restart-all step=soft_restart dingtalk");
7326
7957
  bridgeSoftRestart.dingtalk.attempted = true;
7327
7958
  try {
7328
- await dingtalkControl.restart();
7959
+ await withRuntimeControlTimeout(
7960
+ dingtalkControl.restart(),
7961
+ "restart-all dingtalk restart timeout",
7962
+ );
7329
7963
  bridgeSoftRestart.dingtalk.ok = true;
7330
7964
  bridgeSoftRestart.dingtalk.reason = "restarted";
7331
7965
  } catch (err) {
@@ -7340,7 +7974,10 @@ async function startInternalApiServer(): Promise<void> {
7340
7974
  );
7341
7975
  bridgeSoftRestart.dingtalk.attempted = true;
7342
7976
  try {
7343
- await dingtalkControl.stop();
7977
+ await withRuntimeControlTimeout(
7978
+ dingtalkControl.stop(),
7979
+ "restart-all dingtalk stop timeout",
7980
+ );
7344
7981
  bridgeSoftRestart.dingtalk.ok = true;
7345
7982
  bridgeSoftRestart.dingtalk.reason = "stopped_no_accounts";
7346
7983
  } catch (err) {
@@ -7362,7 +7999,10 @@ async function startInternalApiServer(): Promise<void> {
7362
7999
  console.log("[bridges/main] restart-all step=soft_restart lark");
7363
8000
  bridgeSoftRestart.lark.attempted = true;
7364
8001
  try {
7365
- await larkControl.restart();
8002
+ await withRuntimeControlTimeout(
8003
+ larkControl.restart(),
8004
+ "restart-all lark restart timeout",
8005
+ );
7366
8006
  bridgeSoftRestart.lark.ok = true;
7367
8007
  bridgeSoftRestart.lark.reason = "restarted";
7368
8008
  } catch (err) {
@@ -7377,7 +8017,10 @@ async function startInternalApiServer(): Promise<void> {
7377
8017
  );
7378
8018
  bridgeSoftRestart.lark.attempted = true;
7379
8019
  try {
7380
- await larkControl.stop();
8020
+ await withRuntimeControlTimeout(
8021
+ larkControl.stop(),
8022
+ "restart-all lark stop timeout",
8023
+ );
7381
8024
  bridgeSoftRestart.lark.ok = true;
7382
8025
  bridgeSoftRestart.lark.reason = "stopped_no_accounts";
7383
8026
  } catch (err) {
@@ -7399,7 +8042,10 @@ async function startInternalApiServer(): Promise<void> {
7399
8042
  console.log("[bridges/main] restart-all step=soft_restart weixin");
7400
8043
  bridgeSoftRestart.weixin.attempted = true;
7401
8044
  try {
7402
- await weixinControl.restart();
8045
+ await withRuntimeControlTimeout(
8046
+ weixinControl.restart(),
8047
+ "restart-all weixin restart timeout",
8048
+ );
7403
8049
  bridgeSoftRestart.weixin.ok = true;
7404
8050
  bridgeSoftRestart.weixin.reason = "restarted";
7405
8051
  } catch (err) {
@@ -7414,7 +8060,10 @@ async function startInternalApiServer(): Promise<void> {
7414
8060
  );
7415
8061
  bridgeSoftRestart.weixin.attempted = true;
7416
8062
  try {
7417
- await weixinControl.stop();
8063
+ await withRuntimeControlTimeout(
8064
+ weixinControl.stop(),
8065
+ "restart-all weixin stop timeout",
8066
+ );
7418
8067
  bridgeSoftRestart.weixin.ok = true;
7419
8068
  bridgeSoftRestart.weixin.reason = "stopped_no_accounts";
7420
8069
  } catch (err) {
@@ -7519,10 +8168,32 @@ async function startInternalApiServer(): Promise<void> {
7519
8168
  const platform = String(body.platform || "").trim();
7520
8169
  const botAccountId = String(body.bot_account_id || "").trim();
7521
8170
  const linkStatus = String(body.link_status || "").trim();
7522
- const eventName = String(body.event_name || "").trim() || null;
8171
+ const eventNameRaw = String(body.event_name || "").trim();
8172
+ const eventName = normalizeRuntimeTelemetryEventName(
8173
+ normalizeRuntimeEventName(eventNameRaw),
8174
+ ) || null;
7523
8175
  const isTelemetryEvent = isTelemetryOnlyEvent(eventName);
8176
+ const isTelemetryLikeEvent = isTelemetryLikeEventName(eventNameRaw);
7524
8177
  const isHeartbeatEvent = isHeartbeatOnlyEvent(eventName);
7525
8178
  const isDiagnosticEvent = isDiagnosticOnlyEvent(eventName);
8179
+ // 重要:该软忽略分支是跨版本兼容防线,不能删除。
8180
+ // 当发送端事件名发生命名漂移时(snake/dash/dot/camel 混用),
8181
+ // 若直接返回 400 会触发上游重试风暴并污染状态链路。
8182
+ // 必须保持 ok+ignored 语义,并通过可观测日志逐步补齐 alias 映射。
8183
+ if (isTelemetryLikeEvent && !isTelemetryEvent && !linkStatus) {
8184
+ logRuntimeStatusDebug(
8185
+ `event report ignored unknown telemetry event=${eventNameRaw} normalized=${eventName || ""}`,
8186
+ );
8187
+ res.writeHead(200, { "Content-Type": "application/json" });
8188
+ res.end(
8189
+ JSON.stringify({
8190
+ ok: true,
8191
+ ignored: true,
8192
+ reason: "unknown_telemetry_event",
8193
+ }),
8194
+ );
8195
+ return;
8196
+ }
7526
8197
  if (
7527
8198
  (platform !== "dingtalk" &&
7528
8199
  platform !== "feishu" &&
@@ -7611,6 +8282,7 @@ async function startInternalApiServer(): Promise<void> {
7611
8282
  body.last_event_at ?? body.last_heartbeat_at,
7612
8283
  heartbeatAt,
7613
8284
  );
8285
+ const incomingLastError = String(body.last_error || "").trim() || null;
7614
8286
  const eventNameForLiveness =
7615
8287
  eventName ||
7616
8288
  (isHeartbeatEvent
@@ -7647,7 +8319,7 @@ async function startInternalApiServer(): Promise<void> {
7647
8319
  platform: p,
7648
8320
  bot_account_id: normalizedBotAccountId,
7649
8321
  last_event: eventName || "runtime_diagnostic",
7650
- last_error: String(body.last_error || "").trim() || null,
8322
+ last_error: incomingLastError,
7651
8323
  last_reported_at: heartbeatAt || nowIso(),
7652
8324
  });
7653
8325
  }
@@ -7666,8 +8338,10 @@ async function startInternalApiServer(): Promise<void> {
7666
8338
  bot_account_id: normalizedBotAccountId,
7667
8339
  started_at: String(body.started_at || prev.started_at || nowIso()),
7668
8340
  last_heartbeat_at: heartbeatAt,
7669
- last_error:
7670
- String(body.last_error || "").trim() || prev.last_error || null,
8341
+ // 诊断事件只写 diagnostic 区,避免污染 status.last_error 语义。
8342
+ last_error: isDiagnosticEvent
8343
+ ? prev.last_error || null
8344
+ : incomingLastError || prev.last_error || null,
7671
8345
  reconnect_count:
7672
8346
  runtimeHintsFromEvent.reconnect_count ?? prev.reconnect_count ?? 0,
7673
8347
  restart_pending: nextRestartPending,
@@ -7721,8 +8395,7 @@ async function startInternalApiServer(): Promise<void> {
7721
8395
  link_status: normalizedStatus,
7722
8396
  started_at: String(body.started_at || prev?.started_at || nowIso()),
7723
8397
  last_heartbeat_at: heartbeatAt,
7724
- last_error:
7725
- String(body.last_error || "").trim() || prev?.last_error || null,
8398
+ last_error: incomingLastError || prev?.last_error || null,
7726
8399
  reconnect_count: reconnectCount,
7727
8400
  restart_pending: nextRestartPending,
7728
8401
  busy: runtimeHintsFromEvent.busy ?? false,
@@ -7892,8 +8565,29 @@ async function main(): Promise<void> {
7892
8565
  registerRuntimeStateExitHooks();
7893
8566
 
7894
8567
  startupOnlyMode = parseOnlyArg();
7895
- const loaded = loadOpenClawConfig();
7896
- if (!loaded) {
8568
+ const remoteConfigAuthoritative = Boolean(runtimeConfigPullUrl && !localConfigOnly);
8569
+ const loaded = remoteConfigAuthoritative ? null : loadOpenClawConfig();
8570
+ let shouldRestoreRuntimeStateSnapshot = true;
8571
+ if (remoteConfigAuthoritative) {
8572
+ // 远端配置权威模式:只要配置了 RUNTIME_CONFIG_PULL_URL,就不能在
8573
+ // 初始 pull 失败时回退使用本地旧 syim.json 启动 bridge。否则 Python
8574
+ // DB 真源不可用时,Node 可能用过期本地文件启动旧账号/旧凭证。
8575
+ const fallbackPath = path.join(os.homedir(), ".syim", "syim.json");
8576
+ console.log(
8577
+ `[bridges/main] runtime pull url configured, skip local config bootstrap and wait for remote source: ${runtimeConfigPullUrl}`,
8578
+ );
8579
+ loadedConfigForRuntime = { channels: {}, bindings: [] };
8580
+ loadedConfigPathForRuntime = fallbackPath;
8581
+ exposeRuntimeConfigToBridges(
8582
+ loadedConfigForRuntime,
8583
+ loadedConfigPathForRuntime,
8584
+ );
8585
+ updateConfigVersion(loadedConfigForRuntime);
8586
+ markConfiguredBotsAsConnecting(
8587
+ loadedConfigForRuntime,
8588
+ "startup_remote_config_authoritative",
8589
+ );
8590
+ } else if (!loaded) {
7897
8591
  if (!runtimeConfigPullUrl) {
7898
8592
  if (allowLocalRuntimeConfig) {
7899
8593
  printConfigBootstrapGuide();
@@ -7911,7 +8605,10 @@ async function main(): Promise<void> {
7911
8605
  loadedConfigPathForRuntime,
7912
8606
  );
7913
8607
  updateConfigVersion(loadedConfigForRuntime);
7914
- markConfiguredBotsAsConnecting(loadedConfigForRuntime);
8608
+ markConfiguredBotsAsConnecting(
8609
+ loadedConfigForRuntime,
8610
+ "startup_bootstrap_empty",
8611
+ );
7915
8612
  } else {
7916
8613
  const fallbackPath = path.join(os.homedir(), ".syim", "syim.json");
7917
8614
  console.log(
@@ -7924,7 +8621,10 @@ async function main(): Promise<void> {
7924
8621
  loadedConfigPathForRuntime,
7925
8622
  );
7926
8623
  updateConfigVersion(loadedConfigForRuntime);
7927
- markConfiguredBotsAsConnecting(loadedConfigForRuntime);
8624
+ markConfiguredBotsAsConnecting(
8625
+ loadedConfigForRuntime,
8626
+ "startup_bootstrap_pull",
8627
+ );
7928
8628
  }
7929
8629
  } else {
7930
8630
  console.log("[bridges/main] config loaded:", loaded.configPath);
@@ -7935,23 +8635,30 @@ async function main(): Promise<void> {
7935
8635
  loadedConfigPathForRuntime,
7936
8636
  );
7937
8637
  updateConfigVersion(loaded.config);
7938
- markConfiguredBotsAsConnecting(loaded.config);
8638
+ markConfiguredBotsAsConnecting(loaded.config, "startup_local_config");
7939
8639
  }
7940
8640
 
7941
- restorePersistedRuntimeStateSnapshot(
7942
- loadedConfigForRuntime as Record<string, unknown> | null,
7943
- );
7944
-
7945
8641
  if (runtimeConfigPullUrl && !localConfigOnly) {
7946
8642
  // 启动前先尝试拉最新配置,保证 bridge 进程首启读到的是最新配置快照。
7947
- // 启动前必须先拉到配置,确保插件启动时读到的是最新本地文件。
7948
- const pullResult = await pullRuntimeConfigFromPython(false);
8643
+ // 启动前必须先拉到配置,确保插件启动时读到的是 Python DB 真源;
8644
+ // 若失败,只启动控制面和恢复轮询,不允许旧本地配置启动 bridge。
8645
+ const pullResult = await pullRuntimeConfigFromPython(false, true, {
8646
+ probeAfterPull: false,
8647
+ });
7949
8648
  if (!pullResult.ok) {
7950
8649
  console.error(
7951
8650
  `[bridges/main] initial config pull failed: ${pullResult.error || "unknown"}`,
7952
8651
  );
7953
8652
  console.log(
7954
- "[bridges/main] keep alive, wait next poll for config recovery",
8653
+ "[bridges/main] keep alive with empty runtime config, wait next poll for config recovery",
8654
+ );
8655
+ shouldRestoreRuntimeStateSnapshot = false;
8656
+ markConfiguredBotsAsConnecting(
8657
+ (loadedConfigForRuntime as Record<string, unknown>) || {
8658
+ channels: {},
8659
+ bindings: [],
8660
+ },
8661
+ "startup_remote_pull_failed_empty",
7955
8662
  );
7956
8663
  startRuntimeConfigPollLoop();
7957
8664
  } else {
@@ -7964,6 +8671,31 @@ async function main(): Promise<void> {
7964
8671
  }
7965
8672
  }
7966
8673
 
8674
+ if (shouldRestoreRuntimeStateSnapshot && remoteConfigAuthoritative) {
8675
+ const configuredAccounts = collectConfiguredAccountIdsByPlatform(
8676
+ (loadedConfigForRuntime as Record<string, unknown>) || null,
8677
+ );
8678
+ const hasConfiguredAccounts =
8679
+ configuredAccounts.dingtalk.length > 0 ||
8680
+ configuredAccounts.feishu.length > 0 ||
8681
+ configuredAccounts.weixin.length > 0;
8682
+ // 远端权威模式下,空配置不能恢复旧快照;否则 Python 返回空配置或
8683
+ // pull 失败时,历史 snapshot 会重新暴露已删除/过期账号。
8684
+ shouldRestoreRuntimeStateSnapshot = hasConfiguredAccounts;
8685
+ }
8686
+
8687
+ if (shouldRestoreRuntimeStateSnapshot) {
8688
+ restorePersistedRuntimeStateSnapshot(
8689
+ loadedConfigForRuntime as Record<string, unknown> | null,
8690
+ );
8691
+ }
8692
+
8693
+ if (runtimeConfigPullUrl && !localConfigOnly && shouldRestoreRuntimeStateSnapshot) {
8694
+ // 初始远端 pull 已完成后再恢复快照,因此必须补一次 probe,
8695
+ // 避免旧 snapshot 覆盖刚拉取配置后的真实探测结果。
8696
+ await probeAndRefreshStatuses({ force: true, reason: "startup_after_restore" });
8697
+ }
8698
+
7967
8699
  if (enableInternalApi) {
7968
8700
  // 控制面 API 与探测循环独立于 bridge 进程生命周期。
7969
8701
  await startInternalApiServer();