@clawling/clawchat-plugin-openclaw 2026.5.12-39 → 2026.5.13-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@ import { createPluginRuntimeStore } from "openclaw/plugin-sdk/runtime-store";
5
5
  import { createOpenclawClawlingClient } from "./client.js";
6
6
  import { createOpenclawClawlingApiClient } from "./api-client.js";
7
7
  import { ClawlingApiError } from "./api-types.js";
8
+ import { RefreshManager } from "./refresh-manager.js";
8
9
  import { CHANNEL_ID, effectiveOutputVisibility, effectiveGroupCommandMode, hasOpenclawClawlingConnectCredentials, } from "./config.js";
9
10
  import { dispatchOpenclawClawlingInbound } from "./inbound.js";
10
11
  import { fetchInboundMedia } from "./media-runtime.js";
@@ -12,7 +13,7 @@ import { createOpenclawClawlingReplyDispatcher } from "./reply-dispatcher.js";
12
13
  import { runWithTerminalClawChatSendScope } from "./terminal-send.js";
13
14
  import { flushAlignedOutboundQueue, getAlignedOutboundQueueSize, sendOpenclawClawlingText, setAlignedOutboundLogContext, } from "./outbound.js";
14
15
  import { formatWsLog } from "./ws-log.js";
15
- import { createProtocolControlHandler, createReconnectTracker } from "./ws-alignment.js";
16
+ import { createNotifySignalObserver, createProtocolControlHandler, createReconnectTracker } from "./ws-alignment.js";
16
17
  import { clawChatDbPathForStateDir, getClawChatStore, } from "./storage.js";
17
18
  import { getClawChatGroupPrompt, getClawChatUserPrompt } from "./plugin-prompts.js";
18
19
  import { loadClawChatPromptMetadata, renderClawChatProfilePrompt, resolveSenderRelation, } from "./profile-prompt.js";
@@ -41,9 +42,38 @@ const OPENCLAW_CONFIRM_SLASH_COMMANDS = new Set([
41
42
  "nevermind",
42
43
  ]);
43
44
  const GROUP_OWNER_ATTENTION_TITLE = "requires owner attention";
45
+ // §C.1 — user-visible message emitted on permanent token expiry. Kept
46
+ // byte-identical to the Hermes plugin (parity spec §C.1.4).
47
+ const CLAWCHAT_TOKEN_EXPIRED_MESSAGE = "ClawChat token expired and could not be refreshed. Re-pair with `/clawchat-activate <code>`.";
48
+ const CLAWCHAT_TOKEN_EXPIRED_LAST_ERROR = "token expired — re-pair required";
44
49
  function isRecord(value) {
45
50
  return Boolean(value && typeof value === "object" && !Array.isArray(value));
46
51
  }
52
/**
 * §A.2 — classify a WS `hello-fail` reason for refresh gating.
 * - "token-rejected": reason names an authentication failure → refresh.
 * - "auth-unavailable": 5xx auth-backend outage → backoff, DO NOT refresh.
 * - "generic": unattributed → refresh only if the token is at/near expiry.
 *
 * `auth service unavailable` is already split off by the ws-client into a
 * TransportError (backoff), but we classify defensively here too.
 *
 * The reason is matched case-insensitively. A non-string reason never throws:
 * an object carrying a string `message` (e.g. an `Error`) is classified by that
 * message, and anything else (null, undefined, numbers, …) is "generic".
 *
 * @param {unknown} reason - The `hello-fail` reason as received.
 * @returns {"auth-unavailable" | "token-rejected" | "generic"} The gating class.
 */
export function classifyHelloFailReason(reason) {
    let text = "";
    if (typeof reason === "string") {
        text = reason;
    }
    else if (reason !== null && typeof reason === "object" && typeof reason.message === "string") {
        text = reason.message;
    }
    const normalized = text.toLowerCase();
    // Outage wording wins over auth wording so a backend outage is never
    // mistaken for a rejected token (which would trigger a refresh).
    if (/auth service unavailable|temporarily unavailable/.test(normalized)) {
        return "auth-unavailable";
    }
    if (/authentication failed|invalid token|token expired|unauthorized|auth failed|invalid credentials/.test(normalized)) {
        return "token-rejected";
    }
    return "generic";
}
70
/**
 * Read `channels.<CHANNEL_ID>.refreshToken` from a live config, or null.
 *
 * Returns null when the config itself is null/undefined, when `channels` or
 * the channel section is not a record, or when the stored value is not a
 * non-blank string. A present token is returned trimmed.
 *
 * @param {unknown} cfg - The live config object (may be absent).
 * @returns {string | null} The trimmed refresh token, or null.
 */
function readConfigRefreshToken(cfg) {
    // `?.` keeps an absent config from throwing before the record checks run.
    const channels = cfg?.channels;
    const channel = isRecord(channels) ? channels[CHANNEL_ID] : undefined;
    const refreshToken = isRecord(channel) ? channel.refreshToken : undefined;
    if (typeof refreshToken !== "string") {
        return null;
    }
    const trimmed = refreshToken.trim();
    return trimmed ? trimmed : null;
}
47
77
  function withFullVerboseDispatchConfig(cfg, agentId) {
48
78
  const cfgRecord = cfg;
49
79
  const agents = isRecord(cfgRecord.agents) ? cfgRecord.agents : {};
@@ -214,7 +244,12 @@ function metadataScopesFromEnvelope(env) {
214
244
  return Array.isArray(scope) ? scope.filter((item) => typeof item === "string") : [];
215
245
  }
216
246
  function shouldRefreshBehaviorForScopes(scopes) {
217
- return scopes.includes("behavior");
247
+ // §9.3: empty/absent scope ⇒ "refetch everything"; unknown scope strings must
248
+ // also trigger a refresh. Only the known non-behavior scopes (title,
249
+ // description) leave agent behavior untouched.
250
+ if (scopes.length === 0)
251
+ return true;
252
+ return scopes.some((scope) => scope !== "title" && scope !== "description");
218
253
  }
219
254
  function shouldRefreshConversationForScopes(scopes) {
220
255
  if (scopes.length === 0)
@@ -272,6 +307,17 @@ function buildActivationBootstrapEnvelope(params) {
272
307
  },
273
308
  };
274
309
  }
310
+ /**
311
+ * §A.3/§A.4 — max consecutive refresh-driven reconnects within
312
+ * `REFRESH_RECONNECT_WINDOW_MS` before we abandon the refresh loop and fall back
313
+ * to plain transport backoff (or auto-logout on a permanent reject). Bounds a
314
+ * server that keeps rotating-then-rejecting fresh tokens.
315
+ */
316
+ export const MAX_REFRESH_RECONNECTS = 3;
317
+ export const REFRESH_RECONNECT_WINDOW_MS = 5 * 60_000;
318
+ /** §B — default transport-backoff delay before a reactive re-enter (capped 30s). */
319
+ export const TRANSPORT_BACKOFF_BASE_MS = 1_000;
320
+ export const TRANSPORT_BACKOFF_MAX_MS = 30_000;
275
321
  function resolveConnectionStore(params, runtime) {
276
322
  if (params.store !== undefined)
277
323
  return params.store;
@@ -392,15 +438,81 @@ export async function startOpenclawClawlingGateway(params) {
392
438
  if (!activationAccount)
393
439
  return;
394
440
  account = activationAccount.account;
441
+ // §A.0 — fallback expiry source. Prefer the SQLite `activated_at`; null when
442
+ // the credentials came from config (no activation row yet) — in that case the
443
+ // refresh manager relies on the JWT `exp` alone.
444
+ let activatedAtMs = activationAccount.source === "sqlite" && store?.getActivationCredentials
445
+ ? store.getActivationCredentials({ platform: "openclaw", accountId })?.activatedAt ?? null
446
+ : null;
395
447
  let conversationApiClient;
448
+ const buildConversationApiClient = () => createOpenclawClawlingApiClient({
449
+ baseUrl: account.baseUrl,
450
+ mediaBaseUrl: account.mediaBaseUrl,
451
+ token: account.token,
452
+ userId: account.userId,
453
+ });
454
+ // §A.2.1 — forward reference to the single-flight REST refresh wrapper; set
455
+ // once the refresh manager exists. Until then, calls run un-wrapped.
456
+ let restWithRefresh = null;
457
// Returns a proxy whose every method call runs through `restWithRefresh`, so a
// 401/403 transparently triggers one single-flight refresh + retry. The proxy
// reads the cached client lazily on each call so a post-refresh rebuild is
// picked up automatically.
//
// Until `restWithRefresh` is set, calls run un-wrapped and return whatever the
// underlying method returns (a missing method then throws synchronously). Once
// it is set, every call returns the promise produced by `restWithRefresh`.
const getConversationApiClient = () => {
    return new Proxy({}, {
        get: (_target, prop) => {
            // Promise resolution probes `then`, and the runtime/inspectors probe
            // well-known symbols. Answering those with a callable would make the
            // proxy look thenable, so awaiting it (or returning it from an async
            // function) would invoke a non-existent `then` method instead of
            // resolving to the proxy. Report them as absent.
            if (typeof prop === "symbol" || prop === "then") {
                return undefined;
            }
            return (...args) => {
                const invoke = () => {
                    // Build lazily; an invalidation after a token swap clears
                    // the cache so the next call picks up the fresh token.
                    conversationApiClient ??= buildConversationApiClient();
                    const fn = conversationApiClient[prop];
                    if (typeof fn !== "function") {
                        throw new TypeError(`clawchat api-client has no method ${String(prop)}`);
                    }
                    return fn.apply(conversationApiClient, args);
                };
                return restWithRefresh
                    ? restWithRefresh(() => Promise.resolve(invoke()))
                    : invoke();
            };
        },
    });
};
480
+ // Rebuilt after every in-memory token swap so REST calls use the fresh token.
481
+ const invalidateConversationApiClient = () => {
482
+ conversationApiClient = undefined;
483
+ };
484
+ const resolveMutateConfigFile = () => {
485
+ if (params.mutateConfigFile)
486
+ return params.mutateConfigFile;
487
+ const runtimeConfig = runtime.config;
488
+ return typeof runtimeConfig?.mutateConfigFile === "function"
489
+ ? runtimeConfig.mutateConfigFile
490
+ : undefined;
491
+ };
492
+ // §0/§C.1 — write the channel-config `token`/`refreshToken` keys. `tokens=null`
493
+ // blanks them (auto-logout); otherwise persists the rotated pair.
494
+ const persistConfigTokens = async (tokens) => {
495
+ const mutateConfigFile = resolveMutateConfigFile();
496
+ if (!mutateConfigFile) {
497
+ log?.error?.(`[${accountId}] clawchat-plugin-openclaw config persistence unavailable; cannot ${tokens ? "rotate" : "clear"} tokens in config`);
498
+ return;
499
+ }
500
+ await mutateConfigFile({
501
+ afterWrite: { mode: "none", reason: "clawchat-plugin-openclaw token refresh" },
502
+ mutate(draft) {
503
+ const channels = (draft.channels ?? {});
504
+ const existing = (channels[CHANNEL_ID] ?? {});
505
+ const nextSection = {
506
+ ...existing,
507
+ token: tokens ? tokens.accessToken : "",
508
+ refreshToken: tokens ? tokens.refreshToken : "",
509
+ };
510
+ Object.assign(draft, {
511
+ ...draft,
512
+ channels: { ...channels, [CHANNEL_ID]: nextSection },
513
+ });
514
+ },
402
515
  });
403
- return conversationApiClient;
404
516
  };
405
517
  let lastHelloFailTraceId = "-";
406
518
  let lastHelloFailReason = "";
@@ -411,6 +523,129 @@ export async function startOpenclawClawlingGateway(params) {
411
523
  let authFailureLogged = false;
412
524
  let closingForAbort = false;
413
525
  let wsReady = false;
526
+ // True once this gateway attempt reached "connected" at least once — used to
527
+ // route a later auth-fail through the live-session reactive refresh path
528
+ // rather than the initial-connect catch.
529
+ let wsReadyEverThisAttempt = false;
530
+ // §D — set when a refresh succeeded and we are closing the live WS to
531
+ // reconnect with the new token; suppresses the auth-failed teardown path and
532
+ // drives a clean re-enter into the gateway with the rotated account.
533
+ let reconnectWithRefreshedToken = false;
534
+ // §C — set once auto-logout has fired so we don't double-emit or reconnect.
535
+ let autoLoggedOut = false;
536
+ // §E — connect-time device id for `X-Device-Id` on refresh. Prefer the value
537
+ // recorded in SQLite at connect; backfill legacy rows (no column) to the
538
+ // deterministic constant `CHANNEL_ID` actually sent by `authHeaders`.
539
+ const refreshDeviceId = (activationAccount.source === "sqlite" && store?.getActivationCredentials
540
+ ? store.getActivationCredentials({ platform: "openclaw", accountId })?.deviceId
541
+ : null) || CHANNEL_ID;
542
+ // §C — auto-logout on permanent refresh failure. Blank creds in BOTH stores
543
+ // (KEEP identity), flip not-configured via the auth-failure status path, and
544
+ // emit the user-visible message. Idempotent.
545
+ const performAutoLogout = async (info) => {
546
+ if (autoLoggedOut)
547
+ return;
548
+ autoLoggedOut = true;
549
+ log?.error?.(`[${accountId}] clawchat-plugin-openclaw auto-logout (token permanently expired) code=${info.code}: ${info.message}`);
550
+ // SQLite: blank access/refresh, keep user/owner/device for re-pair.
551
+ if (store?.clearActivationCredentials) {
552
+ recordConnection("clear activation credentials", () => store.clearActivationCredentials?.({ platform: "openclaw", accountId }));
553
+ }
554
+ // Config: blank token/refreshToken keys.
555
+ try {
556
+ await persistConfigTokens(null);
557
+ }
558
+ catch (err) {
559
+ log?.error?.(`[${accountId}] clawchat-plugin-openclaw failed to clear config credentials on auto-logout: ${err instanceof Error ? err.message : String(err)}`);
560
+ }
561
+ // Flip not-configured (existing auth-failure status path) with the re-pair
562
+ // hint as `lastError`.
563
+ setStatus({
564
+ ...getStatus(),
565
+ connected: false,
566
+ configured: false,
567
+ running: false,
568
+ lastError: CLAWCHAT_TOKEN_EXPIRED_LAST_ERROR,
569
+ });
570
+ // User-visible notification (in addition to logs). Best-effort; never throws.
571
+ emitUserVisibleAuthLogout();
572
+ };
573
+ // §C.1.4 — surface the permanent-expiry message to the user/operator. The
574
+ // plugin has no guaranteed live chat target after creds are cleared, so we
575
+ // route through the runtime notification surface when present and always log.
576
+ const emitUserVisibleAuthLogout = () => {
577
+ log?.error?.(`[${accountId}] clawchat-plugin-openclaw ${CLAWCHAT_TOKEN_EXPIRED_MESSAGE}`);
578
+ try {
579
+ const notify = runtime.notifications?.notify;
580
+ if (typeof notify === "function") {
581
+ notify({ level: "error", message: CLAWCHAT_TOKEN_EXPIRED_MESSAGE });
582
+ }
583
+ }
584
+ catch {
585
+ // Best effort only.
586
+ }
587
+ };
588
+ // The refresh token is not part of the resolved account; source it from
589
+ // SQLite first (authoritative after a rotation) then the config channel
590
+ // section. Kept in a mutable cell so a swap updates it in place.
591
+ let latestRefreshToken = (activationAccount.source === "sqlite" && store?.getActivationCredentials
592
+ ? store.getActivationCredentials({ platform: "openclaw", accountId })?.refreshToken
593
+ : null) ?? readConfigRefreshToken(cfg);
594
+ const refreshManager = new RefreshManager({
595
+ baseUrl: account.baseUrl,
596
+ deviceId: refreshDeviceId,
597
+ getAccessToken: () => account.token,
598
+ getRefreshToken: () => latestRefreshToken,
599
+ persistRotatedTokens: async (tokens) => {
600
+ // §0 — persist to BOTH stores BEFORE the in-memory swap. A failure in
601
+ // EITHER store must REJECT so the manager skips the in-memory swap and
602
+ // treats the refresh as transient (keep the current tokens, back off). Do
603
+ // NOT swallow the SQLite write error: `rotateActivationTokens` returns
604
+ // `null` when its internal `write()` caught an exception (a real write
605
+ // failure), `false` only when no activation row exists yet (config-sourced
606
+ // agent — legitimately nothing to update). A swallowed write failure must
607
+ // not leave the SQLite row holding the now-dead refresh token while the
608
+ // in-memory token is rotated, which would brick a sqlite-sourced agent on
609
+ // restart.
610
+ if (store?.rotateActivationTokens) {
611
+ const rotateResult = store.rotateActivationTokens({
612
+ platform: "openclaw",
613
+ accountId,
614
+ accessToken: tokens.accessToken,
615
+ refreshToken: tokens.refreshToken,
616
+ });
617
+ if (rotateResult === null) {
618
+ throw new Error("clawchat-plugin-openclaw sqlite rotate activation tokens failed");
619
+ }
620
+ }
621
+ // A config write failure rejects out of `mutateConfigFile` and propagates
622
+ // here, which is what we want — persistence incomplete ⇒ no swap.
623
+ await persistConfigTokens(tokens);
624
+ },
625
+ swapInMemoryTokens: (tokens) => {
626
+ account = { ...account, token: tokens.accessToken, configured: true };
627
+ latestRefreshToken = tokens.refreshToken;
628
+ activatedAtMs = Date.now();
629
+ invalidateConversationApiClient();
630
+ },
631
+ onPermanentFailure: performAutoLogout,
632
+ // §A.1/§D — proactive-timer success closes the live WS and re-enters with the
633
+ // rotated token. The running ws-client captured the OLD token at `connect`
634
+ // time, so the in-memory swap alone never reaches a `connect` envelope.
635
+ onProactiveRefreshed: async () => {
636
+ await runRefreshReconnect("proactive-timer");
637
+ },
638
+ ...(params.refreshFetchImpl ? { fetchImpl: params.refreshFetchImpl } : {}),
639
+ ...(params.refreshSetTimer ? { setTimer: params.refreshSetTimer } : {}),
640
+ ...(params.refreshClearTimer ? { clearTimer: params.refreshClearTimer } : {}),
641
+ ...(params.refreshJitter ? { jitter: params.refreshJitter } : {}),
642
+ log,
643
+ });
644
+ // §A.3/§A.4 — carry the single-flight latch + min-interval across re-enters so
645
+ // the guards bound a rotate-then-reject loop instead of resetting each time.
646
+ if (params.refreshManagerState) {
647
+ refreshManager.restoreState(params.refreshManagerState);
648
+ }
414
649
  let currentConnectionId = null;
415
650
  let currentConnectionFinished = false;
416
651
  const reconnectTracker = createReconnectTracker({
@@ -530,7 +765,10 @@ export async function startOpenclawClawlingGateway(params) {
530
765
  const memoryRoot = resolveMemoryRootForPeer(peer);
531
766
  if (!memoryRoot)
532
767
  return;
533
- if (refreshBehavior) {
768
+ // §9.3: agent behavior is per-agent metadata that lives only on the agent's
769
+ // DIRECT conversation — never refetch it for a group invalidation, even on a
770
+ // "refetch everything" (empty/unknown) scope.
771
+ if (refreshBehavior && peer.kind === "direct") {
534
772
  await refreshAgentBehavior({
535
773
  source: "metadata_invalidation",
536
774
  ...(version !== undefined ? { metadataVersion: version } : {}),
@@ -600,8 +838,36 @@ export async function startOpenclawClawlingGateway(params) {
600
838
  log: { error: (message) => log?.error?.(`[${accountId}] ${message}`) },
601
839
  });
602
840
  };
841
+ // §A.4 — startup refresh-if-near-expiry, BEFORE the first WS connect. Recovers
842
+ // a long-stopped pod with no manual re-pair. On a permanent refresh failure
843
+ // auto-logout immediately and skip the doomed connect.
844
+ if (!abortSignal.aborted &&
845
+ latestRefreshToken &&
846
+ refreshManager.isNearExpiry(activatedAtMs)) {
847
+ log?.info?.(`[${accountId}] clawchat-plugin-openclaw access token near expiry at startup; refreshing before connect`);
848
+ const startupOutcome = await refreshManager.refresh("startup-near-expiry");
849
+ if (abortSignal.aborted)
850
+ return;
851
+ if (startupOutcome.kind === "permanent") {
852
+ // Auto-logout already performed by the manager's onPermanentFailure.
853
+ return;
854
+ }
855
+ // success swaps the in-memory token in place; transient/skipped just connect
856
+ // with the current token (the WS handshake will then drive reactive refresh).
857
+ }
858
+ // Reuse the device id the server resolved on a previous connection so a pod
859
+ // restart (fresh hostname → fresh hostname-derived id) does not present a
860
+ // brand-new device, which would force a full inbox replay and orphan the
861
+ // prior device's cursor. Persisted from `hello-ok` via markConnectionReady.
862
+ const persistedDeviceId = store?.getLastResolvedDeviceId
863
+ ? store.getLastResolvedDeviceId({ platform: "openclaw", accountId })
864
+ : null;
865
+ if (persistedDeviceId) {
866
+ log?.info?.(`[${accountId}] clawchat-plugin-openclaw reusing persisted resolved_device_id`);
867
+ }
603
868
  const client = createOpenclawClawlingClient(account, {
604
869
  ...(params.transport ? { transport: params.transport } : {}),
870
+ ...(persistedDeviceId ? { deviceIdOverride: persistedDeviceId } : {}),
605
871
  wsLifecycle: {
606
872
  onConnectFrameSent: (env) => {
607
873
  lastConnectTraceId = typeof env.trace_id === "string" ? env.trace_id : "-";
@@ -627,6 +893,195 @@ export async function startOpenclawClawlingGateway(params) {
627
893
  },
628
894
  });
629
895
  log?.info?.(`[${accountId}] clawchat-plugin-openclaw runtime client created`);
896
+ // §A.3/§A.4 — recompute the refresh-driven reconnect streak (depth + window).
897
+ // Returns the next depth/window to thread into the re-enter, and whether the
898
+ // cap is exceeded so the caller must NOT re-enter via refresh again.
899
+ const nextRefreshReconnectStreak = () => {
900
+ const now = Date.now();
901
+ const priorWindowStart = params.refreshReconnectWindowStartedAt ?? 0;
902
+ const withinWindow = priorWindowStart !== 0 && now - priorWindowStart < REFRESH_RECONNECT_WINDOW_MS;
903
+ const windowStartedAt = withinWindow ? priorWindowStart : now;
904
+ const depth = (withinWindow ? params.refreshReconnectDepth ?? 0 : 0) + 1;
905
+ return { depth, windowStartedAt, capped: depth > MAX_REFRESH_RECONNECTS };
906
+ };
907
// §B/§D — re-enter the gateway after a plain transport-backoff delay, with the
// CURRENT (unchanged) token and creds left untouched. Used when a reactive
// refresh is transient/skipped (§B: a transient refresh failure NEVER
// auto-logs-out and NEVER stops — keep retrying with the current token) and
// when the refresh-reconnect loop is capped (§A.4). Carries the refresh
// manager's latch + min-interval state so the guards keep bounding the loop.
//
// `reason` is only used in the log line. The call is a no-op when the gateway
// is aborted, already auto-logged-out, or a reconnect is already pending. The
// delay is `params.transportBackoffDelayMs` when provided, otherwise an
// exponential backoff from TRANSPORT_BACKOFF_BASE_MS capped at
// TRANSPORT_BACKOFF_MAX_MS. The timer is `params.backoffTimer` when provided,
// otherwise `setTimeout`.
const scheduleTransportBackoffReconnect = (reason) => {
    if (abortSignal.aborted || autoLoggedOut || reconnectWithRefreshedToken) {
        return;
    }
    reconnectWithRefreshedToken = true; // suppress the auth-failed teardown path.
    refreshManager.stop();
    activeClients.delete(accountId);
    finishCurrentConnection({
        state: "disconnected",
        closeCode: 1000,
        closeReason: "transport backoff reconnect",
    });
    try {
        client.close();
    }
    catch {
        // best effort
    }
    const attempt = (params.refreshReconnectDepth ?? 0) + 1;
    const delayMs = params.transportBackoffDelayMs ??
        Math.min(TRANSPORT_BACKOFF_MAX_MS, TRANSPORT_BACKOFF_BASE_MS * 2 ** Math.max(0, attempt - 1));
    log?.info?.(`[${accountId}] clawchat-plugin-openclaw reactive refresh ${reason}; backoff-reconnect with current token delayMs=${delayMs}`);
    const managerState = refreshManager.exportState();
    const streak = nextRefreshReconnectStreak();
    const reEnter = () => {
        // The abort may have landed while the timer was pending.
        if (abortSignal.aborted) {
            return;
        }
        // Fire-and-forget from a timer callback: nothing awaits this promise, so
        // a rejection must be handled here or it surfaces as an unhandled
        // rejection instead of a logged reconnect failure.
        void startOpenclawClawlingGateway({
            ...params,
            account: { ...params.account },
            transportBackoffReconnect: true,
            refreshReconnectDepth: streak.depth,
            refreshReconnectWindowStartedAt: streak.windowStartedAt,
            refreshManagerState: managerState,
        }).catch((err) => {
            log?.error?.(`[${accountId}] clawchat-plugin-openclaw backoff re-enter failed: ${err instanceof Error ? err.message : String(err)}`);
        });
    };
    const timer = params.backoffTimer ?? ((cb, ms) => void setTimeout(cb, ms));
    timer(reEnter, delayMs);
};
951
+ // §A/§D — close the live WS and re-enter the gateway with the rotated token
952
+ // (a token only enters via a fresh `connect` envelope; it cannot be hot-swapped
953
+ // onto a live socket). Assumes a refresh ALREADY succeeded and swapped the
954
+ // in-memory token (proactive path), or is called by `runRefreshAndReconnect`
955
+ // after its own successful refresh (reactive path). Carries the refresh
956
+ // manager's latch + min-interval + reconnect-streak state across the re-enter.
957
+ const closeAndReconnectWithRefreshedToken = async (reason) => {
958
+ if (abortSignal.aborted || autoLoggedOut || reconnectWithRefreshedToken)
959
+ return;
960
+ reconnectWithRefreshedToken = true;
961
+ const managerState = refreshManager.exportState();
962
+ refreshManager.stop();
963
+ activeClients.delete(accountId);
964
+ log?.info?.(`[${accountId}] clawchat-plugin-openclaw token refreshed (${reason}); closing WS to reconnect with new token`);
965
+ finishCurrentConnection({
966
+ state: "disconnected",
967
+ closeCode: 1000,
968
+ closeReason: "token refresh",
969
+ });
970
+ try {
971
+ client.close();
972
+ }
973
+ catch {
974
+ // best effort
975
+ }
976
+ if (abortSignal.aborted)
977
+ return;
978
+ const streak = nextRefreshReconnectStreak();
979
+ // Re-enter with the rotated in-memory account; SQLite/config already hold
980
+ // the rotated pair (persisted before the swap). Reuse the same device id.
981
+ await startOpenclawClawlingGateway({
982
+ ...params,
983
+ account: {
984
+ ...params.account,
985
+ configured: true,
986
+ token: account.token,
987
+ userId: account.userId,
988
+ ownerUserId: account.ownerUserId,
989
+ },
990
+ refreshReconnectDepth: streak.depth,
991
+ refreshReconnectWindowStartedAt: streak.windowStartedAt,
992
+ refreshManagerState: managerState,
993
+ });
994
+ };
995
+ // Alias used by the proactive port (refresh already succeeded + swapped).
996
+ const runRefreshReconnect = closeAndReconnectWithRefreshedToken;
997
+ // §A/§B/§D — run a single-flight refresh and act on the outcome:
998
+ // - success → close the live WS + re-enter with the rotated token (§D).
999
+ // - permanent→ the manager already auto-logged-out (§C); nothing more here.
1000
+ // - transient/skipped → §B: NEVER teardown. Backoff-reconnect with the CURRENT
1001
+ // token, creds + configured untouched, and keep retrying.
1002
+ // Returns "handled" when it took ownership of the next connection lifecycle
1003
+ // (reconnect scheduled / auto-logout), "fallthrough" when the caller should run
1004
+ // its own path (only when aborted mid-flight).
1005
+ const runRefreshAndReconnect = async (reason) => {
1006
+ if (abortSignal.aborted || autoLoggedOut || reconnectWithRefreshedToken)
1007
+ return "handled";
1008
+ // §A.4 — if the refresh-driven reconnect loop is already capped, do not run
1009
+ // another refresh; fall back to plain transport backoff with the current
1010
+ // token so a rotate-then-reject server cannot loop forever with no backoff.
1011
+ if ((params.refreshReconnectDepth ?? 0) >= MAX_REFRESH_RECONNECTS) {
1012
+ log?.error?.(`[${accountId}] clawchat-plugin-openclaw refresh-reconnect loop capped (depth=${params.refreshReconnectDepth}); backoff-reconnect with current token`);
1013
+ scheduleTransportBackoffReconnect("refresh-reconnect-capped");
1014
+ return "handled";
1015
+ }
1016
+ const outcome = await refreshManager.refresh(reason);
1017
+ if (abortSignal.aborted)
1018
+ return "fallthrough";
1019
+ if (autoLoggedOut)
1020
+ return "handled"; // permanent → manager auto-logged-out.
1021
+ if (outcome.kind === "success") {
1022
+ await closeAndReconnectWithRefreshedToken(reason);
1023
+ return "handled";
1024
+ }
1025
+ // §B — transient / skipped (in-flight / min-interval / rejected-latch /
1026
+ // no-refresh-token): keep the WS in backoff with the CURRENT token; do NOT
1027
+ // teardown. (no-refresh-token has no path to recover, but tearing down is
1028
+ // wrong per §B; backoff keeps the supervisor alive without a refresh storm.)
1029
+ scheduleTransportBackoffReconnect(`refresh-${outcome.kind}`);
1030
+ return "handled";
1031
+ };
1032
+ // §A.2.1 — run an authenticated REST call; on a 401/403 (`ClawlingApiError`
1033
+ // kind "auth") run the single-flight refresh and retry the call ONCE with a
1034
+ // rebuilt api-client. Any other error propagates. Used to wrap the REST
1035
+ // api-client so metadata/profile calls survive an expired access token
1036
+ // without waiting for the WS handshake.
1037
+ const isRestAuthError = (err) => err instanceof ClawlingApiError && err.kind === "auth";
1038
+ const withRefresh = async (call) => {
1039
+ try {
1040
+ return await call();
1041
+ }
1042
+ catch (err) {
1043
+ if (!isRestAuthError(err) || abortSignal.aborted)
1044
+ throw err;
1045
+ const outcome = await refreshManager.refresh("rest-401");
1046
+ if (outcome.kind !== "success")
1047
+ throw err;
1048
+ // The in-memory swap already invalidated the cached api-client; the next
1049
+ // `call()` rebuilds it with the fresh token.
1050
+ return await call();
1051
+ }
1052
+ };
1053
+ // Activate the REST proxy's refresh wrapper now that the manager exists.
1054
+ restWithRefresh = withRefresh;
1055
+ // §A.2/§B — handle a WS hello-fail(auth) by gating a reactive refresh on the
1056
+ // reason classification:
1057
+ // - token-rejected → refresh. Success reconnects with the fresh token;
1058
+ // permanent auto-logs-out; transient/skipped backoff-reconnects with the
1059
+ // CURRENT token (§B: a transient refresh failure NEVER auto-logs-out and
1060
+ // NEVER stops — `runRefreshAndReconnect` owns all three).
1061
+ // - generic + token near expiry → same refresh path.
1062
+ // - generic + token NOT near expiry → §A.2: transient backoff with the current
1063
+ // token (NO refresh, NO teardown). A backend outage emitting a generic
1064
+ // reason must not trigger a refresh storm OR a spurious logout.
1065
+ // - auth-unavailable never reaches here (the ws-client routes it as a
1066
+ // TransportError so its own backoff loop handles it).
1067
+ const handleWsAuthFailure = async (reason) => {
1068
+ if (abortSignal.aborted || reconnectWithRefreshedToken || autoLoggedOut)
1069
+ return;
1070
+ const klass = classifyHelloFailReason(reason);
1071
+ const eligible = klass === "token-rejected" ||
1072
+ (klass === "generic" && refreshManager.isNearExpiry(activatedAtMs));
1073
+ if (eligible) {
1074
+ // `runRefreshAndReconnect` is total: it either reconnects (success),
1075
+ // auto-logs-out (permanent), or backoff-reconnects with the current token
1076
+ // (transient/skipped). No teardown path remains for an eligible hello-fail.
1077
+ await runRefreshAndReconnect("ws-hello-fail");
1078
+ return;
1079
+ }
1080
+ // §A.2 / Finding 5 — generic + token NOT near expiry: keep the WS in
1081
+ // transport backoff with the current token. Do NOT refresh and do NOT tear
1082
+ // the account down (the old teardown wrongly flipped configured:false).
1083
+ scheduleTransportBackoffReconnect("hello-fail-generic-not-near");
1084
+ };
630
1085
  setAlignedOutboundLogContext(client, wsLogContext);
631
1086
  client.on("hello:ok", (env) => {
632
1087
  const payload = env.payload && typeof env.payload === "object"
@@ -641,6 +1096,11 @@ export async function startOpenclawClawlingGateway(params) {
641
1096
  send: () => { },
642
1097
  context: wsLogContext,
643
1098
  });
1099
+ const notifySignalObserver = createNotifySignalObserver({
1100
+ accountId,
1101
+ log: (msg) => log?.info?.(msg),
1102
+ context: wsLogContext,
1103
+ });
644
1104
  const logAuthFailure = (reason) => {
645
1105
  if (authFailureLogged)
646
1106
  return;
@@ -663,6 +1123,8 @@ export async function startOpenclawClawlingGateway(params) {
663
1123
  client.on("state", ({ from, to }) => {
664
1124
  log?.info?.(`[${accountId}] clawchat-plugin-openclaw state ${from} -> ${to}`);
665
1125
  wsReady = to === "connected";
1126
+ if (to === "connected")
1127
+ wsReadyEverThisAttempt = true;
666
1128
  if (to === "connecting") {
667
1129
  reconnectTracker.connectStart();
668
1130
  currentAttemptStartedAt = Date.now();
@@ -731,9 +1193,17 @@ export async function startOpenclawClawlingGateway(params) {
731
1193
  }
732
1194
  void refreshConversationCacheAfterReady();
733
1195
  void dispatchActivationBootstrap();
1196
+ // §A.1 — arm the proactive refresh timer from the live token's `exp`
1197
+ // every time a connection becomes ready (re-armed after every refresh via
1198
+ // the gateway re-enter).
1199
+ refreshManager.armProactiveTimer(activatedAtMs);
734
1200
  }
735
1201
  else if (to === "disconnected") {
736
1202
  reconnectTracker.markClosed();
1203
+ // §A.1 — clear the proactive timer on disconnect; it re-arms on the next
1204
+ // ready, or the gateway re-enter arms a fresh one.
1205
+ if (!reconnectWithRefreshedToken)
1206
+ refreshManager.disarmProactiveTimer();
737
1207
  }
738
1208
  const next = { ...getStatus(), ...mapClawlingStateToStatus(to) };
739
1209
  setStatus(next);
@@ -863,21 +1333,48 @@ export async function startOpenclawClawlingGateway(params) {
863
1333
  client.on("metadata:invalidated", (env) => {
864
1334
  void handleMetadataInvalidation(env);
865
1335
  });
1336
+ client.on("notify:signal", (env) => {
1337
+ // §9.4 reliable system notification. The plugin holds no friend/roster
1338
+ // cache (friends are fetched on demand via REST tools), so there is nothing
1339
+ // to invalidate — observe + dedup only. The live frame and its reliable
1340
+ // inbox replay carry the same event_id and collapse to one observation.
1341
+ notifySignalObserver.observe(env);
1342
+ });
1343
+ client.on("replay:done", (env) => {
1344
+ // §11.5 terminal control frame: device replay drained, live delivery begins.
1345
+ // Fires on every reconnect (even zero-backlog). Replayed messages are
1346
+ // processed inline, so this is a logged boundary marker, not a gate.
1347
+ log?.info?.(`[${accountId}] clawchat-plugin-openclaw replay.done trace=${env.trace_id}`);
1348
+ });
866
1349
  client.on("error", (err) => {
867
1350
  const classified = classifyClawlingClientError(err);
868
1351
  if (classified.kind === "auth") {
869
- finishCurrentConnection({
870
- state: "auth_failed",
871
- error: lastHelloFailReason || classified.message,
872
- });
873
- logAuthFailure(classified.message);
874
- setStatus({
875
- ...getStatus(),
876
- connected: false,
877
- configured: false,
878
- running: false,
879
- lastError: classified.message,
880
- });
1352
+ // §A.2 — a WS hello-fail(auth) on a LIVE (already-connected) session.
1353
+ // Attempt a gated reactive refresh before tearing the account down. The
1354
+ // INITIAL-connect auth failure is owned by the `client.connect()` catch
1355
+ // below (which runs the refresh/backoff/teardown decision and the recursive
1356
+ // re-enter), so only react here once the session was previously ready —
1357
+ // otherwise we'd double-handle and the error handler's teardown would race
1358
+ // the catch's transient-backoff branch (wrongly flipping configured:false).
1359
+ if (!reconnectWithRefreshedToken && !autoLoggedOut && wsReadyEverThisAttempt) {
1360
+ void handleWsAuthFailure(lastHelloFailReason || classified.message);
1361
+ return;
1362
+ }
1363
+ // Not-ready (initial connect): the `client.connect()` catch below owns the
1364
+ // refresh/backoff/teardown DECISION and the status flip. Record the
1365
+ // connection as auth_failed here (bookkeeping — the ws-client's own 4001
1366
+ // close would otherwise finish it as a plain "disconnected") and log the
1367
+ // auth failure, but do NOT flip status here: a transient refresh / generic
1368
+ // backoff must leave configured untouched, and that decision lives in the
1369
+ // catch.
1370
+ if (!reconnectWithRefreshedToken && !autoLoggedOut) {
1371
+ finishCurrentConnection({
1372
+ state: "auth_failed",
1373
+ error: lastHelloFailReason || classified.message,
1374
+ });
1375
+ logAuthFailure(classified.message);
1376
+ }
1377
+ return;
881
1378
  }
882
1379
  else if (classified.kind === "transport") {
883
1380
  finishCurrentConnection({ state: "transport_error", error: classified.message });
@@ -1385,14 +1882,44 @@ export async function startOpenclawClawlingGateway(params) {
1385
1882
  }
1386
1883
  catch (err) {
1387
1884
  const classified = classifyClawlingClientError(err);
1388
- setStatus({
1389
- ...getStatus(),
1390
- connected: false,
1391
- configured: classified.kind !== "auth",
1392
- running: false,
1393
- lastError: classified.message,
1394
- });
1395
1885
  if (classified.kind === "auth") {
1886
+ // §A.2/§B — initial-connect hello-fail(auth). Do NOT pre-flip
1887
+ // configured:false here: a transient refresh must leave creds + configured
1888
+ // untouched (§B). Branch on the refresh-eligibility classification first.
1889
+ const klass = classifyHelloFailReason(lastHelloFailReason || classified.message);
1890
+ const eligible = !reconnectWithRefreshedToken &&
1891
+ !autoLoggedOut &&
1892
+ !abortSignal.aborted &&
1893
+ Boolean(latestRefreshToken) &&
1894
+ (klass === "token-rejected" ||
1895
+ (klass === "generic" && refreshManager.isNearExpiry(activatedAtMs)));
1896
+ if (eligible) {
1897
+ // Total: success reconnects, permanent auto-logs-out, transient/skipped
1898
+ // backoff-reconnects with the current token (creds + configured intact).
1899
+ await runRefreshAndReconnect("ws-initial-connect-auth");
1900
+ return;
1901
+ }
1902
+ // §A.2 / Finding 5 — generic + token NOT near expiry (and we have a refresh
1903
+ // token but it isn't refresh-eligible): keep the WS in transport backoff
1904
+ // with the current token instead of tearing the account down.
1905
+ if (klass === "generic" &&
1906
+ Boolean(latestRefreshToken) &&
1907
+ !reconnectWithRefreshedToken &&
1908
+ !autoLoggedOut &&
1909
+ !abortSignal.aborted) {
1910
+ scheduleTransportBackoffReconnect("initial-connect-generic-not-near");
1911
+ return;
1912
+ }
1913
+ // Not refresh-eligible and no usable refresh token — fall back to the
1914
+ // legacy auth-failed teardown so the gateway flips not-configured and (for
1915
+ // a sqlite-sourced account) re-enters wait-for-activation.
1916
+ setStatus({
1917
+ ...getStatus(),
1918
+ connected: false,
1919
+ configured: false,
1920
+ running: false,
1921
+ lastError: classified.message,
1922
+ });
1396
1923
  finishCurrentConnection({
1397
1924
  state: "auth_failed",
1398
1925
  error: lastHelloFailReason || classified.message,
@@ -1413,6 +1940,13 @@ export async function startOpenclawClawlingGateway(params) {
1413
1940
  }
1414
1941
  return;
1415
1942
  }
1943
+ setStatus({
1944
+ ...getStatus(),
1945
+ connected: false,
1946
+ configured: true,
1947
+ running: false,
1948
+ lastError: classified.message,
1949
+ });
1416
1950
  log?.error?.(`[${accountId}] clawchat-plugin-openclaw connect failed (${classified.kind}): ${classified.message}`);
1417
1951
  return;
1418
1952
  }
@@ -1429,6 +1963,8 @@ export async function startOpenclawClawlingGateway(params) {
1429
1963
  log?.info?.(`[${accountId}] clawchat-plugin-openclaw runtime abort received; closing client`);
1430
1964
  activeClients.delete(accountId);
1431
1965
  closingForAbort = true;
1966
+ // §A.1 — stop the proactive refresh timer on shutdown.
1967
+ refreshManager.stop();
1432
1968
  groupCoalescer.cancelAll();
1433
1969
  finishCurrentConnection({
1434
1970
  state: "disconnected",