@clawling/clawchat-plugin-openclaw 2026.5.12-39 → 2026.5.13-1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,9 +2,11 @@ import { AckTimeoutError, AuthError, ProtocolError, StateError, TransportError,
2
2
  import { waitUntilAbort } from "openclaw/plugin-sdk/channel-lifecycle";
3
3
  import { hasControlCommand } from "openclaw/plugin-sdk/command-detection";
4
4
  import { createPluginRuntimeStore } from "openclaw/plugin-sdk/runtime-store";
5
- import { createOpenclawClawlingClient } from "./client.js";
5
+ import { createOpenclawClawlingClient, resolveOpenclawClawlingDeviceId } from "./client.js";
6
6
  import { createOpenclawClawlingApiClient } from "./api-client.js";
7
+ import { reportPluginVersionSafe, resolvePluginVersion } from "./plugin-report.js";
7
8
  import { ClawlingApiError } from "./api-types.js";
9
+ import { RefreshManager } from "./refresh-manager.js";
8
10
  import { CHANNEL_ID, effectiveOutputVisibility, effectiveGroupCommandMode, hasOpenclawClawlingConnectCredentials, } from "./config.js";
9
11
  import { dispatchOpenclawClawlingInbound } from "./inbound.js";
10
12
  import { fetchInboundMedia } from "./media-runtime.js";
@@ -12,7 +14,7 @@ import { createOpenclawClawlingReplyDispatcher } from "./reply-dispatcher.js";
12
14
  import { runWithTerminalClawChatSendScope } from "./terminal-send.js";
13
15
  import { flushAlignedOutboundQueue, getAlignedOutboundQueueSize, sendOpenclawClawlingText, setAlignedOutboundLogContext, } from "./outbound.js";
14
16
  import { formatWsLog } from "./ws-log.js";
15
- import { createProtocolControlHandler, createReconnectTracker } from "./ws-alignment.js";
17
+ import { createNotifySignalObserver, createProtocolControlHandler, createReconnectTracker } from "./ws-alignment.js";
16
18
  import { clawChatDbPathForStateDir, getClawChatStore, } from "./storage.js";
17
19
  import { getClawChatGroupPrompt, getClawChatUserPrompt } from "./plugin-prompts.js";
18
20
  import { loadClawChatPromptMetadata, renderClawChatProfilePrompt, resolveSenderRelation, } from "./profile-prompt.js";
@@ -41,9 +43,38 @@ const OPENCLAW_CONFIRM_SLASH_COMMANDS = new Set([
41
43
  "nevermind",
42
44
  ]);
43
45
  const GROUP_OWNER_ATTENTION_TITLE = "requires owner attention";
46
+ // §C.1 — user-visible message emitted on permanent token expiry. Kept
47
+ // byte-identical to the Hermes plugin (parity spec §C.1.4).
48
+ const CLAWCHAT_TOKEN_EXPIRED_MESSAGE = "ClawChat token expired and could not be refreshed. Re-pair with `/clawchat-activate <code>`.";
49
+ const CLAWCHAT_TOKEN_EXPIRED_LAST_ERROR = "token expired — re-pair required";
44
50
  function isRecord(value) {
45
51
  return Boolean(value && typeof value === "object" && !Array.isArray(value));
46
52
  }
53
/**
 * §A.2 — classify a WS `hello-fail` reason for refresh gating.
 *
 * - "token-rejected": reason names an authentication failure → refresh.
 * - "auth-unavailable": 5xx auth-backend outage → backoff, DO NOT refresh.
 * - "generic": unattributed → refresh only if the token is at/near expiry.
 *
 * `auth service unavailable` is already split off by the ws-client into a
 * TransportError (backoff), but we classify defensively here too.
 *
 * @param {unknown} reason - Reason text from the hello-fail frame. Any value
 *   that is not a string (missing, null, number, object) is treated as an
 *   empty reason rather than throwing.
 * @returns {"auth-unavailable" | "token-rejected" | "generic"} The class used
 *   to decide between refresh and plain backoff.
 */
export function classifyHelloFailReason(reason) {
    // Matching is case-insensitive; non-string input must never throw here.
    const text = typeof reason === "string" ? reason.toLowerCase() : "";
    // Outage phrases are tested first, so a reason that mentions both an
    // outage and a token problem is classified as an outage (no refresh).
    if (/auth service unavailable|temporarily unavailable/.test(text)) {
        return "auth-unavailable";
    }
    if (/authentication failed|invalid token|token expired|unauthorized|auth failed|invalid credentials/.test(text)) {
        return "token-rejected";
    }
    return "generic";
}
71
/**
 * Read `channels.<CHANNEL_ID>.refreshToken` from a live config, or null.
 *
 * @param {unknown} cfg - Live config object. A missing or non-object config
 *   yields `null` instead of throwing.
 * @returns {string | null} The trimmed refresh token, or `null` when any level
 *   of the path is absent, the value is not a string, or it is blank.
 */
function readConfigRefreshToken(cfg) {
    const channels = isRecord(cfg) ? cfg.channels : undefined;
    const channel = isRecord(channels) ? channels[CHANNEL_ID] : undefined;
    const refreshToken = isRecord(channel) ? channel.refreshToken : undefined;
    if (typeof refreshToken !== "string") {
        return null;
    }
    // Whitespace-only values count as "no token".
    const trimmed = refreshToken.trim();
    return trimmed ? trimmed : null;
}
47
78
  function withFullVerboseDispatchConfig(cfg, agentId) {
48
79
  const cfgRecord = cfg;
49
80
  const agents = isRecord(cfgRecord.agents) ? cfgRecord.agents : {};
@@ -214,7 +245,12 @@ function metadataScopesFromEnvelope(env) {
214
245
  return Array.isArray(scope) ? scope.filter((item) => typeof item === "string") : [];
215
246
  }
216
247
  function shouldRefreshBehaviorForScopes(scopes) {
217
- return scopes.includes("behavior");
248
+ // §9.3: empty/absent scope ⇒ "refetch everything"; unknown scope strings must
249
+ // also trigger a refresh. Only the known non-behavior scopes (title,
250
+ // description) leave agent behavior untouched.
251
+ if (scopes.length === 0)
252
+ return true;
253
+ return scopes.some((scope) => scope !== "title" && scope !== "description");
218
254
  }
219
255
  function shouldRefreshConversationForScopes(scopes) {
220
256
  if (scopes.length === 0)
@@ -272,6 +308,17 @@ function buildActivationBootstrapEnvelope(params) {
272
308
  },
273
309
  };
274
310
  }
311
/**
 * §A.3/§A.4 — cap on consecutive refresh-driven reconnects inside one
 * `REFRESH_RECONNECT_WINDOW_MS` window. Past the cap the refresh loop is
 * abandoned in favour of plain transport backoff (or auto-logout on a
 * permanent reject), which bounds a server that keeps rotating-then-rejecting
 * fresh tokens.
 */
export const MAX_REFRESH_RECONNECTS = 3;
/** Length of the reconnect-streak window: five minutes, in milliseconds. */
export const REFRESH_RECONNECT_WINDOW_MS = 300_000;
/** §B — first transport-backoff delay before a reactive re-enter, in milliseconds. */
export const TRANSPORT_BACKOFF_BASE_MS = 1000;
/** §B — upper bound for the transport-backoff delay (30 seconds), in milliseconds. */
export const TRANSPORT_BACKOFF_MAX_MS = 30 * 1000;
275
322
  function resolveConnectionStore(params, runtime) {
276
323
  if (params.store !== undefined)
277
324
  return params.store;
@@ -379,6 +426,19 @@ export async function startOpenclawClawlingGateway(params) {
379
426
  const accountId = account.accountId;
380
427
  const store = resolveConnectionStore(params, runtime);
381
428
  log?.info?.(`[${accountId}] clawchat-plugin-openclaw runtime start entered configured=${account.configured} enabled=${account.enabled} hasToken=${Boolean(account.token)} hasUserId=${Boolean(account.userId)} hasOwnerUserId=${Boolean(account.ownerUserId)} websocketUrl=${account.websocketUrl || "(empty)"}`);
429
+ // Freeze one report device_id for this process; reused for the paired report
430
+ // so the backend links both to the same row. Imported from ./client.js.
431
+ const reportDeviceId = resolveOpenclawClawlingDeviceId(account);
432
+ const pluginVersion = resolvePluginVersion();
433
+ void reportPluginVersionSafe({
434
+ baseUrl: account.baseUrl,
435
+ mediaBaseUrl: account.mediaBaseUrl,
436
+ token: "",
437
+ deviceId: reportDeviceId,
438
+ pluginVersion,
439
+ authenticated: false,
440
+ log,
441
+ });
382
442
  const activationAccount = await waitForActivationCredentials({
383
443
  account,
384
444
  abortSignal,
@@ -392,15 +452,92 @@ export async function startOpenclawClawlingGateway(params) {
392
452
  if (!activationAccount)
393
453
  return;
394
454
  account = activationAccount.account;
455
+ // Paired: link the report row via the authenticated endpoint, reusing the
456
+ // SAME frozen device_id so the backend upserts the existing unpaired row.
457
+ void reportPluginVersionSafe({
458
+ baseUrl: account.baseUrl,
459
+ mediaBaseUrl: account.mediaBaseUrl,
460
+ token: account.token,
461
+ deviceId: reportDeviceId,
462
+ pluginVersion,
463
+ authenticated: true,
464
+ log,
465
+ });
466
+ // §A.0 — fallback expiry source. Prefer the SQLite `activated_at`; null when
467
+ // the credentials came from config (no activation row yet) — in that case the
468
+ // refresh manager relies on the JWT `exp` alone.
469
+ let activatedAtMs = activationAccount.source === "sqlite" && store?.getActivationCredentials
470
+ ? store.getActivationCredentials({ platform: "openclaw", accountId })?.activatedAt ?? null
471
+ : null;
395
472
  let conversationApiClient;
473
+ const buildConversationApiClient = () => createOpenclawClawlingApiClient({
474
+ baseUrl: account.baseUrl,
475
+ mediaBaseUrl: account.mediaBaseUrl,
476
+ token: account.token,
477
+ userId: account.userId,
478
+ });
479
+ // §A.2.1 — forward reference to the single-flight REST refresh wrapper; set
480
+ // once the refresh manager exists. Until then, calls run un-wrapped.
481
+ let restWithRefresh = null;
482
// Returns a proxy whose every method call runs through `restWithRefresh`, so a
// 401/403 transparently triggers one single-flight refresh + retry. The proxy
// reads the cached client lazily on each call so a post-refresh rebuild is
// picked up automatically.
//
// `then` and symbol-keyed lookups resolve to `undefined` instead of a method
// wrapper. Without that, the proxy looks like a thenable: awaiting it, or
// returning it from an async function, would invoke a non-existent `then`
// method on the api-client and never settle.
const getConversationApiClient = () => {
    return new Proxy({}, {
        get: (_target, prop) => {
            if (typeof prop === "symbol" || prop === "then") {
                return undefined;
            }
            return (...args) => {
                // Resolve the method at call time so a client rebuilt after a
                // token swap is the one that actually gets invoked.
                const invoke = () => {
                    conversationApiClient ??= buildConversationApiClient();
                    const fn = conversationApiClient[prop];
                    if (typeof fn !== "function") {
                        throw new TypeError(`clawchat api-client has no method ${String(prop)}`);
                    }
                    return fn.apply(conversationApiClient, args);
                };
                // Until the refresh wrapper is installed, calls run un-wrapped.
                return restWithRefresh
                    ? restWithRefresh(() => Promise.resolve(invoke()))
                    : invoke();
            };
        },
    });
};
505
+ // Rebuilt after every in-memory token swap so REST calls use the fresh token.
506
+ const invalidateConversationApiClient = () => {
507
+ conversationApiClient = undefined;
508
+ };
509
+ const resolveMutateConfigFile = () => {
510
+ if (params.mutateConfigFile)
511
+ return params.mutateConfigFile;
512
+ const runtimeConfig = runtime.config;
513
+ return typeof runtimeConfig?.mutateConfigFile === "function"
514
+ ? runtimeConfig.mutateConfigFile
515
+ : undefined;
516
+ };
517
+ // §0/§C.1 — write the channel-config `token`/`refreshToken` keys. `tokens=null`
518
+ // blanks them (auto-logout); otherwise persists the rotated pair.
519
+ const persistConfigTokens = async (tokens) => {
520
+ const mutateConfigFile = resolveMutateConfigFile();
521
+ if (!mutateConfigFile) {
522
+ log?.error?.(`[${accountId}] clawchat-plugin-openclaw config persistence unavailable; cannot ${tokens ? "rotate" : "clear"} tokens in config`);
523
+ return;
524
+ }
525
+ await mutateConfigFile({
526
+ afterWrite: { mode: "none", reason: "clawchat-plugin-openclaw token refresh" },
527
+ mutate(draft) {
528
+ const channels = (draft.channels ?? {});
529
+ const existing = (channels[CHANNEL_ID] ?? {});
530
+ const nextSection = {
531
+ ...existing,
532
+ token: tokens ? tokens.accessToken : "",
533
+ refreshToken: tokens ? tokens.refreshToken : "",
534
+ };
535
+ Object.assign(draft, {
536
+ ...draft,
537
+ channels: { ...channels, [CHANNEL_ID]: nextSection },
538
+ });
539
+ },
402
540
  });
403
- return conversationApiClient;
404
541
  };
405
542
  let lastHelloFailTraceId = "-";
406
543
  let lastHelloFailReason = "";
@@ -411,6 +548,129 @@ export async function startOpenclawClawlingGateway(params) {
411
548
  let authFailureLogged = false;
412
549
  let closingForAbort = false;
413
550
  let wsReady = false;
551
+ // True once this gateway attempt reached "connected" at least once — used to
552
+ // route a later auth-fail through the live-session reactive refresh path
553
+ // rather than the initial-connect catch.
554
+ let wsReadyEverThisAttempt = false;
555
+ // §D — set when a refresh succeeded and we are closing the live WS to
556
+ // reconnect with the new token; suppresses the auth-failed teardown path and
557
+ // drives a clean re-enter into the gateway with the rotated account.
558
+ let reconnectWithRefreshedToken = false;
559
+ // §C — set once auto-logout has fired so we don't double-emit or reconnect.
560
+ let autoLoggedOut = false;
561
+ // §E — connect-time device id for `X-Device-Id` on refresh. Prefer the value
562
+ // recorded in SQLite at connect; backfill legacy rows (no column) to the
563
+ // deterministic constant `CHANNEL_ID` actually sent by `authHeaders`.
564
+ const refreshDeviceId = (activationAccount.source === "sqlite" && store?.getActivationCredentials
565
+ ? store.getActivationCredentials({ platform: "openclaw", accountId })?.deviceId
566
+ : null) || CHANNEL_ID;
567
+ // §C — auto-logout on permanent refresh failure. Blank creds in BOTH stores
568
+ // (KEEP identity), flip not-configured via the auth-failure status path, and
569
+ // emit the user-visible message. Idempotent.
570
+ const performAutoLogout = async (info) => {
571
+ if (autoLoggedOut)
572
+ return;
573
+ autoLoggedOut = true;
574
+ log?.error?.(`[${accountId}] clawchat-plugin-openclaw auto-logout (token permanently expired) code=${info.code}: ${info.message}`);
575
+ // SQLite: blank access/refresh, keep user/owner/device for re-pair.
576
+ if (store?.clearActivationCredentials) {
577
+ recordConnection("clear activation credentials", () => store.clearActivationCredentials?.({ platform: "openclaw", accountId }));
578
+ }
579
+ // Config: blank token/refreshToken keys.
580
+ try {
581
+ await persistConfigTokens(null);
582
+ }
583
+ catch (err) {
584
+ log?.error?.(`[${accountId}] clawchat-plugin-openclaw failed to clear config credentials on auto-logout: ${err instanceof Error ? err.message : String(err)}`);
585
+ }
586
+ // Flip not-configured (existing auth-failure status path) with the re-pair
587
+ // hint as `lastError`.
588
+ setStatus({
589
+ ...getStatus(),
590
+ connected: false,
591
+ configured: false,
592
+ running: false,
593
+ lastError: CLAWCHAT_TOKEN_EXPIRED_LAST_ERROR,
594
+ });
595
+ // User-visible notification (in addition to logs). Best-effort; never throws.
596
+ emitUserVisibleAuthLogout();
597
+ };
598
+ // §C.1.4 — surface the permanent-expiry message to the user/operator. The
599
+ // plugin has no guaranteed live chat target after creds are cleared, so we
600
+ // route through the runtime notification surface when present and always log.
601
+ const emitUserVisibleAuthLogout = () => {
602
+ log?.error?.(`[${accountId}] clawchat-plugin-openclaw ${CLAWCHAT_TOKEN_EXPIRED_MESSAGE}`);
603
+ try {
604
+ const notify = runtime.notifications?.notify;
605
+ if (typeof notify === "function") {
606
+ notify({ level: "error", message: CLAWCHAT_TOKEN_EXPIRED_MESSAGE });
607
+ }
608
+ }
609
+ catch {
610
+ // Best effort only.
611
+ }
612
+ };
613
+ // The refresh token is not part of the resolved account; source it from
614
+ // SQLite first (authoritative after a rotation) then the config channel
615
+ // section. Kept in a mutable cell so a swap updates it in place.
616
+ let latestRefreshToken = (activationAccount.source === "sqlite" && store?.getActivationCredentials
617
+ ? store.getActivationCredentials({ platform: "openclaw", accountId })?.refreshToken
618
+ : null) ?? readConfigRefreshToken(cfg);
619
+ const refreshManager = new RefreshManager({
620
+ baseUrl: account.baseUrl,
621
+ deviceId: refreshDeviceId,
622
+ getAccessToken: () => account.token,
623
+ getRefreshToken: () => latestRefreshToken,
624
+ persistRotatedTokens: async (tokens) => {
625
+ // §0 — persist to BOTH stores BEFORE the in-memory swap. A failure in
626
+ // EITHER store must REJECT so the manager skips the in-memory swap and
627
+ // treats the refresh as transient (keep the current tokens, back off). Do
628
+ // NOT swallow the SQLite write error: `rotateActivationTokens` returns
629
+ // `null` when its internal `write()` caught an exception (a real write
630
+ // failure), `false` only when no activation row exists yet (config-sourced
631
+ // agent — legitimately nothing to update). A swallowed write failure must
632
+ // not leave the SQLite row holding the now-dead refresh token while the
633
+ // in-memory token is rotated, which would brick a sqlite-sourced agent on
634
+ // restart.
635
+ if (store?.rotateActivationTokens) {
636
+ const rotateResult = store.rotateActivationTokens({
637
+ platform: "openclaw",
638
+ accountId,
639
+ accessToken: tokens.accessToken,
640
+ refreshToken: tokens.refreshToken,
641
+ });
642
+ if (rotateResult === null) {
643
+ throw new Error("clawchat-plugin-openclaw sqlite rotate activation tokens failed");
644
+ }
645
+ }
646
+ // A config write failure rejects out of `mutateConfigFile` and propagates
647
+ // here, which is what we want — persistence incomplete ⇒ no swap.
648
+ await persistConfigTokens(tokens);
649
+ },
650
+ swapInMemoryTokens: (tokens) => {
651
+ account = { ...account, token: tokens.accessToken, configured: true };
652
+ latestRefreshToken = tokens.refreshToken;
653
+ activatedAtMs = Date.now();
654
+ invalidateConversationApiClient();
655
+ },
656
+ onPermanentFailure: performAutoLogout,
657
+ // §A.1/§D — proactive-timer success closes the live WS and re-enters with the
658
+ // rotated token. The running ws-client captured the OLD token at `connect`
659
+ // time, so the in-memory swap alone never reaches a `connect` envelope.
660
+ onProactiveRefreshed: async () => {
661
+ await runRefreshReconnect("proactive-timer");
662
+ },
663
+ ...(params.refreshFetchImpl ? { fetchImpl: params.refreshFetchImpl } : {}),
664
+ ...(params.refreshSetTimer ? { setTimer: params.refreshSetTimer } : {}),
665
+ ...(params.refreshClearTimer ? { clearTimer: params.refreshClearTimer } : {}),
666
+ ...(params.refreshJitter ? { jitter: params.refreshJitter } : {}),
667
+ log,
668
+ });
669
+ // §A.3/§A.4 — carry the single-flight latch + min-interval across re-enters so
670
+ // the guards bound a rotate-then-reject loop instead of resetting each time.
671
+ if (params.refreshManagerState) {
672
+ refreshManager.restoreState(params.refreshManagerState);
673
+ }
414
674
  let currentConnectionId = null;
415
675
  let currentConnectionFinished = false;
416
676
  const reconnectTracker = createReconnectTracker({
@@ -530,7 +790,10 @@ export async function startOpenclawClawlingGateway(params) {
530
790
  const memoryRoot = resolveMemoryRootForPeer(peer);
531
791
  if (!memoryRoot)
532
792
  return;
533
- if (refreshBehavior) {
793
+ // §9.3: agent behavior is per-agent metadata that lives only on the agent's
794
+ // DIRECT conversation — never refetch it for a group invalidation, even on a
795
+ // "refetch everything" (empty/unknown) scope.
796
+ if (refreshBehavior && peer.kind === "direct") {
534
797
  await refreshAgentBehavior({
535
798
  source: "metadata_invalidation",
536
799
  ...(version !== undefined ? { metadataVersion: version } : {}),
@@ -600,8 +863,36 @@ export async function startOpenclawClawlingGateway(params) {
600
863
  log: { error: (message) => log?.error?.(`[${accountId}] ${message}`) },
601
864
  });
602
865
  };
866
+ // §A.4 — startup refresh-if-near-expiry, BEFORE the first WS connect. Recovers
867
+ // a long-stopped pod with no manual re-pair. On a permanent refresh failure
868
+ // auto-logout immediately and skip the doomed connect.
869
+ if (!abortSignal.aborted &&
870
+ latestRefreshToken &&
871
+ refreshManager.isNearExpiry(activatedAtMs)) {
872
+ log?.info?.(`[${accountId}] clawchat-plugin-openclaw access token near expiry at startup; refreshing before connect`);
873
+ const startupOutcome = await refreshManager.refresh("startup-near-expiry");
874
+ if (abortSignal.aborted)
875
+ return;
876
+ if (startupOutcome.kind === "permanent") {
877
+ // Auto-logout already performed by the manager's onPermanentFailure.
878
+ return;
879
+ }
880
+ // success swaps the in-memory token in place; transient/skipped just connect
881
+ // with the current token (the WS handshake will then drive reactive refresh).
882
+ }
883
+ // Reuse the device id the server resolved on a previous connection so a pod
884
+ // restart (fresh hostname → fresh hostname-derived id) does not present a
885
+ // brand-new device, which would force a full inbox replay and orphan the
886
+ // prior device's cursor. Persisted from `hello-ok` via markConnectionReady.
887
+ const persistedDeviceId = store?.getLastResolvedDeviceId
888
+ ? store.getLastResolvedDeviceId({ platform: "openclaw", accountId })
889
+ : null;
890
+ if (persistedDeviceId) {
891
+ log?.info?.(`[${accountId}] clawchat-plugin-openclaw reusing persisted resolved_device_id`);
892
+ }
603
893
  const client = createOpenclawClawlingClient(account, {
604
894
  ...(params.transport ? { transport: params.transport } : {}),
895
+ ...(persistedDeviceId ? { deviceIdOverride: persistedDeviceId } : {}),
605
896
  wsLifecycle: {
606
897
  onConnectFrameSent: (env) => {
607
898
  lastConnectTraceId = typeof env.trace_id === "string" ? env.trace_id : "-";
@@ -627,6 +918,195 @@ export async function startOpenclawClawlingGateway(params) {
627
918
  },
628
919
  });
629
920
  log?.info?.(`[${accountId}] clawchat-plugin-openclaw runtime client created`);
921
// §A.3/§A.4 — work out the refresh-driven reconnect streak for the next
// re-enter. The previous streak (depth + window start) arrives via `params`;
// it only counts while its window is still open, otherwise a new window
// starts now at depth 1. `capped` tells the caller that the streak exceeded
// `MAX_REFRESH_RECONNECTS` and it must NOT re-enter via refresh again.
const nextRefreshReconnectStreak = () => {
    const now = Date.now();
    const priorWindowStart = params.refreshReconnectWindowStartedAt ?? 0;
    let windowStartedAt = now;
    let priorDepth = 0;
    // A prior start of 0 means "no window yet".
    if (priorWindowStart !== 0 && now - priorWindowStart < REFRESH_RECONNECT_WINDOW_MS) {
        windowStartedAt = priorWindowStart;
        priorDepth = params.refreshReconnectDepth ?? 0;
    }
    const depth = priorDepth + 1;
    return { depth, windowStartedAt, capped: depth > MAX_REFRESH_RECONNECTS };
};
932
// §B/§D — re-enter the gateway after a plain transport-backoff delay, with the
// CURRENT (unchanged) token and creds left untouched. Used when a reactive
// refresh is transient/skipped (§B: a transient refresh failure NEVER
// auto-logs-out and NEVER stops — keep retrying with the current token) and
// when the refresh-reconnect loop is capped (§A.4). Carries the refresh
// manager's latch + min-interval state so the guards keep bounding the loop.
//
// `reason` is only used for the log line. The call is a no-op when the gateway
// is aborted, auto-logged-out, or a reconnect is already pending. The delay is
// `params.transportBackoffDelayMs` when provided, otherwise an exponential
// backoff derived from `params.refreshReconnectDepth`, capped at
// `TRANSPORT_BACKOFF_MAX_MS`.
const scheduleTransportBackoffReconnect = (reason) => {
    if (abortSignal.aborted || autoLoggedOut || reconnectWithRefreshedToken)
        return;
    reconnectWithRefreshedToken = true; // suppress the auth-failed teardown path.
    // Snapshot the manager state BEFORE stopping it, the same order the
    // refreshed-token reconnect path uses, so `stop()` cannot affect what is
    // carried into the re-enter.
    const managerState = refreshManager.exportState();
    refreshManager.stop();
    activeClients.delete(accountId);
    finishCurrentConnection({
        state: "disconnected",
        closeCode: 1000,
        closeReason: "transport backoff reconnect",
    });
    try {
        client.close();
    }
    catch {
        // best effort
    }
    const attempt = (params.refreshReconnectDepth ?? 0) + 1;
    const delayMs = params.transportBackoffDelayMs ??
        Math.min(TRANSPORT_BACKOFF_MAX_MS, TRANSPORT_BACKOFF_BASE_MS * 2 ** Math.max(0, attempt - 1));
    log?.info?.(`[${accountId}] clawchat-plugin-openclaw reactive refresh ${reason}; backoff-reconnect with current token delayMs=${delayMs}`);
    const streak = nextRefreshReconnectStreak();
    const reEnter = () => {
        if (abortSignal.aborted)
            return;
        // Fire-and-forget from a timer callback: nothing awaits this promise,
        // so a rejection has to be handled here or it surfaces as an unhandled
        // rejection.
        void startOpenclawClawlingGateway({
            ...params,
            account: { ...params.account },
            transportBackoffReconnect: true,
            refreshReconnectDepth: streak.depth,
            refreshReconnectWindowStartedAt: streak.windowStartedAt,
            refreshManagerState: managerState,
        }).catch((err) => {
            log?.error?.(`[${accountId}] clawchat-plugin-openclaw backoff-reconnect re-enter failed: ${err instanceof Error ? err.message : String(err)}`);
        });
    };
    const timer = params.backoffTimer ?? ((cb, ms) => void setTimeout(cb, ms));
    timer(reEnter, delayMs);
};
976
+ // §A/§D — close the live WS and re-enter the gateway with the rotated token
977
+ // (a token only enters via a fresh `connect` envelope; it cannot be hot-swapped
978
+ // onto a live socket). Assumes a refresh ALREADY succeeded and swapped the
979
+ // in-memory token (proactive path), or is called by `runRefreshAndReconnect`
980
+ // after its own successful refresh (reactive path). Carries the refresh
981
+ // manager's latch + min-interval + reconnect-streak state across the re-enter.
982
+ const closeAndReconnectWithRefreshedToken = async (reason) => {
983
+ if (abortSignal.aborted || autoLoggedOut || reconnectWithRefreshedToken)
984
+ return;
985
+ reconnectWithRefreshedToken = true;
986
+ const managerState = refreshManager.exportState();
987
+ refreshManager.stop();
988
+ activeClients.delete(accountId);
989
+ log?.info?.(`[${accountId}] clawchat-plugin-openclaw token refreshed (${reason}); closing WS to reconnect with new token`);
990
+ finishCurrentConnection({
991
+ state: "disconnected",
992
+ closeCode: 1000,
993
+ closeReason: "token refresh",
994
+ });
995
+ try {
996
+ client.close();
997
+ }
998
+ catch {
999
+ // best effort
1000
+ }
1001
+ if (abortSignal.aborted)
1002
+ return;
1003
+ const streak = nextRefreshReconnectStreak();
1004
+ // Re-enter with the rotated in-memory account; SQLite/config already hold
1005
+ // the rotated pair (persisted before the swap). Reuse the same device id.
1006
+ await startOpenclawClawlingGateway({
1007
+ ...params,
1008
+ account: {
1009
+ ...params.account,
1010
+ configured: true,
1011
+ token: account.token,
1012
+ userId: account.userId,
1013
+ ownerUserId: account.ownerUserId,
1014
+ },
1015
+ refreshReconnectDepth: streak.depth,
1016
+ refreshReconnectWindowStartedAt: streak.windowStartedAt,
1017
+ refreshManagerState: managerState,
1018
+ });
1019
+ };
1020
+ // Alias used by the proactive port (refresh already succeeded + swapped).
1021
+ const runRefreshReconnect = closeAndReconnectWithRefreshedToken;
1022
+ // §A/§B/§D — run a single-flight refresh and act on the outcome:
1023
+ // - success → close the live WS + re-enter with the rotated token (§D).
1024
+ // - permanent→ the manager already auto-logged-out (§C); nothing more here.
1025
+ // - transient/skipped → §B: NEVER teardown. Backoff-reconnect with the CURRENT
1026
+ // token, creds + configured untouched, and keep retrying.
1027
+ // Returns "handled" when it took ownership of the next connection lifecycle
1028
+ // (reconnect scheduled / auto-logout), "fallthrough" when the caller should run
1029
+ // its own path (only when aborted mid-flight).
1030
+ const runRefreshAndReconnect = async (reason) => {
1031
+ if (abortSignal.aborted || autoLoggedOut || reconnectWithRefreshedToken)
1032
+ return "handled";
1033
+ // §A.4 — if the refresh-driven reconnect loop is already capped, do not run
1034
+ // another refresh; fall back to plain transport backoff with the current
1035
+ // token so a rotate-then-reject server cannot loop forever with no backoff.
1036
+ if ((params.refreshReconnectDepth ?? 0) >= MAX_REFRESH_RECONNECTS) {
1037
+ log?.error?.(`[${accountId}] clawchat-plugin-openclaw refresh-reconnect loop capped (depth=${params.refreshReconnectDepth}); backoff-reconnect with current token`);
1038
+ scheduleTransportBackoffReconnect("refresh-reconnect-capped");
1039
+ return "handled";
1040
+ }
1041
+ const outcome = await refreshManager.refresh(reason);
1042
+ if (abortSignal.aborted)
1043
+ return "fallthrough";
1044
+ if (autoLoggedOut)
1045
+ return "handled"; // permanent → manager auto-logged-out.
1046
+ if (outcome.kind === "success") {
1047
+ await closeAndReconnectWithRefreshedToken(reason);
1048
+ return "handled";
1049
+ }
1050
+ // §B — transient / skipped (in-flight / min-interval / rejected-latch /
1051
+ // no-refresh-token): keep the WS in backoff with the CURRENT token; do NOT
1052
+ // teardown. (no-refresh-token has no path to recover, but tearing down is
1053
+ // wrong per §B; backoff keeps the supervisor alive without a refresh storm.)
1054
+ scheduleTransportBackoffReconnect(`refresh-${outcome.kind}`);
1055
+ return "handled";
1056
+ };
1057
+ // §A.2.1 — run an authenticated REST call; on a 401/403 (`ClawlingApiError`
1058
+ // kind "auth") run the single-flight refresh and retry the call ONCE with a
1059
+ // rebuilt api-client. Any other error propagates. Used to wrap the REST
1060
+ // api-client so metadata/profile calls survive an expired access token
1061
+ // without waiting for the WS handshake.
1062
+ const isRestAuthError = (err) => err instanceof ClawlingApiError && err.kind === "auth";
1063
+ const withRefresh = async (call) => {
1064
+ try {
1065
+ return await call();
1066
+ }
1067
+ catch (err) {
1068
+ if (!isRestAuthError(err) || abortSignal.aborted)
1069
+ throw err;
1070
+ const outcome = await refreshManager.refresh("rest-401");
1071
+ if (outcome.kind !== "success")
1072
+ throw err;
1073
+ // The in-memory swap already invalidated the cached api-client; the next
1074
+ // `call()` rebuilds it with the fresh token.
1075
+ return await call();
1076
+ }
1077
+ };
1078
+ // Activate the REST proxy's refresh wrapper now that the manager exists.
1079
+ restWithRefresh = withRefresh;
1080
+ // §A.2/§B — handle a WS hello-fail(auth) by gating a reactive refresh on the
1081
+ // reason classification:
1082
+ // - token-rejected → refresh. Success reconnects with the fresh token;
1083
+ // permanent auto-logs-out; transient/skipped backoff-reconnects with the
1084
+ // CURRENT token (§B: a transient refresh failure NEVER auto-logs-out and
1085
+ // NEVER stops — `runRefreshAndReconnect` owns all three).
1086
+ // - generic + token near expiry → same refresh path.
1087
+ // - generic + token NOT near expiry → §A.2: transient backoff with the current
1088
+ // token (NO refresh, NO teardown). A backend outage emitting a generic
1089
+ // reason must not trigger a refresh storm OR a spurious logout.
1090
+ // - auth-unavailable never reaches here (the ws-client routes it as a
1091
+ // TransportError so its own backoff loop handles it).
1092
+ const handleWsAuthFailure = async (reason) => {
1093
+ if (abortSignal.aborted || reconnectWithRefreshedToken || autoLoggedOut)
1094
+ return;
1095
+ const klass = classifyHelloFailReason(reason);
1096
+ const eligible = klass === "token-rejected" ||
1097
+ (klass === "generic" && refreshManager.isNearExpiry(activatedAtMs));
1098
+ if (eligible) {
1099
+ // `runRefreshAndReconnect` is total: it either reconnects (success),
1100
+ // auto-logs-out (permanent), or backoff-reconnects with the current token
1101
+ // (transient/skipped). No teardown path remains for an eligible hello-fail.
1102
+ await runRefreshAndReconnect("ws-hello-fail");
1103
+ return;
1104
+ }
1105
+ // §A.2 / Finding 5 — generic + token NOT near expiry: keep the WS in
1106
+ // transport backoff with the current token. Do NOT refresh and do NOT tear
1107
+ // the account down (the old teardown wrongly flipped configured:false).
1108
+ scheduleTransportBackoffReconnect("hello-fail-generic-not-near");
1109
+ };
630
1110
  setAlignedOutboundLogContext(client, wsLogContext);
631
1111
  client.on("hello:ok", (env) => {
632
1112
  const payload = env.payload && typeof env.payload === "object"
@@ -641,6 +1121,11 @@ export async function startOpenclawClawlingGateway(params) {
641
1121
  send: () => { },
642
1122
  context: wsLogContext,
643
1123
  });
1124
+ const notifySignalObserver = createNotifySignalObserver({
1125
+ accountId,
1126
+ log: (msg) => log?.info?.(msg),
1127
+ context: wsLogContext,
1128
+ });
644
1129
  const logAuthFailure = (reason) => {
645
1130
  if (authFailureLogged)
646
1131
  return;
@@ -663,6 +1148,8 @@ export async function startOpenclawClawlingGateway(params) {
663
1148
  client.on("state", ({ from, to }) => {
664
1149
  log?.info?.(`[${accountId}] clawchat-plugin-openclaw state ${from} -> ${to}`);
665
1150
  wsReady = to === "connected";
1151
+ if (to === "connected")
1152
+ wsReadyEverThisAttempt = true;
666
1153
  if (to === "connecting") {
667
1154
  reconnectTracker.connectStart();
668
1155
  currentAttemptStartedAt = Date.now();
@@ -731,9 +1218,17 @@ export async function startOpenclawClawlingGateway(params) {
731
1218
  }
732
1219
  void refreshConversationCacheAfterReady();
733
1220
  void dispatchActivationBootstrap();
1221
+ // §A.1 — arm the proactive refresh timer from the live token's `exp`
1222
+ // every time a connection becomes ready (re-armed after every refresh via
1223
+ // the gateway re-enter).
1224
+ refreshManager.armProactiveTimer(activatedAtMs);
734
1225
  }
735
1226
  else if (to === "disconnected") {
736
1227
  reconnectTracker.markClosed();
1228
+ // §A.1 — clear the proactive timer on disconnect (skipped while `reconnectWithRefreshedToken` is set); it re-arms on the next
1229
+ // ready, or the gateway re-enter arms a fresh one.
1230
+ if (!reconnectWithRefreshedToken)
1231
+ refreshManager.disarmProactiveTimer();
737
1232
  }
738
1233
  const next = { ...getStatus(), ...mapClawlingStateToStatus(to) };
739
1234
  setStatus(next);
@@ -863,21 +1358,48 @@ export async function startOpenclawClawlingGateway(params) {
863
1358
  client.on("metadata:invalidated", (env) => {
864
1359
  void handleMetadataInvalidation(env);
865
1360
  });
1361
+ client.on("notify:signal", (env) => {
1362
+ // §9.4 reliable system notification. The plugin holds no friend/roster
1363
+ // cache (friends are fetched on demand via REST tools), so there is nothing
1364
+ // to invalidate — observe + dedup only. The live frame and its reliable
1365
+ // inbox replay carry the same event_id and collapse to one observation.
1366
+ notifySignalObserver.observe(env);
1367
+ });
1368
+ client.on("replay:done", (env) => {
1369
+ // §11.5 terminal control frame: device replay drained, live delivery begins.
1370
+ // Fires on every reconnect (even zero-backlog). Replayed messages are
1371
+ // processed inline, so this is a logged boundary marker, not a gate.
1372
+ log?.info?.(`[${accountId}] clawchat-plugin-openclaw replay.done trace=${env.trace_id}`);
1373
+ });
866
1374
  client.on("error", (err) => {
867
1375
  const classified = classifyClawlingClientError(err);
868
1376
  if (classified.kind === "auth") {
869
- finishCurrentConnection({
870
- state: "auth_failed",
871
- error: lastHelloFailReason || classified.message,
872
- });
873
- logAuthFailure(classified.message);
874
- setStatus({
875
- ...getStatus(),
876
- connected: false,
877
- configured: false,
878
- running: false,
879
- lastError: classified.message,
880
- });
1377
+ // §A.2 — a WS hello-fail(auth) on a LIVE (already-connected) session.
1378
+ // Attempt a gated reactive refresh before tearing the account down. The
1379
+ // INITIAL-connect auth failure is owned by the `client.connect()` catch
1380
+ // below (which runs the refresh/backoff/teardown decision and the recursive
1381
+ // re-enter), so only react here once the session was previously ready —
1382
+ // otherwise we'd double-handle and the error handler's teardown would race
1383
+ // the catch's transient-backoff branch (wrongly flipping configured:false).
1384
+ if (!reconnectWithRefreshedToken && !autoLoggedOut && wsReadyEverThisAttempt) {
1385
+ void handleWsAuthFailure(lastHelloFailReason || classified.message);
1386
+ return;
1387
+ }
1388
+ // Not-ready (initial connect): the `client.connect()` catch below owns the
1389
+ // refresh/backoff/teardown DECISION and the status flip. Record the
1390
+ // connection as auth_failed here (bookkeeping — the ws-client's own 4001
1391
+ // close would otherwise finish it as a plain "disconnected") and log the
1392
+ // auth failure, but do NOT flip status here: a transient refresh / generic
1393
+ // backoff must leave configured untouched, and that decision lives in the
1394
+ // catch.
1395
+ if (!reconnectWithRefreshedToken && !autoLoggedOut) {
1396
+ finishCurrentConnection({
1397
+ state: "auth_failed",
1398
+ error: lastHelloFailReason || classified.message,
1399
+ });
1400
+ logAuthFailure(classified.message);
1401
+ }
1402
+ return;
881
1403
  }
882
1404
  else if (classified.kind === "transport") {
883
1405
  finishCurrentConnection({ state: "transport_error", error: classified.message });
@@ -1113,7 +1635,13 @@ export async function startOpenclawClawlingGateway(params) {
1113
1635
  : {}),
1114
1636
  },
1115
1637
  ...(memoryRoot ? { extra: { memoryRoot } } : {}),
1116
- ...(turn.peer.kind === "group"
1638
+ // Deliver the rendered ClawChat per-turn prompt (owner agent_behavior,
1639
+ // metadata, peer/sender profile) to the host for ALL chat types. The host
1640
+ // appends `GroupSystemPrompt` to the system prompt regardless of chat
1641
+ // kind. Direct chats previously relied only on the `before_prompt_build`
1642
+ // staging hook, which is not applied by the host for DM sessions, so the
1643
+ // owner-configured behavior never reached the LLM in 1:1 chats.
1644
+ ...(turnPrompt
1117
1645
  ? { supplemental: { groupSystemPrompt: turnPrompt } }
1118
1646
  : {}),
1119
1647
  });
@@ -1385,14 +1913,44 @@ export async function startOpenclawClawlingGateway(params) {
1385
1913
  }
1386
1914
  catch (err) {
1387
1915
  const classified = classifyClawlingClientError(err);
1388
- setStatus({
1389
- ...getStatus(),
1390
- connected: false,
1391
- configured: classified.kind !== "auth",
1392
- running: false,
1393
- lastError: classified.message,
1394
- });
1395
1916
  if (classified.kind === "auth") {
1917
+ // §A.2/§B — initial-connect hello-fail(auth). Do NOT pre-flip
1918
+ // configured:false here: a transient refresh must leave creds + configured
1919
+ // untouched (§B). Branch on the refresh-eligibility classification first.
1920
+ const klass = classifyHelloFailReason(lastHelloFailReason || classified.message);
1921
+ const eligible = !reconnectWithRefreshedToken &&
1922
+ !autoLoggedOut &&
1923
+ !abortSignal.aborted &&
1924
+ Boolean(latestRefreshToken) &&
1925
+ (klass === "token-rejected" ||
1926
+ (klass === "generic" && refreshManager.isNearExpiry(activatedAtMs)));
1927
+ if (eligible) {
1928
+ // Total: success reconnects, permanent auto-logs-out, transient/skipped
1929
+ // backoff-reconnects with the current token (creds + configured intact).
1930
+ await runRefreshAndReconnect("ws-initial-connect-auth");
1931
+ return;
1932
+ }
1933
+ // §A.2 / Finding 5 — generic + token NOT near expiry (and we have a refresh
1934
+ // token but it isn't refresh-eligible): keep the WS in transport backoff
1935
+ // with the current token instead of tearing the account down.
1936
+ if (klass === "generic" &&
1937
+ Boolean(latestRefreshToken) &&
1938
+ !reconnectWithRefreshedToken &&
1939
+ !autoLoggedOut &&
1940
+ !abortSignal.aborted) {
1941
+ scheduleTransportBackoffReconnect("initial-connect-generic-not-near");
1942
+ return;
1943
+ }
1944
+ // Neither refresh-eligible nor generic-backoff-eligible (e.g. no usable refresh token) — fall back to the
1945
+ // legacy auth-failed teardown so the gateway flips not-configured and (for
1946
+ // a sqlite-sourced account) re-enters wait-for-activation.
1947
+ setStatus({
1948
+ ...getStatus(),
1949
+ connected: false,
1950
+ configured: false,
1951
+ running: false,
1952
+ lastError: classified.message,
1953
+ });
1396
1954
  finishCurrentConnection({
1397
1955
  state: "auth_failed",
1398
1956
  error: lastHelloFailReason || classified.message,
@@ -1413,6 +1971,13 @@ export async function startOpenclawClawlingGateway(params) {
1413
1971
  }
1414
1972
  return;
1415
1973
  }
1974
+ setStatus({
1975
+ ...getStatus(),
1976
+ connected: false,
1977
+ configured: true,
1978
+ running: false,
1979
+ lastError: classified.message,
1980
+ });
1416
1981
  log?.error?.(`[${accountId}] clawchat-plugin-openclaw connect failed (${classified.kind}): ${classified.message}`);
1417
1982
  return;
1418
1983
  }
@@ -1429,6 +1994,8 @@ export async function startOpenclawClawlingGateway(params) {
1429
1994
  log?.info?.(`[${accountId}] clawchat-plugin-openclaw runtime abort received; closing client`);
1430
1995
  activeClients.delete(accountId);
1431
1996
  closingForAbort = true;
1997
+ // §A.1 — stop the proactive refresh timer on shutdown.
1998
+ refreshManager.stop();
1432
1999
  groupCoalescer.cancelAll();
1433
2000
  finishCurrentConnection({
1434
2001
  state: "disconnected",