@botcord/daemon 0.2.8 → 0.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,7 +9,7 @@
9
9
  import WebSocket from "ws";
10
10
  import { buildDaemonWebSocketUrl, CONTROL_FRAME_TYPES, jcsCanonicalize, resolveHubControlPublicKey, verifyEd25519, } from "@botcord/protocol-core";
11
11
  import { log as daemonLog } from "./log.js";
12
- import { writeAuthExpiredFlag, } from "./user-auth.js";
12
+ import { AuthRefreshRejectedError, writeAuthExpiredFlag, } from "./user-auth.js";
13
13
  /** Exponential backoff plan for transient disconnects. */
14
14
  const RECONNECT_BACKOFF_MS = [1000, 2000, 4000, 8000, 16000, 30000];
15
15
  const KEEPALIVE_INTERVAL_MS = 25_000;
@@ -91,8 +91,18 @@ export class ControlChannel {
91
91
  });
92
92
  this.connectInflight = this.connect().catch((err) => {
93
93
  // Initial connect failure surfaces to the caller; subsequent
94
- // reconnects are handled opaquely inside onClose.
95
- this.scheduleReconnect(err);
94
+ // reconnects are handled opaquely inside onClose. A refresh-rejected
95
+ // error means the refresh token itself is dead — no point retrying;
96
+ // writeAuthExpiredFlag was already called in user-auth.refresh().
97
+ if (err instanceof AuthRefreshRejectedError) {
98
+ this.stopRequested = true;
99
+ daemonLog.warn("control-channel: refresh rejected; stopping (re-login required)", {
100
+ status: err.status,
101
+ });
102
+ }
103
+ else {
104
+ this.scheduleReconnect(err);
105
+ }
96
106
  throw err;
97
107
  });
98
108
  try {
@@ -223,6 +233,13 @@ export class ControlChannel {
223
233
  scheduleReconnect(err) {
224
234
  if (this.stopRequested)
225
235
  return;
236
+ if (err instanceof AuthRefreshRejectedError) {
237
+ this.stopRequested = true;
238
+ daemonLog.warn("control-channel: refresh rejected; halting reconnect (re-login required)", {
239
+ status: err.status,
240
+ });
241
+ return;
242
+ }
226
243
  const attempt = this.reconnectAttempts;
227
244
  this.reconnectAttempts = attempt + 1;
228
245
  const delay = this.backoff[Math.min(attempt, this.backoff.length - 1)];
package/dist/doctor.js CHANGED
@@ -156,6 +156,9 @@ export function renderDoctor(input) {
156
156
  const r = rows[i];
157
157
  const e = input.runtimes[i];
158
158
  lines.push(`${pad(r.runtime, widths.runtime)} ${pad(r.name, widths.name)} ${pad(r.status, widths.status)} ${pad(r.version, widths.version)} ${r.path}`);
159
+ if (!e.result.available && e.installHint) {
160
+ lines.push(` → ${e.installHint}`);
161
+ }
159
162
  if (e.endpoints && e.endpoints.length > 0) {
160
163
  for (const ep of e.endpoints) {
161
164
  const mark = ep.reachable ? "✓" : "✗";
@@ -1,7 +1,11 @@
1
1
  import { AcpRuntimeAdapter, type AcpPermissionRequest, type AcpPermissionResponse, type AcpUpdateCtx, type AcpUpdateParams } from "./acp-stream.js";
2
2
  import { type ProbeDeps } from "./probe.js";
3
3
  import type { RuntimeProbeResult, RuntimeRunOptions } from "../types.js";
4
- /** Resolve the `hermes-acp` executable on PATH. */
4
+ /**
5
+ * Resolve the `hermes-acp` executable. Tries PATH first, then falls back to
6
+ * the upstream install.sh's private venv location (`~/.hermes/...`) before
7
+ * giving up. `BOTCORD_HERMES_AGENT_BIN` always wins via the adapter override.
8
+ */
5
9
  export declare function resolveHermesAcpCommand(deps?: ProbeDeps): string | null;
6
10
  /** Probe whether `hermes-acp` is installed and report its version. */
7
11
  export declare function probeHermesAgent(deps?: ProbeDeps): RuntimeProbeResult;
@@ -3,10 +3,34 @@ import path from "node:path";
3
3
  import { agentHermesHomeDir, agentHermesWorkspaceDir, ensureAgentHermesWorkspace, } from "../../agent-workspace.js";
4
4
  import { buildCliEnv } from "../cli-resolver.js";
5
5
  import { AcpRuntimeAdapter, } from "./acp-stream.js";
6
- import { readCommandVersion, resolveCommandOnPath } from "./probe.js";
7
- /** Resolve the `hermes-acp` executable on PATH. */
6
+ import { firstExistingPath, readCommandVersion, resolveCommandOnPath, resolveHomePath, } from "./probe.js";
7
+ /**
8
+ * Known absolute locations of the `hermes-acp` entry point when it is not on
9
+ * PATH. The upstream `scripts/install.sh` (curl|bash installer) installs a
10
+ * private virtualenv under `~/.hermes/hermes-agent/venv/` and only symlinks
11
+ * the user-facing `hermes` command into `~/.local/bin/` — the `hermes-acp`
12
+ * entry point stays inside the venv. Without a fallback, daemon's PATH-only
13
+ * probe misses every user who installed via the README-recommended script.
14
+ */
15
+ const HERMES_ACP_FALLBACK_RELATIVE_PATHS = [
16
+ path.join(".hermes", "hermes-agent", "venv", "bin", "hermes-acp"),
17
+ ];
18
+ const HERMES_ACP_FALLBACK_SYSTEM_PATHS = [
19
+ "/opt/hermes/hermes-agent/venv/bin/hermes-acp",
20
+ ];
21
+ /**
22
+ * Resolve the `hermes-acp` executable. Tries PATH first, then falls back to
23
+ * the upstream install.sh's private venv location (`~/.hermes/...`) before
24
+ * giving up. `BOTCORD_HERMES_AGENT_BIN` always wins via the adapter override.
25
+ */
8
26
  export function resolveHermesAcpCommand(deps = {}) {
9
- return resolveCommandOnPath("hermes-acp", deps);
27
+ const onPath = resolveCommandOnPath("hermes-acp", deps);
28
+ if (onPath)
29
+ return onPath;
30
+ return firstExistingPath([
31
+ ...HERMES_ACP_FALLBACK_RELATIVE_PATHS.map((p) => resolveHomePath(p, deps)),
32
+ ...HERMES_ACP_FALLBACK_SYSTEM_PATHS,
33
+ ], deps);
10
34
  }
11
35
  /** Probe whether `hermes-acp` is installed and report its version. */
12
36
  export function probeHermesAgent(deps = {}) {
@@ -23,6 +23,11 @@ export interface RuntimeModule {
23
23
  * config loader rejects routing turns to this adapter.
24
24
  */
25
25
  supportsRun?: boolean;
26
+ /**
27
+ * Short, single-line install hint shown by `doctor` when the runtime
28
+ * probes as unavailable. Helps users recover without reading source.
29
+ */
30
+ installHint?: string;
26
31
  }
27
32
  /** Built-in runtime module entry for Claude Code. */
28
33
  export declare const claudeCodeModule: RuntimeModule;
@@ -58,6 +63,7 @@ export interface RuntimeProbeEntry {
58
63
  binary: string;
59
64
  supportsRun: boolean;
60
65
  result: RuntimeProbeResult;
66
+ installHint?: string;
61
67
  }
62
68
  /** Probe every registered runtime and report installation status. */
63
69
  export declare function detectRuntimes(): RuntimeProbeEntry[];
@@ -28,6 +28,7 @@ export const hermesAgentModule = {
28
28
  envVar: "BOTCORD_HERMES_AGENT_BIN",
29
29
  probe: () => probeHermesAgent(),
30
30
  create: () => new HermesAgentAdapter(),
31
+ installHint: 'Install: pip install "hermes-agent[acp]" (or set BOTCORD_HERMES_AGENT_BIN to the absolute path of hermes-acp)',
31
32
  };
32
33
  /** Built-in runtime module entry for Gemini (probe-only stub). */
33
34
  export const geminiModule = {
@@ -110,6 +111,7 @@ export function detectRuntimes() {
110
111
  binary: m.binary,
111
112
  supportsRun: m.supportsRun !== false,
112
113
  result,
114
+ installHint: m.installHint,
113
115
  });
114
116
  }
115
117
  return out;
@@ -46,13 +46,31 @@ export async function discoverLocalOpenclawGateways(opts = {}) {
46
46
  }
47
47
  export function mergeOpenclawGateways(cfg, found) {
48
48
  const existing = cfg.openclawGateways ?? [];
49
- const existingUrls = new Set(existing.map((g) => normalizeUrlKey(g.url)));
49
+ const byUrl = new Map();
50
+ existing.forEach((g, i) => byUrl.set(normalizeUrlKey(g.url), i));
50
51
  const existingNames = new Set(existing.map((g) => g.name));
52
+ const merged = existing.map((g) => ({ ...g }));
51
53
  const added = [];
54
+ let mutated = false;
52
55
  for (const item of found) {
53
56
  const key = normalizeUrlKey(item.url);
54
- if (existingUrls.has(key))
57
+ const idx = byUrl.get(key);
58
+ if (idx !== undefined) {
59
+ // Same URL already configured — only fill in auth that the user is
60
+ // missing, never overwrite an existing token / tokenFile.
61
+ const cur = merged[idx];
62
+ if (!cur.token && !cur.tokenFile) {
63
+ if (item.token) {
64
+ cur.token = item.token;
65
+ mutated = true;
66
+ }
67
+ else if (item.tokenFile) {
68
+ cur.tokenFile = item.tokenFile;
69
+ mutated = true;
70
+ }
71
+ }
55
72
  continue;
73
+ }
56
74
  const profile = {
57
75
  name: uniqueName(item.name, existingNames),
58
76
  url: item.url,
@@ -61,14 +79,15 @@ export function mergeOpenclawGateways(cfg, found) {
61
79
  profile.token = item.token;
62
80
  else if (item.tokenFile)
63
81
  profile.tokenFile = item.tokenFile;
64
- existingUrls.add(key);
82
+ byUrl.set(key, merged.length);
65
83
  existingNames.add(profile.name);
84
+ merged.push(profile);
66
85
  added.push(profile);
67
86
  }
68
- if (added.length === 0)
87
+ if (added.length === 0 && !mutated)
69
88
  return { cfg, changed: false, added };
70
89
  return {
71
- cfg: { ...cfg, openclawGateways: [...existing, ...added] },
90
+ cfg: { ...cfg, openclawGateways: merged },
72
91
  changed: true,
73
92
  added,
74
93
  };
@@ -117,9 +136,34 @@ function discoverFromConfigDir(root) {
117
136
  }
118
137
  function parseJsonConfig(raw) {
119
138
  const obj = JSON.parse(raw);
139
+ // Prefer OpenClaw's native shape: `gateway.port` + `gateway.auth.token`.
140
+ // The legacy `acp.url` shape is also supported for explicit user-authored configs.
141
+ const native = pickOpenclawGatewayValues(obj?.gateway);
142
+ if (native)
143
+ return native;
120
144
  const acp = obj?.acp ?? obj?.gateway?.acp ?? obj?.gateway ?? obj;
121
145
  return pickConfigValues(acp);
122
146
  }
147
/**
 * Extract a gateway endpoint from OpenClaw's native `gateway` config section
 * (`gateway.port` plus optional `gateway.auth`).
 *
 * @param {unknown} gw The `gateway` value from the parsed config; may be
 *   missing or of the wrong type.
 * @returns {{ url: string, token?: string, tokenFile?: string } | null}
 *   The loopback WebSocket URL with at most one auth field, or `null` when
 *   `gw` is not an object or does not carry a usable TCP port. Callers treat
 *   `null` as "not the native shape" and fall back to other config layouts.
 */
function pickOpenclawGatewayValues(gw) {
    if (!gw || typeof gw !== "object")
        return null;
    const port = gw.port;
    // Only a whole number in the TCP range can yield a connectable URL; NaN,
    // 0, negatives, fractions and values above 65535 are treated as "no port".
    if (typeof port !== "number" || !Number.isInteger(port) || port < 1 || port > 65535)
        return null;
    // Local discovery always targets the loopback interface, regardless of how
    // the gateway is bound — the daemon is on the same machine.
    const url = `ws://127.0.0.1:${port}`;
    const auth = gw.auth;
    // Auth is only picked up in token mode; any other mode yields a bare URL.
    if (!auth || typeof auth !== "object" || auth.mode !== "token")
        return { url };
    // An inline token wins over a token file; blank strings count as absent.
    const token = typeof auth.token === "string" ? auth.token.trim() : "";
    if (token)
        return { url, token };
    const tokenFile = typeof auth.tokenFile === "string" ? auth.tokenFile.trim() : "";
    return tokenFile ? { url, tokenFile } : { url };
}
123
167
  function parseTomlConfig(raw) {
124
168
  let inAcp = false;
125
169
  const values = {};
package/dist/provision.js CHANGED
@@ -4,7 +4,7 @@
4
4
  * side effects (register agent, write credentials, load route, add/remove
5
5
  * gateway channel) and return an ack payload.
6
6
  */
7
- import { existsSync, rmSync, unlinkSync } from "node:fs";
7
+ import { existsSync, readFileSync, rmSync, unlinkSync } from "node:fs";
8
8
  import { homedir } from "node:os";
9
9
  import path from "node:path";
10
10
  import { BotCordClient, CONTROL_FRAME_TYPES, defaultCredentialsFile, derivePublicKey, loadStoredCredentials, writeCredentialsFile, } from "@botcord/protocol-core";
@@ -801,16 +801,22 @@ export function collectRuntimeSnapshot() {
801
801
  /** Maximum number of `endpoints[]` entries persisted per runtime (RFC §3.8.2). */
802
802
  export const RUNTIME_ENDPOINTS_CAP = 32;
803
803
  /**
804
- * Default L2 + L3 probe — opens a WS handshake against the OpenClaw gateway
805
- * and, when the connection is up, issues a JSON-RPC `agents.list` request to
806
- * enumerate configured agent profiles. Best-effort: a successful WS open with
807
- * a failed `agents.list` still reports `ok: true` (just without `agents`),
808
- * matching the RFC's "agents populated only when listing succeeded" rule.
804
+ * Default L2 probe — speaks OpenClaw's WS frame protocol against the
805
+ * gateway and completes the `connect` handshake; it does not call `agents.list`.
809
806
  *
810
- * Method name and result shape follow OpenClaw:
811
- * `~/claws/openclaw/src/gateway/server-methods/agents.ts:416` and
812
- * `~/claws/openclaw/src/gateway/session-utils.ts:783`
813
- * `{ defaultId, mainKey, scope, agents: [{ id, name?, identity?, workspace, model? }] }`.
807
+ * Wire flow (see `~/claws/openclaw/src/gateway/server/ws-connection/message-handler.ts`
808
+ * and `~/claws/openclaw/src/gateway/protocol/schema/frames.ts`):
809
+ * 1. WS upgrade (no auth required at the HTTP layer).
810
+ * 2. Server emits `{type:"event", event:"connect.challenge", payload:{nonce}}`.
811
+ * 3. Client sends `{type:"req", id, method:"connect", params:{minProtocol, maxProtocol,
812
+ * client:{id:"openclaw-probe", mode:"probe", ...}, auth:{token}}}`.
813
+ * 4. Server responds `{type:"res", id, ok:true, payload:{type:"hello-ok", server:{version}, ...}}`.
814
+ * 5. Client sends `{type:"req", id, method:"agents.list", params:{}}` (this probe stops after step 4 and never sends it).
815
+ * 6. Server responds with `{payload: { defaultId, mainKey, scope, agents:[{id, name?, workspace?, model?}] }}`.
816
+ *
817
+ * Best-effort: a successful WS open with a rejected `connect` handshake
818
+ * still reports `ok: true` (with `error` set and without `agents`), matching the RFC's
819
+ * "agents populated only when listing succeeded" rule.
814
820
  */
815
821
  async function defaultWsProbe(args) {
816
822
  const { default: WebSocket } = await import("ws");
@@ -818,6 +824,9 @@ async function defaultWsProbe(args) {
818
824
  let settled = false;
819
825
  let ws;
820
826
  let timer;
827
+ let serverVersion;
828
+ const CONNECT_ID = "probe-connect";
829
+ let connectSent = false;
821
830
  const settle = (v) => {
822
831
  if (settled)
823
832
  return;
@@ -834,6 +843,8 @@ async function defaultWsProbe(args) {
834
843
  };
835
844
  try {
836
845
  const headers = {};
846
+ // Some deployments gate the WS upgrade on Authorization too; harmless
847
+ // when not enforced — auth is also re-asserted in the connect frame.
837
848
  if (args.token)
838
849
  headers["Authorization"] = `Bearer ${args.token}`;
839
850
  ws = new WebSocket(args.url, { headers });
@@ -843,65 +854,81 @@ async function defaultWsProbe(args) {
843
854
  return;
844
855
  }
845
856
  timer = setTimeout(() => settle({ ok: false, error: "timeout" }), args.timeoutMs);
846
- const requestId = "probe-agents-list";
847
- ws.on("open", () => {
848
- // L3: enumerate agent profiles. We don't fail the L2 result if this
849
- // call fails — the gateway is reachable either way.
857
+ const sendConnect = () => {
858
+ if (connectSent)
859
+ return;
860
+ connectSent = true;
861
+ const params = {
862
+ minProtocol: 3,
863
+ maxProtocol: 3,
864
+ client: {
865
+ id: "openclaw-probe",
866
+ version: "0.1.0",
867
+ platform: process.platform || "node",
868
+ mode: "probe",
869
+ },
870
+ role: "operator",
871
+ scopes: ["operator.read"],
872
+ };
873
+ if (args.token)
874
+ params.auth = { token: args.token };
850
875
  try {
851
- ws.send(JSON.stringify({
852
- jsonrpc: "2.0",
853
- id: requestId,
854
- method: "agents.list",
855
- params: {},
856
- }));
876
+ ws.send(JSON.stringify({ type: "req", id: CONNECT_ID, method: "connect", params }));
857
877
  }
858
878
  catch (err) {
859
- settle({ ok: true, error: `agents.list send failed: ${err.message}` });
879
+ settle({ ok: true, error: `connect send failed: ${err.message}` });
860
880
  }
881
+ };
882
+ ws.on("open", () => {
883
+ // Some servers send `connect.challenge` before the socket is fully
884
+ // wired; if it never arrives we still try a best-effort connect after
885
+ // a short delay so the probe doesn't stall on legacy gateways.
886
+ setTimeout(() => {
887
+ if (!connectSent && !settled)
888
+ sendConnect();
889
+ }, 250);
861
890
  });
862
891
  ws.on("message", (raw) => {
892
+ let msg;
863
893
  try {
864
- const msg = JSON.parse(typeof raw === "string" ? raw : raw.toString("utf8"));
865
- if (msg?.id !== requestId)
866
- return; // ignore unrelated frames
867
- if (msg.error) {
868
- settle({ ok: true, error: String(msg.error?.message ?? "agents.list error") });
894
+ msg = JSON.parse(typeof raw === "string" ? raw : raw.toString("utf8"));
895
+ }
896
+ catch {
897
+ return;
898
+ }
899
+ if (!msg || typeof msg !== "object")
900
+ return;
901
+ if (msg.type === "event" && msg.event === "connect.challenge") {
902
+ // Nonce only matters for device-pairing flows; token-only auth ignores it.
903
+ sendConnect();
904
+ return;
905
+ }
906
+ if (msg.type !== "res" || typeof msg.id !== "string")
907
+ return;
908
+ if (msg.id === CONNECT_ID) {
909
+ if (!msg.ok) {
910
+ const errMsg = msg.error?.message ? String(msg.error.message) : "connect rejected";
911
+ settle({ ok: true, error: errMsg });
869
912
  return;
870
913
  }
871
- const list = Array.isArray(msg.result?.agents) ? msg.result.agents : [];
872
- const agents = [];
873
- for (const a of list) {
874
- if (!a || typeof a.id !== "string" || a.id.length === 0)
875
- continue;
876
- const row = { id: a.id };
877
- if (typeof a.name === "string")
878
- row.name = a.name;
879
- if (typeof a.workspace === "string")
880
- row.workspace = a.workspace;
881
- if (a.model && typeof a.model === "object") {
882
- const model = {};
883
- if (typeof a.model.name === "string")
884
- model.name = a.model.name;
885
- if (typeof a.model.provider === "string")
886
- model.provider = a.model.provider;
887
- if (model.name || model.provider)
888
- row.model = model;
889
- }
890
- agents.push(row);
891
- }
892
- settle({ ok: true, agents });
893
- }
894
- catch (err) {
895
- settle({ ok: true, error: `agents.list parse failed: ${err.message}` });
914
+ const v = msg.payload?.server?.version;
915
+ if (typeof v === "string" && v)
916
+ serverVersion = v;
917
+ // We don't fetch agents.list over the wire: it requires `operator.read`
918
+ // which the gateway only grants to clients that present a paired device
919
+ // identity (see message-handler.ts:478 self-declared scopes are
920
+ // cleared without device pairing). For local OpenClaw the agent list
921
+ // is sourced directly from disk by `probeOpenclawAgents`.
922
+ settle({ ok: true, version: serverVersion });
896
923
  }
897
924
  });
898
925
  ws.on("error", (err) => {
899
926
  settle({ ok: false, error: err.message });
900
927
  });
901
928
  ws.on("close", () => {
902
- // If the socket closes before `agents.list` resolved we still treat
903
- // L2 as ok (open fired) and emit no agents.
904
- settle({ ok: true });
929
+ // If the socket closes before the connect response arrives, treat
930
+ // L2 as ok (the upgrade succeeded) and emit no agents.
931
+ settle({ ok: true, version: serverVersion });
905
932
  });
906
933
  });
907
934
  }
@@ -913,11 +940,74 @@ export async function probeOpenclawAgents(profile, opts = {}) {
913
940
  ...(profile.token ? { token: profile.token } : {}),
914
941
  ...(profile.tokenFile ? { tokenFile: profile.tokenFile } : {}),
915
942
  });
916
- return probe({
943
+ const result = await probe({
917
944
  url: profile.url,
918
945
  token: prepared.resolvedToken,
919
946
  timeoutMs: opts.timeoutMs ?? 3000,
920
947
  });
948
+ // For loopback gateways the agent roster lives in `~/.openclaw/openclaw.json`
949
+ // and is the source of truth — listing it over the wire would require a
950
+ // paired device identity (operator.read scope). When the WS probe is the
951
+ // default (i.e. no test injection) we enrich the result from disk.
952
+ if (result.ok && !result.agents && !opts.probe && isLoopbackUrl(profile.url)) {
953
+ const local = readLocalOpenclawAgents();
954
+ if (local && local.length > 0)
955
+ result.agents = local;
956
+ }
957
+ return result;
958
+ }
959
/**
 * Report whether `raw` is a URL whose host is the local loopback interface
 * (`127.0.0.1`, `::1` or `localhost`).
 *
 * @param {string} raw Candidate URL, e.g. `ws://127.0.0.1:18789`.
 * @returns {boolean} `true` for a loopback host; `false` for any other host
 *   or when `raw` cannot be parsed as a URL.
 */
function isLoopbackUrl(raw) {
    let host;
    try {
        host = new URL(raw).hostname;
    }
    catch {
        // Unparseable input is simply "not loopback".
        return false;
    }
    // WHATWG URL keeps the square brackets on IPv6 literals, so
    // `ws://[::1]:18789` yields "[::1]" and never the bare "::1".
    const bare = host.startsWith("[") && host.endsWith("]") ? host.slice(1, -1) : host;
    return bare === "127.0.0.1" || bare === "::1" || bare === "localhost";
}
968
+ function readLocalOpenclawAgents() {
969
+ try {
970
+ const file = path.join(homedir(), ".openclaw", "openclaw.json");
971
+ if (!existsSync(file))
972
+ return null;
973
+ const cfg = JSON.parse(readFileSync(file, "utf8"));
974
+ const list = Array.isArray(cfg?.agents?.list) ? cfg.agents.list : [];
975
+ const defaultId = typeof cfg?.agents?.defaults?.id === "string" ? cfg.agents.defaults.id : "default";
976
+ const seen = new Set();
977
+ const out = [];
978
+ const push = (raw, fallbackId) => {
979
+ const id = typeof raw?.id === "string" && raw.id ? raw.id : fallbackId;
980
+ if (!id || seen.has(id))
981
+ return;
982
+ seen.add(id);
983
+ const row = { id };
984
+ if (typeof raw?.name === "string")
985
+ row.name = raw.name;
986
+ if (typeof raw?.workspace === "string")
987
+ row.workspace = raw.workspace;
988
+ const m = raw?.model;
989
+ if (m && typeof m === "object") {
990
+ const model = {};
991
+ if (typeof m.primary === "string")
992
+ model.name = m.primary;
993
+ else if (typeof m.name === "string")
994
+ model.name = m.name;
995
+ if (typeof m.provider === "string")
996
+ model.provider = m.provider;
997
+ if (model.name || model.provider)
998
+ row.model = model;
999
+ }
1000
+ out.push(row);
1001
+ };
1002
+ // Default agent first so it surfaces at the top of the dropdown.
1003
+ push({ id: defaultId, workspace: cfg?.agents?.defaults?.workspace, model: cfg?.agents?.defaults?.model }, defaultId);
1004
+ for (const entry of list)
1005
+ push(entry);
1006
+ return out;
1007
+ }
1008
+ catch {
1009
+ return null;
1010
+ }
921
1011
  }
922
1012
  /**
923
1013
  * Async variant that includes L2 (gateway reachability) and L3 (agent listing)
@@ -40,6 +40,15 @@ export declare function writeAuthExpiredFlag(file?: string): void;
40
40
  export declare function clearAuthExpiredFlag(file?: string): void;
41
41
  /** Returns true if the stored access token is within `windowMs` of expiry. */
42
42
  export declare function isTokenNearExpiry(record: UserAuthRecord, windowMs?: number): boolean;
43
+ /**
44
+ * Thrown when the Hub rejects a refresh token (401/403). Signals that the
45
+ * user must re-login — reconnect loops should stop instead of hammering
46
+ * the refresh endpoint forever with a known-bad token.
47
+ */
48
+ export declare class AuthRefreshRejectedError extends Error {
49
+ readonly status: number;
50
+ constructor(status: number, message: string);
51
+ }
43
52
  /**
44
53
  * Stateful helper that owns the in-memory copy of user-auth and knows how
45
54
  * to refresh it. Used by the control channel so reconnects always carry
package/dist/user-auth.js CHANGED
@@ -144,6 +144,19 @@ export function clearAuthExpiredFlag(file = AUTH_EXPIRED_FLAG_PATH) {
144
144
  export function isTokenNearExpiry(record, windowMs = 60_000) {
145
145
  return record.expiresAt - Date.now() <= windowMs;
146
146
  }
147
+ /**
148
+ * Thrown when the Hub rejects a refresh token (401/403). Signals that the
149
+ * user must re-login — reconnect loops should stop instead of hammering
150
+ * the refresh endpoint forever with a known-bad token.
151
+ */
152
+ export class AuthRefreshRejectedError extends Error {
153
+ status;
154
+ constructor(status, message) {
155
+ super(message);
156
+ this.name = "AuthRefreshRejectedError";
157
+ this.status = status;
158
+ }
159
+ }
147
160
  /**
148
161
  * Stateful helper that owns the in-memory copy of user-auth and knows how
149
162
  * to refresh it. Used by the control channel so reconnects always carry
@@ -197,13 +210,37 @@ export class UserAuthManager {
197
210
  expiresInMs: current.expiresAt - Date.now(),
198
211
  });
199
212
  this.refreshInflight = (async () => {
200
- const tok = await refreshDaemonToken(current.hubUrl, current.refreshToken);
213
+ // Refresh tokens rotate server-side. If another local process (e.g. a
214
+ // second daemon racing on the same user-auth.json) refreshed in the
215
+ // meantime, the on-disk refreshToken now differs from our in-memory
216
+ // copy — using the in-memory one would 401 because the server already
217
+ // invalidated it. Re-read disk first and adopt any newer record.
218
+ let basis = current;
219
+ try {
220
+ const onDisk = loadUserAuth(this.file);
221
+ if (onDisk && onDisk.refreshToken !== current.refreshToken) {
222
+ daemonLog.info("user-auth refresh: adopting newer on-disk token", {
223
+ userId: onDisk.userId,
224
+ expiresAt: onDisk.expiresAt,
225
+ });
226
+ this.record = onDisk;
227
+ if (!isTokenNearExpiry(onDisk))
228
+ return onDisk;
229
+ basis = onDisk;
230
+ }
231
+ }
232
+ catch (err) {
233
+ daemonLog.debug("user-auth refresh: disk reread failed (ignored)", {
234
+ error: err instanceof Error ? err.message : String(err),
235
+ });
236
+ }
237
+ const tok = await refreshDaemonToken(basis.hubUrl, basis.refreshToken);
201
238
  const next = {
202
- ...current,
239
+ ...basis,
203
240
  accessToken: tok.accessToken,
204
241
  refreshToken: tok.refreshToken,
205
242
  expiresAt: Date.now() + tok.expiresIn * 1000,
206
- hubUrl: tok.hubUrl || current.hubUrl,
243
+ hubUrl: tok.hubUrl || basis.hubUrl,
207
244
  };
208
245
  saveUserAuth(next, this.file);
209
246
  this.record = next;
@@ -213,10 +250,22 @@ export class UserAuthManager {
213
250
  });
214
251
  return next;
215
252
  })().catch((err) => {
253
+ const status = typeof err.status === "number"
254
+ ? (err.status)
255
+ : null;
256
+ const message = err instanceof Error ? err.message : String(err);
216
257
  daemonLog.warn("user-auth refresh: failed", {
217
258
  userId: current.userId,
218
- error: err instanceof Error ? err.message : String(err),
259
+ status,
260
+ error: message,
219
261
  });
262
+ if (status === 401 || status === 403) {
263
+ // Refresh token is permanently dead — write the expired flag so
264
+ // `status` surfaces it and re-throw a typed error so the control
265
+ // channel can stop reconnect loops instead of hammering the Hub.
266
+ writeAuthExpiredFlag();
267
+ throw new AuthRefreshRejectedError(status, message);
268
+ }
220
269
  throw err;
221
270
  }).finally(() => {
222
271
  this.refreshInflight = null;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@botcord/daemon",
3
- "version": "0.2.8",
3
+ "version": "0.2.10",
4
4
  "description": "BotCord local daemon — bridges Hub inbox push to local Claude Code / Codex / Gemini CLIs",
5
5
  "type": "module",
6
6
  "bin": {
@@ -62,6 +62,33 @@ describe("discoverLocalOpenclawGateways", () => {
62
62
  );
63
63
  });
64
64
 
65
+ it("parses OpenClaw's native gateway.port + auth.token shape", async () => {
66
+ const dir = tempDir();
67
+ writeFileSync(
68
+ path.join(dir, "openclaw.json"),
69
+ JSON.stringify({
70
+ gateway: {
71
+ port: 18789,
72
+ bind: "loopback",
73
+ auth: { mode: "token", token: "native-token" },
74
+ },
75
+ }),
76
+ );
77
+
78
+ const found = await discoverLocalOpenclawGateways({
79
+ searchPaths: [dir],
80
+ defaultPorts: [],
81
+ });
82
+
83
+ expect(found).toEqual([
84
+ expect.objectContaining({
85
+ url: "ws://127.0.0.1:18789",
86
+ token: "native-token",
87
+ source: "config-file",
88
+ }),
89
+ ]);
90
+ });
91
+
65
92
  it("uses OPENCLAW_ACP_URL and token env vars", async () => {
66
93
  const found = await discoverLocalOpenclawGateways({
67
94
  searchPaths: [],
@@ -124,6 +151,27 @@ describe("discoverLocalOpenclawGateways", () => {
124
151
  });
125
152
 
126
153
  describe("mergeOpenclawGateways", () => {
154
+ it("backfills token onto an existing profile that lacks one", () => {
155
+ const cfg = baseConfig();
156
+ cfg.openclawGateways = [
157
+ { name: "openclaw-127-0-0-1-18789", url: "ws://127.0.0.1:18789" },
158
+ ];
159
+ const merged = mergeOpenclawGateways(cfg, [
160
+ {
161
+ name: "openclaw-127-0-0-1-18789",
162
+ url: "ws://127.0.0.1:18789",
163
+ token: "discovered",
164
+ source: "config-file",
165
+ },
166
+ ]);
167
+
168
+ expect(merged.changed).toBe(true);
169
+ expect(merged.added).toEqual([]);
170
+ expect(merged.cfg.openclawGateways).toEqual([
171
+ { name: "openclaw-127-0-0-1-18789", url: "ws://127.0.0.1:18789", token: "discovered" },
172
+ ]);
173
+ });
174
+
127
175
  it("appends new URLs and keeps existing profiles untouched", () => {
128
176
  const cfg = baseConfig();
129
177
  cfg.openclawGateways = [{ name: "local", url: "ws://127.0.0.1:18789/acp", token: "user-token" }];
@@ -18,6 +18,7 @@ import {
18
18
  } from "@botcord/protocol-core";
19
19
  import { log as daemonLog } from "./log.js";
20
20
  import {
21
+ AuthRefreshRejectedError,
21
22
  writeAuthExpiredFlag,
22
23
  type UserAuthManager,
23
24
  } from "./user-auth.js";
@@ -142,8 +143,17 @@ export class ControlChannel {
142
143
  });
143
144
  this.connectInflight = this.connect().catch((err) => {
144
145
  // Initial connect failure surfaces to the caller; subsequent
145
- // reconnects are handled opaquely inside onClose.
146
- this.scheduleReconnect(err);
146
+ // reconnects are handled opaquely inside onClose. A refresh-rejected
147
+ // error means the refresh token itself is dead — no point retrying;
148
+ // writeAuthExpiredFlag was already called in user-auth.refresh().
149
+ if (err instanceof AuthRefreshRejectedError) {
150
+ this.stopRequested = true;
151
+ daemonLog.warn("control-channel: refresh rejected; stopping (re-login required)", {
152
+ status: err.status,
153
+ });
154
+ } else {
155
+ this.scheduleReconnect(err);
156
+ }
147
157
  throw err;
148
158
  });
149
159
  try {
@@ -285,6 +295,13 @@ export class ControlChannel {
285
295
 
286
296
  private scheduleReconnect(err?: unknown): void {
287
297
  if (this.stopRequested) return;
298
+ if (err instanceof AuthRefreshRejectedError) {
299
+ this.stopRequested = true;
300
+ daemonLog.warn("control-channel: refresh rejected; halting reconnect (re-login required)", {
301
+ status: err.status,
302
+ });
303
+ return;
304
+ }
288
305
  const attempt = this.reconnectAttempts;
289
306
  this.reconnectAttempts = attempt + 1;
290
307
  const delay = this.backoff[Math.min(attempt, this.backoff.length - 1)];
package/src/doctor.ts CHANGED
@@ -257,6 +257,9 @@ export function renderDoctor(input: DoctorInput): string {
257
257
  lines.push(
258
258
  `${pad(r.runtime, widths.runtime)} ${pad(r.name, widths.name)} ${pad(r.status, widths.status)} ${pad(r.version, widths.version)} ${r.path}`,
259
259
  );
260
+ if (!e.result.available && e.installHint) {
261
+ lines.push(` → ${e.installHint}`);
262
+ }
260
263
  if (e.endpoints && e.endpoints.length > 0) {
261
264
  for (const ep of e.endpoints) {
262
265
  const mark = ep.reachable ? "✓" : "✗";
@@ -2,6 +2,7 @@ import { afterAll, beforeAll, describe, expect, it } from "vitest";
2
2
  import {
3
3
  chmodSync,
4
4
  existsSync,
5
+ mkdirSync,
5
6
  mkdtempSync,
6
7
  readFileSync,
7
8
  rmSync,
@@ -9,7 +10,10 @@ import {
9
10
  } from "node:fs";
10
11
  import os from "node:os";
11
12
  import path from "node:path";
12
- import { HermesAgentAdapter } from "../runtimes/hermes-agent.js";
13
+ import {
14
+ HermesAgentAdapter,
15
+ resolveHermesAcpCommand,
16
+ } from "../runtimes/hermes-agent.js";
13
17
  import { agentHermesWorkspaceDir } from "../../agent-workspace.js";
14
18
 
15
19
  // Spawn a tiny Node "ACP server" we control instead of the real hermes-acp.
@@ -288,6 +292,30 @@ describe("HermesAgentAdapter", () => {
288
292
  expect(res.error).toMatch(/aborted before spawn/);
289
293
  });
290
294
 
295
+ it("resolveHermesAcpCommand falls back to ~/.hermes venv when PATH lookup fails", () => {
296
+ // Upstream `scripts/install.sh` puts hermes-acp at
297
+ // ~/.hermes/hermes-agent/venv/bin/hermes-acp and only symlinks `hermes`
298
+ // into ~/.local/bin. Simulate that layout: `which hermes-acp` fails,
299
+ // but the venv path exists on disk.
300
+ const fakeHome = mkdtempSync(path.join(os.tmpdir(), "hermes-fallback-"));
301
+ const venvBin = path.join(fakeHome, ".hermes", "hermes-agent", "venv", "bin");
302
+ const target = path.join(venvBin, "hermes-acp");
303
+ mkdirSync(venvBin, { recursive: true });
304
+ writeFileSync(target, "#!/bin/sh\nexit 0\n", { mode: 0o755 });
305
+ chmodSync(target, 0o755);
306
+
307
+ const resolved = resolveHermesAcpCommand({
308
+ env: { PATH: "/nonexistent" },
309
+ homeDir: fakeHome,
310
+ execFileSyncFn: (() => {
311
+ throw new Error("which: not found");
312
+ }) as never,
313
+ });
314
+ expect(resolved).toBe(target);
315
+
316
+ rmSync(fakeHome, { recursive: true, force: true });
317
+ });
318
+
291
319
  it("surfaces non-zero exit with stderr snippet", async () => {
292
320
  const p = path.join(tmpRoot, "boom.js");
293
321
  writeFileSync(
@@ -13,12 +13,45 @@ import {
13
13
  type AcpUpdateCtx,
14
14
  type AcpUpdateParams,
15
15
  } from "./acp-stream.js";
16
- import { readCommandVersion, resolveCommandOnPath, type ProbeDeps } from "./probe.js";
16
+ import {
17
+ firstExistingPath,
18
+ readCommandVersion,
19
+ resolveCommandOnPath,
20
+ resolveHomePath,
21
+ type ProbeDeps,
22
+ } from "./probe.js";
17
23
  import type { RuntimeProbeResult, RuntimeRunOptions, StreamBlock } from "../types.js";
18
24
 
19
- /** Resolve the `hermes-acp` executable on PATH. */
25
+ /**
26
+ * Known absolute locations of the `hermes-acp` entry point when it is not on
27
+ * PATH. The upstream `scripts/install.sh` (curl|bash installer) installs a
28
+ * private virtualenv under `~/.hermes/hermes-agent/venv/` and only symlinks
29
+ * the user-facing `hermes` command into `~/.local/bin/` — the `hermes-acp`
30
+ * entry point stays inside the venv. Without a fallback, daemon's PATH-only
31
+ * probe misses every user who installed via the README-recommended script.
32
+ */
33
+ const HERMES_ACP_FALLBACK_RELATIVE_PATHS = [
34
+ path.join(".hermes", "hermes-agent", "venv", "bin", "hermes-acp"),
35
+ ];
36
+ const HERMES_ACP_FALLBACK_SYSTEM_PATHS = [
37
+ "/opt/hermes/hermes-agent/venv/bin/hermes-acp",
38
+ ];
39
+
40
+ /**
41
+ * Resolve the `hermes-acp` executable. Tries PATH first, then falls back to
42
+ * the upstream install.sh's private venv location (`~/.hermes/...`) before
43
+ * giving up. `BOTCORD_HERMES_AGENT_BIN` always wins via the adapter override.
44
+ */
20
45
  export function resolveHermesAcpCommand(deps: ProbeDeps = {}): string | null {
21
- return resolveCommandOnPath("hermes-acp", deps);
46
+ const onPath = resolveCommandOnPath("hermes-acp", deps);
47
+ if (onPath) return onPath;
48
+ return firstExistingPath(
49
+ [
50
+ ...HERMES_ACP_FALLBACK_RELATIVE_PATHS.map((p) => resolveHomePath(p, deps)),
51
+ ...HERMES_ACP_FALLBACK_SYSTEM_PATHS,
52
+ ],
53
+ deps,
54
+ );
22
55
  }
23
56
 
24
57
  /** Probe whether `hermes-acp` is installed and report its version. */
@@ -29,6 +29,11 @@ export interface RuntimeModule {
29
29
  * config loader rejects routing turns to this adapter.
30
30
  */
31
31
  supportsRun?: boolean;
32
+ /**
33
+ * Short, single-line install hint shown by `doctor` when the runtime
34
+ * probes as unavailable. Helps users recover without reading source.
35
+ */
36
+ installHint?: string;
32
37
  }
33
38
 
34
39
  /** Built-in runtime module entry for Claude Code. */
@@ -58,6 +63,8 @@ export const hermesAgentModule: RuntimeModule = {
58
63
  envVar: "BOTCORD_HERMES_AGENT_BIN",
59
64
  probe: () => probeHermesAgent(),
60
65
  create: () => new HermesAgentAdapter(),
66
+ installHint:
67
+ 'Install: pip install "hermes-agent[acp]" (or set BOTCORD_HERMES_AGENT_BIN to the absolute path of hermes-acp)',
61
68
  };
62
69
 
63
70
  /** Built-in runtime module entry for Gemini (probe-only stub). */
@@ -143,6 +150,7 @@ export interface RuntimeProbeEntry {
143
150
  binary: string;
144
151
  supportsRun: boolean;
145
152
  result: RuntimeProbeResult;
153
+ installHint?: string;
146
154
  }
147
155
 
148
156
  /** Probe every registered runtime and report installation status. */
@@ -161,6 +169,7 @@ export function detectRuntimes(): RuntimeProbeEntry[] {
161
169
  binary: m.binary,
162
170
  supportsRun: m.supportsRun !== false,
163
171
  result,
172
+ installHint: m.installHint,
164
173
  });
165
174
  }
166
175
  return out;
@@ -84,27 +84,46 @@ export function mergeOpenclawGateways(
84
84
  found: DiscoveredOpenclawGateway[],
85
85
  ): MergeOpenclawGatewayResult {
86
86
  const existing = cfg.openclawGateways ?? [];
87
- const existingUrls = new Set(existing.map((g) => normalizeUrlKey(g.url)));
87
+ const byUrl = new Map<string, number>();
88
+ existing.forEach((g, i) => byUrl.set(normalizeUrlKey(g.url), i));
88
89
  const existingNames = new Set(existing.map((g) => g.name));
90
+ const merged = existing.map((g) => ({ ...g }));
89
91
  const added: OpenclawGatewayProfile[] = [];
92
+ let mutated = false;
90
93
 
91
94
  for (const item of found) {
92
95
  const key = normalizeUrlKey(item.url);
93
- if (existingUrls.has(key)) continue;
96
+ const idx = byUrl.get(key);
97
+ if (idx !== undefined) {
98
+ // Same URL already configured — only fill in auth that the user is
99
+ // missing, never overwrite an existing token / tokenFile.
100
+ const cur = merged[idx];
101
+ if (!cur.token && !cur.tokenFile) {
102
+ if (item.token) {
103
+ cur.token = item.token;
104
+ mutated = true;
105
+ } else if (item.tokenFile) {
106
+ cur.tokenFile = item.tokenFile;
107
+ mutated = true;
108
+ }
109
+ }
110
+ continue;
111
+ }
94
112
  const profile: OpenclawGatewayProfile = {
95
113
  name: uniqueName(item.name, existingNames),
96
114
  url: item.url,
97
115
  };
98
116
  if (item.token) profile.token = item.token;
99
117
  else if (item.tokenFile) profile.tokenFile = item.tokenFile;
100
- existingUrls.add(key);
118
+ byUrl.set(key, merged.length);
101
119
  existingNames.add(profile.name);
120
+ merged.push(profile);
102
121
  added.push(profile);
103
122
  }
104
123
 
105
- if (added.length === 0) return { cfg, changed: false, added };
124
+ if (added.length === 0 && !mutated) return { cfg, changed: false, added };
106
125
  return {
107
- cfg: { ...cfg, openclawGateways: [...existing, ...added] },
126
+ cfg: { ...cfg, openclawGateways: merged },
108
127
  changed: true,
109
128
  added,
110
129
  };
@@ -148,10 +167,34 @@ function discoverFromConfigDir(root: string): DiscoveredOpenclawGateway[] {
148
167
 
149
168
  function parseJsonConfig(raw: string): { url?: string; token?: string; tokenFile?: string } | null {
150
169
  const obj = JSON.parse(raw) as any;
170
+ // Prefer OpenClaw's native shape: `gateway.port` + `gateway.auth.token`.
171
+ // The legacy `acp.url` shape is also supported for explicit user-authored configs.
172
+ const native = pickOpenclawGatewayValues(obj?.gateway);
173
+ if (native) return native;
151
174
  const acp = obj?.acp ?? obj?.gateway?.acp ?? obj?.gateway ?? obj;
152
175
  return pickConfigValues(acp);
153
176
  }
154
177
 
178
/**
 * Extract connection settings from OpenClaw's native `gateway` config object
 * (`gateway.port` plus optional `gateway.auth`).
 *
 * @param gw The parsed `gateway` section; anything that is not an object is rejected.
 * @returns `{ url, token? | tokenFile? }` targeting the loopback interface, or
 *   `null` when `gw` is not an object or `gw.port` is not a usable TCP port
 *   (an integer in 1..65535), so the caller can fall back to another shape.
 */
function pickOpenclawGatewayValues(
  gw: any,
): { url?: string; token?: string; tokenFile?: string } | null {
  if (!gw || typeof gw !== "object") return null;
  const port: unknown = gw.port;
  // Reject NaN, fractions, negatives, Infinity and out-of-range values: they
  // would otherwise be interpolated into a URL that can never be connected to.
  if (typeof port !== "number" || !Number.isInteger(port) || port < 1 || port > 65535) {
    return null;
  }
  // Local discovery always targets the loopback interface, regardless of how
  // the gateway is bound — the daemon is on the same machine.
  const out: { url: string; token?: string; tokenFile?: string } = {
    url: `ws://127.0.0.1:${port}`,
  };
  const auth = gw.auth;
  // Credentials are only picked up for token-mode auth; an inline token wins
  // over a token file, and blank strings count as absent.
  if (auth && typeof auth === "object" && auth.mode === "token") {
    const token = typeof auth.token === "string" ? auth.token.trim() : "";
    const tokenFile = typeof auth.tokenFile === "string" ? auth.tokenFile.trim() : "";
    if (token) out.token = token;
    else if (tokenFile) out.tokenFile = tokenFile;
  }
  return out;
}
197
+
155
198
  function parseTomlConfig(raw: string): { url?: string; token?: string; tokenFile?: string } | null {
156
199
  let inAcp = false;
157
200
  const values: Record<string, string> = {};
package/src/provision.ts CHANGED
@@ -4,7 +4,7 @@
4
4
  * side effects (register agent, write credentials, load route, add/remove
5
5
  * gateway channel) and return an ack payload.
6
6
  */
7
- import { existsSync, rmSync, unlinkSync } from "node:fs";
7
+ import { existsSync, readFileSync, rmSync, unlinkSync } from "node:fs";
8
8
  import { homedir } from "node:os";
9
9
  import path from "node:path";
10
10
  import {
@@ -957,16 +957,22 @@ export type WsEndpointProbeFn = (args: {
957
957
  }>;
958
958
 
959
959
  /**
960
- * Default L2 + L3 probe — opens a WS handshake against the OpenClaw gateway
961
- * and, when the connection is up, issues a JSON-RPC `agents.list` request to
962
- * enumerate configured agent profiles. Best-effort: a successful WS open with
963
- * a failed `agents.list` still reports `ok: true` (just without `agents`),
964
- * matching the RFC's "agents populated only when listing succeeded" rule.
960
+ * Default L2 + L3 probe — speaks OpenClaw's WS frame protocol against the
961
+ * gateway and enumerates agent profiles via `agents.list`.
965
962
  *
966
- * Method name and result shape follow OpenClaw:
967
- * `~/claws/openclaw/src/gateway/server-methods/agents.ts:416` and
968
- * `~/claws/openclaw/src/gateway/session-utils.ts:783`
969
- * `{ defaultId, mainKey, scope, agents: [{ id, name?, identity?, workspace, model? }] }`.
963
+ * Wire flow (see `~/claws/openclaw/src/gateway/server/ws-connection/message-handler.ts`
964
+ * and `~/claws/openclaw/src/gateway/protocol/schema/frames.ts`):
965
+ * 1. WS upgrade (no auth required at the HTTP layer).
966
+ * 2. Server emits `{type:"event", event:"connect.challenge", payload:{nonce}}`.
967
+ * 3. Client sends `{type:"req", id, method:"connect", params:{minProtocol, maxProtocol,
968
+ * client:{id:"openclaw-probe", mode:"probe", ...}, auth:{token}}}`.
969
+ * 4. Server responds `{type:"res", id, ok:true, payload:{type:"hello-ok", server:{version}, ...}}`.
970
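+ * 5. (Not sent by this probe.) A client would send `{type:"req", id, method:"agents.list", params:{}}`; it requires the `operator.read` scope.
971
+ * 6. (Not awaited by this probe.) The server would respond with `{payload: { defaultId, mainKey, scope, agents:[{id, name?, workspace?, model?}] }}`.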
972
+ *
973
+ * Best-effort: a successful WS open with a failed handshake / `agents.list`
974
+ * still reports `ok: true` (just without `agents`), matching the RFC's
975
+ * "agents populated only when listing succeeded" rule.
970
976
  */
971
977
  async function defaultWsProbe(args: {
972
978
  url: string;
@@ -1000,6 +1006,9 @@ async function defaultWsProbe(args: {
1000
1006
  let settled = false;
1001
1007
  let ws: any;
1002
1008
  let timer: ReturnType<typeof setTimeout> | undefined;
1009
+ let serverVersion: string | undefined;
1010
+ const CONNECT_ID = "probe-connect";
1011
+ let connectSent = false;
1003
1012
  const settle = (v: ProbeResult): void => {
1004
1013
  if (settled) return;
1005
1014
  settled = true;
@@ -1013,6 +1022,8 @@ async function defaultWsProbe(args: {
1013
1022
  };
1014
1023
  try {
1015
1024
  const headers: Record<string, string> = {};
1025
+ // Some deployments gate the WS upgrade on Authorization too; harmless
1026
+ // when not enforced — auth is also re-asserted in the connect frame.
1016
1027
  if (args.token) headers["Authorization"] = `Bearer ${args.token}`;
1017
1028
  ws = new WebSocket(args.url, { headers });
1018
1029
  } catch (err) {
@@ -1020,58 +1031,75 @@ async function defaultWsProbe(args: {
1020
1031
  return;
1021
1032
  }
1022
1033
  timer = setTimeout(() => settle({ ok: false, error: "timeout" }), args.timeoutMs);
1023
- const requestId = "probe-agents-list";
1024
- ws.on("open", () => {
1025
- // L3: enumerate agent profiles. We don't fail the L2 result if this
1026
- // call fails — the gateway is reachable either way.
1034
+
1035
+ const sendConnect = (): void => {
1036
+ if (connectSent) return;
1037
+ connectSent = true;
1038
+ const params: any = {
1039
+ minProtocol: 3,
1040
+ maxProtocol: 3,
1041
+ client: {
1042
+ id: "openclaw-probe",
1043
+ version: "0.1.0",
1044
+ platform: process.platform || "node",
1045
+ mode: "probe",
1046
+ },
1047
+ role: "operator",
1048
+ scopes: ["operator.read"],
1049
+ };
1050
+ if (args.token) params.auth = { token: args.token };
1027
1051
  try {
1028
- ws.send(
1029
- JSON.stringify({
1030
- jsonrpc: "2.0",
1031
- id: requestId,
1032
- method: "agents.list",
1033
- params: {},
1034
- }),
1035
- );
1052
+ ws.send(JSON.stringify({ type: "req", id: CONNECT_ID, method: "connect", params }));
1036
1053
  } catch (err) {
1037
- settle({ ok: true, error: `agents.list send failed: ${(err as Error).message}` });
1054
+ settle({ ok: true, error: `connect send failed: ${(err as Error).message}` });
1038
1055
  }
1056
+ };
1057
+
1058
+ ws.on("open", () => {
1059
+ // Some servers send `connect.challenge` before the socket is fully
1060
+ // wired; if it never arrives we still try a best-effort connect after
1061
+ // a short delay so the probe doesn't stall on legacy gateways.
1062
+ setTimeout(() => {
1063
+ if (!connectSent && !settled) sendConnect();
1064
+ }, 250);
1039
1065
  });
1040
1066
  ws.on("message", (raw: Buffer | string) => {
1067
+ let msg: any;
1041
1068
  try {
1042
- const msg = JSON.parse(typeof raw === "string" ? raw : raw.toString("utf8"));
1043
- if (msg?.id !== requestId) return; // ignore unrelated frames
1044
- if (msg.error) {
1045
- settle({ ok: true, error: String(msg.error?.message ?? "agents.list error") });
1069
+ msg = JSON.parse(typeof raw === "string" ? raw : raw.toString("utf8"));
1070
+ } catch {
1071
+ return;
1072
+ }
1073
+ if (!msg || typeof msg !== "object") return;
1074
+ if (msg.type === "event" && msg.event === "connect.challenge") {
1075
+ // Nonce only matters for device-pairing flows; token-only auth ignores it.
1076
+ sendConnect();
1077
+ return;
1078
+ }
1079
+ if (msg.type !== "res" || typeof msg.id !== "string") return;
1080
+ if (msg.id === CONNECT_ID) {
1081
+ if (!msg.ok) {
1082
+ const errMsg = msg.error?.message ? String(msg.error.message) : "connect rejected";
1083
+ settle({ ok: true, error: errMsg });
1046
1084
  return;
1047
1085
  }
1048
- const list = Array.isArray(msg.result?.agents) ? msg.result.agents : [];
1049
- const agents: AgentRow[] = [];
1050
- for (const a of list) {
1051
- if (!a || typeof a.id !== "string" || a.id.length === 0) continue;
1052
- const row: AgentRow = { id: a.id };
1053
- if (typeof a.name === "string") row.name = a.name;
1054
- if (typeof a.workspace === "string") row.workspace = a.workspace;
1055
- if (a.model && typeof a.model === "object") {
1056
- const model: { name?: string; provider?: string } = {};
1057
- if (typeof a.model.name === "string") model.name = a.model.name;
1058
- if (typeof a.model.provider === "string") model.provider = a.model.provider;
1059
- if (model.name || model.provider) row.model = model;
1060
- }
1061
- agents.push(row);
1062
- }
1063
- settle({ ok: true, agents });
1064
- } catch (err) {
1065
- settle({ ok: true, error: `agents.list parse failed: ${(err as Error).message}` });
1086
+ const v = msg.payload?.server?.version;
1087
+ if (typeof v === "string" && v) serverVersion = v;
1088
+ // We don't fetch agents.list over the wire: it requires `operator.read`
1089
+ // which the gateway only grants to clients that present a paired device
1090
+ // identity (see message-handler.ts:478 self-declared scopes are
1091
+ // cleared without device pairing). For local OpenClaw the agent list
1092
+ // is sourced directly from disk by `probeOpenclawAgents`.
1093
+ settle({ ok: true, version: serverVersion });
1066
1094
  }
1067
1095
  });
1068
1096
  ws.on("error", (err: Error) => {
1069
1097
  settle({ ok: false, error: err.message });
1070
1098
  });
1071
1099
  ws.on("close", () => {
1072
- // If the socket closes before `agents.list` resolved we still treat
1073
- // L2 as ok (open fired) and emit no agents.
1074
- settle({ ok: true });
1100
+ // If the socket closes before the connect response arrives, treat
1101
+ // L2 as ok (the upgrade succeeded) and emit no agents.
1102
+ settle({ ok: true, version: serverVersion });
1075
1103
  });
1076
1104
  });
1077
1105
  }
@@ -1097,11 +1125,69 @@ export async function probeOpenclawAgents(
1097
1125
  ...(profile.token ? { token: profile.token } : {}),
1098
1126
  ...(profile.tokenFile ? { tokenFile: profile.tokenFile } : {}),
1099
1127
  });
1100
- return probe({
1128
+ const result = await probe({
1101
1129
  url: profile.url,
1102
1130
  token: prepared.resolvedToken,
1103
1131
  timeoutMs: opts.timeoutMs ?? 3000,
1104
1132
  });
1133
+ // For loopback gateways the agent roster lives in `~/.openclaw/openclaw.json`
1134
+ // and is the source of truth — listing it over the wire would require a
1135
+ // paired device identity (operator.read scope). When the WS probe is the
1136
+ // default (i.e. no test injection) we enrich the result from disk.
1137
+ if (result.ok && !result.agents && !opts.probe && isLoopbackUrl(profile.url)) {
1138
+ const local = readLocalOpenclawAgents();
1139
+ if (local && local.length > 0) result.agents = local;
1140
+ }
1141
+ return result;
1142
+ }
1143
+
1144
/**
 * Report whether `raw` is a URL whose host is the local machine's loopback
 * interface: `127.0.0.1`, IPv6 `::1`, or `localhost`.
 *
 * @param raw URL string to inspect.
 * @returns `true` for a loopback host; `false` otherwise, including when
 *   `raw` cannot be parsed as a URL.
 */
function isLoopbackUrl(raw: string): boolean {
  try {
    const hostname = new URL(raw).hostname.toLowerCase();
    // `URL.hostname` keeps the square brackets around IPv6 literals
    // ("[::1]"), so strip them before comparing against the bare address.
    const host =
      hostname.startsWith("[") && hostname.endsWith("]") ? hostname.slice(1, -1) : hostname;
    return host === "127.0.0.1" || host === "::1" || host === "localhost";
  } catch {
    return false;
  }
}
1152
+
1153
/**
 * Read the agent roster from the local OpenClaw config file
 * (`~/.openclaw/openclaw.json`).
 *
 * The default agent comes first: its id is `agents.defaults.id` (or
 * `"default"` when that is not a string) and its workspace/model come from
 * `agents.defaults`. If `agents.list` also contains an entry with that id,
 * the entry's explicit name/workspace/model are overlaid on the defaults
 * rather than being discarded. Remaining `agents.list` entries follow in file
 * order; entries without a string id or with an already-seen id are skipped.
 *
 * @returns The agent rows, or `null` when the file is missing, unreadable, or
 *   not valid JSON.
 */
function readLocalOpenclawAgents(): Array<{
  id: string;
  name?: string;
  workspace?: string;
  model?: { name?: string; provider?: string };
}> | null {
  type AgentModel = { name?: string; provider?: string };
  type AgentRow = { id: string; name?: string; workspace?: string; model?: AgentModel };

  // Copy only well-typed fields out of an untrusted config object.
  const toRow = (raw: any, id: string): AgentRow => {
    const row: AgentRow = { id };
    if (typeof raw?.name === "string") row.name = raw.name;
    if (typeof raw?.workspace === "string") row.workspace = raw.workspace;
    const m = raw?.model;
    if (m && typeof m === "object") {
      const model: AgentModel = {};
      // `primary` takes precedence over `name` for the model name.
      if (typeof m.primary === "string") model.name = m.primary;
      else if (typeof m.name === "string") model.name = m.name;
      if (typeof m.provider === "string") model.provider = m.provider;
      if (model.name || model.provider) row.model = model;
    }
    return row;
  };

  try {
    const file = path.join(homedir(), ".openclaw", "openclaw.json");
    if (!existsSync(file)) return null;
    const cfg = JSON.parse(readFileSync(file, "utf8")) as any;
    const list: any[] = Array.isArray(cfg?.agents?.list) ? cfg.agents.list : [];
    const defaults = cfg?.agents?.defaults;
    const defaultId = typeof defaults?.id === "string" ? defaults.id : "default";

    const seen = new Set<string>();
    const out: AgentRow[] = [];

    // Default agent first so it surfaces at the top of the dropdown.
    if (defaultId) {
      const explicit = list.find((entry) => entry?.id === defaultId);
      out.push({
        ...toRow({ workspace: defaults?.workspace, model: defaults?.model }, defaultId),
        // Explicit per-agent settings win over the shared defaults.
        ...(explicit ? toRow(explicit, defaultId) : {}),
      });
      seen.add(defaultId);
    }

    for (const entry of list) {
      const id = typeof entry?.id === "string" ? entry.id : "";
      if (!id || seen.has(id)) continue;
      seen.add(id);
      out.push(toRow(entry, id));
    }
    return out;
  } catch {
    // Best-effort enrichment: any read/parse failure means "no local roster".
    return null;
  }
}
1106
1192
 
1107
1193
  /**
package/src/user-auth.ts CHANGED
@@ -188,6 +188,20 @@ export function isTokenNearExpiry(record: UserAuthRecord, windowMs = 60_000): bo
188
188
  return record.expiresAt - Date.now() <= windowMs;
189
189
  }
190
190
 
191
/**
 * Error raised when the Hub answers a token refresh with 401 or 403, i.e.
 * the refresh token itself is no longer accepted. Callers that run reconnect
 * loops should treat it as terminal (the user has to log in again) rather
 * than retrying with the same rejected token.
 */
export class AuthRefreshRejectedError extends Error {
  /**
   * @param status HTTP status returned by the Hub for the refresh attempt.
   * @param message Human-readable description of the failure.
   */
  constructor(
    readonly status: number,
    message: string,
  ) {
    super(message);
    this.name = "AuthRefreshRejectedError";
  }
}
204
+
191
205
  /**
192
206
  * Stateful helper that owns the in-memory copy of user-auth and knows how
193
207
  * to refresh it. Used by the control channel so reconnects always carry
@@ -245,13 +259,35 @@ export class UserAuthManager {
245
259
  expiresInMs: current.expiresAt - Date.now(),
246
260
  });
247
261
  this.refreshInflight = (async () => {
248
- const tok = await refreshDaemonToken(current.hubUrl, current.refreshToken);
262
+ // Refresh tokens rotate server-side. If another local process (e.g. a
263
+ // second daemon racing on the same user-auth.json) refreshed in the
264
+ // meantime, the on-disk refreshToken now differs from our in-memory
265
+ // copy — using the in-memory one would 401 because the server already
266
+ // invalidated it. Re-read disk first and adopt any newer record.
267
+ let basis = current;
268
+ try {
269
+ const onDisk = loadUserAuth(this.file);
270
+ if (onDisk && onDisk.refreshToken !== current.refreshToken) {
271
+ daemonLog.info("user-auth refresh: adopting newer on-disk token", {
272
+ userId: onDisk.userId,
273
+ expiresAt: onDisk.expiresAt,
274
+ });
275
+ this.record = onDisk;
276
+ if (!isTokenNearExpiry(onDisk)) return onDisk;
277
+ basis = onDisk;
278
+ }
279
+ } catch (err) {
280
+ daemonLog.debug("user-auth refresh: disk reread failed (ignored)", {
281
+ error: err instanceof Error ? err.message : String(err),
282
+ });
283
+ }
284
+ const tok = await refreshDaemonToken(basis.hubUrl, basis.refreshToken);
249
285
  const next: UserAuthRecord = {
250
- ...current,
286
+ ...basis,
251
287
  accessToken: tok.accessToken,
252
288
  refreshToken: tok.refreshToken,
253
289
  expiresAt: Date.now() + tok.expiresIn * 1000,
254
- hubUrl: tok.hubUrl || current.hubUrl,
290
+ hubUrl: tok.hubUrl || basis.hubUrl,
255
291
  };
256
292
  saveUserAuth(next, this.file);
257
293
  this.record = next;
@@ -261,10 +297,23 @@ export class UserAuthManager {
261
297
  });
262
298
  return next;
263
299
  })().catch((err) => {
300
+ const status =
301
+ typeof (err as { status?: unknown }).status === "number"
302
+ ? ((err as { status: number }).status)
303
+ : null;
304
+ const message = err instanceof Error ? err.message : String(err);
264
305
  daemonLog.warn("user-auth refresh: failed", {
265
306
  userId: current.userId,
266
- error: err instanceof Error ? err.message : String(err),
307
+ status,
308
+ error: message,
267
309
  });
310
+ if (status === 401 || status === 403) {
311
+ // Refresh token is permanently dead — write the expired flag so
312
+ // `status` surfaces it and re-throw a typed error so the control
313
+ // channel can stop reconnect loops instead of hammering the Hub.
314
+ writeAuthExpiredFlag();
315
+ throw new AuthRefreshRejectedError(status, message);
316
+ }
268
317
  throw err;
269
318
  }).finally(() => {
270
319
  this.refreshInflight = null;