@botcord/daemon 0.2.9 → 0.2.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/control-channel.js +48 -4
- package/dist/doctor.js +3 -0
- package/dist/gateway/runtimes/hermes-agent.d.ts +5 -1
- package/dist/gateway/runtimes/hermes-agent.js +27 -3
- package/dist/gateway/runtimes/registry.d.ts +6 -0
- package/dist/gateway/runtimes/registry.js +2 -0
- package/dist/provision.d.ts +7 -3
- package/dist/provision.js +26 -5
- package/dist/user-auth.d.ts +9 -0
- package/dist/user-auth.js +53 -4
- package/package.json +1 -1
- package/src/__tests__/runtime-discovery.test.ts +8 -2
- package/src/control-channel.ts +49 -3
- package/src/doctor.ts +3 -0
- package/src/gateway/__tests__/hermes-agent-adapter.test.ts +29 -1
- package/src/gateway/runtimes/hermes-agent.ts +36 -3
- package/src/gateway/runtimes/registry.ts +9 -0
- package/src/provision.ts +31 -5
- package/src/user-auth.ts +53 -4
package/dist/control-channel.js
CHANGED
|
@@ -9,10 +9,17 @@
|
|
|
9
9
|
import WebSocket from "ws";
|
|
10
10
|
import { buildDaemonWebSocketUrl, CONTROL_FRAME_TYPES, jcsCanonicalize, resolveHubControlPublicKey, verifyEd25519, } from "@botcord/protocol-core";
|
|
11
11
|
import { log as daemonLog } from "./log.js";
|
|
12
|
-
import { writeAuthExpiredFlag, } from "./user-auth.js";
|
|
12
|
+
import { AuthRefreshRejectedError, writeAuthExpiredFlag, } from "./user-auth.js";
|
|
13
13
|
/** Exponential backoff plan for transient disconnects. */
|
|
14
14
|
const RECONNECT_BACKOFF_MS = [1000, 2000, 4000, 8000, 16000, 30000];
|
|
15
|
-
|
|
15
|
+
/**
|
|
16
|
+
* Keepalive cadence. Has to stay below the smallest idle-timeout in any
|
|
17
|
+
* intermediary on the daemon → Hub WS path. Cloudflare and AWS ALB both
|
|
18
|
+
* default to ~60s of idle without app-level data, and some tunnels strip
|
|
19
|
+
* WS-level ping/pong control frames entirely — hence we send an app-level
|
|
20
|
+
* `pong` heartbeat alongside `ws.ping()` rather than relying on it alone.
|
|
21
|
+
*/
|
|
22
|
+
const KEEPALIVE_INTERVAL_MS = 20_000;
|
|
16
23
|
const REPLAY_DEDUPE_CAP = 256;
|
|
17
24
|
/**
|
|
18
25
|
* Build the canonical signing input for a control frame: RFC 8785 (JCS)
|
|
@@ -91,8 +98,18 @@ export class ControlChannel {
|
|
|
91
98
|
});
|
|
92
99
|
this.connectInflight = this.connect().catch((err) => {
|
|
93
100
|
// Initial connect failure surfaces to the caller; subsequent
|
|
94
|
-
// reconnects are handled opaquely inside onClose.
|
|
95
|
-
|
|
101
|
+
// reconnects are handled opaquely inside onClose. A refresh-rejected
|
|
102
|
+
// error means the refresh token itself is dead — no point retrying;
|
|
103
|
+
// writeAuthExpiredFlag was already called in user-auth.refresh().
|
|
104
|
+
if (err instanceof AuthRefreshRejectedError) {
|
|
105
|
+
this.stopRequested = true;
|
|
106
|
+
daemonLog.warn("control-channel: refresh rejected; stopping (re-login required)", {
|
|
107
|
+
status: err.status,
|
|
108
|
+
});
|
|
109
|
+
}
|
|
110
|
+
else {
|
|
111
|
+
this.scheduleReconnect(err);
|
|
112
|
+
}
|
|
96
113
|
throw err;
|
|
97
114
|
});
|
|
98
115
|
try {
|
|
@@ -188,12 +205,24 @@ export class ControlChannel {
|
|
|
188
205
|
const ws = this.ws;
|
|
189
206
|
if (!ws || ws.readyState !== WebSocket.OPEN)
|
|
190
207
|
return;
|
|
208
|
+
// WS-level ping for normal cases.
|
|
191
209
|
try {
|
|
192
210
|
ws.ping();
|
|
193
211
|
}
|
|
194
212
|
catch {
|
|
195
213
|
// ignore — next failed send will trigger close
|
|
196
214
|
}
|
|
215
|
+
// App-level heartbeat: a `pong` daemon-initiated frame. Hub recognizes
|
|
216
|
+
// it via `_DAEMON_INITIATED_TYPES` and bumps `last_seen_at`. Critical
|
|
217
|
+
// when an intermediary (Cloudflare, AWS ALB, some k8s ingresses)
|
|
218
|
+
// drops WS-level control frames — those proxies idle-close the WS at
|
|
219
|
+
// ~60s without app-level activity, masquerading as a clean 1006 to
|
|
220
|
+
// both peers.
|
|
221
|
+
this.send({
|
|
222
|
+
id: `hb_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
|
|
223
|
+
type: "pong",
|
|
224
|
+
ts: Date.now(),
|
|
225
|
+
});
|
|
197
226
|
}, this.keepaliveMs);
|
|
198
227
|
}
|
|
199
228
|
stopKeepalive() {
|
|
@@ -223,6 +252,13 @@ export class ControlChannel {
|
|
|
223
252
|
scheduleReconnect(err) {
|
|
224
253
|
if (this.stopRequested)
|
|
225
254
|
return;
|
|
255
|
+
if (err instanceof AuthRefreshRejectedError) {
|
|
256
|
+
this.stopRequested = true;
|
|
257
|
+
daemonLog.warn("control-channel: refresh rejected; halting reconnect (re-login required)", {
|
|
258
|
+
status: err.status,
|
|
259
|
+
});
|
|
260
|
+
return;
|
|
261
|
+
}
|
|
226
262
|
const attempt = this.reconnectAttempts;
|
|
227
263
|
this.reconnectAttempts = attempt + 1;
|
|
228
264
|
const delay = this.backoff[Math.min(attempt, this.backoff.length - 1)];
|
|
@@ -254,6 +290,14 @@ export class ControlChannel {
|
|
|
254
290
|
return;
|
|
255
291
|
}
|
|
256
292
|
if (!frame || typeof frame.id !== "string" || typeof frame.type !== "string") {
|
|
293
|
+
// Hub ack responses for daemon-initiated frames (runtime_snapshot push,
|
|
294
|
+
// heartbeat, etc.) carry `{id, ok}` and no `type`. They're expected,
|
|
295
|
+
// not malformed — drop silently. Anything else stays a warn.
|
|
296
|
+
if (frame &&
|
|
297
|
+
typeof frame.id === "string" &&
|
|
298
|
+
typeof frame.ok === "boolean") {
|
|
299
|
+
return;
|
|
300
|
+
}
|
|
257
301
|
daemonLog.warn("control-channel: malformed frame", { frame });
|
|
258
302
|
return;
|
|
259
303
|
}
|
package/dist/doctor.js
CHANGED
|
@@ -156,6 +156,9 @@ export function renderDoctor(input) {
|
|
|
156
156
|
const r = rows[i];
|
|
157
157
|
const e = input.runtimes[i];
|
|
158
158
|
lines.push(`${pad(r.runtime, widths.runtime)} ${pad(r.name, widths.name)} ${pad(r.status, widths.status)} ${pad(r.version, widths.version)} ${r.path}`);
|
|
159
|
+
if (!e.result.available && e.installHint) {
|
|
160
|
+
lines.push(` → ${e.installHint}`);
|
|
161
|
+
}
|
|
159
162
|
if (e.endpoints && e.endpoints.length > 0) {
|
|
160
163
|
for (const ep of e.endpoints) {
|
|
161
164
|
const mark = ep.reachable ? "✓" : "✗";
|
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
import { AcpRuntimeAdapter, type AcpPermissionRequest, type AcpPermissionResponse, type AcpUpdateCtx, type AcpUpdateParams } from "./acp-stream.js";
|
|
2
2
|
import { type ProbeDeps } from "./probe.js";
|
|
3
3
|
import type { RuntimeProbeResult, RuntimeRunOptions } from "../types.js";
|
|
4
|
-
/**
|
|
4
|
+
/**
|
|
5
|
+
* Resolve the `hermes-acp` executable. Tries PATH first, then falls back to
|
|
6
|
+
* the upstream install.sh's private venv location (`~/.hermes/...`) before
|
|
7
|
+
* giving up. `BOTCORD_HERMES_AGENT_BIN` always wins via the adapter override.
|
|
8
|
+
*/
|
|
5
9
|
export declare function resolveHermesAcpCommand(deps?: ProbeDeps): string | null;
|
|
6
10
|
/** Probe whether `hermes-acp` is installed and report its version. */
|
|
7
11
|
export declare function probeHermesAgent(deps?: ProbeDeps): RuntimeProbeResult;
|
|
@@ -3,10 +3,34 @@ import path from "node:path";
|
|
|
3
3
|
import { agentHermesHomeDir, agentHermesWorkspaceDir, ensureAgentHermesWorkspace, } from "../../agent-workspace.js";
|
|
4
4
|
import { buildCliEnv } from "../cli-resolver.js";
|
|
5
5
|
import { AcpRuntimeAdapter, } from "./acp-stream.js";
|
|
6
|
-
import { readCommandVersion, resolveCommandOnPath } from "./probe.js";
|
|
7
|
-
/**
|
|
6
|
+
import { firstExistingPath, readCommandVersion, resolveCommandOnPath, resolveHomePath, } from "./probe.js";
|
|
7
|
+
/**
|
|
8
|
+
* Known absolute locations of the `hermes-acp` entry point when it is not on
|
|
9
|
+
* PATH. The upstream `scripts/install.sh` (curl|bash installer) installs a
|
|
10
|
+
* private virtualenv under `~/.hermes/hermes-agent/venv/` and only symlinks
|
|
11
|
+
* the user-facing `hermes` command into `~/.local/bin/` — the `hermes-acp`
|
|
12
|
+
* entry point stays inside the venv. Without a fallback, daemon's PATH-only
|
|
13
|
+
* probe misses every user who installed via the README-recommended script.
|
|
14
|
+
*/
|
|
15
|
+
const HERMES_ACP_FALLBACK_RELATIVE_PATHS = [
|
|
16
|
+
path.join(".hermes", "hermes-agent", "venv", "bin", "hermes-acp"),
|
|
17
|
+
];
|
|
18
|
+
const HERMES_ACP_FALLBACK_SYSTEM_PATHS = [
|
|
19
|
+
"/opt/hermes/hermes-agent/venv/bin/hermes-acp",
|
|
20
|
+
];
|
|
21
|
+
/**
|
|
22
|
+
* Resolve the `hermes-acp` executable. Tries PATH first, then falls back to
|
|
23
|
+
* the upstream install.sh's private venv location (`~/.hermes/...`) before
|
|
24
|
+
* giving up. `BOTCORD_HERMES_AGENT_BIN` always wins via the adapter override.
|
|
25
|
+
*/
|
|
8
26
|
export function resolveHermesAcpCommand(deps = {}) {
|
|
9
|
-
|
|
27
|
+
const onPath = resolveCommandOnPath("hermes-acp", deps);
|
|
28
|
+
if (onPath)
|
|
29
|
+
return onPath;
|
|
30
|
+
return firstExistingPath([
|
|
31
|
+
...HERMES_ACP_FALLBACK_RELATIVE_PATHS.map((p) => resolveHomePath(p, deps)),
|
|
32
|
+
...HERMES_ACP_FALLBACK_SYSTEM_PATHS,
|
|
33
|
+
], deps);
|
|
10
34
|
}
|
|
11
35
|
/** Probe whether `hermes-acp` is installed and report its version. */
|
|
12
36
|
export function probeHermesAgent(deps = {}) {
|
|
@@ -23,6 +23,11 @@ export interface RuntimeModule {
|
|
|
23
23
|
* config loader rejects routing turns to this adapter.
|
|
24
24
|
*/
|
|
25
25
|
supportsRun?: boolean;
|
|
26
|
+
/**
|
|
27
|
+
* Short, single-line install hint shown by `doctor` when the runtime
|
|
28
|
+
* probes as unavailable. Helps users recover without reading source.
|
|
29
|
+
*/
|
|
30
|
+
installHint?: string;
|
|
26
31
|
}
|
|
27
32
|
/** Built-in runtime module entry for Claude Code. */
|
|
28
33
|
export declare const claudeCodeModule: RuntimeModule;
|
|
@@ -58,6 +63,7 @@ export interface RuntimeProbeEntry {
|
|
|
58
63
|
binary: string;
|
|
59
64
|
supportsRun: boolean;
|
|
60
65
|
result: RuntimeProbeResult;
|
|
66
|
+
installHint?: string;
|
|
61
67
|
}
|
|
62
68
|
/** Probe every registered runtime and report installation status. */
|
|
63
69
|
export declare function detectRuntimes(): RuntimeProbeEntry[];
|
|
@@ -28,6 +28,7 @@ export const hermesAgentModule = {
|
|
|
28
28
|
envVar: "BOTCORD_HERMES_AGENT_BIN",
|
|
29
29
|
probe: () => probeHermesAgent(),
|
|
30
30
|
create: () => new HermesAgentAdapter(),
|
|
31
|
+
installHint: 'Install: pip install "hermes-agent[acp]" (or set BOTCORD_HERMES_AGENT_BIN to the absolute path of hermes-acp)',
|
|
31
32
|
};
|
|
32
33
|
/** Built-in runtime module entry for Gemini (probe-only stub). */
|
|
33
34
|
export const geminiModule = {
|
|
@@ -110,6 +111,7 @@ export function detectRuntimes() {
|
|
|
110
111
|
binary: m.binary,
|
|
111
112
|
supportsRun: m.supportsRun !== false,
|
|
112
113
|
result,
|
|
114
|
+
installHint: m.installHint,
|
|
113
115
|
});
|
|
114
116
|
}
|
|
115
117
|
return out;
|
package/dist/provision.d.ts
CHANGED
|
@@ -56,15 +56,19 @@ export declare function adoptDiscoveredOpenclawAgents(ctx: {
|
|
|
56
56
|
export declare function addAgentToConfig(cfg: DaemonConfig, agentId: string): DaemonConfig | null;
|
|
57
57
|
/** Inverse of {@link addAgentToConfig}. Returns `null` on no-op. */
|
|
58
58
|
export declare function removeAgentFromConfig(cfg: DaemonConfig, agentId: string): DaemonConfig | null;
|
|
59
|
+
/** Drop the cache (e.g. before a `doctor`-style interactive re-probe). */
|
|
60
|
+
export declare function clearRuntimeProbeCache(): void;
|
|
59
61
|
/**
|
|
60
62
|
* Probe every registered adapter and shape the result as the wire-level
|
|
61
63
|
* {@link ListRuntimesResult} — used by both the `list_runtimes` ack path and
|
|
62
64
|
* the daemon-side first-connect `runtime_snapshot` push in `daemon.ts`.
|
|
63
65
|
*
|
|
64
|
-
*
|
|
65
|
-
*
|
|
66
|
+
* Cached for {@link RUNTIME_PROBE_CACHE_TTL_MS}; pass `{ force: true }` to
|
|
67
|
+
* bypass the cache.
|
|
66
68
|
*/
|
|
67
|
-
export declare function collectRuntimeSnapshot(
|
|
69
|
+
export declare function collectRuntimeSnapshot(opts?: {
|
|
70
|
+
force?: boolean;
|
|
71
|
+
}): ListRuntimesResult;
|
|
68
72
|
/** Maximum number of `endpoints[]` entries persisted per runtime (RFC §3.8.2). */
|
|
69
73
|
export declare const RUNTIME_ENDPOINTS_CAP = 32;
|
|
70
74
|
/** Injection seam for L2 + L3 endpoint probes — kept testable + side-effect-free. */
|
package/dist/provision.js
CHANGED
|
@@ -768,15 +768,34 @@ export function removeAgentFromConfig(cfg, agentId) {
|
|
|
768
768
|
// ---------------------------------------------------------------------------
|
|
769
769
|
// runtime-discovery snapshot (plan §8.5)
|
|
770
770
|
// ---------------------------------------------------------------------------
|
|
771
|
+
/**
|
|
772
|
+
* TTL for the L1 runtime-detection cache. `detectRuntimes()` shells out to
|
|
773
|
+
* each adapter binary (claude / codex / gemini / openclaw / hermes) to read
|
|
774
|
+
* `--version`, which routinely costs 1.5–2s in aggregate — long enough to
|
|
775
|
+
* push `list_runtimes` past the Hub's 10s ack budget when combined with the
|
|
776
|
+
* 3s openclaw gateway probe. Versions don't change between dashboard refresh
|
|
777
|
+
* clicks, so cache the L1 snapshot briefly and recompute on miss.
|
|
778
|
+
*/
|
|
779
|
+
const RUNTIME_PROBE_CACHE_TTL_MS = 30_000;
|
|
780
|
+
let _runtimeProbeCache = null;
|
|
781
|
+
/** Drop the cache (e.g. before a `doctor`-style interactive re-probe). */
|
|
782
|
+
export function clearRuntimeProbeCache() {
|
|
783
|
+
_runtimeProbeCache = null;
|
|
784
|
+
}
|
|
771
785
|
/**
|
|
772
786
|
* Probe every registered adapter and shape the result as the wire-level
|
|
773
787
|
* {@link ListRuntimesResult} — used by both the `list_runtimes` ack path and
|
|
774
788
|
* the daemon-side first-connect `runtime_snapshot` push in `daemon.ts`.
|
|
775
789
|
*
|
|
776
|
-
*
|
|
777
|
-
*
|
|
790
|
+
* Cached for {@link RUNTIME_PROBE_CACHE_TTL_MS}; pass `{ force: true }` to
|
|
791
|
+
* bypass the cache.
|
|
778
792
|
*/
|
|
779
|
-
export function collectRuntimeSnapshot() {
|
|
793
|
+
export function collectRuntimeSnapshot(opts = {}) {
|
|
794
|
+
if (!opts.force &&
|
|
795
|
+
_runtimeProbeCache &&
|
|
796
|
+
Date.now() - _runtimeProbeCache.at < RUNTIME_PROBE_CACHE_TTL_MS) {
|
|
797
|
+
return _runtimeProbeCache.value;
|
|
798
|
+
}
|
|
780
799
|
const entries = detectRuntimes();
|
|
781
800
|
const runtimes = entries.map((entry) => {
|
|
782
801
|
const record = {
|
|
@@ -796,7 +815,9 @@ export function collectRuntimeSnapshot() {
|
|
|
796
815
|
// enough; filling a synthetic message would be misleading.
|
|
797
816
|
return record;
|
|
798
817
|
});
|
|
799
|
-
|
|
818
|
+
const value = { runtimes, probedAt: Date.now() };
|
|
819
|
+
_runtimeProbeCache = { at: Date.now(), value };
|
|
820
|
+
return value;
|
|
800
821
|
}
|
|
801
822
|
/** Maximum number of `endpoints[]` entries persisted per runtime (RFC §3.8.2). */
|
|
802
823
|
export const RUNTIME_ENDPOINTS_CAP = 32;
|
|
@@ -1024,7 +1045,7 @@ export async function collectRuntimeSnapshotAsync(opts = {}) {
|
|
|
1024
1045
|
if (gateways.length === 0)
|
|
1025
1046
|
return base;
|
|
1026
1047
|
// Default daemon-side budget is 3s — it must stay below the Hub's
|
|
1027
|
-
// `list_runtimes` ack wait (
|
|
1048
|
+
// `list_runtimes` ack wait (10s, see backend/hub/routers/daemon_control.py)
|
|
1028
1049
|
// so a single slow gateway can't blow the whole snapshot to a 504.
|
|
1029
1050
|
const timeoutMs = opts.timeoutMs ?? 3000;
|
|
1030
1051
|
const capped = gateways.slice(0, RUNTIME_ENDPOINTS_CAP);
|
package/dist/user-auth.d.ts
CHANGED
|
@@ -40,6 +40,15 @@ export declare function writeAuthExpiredFlag(file?: string): void;
|
|
|
40
40
|
export declare function clearAuthExpiredFlag(file?: string): void;
|
|
41
41
|
/** Returns true if the stored access token is within `windowMs` of expiry. */
|
|
42
42
|
export declare function isTokenNearExpiry(record: UserAuthRecord, windowMs?: number): boolean;
|
|
43
|
+
/**
|
|
44
|
+
* Thrown when the Hub rejects a refresh token (401/403). Signals that the
|
|
45
|
+
* user must re-login — reconnect loops should stop instead of hammering
|
|
46
|
+
* the refresh endpoint forever with a known-bad token.
|
|
47
|
+
*/
|
|
48
|
+
export declare class AuthRefreshRejectedError extends Error {
|
|
49
|
+
readonly status: number;
|
|
50
|
+
constructor(status: number, message: string);
|
|
51
|
+
}
|
|
43
52
|
/**
|
|
44
53
|
* Stateful helper that owns the in-memory copy of user-auth and knows how
|
|
45
54
|
* to refresh it. Used by the control channel so reconnects always carry
|
package/dist/user-auth.js
CHANGED
|
@@ -144,6 +144,19 @@ export function clearAuthExpiredFlag(file = AUTH_EXPIRED_FLAG_PATH) {
|
|
|
144
144
|
export function isTokenNearExpiry(record, windowMs = 60_000) {
|
|
145
145
|
return record.expiresAt - Date.now() <= windowMs;
|
|
146
146
|
}
|
|
147
|
+
/**
|
|
148
|
+
* Thrown when the Hub rejects a refresh token (401/403). Signals that the
|
|
149
|
+
* user must re-login — reconnect loops should stop instead of hammering
|
|
150
|
+
* the refresh endpoint forever with a known-bad token.
|
|
151
|
+
*/
|
|
152
|
+
export class AuthRefreshRejectedError extends Error {
|
|
153
|
+
status;
|
|
154
|
+
constructor(status, message) {
|
|
155
|
+
super(message);
|
|
156
|
+
this.name = "AuthRefreshRejectedError";
|
|
157
|
+
this.status = status;
|
|
158
|
+
}
|
|
159
|
+
}
|
|
147
160
|
/**
|
|
148
161
|
* Stateful helper that owns the in-memory copy of user-auth and knows how
|
|
149
162
|
* to refresh it. Used by the control channel so reconnects always carry
|
|
@@ -197,13 +210,37 @@ export class UserAuthManager {
|
|
|
197
210
|
expiresInMs: current.expiresAt - Date.now(),
|
|
198
211
|
});
|
|
199
212
|
this.refreshInflight = (async () => {
|
|
200
|
-
|
|
213
|
+
// Refresh tokens rotate server-side. If another local process (e.g. a
|
|
214
|
+
// second daemon racing on the same user-auth.json) refreshed in the
|
|
215
|
+
// meantime, the on-disk refreshToken now differs from our in-memory
|
|
216
|
+
// copy — using the in-memory one would 401 because the server already
|
|
217
|
+
// invalidated it. Re-read disk first and adopt any newer record.
|
|
218
|
+
let basis = current;
|
|
219
|
+
try {
|
|
220
|
+
const onDisk = loadUserAuth(this.file);
|
|
221
|
+
if (onDisk && onDisk.refreshToken !== current.refreshToken) {
|
|
222
|
+
daemonLog.info("user-auth refresh: adopting newer on-disk token", {
|
|
223
|
+
userId: onDisk.userId,
|
|
224
|
+
expiresAt: onDisk.expiresAt,
|
|
225
|
+
});
|
|
226
|
+
this.record = onDisk;
|
|
227
|
+
if (!isTokenNearExpiry(onDisk))
|
|
228
|
+
return onDisk;
|
|
229
|
+
basis = onDisk;
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
catch (err) {
|
|
233
|
+
daemonLog.debug("user-auth refresh: disk reread failed (ignored)", {
|
|
234
|
+
error: err instanceof Error ? err.message : String(err),
|
|
235
|
+
});
|
|
236
|
+
}
|
|
237
|
+
const tok = await refreshDaemonToken(basis.hubUrl, basis.refreshToken);
|
|
201
238
|
const next = {
|
|
202
|
-
...
|
|
239
|
+
...basis,
|
|
203
240
|
accessToken: tok.accessToken,
|
|
204
241
|
refreshToken: tok.refreshToken,
|
|
205
242
|
expiresAt: Date.now() + tok.expiresIn * 1000,
|
|
206
|
-
hubUrl: tok.hubUrl ||
|
|
243
|
+
hubUrl: tok.hubUrl || basis.hubUrl,
|
|
207
244
|
};
|
|
208
245
|
saveUserAuth(next, this.file);
|
|
209
246
|
this.record = next;
|
|
@@ -213,10 +250,22 @@ export class UserAuthManager {
|
|
|
213
250
|
});
|
|
214
251
|
return next;
|
|
215
252
|
})().catch((err) => {
|
|
253
|
+
const status = typeof err.status === "number"
|
|
254
|
+
? (err.status)
|
|
255
|
+
: null;
|
|
256
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
216
257
|
daemonLog.warn("user-auth refresh: failed", {
|
|
217
258
|
userId: current.userId,
|
|
218
|
-
|
|
259
|
+
status,
|
|
260
|
+
error: message,
|
|
219
261
|
});
|
|
262
|
+
if (status === 401 || status === 403) {
|
|
263
|
+
// Refresh token is permanently dead — write the expired flag so
|
|
264
|
+
// `status` surfaces it and re-throw a typed error so the control
|
|
265
|
+
// channel can stop reconnect loops instead of hammering the Hub.
|
|
266
|
+
writeAuthExpiredFlag();
|
|
267
|
+
throw new AuthRefreshRejectedError(status, message);
|
|
268
|
+
}
|
|
220
269
|
throw err;
|
|
221
270
|
}).finally(() => {
|
|
222
271
|
this.refreshInflight = null;
|
package/package.json
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { describe, expect, it, vi } from "vitest";
|
|
1
|
+
import { beforeEach, describe, expect, it, vi } from "vitest";
|
|
2
2
|
|
|
3
3
|
// Hoisted mock for `../adapters/runtimes.js` so each suite can stub
|
|
4
4
|
// `detectRuntimes()` independently — we want coverage of the "empty
|
|
@@ -24,7 +24,13 @@ vi.mock("../adapters/runtimes.js", async () => {
|
|
|
24
24
|
};
|
|
25
25
|
});
|
|
26
26
|
|
|
27
|
-
const { collectRuntimeSnapshot, createProvisioner } = await import("../provision.js");
|
|
27
|
+
const { collectRuntimeSnapshot, clearRuntimeProbeCache, createProvisioner } = await import("../provision.js");
|
|
28
|
+
|
|
29
|
+
beforeEach(() => {
|
|
30
|
+
// The L1 probe is memoized for 30s in production; tests rotate the
|
|
31
|
+
// mocked runtime list between cases, so reset before each.
|
|
32
|
+
clearRuntimeProbeCache();
|
|
33
|
+
});
|
|
28
34
|
const { pushRuntimeSnapshot } = await import("../daemon.js");
|
|
29
35
|
const { CONTROL_FRAME_TYPES } = await import("@botcord/protocol-core");
|
|
30
36
|
import type { GatewayChannelConfig, GatewayRuntimeSnapshot } from "../gateway/index.js";
|
package/src/control-channel.ts
CHANGED
|
@@ -18,13 +18,21 @@ import {
|
|
|
18
18
|
} from "@botcord/protocol-core";
|
|
19
19
|
import { log as daemonLog } from "./log.js";
|
|
20
20
|
import {
|
|
21
|
+
AuthRefreshRejectedError,
|
|
21
22
|
writeAuthExpiredFlag,
|
|
22
23
|
type UserAuthManager,
|
|
23
24
|
} from "./user-auth.js";
|
|
24
25
|
|
|
25
26
|
/** Exponential backoff plan for transient disconnects. */
|
|
26
27
|
const RECONNECT_BACKOFF_MS = [1000, 2000, 4000, 8000, 16000, 30000];
|
|
27
|
-
|
|
28
|
+
/**
|
|
29
|
+
* Keepalive cadence. Has to stay below the smallest idle-timeout in any
|
|
30
|
+
* intermediary on the daemon → Hub WS path. Cloudflare and AWS ALB both
|
|
31
|
+
* default to ~60s of idle without app-level data, and some tunnels strip
|
|
32
|
+
* WS-level ping/pong control frames entirely — hence we send an app-level
|
|
33
|
+
* `pong` heartbeat alongside `ws.ping()` rather than relying on it alone.
|
|
34
|
+
*/
|
|
35
|
+
const KEEPALIVE_INTERVAL_MS = 20_000;
|
|
28
36
|
const REPLAY_DEDUPE_CAP = 256;
|
|
29
37
|
|
|
30
38
|
/**
|
|
@@ -142,8 +150,17 @@ export class ControlChannel {
|
|
|
142
150
|
});
|
|
143
151
|
this.connectInflight = this.connect().catch((err) => {
|
|
144
152
|
// Initial connect failure surfaces to the caller; subsequent
|
|
145
|
-
// reconnects are handled opaquely inside onClose.
|
|
146
|
-
|
|
153
|
+
// reconnects are handled opaquely inside onClose. A refresh-rejected
|
|
154
|
+
// error means the refresh token itself is dead — no point retrying;
|
|
155
|
+
// writeAuthExpiredFlag was already called in user-auth.refresh().
|
|
156
|
+
if (err instanceof AuthRefreshRejectedError) {
|
|
157
|
+
this.stopRequested = true;
|
|
158
|
+
daemonLog.warn("control-channel: refresh rejected; stopping (re-login required)", {
|
|
159
|
+
status: err.status,
|
|
160
|
+
});
|
|
161
|
+
} else {
|
|
162
|
+
this.scheduleReconnect(err);
|
|
163
|
+
}
|
|
147
164
|
throw err;
|
|
148
165
|
});
|
|
149
166
|
try {
|
|
@@ -248,11 +265,23 @@ export class ControlChannel {
|
|
|
248
265
|
this.keepaliveTimer = setInterval(() => {
|
|
249
266
|
const ws = this.ws;
|
|
250
267
|
if (!ws || ws.readyState !== WebSocket.OPEN) return;
|
|
268
|
+
// WS-level ping for normal cases.
|
|
251
269
|
try {
|
|
252
270
|
ws.ping();
|
|
253
271
|
} catch {
|
|
254
272
|
// ignore — next failed send will trigger close
|
|
255
273
|
}
|
|
274
|
+
// App-level heartbeat: a `pong` daemon-initiated frame. Hub recognizes
|
|
275
|
+
// it via `_DAEMON_INITIATED_TYPES` and bumps `last_seen_at`. Critical
|
|
276
|
+
// when an intermediary (Cloudflare, AWS ALB, some k8s ingresses)
|
|
277
|
+
// drops WS-level control frames — those proxies idle-close the WS at
|
|
278
|
+
// ~60s without app-level activity, masquerading as a clean 1006 to
|
|
279
|
+
// both peers.
|
|
280
|
+
this.send({
|
|
281
|
+
id: `hb_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
|
|
282
|
+
type: "pong",
|
|
283
|
+
ts: Date.now(),
|
|
284
|
+
});
|
|
256
285
|
}, this.keepaliveMs);
|
|
257
286
|
}
|
|
258
287
|
|
|
@@ -285,6 +314,13 @@ export class ControlChannel {
|
|
|
285
314
|
|
|
286
315
|
private scheduleReconnect(err?: unknown): void {
|
|
287
316
|
if (this.stopRequested) return;
|
|
317
|
+
if (err instanceof AuthRefreshRejectedError) {
|
|
318
|
+
this.stopRequested = true;
|
|
319
|
+
daemonLog.warn("control-channel: refresh rejected; halting reconnect (re-login required)", {
|
|
320
|
+
status: err.status,
|
|
321
|
+
});
|
|
322
|
+
return;
|
|
323
|
+
}
|
|
288
324
|
const attempt = this.reconnectAttempts;
|
|
289
325
|
this.reconnectAttempts = attempt + 1;
|
|
290
326
|
const delay = this.backoff[Math.min(attempt, this.backoff.length - 1)];
|
|
@@ -314,6 +350,16 @@ export class ControlChannel {
|
|
|
314
350
|
return;
|
|
315
351
|
}
|
|
316
352
|
if (!frame || typeof frame.id !== "string" || typeof frame.type !== "string") {
|
|
353
|
+
// Hub ack responses for daemon-initiated frames (runtime_snapshot push,
|
|
354
|
+
// heartbeat, etc.) carry `{id, ok}` and no `type`. They're expected,
|
|
355
|
+
// not malformed — drop silently. Anything else stays a warn.
|
|
356
|
+
if (
|
|
357
|
+
frame &&
|
|
358
|
+
typeof (frame as { id?: unknown }).id === "string" &&
|
|
359
|
+
typeof (frame as { ok?: unknown }).ok === "boolean"
|
|
360
|
+
) {
|
|
361
|
+
return;
|
|
362
|
+
}
|
|
317
363
|
daemonLog.warn("control-channel: malformed frame", { frame });
|
|
318
364
|
return;
|
|
319
365
|
}
|
package/src/doctor.ts
CHANGED
|
@@ -257,6 +257,9 @@ export function renderDoctor(input: DoctorInput): string {
|
|
|
257
257
|
lines.push(
|
|
258
258
|
`${pad(r.runtime, widths.runtime)} ${pad(r.name, widths.name)} ${pad(r.status, widths.status)} ${pad(r.version, widths.version)} ${r.path}`,
|
|
259
259
|
);
|
|
260
|
+
if (!e.result.available && e.installHint) {
|
|
261
|
+
lines.push(` → ${e.installHint}`);
|
|
262
|
+
}
|
|
260
263
|
if (e.endpoints && e.endpoints.length > 0) {
|
|
261
264
|
for (const ep of e.endpoints) {
|
|
262
265
|
const mark = ep.reachable ? "✓" : "✗";
|
|
@@ -2,6 +2,7 @@ import { afterAll, beforeAll, describe, expect, it } from "vitest";
|
|
|
2
2
|
import {
|
|
3
3
|
chmodSync,
|
|
4
4
|
existsSync,
|
|
5
|
+
mkdirSync,
|
|
5
6
|
mkdtempSync,
|
|
6
7
|
readFileSync,
|
|
7
8
|
rmSync,
|
|
@@ -9,7 +10,10 @@ import {
|
|
|
9
10
|
} from "node:fs";
|
|
10
11
|
import os from "node:os";
|
|
11
12
|
import path from "node:path";
|
|
12
|
-
import {
|
|
13
|
+
import {
|
|
14
|
+
HermesAgentAdapter,
|
|
15
|
+
resolveHermesAcpCommand,
|
|
16
|
+
} from "../runtimes/hermes-agent.js";
|
|
13
17
|
import { agentHermesWorkspaceDir } from "../../agent-workspace.js";
|
|
14
18
|
|
|
15
19
|
// Spawn a tiny Node "ACP server" we control instead of the real hermes-acp.
|
|
@@ -288,6 +292,30 @@ describe("HermesAgentAdapter", () => {
|
|
|
288
292
|
expect(res.error).toMatch(/aborted before spawn/);
|
|
289
293
|
});
|
|
290
294
|
|
|
295
|
+
it("resolveHermesAcpCommand falls back to ~/.hermes venv when PATH lookup fails", () => {
|
|
296
|
+
// Upstream `scripts/install.sh` puts hermes-acp at
|
|
297
|
+
// ~/.hermes/hermes-agent/venv/bin/hermes-acp and only symlinks `hermes`
|
|
298
|
+
// into ~/.local/bin. Simulate that layout: `which hermes-acp` fails,
|
|
299
|
+
// but the venv path exists on disk.
|
|
300
|
+
const fakeHome = mkdtempSync(path.join(os.tmpdir(), "hermes-fallback-"));
|
|
301
|
+
const venvBin = path.join(fakeHome, ".hermes", "hermes-agent", "venv", "bin");
|
|
302
|
+
const target = path.join(venvBin, "hermes-acp");
|
|
303
|
+
mkdirSync(venvBin, { recursive: true });
|
|
304
|
+
writeFileSync(target, "#!/bin/sh\nexit 0\n", { mode: 0o755 });
|
|
305
|
+
chmodSync(target, 0o755);
|
|
306
|
+
|
|
307
|
+
const resolved = resolveHermesAcpCommand({
|
|
308
|
+
env: { PATH: "/nonexistent" },
|
|
309
|
+
homeDir: fakeHome,
|
|
310
|
+
execFileSyncFn: (() => {
|
|
311
|
+
throw new Error("which: not found");
|
|
312
|
+
}) as never,
|
|
313
|
+
});
|
|
314
|
+
expect(resolved).toBe(target);
|
|
315
|
+
|
|
316
|
+
rmSync(fakeHome, { recursive: true, force: true });
|
|
317
|
+
});
|
|
318
|
+
|
|
291
319
|
it("surfaces non-zero exit with stderr snippet", async () => {
|
|
292
320
|
const p = path.join(tmpRoot, "boom.js");
|
|
293
321
|
writeFileSync(
|
|
@@ -13,12 +13,45 @@ import {
|
|
|
13
13
|
type AcpUpdateCtx,
|
|
14
14
|
type AcpUpdateParams,
|
|
15
15
|
} from "./acp-stream.js";
|
|
16
|
-
import {
|
|
16
|
+
import {
|
|
17
|
+
firstExistingPath,
|
|
18
|
+
readCommandVersion,
|
|
19
|
+
resolveCommandOnPath,
|
|
20
|
+
resolveHomePath,
|
|
21
|
+
type ProbeDeps,
|
|
22
|
+
} from "./probe.js";
|
|
17
23
|
import type { RuntimeProbeResult, RuntimeRunOptions, StreamBlock } from "../types.js";
|
|
18
24
|
|
|
19
|
-
/**
|
|
25
|
+
/**
|
|
26
|
+
* Known absolute locations of the `hermes-acp` entry point when it is not on
|
|
27
|
+
* PATH. The upstream `scripts/install.sh` (curl|bash installer) installs a
|
|
28
|
+
* private virtualenv under `~/.hermes/hermes-agent/venv/` and only symlinks
|
|
29
|
+
* the user-facing `hermes` command into `~/.local/bin/` — the `hermes-acp`
|
|
30
|
+
* entry point stays inside the venv. Without a fallback, daemon's PATH-only
|
|
31
|
+
* probe misses every user who installed via the README-recommended script.
|
|
32
|
+
*/
|
|
33
|
+
const HERMES_ACP_FALLBACK_RELATIVE_PATHS = [
|
|
34
|
+
path.join(".hermes", "hermes-agent", "venv", "bin", "hermes-acp"),
|
|
35
|
+
];
|
|
36
|
+
const HERMES_ACP_FALLBACK_SYSTEM_PATHS = [
|
|
37
|
+
"/opt/hermes/hermes-agent/venv/bin/hermes-acp",
|
|
38
|
+
];
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* Resolve the `hermes-acp` executable. Tries PATH first, then falls back to
|
|
42
|
+
* the upstream install.sh's private venv location (`~/.hermes/...`) before
|
|
43
|
+
* giving up. `BOTCORD_HERMES_AGENT_BIN` always wins via the adapter override.
|
|
44
|
+
*/
|
|
20
45
|
export function resolveHermesAcpCommand(deps: ProbeDeps = {}): string | null {
|
|
21
|
-
|
|
46
|
+
const onPath = resolveCommandOnPath("hermes-acp", deps);
|
|
47
|
+
if (onPath) return onPath;
|
|
48
|
+
return firstExistingPath(
|
|
49
|
+
[
|
|
50
|
+
...HERMES_ACP_FALLBACK_RELATIVE_PATHS.map((p) => resolveHomePath(p, deps)),
|
|
51
|
+
...HERMES_ACP_FALLBACK_SYSTEM_PATHS,
|
|
52
|
+
],
|
|
53
|
+
deps,
|
|
54
|
+
);
|
|
22
55
|
}
|
|
23
56
|
|
|
24
57
|
/** Probe whether `hermes-acp` is installed and report its version. */
|
|
@@ -29,6 +29,11 @@ export interface RuntimeModule {
|
|
|
29
29
|
* config loader rejects routing turns to this adapter.
|
|
30
30
|
*/
|
|
31
31
|
supportsRun?: boolean;
|
|
32
|
+
/**
|
|
33
|
+
* Short, single-line install hint shown by `doctor` when the runtime
|
|
34
|
+
* probes as unavailable. Helps users recover without reading source.
|
|
35
|
+
*/
|
|
36
|
+
installHint?: string;
|
|
32
37
|
}
|
|
33
38
|
|
|
34
39
|
/** Built-in runtime module entry for Claude Code. */
|
|
@@ -58,6 +63,8 @@ export const hermesAgentModule: RuntimeModule = {
|
|
|
58
63
|
envVar: "BOTCORD_HERMES_AGENT_BIN",
|
|
59
64
|
probe: () => probeHermesAgent(),
|
|
60
65
|
create: () => new HermesAgentAdapter(),
|
|
66
|
+
installHint:
|
|
67
|
+
'Install: pip install "hermes-agent[acp]" (or set BOTCORD_HERMES_AGENT_BIN to the absolute path of hermes-acp)',
|
|
61
68
|
};
|
|
62
69
|
|
|
63
70
|
/** Built-in runtime module entry for Gemini (probe-only stub). */
|
|
@@ -143,6 +150,7 @@ export interface RuntimeProbeEntry {
|
|
|
143
150
|
binary: string;
|
|
144
151
|
supportsRun: boolean;
|
|
145
152
|
result: RuntimeProbeResult;
|
|
153
|
+
installHint?: string;
|
|
146
154
|
}
|
|
147
155
|
|
|
148
156
|
/** Probe every registered runtime and report installation status. */
|
|
@@ -161,6 +169,7 @@ export function detectRuntimes(): RuntimeProbeEntry[] {
|
|
|
161
169
|
binary: m.binary,
|
|
162
170
|
supportsRun: m.supportsRun !== false,
|
|
163
171
|
result,
|
|
172
|
+
installHint: m.installHint,
|
|
164
173
|
});
|
|
165
174
|
}
|
|
166
175
|
return out;
|
package/src/provision.ts
CHANGED
|
@@ -903,15 +903,39 @@ export function removeAgentFromConfig(
|
|
|
903
903
|
// runtime-discovery snapshot (plan §8.5)
|
|
904
904
|
// ---------------------------------------------------------------------------
|
|
905
905
|
|
|
906
|
+
/**
|
|
907
|
+
* TTL for the L1 runtime-detection cache. `detectRuntimes()` shells out to
|
|
908
|
+
* each adapter binary (claude / codex / gemini / openclaw / hermes) to read
|
|
909
|
+
* `--version`, which routinely costs 1.5–2s in aggregate — long enough to
|
|
910
|
+
* push `list_runtimes` past the Hub's 10s ack budget when combined with the
|
|
911
|
+
* 3s openclaw gateway probe. Versions don't change between dashboard refresh
|
|
912
|
+
* clicks, so cache the L1 snapshot briefly and recompute on miss.
|
|
913
|
+
*/
|
|
914
|
+
const RUNTIME_PROBE_CACHE_TTL_MS = 30_000;
|
|
915
|
+
|
|
916
|
+
let _runtimeProbeCache: { at: number; value: ListRuntimesResult } | null = null;
|
|
917
|
+
|
|
918
|
+
/** Drop the cache (e.g. before a `doctor`-style interactive re-probe). */
|
|
919
|
+
export function clearRuntimeProbeCache(): void {
|
|
920
|
+
_runtimeProbeCache = null;
|
|
921
|
+
}
|
|
922
|
+
|
|
906
923
|
/**
|
|
907
924
|
* Probe every registered adapter and shape the result as the wire-level
|
|
908
925
|
* {@link ListRuntimesResult} — used by both the `list_runtimes` ack path and
|
|
909
926
|
* the daemon-side first-connect `runtime_snapshot` push in `daemon.ts`.
|
|
910
927
|
*
|
|
911
|
-
*
|
|
912
|
-
*
|
|
928
|
+
* Cached for {@link RUNTIME_PROBE_CACHE_TTL_MS}; pass `{ force: true }` to
|
|
929
|
+
* bypass the cache.
|
|
913
930
|
*/
|
|
914
|
-
export function collectRuntimeSnapshot(): ListRuntimesResult {
|
|
931
|
+
export function collectRuntimeSnapshot(opts: { force?: boolean } = {}): ListRuntimesResult {
|
|
932
|
+
if (
|
|
933
|
+
!opts.force &&
|
|
934
|
+
_runtimeProbeCache &&
|
|
935
|
+
Date.now() - _runtimeProbeCache.at < RUNTIME_PROBE_CACHE_TTL_MS
|
|
936
|
+
) {
|
|
937
|
+
return _runtimeProbeCache.value;
|
|
938
|
+
}
|
|
915
939
|
const entries = detectRuntimes();
|
|
916
940
|
const runtimes: RuntimeProbeResult[] = entries.map((entry) => {
|
|
917
941
|
const record: RuntimeProbeResult = {
|
|
@@ -929,7 +953,9 @@ export function collectRuntimeSnapshot(): ListRuntimesResult {
|
|
|
929
953
|
// enough; filling a synthetic message would be misleading.
|
|
930
954
|
return record;
|
|
931
955
|
});
|
|
932
|
-
|
|
956
|
+
const value: ListRuntimesResult = { runtimes, probedAt: Date.now() };
|
|
957
|
+
_runtimeProbeCache = { at: Date.now(), value };
|
|
958
|
+
return value;
|
|
933
959
|
}
|
|
934
960
|
|
|
935
961
|
/** Maximum number of `endpoints[]` entries persisted per runtime (RFC §3.8.2). */
|
|
@@ -1208,7 +1234,7 @@ export async function collectRuntimeSnapshotAsync(opts: {
|
|
|
1208
1234
|
const gateways = opts.cfg?.openclawGateways ?? [];
|
|
1209
1235
|
if (gateways.length === 0) return base;
|
|
1210
1236
|
// Default daemon-side budget is 3s — it must stay below the Hub's
|
|
1211
|
-
// `list_runtimes` ack wait (
|
|
1237
|
+
// `list_runtimes` ack wait (10s, see backend/hub/routers/daemon_control.py)
|
|
1212
1238
|
// so a single slow gateway can't blow the whole snapshot to a 504.
|
|
1213
1239
|
const timeoutMs = opts.timeoutMs ?? 3000;
|
|
1214
1240
|
const capped = gateways.slice(0, RUNTIME_ENDPOINTS_CAP);
|
package/src/user-auth.ts
CHANGED
|
@@ -188,6 +188,20 @@ export function isTokenNearExpiry(record: UserAuthRecord, windowMs = 60_000): bo
|
|
|
188
188
|
return record.expiresAt - Date.now() <= windowMs;
|
|
189
189
|
}
|
|
190
190
|
|
|
191
|
+
/**
|
|
192
|
+
* Thrown when the Hub rejects a refresh token (401/403). Signals that the
|
|
193
|
+
* user must re-login — reconnect loops should stop instead of hammering
|
|
194
|
+
* the refresh endpoint forever with a known-bad token.
|
|
195
|
+
*/
|
|
196
|
+
export class AuthRefreshRejectedError extends Error {
|
|
197
|
+
readonly status: number;
|
|
198
|
+
constructor(status: number, message: string) {
|
|
199
|
+
super(message);
|
|
200
|
+
this.name = "AuthRefreshRejectedError";
|
|
201
|
+
this.status = status;
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
|
|
191
205
|
/**
|
|
192
206
|
* Stateful helper that owns the in-memory copy of user-auth and knows how
|
|
193
207
|
* to refresh it. Used by the control channel so reconnects always carry
|
|
@@ -245,13 +259,35 @@ export class UserAuthManager {
|
|
|
245
259
|
expiresInMs: current.expiresAt - Date.now(),
|
|
246
260
|
});
|
|
247
261
|
this.refreshInflight = (async () => {
|
|
248
|
-
|
|
262
|
+
// Refresh tokens rotate server-side. If another local process (e.g. a
|
|
263
|
+
// second daemon racing on the same user-auth.json) refreshed in the
|
|
264
|
+
// meantime, the on-disk refreshToken now differs from our in-memory
|
|
265
|
+
// copy — using the in-memory one would 401 because the server already
|
|
266
|
+
// invalidated it. Re-read disk first and adopt any newer record.
|
|
267
|
+
let basis = current;
|
|
268
|
+
try {
|
|
269
|
+
const onDisk = loadUserAuth(this.file);
|
|
270
|
+
if (onDisk && onDisk.refreshToken !== current.refreshToken) {
|
|
271
|
+
daemonLog.info("user-auth refresh: adopting newer on-disk token", {
|
|
272
|
+
userId: onDisk.userId,
|
|
273
|
+
expiresAt: onDisk.expiresAt,
|
|
274
|
+
});
|
|
275
|
+
this.record = onDisk;
|
|
276
|
+
if (!isTokenNearExpiry(onDisk)) return onDisk;
|
|
277
|
+
basis = onDisk;
|
|
278
|
+
}
|
|
279
|
+
} catch (err) {
|
|
280
|
+
daemonLog.debug("user-auth refresh: disk reread failed (ignored)", {
|
|
281
|
+
error: err instanceof Error ? err.message : String(err),
|
|
282
|
+
});
|
|
283
|
+
}
|
|
284
|
+
const tok = await refreshDaemonToken(basis.hubUrl, basis.refreshToken);
|
|
249
285
|
const next: UserAuthRecord = {
|
|
250
|
-
...
|
|
286
|
+
...basis,
|
|
251
287
|
accessToken: tok.accessToken,
|
|
252
288
|
refreshToken: tok.refreshToken,
|
|
253
289
|
expiresAt: Date.now() + tok.expiresIn * 1000,
|
|
254
|
-
hubUrl: tok.hubUrl ||
|
|
290
|
+
hubUrl: tok.hubUrl || basis.hubUrl,
|
|
255
291
|
};
|
|
256
292
|
saveUserAuth(next, this.file);
|
|
257
293
|
this.record = next;
|
|
@@ -261,10 +297,23 @@ export class UserAuthManager {
|
|
|
261
297
|
});
|
|
262
298
|
return next;
|
|
263
299
|
})().catch((err) => {
|
|
300
|
+
const status =
|
|
301
|
+
typeof (err as { status?: unknown }).status === "number"
|
|
302
|
+
? ((err as { status: number }).status)
|
|
303
|
+
: null;
|
|
304
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
264
305
|
daemonLog.warn("user-auth refresh: failed", {
|
|
265
306
|
userId: current.userId,
|
|
266
|
-
|
|
307
|
+
status,
|
|
308
|
+
error: message,
|
|
267
309
|
});
|
|
310
|
+
if (status === 401 || status === 403) {
|
|
311
|
+
// Refresh token is permanently dead — write the expired flag so
|
|
312
|
+
// `status` surfaces it and re-throw a typed error so the control
|
|
313
|
+
// channel can stop reconnect loops instead of hammering the Hub.
|
|
314
|
+
writeAuthExpiredFlag();
|
|
315
|
+
throw new AuthRefreshRejectedError(status, message);
|
|
316
|
+
}
|
|
268
317
|
throw err;
|
|
269
318
|
}).finally(() => {
|
|
270
319
|
this.refreshInflight = null;
|