agent-relay-orchestrator 0.78.2 → 0.78.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/quota-poller.ts +143 -21
- package/src/relay.ts +5 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-relay-orchestrator",
|
|
3
|
-
"version": "0.78.2",
|
|
3
|
+
"version": "0.78.4",
|
|
4
4
|
"description": "Agent Relay orchestrator — manages agent lifecycle across hosts",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
"test": "bun test"
|
|
17
17
|
},
|
|
18
18
|
"dependencies": {
|
|
19
|
-
"agent-relay-sdk": "0.2.
|
|
19
|
+
"agent-relay-sdk": "0.2.56"
|
|
20
20
|
},
|
|
21
21
|
"devDependencies": {
|
|
22
22
|
"@types/bun": "latest",
|
package/src/quota-poller.ts
CHANGED
|
@@ -1,22 +1,25 @@
|
|
|
1
1
|
import { homedir } from "node:os";
|
|
2
|
-
import { existsSync, readdirSync } from "node:fs";
|
|
2
|
+
import { existsSync, readFileSync, readdirSync } from "node:fs";
|
|
3
3
|
import { createServer } from "node:net";
|
|
4
4
|
import { join } from "node:path";
|
|
5
5
|
import {
|
|
6
|
+
DEFAULT_PROVIDER_QUOTA_CONFIG,
|
|
6
7
|
QUOTA_FAILURE_LOG_INTERVAL_MS,
|
|
7
8
|
QUOTA_FAST_RETRY_MS,
|
|
8
|
-
QUOTA_POLL_INTERVAL_MS,
|
|
9
9
|
QuotaCollectionError,
|
|
10
10
|
collectClaudeQuotaSample,
|
|
11
11
|
collectCodexQuotaSample,
|
|
12
|
+
credentialAccountKey,
|
|
13
|
+
normalizeProviderQuotaConfig,
|
|
12
14
|
providerQuotaErrorFromCollectorError,
|
|
13
15
|
quotaRetryAfterMs,
|
|
16
|
+
readClaudeOAuthAccessToken,
|
|
14
17
|
resolveStableClaudeQuotaIdentity,
|
|
15
18
|
resolveStableCodexQuotaIdentityFromHome,
|
|
16
19
|
type ProviderQuotaIdentity,
|
|
17
20
|
type ProviderQuotaSample,
|
|
18
21
|
} from "agent-relay-sdk/provider-quota";
|
|
19
|
-
import type { ProviderQuotaLeaseAcquireInput, ProviderQuotaUpdateInput } from "agent-relay-sdk";
|
|
22
|
+
import type { ProviderQuotaConfig, ProviderQuotaConfigMap, ProviderQuotaLeaseAcquireInput, ProviderQuotaUpdateInput } from "agent-relay-sdk";
|
|
20
23
|
import { errMessage } from "agent-relay-sdk";
|
|
21
24
|
import { codexCommandFromEnv, providerHomeRootFromEnv, type OrchestratorConfig } from "./config";
|
|
22
25
|
|
|
@@ -25,6 +28,14 @@ const QUOTA_LEASE_RENEW_MS = 30_000;
|
|
|
25
28
|
const CODEX_APP_SERVER_CONNECT_ATTEMPTS = 40;
|
|
26
29
|
const CODEX_APP_SERVER_CONNECT_RETRY_MS = 250;
|
|
27
30
|
|
|
31
|
+
// A provider that is configured but has no usable quota credential on this host
|
|
32
|
+
// is uncollectable by design. Rather than silently omitting it (so the row
|
|
33
|
+
// vanishes from the dashboard), we report a marker record carrying the reason,
|
|
34
|
+
// so the widget can show a muted "unavailable" row.
|
|
35
|
+
// A stable synthetic account key keeps it out of the host:/home: unstable-key prune.
|
|
36
|
+
const PROVIDER_QUOTA_UNAVAILABLE_ACCOUNT_KEY = "unavailable";
|
|
37
|
+
const PROVIDER_QUOTA_UNAVAILABLE_ERROR_TYPE = "unavailable";
|
|
38
|
+
|
|
28
39
|
type QuotaRelay = {
|
|
29
40
|
acquireProviderQuotaLease(orchestratorId: string, input: ProviderQuotaLeaseAcquireInput): Promise<{
|
|
30
41
|
acquired: boolean;
|
|
@@ -33,14 +44,19 @@ type QuotaRelay = {
|
|
|
33
44
|
}>;
|
|
34
45
|
releaseProviderQuotaLease(orchestratorId: string, input: ProviderQuotaLeaseAcquireInput & { leaseToken: string }): Promise<unknown>;
|
|
35
46
|
reportProviderQuota(input: ProviderQuotaUpdateInput): Promise<unknown>;
|
|
47
|
+
getProviderQuotaConfig(): Promise<ProviderQuotaConfigMap>;
|
|
36
48
|
connected: boolean;
|
|
37
49
|
};
|
|
38
50
|
|
|
39
51
|
type QuotaCandidate = ProviderQuotaIdentity & {
|
|
52
|
+
accessToken?: string;
|
|
40
53
|
appServerUrl?: string;
|
|
41
54
|
codexHome?: string;
|
|
42
55
|
};
|
|
43
56
|
|
|
57
|
+
type ProviderSkip = { provider: string; reason: string };
|
|
58
|
+
type QuotaDiscovery = { candidates: QuotaCandidate[]; skips: ProviderSkip[] };
|
|
59
|
+
|
|
44
60
|
type QuotaPollState = {
|
|
45
61
|
leaseToken?: string;
|
|
46
62
|
leaseExpiresAt?: number;
|
|
@@ -55,6 +71,10 @@ export class OrchestratorQuotaPoller {
|
|
|
55
71
|
private inFlight = false;
|
|
56
72
|
private readonly states = new Map<string, QuotaPollState>();
|
|
57
73
|
private readonly logStates = new Map<string, { key: string; at: number }>();
|
|
74
|
+
// Per-provider quota config (#605), refreshed each tick. Empty until the first
|
|
75
|
+
// successful fetch — configFor() falls back to defaults so a fresh/empty config
|
|
76
|
+
// (or an older relay without the endpoint) preserves today's behavior.
|
|
77
|
+
private quotaConfig: ProviderQuotaConfigMap = {};
|
|
58
78
|
|
|
59
79
|
constructor(
|
|
60
80
|
private readonly config: OrchestratorConfig,
|
|
@@ -91,11 +111,15 @@ export class OrchestratorQuotaPoller {
|
|
|
91
111
|
if (this.inFlight || !this.active || !this.relay.connected) return;
|
|
92
112
|
this.inFlight = true;
|
|
93
113
|
try {
|
|
94
|
-
|
|
114
|
+
await this.refreshQuotaConfig();
|
|
115
|
+
const { candidates, skips } = await this.discoverCandidates();
|
|
95
116
|
await this.releaseRemovedCandidates(candidates);
|
|
96
117
|
for (const candidate of candidates) {
|
|
97
118
|
await this.processCandidate(candidate);
|
|
98
119
|
}
|
|
120
|
+
for (const skip of skips) {
|
|
121
|
+
await this.reportSkip(skip);
|
|
122
|
+
}
|
|
99
123
|
} finally {
|
|
100
124
|
this.inFlight = false;
|
|
101
125
|
this.schedule(this.options.intervalMs ?? QUOTA_LEASE_RENEW_MS);
|
|
@@ -112,38 +136,73 @@ export class OrchestratorQuotaPoller {
|
|
|
112
136
|
}, Math.max(1_000, delayMs));
|
|
113
137
|
}
|
|
114
138
|
|
|
115
|
-
|
|
139
|
+
// Refresh per-provider quota config (#605). Best-effort: on failure we keep the
|
|
140
|
+
// last known config (defaults for any unset provider), so a transient relay blip
|
|
141
|
+
// never silently stops collection.
|
|
142
|
+
private async refreshQuotaConfig(): Promise<void> {
|
|
143
|
+
try {
|
|
144
|
+
this.quotaConfig = await this.relay.getProviderQuotaConfig();
|
|
145
|
+
} catch {
|
|
146
|
+
// keep prior config; configFor() defaults any missing provider.
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
private configFor(provider: string): ProviderQuotaConfig {
|
|
151
|
+
const stored = this.quotaConfig[provider];
|
|
152
|
+
return stored ? normalizeProviderQuotaConfig(stored) : { ...DEFAULT_PROVIDER_QUOTA_CONFIG };
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
private async discoverCandidates(): Promise<QuotaDiscovery> {
|
|
116
156
|
const candidates: QuotaCandidate[] = [];
|
|
117
|
-
|
|
118
|
-
|
|
157
|
+
const skips: ProviderSkip[] = [];
|
|
158
|
+
// A disabled provider (#605) is not collected from at all: no discovery → no
|
|
159
|
+
// polling/API calls, leases released by releaseRemovedCandidates, and no
|
|
160
|
+
// skip-marker row (disabled is intentional, not a credential failure).
|
|
161
|
+
if (this.config.providers.includes("claude") && this.configFor("claude").enabled) {
|
|
162
|
+
const found = await this.discoverClaudeCandidates();
|
|
163
|
+
candidates.push(...found.candidates);
|
|
164
|
+
if (found.skipReason) skips.push({ provider: "claude", reason: found.skipReason });
|
|
119
165
|
}
|
|
120
|
-
if (this.config.providers.includes("codex")) {
|
|
121
|
-
|
|
166
|
+
if (this.config.providers.includes("codex") && this.configFor("codex").enabled) {
|
|
167
|
+
const found = await this.discoverCodexCandidates();
|
|
168
|
+
candidates.push(...found.candidates);
|
|
169
|
+
if (found.skipReason) skips.push({ provider: "codex", reason: found.skipReason });
|
|
122
170
|
}
|
|
123
171
|
const deduped = new Map<string, QuotaCandidate>();
|
|
124
172
|
for (const candidate of candidates) {
|
|
125
173
|
deduped.set(candidateStateKey(candidate), candidate);
|
|
126
174
|
}
|
|
127
|
-
return [...deduped.values()];
|
|
175
|
+
return { candidates: [...deduped.values()], skips };
|
|
128
176
|
}
|
|
129
177
|
|
|
130
|
-
private async discoverClaudeCandidates(): Promise<QuotaCandidate[]> {
|
|
131
|
-
const
|
|
178
|
+
private async discoverClaudeCandidates(): Promise<{ candidates: QuotaCandidate[]; skipReason?: string }> {
|
|
179
|
+
const credentialsPaths = [
|
|
132
180
|
join(this.config.env.CLAUDE_CONFIG_DIR || process.env.CLAUDE_CONFIG_DIR || join(homedir(), ".claude"), ".credentials.json"),
|
|
133
181
|
...providerHomeCredentialPaths("claude", ".credentials.json"),
|
|
134
182
|
];
|
|
135
183
|
const candidates: QuotaCandidate[] = [];
|
|
136
|
-
for (const
|
|
184
|
+
for (const accessToken of [
|
|
185
|
+
this.config.env.CLAUDE_CODE_OAUTH_TOKEN,
|
|
186
|
+
process.env.CLAUDE_CODE_OAUTH_TOKEN,
|
|
187
|
+
...claudeSetupTokenEnvPaths(this.config).map((path) => readEnvFileValue(path, "CLAUDE_CODE_OAUTH_TOKEN")),
|
|
188
|
+
]) {
|
|
189
|
+
if (accessToken) candidates.push(claudeBearerCandidate(accessToken));
|
|
190
|
+
}
|
|
191
|
+
for (const credentialsPath of credentialsPaths) {
|
|
192
|
+
const accessToken = await readClaudeOAuthAccessToken(credentialsPath);
|
|
193
|
+
if (!accessToken) continue;
|
|
137
194
|
const identity = await resolveStableClaudeQuotaIdentity({ credentialsPath });
|
|
138
|
-
|
|
195
|
+
candidates.push({ ...(identity ?? claudeBearerCandidate(accessToken)), credentialsPath, accessToken });
|
|
139
196
|
}
|
|
140
197
|
if (candidates.length === 0) {
|
|
141
|
-
|
|
198
|
+
const reason = "no usable Claude bearer token on this host";
|
|
199
|
+
this.logOnce("claude:no-usable-bearer", `quota refresh skipped for claude: ${reason}`);
|
|
200
|
+
return { candidates, skipReason: reason };
|
|
142
201
|
}
|
|
143
|
-
return candidates;
|
|
202
|
+
return { candidates };
|
|
144
203
|
}
|
|
145
204
|
|
|
146
|
-
private async discoverCodexCandidates(): Promise<QuotaCandidate[]> {
|
|
205
|
+
private async discoverCodexCandidates(): Promise<{ candidates: QuotaCandidate[]; skipReason?: string }> {
|
|
147
206
|
const homes = [
|
|
148
207
|
this.config.env.CODEX_HOME || process.env.CODEX_HOME || join(homedir(), ".codex"),
|
|
149
208
|
...providerHomeConfigDirs("codex", "auth.json"),
|
|
@@ -155,9 +214,25 @@ export class OrchestratorQuotaPoller {
|
|
|
155
214
|
if (identity) candidates.push({ ...identity, codexHome, ...(appServerUrl ? { appServerUrl } : {}) });
|
|
156
215
|
}
|
|
157
216
|
if (candidates.length === 0) {
|
|
158
|
-
|
|
217
|
+
const reason = "no Codex account id found in auth.json";
|
|
218
|
+
this.logOnce("codex:no-stable-auth", `quota refresh skipped for codex: ${reason}`);
|
|
219
|
+
return { candidates, skipReason: reason };
|
|
159
220
|
}
|
|
160
|
-
return candidates;
|
|
221
|
+
return { candidates };
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
// Publish a marker record for a provider that is configured but uncollectable on
|
|
225
|
+
// this host, so the dashboard shows a muted "unavailable" row with the reason
|
|
226
|
+
// instead of dropping the provider entirely. Re-sent each tick to keep the row
|
|
227
|
+
// fresh (the day-old prune is keyed on updated_at).
|
|
228
|
+
private async reportSkip(skip: ProviderSkip): Promise<void> {
|
|
229
|
+
await this.relay.reportProviderQuota({
|
|
230
|
+
provider: skip.provider,
|
|
231
|
+
accountKey: PROVIDER_QUOTA_UNAVAILABLE_ACCOUNT_KEY,
|
|
232
|
+
lastAttemptAt: this.now(),
|
|
233
|
+
lastError: { type: PROVIDER_QUOTA_UNAVAILABLE_ERROR_TYPE, message: skip.reason },
|
|
234
|
+
sourceAgentId: this.sourceAgentId(),
|
|
235
|
+
}).catch((publishError) => this.log(`quota skip publish failed for ${skip.provider}: ${errMessage(publishError)}`));
|
|
161
236
|
}
|
|
162
237
|
|
|
163
238
|
private async releaseRemovedCandidates(candidates: QuotaCandidate[]): Promise<void> {
|
|
@@ -177,6 +252,9 @@ export class OrchestratorQuotaPoller {
|
|
|
177
252
|
if (!await this.ensureLease(candidate, state, now)) return;
|
|
178
253
|
if (state.nextPollAt !== undefined && state.nextPollAt > now) return;
|
|
179
254
|
|
|
255
|
+
// Per-provider cadence (#605): the configured interval governs the gap between
|
|
256
|
+
// successful polls and the post-failure retry once a first attempt has landed.
|
|
257
|
+
const pollIntervalMs = this.configFor(candidate.provider).pollIntervalMs;
|
|
180
258
|
const lastAttemptAt = now;
|
|
181
259
|
try {
|
|
182
260
|
const sample = await this.collect(candidate);
|
|
@@ -191,13 +269,17 @@ export class OrchestratorQuotaPoller {
|
|
|
191
269
|
};
|
|
192
270
|
await this.relay.reportProviderQuota(update);
|
|
193
271
|
state.lastAttemptAt = update.lastAttemptAt;
|
|
194
|
-
state.nextPollAt = now + QUOTA_POLL_INTERVAL_MS;
|
|
272
|
+
state.nextPollAt = now + pollIntervalMs;
|
|
195
273
|
} catch (error) {
|
|
196
274
|
const retryAfterMs = quotaRetryAfterMs(error);
|
|
197
275
|
const lastError = providerQuotaErrorFromCollectorError(error, retryAfterMs);
|
|
198
|
-
const retryDelayMs = retryAfterMs ?? (state.lastAttemptAt ? QUOTA_POLL_INTERVAL_MS : QUOTA_FAST_RETRY_MS);
|
|
276
|
+
const retryDelayMs = retryAfterMs ?? (state.lastAttemptAt ? pollIntervalMs : QUOTA_FAST_RETRY_MS);
|
|
199
277
|
state.lastAttemptAt = lastAttemptAt;
|
|
200
278
|
state.nextPollAt = now + retryDelayMs;
|
|
279
|
+
if (candidate.provider === "claude" && retryAfterMs !== undefined) {
|
|
280
|
+
this.logFailure(candidate, error, retryAfterMs);
|
|
281
|
+
return;
|
|
282
|
+
}
|
|
201
283
|
await this.relay.reportProviderQuota({
|
|
202
284
|
provider: candidate.provider,
|
|
203
285
|
accountKey: candidate.accountKey,
|
|
@@ -232,6 +314,7 @@ export class OrchestratorQuotaPoller {
|
|
|
232
314
|
if (candidate.provider === "claude") {
|
|
233
315
|
return collectClaudeQuotaSample({
|
|
234
316
|
agentId: this.sourceAgentId(),
|
|
317
|
+
accessToken: candidate.accessToken,
|
|
235
318
|
credentialsPath: candidate.credentialsPath,
|
|
236
319
|
fetchImpl: this.options.fetchImpl,
|
|
237
320
|
});
|
|
@@ -374,6 +457,45 @@ function providerHomeCredentialPaths(provider: "claude", markerFile: string): st
|
|
|
374
457
|
return providerHomeConfigDirs(provider, markerFile).map((dir) => join(dir, markerFile));
|
|
375
458
|
}
|
|
376
459
|
|
|
460
|
+
function claudeSetupTokenEnvPaths(config: OrchestratorConfig): string[] {
|
|
461
|
+
const configDir = config.env.CLAUDE_CONFIG_DIR || process.env.CLAUDE_CONFIG_DIR || join(homedir(), ".claude");
|
|
462
|
+
return [
|
|
463
|
+
join(configDir, "setup-token.env"),
|
|
464
|
+
...providerHomeConfigDirs("claude", "setup-token.env").map((dir) => join(dir, "setup-token.env")),
|
|
465
|
+
];
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
function claudeBearerCandidate(accessToken: string): QuotaCandidate {
|
|
469
|
+
return {
|
|
470
|
+
provider: "claude",
|
|
471
|
+
accountKey: credentialAccountKey(accessToken),
|
|
472
|
+
accessToken,
|
|
473
|
+
};
|
|
474
|
+
}
|
|
475
|
+
|
|
476
|
+
function readEnvFileValue(path: string, key: string): string | undefined {
|
|
477
|
+
try {
|
|
478
|
+
for (const line of readFileSync(path, "utf8").split(/\r?\n/)) {
|
|
479
|
+
const match = new RegExp(`^\\s*(?:export\\s+)?${key}\\s*=\\s*(.*)\\s*$`).exec(line);
|
|
480
|
+
if (!match) continue;
|
|
481
|
+
return cleanEnvValue(match[1] ?? "");
|
|
482
|
+
}
|
|
483
|
+
} catch {
|
|
484
|
+
return undefined;
|
|
485
|
+
}
|
|
486
|
+
return undefined;
|
|
487
|
+
}
|
|
488
|
+
|
|
489
|
+
function cleanEnvValue(value: string): string | undefined {
|
|
490
|
+
let cleaned = value.trim();
|
|
491
|
+
if (!cleaned) return undefined;
|
|
492
|
+
const quote = cleaned[0];
|
|
493
|
+
if ((quote === "'" || quote === "\"") && cleaned.endsWith(quote)) {
|
|
494
|
+
cleaned = cleaned.slice(1, -1);
|
|
495
|
+
}
|
|
496
|
+
return cleaned.trim() || undefined;
|
|
497
|
+
}
|
|
498
|
+
|
|
377
499
|
function safeReadDir(path: string): string[] {
|
|
378
500
|
try {
|
|
379
501
|
return readdirSync(path, { withFileTypes: true })
|
package/src/relay.ts
CHANGED
|
@@ -2,7 +2,7 @@ import type { OrchestratorConfig } from "./config";
|
|
|
2
2
|
import type { ProviderProbeCache } from "./provider-probe";
|
|
3
3
|
import { detectSelfSupervision } from "./self-supervision";
|
|
4
4
|
import { GIT_SHA, ORCHESTRATOR_PROTOCOL_VERSION, VERSION, runtimeMetadata } from "./version";
|
|
5
|
-
import type { AgentLifecycle, ProviderQuotaLeaseAcquireInput, ProviderQuotaLeaseAcquireResult, ProviderQuotaUpdateInput, WorkspaceMetadata, WorkspaceMode, ManagedSessionExitDiagnostics as SdkManagedSessionExitDiagnostics } from "agent-relay-sdk";
|
|
5
|
+
import type { AgentLifecycle, ProviderQuotaConfigMap, ProviderQuotaLeaseAcquireInput, ProviderQuotaLeaseAcquireResult, ProviderQuotaUpdateInput, WorkspaceMetadata, WorkspaceMode, ManagedSessionExitDiagnostics as SdkManagedSessionExitDiagnostics } from "agent-relay-sdk";
|
|
6
6
|
import { ReconnectionManager, RelayHttpClient } from "agent-relay-sdk";
|
|
7
7
|
|
|
8
8
|
export interface RelayClient {
|
|
@@ -14,6 +14,7 @@ export interface RelayClient {
|
|
|
14
14
|
acquireProviderQuotaLease(orchestratorId: string, input: ProviderQuotaLeaseAcquireInput): Promise<ProviderQuotaLeaseAcquireResult>;
|
|
15
15
|
releaseProviderQuotaLease(orchestratorId: string, input: ProviderQuotaLeaseAcquireInput & { leaseToken: string }): Promise<{ released: boolean }>;
|
|
16
16
|
reportProviderQuota(input: ProviderQuotaUpdateInput): Promise<unknown>;
|
|
17
|
+
getProviderQuotaConfig(): Promise<ProviderQuotaConfigMap>;
|
|
17
18
|
setApiUrl(url: string): void;
|
|
18
19
|
startHeartbeatLoop(): void;
|
|
19
20
|
stopHeartbeatLoop(): void;
|
|
@@ -251,6 +252,9 @@ export function createRelayClient(config: OrchestratorConfig, probeCache: Provid
|
|
|
251
252
|
reportProviderQuota(input: ProviderQuotaUpdateInput): Promise<unknown> {
|
|
252
253
|
return http.upsertProviderQuota(input);
|
|
253
254
|
},
|
|
255
|
+
getProviderQuotaConfig(): Promise<ProviderQuotaConfigMap> {
|
|
256
|
+
return http.getProviderQuotaConfig();
|
|
257
|
+
},
|
|
254
258
|
get connected() { return connected; },
|
|
255
259
|
};
|
|
256
260
|
}
|