agent-relay-orchestrator 0.19.2 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-relay-orchestrator",
3
- "version": "0.19.2",
3
+ "version": "0.20.0",
4
4
  "description": "Agent Relay orchestrator — manages agent lifecycle across hosts",
5
5
  "type": "module",
6
6
  "bin": {
@@ -16,7 +16,7 @@
16
16
  "test": "bun test"
17
17
  },
18
18
  "dependencies": {
19
- "agent-relay-sdk": "0.2.10"
19
+ "agent-relay-sdk": "0.2.11"
20
20
  },
21
21
  "devDependencies": {
22
22
  "@types/bun": "latest",
@@ -6,6 +6,31 @@ import { detectSelfSupervision, type SelfSupervision } from "./self-supervision"
6
6
  const VALID_PROVIDERS = new Set(["auto", "all", "codex", "claude", "orchestrator"]);
7
7
  const SEMVER_RE = /^\d+\.\d+\.\d+(?:-[0-9A-Za-z.-]+)?$/;
8
8
 
9
/**
 * Matches npm output produced by the stale-packument cache race (#211).
 *
 * Immediately after a publish, a remote host's cached npm metadata may not yet
 * list the new version, so `npm install ...@X` fails with ETARGET /
 * "No matching version found" even though the version IS published. The
 * packument refreshes within seconds; `--prefer-online` plus a retry recovers.
 */
const CACHE_RACE_RE = /ETARGET|No matching version found|notarget/i;

/** Default number of extra install attempts made after the first one. */
const DEFAULT_INSTALL_RETRIES = 4;

/** Default base backoff between install attempts, in milliseconds. */
const DEFAULT_INSTALL_RETRY_BASE_MS = 2000;
18
+
19
/** Optional settings for a self-upgrade run; every field may be omitted. */
export interface SelfUpgradeOptions {
  /** Delay function awaited between install retries (tests can inject a stub). */
  sleep?: (ms: number) => Promise<void>;

  /** Upper bound on the extra install attempts made after the first one fails on a cache-race error. */
  installRetries?: number;

  /** Starting backoff in milliseconds; it doubles on every attempt (base, 2×, 4×, …). */
  installRetryBaseMs?: number;

  /** Supplied self-supervision value used in place of detection (tests can inject one). */
  supervision?: SelfSupervision;
}
29
+
30
/**
 * Reports whether the given install output matches the stale-packument
 * cache-race pattern (`CACHE_RACE_RE`).
 *
 * @param output - Text captured from the install command.
 * @returns `true` when the pattern is found anywhere in `output`.
 */
function isCacheRaceError(output: string): boolean {
  const looksLikeCacheRace = CACHE_RACE_RE.test(output);
  return looksLikeCacheRace;
}
33
+
9
34
  export interface SelfUpgradeRunner {
10
35
  run(cmd: string[]): Promise<{ exitCode: number; stdout: string; stderr: string }>;
11
36
  commandExists(name: string): boolean;
@@ -96,8 +121,9 @@ export async function handleSelfUpgrade(
96
121
  _config: OrchestratorConfig,
97
122
  relay: RelayClient,
98
123
  runner: SelfUpgradeRunner = defaultRunner,
124
+ opts: SelfUpgradeOptions = {},
99
125
  ): Promise<void> {
100
- const plan = planSelfUpgrade(command.params, detectSelfSupervision(), runner);
126
+ const plan = planSelfUpgrade(command.params, opts.supervision ?? detectSelfSupervision(), runner);
101
127
  await relay.updateCommand(command.id, "running", {
102
128
  phase: "installing",
103
129
  targetVersion: plan.targetVersion,
@@ -105,7 +131,17 @@ export async function handleSelfUpgrade(
105
131
  unit: plan.unit,
106
132
  });
107
133
 
108
- const install = await runner.run(plan.installCmd);
134
+ const install = await runInstallWithRetry(plan, runner, opts, async (attempt, delayMs) => {
135
+ await relay.updateCommand(command.id, "running", {
136
+ phase: "installing",
137
+ targetVersion: plan.targetVersion,
138
+ unit: plan.unit,
139
+ retry: attempt,
140
+ retryDelayMs: delayMs,
141
+ note: `target ${plan.targetVersion} not yet visible to this host's npm cache; retrying (attempt ${attempt})`,
142
+ });
143
+ console.error(`[orchestrator] self-upgrade install hit a stale-cache race for ${plan.targetVersion}; retry ${attempt} in ${delayMs}ms`);
144
+ });
109
145
  if (install.exitCode !== 0) {
110
146
  throw new Error(`install failed (exit ${install.exitCode}): ${(install.stderr || install.stdout).trim().slice(-500)}`);
111
147
  }
@@ -125,6 +161,32 @@ export async function handleSelfUpgrade(
125
161
  console.error(`[orchestrator] self-upgrade to ${plan.targetVersion} installed; restart dispatched for ${plan.unit}`);
126
162
  }
127
163
 
164
/**
 * Executes `plan.installCmd` and re-runs it, with exponentially growing
 * pauses, for as long as the failure looks like a stale-packument cache race
 * (#211). Any other failure is handed back at once so that genuine errors are
 * not hidden behind retries.
 *
 * @param plan - Upgrade plan whose `installCmd` is executed.
 * @param runner - Runs the install command.
 * @param opts - Optional overrides for retry count, base backoff and sleep.
 * @param onRetry - Awaited before each pause with the 1-based retry number and
 *   the delay (in ms) about to be slept.
 * @returns The result of the last install attempt, successful or not.
 */
async function runInstallWithRetry(
  plan: SelfUpgradePlan,
  runner: SelfUpgradeRunner,
  opts: SelfUpgradeOptions,
  onRetry: (attempt: number, delayMs: number) => Promise<void>,
): Promise<{ exitCode: number; stdout: string; stderr: string }> {
  const maxRetries = opts.installRetries ?? DEFAULT_INSTALL_RETRIES;
  const backoffBaseMs = opts.installRetryBaseMs ?? DEFAULT_INSTALL_RETRY_BASE_MS;
  const pause =
    opts.sleep ??
    ((ms: number) =>
      new Promise<void>((done) => {
        setTimeout(done, ms);
      }));

  let outcome = await runner.run(plan.installCmd);
  let attempt = 1;
  while (attempt <= maxRetries) {
    // A successful install needs no further work.
    if (outcome.exitCode === 0) {
      return outcome;
    }
    // Only the cache race is worth retrying; everything else is final.
    if (!isCacheRaceError(outcome.stderr || outcome.stdout)) {
      return outcome;
    }

    // Backoff doubles each round: base, 2×base, 4×base, …
    const delayMs = backoffBaseMs * 2 ** (attempt - 1);
    await onRetry(attempt, delayMs);
    await pause(delayMs);
    outcome = await runner.run(plan.installCmd);
    attempt += 1;
  }
  return outcome;
}
189
+
128
190
  function normalizeProviders(value: unknown): string[] {
129
191
  const list = Array.isArray(value)
130
192
  ? value.filter((v): v is string => typeof v === "string").map((v) => v.trim()).filter(Boolean)
@@ -147,6 +209,9 @@ function buildInstallCommand(
147
209
  return [
148
210
  "npm",
149
211
  "install",
212
+ // Revalidate the packument instead of trusting a stale cache, so a
213
+ // just-published version is visible to this host's npm right away (#211).
214
+ "--prefer-online",
150
215
  "--prefix",
151
216
  supervision.runtimePrefix,
152
217
  ...packagesForProviders(targetVersion, providers),