@desplega.ai/agent-swarm 1.94.0 → 1.96.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/README.md +3 -3
  2. package/openapi.json +46 -1
  3. package/package.json +4 -3
  4. package/src/be/boot-scrub-logs.ts +76 -0
  5. package/src/be/db.ts +22 -10
  6. package/src/be/migrations/094_mcp_extra_authorize_params.sql +4 -0
  7. package/src/be/modelsdev-cache.json +89422 -85636
  8. package/src/be/skill-sync.ts +4 -4
  9. package/src/be/swarm-config-guard.ts +8 -0
  10. package/src/commands/provider-credentials.ts +37 -9
  11. package/src/commands/runner.ts +28 -0
  12. package/src/http/agents.ts +1 -0
  13. package/src/http/config.ts +24 -4
  14. package/src/http/index.ts +9 -0
  15. package/src/http/mcp-oauth.ts +14 -0
  16. package/src/oauth/mcp-wrapper.ts +14 -0
  17. package/src/prompts/session-templates.ts +21 -0
  18. package/src/providers/codex-skill-resolver.ts +22 -8
  19. package/src/providers/opencode-adapter.ts +20 -2
  20. package/src/providers/pi-mono-adapter.ts +160 -21
  21. package/src/providers/types.ts +33 -0
  22. package/src/tests/bedrock-model-groups.test.ts +135 -0
  23. package/src/tests/credential-check.test.ts +538 -50
  24. package/src/tests/harness-provider-resolution.test.ts +23 -0
  25. package/src/tests/mcp-oauth-queries.test.ts +71 -1
  26. package/src/tests/mcp-oauth-wrapper.test.ts +109 -0
  27. package/src/tests/opencode-adapter.test.ts +29 -1
  28. package/src/tests/provider-command-format.test.ts +12 -0
  29. package/src/tests/secret-scrubber.test.ts +73 -1
  30. package/src/tests/skill-fs-writer.test.ts +7 -1
  31. package/src/tests/skill-sync.test.ts +15 -3
  32. package/src/tools/mcp-servers/mcp-server-create.ts +7 -0
  33. package/src/tools/mcp-servers/mcp-server-update.ts +8 -0
  34. package/src/tools/swarm-config/get-config.ts +9 -1
  35. package/src/tools/swarm-config/list-config.ts +8 -0
  36. package/src/types.ts +22 -0
  37. package/src/utils/secret-scrubber.ts +33 -12
  38. package/src/utils/skill-fs-writer.ts +11 -3
@@ -8,7 +8,7 @@
8
8
 
9
9
  import { existsSync, lstatSync, symlinkSync, unlinkSync } from "node:fs";
10
10
  import { join } from "node:path";
11
- import { getModel } from "@earendil-works/pi-ai";
11
+ import { getModel, getModels } from "@earendil-works/pi-ai";
12
12
  import type {
13
13
  AgentSessionEvent,
14
14
  CreateAgentSessionOptions,
@@ -74,40 +74,179 @@ function modelToCredKeys(modelStr: string | undefined): string[] | null {
74
74
  return null;
75
75
  }
76
76
 
77
+ /**
78
+ * Return the pi-ai Bedrock models the harness can actually drive via the
79
+ * Converse API (the catalog from `getModels("amazon-bedrock")`). Each id is a
80
+ * valid pi-ai id — base foundation-model id OR inference-profile id (`us.` /
81
+ * `eu.` / `apac.` / `au.` / `global.` prefixes) — so the matched id round-trips
82
+ * through `MODEL_OVERRIDE=amazon-bedrock/<id>` unchanged. Used as the
83
+ * harness-drivable half of the (drivable ∩ invocable) intersection.
84
+ */
85
+ function getHarnessDrivableBedrockModels(): Array<{ id: string; name: string }> {
86
+ try {
87
+ return getModels("amazon-bedrock").map((m) => ({ id: m.id, name: m.name }));
88
+ } catch {
89
+ // getModels may throw if the pi-ai catalog is empty or corrupted.
90
+ // Return an empty list — the intersection will be empty too, which is safe.
91
+ return [];
92
+ }
93
+ }
94
+
95
+ /**
96
+ * Enumerate the Bedrock models that are both invocable by this AWS account and
97
+ * drivable by the pi-ai Converse harness, and verify the credential chain in
98
+ * one pass:
99
+ * 1. VERIFY the active credential chain is valid for Bedrock in `region`
100
+ * (the AWS list calls throw on auth/access failure).
101
+ * 2. ENUMERATE usable models = harness-drivable ∩ AWS-invocable, where the
102
+ * AWS-invocable set is:
103
+ * - `ListFoundationModels` filtered to on-demand TEXT models that are
104
+ * `ACTIVE` (base foundation-model ids), UNION
105
+ * - `ListInferenceProfiles` ids (the `us.`/`eu.`/… cross-region profile
106
+ * ids). The newest Claude models on Bedrock are invocable ONLY via an
107
+ * inference profile and never appear in `ListFoundationModels`, so this
108
+ * union is what keeps the current models in the usable list.
109
+ *
110
+ * `ListFoundationModels` reports models that EXIST in the region, not strictly
111
+ * ones the account has enabled access to, so the on-demand/ACTIVE filtering
112
+ * narrows it; base on-demand access-grant is not fully enumerable from the
113
+ * catalog. The inference-profile union is what makes the *current* models
114
+ * accurate. The matched id is stored/displayed as the pi-ai id (the id the
115
+ * harness can drive); ids are matched exactly.
116
+ *
117
+ * Two list calls per refresh, no pagination loops or per-model lookups.
118
+ * Dynamically imported so the API binary never loads `@aws-sdk/client-bedrock`.
119
+ * Tests inject a stub via `CredCheckOptions.bedrockProbe` instead.
120
+ *
121
+ * Returns `Array<{id, name}>` on success; throws on auth/access failure.
122
+ */
123
+ export async function runBedrockSdkProbeAndEnumerate(
124
+ region: string,
125
+ ): Promise<Array<{ id: string; name: string }>> {
126
+ const { BedrockClient, ListFoundationModelsCommand, ListInferenceProfilesCommand } = await import(
127
+ "@aws-sdk/client-bedrock"
128
+ );
129
+ const client = new BedrockClient({ region });
130
+
131
+ // AWS-invocable set, region-scoped to `region`.
132
+ const invocable = new Set<string>();
133
+
134
+ // Base on-demand TEXT foundation models that are ACTIVE.
135
+ const fmResponse = await client.send(
136
+ new ListFoundationModelsCommand({ byInferenceType: "ON_DEMAND", byOutputModality: "TEXT" }),
137
+ );
138
+ for (const m of fmResponse.modelSummaries ?? []) {
139
+ if (m.modelId && m.modelLifecycle?.status === "ACTIVE") {
140
+ invocable.add(m.modelId);
141
+ }
142
+ }
143
+
144
+ // Inference-profile / cross-region ids (`us.`/`eu.`/`apac.`/…). These are the
145
+ // only invocation path for the newest Claude models and are absent from
146
+ // `ListFoundationModels`.
147
+ const profileResponse = await client.send(new ListInferenceProfilesCommand({}));
148
+ for (const p of profileResponse.inferenceProfileSummaries ?? []) {
149
+ if (p.inferenceProfileId) {
150
+ invocable.add(p.inferenceProfileId);
151
+ }
152
+ }
153
+
154
+ // Usable = harness-drivable ∩ AWS-invocable, exact-id match. The stored id is
155
+ // the pi-ai id so it round-trips through `getModel("amazon-bedrock", id)`.
156
+ return getHarnessDrivableBedrockModels().filter((m) => invocable.has(m.id));
157
+ }
158
+
77
159
  /**
78
160
  * Pi-mono is satisfied by ANY of:
79
- * 1. `MODEL_OVERRIDE` selects the `amazon-bedrock` provider — credential
80
- * resolution is delegated to the AWS SDK's default chain at first
81
- * inference call. agent-swarm does no presence check; if creds are
82
- * missing the SDK error surfaces in the session log.
161
+ * 1. `BEDROCK_AUTH_MODE=sdk` or `MODEL_OVERRIDE` selects the
162
+ * `amazon-bedrock` provider (prefix-inference fallback when
163
+ * `BEDROCK_AUTH_MODE` is absent). The AWS SDK default credential chain is
164
+ * exercised by a real enumeration pass (`ListFoundationModels` +
165
+ * `ListInferenceProfiles`) that both verifies access and lists the usable
166
+ * models. Success → `ready:true, satisfiedBy:"sdk-delegated"` with the
167
+ * enumerated models; failure → `ready:false` with a classified hint;
168
+ * `AWS_REGION` unset → `ready:false` with a set-region hint. The
169
+ * enumeration is worker-only (the pi dynamic-import arm in
170
+ * `checkProviderCredentials`); the API binary never imports the SDK.
83
171
  * 2. `~/.pi/agent/auth.json` exists.
84
- * 3. `MODEL_OVERRIDE` is set to a provider-prefixed model — only the
85
- * matching provider's key is required.
172
+ * 3. `MODEL_OVERRIDE` is set to a non-Bedrock provider-prefixed model — only
173
+ * the matching provider's key is required.
86
174
  * 4. `MODEL_OVERRIDE` is empty / unprefixed — any one of the supported
87
175
  * keys (ANTHROPIC_API_KEY / OPENROUTER_API_KEY / OPENAI_API_KEY) is
88
176
  * enough.
89
177
  *
90
- * Bedrock is checked first so a stale `auth.json` (Anthropic / OpenRouter
91
- * creds from a previous login) doesn't get falsely reported as the
92
- * satisfying source when the model is actually going to AWS.
178
+ * The Bedrock branch is checked first so a stale `auth.json` (Anthropic /
179
+ * OpenRouter creds from a previous login) doesn't get falsely reported as
180
+ * the satisfying source when the model is actually going to AWS.
93
181
  */
94
- export function checkPiMonoCredentials(
182
+ export async function checkPiMonoCredentials(
95
183
  env: Record<string, string | undefined>,
96
184
  opts: CredCheckOptions = {},
97
- ): CredStatus {
98
- if (env.MODEL_OVERRIDE?.toLowerCase().startsWith("amazon-bedrock/")) {
99
- return {
100
- ready: true,
101
- missing: [],
102
- satisfiedBy: "sdk-delegated",
103
- hint: "AWS SDK will resolve credentials at first Bedrock call (env, ~/.aws/*, SSO, IMDS, etc.).",
104
- };
185
+ ): Promise<CredStatus> {
186
+ // Determine Bedrock SDK mode:
187
+ // - Explicit: BEDROCK_AUTH_MODE=sdk
188
+ // - Fallback: BEDROCK_AUTH_MODE absent AND MODEL_OVERRIDE starts with
189
+ // "amazon-bedrock/" (preserves today's prefix-inference semantics)
190
+ // BEDROCK_AUTH_MODE=bearer is declared/validated but the full bearer-token
191
+ // path is not implemented yet; it falls through to the standard auth check.
192
+ const bedrockAuthMode = env.BEDROCK_AUTH_MODE?.toLowerCase();
193
+ const isBedrockSdk =
194
+ bedrockAuthMode === "sdk" ||
195
+ (bedrockAuthMode === undefined &&
196
+ env.MODEL_OVERRIDE?.toLowerCase().startsWith("amazon-bedrock/"));
197
+
198
+ if (isBedrockSdk) {
199
+ const region = env.AWS_REGION;
200
+ if (!region) {
201
+ // Do NOT fabricate a region. A guessed `us-east-1` can differ from where
202
+ // inference actually runs, which would enumerate the wrong region's
203
+ // models. Report a not-ready Bedrock state with a hint instead, so the
204
+ // enumeration region always matches the inference region. `bedrockRegion`
205
+ // is an empty string (not undefined) so the report still carries a
206
+ // Bedrock block and the picker can surface the reason.
207
+ return {
208
+ ready: false,
209
+ missing: [],
210
+ hint: "AWS_REGION is not set — set it to the region where your Bedrock models are accessible so model enumeration matches the inference region.",
211
+ bedrockModels: [],
212
+ bedrockRegion: "",
213
+ };
214
+ }
215
+ const probe = opts.bedrockProbe ?? (() => runBedrockSdkProbeAndEnumerate(region));
216
+ try {
217
+ const probeResult = await probe();
218
+ // `probeResult` is `Array<{id,name}> | undefined` — `undefined` comes from
219
+ // auth-only stubs that don't exercise enumeration. Treat `undefined` as [].
220
+ const bedrockModels: Array<{ id: string; name: string }> = Array.isArray(probeResult)
221
+ ? probeResult
222
+ : [];
223
+ return {
224
+ ready: true,
225
+ missing: [],
226
+ satisfiedBy: "sdk-delegated",
227
+ hint: `Bedrock models invocable in ${region} enumerated (${bedrockModels.length} usable; ListFoundationModels + ListInferenceProfiles).`,
228
+ bedrockModels,
229
+ bedrockRegion: region,
230
+ };
231
+ } catch (err) {
232
+ const errorMessage = err instanceof Error ? err.message : String(err);
233
+ const classification = classifyAwsSdkError(errorMessage);
234
+ return {
235
+ ready: false,
236
+ missing: [],
237
+ hint:
238
+ classification?.message ??
239
+ `AWS Bedrock enumeration failed (region: ${region}): ${errorMessage}`,
240
+ bedrockModels: [],
241
+ bedrockRegion: region,
242
+ };
243
+ }
105
244
  }
106
245
 
107
246
  const homeDir = opts.homeDir ?? env.HOME ?? "/root";
108
- const probe = opts.fs?.existsSync ?? existsSync;
247
+ const fsProbe = opts.fs?.existsSync ?? existsSync;
109
248
  const authFile = `${homeDir}/.pi/agent/auth.json`;
110
- if (probe(authFile)) {
249
+ if (fsProbe(authFile)) {
111
250
  return { ready: true, missing: [], satisfiedBy: "file" };
112
251
  }
113
252
 
@@ -181,6 +181,19 @@ export interface CredStatus {
181
181
  missing: string[];
182
182
  satisfiedBy?: "env" | "file" | "side-effect-pending" | "sdk-delegated";
183
183
  hint?: string;
184
+ /**
185
+ * Pi-mono Bedrock mode only: usable model list = harness-drivable ∩
186
+ * AWS-invocable (on-demand/ACTIVE foundation models ∪ inference profiles),
187
+ * region-scoped. Empty when enumeration failed (ready===false), when
188
+ * `AWS_REGION` is unset, or when the intersection is empty. Undefined when not
189
+ * in Bedrock mode.
190
+ */
191
+ bedrockModels?: Array<{ id: string; name: string }>;
192
+ /**
193
+ * Pi-mono Bedrock mode only: AWS region the enumeration ran against. An empty
194
+ * string signals Bedrock mode with `AWS_REGION` unset (no region fabricated).
195
+ */
196
+ bedrockRegion?: string;
184
197
  }
185
198
 
186
199
  /**
@@ -188,8 +201,28 @@ export interface CredStatus {
188
201
  * pi/opencode predicates probe the filesystem for `~/.codex/auth.json`,
189
202
  * `~/.pi/agent/auth.json`, `~/.local/share/opencode/auth.json`. Tests inject
190
203
  * a fake `fs` + `homeDir` to exercise the file-vs-env branches deterministically.
204
+ *
205
+ * `bedrockProbe` is an injectable for the Bedrock SDK enumeration path in
206
+ * `checkPiMonoCredentials`. In production it is left undefined and the function
207
+ * dynamically imports `@aws-sdk/client-bedrock` to run real
208
+ * `ListFoundationModels` + `ListInferenceProfiles` calls. Tests inject a stub
209
+ * to avoid hitting AWS.
191
210
  */
192
211
  export interface CredCheckOptions {
193
212
  homeDir?: string;
194
213
  fs?: { existsSync(p: string): boolean };
214
+ /**
215
+ * Injectable for the Bedrock SDK enumeration. When provided, called instead
216
+ * of the real `@aws-sdk/client-bedrock` `ListFoundationModels` +
217
+ * `ListInferenceProfiles` calls. Should throw on auth/access failure (with an
218
+ * AWS SDK-shaped error message) or resolve with the intersected
219
+ * (harness-drivable ∩ AWS-invocable) model list on success.
220
+ *
221
+ * Return type is `Array<{id,name}> | undefined` for backward compatibility:
222
+ * existing test stubs that return void (`async () => {}`) are still valid
223
+ * (TS 5.1+ infers `undefined` for a return-less function whose expected return type includes `undefined`);
224
+ * new tests that need to exercise the model list inject stubs that return
225
+ * an array. Production code always returns the model list.
226
+ */
227
+ bedrockProbe?: () => Promise<Array<{ id: string; name: string }> | undefined>;
195
228
  }
@@ -0,0 +1,135 @@
1
+ /**
2
+ * Unit tests for amazon-bedrock model group behaviour in modelGroupsForHarness.
3
+ *
4
+ * Verifies:
5
+ * - Bedrock group always appears for the pi harness (NEVER blank).
6
+ * - Live worker-reported models are preferred when present.
7
+ * - Static snapshot from modelsdev-cache.json is used as fallback.
8
+ * - Converse-incompatible models listed by AWS but absent from pi-ai's catalog
9
+ * are NOT in the live list (the intersection is worker-side; this test just
10
+ * ensures the UI renders what the worker sent, without adding phantom entries).
11
+ * - Non-pi harnesses do NOT get a Bedrock group.
12
+ */
13
+
14
+ import { describe, expect, test } from "bun:test";
15
+ import {
16
+ type LiveBedrockStatus,
17
+ modelGroupsForHarness,
18
+ } from "../../ui/src/lib/agent-runtime-models";
19
+
20
+ describe("modelGroupsForHarness — Bedrock group for pi harness", () => {
21
+ const configs = undefined;
22
+ const envPresence = undefined;
23
+
24
+ test("pi harness always includes an Amazon Bedrock group (NEVER blank)", () => {
25
+ // No live status provided — falls back to static snapshot.
26
+ const groups = modelGroupsForHarness("pi", configs, envPresence);
27
+ const bedrockGroup = groups.find((g) => g.provider === "Amazon Bedrock");
28
+ expect(bedrockGroup).toBeDefined();
29
+ // Static snapshot has 98 models — at least one must be present.
30
+ expect(bedrockGroup!.models.length).toBeGreaterThan(0);
31
+ // All model IDs must be prefixed with the provider.
32
+ for (const m of bedrockGroup!.models) {
33
+ expect(m.id.startsWith("amazon-bedrock/")).toBe(true);
34
+ }
35
+ });
36
+
37
+ test("pi harness with no live report → Bedrock group disabled (auth state unknown)", () => {
38
+ const groups = modelGroupsForHarness("pi", configs, envPresence, null);
39
+ const bedrockGroup = groups.find((g) => g.provider === "Amazon Bedrock");
40
+ expect(bedrockGroup).toBeDefined();
41
+ expect(bedrockGroup!.enabled).toBe(false);
42
+ });
43
+
44
+ test("pi harness with live report ready:true → Bedrock group enabled + live models", () => {
45
+ const liveStatus: LiveBedrockStatus = {
46
+ ready: true,
47
+ models: [
48
+ { id: "anthropic.claude-sonnet-4-20250514-v1:0", name: "Claude Sonnet 4" },
49
+ { id: "anthropic.claude-haiku-4-5-20251001-v1:0", name: "Claude Haiku 4.5" },
50
+ ],
51
+ };
52
+ const groups = modelGroupsForHarness("pi", configs, envPresence, liveStatus);
53
+ const bedrockGroup = groups.find((g) => g.provider === "Amazon Bedrock");
54
+ expect(bedrockGroup).toBeDefined();
55
+ expect(bedrockGroup!.enabled).toBe(true);
56
+ expect(bedrockGroup!.models).toHaveLength(2);
57
+ expect(bedrockGroup!.models[0]!.id).toBe(
58
+ "amazon-bedrock/anthropic.claude-sonnet-4-20250514-v1:0",
59
+ );
60
+ expect(bedrockGroup!.models[0]!.label).toBe("Claude Sonnet 4");
61
+ });
62
+
63
+ test("pi harness with live report ready:false → Bedrock group disabled + live models shown", () => {
64
+ // Auth failed but we still show models so the operator can see what's available.
65
+ const liveStatus: LiveBedrockStatus = {
66
+ ready: false,
67
+ models: [{ id: "anthropic.claude-sonnet-4-20250514-v1:0", name: "Claude Sonnet 4" }],
68
+ };
69
+ const groups = modelGroupsForHarness("pi", configs, envPresence, liveStatus);
70
+ const bedrockGroup = groups.find((g) => g.provider === "Amazon Bedrock");
71
+ expect(bedrockGroup!.enabled).toBe(false);
72
+ expect(bedrockGroup!.models).toHaveLength(1);
73
+ });
74
+
75
+ test("pi harness with failed probe surfaces the probe error as disabledReason", () => {
76
+ // A failed probe (ready:false with an error) should surface WHY the group is
77
+ // disabled instead of a silent disable.
78
+ const liveStatus: LiveBedrockStatus = {
79
+ ready: false,
80
+ models: [],
81
+ error: "Token expired — run aws sso login",
82
+ };
83
+ const groups = modelGroupsForHarness("pi", configs, envPresence, liveStatus);
84
+ const bedrockGroup = groups.find((g) => g.provider === "Amazon Bedrock");
85
+ expect(bedrockGroup!.enabled).toBe(false);
86
+ expect(bedrockGroup!.disabledReason).toBe("Token expired — run aws sso login");
87
+ });
88
+
89
+ test("pi harness with ready:true → no disabledReason and Bedrock icon key", () => {
90
+ const liveStatus: LiveBedrockStatus = {
91
+ ready: true,
92
+ models: [{ id: "anthropic.claude-sonnet-4-20250514-v1:0", name: "Claude Sonnet 4" }],
93
+ };
94
+ const groups = modelGroupsForHarness("pi", configs, envPresence, liveStatus);
95
+ const bedrockGroup = groups.find((g) => g.provider === "Amazon Bedrock");
96
+ expect(bedrockGroup!.disabledReason).toBeUndefined();
97
+ // Bedrock has its own provider icon — it no longer borrows the OpenRouter glyph.
98
+ expect(bedrockGroup!.models[0]!.providerId).toBe("amazon-bedrock");
99
+ });
100
+
101
+ test("pi harness with live report and empty model list → shows empty list (not snapshot fallback)", () => {
102
+ // Worker reported successfully but no models were in the intersection.
103
+ const liveStatus: LiveBedrockStatus = { ready: true, models: [] };
104
+ const groups = modelGroupsForHarness("pi", configs, envPresence, liveStatus);
105
+ const bedrockGroup = groups.find((g) => g.provider === "Amazon Bedrock");
106
+ expect(bedrockGroup!.models).toHaveLength(0);
107
+ });
108
+
109
+ test("opencode harness does NOT get a Bedrock group", () => {
110
+ const groups = modelGroupsForHarness("opencode", configs, envPresence);
111
+ const bedrockGroup = groups.find((g) => g.provider === "Amazon Bedrock");
112
+ expect(bedrockGroup).toBeUndefined();
113
+ });
114
+
115
+ test("claude harness does NOT get a Bedrock group", () => {
116
+ const groups = modelGroupsForHarness("claude", configs, envPresence);
117
+ const bedrockGroup = groups.find((g) => g.provider === "Amazon Bedrock");
118
+ expect(bedrockGroup).toBeUndefined();
119
+ });
120
+
121
+ test("codex harness does NOT get a Bedrock group", () => {
122
+ const groups = modelGroupsForHarness("codex", configs, envPresence);
123
+ const bedrockGroup = groups.find((g) => g.provider === "Amazon Bedrock");
124
+ expect(bedrockGroup).toBeUndefined();
125
+ });
126
+
127
+ test("pi harness still returns openrouter/anthropic/openai snapshot groups alongside Bedrock", () => {
128
+ const groups = modelGroupsForHarness("pi", configs, envPresence);
129
+ const providerNames = groups.map((g) => g.provider);
130
+ expect(providerNames).toContain("OpenRouter");
131
+ expect(providerNames).toContain("Anthropic");
132
+ expect(providerNames).toContain("OpenAI");
133
+ expect(providerNames).toContain("Amazon Bedrock");
134
+ });
135
+ });