@desplega.ai/agent-swarm 1.91.0 → 1.92.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. package/README.md +2 -1
  2. package/openapi.json +585 -5
  3. package/package.json +1 -1
  4. package/src/be/db.ts +337 -1
  5. package/src/be/migrations/083_script_workflows.sql +51 -0
  6. package/src/be/modelsdev-cache.json +42352 -38595
  7. package/src/be/scripts/typecheck.ts +49 -0
  8. package/src/be/seed-scripts/catalog/compound-insights.ts +216 -6
  9. package/src/be/seed-scripts/catalog/ops-catalog-audit.ts +911 -0
  10. package/src/be/seed-scripts/catalog/task-context-gathering.ts +92 -0
  11. package/src/be/seed-scripts/catalog/tool-usage.ts +6 -3
  12. package/src/be/seed-scripts/index.ts +20 -2
  13. package/src/be/seed-skills/index.ts +7 -0
  14. package/src/be/swarm-config-guard.ts +17 -0
  15. package/src/commands/runner.ts +43 -2
  16. package/src/http/db-query.ts +20 -5
  17. package/src/http/index.ts +10 -0
  18. package/src/http/script-runs.ts +555 -0
  19. package/src/prompts/session-templates.ts +24 -4
  20. package/src/providers/claude-adapter.ts +60 -13
  21. package/src/script-workflows/executor.ts +110 -0
  22. package/src/script-workflows/harness.ts +73 -0
  23. package/src/script-workflows/label-lint.ts +51 -0
  24. package/src/script-workflows/limits.ts +22 -0
  25. package/src/script-workflows/supervisor.ts +139 -0
  26. package/src/script-workflows/workflow-ctx.ts +205 -0
  27. package/src/scripts-runtime/sdk-allowlist.ts +3 -0
  28. package/src/scripts-runtime/types/stdlib.d.ts +60 -0
  29. package/src/scripts-runtime/types/swarm-sdk.d.ts +60 -0
  30. package/src/server.ts +2 -0
  31. package/src/slack/handlers.ts +11 -4
  32. package/src/slack/message-text.ts +98 -0
  33. package/src/slack/thread-buffer.ts +5 -3
  34. package/src/tests/claude-adapter-binary.test.ts +147 -4
  35. package/src/tests/db-query.test.ts +28 -0
  36. package/src/tests/error-tracker.test.ts +121 -0
  37. package/src/tests/harness-provider-resolution.test.ts +33 -0
  38. package/src/tests/mcp-tools.test.ts +6 -0
  39. package/src/tests/prompt-template-session.test.ts +34 -5
  40. package/src/tests/script-runs-http.test.ts +278 -0
  41. package/src/tests/script-workflows-label-lint.test.ts +43 -0
  42. package/src/tests/script-workflows-runtime-e2e.test.ts +170 -0
  43. package/src/tests/scripts-mcp-e2e.test.ts +49 -2
  44. package/src/tests/seed-scripts.test.ts +347 -2
  45. package/src/tests/slack-message-text.test.ts +250 -0
  46. package/src/tests/system-default-skills.test.ts +40 -0
  47. package/src/tools/db-query.ts +16 -6
  48. package/src/tools/script-runs.ts +123 -0
  49. package/src/tools/slack-read.ts +12 -3
  50. package/src/tools/tool-config.ts +4 -1
  51. package/src/types.ts +52 -0
  52. package/src/utils/error-tracker.ts +40 -1
  53. package/src/utils/internal-ai/complete-structured.ts +10 -4
  54. package/src/workflows/executors/raw-llm.ts +76 -59
  55. package/templates/skills/pages/content.md +205 -55
  56. package/templates/skills/script-workflows/config.json +14 -0
  57. package/templates/skills/script-workflows/content.md +68 -0
  58. package/templates/skills/swarm-scripts/content.md +2 -3
@@ -1,10 +1,21 @@
1
1
  import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
2
  import * as z from "zod";
3
- import { executeReadOnlyQuery } from "@/http/db-query";
3
+ import { DbQueryInputShape, executeReadOnlyQuery, resolveDbQuerySql } from "@/http/db-query";
4
4
  import { createToolRegistrar } from "@/tools/utils";
5
5
 
6
6
  const MCP_MAX_ROWS = 100;
7
7
 
8
// Field map for the db-query tool input: everything from DbQueryInputShape,
// with `sql`, `query` and `params` (re)declared for the MCP surface.
const dbQueryToolFields = {
  ...DbQueryInputShape,
  sql: z.string().optional().describe("SQL query (read-only only — writes are rejected)"),
  query: z.string().optional().describe("Deprecated runtime alias for sql."),
  params: z.array(z.any()).optional().default([]).describe("Query parameters"),
};

/**
 * Input schema for the `db-query` MCP tool.
 *
 * Both `sql` and `query` are optional on their own, but the refinement
 * rejects an input in which neither of them is present.
 */
const DbQueryToolInputSchema = z
  .object(dbQueryToolFields)
  .refine(({ sql, query }) => !(sql === undefined && query === undefined), {
    message: "Either sql or query is required",
  });
18
+
8
19
  export const registerDbQueryTool = (server: McpServer) => {
9
20
  createToolRegistrar(server)(
10
21
  "db-query",
@@ -13,10 +24,7 @@ export const registerDbQueryTool = (server: McpServer) => {
13
24
  description:
14
25
  "Execute a read-only SQL query against the swarm database. Available to all authenticated agents — be aware results may include secrets (oauth_tokens, configs). Results capped at 100 rows.",
15
26
  annotations: { readOnlyHint: true },
16
- inputSchema: z.object({
17
- sql: z.string().describe("SQL query (read-only only — writes are rejected)"),
18
- params: z.array(z.any()).optional().default([]).describe("Query parameters"),
19
- }),
27
+ inputSchema: DbQueryToolInputSchema,
20
28
  outputSchema: z.object({
21
29
  success: z.boolean(),
22
30
  columns: z.array(z.string()),
@@ -26,8 +34,10 @@ export const registerDbQueryTool = (server: McpServer) => {
26
34
  truncated: z.boolean(),
27
35
  }),
28
36
  },
29
- async ({ sql, params }, _requestInfo, _meta) => {
37
+ async (input, _requestInfo, _meta) => {
30
38
  try {
39
+ const sql = resolveDbQuerySql(input);
40
+ const params = input.params ?? [];
31
41
  const result = executeReadOnlyQuery(sql, params, MCP_MAX_ROWS);
32
42
  const truncated = result.total > MCP_MAX_ROWS;
33
43
 
@@ -0,0 +1,123 @@
1
+ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
+ import * as z from "zod";
3
+ import { createToolRegistrar } from "@/tools/utils";
4
+ import { ScriptRunStatusSchema } from "@/types";
5
+ import { proxyScriptsApi, scriptNameSchema, scriptToolOutputSchema } from "./script-common";
6
+
7
/** Description registered for the `launch-script-run` tool. */
export const LAUNCH_SCRIPT_RUN_DESCRIPTION =
  "Launch a durable one-off script workflow run. The run executes in the background and can be inspected with get-script-run for terminal status and journal entries.";

/** Description registered for the `get-script-run` tool. */
export const GET_SCRIPT_RUN_DESCRIPTION =
  "Get a durable script workflow run by ID, including its journal entries for swarm-script, raw-llm, and agent-task steps.";

/** Description registered for the `list-script-runs` tool. */
export const LIST_SCRIPT_RUNS_DESCRIPTION =
  "List durable script workflow runs, optionally filtered by status or agent ID.";

/**
 * Registers the `launch-script-run`, `get-script-run` and `list-script-runs`
 * tools on the given MCP server.
 *
 * Every handler delegates to `proxyScriptsApi` against the `/api/script-runs`
 * routes and supplies a one-line success message derived from the response
 * payload. Payload fields are read defensively: when the expected field is
 * absent the message falls back to "unknown" (or a count of 0).
 *
 * @param server MCP server the tools are registered on.
 */
export const registerScriptRunsTools = (server: McpServer) => {
  const addTool = createToolRegistrar(server);

  // ── launch-script-run ────────────────────────────────────
  const launchInput = z.object({
    source: z.string().min(1).describe("TypeScript script workflow source."),
    args: z.unknown().optional().describe("JSON-serializable workflow arguments."),
    idempotencyKey: z
      .string()
      .min(1)
      .max(200)
      .optional()
      .describe("Optional key that returns the existing run instead of launching a duplicate."),
    scriptName: scriptNameSchema
      .optional()
      .describe("Optional human-readable script/workflow name for the run."),
    requestedByUserId: z
      .string()
      .optional()
      .describe("Optional canonical user ID to attribute the run to."),
  });

  addTool(
    "launch-script-run",
    {
      title: "Launch Script Run",
      description: LAUNCH_SCRIPT_RUN_DESCRIPTION,
      annotations: { openWorldHint: true },
      inputSchema: launchInput,
      outputSchema: scriptToolOutputSchema,
    },
    async (args, requestInfo) => {
      return proxyScriptsApi({
        method: "POST",
        path: "/api/script-runs",
        // The tool always asks the API for a background run.
        body: { ...args, background: true },
        requestInfo,
        successMessage: (data) => {
          let id = "unknown";
          if (typeof data === "object" && data !== null && "id" in data) {
            id = String((data as { id: unknown }).id);
          }
          return `Script run launched: ${id}.`;
        },
      });
    },
  );

  // ── get-script-run ───────────────────────────────────────
  const getInput = z.object({
    id: z.string().uuid().describe("Script run ID."),
  });

  addTool(
    "get-script-run",
    {
      title: "Get Script Run",
      description: GET_SCRIPT_RUN_DESCRIPTION,
      annotations: { readOnlyHint: true, openWorldHint: false },
      inputSchema: getInput,
      outputSchema: scriptToolOutputSchema,
    },
    async ({ id }, requestInfo) => {
      return proxyScriptsApi({
        method: "GET",
        path: `/api/script-runs/${encodeURIComponent(id)}`,
        requestInfo,
        successMessage: (data) => {
          let status = "unknown";
          if (typeof data === "object" && data !== null && "run" in data) {
            // Only a string `run.status` is reported; anything else stays "unknown".
            const runStatus = (data as { run?: { status?: unknown } }).run?.status;
            if (typeof runStatus === "string") {
              status = runStatus;
            }
          }
          return `Script run ${id} status: ${status}.`;
        },
      });
    },
  );

  // ── list-script-runs ─────────────────────────────────────
  const listInput = z.object({
    status: ScriptRunStatusSchema.optional().describe("Optional script run status filter."),
    agentId: z.string().optional().describe("Optional agent ID filter."),
    limit: z.number().int().min(1).max(500).default(50).describe("Maximum runs to return."),
    offset: z.number().int().min(0).default(0).describe("Pagination offset."),
  });

  addTool(
    "list-script-runs",
    {
      title: "List Script Runs",
      description: LIST_SCRIPT_RUNS_DESCRIPTION,
      annotations: { readOnlyHint: true, openWorldHint: false },
      inputSchema: listInput,
      outputSchema: scriptToolOutputSchema,
    },
    async ({ status, agentId, limit, offset }, requestInfo) => {
      const query = new URLSearchParams();
      // Filters are only sent when they carry a non-empty value.
      const optionalFilters: Array<[string, string | undefined]> = [
        ["status", status],
        ["agentId", agentId],
      ];
      for (const [key, value] of optionalFilters) {
        if (value) query.set(key, value);
      }
      query.set("limit", String(limit));
      query.set("offset", String(offset));

      return proxyScriptsApi({
        method: "GET",
        path: `/api/script-runs?${query.toString()}`,
        requestInfo,
        successMessage: (data) => {
          let total = 0;
          if (typeof data === "object" && data !== null && "total" in data) {
            total = Number((data as { total: unknown }).total);
          }
          // A non-numeric `total` (NaN / Infinity) is reported as 0.
          const shown = Number.isFinite(total) ? total : 0;
          return `Found ${shown} script run(s).`;
        },
      });
    },
  );
};
@@ -3,6 +3,7 @@ import * as z from "zod";
3
3
  import { getAgentById, getInboxMessageById, getTaskById } from "@/be/db";
4
4
  import { getSlackApp } from "@/slack/app";
5
5
  import { downloadFile } from "@/slack/files";
6
+ import { extractSlackMessageText } from "@/slack/message-text";
6
7
  import { createToolRegistrar } from "@/tools/utils";
7
8
 
8
9
  /**
@@ -203,6 +204,13 @@ export const registerSlackReadTool = (server: McpServer) => {
203
204
  text?: string;
204
205
  ts: string;
205
206
  files?: RawFile[];
207
+ attachments?: Array<{
208
+ fallback?: string;
209
+ text?: string;
210
+ title?: string;
211
+ pretext?: string;
212
+ }>;
213
+ blocks?: unknown[];
206
214
  };
207
215
 
208
216
  let rawMessages: RawMessage[] = [];
@@ -267,8 +275,9 @@ export const registerSlackReadTool = (server: McpServer) => {
267
275
  }> = [];
268
276
 
269
277
  for (const m of rawMessages) {
270
- // Include messages with text OR files
271
- if (!m.text && (!m.files || m.files.length === 0)) continue;
278
+ // Include messages with text, attachments, blocks, or files
279
+ const extractedText = extractSlackMessageText(m);
280
+ if (!extractedText && (!m.files || m.files.length === 0)) continue;
272
281
 
273
282
  const isBot =
274
283
  m.user === botUserId || m.bot_id !== undefined || m.subtype === "bot_message";
@@ -330,7 +339,7 @@ export const registerSlackReadTool = (server: McpServer) => {
330
339
  user: m.user,
331
340
  username,
332
341
  isBot,
333
- text: m.text || "",
342
+ text: extractedText,
334
343
  ts: m.ts,
335
344
  files,
336
345
  });
@@ -164,12 +164,15 @@ export const DEFERRED_TOOLS = new Set([
164
164
  "kv-incr",
165
165
  "kv-list",
166
166
 
167
- // Reusable scripts (5)
167
+ // Reusable scripts (8)
168
168
  "script-search",
169
169
  "script-run",
170
170
  "script-upsert",
171
171
  "script-delete",
172
172
  "script-query-types",
173
+ "launch-script-run",
174
+ "get-script-run",
175
+ "list-script-runs",
173
176
 
174
177
  // External command routes (1)
175
178
  "swarm_x",
package/src/types.ts CHANGED
@@ -1535,6 +1535,58 @@ export const WorkflowRunSchema = z.object({
1535
1535
  });
1536
1536
  export type WorkflowRun = z.infer<typeof WorkflowRunSchema>;
1537
1537
 
1538
+ // --- Script Workflow Runs ---
1539
+
1540
/** Every status value a script workflow run may carry. */
export const ScriptRunStatusSchema = z.enum([
  "running",
  "paused",
  "completed",
  "failed",
  "cancelled",
  "aborted_limit",
]);
export type ScriptRunStatus = z.infer<typeof ScriptRunStatusSchema>;

/**
 * The statuses treated as terminal: every `ScriptRunStatusSchema` value
 * except "running" and "paused".
 */
export const TERMINAL_SCRIPT_RUN_STATUSES = [
  "completed",
  "failed",
  "cancelled",
  "aborted_limit",
] as const;
export type TerminalScriptRunStatus = (typeof TERMINAL_SCRIPT_RUN_STATUSES)[number];

/**
 * Shape of a script workflow run record. Only `id`, `agentId`, `source`,
 * `args`, `status` and `startedAt` are required keys; the rest are optional.
 */
export const ScriptRunSchema = z.object({
  // Identity
  id: z.string().uuid(),
  agentId: z.string(),
  scriptName: z.string().optional(),

  // What was launched
  source: z.string(),
  args: z.unknown(),

  // Execution state
  status: ScriptRunStatusSchema,
  pid: z.number().int().optional(),
  startedAt: z.string(),
  finishedAt: z.string().optional(),
  output: z.unknown().optional(),
  error: z.string().optional(),
  lastHeartbeatAt: z.string().optional(),

  // Launch metadata
  idempotencyKey: z.string().optional(),
  requestedByUserId: z.string().optional(),
});
export type ScriptRun = z.infer<typeof ScriptRunSchema>;

/**
 * Shape of one journal entry recorded for a script run step. An entry is
 * either "completed" or "failed"; `result`, `error` and `completedAt` are
 * optional.
 */
export const ScriptRunJournalEntrySchema = z.object({
  id: z.string().uuid(),
  runId: z.string().uuid(),
  stepKey: z.string(),
  stepType: z.string(),
  config: z.record(z.string(), z.unknown()),
  status: z.enum(["completed", "failed"]),
  result: z.unknown().optional(),
  error: z.string().optional(),
  startedAt: z.string(),
  completedAt: z.string().optional(),
});
export type ScriptRunJournalEntry = z.infer<typeof ScriptRunJournalEntrySchema>;
1589
+
1538
1590
  // --- Workflow Run Step ---
1539
1591
 
1540
1592
  export const WorkflowRunStepStatusSchema = z.enum([
@@ -28,7 +28,46 @@ export const MAX_RATE_LIMIT_RESET_MS = 7 * 24 * 60 * 60 * 1000;
28
28
  * "429 Too Many Requests"; does not match "No conversation found with session ID".
29
29
  */
30
30
  export function isRateLimitMessage(s: string): boolean {
31
- return /rate.?limit|hit your[\w\s-]*limit|usage[ _-]?limit|too many requests|\b429\b/i.test(s);
31
+ return (
32
+ /rate.?limit|hit your[\w\s-]*limit|usage[ _-]?limit|too many requests|\b429\b/i.test(s) ||
33
+ isCodexCreditsExhaustedMessage(s)
34
+ );
35
+ }
36
+
37
/**
 * Detects Codex's workspace-credit-exhausted error, which surfaces as:
 *   "Your workspace is out of credits. Ask your workspace owner to refill in order to continue."
 * This wording does not match the standard rate-limit patterns, so it needs its own predicate.
 * Kept specific to avoid false positives — "refill" alone is intentionally excluded.
 *
 * Words inside each phrase are separated by `\s+` rather than a literal space,
 * so the phrases are still recognised when the message has been line-wrapped
 * or re-joined with newlines, tabs or repeated spaces.
 *
 * @param s Error or log text to inspect.
 * @returns `true` when one of the credit-exhausted phrases occurs in `s` (case-insensitive).
 */
export function isCodexCreditsExhaustedMessage(s: string): boolean {
  return /out\s+of\s+credits|refill\s+in\s+order\s+to\s+continue|workspace\s+owner\s+to\s+refill/i.test(
    s,
  );
}
46
+
47
/**
 * Cooldown used by default when a Codex OAuth slot reports exhausted credits.
 * The workspace credit cap is weekly, so two hours is conservative, yet it
 * stops the 5-minute tier-3 fallback from re-handing out the dead slot every
 * five minutes.
 */
export const CODEX_CREDITS_EXHAUSTED_COOLDOWN_MS = 2 * 60 * 60 * 1000; // 2h

/** Lower bound for the operator-tunable cooldown: never below the tier-3 fallback. */
export const MIN_CODEX_CREDITS_EXHAUSTED_COOLDOWN_MS = 5 * 60 * 1000; // 5m

/**
 * Turns a raw config value into the effective Codex credits-exhausted
 * cooldown in milliseconds.
 *
 * - `undefined`, `null` and `""` yield the default constant.
 * - A number is used as-is; a string is accepted only when, after trimming,
 *   it consists solely of decimal digits.
 * - Anything non-finite or not greater than zero yields the default constant.
 * - The surviving value is clamped to
 *   [MIN_CODEX_CREDITS_EXHAUSTED_COOLDOWN_MS, MAX_RATE_LIMIT_RESET_MS].
 *
 * The function has no side effects.
 *
 * @param raw Configured cooldown, possibly absent or malformed.
 * @returns Cooldown in milliseconds.
 */
export function resolveCodexCreditsExhaustedCooldownMs(
  raw: string | number | undefined | null,
): number {
  if (raw == null || raw === "") {
    return CODEX_CREDITS_EXHAUSTED_COOLDOWN_MS;
  }

  let candidate: number;
  if (typeof raw === "number") {
    candidate = raw;
  } else {
    const trimmed = raw.trim();
    // Digits only: signs, decimals and exponents are treated as malformed.
    candidate = /^\d+$/.test(trimmed) ? Number(trimmed) : Number.NaN;
  }

  if (!(Number.isFinite(candidate) && candidate > 0)) {
    return CODEX_CREDITS_EXHAUSTED_COOLDOWN_MS;
  }

  const atLeastFloor = Math.max(candidate, MIN_CODEX_CREDITS_EXHAUSTED_COOLDOWN_MS);
  return Math.min(atLeastFloor, MAX_RATE_LIMIT_RESET_MS);
}
33
72
 
34
73
  /**
@@ -84,10 +84,16 @@ async function defaultSpawnClaudeCli(
84
84
  signal?: AbortSignal,
85
85
  jsonSchema?: object,
86
86
  ): Promise<string> {
87
- // CLAUDE_BINARY may be a single binary ("claude", "shannon") or a
88
- // whitespace-separated command string ("bunx @dexh/shannon"). See
89
- // parseClaudeBinary in src/providers/claude-adapter.ts.
90
- const claudeBinaryArgv = (process.env.CLAUDE_BINARY ?? "claude").trim().split(/\s+/);
87
+ // SWARM_USE_CLAUDE_BRIDGE mirrors the main claude adapter's subscription-pool
88
+ // routing. Otherwise CLAUDE_BINARY may be a single binary ("claude", "shannon")
89
+ // or a whitespace-separated command string ("bunx @dexh/shannon").
90
+ const useClaudeBridge = ["true", "1"].includes(
91
+ (process.env.SWARM_USE_CLAUDE_BRIDGE ?? "").trim().toLowerCase(),
92
+ );
93
+ const claudeBinaryRaw = useClaudeBridge
94
+ ? "claude-bridge"
95
+ : (process.env.CLAUDE_BINARY ?? "claude").trim();
96
+ const claudeBinaryArgv = (claudeBinaryRaw || "claude").split(/\s+/);
91
97
  const cmd = [...claudeBinaryArgv, "-p", "--model", model, "--output-format", "json"];
92
98
  if (jsonSchema) {
93
99
  cmd.push("--json-schema", JSON.stringify(jsonSchema));
@@ -16,6 +16,76 @@ export const RawLlmOutputSchema = z.object({
16
16
  model: z.string(),
17
17
  });
18
18
 
19
/**
 * Runs one prompt against OpenRouter through the AI SDK's OpenAI-compatible
 * client and reports the outcome as a status object.
 *
 * - With `config.schema` set, the call goes through `generateObject` and the
 *   parsed object is returned as `output.result`.
 * - Otherwise `generateText` is used and the text is returned as `output.result`.
 * - `config.model` defaults to "google/gemini-3-flash-preview".
 *
 * Failure handling:
 * - Rate-limit errors (HTTP status 429 or 529, or a message mentioning a rate
 *   limit / "too many requests" / a standalone "529") are rethrown.
 * - Any other error with `config.fallbackPort` set yields `status: "success"`
 *   with a `null` result and an `error` note, so the caller can route to the
 *   fallback port.
 * - Any other error without a fallback port yields `status: "failed"`.
 *
 * @param config Raw-LLM step configuration (prompt, optional model, schema and fallbackPort).
 * @returns The success payload (optionally carrying a fallback `error` note) or a failure.
 * @throws Rethrows the original error when it is classified as a rate limit.
 */
export async function executeRawLlm(
  config: z.infer<typeof RawLlmConfigSchema>,
): Promise<
  | { status: "success"; output: z.infer<typeof RawLlmOutputSchema>; error?: string }
  | { status: "failed"; error: string }
> {
  const modelName = config.model ?? "google/gemini-3-flash-preview";

  try {
    const { createOpenAI } = await import("@ai-sdk/openai");
    const openrouter = createOpenAI({
      baseURL: "https://openrouter.ai/api/v1",
      apiKey: process.env.OPENROUTER_API_KEY,
    });
    const model = openrouter(modelName);

    if (config.schema) {
      const { generateObject, jsonSchema } = await import("ai");
      const { object } = await generateObject({
        model,
        schema: jsonSchema(config.schema),
        prompt: config.prompt,
        providerOptions: {
          openai: { strictJsonSchema: false },
        },
      });
      return {
        status: "success",
        output: { result: object, model: modelName },
      };
    }

    const { generateText } = await import("ai");
    const { text } = await generateText({
      model,
      prompt: config.prompt,
    });
    return {
      status: "success",
      output: { result: text, model: modelName },
    };
  } catch (err) {
    // Re-throw rate-limit errors so executeStep's retry policy handles them
    // via the retry poller (scheduled backoff). Using the fallbackPort for
    // rate limits would trigger the semantic loop-back path instead, causing
    // runaway retries without any backoff.
    const httpStatus =
      (err as { status?: number; statusCode?: number })?.status ??
      (err as { status?: number; statusCode?: number })?.statusCode;
    // `\b529\b` rather than a bare `529`: an unanchored match would also fire
    // on unrelated digit runs (request ids, token counts such as "15290") and
    // wrongly rethrow an ordinary failure instead of using the fallback path.
    const isRateLimited =
      httpStatus === 429 ||
      httpStatus === 529 ||
      (err instanceof Error && /rate.?limit|too many requests|\b529\b/i.test(err.message));
    if (isRateLimited) {
      throw err;
    }

    const detail = err instanceof Error ? err.message : String(err);
    if (config.fallbackPort) {
      return {
        status: "success",
        output: { result: null, model: modelName },
        error: `LLM call failed, using fallback port: ${detail}`,
      };
    }
    return {
      status: "failed",
      error: `LLM call failed: ${detail}`,
    };
  }
}
88
+
19
89
  // ─── Executor ───────────────────────────────────────────────
20
90
 
21
91
  export class RawLlmExecutor extends BaseExecutor<
@@ -33,68 +103,15 @@ export class RawLlmExecutor extends BaseExecutor<
33
103
  _meta: ExecutorMeta,
34
104
  ): Promise<ExecutorResult<z.infer<typeof RawLlmOutputSchema>>> {
35
105
  const prompt = this.deps.interpolate(config.prompt, context as Record<string, unknown>);
36
- const modelName = config.model ?? "google/gemini-3-flash-preview";
37
-
38
- try {
39
- const { createOpenAI } = await import("@ai-sdk/openai");
40
- const openrouter = createOpenAI({
41
- baseURL: "https://openrouter.ai/api/v1",
42
- apiKey: process.env.OPENROUTER_API_KEY,
43
- });
44
- const model = openrouter(modelName);
45
-
46
- if (config.schema) {
47
- const { generateObject, jsonSchema } = await import("ai");
48
- const { object } = await generateObject({
49
- model,
50
- schema: jsonSchema(config.schema),
51
- prompt,
52
- providerOptions: {
53
- openai: { strictJsonSchema: false },
54
- },
55
- });
56
- return {
57
- status: "success",
58
- output: { result: object, model: modelName },
59
- };
60
- }
61
-
62
- const { generateText } = await import("ai");
63
- const { text } = await generateText({
64
- model,
65
- prompt,
66
- });
106
+ const result = await executeRawLlm({ ...config, prompt });
107
+ if (result.status === "success" && result.error) {
67
108
  return {
68
109
  status: "success",
69
- output: { result: text, model: modelName },
70
- };
71
- } catch (err) {
72
- // Re-throw rate-limit errors so executeStep's retry policy handles them
73
- // via the retry poller (scheduled backoff). Using the fallbackPort for
74
- // rate limits would trigger the semantic loop-back path instead, causing
75
- // runaway retries without any backoff.
76
- const httpStatus =
77
- (err as { status?: number; statusCode?: number })?.status ??
78
- (err as { status?: number; statusCode?: number })?.statusCode;
79
- const isRateLimited =
80
- httpStatus === 429 ||
81
- httpStatus === 529 ||
82
- (err instanceof Error && /rate.?limit|too many requests|529/i.test(err.message));
83
- if (isRateLimited) {
84
- throw err;
85
- }
86
- if (config.fallbackPort) {
87
- return {
88
- status: "success",
89
- output: { result: null, model: modelName },
90
- nextPort: config.fallbackPort,
91
- error: `LLM call failed, using fallback port: ${err instanceof Error ? err.message : String(err)}`,
92
- };
93
- }
94
- return {
95
- status: "failed",
96
- error: `LLM call failed: ${err instanceof Error ? err.message : String(err)}`,
110
+ output: result.output,
111
+ nextPort: config.fallbackPort,
112
+ error: result.error,
97
113
  };
98
114
  }
115
+ return result;
99
116
  }
100
117
  }