@smithers-orchestrator/agents 0.24.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/package.json +15 -5
  2. package/src/AgentLike.ts +5 -0
  3. package/src/AmpAgent.js +15 -5
  4. package/src/AmpAgentOptions.ts +6 -0
  5. package/src/BaseCliAgent/BaseCliAgent.js +205 -11
  6. package/src/BaseCliAgent/createAgentStdoutTextEmitter.js +21 -3
  7. package/src/BaseCliAgent/index.d.ts +467 -0
  8. package/src/ClaudeCodeAgent.js +6 -2
  9. package/src/CodexAgent.js +17 -2
  10. package/src/CodexAgentOptions.ts +11 -0
  11. package/src/GeminiAgent.js +34 -224
  12. package/src/GeminiAgentOptions.ts +4 -9
  13. package/src/OpenCodeAgent.js +2 -12
  14. package/src/OpenCodeAgentOptions.ts +19 -0
  15. package/src/PiAgent.js +63 -5
  16. package/src/cli-capabilities/CliAgentCapabilityAdapterId.ts +0 -1
  17. package/src/cli-capabilities/getCliAgentCapabilityDoctorReport.js +3 -2
  18. package/src/cli-capabilities/getCliAgentCapabilityReport.js +0 -6
  19. package/src/cli-surface/cliAgentSurfaceManifest.js +1 -40
  20. package/src/createElevenLabsTextToSpeechTool.js +128 -0
  21. package/src/createElevenLabsTextToSpeechTool.ts +33 -0
  22. package/src/diagnostics/getDiagnosticStrategy.js +94 -23
  23. package/src/diagnostics/launchDiagnostics.js +7 -4
  24. package/src/document-parsing/DocumentParsingProvider.ts +13 -0
  25. package/src/document-parsing/DocumentParsingResult.ts +13 -0
  26. package/src/document-parsing/DocumentParsingToolset.ts +4 -0
  27. package/src/document-parsing/DocumentParsingToolsetOptions.ts +9 -0
  28. package/src/document-parsing/createDocumentParsingToolset.d.ts +9 -0
  29. package/src/document-parsing/createDocumentParsingToolset.js +416 -0
  30. package/src/http/CreateHttpToolOptions.ts +4 -0
  31. package/src/http/HttpToolAuth.ts +15 -0
  32. package/src/http/HttpToolInput.ts +11 -0
  33. package/src/http/HttpToolOutput.ts +7 -0
  34. package/src/http/createHttpTool.js +136 -0
  35. package/src/image-generation/ImageGenerationProvider.ts +7 -0
  36. package/src/image-generation/ImageGenerationRequest.ts +8 -0
  37. package/src/image-generation/ImageGenerationResult.ts +10 -0
  38. package/src/image-generation/ImageGenerationToolOptions.ts +10 -0
  39. package/src/image-generation/createImageGenerationTool.d.ts +18 -0
  40. package/src/image-generation/createImageGenerationTool.js +92 -0
  41. package/src/index.d.ts +490 -147
  42. package/src/index.js +23 -5
  43. package/src/streamResultToGenerateResult.js +55 -26
  44. package/src/transcription/createTranscriptionTool.js +182 -0
  45. package/src/transcription/createTranscriptionTool.ts +29 -0
  46. package/src/transcription/index.js +1 -0
  47. package/src/transcription/index.ts +6 -0
  48. package/src/web-search/GroundedWebSearchProvider.ts +21 -0
  49. package/src/web-search/GroundedWebSearchToolset.ts +6 -0
  50. package/src/web-search/createBraveSearchProvider.js +53 -0
  51. package/src/web-search/createExaSearchProvider.js +72 -0
  52. package/src/web-search/createGroundedWebSearchToolset.js +110 -0
  53. package/src/web-search/createSerperSearchProvider.js +63 -0
  54. package/src/web-search/createTavilySearchProvider.js +59 -0
  55. package/src/web-search/index.js +5 -0
  56. package/src/zodToOpenAISchema.js +4 -0
  57. package/src/OpenCodeAgent.ts +0 -43
@@ -0,0 +1,128 @@
1
+ import { dynamicTool, jsonSchema } from "ai";
2
+
3
/** ElevenLabs API origin used when `options.baseUrl` is not provided. */
const DEFAULT_BASE_URL = "https://api.elevenlabs.io";
/** Model id used when neither the tool call nor the options name one. */
const DEFAULT_MODEL_ID = "eleven_turbo_v2_5";
/** Voice id used when neither the tool call nor the options name one. */
const DEFAULT_VOICE_ID = "21m00Tcm4TlvDq8ikWAM";
/** Key under which the tool is registered in the returned toolset. */
const TOOL_NAME = "elevenlabs_text_to_speech";

/**
 * Build the JSON-schema fragment shared by every string argument of the tool:
 * a string of at least one character, with a description.
 *
 * @param {string} description
 */
const nonEmptyStringProperty = (description) => ({
  type: "string",
  minLength: 1,
  description,
});

/**
 * JSON schema for the tool input: `text` is required, the remaining
 * properties are optional, and unknown top-level properties are rejected.
 */
const inputSchema = {
  type: "object",
  properties: {
    text: nonEmptyStringProperty("Text to synthesize into speech."),
    voiceId: nonEmptyStringProperty("Optional ElevenLabs voice id. Defaults to the configured voice."),
    modelId: nonEmptyStringProperty("Optional ElevenLabs model id. Defaults to the configured model."),
    voiceSettings: {
      type: "object",
      additionalProperties: true,
      description: "Optional ElevenLabs voice_settings payload.",
    },
  },
  required: ["text"],
  additionalProperties: false,
};
35
+
36
/**
 * Build a toolset that exposes a single ElevenLabs text-to-speech tool.
 *
 * The API key and a usable `fetch` are checked eagerly, so a misconfigured
 * toolset fails at construction time instead of on the first tool call.
 *
 * @param {import("./createElevenLabsTextToSpeechTool.ts").ElevenLabsTextToSpeechToolOptions} options
 * @returns {import("./createElevenLabsTextToSpeechTool.ts").ElevenLabsTextToSpeechToolset}
 * @throws {Error} When `options.apiKey` is missing or no `fetch` implementation is available.
 */
export function createElevenLabsTextToSpeechTool(options) {
  if (!options?.apiKey) {
    throw new Error("createElevenLabsTextToSpeechTool requires an ElevenLabs apiKey");
  }

  const fetchImpl = options.fetch ?? globalThis.fetch;
  if (typeof fetchImpl !== "function") {
    throw new Error("createElevenLabsTextToSpeechTool requires fetch");
  }

  // Settings resolved once here and reused by every invocation of the tool.
  const settings = {
    // Trailing slashes are stripped so the request path can be appended directly.
    baseUrl: (options.baseUrl ?? DEFAULT_BASE_URL).replace(/\/+$/, ""),
    defaultVoiceId: options.defaultVoiceId ?? DEFAULT_VOICE_ID,
    defaultModelId: options.defaultModelId ?? DEFAULT_MODEL_ID,
    fetchImpl,
  };

  const textToSpeechTool = dynamicTool({
    description: "Synthesize speech audio from text using ElevenLabs.",
    inputSchema: jsonSchema(inputSchema),
    // The API key is read from `options` on each call, not captured up front.
    execute: async (input) => synthesizeSpeech({ ...settings, apiKey: options.apiKey, input }),
  });

  return {
    tools: { [TOOL_NAME]: textToSpeechTool },
    toolNames: [TOOL_NAME],
  };
}
75
+
76
+ /**
77
+ * @param {{
78
+ * apiKey: string;
79
+ * baseUrl: string;
80
+ * defaultVoiceId: string;
81
+ * defaultModelId: string;
82
+ * fetchImpl: typeof fetch;
83
+ * input: unknown;
84
+ * }} params
85
+ */
86
+ async function synthesizeSpeech({ apiKey, baseUrl, defaultVoiceId, defaultModelId, fetchImpl, input }) {
87
+ const args = /** @type {import("./createElevenLabsTextToSpeechTool.ts").ElevenLabsTextToSpeechInput} */ (
88
+ input ?? {}
89
+ );
90
+ if (typeof args.text !== "string" || args.text.trim() === "") {
91
+ throw new Error("elevenlabs_text_to_speech requires non-empty text");
92
+ }
93
+
94
+ const voiceId = args.voiceId ?? defaultVoiceId;
95
+ const modelId = args.modelId ?? defaultModelId;
96
+ const body = {
97
+ text: args.text,
98
+ model_id: modelId,
99
+ ...(args.voiceSettings ? { voice_settings: args.voiceSettings } : {}),
100
+ };
101
+
102
+ const response = await fetchImpl(`${baseUrl}/v1/text-to-speech/${encodeURIComponent(voiceId)}`, {
103
+ method: "POST",
104
+ headers: {
105
+ Accept: "audio/mpeg",
106
+ "Content-Type": "application/json",
107
+ "xi-api-key": apiKey,
108
+ },
109
+ body: JSON.stringify(body),
110
+ });
111
+
112
+ if (!response.ok) {
113
+ const errorText = await response.text().catch(() => "");
114
+ throw new Error(
115
+ `ElevenLabs text-to-speech failed with ${response.status}${errorText ? `: ${errorText}` : ""}`,
116
+ );
117
+ }
118
+
119
+ const contentType = response.headers.get("content-type") ?? "audio/mpeg";
120
+ const bytes = new Uint8Array(await response.arrayBuffer());
121
+ return {
122
+ audioBase64: Buffer.from(bytes).toString("base64"),
123
+ contentType,
124
+ voiceId,
125
+ modelId,
126
+ byteLength: bytes.byteLength,
127
+ };
128
+ }
@@ -0,0 +1,33 @@
1
+ import type { Tool } from "ai";
2
+
3
/** Name under which the text-to-speech tool is registered in the toolset. */
type ElevenLabsTextToSpeechToolName = "elevenlabs_text_to_speech";

/** Arguments accepted by the `elevenlabs_text_to_speech` tool. */
export type ElevenLabsTextToSpeechInput = {
  /** Text to synthesize into speech. */
  text: string;
  /** Voice to use for this call; the configured default voice is used when omitted. */
  voiceId?: string;
  /** Model to use for this call; the configured default model is used when omitted. */
  modelId?: string;
  /** Forwarded as the request's `voice_settings` payload when present. */
  voiceSettings?: Record<string, unknown>;
};

/** Value returned by a successful `elevenlabs_text_to_speech` call. */
export type ElevenLabsTextToSpeechResult = {
  /** Audio bytes of the response, base64-encoded. */
  audioBase64: string;
  /** The response's `content-type` header, or `audio/mpeg` when the header is absent. */
  contentType: string;
  /** Voice id the request was made with. */
  voiceId: string;
  /** Model id the request was made with. */
  modelId: string;
  /** Number of audio bytes before base64 encoding. */
  byteLength: number;
};

/** Options for `createElevenLabsTextToSpeechTool`. */
export type ElevenLabsTextToSpeechToolOptions = {
  /** ElevenLabs API key; required. */
  apiKey: string;
  /** Voice used when a call does not name one. */
  defaultVoiceId?: string;
  /** Model used when a call does not name one. */
  defaultModelId?: string;
  /** API origin override; trailing slashes are ignored. */
  baseUrl?: string;
  /** `fetch` implementation to use instead of the global one. */
  fetch?: typeof fetch;
};

/** Toolset holding the single text-to-speech tool, plus the list of its tool names. */
export type ElevenLabsTextToSpeechToolset = {
  tools: { [Name in ElevenLabsTextToSpeechToolName]: Tool };
  toolNames: [ElevenLabsTextToSpeechToolName];
};

/**
 * Create an agent-callable ElevenLabs text-to-speech toolset.
 *
 * @throws When `options.apiKey` is missing or no `fetch` implementation is available.
 */
export declare function createElevenLabsTextToSpeechTool(
  options: ElevenLabsTextToSpeechToolOptions,
): ElevenLabsTextToSpeechToolset;
@@ -9,6 +9,9 @@ import { spawnSync } from "node:child_process";
9
9
  /**
10
10
  * @typedef {{ id: DiagnosticCheckId; run: (ctx: DiagnosticContext) => Promise<DiagnosticCheck>; }} DiagnosticCheckDef
11
11
  */
12
+ /**
13
+ * @typedef {{ provider?: string; model?: string; apiKey?: string }} DiagnosticHints
14
+ */
12
15
 
13
16
  // ---------------------------------------------------------------------------
14
17
  // Shared check helpers
@@ -187,6 +190,17 @@ const claudeStrategy = {
187
190
  // ---------------------------------------------------------------------------
188
191
  // Codex strategy
189
192
  // ---------------------------------------------------------------------------
193
/**
 * Resolve the OpenAI models endpoint, honoring OPENAI_BASE_URL (Azure, proxies,
 * OpenAI-compatible gateways, and hermetic test fixtures) the same way the
 * OpenAI SDK and codex do. Defaults to the public API when the variable is
 * unset, empty, or whitespace-only. A set-but-empty variable must not turn
 * the probe into the relative URL "/models", which fetch cannot resolve.
 * Trailing slashes on the base are ignored.
 * @param {Record<string, string | undefined>} env
 * @returns {string} Absolute URL of the `/models` endpoint.
 */
function openaiModelsUrl(env) {
  const configured = env.OPENAI_BASE_URL?.trim();
  // `||` (not `??`) so that "" falls back to the public endpoint as well.
  const base = (configured || "https://api.openai.com/v1").replace(/\/+$/, "");
  return `${base}/models`;
}
190
204
  // Combined API key validation + rate limit check via GET /v1/models (free, no tokens)
191
205
  const codexApiKeyAndRateLimitCheck = [
192
206
  {
@@ -203,7 +217,7 @@ const codexApiKeyAndRateLimitCheck = [
203
217
  };
204
218
  }
205
219
  try {
206
- const res = await fetch("https://api.openai.com/v1/models", {
220
+ const res = await fetch(openaiModelsUrl(ctx.env), {
207
221
  headers: { Authorization: `Bearer ${apiKey}` },
208
222
  signal: AbortSignal.timeout(4_000),
209
223
  });
@@ -255,7 +269,7 @@ const codexApiKeyAndRateLimitCheck = [
255
269
  };
256
270
  }
257
271
  try {
258
- const res = await fetch("https://api.openai.com/v1/models", {
272
+ const res = await fetch(openaiModelsUrl(ctx.env), {
259
273
  headers: { Authorization: `Bearer ${apiKey}` },
260
274
  signal: AbortSignal.timeout(4_000),
261
275
  });
@@ -429,15 +443,6 @@ const googleRateLimitCheck = {
429
443
  }
430
444
  },
431
445
  };
432
- const geminiStrategy = {
433
- agentId: "gemini",
434
- command: "gemini",
435
- checks: [
436
- checkCliInstalled("gemini", "Gemini CLI"),
437
- googleAuthCheck,
438
- googleRateLimitCheck,
439
- ],
440
- };
441
446
  const antigravityAuthSkip = {
442
447
  id: "api_key_valid",
443
448
  run: async () => {
@@ -472,15 +477,75 @@ const antigravityStrategy = {
472
477
  // ---------------------------------------------------------------------------
473
478
  // Pi strategy
474
479
  // ---------------------------------------------------------------------------
475
- const piStrategy = {
476
- agentId: "pi",
477
- command: "pi",
478
- checks: [
479
- checkCliInstalled("pi", "Pi"),
480
- googleAuthCheck,
481
- googleRateLimitCheck,
482
- ],
483
- };
480
+ /**
481
+ * Resolve the effective pi provider family from an explicit `--provider`, a
482
+ * `provider/model` prefix, or a bare model id's well-known prefix. Returns ""
483
+ * when undeterminable so callers fall back to pi's default (google) (#284).
484
+ * @param {DiagnosticHints | undefined} hints
485
+ * @returns {string}
486
+ */
487
+ function resolvePiProvider(hints) {
488
+ const explicit = (hints?.provider || "").trim().toLowerCase();
489
+ if (explicit) {
490
+ return explicit;
491
+ }
492
+ const model = typeof hints?.model === "string" ? hints.model.trim().toLowerCase() : "";
493
+ if (!model) {
494
+ return "";
495
+ }
496
+ if (model.includes("/")) {
497
+ return model.split("/")[0];
498
+ }
499
+ // Bare model id (no provider prefix) — infer the provider family from
500
+ // common id prefixes so diagnostics probe the right backend.
501
+ if (model.startsWith("gpt-") || model.startsWith("o1-") || model.startsWith("o3-") || model.startsWith("o4-") || model.startsWith("chatgpt")) {
502
+ return "openai";
503
+ }
504
+ if (model.startsWith("claude")) {
505
+ return "anthropic";
506
+ }
507
+ if (model.startsWith("gemini")) {
508
+ return "google";
509
+ }
510
+ return "";
511
+ }
512
/**
 * Pick the credential and rate-limit checks that match the provider pi will
 * use. OpenAI-family providers reuse the codex checks, Anthropic-family
 * providers reuse the claude checks, and anything else (including an
 * undeterminable provider) gets the google checks.
 *
 * @param {DiagnosticHints | undefined} hints
 * @returns {DiagnosticCheckDef[]}
 */
function piProviderChecks(hints) {
  switch (resolvePiProvider(hints)) {
    case "openai":
    case "openai-codex":
    case "azure":
    case "azure-openai":
      // Copy so callers never share the module-level array.
      return [...codexApiKeyAndRateLimitCheck];
    case "anthropic":
    case "claude":
      return [claudeApiKeyCheck, claudeRateLimitCheck];
    default:
      return [googleAuthCheck, googleRateLimitCheck];
  }
}
526
/**
 * Translate an explicit pi `apiKey` hint into the environment variable that
 * the selected provider's checks read.
 *
 * pi takes credentials through its `--api-key` option rather than the
 * environment, while diagnostics only see the process env; without this
 * mapping an apiKey-only pi run is misreported as "key missing" (#284).
 *
 * @param {string} command
 * @param {DiagnosticHints | undefined} hints
 * @returns {Record<string, string> | undefined} A single-entry env overlay, or
 *   undefined when the command is not "pi" or no apiKey was supplied.
 */
export function diagnosticApiKeyEnv(command, hints) {
  const apiKey = hints?.apiKey;
  if (command !== "pi" || !apiKey) {
    return undefined;
  }

  const provider = resolvePiProvider(hints);
  // Anything not recognized as OpenAI- or Anthropic-family uses the google variable.
  let envVar = "GOOGLE_API_KEY";
  if (["openai", "openai-codex", "azure", "azure-openai"].includes(provider)) {
    envVar = "OPENAI_API_KEY";
  } else if (provider === "anthropic" || provider === "claude") {
    envVar = "ANTHROPIC_API_KEY";
  }
  return { [envVar]: apiKey };
}
484
549
  // ---------------------------------------------------------------------------
485
550
  // Amp strategy
486
551
  // ---------------------------------------------------------------------------
@@ -523,14 +588,20 @@ const strategies = {
523
588
  codex: codexStrategy,
524
589
  antigravity: antigravityStrategy,
525
590
  agy: antigravityStrategy,
526
- gemini: geminiStrategy,
527
- pi: piStrategy,
528
591
  amp: ampStrategy,
529
592
  };
530
593
  /**
531
594
  * @param {string} command
595
+ * @param {DiagnosticHints} [hints]
532
596
  * @returns {AgentDiagnosticStrategy | null}
533
597
  */
534
export function getDiagnosticStrategy(command, hints) {
  // Every command except "pi" is served from the static `strategies` table;
  // unknown commands yield null.
  if (command !== "pi") {
    return strategies[command] ?? null;
  }
  // pi's checks depend on the provider/model hints, so its strategy is built
  // per call: the install check first, then the provider-specific checks.
  const installCheck = checkCliInstalled("pi", "Pi");
  return {
    agentId: "pi",
    command: "pi",
    checks: [installCheck, ...piProviderChecks(hints)],
  };
}
@@ -1,4 +1,4 @@
1
- import { getDiagnosticStrategy } from "./getDiagnosticStrategy.js";
1
+ import { diagnosticApiKeyEnv, getDiagnosticStrategy } from "./getDiagnosticStrategy.js";
2
2
  import { runDiagnostics } from "./runDiagnostics.js";
3
3
  /** @typedef {import("./DiagnosticReport.ts").DiagnosticReport} DiagnosticReport */
4
4
 
@@ -6,11 +6,14 @@ import { runDiagnostics } from "./runDiagnostics.js";
6
6
  * @param {string} command
7
7
  * @param {Record<string, string>} env
8
8
  * @param {string} cwd
9
+ * @param {{ provider?: string; model?: string; apiKey?: string }} [hints]
9
10
  * @returns {Promise<DiagnosticReport> | null}
10
11
  */
11
export function launchDiagnostics(command, env, cwd, hints) {
  const strategy = getDiagnosticStrategy(command, hints);
  if (!strategy) {
    // No diagnostics are defined for this command.
    return null;
  }

  // When an explicit apiKey hint maps to an env var, overlay it on a copy of
  // `env` (the overlay wins on conflicts); otherwise pass `env` through as-is.
  const injected = diagnosticApiKeyEnv(command, hints);
  const context = {
    env: injected ? { ...env, ...injected } : env,
    cwd,
  };

  // A failed diagnostic run resolves to null instead of rejecting.
  return runDiagnostics(strategy, context).catch(() => null);
}
@@ -0,0 +1,13 @@
1
+ import type { DocumentParsingResult } from "./DocumentParsingResult.ts";
2
+
3
/** Provider ids that this type names explicitly. */
type KnownDocumentParsingProviderName = "firecrawl" | "mistral-ocr" | "llamaparse";

/**
 * Contract for a document parsing backend: a name plus a single
 * `parseDocument` operation.
 */
export type DocumentParsingProvider = {
  /**
   * Provider identifier. Any string is accepted. The
   * `string & Record<never, never>` member keeps the known ids from being
   * absorbed into plain `string` (which `"a" | string` would do), so the
   * named ids stay visible to the type checker and to editor completion.
   */
  name: KnownDocumentParsingProviderName | (string & Record<never, never>);
  /** Parse one document and resolve with the provider's result. */
  parseDocument: (input: {
    /** Where the document comes from: a URL, base64 data, or literal text. */
    source:
      | { type: "url"; url: string }
      | { type: "base64"; data: string; mimeType?: string; filename?: string }
      | { type: "text"; text: string; filename?: string };
    /** Requested output format, when the caller has a preference. */
    outputFormat?: "text" | "markdown" | "json";
    /** Optional instructions string; how it is used is up to the provider. */
    instructions?: string;
  }) => Promise<DocumentParsingResult>;
};
@@ -0,0 +1,13 @@
1
/** Provider ids that this type names explicitly. */
type KnownDocumentParsingResultProvider = "firecrawl" | "mistral-ocr" | "llamaparse";

/** Result of parsing one document. */
export type DocumentParsingResult = {
  /**
   * Identifier of the provider that produced the result. Any string is
   * accepted. The `string & Record<never, never>` member keeps the known ids
   * from being absorbed into plain `string` (which `"a" | string` would do),
   * so they stay visible to the type checker and to editor completion.
   */
  provider: KnownDocumentParsingResultProvider | (string & Record<never, never>);
  /** Text content of the document; always present. */
  text: string;
  /** Markdown rendering, when available. */
  markdown?: string;
  /** Per-page content, when available. */
  pages?: Array<{
    index: number;
    text?: string;
    markdown?: string;
    images?: unknown[];
  }>;
  /** Additional metadata; the shape is not constrained by this type. */
  metadata?: Record<string, unknown>;
  /** Unprocessed payload; the shape is not constrained by this type. */
  raw?: unknown;
};
@@ -0,0 +1,4 @@
1
/** Tools keyed by tool name, plus the list of tool names. */
export type DocumentParsingToolset = {
  tools: { [toolName: string]: import("ai").Tool };
  toolNames: Array<string>;
};
@@ -0,0 +1,9 @@
1
+ import type { DocumentParsingProvider } from "./DocumentParsingProvider.ts";
2
+
3
/** Ids of the providers that can be selected by name. */
type BuiltInDocumentParsingProviderId = "firecrawl" | "mistral-ocr" | "llamaparse";

/** Options for `createDocumentParsingToolset`; every field is optional. */
export type DocumentParsingToolsetOptions = {
  /** A provider id, or a custom provider object. */
  provider?: BuiltInDocumentParsingProviderId | DocumentParsingProvider;
  /** API key for the selected provider. */
  apiKey?: string;
  /** Base URL override for the selected provider. */
  baseUrl?: string;
  /** Name override for the exposed tool. */
  toolName?: string;
  /** `fetch` implementation override. */
  fetch?: typeof fetch;
};
@@ -0,0 +1,9 @@
1
+ import type { DocumentParsingToolset } from "./DocumentParsingToolset.js";
2
+ import type { DocumentParsingToolsetOptions } from "./DocumentParsingToolsetOptions.js";
3
+
4
+ export type { DocumentParsingProvider } from "./DocumentParsingProvider.js";
5
+ export type { DocumentParsingResult } from "./DocumentParsingResult.js";
6
+ export type { DocumentParsingToolset } from "./DocumentParsingToolset.js";
7
+ export type { DocumentParsingToolsetOptions } from "./DocumentParsingToolsetOptions.js";
8
+
9
/**
 * Create a document parsing toolset. `options` may be omitted entirely.
 */
export declare function createDocumentParsingToolset(
  options?: DocumentParsingToolsetOptions,
): DocumentParsingToolset;