@smithers-orchestrator/agents 0.24.2 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/package.json +15 -5
  2. package/src/AgentLike.ts +5 -0
  3. package/src/AmpAgent.js +15 -5
  4. package/src/AmpAgentOptions.ts +6 -0
  5. package/src/BaseCliAgent/BaseCliAgent.js +198 -10
  6. package/src/BaseCliAgent/createAgentStdoutTextEmitter.js +21 -3
  7. package/src/BaseCliAgent/index.d.ts +467 -0
  8. package/src/ClaudeCodeAgent.js +6 -2
  9. package/src/CodexAgent.js +4 -0
  10. package/src/GeminiAgent.js +34 -224
  11. package/src/GeminiAgentOptions.ts +4 -9
  12. package/src/OpenCodeAgent.js +2 -12
  13. package/src/OpenCodeAgentOptions.ts +19 -0
  14. package/src/cli-capabilities/CliAgentCapabilityAdapterId.ts +0 -1
  15. package/src/cli-capabilities/getCliAgentCapabilityDoctorReport.js +3 -2
  16. package/src/cli-capabilities/getCliAgentCapabilityReport.js +0 -6
  17. package/src/cli-surface/cliAgentSurfaceManifest.js +1 -40
  18. package/src/createElevenLabsTextToSpeechTool.js +128 -0
  19. package/src/createElevenLabsTextToSpeechTool.ts +33 -0
  20. package/src/diagnostics/getDiagnosticStrategy.js +13 -12
  21. package/src/document-parsing/DocumentParsingProvider.ts +13 -0
  22. package/src/document-parsing/DocumentParsingResult.ts +13 -0
  23. package/src/document-parsing/DocumentParsingToolset.ts +4 -0
  24. package/src/document-parsing/DocumentParsingToolsetOptions.ts +9 -0
  25. package/src/document-parsing/createDocumentParsingToolset.d.ts +9 -0
  26. package/src/document-parsing/createDocumentParsingToolset.js +416 -0
  27. package/src/http/CreateHttpToolOptions.ts +4 -0
  28. package/src/http/HttpToolAuth.ts +15 -0
  29. package/src/http/HttpToolInput.ts +11 -0
  30. package/src/http/HttpToolOutput.ts +7 -0
  31. package/src/http/createHttpTool.js +136 -0
  32. package/src/image-generation/ImageGenerationProvider.ts +7 -0
  33. package/src/image-generation/ImageGenerationRequest.ts +8 -0
  34. package/src/image-generation/ImageGenerationResult.ts +10 -0
  35. package/src/image-generation/ImageGenerationToolOptions.ts +10 -0
  36. package/src/image-generation/createImageGenerationTool.d.ts +18 -0
  37. package/src/image-generation/createImageGenerationTool.js +92 -0
  38. package/src/index.d.ts +490 -147
  39. package/src/index.js +23 -5
  40. package/src/streamResultToGenerateResult.js +55 -26
  41. package/src/transcription/createTranscriptionTool.js +182 -0
  42. package/src/transcription/createTranscriptionTool.ts +29 -0
  43. package/src/transcription/index.js +1 -0
  44. package/src/transcription/index.ts +6 -0
  45. package/src/web-search/GroundedWebSearchProvider.ts +21 -0
  46. package/src/web-search/GroundedWebSearchToolset.ts +6 -0
  47. package/src/web-search/createBraveSearchProvider.js +53 -0
  48. package/src/web-search/createExaSearchProvider.js +72 -0
  49. package/src/web-search/createGroundedWebSearchToolset.js +110 -0
  50. package/src/web-search/createSerperSearchProvider.js +63 -0
  51. package/src/web-search/createTavilySearchProvider.js +59 -0
  52. package/src/web-search/index.js +5 -0
  53. package/src/zodToOpenAISchema.js +4 -0
  54. package/src/OpenCodeAgent.ts +0 -43
@@ -1,18 +1,27 @@
1
- import { BaseCliAgent, pushFlag, pushList, isRecord, asString, truncate, toolKindFromName, createSyntheticIdGenerator, } from "./BaseCliAgent/index.js";
2
- import { normalizeCapabilityStringList, } from "./capability-registry/index.js";
3
- /** @typedef {import("./BaseCliAgent/BaseCliAgentOptions.ts").BaseCliAgentOptions} BaseCliAgentOptions */
1
+ import { BaseCliAgent } from "./BaseCliAgent/index.js";
2
+ import { normalizeCapabilityStringList } from "./capability-registry/index.js";
3
+
4
4
  /** @typedef {import("./capability-registry/AgentCapabilityRegistry.ts").AgentCapabilityRegistry} AgentCapabilityRegistry */
5
5
  /** @typedef {import("./BaseCliAgent/CliOutputInterpreter.ts").CliOutputInterpreter} CliOutputInterpreter */
6
6
  /** @typedef {import("./GeminiAgentOptions.ts").GeminiAgentOptions} GeminiAgentOptions */
7
7
 
8
+ export const GEMINI_SUNSET_MESSAGE = [
9
+ "Gemini CLI support has been sunset in Smithers.",
10
+ "Use AntigravityAgent with Google's `agy` CLI instead.",
11
+ "Example:",
12
+ ' import { AntigravityAgent } from "smithers-orchestrator";',
13
+ ' const agent = new AntigravityAgent({ model: "gemini-3.1-pro-preview", cwd: process.cwd() });',
14
+ ].join("\n");
15
+
8
16
  /**
9
17
  * @param {GeminiAgentOptions} opts
10
18
  */
11
19
  function resolveGeminiBuiltIns(opts) {
12
20
  return opts.allowedTools?.length
13
21
  ? normalizeCapabilityStringList(opts.allowedTools)
14
- : ["default"];
22
+ : ["sunset"];
15
23
  }
24
+
16
25
  /**
17
26
  * @param {GeminiAgentOptions} [opts]
18
27
  * @returns {AgentCapabilityRegistry}
@@ -23,9 +32,9 @@ export function createGeminiCapabilityRegistry(opts = {}) {
23
32
  engine: "gemini",
24
33
  runtimeTools: {},
25
34
  mcp: {
26
- bootstrap: "allow-list",
35
+ bootstrap: "unsupported",
27
36
  supportsProjectScope: false,
28
- supportsUserScope: true,
37
+ supportsUserScope: false,
29
38
  },
30
39
  skills: {
31
40
  supportsSkills: false,
@@ -38,14 +47,16 @@ export function createGeminiCapabilityRegistry(opts = {}) {
38
47
  builtIns: resolveGeminiBuiltIns(opts),
39
48
  };
40
49
  }
50
+
41
51
  /**
42
- * @deprecated Use AntigravityAgent for new Google CLI integrations. GeminiAgent
43
- * remains for legacy and enterprise Gemini CLI setups.
52
+ * @deprecated Gemini CLI support has been sunset. Use AntigravityAgent with
53
+ * Google's `agy` CLI instead.
44
54
  */
45
55
  export class GeminiAgent extends BaseCliAgent {
46
56
  opts;
47
57
  capabilities;
48
58
  cliEngine = "gemini";
59
+
49
60
  /**
50
61
  * @param {GeminiAgentOptions} [opts]
51
62
  */
@@ -54,228 +65,27 @@ export class GeminiAgent extends BaseCliAgent {
54
65
  this.opts = opts;
55
66
  this.capabilities = createGeminiCapabilityRegistry(opts);
56
67
  }
68
+
57
69
  /**
58
70
  * @returns {CliOutputInterpreter}
59
71
  */
60
72
  createOutputInterpreter() {
61
- let sessionId;
62
- let finalAnswer = "";
63
- let didEmitCompleted = false;
64
- const nextSyntheticId = createSyntheticIdGenerator();
65
- /**
66
- * @param {string} line
67
- * @returns {AgentCliEvent[]}
68
- */
69
- const parseLine = (line) => {
70
- const trimmed = line.trim();
71
- if (!trimmed)
72
- return [];
73
- let payload;
74
- try {
75
- payload = JSON.parse(trimmed);
76
- }
77
- catch {
78
- return [];
79
- }
80
- if (!isRecord(payload))
81
- return [];
82
- const type = asString(payload.type);
83
- if (!type)
84
- return [];
85
- if (type === "init") {
86
- const resume = asString(payload.session_id);
87
- if (resume) {
88
- sessionId = resume;
89
- }
90
- return [{
91
- type: "started",
92
- engine: this.cliEngine,
93
- title: "Gemini CLI",
94
- resume: sessionId,
95
- detail: {
96
- model: asString(payload.model),
97
- },
98
- }];
99
- }
100
- if (type === "MESSAGE") {
101
- const role = asString(payload.role);
102
- const content = asString(payload.content);
103
- if (role === "assistant" && content) {
104
- if (payload.delta === true) {
105
- finalAnswer += content;
106
- }
107
- else {
108
- finalAnswer = content;
109
- }
110
- }
111
- return [];
112
- }
113
- if (type === "TOOL_USE") {
114
- const toolName = asString(payload.tool_name) ?? "tool";
115
- const toolId = asString(payload.tool_id) ?? nextSyntheticId("gemini-tool");
116
- return [{
117
- type: "action",
118
- engine: this.cliEngine,
119
- phase: "started",
120
- entryType: "thought",
121
- action: {
122
- id: toolId,
123
- kind: toolKindFromName(toolName),
124
- title: toolName,
125
- detail: {
126
- parameters: payload.parameters,
127
- },
128
- },
129
- message: `Running ${toolName}`,
130
- level: "info",
131
- }];
132
- }
133
- if (type === "TOOL_RESULT") {
134
- const toolId = asString(payload.tool_id) ?? nextSyntheticId("gemini-tool");
135
- const ok = asString(payload.status) !== "error";
136
- const error = isRecord(payload.error) ? asString(payload.error.message) : undefined;
137
- const output = asString(payload.output);
138
- return [{
139
- type: "action",
140
- engine: this.cliEngine,
141
- phase: "completed",
142
- entryType: "thought",
143
- action: {
144
- id: toolId,
145
- kind: "tool",
146
- title: "tool result",
147
- detail: {
148
- status: asString(payload.status),
149
- output: output ? truncate(output, 400) : undefined,
150
- },
151
- },
152
- message: error ?? output,
153
- ok,
154
- level: ok ? "info" : "warning",
155
- }];
156
- }
157
- if (type === "ERROR") {
158
- return [{
159
- type: "action",
160
- engine: this.cliEngine,
161
- phase: "completed",
162
- entryType: "thought",
163
- action: {
164
- id: nextSyntheticId("gemini-warning"),
165
- kind: "warning",
166
- title: "warning",
167
- detail: {
168
- severity: asString(payload.severity),
169
- },
170
- },
171
- message: asString(payload.message),
172
- ok: asString(payload.severity) !== "error",
173
- level: asString(payload.severity) === "error" ? "error" : "warning",
174
- }];
175
- }
176
- if (type === "RESULT") {
177
- if (didEmitCompleted)
178
- return [];
179
- didEmitCompleted = true;
180
- return [{
181
- type: "completed",
182
- engine: this.cliEngine,
183
- ok: asString(payload.status) !== "error",
184
- answer: finalAnswer || asString(payload.response),
185
- resume: sessionId,
186
- usage: isRecord(payload.stats) ? payload.stats : undefined,
187
- }];
188
- }
189
- return [];
190
- };
191
73
  return {
192
- onStdoutLine: parseLine,
193
- onExit: (result) => {
194
- if (didEmitCompleted)
195
- return [];
196
- if (result.exitCode === 0)
197
- return [];
198
- didEmitCompleted = true;
199
- return [{
200
- type: "completed",
201
- engine: this.cliEngine,
202
- ok: false,
203
- answer: finalAnswer || undefined,
204
- error: result.stderr.trim() || `Gemini exited with code ${result.exitCode}`,
205
- resume: sessionId,
206
- }];
207
- },
74
+ onStdoutLine: () => [],
75
+ onExit: () => [{
76
+ type: "completed",
77
+ engine: this.cliEngine,
78
+ ok: false,
79
+ error: GEMINI_SUNSET_MESSAGE,
80
+ }],
208
81
  };
209
82
  }
210
- /**
211
- * @param {{ prompt: string; systemPrompt?: string; cwd: string; options: any; }} params
212
- */
213
- async buildCommand(params) {
214
- const args = [];
215
- const yoloEnabled = this.opts.yolo ?? this.yolo;
216
- // Default to "json" output format to separate model responses from tool
217
- // output text. With "text" format, tool call results (file contents etc.)
218
- // are concatenated into the response, making JSON extraction unreliable.
219
- const outputFormat = this.opts.outputFormat ??
220
- (params.options?.onEvent ? "stream-json" : "json");
221
- const resumeSession = typeof params.options?.resumeSession === "string"
222
- ? params.options.resumeSession
223
- : this.opts.resume;
224
- if (this.opts.debug)
225
- args.push("--debug");
226
- pushFlag(args, "--model", this.opts.model ?? this.model);
227
- if (this.opts.sandbox)
228
- args.push("--sandbox");
229
- if (this.opts.approvalMode) {
230
- pushFlag(args, "--approval-mode", this.opts.approvalMode);
231
- }
232
- else if (yoloEnabled) {
233
- args.push("--yolo");
234
- }
235
- if (this.opts.experimentalAcp)
236
- args.push("--experimental-acp");
237
- pushList(args, "--allowed-mcp-server-names", this.opts.allowedMcpServerNames);
238
- if (this.opts.allowedTools !== undefined) {
239
- if (this.opts.allowedTools.length === 0) {
240
- pushFlag(args, "--allowed-tools", "");
241
- }
242
- else {
243
- pushList(args, "--allowed-tools", this.opts.allowedTools);
244
- }
245
- }
246
- pushList(args, "--extensions", this.opts.extensions);
247
- if (this.opts.listExtensions)
248
- args.push("--list-extensions");
249
- pushFlag(args, "--resume", resumeSession);
250
- if (this.opts.listSessions)
251
- args.push("--list-sessions");
252
- pushFlag(args, "--delete-session", this.opts.deleteSession);
253
- pushList(args, "--include-directories", this.opts.includeDirectories);
254
- if (this.opts.screenReader)
255
- args.push("--screen-reader");
256
- pushFlag(args, "--output-format", outputFormat);
257
- if (this.extraArgs?.length)
258
- args.push(...this.extraArgs);
259
- const systemPrefix = params.systemPrompt
260
- ? `${params.systemPrompt}\n\n`
261
- : "";
262
- // Reinforce raw JSON output requirement in the prompt for Gemini models
263
- // which tend to forget structured output instructions on long responses.
264
- const jsonReminder = params.prompt?.includes("REQUIRED OUTPUT")
265
- ? "\n\nREMINDER: Your response MUST be ONLY the required raw JSON object. Do not include prose, markdown, or code fences. The first character must be `{` and the last character must be `}`.\n"
266
- : "";
267
- const fullPrompt = `${systemPrefix}${params.prompt ?? ""}${jsonReminder}`;
268
- args.push("--prompt", fullPrompt);
269
- const accountEnv = {};
270
- if (this.opts.configDir)
271
- accountEnv.GEMINI_DIR = this.opts.configDir;
272
- if (this.opts.apiKey)
273
- accountEnv.GEMINI_API_KEY = this.opts.apiKey;
274
- return {
275
- command: "gemini",
276
- args,
277
- outputFormat,
278
- env: Object.keys(accountEnv).length > 0 ? accountEnv : undefined,
279
- };
83
+
84
+ async generate() {
85
+ throw new Error(GEMINI_SUNSET_MESSAGE);
86
+ }
87
+
88
+ async buildCommand() {
89
+ throw new Error(GEMINI_SUNSET_MESSAGE);
280
90
  }
281
91
  }
@@ -1,9 +1,8 @@
1
1
  import type { BaseCliAgentOptions } from "./BaseCliAgent/BaseCliAgentOptions";
2
2
 
3
3
  /**
4
- * @deprecated Use AntigravityAgentOptions with the Antigravity CLI (`agy`) for
5
- * new Google CLI integrations. GeminiAgentOptions remains for legacy and
6
- * enterprise Gemini CLI setups.
4
+ * @deprecated Gemini CLI support has been sunset. Use AntigravityAgentOptions
5
+ * with the Antigravity CLI (`agy`) for Google CLI integrations.
7
6
  */
8
7
  export type GeminiAgentOptions = BaseCliAgentOptions & {
9
8
  debug?: boolean;
@@ -23,15 +22,11 @@ export type GeminiAgentOptions = BaseCliAgentOptions & {
23
22
  screenReader?: boolean;
24
23
  outputFormat?: "text" | "json" | "stream-json";
25
24
  /**
26
- * Path to an isolated Gemini CLI config directory. Sets `GEMINI_DIR` on the
27
- * spawned process so this invocation uses the credentials stored at
28
- * `<configDir>/oauth_creds.json` (instead of the user's default
29
- * `~/.gemini/`). Use this to run multiple Gemini accounts side-by-side.
25
+ * Legacy option retained only so old constructor calls type-check.
30
26
  */
31
27
  configDir?: string;
32
28
  /**
33
- * Gemini API key. Sets `GEMINI_API_KEY` on the spawned process for
34
- * API-billed invocations.
29
+ * Legacy option retained only so old constructor calls type-check.
35
30
  */
36
31
  apiKey?: string;
37
32
  };
@@ -12,17 +12,7 @@ import { normalizeCapabilityStringList } from "./capability-registry/index.js";
12
12
 
13
13
  /** @typedef {import("./BaseCliAgent/index.ts").BaseCliAgentOptions} BaseCliAgentOptions */
14
14
  /** @typedef {import("./capability-registry/index.ts").AgentCapabilityRegistry} AgentCapabilityRegistry */
15
-
16
- /**
17
- * @typedef {BaseCliAgentOptions & {
18
- * model?: string;
19
- * agentName?: string;
20
- * attachFiles?: string[];
21
- * continueSession?: boolean;
22
- * sessionId?: string;
23
- * variant?: "high" | "medium" | "low";
24
- * }} OpenCodeAgentOptions
25
- */
15
+ /** @typedef {import("./OpenCodeAgentOptions.ts").OpenCodeAgentOptions} OpenCodeAgentOptions */
26
16
 
27
17
  /** @typedef {import("./BaseCliAgent/index.ts").CliOutputInterpreter} CliOutputInterpreter */
28
18
 
@@ -80,7 +70,7 @@ export function createOpenCodeCapabilityRegistry(opts = {}) {
80
70
  *
81
71
  * Usage:
82
72
  * const agent = new OpenCodeAgent({
83
- * model: "anthropic/claude-opus-4-20250514",
73
+ * model: "anthropic/claude-opus-4-8",
84
74
  * yolo: true,
85
75
  * });
86
76
  * const result = await agent.generate({
@@ -0,0 +1,19 @@
1
+ import type { BaseCliAgentOptions } from "./BaseCliAgent";
2
+
3
+ /**
4
+ * Configuration options for the OpenCodeAgent.
5
+ */
6
+ export type OpenCodeAgentOptions = BaseCliAgentOptions & {
7
+ /** Model identifier (e.g., "anthropic/claude-opus-4-8", "openai/gpt-5.4") */
8
+ model?: string;
9
+ /** OpenCode agent name (maps to --agent flag, selects predefined agent config) */
10
+ agentName?: string;
11
+ /** Files to attach to the prompt via -f flags */
12
+ attachFiles?: string[];
13
+ /** Continue a previous session */
14
+ continueSession?: boolean;
15
+ /** Resume a specific session by ID */
16
+ sessionId?: string;
17
+ /** Provider-specific model variant/reasoning effort level */
18
+ variant?: string;
19
+ };
@@ -4,7 +4,6 @@ export type CliAgentCapabilityAdapterId =
4
4
  | "antigravity"
5
5
  | "codex"
6
6
  | "forge"
7
- | "gemini"
8
7
  | "kimi"
9
8
  | "opencode"
10
9
  | "pi"
@@ -116,10 +116,11 @@ function diagnoseSurfaceContract(entry) {
116
116
  }
117
117
 
118
118
  /**
119
+ * @param {CliAgentCapabilityReportEntry[]} [entries]
119
120
  * @returns {CliAgentCapabilityDoctorReport}
120
121
  */
121
- export function getCliAgentCapabilityDoctorReport() {
122
- const agents = getCliAgentCapabilityReport().map((entry) => {
122
+ export function getCliAgentCapabilityDoctorReport(entries = getCliAgentCapabilityReport()) {
123
+ const agents = entries.map((entry) => {
123
124
  const issues = [
124
125
  ...diagnoseCapabilityRegistry(entry.capabilities),
125
126
  ...diagnoseSurfaceContract(entry),
@@ -4,7 +4,6 @@ import { createAntigravityCapabilityRegistry } from "../AntigravityAgent.js";
4
4
  import { createClaudeCodeCapabilityRegistry } from "../ClaudeCodeAgent.js";
5
5
  import { createCodexCapabilityRegistry } from "../CodexAgent.js";
6
6
  import { createForgeCapabilityRegistry } from "../ForgeAgent.js";
7
- import { createGeminiCapabilityRegistry } from "../GeminiAgent.js";
8
7
  import { createKimiCapabilityRegistry } from "../KimiAgent.js";
9
8
  import { createOpenCodeCapabilityRegistry } from "../OpenCodeAgent.js";
10
9
  import { createPiCapabilityRegistry } from "../PiAgent.js";
@@ -33,11 +32,6 @@ const CLI_AGENT_CAPABILITY_ADAPTERS = [
33
32
  binary: "agy",
34
33
  buildRegistry: () => createAntigravityCapabilityRegistry(),
35
34
  },
36
- {
37
- id: "gemini",
38
- binary: "gemini",
39
- buildRegistry: () => createGeminiCapabilityRegistry(),
40
- },
41
35
  {
42
36
  id: "forge",
43
37
  binary: "forge",
@@ -97,6 +97,7 @@ export const CLI_AGENT_SURFACE_MANIFEST = [
97
97
  "--color",
98
98
  "--json",
99
99
  "--output-last-message",
100
+ "-",
100
101
  ],
101
102
  supportedFlags: [],
102
103
  unsupportedFlags: [],
@@ -213,45 +214,6 @@ export const CLI_AGENT_SURFACE_MANIFEST = [
213
214
  notes: "Smithers maps native session ids to `agy --conversation <id>`.",
214
215
  },
215
216
  },
216
- {
217
- id: "gemini",
218
- displayName: "Gemini",
219
- binary: "gemini",
220
- packageExport: "GeminiAgent",
221
- defaultOutputFormat: "json",
222
- docsUrls: ["https://github.com/google-gemini/gemini-cli"],
223
- emittedFlags: [
224
- "--debug",
225
- "--model",
226
- "--sandbox",
227
- "--approval-mode",
228
- "--yolo",
229
- "--experimental-acp",
230
- "--allowed-mcp-server-names",
231
- "--allowed-tools",
232
- "--extensions",
233
- "--list-extensions",
234
- "--resume",
235
- "--list-sessions",
236
- "--delete-session",
237
- "--include-directories",
238
- "--screen-reader",
239
- "--output-format",
240
- "--prompt",
241
- ],
242
- supportedFlags: [],
243
- unsupportedFlags: [],
244
- optionMappings: [
245
- { option: "configDir", env: "GEMINI_DIR" },
246
- { option: "apiKey", env: "GEMINI_API_KEY" },
247
- { option: "resume", flag: "--resume" },
248
- ],
249
- resume: {
250
- kind: "flag",
251
- emitted: ["--resume"],
252
- notes: "Legacy Gemini CLI session id.",
253
- },
254
- },
255
217
  {
256
218
  id: "pi",
257
219
  displayName: "Pi",
@@ -287,7 +249,6 @@ export const CLI_AGENT_SURFACE_MANIFEST = [
287
249
  "--no-themes",
288
250
  "--thinking",
289
251
  "--verbose",
290
- "--files",
291
252
  ],
292
253
  supportedFlags: [],
293
254
  unsupportedFlags: [],
@@ -0,0 +1,128 @@
1
+ import { dynamicTool, jsonSchema } from "ai";
2
+
3
+ const DEFAULT_BASE_URL = "https://api.elevenlabs.io";
4
+ const DEFAULT_MODEL_ID = "eleven_turbo_v2_5";
5
+ const DEFAULT_VOICE_ID = "21m00Tcm4TlvDq8ikWAM";
6
+ const TOOL_NAME = "elevenlabs_text_to_speech";
7
+
8
+ const inputSchema = {
9
+ type: "object",
10
+ properties: {
11
+ text: {
12
+ type: "string",
13
+ minLength: 1,
14
+ description: "Text to synthesize into speech.",
15
+ },
16
+ voiceId: {
17
+ type: "string",
18
+ minLength: 1,
19
+ description: "Optional ElevenLabs voice id. Defaults to the configured voice.",
20
+ },
21
+ modelId: {
22
+ type: "string",
23
+ minLength: 1,
24
+ description: "Optional ElevenLabs model id. Defaults to the configured model.",
25
+ },
26
+ voiceSettings: {
27
+ type: "object",
28
+ additionalProperties: true,
29
+ description: "Optional ElevenLabs voice_settings payload.",
30
+ },
31
+ },
32
+ required: ["text"],
33
+ additionalProperties: false,
34
+ };
35
+
36
+ /**
37
+ * Create an agent-callable ElevenLabs text-to-speech tool.
38
+ *
39
+ * @param {import("./createElevenLabsTextToSpeechTool.ts").ElevenLabsTextToSpeechToolOptions} options
40
+ * @returns {import("./createElevenLabsTextToSpeechTool.ts").ElevenLabsTextToSpeechToolset}
41
+ */
42
+ export function createElevenLabsTextToSpeechTool(options) {
43
+ if (!options?.apiKey) {
44
+ throw new Error("createElevenLabsTextToSpeechTool requires an ElevenLabs apiKey");
45
+ }
46
+
47
+ const fetchImpl = options.fetch ?? globalThis.fetch;
48
+ if (typeof fetchImpl !== "function") {
49
+ throw new Error("createElevenLabsTextToSpeechTool requires fetch");
50
+ }
51
+
52
+ const baseUrl = (options.baseUrl ?? DEFAULT_BASE_URL).replace(/\/+$/, "");
53
+ const defaultVoiceId = options.defaultVoiceId ?? DEFAULT_VOICE_ID;
54
+ const defaultModelId = options.defaultModelId ?? DEFAULT_MODEL_ID;
55
+
56
+ return {
57
+ tools: {
58
+ [TOOL_NAME]: dynamicTool({
59
+ description: "Synthesize speech audio from text using ElevenLabs.",
60
+ inputSchema: jsonSchema(inputSchema),
61
+ execute: async (input) =>
62
+ synthesizeSpeech({
63
+ apiKey: options.apiKey,
64
+ baseUrl,
65
+ defaultVoiceId,
66
+ defaultModelId,
67
+ fetchImpl,
68
+ input,
69
+ }),
70
+ }),
71
+ },
72
+ toolNames: [TOOL_NAME],
73
+ };
74
+ }
75
+
76
+ /**
77
+ * @param {{
78
+ * apiKey: string;
79
+ * baseUrl: string;
80
+ * defaultVoiceId: string;
81
+ * defaultModelId: string;
82
+ * fetchImpl: typeof fetch;
83
+ * input: unknown;
84
+ * }} params
85
+ */
86
+ async function synthesizeSpeech({ apiKey, baseUrl, defaultVoiceId, defaultModelId, fetchImpl, input }) {
87
+ const args = /** @type {import("./createElevenLabsTextToSpeechTool.ts").ElevenLabsTextToSpeechInput} */ (
88
+ input ?? {}
89
+ );
90
+ if (typeof args.text !== "string" || args.text.trim() === "") {
91
+ throw new Error("elevenlabs_text_to_speech requires non-empty text");
92
+ }
93
+
94
+ const voiceId = args.voiceId ?? defaultVoiceId;
95
+ const modelId = args.modelId ?? defaultModelId;
96
+ const body = {
97
+ text: args.text,
98
+ model_id: modelId,
99
+ ...(args.voiceSettings ? { voice_settings: args.voiceSettings } : {}),
100
+ };
101
+
102
+ const response = await fetchImpl(`${baseUrl}/v1/text-to-speech/${encodeURIComponent(voiceId)}`, {
103
+ method: "POST",
104
+ headers: {
105
+ Accept: "audio/mpeg",
106
+ "Content-Type": "application/json",
107
+ "xi-api-key": apiKey,
108
+ },
109
+ body: JSON.stringify(body),
110
+ });
111
+
112
+ if (!response.ok) {
113
+ const errorText = await response.text().catch(() => "");
114
+ throw new Error(
115
+ `ElevenLabs text-to-speech failed with ${response.status}${errorText ? `: ${errorText}` : ""}`,
116
+ );
117
+ }
118
+
119
+ const contentType = response.headers.get("content-type") ?? "audio/mpeg";
120
+ const bytes = new Uint8Array(await response.arrayBuffer());
121
+ return {
122
+ audioBase64: Buffer.from(bytes).toString("base64"),
123
+ contentType,
124
+ voiceId,
125
+ modelId,
126
+ byteLength: bytes.byteLength,
127
+ };
128
+ }
@@ -0,0 +1,33 @@
1
+ import type { Tool } from "ai";
2
+
3
+ export type ElevenLabsTextToSpeechInput = {
4
+ text: string;
5
+ voiceId?: string;
6
+ modelId?: string;
7
+ voiceSettings?: Record<string, unknown>;
8
+ };
9
+
10
+ export type ElevenLabsTextToSpeechResult = {
11
+ audioBase64: string;
12
+ contentType: string;
13
+ voiceId: string;
14
+ modelId: string;
15
+ byteLength: number;
16
+ };
17
+
18
+ export type ElevenLabsTextToSpeechToolOptions = {
19
+ apiKey: string;
20
+ defaultVoiceId?: string;
21
+ defaultModelId?: string;
22
+ baseUrl?: string;
23
+ fetch?: typeof fetch;
24
+ };
25
+
26
+ export type ElevenLabsTextToSpeechToolset = {
27
+ tools: Record<"elevenlabs_text_to_speech", Tool>;
28
+ toolNames: ["elevenlabs_text_to_speech"];
29
+ };
30
+
31
+ export declare function createElevenLabsTextToSpeechTool(
32
+ options: ElevenLabsTextToSpeechToolOptions,
33
+ ): ElevenLabsTextToSpeechToolset;