mulmocast 2.0.6 → 2.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40):
  1. package/lib/actions/audio.js +3 -1
  2. package/lib/agents/image_genai_agent.js +62 -56
  3. package/lib/agents/image_replicate_agent.js +9 -1
  4. package/lib/agents/index.d.ts +2 -1
  5. package/lib/agents/index.js +2 -1
  6. package/lib/agents/movie_genai_agent.d.ts +0 -4
  7. package/lib/agents/movie_genai_agent.js +3 -12
  8. package/lib/agents/test.d.ts +1 -0
  9. package/lib/agents/test.js +12 -0
  10. package/lib/agents/tts_elevenlabs_agent.js +42 -32
  11. package/lib/agents/tts_gemini_agent.js +8 -2
  12. package/lib/agents/tts_kotodama_agent.d.ts +5 -0
  13. package/lib/agents/tts_kotodama_agent.js +76 -0
  14. package/lib/agents/tts_openai_agent.js +1 -1
  15. package/lib/agents/utils.d.ts +1 -0
  16. package/lib/agents/utils.js +1 -0
  17. package/lib/types/agent.d.ts +3 -0
  18. package/lib/types/schema.d.ts +21 -0
  19. package/lib/types/schema.js +3 -2
  20. package/lib/utils/const.d.ts +1 -0
  21. package/lib/utils/const.js +1 -0
  22. package/lib/utils/context.d.ts +8 -0
  23. package/lib/utils/error_cause.d.ts +10 -0
  24. package/lib/utils/error_cause.js +22 -0
  25. package/lib/utils/provider2agent.d.ts +7 -0
  26. package/lib/utils/provider2agent.js +7 -0
  27. package/lib/utils/utils.d.ts +4 -0
  28. package/lib/utils/utils.js +21 -6
  29. package/package.json +4 -4
  30. package/scripts/test/README.md +161 -0
  31. package/scripts/test/test_all_elevenlabs_tts_model.json +111 -0
  32. package/scripts/test/test_all_gemini_tts_model.json +433 -0
  33. package/scripts/test/test_all_image.json +40 -0
  34. package/scripts/test/test_all_image.json~ +45 -0
  35. package/scripts/test/test_all_movie.json +33 -0
  36. package/scripts/test/test_all_movie.json~ +37 -0
  37. package/scripts/test/test_all_tts.json +83 -0
  38. package/scripts/test/test_all_tts.json~ +83 -0
  39. package/scripts/test/test_kotodama.json +57 -0
  40. package/scripts/test/test_kotodama.json~ +0 -0
@@ -2,7 +2,7 @@ import dotenv from "dotenv";
2
2
  import { GraphAI, TaskManager, GraphAILogger } from "graphai";
3
3
  import * as agents from "@graphai/vanilla";
4
4
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
5
- import { ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, ttsGeminiAgent, ttsElevenlabsAgent, addBGMAgent, combineAudioFilesAgent, mediaMockAgent, } from "../agents/index.js";
5
+ import { ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, ttsGeminiAgent, ttsElevenlabsAgent, ttsKotodamaAgent, addBGMAgent, combineAudioFilesAgent, mediaMockAgent, } from "../agents/index.js";
6
6
  import { text2SpeechProviderSchema } from "../types/index.js";
7
7
  import { fileCacheAgentFilter, nijovoiceTextAgentFilter } from "../utils/filters.js";
8
8
  import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
@@ -108,6 +108,7 @@ const graph_tts = {
108
108
  voice: ":preprocessor.voiceId",
109
109
  speed: ":preprocessor.speechOptions.speed",
110
110
  instructions: ":preprocessor.speechOptions.instruction",
111
+ decoration: ":preprocessor.speechOptions.decoration",
111
112
  model: ":preprocessor.model",
112
113
  },
113
114
  },
@@ -222,6 +223,7 @@ const audioAgents = {
222
223
  ttsNijivoiceAgent,
223
224
  ttsGoogleAgent,
224
225
  ttsGeminiAgent,
226
+ ttsKotodamaAgent,
225
227
  ttsElevenlabsAgent,
226
228
  mediaMockAgent,
227
229
  addBGMAgent,
@@ -1,18 +1,11 @@
1
1
  import fs from "fs";
2
2
  import { GraphAILogger } from "graphai";
3
3
  import { provider2ImageAgent } from "../utils/provider2agent.js";
4
- import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, imageAction, imageFileTarget, hasCause } from "../utils/error_cause.js";
4
+ import { apiKeyMissingError, agentIncorrectAPIKeyError, agentGenerationError, agentInvalidResponseError, imageAction, imageFileTarget, hasCause, getGenAIErrorReason, resultify, } from "../utils/error_cause.js";
5
+ import { getAspectRatio } from "../utils/utils.js";
6
+ import { ASPECT_RATIOS } from "../utils/const.js";
5
7
  import { GoogleGenAI, PersonGeneration } from "@google/genai";
6
8
  import { blankImagePath, blankSquareImagePath, blankVerticalImagePath } from "../utils/file.js";
7
- const getAspectRatio = (canvasSize) => {
8
- if (canvasSize.width > canvasSize.height) {
9
- return "16:9";
10
- }
11
- else if (canvasSize.width < canvasSize.height) {
12
- return "9:16";
13
- }
14
- return "1:1";
15
- };
16
9
  export const ratio2BlankPath = (aspectRatio) => {
17
10
  if (aspectRatio === "9:16") {
18
11
  return blankVerticalImagePath();
@@ -61,9 +54,24 @@ const geminiFlashResult = (response) => {
61
54
  cause: agentInvalidResponseError("imageGenAIAgent", imageAction, imageFileTarget),
62
55
  });
63
56
  };
57
+ const errorProcess = (error) => {
58
+ GraphAILogger.info("Failed to generate image:", error);
59
+ if (hasCause(error) && error.cause) {
60
+ throw error;
61
+ }
62
+ const reasonDetail = getGenAIErrorReason(error);
63
+ if (reasonDetail && reasonDetail.reason && reasonDetail.reason === "API_KEY_INVALID") {
64
+ throw new Error("Failed to generate image: 400 Incorrect API key provided with gemini", {
65
+ cause: agentIncorrectAPIKeyError("imageGenAIAgent", imageAction, imageFileTarget),
66
+ });
67
+ }
68
+ throw new Error("Failed to generate image with Google GenAI", {
69
+ cause: agentGenerationError("imageGenAIAgent", imageAction, imageFileTarget),
70
+ });
71
+ };
64
72
  export const imageGenAIAgent = async ({ namedInputs, params, config, }) => {
65
73
  const { prompt, referenceImages } = namedInputs;
66
- const aspectRatio = getAspectRatio(params.canvasSize);
74
+ const aspectRatio = getAspectRatio(params.canvasSize, ASPECT_RATIOS);
67
75
  const model = params.model ?? provider2ImageAgent["google"].defaultModel;
68
76
  const apiKey = config?.apiKey;
69
77
  if (!apiKey) {
@@ -71,61 +79,60 @@ export const imageGenAIAgent = async ({ namedInputs, params, config, }) => {
71
79
  cause: apiKeyMissingError("imageGenAIAgent", imageAction, "GEMINI_API_KEY"),
72
80
  });
73
81
  }
74
- try {
75
- const ai = new GoogleGenAI({ apiKey });
76
- if (model === "gemini-2.5-flash-image") {
77
- const contents = getGeminiContents(prompt, referenceImages, aspectRatio);
78
- const response = await ai.models.generateContent({ model, contents });
79
- return geminiFlashResult(response);
80
- }
81
- else if (model === "gemini-3-pro-image-preview") {
82
+ const ai = new GoogleGenAI({ apiKey });
83
+ if (model === "gemini-2.5-flash-image" || model === "gemini-3-pro-image-preview") {
84
+ const contentParams = (() => {
85
+ if (model === "gemini-2.5-flash-image") {
86
+ const contents = getGeminiContents(prompt, referenceImages, aspectRatio);
87
+ return { model, contents };
88
+ }
89
+ // gemini-3-pro-image-preview
82
90
  const contents = getGeminiContents(prompt, referenceImages);
83
- const response = await ai.models.generateContent({
91
+ const PRO_ASPECT_RATIOS = ["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"];
92
+ return {
84
93
  model,
85
94
  contents,
86
95
  config: {
87
96
  imageConfig: {
88
- // '1:1', '2:3', '3:2', '3:4', '4:3', '4:5', '5:4', '9:16', '16:9', or '21:9'.
89
- aspectRatio,
97
+ aspectRatio: getAspectRatio(params.canvasSize, PRO_ASPECT_RATIOS),
90
98
  },
91
99
  },
92
- });
93
- return geminiFlashResult(response);
94
- }
95
- else {
96
- const response = await ai.models.generateImages({
97
- model,
98
- prompt,
99
- config: {
100
- numberOfImages: 1, // default is 4!
101
- aspectRatio,
102
- personGeneration: PersonGeneration.ALLOW_ALL,
103
- // safetyFilterLevel: SafetyFilterLevel.BLOCK_ONLY_HIGH,
104
- },
105
- });
106
- if (!response.generatedImages || response.generatedImages.length === 0) {
107
- throw new Error("ERROR: generateImage returned no generated images", {
108
- cause: agentInvalidResponseError("imageGenAIAgent", imageAction, imageFileTarget),
109
- });
110
- }
111
- const image = response.generatedImages[0].image;
112
- if (image && image.imageBytes) {
113
- return { buffer: Buffer.from(image.imageBytes, "base64") };
114
- }
115
- throw new Error("ERROR: generateImage returned no image bytes", {
116
- cause: agentInvalidResponseError("imageGenAIAgent", imageAction, imageFileTarget),
117
- });
100
+ };
101
+ })();
102
+ const res = await resultify(() => ai.models.generateContent(contentParams));
103
+ if (res.ok) {
104
+ return geminiFlashResult(res.value);
118
105
  }
106
+ return errorProcess(res.error);
119
107
  }
120
- catch (error) {
121
- GraphAILogger.info("Failed to generate image:", error);
122
- if (hasCause(error) && error.cause) {
123
- throw error;
124
- }
125
- throw new Error("Failed to generate image with Google GenAI", {
126
- cause: agentGenerationError("imageGenAIAgent", imageAction, imageFileTarget),
108
+ // other case,
109
+ const generateParams = {
110
+ model,
111
+ prompt,
112
+ config: {
113
+ numberOfImages: 1, // default is 4!
114
+ aspectRatio,
115
+ personGeneration: PersonGeneration.ALLOW_ALL,
116
+ // safetyFilterLevel: SafetyFilterLevel.BLOCK_ONLY_HIGH,
117
+ },
118
+ };
119
+ const res = await resultify(() => ai.models.generateImages(generateParams));
120
+ if (!res.ok) {
121
+ return errorProcess(res.error);
122
+ }
123
+ const response = res.value;
124
+ if (!response.generatedImages || response.generatedImages.length === 0) {
125
+ throw new Error("ERROR: generateImage returned no generated images", {
126
+ cause: agentInvalidResponseError("imageGenAIAgent", imageAction, imageFileTarget),
127
127
  });
128
128
  }
129
+ const image = response.generatedImages[0].image;
130
+ if (image && image.imageBytes) {
131
+ return { buffer: Buffer.from(image.imageBytes, "base64") };
132
+ }
133
+ throw new Error("ERROR: generateImage returned no image bytes", {
134
+ cause: agentInvalidResponseError("imageGenAIAgent", imageAction, imageFileTarget),
135
+ });
129
136
  };
130
137
  const imageGenAIAgentInfo = {
131
138
  name: "imageGenAIAgent",
@@ -136,7 +143,6 @@ const imageGenAIAgentInfo = {
136
143
  category: ["image"],
137
144
  author: "Receptron Team",
138
145
  repository: "https://github.com/receptron/mulmocast-cli/",
139
- // source: "https://github.com/receptron/mulmocast-cli/blob/main/src/agents/image_google_agent.ts",
140
146
  license: "MIT",
141
147
  environmentVariables: [],
142
148
  };
@@ -2,7 +2,7 @@ import { readFileSync } from "fs";
2
2
  import { GraphAILogger } from "graphai";
3
3
  import Replicate from "replicate";
4
4
  import { getAspectRatio } from "./movie_replicate_agent.js";
5
- import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, imageAction, imageFileTarget, hasCause } from "../utils/error_cause.js";
5
+ import { apiKeyMissingError, agentIncorrectAPIKeyError, agentGenerationError, agentInvalidResponseError, imageAction, imageFileTarget, hasCause, } from "../utils/error_cause.js";
6
6
  import { provider2ImageAgent } from "../utils/provider2agent.js";
7
7
  export const imageReplicateAgent = async ({ namedInputs, params, config, }) => {
8
8
  const { prompt, referenceImages } = namedInputs;
@@ -51,6 +51,14 @@ export const imageReplicateAgent = async ({ namedInputs, params, config, }) => {
51
51
  if (hasCause(error) && error.cause) {
52
52
  throw error;
53
53
  }
54
+ if (typeof error === "object" && error !== null && "response" in error) {
55
+ const errorWithResponse = error;
56
+ if (errorWithResponse.response?.status === 401) {
57
+ throw new Error("Failed to generate image: 401 Incorrect API key provided with replicate", {
58
+ cause: agentIncorrectAPIKeyError("imageGenAIAgent", imageAction, imageFileTarget),
59
+ });
60
+ }
61
+ }
54
62
  throw new Error("Failed to generate image with Replicate", {
55
63
  cause: agentGenerationError("imageReplicateAgent", imageAction, imageFileTarget),
56
64
  });
@@ -12,6 +12,7 @@ import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
12
12
  import ttsOpenaiAgent from "./tts_openai_agent.js";
13
13
  import ttsGoogleAgent from "./tts_google_agent.js";
14
14
  import ttsGeminiAgent from "./tts_gemini_agent.js";
15
+ import ttsKotodamaAgent from "./tts_kotodama_agent.js";
15
16
  import validateSchemaAgent from "./validate_schema_agent.js";
16
17
  import soundEffectReplicateAgent from "./sound_effect_replicate_agent.js";
17
18
  import lipSyncReplicateAgent from "./lipsync_replicate_agent.js";
@@ -20,4 +21,4 @@ import { browserlessAgent } from "@graphai/browserless_agent";
20
21
  import { textInputAgent } from "@graphai/input_agents";
21
22
  import { openAIAgent } from "@graphai/openai_agent";
22
23
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
23
- export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, tavilySearchAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, ttsGeminiAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, puppeteerCrawlerAgent, };
24
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, tavilySearchAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, ttsGeminiAgent, ttsKotodamaAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, puppeteerCrawlerAgent, };
@@ -12,6 +12,7 @@ import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
12
12
  import ttsOpenaiAgent from "./tts_openai_agent.js";
13
13
  import ttsGoogleAgent from "./tts_google_agent.js";
14
14
  import ttsGeminiAgent from "./tts_gemini_agent.js";
15
+ import ttsKotodamaAgent from "./tts_kotodama_agent.js";
15
16
  import validateSchemaAgent from "./validate_schema_agent.js";
16
17
  import soundEffectReplicateAgent from "./sound_effect_replicate_agent.js";
17
18
  import lipSyncReplicateAgent from "./lipsync_replicate_agent.js";
@@ -21,4 +22,4 @@ import { textInputAgent } from "@graphai/input_agents";
21
22
  import { openAIAgent } from "@graphai/openai_agent";
22
23
  // import * as vanilla from "@graphai/vanilla";
23
24
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
24
- export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, tavilySearchAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, ttsGeminiAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, puppeteerCrawlerAgent, };
25
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, tavilySearchAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, ttsGeminiAgent, ttsKotodamaAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, puppeteerCrawlerAgent, };
@@ -1,9 +1,5 @@
1
1
  import type { AgentFunction, AgentFunctionInfo } from "graphai";
2
2
  import type { AgentBufferResult, GenAIImageAgentConfig, GoogleMovieAgentParams, MovieAgentInputs } from "../types/agent.js";
3
- export declare const getAspectRatio: (canvasSize: {
4
- width: number;
5
- height: number;
6
- }) => string;
7
3
  export declare const movieGenAIAgent: AgentFunction<GoogleMovieAgentParams, AgentBufferResult, MovieAgentInputs, GenAIImageAgentConfig>;
8
4
  declare const movieGenAIAgentInfo: AgentFunctionInfo;
9
5
  export default movieGenAIAgentInfo;
@@ -2,18 +2,9 @@ import { readFileSync } from "fs";
2
2
  import { GraphAILogger, sleep } from "graphai";
3
3
  import { GoogleGenAI, PersonGeneration } from "@google/genai";
4
4
  import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, imageAction, movieFileTarget, videoDurationTarget, hasCause, } from "../utils/error_cause.js";
5
+ import { getAspectRatio } from "../utils/utils.js";
6
+ import { ASPECT_RATIOS } from "../utils/const.js";
5
7
  import { getModelDuration, provider2MovieAgent } from "../utils/provider2agent.js";
6
- export const getAspectRatio = (canvasSize) => {
7
- if (canvasSize.width > canvasSize.height) {
8
- return "16:9";
9
- }
10
- else if (canvasSize.width < canvasSize.height) {
11
- return "9:16";
12
- }
13
- else {
14
- return "1:1";
15
- }
16
- };
17
8
  const pollUntilDone = async (ai, operation) => {
18
9
  const response = { operation };
19
10
  while (!response.operation.done) {
@@ -115,7 +106,7 @@ const generateStandardVideo = async (ai, model, prompt, aspectRatio, imagePath,
115
106
  };
116
107
  export const movieGenAIAgent = async ({ namedInputs, params, config, }) => {
117
108
  const { prompt, imagePath, movieFile } = namedInputs;
118
- const aspectRatio = getAspectRatio(params.canvasSize);
109
+ const aspectRatio = getAspectRatio(params.canvasSize, ASPECT_RATIOS);
119
110
  const model = params.model ?? provider2MovieAgent.google.defaultModel;
120
111
  const apiKey = config?.apiKey;
121
112
  if (!apiKey) {
@@ -0,0 +1 @@
1
+ import "dotenv/config";
@@ -0,0 +1,12 @@
1
+ import "dotenv/config";
2
+ import { ttsKotodamaAgent } from "./tts_kotodama_agent.js";
3
+ const kotodamaApiKey = process.env.KOTODAMA_API_KEY ?? "";
4
+ const main = async () => {
5
+ const result = await ttsKotodamaAgent({
6
+ namedInputs: { text: "こんにちは" },
7
+ params: { voice: "Atla", decoration: "neutral", suppressError: false },
8
+ config: { apiKey: kotodamaApiKey },
9
+ });
10
+ console.log("Result:", result);
11
+ };
12
+ main();
@@ -1,6 +1,6 @@
1
1
  import { GraphAILogger } from "graphai";
2
2
  import { provider2TTSAgent } from "../utils/provider2agent.js";
3
- import { apiKeyMissingError, agentGenerationError, audioAction, audioFileTarget } from "../utils/error_cause.js";
3
+ import { apiKeyMissingError, agentIncorrectAPIKeyError, agentGenerationError, audioAction, audioFileTarget } from "../utils/error_cause.js";
4
4
  export const ttsElevenlabsAgent = async ({ namedInputs, params, config, }) => {
5
5
  const { text } = namedInputs;
6
6
  const { voice, model, stability, similarityBoost, suppressError } = params;
@@ -15,45 +15,55 @@ export const ttsElevenlabsAgent = async ({ namedInputs, params, config, }) => {
15
15
  cause: agentGenerationError("ttsElevenlabsAgent", audioAction, audioFileTarget),
16
16
  });
17
17
  }
18
- try {
19
- const requestBody = {
20
- text,
21
- model_id: model ?? provider2TTSAgent.elevenlabs.defaultModel,
22
- voice_settings: {
23
- stability: stability ?? 0.5,
24
- similarity_boost: similarityBoost ?? 0.75,
25
- },
26
- };
27
- GraphAILogger.log("ElevenLabs TTS options", requestBody);
28
- const response = await fetch(`https://api.elevenlabs.io/v1/text-to-speech/${voice}`, {
29
- method: "POST",
30
- headers: {
31
- Accept: "audio/mpeg",
32
- "Content-Type": "application/json",
33
- "xi-api-key": apiKey,
34
- },
35
- body: JSON.stringify(requestBody),
36
- });
37
- if (!response.ok) {
38
- throw new Error(`Eleven Labs API error: ${response.status} ${response.statusText}`, {
18
+ const requestBody = {
19
+ text,
20
+ model_id: model ?? provider2TTSAgent.elevenlabs.defaultModel,
21
+ voice_settings: {
22
+ stability: stability ?? 0.5,
23
+ similarity_boost: similarityBoost ?? 0.75,
24
+ },
25
+ };
26
+ GraphAILogger.log("ElevenLabs TTS options", requestBody);
27
+ const response = await (async () => {
28
+ try {
29
+ return await fetch(`https://api.elevenlabs.io/v1/text-to-speech/${voice}`, {
30
+ method: "POST",
31
+ headers: {
32
+ Accept: "audio/mpeg",
33
+ "Content-Type": "application/json",
34
+ "xi-api-key": apiKey,
35
+ },
36
+ body: JSON.stringify(requestBody),
37
+ });
38
+ }
39
+ catch (e) {
40
+ if (suppressError) {
41
+ return {
42
+ error: e,
43
+ };
44
+ }
45
+ GraphAILogger.info(e);
46
+ throw new Error("TTS Eleven Labs Error", {
39
47
  cause: agentGenerationError("ttsElevenlabsAgent", audioAction, audioFileTarget),
40
48
  });
41
49
  }
42
- const arrayBuffer = await response.arrayBuffer();
43
- const buffer = Buffer.from(arrayBuffer);
44
- return { buffer };
50
+ })();
51
+ if ("error" in response) {
52
+ return response;
45
53
  }
46
- catch (e) {
47
- if (suppressError) {
48
- return {
49
- error: e,
50
- };
54
+ if (!response.ok) {
55
+ if (response.status === 401) {
56
+ throw new Error("Failed to generate audio: 401 Incorrect API key provided with ElevenLabs", {
57
+ cause: agentIncorrectAPIKeyError("ttsElevenlabsAgent", audioAction, audioFileTarget),
58
+ });
51
59
  }
52
- GraphAILogger.info(e);
53
- throw new Error("TTS Eleven Labs Error", {
60
+ throw new Error(`Eleven Labs API error: ${response.status} ${response.statusText}`, {
54
61
  cause: agentGenerationError("ttsElevenlabsAgent", audioAction, audioFileTarget),
55
62
  });
56
63
  }
64
+ const arrayBuffer = await response.arrayBuffer();
65
+ const buffer = Buffer.from(arrayBuffer);
66
+ return { buffer };
57
67
  };
58
68
  const ttsElevenlabsAgentInfo = {
59
69
  name: "ttsElevenlabsAgent",
@@ -1,7 +1,7 @@
1
1
  import { GraphAILogger } from "graphai";
2
2
  import { GoogleGenAI } from "@google/genai";
3
3
  import { provider2TTSAgent } from "../utils/provider2agent.js";
4
- import { apiKeyMissingError, agentGenerationError, audioAction, audioFileTarget } from "../utils/error_cause.js";
4
+ import { agentIncorrectAPIKeyError, apiKeyMissingError, agentGenerationError, audioAction, audioFileTarget, getGenAIErrorReason, } from "../utils/error_cause.js";
5
5
  import { pcmToMp3 } from "../utils/ffmpeg_utils.js";
6
6
  export const ttsGeminiAgent = async ({ namedInputs, params, config, }) => {
7
7
  const { text } = namedInputs;
@@ -29,7 +29,7 @@ export const ttsGeminiAgent = async ({ namedInputs, params, config, }) => {
29
29
  const inlineData = response.candidates?.[0]?.content?.parts?.[0]?.inlineData;
30
30
  const pcmBase64 = inlineData?.data;
31
31
  const mimeType = inlineData?.mimeType;
32
- if (!pcmBase64)
32
+ if (!pcmBase64 || typeof pcmBase64 !== "string")
33
33
  throw new Error("No audio data returned");
34
34
  // Extract sample rate from mimeType (e.g., "audio/L16;codec=pcm;rate=24000")
35
35
  const rateMatch = mimeType?.match(/rate=(\d+)/);
@@ -44,6 +44,12 @@ export const ttsGeminiAgent = async ({ namedInputs, params, config, }) => {
44
44
  };
45
45
  }
46
46
  GraphAILogger.info(e);
47
+ const reasonDetail = getGenAIErrorReason(e);
48
+ if (reasonDetail && reasonDetail.reason && reasonDetail.reason === "API_KEY_INVALID") {
49
+ throw new Error("Failed to generate tts: 400 Incorrect API key provided with gemini", {
50
+ cause: agentIncorrectAPIKeyError("ttsGeminiAgent", audioAction, audioFileTarget),
51
+ });
52
+ }
47
53
  throw new Error("TTS Gemini Error", {
48
54
  cause: agentGenerationError("ttsGeminiAgent", audioAction, audioFileTarget),
49
55
  });
@@ -0,0 +1,5 @@
1
+ import type { AgentFunction, AgentFunctionInfo } from "graphai";
2
+ import type { KotodamaTTSAgentParams, AgentBufferResult, AgentTextInputs, AgentErrorResult, AgentConfig } from "../types/agent.js";
3
+ export declare const ttsKotodamaAgent: AgentFunction<KotodamaTTSAgentParams, AgentBufferResult | AgentErrorResult, AgentTextInputs, AgentConfig>;
4
+ declare const ttsKotodamaAgentInfo: AgentFunctionInfo;
5
+ export default ttsKotodamaAgentInfo;
@@ -0,0 +1,76 @@
1
+ import { GraphAILogger } from "graphai";
2
+ import { provider2TTSAgent } from "../utils/provider2agent.js";
3
+ import { apiKeyMissingError, agentIncorrectAPIKeyError, agentGenerationError, audioAction, audioFileTarget } from "../utils/error_cause.js";
4
+ export const ttsKotodamaAgent = async ({ namedInputs, params, config, }) => {
5
+ const { text } = namedInputs;
6
+ const { voice, decoration, suppressError } = params;
7
+ const { apiKey } = config ?? {};
8
+ if (!apiKey) {
9
+ throw new Error("Kotodama API key is required (KOTODAMA_API_KEY)", {
10
+ cause: apiKeyMissingError("ttsKotodamaAgent", audioAction, "KOTODAMA_API_KEY"),
11
+ });
12
+ }
13
+ const url = "https://tts3.spiral-ai-app.com/api/tts_generate";
14
+ const body = {
15
+ text,
16
+ speaker_id: voice ?? provider2TTSAgent.kotodama.defaultVoice,
17
+ decoration_id: decoration ?? provider2TTSAgent.kotodama.defaultDecoration,
18
+ audio_format: "mp3",
19
+ };
20
+ try {
21
+ const response = await fetch(url, {
22
+ method: "POST",
23
+ headers: {
24
+ "Content-Type": "application/json",
25
+ "X-API-Key": apiKey,
26
+ },
27
+ body: JSON.stringify(body),
28
+ });
29
+ if (!response.ok) {
30
+ if (response.status === 401) {
31
+ throw new Error("Failed to generate audio: 401 Incorrect API key provided with Kotodama", {
32
+ cause: agentIncorrectAPIKeyError("ttsKotodamaAgent", audioAction, audioFileTarget),
33
+ });
34
+ }
35
+ throw new Error(`Kotodama API error: ${response.status} ${response.statusText}`, {
36
+ cause: agentGenerationError("ttsKotodamaAgent", audioAction, audioFileTarget),
37
+ });
38
+ }
39
+ // Response is JSON with base64-encoded audio in "audios" array
40
+ const json = await response.json();
41
+ if (!json.audios || !json.audios[0]) {
42
+ throw new Error("TTS Kotodama Error: No audio data in response", {
43
+ cause: agentGenerationError("ttsKotodamaAgent", audioAction, audioFileTarget),
44
+ });
45
+ }
46
+ const buffer = Buffer.from(json.audios[0], "base64");
47
+ return { buffer };
48
+ }
49
+ catch (error) {
50
+ if (suppressError) {
51
+ return {
52
+ error,
53
+ };
54
+ }
55
+ GraphAILogger.error(error);
56
+ if (error && typeof error === "object" && "cause" in error) {
57
+ throw error;
58
+ }
59
+ throw new Error("TTS Kotodama Error", {
60
+ cause: agentGenerationError("ttsKotodamaAgent", audioAction, audioFileTarget),
61
+ });
62
+ }
63
+ };
64
+ const ttsKotodamaAgentInfo = {
65
+ name: "ttsKotodamaAgent",
66
+ agent: ttsKotodamaAgent,
67
+ mock: ttsKotodamaAgent,
68
+ samples: [],
69
+ description: "Kotodama TTS agent (SpiralAI)",
70
+ category: ["tts"],
71
+ author: "Receptron Team",
72
+ repository: "https://github.com/receptron/mulmocast-cli",
73
+ license: "MIT",
74
+ environmentVariables: ["KOTODAMA_API_KEY"],
75
+ };
76
+ export default ttsKotodamaAgentInfo;
@@ -34,7 +34,7 @@ export const ttsOpenaiAgent = async ({ namedInputs, params, config, }) => {
34
34
  }
35
35
  GraphAILogger.error(error);
36
36
  if (error instanceof AuthenticationError) {
37
- throw new Error("Failed to generate image: 401 Incorrect API key provided with OpenAI", {
37
+ throw new Error("Failed to generate audio: 401 Incorrect API key provided with OpenAI", {
38
38
  cause: agentIncorrectAPIKeyError("ttsOpenaiAgent", audioAction, audioFileTarget),
39
39
  });
40
40
  }
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1 @@
1
+ export {};
@@ -116,6 +116,9 @@ export type NijivoiceTTSAgentParams = TTSAgentParams & {
116
116
  speed: number;
117
117
  speed_global: number;
118
118
  };
119
+ export type KotodamaTTSAgentParams = TTSAgentParams & {
120
+ decoration: string;
121
+ };
119
122
  export type GoogleTTSAgentParams = TTSAgentParams & {
120
123
  speed: number;
121
124
  };