@mixio-pro/kalaasetu-mcp 1.0.5-beta → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -68,7 +68,9 @@ Add to your Cursor settings (`~/.cursor/config.json` or via Settings → MCP):
68
68
  "env": {
69
69
  "GEMINI_API_KEY": "your-gemini-api-key",
70
70
  "FAL_KEY": "your-fal-api-key",
71
- "PERPLEXITY_API_KEY": "your-perplexity-api-key"
71
+ "PERPLEXITY_API_KEY": "your-perplexity-api-key",
72
+ "STORAGE_PROVIDER":"gcs",
73
+ "GCS_BUCKET":"your-gcs-bucket-name"
72
74
  }
73
75
  }
74
76
  }
@@ -88,7 +90,9 @@ Add to your OpenCode MCP configuration:
88
90
  "environment": {
89
91
  "GEMINI_API_KEY": "your-gemini-api-key",
90
92
  "FAL_KEY": "your-fal-api-key",
91
- "PERPLEXITY_API_KEY": "your-perplexity-api-key"
93
+ "PERPLEXITY_API_KEY": "your-perplexity-api-key",
94
+ "STORAGE_PROVIDER":"gcs",
95
+ "GCS_BUCKET":"your-bucket-name"
92
96
  }
93
97
  }
94
98
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mixio-pro/kalaasetu-mcp",
3
- "version": "1.0.5-beta",
3
+ "version": "1.0.7",
4
4
  "description": "A powerful Model Context Protocol server providing AI tools for content generation and analysis",
5
5
  "type": "module",
6
6
  "module": "src/index.ts",
@@ -49,8 +49,10 @@
49
49
  "dependencies": {
50
50
  "@fal-ai/client": "^1.7.2",
51
51
  "@google/genai": "^1.28.0",
52
+ "@types/node": "^24.10.1",
52
53
  "@types/wav": "^1.0.4",
53
54
  "fastmcp": "^3.22.0",
55
+ "form-data": "^4.0.5",
54
56
  "google-auth-library": "^10.5.0",
55
57
  "wav": "^1.0.2",
56
58
  "zod": "^4.1.12"
@@ -0,0 +1,116 @@
1
+ import { GoogleAuth } from "google-auth-library";
2
+ import type { StorageProvider } from "./interface";
3
+ import * as path from "path";
4
+
5
+ export class GCSStorageProvider implements StorageProvider {
6
+ private bucket: string;
7
+ private auth: GoogleAuth;
8
+
9
+ constructor(bucket: string) {
10
+ this.bucket = bucket;
11
+ this.auth = new GoogleAuth({
12
+ scopes: ["https://www.googleapis.com/auth/cloud-platform"],
13
+ });
14
+ }
15
+
16
+ async init(): Promise<void> {
17
+ console.log(
18
+ `Initializing GCS Storage Provider with bucket: ${this.bucket}`
19
+ );
20
+ // Verify we can get credentials
21
+ try {
22
+ await this.auth.getClient();
23
+ } catch (error) {
24
+ console.warn(`Warning: Could not initialize GCS client: ${error}`);
25
+ }
26
+ }
27
+
28
+ private async getAccessToken(): Promise<string> {
29
+ const client = await this.auth.getClient();
30
+ const token = await client.getAccessToken();
31
+ if (!token.token) {
32
+ throw new Error("Failed to get GCS access token");
33
+ }
34
+ return token.token;
35
+ }
36
+
37
+ async readFile(filePath: string): Promise<Buffer> {
38
+ const objectName = path.basename(filePath);
39
+ const url = `https://storage.googleapis.com/storage/v1/b/${
40
+ this.bucket
41
+ }/o/${encodeURIComponent(objectName)}?alt=media`;
42
+
43
+ const token = await this.getAccessToken();
44
+ const response = await fetch(url, {
45
+ headers: {
46
+ Authorization: `Bearer ${token}`,
47
+ },
48
+ });
49
+
50
+ if (!response.ok) {
51
+ throw new Error(
52
+ `Failed to read file from GCS: ${response.status} ${response.statusText}`
53
+ );
54
+ }
55
+
56
+ const arrayBuffer = await response.arrayBuffer();
57
+ return Buffer.from(arrayBuffer);
58
+ }
59
+
60
+ async writeFile(filePath: string, data: Buffer | string): Promise<string> {
61
+ const objectName = path.basename(filePath);
62
+ const buffer = Buffer.isBuffer(data) ? data : Buffer.from(data);
63
+
64
+ // Upload using JSON API
65
+ const url = `https://storage.googleapis.com/upload/storage/v1/b/${
66
+ this.bucket
67
+ }/o?uploadType=media&name=${encodeURIComponent(objectName)}`;
68
+
69
+ const token = await this.getAccessToken();
70
+ const response = await fetch(url, {
71
+ method: "POST",
72
+ headers: {
73
+ Authorization: `Bearer ${token}`,
74
+ "Content-Type": "application/octet-stream",
75
+ "Content-Length": buffer.length.toString(),
76
+ },
77
+ body: buffer,
78
+ });
79
+
80
+ if (!response.ok) {
81
+ const errorText = await response.text();
82
+ throw new Error(
83
+ `Failed to upload to GCS: ${response.status} ${errorText}`
84
+ );
85
+ }
86
+
87
+ // Return public URL
88
+ return `https://storage.googleapis.com/${this.bucket}/${objectName}`;
89
+ }
90
+
91
+ async exists(filePath: string): Promise<boolean> {
92
+ try {
93
+ const objectName = path.basename(filePath);
94
+ const url = `https://storage.googleapis.com/storage/v1/b/${
95
+ this.bucket
96
+ }/o/${encodeURIComponent(objectName)}`;
97
+
98
+ const token = await this.getAccessToken();
99
+ const response = await fetch(url, {
100
+ method: "GET",
101
+ headers: {
102
+ Authorization: `Bearer ${token}`,
103
+ },
104
+ });
105
+
106
+ return response.ok;
107
+ } catch {
108
+ return false;
109
+ }
110
+ }
111
+
112
+ async getPublicUrl(filePath: string): Promise<string> {
113
+ const objectName = path.basename(filePath);
114
+ return `https://storage.googleapis.com/${this.bucket}/${objectName}`;
115
+ }
116
+ }
@@ -0,0 +1,30 @@
1
+ import type { StorageProvider } from "./interface";
2
+ import { LocalStorageProvider } from "./local";
3
+ import { GCSStorageProvider } from "./gcs";
4
+
5
+ let storageInstance: StorageProvider | null = null;
6
+
7
+ export function getStorage(): StorageProvider {
8
+ if (!storageInstance) {
9
+ const type = process.env.STORAGE_PROVIDER || "local";
10
+ console.error(`Initializing storage provider: ${type}`);
11
+
12
+ if (type === "gcs") {
13
+ const bucket = process.env.GCS_BUCKET;
14
+
15
+ if (!bucket) {
16
+ throw new Error("GCS_BUCKET is required when using gcs storage");
17
+ }
18
+
19
+ storageInstance = new GCSStorageProvider(bucket);
20
+ } else {
21
+ storageInstance = new LocalStorageProvider(process.cwd());
22
+ }
23
+
24
+ // Initialize async
25
+ storageInstance
26
+ .init()
27
+ .catch((err) => console.error("Failed to init storage:", err));
28
+ }
29
+ return storageInstance;
30
+ }
@@ -0,0 +1,7 @@
1
/**
 * Abstraction over file persistence so tools can transparently target the
 * local filesystem or a remote bucket (e.g. Google Cloud Storage).
 */
export interface StorageProvider {
  // One-time setup/verification; call once before first use.
  init(): Promise<void>;
  // Resolves the stored object and returns its raw bytes.
  readFile(path: string): Promise<Buffer>;
  // Persists data and resolves to a location usable by clients.
  writeFile(path: string, data: Buffer | string): Promise<string>; // Returns public URL
  // True when the object can be found by this provider.
  exists(path: string): Promise<boolean>;
  // URL (or absolute path, for local storage) to fetch the object from.
  getPublicUrl(path: string): Promise<string>;
}
@@ -0,0 +1,53 @@
1
+ import * as fs from "fs";
2
+ import * as path from "path";
3
+ import type { StorageProvider } from "./interface";
4
+
5
+ export class LocalStorageProvider implements StorageProvider {
6
+ private basePath: string;
7
+
8
+ constructor(basePath: string = process.cwd()) {
9
+ this.basePath = basePath;
10
+ }
11
+
12
+ async init(): Promise<void> {
13
+ // No-op for local
14
+ }
15
+
16
+ async readFile(filePath: string): Promise<Buffer> {
17
+ let fullPath = filePath;
18
+ if (!path.isAbsolute(filePath)) {
19
+ fullPath = path.resolve(this.basePath, filePath);
20
+ }
21
+ return fs.promises.readFile(fullPath);
22
+ }
23
+
24
+ async writeFile(filePath: string, data: Buffer | string): Promise<string> {
25
+ let fullPath = filePath;
26
+ if (!path.isAbsolute(filePath)) {
27
+ fullPath = path.resolve(this.basePath, filePath);
28
+ }
29
+
30
+ const dir = path.dirname(fullPath);
31
+ if (!fs.existsSync(dir)) {
32
+ await fs.promises.mkdir(dir, { recursive: true });
33
+ }
34
+ await fs.promises.writeFile(fullPath, data);
35
+ return fullPath;
36
+ }
37
+
38
+ async exists(filePath: string): Promise<boolean> {
39
+ let fullPath = filePath;
40
+ if (!path.isAbsolute(filePath)) {
41
+ fullPath = path.resolve(this.basePath, filePath);
42
+ }
43
+ return fs.existsSync(fullPath);
44
+ }
45
+
46
+ async getPublicUrl(filePath: string): Promise<string> {
47
+ let fullPath = filePath;
48
+ if (!path.isAbsolute(filePath)) {
49
+ fullPath = path.resolve(this.basePath, filePath);
50
+ }
51
+ return fullPath;
52
+ }
53
+ }
@@ -1,17 +1,26 @@
1
1
  import { z } from "zod";
2
- import { GoogleGenAI, createPartFromUri, createUserContent } from "@google/genai";
2
+ import {
3
+ GoogleGenAI,
4
+ createPartFromUri,
5
+ createUserContent,
6
+ } from "@google/genai";
3
7
  import * as fs from "fs";
8
+ import * as path from "path";
9
+ import * as os from "os";
4
10
  import * as wav from "wav";
11
+ import { PassThrough } from "stream";
12
+ import { getStorage } from "../storage";
5
13
 
6
14
  const ai = new GoogleGenAI({
7
15
  apiKey: process.env.GEMINI_API_KEY || "",
8
16
  });
9
17
 
10
- function fileToGenerativePart(filePath: string) {
11
- if (!fs.existsSync(filePath)) {
18
+ async function fileToGenerativePart(filePath: string) {
19
+ const storage = getStorage();
20
+ if (!(await storage.exists(filePath))) {
12
21
  throw new Error(`File not found: ${filePath}`);
13
22
  }
14
- const imageBytes = fs.readFileSync(filePath);
23
+ const imageBytes = await storage.readFile(filePath);
15
24
  return {
16
25
  inlineData: {
17
26
  data: Buffer.from(imageBytes).toString("base64"),
@@ -21,22 +30,37 @@ function fileToGenerativePart(filePath: string) {
21
30
  }
22
31
 
23
32
  // Helper function to save WAV file
24
- function saveWaveFile(
33
+ // Helper function to save WAV file
34
+ async function saveWaveFile(
25
35
  filename: string,
26
36
  pcmData: Buffer,
27
37
  channels = 1,
28
38
  rate = 24000,
29
- sampleWidth = 2,
39
+ sampleWidth = 2
30
40
  ): Promise<void> {
31
41
  return new Promise((resolve, reject) => {
32
- const writer = new wav.FileWriter(filename, {
42
+ const writer = new wav.Writer({
33
43
  channels,
34
44
  sampleRate: rate,
35
45
  bitDepth: sampleWidth * 8,
36
46
  });
37
47
 
38
- writer.on('finish', resolve);
39
- writer.on('error', reject);
48
+ const stream = new PassThrough();
49
+ const chunks: Buffer[] = [];
50
+
51
+ writer.pipe(stream);
52
+ stream.on("data", (chunk) => chunks.push(chunk));
53
+ stream.on("end", async () => {
54
+ try {
55
+ const wavBuffer = Buffer.concat(chunks);
56
+ const storage = getStorage();
57
+ await storage.writeFile(filename, wavBuffer);
58
+ resolve();
59
+ } catch (err) {
60
+ reject(err);
61
+ }
62
+ });
63
+ writer.on("error", reject);
40
64
 
41
65
  writer.write(pcmData);
42
66
  writer.end();
@@ -45,31 +69,59 @@ function saveWaveFile(
45
69
 
46
70
  // Helper function to check if URL is YouTube URL
47
71
  function isYouTubeUrl(url: string): boolean {
48
- return url.includes('youtube.com/watch') || url.includes('youtu.be');
72
+ return url.includes("youtube.com/watch") || url.includes("youtu.be");
49
73
  }
50
74
 
51
75
  // Helper function to get file size in bytes
52
- function getFileSize(filePath: string): number {
53
- const stats = fs.statSync(filePath);
54
- return stats.size;
76
+ async function getFileSize(filePath: string): Promise<number> {
77
+ const storage = getStorage();
78
+ const buffer = await storage.readFile(filePath);
79
+ return buffer.length;
55
80
  }
56
81
 
82
+ // Helper function to upload file to Gemini API
57
83
  // Helper function to upload file to Gemini API
58
84
  async function uploadFileToGemini(filePath: string): Promise<any> {
59
85
  try {
86
+ const storage = getStorage();
87
+ // For Gemini API, we need a local file path.
88
+ // If storage is local, we can use the path directly (if we can resolve it).
89
+ // If storage is remote, we must download to a temp file.
90
+
91
+ let localPath = filePath;
92
+ let isTemp = false;
93
+
94
+ // Check if we can get a local path from storage (hacky check for LocalStorageProvider)
95
+ // A better way is to always download to temp if not sure, or ask storage for a local path.
96
+ // For now, let's assume we need to download if it's not a local file system path that exists.
97
+
98
+ if (!fs.existsSync(filePath)) {
99
+ // Try to read from storage and write to temp
100
+ const buffer = await storage.readFile(filePath);
101
+ const tempDir = os.tmpdir();
102
+ const tempFilePath = path.join(tempDir, path.basename(filePath));
103
+ fs.writeFileSync(tempFilePath, buffer);
104
+ localPath = tempFilePath;
105
+ isTemp = true;
106
+ }
107
+
60
108
  const uploadedFile = await ai.files.upload({
61
- file: filePath,
109
+ file: localPath,
62
110
  });
63
-
111
+
112
+ if (isTemp) {
113
+ fs.unlinkSync(localPath);
114
+ }
115
+
64
116
  // Wait for file processing to complete
65
117
  let getFile = await ai.files.get({ name: uploadedFile.name! });
66
- while (getFile.state === 'PROCESSING') {
67
- await new Promise(resolve => setTimeout(resolve, 3000));
118
+ while (getFile.state === "PROCESSING") {
119
+ await new Promise((resolve) => setTimeout(resolve, 3000));
68
120
  getFile = await ai.files.get({ name: uploadedFile.name! });
69
121
  }
70
122
 
71
- if (getFile.state === 'FAILED') {
72
- throw new Error('File processing failed');
123
+ if (getFile.state === "FAILED") {
124
+ throw new Error("File processing failed");
73
125
  }
74
126
 
75
127
  return getFile;
@@ -79,41 +131,58 @@ async function uploadFileToGemini(filePath: string): Promise<any> {
79
131
  }
80
132
 
81
133
  // Helper function to process video input intelligently
82
// Builds the Gemini content part for a single video input.
//
// For YouTube URLs: returns an inline fileData part referencing the URL,
// with optional videoMetadata (fps / clip offsets) when config is given.
// For anything else: treats the input as a path in the configured storage
// provider and uploads it through the Gemini File API, returning the
// uploaded-file object from uploadFileToGemini.
//
// NOTE(review): the two branches return differently shaped values — the
// caller (analyzeVideos) appears to branch on the presence of .uri —
// confirm both shapes are handled wherever this is consumed.
async function processVideoInput(
  input: string,
  config?: { fps?: number; startOffset?: string; endOffset?: string }
): Promise<any> {
  if (isYouTubeUrl(input)) {
    return {
      fileData: {
        fileUri: input,
        mimeType: "video/*",
        videoMetadata: config
          ? {
              fps: config.fps,
              startOffset: config.startOffset,
              endOffset: config.endOffset,
            }
          : undefined,
      },
    };
  } else {
    // Local file processing - use File Upload API
    const storage = getStorage();
    if (!(await storage.exists(input))) {
      throw new Error(`Video file not found: ${input}`);
    }

    // Upload file to Gemini API
    const uploadedFile = await uploadFileToGemini(input);

    return uploadedFile;
  }
}
107
165
 
108
166
  export const geminiTextToImage = {
109
- name: "geminiTextToImage",
110
- description: "Generate images from text prompts using Gemini 2.5 Flash Image model",
167
+ name: "generateImage",
168
+ description:
169
+ "Generate images from text prompts using Gemini 2.5 Flash Image model",
111
170
  parameters: z.object({
112
171
  prompt: z.string().describe("Text description of the image to generate"),
113
- aspect_ratio: z.string().optional().describe("Aspect ratio: 1:1, 3:4, 4:3, 9:16, or 16:9"),
114
- output_path: z.string().optional().describe("File path to save the generated image"),
172
+ aspect_ratio: z
173
+ .string()
174
+ .optional()
175
+ .describe("Aspect ratio: 1:1, 3:4, 4:3, 9:16, or 16:9"),
176
+ output_path: z
177
+ .string()
178
+ .optional()
179
+ .describe("File path to save the generated image"),
115
180
  }),
116
- execute: async (args: { prompt: string; aspect_ratio?: string; output_path?: string }) => {
181
+ execute: async (args: {
182
+ prompt: string;
183
+ aspect_ratio?: string;
184
+ output_path?: string;
185
+ }) => {
117
186
  try {
118
187
  const response = await ai.models.generateContent({
119
188
  model: "gemini-2.5-flash-image",
@@ -133,10 +202,17 @@ export const geminiTextToImage = {
133
202
  } else if (part.inlineData?.data) {
134
203
  const imageData = part.inlineData.data;
135
204
  if (args.output_path) {
136
- fs.writeFileSync(args.output_path, Buffer.from(imageData, "base64"));
137
- result += `\nImage saved to: ${args.output_path}`;
205
+ const storage = getStorage();
206
+ const url = await storage.writeFile(
207
+ args.output_path,
208
+ Buffer.from(imageData, "base64")
209
+ );
210
+ result += `\nImage saved to: ${url}`;
138
211
  } else {
139
- result += `\nGenerated image (base64): ${imageData.substring(0, 100)}...`;
212
+ result += `\nGenerated image (base64): ${imageData.substring(
213
+ 0,
214
+ 100
215
+ )}...`;
140
216
  }
141
217
  }
142
218
  }
@@ -149,21 +225,34 @@ export const geminiTextToImage = {
149
225
  };
150
226
 
151
227
  export const geminiEditImage = {
152
- name: "geminiEditImage",
153
- description: "Edit existing images with text instructions using Gemini 2.5 Flash Image Preview",
228
+ name: "editImage",
229
+ description:
230
+ "Edit existing images with text instructions using Gemini 2.5 Flash Image Preview",
154
231
  parameters: z.object({
155
232
  image_path: z.string().describe("Path to the source image file"),
156
233
  prompt: z.string().describe("Text instructions for editing the image"),
157
- output_path: z.string().optional().describe("File path to save the edited image"),
158
- reference_images: z.array(z.string()).optional().describe("Additional image paths for reference"),
234
+ output_path: z
235
+ .string()
236
+ .optional()
237
+ .describe("File path to save the edited image"),
238
+ reference_images: z
239
+ .array(z.string())
240
+ .optional()
241
+ .describe("Additional image paths for reference"),
159
242
  }),
160
- execute: async (args: { image_path: string; prompt: string; output_path?: string; reference_images?: string[] }) => {
243
+ execute: async (args: {
244
+ image_path: string;
245
+ prompt: string;
246
+ output_path?: string;
247
+ reference_images?: string[];
248
+ }) => {
161
249
  try {
162
- const contents: any[] = [args.prompt, fileToGenerativePart(args.image_path)];
163
-
250
+ const imagePart = await fileToGenerativePart(args.image_path);
251
+ const contents: any[] = [args.prompt, imagePart];
252
+
164
253
  if (args.reference_images) {
165
254
  for (const refPath of args.reference_images) {
166
- contents.push(fileToGenerativePart(refPath));
255
+ contents.push(await fileToGenerativePart(refPath));
167
256
  }
168
257
  }
169
258
 
@@ -180,10 +269,17 @@ export const geminiEditImage = {
180
269
  } else if (part.inlineData?.data) {
181
270
  const imageData = part.inlineData.data;
182
271
  if (args.output_path) {
183
- fs.writeFileSync(args.output_path, Buffer.from(imageData, "base64"));
184
- result += `\nEdited image saved to: ${args.output_path}`;
272
+ const storage = getStorage();
273
+ const url = await storage.writeFile(
274
+ args.output_path,
275
+ Buffer.from(imageData, "base64")
276
+ );
277
+ result += `\nEdited image saved to: ${url}`;
185
278
  } else {
186
- result += `\nEdited image (base64): ${imageData.substring(0, 100)}...`;
279
+ result += `\nEdited image (base64): ${imageData.substring(
280
+ 0,
281
+ 100
282
+ )}...`;
187
283
  }
188
284
  }
189
285
  }
@@ -196,10 +292,13 @@ export const geminiEditImage = {
196
292
  };
197
293
 
198
294
  export const geminiAnalyzeImages = {
199
- name: "geminiAnalyzeImages",
200
- description: "Analyze and describe images using Gemini 2.5 Pro with advanced multimodal understanding",
295
+ name: "analyzeImages",
296
+ description:
297
+ "Analyze and describe images using Gemini 2.5 Pro with advanced multimodal understanding",
201
298
  parameters: z.object({
202
- image_paths: z.array(z.string()).describe("Array of image file paths to analyze"),
299
+ image_paths: z
300
+ .array(z.string())
301
+ .describe("Array of image file paths to analyze"),
203
302
  prompt: z.string().describe("Text prompt or question about the images"),
204
303
  }),
205
304
  execute: async (args: { image_paths: string[]; prompt: string }) => {
@@ -208,12 +307,12 @@ export const geminiAnalyzeImages = {
208
307
  if (!args.image_paths) {
209
308
  throw new Error("Image paths not provided");
210
309
  }
211
-
310
+
212
311
  // Convert to array if passed as string
213
312
  let imagePaths: string[];
214
- if (typeof args.image_paths === 'string') {
313
+ if (typeof args.image_paths === "string") {
215
314
  const strValue = args.image_paths as string;
216
- if (strValue.startsWith('[') && strValue.endsWith(']')) {
315
+ if (strValue.startsWith("[") && strValue.endsWith("]")) {
217
316
  try {
218
317
  imagePaths = JSON.parse(strValue);
219
318
  } catch {
@@ -227,15 +326,15 @@ export const geminiAnalyzeImages = {
227
326
  } else {
228
327
  throw new Error("Invalid image_paths: must be array or string");
229
328
  }
230
-
329
+
231
330
  if (imagePaths.length === 0) {
232
331
  throw new Error("At least one image path must be provided");
233
332
  }
234
-
333
+
235
334
  const contents: any[] = [args.prompt];
236
-
335
+
237
336
  for (const imagePath of imagePaths) {
238
- contents.push(fileToGenerativePart(imagePath));
337
+ contents.push(await fileToGenerativePart(imagePath));
239
338
  }
240
339
 
241
340
  const response = await ai.models.generateContent({
@@ -259,43 +358,59 @@ export const geminiAnalyzeImages = {
259
358
  };
260
359
 
261
360
  export const geminiSingleSpeakerTts = {
262
- name: "geminiSingleSpeakerTts",
263
- description: "Generate single speaker voice audio from text using Gemini 2.5 Pro Preview TTS model",
361
+ name: "generateSpeech",
362
+ description:
363
+ "Generate single speaker voice audio from text using Gemini 2.5 Pro Preview TTS model",
264
364
  parameters: z.object({
265
365
  text: z.string().describe("Text to convert to speech"),
266
- voice_name: z.string().describe("Voice name from supported options. Use Kore, Erinome or Despina for the female voices and Enceladus for male."),
267
- output_path: z.string().optional().describe("Output WAV file path (optional, defaults to timestamp-based filename)"),
366
+ voice_name: z
367
+ .string()
368
+ .describe(
369
+ "Voice name from supported options. Use Kore, Erinome or Despina for the female voices and Enceladus for male."
370
+ ),
371
+ output_path: z
372
+ .string()
373
+ .optional()
374
+ .describe(
375
+ "Output WAV file path (optional, defaults to timestamp-based filename)"
376
+ ),
268
377
  }),
269
- execute: async (args: { text: string; voice_name: string; output_path?: string }) => {
378
+ execute: async (args: {
379
+ text: string;
380
+ voice_name: string;
381
+ output_path?: string;
382
+ }) => {
270
383
  try {
271
384
  const response = await ai.models.generateContent({
272
385
  model: "gemini-2.5-pro-preview-tts",
273
386
  contents: [{ parts: [{ text: args.text }] }],
274
387
  config: {
275
- responseModalities: ['AUDIO'],
388
+ responseModalities: ["AUDIO"],
276
389
  speechConfig: {
277
390
  voiceConfig: {
278
- prebuiltVoiceConfig: {
279
- voiceName: args.voice_name || 'Despina'
391
+ prebuiltVoiceConfig: {
392
+ voiceName: args.voice_name || "Despina",
280
393
  },
281
394
  },
282
395
  },
283
396
  },
284
397
  });
285
398
 
286
- const data = response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
399
+ const data =
400
+ response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
287
401
  if (!data) {
288
402
  throw new Error("No audio data received from Gemini API");
289
403
  }
290
404
 
291
- const audioBuffer = Buffer.from(data, 'base64');
292
-
405
+ const audioBuffer = Buffer.from(data, "base64");
406
+
293
407
  // Generate output filename if not provided
294
408
  const outputPath = args.output_path || `voice_output_${Date.now()}.wav`;
295
-
296
- await saveWaveFile(outputPath, audioBuffer);
297
-
298
- return `Audio generated successfully: ${outputPath}`;
409
+
410
+ const storage = getStorage();
411
+ const url = await storage.writeFile(outputPath, audioBuffer);
412
+
413
+ return `Audio generated successfully: ${url}`;
299
414
  } catch (error: any) {
300
415
  throw new Error(`Voice generation failed: ${error.message}`);
301
416
  }
@@ -303,28 +418,60 @@ export const geminiSingleSpeakerTts = {
303
418
  };
304
419
 
305
420
  export const geminiAnalyzeVideos = {
306
- name: "geminiAnalyzeVideos",
307
- description: "Analyze and understand video content using Gemini 2.5 Flash model. Intelligently handles YouTube URLs and local videos (files <20MB processed inline, ≥20MB uploaded via File API). Supports timestamp queries, clipping, and custom frame rates with default 5 FPS for local videos to optimize processing.",
421
+ name: "analyzeVideos",
422
+ description:
423
+ "Analyze and understand video content using Gemini 2.5 Flash model. Intelligently handles YouTube URLs and local videos (files <20MB processed inline, ≥20MB uploaded via File API). Supports timestamp queries, clipping, and custom frame rates with default 5 FPS for local videos to optimize processing.",
308
424
  parameters: z.object({
309
- video_inputs: z.array(z.string()).describe("Array of video inputs - mix of local file paths and YouTube URLs (max 10 videos). Local files <20MB processed inline, larger files uploaded via File API automatically."),
310
- prompt: z.string().describe("Text prompt or question about the videos. Use MM:SS format for timestamp references (e.g., 'What happens at 01:30?')."),
311
- fps: z.number().optional().describe("Frame rate for video processing (default: 5 FPS for local videos to reduce file size, 1 FPS for YouTube URLs)"),
312
- start_offset: z.string().optional().describe("Clip start time in seconds with 's' suffix (e.g., '40s')"),
313
- end_offset: z.string().optional().describe("Clip end time in seconds with 's' suffix (e.g., '80s')"),
314
- media_resolution: z.string().optional().describe("Media resolution: 'default' or 'low' (low resolution uses ~100 tokens/sec vs 300 tokens/sec)"),
425
+ video_inputs: z
426
+ .array(z.string())
427
+ .describe(
428
+ "Array of video inputs - mix of local file paths and YouTube URLs (max 10 videos). Local files <20MB processed inline, larger files uploaded via File API automatically."
429
+ ),
430
+ prompt: z
431
+ .string()
432
+ .describe(
433
+ "Text prompt or question about the videos. Use MM:SS format for timestamp references (e.g., 'What happens at 01:30?')."
434
+ ),
435
+ fps: z
436
+ .number()
437
+ .optional()
438
+ .describe(
439
+ "Frame rate for video processing (default: 5 FPS for local videos to reduce file size, 1 FPS for YouTube URLs)"
440
+ ),
441
+ start_offset: z
442
+ .string()
443
+ .optional()
444
+ .describe("Clip start time in seconds with 's' suffix (e.g., '40s')"),
445
+ end_offset: z
446
+ .string()
447
+ .optional()
448
+ .describe("Clip end time in seconds with 's' suffix (e.g., '80s')"),
449
+ media_resolution: z
450
+ .string()
451
+ .optional()
452
+ .describe(
453
+ "Media resolution: 'default' or 'low' (low resolution uses ~100 tokens/sec vs 300 tokens/sec)"
454
+ ),
315
455
  }),
316
- execute: async (args: { video_inputs: string[]; prompt: string; fps?: number; start_offset?: string; end_offset?: string; media_resolution?: string }) => {
456
+ execute: async (args: {
457
+ video_inputs: string[];
458
+ prompt: string;
459
+ fps?: number;
460
+ start_offset?: string;
461
+ end_offset?: string;
462
+ media_resolution?: string;
463
+ }) => {
317
464
  try {
318
465
  // Handle array parsing
319
466
  if (!args.video_inputs) {
320
467
  throw new Error("Video inputs not provided");
321
468
  }
322
-
469
+
323
470
  // Convert to array if passed as string
324
471
  let videoInputs: string[];
325
- if (typeof args.video_inputs === 'string') {
472
+ if (typeof args.video_inputs === "string") {
326
473
  const strValue = args.video_inputs as string;
327
- if (strValue.startsWith('[') && strValue.endsWith(']')) {
474
+ if (strValue.startsWith("[") && strValue.endsWith("]")) {
328
475
  try {
329
476
  videoInputs = JSON.parse(strValue);
330
477
  } catch {
@@ -338,43 +485,47 @@ export const geminiAnalyzeVideos = {
338
485
  } else {
339
486
  throw new Error("Invalid video_inputs: must be array or string");
340
487
  }
341
-
488
+
342
489
  if (videoInputs.length === 0) {
343
490
  throw new Error("At least one video input must be provided");
344
491
  }
345
-
492
+
346
493
  if (videoInputs.length > 10) {
347
- throw new Error("Maximum 10 videos per request allowed for Gemini 2.5+ models");
494
+ throw new Error(
495
+ "Maximum 10 videos per request allowed for Gemini 2.5+ models"
496
+ );
348
497
  }
349
498
 
350
499
  // Prepare video parts for content
351
500
  const videoParts: any[] = [];
352
-
501
+
353
502
  // Process each video input
354
503
  for (const videoInput of videoInputs) {
355
504
  const videoConfig = {
356
505
  fps: args.fps || (isYouTubeUrl(videoInput) ? 1 : 5), // Default 5 FPS for local, 1 FPS for YouTube
357
506
  startOffset: args.start_offset,
358
- endOffset: args.end_offset
507
+ endOffset: args.end_offset,
359
508
  };
360
-
509
+
361
510
  const videoPart = await processVideoInput(videoInput, videoConfig);
362
511
  videoParts.push(videoPart);
363
512
  }
364
513
 
365
514
  // Build content using createUserContent and createPartFromUri for uploaded files
366
515
  const contentParts: any[] = [args.prompt];
367
-
516
+
368
517
  for (const videoPart of videoParts) {
369
518
  if (videoPart.uri && videoPart.mimeType) {
370
- contentParts.push(createPartFromUri(videoPart.uri, videoPart.mimeType));
519
+ contentParts.push(
520
+ createPartFromUri(videoPart.uri, videoPart.mimeType)
521
+ );
371
522
  }
372
523
  }
373
-
524
+
374
525
  const finalContents = createUserContent(contentParts);
375
-
526
+
376
527
  const response = await ai.models.generateContent({
377
- model: 'gemini-2.5-pro',
528
+ model: "gemini-2.5-pro",
378
529
  contents: finalContents,
379
530
  });
380
531
 
@@ -386,7 +537,7 @@ export const geminiAnalyzeVideos = {
386
537
  }
387
538
  }
388
539
  }
389
-
540
+
390
541
  return result || "Video analysis completed but no text response received";
391
542
  } catch (error: any) {
392
543
  throw new Error(`Video analysis failed: ${error.message}`);
@@ -1,9 +1,9 @@
1
- // @ts-nocheck
2
1
  import * as fs from "fs";
3
2
  import { GoogleAuth } from "google-auth-library";
4
3
  import { exec } from "child_process";
5
4
  import * as path from "path";
6
5
  import { z } from "zod";
6
+ import { getStorage } from "../storage";
7
7
 
8
8
  async function wait(ms: number): Promise<void> {
9
9
  return new Promise((resolve) => setTimeout(resolve, ms));
@@ -11,7 +11,9 @@ async function wait(ms: number): Promise<void> {
11
11
 
12
12
  async function fetchAccessToken(): Promise<string> {
13
13
  try {
14
- const auth = new GoogleAuth({ scopes: ["https://www.googleapis.com/auth/cloud-platform"] });
14
+ const auth = new GoogleAuth({
15
+ scopes: ["https://www.googleapis.com/auth/cloud-platform"],
16
+ });
15
17
  const client = await auth.getClient();
16
18
  const token = await client.getAccessToken();
17
19
  if (!token || typeof token !== "string") {
@@ -23,12 +25,22 @@ async function fetchAccessToken(): Promise<string> {
23
25
  return await new Promise((resolve, reject) => {
24
26
  exec("gcloud auth print-access-token", (err, stdout, stderr) => {
25
27
  if (err) {
26
- reject(new Error(`Failed to fetch an access token (ADC and gcloud): ${stderr || err.message}`));
28
+ reject(
29
+ new Error(
30
+ `Failed to fetch an access token (ADC and gcloud): ${
31
+ stderr || err.message
32
+ }`
33
+ )
34
+ );
27
35
  return;
28
36
  }
29
37
  const t = (stdout || "").trim();
30
38
  if (!t) {
31
- reject(new Error("Failed to fetch an access token: empty token from gcloud"));
39
+ reject(
40
+ new Error(
41
+ "Failed to fetch an access token: empty token from gcloud"
42
+ )
43
+ );
32
44
  return;
33
45
  }
34
46
  resolve(t);
@@ -37,37 +49,102 @@ async function fetchAccessToken(): Promise<string> {
37
49
  }
38
50
  }
39
51
 
40
- function fileToBase64(path: string): { data: string; mimeType: string } {
41
- if (!fs.existsSync(path)) {
42
- throw new Error(`File not found: ${path}`);
52
+ async function fileToBase64(
53
+ filePath: string
54
+ ): Promise<{ data: string; mimeType: string }> {
55
+ const storage = getStorage();
56
+ if (!(await storage.exists(filePath))) {
57
+ throw new Error(`File not found: ${filePath}`);
43
58
  }
44
- const buf = fs.readFileSync(path);
59
+ const buf = await storage.readFile(filePath);
45
60
  const data = Buffer.from(buf).toString("base64");
46
61
  // Default to PNG if not sure, similar to existing code
47
62
  const mimeType = "image/png";
48
63
  return { data, mimeType };
49
64
  }
50
65
 
51
- export const imageToVideo = ({
52
- name: "image_to_video",
53
- description: "Generate videos from an image as starting first frame using Vertex Veo models (predictLongRunning + fetchPredictOperation).",
66
+ export const imageToVideo = {
67
+ name: "generateVideoi2v",
68
+ description:
69
+ "Generate videos from an image as starting first frame using Vertex Veo models (predictLongRunning + fetchPredictOperation).",
54
70
  parameters: z.object({
55
71
  prompt: z.string().describe("Text description for the video"),
56
- image_path: z.string().optional().describe("Path to source image for image-to-video generation"),
57
- last_frame_path: z.string().optional().describe("Path to last frame image to guide ending frame (optional)"),
58
- aspect_ratio: z.string().optional().describe("Video aspect ratio: '16:9' or '9:16' (default: '9:16')"),
59
- duration_seconds: z.string().optional().describe("Video duration in seconds: '4', '6', or '8' (default: '6')"),
60
- resolution: z.string().optional().describe("Video resolution: '720p' or '1080p' (default: '720p')"),
61
- negative_prompt: z.string().optional().describe("Text describing what not to include in the video"),
62
- person_generation: z.string().optional().describe("Controls generation of people: 'allow_adult' (default for image-to-video) or 'allow_all'"),
63
- reference_images: z.array(z.string()).optional().describe("Additional image paths for reference (max 3)"),
64
- output_path: z.string().optional().describe("Output MP4 file path (if multiple predictions, index suffix is added)"),
65
- project_id: z.string().optional().describe("GCP Project ID (default: mixio-pro)"),
66
- location_id: z.string().optional().describe("Vertex region (default: us-central1)"),
67
- model_id: z.string().optional().describe("Model ID (default: veo-3.1-fast-generate-preview)"),
68
- generate_audio: z.boolean().optional().describe("Boolean flag to enable generation of audio along with the video").default(false)
72
+ image_path: z
73
+ .string()
74
+ .optional()
75
+ .describe("Path to source image for image-to-video generation"),
76
+ last_frame_path: z
77
+ .string()
78
+ .optional()
79
+ .describe("Path to last frame image to guide ending frame (optional)"),
80
+ aspect_ratio: z
81
+ .string()
82
+ .optional()
83
+ .describe("Video aspect ratio: '16:9' or '9:16' (default: '9:16')"),
84
+ duration_seconds: z
85
+ .string()
86
+ .optional()
87
+ .describe("Video duration in seconds: '4', '6', or '8' (default: '6')"),
88
+ resolution: z
89
+ .string()
90
+ .optional()
91
+ .describe("Video resolution: '720p' or '1080p' (default: '720p')"),
92
+ negative_prompt: z
93
+ .string()
94
+ .optional()
95
+ .describe("Text describing what not to include in the video"),
96
+ person_generation: z
97
+ .string()
98
+ .optional()
99
+ .describe(
100
+ "Controls generation of people: 'allow_adult' (default for image-to-video) or 'allow_all'"
101
+ ),
102
+ reference_images: z
103
+ .array(z.string())
104
+ .optional()
105
+ .describe("Additional image paths for reference (max 3)"),
106
+ output_path: z
107
+ .string()
108
+ .optional()
109
+ .describe(
110
+ "Output MP4 file path (if multiple predictions, index suffix is added)"
111
+ ),
112
+ project_id: z
113
+ .string()
114
+ .optional()
115
+ .describe("GCP Project ID (default: mixio-pro)"),
116
+ location_id: z
117
+ .string()
118
+ .optional()
119
+ .describe("Vertex region (default: us-central1)"),
120
+ model_id: z
121
+ .string()
122
+ .optional()
123
+ .describe("Model ID (default: veo-3.1-fast-generate-preview)"),
124
+ generate_audio: z
125
+ .boolean()
126
+ .optional()
127
+ .describe(
128
+ "Boolean flag to enable generation of audio along with the video"
129
+ )
130
+ .default(false),
69
131
  }),
70
- async execute(args) {
132
+ async execute(args: {
133
+ prompt: string;
134
+ image_path?: string;
135
+ last_frame_path?: string;
136
+ aspect_ratio?: string;
137
+ duration_seconds?: string;
138
+ resolution?: string;
139
+ negative_prompt?: string;
140
+ person_generation?: string;
141
+ reference_images?: string[] | string;
142
+ output_path?: string;
143
+ project_id?: string;
144
+ location_id?: string;
145
+ model_id?: string;
146
+ generate_audio?: boolean;
147
+ }) {
71
148
  const projectId = args.project_id || "mixio-pro";
72
149
  const location = args.location_id || "us-central1";
73
150
  const modelId = args.model_id || "veo-3.1-fast-generate-preview";
@@ -78,7 +155,7 @@ export const imageToVideo = ({
78
155
 
79
156
  let imagePart: any = undefined;
80
157
  if (args.image_path) {
81
- const { data, mimeType } = fileToBase64(args.image_path);
158
+ const { data, mimeType } = await fileToBase64(args.image_path);
82
159
  imagePart = {
83
160
  image: {
84
161
  bytesBase64Encoded: data,
@@ -89,7 +166,7 @@ export const imageToVideo = ({
89
166
 
90
167
  let lastFramePart: any = undefined;
91
168
  if (args.last_frame_path) {
92
- const { data, mimeType } = fileToBase64(args.last_frame_path);
169
+ const { data, mimeType } = await fileToBase64(args.last_frame_path);
93
170
  lastFramePart = {
94
171
  lastFrame: {
95
172
  bytesBase64Encoded: data,
@@ -102,7 +179,10 @@ export const imageToVideo = ({
102
179
  if (args.reference_images) {
103
180
  let refImages: string[];
104
181
  if (typeof args.reference_images === "string") {
105
- if (args.reference_images.startsWith("[") && args.reference_images.endsWith("]")) {
182
+ if (
183
+ args.reference_images.startsWith("[") &&
184
+ args.reference_images.endsWith("]")
185
+ ) {
106
186
  try {
107
187
  refImages = JSON.parse(args.reference_images);
108
188
  } catch {
@@ -118,20 +198,23 @@ export const imageToVideo = ({
118
198
  }
119
199
 
120
200
  if (refImages.length > 0) {
121
- referenceImages = refImages.slice(0, 3).map((p) => {
122
- const { data, mimeType } = fileToBase64(p);
123
- return {
124
- image: {
125
- bytesBase64Encoded: data,
126
- mimeType,
127
- },
128
- referenceType: "asset",
129
- };
130
- });
201
+ referenceImages = await Promise.all(
202
+ refImages.slice(0, 3).map(async (p) => {
203
+ const { data, mimeType } = await fileToBase64(p);
204
+ return {
205
+ image: {
206
+ bytesBase64Encoded: data,
207
+ mimeType,
208
+ },
209
+ referenceType: "asset",
210
+ };
211
+ })
212
+ );
131
213
  }
132
214
  }
133
215
 
134
- const personGeneration = args.person_generation || (args.image_path ? "allow_adult" : "allow_all");
216
+ const personGeneration =
217
+ args.person_generation || (args.image_path ? "allow_adult" : "allow_all");
135
218
 
136
219
  const instances: any[] = [
137
220
  {
@@ -144,7 +227,7 @@ export const imageToVideo = ({
144
227
 
145
228
  const parameters: any = {
146
229
  aspectRatio: args.aspect_ratio || "9:16",
147
- durationSeconds: parseInt(args.duration_seconds) || 6,
230
+ durationSeconds: parseInt(args.duration_seconds || "6") || 6,
148
231
  resolution: args.resolution || "720p",
149
232
  negativePrompt: args.negative_prompt,
150
233
  generateAudio: args.generate_audio || false,
@@ -165,10 +248,12 @@ export const imageToVideo = ({
165
248
  throw new Error(`Vertex request failed: ${res.status} ${text}`);
166
249
  }
167
250
 
168
- const op = await res.json();
251
+ const op = (await res.json()) as any;
169
252
  const name: string = op.name || op.operation || "";
170
253
  if (!name) {
171
- throw new Error("Vertex did not return an operation name for long-running request");
254
+ throw new Error(
255
+ "Vertex did not return an operation name for long-running request"
256
+ );
172
257
  }
173
258
 
174
259
  let current = op;
@@ -191,7 +276,7 @@ export const imageToVideo = ({
191
276
  const text = await poll.text();
192
277
  throw new Error(`Vertex operation poll failed: ${poll.status} ${text}`);
193
278
  }
194
- current = await poll.json();
279
+ current = (await poll.json()) as any;
195
280
  done = !!current.done || !!current.response;
196
281
  tries++;
197
282
  }
@@ -199,34 +284,41 @@ export const imageToVideo = ({
199
284
  const resp = current.response || current;
200
285
  // Decode from response.videos[].bytesBase64Encoded only
201
286
  const outputs: string[] = [];
202
- const saveVideo = (base64: string, index: number) => {
287
+ const saveVideo = async (base64: string, index: number) => {
203
288
  if (!base64) return;
204
289
  const filePath = args.output_path
205
- ? (index === 0 ? args.output_path : args.output_path.replace(/\.mp4$/i, `_${index}.mp4`))
206
- : `video_output_${Date.now()}${index === 0 ? '' : '_' + index}.mp4`;
207
- const absPath = path.resolve(filePath);
208
- const buf = Buffer.from(base64, 'base64');
209
- fs.writeFileSync(absPath, buf);
210
- outputs.push(absPath);
290
+ ? index === 0
291
+ ? args.output_path
292
+ : args.output_path.replace(/\.mp4$/i, `_${index}.mp4`)
293
+ : `video_output_${Date.now()}${index === 0 ? "" : "_" + index}.mp4`;
294
+
295
+ const buf = Buffer.from(base64, "base64");
296
+ const storage = getStorage();
297
+ const url = await storage.writeFile(filePath, buf);
298
+ outputs.push(url);
211
299
  };
212
300
 
213
301
  if (Array.isArray(resp?.videos) && resp.videos.length > 0) {
214
302
  for (let i = 0; i < resp.videos.length; i++) {
215
303
  const v = resp.videos[i] || {};
216
- if (typeof v.bytesBase64Encoded === 'string') {
217
- saveVideo(v.bytesBase64Encoded, i);
304
+ if (typeof v.bytesBase64Encoded === "string") {
305
+ await saveVideo(v.bytesBase64Encoded, i);
218
306
  }
219
307
  }
220
308
  }
221
309
  if (outputs.length > 0) {
222
- return `Video(s) saved: ${outputs.join(', ')}`;
310
+ return `Video(s) saved to: ${outputs.join(", ")}`;
223
311
  }
224
312
 
225
313
  // If nothing saved, return a concise summary plus head/tail snippets of JSON
226
314
  let jsonStr = "";
227
- try { jsonStr = JSON.stringify(resp); } catch {}
315
+ try {
316
+ jsonStr = JSON.stringify(resp);
317
+ } catch {}
228
318
  const head150 = jsonStr ? jsonStr.slice(0, 150) : "";
229
- const tail50 = jsonStr ? jsonStr.slice(Math.max(0, jsonStr.length - 50)) : "";
319
+ const tail50 = jsonStr
320
+ ? jsonStr.slice(Math.max(0, jsonStr.length - 50))
321
+ : "";
230
322
  return `Vertex operation done but no videos array present. operationName=${name}. json_head150=${head150} json_tail50=${tail50}`;
231
323
  },
232
- });
324
+ };