@mixio-pro/kalaasetu-mcp 1.0.5-beta → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -68,7 +68,9 @@ Add to your Cursor settings (`~/.cursor/config.json` or via Settings → MCP):
68
68
  "env": {
69
69
  "GEMINI_API_KEY": "your-gemini-api-key",
70
70
  "FAL_KEY": "your-fal-api-key",
71
- "PERPLEXITY_API_KEY": "your-perplexity-api-key"
71
+ "PERPLEXITY_API_KEY": "your-perplexity-api-key",
72
+ "STORAGE_PROVIDER":"gcs",
73
+ "GCS_BUCKET":"your-gcs-bucket-name"
72
74
  }
73
75
  }
74
76
  }
@@ -88,7 +90,9 @@ Add to your OpenCode MCP configuration:
88
90
  "environment": {
89
91
  "GEMINI_API_KEY": "your-gemini-api-key",
90
92
  "FAL_KEY": "your-fal-api-key",
91
- "PERPLEXITY_API_KEY": "your-perplexity-api-key"
93
+ "PERPLEXITY_API_KEY": "your-perplexity-api-key",
94
+ "STORAGE_PROVIDER":"gcs",
95
+ "GCS_BUCKET":"your-bucket-name"
92
96
  }
93
97
  }
94
98
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mixio-pro/kalaasetu-mcp",
3
- "version": "1.0.5-beta",
3
+ "version": "1.0.7",
4
4
  "description": "A powerful Model Context Protocol server providing AI tools for content generation and analysis",
5
5
  "type": "module",
6
6
  "module": "src/index.ts",
@@ -49,8 +49,10 @@
49
49
  "dependencies": {
50
50
  "@fal-ai/client": "^1.7.2",
51
51
  "@google/genai": "^1.28.0",
52
+ "@types/node": "^24.10.1",
52
53
  "@types/wav": "^1.0.4",
53
54
  "fastmcp": "^3.22.0",
55
+ "form-data": "^4.0.5",
54
56
  "google-auth-library": "^10.5.0",
55
57
  "wav": "^1.0.2",
56
58
  "zod": "^4.1.12"
@@ -0,0 +1,116 @@
1
+ import { GoogleAuth } from "google-auth-library";
2
+ import type { StorageProvider } from "./interface";
3
+ import * as path from "path";
4
+
5
+ export class GCSStorageProvider implements StorageProvider {
6
+ private bucket: string;
7
+ private auth: GoogleAuth;
8
+
9
+ constructor(bucket: string) {
10
+ this.bucket = bucket;
11
+ this.auth = new GoogleAuth({
12
+ scopes: ["https://www.googleapis.com/auth/cloud-platform"],
13
+ });
14
+ }
15
+
16
+ async init(): Promise<void> {
17
+ console.log(
18
+ `Initializing GCS Storage Provider with bucket: ${this.bucket}`
19
+ );
20
+ // Verify we can get credentials
21
+ try {
22
+ await this.auth.getClient();
23
+ } catch (error) {
24
+ console.warn(`Warning: Could not initialize GCS client: ${error}`);
25
+ }
26
+ }
27
+
28
+ private async getAccessToken(): Promise<string> {
29
+ const client = await this.auth.getClient();
30
+ const token = await client.getAccessToken();
31
+ if (!token.token) {
32
+ throw new Error("Failed to get GCS access token");
33
+ }
34
+ return token.token;
35
+ }
36
+
37
+ async readFile(filePath: string): Promise<Buffer> {
38
+ const objectName = path.basename(filePath);
39
+ const url = `https://storage.googleapis.com/storage/v1/b/${
40
+ this.bucket
41
+ }/o/${encodeURIComponent(objectName)}?alt=media`;
42
+
43
+ const token = await this.getAccessToken();
44
+ const response = await fetch(url, {
45
+ headers: {
46
+ Authorization: `Bearer ${token}`,
47
+ },
48
+ });
49
+
50
+ if (!response.ok) {
51
+ throw new Error(
52
+ `Failed to read file from GCS: ${response.status} ${response.statusText}`
53
+ );
54
+ }
55
+
56
+ const arrayBuffer = await response.arrayBuffer();
57
+ return Buffer.from(arrayBuffer);
58
+ }
59
+
60
+ async writeFile(filePath: string, data: Buffer | string): Promise<string> {
61
+ const objectName = path.basename(filePath);
62
+ const buffer = Buffer.isBuffer(data) ? data : Buffer.from(data);
63
+
64
+ // Upload using JSON API
65
+ const url = `https://storage.googleapis.com/upload/storage/v1/b/${
66
+ this.bucket
67
+ }/o?uploadType=media&name=${encodeURIComponent(objectName)}`;
68
+
69
+ const token = await this.getAccessToken();
70
+ const response = await fetch(url, {
71
+ method: "POST",
72
+ headers: {
73
+ Authorization: `Bearer ${token}`,
74
+ "Content-Type": "application/octet-stream",
75
+ "Content-Length": buffer.length.toString(),
76
+ },
77
+ body: buffer,
78
+ });
79
+
80
+ if (!response.ok) {
81
+ const errorText = await response.text();
82
+ throw new Error(
83
+ `Failed to upload to GCS: ${response.status} ${errorText}`
84
+ );
85
+ }
86
+
87
+ // Return public URL
88
+ return `https://storage.googleapis.com/${this.bucket}/${objectName}`;
89
+ }
90
+
91
+ async exists(filePath: string): Promise<boolean> {
92
+ try {
93
+ const objectName = path.basename(filePath);
94
+ const url = `https://storage.googleapis.com/storage/v1/b/${
95
+ this.bucket
96
+ }/o/${encodeURIComponent(objectName)}`;
97
+
98
+ const token = await this.getAccessToken();
99
+ const response = await fetch(url, {
100
+ method: "GET",
101
+ headers: {
102
+ Authorization: `Bearer ${token}`,
103
+ },
104
+ });
105
+
106
+ return response.ok;
107
+ } catch {
108
+ return false;
109
+ }
110
+ }
111
+
112
+ async getPublicUrl(filePath: string): Promise<string> {
113
+ const objectName = path.basename(filePath);
114
+ return `https://storage.googleapis.com/${this.bucket}/${objectName}`;
115
+ }
116
+ }
@@ -0,0 +1,30 @@
1
+ import type { StorageProvider } from "./interface";
2
+ import { LocalStorageProvider } from "./local";
3
+ import { GCSStorageProvider } from "./gcs";
4
+
5
+ let storageInstance: StorageProvider | null = null;
6
+
7
+ export function getStorage(): StorageProvider {
8
+ if (!storageInstance) {
9
+ const type = process.env.STORAGE_PROVIDER || "local";
10
+ console.error(`Initializing storage provider: ${type}`);
11
+
12
+ if (type === "gcs") {
13
+ const bucket = process.env.GCS_BUCKET;
14
+
15
+ if (!bucket) {
16
+ throw new Error("GCS_BUCKET is required when using gcs storage");
17
+ }
18
+
19
+ storageInstance = new GCSStorageProvider(bucket);
20
+ } else {
21
+ storageInstance = new LocalStorageProvider(process.cwd());
22
+ }
23
+
24
+ // Initialize async
25
+ storageInstance
26
+ .init()
27
+ .catch((err) => console.error("Failed to init storage:", err));
28
+ }
29
+ return storageInstance;
30
+ }
@@ -0,0 +1,7 @@
1
/**
 * Abstraction over file persistence so tools can transparently target the
 * local filesystem or a remote bucket (e.g. Google Cloud Storage).
 */
export interface StorageProvider {
  // One-time setup/verification; call once before first use.
  init(): Promise<void>;
  // Resolves the stored object and returns its raw bytes.
  readFile(path: string): Promise<Buffer>;
  // Persists data and resolves to a location usable by clients.
  writeFile(path: string, data: Buffer | string): Promise<string>; // Returns public URL
  // True when the object can be found by this provider.
  exists(path: string): Promise<boolean>;
  // URL (or absolute path, for local storage) to fetch the object from.
  getPublicUrl(path: string): Promise<string>;
}
@@ -0,0 +1,53 @@
1
+ import * as fs from "fs";
2
+ import * as path from "path";
3
+ import type { StorageProvider } from "./interface";
4
+
5
+ export class LocalStorageProvider implements StorageProvider {
6
+ private basePath: string;
7
+
8
+ constructor(basePath: string = process.cwd()) {
9
+ this.basePath = basePath;
10
+ }
11
+
12
+ async init(): Promise<void> {
13
+ // No-op for local
14
+ }
15
+
16
+ async readFile(filePath: string): Promise<Buffer> {
17
+ let fullPath = filePath;
18
+ if (!path.isAbsolute(filePath)) {
19
+ fullPath = path.resolve(this.basePath, filePath);
20
+ }
21
+ return fs.promises.readFile(fullPath);
22
+ }
23
+
24
+ async writeFile(filePath: string, data: Buffer | string): Promise<string> {
25
+ let fullPath = filePath;
26
+ if (!path.isAbsolute(filePath)) {
27
+ fullPath = path.resolve(this.basePath, filePath);
28
+ }
29
+
30
+ const dir = path.dirname(fullPath);
31
+ if (!fs.existsSync(dir)) {
32
+ await fs.promises.mkdir(dir, { recursive: true });
33
+ }
34
+ await fs.promises.writeFile(fullPath, data);
35
+ return fullPath;
36
+ }
37
+
38
+ async exists(filePath: string): Promise<boolean> {
39
+ let fullPath = filePath;
40
+ if (!path.isAbsolute(filePath)) {
41
+ fullPath = path.resolve(this.basePath, filePath);
42
+ }
43
+ return fs.existsSync(fullPath);
44
+ }
45
+
46
+ async getPublicUrl(filePath: string): Promise<string> {
47
+ let fullPath = filePath;
48
+ if (!path.isAbsolute(filePath)) {
49
+ fullPath = path.resolve(this.basePath, filePath);
50
+ }
51
+ return fullPath;
52
+ }
53
+ }
@@ -1,17 +1,26 @@
1
1
  import { z } from "zod";
2
- import { GoogleGenAI, createPartFromUri, createUserContent } from "@google/genai";
2
+ import {
3
+ GoogleGenAI,
4
+ createPartFromUri,
5
+ createUserContent,
6
+ } from "@google/genai";
3
7
  import * as fs from "fs";
8
+ import * as path from "path";
9
+ import * as os from "os";
4
10
  import * as wav from "wav";
11
+ import { PassThrough } from "stream";
12
+ import { getStorage } from "../storage";
5
13
 
6
14
  const ai = new GoogleGenAI({
7
15
  apiKey: process.env.GEMINI_API_KEY || "",
8
16
  });
9
17
 
10
- function fileToGenerativePart(filePath: string) {
11
- if (!fs.existsSync(filePath)) {
18
+ async function fileToGenerativePart(filePath: string) {
19
+ const storage = getStorage();
20
+ if (!(await storage.exists(filePath))) {
12
21
  throw new Error(`File not found: ${filePath}`);
13
22
  }
14
- const imageBytes = fs.readFileSync(filePath);
23
+ const imageBytes = await storage.readFile(filePath);
15
24
  return {
16
25
  inlineData: {
17
26
  data: Buffer.from(imageBytes).toString("base64"),
@@ -21,22 +30,37 @@ function fileToGenerativePart(filePath: string) {
21
30
  }
22
31
 
23
32
  // Helper function to save WAV file
24
- function saveWaveFile(
33
+ // Helper function to save WAV file
34
+ async function saveWaveFile(
25
35
  filename: string,
26
36
  pcmData: Buffer,
27
37
  channels = 1,
28
38
  rate = 24000,
29
- sampleWidth = 2,
39
+ sampleWidth = 2
30
40
  ): Promise<void> {
31
41
  return new Promise((resolve, reject) => {
32
- const writer = new wav.FileWriter(filename, {
42
+ const writer = new wav.Writer({
33
43
  channels,
34
44
  sampleRate: rate,
35
45
  bitDepth: sampleWidth * 8,
36
46
  });
37
47
 
38
- writer.on('finish', resolve);
39
- writer.on('error', reject);
48
+ const stream = new PassThrough();
49
+ const chunks: Buffer[] = [];
50
+
51
+ writer.pipe(stream);
52
+ stream.on("data", (chunk) => chunks.push(chunk));
53
+ stream.on("end", async () => {
54
+ try {
55
+ const wavBuffer = Buffer.concat(chunks);
56
+ const storage = getStorage();
57
+ await storage.writeFile(filename, wavBuffer);
58
+ resolve();
59
+ } catch (err) {
60
+ reject(err);
61
+ }
62
+ });
63
+ writer.on("error", reject);
40
64
 
41
65
  writer.write(pcmData);
42
66
  writer.end();
@@ -45,31 +69,59 @@ function saveWaveFile(
45
69
 
46
70
  // Helper function to check if URL is YouTube URL
47
71
  function isYouTubeUrl(url: string): boolean {
48
- return url.includes('youtube.com/watch') || url.includes('youtu.be');
72
+ return url.includes("youtube.com/watch") || url.includes("youtu.be");
49
73
  }
50
74
 
51
75
  // Helper function to get file size in bytes
52
- function getFileSize(filePath: string): number {
53
- const stats = fs.statSync(filePath);
54
- return stats.size;
76
+ async function getFileSize(filePath: string): Promise<number> {
77
+ const storage = getStorage();
78
+ const buffer = await storage.readFile(filePath);
79
+ return buffer.length;
55
80
  }
56
81
 
82
+ // Helper function to upload file to Gemini API
57
83
  // Helper function to upload file to Gemini API
58
84
  async function uploadFileToGemini(filePath: string): Promise<any> {
59
85
  try {
86
+ const storage = getStorage();
87
+ // For Gemini API, we need a local file path.
88
+ // If storage is local, we can use the path directly (if we can resolve it).
89
+ // If storage is remote, we must download to a temp file.
90
+
91
+ let localPath = filePath;
92
+ let isTemp = false;
93
+
94
+ // Check if we can get a local path from storage (hacky check for LocalStorageProvider)
95
+ // A better way is to always download to temp if not sure, or ask storage for a local path.
96
+ // For now, let's assume we need to download if it's not a local file system path that exists.
97
+
98
+ if (!fs.existsSync(filePath)) {
99
+ // Try to read from storage and write to temp
100
+ const buffer = await storage.readFile(filePath);
101
+ const tempDir = os.tmpdir();
102
+ const tempFilePath = path.join(tempDir, path.basename(filePath));
103
+ fs.writeFileSync(tempFilePath, buffer);
104
+ localPath = tempFilePath;
105
+ isTemp = true;
106
+ }
107
+
60
108
  const uploadedFile = await ai.files.upload({
61
- file: filePath,
109
+ file: localPath,
62
110
  });
63
-
111
+
112
+ if (isTemp) {
113
+ fs.unlinkSync(localPath);
114
+ }
115
+
64
116
  // Wait for file processing to complete
65
117
  let getFile = await ai.files.get({ name: uploadedFile.name! });
66
- while (getFile.state === 'PROCESSING') {
67
- await new Promise(resolve => setTimeout(resolve, 3000));
118
+ while (getFile.state === "PROCESSING") {
119
+ await new Promise((resolve) => setTimeout(resolve, 3000));
68
120
  getFile = await ai.files.get({ name: uploadedFile.name! });
69
121
  }
70
122
 
71
- if (getFile.state === 'FAILED') {
72
- throw new Error('File processing failed');
123
+ if (getFile.state === "FAILED") {
124
+ throw new Error("File processing failed");
73
125
  }
74
126
 
75
127
  return getFile;
@@ -79,41 +131,58 @@ async function uploadFileToGemini(filePath: string): Promise<any> {
79
131
  }
80
132
 
81
133
  // Helper function to process video input intelligently
82
// Builds the Gemini content part for a single video input.
//
// For YouTube URLs: returns an inline fileData part referencing the URL,
// with optional videoMetadata (fps / clip offsets) when config is given.
// For anything else: treats the input as a path in the configured storage
// provider and uploads it through the Gemini File API, returning the
// uploaded-file object from uploadFileToGemini.
//
// NOTE(review): the two branches return differently shaped values — the
// caller (analyzeVideos) appears to branch on the presence of .uri —
// confirm both shapes are handled wherever this is consumed.
async function processVideoInput(
  input: string,
  config?: { fps?: number; startOffset?: string; endOffset?: string }
): Promise<any> {
  if (isYouTubeUrl(input)) {
    return {
      fileData: {
        fileUri: input,
        mimeType: "video/*",
        videoMetadata: config
          ? {
              fps: config.fps,
              startOffset: config.startOffset,
              endOffset: config.endOffset,
            }
          : undefined,
      },
    };
  } else {
    // Local file processing - use File Upload API
    const storage = getStorage();
    if (!(await storage.exists(input))) {
      throw new Error(`Video file not found: ${input}`);
    }

    // Upload file to Gemini API
    const uploadedFile = await uploadFileToGemini(input);

    return uploadedFile;
  }
}
107
165
 
108
166
  export const geminiTextToImage = {
109
- name: "geminiTextToImage",
110
- description: "Generate images from text prompts using Gemini 2.5 Flash Image model",
167
+ name: "generateImage",
168
+ description:
169
+ "Generate images from text prompts using Gemini 2.5 Flash Image model",
111
170
  parameters: z.object({
112
171
  prompt: z.string().describe("Text description of the image to generate"),
113
- aspect_ratio: z.string().optional().describe("Aspect ratio: 1:1, 3:4, 4:3, 9:16, or 16:9"),
114
- output_path: z.string().optional().describe("File path to save the generated image"),
172
+ aspect_ratio: z
173
+ .string()
174
+ .optional()
175
+ .describe("Aspect ratio: 1:1, 3:4, 4:3, 9:16, or 16:9"),
176
+ output_path: z
177
+ .string()
178
+ .optional()
179
+ .describe("File path to save the generated image"),
115
180
  }),
116
- execute: async (args: { prompt: string; aspect_ratio?: string; output_path?: string }) => {
181
+ execute: async (args: {
182
+ prompt: string;
183
+ aspect_ratio?: string;
184
+ output_path?: string;
185
+ }) => {
117
186
  try {
118
187
  const response = await ai.models.generateContent({
119
188
  model: "gemini-2.5-flash-image",
@@ -133,10 +202,17 @@ export const geminiTextToImage = {
133
202
  } else if (part.inlineData?.data) {
134
203
  const imageData = part.inlineData.data;
135
204
  if (args.output_path) {
136
- fs.writeFileSync(args.output_path, Buffer.from(imageData, "base64"));
137
- result += `\nImage saved to: ${args.output_path}`;
205
+ const storage = getStorage();
206
+ const url = await storage.writeFile(
207
+ args.output_path,
208
+ Buffer.from(imageData, "base64")
209
+ );
210
+ result += `\nImage saved to: ${url}`;
138
211
  } else {
139
- result += `\nGenerated image (base64): ${imageData.substring(0, 100)}...`;
212
+ result += `\nGenerated image (base64): ${imageData.substring(
213
+ 0,
214
+ 100
215
+ )}...`;
140
216
  }
141
217
  }
142
218
  }
@@ -149,21 +225,34 @@ export const geminiTextToImage = {
149
225
  };
150
226
 
151
227
  export const geminiEditImage = {
152
- name: "geminiEditImage",
153
- description: "Edit existing images with text instructions using Gemini 2.5 Flash Image Preview",
228
+ name: "editImage",
229
+ description:
230
+ "Edit existing images with text instructions using Gemini 2.5 Flash Image Preview",
154
231
  parameters: z.object({
155
232
  image_path: z.string().describe("Path to the source image file"),
156
233
  prompt: z.string().describe("Text instructions for editing the image"),
157
- output_path: z.string().optional().describe("File path to save the edited image"),
158
- reference_images: z.array(z.string()).optional().describe("Additional image paths for reference"),
234
+ output_path: z
235
+ .string()
236
+ .optional()
237
+ .describe("File path to save the edited image"),
238
+ reference_images: z
239
+ .array(z.string())
240
+ .optional()
241
+ .describe("Additional image paths for reference"),
159
242
  }),
160
- execute: async (args: { image_path: string; prompt: string; output_path?: string; reference_images?: string[] }) => {
243
+ execute: async (args: {
244
+ image_path: string;
245
+ prompt: string;
246
+ output_path?: string;
247
+ reference_images?: string[];
248
+ }) => {
161
249
  try {
162
- const contents: any[] = [args.prompt, fileToGenerativePart(args.image_path)];
163
-
250
+ const imagePart = await fileToGenerativePart(args.image_path);
251
+ const contents: any[] = [args.prompt, imagePart];
252
+
164
253
  if (args.reference_images) {
165
254
  for (const refPath of args.reference_images) {
166
- contents.push(fileToGenerativePart(refPath));
255
+ contents.push(await fileToGenerativePart(refPath));
167
256
  }
168
257
  }
169
258
 
@@ -180,10 +269,17 @@ export const geminiEditImage = {
180
269
  } else if (part.inlineData?.data) {
181
270
  const imageData = part.inlineData.data;
182
271
  if (args.output_path) {
183
- fs.writeFileSync(args.output_path, Buffer.from(imageData, "base64"));
184
- result += `\nEdited image saved to: ${args.output_path}`;
272
+ const storage = getStorage();
273
+ const url = await storage.writeFile(
274
+ args.output_path,
275
+ Buffer.from(imageData, "base64")
276
+ );
277
+ result += `\nEdited image saved to: ${url}`;
185
278
  } else {
186
- result += `\nEdited image (base64): ${imageData.substring(0, 100)}...`;
279
+ result += `\nEdited image (base64): ${imageData.substring(
280
+ 0,
281
+ 100
282
+ )}...`;
187
283
  }
188
284
  }
189
285
  }
@@ -196,10 +292,13 @@ export const geminiEditImage = {
196
292
  };
197
293
 
198
294
  export const geminiAnalyzeImages = {
199
- name: "geminiAnalyzeImages",
200
- description: "Analyze and describe images using Gemini 2.5 Pro with advanced multimodal understanding",
295
+ name: "analyzeImages",
296
+ description:
297
+ "Analyze and describe images using Gemini 2.5 Pro with advanced multimodal understanding",
201
298
  parameters: z.object({
202
- image_paths: z.array(z.string()).describe("Array of image file paths to analyze"),
299
+ image_paths: z
300
+ .array(z.string())
301
+ .describe("Array of image file paths to analyze"),
203
302
  prompt: z.string().describe("Text prompt or question about the images"),
204
303
  }),
205
304
  execute: async (args: { image_paths: string[]; prompt: string }) => {
@@ -208,12 +307,12 @@ export const geminiAnalyzeImages = {
208
307
  if (!args.image_paths) {
209
308
  throw new Error("Image paths not provided");
210
309
  }
211
-
310
+
212
311
  // Convert to array if passed as string
213
312
  let imagePaths: string[];
214
- if (typeof args.image_paths === 'string') {
313
+ if (typeof args.image_paths === "string") {
215
314
  const strValue = args.image_paths as string;
216
- if (strValue.startsWith('[') && strValue.endsWith(']')) {
315
+ if (strValue.startsWith("[") && strValue.endsWith("]")) {
217
316
  try {
218
317
  imagePaths = JSON.parse(strValue);
219
318
  } catch {
@@ -227,15 +326,15 @@ export const geminiAnalyzeImages = {
227
326
  } else {
228
327
  throw new Error("Invalid image_paths: must be array or string");
229
328
  }
230
-
329
+
231
330
  if (imagePaths.length === 0) {
232
331
  throw new Error("At least one image path must be provided");
233
332
  }
234
-
333
+
235
334
  const contents: any[] = [args.prompt];
236
-
335
+
237
336
  for (const imagePath of imagePaths) {
238
- contents.push(fileToGenerativePart(imagePath));
337
+ contents.push(await fileToGenerativePart(imagePath));
239
338
  }
240
339
 
241
340
  const response = await ai.models.generateContent({
@@ -259,43 +358,59 @@ export const geminiAnalyzeImages = {
259
358
  };
260
359
 
261
360
  export const geminiSingleSpeakerTts = {
262
- name: "geminiSingleSpeakerTts",
263
- description: "Generate single speaker voice audio from text using Gemini 2.5 Pro Preview TTS model",
361
+ name: "generateSpeech",
362
+ description:
363
+ "Generate single speaker voice audio from text using Gemini 2.5 Pro Preview TTS model",
264
364
  parameters: z.object({
265
365
  text: z.string().describe("Text to convert to speech"),
266
- voice_name: z.string().describe("Voice name from supported options. Use Kore, Erinome or Despina for the female voices and Enceladus for male."),
267
- output_path: z.string().optional().describe("Output WAV file path (optional, defaults to timestamp-based filename)"),
366
+ voice_name: z
367
+ .string()
368
+ .describe(
369
+ "Voice name from supported options. Use Kore, Erinome or Despina for the female voices and Enceladus for male."
370
+ ),
371
+ output_path: z
372
+ .string()
373
+ .optional()
374
+ .describe(
375
+ "Output WAV file path (optional, defaults to timestamp-based filename)"
376
+ ),
268
377
  }),
269
- execute: async (args: { text: string; voice_name: string; output_path?: string }) => {
378
+ execute: async (args: {
379
+ text: string;
380
+ voice_name: string;
381
+ output_path?: string;
382
+ }) => {
270
383
  try {
271
384
  const response = await ai.models.generateContent({
272
385
  model: "gemini-2.5-pro-preview-tts",
273
386
  contents: [{ parts: [{ text: args.text }] }],
274
387
  config: {
275
- responseModalities: ['AUDIO'],
388
+ responseModalities: ["AUDIO"],
276
389
  speechConfig: {
277
390
  voiceConfig: {
278
- prebuiltVoiceConfig: {
279
- voiceName: args.voice_name || 'Despina'
391
+ prebuiltVoiceConfig: {
392
+ voiceName: args.voice_name || "Despina",
280
393
  },
281
394
  },
282
395
  },
283
396
  },
284
397
  });
285
398
 
286
- const data = response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
399
+ const data =
400
+ response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
287
401
  if (!data) {
288
402
  throw new Error("No audio data received from Gemini API");
289
403
  }
290
404
 
291
- const audioBuffer = Buffer.from(data, 'base64');
292
-
405
+ const audioBuffer = Buffer.from(data, "base64");
406
+
293
407
  // Generate output filename if not provided
294
408
  const outputPath = args.output_path || `voice_output_${Date.now()}.wav`;
295
-
296
- await saveWaveFile(outputPath, audioBuffer);
297
-
298
- return `Audio generated successfully: ${outputPath}`;
409
+
410
+ const storage = getStorage();
411
+ const url = await storage.writeFile(outputPath, audioBuffer);
412
+
413
+ return `Audio generated successfully: ${url}`;
299
414
  } catch (error: any) {
300
415
  throw new Error(`Voice generation failed: ${error.message}`);
301
416
  }
@@ -303,28 +418,60 @@ export const geminiSingleSpeakerTts = {
303
418
  };
304
419
 
305
420
  export const geminiAnalyzeVideos = {
306
- name: "geminiAnalyzeVideos",
307
- description: "Analyze and understand video content using Gemini 2.5 Flash model. Intelligently handles YouTube URLs and local videos (files <20MB processed inline, ≥20MB uploaded via File API). Supports timestamp queries, clipping, and custom frame rates with default 5 FPS for local videos to optimize processing.",
421
+ name: "analyzeVideos",
422
+ description:
423
+ "Analyze and understand video content using Gemini 2.5 Flash model. Intelligently handles YouTube URLs and local videos (files <20MB processed inline, ≥20MB uploaded via File API). Supports timestamp queries, clipping, and custom frame rates with default 5 FPS for local videos to optimize processing.",
308
424
  parameters: z.object({
309
- video_inputs: z.array(z.string()).describe("Array of video inputs - mix of local file paths and YouTube URLs (max 10 videos). Local files <20MB processed inline, larger files uploaded via File API automatically."),
310
- prompt: z.string().describe("Text prompt or question about the videos. Use MM:SS format for timestamp references (e.g., 'What happens at 01:30?')."),
311
- fps: z.number().optional().describe("Frame rate for video processing (default: 5 FPS for local videos to reduce file size, 1 FPS for YouTube URLs)"),
312
- start_offset: z.string().optional().describe("Clip start time in seconds with 's' suffix (e.g., '40s')"),
313
- end_offset: z.string().optional().describe("Clip end time in seconds with 's' suffix (e.g., '80s')"),
314
- media_resolution: z.string().optional().describe("Media resolution: 'default' or 'low' (low resolution uses ~100 tokens/sec vs 300 tokens/sec)"),
425
+ video_inputs: z
426
+ .array(z.string())
427
+ .describe(
428
+ "Array of video inputs - mix of local file paths and YouTube URLs (max 10 videos). Local files <20MB processed inline, larger files uploaded via File API automatically."
429
+ ),
430
+ prompt: z
431
+ .string()
432
+ .describe(
433
+ "Text prompt or question about the videos. Use MM:SS format for timestamp references (e.g., 'What happens at 01:30?')."
434
+ ),
435
+ fps: z
436
+ .number()
437
+ .optional()
438
+ .describe(
439
+ "Frame rate for video processing (default: 5 FPS for local videos to reduce file size, 1 FPS for YouTube URLs)"
440
+ ),
441
+ start_offset: z
442
+ .string()
443
+ .optional()
444
+ .describe("Clip start time in seconds with 's' suffix (e.g., '40s')"),
445
+ end_offset: z
446
+ .string()
447
+ .optional()
448
+ .describe("Clip end time in seconds with 's' suffix (e.g., '80s')"),
449
+ media_resolution: z
450
+ .string()
451
+ .optional()
452
+ .describe(
453
+ "Media resolution: 'default' or 'low' (low resolution uses ~100 tokens/sec vs 300 tokens/sec)"
454
+ ),
315
455
  }),
316
- execute: async (args: { video_inputs: string[]; prompt: string; fps?: number; start_offset?: string; end_offset?: string; media_resolution?: string }) => {
456
+ execute: async (args: {
457
+ video_inputs: string[];
458
+ prompt: string;
459
+ fps?: number;
460
+ start_offset?: string;
461
+ end_offset?: string;
462
+ media_resolution?: string;
463
+ }) => {
317
464
  try {
318
465
  // Handle array parsing
319
466
  if (!args.video_inputs) {
320
467
  throw new Error("Video inputs not provided");
321
468
  }
322
-
469
+
323
470
  // Convert to array if passed as string
324
471
  let videoInputs: string[];
325
- if (typeof args.video_inputs === 'string') {
472
+ if (typeof args.video_inputs === "string") {
326
473
  const strValue = args.video_inputs as string;
327
- if (strValue.startsWith('[') && strValue.endsWith(']')) {
474
+ if (strValue.startsWith("[") && strValue.endsWith("]")) {
328
475
  try {
329
476
  videoInputs = JSON.parse(strValue);
330
477
  } catch {
@@ -338,43 +485,47 @@ export const geminiAnalyzeVideos = {
338
485
  } else {
339
486
  throw new Error("Invalid video_inputs: must be array or string");
340
487
  }
341
-
488
+
342
489
  if (videoInputs.length === 0) {
343
490
  throw new Error("At least one video input must be provided");
344
491
  }
345
-
492
+
346
493
  if (videoInputs.length > 10) {
347
- throw new Error("Maximum 10 videos per request allowed for Gemini 2.5+ models");
494
+ throw new Error(
495
+ "Maximum 10 videos per request allowed for Gemini 2.5+ models"
496
+ );
348
497
  }
349
498
 
350
499
  // Prepare video parts for content
351
500
  const videoParts: any[] = [];
352
-
501
+
353
502
  // Process each video input
354
503
  for (const videoInput of videoInputs) {
355
504
  const videoConfig = {
356
505
  fps: args.fps || (isYouTubeUrl(videoInput) ? 1 : 5), // Default 5 FPS for local, 1 FPS for YouTube
357
506
  startOffset: args.start_offset,
358
- endOffset: args.end_offset
507
+ endOffset: args.end_offset,
359
508
  };
360
-
509
+
361
510
  const videoPart = await processVideoInput(videoInput, videoConfig);
362
511
  videoParts.push(videoPart);
363
512
  }
364
513
 
365
514
  // Build content using createUserContent and createPartFromUri for uploaded files
366
515
  const contentParts: any[] = [args.prompt];
367
-
516
+
368
517
  for (const videoPart of videoParts) {
369
518
  if (videoPart.uri && videoPart.mimeType) {
370
- contentParts.push(createPartFromUri(videoPart.uri, videoPart.mimeType));
519
+ contentParts.push(
520
+ createPartFromUri(videoPart.uri, videoPart.mimeType)
521
+ );
371
522
  }
372
523
  }
373
-
524
+
374
525
  const finalContents = createUserContent(contentParts);
375
-
526
+
376
527
  const response = await ai.models.generateContent({
377
- model: 'gemini-2.5-pro',
528
+ model: "gemini-2.5-pro",
378
529
  contents: finalContents,
379
530
  });
380
531
 
@@ -386,7 +537,7 @@ export const geminiAnalyzeVideos = {
386
537
  }
387
538
  }
388
539
  }
389
-
540
+
390
541
  return result || "Video analysis completed but no text response received";
391
542
  } catch (error: any) {
392
543
  throw new Error(`Video analysis failed: ${error.message}`);
@@ -1,9 +1,9 @@
1
- // @ts-nocheck
2
1
  import * as fs from "fs";
3
2
  import { GoogleAuth } from "google-auth-library";
4
3
  import { exec } from "child_process";
5
4
  import * as path from "path";
6
5
  import { z } from "zod";
6
+ import { getStorage } from "../storage";
7
7
 
8
8
  async function wait(ms: number): Promise<void> {
9
9
  return new Promise((resolve) => setTimeout(resolve, ms));
@@ -11,7 +11,9 @@ async function wait(ms: number): Promise<void> {
11
11
 
12
12
  async function fetchAccessToken(): Promise<string> {
13
13
  try {
14
- const auth = new GoogleAuth({ scopes: ["https://www.googleapis.com/auth/cloud-platform"] });
14
+ const auth = new GoogleAuth({
15
+ scopes: ["https://www.googleapis.com/auth/cloud-platform"],
16
+ });
15
17
  const client = await auth.getClient();
16
18
  const token = await client.getAccessToken();
17
19
  if (!token || typeof token !== "string") {
@@ -23,12 +25,22 @@ async function fetchAccessToken(): Promise<string> {
23
25
  return await new Promise((resolve, reject) => {
24
26
  exec("gcloud auth print-access-token", (err, stdout, stderr) => {
25
27
  if (err) {
26
- reject(new Error(`Failed to fetch an access token (ADC and gcloud): ${stderr || err.message}`));
28
+ reject(
29
+ new Error(
30
+ `Failed to fetch an access token (ADC and gcloud): ${
31
+ stderr || err.message
32
+ }`
33
+ )
34
+ );
27
35
  return;
28
36
  }
29
37
  const t = (stdout || "").trim();
30
38
  if (!t) {
31
- reject(new Error("Failed to fetch an access token: empty token from gcloud"));
39
+ reject(
40
+ new Error(
41
+ "Failed to fetch an access token: empty token from gcloud"
42
+ )
43
+ );
32
44
  return;
33
45
  }
34
46
  resolve(t);
@@ -37,37 +49,102 @@ async function fetchAccessToken(): Promise<string> {
37
49
  }
38
50
  }
39
51
 
40
- function fileToBase64(path: string): { data: string; mimeType: string } {
41
- if (!fs.existsSync(path)) {
42
- throw new Error(`File not found: ${path}`);
52
+ async function fileToBase64(
53
+ filePath: string
54
+ ): Promise<{ data: string; mimeType: string }> {
55
+ const storage = getStorage();
56
+ if (!(await storage.exists(filePath))) {
57
+ throw new Error(`File not found: ${filePath}`);
43
58
  }
44
- const buf = fs.readFileSync(path);
59
+ const buf = await storage.readFile(filePath);
45
60
  const data = Buffer.from(buf).toString("base64");
46
61
  // Default to PNG if not sure, similar to existing code
47
62
  const mimeType = "image/png";
48
63
  return { data, mimeType };
49
64
  }
50
65
 
51
- export const imageToVideo = ({
52
- name: "image_to_video",
53
- description: "Generate videos from an image as starting first frame using Vertex Veo models (predictLongRunning + fetchPredictOperation).",
66
+ export const imageToVideo = {
67
+ name: "generateVideoi2v",
68
+ description:
69
+ "Generate videos from an image as starting first frame using Vertex Veo models (predictLongRunning + fetchPredictOperation).",
54
70
  parameters: z.object({
55
71
  prompt: z.string().describe("Text description for the video"),
56
- image_path: z.string().optional().describe("Path to source image for image-to-video generation"),
57
- last_frame_path: z.string().optional().describe("Path to last frame image to guide ending frame (optional)"),
58
- aspect_ratio: z.string().optional().describe("Video aspect ratio: '16:9' or '9:16' (default: '9:16')"),
59
- duration_seconds: z.string().optional().describe("Video duration in seconds: '4', '6', or '8' (default: '6')"),
60
- resolution: z.string().optional().describe("Video resolution: '720p' or '1080p' (default: '720p')"),
61
- negative_prompt: z.string().optional().describe("Text describing what not to include in the video"),
62
- person_generation: z.string().optional().describe("Controls generation of people: 'allow_adult' (default for image-to-video) or 'allow_all'"),
63
- reference_images: z.array(z.string()).optional().describe("Additional image paths for reference (max 3)"),
64
- output_path: z.string().optional().describe("Output MP4 file path (if multiple predictions, index suffix is added)"),
65
- project_id: z.string().optional().describe("GCP Project ID (default: mixio-pro)"),
66
- location_id: z.string().optional().describe("Vertex region (default: us-central1)"),
67
- model_id: z.string().optional().describe("Model ID (default: veo-3.1-fast-generate-preview)"),
68
- generate_audio: z.boolean().optional().describe("Boolean flag to enable generation of audio along with the video").default(false)
72
+ image_path: z
73
+ .string()
74
+ .optional()
75
+ .describe("Path to source image for image-to-video generation"),
76
+ last_frame_path: z
77
+ .string()
78
+ .optional()
79
+ .describe("Path to last frame image to guide ending frame (optional)"),
80
+ aspect_ratio: z
81
+ .string()
82
+ .optional()
83
+ .describe("Video aspect ratio: '16:9' or '9:16' (default: '9:16')"),
84
+ duration_seconds: z
85
+ .string()
86
+ .optional()
87
+ .describe("Video duration in seconds: '4', '6', or '8' (default: '6')"),
88
+ resolution: z
89
+ .string()
90
+ .optional()
91
+ .describe("Video resolution: '720p' or '1080p' (default: '720p')"),
92
+ negative_prompt: z
93
+ .string()
94
+ .optional()
95
+ .describe("Text describing what not to include in the video"),
96
+ person_generation: z
97
+ .string()
98
+ .optional()
99
+ .describe(
100
+ "Controls generation of people: 'allow_adult' (default for image-to-video) or 'allow_all'"
101
+ ),
102
+ reference_images: z
103
+ .array(z.string())
104
+ .optional()
105
+ .describe("Additional image paths for reference (max 3)"),
106
+ output_path: z
107
+ .string()
108
+ .optional()
109
+ .describe(
110
+ "Output MP4 file path (if multiple predictions, index suffix is added)"
111
+ ),
112
+ project_id: z
113
+ .string()
114
+ .optional()
115
+ .describe("GCP Project ID (default: mixio-pro)"),
116
+ location_id: z
117
+ .string()
118
+ .optional()
119
+ .describe("Vertex region (default: us-central1)"),
120
+ model_id: z
121
+ .string()
122
+ .optional()
123
+ .describe("Model ID (default: veo-3.1-fast-generate-preview)"),
124
+ generate_audio: z
125
+ .boolean()
126
+ .optional()
127
+ .describe(
128
+ "Boolean flag to enable generation of audio along with the video"
129
+ )
130
+ .default(false),
69
131
  }),
70
- async execute(args) {
132
+ async execute(args: {
133
+ prompt: string;
134
+ image_path?: string;
135
+ last_frame_path?: string;
136
+ aspect_ratio?: string;
137
+ duration_seconds?: string;
138
+ resolution?: string;
139
+ negative_prompt?: string;
140
+ person_generation?: string;
141
+ reference_images?: string[] | string;
142
+ output_path?: string;
143
+ project_id?: string;
144
+ location_id?: string;
145
+ model_id?: string;
146
+ generate_audio?: boolean;
147
+ }) {
71
148
  const projectId = args.project_id || "mixio-pro";
72
149
  const location = args.location_id || "us-central1";
73
150
  const modelId = args.model_id || "veo-3.1-fast-generate-preview";
@@ -78,7 +155,7 @@ export const imageToVideo = ({
78
155
 
79
156
  let imagePart: any = undefined;
80
157
  if (args.image_path) {
81
- const { data, mimeType } = fileToBase64(args.image_path);
158
+ const { data, mimeType } = await fileToBase64(args.image_path);
82
159
  imagePart = {
83
160
  image: {
84
161
  bytesBase64Encoded: data,
@@ -89,7 +166,7 @@ export const imageToVideo = ({
89
166
 
90
167
  let lastFramePart: any = undefined;
91
168
  if (args.last_frame_path) {
92
- const { data, mimeType } = fileToBase64(args.last_frame_path);
169
+ const { data, mimeType } = await fileToBase64(args.last_frame_path);
93
170
  lastFramePart = {
94
171
  lastFrame: {
95
172
  bytesBase64Encoded: data,
@@ -102,7 +179,10 @@ export const imageToVideo = ({
102
179
  if (args.reference_images) {
103
180
  let refImages: string[];
104
181
  if (typeof args.reference_images === "string") {
105
- if (args.reference_images.startsWith("[") && args.reference_images.endsWith("]")) {
182
+ if (
183
+ args.reference_images.startsWith("[") &&
184
+ args.reference_images.endsWith("]")
185
+ ) {
106
186
  try {
107
187
  refImages = JSON.parse(args.reference_images);
108
188
  } catch {
@@ -118,20 +198,23 @@ export const imageToVideo = ({
118
198
  }
119
199
 
120
200
  if (refImages.length > 0) {
121
- referenceImages = refImages.slice(0, 3).map((p) => {
122
- const { data, mimeType } = fileToBase64(p);
123
- return {
124
- image: {
125
- bytesBase64Encoded: data,
126
- mimeType,
127
- },
128
- referenceType: "asset",
129
- };
130
- });
201
+ referenceImages = await Promise.all(
202
+ refImages.slice(0, 3).map(async (p) => {
203
+ const { data, mimeType } = await fileToBase64(p);
204
+ return {
205
+ image: {
206
+ bytesBase64Encoded: data,
207
+ mimeType,
208
+ },
209
+ referenceType: "asset",
210
+ };
211
+ })
212
+ );
131
213
  }
132
214
  }
133
215
 
134
- const personGeneration = args.person_generation || (args.image_path ? "allow_adult" : "allow_all");
216
+ const personGeneration =
217
+ args.person_generation || (args.image_path ? "allow_adult" : "allow_all");
135
218
 
136
219
  const instances: any[] = [
137
220
  {
@@ -144,7 +227,7 @@ export const imageToVideo = ({
144
227
 
145
228
  const parameters: any = {
146
229
  aspectRatio: args.aspect_ratio || "9:16",
147
- durationSeconds: parseInt(args.duration_seconds) || 6,
230
+ durationSeconds: parseInt(args.duration_seconds || "6") || 6,
148
231
  resolution: args.resolution || "720p",
149
232
  negativePrompt: args.negative_prompt,
150
233
  generateAudio: args.generate_audio || false,
@@ -165,10 +248,12 @@ export const imageToVideo = ({
165
248
  throw new Error(`Vertex request failed: ${res.status} ${text}`);
166
249
  }
167
250
 
168
- const op = await res.json();
251
+ const op = (await res.json()) as any;
169
252
  const name: string = op.name || op.operation || "";
170
253
  if (!name) {
171
- throw new Error("Vertex did not return an operation name for long-running request");
254
+ throw new Error(
255
+ "Vertex did not return an operation name for long-running request"
256
+ );
172
257
  }
173
258
 
174
259
  let current = op;
@@ -191,7 +276,7 @@ export const imageToVideo = ({
191
276
  const text = await poll.text();
192
277
  throw new Error(`Vertex operation poll failed: ${poll.status} ${text}`);
193
278
  }
194
- current = await poll.json();
279
+ current = (await poll.json()) as any;
195
280
  done = !!current.done || !!current.response;
196
281
  tries++;
197
282
  }
@@ -199,34 +284,41 @@ export const imageToVideo = ({
199
284
  const resp = current.response || current;
200
285
  // Decode from response.videos[].bytesBase64Encoded only
201
286
  const outputs: string[] = [];
202
- const saveVideo = (base64: string, index: number) => {
287
+ const saveVideo = async (base64: string, index: number) => {
203
288
  if (!base64) return;
204
289
  const filePath = args.output_path
205
- ? (index === 0 ? args.output_path : args.output_path.replace(/\.mp4$/i, `_${index}.mp4`))
206
- : `video_output_${Date.now()}${index === 0 ? '' : '_' + index}.mp4`;
207
- const absPath = path.resolve(filePath);
208
- const buf = Buffer.from(base64, 'base64');
209
- fs.writeFileSync(absPath, buf);
210
- outputs.push(absPath);
290
+ ? index === 0
291
+ ? args.output_path
292
+ : args.output_path.replace(/\.mp4$/i, `_${index}.mp4`)
293
+ : `video_output_${Date.now()}${index === 0 ? "" : "_" + index}.mp4`;
294
+
295
+ const buf = Buffer.from(base64, "base64");
296
+ const storage = getStorage();
297
+ const url = await storage.writeFile(filePath, buf);
298
+ outputs.push(url);
211
299
  };
212
300
 
213
301
  if (Array.isArray(resp?.videos) && resp.videos.length > 0) {
214
302
  for (let i = 0; i < resp.videos.length; i++) {
215
303
  const v = resp.videos[i] || {};
216
- if (typeof v.bytesBase64Encoded === 'string') {
217
- saveVideo(v.bytesBase64Encoded, i);
304
+ if (typeof v.bytesBase64Encoded === "string") {
305
+ await saveVideo(v.bytesBase64Encoded, i);
218
306
  }
219
307
  }
220
308
  }
221
309
  if (outputs.length > 0) {
222
- return `Video(s) saved: ${outputs.join(', ')}`;
310
+ return `Video(s) saved to: ${outputs.join(", ")}`;
223
311
  }
224
312
 
225
313
  // If nothing saved, return a concise summary plus head/tail snippets of JSON
226
314
  let jsonStr = "";
227
- try { jsonStr = JSON.stringify(resp); } catch {}
315
+ try {
316
+ jsonStr = JSON.stringify(resp);
317
+ } catch {}
228
318
  const head150 = jsonStr ? jsonStr.slice(0, 150) : "";
229
- const tail50 = jsonStr ? jsonStr.slice(Math.max(0, jsonStr.length - 50)) : "";
319
+ const tail50 = jsonStr
320
+ ? jsonStr.slice(Math.max(0, jsonStr.length - 50))
321
+ : "";
230
322
  return `Vertex operation done but no videos array present. operationName=${name}. json_head150=${head150} json_tail50=${tail50}`;
231
323
  },
232
- });
324
+ };