@mixio-pro/kalaasetu-mcp 1.2.2 → 2.0.2-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,10 @@ import { z } from "zod";
  import { getStorage } from "../storage";
  import { generateTimestampedFilename } from "../utils/filename";
  import { safeToolExecute } from "../utils/tool-wrapper";
+ import {
+ resolveEnhancer,
+ listVideoEnhancerPresets,
+ } from "../utils/prompt-enhancer-presets";

  import { getGoogleAccessToken } from "../utils/google-auth";

@@ -41,15 +45,17 @@ async function fileToBase64(
  export const imageToVideo = {
  name: "generateVideoi2v",
  description:
- "Generate professional-quality cinematic videos from a starting image and text prompt using Vertex AI's Veo models. " +
- "This is a high-latency tool (often takes 5-15 minutes) but produces state-of-the-art results. " +
- "It supports guided generation with start/end frames and specific durations. " +
+ "Generate professional-quality cinematic videos from a starting image and text prompt using Google's Vertex AI Veo models. " +
+ "This tool follows a 'Synchronous Facade' pattern: it handles polling internally but can be paused/resumed. " +
+ "If the generation takes too long, it returns a 'resume_id' that you MUST use to call this tool again to pick up progress. " +
+ "It produces state-of-the-art cinematic results. " +
  "ONLY USE WHEN WORKING WITH GOOGLE VERTEX AI MODELS.",
  parameters: z.object({
  prompt: z
  .string()
+ .optional()
  .describe(
- "Descriptive text for the video action and style (e.g., 'A robot walking through a neon city at night')."
+ "Required for new requests. Descriptive text for the video action and style (e.g., 'A robot walking through a neon city at night')."
  ),
  image_path: z
  .string()
@@ -125,24 +131,63 @@ export const imageToVideo = {
  "If true, Vertex will attempt to synthesize synchronized audio for the video."
  )
  .default(false),
+ resume_id: z
+ .string()
+ .optional()
+ .describe(
+ "If provided, the tool will check the status of an existing Vertex operation instead of starting a new one. " +
+ "Use the 'request_id' returned in an 'IN_PROGRESS' response."
+ ),
+ auto_enhance: z
+ .boolean()
+ .optional()
+ .describe(
+ "Whether to automatically enhance the prompt using Veo/LTX guidelines (default: true if enabled via preset or config). Set to false to disable enhancement."
+ ),
+ enhancer_preset: z
+ .string()
+ .optional()
+ .describe(
+ "Optional: Name of a video prompt enhancer preset (e.g., 'veo', 'ltx2', 'cinematic_video'). " +
+ "When using Veo, setting this to 'veo' (or setting auto_enhance=true) will trigger the LLM-based enhancer."
+ ),
  }),
- timeoutMs: 1200000, // 20 minutes
- async execute(args: {
- prompt: string;
- image_path?: string;
- last_frame_path?: string;
- aspect_ratio?: string;
- duration_seconds?: string;
- resolution?: string;
- negative_prompt?: string;
- person_generation?: string;
- reference_images?: string[] | string;
- output_path?: string;
- project_id?: string;
- location_id?: string;
- model_id?: string;
- generate_audio?: boolean;
- }) {
+ timeoutMs: 90000, // 90 seconds MCP timeout (internal timeout is 60s)
+ async execute(
+ args: {
+ prompt?: string;
+ image_path?: string;
+ last_frame_path?: string;
+ aspect_ratio?: string;
+ duration_seconds?: string;
+ resolution?: string;
+ negative_prompt?: string;
+ person_generation?: string;
+ reference_images?: string[] | string;
+ output_path?: string;
+ project_id?: string;
+ location_id?: string;
+ model_id?: string;
+ generate_audio?: boolean;
+ resume_id?: string;
+ enhancer_preset?: string;
+ auto_enhance?: boolean;
+ },
+ context?: {
+ reportProgress?: (progress: {
+ progress: number;
+ total: number;
+ }) => Promise<void>;
+ streamContent?: (content: {
+ type: "text";
+ text: string;
+ }) => Promise<void>;
+ log?: {
+ info: (msg: string, data?: any) => void;
+ debug: (msg: string, data?: any) => void;
+ };
+ }
+ ) {
  return safeToolExecute(async () => {
  const projectId = args.project_id || "mixio-pro";
  const location = args.location_id || "us-central1";
@@ -180,130 +225,270 @@ export const imageToVideo = {
  ) {
  durationSeconds = 8;
  }
+ // Stream diagnostic info about auth
+ let token: string;
+ try {
+ if (context?.streamContent) {
+ await context.streamContent({
+ type: "text" as const,
+ text: `[Vertex] Authenticating with Google Cloud (project: ${projectId}, location: ${location})...`,
+ });
+ }
+ token = await getGoogleAccessToken();
+ if (context?.streamContent) {
+ await context.streamContent({
+ type: "text" as const,
+ text: `[Vertex] ✓ Authentication successful. Token acquired.`,
+ });
+ }
+ } catch (authError: any) {
+ const errorMsg = authError?.message || String(authError);
+ if (context?.streamContent) {
+ await context.streamContent({
+ type: "text" as const,
+ text: `[Vertex] ✗ Authentication FAILED: ${errorMsg}. Check GOOGLE_APPLICATION_CREDENTIALS or run 'gcloud auth application-default login'.`,
+ });
+ }
+ throw new Error(`Google Cloud authentication failed: ${errorMsg}`);
+ }

- const token = await getGoogleAccessToken();
-
- const url = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:predictLongRunning`;
+ const fetchUrl = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:fetchPredictOperation`;

- let imagePart: any = undefined;
- if (args.image_path) {
- const { data, mimeType } = await fileToBase64(args.image_path);
- imagePart = {
- image: {
- bytesBase64Encoded: data,
- mimeType,
- },
- };
+ // If resuming, reconstruct the full operation path from the UUID
+ let operationName: string | undefined;
+ if (args.resume_id) {
+ // Support both UUID-only and full path formats
+ if (args.resume_id.includes("/")) {
+ operationName = args.resume_id; // Already a full path
+ } else {
+ // Reconstruct full path from UUID
+ operationName = `projects/${projectId}/locations/${location}/publishers/google/models/${modelId}/operations/${args.resume_id}`;
+ }
  }
+ let current: any;

- let lastFramePart: any = undefined;
- if (args.last_frame_path) {
- const { data, mimeType } = await fileToBase64(args.last_frame_path);
- lastFramePart = {
- lastFrame: {
- bytesBase64Encoded: data,
- mimeType,
- },
- };
- }
+ if (!operationName) {
+ if (!args.prompt) {
+ throw new Error("prompt is required when starting a new generation.");
+ }
+
+ if (context?.streamContent) {
+ await context.streamContent({
+ type: "text" as const,
+ text: `[Vertex] Submitting video generation request to Veo model: ${modelId}...`,
+ });
+ }

- let referenceImages: any[] | undefined = undefined;
- if (args.reference_images) {
- let refImages: string[];
- if (typeof args.reference_images === "string") {
- if (
- args.reference_images.startsWith("[") &&
- args.reference_images.endsWith("]")
- ) {
- try {
- refImages = JSON.parse(args.reference_images);
- } catch {
- throw new Error("Invalid reference_images format");
+ const url = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:predictLongRunning`;
+
+ let imagePart: any = undefined;
+ if (args.image_path) {
+ const { data, mimeType } = await fileToBase64(args.image_path);
+ imagePart = {
+ image: {
+ bytesBase64Encoded: data,
+ mimeType,
+ },
+ };
+ }
+
+ let lastFramePart: any = undefined;
+ if (args.last_frame_path) {
+ const { data, mimeType } = await fileToBase64(args.last_frame_path);
+ lastFramePart = {
+ lastFrame: {
+ bytesBase64Encoded: data,
+ mimeType,
+ },
+ };
+ }
+
+ let referenceImages: any[] | undefined = undefined;
+ if (args.reference_images) {
+ let refImages: string[];
+ if (typeof args.reference_images === "string") {
+ if (
+ args.reference_images.startsWith("[") &&
+ args.reference_images.endsWith("]")
+ ) {
+ try {
+ refImages = JSON.parse(args.reference_images);
+ } catch {
+ throw new Error("Invalid reference_images format");
+ }
+ } else {
+ refImages = [args.reference_images];
  }
+ } else if (Array.isArray(args.reference_images)) {
+ refImages = args.reference_images;
  } else {
- refImages = [args.reference_images];
+ throw new Error(
+ "Invalid reference_images: must be array or string"
+ );
+ }
+
+ if (refImages.length > 0) {
+ referenceImages = await Promise.all(
+ refImages.slice(0, 3).map(async (p) => {
+ const { data, mimeType } = await fileToBase64(p);
+ return {
+ image: {
+ bytesBase64Encoded: data,
+ mimeType,
+ },
+ referenceType: "asset",
+ };
+ })
+ );
  }
- } else if (Array.isArray(args.reference_images)) {
- refImages = args.reference_images;
- } else {
- throw new Error("Invalid reference_images: must be array or string");
  }

- if (refImages.length > 0) {
- referenceImages = await Promise.all(
- refImages.slice(0, 3).map(async (p) => {
- const { data, mimeType } = await fileToBase64(p);
- return {
- image: {
- bytesBase64Encoded: data,
- mimeType,
- },
- referenceType: "asset",
- };
- })
- );
+ const personGeneration =
+ args.person_generation ||
+ (args.image_path ? "allow_adult" : "allow_all");
+
+ // Apply prompt enhancement logic
+ let enhancedPrompt = args.prompt;
+ let enhancedNegativePrompt = args.negative_prompt;
+
+ // Determine which preset to use
+ let presetToUse = args.enhancer_preset;
+
+ // If auto_enhance is true and no preset specified, default to 'veo'
+ if (args.auto_enhance === true && !presetToUse) {
+ presetToUse = "veo";
  }
- }

- const personGeneration =
- args.person_generation ||
- (args.image_path ? "allow_adult" : "allow_all");
-
- const instances: any[] = [
- {
- prompt: args.prompt,
- ...(imagePart || {}),
- ...(lastFramePart || {}),
- ...(referenceImages ? { referenceImages } : {}),
- },
- ];
-
- const parameters: any = {
- aspectRatio: args.aspect_ratio || "9:16",
- durationSeconds: durationSeconds,
- resolution: args.resolution || "720p",
- negativePrompt: args.negative_prompt,
- generateAudio: args.generate_audio || false,
- personGeneration,
- };
+ // Disable enhancement if auto_enhance is explicitly false
+ if (args.auto_enhance === false) {
+ presetToUse = undefined;
+ }

- const res = await fetch(url, {
- method: "POST",
- headers: {
- Authorization: `Bearer ${token}`,
- "Content-Type": "application/json",
- },
- body: JSON.stringify({ instances, parameters }),
- });
+ if (presetToUse && args.prompt) {
+ // Use LLM-based enhancement for 'veo' preset
+ if (presetToUse === "veo") {
+ const { enhancePromptWithLLM, isLLMEnhancerAvailable } =
+ await import("../utils/llm-prompt-enhancer");
+
+ if (isLLMEnhancerAvailable()) {
+ if (context?.streamContent) {
+ await context.streamContent({
+ type: "text" as const,
+ text: `[VEO] Enhancing prompt with Gemini for optimal Veo 3.1 generation...`,
+ });
+ }
+
+ try {
+ enhancedPrompt = await enhancePromptWithLLM(args.prompt, "veo");
+ context?.log?.info(
+ `LLM-enhanced prompt for Veo: "${args.prompt}" → "${enhancedPrompt}"`
+ );
+
+ if (context?.streamContent) {
+ await context.streamContent({
+ type: "text" as const,
+ text: `[VEO] ✓ Prompt enhanced. Length: ${args.prompt.length} → ${enhancedPrompt.length} chars`,
+ });
+ }
+ } catch (err: any) {
+ context?.log?.info(
+ `LLM enhancement failed, using original: ${err.message}`
+ );
+ }
+ } else {
+ context?.log?.info(
+ "GEMINI_API_KEY not set, skipping Veo LLM enhancement"
+ );
+ }
+ } else {
+ // Fall back to static string-based enhancement for other presets
+ const enhancer = resolveEnhancer(presetToUse);
+ if (enhancer.hasTransformations()) {
+ enhancedPrompt = enhancer.enhance(args.prompt);
+ // Apply negative elements if not already set
+ const negatives = enhancer.getNegativeElements();
+ if (negatives && !enhancedNegativePrompt) {
+ enhancedNegativePrompt = negatives;
+ }
+ }
+ }
+ }
+
+ const instances: any[] = [
+ {
+ prompt: enhancedPrompt,
+ ...(imagePart || {}),
+ ...(lastFramePart || {}),
+ ...(referenceImages ? { referenceImages } : {}),
+ },
+ ];
+
+ const parameters: any = {
+ aspectRatio: args.aspect_ratio || "9:16",
+ durationSeconds: durationSeconds,
+ resolution: args.resolution || "720p",
+ negativePrompt: enhancedNegativePrompt,
+ generateAudio: args.generate_audio || false,
+ personGeneration,
+ };
+
+ const res = await fetch(url, {
+ method: "POST",
+ headers: {
+ Authorization: `Bearer ${token}`,
+ "Content-Type": "application/json",
+ },
+ body: JSON.stringify({ instances, parameters }),
+ });
+
+ if (!res.ok) {
+ const text = await res.text();
+ throw new Error(`Vertex request failed: ${res.status} ${text}`);
+ }

- if (!res.ok) {
- const text = await res.text();
- throw new Error(`Vertex request failed: ${res.status} ${text}`);
+ const op = (await res.json()) as any;
+ operationName = op.name || op.operation || "";
+ current = op;
  }

- const op = (await res.json()) as any;
- const name: string = op.name || op.operation || "";
- if (!name) {
+ if (!operationName) {
  throw new Error(
  "Vertex did not return an operation name for long-running request"
  );
  }

- let current = op;
- let done = !!op.done;
- let tries = 0;
+ // Extract just the operation UUID from the full path for a cleaner resume_id
+ // Full path: projects/.../operations/<uuid>
+ const operationUuid = operationName.split("/").pop() || operationName;
+
+ // Stream the resume_id to the LLM immediately (before polling starts)
+ // This way the LLM has it even if MCP client times out during polling
+ if (context?.streamContent) {
+ const isResume = !!args.resume_id;
+ await context.streamContent({
+ type: "text" as const,
+ text: isResume
+ ? `[Vertex] Resuming status check for job: ${operationUuid}`
+ : `[Vertex] Video generation started. resume_id: ${operationUuid} (use this to check status if needed)`,
+ });
+ }
+
+ // Poll for status - keep polling until done
+ // Resume_id was already streamed, so if MCP client times out the LLM still has it
+ let done = current ? !!current.done || !!current.response : false;
+ const startTime = Date.now();
+ const MAX_POLL_TIME = 60000; // 60 seconds internal timeout - then return resume_id
+
+ while (!done && Date.now() - startTime < MAX_POLL_TIME) {
+ await wait(10000); // 10 second intervals

- // Poll using fetchPredictOperation as per Vertex recommendation
- const fetchUrl = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:fetchPredictOperation`;
- while (!done && tries < 60) {
- await wait(10000);
  const poll = await fetch(fetchUrl, {
  method: "POST",
  headers: {
  Authorization: `Bearer ${token}`,
  "Content-Type": "application/json",
  },
- body: JSON.stringify({ operationName: name }),
+ body: JSON.stringify({ operationName }),
  });
  if (!poll.ok) {
  const text = await poll.text();
@@ -313,7 +498,37 @@ export const imageToVideo = {
  }
  current = (await poll.json()) as any;
  done = !!current.done || !!current.response;
- tries++;
+
+ if (context?.reportProgress) {
+ const elapsed = Date.now() - startTime;
+ const progressPercent = Math.min(
+ Math.round((elapsed / MAX_POLL_TIME) * 100),
+ 99
+ );
+ await context.reportProgress({
+ progress: progressPercent,
+ total: 100,
+ });
+ }
+
+ if (context?.streamContent && !done) {
+ await context.streamContent({
+ type: "text" as const,
+ text: `[Vertex] Still processing... (${Math.round(
+ (Date.now() - startTime) / 1000
+ )}s elapsed)`,
+ });
+ }
+ }
+
+ if (!done) {
+ return JSON.stringify({
+ status: "IN_PROGRESS",
+ request_id: operationName,
+ resume_id: operationName,
+ message:
+ "Still in progress. Call this tool again with resume_id to continue checking.",
+ });
  }

  const resp = current.response || current;
@@ -371,7 +586,7 @@ export const imageToVideo = {
  const tail50 = jsonStr
  ? jsonStr.slice(Math.max(0, jsonStr.length - 50))
  : "";
- return `Vertex operation done but no videos array present. operationName=${name}. json_head150=${head150} json_tail50=${tail50}`;
+ return `Vertex operation done but no videos array present. operationName=${operationName}. json_head150=${head150} json_tail50=${tail50}`;
  }, "imageToVideo");
  },
  };
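
The new tool description calls this a "Synchronous Facade": the tool polls Vertex internally for up to ~60 seconds, and if the operation is still running it returns an `IN_PROGRESS` JSON payload with a `resume_id` the caller must pass back on the next invocation. Below is a minimal caller-side sketch of that loop in TypeScript. It is not part of the package: `callTool` is a hypothetical stand-in for however your MCP client invokes tools, and only the `IN_PROGRESS`/`resume_id` contract comes from the diff above.

```ts
// Hypothetical signature for invoking an MCP tool; adapt to your client.
type CallTool = (name: string, args: Record<string, unknown>) => Promise<string>;

// Drive generateVideoi2v until it finishes, re-calling with resume_id
// whenever the tool returns an IN_PROGRESS payload.
async function generateWithResume(
  callTool: CallTool,
  initialArgs: { prompt: string; image_path?: string }
): Promise<string> {
  let args: Record<string, unknown> = initialArgs;
  for (;;) {
    const raw = await callTool("generateVideoi2v", args);
    let parsed: { status?: string; resume_id?: string } | undefined;
    try {
      parsed = JSON.parse(raw);
    } catch {
      // Non-JSON results (e.g. a final success message) end the loop.
      return raw;
    }
    if (parsed?.status === "IN_PROGRESS" && parsed.resume_id) {
      // The tool already polled internally for up to ~60s; calling again
      // with resume_id resumes the same Vertex operation instead of
      // starting a new generation.
      args = { resume_id: parsed.resume_id };
      continue;
    }
    return raw;
  }
}
```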