@mixio-pro/kalaasetu-mcp 2.1.0 → 2.1.1-beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -2
- package/src/index.ts +4 -3
- package/src/storage/index.ts +4 -3
- package/src/tools/fal/config.ts +9 -8
- package/src/tools/fal/dynamic-tools.ts +214 -237
- package/src/tools/fal/models.ts +115 -93
- package/src/tools/fal/storage.ts +66 -61
- package/src/tools/gemini.ts +302 -281
- package/src/tools/get-status.ts +50 -46
- package/src/tools/image-to-video.ts +309 -300
- package/src/tools/perplexity.ts +188 -172
- package/src/tools/youtube.ts +45 -41
- package/src/utils/llm-prompt-enhancer.ts +3 -2
- package/src/utils/logger.ts +71 -0
- package/src/utils/openmeter.ts +123 -0
- package/src/utils/prompt-enhancer-presets.ts +7 -5
- package/src/utils/remote-sync.ts +19 -10
- package/src/utils/tool-credits.ts +104 -0
- package/src/utils/tool-wrapper.ts +37 -6
- package/src/utils/url-file.ts +4 -3
- package/src/test-context.ts +0 -52
- package/src/test-error-handling.ts +0 -31
- package/src/tools/image-to-video.sdk-backup.ts +0 -218
package/src/tools/image-to-video.ts (old-version lines that the registry viewer did not render are omitted from the `-` side):

```diff
@@ -179,357 +179,366 @@ export const imageToVideo = {
     };
   },
   ) {
-    return safeToolExecute(
-          Math.abs(
-      if (
-        durationSeconds === 6 &&
-        parseInt(args.duration_seconds || "6") === 7
-      ) {
-        durationSeconds = 8;
-      }
-      // Stream diagnostic info about auth
-      let token: string;
-      try {
-        if (context?.streamContent) {
-          await context.streamContent({
-            type: "text" as const,
-            text: `[Vertex] Authenticating with Google Cloud (project: ${projectId}, location: ${location})...`,
-          });
+    return safeToolExecute(
+      async () => {
+        const projectId = "mixio-pro";
+        const location = "us-central1";
+        const modelId = args.model_id || "veo-3.1-fast-generate-preview";
+
+        // Validate and parse duration_seconds - snap to nearest 4, 6, or 8
+        let durationSeconds = parseInt(args.duration_seconds || "6");
+        if (isNaN(durationSeconds)) durationSeconds = 6;
+
+        const validDurations = [4, 6, 8];
+        // Find nearest valid duration
+        durationSeconds = validDurations.reduce((prev, curr) => {
+          return Math.abs(curr - durationSeconds) <
+            Math.abs(prev - durationSeconds)
+            ? curr
+            : prev;
+        });
+
+        // Tie-breaking: if equidistant (e.g. 5), the reduce above keeps the first one (4) because < is strict.
+        // However, user requested "nearest duration with the ceil", effectively meaning round up if equidistant.
+        // Let's explicitly handle the equidistant cases or just use a custom finder.
+        // 5 -> equidistant to 4 and 6. "With ceil" implies 6.
+        // 7 -> equidistant to 6 and 8. "With ceil" implies 8.
+
+        // Simpler logic for these specific values:
+        if (
+          durationSeconds === 4 &&
+          parseInt(args.duration_seconds || "6") === 5
+        ) {
+          durationSeconds = 6;
         }
-          });
+        if (
+          durationSeconds === 6 &&
+          parseInt(args.duration_seconds || "6") === 7
+        ) {
+          durationSeconds = 8;
         }
```
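The snapping logic above rounds a requested duration to the nearest of 4, 6, or 8 seconds, rounding up on ties. A standalone sketch of the same behavior (illustrative only, not the package's exported API; `snapDuration` is a hypothetical name):

```ts
function snapDuration(raw: string | undefined): number {
  const parsed = parseInt(raw ?? "6");
  const requested = isNaN(parsed) ? 6 : parsed;
  const valid = [4, 6, 8];
  // reduce with a strict `<` keeps the lower value on a tie, which is why
  // the diff follows up with explicit checks mapping 5 -> 6 and 7 -> 8
  let nearest = valid.reduce((prev, curr) =>
    Math.abs(curr - requested) < Math.abs(prev - requested) ? curr : prev,
  );
  if (nearest === 4 && requested === 5) nearest = 6; // tie: round up
  if (nearest === 6 && requested === 7) nearest = 8; // tie: round up
  return nearest;
}

// snapDuration("5") === 6, snapDuration("7") === 8, snapDuration("oops") === 6
```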
```diff
+        // Stream diagnostic info about auth
+        let token: string;
+        try {
+          if (context?.streamContent) {
+            await context.streamContent({
+              type: "text" as const,
+              text: `[Vertex] Authenticating with Google Cloud (project: ${projectId}, location: ${location})...`,
+            });
+          }
+          token = await getGoogleAccessToken();
+          if (context?.streamContent) {
+            await context.streamContent({
+              type: "text" as const,
+              text: `[Vertex] ✓ Authentication successful. Token acquired.`,
+            });
+          }
+        } catch (authError: any) {
+          const errorMsg = authError?.message || String(authError);
+          if (context?.streamContent) {
+            await context.streamContent({
+              type: "text" as const,
+              text: `[Vertex] ✗ Authentication FAILED: ${errorMsg}. Check GOOGLE_APPLICATION_CREDENTIALS or run 'gcloud auth application-default login'.`,
+            });
+          }
+          throw new Error(`Google Cloud authentication failed: ${errorMsg}`);
         }
-        }
-        let current: any;
 
-        if (!args.prompt) {
-          throw new Error("prompt is required when starting a new generation.");
-        }
+        const fetchUrl = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:fetchPredictOperation`;
 
+        // If resuming, reconstruct the full operation path from the UUID
+        let operationName: string | undefined;
+        if (args.resume_endpoint) {
+          // Support both UUID-only and full path formats
+          if (args.resume_endpoint.includes("/")) {
+            operationName = args.resume_endpoint; // Already a full path
+          } else {
+            // Reconstruct full path from UUID
+            operationName = `projects/${projectId}/locations/${location}/publishers/google/models/${modelId}/operations/${args.resume_endpoint}`;
+          }
         }
+        let current: any;
```
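Resuming now works with either a bare operation UUID or a full Vertex operation path. A sketch of that normalization, with the project, location, and model values assumed to be the ones hard-coded in the diff (`toOperationName` is a hypothetical helper name):

```ts
// Assumed constants mirroring the values hard-coded in the diff.
const PROJECT_ID = "mixio-pro";
const LOCATION = "us-central1";
const MODEL_ID = "veo-3.1-fast-generate-preview";

function toOperationName(resumeEndpoint: string): string {
  if (resumeEndpoint.includes("/")) {
    return resumeEndpoint; // already a full operation path
  }
  // Bare UUID: rebuild the canonical Vertex long-running-operation path.
  return `projects/${PROJECT_ID}/locations/${LOCATION}/publishers/google/models/${MODEL_ID}/operations/${resumeEndpoint}`;
}
```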
```diff
 
+        if (!operationName) {
+          if (!args.prompt) {
+            throw new Error(
+              "prompt is required when starting a new generation.",
+            );
+          }
 
-              mimeType,
-            },
-          };
-        }
+          if (context?.streamContent) {
+            await context.streamContent({
+              type: "text" as const,
+              text: `[Vertex] Submitting video generation request to Veo model: ${modelId}...`,
+            });
+          }
 
+          const url = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:predictLongRunning`;
+
+          let imagePart: any = undefined;
+          if (args.image_path) {
+            const { data, mimeType } = await fileToBase64(args.image_path);
+            imagePart = {
+              image: {
+                bytesBase64Encoded: data,
+                mimeType,
+              },
+            };
+          }
+
+          let lastFramePart: any = undefined;
+          if (args.last_frame_path) {
+            const { data, mimeType } = await fileToBase64(args.last_frame_path);
+            lastFramePart = {
+              lastFrame: {
+                bytesBase64Encoded: data,
+                mimeType,
+              },
+            };
+          }
 
+          let referenceImages: any[] | undefined = undefined;
+          if (args.reference_images) {
+            let refImages: string[];
+            if (typeof args.reference_images === "string") {
+              if (
+                args.reference_images.startsWith("[") &&
+                args.reference_images.endsWith("]")
+              ) {
+                try {
+                  refImages = JSON.parse(args.reference_images);
+                } catch {
+                  throw new Error("Invalid reference_images format");
+                }
+              } else {
+                refImages = [args.reference_images];
               }
+            } else if (Array.isArray(args.reference_images)) {
+              refImages = args.reference_images;
             } else {
+              throw new Error(
+                "Invalid reference_images: must be array or string",
+              );
             }
-            } else if (Array.isArray(args.reference_images)) {
-              refImages = args.reference_images;
-            } else {
-              throw new Error(
-                "Invalid reference_images: must be array or string",
-              );
-            }
 
+            if (refImages.length > 0) {
+              referenceImages = await Promise.all(
+                refImages.slice(0, 3).map(async (p) => {
+                  const { data, mimeType } = await fileToBase64(p);
+                  return {
+                    image: {
+                      bytesBase64Encoded: data,
+                      mimeType,
+                    },
+                    referenceType: "asset",
+                  };
+                }),
+              );
+            }
           }
```
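The `reference_images` argument now accepts three shapes: a real array, a JSON-encoded array string, or a single path string, capped downstream at three entries by `slice(0, 3)`. A minimal sketch of just the normalization step (`normalizeReferenceImages` is a hypothetical name):

```ts
function normalizeReferenceImages(input: unknown): string[] {
  if (typeof input === "string") {
    // JSON-encoded array string, e.g. '["a.png","b.png"]'
    if (input.startsWith("[") && input.endsWith("]")) {
      try {
        return JSON.parse(input);
      } catch {
        throw new Error("Invalid reference_images format");
      }
    }
    return [input]; // single path string
  }
  if (Array.isArray(input)) return input as string[];
  throw new Error("Invalid reference_images: must be array or string");
}

// normalizeReferenceImages('["a.png","b.png"]') -> ["a.png", "b.png"]
// normalizeReferenceImages("a.png")             -> ["a.png"]
```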
```diff
-          }
-
-          const personGeneration =
-            args.person_generation ||
-            (args.image_path ? "allow_adult" : "allow_all");
 
+          const personGeneration =
+            args.person_generation ||
+            (args.image_path ? "allow_adult" : "allow_all");
 
+          // Apply prompt enhancement logic
+          let enhancedPrompt = args.prompt;
+          let enhancedNegativePrompt = args.negative_prompt;
 
-            presetToUse = "veo";
-          }
+          // Determine which preset to use
+          let presetToUse = args.enhancer_preset;
 
+          // If auto_enhance is true and no preset specified, default to 'veo'
+          if (args.auto_enhance === true && !presetToUse) {
+            presetToUse = "veo";
+          }
 
-              await import("../utils/llm-prompt-enhancer");
-
-            if (isLLMEnhancerAvailable()) {
-              if (context?.streamContent) {
-                await context.streamContent({
-                  type: "text" as const,
-                  text: `[VEO] Enhancing prompt with Gemini for optimal Veo 3.1 generation...`,
-                });
-              }
+          // Disable enhancement if auto_enhance is explicitly false
+          if (args.auto_enhance === false) {
+            presetToUse = undefined;
+          }
 
-              )
+          if (presetToUse && args.prompt) {
+            // Use LLM-based enhancement for 'veo' preset
+            if (presetToUse === "veo") {
+              const { enhancePromptWithLLM, isLLMEnhancerAvailable } =
+                await import("../utils/llm-prompt-enhancer");
 
+              if (isLLMEnhancerAvailable()) {
                 if (context?.streamContent) {
                   await context.streamContent({
                     type: "text" as const,
-                    text: `[VEO]
+                    text: `[VEO] Enhancing prompt with Gemini for optimal Veo 3.1 generation...`,
                   });
                 }
+
+                try {
+                  enhancedPrompt = await enhancePromptWithLLM(
+                    args.prompt,
+                    "veo",
+                  );
+                  context?.log?.info(
+                    `LLM-enhanced prompt for Veo: "${args.prompt}" → "${enhancedPrompt}"`,
+                  );
+
+                  if (context?.streamContent) {
+                    await context.streamContent({
+                      type: "text" as const,
+                      text: `[VEO] ✓ Prompt enhanced. Length: ${args.prompt.length} → ${enhancedPrompt.length} chars`,
+                    });
+                  }
+                } catch (err: any) {
+                  context?.log?.info(
+                    `LLM enhancement failed, using original: ${err.message}`,
+                  );
+                }
+              } else {
                 context?.log?.info(
+                  "GEMINI_API_KEY not set, skipping Veo LLM enhancement",
                 );
               }
             } else {
-              )
-              // Apply negative elements if not already set
-              const negatives = enhancer.getNegativeElements();
-              if (negatives && !enhancedNegativePrompt) {
-                enhancedNegativePrompt = negatives;
+              // Fall back to static string-based enhancement for other presets
+              const enhancer = resolveEnhancer(presetToUse);
+              if (enhancer.hasTransformations()) {
+                enhancedPrompt = enhancer.enhance(args.prompt);
+                // Apply negative elements if not already set
+                const negatives = enhancer.getNegativeElements();
+                if (negatives && !enhancedNegativePrompt) {
+                  enhancedNegativePrompt = negatives;
+                }
               }
             }
           }
```
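The preset selection above reduces to a small decision table: an explicit `enhancer_preset` wins, `auto_enhance: true` defaults it to `"veo"`, and `auto_enhance: false` disables enhancement entirely. A sketch under those assumptions (`pickPreset` is a hypothetical name):

```ts
function pickPreset(args: {
  enhancer_preset?: string;
  auto_enhance?: boolean;
}): string | undefined {
  let preset = args.enhancer_preset;
  if (args.auto_enhance === true && !preset) preset = "veo"; // opt-in default
  if (args.auto_enhance === false) preset = undefined; // explicit opt-out wins
  return preset;
}
// "veo" routes to the Gemini-backed enhancePromptWithLLM; any other preset
// falls back to the static resolveEnhancer transformations.
```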
```diff
+
+          const instances: any[] = [
+            {
+              prompt: enhancedPrompt,
+              ...(imagePart || {}),
+              ...(lastFramePart || {}),
+              ...(referenceImages ? { referenceImages } : {}),
+            },
+          ];
+
+          const parameters: any = {
+            aspectRatio: args.aspect_ratio || "9:16",
+            durationSeconds: durationSeconds,
+            resolution: args.resolution || "720p",
+            negativePrompt: enhancedNegativePrompt,
+            generateAudio: args.generate_audio || false,
+            personGeneration,
+          };
+
+          const res = await fetch(url, {
+            method: "POST",
+            headers: {
+              Authorization: `Bearer ${token}`,
+              "Content-Type": "application/json",
+            },
+            body: JSON.stringify({ instances, parameters }),
+          });
+
+          if (!res.ok) {
+            const text = await res.text();
+            throw new Error(`Vertex request failed: ${res.status} ${text}`);
+          }
+
+          const op = (await res.json()) as any;
+          operationName = op.name || op.operation || "";
+          current = op;
         }
 
-        const
-          aspectRatio: args.aspect_ratio || "9:16",
-          durationSeconds: durationSeconds,
-          resolution: args.resolution || "720p",
-          negativePrompt: enhancedNegativePrompt,
-          generateAudio: args.generate_audio || false,
-          personGeneration,
-        };
-
-        const res = await fetch(url, {
-          method: "POST",
-          headers: {
-            Authorization: `Bearer ${token}`,
-            "Content-Type": "application/json",
-          },
-          body: JSON.stringify({ instances, parameters }),
-        });
+        if (!operationName) {
+          throw new Error(
+            "Vertex did not return an operation name for long-running request",
+          );
+        }
+
+        // Construct the composite resume_endpoint: fetchUrl||operationName||outputPath
+        // This allows get_generation_status to use the URL directly and preserve output_path
+        const outputPathPart = args.output_path || "";
+        const compositeResumeEndpoint = `${fetchUrl}||${operationName}||${outputPathPart}`;
 
+        // Stream the resume_endpoint to the LLM immediately (before polling starts)
+        // This way the LLM has it even if MCP client times out during polling
+        if (context?.streamContent) {
+          const isResume = !!args.resume_endpoint;
+          await context.streamContent({
+            type: "text" as const,
+            text: isResume
+              ? `[Vertex] Resuming status check for job`
+              : `[Vertex] Video generation started. resume_endpoint: ${compositeResumeEndpoint} (use this to check status if needed)`,
+          });
         }
 
```
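The composite `resume_endpoint` packs three fields into one string with `||` as the separator: the `fetchPredictOperation` URL, the operation name, and the caller's `output_path`. A consumer-side parsing sketch, assuming `get_generation_status` splits on that same separator (`parseResumeEndpoint` is a hypothetical helper):

```ts
function parseResumeEndpoint(composite: string): {
  fetchUrl: string;
  operationName: string;
  outputPath?: string;
} {
  // "||" never collides with the "//" inside the https URL, so a plain
  // split is safe for this format.
  const [fetchUrl, operationName, outputPath] = composite.split("||");
  return { fetchUrl, operationName, outputPath: outputPath || undefined };
}
```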
```diff
-        if (!operationName) {
-          throw new Error(
-            "Vertex did not return an operation name for long-running request",
-          );
-        }
-
-        // Construct the composite resume_endpoint: fetchUrl||operationName||outputPath
-        // This allows get_generation_status to use the URL directly and preserve output_path
-        const outputPathPart = args.output_path || "";
-        const compositeResumeEndpoint = `${fetchUrl}||${operationName}||${outputPathPart}`;
-
-        // Stream the resume_endpoint to the LLM immediately (before polling starts)
-        // This way the LLM has it even if MCP client times out during polling
-        if (context?.streamContent) {
-          const isResume = !!args.resume_endpoint;
-          await context.streamContent({
-            type: "text" as const,
-            text: isResume
-              ? `[Vertex] Resuming status check for job`
-              : `[Vertex] Video generation started. resume_endpoint: ${compositeResumeEndpoint} (use this to check status if needed)`,
-          });
-        }
+        // Poll for status - keep polling until done
+        // Resume_endpoint was already streamed, so if MCP client times out the LLM still has it
+        let done = current ? !!current.done || !!current.response : false;
+        const startTime = Date.now();
+        const MAX_POLL_TIME = 60000; // 60 seconds internal timeout - then return resume_endpoint
 
-        let done = current ? !!current.done || !!current.response : false;
-        const startTime = Date.now();
-        const MAX_POLL_TIME = 60000; // 60 seconds internal timeout - then return resume_endpoint
+        while (!done && Date.now() - startTime < MAX_POLL_TIME) {
+          await wait(10000); // 10 second intervals
 
+          current = await checkVertexStatus(compositeResumeEndpoint);
+          done = !!current.done || !!current.response;
 
+          if (context?.reportProgress) {
+            const elapsed = Date.now() - startTime;
+            const progressPercent = Math.min(
+              Math.round((elapsed / MAX_POLL_TIME) * 100),
+              99,
+            );
+            await context.reportProgress({
+              progress: progressPercent,
+              total: 100,
+            });
+          }
 
+          if (context?.streamContent && !done) {
+            await context.streamContent({
+              type: "text" as const,
+              text: `[Vertex] Still processing... (${Math.round(
+                (Date.now() - startTime) / 1000,
+              )}s elapsed)`,
+            });
+          }
+        }
+
+        if (!done) {
+          return JSON.stringify({
+            status: "IN_PROGRESS",
+            request_id: operationName,
+            resume_endpoint: compositeResumeEndpoint,
+            message:
+              "Still in progress. Call this tool again with resume_endpoint to continue checking.",
          });
        }
 
```
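The new polling loop is a bounded poll-then-yield pattern: check every 10 seconds for at most 60 seconds, then return the resume token instead of blocking the MCP client indefinitely. A generic sketch under those assumptions (`pollWithBudget` is illustrative; `wait` is assumed equivalent to the sleep helper the diff uses):

```ts
const wait = (ms: number) => new Promise<void>((r) => setTimeout(r, ms));

async function pollWithBudget<T>(
  check: () => Promise<T | undefined>,
  { intervalMs = 10_000, budgetMs = 60_000 } = {},
): Promise<T | undefined> {
  const start = Date.now();
  while (Date.now() - start < budgetMs) {
    await wait(intervalMs);
    const result = await check();
    if (result !== undefined) return result; // finished within the budget
  }
  return undefined; // caller should hand back a resume_endpoint instead
}
```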
```diff
+        const resp = current.response || current;
+
+        // checkVertexStatus already handles saving videos and sanitizing base64
+        if (Array.isArray(resp.saved_videos) && resp.saved_videos.length > 0) {
+          return JSON.stringify({
+            videos: resp.saved_videos,
+            message: "Video(s) generated successfully",
           });
         }
-        }
 
+        // If nothing saved, return a clean error without any raw JSON that could contain base64
+        // CRITICAL: Never return raw response data to prevent context window poisoning
+        const respKeys = resp ? Object.keys(resp) : [];
         return JSON.stringify({
-          status: "
-          request_id: operationName,
-          resume_endpoint: compositeResumeEndpoint,
+          status: "ERROR",
           message:
-            "
-        const resp = current.response || current;
-
-        // checkVertexStatus already handles saving videos and sanitizing base64
-        if (Array.isArray(resp.saved_videos) && resp.saved_videos.length > 0) {
-          return JSON.stringify({
-            videos: resp.saved_videos,
-            message: "Video(s) generated successfully",
+            "Vertex operation completed but no videos were found in the response.",
+          operationName,
+          responseKeys: respKeys,
+          hint: "The response structure may have changed. Check the Vertex AI documentation or search for the expected response format.",
         });
-        }
-
-        const respKeys = resp ? Object.keys(resp) : [];
-        return JSON.stringify({
-          status: "ERROR",
-          message:
-            "Vertex operation completed but no videos were found in the response.",
-          operationName,
-          responseKeys: respKeys,
-          hint: "The response structure may have changed. Check the Vertex AI documentation or search for the expected response format.",
-        });
-      }, "imageToVideo");
+      },
+      "imageToVideo",
+      { toolName: "generateVideoi2v" },
+    );
   },
 };
```
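The closing call gains a third argument: `safeToolExecute(fn, "imageToVideo")` becomes `safeToolExecute(fn, "imageToVideo", { toolName: "generateVideoi2v" })`. The wrapper itself is not part of this hunk, so the following is only a guess at what the extended signature could look like; the option type and error handling are assumptions, plausibly feeding the metering utilities added in this release (`openmeter.ts`, `tool-credits.ts`), not the package's actual code:

```ts
// Hypothetical shape; not taken from the package source.
interface SafeToolOptions {
  toolName?: string;
}

async function safeToolExecute(
  fn: () => Promise<string>,
  label: string,
  options?: SafeToolOptions,
): Promise<string> {
  try {
    return await fn();
  } catch (err: any) {
    // The real wrapper may also log or meter usage via the new utilities.
    return JSON.stringify({
      status: "ERROR",
      tool: options?.toolName ?? label,
      message: err?.message || String(err),
    });
  }
}
```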