npm - @mixio-pro/kalaasetu-mcp - Versions diffs - 2.3.29 → 2.3.31 - Mend

@mixio-pro/kalaasetu-mcp 2.3.29 → 2.3.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/package.json +3 -3
package/src/.cache/fal-config.json +539 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@mixio-pro/kalaasetu-mcp",
-  "version": "2.3.29",
+  "version": "2.3.31",
   "description": "A powerful Model Context Protocol server providing AI tools for content generation and analysis",
   "type": "module",
   "module": "src/index.ts",
@@ -51,15 +51,15 @@
     "@fal-ai/client": "^1.7.2",
     "@google/genai": "^1.28.0",
     "@openmeter/sdk": "^1.0.0-beta.226",
-    "@sentry/node": "^10.36.0",
+    "@sentry/node": "^8.13.0",
     "@types/node": "^24.10.1",
-    "@types/sharp": "^0.32.0",
     "@types/wav": "^1.0.4",
     "dotenv": "^17.3.1",
     "fastmcp": "3.26.8",
     "form-data": "^4.0.5",
     "google-auth-library": "^10.5.0",
     "jsonata": "^2.1.0",
+    "node-addon-api": "^8.2.1",
     "sharp": "^0.34.5",
     "wav": "^1.0.2",
     "winston": "^3.19.0",

package/src/.cache/fal-config.json ADDED Viewed

@@ -0,0 +1,539 @@
+{
+  "presets": [
+    {
+      "presetName": "motion_control_pro",
+      "enabled": true,
+      "intent": "When to use: Use this for precise motion transfer or camera control where you have a specific reference video. Ideal for making a still character perform complex actions (e.g., dancing) or applying a cinematic camera path (e.g., slow pan). How to use: Provide a high-quality character still and a continuous reference video clip. Use 'video' orientation to transfer the character's movement, and 'image' orientation to keep the original pose while copying the camera's perspective. Prompting: Describe the scene context and lighting; use '@Element1' to reference the facial consistency mapping if an element is provided.",
+      "fallback_preset": "motion_control_lite",
+      "modelId": "fal-ai/kling-video/v3/pro/motion-control",
+      "inputType": "image+video",
+      "outputType": "video",
+      "input_schema": {
+        "prompt": {
+          "type": "string",
+          "description": "Provide a detailed text description of the scene's actions, environment, and character behavior. Use '@Element1' to bind to the facial consistency reference if an element is provided. Crucially, describe the exact character motion (e.g., 'a precise high-kick', 'fluid ballet turns') or camera movement (e.g., 'slow cinematic dolly-in', 'dynamic low-angle pan') that you want to replicate or enhance from the reference video. Address how the character interacts with their surroundings and how their expressions or clothing should react to the specific motion captured in the reference clip.",
+          "example": "A high-quality cinematic shot of @Element1 dancing in a neon-lit urban alley at night, detailed fabric textures, volumetric lighting, and realistic skin shaders."
+        },
+        "image_url": {
+          "type": "string",
+          "description": "URL of the reference character image. Characters should be clearly visible and not obstructed, and occupy more than 5% of the image area.",
+          "example": "https://mixio.studio/assets/character-portrait.png"
+        },
+        "video_url": {
+          "type": "string",
+          "description": "URL of the reference video to mimic actions from. Should contain a realistic-style character with entire body or upper body visible. Use a single continuous shot with steady movements.",
+          "example": "https://mixio.studio/assets/dance-reference.mp4"
+        },
+        "keep_original_sound": {
+          "type": "boolean",
+          "description": "Whether to keep the original sound from the reference video.",
+          "default": true
+        },
+        "character_orientation": {
+          "type": "string",
+          "description": "Selects the type of motion transfer. 'video': transfers character body motion (dances, gestures) from the reference video onto the character — best for character animation (max 30s). 'image': transfers camera motion from the reference video while the character stays in the original image pose — best for cinematic camera work (max 10s).",
+          "enum": [
+            "image",
+            "video"
+          ],
+          "default": "video"
+        },
+        "elements": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "properties": {
+              "frontal_image_url": {
+                "type": "string",
+                "description": "The frontal image of the element (main view).",
+                "example": "https://mixio.studio/assets/character-face-frontal.png"
+              },
+              "reference_image_urls": {
+                "type": "array",
+                "items": {
+                  "type": "string"
+                },
+                "description": "Additional reference images from different angles. 1-3 images supported. At least one image is required.",
+                "example": [
+                  "https://mixio.studio/assets/character-face-side.png"
+                ]
+              }
+            }
+          },
+          "description": "Optional element for facial consistency binding. Upload a facial element to enhance identity preservation in the generated video. Only 1 element is supported. Reference in prompt as @Element1. Element binding is only supported when character_orientation is 'video'."
+        }
+      },
+      "pricing": {
+        "baseCredits": 200,
+        "rounding": "ceil",
+        "modifiers": [
+          {
+            "field": "duration",
+            "mode": "discrete",
+            "operation": "multiply",
+            "map": {
+              "5": 0.5,
+              "10": 1
+            }
+          }
+        ],
+        "minCredits": 1
+      }
+    },
+    {
+      "presetName": "motion_control",
+      "enabled": true,
+      "intent": "When to use: Use this for high-quality motion transfer where you have a specific reference video. A robust alternative to the v3 pro model, often better at following complex character physics. How to use: Provide a character image and a driving video. Use 'video' orientation for full motion transfer (max 30s) or 'image' orientation to follow camera movement (max 10s). Prompting: Describe the scene context and lighting; use detailed descriptions of the motion you want to replicate.",
+      "fallback_preset": "motion_control_lite",
+      "modelId": "fal-ai/kling-video/v2.6/pro/motion-control",
+      "inputType": "image+video",
+      "outputType": "video",
+      "input_schema": {
+        "prompt": {
+          "type": "string",
+          "description": "Provide a detailed text description of the scene's actions, environment, and character behavior. Describe the exact character motion (e.g., 'a precise high-kick', 'fluid ballet turns') or camera movement that you want to replicate from the reference video.",
+          "example": "A high-quality cinematic shot of a warrior performing a sword dance in a bamboo forest at sunset, golden hour lighting, detailed fabric textures."
+        },
+        "image_url": {
+          "type": "string",
+          "description": "URL of the reference character image. Characters should be clearly visible and not obstructed, and occupy more than 5% of the image area.",
+          "example": "https://mixio.studio/assets/character-portrait.png"
+        },
+        "video_url": {
+          "type": "string",
+          "description": "URL of the reference video to mimic actions from. Should contain a realistic-style character with entire body or upper body visible. Use a single continuous shot with steady movements.",
+          "example": "https://mixio.studio/assets/dance-reference.mp4"
+        },
+        "keep_original_sound": {
+          "type": "boolean",
+          "description": "Whether to keep the original sound from the reference video.",
+          "default": true
+        },
+        "character_orientation": {
+          "type": "string",
+          "description": "Selects the type of motion transfer. 'video': orientation matches reference video (max 30s). 'image': orientation matches reference image (max 10s).",
+          "enum": [
+            "image",
+            "video"
+          ],
+          "default": "video"
+        },
+        "duration": {
+          "type": "string",
+          "description": "The estimated duration of the generated video in seconds. Used for credit computation.",
+          "enum": [
+            "5",
+            "10",
+            "15",
+            "20",
+            "25",
+            "30"
+          ],
+          "default": "10"
+        }
+      },
+      "pricing": {
+        "baseCredits": 112,
+        "rounding": "ceil",
+        "modifiers": [
+          {
+            "field": "duration",
+            "mode": "discrete",
+            "operation": "multiply",
+            "map": {
+              "5": 0.5,
+              "10": 1,
+              "15": 1.5,
+              "20": 2,
+              "25": 2.5,
+              "30": 3
+            }
+          }
+        ],
+        "minCredits": 1
+      }
+    },
+    {
+      "presetName": "seedance_i2v_pro",
+      "enabled": true,
+      "intent": "When to use: Best for high-fidelity scene animation from a starting frame, especially when you need synchronized audio and dialogue synthesis. Use this for storytelling where the AI's creativity in imagining movement is valued. How to use: Provide a clear start frame and an optional end frame for guided transitions. For dialogue, include the spoken text in the prompt using quotes. Prompting: Use narrative language to describe the scene; example: 'The man looks up with awe, whispering \"This is incredible\" as the lights glow'.",
+      "modelId": "fal-ai/bytedance/seedance/v1.5/pro/image-to-video",
+      "inputType": "image",
+      "outputType": "video",
+      "input_schema": {
+        "prompt": {
+          "type": "string",
+          "description": "A comprehensive narrative description of the video's progression, including character actions, camera movements, and environmental changes. Detail the cinematic style, lighting conditions, and specific material interactions. For dialogue-heavy scenes, include the spoken words in double quotes to guide the synchronized audio synthesis.",
+          "example": "A deep-sea explorer finds an ancient artifact, they whisper \"Finally, the lost heart of Atlantis\" as a golden glow illuminates their face and the underwater surroundings with caustic light patterns."
+        },
+        "image_url": {
+          "type": "string",
+          "description": "The URL of the start frame image used to generate the video.",
+          "example": "https://mixio.studio/assets/scene-start-frame.png"
+        },
+        "end_image_url": {
+          "type": "string",
+          "description": "The URL of the image the video ends with. Optional — leave empty for free-form generation."
+        },
+        "aspect_ratio": {
+          "type": "string",
+          "description": "The aspect ratio of the generated video.",
+          "enum": [
+            "21:9",
+            "16:9",
+            "4:3",
+            "1:1",
+            "3:4",
+            "9:16",
+            "auto"
+          ],
+          "default": "16:9"
+        },
+        "resolution": {
+          "type": "string",
+          "description": "Video resolution. 480p for faster generation, 720p for balance, 1080p for higher quality.",
+          "enum": [
+            "480p",
+            "720p",
+            "1080p"
+          ],
+          "default": "720p"
+        },
+        "duration": {
+          "type": "string",
+          "description": "Duration of the video in seconds.",
+          "enum": [
+            "4",
+            "5",
+            "6",
+            "7",
+            "8",
+            "9",
+            "10",
+            "11",
+            "12"
+          ],
+          "default": "5"
+        },
+        "camera_fixed": {
+          "type": "boolean",
+          "description": "Whether to fix the camera position.",
+          "default": false
+        },
+        "generate_audio": {
+          "type": "boolean",
+          "description": "Whether to generate audio for the video.",
+          "default": true
+        },
+        "seed": {
+          "type": "integer",
+          "description": "Random seed to control video generation. Use -1 for random."
+        },
+        "enable_safety_checker": {
+          "type": "boolean",
+          "description": "If set to true, the safety checker will be enabled.",
+          "default": true
+        }
+      },
+      "pricing": {
+        "baseCredits": 240,
+        "rounding": "ceil",
+        "modifiers": [
+          {
+            "field": "duration",
+            "mode": "discrete",
+            "operation": "multiply",
+            "map": {
+              "4": 0.3333,
+              "5": 0.4167,
+              "6": 0.5,
+              "7": 0.5833,
+              "8": 0.6667,
+              "9": 0.75,
+              "10": 0.8333,
+              "11": 0.9167,
+              "12": 1
+            }
+          },
+          {
+            "field": "resolution",
+            "mode": "discrete",
+            "operation": "multiply",
+            "map": {
+              "480p": 0.5,
+              "720p": 1,
+              "1080p": 2.25
+            }
+          }
+        ],
+        "minCredits": 1
+      }
+    },
+    {
+      "presetName": "a2v_lipsync",
+      "enabled": true,
+      "intent": "When to use: Professional-grade talking-head generation. Best for podcasts, news, or dubbing where lip-sync accuracy and natural facial expressions are critical. How to use: Provide a high-resolution frontal face image (avoid occlusion) and clear audio. Choose '1080p' for final quality and '720p' for faster iterations. Prompting: Describe the emotional tone (e.g., 'joyful', 'serious') and background setting to guide the facial performance.",
+      "modelId": "fal-ai/bytedance/omnihuman/v1.5",
+      "inputType": "image+audio",
+      "outputType": "video",
+      "input_schema": {
+        "prompt": {
+          "type": "string",
+          "description": "Describe the character's emotional state, facial performance, and the background environment. Detail how the character should react to the audio (e.g., 'eyes widening with surprise', 'a subtle, sad smile'). Specify the cinematography, such as 'extreme close-up' or 'dramatic side-lighting', to guide the facial animation quality.",
+          "example": "A close-up of a weathered sailor recounting a legend, his eyes reflecting the flickering campfire light, displaying a mixture of regret and longing while his voice remains steady."
+        },
+        "image_url": {
+          "type": "string",
+          "description": "The URL of the human figure image used to generate the video.",
+          "example": "https://mixio.studio/assets/person-portrait.png"
+        },
+        "audio_url": {
+          "type": "string",
+          "description": "The URL of the audio file to lip-sync. Audio must be under 30s for 1080p and under 60s for 720p generation.",
+          "example": "https://mixio.studio/assets/voiceover-clip.mp3"
+        },
+        "turbo_mode": {
+          "type": "boolean",
+          "description": "Generate video at a faster rate with a slight quality trade-off.",
+          "default": false
+        },
+        "resolution": {
+          "type": "string",
+          "description": "Resolution of the generated video. 720p is faster and higher quality. 1080p is limited to 30s of audio; 720p is limited to 60s of audio.",
+          "enum": [
+            "720p",
+            "1080p"
+          ],
+          "default": "1080p"
+        }
+      },
+      "pricing": {
+        "baseCredits": 225,
+        "rounding": "ceil",
+        "modifiers": [
+          {
+            "field": "resolution",
+            "mode": "discrete",
+            "operation": "multiply",
+            "map": {
+              "720p": 0.75,
+              "1080p": 1
+            }
+          }
+        ],
+        "minCredits": 1
+      }
+    },
+    {
+      "presetName": "multi_angle_img_generation",
+      "enabled": true,
+      "intent": "When to use: Precise 3D perspective control for consistency. Use this when you need a consistent character or object viewed from multiple angles (e.g., character turnarounds, product shots). How to use: Input single or multiple views. Specify rotation angles in degrees (0-360) and camera pitch/zoom to get exact framing. Prompting: Use the additional prompt to describe hidden features that should be revealed (e.g., 'a specific logo on the back of the device').",
+      "modelId": "fal-ai/qwen-image-edit-2511-multiple-angles",
+      "inputType": "image",
+      "outputType": "image",
+      "input_schema": {
+        "image_urls": {
+          "type": "array",
+          "items": {
+            "type": "string"
+          },
+          "description": "The URLs of the images to adjust camera angle for. Pass an array of URLs."
+        },
+        "horizontal_angle": {
+          "type": "number",
+          "description": "Horizontal rotation angle around the object in degrees. 0 = front view, 90 = right side, 180 = back view, 270 = left side, 360 = front view again."
+        },
+        "vertical_angle": {
+          "type": "number",
+          "description": "Vertical camera angle in degrees. -30 = low-angle shot (looking up), 0 = eye-level, 30 = elevated, 60 = high-angle."
+        },
+        "zoom": {
+          "type": "number",
+          "description": "Camera zoom/distance. 0 = wide shot (far away), 5 = medium shot (normal), 10 = close-up (very close). Default value: 5",
+          "default": 5
+        },
+        "additional_prompt": {
+          "type": "string",
+          "description": "Describe specific details that might be hidden in the original view or need clarification in the newly generated perspective. For example, detail the texture of the back of a garment, hidden markings on an object, or the background environment behind the subject to maintain high consistency.",
+          "example": "The back of the leather jacket features an intricate silver embroidery of a dragon, catching the moonlight in a dark forest setting with realistic leather textures."
+        },
+        "negative_prompt": {
+          "type": "string",
+          "description": "The negative prompt for the generation"
+        },
+        "seed": {
+          "type": "integer",
+          "description": "Random seed for reproducibility"
+        }
+      },
+      "pricing": {
+        "baseCredits": 0,
+        "rounding": "ceil",
+        "modifiers": [],
+        "minCredits": 0
+      }
+    },
+    {
+      "presetName": "cinematic_i2v_v3",
+      "enabled": false,
+      "intent": "When to use: Premium image-to-video generation for complex cinematic requests. Best for high-stakes visual storytelling requiring extreme realism and perfect adherence to multi-part prompts. How to use: Provide a master-quality start frame as a basis. Use durations up to 15s for slow-burn cinematic sequences. Prompting: Structure prompts with specific material properties (e.g., 'soft silk', 'polished brass') and cinematic camera terminology (e.g., 'wide-angle', 'low-angle').",
+      "modelId": "fal-ai/kling-video/v3/pro/image-to-video",
+      "inputType": "image",
+      "outputType": "video",
+      "input_schema": {
+        "prompt": {
+          "type": "string",
+          "description": "A rich, cinematic description of the scene's action and atmosphere. Focus on physical interactions, particle effects (like snow, dust, or rain), and complex character movements. Utilize professional terminology like 'anamorphic flares', 'shallow depth of field', or 'handheld camera shake' to guide the high-realism model.",
+          "example": "An astronaut walks slowly through a Martian dust storm, their boots sinking into the red sand with every step, as sunlight struggles to pierce through the thick, swirling orange haze while their mask reflects the desolate landscape."
+        },
+        "start_image_url": {
+          "type": "string",
+          "description": "URL of the image to be used for the video",
+          "example": "https://mixio.studio/assets/scene-start-frame.png"
+        },
+        "end_image_url": {
+          "type": "string",
+          "description": "URL of the image to be used for the end of the video. Optional."
+        },
+        "duration": {
+          "type": "string",
+          "description": "The duration of the generated video in seconds",
+          "enum": [
+            "3",
+            "4",
+            "5",
+            "6",
+            "7",
+            "8",
+            "9",
+            "10",
+            "11",
+            "12",
+            "13",
+            "14",
+            "15"
+          ],
+          "default": "5"
+        },
+        "generate_audio": {
+          "type": "boolean",
+          "description": "Whether to generate native audio for the video.",
+          "default": true
+        }
+      },
+      "pricing": {
+        "baseCredits": 25,
+        "rounding": "ceil",
+        "modifiers": [
+          {
+            "field": "duration",
+            "mode": "discrete",
+            "operation": "multiply",
+            "map": {
+              "3": 3,
+              "4": 4,
+              "5": 5,
+              "6": 6,
+              "7": 7,
+              "8": 8,
+              "9": 9,
+              "10": 10,
+              "11": 11,
+              "12": 12,
+              "13": 13,
+              "14": 14,
+              "15": 15
+            }
+          }
+        ],
+        "minCredits": 1
+      }
+    },
+    {
+      "presetName": "v2v_lipsync",
+      "enabled": true,
+      "intent": "When to use: Video-to-video re-voicing and dubbing. Use this to change the dialogue in an existing video while preserving the environment and body movements. How to use: Upload a target video and new audio track. Ensure the character's mouth is visible for best results. Prompting: Focus descriptions on the voice clarity and speech rhythm, if applicable.",
+      "modelId": "fal-ai/sync-lipsync/v2",
+      "inputType": "video+audio",
+      "outputType": "video",
+      "input_schema": {
+        "video_url": {
+          "type": "string",
+          "description": "URL of the input video to be lip-synced.",
+          "example": "https://v3.fal.media/files/tiger/IugLCDJRIoGqvqTa-EJTr_3wg74vCqyNuQ-IiBd77MM_output.mp4"
+        },
+        "audio_url": {
+          "type": "string",
+          "description": "URL of the input audio to sync with the video.",
+          "example": "https://fal.media/files/lion/vyFWygmZsIZlUO4s0nr2n.wav"
+        },
+        "model": {
+          "type": "string",
+          "description": "The model to use for lipsyncing. `lipsync-2-pro` is higher quality but costs more.",
+          "enum": [
+            "lipsync-2",
+            "lipsync-2-pro"
+          ],
+          "default": "lipsync-2"
+        },
+        "sync_mode": {
+          "type": "string",
+          "description": "Lipsync mode when audio and video durations are out of sync.",
+          "enum": [
+            "cut_off",
+            "loop",
+            "bounce",
+            "silence",
+            "remap"
+          ],
+          "default": "cut_off"
+        }
+      },
+      "pricing": {
+        "baseCredits": 100,
+        "rounding": "ceil",
+        "modifiers": [],
+        "minCredits": 100
+      }
+    },
+    {
+      "presetName": "motion_control_lite",
+      "enabled": true,
+      "intent": "When to use: Precise motion transfer for non-human or multiple characters, or when characters are partially occluded/not fully in frame (a common failure case for Kling). A lighter, faster alternative for motion transfer. How to use: Provide a character image and a driving video. Prompting: Describe the scene context and lighting.",
+      "modelId": "fal-ai/bytedance/dreamactor/v2",
+      "inputType": "image+video",
+      "outputType": "video",
+      "input_schema": {
+        "image_url": {
+          "type": "string",
+          "description": "The URL of the reference image to animate. Supports real people, animation, pets, etc.",
+          "example": "https://mixio.studio/assets/character-portrait.png"
+        },
+        "video_url": {
+          "type": "string",
+          "description": "The URL of the driving template video providing motion, facial expressions, and lip movement reference. Supports full face and body driving.",
+          "example": "https://mixio.studio/assets/dance-reference.mp4"
+        },
+        "trim_first_second": {
+          "type": "boolean",
+          "description": "Whether to crop the first second of the output video. The output has a 1-second transition at the beginning; enable this to remove it.",
+          "default": true
+        }
+      },
+      "pricing": {
+        "baseCredits": 0,
+        "rounding": "ceil",
+        "modifiers": [
+          {
+            "field": "duration",
+            "mode": "step",
+            "operation": "add",
+            "step": 1,
+            "valuePerStep": 5
+          }
+        ],
+        "minCredits": 1
+      }
+    }
+  ]
+}