@mixio-pro/kalaasetu-mcp 2.3.31 → 2.3.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mixio-pro/kalaasetu-mcp",
3
- "version": "2.3.31",
3
+ "version": "2.3.32",
4
4
  "description": "A powerful Model Context Protocol server providing AI tools for content generation and analysis",
5
5
  "type": "module",
6
6
  "module": "src/index.ts",
@@ -10,6 +10,7 @@ import { getGoogleAccessToken } from "../utils/google-auth";
10
10
  import { sanitizeResponse } from "../utils/sanitize";
11
11
  import { getStorage } from "../storage";
12
12
  import * as path from "path";
13
+ import { parseVertexEndpoint, buildVertexCompositeEndpoint } from "../utils/vertex-endpoint";
13
14
 
14
15
  function getFalKey(): string {
15
16
  const falKey = process.env.FAL_KEY;
@@ -352,9 +353,12 @@ export const getGenerationStatus = {
352
353
  if (resume_endpoint.includes("||")) {
353
354
  const parts = resume_endpoint.split("||");
354
355
 
355
- // For FAL jobs starting with https://
356
- if (parts?.[0]?.startsWith("https://")) {
357
- originalEndpoint = parts[0];
356
+ // Domain-specific detection
357
+ const isFal = parts?.[0]?.includes("fal.run");
358
+ const isVertex = parts?.[0]?.includes("googleapis.com") || parts?.[1]?.includes("v1/projects/");
359
+
360
+ if (isFal) {
361
+ originalEndpoint = parts[0]!;
358
362
  // Part 1 is tracking context (base64)
359
363
  if (parts.length >= 2 && parts[1]) {
360
364
  try {
@@ -370,27 +374,40 @@ export const getGenerationStatus = {
370
374
  if (parts.length >= 3 && parts[2] && !outputPath) {
371
375
  outputPath = parts[2];
372
376
  }
377
+ } else if (isVertex) {
378
+ // New robust Vertex parsing
379
+ // For get_generation_status, we don't necessarily know the modelId here,
380
+ // but parseVertexEndpoint uses it mainly for constructing the default op name.
381
+ // Since we HAVE a composite endpoint, the modelId passed to parseVertexEndpoint won't be used for the op name.
382
+ const parsed = parseVertexEndpoint(resume_endpoint, "veo-3.1-lite-generate-001");
383
+ originalEndpoint = buildVertexCompositeEndpoint(parsed.fetchUrl, parsed.operationName);
384
+ outputPath = args.output_path || parsed.outputPath || outputPath;
385
+
386
+ if (parsed.trackingContext) {
387
+ try {
388
+ const { decodeTrackingContext } = await import("../utils/endpoint-encoder");
389
+ trackingContext = decodeTrackingContext(parsed.trackingContext);
390
+ } catch (err) {}
391
+ }
373
392
  } else {
374
- // Vertex logic (legacy/other)
393
+ // Legacy/Fallback Vertex logic
375
394
  const lastPart = parts[parts.length - 1];
376
395
  if (
377
396
  lastPart &&
378
397
  !lastPart.startsWith("http") &&
379
398
  !lastPart.includes("/") &&
380
- !lastPart.includes("mixio-pro") // Ensure it doesn't look like an operation path or URL
399
+ !lastPart.includes("mixio-pro")
381
400
  ) {
382
401
  try {
383
402
  const { decodeTrackingContext } =
384
403
  await import("../utils/endpoint-encoder");
385
404
  trackingContext = decodeTrackingContext(lastPart);
386
405
  if (trackingContext?.toolName) {
387
- // Remove the tracking piece from the endpoint we pass to handlers
388
406
  originalEndpoint = parts.slice(0, -1).join("||");
389
407
  }
390
408
  } catch (err) {}
391
409
  }
392
410
 
393
- // For Vertex, if outputPath is not explicitly provided, it might be in the 3rd part of originalEndpoint
394
411
  if (!outputPath) {
395
412
  const vertexParts = originalEndpoint.split("||");
396
413
  if (vertexParts.length >= 3) {
@@ -5,6 +5,7 @@ import { safeToolExecute, extractPrimitiveArgs } from "../utils/tool-wrapper";
5
5
 
6
6
  import { getGoogleAccessToken } from "../utils/google-auth";
7
7
  import { checkVertexStatus } from "./get-status";
8
+ import { parseVertexEndpoint, buildVertexCompositeEndpoint, buildVertexFetchUrl } from "../utils/vertex-endpoint";
8
9
 
9
10
  async function wait(ms: number): Promise<void> {
10
11
  return new Promise((resolve) => setTimeout(resolve, ms));
@@ -222,18 +223,17 @@ export const imageToVideo = {
222
223
  throw new Error(`Google Cloud authentication failed: ${errorMsg}`);
223
224
  }
224
225
 
225
- const fetchUrl = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:fetchPredictOperation`;
226
-
227
- // If resuming, reconstruct the full operation path from the UUID
226
+ let fetchUrl = buildVertexFetchUrl(modelId, projectId, location);
228
227
  let operationName: string | undefined;
228
+ let outputPath = args.output_path || "";
229
+ let preservedTrackingContext: string | undefined;
230
+
229
231
  if (args.resume_endpoint) {
230
- // Support both UUID-only and full path formats
231
- if (args.resume_endpoint.includes("/")) {
232
- operationName = args.resume_endpoint; // Already a full path
233
- } else {
234
- // Reconstruct full path from UUID
235
- operationName = `projects/${projectId}/locations/${location}/publishers/google/models/${modelId}/operations/${args.resume_endpoint}`;
236
- }
232
+ const parsed = parseVertexEndpoint(args.resume_endpoint, modelId, projectId, location);
233
+ fetchUrl = parsed.fetchUrl;
234
+ operationName = parsed.operationName;
235
+ outputPath = parsed.outputPath || outputPath;
236
+ preservedTrackingContext = parsed.trackingContext;
237
237
  }
238
238
  let current: any;
239
239
 
@@ -367,8 +367,7 @@ export const imageToVideo = {
367
367
 
368
368
  // Construct the composite resume_endpoint: fetchUrl||operationName||outputPath
369
369
  // This allows get_generation_status to use the URL directly and preserve output_path
370
- const outputPathPart = args.output_path || "";
371
- const compositeResumeEndpoint = `${fetchUrl}||${operationName}||${outputPathPart}`;
370
+ const compositeResumeEndpoint = buildVertexCompositeEndpoint(fetchUrl, operationName, outputPath);
372
371
 
373
372
  // Stream the resume_endpoint to the LLM immediately (before polling starts)
374
373
  // This way the LLM has it even if MCP client times out during polling
@@ -428,7 +427,7 @@ export const imageToVideo = {
428
427
  return JSON.stringify({
429
428
  status: "IN_PROGRESS",
430
429
  request_id: operationName,
431
- resume_endpoint: `${compositeResumeEndpoint}||${packedTracking}`,
430
+ resume_endpoint: buildVertexCompositeEndpoint(fetchUrl, operationName, outputPath, packedTracking),
432
431
  message:
433
432
  "Still in progress. Call this tool again with resume_endpoint to continue checking.",
434
433
  });
@@ -6,6 +6,11 @@ import { getGoogleAccessToken } from "../utils/google-auth";
6
6
  import { ensureLocalFile } from "../utils/url-file";
7
7
  import { encodeTrackingContext } from "../utils/endpoint-encoder";
8
8
  import { checkVertexStatus } from "./get-status";
9
+ import {
10
+ parseVertexEndpoint,
11
+ buildVertexCompositeEndpoint,
12
+ buildVertexFetchUrl
13
+ } from "../utils/vertex-endpoint";
9
14
 
10
15
  const DEFAULT_PROJECT_ID = "mixio-pro";
11
16
  const DEFAULT_LOCATION = "us-central1";
@@ -24,12 +29,7 @@ type ToolContext = {
24
29
  requestId?: string;
25
30
  };
26
31
 
27
- interface ParsedResumeEndpoint {
28
- fetchUrl: string;
29
- operationName?: string;
30
- outputPath?: string;
31
- trackingContext?: string;
32
- }
32
+
33
33
 
34
34
  async function wait(ms: number): Promise<void> {
35
35
  return new Promise((resolve) => setTimeout(resolve, ms));
@@ -84,43 +84,7 @@ async function fileToBase64(
84
84
  }
85
85
  }
86
86
 
87
- function buildFetchUrl(modelId: string): string {
88
- return `https://${DEFAULT_LOCATION}-aiplatform.googleapis.com/v1/projects/${DEFAULT_PROJECT_ID}/locations/${DEFAULT_LOCATION}/publishers/google/models/${modelId}:fetchPredictOperation`;
89
- }
90
-
91
- function parseResumeEndpoint(
92
- resumeEndpoint: string,
93
- modelId: string,
94
- ): ParsedResumeEndpoint {
95
- const defaultFetchUrl = buildFetchUrl(modelId);
96
-
97
- if (resumeEndpoint.includes("||")) {
98
- const parts = resumeEndpoint.split("||");
99
- const fetchUrl = parts[0] || "";
100
- const operationName = parts[1] || "";
101
-
102
- if (fetchUrl.startsWith("http") && operationName) {
103
- return {
104
- fetchUrl,
105
- operationName,
106
- outputPath: parts[2] || "",
107
- trackingContext: parts.length > 3 ? parts.slice(3).join("||") : undefined,
108
- };
109
- }
110
- }
111
-
112
- if (resumeEndpoint.includes("/")) {
113
- return {
114
- fetchUrl: defaultFetchUrl,
115
- operationName: resumeEndpoint,
116
- };
117
- }
118
87
 
119
- return {
120
- fetchUrl: defaultFetchUrl,
121
- operationName: `projects/${DEFAULT_PROJECT_ID}/locations/${DEFAULT_LOCATION}/publishers/google/models/${modelId}/operations/${resumeEndpoint}`,
122
- };
123
- }
124
88
 
125
89
  function normalizeDurationSeconds(durationSecondsRaw?: string): number {
126
90
  const requestedDuration = parseInt(durationSecondsRaw || "6");
@@ -343,14 +307,14 @@ export const ingredientsToVideo = {
343
307
  throw new Error(`Google Cloud authentication failed: ${errorMessage}`);
344
308
  }
345
309
 
346
- let fetchUrl = buildFetchUrl(modelId);
310
+ let fetchUrl = buildVertexFetchUrl(modelId);
347
311
  let operationName: string | undefined;
348
312
  let outputPath = args.output_path || "";
349
313
  let preservedTrackingContext: string | undefined;
350
314
  let current: any;
351
315
 
352
316
  if (args.resume_endpoint) {
353
- const parsedResume = parseResumeEndpoint(args.resume_endpoint, modelId);
317
+ const parsedResume = parseVertexEndpoint(args.resume_endpoint, modelId);
354
318
  fetchUrl = parsedResume.fetchUrl;
355
319
  operationName = parsedResume.operationName;
356
320
  outputPath = parsedResume.outputPath || outputPath;
@@ -418,7 +382,7 @@ export const ingredientsToVideo = {
418
382
  );
419
383
  }
420
384
 
421
- const compositeResumeEndpoint = `${fetchUrl}||${operationName}||${outputPath}`;
385
+ const compositeResumeEndpoint = buildVertexCompositeEndpoint(fetchUrl, operationName, outputPath);
422
386
 
423
387
  if (context?.streamContent) {
424
388
  await context.streamContent({
@@ -471,7 +435,7 @@ export const ingredientsToVideo = {
471
435
  return JSON.stringify({
472
436
  status: "IN_PROGRESS",
473
437
  request_id: operationName,
474
- resume_endpoint: `${compositeResumeEndpoint}||${packedTracking}`,
438
+ resume_endpoint: buildVertexCompositeEndpoint(fetchUrl, operationName, outputPath, packedTracking),
475
439
  message:
476
440
  "Still in progress. Call this tool again with resume_endpoint to continue checking.",
477
441
  });
@@ -33,6 +33,7 @@ const STATIC_TOOL_PRICING: Record<string, ToolPricingConfig> = {
33
33
  mode: "discrete",
34
34
  operation: "multiply",
35
35
  map: {
36
+ "veo-3.1-lite-generate-001": 1.0,
36
37
  "veo-3.1-fast-generate-001": 1.0,
37
38
  "veo-3.1-fast": 1.0,
38
39
  "veo-3.1": 2 / 3, // 30/45
@@ -174,8 +175,9 @@ export const TOOL_CREDITS: Record<string, ToolCreditConfig> = {
174
175
  credits: 180,
175
176
  provider: "google-vertex",
176
177
  chargeable: true,
177
- modelName: "veo-3.1-fast-generate-001",
178
+ modelName: "veo-3.1-lite-generate-001",
178
179
  },
180
+
179
181
  generateVideoIngredientsToVideo: {
180
182
  credits: 180,
181
183
  provider: "google-vertex",
@@ -231,7 +233,10 @@ export function getToolCredits(
231
233
  if (toolArgs?.resume_endpoint) {
232
234
  return {
233
235
  credits: 0,
234
- provider: toolName.startsWith("fal_") || toolName.startsWith("mixio_") ? "fal-ai" : "unknown",
236
+ provider:
237
+ toolName.startsWith("fal_") || toolName.startsWith("mixio_")
238
+ ? "fal-ai"
239
+ : "unknown",
235
240
  chargeable: false,
236
241
  modelName: toolName,
237
242
  pricingSource: "none",
@@ -241,15 +246,20 @@ export function getToolCredits(
241
246
  // Check for dynamic FAL tools (prefixed with fal_ or mixio_)
242
247
  if (toolName.startsWith("fal_") || toolName.startsWith("mixio_")) {
243
248
  // Normalize tool name by removing fal_ or mixio_ prefix to find the preset
244
- let presetName = toolName.startsWith("fal_") ? toolName.slice(4) : toolName.slice(6);
245
-
249
+ let presetName = toolName.startsWith("fal_")
250
+ ? toolName.slice(4)
251
+ : toolName.slice(6);
252
+
246
253
  // Special case for generic generate tools: Use preset_name from args
247
254
  if (presetName === "generate" && toolArgs?.preset_name) {
248
255
  presetName = String(toolArgs.preset_name);
249
256
  }
250
257
 
251
258
  const preset = loadFalConfig().presets.find(
252
- (item) => item.presetName === presetName || `fal_${item.presetName}` === toolName || `mixio_${item.presetName}` === toolName,
259
+ (item) =>
260
+ item.presetName === presetName ||
261
+ `fal_${item.presetName}` === toolName ||
262
+ `mixio_${item.presetName}` === toolName,
253
263
  );
254
264
 
255
265
  if (preset?.pricing) {
@@ -0,0 +1,85 @@
1
+ /**
2
+ * Utilities for parsing and building composite Vertex AI resume endpoints.
3
+ * Composite format: fetchUrl||operationName||outputPath||trackingContext
4
+ */
5
+
6
+ const DEFAULT_PROJECT_ID = "mixio-pro";
7
+ const DEFAULT_LOCATION = "us-central1";
8
+
9
+ export interface ParsedVertexEndpoint {
10
+ fetchUrl: string;
11
+ operationName: string;
12
+ outputPath?: string;
13
+ trackingContext?: string;
14
+ }
15
+
16
+ /**
17
+ * Builds a fetch URL for the Vertex AI Veo fetchPredictOperation RPC.
18
+ */
19
+ export function buildVertexFetchUrl(modelId: string, projectId: string = DEFAULT_PROJECT_ID, location: string = DEFAULT_LOCATION): string {
20
+ return `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:fetchPredictOperation`;
21
+ }
22
+
23
+ /**
24
+ * Parses a resume_endpoint string into its constituent parts.
25
+ * Handles both composite (|| delimited) and simple (UUID or full path) formats.
26
+ */
27
+ export function parseVertexEndpoint(
28
+ endpoint: string,
29
+ modelId: string,
30
+ projectId: string = DEFAULT_PROJECT_ID,
31
+ location: string = DEFAULT_LOCATION
32
+ ): ParsedVertexEndpoint {
33
+ const defaultFetchUrl = buildVertexFetchUrl(modelId, projectId, location);
34
+
35
+ // 1. Handle composite format
36
+ if (endpoint.includes("||")) {
37
+ const parts = endpoint.split("||");
38
+ const fetchUrl = parts[0] || "";
39
+ const operationName = parts[1] || "";
40
+
41
+ // If it looks like our composite format (URL || Operation)
42
+ if (fetchUrl.startsWith("http") && operationName) {
43
+ return {
44
+ fetchUrl,
45
+ operationName,
46
+ outputPath: parts[2] || undefined,
47
+ trackingContext: parts.length > 3 ? parts.slice(3).join("||") : undefined,
48
+ };
49
+ }
50
+ }
51
+
52
+ // 2. Handle simple formats (UUID or full resource path)
53
+ if (endpoint.includes("/")) {
54
+ // Already a full resource path: projects/.../operations/...
55
+ return {
56
+ fetchUrl: defaultFetchUrl,
57
+ operationName: endpoint,
58
+ };
59
+ }
60
+
61
+ // 3. Handle UUID format
62
+ return {
63
+ fetchUrl: defaultFetchUrl,
64
+ operationName: `projects/${projectId}/locations/${location}/publishers/google/models/${modelId}/operations/${endpoint}`,
65
+ };
66
+ }
67
+
68
+ /**
69
+ * Builds a composite resume endpoint string.
70
+ */
71
+ export function buildVertexCompositeEndpoint(
72
+ fetchUrl: string,
73
+ operationName: string,
74
+ outputPath?: string,
75
+ trackingContext?: string
76
+ ): string {
77
+ const parts = [fetchUrl, operationName];
78
+ if (outputPath || trackingContext) {
79
+ parts.push(outputPath || "");
80
+ }
81
+ if (trackingContext) {
82
+ parts.push(trackingContext);
83
+ }
84
+ return parts.join("||");
85
+ }
@@ -1,539 +0,0 @@
1
- {
2
- "presets": [
3
- {
4
- "presetName": "motion_control_pro",
5
- "enabled": true,
6
- "intent": "When to use: Use this for precise motion transfer or camera control where you have a specific reference video. Ideal for making a still character perform complex actions (e.g., dancing) or applying a cinematic camera path (e.g., slow pan). How to use: Provide a high-quality character still and a continuous reference video clip. Use 'video' orientation to transfer the character's movement, and 'image' orientation to keep the original pose while copying the camera's perspective. Prompting: Describe the scene context and lighting; use '@Element1' to reference the facial consistency mapping if an element is provided.",
7
- "fallback_preset": "motion_control_lite",
8
- "modelId": "fal-ai/kling-video/v3/pro/motion-control",
9
- "inputType": "image+video",
10
- "outputType": "video",
11
- "input_schema": {
12
- "prompt": {
13
- "type": "string",
14
- "description": "Provide a detailed text description of the scene's actions, environment, and character behavior. Use '@Element1' to bind to the facial consistency reference if an element is provided. Crucially, describe the exact character motion (e.g., 'a precise high-kick', 'fluid ballet turns') or camera movement (e.g., 'slow cinematic dolly-in', 'dynamic low-angle pan') that you want to replicate or enhance from the reference video. Address how the character interacts with their surroundings and how their expressions or clothing should react to the specific motion captured in the reference clip.",
15
- "example": "A high-quality cinematic shot of @Element1 dancing in a neon-lit urban alley at night, detailed fabric textures, volumetric lighting, and realistic skin shaders."
16
- },
17
- "image_url": {
18
- "type": "string",
19
- "description": "URL of the reference character image. Characters should be clearly visible and not obstructed, and occupy more than 5% of the image area.",
20
- "example": "https://mixio.studio/assets/character-portrait.png"
21
- },
22
- "video_url": {
23
- "type": "string",
24
- "description": "URL of the reference video to mimic actions from. Should contain a realistic-style character with entire body or upper body visible. Use a single continuous shot with steady movements.",
25
- "example": "https://mixio.studio/assets/dance-reference.mp4"
26
- },
27
- "keep_original_sound": {
28
- "type": "boolean",
29
- "description": "Whether to keep the original sound from the reference video.",
30
- "default": true
31
- },
32
- "character_orientation": {
33
- "type": "string",
34
- "description": "Selects the type of motion transfer. 'video': transfers character body motion (dances, gestures) from the reference video onto the character — best for character animation (max 30s). 'image': transfers camera motion from the reference video while the character stays in the original image pose — best for cinematic camera work (max 10s).",
35
- "enum": [
36
- "image",
37
- "video"
38
- ],
39
- "default": "video"
40
- },
41
- "elements": {
42
- "type": "array",
43
- "items": {
44
- "type": "object",
45
- "properties": {
46
- "frontal_image_url": {
47
- "type": "string",
48
- "description": "The frontal image of the element (main view).",
49
- "example": "https://mixio.studio/assets/character-face-frontal.png"
50
- },
51
- "reference_image_urls": {
52
- "type": "array",
53
- "items": {
54
- "type": "string"
55
- },
56
- "description": "Additional reference images from different angles. 1-3 images supported. At least one image is required.",
57
- "example": [
58
- "https://mixio.studio/assets/character-face-side.png"
59
- ]
60
- }
61
- }
62
- },
63
- "description": "Optional element for facial consistency binding. Upload a facial element to enhance identity preservation in the generated video. Only 1 element is supported. Reference in prompt as @Element1. Element binding is only supported when character_orientation is 'video'."
64
- }
65
- },
66
- "pricing": {
67
- "baseCredits": 200,
68
- "rounding": "ceil",
69
- "modifiers": [
70
- {
71
- "field": "duration",
72
- "mode": "discrete",
73
- "operation": "multiply",
74
- "map": {
75
- "5": 0.5,
76
- "10": 1
77
- }
78
- }
79
- ],
80
- "minCredits": 1
81
- }
82
- },
83
- {
84
- "presetName": "motion_control",
85
- "enabled": true,
86
- "intent": "When to use: Use this for high-quality motion transfer where you have a specific reference video. A robust alternative to the v3 pro model, often better at following complex character physics. How to use: Provide a character image and a driving video. Use 'video' orientation for full motion transfer (max 30s) or 'image' orientation to follow camera movement (max 10s). Prompting: Describe the scene context and lighting; use detailed descriptions of the motion you want to replicate.",
87
- "fallback_preset": "motion_control_lite",
88
- "modelId": "fal-ai/kling-video/v2.6/pro/motion-control",
89
- "inputType": "image+video",
90
- "outputType": "video",
91
- "input_schema": {
92
- "prompt": {
93
- "type": "string",
94
- "description": "Provide a detailed text description of the scene's actions, environment, and character behavior. Describe the exact character motion (e.g., 'a precise high-kick', 'fluid ballet turns') or camera movement that you want to replicate from the reference video.",
95
- "example": "A high-quality cinematic shot of a warrior performing a sword dance in a bamboo forest at sunset, golden hour lighting, detailed fabric textures."
96
- },
97
- "image_url": {
98
- "type": "string",
99
- "description": "URL of the reference character image. Characters should be clearly visible and not obstructed, and occupy more than 5% of the image area.",
100
- "example": "https://mixio.studio/assets/character-portrait.png"
101
- },
102
- "video_url": {
103
- "type": "string",
104
- "description": "URL of the reference video to mimic actions from. Should contain a realistic-style character with entire body or upper body visible. Use a single continuous shot with steady movements.",
105
- "example": "https://mixio.studio/assets/dance-reference.mp4"
106
- },
107
- "keep_original_sound": {
108
- "type": "boolean",
109
- "description": "Whether to keep the original sound from the reference video.",
110
- "default": true
111
- },
112
- "character_orientation": {
113
- "type": "string",
114
- "description": "Selects the type of motion transfer. 'video': orientation matches reference video (max 30s). 'image': orientation matches reference image (max 10s).",
115
- "enum": [
116
- "image",
117
- "video"
118
- ],
119
- "default": "video"
120
- },
121
- "duration": {
122
- "type": "string",
123
- "description": "The estimated duration of the generated video in seconds. Used for credit computation.",
124
- "enum": [
125
- "5",
126
- "10",
127
- "15",
128
- "20",
129
- "25",
130
- "30"
131
- ],
132
- "default": "10"
133
- }
134
- },
135
- "pricing": {
136
- "baseCredits": 112,
137
- "rounding": "ceil",
138
- "modifiers": [
139
- {
140
- "field": "duration",
141
- "mode": "discrete",
142
- "operation": "multiply",
143
- "map": {
144
- "5": 0.5,
145
- "10": 1,
146
- "15": 1.5,
147
- "20": 2,
148
- "25": 2.5,
149
- "30": 3
150
- }
151
- }
152
- ],
153
- "minCredits": 1
154
- }
155
- },
156
- {
157
- "presetName": "seedance_i2v_pro",
158
- "enabled": true,
159
- "intent": "When to use: Best for high-fidelity scene animation from a starting frame, especially when you need synchronized audio and dialogue synthesis. Use this for storytelling where the AI's creativity in imagining movement is valued. How to use: Provide a clear start frame and an optional end frame for guided transitions. For dialogue, include the spoken text in the prompt using quotes. Prompting: Use narrative language to describe the scene; example: 'The man looks up with awe, whispering \"This is incredible\" as the lights glow'.",
160
- "modelId": "fal-ai/bytedance/seedance/v1.5/pro/image-to-video",
161
- "inputType": "image",
162
- "outputType": "video",
163
- "input_schema": {
164
- "prompt": {
165
- "type": "string",
166
- "description": "A comprehensive narrative description of the video's progression, including character actions, camera movements, and environmental changes. Detail the cinematic style, lighting conditions, and specific material interactions. For dialogue-heavy scenes, include the spoken words in double quotes to guide the synchronized audio synthesis.",
167
- "example": "A deep-sea explorer finds an ancient artifact, they whisper \"Finally, the lost heart of Atlantis\" as a golden glow illuminates their face and the underwater surroundings with caustic light patterns."
168
- },
169
- "image_url": {
170
- "type": "string",
171
- "description": "The URL of the start frame image used to generate the video.",
172
- "example": "https://mixio.studio/assets/scene-start-frame.png"
173
- },
174
- "end_image_url": {
175
- "type": "string",
176
- "description": "The URL of the image the video ends with. Optional — leave empty for free-form generation."
177
- },
178
- "aspect_ratio": {
179
- "type": "string",
180
- "description": "The aspect ratio of the generated video.",
181
- "enum": [
182
- "21:9",
183
- "16:9",
184
- "4:3",
185
- "1:1",
186
- "3:4",
187
- "9:16",
188
- "auto"
189
- ],
190
- "default": "16:9"
191
- },
192
- "resolution": {
193
- "type": "string",
194
- "description": "Video resolution. 480p for faster generation, 720p for balance, 1080p for higher quality.",
195
- "enum": [
196
- "480p",
197
- "720p",
198
- "1080p"
199
- ],
200
- "default": "720p"
201
- },
202
- "duration": {
203
- "type": "string",
204
- "description": "Duration of the video in seconds.",
205
- "enum": [
206
- "4",
207
- "5",
208
- "6",
209
- "7",
210
- "8",
211
- "9",
212
- "10",
213
- "11",
214
- "12"
215
- ],
216
- "default": "5"
217
- },
218
- "camera_fixed": {
219
- "type": "boolean",
220
- "description": "Whether to fix the camera position.",
221
- "default": false
222
- },
223
- "generate_audio": {
224
- "type": "boolean",
225
- "description": "Whether to generate audio for the video.",
226
- "default": true
227
- },
228
- "seed": {
229
- "type": "integer",
230
- "description": "Random seed to control video generation. Use -1 for random."
231
- },
232
- "enable_safety_checker": {
233
- "type": "boolean",
234
- "description": "If set to true, the safety checker will be enabled.",
235
- "default": true
236
- }
237
- },
238
- "pricing": {
239
- "baseCredits": 240,
240
- "rounding": "ceil",
241
- "modifiers": [
242
- {
243
- "field": "duration",
244
- "mode": "discrete",
245
- "operation": "multiply",
246
- "map": {
247
- "4": 0.3333,
248
- "5": 0.4167,
249
- "6": 0.5,
250
- "7": 0.5833,
251
- "8": 0.6667,
252
- "9": 0.75,
253
- "10": 0.8333,
254
- "11": 0.9167,
255
- "12": 1
256
- }
257
- },
258
- {
259
- "field": "resolution",
260
- "mode": "discrete",
261
- "operation": "multiply",
262
- "map": {
263
- "480p": 0.5,
264
- "720p": 1,
265
- "1080p": 2.25
266
- }
267
- }
268
- ],
269
- "minCredits": 1
270
- }
271
- },
272
- {
273
- "presetName": "a2v_lipsync",
274
- "enabled": true,
275
- "intent": "When to use: Professional-grade talking-head generation. Best for podcasts, news, or dubbing where lip-sync accuracy and natural facial expressions are critical. How to use: Provide a high-resolution frontal face image (avoid occlusion) and clear audio. Choose '1080p' for final quality and '720p' for faster iterations. Prompting: Describe the emotional tone (e.g., 'joyful', 'serious') and background setting to guide the facial performance.",
276
- "modelId": "fal-ai/bytedance/omnihuman/v1.5",
277
- "inputType": "image+audio",
278
- "outputType": "video",
279
- "input_schema": {
280
- "prompt": {
281
- "type": "string",
282
- "description": "Describe the character's emotional state, facial performance, and the background environment. Detail how the character should react to the audio (e.g., 'eyes widening with surprise', 'a subtle, sad smile'). Specify the cinematography, such as 'extreme close-up' or 'dramatic side-lighting', to guide the facial animation quality.",
283
- "example": "A close-up of a weathered sailor recounting a legend, his eyes reflecting the flickering campfire light, displaying a mixture of regret and longing while his voice remains steady."
284
- },
285
- "image_url": {
286
- "type": "string",
287
- "description": "The URL of the human figure image used to generate the video.",
288
- "example": "https://mixio.studio/assets/person-portrait.png"
289
- },
290
- "audio_url": {
291
- "type": "string",
292
- "description": "The URL of the audio file to lip-sync. Audio must be under 30s for 1080p and under 60s for 720p generation.",
293
- "example": "https://mixio.studio/assets/voiceover-clip.mp3"
294
- },
295
- "turbo_mode": {
296
- "type": "boolean",
297
- "description": "Generate video at a faster rate with a slight quality trade-off.",
298
- "default": false
299
- },
300
- "resolution": {
301
- "type": "string",
302
- "description": "Resolution of the generated video. 720p is faster and higher quality. 1080p limited to 30s audio, 720p limited to 60s audio.",
303
- "enum": [
304
- "720p",
305
- "1080p"
306
- ],
307
- "default": "1080p"
308
- }
309
- },
310
- "pricing": {
311
- "baseCredits": 225,
312
- "rounding": "ceil",
313
- "modifiers": [
314
- {
315
- "field": "resolution",
316
- "mode": "discrete",
317
- "operation": "multiply",
318
- "map": {
319
- "720p": 0.75,
320
- "1080p": 1
321
- }
322
- }
323
- ],
324
- "minCredits": 1
325
- }
326
- },
327
- {
328
- "presetName": "multi_angle_img_generation",
329
- "enabled": true,
330
- "intent": "When to use: Precise 3D perspective control for consistency. Use this when you need a consistent character or object viewed from multiple angles (e.g., character turnarounds, product shots). How to use: Input single or multiple views. Specify rotation angles in degrees (0-360) and camera pitch/zoom to get exact framing. Prompting: Use the additional prompt to describe hidden features that should be revealed (e.g., 'a specific logo on the back of the device').",
331
- "modelId": "fal-ai/qwen-image-edit-2511-multiple-angles",
332
- "inputType": "image",
333
- "outputType": "image",
334
- "input_schema": {
335
- "image_urls": {
336
- "type": "array",
337
- "items": {
338
- "type": "string"
339
- },
340
- "description": "The URLs of the images to adjust camera angle for. Pass an array of URLs."
341
- },
342
- "horizontal_angle": {
343
- "type": "number",
344
- "description": "Horizontal rotation angle around the object in degrees. 0 = front view, 90 = right side, 180 = back view, 270 = left side, 360 = front view again."
345
- },
346
- "vertical_angle": {
347
- "type": "number",
348
- "description": "Vertical camera angle in degrees. -30 = low-angle shot (looking up), 0 = eye-level, 30 = elevated, 60 = high-angle."
349
- },
350
- "zoom": {
351
- "type": "number",
352
- "description": "Camera zoom/distance. 0 = wide shot (far away), 5 = medium shot (normal), 10 = close-up (very close). Default value: 5",
353
- "default": 5
354
- },
355
- "additional_prompt": {
356
- "type": "string",
357
- "description": "Describe specific details that might be hidden in the original view or need clarification in the newly generated perspective. For example, detail the texture of the back of a garment, hidden markings on an object, or the background environment behind the subject to maintain high consistency.",
358
- "example": "The back of the leather jacket features an intricate silver embroidery of a dragon, catching the moonlight in a dark forest setting with realistic leather textures."
359
- },
360
- "negative_prompt": {
361
- "type": "string",
362
- "description": "The negative prompt for the generation"
363
- },
364
- "seed": {
365
- "type": "integer",
366
- "description": "Random seed for reproducibility"
367
- }
368
- },
369
- "pricing": {
370
- "baseCredits": 0,
371
- "rounding": "ceil",
372
- "modifiers": [],
373
- "minCredits": 0
374
- }
375
- },
376
- {
377
- "presetName": "cinematic_i2v_v3",
378
- "enabled": false,
379
- "intent": "When to use: Premium image-to-video generation for complex cinematic requests. Best for high-stakes visual storytelling requiring extreme realism and perfect adherence to multi-part prompts. How to use: Provide a master-quality start frame as a basis. Use durations up to 15s for slow-burn cinematic sequences. Prompting: Structure prompts with specific material properties (e.g., 'soft silk', 'polished brass') and cinematic camera terminology (e.g., 'wide-angle', 'low-angle').",
380
- "modelId": "fal-ai/kling-video/v3/pro/image-to-video",
381
- "inputType": "image",
382
- "outputType": "video",
383
- "input_schema": {
384
- "prompt": {
385
- "type": "string",
386
- "description": "A rich, cinematic description of the scene's action and atmosphere. Focus on physical interactions, particle effects (like snow, dust, or rain), and complex character movements. Utilize professional terminology like 'anamorphic flares', 'shallow depth of field', or 'handheld camera shake' to guide the high-realism model.",
387
- "example": "An astronaut walks slowly through a Martian dust storm, their boots sinking into the red sand with every step, as sunlight struggles to pierce through the thick, swirling orange haze while their mask reflects the desolate landscape."
388
- },
389
- "start_image_url": {
390
- "type": "string",
391
- "description": "URL of the image to be used for the video",
392
- "example": "https://mixio.studio/assets/scene-start-frame.png"
393
- },
394
- "end_image_url": {
395
- "type": "string",
396
- "description": "URL of the image to be used for the end of the video. Optional."
397
- },
398
- "duration": {
399
- "type": "string",
400
- "description": "The duration of the generated video in seconds",
401
- "enum": [
402
- "3",
403
- "4",
404
- "5",
405
- "6",
406
- "7",
407
- "8",
408
- "9",
409
- "10",
410
- "11",
411
- "12",
412
- "13",
413
- "14",
414
- "15"
415
- ],
416
- "default": "5"
417
- },
418
- "generate_audio": {
419
- "type": "boolean",
420
- "description": "Whether to generate native audio for the video.",
421
- "default": true
422
- }
423
- },
424
- "pricing": {
425
- "baseCredits": 25,
426
- "rounding": "ceil",
427
- "modifiers": [
428
- {
429
- "field": "duration",
430
- "mode": "discrete",
431
- "operation": "multiply",
432
- "map": {
433
- "3": 3,
434
- "4": 4,
435
- "5": 5,
436
- "6": 6,
437
- "7": 7,
438
- "8": 8,
439
- "9": 9,
440
- "10": 10,
441
- "11": 11,
442
- "12": 12,
443
- "13": 13,
444
- "14": 14,
445
- "15": 15
446
- }
447
- }
448
- ],
449
- "minCredits": 1
450
- }
451
- },
452
- {
453
- "presetName": "v2v_lipsync",
454
- "enabled": true,
455
- "intent": "When to use: Video-to-video re-voicing and dubbing. Use this to change the dialogue in an existing video while preserving the environment and body movements. How to use: Upload a target video and new audio track. Ensure the character's mouth is visible for best results. Prompting: Focusing descriptions on the voice clarity and speech rhythm if applicable.",
456
- "modelId": "fal-ai/sync-lipsync/v2",
457
- "inputType": "video+audio",
458
- "outputType": "video",
459
- "input_schema": {
460
- "video_url": {
461
- "type": "string",
462
- "description": "URL of the input video to be lipsynched.",
463
- "example": "https://v3.fal.media/files/tiger/IugLCDJRIoGqvqTa-EJTr_3wg74vCqyNuQ-IiBd77MM_output.mp4"
464
- },
465
- "audio_url": {
466
- "type": "string",
467
- "description": "URL of the input audio to sync with the video.",
468
- "example": "https://fal.media/files/lion/vyFWygmZsIZlUO4s0nr2n.wav"
469
- },
470
- "model": {
471
- "type": "string",
472
- "description": "The model to use for lipsyncing. `lipsync-2-pro` is higher quality but costs more.",
473
- "enum": [
474
- "lipsync-2",
475
- "lipsync-2-pro"
476
- ],
477
- "default": "lipsync-2"
478
- },
479
- "sync_mode": {
480
- "type": "string",
481
- "description": "Lipsync mode when audio and video durations are out of sync.",
482
- "enum": [
483
- "cut_off",
484
- "loop",
485
- "bounce",
486
- "silence",
487
- "remap"
488
- ],
489
- "default": "cut_off"
490
- }
491
- },
492
- "pricing": {
493
- "baseCredits": 100,
494
- "rounding": "ceil",
495
- "modifiers": [],
496
- "minCredits": 100
497
- }
498
- },
499
- {
500
- "presetName": "motion_control_lite",
501
- "enabled": true,
502
- "intent": "When to use: Precise motion transfer for non-human, multiple characters, or when characters are partially occluded/not fully in frame (a common failure case for Kling). A lighter, faster alternative for motion transfer. How to use: Provide a character image and a driving video. Prompting: Describe the scene context and lighting.",
503
- "modelId": "fal-ai/bytedance/dreamactor/v2",
504
- "inputType": "image+video",
505
- "outputType": "video",
506
- "input_schema": {
507
- "image_url": {
508
- "type": "string",
509
- "description": "The URL of the reference image to animate. Supports real people, animation, pets, etc.",
510
- "example": "https://mixio.studio/assets/character-portrait.png"
511
- },
512
- "video_url": {
513
- "type": "string",
514
- "description": "The URL of the driving template video providing motion, facial expressions, and lip movement reference. Supports full face and body driving.",
515
- "example": "https://mixio.studio/assets/dance-reference.mp4"
516
- },
517
- "trim_first_second": {
518
- "type": "boolean",
519
- "description": "Whether to crop the first second of the output video. The output has a 1-second transition at the beginning; enable this to remove it.",
520
- "default": true
521
- }
522
- },
523
- "pricing": {
524
- "baseCredits": 0,
525
- "rounding": "ceil",
526
- "modifiers": [
527
- {
528
- "field": "duration",
529
- "mode": "step",
530
- "operation": "add",
531
- "step": 1,
532
- "valuePerStep": 5
533
- }
534
- ],
535
- "minCredits": 1
536
- }
537
- }
538
- ]
539
- }