@mixio-pro/kalaasetu-mcp 1.2.1 → 2.0.1-beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/fal-config.json +106 -0
- package/package.json +2 -1
- package/src/index.ts +0 -9
- package/src/tools/fal/config.ts +120 -23
- package/src/tools/fal/generate.ts +370 -84
- package/src/tools/fal/index.ts +2 -7
- package/src/tools/fal/models.ts +163 -29
- package/src/tools/fal/storage.ts +9 -2
- package/src/tools/gemini.ts +106 -26
- package/src/tools/image-to-video.ts +359 -129
- package/src/tools/perplexity.ts +61 -61
- package/src/tools/youtube.ts +8 -3
- package/src/utils/llm-prompt-enhancer.ts +302 -0
- package/src/utils/prompt-enhancer-presets.ts +303 -0
- package/src/utils/prompt-enhancer.ts +186 -0
|
@@ -2,6 +2,10 @@ import { z } from "zod";
|
|
|
2
2
|
import { getStorage } from "../storage";
|
|
3
3
|
import { generateTimestampedFilename } from "../utils/filename";
|
|
4
4
|
import { safeToolExecute } from "../utils/tool-wrapper";
|
|
5
|
+
import {
|
|
6
|
+
resolveEnhancer,
|
|
7
|
+
listVideoEnhancerPresets,
|
|
8
|
+
} from "../utils/prompt-enhancer-presets";
|
|
5
9
|
|
|
6
10
|
import { getGoogleAccessToken } from "../utils/google-auth";
|
|
7
11
|
|
|
@@ -41,93 +45,149 @@ async function fileToBase64(
|
|
|
41
45
|
export const imageToVideo = {
|
|
42
46
|
name: "generateVideoi2v",
|
|
43
47
|
description:
|
|
44
|
-
"Generate videos from
|
|
48
|
+
"Generate professional-quality cinematic videos from a starting image and text prompt using Google's Vertex AI Veo models. " +
|
|
49
|
+
"This tool follows a 'Synchronous Facade' pattern: it handles polling internally but can be paused/resumed. " +
|
|
50
|
+
"If the generation takes too long, it returns a 'resume_id' that you MUST use to call this tool again to pick up progress. " +
|
|
51
|
+
"It produces state-of-the-art cinematic results. " +
|
|
52
|
+
"ONLY USE WHEN WORKING WITH GOOGLE VERTEX AI MODELS.",
|
|
45
53
|
parameters: z.object({
|
|
46
|
-
prompt: z
|
|
54
|
+
prompt: z
|
|
55
|
+
.string()
|
|
56
|
+
.optional()
|
|
57
|
+
.describe(
|
|
58
|
+
"Required for new requests. Descriptive text for the video action and style (e.g., 'A robot walking through a neon city at night')."
|
|
59
|
+
),
|
|
47
60
|
image_path: z
|
|
48
61
|
.string()
|
|
49
62
|
.optional()
|
|
50
|
-
.describe("
|
|
63
|
+
.describe("Absolute local path or URL to the STARTING image frame."),
|
|
51
64
|
last_frame_path: z
|
|
52
65
|
.string()
|
|
53
66
|
.optional()
|
|
54
|
-
.describe(
|
|
67
|
+
.describe(
|
|
68
|
+
"Optional: Absolute local path or URL to the ENDING image frame to guide the video's conclusion."
|
|
69
|
+
),
|
|
55
70
|
aspect_ratio: z
|
|
56
71
|
.string()
|
|
57
72
|
.optional()
|
|
58
73
|
.default("16:9")
|
|
59
|
-
.describe(
|
|
74
|
+
.describe(
|
|
75
|
+
"Target aspect ratio: '16:9' (landscape) or '9:16' (vertical)."
|
|
76
|
+
),
|
|
60
77
|
duration_seconds: z
|
|
61
78
|
.string()
|
|
62
79
|
.optional()
|
|
63
80
|
.default("6")
|
|
64
81
|
.describe(
|
|
65
|
-
"
|
|
82
|
+
"Target duration. Vertex AI ONLY supports exactly '4', '6', or '8' seconds. Other values will be rounded to the nearest supported step."
|
|
66
83
|
),
|
|
67
84
|
resolution: z
|
|
68
85
|
.string()
|
|
69
86
|
.optional()
|
|
70
|
-
.describe("
|
|
87
|
+
.describe("Target resolution: '720p' or '1080p'. Default is '720p'."),
|
|
71
88
|
negative_prompt: z
|
|
72
89
|
.string()
|
|
73
90
|
.optional()
|
|
74
|
-
.describe(
|
|
91
|
+
.describe(
|
|
92
|
+
"Visual elements or styles to EXCLUDE from the generated video."
|
|
93
|
+
),
|
|
75
94
|
person_generation: z
|
|
76
95
|
.string()
|
|
77
96
|
.optional()
|
|
78
97
|
.describe(
|
|
79
|
-
"
|
|
98
|
+
"Policy for generating people: 'allow_adult' (standard) or 'allow_all'. Note: Gemini 1.5+ safety filters apply."
|
|
80
99
|
),
|
|
81
100
|
reference_images: z
|
|
82
101
|
.array(z.string())
|
|
83
102
|
.optional()
|
|
84
|
-
.describe(
|
|
103
|
+
.describe(
|
|
104
|
+
"Optional: Additional images (up to 3) to guide style or character consistency."
|
|
105
|
+
),
|
|
85
106
|
output_path: z
|
|
86
107
|
.string()
|
|
87
108
|
.optional()
|
|
88
109
|
.describe(
|
|
89
|
-
"
|
|
110
|
+
"Optional: Local path to save the resulting .mp4 file. Defaults to timestamped filename."
|
|
90
111
|
),
|
|
91
112
|
project_id: z
|
|
92
113
|
.string()
|
|
93
114
|
.optional()
|
|
94
115
|
.default("mixio-pro")
|
|
95
|
-
.describe("GCP Project ID
|
|
116
|
+
.describe("GCP Project ID for Vertex billing."),
|
|
96
117
|
location_id: z
|
|
97
118
|
.string()
|
|
98
119
|
.optional()
|
|
99
120
|
.default("us-central1")
|
|
100
|
-
.describe("
|
|
121
|
+
.describe("GCP region for Vertex AI processing (e.g., 'us-central1')."),
|
|
101
122
|
model_id: z
|
|
102
123
|
.string()
|
|
103
124
|
.optional()
|
|
104
125
|
.default("veo-3.1-fast-generate-001")
|
|
105
|
-
.describe("
|
|
126
|
+
.describe("Specific Vertex Veo model ID to use."),
|
|
106
127
|
generate_audio: z
|
|
107
128
|
.boolean()
|
|
108
129
|
.optional()
|
|
109
130
|
.describe(
|
|
110
|
-
"
|
|
131
|
+
"If true, Vertex will attempt to synthesize synchronized audio for the video."
|
|
111
132
|
)
|
|
112
133
|
.default(false),
|
|
134
|
+
resume_id: z
|
|
135
|
+
.string()
|
|
136
|
+
.optional()
|
|
137
|
+
.describe(
|
|
138
|
+
"If provided, the tool will check the status of an existing Vertex operation instead of starting a new one. " +
|
|
139
|
+
"Use the 'request_id' returned in an 'IN_PROGRESS' response."
|
|
140
|
+
),
|
|
141
|
+
auto_enhance: z
|
|
142
|
+
.boolean()
|
|
143
|
+
.optional()
|
|
144
|
+
.describe(
|
|
145
|
+
"Whether to automatically enhance the prompt using Veo/LTX guidelines (default: true if enabled via preset or config). Set to false to disable enhancement."
|
|
146
|
+
),
|
|
147
|
+
enhancer_preset: z
|
|
148
|
+
.string()
|
|
149
|
+
.optional()
|
|
150
|
+
.describe(
|
|
151
|
+
"Optional: Name of a video prompt enhancer preset (e.g., 'veo', 'ltx2', 'cinematic_video'). " +
|
|
152
|
+
"When using Veo, setting this to 'veo' (or setting auto_enhance=true) will trigger the LLM-based enhancer."
|
|
153
|
+
),
|
|
113
154
|
}),
|
|
114
155
|
timeoutMs: 1200000, // 20 minutes
|
|
115
|
-
async execute(
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
156
|
+
async execute(
|
|
157
|
+
args: {
|
|
158
|
+
prompt?: string;
|
|
159
|
+
image_path?: string;
|
|
160
|
+
last_frame_path?: string;
|
|
161
|
+
aspect_ratio?: string;
|
|
162
|
+
duration_seconds?: string;
|
|
163
|
+
resolution?: string;
|
|
164
|
+
negative_prompt?: string;
|
|
165
|
+
person_generation?: string;
|
|
166
|
+
reference_images?: string[] | string;
|
|
167
|
+
output_path?: string;
|
|
168
|
+
project_id?: string;
|
|
169
|
+
location_id?: string;
|
|
170
|
+
model_id?: string;
|
|
171
|
+
generate_audio?: boolean;
|
|
172
|
+
resume_id?: string;
|
|
173
|
+
enhancer_preset?: string;
|
|
174
|
+
auto_enhance?: boolean;
|
|
175
|
+
},
|
|
176
|
+
context?: {
|
|
177
|
+
reportProgress?: (progress: {
|
|
178
|
+
progress: number;
|
|
179
|
+
total: number;
|
|
180
|
+
}) => Promise<void>;
|
|
181
|
+
streamContent?: (content: {
|
|
182
|
+
type: "text";
|
|
183
|
+
text: string;
|
|
184
|
+
}) => Promise<void>;
|
|
185
|
+
log?: {
|
|
186
|
+
info: (msg: string, data?: any) => void;
|
|
187
|
+
debug: (msg: string, data?: any) => void;
|
|
188
|
+
};
|
|
189
|
+
}
|
|
190
|
+
) {
|
|
131
191
|
return safeToolExecute(async () => {
|
|
132
192
|
const projectId = args.project_id || "mixio-pro";
|
|
133
193
|
const location = args.location_id || "us-central1";
|
|
@@ -165,130 +225,270 @@ export const imageToVideo = {
|
|
|
165
225
|
) {
|
|
166
226
|
durationSeconds = 8;
|
|
167
227
|
}
|
|
228
|
+
// Stream diagnostic info about auth
|
|
229
|
+
let token: string;
|
|
230
|
+
try {
|
|
231
|
+
if (context?.streamContent) {
|
|
232
|
+
await context.streamContent({
|
|
233
|
+
type: "text" as const,
|
|
234
|
+
text: `[Vertex] Authenticating with Google Cloud (project: ${projectId}, location: ${location})...`,
|
|
235
|
+
});
|
|
236
|
+
}
|
|
237
|
+
token = await getGoogleAccessToken();
|
|
238
|
+
if (context?.streamContent) {
|
|
239
|
+
await context.streamContent({
|
|
240
|
+
type: "text" as const,
|
|
241
|
+
text: `[Vertex] ✓ Authentication successful. Token acquired.`,
|
|
242
|
+
});
|
|
243
|
+
}
|
|
244
|
+
} catch (authError: any) {
|
|
245
|
+
const errorMsg = authError?.message || String(authError);
|
|
246
|
+
if (context?.streamContent) {
|
|
247
|
+
await context.streamContent({
|
|
248
|
+
type: "text" as const,
|
|
249
|
+
text: `[Vertex] ✗ Authentication FAILED: ${errorMsg}. Check GOOGLE_APPLICATION_CREDENTIALS or run 'gcloud auth application-default login'.`,
|
|
250
|
+
});
|
|
251
|
+
}
|
|
252
|
+
throw new Error(`Google Cloud authentication failed: ${errorMsg}`);
|
|
253
|
+
}
|
|
168
254
|
|
|
169
|
-
const
|
|
170
|
-
|
|
171
|
-
const url = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:predictLongRunning`;
|
|
255
|
+
const fetchUrl = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:fetchPredictOperation`;
|
|
172
256
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
257
|
+
// If resuming, reconstruct the full operation path from the UUID
|
|
258
|
+
let operationName: string | undefined;
|
|
259
|
+
if (args.resume_id) {
|
|
260
|
+
// Support both UUID-only and full path formats
|
|
261
|
+
if (args.resume_id.includes("/")) {
|
|
262
|
+
operationName = args.resume_id; // Already a full path
|
|
263
|
+
} else {
|
|
264
|
+
// Reconstruct full path from UUID
|
|
265
|
+
operationName = `projects/${projectId}/locations/${location}/publishers/google/models/${modelId}/operations/${args.resume_id}`;
|
|
266
|
+
}
|
|
182
267
|
}
|
|
268
|
+
let current: any;
|
|
183
269
|
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
270
|
+
if (!operationName) {
|
|
271
|
+
if (!args.prompt) {
|
|
272
|
+
throw new Error("prompt is required when starting a new generation.");
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
if (context?.streamContent) {
|
|
276
|
+
await context.streamContent({
|
|
277
|
+
type: "text" as const,
|
|
278
|
+
text: `[Vertex] Submitting video generation request to Veo model: ${modelId}...`,
|
|
279
|
+
});
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
const url = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:predictLongRunning`;
|
|
283
|
+
|
|
284
|
+
let imagePart: any = undefined;
|
|
285
|
+
if (args.image_path) {
|
|
286
|
+
const { data, mimeType } = await fileToBase64(args.image_path);
|
|
287
|
+
imagePart = {
|
|
288
|
+
image: {
|
|
289
|
+
bytesBase64Encoded: data,
|
|
290
|
+
mimeType,
|
|
291
|
+
},
|
|
292
|
+
};
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
let lastFramePart: any = undefined;
|
|
296
|
+
if (args.last_frame_path) {
|
|
297
|
+
const { data, mimeType } = await fileToBase64(args.last_frame_path);
|
|
298
|
+
lastFramePart = {
|
|
299
|
+
lastFrame: {
|
|
300
|
+
bytesBase64Encoded: data,
|
|
301
|
+
mimeType,
|
|
302
|
+
},
|
|
303
|
+
};
|
|
304
|
+
}
|
|
194
305
|
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
306
|
+
let referenceImages: any[] | undefined = undefined;
|
|
307
|
+
if (args.reference_images) {
|
|
308
|
+
let refImages: string[];
|
|
309
|
+
if (typeof args.reference_images === "string") {
|
|
310
|
+
if (
|
|
311
|
+
args.reference_images.startsWith("[") &&
|
|
312
|
+
args.reference_images.endsWith("]")
|
|
313
|
+
) {
|
|
314
|
+
try {
|
|
315
|
+
refImages = JSON.parse(args.reference_images);
|
|
316
|
+
} catch {
|
|
317
|
+
throw new Error("Invalid reference_images format");
|
|
318
|
+
}
|
|
319
|
+
} else {
|
|
320
|
+
refImages = [args.reference_images];
|
|
207
321
|
}
|
|
322
|
+
} else if (Array.isArray(args.reference_images)) {
|
|
323
|
+
refImages = args.reference_images;
|
|
208
324
|
} else {
|
|
209
|
-
|
|
325
|
+
throw new Error(
|
|
326
|
+
"Invalid reference_images: must be array or string"
|
|
327
|
+
);
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
if (refImages.length > 0) {
|
|
331
|
+
referenceImages = await Promise.all(
|
|
332
|
+
refImages.slice(0, 3).map(async (p) => {
|
|
333
|
+
const { data, mimeType } = await fileToBase64(p);
|
|
334
|
+
return {
|
|
335
|
+
image: {
|
|
336
|
+
bytesBase64Encoded: data,
|
|
337
|
+
mimeType,
|
|
338
|
+
},
|
|
339
|
+
referenceType: "asset",
|
|
340
|
+
};
|
|
341
|
+
})
|
|
342
|
+
);
|
|
210
343
|
}
|
|
211
|
-
} else if (Array.isArray(args.reference_images)) {
|
|
212
|
-
refImages = args.reference_images;
|
|
213
|
-
} else {
|
|
214
|
-
throw new Error("Invalid reference_images: must be array or string");
|
|
215
344
|
}
|
|
216
345
|
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
346
|
+
const personGeneration =
|
|
347
|
+
args.person_generation ||
|
|
348
|
+
(args.image_path ? "allow_adult" : "allow_all");
|
|
349
|
+
|
|
350
|
+
// Apply prompt enhancement logic
|
|
351
|
+
let enhancedPrompt = args.prompt;
|
|
352
|
+
let enhancedNegativePrompt = args.negative_prompt;
|
|
353
|
+
|
|
354
|
+
// Determine which preset to use
|
|
355
|
+
let presetToUse = args.enhancer_preset;
|
|
356
|
+
|
|
357
|
+
// If auto_enhance is true and no preset specified, default to 'veo'
|
|
358
|
+
if (args.auto_enhance === true && !presetToUse) {
|
|
359
|
+
presetToUse = "veo";
|
|
230
360
|
}
|
|
231
|
-
}
|
|
232
361
|
|
|
233
|
-
|
|
234
|
-
args.
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
const instances: any[] = [
|
|
238
|
-
{
|
|
239
|
-
prompt: args.prompt,
|
|
240
|
-
...(imagePart || {}),
|
|
241
|
-
...(lastFramePart || {}),
|
|
242
|
-
...(referenceImages ? { referenceImages } : {}),
|
|
243
|
-
},
|
|
244
|
-
];
|
|
245
|
-
|
|
246
|
-
const parameters: any = {
|
|
247
|
-
aspectRatio: args.aspect_ratio || "9:16",
|
|
248
|
-
durationSeconds: durationSeconds,
|
|
249
|
-
resolution: args.resolution || "720p",
|
|
250
|
-
negativePrompt: args.negative_prompt,
|
|
251
|
-
generateAudio: args.generate_audio || false,
|
|
252
|
-
personGeneration,
|
|
253
|
-
};
|
|
362
|
+
// Disable enhancement if auto_enhance is explicitly false
|
|
363
|
+
if (args.auto_enhance === false) {
|
|
364
|
+
presetToUse = undefined;
|
|
365
|
+
}
|
|
254
366
|
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
367
|
+
if (presetToUse && args.prompt) {
|
|
368
|
+
// Use LLM-based enhancement for 'veo' preset
|
|
369
|
+
if (presetToUse === "veo") {
|
|
370
|
+
const { enhancePromptWithLLM, isLLMEnhancerAvailable } =
|
|
371
|
+
await import("../utils/llm-prompt-enhancer");
|
|
372
|
+
|
|
373
|
+
if (isLLMEnhancerAvailable()) {
|
|
374
|
+
if (context?.streamContent) {
|
|
375
|
+
await context.streamContent({
|
|
376
|
+
type: "text" as const,
|
|
377
|
+
text: `[VEO] Enhancing prompt with Gemini for optimal Veo 3.1 generation...`,
|
|
378
|
+
});
|
|
379
|
+
}
|
|
263
380
|
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
381
|
+
try {
|
|
382
|
+
enhancedPrompt = await enhancePromptWithLLM(args.prompt, "veo");
|
|
383
|
+
context?.log?.info(
|
|
384
|
+
`LLM-enhanced prompt for Veo: "${args.prompt}" → "${enhancedPrompt}"`
|
|
385
|
+
);
|
|
386
|
+
|
|
387
|
+
if (context?.streamContent) {
|
|
388
|
+
await context.streamContent({
|
|
389
|
+
type: "text" as const,
|
|
390
|
+
text: `[VEO] ✓ Prompt enhanced. Length: ${args.prompt.length} → ${enhancedPrompt.length} chars`,
|
|
391
|
+
});
|
|
392
|
+
}
|
|
393
|
+
} catch (err: any) {
|
|
394
|
+
context?.log?.info(
|
|
395
|
+
`LLM enhancement failed, using original: ${err.message}`
|
|
396
|
+
);
|
|
397
|
+
}
|
|
398
|
+
} else {
|
|
399
|
+
context?.log?.info(
|
|
400
|
+
"GEMINI_API_KEY not set, skipping Veo LLM enhancement"
|
|
401
|
+
);
|
|
402
|
+
}
|
|
403
|
+
} else {
|
|
404
|
+
// Fall back to static string-based enhancement for other presets
|
|
405
|
+
const enhancer = resolveEnhancer(presetToUse);
|
|
406
|
+
if (enhancer.hasTransformations()) {
|
|
407
|
+
enhancedPrompt = enhancer.enhance(args.prompt);
|
|
408
|
+
// Apply negative elements if not already set
|
|
409
|
+
const negatives = enhancer.getNegativeElements();
|
|
410
|
+
if (negatives && !enhancedNegativePrompt) {
|
|
411
|
+
enhancedNegativePrompt = negatives;
|
|
412
|
+
}
|
|
413
|
+
}
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
const instances: any[] = [
|
|
418
|
+
{
|
|
419
|
+
prompt: enhancedPrompt,
|
|
420
|
+
...(imagePart || {}),
|
|
421
|
+
...(lastFramePart || {}),
|
|
422
|
+
...(referenceImages ? { referenceImages } : {}),
|
|
423
|
+
},
|
|
424
|
+
];
|
|
425
|
+
|
|
426
|
+
const parameters: any = {
|
|
427
|
+
aspectRatio: args.aspect_ratio || "9:16",
|
|
428
|
+
durationSeconds: durationSeconds,
|
|
429
|
+
resolution: args.resolution || "720p",
|
|
430
|
+
negativePrompt: enhancedNegativePrompt,
|
|
431
|
+
generateAudio: args.generate_audio || false,
|
|
432
|
+
personGeneration,
|
|
433
|
+
};
|
|
434
|
+
|
|
435
|
+
const res = await fetch(url, {
|
|
436
|
+
method: "POST",
|
|
437
|
+
headers: {
|
|
438
|
+
Authorization: `Bearer ${token}`,
|
|
439
|
+
"Content-Type": "application/json",
|
|
440
|
+
},
|
|
441
|
+
body: JSON.stringify({ instances, parameters }),
|
|
442
|
+
});
|
|
443
|
+
|
|
444
|
+
if (!res.ok) {
|
|
445
|
+
const text = await res.text();
|
|
446
|
+
throw new Error(`Vertex request failed: ${res.status} ${text}`);
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
const op = (await res.json()) as any;
|
|
450
|
+
operationName = op.name || op.operation || "";
|
|
451
|
+
current = op;
|
|
267
452
|
}
|
|
268
453
|
|
|
269
|
-
|
|
270
|
-
const name: string = op.name || op.operation || "";
|
|
271
|
-
if (!name) {
|
|
454
|
+
if (!operationName) {
|
|
272
455
|
throw new Error(
|
|
273
456
|
"Vertex did not return an operation name for long-running request"
|
|
274
457
|
);
|
|
275
458
|
}
|
|
276
459
|
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
460
|
+
// Extract just the operation UUID from the full path for a cleaner resume_id
|
|
461
|
+
// Full path: projects/.../operations/<uuid>
|
|
462
|
+
const operationUuid = operationName.split("/").pop() || operationName;
|
|
463
|
+
|
|
464
|
+
// Stream the resume_id to the LLM immediately (before polling starts)
|
|
465
|
+
// This way the LLM has it even if MCP client times out during polling
|
|
466
|
+
if (context?.streamContent) {
|
|
467
|
+
const isResume = !!args.resume_id;
|
|
468
|
+
await context.streamContent({
|
|
469
|
+
type: "text" as const,
|
|
470
|
+
text: isResume
|
|
471
|
+
? `[Vertex] Resuming status check for job: ${operationUuid}`
|
|
472
|
+
: `[Vertex] Video generation started. resume_id: ${operationUuid} (use this to check status if needed)`,
|
|
473
|
+
});
|
|
474
|
+
}
|
|
475
|
+
|
|
476
|
+
// Poll for status - keep polling until done
|
|
477
|
+
// Resume_id was already streamed, so if MCP client times out the LLM still has it
|
|
478
|
+
let done = current ? !!current.done || !!current.response : false;
|
|
479
|
+
const startTime = Date.now();
|
|
480
|
+
const MAX_POLL_TIME = 600000; // 10 minutes - full tool timeout is 20 mins
|
|
481
|
+
|
|
482
|
+
while (!done && Date.now() - startTime < MAX_POLL_TIME) {
|
|
483
|
+
await wait(10000); // 10 second intervals
|
|
280
484
|
|
|
281
|
-
// Poll using fetchPredictOperation as per Vertex recommendation
|
|
282
|
-
const fetchUrl = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:fetchPredictOperation`;
|
|
283
|
-
while (!done && tries < 60) {
|
|
284
|
-
await wait(10000);
|
|
285
485
|
const poll = await fetch(fetchUrl, {
|
|
286
486
|
method: "POST",
|
|
287
487
|
headers: {
|
|
288
488
|
Authorization: `Bearer ${token}`,
|
|
289
489
|
"Content-Type": "application/json",
|
|
290
490
|
},
|
|
291
|
-
body: JSON.stringify({ operationName
|
|
491
|
+
body: JSON.stringify({ operationName }),
|
|
292
492
|
});
|
|
293
493
|
if (!poll.ok) {
|
|
294
494
|
const text = await poll.text();
|
|
@@ -298,7 +498,37 @@ export const imageToVideo = {
|
|
|
298
498
|
}
|
|
299
499
|
current = (await poll.json()) as any;
|
|
300
500
|
done = !!current.done || !!current.response;
|
|
301
|
-
|
|
501
|
+
|
|
502
|
+
if (context?.reportProgress) {
|
|
503
|
+
const elapsed = Date.now() - startTime;
|
|
504
|
+
const progressPercent = Math.min(
|
|
505
|
+
Math.round((elapsed / MAX_POLL_TIME) * 100),
|
|
506
|
+
99
|
|
507
|
+
);
|
|
508
|
+
await context.reportProgress({
|
|
509
|
+
progress: progressPercent,
|
|
510
|
+
total: 100,
|
|
511
|
+
});
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
if (context?.streamContent && !done) {
|
|
515
|
+
await context.streamContent({
|
|
516
|
+
type: "text" as const,
|
|
517
|
+
text: `[Vertex] Still processing... (${Math.round(
|
|
518
|
+
(Date.now() - startTime) / 1000
|
|
519
|
+
)}s elapsed)`,
|
|
520
|
+
});
|
|
521
|
+
}
|
|
522
|
+
}
|
|
523
|
+
|
|
524
|
+
if (!done) {
|
|
525
|
+
return JSON.stringify({
|
|
526
|
+
status: "IN_PROGRESS",
|
|
527
|
+
request_id: operationName,
|
|
528
|
+
resume_id: operationName,
|
|
529
|
+
message:
|
|
530
|
+
"Still in progress. Call this tool again with resume_id to continue checking.",
|
|
531
|
+
});
|
|
302
532
|
}
|
|
303
533
|
|
|
304
534
|
const resp = current.response || current;
|
|
@@ -356,7 +586,7 @@ export const imageToVideo = {
|
|
|
356
586
|
const tail50 = jsonStr
|
|
357
587
|
? jsonStr.slice(Math.max(0, jsonStr.length - 50))
|
|
358
588
|
: "";
|
|
359
|
-
return `Vertex operation done but no videos array present. operationName=${
|
|
589
|
+
return `Vertex operation done but no videos array present. operationName=${operationName}. json_head150=${head150} json_tail50=${tail50}`;
|
|
360
590
|
}, "imageToVideo");
|
|
361
591
|
},
|
|
362
592
|
};
|