@vibeframe/cli 0.27.0 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. package/LICENSE +21 -0
  2. package/dist/agent/adapters/index.d.ts +1 -0
  3. package/dist/agent/adapters/index.d.ts.map +1 -1
  4. package/dist/agent/adapters/index.js +5 -0
  5. package/dist/agent/adapters/index.js.map +1 -1
  6. package/dist/agent/adapters/openrouter.d.ts +16 -0
  7. package/dist/agent/adapters/openrouter.d.ts.map +1 -0
  8. package/dist/agent/adapters/openrouter.js +100 -0
  9. package/dist/agent/adapters/openrouter.js.map +1 -0
  10. package/dist/agent/types.d.ts +1 -1
  11. package/dist/agent/types.d.ts.map +1 -1
  12. package/dist/commands/agent.d.ts.map +1 -1
  13. package/dist/commands/agent.js +3 -1
  14. package/dist/commands/agent.js.map +1 -1
  15. package/dist/commands/ai-edit-cli.d.ts.map +1 -1
  16. package/dist/commands/ai-edit-cli.js +18 -0
  17. package/dist/commands/ai-edit-cli.js.map +1 -1
  18. package/dist/commands/generate.js +14 -0
  19. package/dist/commands/generate.js.map +1 -1
  20. package/dist/commands/schema.d.ts +1 -0
  21. package/dist/commands/schema.d.ts.map +1 -1
  22. package/dist/commands/schema.js +122 -21
  23. package/dist/commands/schema.js.map +1 -1
  24. package/dist/commands/setup.js +5 -2
  25. package/dist/commands/setup.js.map +1 -1
  26. package/dist/config/schema.d.ts +2 -1
  27. package/dist/config/schema.d.ts.map +1 -1
  28. package/dist/config/schema.js +2 -0
  29. package/dist/config/schema.js.map +1 -1
  30. package/dist/index.js +0 -0
  31. package/package.json +16 -12
  32. package/.turbo/turbo-build.log +0 -4
  33. package/.turbo/turbo-lint.log +0 -21
  34. package/.turbo/turbo-test.log +0 -689
  35. package/src/agent/adapters/claude.ts +0 -143
  36. package/src/agent/adapters/gemini.ts +0 -159
  37. package/src/agent/adapters/index.ts +0 -61
  38. package/src/agent/adapters/ollama.ts +0 -231
  39. package/src/agent/adapters/openai.ts +0 -116
  40. package/src/agent/adapters/xai.ts +0 -119
  41. package/src/agent/index.ts +0 -251
  42. package/src/agent/memory/index.ts +0 -151
  43. package/src/agent/prompts/system.ts +0 -106
  44. package/src/agent/tools/ai-editing.ts +0 -845
  45. package/src/agent/tools/ai-generation.ts +0 -1073
  46. package/src/agent/tools/ai-pipeline.ts +0 -1055
  47. package/src/agent/tools/ai.ts +0 -21
  48. package/src/agent/tools/batch.ts +0 -429
  49. package/src/agent/tools/e2e.test.ts +0 -545
  50. package/src/agent/tools/export.ts +0 -184
  51. package/src/agent/tools/filesystem.ts +0 -237
  52. package/src/agent/tools/index.ts +0 -150
  53. package/src/agent/tools/integration.test.ts +0 -775
  54. package/src/agent/tools/media.ts +0 -697
  55. package/src/agent/tools/project.ts +0 -313
  56. package/src/agent/tools/timeline.ts +0 -951
  57. package/src/agent/types.ts +0 -68
  58. package/src/commands/agent.ts +0 -340
  59. package/src/commands/ai-analyze.ts +0 -429
  60. package/src/commands/ai-animated-caption.ts +0 -390
  61. package/src/commands/ai-audio.ts +0 -941
  62. package/src/commands/ai-broll.ts +0 -490
  63. package/src/commands/ai-edit-cli.ts +0 -658
  64. package/src/commands/ai-edit.ts +0 -1542
  65. package/src/commands/ai-fill-gaps.ts +0 -566
  66. package/src/commands/ai-helpers.ts +0 -65
  67. package/src/commands/ai-highlights.ts +0 -1303
  68. package/src/commands/ai-image.ts +0 -761
  69. package/src/commands/ai-motion.ts +0 -347
  70. package/src/commands/ai-narrate.ts +0 -451
  71. package/src/commands/ai-review.ts +0 -309
  72. package/src/commands/ai-script-pipeline-cli.ts +0 -1710
  73. package/src/commands/ai-script-pipeline.ts +0 -1365
  74. package/src/commands/ai-suggest-edit.ts +0 -264
  75. package/src/commands/ai-video-fx.ts +0 -445
  76. package/src/commands/ai-video.ts +0 -915
  77. package/src/commands/ai-viral.ts +0 -595
  78. package/src/commands/ai-visual-fx.ts +0 -601
  79. package/src/commands/ai.test.ts +0 -627
  80. package/src/commands/ai.ts +0 -307
  81. package/src/commands/analyze.ts +0 -282
  82. package/src/commands/audio.ts +0 -644
  83. package/src/commands/batch.test.ts +0 -279
  84. package/src/commands/batch.ts +0 -440
  85. package/src/commands/detect.ts +0 -329
  86. package/src/commands/doctor.ts +0 -237
  87. package/src/commands/edit-cmd.ts +0 -1014
  88. package/src/commands/export.ts +0 -918
  89. package/src/commands/generate.ts +0 -2146
  90. package/src/commands/media.ts +0 -177
  91. package/src/commands/output.ts +0 -142
  92. package/src/commands/pipeline.ts +0 -398
  93. package/src/commands/project.test.ts +0 -127
  94. package/src/commands/project.ts +0 -149
  95. package/src/commands/sanitize.ts +0 -60
  96. package/src/commands/schema.ts +0 -130
  97. package/src/commands/setup.ts +0 -509
  98. package/src/commands/timeline.test.ts +0 -499
  99. package/src/commands/timeline.ts +0 -529
  100. package/src/commands/validate.ts +0 -77
  101. package/src/config/config.test.ts +0 -197
  102. package/src/config/index.ts +0 -125
  103. package/src/config/schema.ts +0 -82
  104. package/src/engine/index.ts +0 -2
  105. package/src/engine/project.test.ts +0 -702
  106. package/src/engine/project.ts +0 -439
  107. package/src/index.ts +0 -146
  108. package/src/utils/api-key.test.ts +0 -41
  109. package/src/utils/api-key.ts +0 -247
  110. package/src/utils/audio.ts +0 -83
  111. package/src/utils/exec-safe.ts +0 -75
  112. package/src/utils/first-run.ts +0 -52
  113. package/src/utils/provider-resolver.ts +0 -56
  114. package/src/utils/remotion.ts +0 -951
  115. package/src/utils/subtitle.test.ts +0 -227
  116. package/src/utils/subtitle.ts +0 -169
  117. package/src/utils/tty.ts +0 -196
  118. package/tsconfig.json +0 -20
@@ -1,1055 +0,0 @@
1
- /**
2
- * @module ai-pipeline
3
- * @description Agent tools for advanced multi-step AI pipelines (script-to-video,
4
- * highlights, auto-shorts, analysis, editing, regeneration). Orchestrates
5
- * multiple AI providers via execute functions from CLI commands.
6
- *
7
- * ## Tools: pipeline_script_to_video, pipeline_highlights, pipeline_auto_shorts, analyze_video,
8
- * analyze_media, edit_image, pipeline_regenerate_scene
9
- * ## Dependencies: Claude, Gemini, OpenAI, Whisper, ElevenLabs, Kling
10
- * @see MODELS.md for the Single Source of Truth (SSOT) on supported providers/models
11
- */
12
-
13
- import { writeFile, readFile } from "node:fs/promises";
14
- import { resolve } from "node:path";
15
- import type { ToolRegistry, ToolHandler } from "./index.js";
16
- import type { ToolDefinition, ToolResult } from "../types.js";
17
- import { getApiKeyFromConfig } from "../../config/index.js";
18
- import {
19
- executeScriptToVideo,
20
- executeRegenerateScene,
21
- } from "../../commands/ai-script-pipeline.js";
22
- import {
23
- executeAnimatedCaption,
24
- type AnimatedCaptionStyle,
25
- } from "../../commands/ai-animated-caption.js";
26
- import {
27
- executeHighlights,
28
- executeAutoShorts,
29
- } from "../../commands/ai-highlights.js";
30
- import {
31
- executeGeminiVideo,
32
- executeAnalyze,
33
- } from "../../commands/ai-analyze.js";
34
- import { sanitizeAIResult } from "../../commands/sanitize.js";
35
-
36
- // Helper to get timestamp for filenames
37
- function getTimestamp(): string {
38
- return Date.now().toString();
39
- }
40
-
41
- // ============================================================================
42
- // Tool Definitions
43
- // ============================================================================
44
-
45
- const scriptToVideoDef: ToolDefinition = {
46
- name: "pipeline_script_to_video",
47
- description:
48
- "Generate complete video from text script. Full pipeline: storyboard (Claude/OpenAI/Gemini) → ElevenLabs TTS → Image gen (DALL-E/Gemini) → Video gen (Runway/Kling). Creates project file with all assets.",
49
- parameters: {
50
- type: "object",
51
- properties: {
52
- script: {
53
- type: "string",
54
- description: "Script text for video (e.g., 'Product introduction. Feature showcase. Call to action.')",
55
- },
56
- outputDir: {
57
- type: "string",
58
- description: "Output directory for assets (default: script-video-output)",
59
- },
60
- duration: {
61
- type: "number",
62
- description: "Target total duration in seconds",
63
- },
64
- voice: {
65
- type: "string",
66
- description: "ElevenLabs voice ID for narration",
67
- },
68
- generator: {
69
- type: "string",
70
- description: "Video generator to use",
71
- enum: ["runway", "kling"],
72
- },
73
- imageProvider: {
74
- type: "string",
75
- description: "Image provider to use",
76
- enum: ["openai", "gemini", "grok"],
77
- },
78
- aspectRatio: {
79
- type: "string",
80
- description: "Aspect ratio for output",
81
- enum: ["16:9", "9:16", "1:1"],
82
- },
83
- imagesOnly: {
84
- type: "boolean",
85
- description: "Generate images only, skip video generation",
86
- },
87
- noVoiceover: {
88
- type: "boolean",
89
- description: "Skip voiceover generation",
90
- },
91
- retries: {
92
- type: "number",
93
- description: "Number of retries for video generation failures (default: 2)",
94
- },
95
- creativity: {
96
- type: "string",
97
- description: "Creativity level for storyboard: 'low' (default, consistent scenes) or 'high' (varied, unexpected scenes)",
98
- enum: ["low", "high"],
99
- },
100
- storyboardProvider: {
101
- type: "string",
102
- description: "Provider for storyboard generation: 'claude' (default), 'openai', or 'gemini'",
103
- enum: ["claude", "openai", "gemini"],
104
- },
105
- },
106
- required: ["script"],
107
- },
108
- };
109
-
110
- const highlightsDef: ToolDefinition = {
111
- name: "pipeline_highlights",
112
- description:
113
- "Extract highlights from long-form video/audio content. Uses Whisper+Claude or Gemini Video Understanding to find engaging moments. Returns timestamps and can create highlight reel project.",
114
- parameters: {
115
- type: "object",
116
- properties: {
117
- media: {
118
- type: "string",
119
- description: "Video or audio file path",
120
- },
121
- output: {
122
- type: "string",
123
- description: "Output JSON file path for highlights data",
124
- },
125
- project: {
126
- type: "string",
127
- description: "Create a VibeFrame project file with highlight clips",
128
- },
129
- duration: {
130
- type: "number",
131
- description: "Target highlight reel duration in seconds",
132
- },
133
- count: {
134
- type: "number",
135
- description: "Maximum number of highlights to extract",
136
- },
137
- threshold: {
138
- type: "number",
139
- description: "Confidence threshold (0-1, default: 0.7)",
140
- },
141
- criteria: {
142
- type: "string",
143
- description: "Selection criteria for highlights",
144
- enum: ["emotional", "informative", "funny", "all"],
145
- },
146
- language: {
147
- type: "string",
148
- description: "Language code for transcription (e.g., en, ko)",
149
- },
150
- useGemini: {
151
- type: "boolean",
152
- description: "Use Gemini Video Understanding for visual+audio analysis (recommended for video)",
153
- },
154
- lowRes: {
155
- type: "boolean",
156
- description: "Use low resolution mode for longer videos (Gemini only)",
157
- },
158
- },
159
- required: ["media"],
160
- },
161
- };
162
-
163
- const autoShortsDef: ToolDefinition = {
164
- name: "pipeline_auto_shorts",
165
- description:
166
- "Auto-generate vertical shorts from long-form video. Finds viral-worthy moments, crops to vertical format, and exports as separate short videos. Perfect for TikTok, YouTube Shorts, Instagram Reels.",
167
- parameters: {
168
- type: "object",
169
- properties: {
170
- video: {
171
- type: "string",
172
- description: "Input video file path",
173
- },
174
- outputDir: {
175
- type: "string",
176
- description: "Output directory for generated shorts",
177
- },
178
- duration: {
179
- type: "number",
180
- description: "Target duration for each short (15-60 seconds, default: 60)",
181
- },
182
- count: {
183
- type: "number",
184
- description: "Number of shorts to generate (default: 1)",
185
- },
186
- aspect: {
187
- type: "string",
188
- description: "Aspect ratio for shorts",
189
- enum: ["9:16", "1:1"],
190
- },
191
- addCaptions: {
192
- type: "boolean",
193
- description: "Add auto-generated captions",
194
- },
195
- captionStyle: {
196
- type: "string",
197
- description: "Caption style",
198
- enum: ["minimal", "bold", "animated"],
199
- },
200
- analyzeOnly: {
201
- type: "boolean",
202
- description: "Show detected segments without generating videos",
203
- },
204
- language: {
205
- type: "string",
206
- description: "Language code for transcription",
207
- },
208
- useGemini: {
209
- type: "boolean",
210
- description: "Use Gemini Video Understanding for enhanced visual+audio analysis",
211
- },
212
- lowRes: {
213
- type: "boolean",
214
- description: "Use low resolution mode for longer videos (Gemini only)",
215
- },
216
- },
217
- required: ["video"],
218
- },
219
- };
220
-
221
- const geminiVideoDef: ToolDefinition = {
222
- name: "analyze_video",
223
- description:
224
- "Analyze video using Gemini Video Understanding. Supports video summarization, Q&A, content extraction, and timestamp analysis. Works with local files and YouTube URLs.",
225
- parameters: {
226
- type: "object",
227
- properties: {
228
- source: {
229
- type: "string",
230
- description: "Video file path or YouTube URL",
231
- },
232
- prompt: {
233
- type: "string",
234
- description: "Analysis prompt (e.g., 'Summarize this video', 'What happens at 2:30?')",
235
- },
236
- model: {
237
- type: "string",
238
- description: "Gemini model to use",
239
- enum: ["flash", "flash-2.5", "pro"],
240
- },
241
- fps: {
242
- type: "number",
243
- description: "Frames per second for sampling (default: 1, higher for action)",
244
- },
245
- start: {
246
- type: "number",
247
- description: "Start offset in seconds",
248
- },
249
- end: {
250
- type: "number",
251
- description: "End offset in seconds",
252
- },
253
- lowRes: {
254
- type: "boolean",
255
- description: "Use low resolution mode (fewer tokens, longer videos)",
256
- },
257
- },
258
- required: ["source", "prompt"],
259
- },
260
- };
261
-
262
- const analyzeDef: ToolDefinition = {
263
- name: "analyze_media",
264
- description:
265
- "Analyze any media using Gemini: images, videos, or YouTube URLs. Auto-detects source type. Use for image description, video summarization, Q&A, content extraction, and comparison analysis.",
266
- parameters: {
267
- type: "object",
268
- properties: {
269
- source: {
270
- type: "string",
271
- description: "Image/video file path, image URL (http...*.png/jpg/webp), or YouTube URL",
272
- },
273
- prompt: {
274
- type: "string",
275
- description: "Analysis prompt (e.g., 'Describe this image', 'Summarize this video', 'What happens at 2:30?')",
276
- },
277
- model: {
278
- type: "string",
279
- description: "Gemini model to use",
280
- enum: ["flash", "flash-2.5", "pro"],
281
- },
282
- fps: {
283
- type: "number",
284
- description: "Frames per second for video sampling (default: 1)",
285
- },
286
- start: {
287
- type: "number",
288
- description: "Start offset in seconds (video only)",
289
- },
290
- end: {
291
- type: "number",
292
- description: "End offset in seconds (video only)",
293
- },
294
- lowRes: {
295
- type: "boolean",
296
- description: "Use low resolution mode (fewer tokens, longer videos/larger images)",
297
- },
298
- },
299
- required: ["source", "prompt"],
300
- },
301
- };
302
-
303
- const editImageDef: ToolDefinition = {
304
- name: "edit_image",
305
- description:
306
- "Edit images using AI. Supports Gemini (up to 14 images, default), OpenAI GPT Image 1.5 (up to 16 images), or Grok Imagine (1 image). Use for image editing, style transfer, or multi-image composition.",
307
- parameters: {
308
- type: "object",
309
- properties: {
310
- images: {
311
- type: "array",
312
- items: { type: "string", description: "Image file path" },
313
- description: "Input image file paths",
314
- },
315
- prompt: {
316
- type: "string",
317
- description: "Edit instruction (e.g., 'change background to sunset', 'combine these images into a collage')",
318
- },
319
- output: {
320
- type: "string",
321
- description: "Output file path (default: edited-{timestamp}.png)",
322
- },
323
- provider: {
324
- type: "string",
325
- description: "Provider: gemini (default, up to 14 images), openai (up to 16 images), grok (1 image)",
326
- enum: ["gemini", "openai", "grok"],
327
- },
328
- model: {
329
- type: "string",
330
- description: "Model to use (Gemini only): flash (max 3 images, fast) or pro (max 14 images, higher quality)",
331
- enum: ["flash", "pro"],
332
- },
333
- aspectRatio: {
334
- type: "string",
335
- description: "Output aspect ratio",
336
- enum: ["1:1", "16:9", "9:16", "3:4", "4:3", "3:2", "2:3", "21:9"],
337
- },
338
- resolution: {
339
- type: "string",
340
- description: "Output resolution (Gemini Pro model only): 1K, 2K, 4K",
341
- enum: ["1K", "2K", "4K"],
342
- },
343
- },
344
- required: ["images", "prompt"],
345
- },
346
- };
347
-
348
- const regenerateSceneDef: ToolDefinition = {
349
- name: "pipeline_regenerate_scene",
350
- description: `Regenerate specific scene(s) in a script-to-video project.
351
-
352
- RECOMMENDED WORKFLOW:
353
- 1. FIRST use fs_read to read storyboard.json in the project directory
354
- 2. Tell the user what scene(s) they're about to regenerate (show visuals, narration, duration)
355
- 3. THEN use this tool to regenerate
356
-
357
- This tool re-creates videos for failed scenes using image-to-video (if ImgBB key available) or text-to-video. When regenerating images, uses reference-based generation for character consistency.`,
358
- parameters: {
359
- type: "object",
360
- properties: {
361
- projectDir: {
362
- type: "string",
363
- description: "Path to the script-to-video output directory (e.g., ./tiktok/)",
364
- },
365
- scenes: {
366
- type: "array",
367
- items: { type: "number", description: "Scene number (1-based)" },
368
- description: "Scene numbers to regenerate (1-based), e.g., [3, 4, 5]",
369
- },
370
- videoOnly: {
371
- type: "boolean",
372
- description: "Only regenerate videos, not images or narration (default: true)",
373
- },
374
- imageOnly: {
375
- type: "boolean",
376
- description: "Only regenerate images, not videos or narration",
377
- },
378
- generator: {
379
- type: "string",
380
- description: "Video generator: kling or runway",
381
- enum: ["kling", "runway"],
382
- },
383
- aspectRatio: {
384
- type: "string",
385
- description: "Aspect ratio for videos",
386
- enum: ["16:9", "9:16", "1:1"],
387
- },
388
- referenceScene: {
389
- type: "number",
390
- description: "Scene number to use as reference for character consistency when regenerating images (auto-detects if not specified)",
391
- },
392
- },
393
- required: ["projectDir", "scenes"],
394
- },
395
- };
396
-
397
- const animatedCaptionDef: ToolDefinition = {
398
- name: "pipeline_animated_caption",
399
- description:
400
- "Add animated word-by-word captions to video. Styles: highlight (TikTok-style), bounce, pop-in, neon (Remotion), karaoke-sweep, typewriter (ASS/fast). Requires OPENAI_API_KEY for Whisper.",
401
- parameters: {
402
- type: "object",
403
- properties: {
404
- videoPath: {
405
- type: "string",
406
- description: "Input video file path",
407
- },
408
- outputPath: {
409
- type: "string",
410
- description: "Output video file path",
411
- },
412
- style: {
413
- type: "string",
414
- description: "Caption animation style",
415
- enum: ["highlight", "bounce", "pop-in", "neon", "karaoke-sweep", "typewriter"],
416
- },
417
- highlightColor: {
418
- type: "string",
419
- description: "Active word highlight color (default: #FFFF00)",
420
- },
421
- fontSize: {
422
- type: "number",
423
- description: "Font size in pixels (default: auto based on resolution)",
424
- },
425
- position: {
426
- type: "string",
427
- description: "Caption position",
428
- enum: ["top", "center", "bottom"],
429
- },
430
- wordsPerGroup: {
431
- type: "number",
432
- description: "Words shown at once (default: auto 3-5)",
433
- },
434
- language: {
435
- type: "string",
436
- description: "Whisper language hint (e.g., en, ko)",
437
- },
438
- fast: {
439
- type: "boolean",
440
- description: "Use ASS/FFmpeg only (no Remotion, forces ASS tier styles)",
441
- },
442
- },
443
- required: ["videoPath", "outputPath"],
444
- },
445
- };
446
-
447
- // ============================================================================
448
- // Tool Handlers
449
- // ============================================================================
450
-
451
- const scriptToVideoHandler: ToolHandler = async (args, context): Promise<ToolResult> => {
452
- const script = args.script as string;
453
- const outputDir = args.outputDir
454
- ? resolve(context.workingDirectory, args.outputDir as string)
455
- : resolve(context.workingDirectory, "script-video-output");
456
-
457
- try {
458
- const result = await executeScriptToVideo({
459
- script,
460
- outputDir,
461
- duration: args.duration as number | undefined,
462
- voice: args.voice as string | undefined,
463
- generator: args.generator as "runway" | "kling" | undefined,
464
- imageProvider: args.imageProvider as "openai" | "gemini" | undefined,
465
- aspectRatio: args.aspectRatio as "16:9" | "9:16" | "1:1" | undefined,
466
- imagesOnly: args.imagesOnly as boolean | undefined,
467
- noVoiceover: args.noVoiceover as boolean | undefined,
468
- retries: args.retries as number | undefined,
469
- creativity: args.creativity as "low" | "high" | undefined,
470
- storyboardProvider: args.storyboardProvider as "claude" | "openai" | "gemini" | undefined,
471
- });
472
-
473
- if (!result.success) {
474
- return {
475
- toolCallId: "",
476
- success: false,
477
- output: "",
478
- error: result.error || "Script-to-video pipeline failed",
479
- };
480
- }
481
-
482
- // Build summary
483
- const lines: string[] = [
484
- `✅ Script-to-Video complete!`,
485
- ``,
486
- `📁 Output: ${result.outputDir}`,
487
- `🎬 Scenes: ${result.scenes}`,
488
- ];
489
-
490
- if (result.totalDuration) {
491
- lines.push(`⏱️ Duration: ${result.totalDuration.toFixed(1)}s`);
492
- }
493
-
494
- if (result.storyboardPath) {
495
- lines.push(`📝 Storyboard: storyboard.json`);
496
- }
497
-
498
- // Show narrations with failed count
499
- const successfulNarrations = result.narrationEntries?.filter((e) => !e.failed && e.path) || [];
500
- const failedNarrationCount = result.failedNarrations?.length || 0;
501
- if (successfulNarrations.length > 0 || failedNarrationCount > 0) {
502
- if (failedNarrationCount > 0) {
503
- lines.push(`🎙️ Narrations: ${successfulNarrations.length}/${result.scenes} (${failedNarrationCount} failed: scene ${result.failedNarrations!.join(", ")})`);
504
- } else {
505
- lines.push(`🎙️ Narrations: ${successfulNarrations.length} narration-*.mp3`);
506
- }
507
- }
508
-
509
- if (result.images && result.images.length > 0) {
510
- lines.push(`🖼️ Images: ${result.images.length} scene-*.png`);
511
- }
512
-
513
- if (result.videos && result.videos.length > 0) {
514
- lines.push(`🎥 Videos: ${result.videos.length} scene-*.mp4`);
515
- }
516
-
517
- if (result.failedScenes && result.failedScenes.length > 0) {
518
- lines.push(`⚠️ Failed video scenes: ${result.failedScenes.join(", ")}`);
519
- }
520
-
521
- if (result.projectPath) {
522
- lines.push(`📄 Project: project.vibe.json`);
523
- }
524
-
525
- return {
526
- toolCallId: "",
527
- success: true,
528
- output: lines.join("\n"),
529
- };
530
- } catch (error) {
531
- return {
532
- toolCallId: "",
533
- success: false,
534
- output: "",
535
- error: `Script-to-video failed: ${error instanceof Error ? error.message : String(error)}`,
536
- };
537
- }
538
- };
539
-
540
- const highlightsHandler: ToolHandler = async (args, context): Promise<ToolResult> => {
541
- const media = resolve(context.workingDirectory, args.media as string);
542
- const output = args.output
543
- ? resolve(context.workingDirectory, args.output as string)
544
- : undefined;
545
- const project = args.project
546
- ? resolve(context.workingDirectory, args.project as string)
547
- : undefined;
548
-
549
- try {
550
- const result = await executeHighlights({
551
- media,
552
- output,
553
- project,
554
- duration: args.duration as number | undefined,
555
- count: args.count as number | undefined,
556
- threshold: args.threshold as number | undefined,
557
- criteria: args.criteria as "emotional" | "informative" | "funny" | "all" | undefined,
558
- language: args.language as string | undefined,
559
- useGemini: args.useGemini as boolean | undefined,
560
- lowRes: args.lowRes as boolean | undefined,
561
- });
562
-
563
- if (!result.success) {
564
- return {
565
- toolCallId: "",
566
- success: false,
567
- output: "",
568
- error: result.error || "Highlight extraction failed",
569
- };
570
- }
571
-
572
- if (result.highlights.length === 0) {
573
- return {
574
- toolCallId: "",
575
- success: true,
576
- output: "No highlights detected in the content.",
577
- };
578
- }
579
-
580
- // Build summary
581
- const lines: string[] = [
582
- `✅ Found ${result.highlights.length} highlights (${result.totalHighlightDuration.toFixed(1)}s total)`,
583
- ``,
584
- ];
585
-
586
- for (const h of result.highlights) {
587
- const startMin = Math.floor(h.startTime / 60);
588
- const startSec = (h.startTime % 60).toFixed(1);
589
- const endMin = Math.floor(h.endTime / 60);
590
- const endSec = (h.endTime % 60).toFixed(1);
591
- lines.push(`${h.index}. [${startMin}:${startSec.padStart(4, "0")} - ${endMin}:${endSec.padStart(4, "0")}] ${h.category} (${(h.confidence * 100).toFixed(0)}%)`);
592
- lines.push(` ${sanitizeAIResult(h.reason)}`);
593
- }
594
-
595
- if (result.outputPath) {
596
- lines.push(``, `💾 Saved to: ${result.outputPath}`);
597
- }
598
-
599
- if (result.projectPath) {
600
- lines.push(`📄 Project: ${result.projectPath}`);
601
- }
602
-
603
- return {
604
- toolCallId: "",
605
- success: true,
606
- output: lines.join("\n"),
607
- };
608
- } catch (error) {
609
- return {
610
- toolCallId: "",
611
- success: false,
612
- output: "",
613
- error: `Highlight extraction failed: ${error instanceof Error ? error.message : String(error)}`,
614
- };
615
- }
616
- };
617
-
618
- const autoShortsHandler: ToolHandler = async (args, context): Promise<ToolResult> => {
619
- const video = resolve(context.workingDirectory, args.video as string);
620
- const outputDir = args.outputDir
621
- ? resolve(context.workingDirectory, args.outputDir as string)
622
- : undefined;
623
-
624
- try {
625
- const result = await executeAutoShorts({
626
- video,
627
- outputDir,
628
- duration: args.duration as number | undefined,
629
- count: args.count as number | undefined,
630
- aspect: args.aspect as "9:16" | "1:1" | undefined,
631
- addCaptions: args.addCaptions as boolean | undefined,
632
- captionStyle: args.captionStyle as "minimal" | "bold" | "animated" | undefined,
633
- analyzeOnly: args.analyzeOnly as boolean | undefined,
634
- language: args.language as string | undefined,
635
- useGemini: args.useGemini as boolean | undefined,
636
- lowRes: args.lowRes as boolean | undefined,
637
- });
638
-
639
- if (!result.success) {
640
- return {
641
- toolCallId: "",
642
- success: false,
643
- output: "",
644
- error: result.error || "Auto shorts generation failed",
645
- };
646
- }
647
-
648
- if (result.shorts.length === 0) {
649
- return {
650
- toolCallId: "",
651
- success: true,
652
- output: "No suitable shorts found in the video.",
653
- };
654
- }
655
-
656
- // Build summary
657
- const isAnalyzeOnly = args.analyzeOnly as boolean;
658
- const lines: string[] = [
659
- isAnalyzeOnly
660
- ? `📊 Found ${result.shorts.length} potential shorts:`
661
- : `✅ Generated ${result.shorts.length} short(s):`,
662
- ``,
663
- ];
664
-
665
- for (const s of result.shorts) {
666
- const startMin = Math.floor(s.startTime / 60);
667
- const startSec = (s.startTime % 60).toFixed(1);
668
- const endMin = Math.floor(s.endTime / 60);
669
- const endSec = (s.endTime % 60).toFixed(1);
670
- lines.push(`[Short ${s.index}] ${startMin}:${startSec.padStart(4, "0")} - ${endMin}:${endSec.padStart(4, "0")} (${s.duration.toFixed(1)}s)`);
671
- lines.push(` ${sanitizeAIResult(s.reason)}`);
672
- lines.push(` Confidence: ${(s.confidence * 100).toFixed(0)}%`);
673
- if (s.outputPath) {
674
- lines.push(` 📁 ${s.outputPath}`);
675
- }
676
- }
677
-
678
- return {
679
- toolCallId: "",
680
- success: true,
681
- output: lines.join("\n"),
682
- };
683
- } catch (error) {
684
- return {
685
- toolCallId: "",
686
- success: false,
687
- output: "",
688
- error: `Auto shorts failed: ${error instanceof Error ? error.message : String(error)}`,
689
- };
690
- }
691
- };
692
-
693
- const geminiVideoHandler: ToolHandler = async (args, context): Promise<ToolResult> => {
694
- let source = args.source as string;
695
-
696
- // Resolve local paths
697
- if (!source.includes("youtube.com") && !source.includes("youtu.be")) {
698
- source = resolve(context.workingDirectory, source);
699
- }
700
-
701
- try {
702
- const result = await executeGeminiVideo({
703
- source,
704
- prompt: args.prompt as string,
705
- model: args.model as "flash" | "flash-2.5" | "pro" | undefined,
706
- fps: args.fps as number | undefined,
707
- start: args.start as number | undefined,
708
- end: args.end as number | undefined,
709
- lowRes: args.lowRes as boolean | undefined,
710
- });
711
-
712
- if (!result.success) {
713
- return {
714
- toolCallId: "",
715
- success: false,
716
- output: "",
717
- error: result.error || "Video analysis failed",
718
- };
719
- }
720
-
721
- // Build output
722
- const lines: string[] = [sanitizeAIResult(result.response || "")];
723
-
724
- if (result.model || result.totalTokens) {
725
- lines.push(``);
726
- lines.push(`---`);
727
- if (result.model) {
728
- lines.push(`Model: ${result.model}`);
729
- }
730
- if (result.totalTokens) {
731
- lines.push(`Tokens: ${result.totalTokens.toLocaleString()}`);
732
- }
733
- }
734
-
735
- return {
736
- toolCallId: "",
737
- success: true,
738
- output: lines.join("\n"),
739
- };
740
- } catch (error) {
741
- return {
742
- toolCallId: "",
743
- success: false,
744
- output: "",
745
- error: `Gemini video analysis failed: ${error instanceof Error ? error.message : String(error)}`,
746
- };
747
- }
748
- };
749
-
750
/**
 * Agent tool handler for the generic analyze tool.
 *
 * `args.source` is used verbatim when it begins with `http://` or `https://`;
 * any other value is treated as a local path and resolved against
 * `context.workingDirectory`. The remaining arguments (`prompt`, `model`,
 * `fps`, `start`, `end`, `lowRes`) are forwarded to `executeAnalyze` unchanged.
 *
 * On success the output is `[<sourceType>] <sanitized response>`, followed by
 * a `---` footer listing the model and/or token count when either is reported.
 * Failures reported by `executeAnalyze`, and exceptions thrown while it runs,
 * are returned as a `ToolResult` with `success: false`.
 */
const analyzeHandler: ToolHandler = async (args, context): Promise<ToolResult> => {
  const rawSource = args.source as string;
  const isRemote = rawSource.startsWith("http://") || rawSource.startsWith("https://");
  // Only local paths are resolved; URLs must reach the analyzer untouched.
  const source = isRemote ? rawSource : resolve(context.workingDirectory, rawSource);

  // Single place that shapes every failure result of this handler.
  const failure = (error: string): ToolResult => ({
    toolCallId: "",
    success: false,
    output: "",
    error,
  });

  try {
    const analysis = await executeAnalyze({
      source,
      prompt: args.prompt as string,
      model: args.model as "flash" | "flash-2.5" | "pro" | undefined,
      fps: args.fps as number | undefined,
      start: args.start as number | undefined,
      end: args.end as number | undefined,
      lowRes: args.lowRes as boolean | undefined,
    });

    if (!analysis.success) {
      return failure(analysis.error || "Analysis failed");
    }

    const headline = `[${analysis.sourceType}] ${sanitizeAIResult(analysis.response || "")}`;

    // Footer entries are collected first so the separator is emitted only
    // when there is at least one of them.
    const footer: string[] = [];
    if (analysis.model) {
      footer.push(`Model: ${analysis.model}`);
    }
    if (analysis.totalTokens) {
      footer.push(`Tokens: ${analysis.totalTokens.toLocaleString()}`);
    }

    const sections = footer.length > 0 ? [headline, ``, `---`, ...footer] : [headline];

    return {
      toolCallId: "",
      success: true,
      output: sections.join("\n"),
    };
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    return failure(`Analysis failed: ${reason}`);
  }
};
806
-
807
/**
 * Agent tool handler that edits one or more input images with an AI image
 * provider and saves the first returned image to disk.
 *
 * Arguments read from `args`:
 * - `images`: input image paths, resolved against `context.workingDirectory`.
 * - `prompt`: edit instruction passed to the provider.
 * - `output`: destination path; defaults to `edited-<timestamp>.png`.
 * - `provider`: `"gemini"` (default), `"openai"` or `"grok"`. Any other value
 *   is handled as `"gemini"`.
 * - `model`: Gemini model, `"flash"` (default) or `"pro"`.
 * - `aspectRatio`: forwarded to Gemini and Grok.
 * - `resolution`: forwarded to Gemini only.
 *
 * Input image limits enforced here: Grok 1, Gemini flash 3, Gemini pro 14,
 * OpenAI 16.
 *
 * Validation, provider, download and file-system failures are returned as a
 * `ToolResult` with `success: false`. Success is reported only after an image
 * has actually been written to the output path.
 */
const editImageHandler: ToolHandler = async (args, context): Promise<ToolResult> => {
  const fail = (error: string): ToolResult => ({
    toolCallId: "",
    success: false,
    output: "",
    error,
  });

  const images = args.images as string[];
  const prompt = args.prompt as string;
  const output = (args.output as string) || `edited-${getTimestamp()}.png`;
  const model = (args.model as "flash" | "pro") || "flash";
  const aspectRatio = args.aspectRatio as string | undefined;
  const resolution = args.resolution as string | undefined;

  // Normalise once: unknown or missing providers run on Gemini, so they must
  // also get Gemini's key lookup, error label and image-count limits.
  const requestedProvider = args.provider as string | undefined;
  const provider: "gemini" | "openai" | "grok" =
    requestedProvider === "openai" || requestedProvider === "grok" ? requestedProvider : "gemini";

  // Config key name and human-readable label for each provider's API key.
  const providerKeys = {
    gemini: { configKey: "google", label: "Google (GOOGLE_API_KEY)" },
    openai: { configKey: "openai", label: "OpenAI (OPENAI_API_KEY)" },
    grok: { configKey: "xai", label: "xAI (XAI_API_KEY)" },
  } as const;

  try {
    if (!Array.isArray(images) || images.length === 0) {
      return fail("images array is required (at least one input image path)");
    }

    const { configKey, label } = providerKeys[provider];
    const apiKey = await getApiKeyFromConfig(configKey);
    if (!apiKey) {
      return fail(`${label} API key required. Configure via 'vibe setup'.`);
    }

    // Validate image count per provider
    if (provider === "grok" && images.length > 1) {
      return fail(
        "Grok supports only 1 input image for editing. Use gemini (up to 14) or openai (up to 16) for multi-image editing.",
      );
    }
    if (provider === "gemini") {
      const maxImages = model === "pro" ? 14 : 3;
      if (images.length > maxImages) {
        return fail(`Too many images. Gemini ${model} model supports up to ${maxImages} images.`);
      }
    }
    if (provider === "openai" && images.length > 16) {
      return fail("OpenAI supports up to 16 input images for editing.");
    }

    // The reads are independent; Promise.all keeps the results in input order.
    const imageBuffers: Buffer[] = await Promise.all(
      images.map((imagePath) => readFile(resolve(context.workingDirectory, imagePath))),
    );

    let result: import("@vibeframe/ai-providers").ImageResult;

    if (provider === "openai") {
      const { OpenAIImageProvider } = await import("@vibeframe/ai-providers");
      const openai = new OpenAIImageProvider();
      await openai.initialize({ apiKey });
      result = await openai.editImage(imageBuffers, prompt);
    } else if (provider === "grok") {
      const { GrokProvider } = await import("@vibeframe/ai-providers");
      const grok = new GrokProvider();
      await grok.initialize({ apiKey });
      // Exactly one image is guaranteed by the count check above.
      result = await grok.editImage(imageBuffers[0], prompt, {
        aspectRatio: aspectRatio,
      });
    } else {
      const { GeminiProvider } = await import("@vibeframe/ai-providers");
      const gemini = new GeminiProvider();
      await gemini.initialize({ apiKey });
      result = await gemini.editImage(imageBuffers, prompt, {
        model,
        aspectRatio: aspectRatio as "1:1" | "16:9" | "9:16" | "3:4" | "4:3" | "3:2" | "2:3" | "21:9" | undefined,
        resolution: resolution as "1K" | "2K" | "4K" | undefined,
      });
    }

    if (!result.success || !result.images || result.images.length === 0) {
      return fail(`Image editing failed: ${result.error || "No image generated"}`);
    }

    // Save the edited image (handle both base64 and URL)
    const img = result.images[0];
    const outputPath = resolve(context.workingDirectory, output);
    if (img.base64) {
      await writeFile(outputPath, Buffer.from(img.base64, "base64"));
    } else if (img.url) {
      const resp = await fetch(img.url);
      // fetch() resolves on HTTP errors too; without this check an error page
      // would be written to disk and reported as a successful edit.
      if (!resp.ok) {
        return fail(
          `Image editing failed: could not download result (HTTP ${resp.status} ${resp.statusText})`,
        );
      }
      await writeFile(outputPath, Buffer.from(await resp.arrayBuffer()));
    } else {
      // Nothing to save: do not claim success for a file that was never written.
      return fail("Image editing failed: provider returned neither base64 data nor a URL");
    }

    return {
      toolCallId: "",
      success: true,
      output: `Image edited: ${output}\nProvider: ${provider}\nInput images: ${images.length}\nPrompt: ${prompt}`,
    };
  } catch (error) {
    return fail(`Failed to edit image: ${error instanceof Error ? error.message : String(error)}`);
  }
};
937
-
938
/**
 * Agent tool handler that regenerates selected scenes of a project through
 * `executeRegenerateScene`.
 *
 * Arguments read from `args`:
 * - `projectDir` (required): forwarded as-is.
 * - `scenes` (required): non-empty array of scene numbers.
 * - `videoOnly` / `imageOnly`: when `imageOnly` is set, video-only mode is
 *   forced off; otherwise `videoOnly` defaults to `true`.
 * - `generator`: `"kling"` (default) or `"runway"`.
 * - `aspectRatio`: `"16:9"` (default), `"9:16"` or `"1:1"`.
 * - `referenceScene`: optional, forwarded unchanged.
 *
 * The output lists the regenerated scenes and, if any, the failed ones.
 * Missing arguments, failures reported by `executeRegenerateScene`, and
 * exceptions thrown by it are all returned as a `ToolResult` with
 * `success: false`, matching the other handlers in this file.
 */
const regenerateSceneHandler: ToolHandler = async (args): Promise<ToolResult> => {
  const fail = (error: string): ToolResult => ({
    toolCallId: "",
    success: false,
    output: "",
    error,
  });

  const { projectDir, scenes, videoOnly, imageOnly, generator = "kling", aspectRatio = "16:9", referenceScene } = args as {
    projectDir: string;
    scenes: number[];
    videoOnly?: boolean;
    imageOnly?: boolean;
    generator?: "kling" | "runway";
    aspectRatio?: "16:9" | "9:16" | "1:1";
    referenceScene?: number;
  };

  if (!projectDir) {
    return fail("projectDir is required");
  }

  if (!Array.isArray(scenes) || scenes.length === 0) {
    return fail("scenes array is required (e.g., [3, 4, 5])");
  }

  // Default to videoOnly unless imageOnly is explicitly set
  const effectiveVideoOnly = imageOnly ? false : (videoOnly ?? true);

  try {
    const result = await executeRegenerateScene({
      projectDir,
      scenes,
      videoOnly: effectiveVideoOnly,
      imageOnly,
      generator,
      aspectRatio,
      referenceScene,
    });

    if (!result.success) {
      return fail(result.error || "Scene regeneration failed");
    }

    let output = `Regenerated ${result.regeneratedScenes.length} scene(s): ${result.regeneratedScenes.join(", ")}`;
    if (result.failedScenes.length > 0) {
      output += `\nFailed scenes: ${result.failedScenes.join(", ")}`;
    }

    return {
      toolCallId: "",
      success: true,
      output,
    };
  } catch (error) {
    // Without this catch a thrown error would reject the handler's promise
    // instead of producing a failure result like the sibling handlers do.
    return fail(`Scene regeneration failed: ${error instanceof Error ? error.message : String(error)}`);
  }
};
1000
-
1001
/**
 * Agent tool handler that applies animated captions to a video through
 * `executeAnimatedCaption`.
 *
 * `args.videoPath` and `args.outputPath` are resolved against
 * `context.workingDirectory`. Defaults applied here: `style` "highlight",
 * `highlightColor` "#FFFF00", `position` "bottom". `fontSize`,
 * `wordsPerGroup`, `language` and `fast` are forwarded as given.
 *
 * On success the output summarises the output path, word count, group count,
 * style and tier reported by `executeAnimatedCaption`. A reported failure, or
 * an exception thrown while it runs, is returned as a `ToolResult` with
 * `success: false`.
 */
const animatedCaptionHandler: ToolHandler = async (args, context): Promise<ToolResult> => {
  const { workingDirectory } = context;
  const inputVideo = resolve(workingDirectory, args.videoPath as string);
  const captionedVideo = resolve(workingDirectory, args.outputPath as string);

  // Shared shape for every failure this handler can return.
  const failure = (error: string): ToolResult => ({
    toolCallId: "",
    success: false,
    output: "",
    error,
  });

  try {
    const captioned = await executeAnimatedCaption({
      videoPath: inputVideo,
      outputPath: captionedVideo,
      style: (args.style as AnimatedCaptionStyle) || "highlight",
      highlightColor: (args.highlightColor as string) || "#FFFF00",
      fontSize: args.fontSize as number | undefined,
      position: (args.position as "top" | "center" | "bottom") || "bottom",
      wordsPerGroup: args.wordsPerGroup as number | undefined,
      language: args.language as string | undefined,
      fast: args.fast as boolean | undefined,
    });

    return captioned.success
      ? {
          toolCallId: "",
          success: true,
          output: `✅ Animated captions applied!\n\nOutput: ${captioned.outputPath}\nWords: ${captioned.wordCount}\nGroups: ${captioned.groupCount}\nStyle: ${captioned.style}\nTier: ${captioned.tier}`,
        }
      : failure(captioned.error || "Animated caption failed");
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    return failure(`Animated caption failed: ${reason}`);
  }
};
1041
-
1042
- // ============================================================================
1043
- // Registration
1044
- // ============================================================================
1045
-
1046
/**
 * Registers every pipeline tool definition with its handler on `registry`.
 *
 * Tools are registered in the order listed below.
 *
 * @param registry - Tool registry that receives the definition/handler pairs.
 */
export function registerPipelineTools(registry: ToolRegistry): void {
  // One row per tool: [definition, handler].
  const pipelineTools = [
    [scriptToVideoDef, scriptToVideoHandler],
    [highlightsDef, highlightsHandler],
    [autoShortsDef, autoShortsHandler],
    [geminiVideoDef, geminiVideoHandler],
    [analyzeDef, analyzeHandler],
    [editImageDef, editImageHandler],
    [regenerateSceneDef, regenerateSceneHandler],
    [animatedCaptionDef, animatedCaptionHandler],
  ] as const;

  for (const [definition, handler] of pipelineTools) {
    registry.register(definition, handler);
  }
}