shmakk 1.2.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -2
- package/package.json +2 -2
- package/scripts/demo/record.py +196 -0
- package/scripts/demo/scenes.html +913 -0
- package/skills/media-video-compose.md +320 -0
- package/skills/media-video-script.md +204 -0
- package/skills/media-video-voice.md +184 -0
- package/src/agent-overview.js +320 -0
- package/src/agent-roster.js +53 -0
- package/src/agent.js +178 -18
- package/src/cli.js +193 -86
- package/src/completions.js +3 -1
- package/src/correction.js +11 -4
- package/src/endpoints.js +94 -31
- package/src/guard.js +101 -0
- package/src/index.js +19 -5
- package/src/llm.js +462 -52
- package/src/markdown.js +217 -0
- package/src/notify.js +34 -0
- package/src/pty.js +1 -1
- package/src/review.js +8 -1
- package/src/self-commands.js +108 -2
- package/src/session.js +58 -2
- package/src/subagent.js +12 -1
- package/src/taskClassifier.js +2 -2
- package/src/team.js +22 -0
- package/src/tools.js +408 -1
- package/src/workflows.js +32 -0
package/src/tools.js
CHANGED
|
@@ -9,6 +9,27 @@ const { webSearch, fetchUrl } = require('./web');
|
|
|
9
9
|
const { dispatchBrowser, classifyBrowserCommand } = require('./browser');
|
|
10
10
|
const { recordEdit } = require('./edit-tracker');
|
|
11
11
|
const { appendMemory } = require('./memory');
|
|
12
|
+
const { isMutationTool, hashArgs } = require('./guard');
|
|
13
|
+
const https = require('https');
|
|
14
|
+
const http = require('http');
|
|
15
|
+
const os = require('os');
|
|
16
|
+
|
|
17
|
+
// Lazy-load TTS (kokoro-js is an optional dep; only required when
|
|
18
|
+
// tts_generate is actually called).
|
|
19
|
+
let _ttsGenerate = null;
|
|
20
|
+
function _getTtsGenerate() {
|
|
21
|
+
if (_ttsGenerate) return _ttsGenerate;
|
|
22
|
+
try {
|
|
23
|
+
({ generate: _ttsGenerate } = require('./services/tts'));
|
|
24
|
+
} catch (e) {
|
|
25
|
+
throw new Error(
|
|
26
|
+
'TTS dependencies not installed. Run: npm run setup:voice\n' +
|
|
27
|
+
'Or: npm install --include=optional\n' +
|
|
28
|
+
`Details: ${e.message}`,
|
|
29
|
+
);
|
|
30
|
+
}
|
|
31
|
+
return _ttsGenerate;
|
|
32
|
+
}
|
|
12
33
|
|
|
13
34
|
const MAX_FILE_BYTES = 64 * 1024;
|
|
14
35
|
|
|
@@ -149,6 +170,65 @@ const TOOLS = [
|
|
|
149
170
|
},
|
|
150
171
|
},
|
|
151
172
|
}},
|
|
173
|
+
{ type: 'function', function: {
|
|
174
|
+
name: 'image_gen',
|
|
175
|
+
description: 'Generate an image from a text prompt using OpenAI DALL-E. The image is saved to disk and the file path is returned. Requires SHMAKK_OPENAI_API_KEY env var.',
|
|
176
|
+
parameters: {
|
|
177
|
+
type: 'object',
|
|
178
|
+
required: ['prompt'],
|
|
179
|
+
properties: {
|
|
180
|
+
prompt: { type: 'string', description: 'Text description of the image to generate' },
|
|
181
|
+
outputPath: { type: 'string', description: 'Optional output path. Defaults to a temp file.' },
|
|
182
|
+
size: { type: 'string', enum: ['1024x1024', '1792x1024', '1024x1792'], description: 'Image size. Defaults to 1024x1024.' },
|
|
183
|
+
quality: { type: 'string', enum: ['standard', 'hd'], description: 'Quality level. Defaults to standard.' },
|
|
184
|
+
style: { type: 'string', enum: ['vivid', 'natural'], description: 'Style. Defaults to vivid.' },
|
|
185
|
+
},
|
|
186
|
+
},
|
|
187
|
+
}},
|
|
188
|
+
{ type: 'function', function: {
|
|
189
|
+
name: 'tts_generate',
|
|
190
|
+
description: 'Generate speech audio from text using Kokoro TTS (local, no API key needed). Returns the audio file path and voice used.',
|
|
191
|
+
parameters: {
|
|
192
|
+
type: 'object',
|
|
193
|
+
required: ['text'],
|
|
194
|
+
properties: {
|
|
195
|
+
text: { type: 'string', description: 'Text to convert to speech' },
|
|
196
|
+
outputPath: { type: 'string', description: 'Optional WAV output path. Defaults to a temp file.' },
|
|
197
|
+
voice: { type: 'string', description: 'Voice name. Defaults to af_heart. Use list_voices tool to discover available voices.' },
|
|
198
|
+
speed: { type: 'number', description: 'Speech speed. Defaults to 1.5.' },
|
|
199
|
+
},
|
|
200
|
+
},
|
|
201
|
+
}},
|
|
202
|
+
{ type: 'function', function: {
|
|
203
|
+
name: 'video_probe',
|
|
204
|
+
description: 'Get media file metadata using ffprobe: duration, codec, resolution, frame rate, etc.',
|
|
205
|
+
parameters: {
|
|
206
|
+
type: 'object',
|
|
207
|
+
required: ['path'],
|
|
208
|
+
properties: {
|
|
209
|
+
path: { type: 'string', description: 'Path to the media file to probe' },
|
|
210
|
+
},
|
|
211
|
+
},
|
|
212
|
+
}},
|
|
213
|
+
{ type: 'function', function: {
|
|
214
|
+
name: 'video_compose',
|
|
215
|
+
description: 'Compose images, audio tracks, and transitions into a video using ffmpeg. Takes a structured timeline of segments and assembles them into a single MP4 file.',
|
|
216
|
+
parameters: {
|
|
217
|
+
type: 'object',
|
|
218
|
+
required: ['segments', 'outputPath'],
|
|
219
|
+
properties: {
|
|
220
|
+
segments: {
|
|
221
|
+
type: 'array',
|
|
222
|
+
description: 'Array of segment objects. Each segment: { imagePath: string (required), audioPath: string (required), startSec: number, durationSec: number, transition: string|null (fade/crossfade/dissolve/slide_left/slide_right/zoompan) }',
|
|
223
|
+
},
|
|
224
|
+
outputPath: { type: 'string', description: 'Output MP4 file path' },
|
|
225
|
+
width: { type: 'number', description: 'Output video width. Defaults to 1920.' },
|
|
226
|
+
height: { type: 'number', description: 'Output video height. Defaults to 1080.' },
|
|
227
|
+
fps: { type: 'number', description: 'Output frame rate. Defaults to 24.' },
|
|
228
|
+
backgroundColor: { type: 'string', description: 'Background color as hex. Defaults to #000000.' },
|
|
229
|
+
},
|
|
230
|
+
},
|
|
231
|
+
}},
|
|
152
232
|
];
|
|
153
233
|
|
|
154
234
|
// Tool safety classification.
|
|
@@ -174,6 +254,10 @@ function classifyTool(name, args, mcpManager) {
|
|
|
174
254
|
if (name === 'web_search' || name === 'fetch_url') return 'safe';
|
|
175
255
|
if (name === 'browser') return classifyBrowserCommand(args);
|
|
176
256
|
if (name === 'remember') return 'safe';
|
|
257
|
+
if (name === 'image_gen') return 'unsafe'; // external API call, costs money
|
|
258
|
+
if (name === 'tts_generate') return 'safe'; // local-only, no network
|
|
259
|
+
if (name === 'video_probe') return 'safe'; // read-only local metadata
|
|
260
|
+
if (name === 'video_compose') return 'safe'; // local ffmpeg, reads only workspace files
|
|
177
261
|
return 'uncertain';
|
|
178
262
|
}
|
|
179
263
|
|
|
@@ -199,6 +283,10 @@ function describeTool(name, args, mcpManager) {
|
|
|
199
283
|
if (cmd === 'evaluate') return `browser eval JS`;
|
|
200
284
|
return `browser ${cmd}`;
|
|
201
285
|
}
|
|
286
|
+
if (name === 'image_gen') return `image_gen: "${(args.prompt || '').slice(0, 80)}" (${args.size || '1024x1024'})`;
|
|
287
|
+
if (name === 'tts_generate') return `tts_generate: "${(args.text || '').slice(0, 80)}" (voice: ${args.voice || 'af_heart'})`;
|
|
288
|
+
if (name === 'video_probe') return `video_probe ${args.path || ''}`;
|
|
289
|
+
if (name === 'video_compose') return `video_compose ${(args.segments || []).length} segments → ${args.outputPath || ''}`;
|
|
202
290
|
return `${name} ${JSON.stringify(args).slice(0, 80)}`;
|
|
203
291
|
}
|
|
204
292
|
|
|
@@ -229,7 +317,23 @@ function runCmd(cwd, cmd, signal) {
|
|
|
229
317
|
async function dispatchTool(name, args, roots, confirmTool, signal, mcpManager) {
|
|
230
318
|
if (signal && signal.aborted) return { error: 'aborted' };
|
|
231
319
|
const safety = classifyTool(name, args, mcpManager);
|
|
232
|
-
|
|
320
|
+
|
|
321
|
+
// ── Mutation-tool approval ────────────────────────────────────────────
|
|
322
|
+
// Every mutation tool MUST have explicit, fresh user approval before
|
|
323
|
+
// execution. This check is the runtime enforcement — even if the agent
|
|
324
|
+
// loop has a bug, the tool refuses to run without valid approval.
|
|
325
|
+
if (isMutationTool(name)) {
|
|
326
|
+
if (!confirmTool) return { error: 'mutation tool requires explicit user approval (no confirmTool available)' };
|
|
327
|
+
const ok = await confirmTool({ name, args, safety, description: describeTool(name, args, mcpManager) });
|
|
328
|
+
if (!ok) {
|
|
329
|
+
try {
|
|
330
|
+
const audit = require('./audit');
|
|
331
|
+
audit.append({ kind: 'tool-denied', name, argsHash: hashArgs(args) });
|
|
332
|
+
} catch {}
|
|
333
|
+
return { error: 'user declined' };
|
|
334
|
+
}
|
|
335
|
+
} else if (confirmTool) {
|
|
336
|
+
// Non-mutation tools: still confirm, but don't enforce the same strictness.
|
|
233
337
|
const ok = await confirmTool({ name, args, safety, description: describeTool(name, args, mcpManager) });
|
|
234
338
|
if (!ok) return { error: 'user declined' };
|
|
235
339
|
}
|
|
@@ -360,6 +464,309 @@ async function dispatchTool(name, args, roots, confirmTool, signal, mcpManager)
|
|
|
360
464
|
? { ok: true, saved_to: r.path, line: r.line }
|
|
361
465
|
: { error: r.error };
|
|
362
466
|
}
|
|
467
|
+
if (name === 'image_gen') {
|
|
468
|
+
const apiKey = process.env.SHMAKK_OPENAI_API_KEY;
|
|
469
|
+
if (!apiKey) return { error: 'SHMAKK_OPENAI_API_KEY env var is not set' };
|
|
470
|
+
const prompt = String(args.prompt || '').trim();
|
|
471
|
+
if (!prompt) return { error: 'prompt is required' };
|
|
472
|
+
const size = args.size || '1024x1024';
|
|
473
|
+
const quality = args.quality || 'standard';
|
|
474
|
+
const style = args.style || 'vivid';
|
|
475
|
+
const outputPath = args.outputPath || path.join(os.tmpdir(), `shmakk-img-${Date.now()}.png`);
|
|
476
|
+
|
|
477
|
+
const body = JSON.stringify({
|
|
478
|
+
model: 'dall-e-3',
|
|
479
|
+
prompt,
|
|
480
|
+
n: 1,
|
|
481
|
+
size,
|
|
482
|
+
quality,
|
|
483
|
+
style,
|
|
484
|
+
response_format: 'b64_json',
|
|
485
|
+
});
|
|
486
|
+
|
|
487
|
+
const postData = await new Promise((resolve, reject) => {
|
|
488
|
+
const url = new URL('https://api.openai.com/v1/images/generations');
|
|
489
|
+
const req = https.request({
|
|
490
|
+
hostname: url.hostname,
|
|
491
|
+
path: url.pathname,
|
|
492
|
+
method: 'POST',
|
|
493
|
+
headers: {
|
|
494
|
+
'Authorization': `Bearer ${apiKey}`,
|
|
495
|
+
'Content-Type': 'application/json',
|
|
496
|
+
},
|
|
497
|
+
timeout: 120000,
|
|
498
|
+
}, (res) => {
|
|
499
|
+
let data = '';
|
|
500
|
+
res.on('data', (chunk) => { data += chunk; });
|
|
501
|
+
res.on('end', () => {
|
|
502
|
+
try {
|
|
503
|
+
const json = JSON.parse(data);
|
|
504
|
+
if (res.statusCode >= 400) {
|
|
505
|
+
reject(new Error(`OpenAI API error ${res.statusCode}: ${(json.error && json.error.message) || data.slice(0, 200)}`));
|
|
506
|
+
return;
|
|
507
|
+
}
|
|
508
|
+
resolve(json);
|
|
509
|
+
} catch (e) {
|
|
510
|
+
reject(new Error(`Failed to parse OpenAI response: ${data.slice(0, 200)}`));
|
|
511
|
+
}
|
|
512
|
+
});
|
|
513
|
+
});
|
|
514
|
+
req.on('error', reject);
|
|
515
|
+
req.on('timeout', () => { req.destroy(); reject(new Error('OpenAI API request timed out after 120s')); });
|
|
516
|
+
req.write(body);
|
|
517
|
+
req.end();
|
|
518
|
+
});
|
|
519
|
+
|
|
520
|
+
const b64 = postData.data && postData.data[0] && postData.data[0].b64_json;
|
|
521
|
+
if (!b64) return { error: 'No image data in OpenAI response' };
|
|
522
|
+
fs.mkdirSync(path.dirname(outputPath), { recursive: true });
|
|
523
|
+
fs.writeFileSync(outputPath, Buffer.from(b64, 'base64'));
|
|
524
|
+
recordEdit({ filePath: outputPath, oldContent: null, newContent: `[binary image ${(b64.length * 0.75) | 0} bytes]`, tool: 'image_gen' });
|
|
525
|
+
return {
|
|
526
|
+
ok: true,
|
|
527
|
+
imagePath: outputPath,
|
|
528
|
+
prompt,
|
|
529
|
+
size,
|
|
530
|
+
revised_prompt: postData.data[0].revised_prompt || prompt,
|
|
531
|
+
};
|
|
532
|
+
}
|
|
533
|
+
if (name === 'tts_generate') {
|
|
534
|
+
const text = String(args.text || '').trim();
|
|
535
|
+
if (!text) return { error: 'text is required' };
|
|
536
|
+
const outputPath = args.outputPath || path.join(os.tmpdir(), `shmakk-tts-${Date.now()}.wav`);
|
|
537
|
+
const opts = {};
|
|
538
|
+
if (args.voice) opts.voice = args.voice;
|
|
539
|
+
if (args.speed !== undefined) opts.speed = Number(args.speed);
|
|
540
|
+
opts.outputPath = outputPath;
|
|
541
|
+
|
|
542
|
+
try {
|
|
543
|
+
const ttsFn = _getTtsGenerate();
|
|
544
|
+
const result = await ttsFn(text, opts);
|
|
545
|
+
fs.mkdirSync(path.dirname(outputPath), { recursive: true });
|
|
546
|
+
return {
|
|
547
|
+
ok: true,
|
|
548
|
+
audioPath: result.audioPath,
|
|
549
|
+
voice: result.voice,
|
|
550
|
+
textLength: text.length,
|
|
551
|
+
};
|
|
552
|
+
} catch (e) {
|
|
553
|
+
return { error: `TTS generation failed: ${e.message}` };
|
|
554
|
+
}
|
|
555
|
+
}
|
|
556
|
+
if (name === 'video_probe') {
|
|
557
|
+
const p = within(roots, args.path);
|
|
558
|
+
if (!p) return { error: 'path outside workspace' };
|
|
559
|
+
if (!fs.existsSync(p)) return { error: `file not found: ${p}` };
|
|
560
|
+
|
|
561
|
+
const result = await new Promise((resolve) => {
|
|
562
|
+
const child = execFile('ffprobe', [
|
|
563
|
+
'-v', 'quiet',
|
|
564
|
+
'-print_format', 'json',
|
|
565
|
+
'-show_format',
|
|
566
|
+
'-show_streams',
|
|
567
|
+
p,
|
|
568
|
+
], { timeout: 30000, maxBuffer: 1024 * 1024 }, (err, stdout, stderr) => {
|
|
569
|
+
if (err) {
|
|
570
|
+
const msg = (stderr || '').toString().trim() || err.message;
|
|
571
|
+
resolve({ error: `ffprobe failed: ${msg}` });
|
|
572
|
+
return;
|
|
573
|
+
}
|
|
574
|
+
try {
|
|
575
|
+
const data = JSON.parse(stdout);
|
|
576
|
+
// Extract a clean summary
|
|
577
|
+
const summary = { path: p };
|
|
578
|
+
if (data.format) {
|
|
579
|
+
summary.format = data.format.format_name;
|
|
580
|
+
summary.durationSec = parseFloat(data.format.duration) || null;
|
|
581
|
+
summary.sizeBytes = parseInt(data.format.size, 10) || null;
|
|
582
|
+
summary.bitRate = parseInt(data.format.bit_rate, 10) || null;
|
|
583
|
+
}
|
|
584
|
+
if (data.streams) {
|
|
585
|
+
summary.streams = data.streams.map((s) => ({
|
|
586
|
+
index: s.index,
|
|
587
|
+
codec_type: s.codec_type,
|
|
588
|
+
codec_name: s.codec_name,
|
|
589
|
+
width: s.width || null,
|
|
590
|
+
height: s.height || null,
|
|
591
|
+
r_frame_rate: s.r_frame_rate || null,
|
|
592
|
+
sample_rate: s.sample_rate || null,
|
|
593
|
+
channels: s.channels || null,
|
|
594
|
+
duration_ts: s.duration_ts || null,
|
|
595
|
+
}));
|
|
596
|
+
}
|
|
597
|
+
resolve({ ok: true, ...summary, raw: data });
|
|
598
|
+
} catch (e) {
|
|
599
|
+
resolve({ error: `Failed to parse ffprobe output: ${e.message}` });
|
|
600
|
+
}
|
|
601
|
+
});
|
|
602
|
+
if (signal) {
|
|
603
|
+
const onAbort = () => { try { child.kill('SIGINT'); } catch {} };
|
|
604
|
+
signal.addEventListener('abort', onAbort, { once: true });
|
|
605
|
+
}
|
|
606
|
+
});
|
|
607
|
+
return result;
|
|
608
|
+
}
|
|
609
|
+
if (name === 'video_compose') {
|
|
610
|
+
const segments = args.segments || [];
|
|
611
|
+
if (!Array.isArray(segments) || segments.length === 0) {
|
|
612
|
+
return { error: 'segments must be a non-empty array' };
|
|
613
|
+
}
|
|
614
|
+
const outputPath = args.outputPath;
|
|
615
|
+
if (!outputPath) return { error: 'outputPath is required' };
|
|
616
|
+
|
|
617
|
+
// Validate all input files exist
|
|
618
|
+
for (let i = 0; i < segments.length; i++) {
|
|
619
|
+
const seg = segments[i];
|
|
620
|
+
if (!seg.imagePath) return { error: `segment[${i}]: imagePath is required` };
|
|
621
|
+
if (!seg.audioPath) return { error: `segment[${i}]: audioPath is required` };
|
|
622
|
+
const imgPath = within(roots, seg.imagePath);
|
|
623
|
+
const audPath = within(roots, seg.audioPath);
|
|
624
|
+
if (!imgPath) return { error: `segment[${i}]: imagePath outside workspace` };
|
|
625
|
+
if (!audPath) return { error: `segment[${i}]: audioPath outside workspace` };
|
|
626
|
+
if (!fs.existsSync(imgPath)) return { error: `segment[${i}]: imagePath not found: ${seg.imagePath}` };
|
|
627
|
+
if (!fs.existsSync(audPath)) return { error: `segment[${i}]: audioPath not found: ${seg.audioPath}` };
|
|
628
|
+
}
|
|
629
|
+
|
|
630
|
+
const width = args.width || 1920;
|
|
631
|
+
const height = args.height || 1080;
|
|
632
|
+
const fps = args.fps || 24;
|
|
633
|
+
const bgColor = args.backgroundColor || '#000000';
|
|
634
|
+
|
|
635
|
+
// First pass: probe each audio segment for its actual duration.
|
|
636
|
+
// Fall back to segment.durationSec if ffprobe is unavailable.
|
|
637
|
+
let segmentDurations = [];
|
|
638
|
+
for (let i = 0; i < segments.length; i++) {
|
|
639
|
+
const seg = segments[i];
|
|
640
|
+
const audPath = within(roots, seg.audioPath);
|
|
641
|
+
if (seg.durationSec && seg.durationSec > 0) {
|
|
642
|
+
segmentDurations.push({ ...seg, resolvedSec: seg.durationSec, audPath });
|
|
643
|
+
continue;
|
|
644
|
+
}
|
|
645
|
+
try {
|
|
646
|
+
const probeOut = await new Promise((resolve) => {
|
|
647
|
+
execFile('ffprobe', [
|
|
648
|
+
'-v', 'quiet',
|
|
649
|
+
'-print_format', 'json',
|
|
650
|
+
'-show_format',
|
|
651
|
+
audPath,
|
|
652
|
+
], { timeout: 10000, maxBuffer: 256 * 1024 }, (err, stdout) => {
|
|
653
|
+
if (err) { resolve(null); return; }
|
|
654
|
+
try { resolve(JSON.parse(stdout)); } catch { resolve(null); }
|
|
655
|
+
});
|
|
656
|
+
});
|
|
657
|
+
const dur = probeOut && probeOut.format && probeOut.format.duration
|
|
658
|
+
? parseFloat(probeOut.format.duration) : 3;
|
|
659
|
+
segmentDurations.push({ ...seg, resolvedSec: dur, audPath });
|
|
660
|
+
} catch {
|
|
661
|
+
segmentDurations.push({ ...seg, resolvedSec: seg.durationSec || 3, audPath });
|
|
662
|
+
}
|
|
663
|
+
}
|
|
664
|
+
|
|
665
|
+
// Build filter_complex: for each segment, scale/zoom the image to fill,
|
|
666
|
+
// concatenate with transitions.
|
|
667
|
+
const filterParts = [];
|
|
668
|
+
let totalDuration = 0;
|
|
669
|
+
const trimPairs = [];
|
|
670
|
+
|
|
671
|
+
// Build per-segment video inputs
|
|
672
|
+
for (let i = 0; i < segmentDurations.length; i++) {
|
|
673
|
+
const seg = segmentDurations[i];
|
|
674
|
+
const dur = seg.resolvedSec;
|
|
675
|
+
const imgPath = within(roots, seg.imagePath);
|
|
676
|
+
const trans = seg.transition || null;
|
|
677
|
+
|
|
678
|
+
// Each image looped for its duration, scaled to fill.
|
|
679
|
+
filterParts.push(`[${i}:v]loop=loop=-1:size=${Math.ceil(dur * fps)},trim=duration=${dur},setpts=PTS-STARTPTS,scale=${width}:${height}:force_original_aspect_ratio=crop,crop=${width}:${height},setsar=1[v${i}]`);
|
|
680
|
+
|
|
681
|
+
if (trans === 'fade' || trans === 'crossfade' || trans === 'dissolve') {
|
|
682
|
+
// Fade in at start, except first segment
|
|
683
|
+
if (i === 0) {
|
|
684
|
+
filterParts.push(`[v${i}]fade=t=in:st=0:d=0.5,fade=t=out:st=${dur - 0.5}:d=0.5[fv${i}]`);
|
|
685
|
+
} else {
|
|
686
|
+
filterParts.push(`[v${i}]fade=t=in:st=0:d=0.5,fade=t=out:st=${dur - 0.5}:d=0.5[fv${i}]`);
|
|
687
|
+
}
|
|
688
|
+
trimPairs.push(`[fv${i}]`);
|
|
689
|
+
} else if (trans === 'zoompan') {
|
|
690
|
+
filterParts.push(`[v${i}]zoompan=z='min(zoom+0.0015,1.5)':d=${Math.ceil(dur * fps)}:s=${width}x${height}[fv${i}]`);
|
|
691
|
+
trimPairs.push(`[fv${i}]`);
|
|
692
|
+
} else if (trans === 'slide_left') {
|
|
693
|
+
// Slide in from right
|
|
694
|
+
const steps = Math.ceil(dur * fps);
|
|
695
|
+
filterParts.push(`[v${i}]trim=duration=${dur},setpts=PTS-STARTPTS,format=rgba,fade=t=in:st=0:d=0.3:alpha=1,overlay=x='min(W-(W/2)*(t/${dur}),W)':y=0:format=auto,setsar=1,trim=duration=${dur}[fv${i}]`);
|
|
696
|
+
trimPairs.push(`[fv${i}]`);
|
|
697
|
+
} else if (trans === 'slide_right') {
|
|
698
|
+
filterParts.push(`[v${i}]trim=duration=${dur},setpts=PTS-STARTPTS,format=rgba,fade=t=in:st=0:d=0.3:alpha=1,setsar=1,trim=duration=${dur}[fv${i}]`);
|
|
699
|
+
trimPairs.push(`[fv${i}]`);
|
|
700
|
+
} else {
|
|
701
|
+
// No transition
|
|
702
|
+
trimPairs.push(`[v${i}]`);
|
|
703
|
+
}
|
|
704
|
+
totalDuration += dur;
|
|
705
|
+
}
|
|
706
|
+
|
|
707
|
+
// Concatenate all video segments
|
|
708
|
+
const concatInputs = trimPairs.join('');
|
|
709
|
+
filterParts.push(`${concatInputs}concat=n=${segmentDurations.length}:v=1:a=0[outv]`);
|
|
710
|
+
|
|
711
|
+
// Build audio inputs: concat all audio files
|
|
712
|
+
const audioInputs = [];
|
|
713
|
+
for (let i = 0; i < segmentDurations.length; i++) {
|
|
714
|
+
const seg = segmentDurations[i];
|
|
715
|
+
// Input index for the audio: total image inputs + i
|
|
716
|
+
const audioIdx = segments.length + i;
|
|
717
|
+
audioInputs.push(`[${audioIdx}:a]`);
|
|
718
|
+
}
|
|
719
|
+
filterParts.push(`${audioInputs.join('')}concat=n=${segmentDurations.length}:v=0:a=1[outa]`);
|
|
720
|
+
|
|
721
|
+
const filterComplex = filterParts.join(';');
|
|
722
|
+
|
|
723
|
+
// Build ffmpeg args
|
|
724
|
+
const ffmpegArgs = ['-y'];
|
|
725
|
+
// Image inputs
|
|
726
|
+
for (const seg of segmentDurations) {
|
|
727
|
+
ffmpegArgs.push('-loop', '1', '-i', within(roots, seg.imagePath));
|
|
728
|
+
}
|
|
729
|
+
// Audio inputs
|
|
730
|
+
for (const seg of segmentDurations) {
|
|
731
|
+
ffmpegArgs.push('-i', seg.audPath);
|
|
732
|
+
}
|
|
733
|
+
ffmpegArgs.push(
|
|
734
|
+
'-filter_complex', filterComplex,
|
|
735
|
+
'-map', '[outv]',
|
|
736
|
+
'-map', '[outa]',
|
|
737
|
+
'-c:v', 'libx264',
|
|
738
|
+
'-preset', 'medium',
|
|
739
|
+
'-crf', '23',
|
|
740
|
+
'-pix_fmt', 'yuv420p',
|
|
741
|
+
'-c:a', 'aac',
|
|
742
|
+
'-b:a', '192k',
|
|
743
|
+
'-shortest',
|
|
744
|
+
'-t', String(totalDuration),
|
|
745
|
+
outputPath,
|
|
746
|
+
);
|
|
747
|
+
|
|
748
|
+
fs.mkdirSync(path.dirname(path.resolve(roots[0], outputPath)), { recursive: true });
|
|
749
|
+
|
|
750
|
+
const composeResult = await new Promise((resolve) => {
|
|
751
|
+
const child = execFile('ffmpeg', ffmpegArgs, {
|
|
752
|
+
cwd: roots[0],
|
|
753
|
+
timeout: 300000, // 5 min timeout for video encoding
|
|
754
|
+
maxBuffer: 512 * 1024,
|
|
755
|
+
}, (err, stdout, stderr) => {
|
|
756
|
+
if (err) {
|
|
757
|
+
const msg = (stderr || '').toString().split('\n').slice(-5).join('\n') || err.message;
|
|
758
|
+
resolve({ error: `ffmpeg compose failed: ${msg}` });
|
|
759
|
+
return;
|
|
760
|
+
}
|
|
761
|
+
resolve({ ok: true, outputPath, durationSec: totalDuration, segmentCount: segments.length });
|
|
762
|
+
});
|
|
763
|
+
if (signal) {
|
|
764
|
+
const onAbort = () => { try { child.kill('SIGINT'); } catch {} };
|
|
765
|
+
signal.addEventListener('abort', onAbort, { once: true });
|
|
766
|
+
}
|
|
767
|
+
});
|
|
768
|
+
return composeResult;
|
|
769
|
+
}
|
|
363
770
|
return { error: `unknown tool: ${name}` };
|
|
364
771
|
}
|
|
365
772
|
|
package/src/workflows.js
CHANGED
|
@@ -217,6 +217,38 @@ const WORKFLOWS = {
|
|
|
217
217
|
},
|
|
218
218
|
],
|
|
219
219
|
},
|
|
220
|
+
|
|
221
|
+
// ── Hybrid workflows (pipeline with parallel sub-stages) ─────────────────
|
|
222
|
+
|
|
223
|
+
'video-production': {
|
|
224
|
+
id: 'video-production',
|
|
225
|
+
description: 'Produce a video: script → voiceover + visuals → composite',
|
|
226
|
+
topology: 'pipeline',
|
|
227
|
+
triggers: [
|
|
228
|
+
/\b(create|make|produce|generate|render)\s+(a\s+)?video\b/i,
|
|
229
|
+
/\bvideo\s+(production|editing|creation|demo|explainer)\b/i,
|
|
230
|
+
/\b(voice.?over|narration)\s+.*\bvideo\b/i,
|
|
231
|
+
/\b(composite|assemble|stitch)\s+.*\b(video|mp4|clip)\b/i,
|
|
232
|
+
],
|
|
233
|
+
steps: [
|
|
234
|
+
{
|
|
235
|
+
role: 'script',
|
|
236
|
+
task: 'Turn the user prompt into a timed JSON storyboard. Output an array of segments, each with: startTime (seconds), endTime (seconds), narration (text for TTS), visualDesc (prompt for image generation). Total duration must match the user request. For: {input}',
|
|
237
|
+
},
|
|
238
|
+
{
|
|
239
|
+
role: 'voice',
|
|
240
|
+
task: 'Generate TTS audio files for each segment from the narration text in the script output. Use tts_generate tool. Return per-segment audio file paths.',
|
|
241
|
+
},
|
|
242
|
+
{
|
|
243
|
+
role: 'visual',
|
|
244
|
+
task: 'Generate images for each segment from the visualDesc in the script output. Use image_gen tool. Return per-segment image file paths.',
|
|
245
|
+
},
|
|
246
|
+
{
|
|
247
|
+
role: 'compositor',
|
|
248
|
+
task: 'Assemble the final video from the per-segment audio and image files using video_compose, then concatenate segments with video_concat. Apply transitions, fit images to duration, mix audio tracks. Output a single .mp4 file.',
|
|
249
|
+
},
|
|
250
|
+
],
|
|
251
|
+
},
|
|
220
252
|
};
|
|
221
253
|
|
|
222
254
|
function listWorkflows() {
|