shmakk 1.2.0 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +68 -2
- package/package.json +2 -2
- package/scripts/demo/record.py +196 -0
- package/scripts/demo/scenes.html +913 -0
- package/skills/media-video-compose.md +320 -0
- package/skills/media-video-script.md +204 -0
- package/skills/media-video-voice.md +184 -0
- package/src/agent-overview.js +320 -0
- package/src/agent-roster.js +53 -0
- package/src/agent.js +178 -18
- package/src/cli.js +220 -86
- package/src/completions.js +3 -1
- package/src/correction.js +11 -4
- package/src/endpoints.js +94 -31
- package/src/guard.js +101 -0
- package/src/index.js +19 -5
- package/src/llm.js +462 -52
- package/src/markdown.js +217 -0
- package/src/notify.js +34 -0
- package/src/pty.js +1 -1
- package/src/review.js +8 -1
- package/src/self-commands.js +108 -2
- package/src/session.js +58 -2
- package/src/ssh.js +255 -0
- package/src/subagent.js +12 -1
- package/src/taskClassifier.js +2 -2
- package/src/team.js +22 -0
- package/src/tools.js +487 -1
- package/src/workflows.js +32 -0
package/src/tools.js
CHANGED
|
@@ -9,6 +9,35 @@ const { webSearch, fetchUrl } = require('./web');
|
|
|
9
9
|
const { dispatchBrowser, classifyBrowserCommand } = require('./browser');
|
|
10
10
|
const { recordEdit } = require('./edit-tracker');
|
|
11
11
|
const { appendMemory } = require('./memory');
|
|
12
|
+
const { isMutationTool, hashArgs } = require('./guard');
|
|
13
|
+
const https = require('https');
|
|
14
|
+
const http = require('http');
|
|
15
|
+
const os = require('os');
|
|
16
|
+
|
|
17
|
+
// The SSH helper is optional: it is pulled in on first use, i.e. only once
// an ssh_* tool is actually invoked.
let _ssh = null;

/**
 * Return the './ssh' module, requiring it on the first call and reusing the
 * cached module afterwards.
 *
 * @param {*} [roots] - Accepted from call sites but not read by this function.
 * @returns {object|null} The loaded module, or null when requiring it fails.
 */
function _getSSH(roots) {
  if (!_ssh) {
    try {
      _ssh = require('./ssh');
    } catch (e) {
      // Any load failure is reported to the caller as "module unavailable".
      return null;
    }
  }
  return _ssh;
}
|
|
24
|
+
|
|
25
|
+
// Lazy-load TTS (kokoro-js is an optional dep; only required when
// tts_generate is actually called).
let _ttsGenerate = null;

/**
 * Return the `generate` function exported by './services/tts', loading the
 * module on the first call and caching the function afterwards.
 *
 * @returns {Function} The TTS `generate` function.
 * @throws {Error} If the module cannot be required (the message carries the
 *   install hints and the underlying message; the original error is attached
 *   as `cause`), or if the module loads but exports no `generate` function.
 */
function _getTtsGenerate() {
  if (_ttsGenerate) return _ttsGenerate;

  // Load into a local first so a failed or malformed load never leaves a
  // non-function value in the module-level cache.
  let generate;
  try {
    ({ generate } = require('./services/tts'));
  } catch (e) {
    throw new Error(
      'TTS dependencies not installed. Run: npm run setup:voice\n' +
      'Or: npm install --include=optional\n' +
      `Details: ${e.message}`,
      { cause: e },
    );
  }

  // Without this check a missing export would be returned as `undefined` and
  // only surface later as an opaque "is not a function" at the call site.
  if (typeof generate !== 'function') {
    throw new Error('TTS service module does not export a generate() function');
  }

  _ttsGenerate = generate;
  return _ttsGenerate;
}
|
|
12
41
|
|
|
13
42
|
const MAX_FILE_BYTES = 64 * 1024;
|
|
14
43
|
|
|
@@ -149,6 +178,103 @@ const TOOLS = [
|
|
|
149
178
|
},
|
|
150
179
|
},
|
|
151
180
|
}},
|
|
181
|
+
{ type: 'function', function: {
|
|
182
|
+
name: 'image_gen',
|
|
183
|
+
description: 'Generate an image from a text prompt using OpenAI DALL-E. The image is saved to disk and the file path is returned. Requires SHMAKK_OPENAI_API_KEY env var.',
|
|
184
|
+
parameters: {
|
|
185
|
+
type: 'object',
|
|
186
|
+
required: ['prompt'],
|
|
187
|
+
properties: {
|
|
188
|
+
prompt: { type: 'string', description: 'Text description of the image to generate' },
|
|
189
|
+
outputPath: { type: 'string', description: 'Optional output path. Defaults to a temp file.' },
|
|
190
|
+
size: { type: 'string', enum: ['1024x1024', '1792x1024', '1024x1792'], description: 'Image size. Defaults to 1024x1024.' },
|
|
191
|
+
quality: { type: 'string', enum: ['standard', 'hd'], description: 'Quality level. Defaults to standard.' },
|
|
192
|
+
style: { type: 'string', enum: ['vivid', 'natural'], description: 'Style. Defaults to vivid.' },
|
|
193
|
+
},
|
|
194
|
+
},
|
|
195
|
+
}},
|
|
196
|
+
{ type: 'function', function: {
|
|
197
|
+
name: 'tts_generate',
|
|
198
|
+
description: 'Generate speech audio from text using Kokoro TTS (local, no API key needed). Returns the audio file path and voice used.',
|
|
199
|
+
parameters: {
|
|
200
|
+
type: 'object',
|
|
201
|
+
required: ['text'],
|
|
202
|
+
properties: {
|
|
203
|
+
text: { type: 'string', description: 'Text to convert to speech' },
|
|
204
|
+
outputPath: { type: 'string', description: 'Optional WAV output path. Defaults to a temp file.' },
|
|
205
|
+
voice: { type: 'string', description: 'Voice name. Defaults to af_heart. Use list_voices tool to discover available voices.' },
|
|
206
|
+
speed: { type: 'number', description: 'Speech speed. Defaults to 1.5.' },
|
|
207
|
+
},
|
|
208
|
+
},
|
|
209
|
+
}},
|
|
210
|
+
{ type: 'function', function: {
|
|
211
|
+
name: 'video_probe',
|
|
212
|
+
description: 'Get media file metadata using ffprobe: duration, codec, resolution, frame rate, etc.',
|
|
213
|
+
parameters: {
|
|
214
|
+
type: 'object',
|
|
215
|
+
required: ['path'],
|
|
216
|
+
properties: {
|
|
217
|
+
path: { type: 'string', description: 'Path to the media file to probe' },
|
|
218
|
+
},
|
|
219
|
+
},
|
|
220
|
+
}},
|
|
221
|
+
{ type: 'function', function: {
|
|
222
|
+
name: 'video_compose',
|
|
223
|
+
description: 'Compose images, audio tracks, and transitions into a video using ffmpeg. Takes a structured timeline of segments and assembles them into a single MP4 file.',
|
|
224
|
+
parameters: {
|
|
225
|
+
type: 'object',
|
|
226
|
+
required: ['segments', 'outputPath'],
|
|
227
|
+
properties: {
|
|
228
|
+
segments: {
|
|
229
|
+
type: 'array',
|
|
230
|
+
description: 'Array of segment objects. Each segment: { imagePath: string (required), audioPath: string (required), startSec: number, durationSec: number, transition: string|null (fade/crossfade/dissolve/slide_left/slide_right/zoompan) }',
|
|
231
|
+
},
|
|
232
|
+
outputPath: { type: 'string', description: 'Output MP4 file path' },
|
|
233
|
+
width: { type: 'number', description: 'Output video width. Defaults to 1920.' },
|
|
234
|
+
height: { type: 'number', description: 'Output video height. Defaults to 1080.' },
|
|
235
|
+
fps: { type: 'number', description: 'Output frame rate. Defaults to 24.' },
|
|
236
|
+
backgroundColor: { type: 'string', description: 'Background color as hex. Defaults to #000000.' },
|
|
237
|
+
},
|
|
238
|
+
},
|
|
239
|
+
}},
|
|
240
|
+
{ type: 'function', function: {
|
|
241
|
+
name: 'ssh_run',
|
|
242
|
+
description: 'Run a shell command on a pre-configured remote host via SSH. Hosts are defined in .shmakk/hosts.json or ~/.config/shmakk/hosts.json. Output is captured.',
|
|
243
|
+
parameters: {
|
|
244
|
+
type: 'object',
|
|
245
|
+
required: ['host', 'cmd'],
|
|
246
|
+
properties: {
|
|
247
|
+
host: { type: 'string', description: 'Host alias as defined in hosts.json (e.g. "devbox")' },
|
|
248
|
+
cmd: { type: 'string', description: 'Shell command to run on the remote host' },
|
|
249
|
+
},
|
|
250
|
+
},
|
|
251
|
+
}},
|
|
252
|
+
{ type: 'function', function: {
|
|
253
|
+
name: 'ssh_push',
|
|
254
|
+
description: 'Copy a file from the local workspace to a remote host via SCP. Hosts are defined in .shmakk/hosts.json.',
|
|
255
|
+
parameters: {
|
|
256
|
+
type: 'object',
|
|
257
|
+
required: ['host', 'src', 'dest'],
|
|
258
|
+
properties: {
|
|
259
|
+
host: { type: 'string', description: 'Host alias as defined in hosts.json' },
|
|
260
|
+
src: { type: 'string', description: 'Local file path (relative to workspace or absolute)' },
|
|
261
|
+
dest: { type: 'string', description: 'Remote destination path (absolute on remote host)' },
|
|
262
|
+
},
|
|
263
|
+
},
|
|
264
|
+
}},
|
|
265
|
+
{ type: 'function', function: {
|
|
266
|
+
name: 'ssh_pull',
|
|
267
|
+
description: 'Copy a file from a remote host to the local workspace via SCP. Hosts are defined in .shmakk/hosts.json.',
|
|
268
|
+
parameters: {
|
|
269
|
+
type: 'object',
|
|
270
|
+
required: ['host', 'src', 'dest'],
|
|
271
|
+
properties: {
|
|
272
|
+
host: { type: 'string', description: 'Host alias as defined in hosts.json' },
|
|
273
|
+
src: { type: 'string', description: 'Remote source path (absolute on remote host)' },
|
|
274
|
+
dest: { type: 'string', description: 'Local destination path (relative to workspace or absolute)' },
|
|
275
|
+
},
|
|
276
|
+
},
|
|
277
|
+
}},
|
|
152
278
|
];
|
|
153
279
|
|
|
154
280
|
// Tool safety classification.
|
|
@@ -174,6 +300,11 @@ function classifyTool(name, args, mcpManager) {
|
|
|
174
300
|
if (name === 'web_search' || name === 'fetch_url') return 'safe';
|
|
175
301
|
if (name === 'browser') return classifyBrowserCommand(args);
|
|
176
302
|
if (name === 'remember') return 'safe';
|
|
303
|
+
if (name === 'image_gen') return 'unsafe'; // external API call, costs money
|
|
304
|
+
if (name === 'tts_generate') return 'safe'; // local-only, no network
|
|
305
|
+
if (name === 'video_probe') return 'safe'; // read-only local metadata
|
|
306
|
+
if (name === 'video_compose') return 'safe'; // local ffmpeg, reads only workspace files
|
|
307
|
+
if (name === 'ssh_run' || name === 'ssh_push' || name === 'ssh_pull') return 'unsafe';
|
|
177
308
|
return 'uncertain';
|
|
178
309
|
}
|
|
179
310
|
|
|
@@ -199,6 +330,13 @@ function describeTool(name, args, mcpManager) {
|
|
|
199
330
|
if (cmd === 'evaluate') return `browser eval JS`;
|
|
200
331
|
return `browser ${cmd}`;
|
|
201
332
|
}
|
|
333
|
+
if (name === 'image_gen') return `image_gen: "${(args.prompt || '').slice(0, 80)}" (${args.size || '1024x1024'})`;
|
|
334
|
+
if (name === 'tts_generate') return `tts_generate: "${(args.text || '').slice(0, 80)}" (voice: ${args.voice || 'af_heart'})`;
|
|
335
|
+
if (name === 'video_probe') return `video_probe ${args.path || ''}`;
|
|
336
|
+
if (name === 'video_compose') return `video_compose ${(args.segments || []).length} segments → ${args.outputPath || ''}`;
|
|
337
|
+
if (name === 'ssh_run') return `ssh_run ${args.host || ''}: ${(args.cmd || '').slice(0, 100)}`;
|
|
338
|
+
if (name === 'ssh_push') return `ssh_push ${args.src || ''} → ${args.host || ''}:${args.dest || ''}`;
|
|
339
|
+
if (name === 'ssh_pull') return `ssh_pull ${args.host || ''}:${args.src || ''} → ${args.dest || ''}`;
|
|
202
340
|
return `${name} ${JSON.stringify(args).slice(0, 80)}`;
|
|
203
341
|
}
|
|
204
342
|
|
|
@@ -229,7 +367,23 @@ function runCmd(cwd, cmd, signal) {
|
|
|
229
367
|
async function dispatchTool(name, args, roots, confirmTool, signal, mcpManager) {
|
|
230
368
|
if (signal && signal.aborted) return { error: 'aborted' };
|
|
231
369
|
const safety = classifyTool(name, args, mcpManager);
|
|
232
|
-
|
|
370
|
+
|
|
371
|
+
// ── Mutation-tool approval ────────────────────────────────────────────
|
|
372
|
+
// Every mutation tool MUST have explicit, fresh user approval before
|
|
373
|
+
// execution. This check is the runtime enforcement — even if the agent
|
|
374
|
+
// loop has a bug, the tool refuses to run without valid approval.
|
|
375
|
+
if (isMutationTool(name)) {
|
|
376
|
+
if (!confirmTool) return { error: 'mutation tool requires explicit user approval (no confirmTool available)' };
|
|
377
|
+
const ok = await confirmTool({ name, args, safety, description: describeTool(name, args, mcpManager) });
|
|
378
|
+
if (!ok) {
|
|
379
|
+
try {
|
|
380
|
+
const audit = require('./audit');
|
|
381
|
+
audit.append({ kind: 'tool-denied', name, argsHash: hashArgs(args) });
|
|
382
|
+
} catch {}
|
|
383
|
+
return { error: 'user declined' };
|
|
384
|
+
}
|
|
385
|
+
} else if (confirmTool) {
|
|
386
|
+
// Non-mutation tools: still confirm, but don't enforce the same strictness.
|
|
233
387
|
const ok = await confirmTool({ name, args, safety, description: describeTool(name, args, mcpManager) });
|
|
234
388
|
if (!ok) return { error: 'user declined' };
|
|
235
389
|
}
|
|
@@ -360,6 +514,338 @@ async function dispatchTool(name, args, roots, confirmTool, signal, mcpManager)
|
|
|
360
514
|
? { ok: true, saved_to: r.path, line: r.line }
|
|
361
515
|
: { error: r.error };
|
|
362
516
|
}
|
|
517
|
+
if (name === 'image_gen') {
|
|
518
|
+
const apiKey = process.env.SHMAKK_OPENAI_API_KEY;
|
|
519
|
+
if (!apiKey) return { error: 'SHMAKK_OPENAI_API_KEY env var is not set' };
|
|
520
|
+
const prompt = String(args.prompt || '').trim();
|
|
521
|
+
if (!prompt) return { error: 'prompt is required' };
|
|
522
|
+
const size = args.size || '1024x1024';
|
|
523
|
+
const quality = args.quality || 'standard';
|
|
524
|
+
const style = args.style || 'vivid';
|
|
525
|
+
const outputPath = args.outputPath || path.join(os.tmpdir(), `shmakk-img-${Date.now()}.png`);
|
|
526
|
+
|
|
527
|
+
const body = JSON.stringify({
|
|
528
|
+
model: 'dall-e-3',
|
|
529
|
+
prompt,
|
|
530
|
+
n: 1,
|
|
531
|
+
size,
|
|
532
|
+
quality,
|
|
533
|
+
style,
|
|
534
|
+
response_format: 'b64_json',
|
|
535
|
+
});
|
|
536
|
+
|
|
537
|
+
const postData = await new Promise((resolve, reject) => {
|
|
538
|
+
const url = new URL('https://api.openai.com/v1/images/generations');
|
|
539
|
+
const req = https.request({
|
|
540
|
+
hostname: url.hostname,
|
|
541
|
+
path: url.pathname,
|
|
542
|
+
method: 'POST',
|
|
543
|
+
headers: {
|
|
544
|
+
'Authorization': `Bearer ${apiKey}`,
|
|
545
|
+
'Content-Type': 'application/json',
|
|
546
|
+
},
|
|
547
|
+
timeout: 120000,
|
|
548
|
+
}, (res) => {
|
|
549
|
+
let data = '';
|
|
550
|
+
res.on('data', (chunk) => { data += chunk; });
|
|
551
|
+
res.on('end', () => {
|
|
552
|
+
try {
|
|
553
|
+
const json = JSON.parse(data);
|
|
554
|
+
if (res.statusCode >= 400) {
|
|
555
|
+
reject(new Error(`OpenAI API error ${res.statusCode}: ${(json.error && json.error.message) || data.slice(0, 200)}`));
|
|
556
|
+
return;
|
|
557
|
+
}
|
|
558
|
+
resolve(json);
|
|
559
|
+
} catch (e) {
|
|
560
|
+
reject(new Error(`Failed to parse OpenAI response: ${data.slice(0, 200)}`));
|
|
561
|
+
}
|
|
562
|
+
});
|
|
563
|
+
});
|
|
564
|
+
req.on('error', reject);
|
|
565
|
+
req.on('timeout', () => { req.destroy(); reject(new Error('OpenAI API request timed out after 120s')); });
|
|
566
|
+
req.write(body);
|
|
567
|
+
req.end();
|
|
568
|
+
});
|
|
569
|
+
|
|
570
|
+
const b64 = postData.data && postData.data[0] && postData.data[0].b64_json;
|
|
571
|
+
if (!b64) return { error: 'No image data in OpenAI response' };
|
|
572
|
+
fs.mkdirSync(path.dirname(outputPath), { recursive: true });
|
|
573
|
+
fs.writeFileSync(outputPath, Buffer.from(b64, 'base64'));
|
|
574
|
+
recordEdit({ filePath: outputPath, oldContent: null, newContent: `[binary image ${(b64.length * 0.75) | 0} bytes]`, tool: 'image_gen' });
|
|
575
|
+
return {
|
|
576
|
+
ok: true,
|
|
577
|
+
imagePath: outputPath,
|
|
578
|
+
prompt,
|
|
579
|
+
size,
|
|
580
|
+
revised_prompt: postData.data[0].revised_prompt || prompt,
|
|
581
|
+
};
|
|
582
|
+
}
|
|
583
|
+
if (name === 'tts_generate') {
|
|
584
|
+
const text = String(args.text || '').trim();
|
|
585
|
+
if (!text) return { error: 'text is required' };
|
|
586
|
+
const outputPath = args.outputPath || path.join(os.tmpdir(), `shmakk-tts-${Date.now()}.wav`);
|
|
587
|
+
const opts = {};
|
|
588
|
+
if (args.voice) opts.voice = args.voice;
|
|
589
|
+
if (args.speed !== undefined) opts.speed = Number(args.speed);
|
|
590
|
+
opts.outputPath = outputPath;
|
|
591
|
+
|
|
592
|
+
try {
|
|
593
|
+
const ttsFn = _getTtsGenerate();
|
|
594
|
+
const result = await ttsFn(text, opts);
|
|
595
|
+
fs.mkdirSync(path.dirname(outputPath), { recursive: true });
|
|
596
|
+
return {
|
|
597
|
+
ok: true,
|
|
598
|
+
audioPath: result.audioPath,
|
|
599
|
+
voice: result.voice,
|
|
600
|
+
textLength: text.length,
|
|
601
|
+
};
|
|
602
|
+
} catch (e) {
|
|
603
|
+
return { error: `TTS generation failed: ${e.message}` };
|
|
604
|
+
}
|
|
605
|
+
}
|
|
606
|
+
if (name === 'video_probe') {
|
|
607
|
+
const p = within(roots, args.path);
|
|
608
|
+
if (!p) return { error: 'path outside workspace' };
|
|
609
|
+
if (!fs.existsSync(p)) return { error: `file not found: ${p}` };
|
|
610
|
+
|
|
611
|
+
const result = await new Promise((resolve) => {
|
|
612
|
+
const child = execFile('ffprobe', [
|
|
613
|
+
'-v', 'quiet',
|
|
614
|
+
'-print_format', 'json',
|
|
615
|
+
'-show_format',
|
|
616
|
+
'-show_streams',
|
|
617
|
+
p,
|
|
618
|
+
], { timeout: 30000, maxBuffer: 1024 * 1024 }, (err, stdout, stderr) => {
|
|
619
|
+
if (err) {
|
|
620
|
+
const msg = (stderr || '').toString().trim() || err.message;
|
|
621
|
+
resolve({ error: `ffprobe failed: ${msg}` });
|
|
622
|
+
return;
|
|
623
|
+
}
|
|
624
|
+
try {
|
|
625
|
+
const data = JSON.parse(stdout);
|
|
626
|
+
// Extract a clean summary
|
|
627
|
+
const summary = { path: p };
|
|
628
|
+
if (data.format) {
|
|
629
|
+
summary.format = data.format.format_name;
|
|
630
|
+
summary.durationSec = parseFloat(data.format.duration) || null;
|
|
631
|
+
summary.sizeBytes = parseInt(data.format.size, 10) || null;
|
|
632
|
+
summary.bitRate = parseInt(data.format.bit_rate, 10) || null;
|
|
633
|
+
}
|
|
634
|
+
if (data.streams) {
|
|
635
|
+
summary.streams = data.streams.map((s) => ({
|
|
636
|
+
index: s.index,
|
|
637
|
+
codec_type: s.codec_type,
|
|
638
|
+
codec_name: s.codec_name,
|
|
639
|
+
width: s.width || null,
|
|
640
|
+
height: s.height || null,
|
|
641
|
+
r_frame_rate: s.r_frame_rate || null,
|
|
642
|
+
sample_rate: s.sample_rate || null,
|
|
643
|
+
channels: s.channels || null,
|
|
644
|
+
duration_ts: s.duration_ts || null,
|
|
645
|
+
}));
|
|
646
|
+
}
|
|
647
|
+
resolve({ ok: true, ...summary, raw: data });
|
|
648
|
+
} catch (e) {
|
|
649
|
+
resolve({ error: `Failed to parse ffprobe output: ${e.message}` });
|
|
650
|
+
}
|
|
651
|
+
});
|
|
652
|
+
if (signal) {
|
|
653
|
+
const onAbort = () => { try { child.kill('SIGINT'); } catch {} };
|
|
654
|
+
signal.addEventListener('abort', onAbort, { once: true });
|
|
655
|
+
}
|
|
656
|
+
});
|
|
657
|
+
return result;
|
|
658
|
+
}
|
|
659
|
+
if (name === 'video_compose') {
|
|
660
|
+
const segments = args.segments || [];
|
|
661
|
+
if (!Array.isArray(segments) || segments.length === 0) {
|
|
662
|
+
return { error: 'segments must be a non-empty array' };
|
|
663
|
+
}
|
|
664
|
+
const outputPath = args.outputPath;
|
|
665
|
+
if (!outputPath) return { error: 'outputPath is required' };
|
|
666
|
+
|
|
667
|
+
// Validate all input files exist
|
|
668
|
+
for (let i = 0; i < segments.length; i++) {
|
|
669
|
+
const seg = segments[i];
|
|
670
|
+
if (!seg.imagePath) return { error: `segment[${i}]: imagePath is required` };
|
|
671
|
+
if (!seg.audioPath) return { error: `segment[${i}]: audioPath is required` };
|
|
672
|
+
const imgPath = within(roots, seg.imagePath);
|
|
673
|
+
const audPath = within(roots, seg.audioPath);
|
|
674
|
+
if (!imgPath) return { error: `segment[${i}]: imagePath outside workspace` };
|
|
675
|
+
if (!audPath) return { error: `segment[${i}]: audioPath outside workspace` };
|
|
676
|
+
if (!fs.existsSync(imgPath)) return { error: `segment[${i}]: imagePath not found: ${seg.imagePath}` };
|
|
677
|
+
if (!fs.existsSync(audPath)) return { error: `segment[${i}]: audioPath not found: ${seg.audioPath}` };
|
|
678
|
+
}
|
|
679
|
+
|
|
680
|
+
const width = args.width || 1920;
|
|
681
|
+
const height = args.height || 1080;
|
|
682
|
+
const fps = args.fps || 24;
|
|
683
|
+
const bgColor = args.backgroundColor || '#000000';
|
|
684
|
+
|
|
685
|
+
// First pass: probe each audio segment for its actual duration.
|
|
686
|
+
// Fall back to segment.durationSec if ffprobe is unavailable.
|
|
687
|
+
let segmentDurations = [];
|
|
688
|
+
for (let i = 0; i < segments.length; i++) {
|
|
689
|
+
const seg = segments[i];
|
|
690
|
+
const audPath = within(roots, seg.audioPath);
|
|
691
|
+
if (seg.durationSec && seg.durationSec > 0) {
|
|
692
|
+
segmentDurations.push({ ...seg, resolvedSec: seg.durationSec, audPath });
|
|
693
|
+
continue;
|
|
694
|
+
}
|
|
695
|
+
try {
|
|
696
|
+
const probeOut = await new Promise((resolve) => {
|
|
697
|
+
execFile('ffprobe', [
|
|
698
|
+
'-v', 'quiet',
|
|
699
|
+
'-print_format', 'json',
|
|
700
|
+
'-show_format',
|
|
701
|
+
audPath,
|
|
702
|
+
], { timeout: 10000, maxBuffer: 256 * 1024 }, (err, stdout) => {
|
|
703
|
+
if (err) { resolve(null); return; }
|
|
704
|
+
try { resolve(JSON.parse(stdout)); } catch { resolve(null); }
|
|
705
|
+
});
|
|
706
|
+
});
|
|
707
|
+
const dur = probeOut && probeOut.format && probeOut.format.duration
|
|
708
|
+
? parseFloat(probeOut.format.duration) : 3;
|
|
709
|
+
segmentDurations.push({ ...seg, resolvedSec: dur, audPath });
|
|
710
|
+
} catch {
|
|
711
|
+
segmentDurations.push({ ...seg, resolvedSec: seg.durationSec || 3, audPath });
|
|
712
|
+
}
|
|
713
|
+
}
|
|
714
|
+
|
|
715
|
+
// Build filter_complex: for each segment, scale/zoom the image to fill,
|
|
716
|
+
// concatenate with transitions.
|
|
717
|
+
const filterParts = [];
|
|
718
|
+
let totalDuration = 0;
|
|
719
|
+
const trimPairs = [];
|
|
720
|
+
|
|
721
|
+
// Build per-segment video inputs
|
|
722
|
+
for (let i = 0; i < segmentDurations.length; i++) {
|
|
723
|
+
const seg = segmentDurations[i];
|
|
724
|
+
const dur = seg.resolvedSec;
|
|
725
|
+
const imgPath = within(roots, seg.imagePath);
|
|
726
|
+
const trans = seg.transition || null;
|
|
727
|
+
|
|
728
|
+
// Each image looped for its duration, scaled to fill.
|
|
729
|
+
filterParts.push(`[${i}:v]loop=loop=-1:size=${Math.ceil(dur * fps)},trim=duration=${dur},setpts=PTS-STARTPTS,scale=${width}:${height}:force_original_aspect_ratio=crop,crop=${width}:${height},setsar=1[v${i}]`);
|
|
730
|
+
|
|
731
|
+
if (trans === 'fade' || trans === 'crossfade' || trans === 'dissolve') {
|
|
732
|
+
// Fade in at start, except first segment
|
|
733
|
+
if (i === 0) {
|
|
734
|
+
filterParts.push(`[v${i}]fade=t=in:st=0:d=0.5,fade=t=out:st=${dur - 0.5}:d=0.5[fv${i}]`);
|
|
735
|
+
} else {
|
|
736
|
+
filterParts.push(`[v${i}]fade=t=in:st=0:d=0.5,fade=t=out:st=${dur - 0.5}:d=0.5[fv${i}]`);
|
|
737
|
+
}
|
|
738
|
+
trimPairs.push(`[fv${i}]`);
|
|
739
|
+
} else if (trans === 'zoompan') {
|
|
740
|
+
filterParts.push(`[v${i}]zoompan=z='min(zoom+0.0015,1.5)':d=${Math.ceil(dur * fps)}:s=${width}x${height}[fv${i}]`);
|
|
741
|
+
trimPairs.push(`[fv${i}]`);
|
|
742
|
+
} else if (trans === 'slide_left') {
|
|
743
|
+
// Slide in from right
|
|
744
|
+
const steps = Math.ceil(dur * fps);
|
|
745
|
+
filterParts.push(`[v${i}]trim=duration=${dur},setpts=PTS-STARTPTS,format=rgba,fade=t=in:st=0:d=0.3:alpha=1,overlay=x='min(W-(W/2)*(t/${dur}),W)':y=0:format=auto,setsar=1,trim=duration=${dur}[fv${i}]`);
|
|
746
|
+
trimPairs.push(`[fv${i}]`);
|
|
747
|
+
} else if (trans === 'slide_right') {
|
|
748
|
+
filterParts.push(`[v${i}]trim=duration=${dur},setpts=PTS-STARTPTS,format=rgba,fade=t=in:st=0:d=0.3:alpha=1,setsar=1,trim=duration=${dur}[fv${i}]`);
|
|
749
|
+
trimPairs.push(`[fv${i}]`);
|
|
750
|
+
} else {
|
|
751
|
+
// No transition
|
|
752
|
+
trimPairs.push(`[v${i}]`);
|
|
753
|
+
}
|
|
754
|
+
totalDuration += dur;
|
|
755
|
+
}
|
|
756
|
+
|
|
757
|
+
// Concatenate all video segments
|
|
758
|
+
const concatInputs = trimPairs.join('');
|
|
759
|
+
filterParts.push(`${concatInputs}concat=n=${segmentDurations.length}:v=1:a=0[outv]`);
|
|
760
|
+
|
|
761
|
+
// Build audio inputs: concat all audio files
|
|
762
|
+
const audioInputs = [];
|
|
763
|
+
for (let i = 0; i < segmentDurations.length; i++) {
|
|
764
|
+
const seg = segmentDurations[i];
|
|
765
|
+
// Input index for the audio: total image inputs + i
|
|
766
|
+
const audioIdx = segments.length + i;
|
|
767
|
+
audioInputs.push(`[${audioIdx}:a]`);
|
|
768
|
+
}
|
|
769
|
+
filterParts.push(`${audioInputs.join('')}concat=n=${segmentDurations.length}:v=0:a=1[outa]`);
|
|
770
|
+
|
|
771
|
+
const filterComplex = filterParts.join(';');
|
|
772
|
+
|
|
773
|
+
// Build ffmpeg args
|
|
774
|
+
const ffmpegArgs = ['-y'];
|
|
775
|
+
// Image inputs
|
|
776
|
+
for (const seg of segmentDurations) {
|
|
777
|
+
ffmpegArgs.push('-loop', '1', '-i', within(roots, seg.imagePath));
|
|
778
|
+
}
|
|
779
|
+
// Audio inputs
|
|
780
|
+
for (const seg of segmentDurations) {
|
|
781
|
+
ffmpegArgs.push('-i', seg.audPath);
|
|
782
|
+
}
|
|
783
|
+
ffmpegArgs.push(
|
|
784
|
+
'-filter_complex', filterComplex,
|
|
785
|
+
'-map', '[outv]',
|
|
786
|
+
'-map', '[outa]',
|
|
787
|
+
'-c:v', 'libx264',
|
|
788
|
+
'-preset', 'medium',
|
|
789
|
+
'-crf', '23',
|
|
790
|
+
'-pix_fmt', 'yuv420p',
|
|
791
|
+
'-c:a', 'aac',
|
|
792
|
+
'-b:a', '192k',
|
|
793
|
+
'-shortest',
|
|
794
|
+
'-t', String(totalDuration),
|
|
795
|
+
outputPath,
|
|
796
|
+
);
|
|
797
|
+
|
|
798
|
+
fs.mkdirSync(path.dirname(path.resolve(roots[0], outputPath)), { recursive: true });
|
|
799
|
+
|
|
800
|
+
const composeResult = await new Promise((resolve) => {
|
|
801
|
+
const child = execFile('ffmpeg', ffmpegArgs, {
|
|
802
|
+
cwd: roots[0],
|
|
803
|
+
timeout: 300000, // 5 min timeout for video encoding
|
|
804
|
+
maxBuffer: 512 * 1024,
|
|
805
|
+
}, (err, stdout, stderr) => {
|
|
806
|
+
if (err) {
|
|
807
|
+
const msg = (stderr || '').toString().split('\n').slice(-5).join('\n') || err.message;
|
|
808
|
+
resolve({ error: `ffmpeg compose failed: ${msg}` });
|
|
809
|
+
return;
|
|
810
|
+
}
|
|
811
|
+
resolve({ ok: true, outputPath, durationSec: totalDuration, segmentCount: segments.length });
|
|
812
|
+
});
|
|
813
|
+
if (signal) {
|
|
814
|
+
const onAbort = () => { try { child.kill('SIGINT'); } catch {} };
|
|
815
|
+
signal.addEventListener('abort', onAbort, { once: true });
|
|
816
|
+
}
|
|
817
|
+
});
|
|
818
|
+
return composeResult;
|
|
819
|
+
}
|
|
820
|
+
if (name === 'ssh_run') {
|
|
821
|
+
const ssh = _getSSH(roots);
|
|
822
|
+
if (!ssh) return { error: 'SSH module not available' };
|
|
823
|
+
const cfg = ssh.loadHostConfig(roots[0]);
|
|
824
|
+
const entry = ssh.resolveHost(cfg, args.host);
|
|
825
|
+
if (!entry) return { error: 'host not configured: ' + args.host + '. Define it in .shmakk/hosts.json or ~/.config/shmakk/hosts.json' };
|
|
826
|
+
return await ssh.sshRun(entry, args.cmd, signal);
|
|
827
|
+
}
|
|
828
|
+
if (name === 'ssh_push') {
|
|
829
|
+
const ssh = _getSSH(roots);
|
|
830
|
+
if (!ssh) return { error: 'SSH module not available' };
|
|
831
|
+
const cfg = ssh.loadHostConfig(roots[0]);
|
|
832
|
+
const entry = ssh.resolveHost(cfg, args.host);
|
|
833
|
+
if (!entry) return { error: 'host not configured: ' + args.host };
|
|
834
|
+
const p = within(roots, args.src);
|
|
835
|
+
if (!p) return { error: 'src path outside workspace' };
|
|
836
|
+
if (!fs.existsSync(p)) return { error: 'src not found: ' + args.src };
|
|
837
|
+
return await ssh.sshTransfer(entry, p, args.dest, 'push', signal);
|
|
838
|
+
}
|
|
839
|
+
if (name === 'ssh_pull') {
|
|
840
|
+
const ssh = _getSSH(roots);
|
|
841
|
+
if (!ssh) return { error: 'SSH module not available' };
|
|
842
|
+
const cfg = ssh.loadHostConfig(roots[0]);
|
|
843
|
+
const entry = ssh.resolveHost(cfg, args.host);
|
|
844
|
+
if (!entry) return { error: 'host not configured: ' + args.host };
|
|
845
|
+
const p = within(roots, args.dest);
|
|
846
|
+
if (!p) return { error: 'dest path outside workspace' };
|
|
847
|
+
return await ssh.sshTransfer(entry, args.src, p, 'pull', signal);
|
|
848
|
+
}
|
|
363
849
|
return { error: `unknown tool: ${name}` };
|
|
364
850
|
}
|
|
365
851
|
|
package/src/workflows.js
CHANGED
|
@@ -217,6 +217,38 @@ const WORKFLOWS = {
|
|
|
217
217
|
},
|
|
218
218
|
],
|
|
219
219
|
},
|
|
220
|
+
|
|
221
|
+
// ── Hybrid workflows (pipeline with parallel sub-stages) ─────────────────
|
|
222
|
+
|
|
223
|
+
'video-production': {
|
|
224
|
+
id: 'video-production',
|
|
225
|
+
description: 'Produce a video: script → voiceover + visuals → composite',
|
|
226
|
+
topology: 'pipeline',
|
|
227
|
+
triggers: [
|
|
228
|
+
/\b(create|make|produce|generate|render)\s+(a\s+)?video\b/i,
|
|
229
|
+
/\bvideo\s+(production|editing|creation|demo|explainer)\b/i,
|
|
230
|
+
/\b(voice.?over|narration)\s+.*\bvideo\b/i,
|
|
231
|
+
/\b(composite|assemble|stitch)\s+.*\b(video|mp4|clip)\b/i,
|
|
232
|
+
],
|
|
233
|
+
steps: [
|
|
234
|
+
{
|
|
235
|
+
role: 'script',
|
|
236
|
+
task: 'Turn the user prompt into a timed JSON storyboard. Output an array of segments, each with: startTime (seconds), endTime (seconds), narration (text for TTS), visualDesc (prompt for image generation). Total duration must match the user request. For: {input}',
|
|
237
|
+
},
|
|
238
|
+
{
|
|
239
|
+
role: 'voice',
|
|
240
|
+
task: 'Generate TTS audio files for each segment from the narration text in the script output. Use tts_generate tool. Return per-segment audio file paths.',
|
|
241
|
+
},
|
|
242
|
+
{
|
|
243
|
+
role: 'visual',
|
|
244
|
+
task: 'Generate images for each segment from the visualDesc in the script output. Use image_gen tool. Return per-segment image file paths.',
|
|
245
|
+
},
|
|
246
|
+
{
|
|
247
|
+
role: 'compositor',
|
|
248
|
+
task: 'Assemble the final video from the per-segment audio and image files using video_compose, then concatenate segments with video_concat. Apply transitions, fit images to duration, mix audio tracks. Output a single .mp4 file.',
|
|
249
|
+
},
|
|
250
|
+
],
|
|
251
|
+
},
|
|
220
252
|
};
|
|
221
253
|
|
|
222
254
|
function listWorkflows() {
|