@jiggai/recipes 0.4.33 → 0.4.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/ARCHITECTURE.md +66 -1
- package/docs/COMMANDS.md +12 -0
- package/docs/MEDIA_DRIVERS.md +175 -0
- package/docs/MEDIA_GENERATION.md +553 -0
- package/docs/TEMPLATE_VARIABLES.md +196 -0
- package/docs/WORKFLOW_APPROVALS.md +334 -0
- package/docs/WORKFLOW_NODES.md +147 -0
- package/docs/WORKFLOW_RUNS_FILE_FIRST.md +101 -0
- package/index.ts +9 -0
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
- package/src/handlers/media-drivers.ts +49 -0
- package/src/lib/workflows/media-drivers/generic.driver.ts +128 -0
- package/src/lib/workflows/media-drivers/index.ts +22 -0
- package/src/lib/workflows/media-drivers/kling-video.driver.ts +110 -0
- package/src/lib/workflows/media-drivers/luma-video.driver.ts +59 -0
- package/src/lib/workflows/media-drivers/nano-banana-pro.driver.ts +70 -0
- package/src/lib/workflows/media-drivers/openai-image-gen.driver.ts +60 -0
- package/src/lib/workflows/media-drivers/registry.ts +96 -0
- package/src/lib/workflows/media-drivers/runway-video.driver.ts +59 -0
- package/src/lib/workflows/media-drivers/types.ts +50 -0
- package/src/lib/workflows/media-drivers/utils.ts +149 -0
- package/src/lib/workflows/workflow-worker.ts +92 -91
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
This document explains how ClawRecipes workflows work in practice.
|
|
4
4
|
|
|
5
5
|
If you want a copy-paste cookbook after reading this reference, also see:
|
|
6
|
+
|
|
7
|
+
- [WORKFLOW_NODES.md](WORKFLOW_NODES.md) — runtime node config reference (LLM/media/tool/approval)
|
|
6
8
|
- [WORKFLOW_EXAMPLES.md](WORKFLOW_EXAMPLES.md)
|
|
7
9
|
|
|
8
10
|
If you are trying to answer any of these questions, start here:
|
|
@@ -104,6 +106,9 @@ Use this when you are deciding what kind of node to add:
|
|
|
104
106
|
- use **`tool`** when you want the workflow to call a tool or side-effecting action
|
|
105
107
|
- use **`human_approval`** when a person must approve before the workflow continues
|
|
106
108
|
- use **`writeback`** when you want to append workflow breadcrumbs/results into team files
|
|
109
|
+
- use **`media-image`** when you want to generate images as part of the workflow
|
|
110
|
+
- use **`media-video`** when you want to generate video content as part of the workflow
|
|
111
|
+
- use **`media-audio`** when you want to generate audio content as part of the workflow
|
|
107
112
|
|
|
108
113
|
### `start`
|
|
109
114
|
Purpose:
|
|
@@ -370,6 +375,102 @@ Example:
|
|
|
370
375
|
}
|
|
371
376
|
```
|
|
372
377
|
|
|
378
|
+
### `media-image`
|
|
379
|
+
Purpose:
|
|
380
|
+
- generate images using available media generation skills
|
|
381
|
+
|
|
382
|
+
Use it when:
|
|
383
|
+
- you want to create visual content as part of a workflow
|
|
384
|
+
- you need to generate images from text prompts
|
|
385
|
+
- you want to create marketing visuals or illustrations
|
|
386
|
+
|
|
387
|
+
Required pieces:
|
|
388
|
+
- `assignedTo.agentId`
|
|
389
|
+
- either `action.image_prompt` or upstream node output with image prompt
|
|
390
|
+
|
|
391
|
+
What it does:
|
|
392
|
+
- scans available skills for image generation capabilities
|
|
393
|
+
- executes image generation via skill auto-discovery
|
|
394
|
+
- writes generated image data to node outputs
|
|
395
|
+
|
|
396
|
+
Example:
|
|
397
|
+
|
|
398
|
+
```json
|
|
399
|
+
{
|
|
400
|
+
"id": "generate_hero_image",
|
|
401
|
+
"kind": "media-image",
|
|
402
|
+
"assignedTo": { "agentId": "development-team-lead" },
|
|
403
|
+
"action": {
|
|
404
|
+
"image_prompt": "A modern, clean illustration of a workflow automation dashboard",
|
|
405
|
+
"mediaType": "image"
|
|
406
|
+
}
|
|
407
|
+
}
|
|
408
|
+
```
|
|
409
|
+
|
|
410
|
+
### `media-video`
|
|
411
|
+
Purpose:
|
|
412
|
+
- generate video content using available media generation skills
|
|
413
|
+
|
|
414
|
+
Use it when:
|
|
415
|
+
- you want to create video content as part of a workflow
|
|
416
|
+
- you need to generate promotional or educational videos
|
|
417
|
+
- you want to create dynamic visual content
|
|
418
|
+
|
|
419
|
+
Required pieces:
|
|
420
|
+
- `assignedTo.agentId`
|
|
421
|
+
- either `action.video_prompt` or upstream node output with video prompt
|
|
422
|
+
|
|
423
|
+
What it does:
|
|
424
|
+
- scans available skills for video generation capabilities
|
|
425
|
+
- executes video generation via skill auto-discovery
|
|
426
|
+
- writes generated video data to node outputs
|
|
427
|
+
|
|
428
|
+
Example:
|
|
429
|
+
|
|
430
|
+
```json
|
|
431
|
+
{
|
|
432
|
+
"id": "generate_demo_video",
|
|
433
|
+
"kind": "media-video",
|
|
434
|
+
"assignedTo": { "agentId": "development-team-lead" },
|
|
435
|
+
"action": {
|
|
436
|
+
"video_prompt": "A 30-second demo of workflow automation in action",
|
|
437
|
+
"mediaType": "video"
|
|
438
|
+
}
|
|
439
|
+
}
|
|
440
|
+
```
|
|
441
|
+
|
|
442
|
+
### `media-audio`
|
|
443
|
+
Purpose:
|
|
444
|
+
- generate audio content using available media generation skills
|
|
445
|
+
|
|
446
|
+
Use it when:
|
|
447
|
+
- you want to create audio content as part of a workflow
|
|
448
|
+
- you need to generate voiceovers or music
|
|
449
|
+
- you want to create podcast content or audio narration
|
|
450
|
+
|
|
451
|
+
Required pieces:
|
|
452
|
+
- `assignedTo.agentId`
|
|
453
|
+
- either `action.audio_prompt` or upstream node output with audio prompt
|
|
454
|
+
|
|
455
|
+
What it does:
|
|
456
|
+
- scans available skills for audio generation capabilities
|
|
457
|
+
- executes audio generation via skill auto-discovery
|
|
458
|
+
- writes generated audio data to node outputs
|
|
459
|
+
|
|
460
|
+
Example:
|
|
461
|
+
|
|
462
|
+
```json
|
|
463
|
+
{
|
|
464
|
+
"id": "generate_voiceover",
|
|
465
|
+
"kind": "media-audio",
|
|
466
|
+
"assignedTo": { "agentId": "development-team-lead" },
|
|
467
|
+
"action": {
|
|
468
|
+
"audio_prompt": "Professional voiceover explaining our new feature launch",
|
|
469
|
+
"mediaType": "audio"
|
|
470
|
+
}
|
|
471
|
+
}
|
|
472
|
+
```
|
|
473
|
+
|
|
373
474
|
---
|
|
374
475
|
|
|
375
476
|
## What is **not** currently a first-class built-in node type?
|
package/index.ts
CHANGED
|
@@ -47,6 +47,7 @@ import { handleScaffold, scaffoldAgentFromRecipe } from "./src/handlers/scaffold
|
|
|
47
47
|
import { handleAddRoleToTeam } from "./src/handlers/team-add-role";
|
|
48
48
|
import { reconcileRecipeCronJobs } from "./src/handlers/cron";
|
|
49
49
|
import { handleWorkflowsApprove, handleWorkflowsPollApprovals, handleWorkflowsResume, handleWorkflowsRun, handleWorkflowsRunnerOnce, handleWorkflowsRunnerTick, handleWorkflowsWorkerTick } from "./src/handlers/workflows";
|
|
50
|
+
import { handleMediaDriversList } from "./src/handlers/media-drivers";
|
|
50
51
|
import { listRecipeFiles, loadRecipeById, workspacePath } from "./src/lib/recipes";
|
|
51
52
|
import {
|
|
52
53
|
executeWorkspaceCleanup,
|
|
@@ -728,6 +729,14 @@ workflows
|
|
|
728
729
|
console.log(JSON.stringify(res, null, 2));
|
|
729
730
|
});
|
|
730
731
|
|
|
732
|
+
// `workflows media-drivers`: print the media-driver availability report
// produced by handleMediaDriversList() as compact (single-line) JSON on stdout.
workflows
  .command("media-drivers")
  .description("List available media generation drivers with env-var availability")
  .action(async () => {
    const driverInfos = await handleMediaDriversList();
    const payload = JSON.stringify(driverInfos);
    console.log(payload);
  });
|
|
739
|
+
|
|
731
740
|
workflows
|
|
732
741
|
.command("poll-approvals")
|
|
733
742
|
.description("Auto-resume any workflow runs whose approval decision has been recorded (approved/rejected)")
|
package/openclaw.plugin.json
CHANGED
package/package.json
CHANGED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { getAllDrivers, isDriverAvailable } from '../lib/workflows/media-drivers/registry';
|
|
2
|
+
import { loadConfigEnv } from '../lib/workflows/media-drivers/utils';
|
|
3
|
+
|
|
4
|
+
/**
 * Duration limits a media driver advertises, expressed in seconds.
 * Reported verbatim from the driver's own `durationConstraints`.
 */
export interface DurationConstraintsInfo {
  /** Lower bound, in seconds. */
  minSeconds: number;
  /** Upper bound, in seconds. */
  maxSeconds: number;
  /** Value the driver declares as its default, in seconds. */
  defaultSeconds: number;
  /** Optional step size, in seconds, when the driver declares one. */
  stepSeconds?: number;
}
|
|
10
|
+
|
|
11
|
+
/**
 * One row of the media-driver listing: the driver's static description plus
 * its availability in the current environment.
 */
export interface MediaDriverInfo {
  /** Driver identifier (the driver's `slug`). */
  slug: string;
  /** Human-readable driver name. */
  displayName: string;
  /** Kind of media the driver produces. */
  mediaType: 'image' | 'video' | 'audio';
  /** Environment variables the driver declares as required. */
  requiredEnvVars: string[];
  /** Availability verdict for this driver in the checked environment. */
  available: boolean;
  /** Subset of `requiredEnvVars` that are unset or blank in the checked environment. */
  missingEnvVars: string[];
  /** Duration limits declared by the driver, or `null` when it declares none. */
  durationConstraints: DurationConstraintsInfo | null;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Build the availability report for every known media driver.
 *
 * The environment that is checked is `process.env` (string values only)
 * overlaid with whatever `loadConfigEnv()` returns; on a key clash the
 * config value wins because it is applied last.
 *
 * @returns One entry per driver, in the order `getAllDrivers()` returns them.
 */
export async function handleMediaDriversList(): Promise<MediaDriverInfo[]> {
  const configEnv = await loadConfigEnv();

  // Start from the process environment, dropping undefined entries.
  const mergedEnv: Record<string, string> = {};
  Object.entries(process.env).forEach(([key, value]) => {
    if (typeof value === 'string') {
      mergedEnv[key] = value;
    }
  });
  // Applied last so config-provided values override the process environment.
  Object.assign(mergedEnv, configEnv);

  // A variable counts as missing when it is absent, empty, or whitespace-only.
  const isUnset = (name: string): boolean => {
    const value = mergedEnv[name];
    return !value || value.trim().length === 0;
  };

  return getAllDrivers().map((driver) => {
    const available = isDriverAvailable(driver.slug, mergedEnv);
    const missingEnvVars = driver.requiredEnvVars.filter((name) => isUnset(name));

    return {
      slug: driver.slug,
      displayName: driver.displayName,
      mediaType: driver.mediaType,
      requiredEnvVars: driver.requiredEnvVars,
      available,
      missingEnvVars,
      durationConstraints: driver.durationConstraints ?? null,
    };
  });
}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import { MediaDriver, MediaDriverInvokeOpts, MediaDriverResult } from './types';
|
|
2
|
+
import { findSkillDir, findVenvPython, runScript, parseMediaOutput, findScriptInSkill } from './utils';
|
|
3
|
+
|
|
4
|
+
/**
 * Convention-based media driver for skills that have no dedicated driver class.
 *
 * It locates a generation script inside the skill directory by well-known file
 * names, pipes the prompt to it on stdin, and extracts the produced file path
 * from the script's output.
 */
export class GenericDriver implements MediaDriver {
  slug: string;
  mediaType: 'image' | 'video' | 'audio';
  displayName: string;
  requiredEnvVars: string[] = [];
  durationConstraints = null;

  /**
   * @param slug Skill identifier used to look up the skill directory.
   * @param mediaType Kind of media this driver instance generates.
   * @param displayName Optional label; a generic one is derived when omitted or empty.
   */
  constructor(slug: string, mediaType: 'image' | 'video' | 'audio', displayName?: string) {
    this.slug = slug;
    this.mediaType = mediaType;
    this.displayName = displayName || `Generic ${mediaType} driver for ${slug}`;
  }

  /**
   * Run the skill's generation script and return the generated file path.
   *
   * @param opts Prompt, output directory (used as the script's cwd), extra env and timeout.
   * @returns The detected file path plus metadata (skill, prompt, raw output, script path).
   * @throws Error when the skill directory or a generation script cannot be found,
   *   or when no file path can be recognised in the script output.
   */
  async invoke(opts: MediaDriverInvokeOpts): Promise<MediaDriverResult> {
    const { prompt, outputDir, env, timeout } = opts;

    // Find the skill directory
    const skillDir = await findSkillDir(this.slug);
    if (!skillDir) {
      throw new Error(`Skill directory not found for ${this.slug}`);
    }

    // Determine script candidates based on media type.
    // Every media type also accepts the untyped `generate.py` / `generate.sh`:
    // createFromSkill() hands out an 'image' driver for skills that only ship a
    // generic script, so the image list must include `generate.py` as well.
    const scriptCandidates = this.mediaType === 'image'
      ? ['generate_image.py', 'generate_image.sh', 'generate.py', 'generate.sh']
      : this.mediaType === 'video'
        ? ['generate_video.py', 'generate_video.sh', 'generate.py', 'generate.sh']
        : ['generate_audio.py', 'generate_audio.sh', 'generate.py', 'generate.sh'];

    // Find the script
    const scriptPath = await findScriptInSkill(skillDir, scriptCandidates);
    if (!scriptPath) {
      throw new Error(`No generation script found in ${skillDir}. Looked for: ${scriptCandidates.join(', ')}`);
    }

    // Determine runner: Python scripts use the interpreter resolved for the
    // skill, everything else is run through bash.
    let runner = 'bash';
    if (scriptPath.endsWith('.py')) {
      runner = await findVenvPython(skillDir);
    }

    // Execute the script with stdin input (most common interface)
    const scriptOutput = runScript({
      runner,
      script: scriptPath,
      stdin: prompt,
      env: {
        ...env,
        HOME: process.env.HOME || '/home/control',
      },
      cwd: outputDir,
      timeout,
    });

    // Try to parse MEDIA: output first
    let filePath = parseMediaOutput(scriptOutput);

    // If no MEDIA: prefix, fall back to a heuristic: scanning from the last
    // output line upwards, take the first line that looks like a file path.
    if (!filePath) {
      const lines = scriptOutput.split('\n').map(line => line.trim()).filter(Boolean);
      for (const line of lines.reverse()) {
        if (line.includes('/') && (line.includes('.') || line.includes(outputDir))) {
          filePath = line;
          break;
        }
      }
    }

    if (!filePath) {
      throw new Error(`No file path found in script output. Output: ${scriptOutput}`);
    }

    return {
      filePath,
      metadata: {
        skill: this.slug,
        prompt,
        script_output: scriptOutput,
        script_path: scriptPath,
      },
    };
  }

  /**
   * Create a generic driver by auto-detecting a skill's capabilities.
   *
   * Typed scripts are probed in the order image, video, audio; the first match
   * decides the media type. A skill that only ships `generate.py` /
   * `generate.sh` is treated as an image generator.
   *
   * @param slug Skill identifier to inspect.
   * @returns A driver for the detected media type, or `null` when the skill
   *   directory or any recognised script is missing.
   */
  static async createFromSkill(slug: string): Promise<GenericDriver | null> {
    const skillDir = await findSkillDir(slug);
    if (!skillDir) {
      return null;
    }

    // Check what types of scripts are available
    const imageScripts = ['generate_image.py', 'generate_image.sh'];
    const videoScripts = ['generate_video.py', 'generate_video.sh'];
    const audioScripts = ['generate_audio.py', 'generate_audio.sh'];

    // Check for image generation capability
    const imageScript = await findScriptInSkill(skillDir, imageScripts);
    if (imageScript) {
      return new GenericDriver(slug, 'image', `${slug} Image Generation`);
    }

    // Check for video generation capability
    const videoScript = await findScriptInSkill(skillDir, videoScripts);
    if (videoScript) {
      return new GenericDriver(slug, 'video', `${slug} Video Generation`);
    }

    // Check for audio generation capability
    const audioScript = await findScriptInSkill(skillDir, audioScripts);
    if (audioScript) {
      return new GenericDriver(slug, 'audio', `${slug} Audio Generation`);
    }

    // Fall back to generic generate script
    const genericScript = await findScriptInSkill(skillDir, ['generate.py', 'generate.sh']);
    if (genericScript) {
      return new GenericDriver(slug, 'image', `${slug} Generic Generation`);
    }

    return null;
  }
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
export {
|
|
2
|
+
getDriver,
|
|
3
|
+
getDriversByType,
|
|
4
|
+
getAllDrivers,
|
|
5
|
+
isDriverAvailable,
|
|
6
|
+
getAvailableDrivers,
|
|
7
|
+
getAvailableDriversByType
|
|
8
|
+
} from './registry';
|
|
9
|
+
|
|
10
|
+
export type {
|
|
11
|
+
MediaDriver,
|
|
12
|
+
MediaDriverInvokeOpts,
|
|
13
|
+
MediaDriverResult,
|
|
14
|
+
DurationConstraints
|
|
15
|
+
} from './types';
|
|
16
|
+
|
|
17
|
+
export { NanoBananaPro } from './nano-banana-pro.driver';
|
|
18
|
+
export { OpenAIImageGen } from './openai-image-gen.driver';
|
|
19
|
+
export { RunwayVideo } from './runway-video.driver';
|
|
20
|
+
export { KlingVideo } from './kling-video.driver';
|
|
21
|
+
export { LumaVideo } from './luma-video.driver';
|
|
22
|
+
export { GenericDriver } from './generic.driver';
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import * as path from 'path';
|
|
2
|
+
import * as fs from 'fs';
|
|
3
|
+
import { MediaDriver, MediaDriverInvokeOpts, MediaDriverResult, DurationConstraints, parseDuration } from './types';
|
|
4
|
+
import { findSkillDir, runScript, parseMediaOutput } from './utils';
|
|
5
|
+
|
|
6
|
+
/**
 * Kling AI video driver — uses official `klingai` ClawHub skill.
 *
 * Auth: JWT via ~/.config/kling/.credentials (Access Key + Secret Key).
 * NOT a simple Bearer API key — the skill's auth.mjs handles JWT signing.
 * No env var needed; credentials file is the source of truth.
 */
export class KlingVideo implements MediaDriver {
  slug = 'klingai';
  mediaType = 'video' as const;
  displayName = 'Kling AI Video (Official)';
  // Auth is via ~/.config/kling/.credentials, not env vars.
  // We check for the credentials file in a custom availability method.
  requiredEnvVars: string[] = [];
  durationConstraints: DurationConstraints = { minSeconds: 3, maxSeconds: 15, defaultSeconds: 5, stepSeconds: 1 };

  /**
   * Check if Kling credentials are configured (credentials file exists with AK/SK).
   *
   * This is a substring check for the two key names only; the values are not
   * validated. Any read error (missing file, permissions) counts as "not configured".
   */
  isConfigured(): boolean {
    const home = process.env.HOME || '/home/control';
    const credPath = path.join(home, '.config', 'kling', '.credentials');
    try {
      const content = fs.readFileSync(credPath, 'utf8');
      return content.includes('access_key_id') && content.includes('secret_access_key');
    } catch {
      return false;
    }
  }

  /**
   * Generate a video by running the skill's `scripts/video.mjs` with node.
   *
   * The output file is located, in order, from a "Done:/Saved:" line in the
   * script output, from a `MEDIA:` line, and finally from the `.mp4` files
   * present in `outputDir`.
   *
   * @param opts Prompt, output directory, extra env, timeout and node config.
   * @returns Path of the generated `.mp4` plus basic metadata.
   * @throws Error when the skill is not installed or no video file can be found.
   */
  async invoke(opts: MediaDriverInvokeOpts): Promise<MediaDriverResult> {
    const { prompt, outputDir, env, timeout, config } = opts;

    // Clamp the requested duration to the range this driver advertises.
    // A non-numeric request yields NaN, which Math.min/Math.max would propagate
    // (ending up as "--duration NaN"), so fall back to the default instead.
    // Whole seconds only, matching the declared 1-second step.
    const { minSeconds, maxSeconds, defaultSeconds } = this.durationConstraints;
    const requestedSeconds = Number(parseDuration(config));
    const usableSeconds = Number.isNaN(requestedSeconds) ? defaultSeconds : requestedSeconds;
    const clampedSeconds = Math.max(minSeconds, Math.min(maxSeconds, usableSeconds));
    const duration = String(Math.round(clampedSeconds));

    const skillDir = await findSkillDir(this.slug);
    if (!skillDir) {
      throw new Error(
        `Skill directory not found for ${this.slug}. Install it: clawhub install klingai --force`
      );
    }

    const scriptPath = path.join(skillDir, 'scripts', 'video.mjs');

    // The official skill is a Node.js script (not Python)
    const runner = 'node';

    const scriptOutput = runScript({
      runner,
      script: scriptPath,
      args: [
        '--prompt', prompt,
        '--output_dir', outputDir,
        '--duration', duration,
        '--aspect_ratio', String(config?.aspect_ratio ?? config?.size ?? '16:9'),
        '--mode', 'pro',
      ],
      env: {
        ...env,
        HOME: process.env.HOME || '/home/control',
      },
      cwd: outputDir,
      timeout,
    });

    // The script prints "Done: /path/to/file.mp4" or "Saved: /path/to/file.mp4"
    const doneMatch = scriptOutput.match(/(?:Done|完成|Saved|已保存):\s*(.+\.mp4)/m);
    if (doneMatch) {
      return {
        filePath: doneMatch[1].trim(),
        metadata: { skill: this.slug, prompt },
      };
    }

    // Fallback: check for MEDIA: prefix (in case a bridge wrapper is used)
    const mediaPath = parseMediaOutput(scriptOutput);
    if (mediaPath) {
      return {
        filePath: mediaPath,
        metadata: { skill: this.slug, prompt },
      };
    }

    // Last resort: look for any .mp4 in output dir; the name that sorts last wins.
    try {
      const files = fs.readdirSync(outputDir)
        .filter(f => f.endsWith('.mp4'))
        .sort()
        .reverse();
      if (files.length > 0) {
        return {
          filePath: path.join(outputDir, files[0]),
          metadata: { skill: this.slug, prompt },
        };
      }
    } catch {
      // Best effort only: an unreadable output dir falls through to the error below.
    }

    throw new Error(
      `Could not find generated video in output. Script output:\n${scriptOutput}`
    );
  }
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import * as path from 'path';
|
|
2
|
+
import { MediaDriver, MediaDriverInvokeOpts, MediaDriverResult, DurationConstraints, parseDuration } from './types';
|
|
3
|
+
import { findSkillDir, findVenvPython, runScript, parseMediaOutput } from './utils';
|
|
4
|
+
|
|
5
|
+
/**
 * Luma video driver: runs the `skill-luma-video` skill's `generate_video.py`,
 * feeding the prompt on stdin and reading the result from its `MEDIA:` output.
 */
export class LumaVideo implements MediaDriver {
  slug = 'skill-luma-video';
  mediaType = 'video' as const;
  displayName = 'Luma Video Generation';
  requiredEnvVars = ['LUMAAI_API_KEY'];
  durationConstraints: DurationConstraints = { minSeconds: 5, maxSeconds: 9, defaultSeconds: 5, stepSeconds: 4 };

  /**
   * Generate a video for the given prompt.
   *
   * The duration from `parseDuration(config)` is forwarded unchanged in
   * `MEDIA_DURATION`; the aspect ratio comes from `config.aspect_ratio`,
   * then `config.size`, then `'16:9'`.
   *
   * @param opts Prompt, output directory (script cwd), extra env, timeout and node config.
   * @returns The path reported by the script plus metadata including its raw output.
   * @throws Error when the skill directory is missing or the output has no `MEDIA:` path.
   */
  async invoke(opts: MediaDriverInvokeOpts): Promise<MediaDriverResult> {
    const { prompt, outputDir, env: callerEnv, timeout, config: settings } = opts;
    const duration = parseDuration(settings);

    // Locate the installed skill.
    const skillDir = await findSkillDir(this.slug);
    if (!skillDir) {
      throw new Error(`Skill directory not found for ${this.slug}`);
    }

    // Script and interpreter used to run it.
    const generatorScript = path.join(skillDir, 'generate_video.py');
    const pythonBin = await findVenvPython(skillDir);

    // Generation parameters travel to the script as environment variables.
    const childEnv = {
      ...callerEnv,
      HOME: process.env.HOME || '/home/control',
      MEDIA_DURATION: duration,
      MEDIA_ASPECT_RATIO: String(settings?.aspect_ratio ?? settings?.size ?? '16:9'),
    };

    const stdout = runScript({
      runner: pythonBin,
      script: generatorScript,
      stdin: prompt,
      env: childEnv,
      cwd: outputDir,
      timeout,
    });

    // The script announces its result on a MEDIA: line.
    const mediaPath = parseMediaOutput(stdout);
    if (mediaPath) {
      return {
        filePath: mediaPath,
        metadata: {
          skill: this.slug,
          prompt,
          script_output: stdout,
        },
      };
    }

    throw new Error(`No MEDIA: path found in script output. Output: ${stdout}`);
  }
}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import * as path from 'path';
|
|
2
|
+
import { MediaDriver, MediaDriverInvokeOpts, MediaDriverResult } from './types';
|
|
3
|
+
import { findSkillDir, findVenvPython, runScript } from './utils';
|
|
4
|
+
|
|
5
|
+
/**
 * Nano Banana Pro image driver: runs the skill's `scripts/generate_image.py`
 * through its argparse-style CLI and reads the output path from stdout.
 */
export class NanoBananaPro implements MediaDriver {
  slug = 'nano-banana-pro';
  mediaType = 'image' as const;
  displayName = 'Nano Banana Pro (Gemini Image Generation)';
  requiredEnvVars = ['GEMINI_API_KEY'];
  durationConstraints = null;

  /**
   * Generate an image for the given prompt.
   *
   * `config.size` (e.g. `"1024x1024"`) is mapped to the skill's resolution
   * tiers: largest dimension >= 3840 is `4K`, >= 1792 is `2K`, otherwise `1K`.
   *
   * @param opts Prompt, output directory (script cwd), extra env, timeout and node config.
   * @returns The path reported by the script plus metadata including its raw output.
   * @throws Error when the skill directory is missing or stdout holds no usable path.
   */
  async invoke(opts: MediaDriverInvokeOpts): Promise<MediaDriverResult> {
    const { prompt, outputDir, env, timeout, config } = opts;

    // Find the skill directory
    const skillDir = await findSkillDir(this.slug);
    if (!skillDir) {
      throw new Error(`Skill directory not found for ${this.slug}`);
    }

    // Find the script
    const scriptPath = path.join(skillDir, 'scripts', 'generate_image.py');

    // Find Python runner - check for venv first, fallback to uv run
    let runner: string;
    try {
      runner = await findVenvPython(skillDir);
    } catch {
      // Fallback to uv run if no venv
      // NOTE(review): this is a multi-word command line, not a single executable.
      // Confirm that runScript accepts such a runner and that findVenvPython can
      // actually throw (other drivers call it without a try/catch).
      runner = 'uv run python';
    }

    // Generate a filename for the output
    const filename = 'output.png';

    // Map pixel size to resolution tier (nano-banana-pro uses 1K/2K/4K).
    // Unparseable dimensions are dropped and 1024 acts as the floor.
    const sizeStr = String(config?.size ?? '1024x1024');
    const maxDim = Math.max(...sizeStr.split('x').map(Number).filter(n => !isNaN(n)), 1024);
    const resolution = maxDim >= 3840 ? '4K' : maxDim >= 1792 ? '2K' : '1K';

    // Execute the script with argparse CLI interface
    const scriptOutput = runScript({
      runner,
      script: scriptPath,
      args: ['--prompt', prompt, '--filename', filename, '--resolution', resolution],
      env: {
        ...env,
        HOME: process.env.HOME || '/home/control',
      },
      cwd: outputDir,
      timeout,
    });

    // nano-banana-pro prints the full path on stdout. Use only the last
    // non-empty line so progress/log lines printed before it cannot end up in
    // the path, and accept the `MEDIA:` prefix convention used by other skills.
    const stdoutLines = scriptOutput
      .split(/\r?\n/)
      .map(line => line.trim())
      .filter(Boolean);
    const lastLine = stdoutLines.length > 0 ? stdoutLines[stdoutLines.length - 1] : '';
    const outputPath = lastLine.replace(/^MEDIA:\s*/, '');

    if (!outputPath || !outputPath.includes('.')) {
      throw new Error(`No valid file path returned from script. Output: ${scriptOutput}`);
    }

    return {
      filePath: outputPath,
      metadata: {
        skill: this.slug,
        prompt,
        script_output: scriptOutput,
      },
    };
  }
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import * as path from 'path';
|
|
2
|
+
import { MediaDriver, MediaDriverInvokeOpts, MediaDriverResult } from './types';
|
|
3
|
+
import { findSkillDir, findVenvPython, runScript, parseMediaOutput } from './utils';
|
|
4
|
+
|
|
5
|
+
/**
 * OpenAI image driver: runs the `openai-image-gen` skill's `generate_image.py`,
 * feeding the prompt on stdin and reading the result from its `MEDIA:` output.
 */
export class OpenAIImageGen implements MediaDriver {
  slug = 'openai-image-gen';
  mediaType = 'image' as const;
  displayName = 'OpenAI Image Generation (DALL-E)';
  requiredEnvVars = ['OPENAI_API_KEY'];
  durationConstraints = null;

  /**
   * Generate an image for the given prompt.
   *
   * The requested size (`config.size`, default `'1024x1024'`) is handed to the
   * script through the `DALL_E_SIZE` environment variable.
   *
   * @param opts Prompt, output directory (script cwd), extra env, timeout and node config.
   * @returns The path reported by the script plus metadata including its raw output.
   * @throws Error when the skill directory is missing or the output has no `MEDIA:` path.
   */
  async invoke(opts: MediaDriverInvokeOpts): Promise<MediaDriverResult> {
    const { prompt, outputDir, env: callerEnv, timeout, config: settings } = opts;

    // Locate the installed skill.
    const skillDir = await findSkillDir(this.slug);
    if (!skillDir) {
      throw new Error(`Skill directory not found for ${this.slug}`);
    }

    // Script and interpreter used to run it.
    const generatorScript = path.join(skillDir, 'generate_image.py');
    const pythonBin = await findVenvPython(skillDir);

    // Size travels via env var (script reads DALL_E_SIZE, defaults to 1024x1024).
    const requestedSize = String(settings?.size ?? '1024x1024');
    const childEnv = {
      ...callerEnv,
      HOME: process.env.HOME || '/home/control',
      DALL_E_SIZE: requestedSize,
    };

    const stdout = runScript({
      runner: pythonBin,
      script: generatorScript,
      stdin: prompt,
      env: childEnv,
      cwd: outputDir,
      timeout,
    });

    // The script announces its result on a MEDIA: line.
    const mediaPath = parseMediaOutput(stdout);
    if (mediaPath) {
      return {
        filePath: mediaPath,
        metadata: {
          skill: this.slug,
          prompt,
          script_output: stdout,
        },
      };
    }

    throw new Error(`No MEDIA: path found in script output. Output: ${stdout}`);
  }
}
|