@jiggai/recipes 0.4.33 → 0.4.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,8 @@
3
3
  This document explains how ClawRecipes workflows work in practice.
4
4
 
5
5
  If you want a copy-paste cookbook after reading this reference, also see:
6
+
7
+ - [WORKFLOW_NODES.md](WORKFLOW_NODES.md) — runtime node config reference (LLM/media/tool/approval)
6
8
  - [WORKFLOW_EXAMPLES.md](WORKFLOW_EXAMPLES.md)
7
9
 
8
10
  If you are trying to answer any of these questions, start here:
@@ -104,6 +106,9 @@ Use this when you are deciding what kind of node to add:
104
106
  - use **`tool`** when you want the workflow to call a tool or side-effecting action
105
107
  - use **`human_approval`** when a person must approve before the workflow continues
106
108
  - use **`writeback`** when you want to append workflow breadcrumbs/results into team files
109
+ - use **`media-image`** when you want to generate images as part of the workflow
110
+ - use **`media-video`** when you want to generate video content as part of the workflow
111
+ - use **`media-audio`** when you want to generate audio content as part of the workflow
107
112
 
108
113
  ### `start`
109
114
  Purpose:
@@ -370,6 +375,102 @@ Example:
370
375
  }
371
376
  ```
372
377
 
378
+ ### `media-image`
379
+ Purpose:
380
+ - generate images using available media generation skills
381
+
382
+ Use it when:
383
+ - you want to create visual content as part of a workflow
384
+ - you need to generate images from text prompts
385
+ - you want to create marketing visuals or illustrations
386
+
387
+ Required pieces:
388
+ - `assignedTo.agentId`
389
+ - either `action.image_prompt` or an upstream node output that provides the image prompt
390
+
391
+ What it does:
392
+ - scans available skills for image generation capabilities
393
+ - executes image generation via skill auto-discovery
394
+ - writes generated image data to node outputs
395
+
396
+ Example:
397
+
398
+ ```json
399
+ {
400
+ "id": "generate_hero_image",
401
+ "kind": "media-image",
402
+ "assignedTo": { "agentId": "development-team-lead" },
403
+ "action": {
404
+ "image_prompt": "A modern, clean illustration of a workflow automation dashboard",
405
+ "mediaType": "image"
406
+ }
407
+ }
408
+ ```
409
+
410
+ ### `media-video`
411
+ Purpose:
412
+ - generate video content using available media generation skills
413
+
414
+ Use it when:
415
+ - you want to create video content as part of a workflow
416
+ - you need to generate promotional or educational videos
417
+ - you want to create dynamic visual content
418
+
419
+ Required pieces:
420
+ - `assignedTo.agentId`
421
+ - either `action.video_prompt` or an upstream node output that provides the video prompt
422
+
423
+ What it does:
424
+ - scans available skills for video generation capabilities
425
+ - executes video generation via skill auto-discovery
426
+ - writes generated video data to node outputs
427
+
428
+ Example:
429
+
430
+ ```json
431
+ {
432
+ "id": "generate_demo_video",
433
+ "kind": "media-video",
434
+ "assignedTo": { "agentId": "development-team-lead" },
435
+ "action": {
436
+ "video_prompt": "A 30-second demo of workflow automation in action",
437
+ "mediaType": "video"
438
+ }
439
+ }
440
+ ```
441
+
442
+ ### `media-audio`
443
+ Purpose:
444
+ - generate audio content using available media generation skills
445
+
446
+ Use it when:
447
+ - you want to create audio content as part of a workflow
448
+ - you need to generate voiceovers or music
449
+ - you want to create podcast content or audio narration
450
+
451
+ Required pieces:
452
+ - `assignedTo.agentId`
453
+ - either `action.audio_prompt` or an upstream node output that provides the audio prompt
454
+
455
+ What it does:
456
+ - scans available skills for audio generation capabilities
457
+ - executes audio generation via skill auto-discovery
458
+ - writes generated audio data to node outputs
459
+
460
+ Example:
461
+
462
+ ```json
463
+ {
464
+ "id": "generate_voiceover",
465
+ "kind": "media-audio",
466
+ "assignedTo": { "agentId": "development-team-lead" },
467
+ "action": {
468
+ "audio_prompt": "Professional voiceover explaining our new feature launch",
469
+ "mediaType": "audio"
470
+ }
471
+ }
472
+ ```
473
+
373
474
  ---
374
475
 
375
476
  ## What is **not** currently a first-class built-in node type?
package/index.ts CHANGED
@@ -47,6 +47,7 @@ import { handleScaffold, scaffoldAgentFromRecipe } from "./src/handlers/scaffold
47
47
  import { handleAddRoleToTeam } from "./src/handlers/team-add-role";
48
48
  import { reconcileRecipeCronJobs } from "./src/handlers/cron";
49
49
  import { handleWorkflowsApprove, handleWorkflowsPollApprovals, handleWorkflowsResume, handleWorkflowsRun, handleWorkflowsRunnerOnce, handleWorkflowsRunnerTick, handleWorkflowsWorkerTick } from "./src/handlers/workflows";
50
+ import { handleMediaDriversList } from "./src/handlers/media-drivers";
50
51
  import { listRecipeFiles, loadRecipeById, workspacePath } from "./src/lib/recipes";
51
52
  import {
52
53
  executeWorkspaceCleanup,
@@ -728,6 +729,14 @@ workflows
728
729
  console.log(JSON.stringify(res, null, 2));
729
730
  });
730
731
 
732
+ workflows
733
+ .command("media-drivers")
734
+ .description("List available media generation drivers with env-var availability")
735
+ .action(async () => {
736
+ const drivers = await handleMediaDriversList();
737
+ console.log(JSON.stringify(drivers));
738
+ });
739
+
731
740
  workflows
732
741
  .command("poll-approvals")
733
742
  .description("Auto-resume any workflow runs whose approval decision has been recorded (approved/rejected)")
@@ -2,7 +2,7 @@
2
2
  "id": "recipes",
3
3
  "name": "Recipes",
4
4
  "description": "Markdown recipes that scaffold agents and teams (workspace-local).",
5
- "version": "0.4.33",
5
+ "version": "0.4.35",
6
6
  "configSchema": {
7
7
  "type": "object",
8
8
  "additionalProperties": false,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@jiggai/recipes",
3
- "version": "0.4.33",
3
+ "version": "0.4.35",
4
4
  "description": "ClawRecipes plugin for OpenClaw (markdown recipes -> scaffold agents/teams)",
5
5
  "main": "index.ts",
6
6
  "type": "commonjs",
@@ -0,0 +1,49 @@
1
+ import { getAllDrivers, isDriverAvailable } from '../lib/workflows/media-drivers/registry';
2
+ import { loadConfigEnv } from '../lib/workflows/media-drivers/utils';
3
+
4
+ export interface DurationConstraintsInfo {
5
+ minSeconds: number;
6
+ maxSeconds: number;
7
+ defaultSeconds: number;
8
+ stepSeconds?: number;
9
+ }
10
+
11
+ export interface MediaDriverInfo {
12
+ slug: string;
13
+ displayName: string;
14
+ mediaType: 'image' | 'video' | 'audio';
15
+ requiredEnvVars: string[];
16
+ available: boolean;
17
+ missingEnvVars: string[];
18
+ durationConstraints: DurationConstraintsInfo | null;
19
+ }
20
+
21
+ /**
22
+ * List all known media drivers with availability status.
23
+ */
24
+ export async function handleMediaDriversList(): Promise<MediaDriverInfo[]> {
25
+ const configEnv = await loadConfigEnv();
26
+
27
+ // Merge process.env (strings only) with config env vars
28
+ const mergedEnv: Record<string, string> = {};
29
+ for (const [k, v] of Object.entries(process.env)) {
30
+ if (typeof v === 'string') mergedEnv[k] = v;
31
+ }
32
+ Object.assign(mergedEnv, configEnv);
33
+
34
+ return getAllDrivers().map((driver) => {
35
+ const available = isDriverAvailable(driver.slug, mergedEnv);
36
+ const missing = driver.requiredEnvVars.filter(
37
+ (v) => !mergedEnv[v] || mergedEnv[v].trim().length === 0
38
+ );
39
+ return {
40
+ slug: driver.slug,
41
+ displayName: driver.displayName,
42
+ mediaType: driver.mediaType,
43
+ requiredEnvVars: driver.requiredEnvVars,
44
+ available,
45
+ missingEnvVars: missing,
46
+ durationConstraints: driver.durationConstraints ?? null,
47
+ };
48
+ });
49
+ }
@@ -0,0 +1,128 @@
1
+ import { MediaDriver, MediaDriverInvokeOpts, MediaDriverResult } from './types';
2
+ import { findSkillDir, findVenvPython, runScript, parseMediaOutput, findScriptInSkill } from './utils';
3
+
4
+ export class GenericDriver implements MediaDriver {
5
+ slug: string;
6
+ mediaType: 'image' | 'video' | 'audio';
7
+ displayName: string;
8
+ requiredEnvVars: string[] = [];
9
+ durationConstraints = null;
10
+
11
+ constructor(slug: string, mediaType: 'image' | 'video' | 'audio', displayName?: string) {
12
+ this.slug = slug;
13
+ this.mediaType = mediaType;
14
+ this.displayName = displayName || `Generic ${mediaType} driver for ${slug}`;
15
+ }
16
+
17
+ async invoke(opts: MediaDriverInvokeOpts): Promise<MediaDriverResult> {
18
+ const { prompt, outputDir, env, timeout } = opts;
19
+
20
+ // Find the skill directory
21
+ const skillDir = await findSkillDir(this.slug);
22
+ if (!skillDir) {
23
+ throw new Error(`Skill directory not found for ${this.slug}`);
24
+ }
25
+
26
+ // Determine script candidates based on media type
27
+ const scriptCandidates = this.mediaType === 'image'
28
+ ? ['generate_image.py', 'generate_image.sh', 'generate.sh']
29
+ : this.mediaType === 'video'
30
+ ? ['generate_video.py', 'generate_video.sh', 'generate.py', 'generate.sh']
31
+ : ['generate_audio.py', 'generate_audio.sh', 'generate.py', 'generate.sh'];
32
+
33
+ // Find the script
34
+ const scriptPath = await findScriptInSkill(skillDir, scriptCandidates);
35
+ if (!scriptPath) {
36
+ throw new Error(`No generation script found in ${skillDir}. Looked for: ${scriptCandidates.join(', ')}`);
37
+ }
38
+
39
+ // Determine runner
40
+ let runner = 'bash';
41
+ if (scriptPath.endsWith('.py')) {
42
+ runner = await findVenvPython(skillDir);
43
+ }
44
+
45
+ // Execute the script with stdin input (most common interface)
46
+ const scriptOutput = runScript({
47
+ runner,
48
+ script: scriptPath,
49
+ stdin: prompt,
50
+ env: {
51
+ ...env,
52
+ HOME: process.env.HOME || '/home/control',
53
+ },
54
+ cwd: outputDir,
55
+ timeout,
56
+ });
57
+
58
+ // Try to parse MEDIA: output first
59
+ let filePath = parseMediaOutput(scriptOutput);
60
+
61
+ // If no MEDIA: prefix, try to find the actual file path in the output
62
+ if (!filePath) {
63
+ const lines = scriptOutput.split('\n').map(line => line.trim()).filter(Boolean);
64
+ // Look for lines that look like file paths
65
+ for (const line of lines.reverse()) {
66
+ if (line.includes('/') && (line.includes('.') || line.includes(outputDir))) {
67
+ filePath = line;
68
+ break;
69
+ }
70
+ }
71
+ }
72
+
73
+ if (!filePath) {
74
+ throw new Error(`No file path found in script output. Output: ${scriptOutput}`);
75
+ }
76
+
77
+ return {
78
+ filePath,
79
+ metadata: {
80
+ skill: this.slug,
81
+ prompt,
82
+ script_output: scriptOutput,
83
+ script_path: scriptPath,
84
+ },
85
+ };
86
+ }
87
+
88
+ /**
89
+ * Create a generic driver by auto-detecting a skill's capabilities
90
+ */
91
+ static async createFromSkill(slug: string): Promise<GenericDriver | null> {
92
+ const skillDir = await findSkillDir(slug);
93
+ if (!skillDir) {
94
+ return null;
95
+ }
96
+
97
+ // Check what types of scripts are available
98
+ const imageScripts = ['generate_image.py', 'generate_image.sh'];
99
+ const videoScripts = ['generate_video.py', 'generate_video.sh'];
100
+ const audioScripts = ['generate_audio.py', 'generate_audio.sh'];
101
+
102
+ // Check for image generation capability
103
+ const imageScript = await findScriptInSkill(skillDir, imageScripts);
104
+ if (imageScript) {
105
+ return new GenericDriver(slug, 'image', `${slug} Image Generation`);
106
+ }
107
+
108
+ // Check for video generation capability
109
+ const videoScript = await findScriptInSkill(skillDir, videoScripts);
110
+ if (videoScript) {
111
+ return new GenericDriver(slug, 'video', `${slug} Video Generation`);
112
+ }
113
+
114
+ // Check for audio generation capability
115
+ const audioScript = await findScriptInSkill(skillDir, audioScripts);
116
+ if (audioScript) {
117
+ return new GenericDriver(slug, 'audio', `${slug} Audio Generation`);
118
+ }
119
+
120
+ // Fall back to generic generate script
121
+ const genericScript = await findScriptInSkill(skillDir, ['generate.py', 'generate.sh']);
122
+ if (genericScript) {
123
+ return new GenericDriver(slug, 'image', `${slug} Generic Generation`);
124
+ }
125
+
126
+ return null;
127
+ }
128
+ }
@@ -0,0 +1,22 @@
1
+ export {
2
+ getDriver,
3
+ getDriversByType,
4
+ getAllDrivers,
5
+ isDriverAvailable,
6
+ getAvailableDrivers,
7
+ getAvailableDriversByType
8
+ } from './registry';
9
+
10
+ export type {
11
+ MediaDriver,
12
+ MediaDriverInvokeOpts,
13
+ MediaDriverResult,
14
+ DurationConstraints
15
+ } from './types';
16
+
17
+ export { NanoBananaPro } from './nano-banana-pro.driver';
18
+ export { OpenAIImageGen } from './openai-image-gen.driver';
19
+ export { RunwayVideo } from './runway-video.driver';
20
+ export { KlingVideo } from './kling-video.driver';
21
+ export { LumaVideo } from './luma-video.driver';
22
+ export { GenericDriver } from './generic.driver';
@@ -0,0 +1,110 @@
1
+ import * as path from 'path';
2
+ import * as fs from 'fs';
3
+ import { MediaDriver, MediaDriverInvokeOpts, MediaDriverResult, DurationConstraints, parseDuration } from './types';
4
+ import { findSkillDir, runScript, parseMediaOutput } from './utils';
5
+
6
+ /**
7
+ * Kling AI video driver — uses official `klingai` ClawHub skill.
8
+ *
9
+ * Auth: JWT via ~/.config/kling/.credentials (Access Key + Secret Key).
10
+ * NOT a simple Bearer API key — the skill's auth.mjs handles JWT signing.
11
+ * No env var needed; credentials file is the source of truth.
12
+ */
13
+ export class KlingVideo implements MediaDriver {
14
+ slug = 'klingai';
15
+ mediaType = 'video' as const;
16
+ displayName = 'Kling AI Video (Official)';
17
+ // Auth is via ~/.config/kling/.credentials, not env vars.
18
+ // We check for the credentials file in a custom availability method.
19
+ requiredEnvVars: string[] = [];
20
+ durationConstraints: DurationConstraints = { minSeconds: 3, maxSeconds: 15, defaultSeconds: 5, stepSeconds: 1 };
21
+
22
+ /**
23
+ * Check if Kling credentials are configured (credentials file exists with AK/SK).
24
+ */
25
+ isConfigured(): boolean {
26
+ const home = process.env.HOME || '/home/control';
27
+ const credPath = path.join(home, '.config', 'kling', '.credentials');
28
+ try {
29
+ const content = fs.readFileSync(credPath, 'utf8');
30
+ return content.includes('access_key_id') && content.includes('secret_access_key');
31
+ } catch {
32
+ return false;
33
+ }
34
+ }
35
+
36
+ async invoke(opts: MediaDriverInvokeOpts): Promise<MediaDriverResult> {
37
+ const { prompt, outputDir, env, timeout, config } = opts;
38
+ // Kling supports 3-15s; clamp to valid range
39
+ const rawDuration = Math.max(3, Math.min(15, Number(parseDuration(config))));
40
+ const duration = String(rawDuration);
41
+
42
+ const skillDir = await findSkillDir(this.slug);
43
+ if (!skillDir) {
44
+ throw new Error(
45
+ `Skill directory not found for ${this.slug}. Install it: clawhub install klingai --force`
46
+ );
47
+ }
48
+
49
+ const scriptPath = path.join(skillDir, 'scripts', 'video.mjs');
50
+
51
+ // The official skill is a Node.js script (not Python)
52
+ const runner = 'node';
53
+
54
+ const scriptOutput = runScript({
55
+ runner,
56
+ script: scriptPath,
57
+ args: [
58
+ '--prompt', prompt,
59
+ '--output_dir', outputDir,
60
+ '--duration', duration,
61
+ '--aspect_ratio', String(config?.aspect_ratio ?? config?.size ?? '16:9'),
62
+ '--mode', 'pro',
63
+ ],
64
+ env: {
65
+ ...env,
66
+ HOME: process.env.HOME || '/home/control',
67
+ },
68
+ cwd: outputDir,
69
+ timeout,
70
+ });
71
+
72
+ // The script prints "Done: /path/to/file.mp4" or "Saved: /path/to/file.mp4"
73
+ const doneMatch = scriptOutput.match(/(?:Done|完成|Saved|已保存):\s*(.+\.mp4)/m);
74
+ if (doneMatch) {
75
+ return {
76
+ filePath: doneMatch[1].trim(),
77
+ metadata: { skill: this.slug, prompt },
78
+ };
79
+ }
80
+
81
+ // Fallback: check for MEDIA: prefix (in case a bridge wrapper is used)
82
+ const mediaPath = parseMediaOutput(scriptOutput);
83
+ if (mediaPath) {
84
+ return {
85
+ filePath: mediaPath,
86
+ metadata: { skill: this.slug, prompt },
87
+ };
88
+ }
89
+
90
+ // Last resort: look for any .mp4 in output dir
91
+ try {
92
+ const files = fs.readdirSync(outputDir)
93
+ .filter(f => f.endsWith('.mp4'))
94
+ .sort()
95
+ .reverse();
96
+ if (files.length > 0) {
97
+ return {
98
+ filePath: path.join(outputDir, files[0]),
99
+ metadata: { skill: this.slug, prompt },
100
+ };
101
+ }
102
+ } catch {
103
+ // ignore
104
+ }
105
+
106
+ throw new Error(
107
+ `Could not find generated video in output. Script output:\n${scriptOutput}`
108
+ );
109
+ }
110
+ }
@@ -0,0 +1,59 @@
1
+ import * as path from 'path';
2
+ import { MediaDriver, MediaDriverInvokeOpts, MediaDriverResult, DurationConstraints, parseDuration } from './types';
3
+ import { findSkillDir, findVenvPython, runScript, parseMediaOutput } from './utils';
4
+
5
+ export class LumaVideo implements MediaDriver {
6
+ slug = 'skill-luma-video';
7
+ mediaType = 'video' as const;
8
+ displayName = 'Luma Video Generation';
9
+ requiredEnvVars = ['LUMAAI_API_KEY'];
10
+ durationConstraints: DurationConstraints = { minSeconds: 5, maxSeconds: 9, defaultSeconds: 5, stepSeconds: 4 };
11
+
12
+ async invoke(opts: MediaDriverInvokeOpts): Promise<MediaDriverResult> {
13
+ const { prompt, outputDir, env, timeout, config } = opts;
14
+ const duration = parseDuration(config);
15
+
16
+ // Find the skill directory
17
+ const skillDir = await findSkillDir(this.slug);
18
+ if (!skillDir) {
19
+ throw new Error(`Skill directory not found for ${this.slug}`);
20
+ }
21
+
22
+ // Find the script
23
+ const scriptPath = path.join(skillDir, 'generate_video.py');
24
+
25
+ // Find Python runner
26
+ const runner = await findVenvPython(skillDir);
27
+
28
+ // Execute the script with stdin input
29
+ const scriptOutput = runScript({
30
+ runner,
31
+ script: scriptPath,
32
+ stdin: prompt,
33
+ env: {
34
+ ...env,
35
+ HOME: process.env.HOME || '/home/control',
36
+ MEDIA_DURATION: duration,
37
+ MEDIA_ASPECT_RATIO: String(config?.aspect_ratio ?? config?.size ?? '16:9'),
38
+ },
39
+ cwd: outputDir,
40
+ timeout,
41
+ });
42
+
43
+ // Parse the MEDIA: output
44
+ const filePath = parseMediaOutput(scriptOutput);
45
+
46
+ if (!filePath) {
47
+ throw new Error(`No MEDIA: path found in script output. Output: ${scriptOutput}`);
48
+ }
49
+
50
+ return {
51
+ filePath,
52
+ metadata: {
53
+ skill: this.slug,
54
+ prompt,
55
+ script_output: scriptOutput,
56
+ },
57
+ };
58
+ }
59
+ }
@@ -0,0 +1,70 @@
1
+ import * as path from 'path';
2
+ import { MediaDriver, MediaDriverInvokeOpts, MediaDriverResult } from './types';
3
+ import { findSkillDir, findVenvPython, runScript } from './utils';
4
+
5
+ export class NanoBananaPro implements MediaDriver {
6
+ slug = 'nano-banana-pro';
7
+ mediaType = 'image' as const;
8
+ displayName = 'Nano Banana Pro (Gemini Image Generation)';
9
+ requiredEnvVars = ['GEMINI_API_KEY'];
10
+ durationConstraints = null;
11
+
12
+ async invoke(opts: MediaDriverInvokeOpts): Promise<MediaDriverResult> {
13
+ const { prompt, outputDir, env, timeout, config } = opts;
14
+
15
+ // Find the skill directory
16
+ const skillDir = await findSkillDir(this.slug);
17
+ if (!skillDir) {
18
+ throw new Error(`Skill directory not found for ${this.slug}`);
19
+ }
20
+
21
+ // Find the script
22
+ const scriptPath = path.join(skillDir, 'scripts', 'generate_image.py');
23
+
24
+ // Find Python runner - check for venv first, fallback to uv run
25
+ let runner: string;
26
+ try {
27
+ runner = await findVenvPython(skillDir);
28
+ } catch {
29
+ // Fallback to uv run if no venv
30
+ runner = 'uv run python';
31
+ }
32
+
33
+ // Generate a filename for the output
34
+ const filename = 'output.png';
35
+
36
+ // Map pixel size to resolution tier (nano-banana-pro uses 1K/2K/4K)
37
+ const sizeStr = String(config?.size ?? '1024x1024');
38
+ const maxDim = Math.max(...sizeStr.split('x').map(Number).filter(n => !isNaN(n)), 1024);
39
+ const resolution = maxDim >= 3840 ? '4K' : maxDim >= 1792 ? '2K' : '1K';
40
+
41
+ // Execute the script with argparse CLI interface
42
+ const scriptOutput = runScript({
43
+ runner,
44
+ script: scriptPath,
45
+ args: ['--prompt', prompt, '--filename', filename, '--resolution', resolution],
46
+ env: {
47
+ ...env,
48
+ HOME: process.env.HOME || '/home/control',
49
+ },
50
+ cwd: outputDir,
51
+ timeout,
52
+ });
53
+
54
+ // nano-banana-pro prints the full path on stdout
55
+ const outputPath = scriptOutput.trim();
56
+
57
+ if (!outputPath || !outputPath.includes('.')) {
58
+ throw new Error(`No valid file path returned from script. Output: ${scriptOutput}`);
59
+ }
60
+
61
+ return {
62
+ filePath: outputPath,
63
+ metadata: {
64
+ skill: this.slug,
65
+ prompt,
66
+ script_output: scriptOutput,
67
+ },
68
+ };
69
+ }
70
+ }
@@ -0,0 +1,60 @@
1
+ import * as path from 'path';
2
+ import { MediaDriver, MediaDriverInvokeOpts, MediaDriverResult } from './types';
3
+ import { findSkillDir, findVenvPython, runScript, parseMediaOutput } from './utils';
4
+
5
+ export class OpenAIImageGen implements MediaDriver {
6
+ slug = 'openai-image-gen';
7
+ mediaType = 'image' as const;
8
+ displayName = 'OpenAI Image Generation (DALL-E)';
9
+ requiredEnvVars = ['OPENAI_API_KEY'];
10
+ durationConstraints = null;
11
+
12
+ async invoke(opts: MediaDriverInvokeOpts): Promise<MediaDriverResult> {
13
+ const { prompt, outputDir, env, timeout, config } = opts;
14
+
15
+ // Find the skill directory
16
+ const skillDir = await findSkillDir(this.slug);
17
+ if (!skillDir) {
18
+ throw new Error(`Skill directory not found for ${this.slug}`);
19
+ }
20
+
21
+ // Find the script
22
+ const scriptPath = path.join(skillDir, 'generate_image.py');
23
+
24
+ // Find Python runner
25
+ const runner = await findVenvPython(skillDir);
26
+
27
+ // Pass size via env var (script reads DALL_E_SIZE, defaults to 1024x1024)
28
+ const size = String(config?.size ?? '1024x1024');
29
+
30
+ // Execute the script with stdin input
31
+ const scriptOutput = runScript({
32
+ runner,
33
+ script: scriptPath,
34
+ stdin: prompt,
35
+ env: {
36
+ ...env,
37
+ HOME: process.env.HOME || '/home/control',
38
+ DALL_E_SIZE: size,
39
+ },
40
+ cwd: outputDir,
41
+ timeout,
42
+ });
43
+
44
+ // Parse the MEDIA: output
45
+ const filePath = parseMediaOutput(scriptOutput);
46
+
47
+ if (!filePath) {
48
+ throw new Error(`No MEDIA: path found in script output. Output: ${scriptOutput}`);
49
+ }
50
+
51
+ return {
52
+ filePath,
53
+ metadata: {
54
+ skill: this.slug,
55
+ prompt,
56
+ script_output: scriptOutput,
57
+ },
58
+ };
59
+ }
60
+ }