@jiggai/recipes 0.4.32 → 0.4.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -104,6 +104,9 @@ Use this when you are deciding what kind of node to add:
104
104
  - use **`tool`** when you want the workflow to call a tool or side-effecting action
105
105
  - use **`human_approval`** when a person must approve before the workflow continues
106
106
  - use **`writeback`** when you want to append workflow breadcrumbs/results into team files
107
+ - use **`media-image`** when you want to generate images as part of the workflow
108
+ - use **`media-video`** when you want to generate video content as part of the workflow
109
+ - use **`media-audio`** when you want to generate audio content as part of the workflow
107
110
 
108
111
  ### `start`
109
112
  Purpose:
@@ -370,6 +373,102 @@ Example:
370
373
  }
371
374
  ```
372
375
 
376
+ ### `media-image`
377
+ Purpose:
378
+ - generate images using available media generation skills
379
+
380
+ Use it when:
381
+ - you want to create visual content as part of a workflow
382
+ - you need to generate images from text prompts
383
+ - you want to create marketing visuals or illustrations
384
+
385
+ Required pieces:
386
+ - `assignedTo.agentId`
387
+ - either `action.image_prompt` or an upstream node output containing the image prompt
388
+
389
+ What it does:
390
+ - scans available skills for image generation capabilities
391
+ - executes image generation via skill auto-discovery
392
+ - writes generated image data to node outputs
393
+
394
+ Example:
395
+
396
+ ```json
397
+ {
398
+ "id": "generate_hero_image",
399
+ "kind": "media-image",
400
+ "assignedTo": { "agentId": "development-team-lead" },
401
+ "action": {
402
+ "image_prompt": "A modern, clean illustration of a workflow automation dashboard",
403
+ "mediaType": "image"
404
+ }
405
+ }
406
+ ```
407
+
408
+ ### `media-video`
409
+ Purpose:
410
+ - generate video content using available media generation skills
411
+
412
+ Use it when:
413
+ - you want to create video content as part of a workflow
414
+ - you need to generate promotional or educational videos
415
+ - you want to create dynamic visual content
416
+
417
+ Required pieces:
418
+ - `assignedTo.agentId`
419
+ - either `action.video_prompt` or an upstream node output containing the video prompt
420
+
421
+ What it does:
422
+ - scans available skills for video generation capabilities
423
+ - executes video generation via skill auto-discovery
424
+ - writes generated video data to node outputs
425
+
426
+ Example:
427
+
428
+ ```json
429
+ {
430
+ "id": "generate_demo_video",
431
+ "kind": "media-video",
432
+ "assignedTo": { "agentId": "development-team-lead" },
433
+ "action": {
434
+ "video_prompt": "A 30-second demo of workflow automation in action",
435
+ "mediaType": "video"
436
+ }
437
+ }
438
+ ```
439
+
440
+ ### `media-audio`
441
+ Purpose:
442
+ - generate audio content using available media generation skills
443
+
444
+ Use it when:
445
+ - you want to create audio content as part of a workflow
446
+ - you need to generate voiceovers or music
447
+ - you want to create podcast content or audio narration
448
+
449
+ Required pieces:
450
+ - `assignedTo.agentId`
451
+ - either `action.audio_prompt` or an upstream node output containing the audio prompt
452
+
453
+ What it does:
454
+ - scans available skills for audio generation capabilities
455
+ - executes audio generation via skill auto-discovery
456
+ - writes generated audio data to node outputs
457
+
458
+ Example:
459
+
460
+ ```json
461
+ {
462
+ "id": "generate_voiceover",
463
+ "kind": "media-audio",
464
+ "assignedTo": { "agentId": "development-team-lead" },
465
+ "action": {
466
+ "audio_prompt": "Professional voiceover explaining our new feature launch",
467
+ "mediaType": "audio"
468
+ }
469
+ }
470
+ ```
471
+
373
472
  ---
374
473
 
375
474
  ## What is **not** currently a first-class built-in node type?
@@ -2,7 +2,7 @@
2
2
  "id": "recipes",
3
3
  "name": "Recipes",
4
4
  "description": "Markdown recipes that scaffold agents and teams (workspace-local).",
5
- "version": "0.4.32",
5
+ "version": "0.4.34",
6
6
  "configSchema": {
7
7
  "type": "object",
8
8
  "additionalProperties": false,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@jiggai/recipes",
3
- "version": "0.4.32",
3
+ "version": "0.4.34",
4
4
  "description": "ClawRecipes plugin for OpenClaw (markdown recipes -> scaffold agents/teams)",
5
5
  "main": "index.ts",
6
6
  "type": "commonjs",
@@ -399,7 +399,38 @@ export async function runWorkflowWorkerTick(api: OpenClawPluginApi, opts: {
399
399
 
400
400
  // Apply template variable replacement
401
401
  const prompt = templateReplace(promptRaw, vars);
402
-
402
+
403
+ // Build output format instructions from outputFields when defined
404
+ const nodeConfig = asRecord((node as unknown as Record<string, unknown>)['config']);
405
+ const outputFields = Array.isArray(nodeConfig['outputFields']) ? nodeConfig['outputFields'] as Array<Record<string, string>> : [];
406
+ const validFields = outputFields.filter(f => typeof f === 'object' && f && typeof f['name'] === 'string' && f['name'].trim());
407
+
408
+ let outputFormatBlock: string;
409
+ if (validFields.length > 0) {
410
+ const fieldDescriptions = validFields.map(f => {
411
+ const name = String(f['name']).trim();
412
+ const type = String(f['type'] ?? 'text').trim();
413
+ const typeHint = type === 'list' ? '(array of strings)'
414
+ : type === 'json' ? '(JSON object)'
415
+ : '(string)';
416
+ return ` - "${name}" ${typeHint}`;
417
+ }).join('\n');
418
+ outputFormatBlock = [
419
+ 'Return a JSON object with EXACTLY these fields:',
420
+ fieldDescriptions,
421
+ '',
422
+ 'Rules:',
423
+ '- Return ONLY the JSON object, no markdown fences or explanation.',
424
+ '- Every field listed above MUST be present in your response.',
425
+ '- "text" fields → string values.',
426
+ '- "list" fields → arrays of strings.',
427
+ '- "json" fields → nested JSON objects.',
428
+ '- You may include additional fields if genuinely useful, but the listed fields are required.',
429
+ ].join('\n');
430
+ } else {
431
+ outputFormatBlock = 'Return ONLY the final content (the worker will store it as JSON).';
432
+ }
433
+
403
434
  const taskText = [
404
435
  `You are executing a workflow run for teamId=${teamId}.`,
405
436
  `Workflow: ${workflow.name ?? workflow.id ?? workflowFile}`,
@@ -408,7 +439,7 @@ export async function runWorkflowWorkerTick(api: OpenClawPluginApi, opts: {
408
439
  `\n---\nPROMPT TEMPLATE\n---\n`,
409
440
  prompt.trim(),
410
441
  `\n---\nOUTPUT FORMAT\n---\n`,
411
- `Return ONLY the final content (the worker will store it as JSON).`,
442
+ outputFormatBlock,
412
443
  ].join('\n');
413
444
 
414
445
  let text = '';
@@ -436,6 +467,30 @@ export async function runWorkflowWorkerTick(api: OpenClawPluginApi, opts: {
436
467
  const memoryContext = await buildMemoryContext(teamDir);
437
468
  const promptWithMemory = memoryContext ? `${memoryContext}\n\n${taskText}` : taskText;
438
469
 
470
+ // Build JSON Schema from outputFields for structured validation
471
+ let outputSchema: Record<string, unknown> | undefined;
472
+ if (validFields.length > 0) {
473
+ const properties: Record<string, Record<string, unknown>> = {};
474
+ const required: string[] = [];
475
+ for (const f of validFields) {
476
+ const name = String(f['name']).trim();
477
+ const type = String(f['type'] ?? 'text').trim();
478
+ required.push(name);
479
+ if (type === 'list') {
480
+ properties[name] = { type: 'array', items: { type: 'string' } };
481
+ } else if (type === 'json') {
482
+ properties[name] = { type: 'object' };
483
+ } else {
484
+ properties[name] = { type: 'string' };
485
+ }
486
+ }
487
+ outputSchema = {
488
+ type: 'object',
489
+ properties,
490
+ required,
491
+ };
492
+ }
493
+
439
494
  const llmRes = await toolsInvoke<unknown>(api, {
440
495
  tool: 'llm-task',
441
496
  action: 'json',
@@ -445,6 +500,7 @@ export async function runWorkflowWorkerTick(api: OpenClawPluginApi, opts: {
445
500
  timeoutMs,
446
501
  ...(provider ? { provider } : {}),
447
502
  ...(model ? { model } : {}),
503
+ ...(outputSchema ? { schema: outputSchema } : {}),
448
504
  },
449
505
  });
450
506
 
@@ -956,11 +1012,14 @@ export async function runWorkflowWorkerTick(api: OpenClawPluginApi, opts: {
956
1012
  const timeoutMsRaw = Number(asString(config['timeoutMs'] ?? '300000'));
957
1013
  const timeoutMs = Number.isFinite(timeoutMsRaw) && timeoutMsRaw > 0 ? timeoutMsRaw : 300000;
958
1014
 
959
- // ── Step 1: Prompt refinement (optional — skip for images, use llm-task for video) ──
1015
+ // ── Step 1: Prompt refinement (optional) ──
1016
+ // skipRefinement: when the upstream LLM already produced a clean brief,
1017
+ // skip the extra llm-task call that tends to over-elaborate.
1018
+ const skipRefinement = String(config['skipRefinement'] ?? config['skip_refinement'] ?? 'false').toLowerCase() === 'true';
960
1019
  let refinedPrompt = prompt.trim();
961
1020
 
962
- if (mediaType !== 'image') {
963
- // Only use llm-task refinement for non-image media (video/audio)
1021
+ if (!skipRefinement && mediaType !== 'image') {
1022
+ // Use llm-task refinement for non-image media (video/audio)
964
1023
  const step1Text = [
965
1024
  `You are a media prompt engineer for teamId=${teamId}.`,
966
1025
  `Workflow: ${workflow.name ?? workflow.id ?? workflowFile}`,
@@ -1066,18 +1125,64 @@ export async function runWorkflowWorkerTick(api: OpenClawPluginApi, opts: {
1066
1125
  try {
1067
1126
  const cfgRaw = await fs.readFile(path.join(homedir, '.openclaw', 'openclaw.json'), 'utf8');
1068
1127
  const cfgParsed = JSON.parse(cfgRaw);
1069
- if (cfgParsed?.env && typeof cfgParsed.env === 'object') {
1128
+
1129
+ // openclaw.json supports multiple shapes historically:
1130
+ // - { env: { KEY: "..." } }
1131
+ // - { env: { vars: { KEY: "..." } } } (current)
1132
+ const envBlock = (cfgParsed as any)?.env;
1133
+ const maybeVars = envBlock && typeof envBlock === 'object' ? (envBlock as any).vars : null;
1134
+ const rawVars = (maybeVars && typeof maybeVars === 'object') ? maybeVars : envBlock;
1135
+
1136
+ if (rawVars && typeof rawVars === 'object') {
1070
1137
  configEnv = Object.fromEntries(
1071
- Object.entries(cfgParsed.env).filter(([, v]) => typeof v === 'string')
1138
+ Object.entries(rawVars).filter(([, v]) => typeof v === 'string')
1072
1139
  ) as Record<string, string>;
1073
1140
  }
1074
1141
  } catch { /* config read failed — proceed with process.env only */ }
1075
1142
 
1076
- const runner = scriptPath.endsWith('.py') ? 'python3' : 'bash';
1077
- const scriptOutput = execSync(
1078
- `${runner} ${JSON.stringify(scriptPath)}`,
1079
- { cwd: mediaDir, timeout: timeoutMs, encoding: 'utf8', input: refinedPrompt, env: { ...process.env, ...configEnv, HOME: homedir } }
1080
- ).trim();
1143
+ // If the .py script has a venv alongside it, use that Python; otherwise system python3.
1144
+ let runner = 'bash';
1145
+ if (scriptPath.endsWith('.py')) {
1146
+ const scriptDir = path.dirname(scriptPath);
1147
+ const venvPython = path.join(scriptDir, '.venv', 'bin', 'python');
1148
+ try {
1149
+ await fs.access(venvPython);
1150
+ runner = venvPython;
1151
+ } catch {
1152
+ runner = 'python3';
1153
+ }
1154
+ }
1155
+
1156
+ let scriptOutput = '';
1157
+ try {
1158
+ scriptOutput = execSync(
1159
+ `${runner} ${JSON.stringify(scriptPath)}`,
1160
+ {
1161
+ cwd: mediaDir,
1162
+ timeout: timeoutMs,
1163
+ encoding: 'utf8',
1164
+ input: refinedPrompt,
1165
+ env: {
1166
+ ...process.env,
1167
+ ...configEnv,
1168
+ HOME: homedir,
1169
+ MEDIA_OUTPUT_DIR: mediaDir,
1170
+ },
1171
+ }
1172
+ ).trim();
1173
+ } catch (err) {
1174
+ // Surface stderr/stdout to make debugging skill scripts possible.
1175
+ // execSync throws an Error with extra fields: stdout/stderr (Buffer|string)
1176
+ const e = err as any;
1177
+ const stdout = typeof e?.stdout === 'string' ? e.stdout : (Buffer.isBuffer(e?.stdout) ? e.stdout.toString('utf8') : '');
1178
+ const stderr = typeof e?.stderr === 'string' ? e.stderr : (Buffer.isBuffer(e?.stderr) ? e.stderr.toString('utf8') : '');
1179
+ const msg = [
1180
+ e?.message ? String(e.message) : 'Skill script failed',
1181
+ stdout ? `\n--- stdout ---\n${stdout.trim()}` : '',
1182
+ stderr ? `\n--- stderr ---\n${stderr.trim()}` : '',
1183
+ ].filter(Boolean).join('');
1184
+ throw new Error(msg);
1185
+ }
1081
1186
 
1082
1187
  // Parse the output — skill scripts print "MEDIA:/path/to/file"
1083
1188
  const mediaMatch = scriptOutput.match(/MEDIA:(.+)$/m);