@sogni-ai/sogni-creative-agent-skill 3.1.0-alpha.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/SKILL.md +1 -1
- package/generated/creative-agent-runtime.mjs +1 -1
- package/openclaw.plugin.json +1 -1
- package/package.json +2 -2
- package/skill-package.json +1 -1
- package/sogni-agent.mjs +9 -2
- package/version.mjs +1 -1
package/SKILL.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
name: sogni-creative-agent-skill
|
|
3
3
|
description: "Sogni Creative Agent Skill: agent skill and CLI for image, video, and music generation using Sogni AI's decentralized GPU network. Supports personas (named people with saved reference photos and voice clips), persistent memories (user preferences across sessions), custom personality, style transfer, angle synthesis, and multi-step creative workflows. Ask the agent to \"draw\", \"generate\", \"create an image\", \"make a video/animate\", \"make music\", \"apply a style\", or \"generate me as a superhero\"."
|
|
4
4
|
metadata:
|
|
5
|
-
version: "
|
|
5
|
+
version: "3.1.0"
|
|
6
6
|
homepage: https://sogni.ai
|
|
7
7
|
clawdbot:
|
|
8
8
|
emoji: "🎨"
|
|
@@ -2205,7 +2205,7 @@ const PROMPT_CONTRACTS = [
|
|
|
2205
2205
|
"contractId": "generate_image_v1",
|
|
2206
2206
|
"version": "1.1.0",
|
|
2207
2207
|
"toolName": "generate_image",
|
|
2208
|
-
"baseDescription": "generate_image creates images from text descriptions. Use for text-only image generation;\nuse edit_image when uploaded or reference images must guide identity/likeness.\nException: Z-image and Z-image Turbo image-to-image/enhancement requests use generate_image\nwith model=\"z-turbo\" or model=\"z-image\", sourceImageIndex=-1, and starting_image_strength;\ndo not route explicit Z-image Turbo uploaded-image enhancement to edit_image because\nedit_image does not expose Z-image models.\n\nFLUX.2 PROMPT ORDER: [SUBJECT] β [ATTRIBUTES] β [ACTION/POSE] β [CAMERA/FRAMING]\nβ [ENVIRONMENT] β [LIGHTING] β [STYLE/MEDIUM] β [MATERIALS/TEXTURES] β\n[SECONDARY DETAILS]. Always start with the main subject, never mood or atmosphere.\nUse concrete nouns and observable adjectives β \"soft overcast daylight\" not \"nice lighting\".\nGood defaults when user is underspecified: medium shot for portraits, wide shot for\nenvironments, eye-level angle, soft natural light for realism.\n\nDYNAMIC PROMPTS: When numberOfVariations > 1, use Dynamic Prompt syntax to make each\nvariation meaningfully different β not just seed-different. Syntax: {a|b|c} cycles\nsequentially, {@a|b|c} picks randomly, {~a|b} paired cycling across groups. Rules: (1) Vary\nONLY what the user left unspecified β lock in everything they specified. (2) Match option\ncount to numberOfVariations so every result is unique. (3) Briefly tell the user what you're\nvarying β never show raw {|} syntax. (4) Skip when: user wants consistency, prompt is fully\nspecified, user typed their own {|} syntax, or iterating on a specific result. (5) NEVER put\nthe count or the word \"versions\"/\"variations\" inside the prompt β the prompt always describes\na single image. The multiplicity comes ONLY from numberOfVariations + the {|} syntax.\nLINKED VARIANTS: when multiple attributes must stay paired per result, use ONE top-level\nDynamic Prompt branch with one complete self-contained prompt per output. 
Do NOT split\nlinked fields into separate Dynamic Prompt groups.\n\nSELECTION-GATED IMAGE STAGES: If the user asks for N image options and says they will pick\none before a later dance/video/animation, call generate_image once with numberOfVariations=N.\nAfter images are created, stop and ask the user to choose; do not call dance_montage,\nanimate_photo, or generate_video until they select.\n\nIMAGEβVIDEO DIMENSION RULE: When generating an image that will feed into a video tool\n(animate_photo, sound_to_video, etc.), the image MUST be generated at the SAME aspect\nratio and dimensions as the target video. Default video aspect ratio is 16:9 landscape β\npass aspectRatio=\"16:9\" (or the user's specified/reference ratio) so the source image\nmatches the video output. Never generate a square image for a widescreen video. Exception:\na composite GPT Image 2 storyboard/keyframe sheet for a later Seedance video is a board,\nnot a single source frame; unless the user explicitly specifies a storyboard page/canvas/sheet\nshape, default the sheet image dimensions to a balanced grid that hosts the target\nscene-cell/frame aspect natively (portrait video target -> portrait or square sheet whose\ncolumns x rows grid produces ~9:16 cells; landscape video target -> landscape sheet whose\nrows x columns grid produces ~16:9 cells). Each scene-cell/frame area preserves the target\nvideo aspect ratio.\n\nSTORYBOARD IMAGE BATCH RULE: When rendering scene keyframes from a screenplay/storyboard,\nnumberOfVariations is only the count; the prompt MUST be one Dynamic Prompt branch with one\nfull keyframe prompt per scene:\n{scene 1 full keyframe prompt|scene 2 full keyframe prompt|...|scene N full keyframe prompt}.\nNEVER set numberOfVariations=N with only the first scene prompt β that creates N versions of\nscene 1. 
For full project requests, one generate_image batch for all scene keyframes, then\none animate_photo batch for all video clips in parallel.\n\nSTORYTELLING / BRAND / SOCIAL IMAGE PROMPTS: If generating a storyboard, ad concept,\ntrailer sheet, meme, creator post, or provocative social concept, make the first frame or\npanel immediately legible. Preserve the user's requested tone and audience. Use concrete\ncomposition, persona, product/brand role, caption placement, readable required text, and a\nclear visual transformation or punchline. For provocative adult social content, keep subjects\nclearly adult and consensual, PG-13/non-explicit, and avoid minor-coded styling or school-coded\nsettings while still optimizing visual magnet, persona, caption bait, and replay/comment value.\n\nGPT IMAGE 2 STORYBOARD SHEET β SEEDANCE AUTO-PROCEED: If the user asks to run the whole\nGPT Image 2 storyboard/keyframe sheet plus Seedance workflow without approval, the FIRST\ngenerate_image call must create ONE composite storyboard/keyframe sheet, not loose concept\nart and not separate keyframes. Use model=\"gpt-image-2\", numberOfVariations=1, and a\ncompiled storyboard prompt that literally includes: \"Create exactly N sequential video\nstoryboard frames as one composite storyboard image\", \"Target final video aspect ratio: X\",\na `SCENES:` section, and exactly N concrete scene entries named `SCENE_01`, `SCENE_02`,\netc. Each scene entry must include `Visual/Action:`, `Camera/Motion:`, `Dialogue/VO:`\n(use `[no dialogue]` when silent), `Audio/SFX:`, and any reference/visible-text notes\nneeded for that scene. Do not send only a source brief, storyboard concept, or generic\nlayout instructions as the prompt; malformed compiled storyboard prompts are blocked by\nquality audit instead of being repaired at runtime. 
Unless the user explicitly specifies another\nstoryboard page/canvas/sheet shape, default the GPT Image 2 storyboard sheet pixel dimensions\nto a balanced grid that hosts the target cell aspect natively: for a 9:16 portrait video,\npick a portrait-leaning sheet whose columns x rows grid produces ~9:16 cells (e.g., 12 cells\n-> ~3:4 sheet around 1728x2304, 6 cells -> ~27:32 around 1840x2176, 9 cells -> ~9:16 around\n1504x2672); for a 16:9 landscape video, pick a landscape sheet whose rows x columns grid\nproduces ~16:9 cells (e.g., 12 cells -> ~4:3 sheet around 2304x1728). Do not force landscape\n2560x1440 when cells are portrait β a landscape sheet with a portrait-cell grid cannot host\n9:16 cells without crushing them. Preserve the requested final video aspect ratio for every\nframe area. After\nthat image completes, call generate_video once using the generated storyboard board as\n@Image1/referenceImageIndices=[0], with skipPromptProcessing=false only when the user\nexplicitly wants the storyboard text rewritten; otherwise preserve the compiled shot guide\nand use skipPromptProcessing=true, expandPrompt=false.\n\nDO NOT USE generate_image FOR UPLOADED REFERENCE LOOPED VIDEO SEGMENTS: If the user says\nthe same uploaded image/reference should be reused as the first frame and last frame of each\nscripted segment/scene/clip before stitching, they are explicitly asking to animate the\nuploaded image, not to generate new storyboard keyframes. Do not call generate_image for\nthat request. Call animate_photo once with repeated uploaded source indices and per-scene\nprompts.\n\nREUSING RESULTS: When the user asks to redo, retry, or revise (e.g., \"try a new version\",\n\"redo the video with X\"), reuse the existing source images β do NOT regenerate them unless\nthe user explicitly asks for new images or describes changes to the images themselves.\nReference the existing result indices from the prior generation. 
If unsure whether the user\nwants new images, ask β don't regenerate by default.",
|
|
2208
|
+
"baseDescription": "generate_image creates images from text descriptions. Use for text-only image generation;\nuse edit_image when uploaded or reference images must guide identity/likeness.\nException: Z-image and Z-image Turbo image-to-image/enhancement requests use generate_image\nwith model=\"z-turbo\" or model=\"z-image\", sourceImageIndex=-1, and starting_image_strength;\ndo not route explicit Z-image Turbo uploaded-image enhancement to edit_image because\nedit_image does not expose Z-image models.\n\nBATCH FAN-OUT (HIGHEST-PRIORITY RULE β READ BEFORE ANYTHING ELSE BELOW):\nWhen the user explicitly asks for N images in the CURRENT turn, set numberOfVariations=N\nin ONE call. NEVER split into multiple serial generate_image calls. NEVER omit\nnumberOfVariations and try to \"generate the next one after this finishes\". Trigger phrasings:\n\"draw N\", \"make N\", \"give me N\", \"show me N\", \"render N\", \"create N\", \"generate N\",\n\"N more\", \"another N\", \"N as separate\", \"N separate images\", \"N different images\",\n\"N options\", \"N takes\", \"N versions\", \"N variations\", \"N pictures of\",\n\"all at the same time\", \"in parallel\", \"side by side as separate\".\n\nTHE PRIOR TURN DOES NOT ANCHOR THE CURRENT TURN. If the prior assistant turn used\nnumberOfVariations=1 with a composite \"N subjects in one image\" prompt, and the user\nnow says \"draw N more as separate images\" / \"as separate\" / \"separately\", DO NOT carry\nover numberOfVariations=1 from the prior call. The user is correcting that interpretation;\nset numberOfVariations=N for THIS call with one self-contained prompt per image via {|}\nDynamic Prompt branches. 
The new turn's count + separation language always wins over the\nprevious turn's pattern.\n\nWHEN BATCH FAN-OUT DOES NOT APPLY: numberOfVariations=1 with multiple subjects packed into\nONE prompt is correct only when the user clearly wants a SINGLE composite image (e.g.\n\"draw 2 goats in a meadow\" with no separation language, or explicit \"in one image\" / \"one\npicture of N\" / \"single image\" / \"composite\" / \"sheet\" / \"side-by-side composition\").\n\nFLUX.2 PROMPT ORDER: [SUBJECT] β [ATTRIBUTES] β [ACTION/POSE] β [CAMERA/FRAMING]\nβ [ENVIRONMENT] β [LIGHTING] β [STYLE/MEDIUM] β [MATERIALS/TEXTURES] β\n[SECONDARY DETAILS]. Always start with the main subject, never mood or atmosphere.\nUse concrete nouns and observable adjectives β \"soft overcast daylight\" not \"nice lighting\".\nGood defaults when user is underspecified: medium shot for portraits, wide shot for\nenvironments, eye-level angle, soft natural light for realism.\n\nDYNAMIC PROMPTS: When numberOfVariations > 1, use Dynamic Prompt syntax to make each\nvariation meaningfully different β not just seed-different. Syntax: {a|b|c} cycles\nsequentially, {@a|b|c} picks randomly, {~a|b} paired cycling across groups. Rules: (1) Vary\nONLY what the user left unspecified β lock in everything they specified. (2) Match option\ncount to numberOfVariations so every result is unique. (3) Briefly tell the user what you're\nvarying β never show raw {|} syntax. (4) Skip when: user wants consistency, prompt is fully\nspecified, user typed their own {|} syntax, or iterating on a specific result. (5) NEVER put\nthe count or the word \"versions\"/\"variations\" inside the prompt β the prompt always describes\na single image. The multiplicity comes ONLY from numberOfVariations + the {|} syntax.\nLINKED VARIANTS: when multiple attributes must stay paired per result, use ONE top-level\nDynamic Prompt branch with one complete self-contained prompt per output. 
Do NOT split\nlinked fields into separate Dynamic Prompt groups.\n\nSELECTION-GATED IMAGE STAGES: If the user asks for N image options and says they will pick\none before a later dance/video/animation, call generate_image once with numberOfVariations=N.\nAfter images are created, stop and ask the user to choose; do not call dance_montage,\nanimate_photo, or generate_video until they select.\n\nIMAGEβVIDEO DIMENSION RULE: When generating an image that will feed into a video tool\n(animate_photo, sound_to_video, etc.), the image MUST be generated at the SAME aspect\nratio and dimensions as the target video. Default video aspect ratio is 16:9 landscape β\npass aspectRatio=\"16:9\" (or the user's specified/reference ratio) so the source image\nmatches the video output. Never generate a square image for a widescreen video. Exception:\na composite GPT Image 2 storyboard/keyframe sheet for a later Seedance video is a board,\nnot a single source frame; unless the user explicitly specifies a storyboard page/canvas/sheet\nshape, default the sheet image dimensions to a balanced grid that hosts the target\nscene-cell/frame aspect natively (portrait video target -> portrait or square sheet whose\ncolumns x rows grid produces ~9:16 cells; landscape video target -> landscape sheet whose\nrows x columns grid produces ~16:9 cells). Each scene-cell/frame area preserves the target\nvideo aspect ratio.\n\nSTORYBOARD IMAGE BATCH RULE: When rendering scene keyframes from a screenplay/storyboard,\nnumberOfVariations is only the count; the prompt MUST be one Dynamic Prompt branch with one\nfull keyframe prompt per scene:\n{scene 1 full keyframe prompt|scene 2 full keyframe prompt|...|scene N full keyframe prompt}.\nNEVER set numberOfVariations=N with only the first scene prompt β that creates N versions of\nscene 1. 
For full project requests, one generate_image batch for all scene keyframes, then\none animate_photo batch for all video clips in parallel.\n\nSTORYTELLING / BRAND / SOCIAL IMAGE PROMPTS: If generating a storyboard, ad concept,\ntrailer sheet, meme, creator post, or provocative social concept, make the first frame or\npanel immediately legible. Preserve the user's requested tone and audience. Use concrete\ncomposition, persona, product/brand role, caption placement, readable required text, and a\nclear visual transformation or punchline. For provocative adult social content, keep subjects\nclearly adult and consensual, PG-13/non-explicit, and avoid minor-coded styling or school-coded\nsettings while still optimizing visual magnet, persona, caption bait, and replay/comment value.\n\nGPT IMAGE 2 STORYBOARD SHEET β SEEDANCE AUTO-PROCEED: If the user asks to run the whole\nGPT Image 2 storyboard/keyframe sheet plus Seedance workflow without approval, the FIRST\ngenerate_image call must create ONE composite storyboard/keyframe sheet, not loose concept\nart and not separate keyframes. Use model=\"gpt-image-2\", numberOfVariations=1, and a\ncompiled storyboard prompt that literally includes: \"Create exactly N sequential video\nstoryboard frames as one composite storyboard image\", \"Target final video aspect ratio: X\",\na `SCENES:` section, and exactly N concrete scene entries named `SCENE_01`, `SCENE_02`,\netc. Each scene entry must include `Visual/Action:`, `Camera/Motion:`, `Dialogue/VO:`\n(use `[no dialogue]` when silent), `Audio/SFX:`, and any reference/visible-text notes\nneeded for that scene. Do not send only a source brief, storyboard concept, or generic\nlayout instructions as the prompt; malformed compiled storyboard prompts are blocked by\nquality audit instead of being repaired at runtime. 
Unless the user explicitly specifies another\nstoryboard page/canvas/sheet shape, default the GPT Image 2 storyboard sheet pixel dimensions\nto a balanced grid that hosts the target cell aspect natively: for a 9:16 portrait video,\npick a portrait-leaning sheet whose columns x rows grid produces ~9:16 cells (e.g., 12 cells\n-> ~3:4 sheet around 1728x2304, 6 cells -> ~27:32 around 1840x2176, 9 cells -> ~9:16 around\n1504x2672); for a 16:9 landscape video, pick a landscape sheet whose rows x columns grid\nproduces ~16:9 cells (e.g., 12 cells -> ~4:3 sheet around 2304x1728). Do not force landscape\n2560x1440 when cells are portrait β a landscape sheet with a portrait-cell grid cannot host\n9:16 cells without crushing them. Preserve the requested final video aspect ratio for every\nframe area. After\nthat image completes, call generate_video once using the generated storyboard board as\n@Image1/referenceImageIndices=[0], with skipPromptProcessing=false only when the user\nexplicitly wants the storyboard text rewritten; otherwise preserve the compiled shot guide\nand use skipPromptProcessing=true, expandPrompt=false.\n\nDO NOT USE generate_image FOR UPLOADED REFERENCE LOOPED VIDEO SEGMENTS: If the user says\nthe same uploaded image/reference should be reused as the first frame and last frame of each\nscripted segment/scene/clip before stitching, they are explicitly asking to animate the\nuploaded image, not to generate new storyboard keyframes. Do not call generate_image for\nthat request. Call animate_photo once with repeated uploaded source indices and per-scene\nprompts.\n\nREUSING RESULTS: When the user asks to redo, retry, or revise (e.g., \"try a new version\",\n\"redo the video with X\"), reuse the existing source images β do NOT regenerate them unless\nthe user explicitly asks for new images or describes changes to the images themselves.\nReference the existing result indices from the prior generation. 
If unsure whether the user\nwants new images, ask β don't regenerate by default.",
|
|
2209
2209
|
"parameterDocs": {
|
|
2210
2210
|
"prompt": "Text description. Follow FLUX.2 prompt order: subject first. Use Dynamic Prompt syntax when numberOfVariations > 1.",
|
|
2211
2211
|
"numberOfVariations": "Number of distinct outputs. Use Dynamic Prompt {|} syntax to vary one attribute per image. Never put the count in the prompt itself.",
|
package/openclaw.plugin.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"id": "sogni-creative-agent-skill",
|
|
3
3
|
"name": "Sogni Creative Agent Skill — Image, Video & Music Generation",
|
|
4
4
|
"description": "Agent skill and CLI for Sogni AI image, video, and music generation.",
|
|
5
|
-
"version": "
|
|
5
|
+
"version": "3.1.0",
|
|
6
6
|
"skills": [
|
|
7
7
|
"."
|
|
8
8
|
],
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sogni-ai/sogni-creative-agent-skill",
|
|
3
|
-
"version": "3.1.0-alpha.0",
|
|
3
|
+
"version": "3.1.0",
|
|
4
4
|
"description": "Sogni Creative Agent Skill: agent skill and CLI for Sogni AI image, video, and music generation.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "sogni-agent.mjs",
|
|
@@ -66,7 +66,7 @@
|
|
|
66
66
|
"sogni-agent.mjs"
|
|
67
67
|
],
|
|
68
68
|
"dependencies": {
|
|
69
|
-
"@sogni-ai/sogni-intelligence-client": "^2.
|
|
69
|
+
"@sogni-ai/sogni-intelligence-client": "^2.2.6",
|
|
70
70
|
"execa": "^9.6.1",
|
|
71
71
|
"json5": "^2.2.3",
|
|
72
72
|
"sharp": "^0.34.5"
|
package/skill-package.json
CHANGED
package/sogni-agent.mjs
CHANGED
|
@@ -4,7 +4,6 @@
|
|
|
4
4
|
* Usage: sogni-agent [options] "prompt"
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
|
-
import { SogniClientWrapper, ClientEvent, getMaxContextImages as getWrapperMaxContextImages } from '@sogni-ai/sogni-intelligence-client';
|
|
8
7
|
import JSON5 from 'json5';
|
|
9
8
|
import { createHash, randomBytes } from 'crypto';
|
|
10
9
|
import { createRequire } from 'module';
|
|
@@ -59,7 +58,15 @@ import {
|
|
|
59
58
|
} from '@sogni-ai/sogni-intelligence-client/replay';
|
|
60
59
|
|
|
61
60
|
const require = createRequire(import.meta.url);
|
|
62
|
-
const
|
|
61
|
+
const rootClientModule = process.env.SOGNI_AGENT_TEST_STATE_PATH
|
|
62
|
+
? await import('@sogni-ai/sogni-intelligence-client')
|
|
63
|
+
: require('@sogni-ai/sogni-intelligence-client');
|
|
64
|
+
const {
|
|
65
|
+
SogniClientWrapper,
|
|
66
|
+
ClientEvent,
|
|
67
|
+
getMaxContextImages: getWrapperMaxContextImages,
|
|
68
|
+
parseCreativeWorkflowSseChunk
|
|
69
|
+
} = rootClientModule;
|
|
63
70
|
|
|
64
71
|
// ---------------------------------------------------------------------------
|
|
65
72
|
// Path sanitization — defense-in-depth for any value that becomes a file path
|
package/version.mjs
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export const PACKAGE_VERSION = '3.
|
|
1
|
+
export const PACKAGE_VERSION = '3.1.0';
|