sogni-gen 1.2.3 → 1.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -12,7 +12,15 @@ An [OpenClaw](https://github.com/OpenClaw/OpenClaw) plugin for AI image + video
12
12
 
13
13
  ### Quick Install (OpenClaw) - Recommended
14
14
 
15
- This repo ships an `openclaw.plugin.json` manifest so OpenClaw can automatically download and set everything up:
15
+ Point your OpenClaw to the [`llm.txt`](https://raw.githubusercontent.com/Sogni-AI/openclaw-sogni-gen/main/llm.txt) and everything is set up — just paste the URL into Telegram, WhatsApp, or iMessage, and the bot handles image and video generation automatically.
16
+
17
+ ```
18
+ https://raw.githubusercontent.com/Sogni-AI/openclaw-sogni-gen/main/llm.txt
19
+ ```
20
+
21
+ ### Plugin Install
22
+
23
+ This repo also ships an `openclaw.plugin.json` manifest so OpenClaw can automatically download and set everything up:
16
24
 
17
25
  ```bash
18
26
  # One command to install from GitHub
@@ -22,8 +30,6 @@ openclaw plugins install git@github.com:Sogni-AI/openclaw-sogni-gen.git
22
30
  openclaw plugins install sogni-gen
23
31
  ```
24
32
 
25
- That's it! OpenClaw will handle the rest.
26
-
27
33
  ### Manual Installation
28
34
 
29
35
  ```bash
@@ -46,6 +52,7 @@ If OpenClaw loads this plugin, `sogni-gen` will read defaults from your OpenClaw
46
52
  "config": {
47
53
  "defaultImageModel": "z_image_turbo_bf16",
48
54
  "defaultEditModel": "qwen_image_edit_2511_fp8_lightning",
55
+ "defaultPhotoboothModel": "coreml-sogniXLturbo_alpha1_ad",
49
56
  "videoModels": {
50
57
  "t2v": "wan_v2.2-14b-fp8_t2v_lightx2v",
51
58
  "i2v": "wan_v2.2-14b-fp8_i2v_lightx2v",
@@ -117,6 +124,10 @@ node sogni-gen.mjs -m flux1-schnell-fp8 "a dragon eating tacos"
117
124
  # JPG output
118
125
  node sogni-gen.mjs --output-format jpg -o dragon.jpg "a dragon eating tacos"
119
126
 
127
+ # Photobooth (face transfer)
128
+ node sogni-gen.mjs --photobooth --ref face.jpg "80s fashion portrait"
129
+ node sogni-gen.mjs --photobooth --ref face.jpg -n 4 "LinkedIn professional headshot"
130
+
120
131
  # Image edit with LoRA
121
132
  node sogni-gen.mjs -c subject.jpg --lora sogni_lora_v1 --lora-strength 0.7 \
122
133
  "add a neon cyberpunk glow"
@@ -155,6 +166,26 @@ node sogni-gen.mjs --video --estimate-video-cost --steps 20 \
155
166
  -m wan_v2.2-14b-fp8_t2v_lightx2v "ocean waves at sunset"
156
167
  ```
157
168
 
169
+ ## Photobooth (Face Transfer)
170
+
171
+ Generate stylized portraits from a face photo using InstantID ControlNet:
172
+
173
+ ```bash
174
+ # Basic photobooth
175
+ node sogni-gen.mjs --photobooth --ref face.jpg "80s fashion portrait"
176
+
177
+ # Multiple outputs
178
+ node sogni-gen.mjs --photobooth --ref face.jpg -n 4 "LinkedIn professional headshot"
179
+
180
+ # Custom ControlNet tuning
181
+ node sogni-gen.mjs --photobooth --ref face.jpg --cn-strength 0.6 --cn-guidance-end 0.5 "oil painting"
182
+
183
+ # Custom model
184
+ node sogni-gen.mjs --photobooth --ref face.jpg -m coreml-dreamshaperXL_v21TurboDPMSDE "anime style"
185
+ ```
186
+
187
+ Uses SDXL Turbo (`coreml-sogniXLturbo_alpha1_ad`) at 1024x1024 by default. The face image is passed via `--ref` and styled according to the prompt. Cannot be combined with `--video` or `-c/--context`.
188
+
158
189
  Multi-angle mode auto-builds the `<sks>` prompt and applies the `multiple_angles` LoRA.
159
190
  `--angles-360-video` generates i2v clips between consecutive angles (including last→first) and concatenates them with ffmpeg for a seamless loop.
160
191
  `--balance` / `--balances` does not require a prompt and exits after printing current `SPARK` and `SOGNI` balances.
@@ -215,7 +246,10 @@ Multi-angle mode auto-builds the `<sks>` prompt and applies the `multiple_angles
215
246
  --auto-resize-assets Auto-resize video reference assets
216
247
  --no-auto-resize-assets Disable auto-resize for video assets
217
248
  --estimate-video-cost Estimate video cost and exit (requires --steps)
218
- --ref <path|url> Reference image for i2v/s2v/animate
249
+ --photobooth Face transfer mode (InstantID + SDXL Turbo)
250
+ --cn-strength <n> ControlNet strength (default: 0.7)
251
+ --cn-guidance-end <n> ControlNet guidance end point (default: 0.6)
252
+ --ref <path|url> Reference image for i2v/s2v/animate/photobooth
219
253
  --ref-end <path|url> End frame for i2v interpolation
220
254
  --ref-audio <path> Reference audio for s2v
221
255
  --ref-video <path> Reference video for animate workflows
@@ -236,6 +270,7 @@ Multi-angle mode auto-builds the `<sks>` prompt and applies the `multiple_angles
236
270
  | `chroma-v.46-flash_fp8` | ~30s | Balanced |
237
271
  | `qwen_image_edit_2511_fp8` | ~30s | Image editing with context |
238
272
  | `qwen_image_edit_2511_fp8_lightning` | ~8s | Fast image editing |
273
+ | `coreml-sogniXLturbo_alpha1_ad` | Fast | Photobooth face transfer (SDXL Turbo) |
239
274
  | `wan_v2.2-14b-fp8_t2v_lightx2v` | ~5min | Text-to-video |
240
275
  | `wan_v2.2-14b-fp8_i2v_lightx2v` | ~3-5min | Image-to-video |
241
276
  | `wan_v2.2-14b-fp8_s2v_lightx2v` | ~5min | Sound-to-video |
package/SKILL.md CHANGED
@@ -110,7 +110,10 @@ node sogni-gen.mjs -q -o /tmp/cat.png "a cat wearing a hat"
110
110
  | `--auto-resize-assets` | Auto-resize video assets | true |
111
111
  | `--no-auto-resize-assets` | Disable auto-resize | - |
112
112
  | `--estimate-video-cost` | Estimate video cost and exit (requires --steps) | - |
113
- | `--ref <path>` | Reference image for video | required for video |
113
+ | `--photobooth` | Face transfer mode (InstantID + SDXL Turbo) | - |
114
+ | `--cn-strength <n>` | ControlNet strength (photobooth) | 0.7 |
115
+ | `--cn-guidance-end <n>` | ControlNet guidance end point (photobooth) | 0.6 |
116
+ | `--ref <path>` | Reference image for video or photobooth face | required for video/photobooth |
114
117
  | `--ref-end <path>` | End frame for i2v interpolation | - |
115
118
  | `--ref-audio <path>` | Reference audio for s2v | - |
116
119
  | `--ref-video <path>` | Reference video for animate workflows | - |
@@ -134,6 +137,7 @@ When installed as an OpenClaw plugin, `sogni-gen` will read defaults from:
134
137
  "config": {
135
138
  "defaultImageModel": "z_image_turbo_bf16",
136
139
  "defaultEditModel": "qwen_image_edit_2511_fp8_lightning",
140
+ "defaultPhotoboothModel": "coreml-sogniXLturbo_alpha1_ad",
137
141
  "videoModels": {
138
142
  "t2v": "wan_v2.2-14b-fp8_t2v_lightx2v",
139
143
  "i2v": "wan_v2.2-14b-fp8_i2v_lightx2v",
@@ -177,6 +181,7 @@ Seed strategies: `prompt-hash` (deterministic) or `random`.
177
181
  | `chroma-v.46-flash_fp8` | Medium | Balanced |
178
182
  | `qwen_image_edit_2511_fp8` | Medium | Image editing with context (up to 3) |
179
183
  | `qwen_image_edit_2511_fp8_lightning` | Fast | Quick image editing |
184
+ | `coreml-sogniXLturbo_alpha1_ad` | Fast | Photobooth face transfer (SDXL Turbo) |
180
185
 
181
186
  ## Video Models
182
187
 
@@ -206,6 +211,32 @@ node sogni-gen.mjs --last-image "make it more vibrant"
206
211
 
207
212
  When context images are provided without `-m`, defaults to `qwen_image_edit_2511_fp8_lightning`.
208
213
 
214
+ ## Photobooth (Face Transfer)
215
+
216
+ Generate stylized portraits from a face photo using InstantID ControlNet. When a user mentions "photobooth", wants a stylized portrait of themselves, or asks to transfer their face into a style, use `--photobooth` with `--ref` pointing to their face image.
217
+
218
+ ```bash
219
+ # Basic photobooth
220
+ node sogni-gen.mjs --photobooth --ref face.jpg "80s fashion portrait"
221
+
222
+ # Multiple outputs
223
+ node sogni-gen.mjs --photobooth --ref face.jpg -n 4 "LinkedIn professional headshot"
224
+
225
+ # Custom ControlNet tuning
226
+ node sogni-gen.mjs --photobooth --ref face.jpg --cn-strength 0.6 --cn-guidance-end 0.5 "oil painting"
227
+ ```
228
+
229
+ Uses SDXL Turbo (`coreml-sogniXLturbo_alpha1_ad`) at 1024x1024 by default. The face image is passed via `--ref` and styled according to the prompt. Cannot be combined with `--video` or `-c/--context`.
230
+
231
+ **Agent usage:**
232
+ ```bash
233
+ # Photobooth: stylize a face photo
234
+ node {{skillDir}}/sogni-gen.mjs -q --photobooth --ref /path/to/face.jpg -o /tmp/stylized.png "80s fashion portrait"
235
+
236
+ # Multiple photobooth outputs
237
+ node {{skillDir}}/sogni-gen.mjs -q --photobooth --ref /path/to/face.jpg -n 4 -o /tmp/stylized.png "LinkedIn professional headshot"
238
+ ```
239
+
209
240
  ## Multiple Angles (Turnaround)
210
241
 
211
242
  Generate specific camera angles from a single reference image using the Multiple Angles LoRA:
@@ -319,6 +350,11 @@ ls -la ~/.clawdbot/media/inbound/*.jpg | tail -3
319
350
  cp ~/.clawdbot/media/inbound/<latest>.jpg /tmp/to_restore.jpg
320
351
  ```
321
352
 
353
+ ## IMPORTANT KEYWORD RULE
354
+
355
+ - If the user message includes the word "photobooth" (case-insensitive), always use `--photobooth` mode with `--ref` set to the user-provided face image.
356
+ - Prioritize this rule over generic image-edit flows (`-c`) for that request.
357
+
322
358
  ## Agent Usage
323
359
 
324
360
  When user asks to generate/draw/create an image:
@@ -336,6 +372,9 @@ node {{skillDir}}/sogni-gen.mjs -q --video --ref /path/to/image.png -o /tmp/vide
336
372
  # Generate text-to-video
337
373
  node {{skillDir}}/sogni-gen.mjs -q --video -o /tmp/video.mp4 "ocean waves at sunset"
338
374
 
375
+ # Photobooth: stylize a face photo
376
+ node {{skillDir}}/sogni-gen.mjs -q --photobooth --ref /path/to/face.jpg -o /tmp/stylized.png "80s fashion portrait"
377
+
339
378
  # Check current SPARK/SOGNI balances (no prompt required)
340
379
  node {{skillDir}}/sogni-gen.mjs --json --balance
341
380
 
package/llm.txt CHANGED
@@ -138,6 +138,7 @@ node {{skillDir}}/sogni-gen.mjs --json --balance
138
138
 
139
139
  ## Agent Behavior Guidelines
140
140
 
141
+ 0. If the user's message includes the keyword "photobooth" (case-insensitive), always use `--photobooth` with `--ref` set to the user-provided face image. Do not fall back to the `-c` edit flow for that request.
141
142
  1. When the user asks to "draw", "generate", "create", or "make" an image: generate an image and send it.
142
143
  2. When they ask to "animate", "make a video", or "create a video": use --video mode.
143
144
  3. When they send a photo and ask to edit/change/modify it: use -c with their image.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sogni-gen",
3
- "version": "1.2.3",
3
+ "version": "1.2.5",
4
4
  "description": "Sogni AI image generation plugin for OpenClaw",
5
5
  "type": "module",
6
6
  "main": "sogni-gen.mjs",
package/sogni-gen.mjs CHANGED
@@ -593,7 +593,10 @@ const options = {
593
593
  refAudio: null, // Reference audio for s2v
594
594
  refVideo: null, // Reference video for animate workflows
595
595
  contextImages: [], // Context images for image editing
596
- looping: false // Create looping video (i2v only): generate A→B then B→A and concatenate
596
+ looping: false, // Create looping video (i2v only): generate A→B then B→A and concatenate
597
+ photobooth: false, // Photobooth mode (InstantID face transfer)
598
+ cnStrength: null, // ControlNet strength override
599
+ cnGuidanceEnd: null // ControlNet guidance end override
597
600
  };
598
601
  const cliSet = {
599
602
  output: false,
@@ -630,7 +633,10 @@ const cliSet = {
630
633
  refImageEnd: false,
631
634
  refAudio: false,
632
635
  refVideo: false,
633
- context: false
636
+ context: false,
637
+ photobooth: false,
638
+ cnStrength: false,
639
+ cnGuidanceEnd: false
634
640
  };
635
641
 
636
642
  // Parse CLI args
@@ -762,6 +768,15 @@ for (let i = 0; i < args.length; i++) {
762
768
  } else if (arg === '-c' || arg === '--context') {
763
769
  options.contextImages.push(args[++i]);
764
770
  cliSet.context = true;
771
+ } else if (arg === '--photobooth') {
772
+ options.photobooth = true;
773
+ cliSet.photobooth = true;
774
+ } else if (arg === '--cn-strength') {
775
+ options.cnStrength = parseFloat(args[++i]);
776
+ cliSet.cnStrength = true;
777
+ } else if (arg === '--cn-guidance-end') {
778
+ options.cnGuidanceEnd = parseFloat(args[++i]);
779
+ cliSet.cnGuidanceEnd = true;
765
780
  } else if (arg === '--last-image') {
766
781
  // Use image from last render as reference/context
767
782
  if (existsSync(LAST_RENDER_PATH)) {
@@ -830,6 +845,12 @@ Image Options:
830
845
  -c, --context <path> Context image for editing (can use multiple)
831
846
  --last-image Use last generated image as context
832
847
 
848
+ Photobooth (Face Transfer):
849
+ --photobooth Face transfer mode (InstantID + SDXL Turbo)
850
+ --ref <path|url> Face image (required with --photobooth)
851
+ --cn-strength <n> ControlNet strength (default: 0.8)
852
+ --cn-guidance-end <n> ControlNet guidance end point (default: 0.3)
853
+
833
854
  Video Options:
834
855
  --video, -v Generate video instead of image
835
856
  --workflow <type> Video workflow: t2v|i2v|s2v|animate-move|animate-replace
@@ -885,6 +906,8 @@ Examples:
885
906
  sogni-gen --video --last-image "gentle camera pan"
886
907
  sogni-gen -c photo.jpg "make the background a beach" -m qwen_image_edit_2511_fp8
887
908
  sogni-gen -c subject.jpg -c style.jpg "apply the style to the subject"
909
+ sogni-gen --photobooth --ref face.jpg "80s fashion portrait"
910
+ sogni-gen --photobooth --ref face.jpg -n 4 "LinkedIn professional headshot"
888
911
  `);
889
912
  process.exit(0);
890
913
  } else if (!arg.startsWith('-') && !options.prompt) {
@@ -1142,6 +1165,8 @@ if (options._lastImagePath) {
1142
1165
  } else if (!options.quiet) {
1143
1166
  console.error('Warning: --last-image ignored for text-to-video workflow.');
1144
1167
  }
1168
+ } else if (options.photobooth) {
1169
+ if (!options.refImage) options.refImage = options._lastImagePath;
1145
1170
  } else {
1146
1171
  options.contextImages.push(options._lastImagePath);
1147
1172
  }
@@ -1156,6 +1181,14 @@ if (options.video) {
1156
1181
  if (!cliSet.timeout && !timeoutFromConfig && options.timeout === 30000) {
1157
1182
  options.timeout = 300000; // 5 min for video
1158
1183
  }
1184
+ } else if (options.photobooth) {
1185
+ // Photobooth uses SDXL Turbo + InstantID ControlNet
1186
+ options.model = options.model || openclawConfig?.defaultPhotoboothModel || 'coreml-sogniXLturbo_alpha1_ad';
1187
+ if (!cliSet.width) options.width = 1024;
1188
+ if (!cliSet.height) options.height = 1024;
1189
+ if (!cliSet.timeout && !timeoutFromConfig && options.timeout === 30000) {
1190
+ options.timeout = 60000;
1191
+ }
1159
1192
  } else if (options.contextImages.length > 0) {
1160
1193
  // Use qwen edit model when context images provided (unless model explicitly set)
1161
1194
  options.model = options.model || openclawConfig?.defaultEditModel || 'qwen_image_edit_2511_fp8_lightning';
@@ -1176,6 +1209,18 @@ if (!options.video && (options.refAudio || options.refVideo || options.videoWork
1176
1209
  });
1177
1210
  }
1178
1211
 
1212
+ if (options.photobooth) {
1213
+ if (!options.refImage) {
1214
+ fatalCliError('--photobooth requires --ref <face-image>.', { code: 'INVALID_ARGUMENT' });
1215
+ }
1216
+ if (options.video) {
1217
+ fatalCliError('--photobooth cannot be combined with --video.', { code: 'INVALID_ARGUMENT' });
1218
+ }
1219
+ if (options.contextImages.length > 0) {
1220
+ fatalCliError('--photobooth cannot be combined with -c/--context.', { code: 'INVALID_ARGUMENT' });
1221
+ }
1222
+ }
1223
+
1179
1224
  if (options.video) {
1180
1225
  if (options.videoWorkflow === 't2v') {
1181
1226
  if (options.refImage || options.refImageEnd || options.refAudio || options.refVideo) {
@@ -2416,6 +2461,53 @@ async function main() {
2416
2461
  }
2417
2462
 
2418
2463
  await client.createImageEditProject(editConfig);
2464
+ } else if (options.photobooth) {
2465
+ // Photobooth: face transfer with InstantID ControlNet
2466
+ log(`Photobooth with ${options.model}...`);
2467
+ if (options.seed !== null && options.seed !== undefined) log(`Using seed: ${options.seed}`);
2468
+
2469
+ const faceBuffer = await fetchMediaBuffer(options.refImage);
2470
+ const modelDefaults = getModelDefaults(options.model, openclawConfig);
2471
+ const steps = options.steps ?? modelDefaults?.steps ?? 7;
2472
+ const guidance = options.guidance ?? modelDefaults?.guidance ?? 2;
2473
+
2474
+ const projectConfig = {
2475
+ modelId: options.model,
2476
+ positivePrompt: options.prompt,
2477
+ negativePrompt: '',
2478
+ stylePrompt: '',
2479
+ numberOfMedia: options.count,
2480
+ tokenType: options.tokenType || 'spark',
2481
+ waitForCompletion: false,
2482
+ sizePreset: 'custom',
2483
+ width: options.width,
2484
+ height: options.height,
2485
+ steps,
2486
+ guidance,
2487
+ disableNSFWFilter: true,
2488
+ sampler: options.sampler || 'dpmpp_sde',
2489
+ scheduler: options.scheduler || 'karras',
2490
+ controlNet: {
2491
+ name: 'instantid',
2492
+ image: faceBuffer,
2493
+ strength: options.cnStrength ?? 0.7,
2494
+ mode: 'balanced',
2495
+ guidanceStart: 0,
2496
+ guidanceEnd: options.cnGuidanceEnd ?? 0.6,
2497
+ }
2498
+ };
2499
+
2500
+ if (options.outputFormat) projectConfig.outputFormat = options.outputFormat;
2501
+ if (options.seed !== null && options.seed !== undefined) projectConfig.seed = options.seed;
2502
+ if (options.loras.length > 0) projectConfig.loras = options.loras;
2503
+ if (options.loraStrengths.length > 0) projectConfig.loraStrengths = options.loraStrengths;
2504
+
2505
+ const projectResult = await client.createImageProject(projectConfig);
2506
+
2507
+ // Check for errors in the response (e.g., insufficient tokens)
2508
+ if (projectResult?.error || projectResult?.message) {
2509
+ throw new Error(projectResult.error || projectResult.message);
2510
+ }
2419
2511
  } else {
2420
2512
  // Standard image generation
2421
2513
  log(`Generating with ${options.model}...`);
@@ -2513,6 +2605,10 @@ async function main() {
2513
2605
  if (options.contextImages.length > 0) {
2514
2606
  renderInfo.contextImages = options.contextImages;
2515
2607
  }
2608
+ if (options.photobooth) {
2609
+ renderInfo.photobooth = true;
2610
+ renderInfo.refImage = options.refImage;
2611
+ }
2516
2612
  saveLastRender(renderInfo);
2517
2613
 
2518
2614
  // Save to file if requested
@@ -2700,6 +2796,15 @@ async function main() {
2700
2796
  if (options.contextImages.length > 0) {
2701
2797
  output.contextImages = options.contextImages;
2702
2798
  }
2799
+ if (options.photobooth) {
2800
+ output.photobooth = true;
2801
+ output.refImage = options.refImage;
2802
+ output.controlNet = {
2803
+ name: 'instantid',
2804
+ strength: options.cnStrength ?? 0.7,
2805
+ guidanceEnd: options.cnGuidanceEnd ?? 0.6,
2806
+ };
2807
+ }
2703
2808
  console.log(JSON.stringify(output));
2704
2809
  } else {
2705
2810
  urls.forEach(url => console.log(url));