@sogni-ai/sogni-client 4.2.0-alpha.2 → 4.2.0-alpha.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. package/CHANGELOG.md +148 -0
  2. package/CLAUDE.md +25 -3
  3. package/README.md +411 -136
  4. package/dist/Account/index.d.ts +4 -2
  5. package/dist/Account/index.js +27 -23
  6. package/dist/Account/index.js.map +1 -1
  7. package/dist/Account/types.d.ts +7 -0
  8. package/dist/ApiClient/WebSocketClient/BrowserWebSocketClient/index.d.ts +3 -1
  9. package/dist/ApiClient/WebSocketClient/BrowserWebSocketClient/index.js +26 -2
  10. package/dist/ApiClient/WebSocketClient/BrowserWebSocketClient/index.js.map +1 -1
  11. package/dist/ApiClient/WebSocketClient/eventSubscriptions.d.ts +33 -0
  12. package/dist/ApiClient/WebSocketClient/eventSubscriptions.js +39 -0
  13. package/dist/ApiClient/WebSocketClient/eventSubscriptions.js.map +1 -0
  14. package/dist/ApiClient/WebSocketClient/events.d.ts +24 -7
  15. package/dist/ApiClient/WebSocketClient/index.d.ts +5 -1
  16. package/dist/ApiClient/WebSocketClient/index.js +24 -1
  17. package/dist/ApiClient/WebSocketClient/index.js.map +1 -1
  18. package/dist/ApiClient/WebSocketClient/messages.d.ts +2 -0
  19. package/dist/ApiClient/WebSocketClient/types.d.ts +2 -0
  20. package/dist/ApiClient/index.d.ts +6 -1
  21. package/dist/ApiClient/index.js +7 -3
  22. package/dist/ApiClient/index.js.map +1 -1
  23. package/dist/Chat/ChatTools.d.ts +5 -49
  24. package/dist/Chat/ChatTools.js +311 -88
  25. package/dist/Chat/ChatTools.js.map +1 -1
  26. package/dist/Chat/index.d.ts +11 -2
  27. package/dist/Chat/index.js +78 -4
  28. package/dist/Chat/index.js.map +1 -1
  29. package/dist/Chat/modelRouting.d.ts +100 -0
  30. package/dist/Chat/modelRouting.js +441 -0
  31. package/dist/Chat/modelRouting.js.map +1 -0
  32. package/dist/Chat/sogniHostedTools.generated.json +529 -0
  33. package/dist/Chat/tools.d.ts +9 -55
  34. package/dist/Chat/tools.js +72 -228
  35. package/dist/Chat/tools.js.map +1 -1
  36. package/dist/Chat/types.d.ts +91 -2
  37. package/dist/CreativeWorkflows/index.d.ts +23 -0
  38. package/dist/CreativeWorkflows/index.js +274 -0
  39. package/dist/CreativeWorkflows/index.js.map +1 -0
  40. package/dist/CreativeWorkflows/types.d.ts +106 -0
  41. package/dist/CreativeWorkflows/types.js +3 -0
  42. package/dist/CreativeWorkflows/types.js.map +1 -0
  43. package/dist/Projects/Job.d.ts +6 -0
  44. package/dist/Projects/Job.js +60 -5
  45. package/dist/Projects/Job.js.map +1 -1
  46. package/dist/Projects/Project.js +15 -3
  47. package/dist/Projects/Project.js.map +1 -1
  48. package/dist/Projects/createJobRequestMessage.js +140 -6
  49. package/dist/Projects/createJobRequestMessage.js.map +1 -1
  50. package/dist/Projects/index.d.ts +10 -1
  51. package/dist/Projects/index.js +197 -58
  52. package/dist/Projects/index.js.map +1 -1
  53. package/dist/Projects/types/ModelOptions.d.ts +3 -3
  54. package/dist/Projects/types/ModelOptions.js +12 -5
  55. package/dist/Projects/types/ModelOptions.js.map +1 -1
  56. package/dist/Projects/types/ModelTiersRaw.d.ts +7 -7
  57. package/dist/Projects/types/RawProject.d.ts +2 -0
  58. package/dist/Projects/types/events.d.ts +5 -4
  59. package/dist/Projects/types/index.d.ts +77 -7
  60. package/dist/Projects/types/index.js.map +1 -1
  61. package/dist/Projects/utils/index.d.ts +8 -1
  62. package/dist/Projects/utils/index.js +22 -8
  63. package/dist/Projects/utils/index.js.map +1 -1
  64. package/dist/index.d.ts +28 -3
  65. package/dist/index.js +19 -1
  66. package/dist/index.js.map +1 -1
  67. package/dist/lib/RestClient.d.ts +4 -1
  68. package/dist/lib/RestClient.js +17 -9
  69. package/dist/lib/RestClient.js.map +1 -1
  70. package/dist/lib/mediaValidation.d.ts +16 -0
  71. package/dist/lib/mediaValidation.js +280 -0
  72. package/dist/lib/mediaValidation.js.map +1 -0
  73. package/dist/lib/validation.d.ts +6 -1
  74. package/dist/lib/validation.js +28 -2
  75. package/dist/lib/validation.js.map +1 -1
  76. package/llms-full.txt +372 -133
  77. package/llms.txt +197 -86
  78. package/package.json +13 -4
  79. package/src/Account/index.ts +22 -2
  80. package/src/Account/types.ts +7 -0
  81. package/src/ApiClient/WebSocketClient/BrowserWebSocketClient/index.ts +47 -3
  82. package/src/ApiClient/WebSocketClient/eventSubscriptions.ts +92 -0
  83. package/src/ApiClient/WebSocketClient/events.ts +25 -7
  84. package/src/ApiClient/WebSocketClient/index.ts +33 -1
  85. package/src/ApiClient/WebSocketClient/messages.ts +2 -0
  86. package/src/ApiClient/WebSocketClient/types.ts +2 -0
  87. package/src/ApiClient/index.ts +32 -2
  88. package/src/Chat/ChatTools.ts +395 -95
  89. package/src/Chat/index.ts +149 -5
  90. package/src/Chat/modelRouting.ts +602 -0
  91. package/src/Chat/sogniHostedTools.generated.json +529 -0
  92. package/src/Chat/tools.ts +98 -245
  93. package/src/Chat/types.ts +100 -2
  94. package/src/CreativeWorkflows/index.ts +290 -0
  95. package/src/CreativeWorkflows/types.ts +134 -0
  96. package/src/Projects/Job.ts +76 -5
  97. package/src/Projects/Project.ts +13 -3
  98. package/src/Projects/createJobRequestMessage.ts +152 -13
  99. package/src/Projects/index.ts +230 -52
  100. package/src/Projects/types/ModelOptions.ts +15 -8
  101. package/src/Projects/types/ModelTiersRaw.ts +7 -7
  102. package/src/Projects/types/RawProject.ts +2 -0
  103. package/src/Projects/types/events.ts +5 -4
  104. package/src/Projects/types/index.ts +86 -6
  105. package/src/Projects/utils/index.ts +24 -8
  106. package/src/index.ts +93 -0
  107. package/src/lib/RestClient.ts +15 -5
  108. package/src/lib/mediaValidation.ts +367 -0
  109. package/src/lib/validation.ts +38 -2
package/llms-full.txt CHANGED
@@ -15,54 +15,62 @@ TABLE OF CONTENTS
15
15
  4. Image Generation
16
16
  5. Video Generation - WAN 2.2 Models
17
17
  6. Video Generation - LTX-2.3 Models
18
- 7. Audio Generation - ACE-Step 1.5 Models
19
- 8. LLM Text Generation
20
- 9. LLM Tool Calling & Sogni Platform Tools
21
- 10. Vision Chat (Multimodal Image Understanding)
22
- 11. Complete Parameter Reference
23
- 12. Events & Progress Tracking
24
- 13. Models, Presets & Options
25
- 14. Cost Estimation
26
- 15. Error Handling
27
- 16. Advanced Patterns
28
- 17. Type Definitions
18
+ 7. Video Generation - Seedance 2.0 Models
19
+ 8. Audio Generation - ACE-Step 1.5 Models
20
+ 9. LLM Text Generation
21
+ 10. LLM Tool Calling & Sogni Platform Tools
22
+ 11. Vision Chat (Multimodal Image Understanding)
23
+ 12. Complete Parameter Reference
24
+ 13. Events & Progress Tracking
25
+ 14. Models, Presets & Options
26
+ 15. Cost Estimation
27
+ 16. Error Handling
28
+ 17. Advanced Patterns
29
+ 18. Type Definitions
29
30
 
30
31
  ================================================================================
31
- 1. OVERVIEW & INSTALLATION
32
- ================================================================================
32
+
33
+ # 1. OVERVIEW & INSTALLATION
33
34
 
34
35
  The Sogni SDK provides access to the Sogni Supernet - a decentralized network
35
36
  for AI inference. It supports:
36
37
 
37
38
  - Image Generation: Stable Diffusion, Flux, Z-Image, Z-Image Turbo, Qwen Image Edit
38
- - Video Generation: WAN 2.2 (5 workflows), LTX-2.3 models
39
+ - Video Generation: WAN 2.2 (5 workflows), LTX-2.3 models, Seedance 2.0 external API models
39
40
  - Audio Generation: ACE-Step 1.5 music generation (Turbo and SFT models)
40
41
  - LLM Text Generation: Chat completions with streaming, multi-turn, and thinking mode
41
42
  - LLM Tool Calling: OpenAI-compatible function calling with custom and platform tools
42
- - Sogni Platform Tools: Generate images, videos, and music via natural language chat
43
+ - Sogni Platform Tools: Generate images, image edits, videos, audio-driven videos, video transforms, and music via natural language chat
43
44
  - Vision Chat: Multimodal image understanding via VLM (scene description, OCR, object detection, visual analysis)
44
45
  - Real-time Progress: WebSocket-based event system
45
46
  - Multiple Networks: Fast (GPU) and Relaxed (Mac)
46
47
 
47
48
  INSTALLATION:
49
+
48
50
  ```bash
49
51
  npm install @sogni-ai/sogni-client
50
52
  ```
51
53
 
52
54
  REQUIREMENTS:
55
+
53
56
  - Node.js 18+ or modern browser
54
57
  - Sogni account with token balance (free signup at app.sogni.ai)
58
+ - Each email-verified account is allowed 400 free Spark render credits per month. On the API,
59
+ free credits can be used with Z-Image Turbo; paid credits can access all models and features.
55
60
 
56
- ================================================================================
57
- 2. QUICK START EXAMPLES
61
+ ================================================================================ 2. QUICK START EXAMPLES
58
62
  ================================================================================
59
63
 
60
64
  MINIMAL IMAGE GENERATION:
65
+
61
66
  ```javascript
62
67
  import { SogniClient } from '@sogni-ai/sogni-client';
63
68
 
64
69
  // Option 1: API key auth (recommended) — no login() needed
65
- const sogni = await SogniClient.createInstance({ appId: 'my-unique-app-id', apiKey: 'your-api-key' });
70
+ const sogni = await SogniClient.createInstance({
71
+ appId: 'my-unique-app-id',
72
+ apiKey: 'your-api-key'
73
+ });
66
74
 
67
75
  // Option 2: Username/password auth
68
76
  // const sogni = await SogniClient.createInstance({ appId: 'my-unique-app-id' });
@@ -84,6 +92,7 @@ console.log('Image URL:', urls[0]); // Valid for 24 hours
84
92
  ```
85
93
 
86
94
  MINIMAL VIDEO GENERATION:
95
+
87
96
  ```javascript
88
97
  const project = await sogni.projects.create({
89
98
  type: 'video',
@@ -99,17 +108,17 @@ const urls = await project.waitForCompletion();
99
108
  console.log('Video URL:', urls[0]);
100
109
  ```
101
110
 
102
- ================================================================================
103
- 3. CLIENT INITIALIZATION
111
+ ================================================================================ 3. CLIENT INITIALIZATION
104
112
  ================================================================================
105
113
 
106
114
  CREATING THE CLIENT:
115
+
107
116
  ```javascript
108
117
  import { SogniClient } from '@sogni-ai/sogni-client';
109
118
 
110
119
  const sogni = await SogniClient.createInstance({
111
- appId: 'your-unique-app-id', // Required - UUID recommended
112
- network: 'fast' // 'fast' or 'relaxed'
120
+ appId: 'your-unique-app-id', // Required - UUID recommended
121
+ network: 'fast' // 'fast' or 'relaxed'
113
122
  });
114
123
  ```
115
124
 
@@ -128,6 +137,7 @@ const sogni = await SogniClient.createInstance({
128
137
  ```
129
138
 
130
139
  AUTHENTICATION - USERNAME/PASSWORD:
140
+
131
141
  ```javascript
132
142
  // Username/password login
133
143
  const sogni = await SogniClient.createInstance({
@@ -138,12 +148,14 @@ await sogni.account.login('username', 'password');
138
148
  ```
139
149
 
140
150
  API KEY VS USERNAME/PASSWORD:
151
+
141
152
  - API key: Pass `apiKey` to `createInstance()`. Auto-authenticates via WebSocket
142
153
  header. No `login()` call needed. Most REST API calls (balance, profile, etc.)
143
154
  available. Sensitive operations (withdrawals, staking, 2FA) not available.
144
155
  - Username/password: Call `login()` after `createInstance()`. Full REST API access.
145
156
 
146
157
  ACCOUNT INFO (Available with both auth methods):
158
+
147
159
  ```javascript
148
160
  // Check if authenticated
149
161
  const isLoggedIn = await sogni.checkAuth();
@@ -158,10 +170,12 @@ console.log(sogni.account.currentAccount.balance);
158
170
  ```
159
171
 
160
172
  NETWORK TYPES:
173
+
161
174
  - 'fast': High-end GPU workers. Faster, higher cost. REQUIRED for video.
162
175
  - 'relaxed': Mac device workers. Slower, lower cost. Image only.
163
176
 
164
177
  CONNECTION EVENTS:
178
+
165
179
  ```javascript
166
180
  sogni.apiClient.on('connected', ({ network }) => {
167
181
  console.log('Connected to:', network);
@@ -172,11 +186,11 @@ sogni.apiClient.on('disconnected', ({ code, reason }) => {
172
186
  });
173
187
  ```
174
188
 
175
- ================================================================================
176
- 4. IMAGE GENERATION
189
+ ================================================================================ 4. IMAGE GENERATION
177
190
  ================================================================================
178
191
 
179
192
  BASIC IMAGE GENERATION:
193
+
180
194
  ```javascript
181
195
  const project = await sogni.projects.create({
182
196
  type: 'image',
@@ -194,6 +208,7 @@ const urls = await project.waitForCompletion();
194
208
  ```
195
209
 
196
210
  WITH SIZE PRESET:
211
+
197
212
  ```javascript
198
213
  const project = await sogni.projects.create({
199
214
  type: 'image',
@@ -207,6 +222,7 @@ const project = await sogni.projects.create({
207
222
  ```
208
223
 
209
224
  WITH CUSTOM DIMENSIONS:
225
+
210
226
  ```javascript
211
227
  const project = await sogni.projects.create({
212
228
  type: 'image',
@@ -222,6 +238,7 @@ const project = await sogni.projects.create({
222
238
  ```
223
239
 
224
240
  IMAGE-TO-IMAGE (Starting Image):
241
+
225
242
  ```javascript
226
243
  import fs from 'fs';
227
244
 
@@ -230,7 +247,7 @@ const project = await sogni.projects.create({
230
247
  modelId: 'coreml-cyberrealistic_v70_768',
231
248
  positivePrompt: 'Transform into oil painting style',
232
249
  startingImage: fs.readFileSync('./input.png'),
233
- startingImageStrength: 0.7, // 0-1, higher = more original preserved
250
+ startingImageStrength: 0.7, // 0-1, higher = more original preserved
234
251
  numberOfMedia: 1,
235
252
  steps: 20,
236
253
  guidance: 7.5
@@ -238,6 +255,7 @@ const project = await sogni.projects.create({
238
255
  ```
239
256
 
240
257
  WITH CONTROLNET:
258
+
241
259
  ```javascript
242
260
  const project = await sogni.projects.create({
243
261
  type: 'image',
@@ -258,6 +276,7 @@ const project = await sogni.projects.create({
258
276
  ```
259
277
 
260
278
  CONTROLNET TYPES:
279
+
261
280
  - canny: Edge detection
262
281
  - depth: Depth map
263
282
  - openpose: Body pose
@@ -275,6 +294,7 @@ CONTROLNET TYPES:
275
294
  - instantid: Face identity
276
295
 
277
296
  WITH CONTEXT IMAGES (Qwen Image Edit):
297
+
278
298
  ```javascript
279
299
  const project = await sogni.projects.create({
280
300
  type: 'image',
@@ -291,6 +311,10 @@ const project = await sogni.projects.create({
291
311
  });
292
312
  ```
293
313
 
314
+ GPT Image 2 uses `modelId: 'gpt-image-2'`, supports up to 16 `contextImages`,
315
+ accepts `gptImageQuality: 'low' | 'medium' | 'high'`, supports
316
+ `outputFormat: 'png' | 'jpg' | 'webp'`, and is Spark-only.
317
+
294
318
  RECOMMENDED MODELS:
295
319
  | Model ID | Type | Steps | Guidance | Notes |
296
320
  |----------|------|-------|----------|-------|
@@ -300,22 +324,23 @@ RECOMMENDED MODELS:
300
324
  | qwen_image_edit_2511_fp8_lightning | Edit | 4 | 1 | Multi-image context |
301
325
  | coreml-cyberrealistic_v70_768 | SD 1.5 | 20-40 | 7.5 | Photorealistic |
302
326
 
327
+ ================================================================================ 5. VIDEO GENERATION - WAN 2.2 MODELS
303
328
  ================================================================================
304
- 5. VIDEO GENERATION - WAN 2.2 MODELS
305
- ================================================================================
306
329
 
307
- CRITICAL: WAN 2.2 FPS BEHAVIOR
308
- ------------------------------
330
+ ## CRITICAL: WAN 2.2 FPS BEHAVIOR
331
+
309
332
  WAN 2.2 models ALWAYS generate video at 16fps internally.
310
333
  The 'fps' parameter (16 or 32) only controls POST-RENDER frame interpolation:
334
+
311
335
  - fps: 16 -> No interpolation, output is 16fps
312
336
  - fps: 32 -> Frames are doubled via interpolation, output is 32fps
313
337
 
314
- Frame calculation: duration * 16 + 1 (IGNORES fps setting)
338
+ Frame calculation: duration \* 16 + 1 (IGNORES fps setting)
315
339
  Example: 5 seconds always = 81 frames generated, regardless of fps
316
340
 
317
341
  MODEL VARIANTS:
318
- - Speed models (with _lightx2v suffix): 4-step, faster, good quality
342
+
343
+ - Speed models (with \_lightx2v suffix): 4-step, faster, good quality
319
344
  - Quality models (without suffix): More steps, slower, best quality
320
345
 
321
346
  WORKFLOW REFERENCE TABLE:
@@ -328,6 +353,7 @@ WORKFLOW REFERENCE TABLE:
328
353
  | Animate-Replace | wan_v2.2-14b-fp8_animate-replace_lightx2v | referenceImage + referenceVideo |
329
354
 
330
355
  TEXT-TO-VIDEO:
356
+
331
357
  ```javascript
332
358
  const project = await sogni.projects.create({
333
359
  type: 'video',
@@ -338,13 +364,14 @@ const project = await sogni.projects.create({
338
364
  numberOfMedia: 1,
339
365
  duration: 5,
340
366
  fps: 16,
341
- shift: 5.0 // Motion intensity: 1.0-8.0
367
+ shift: 5.0 // Motion intensity: 1.0-8.0
342
368
  });
343
369
 
344
370
  const urls = await project.waitForCompletion();
345
371
  ```
346
372
 
347
373
  IMAGE-TO-VIDEO:
374
+
348
375
  ```javascript
349
376
  const project = await sogni.projects.create({
350
377
  type: 'video',
@@ -359,6 +386,7 @@ const project = await sogni.projects.create({
359
386
  ```
360
387
 
361
388
  IMAGE-TO-VIDEO WITH END FRAME (Interpolation):
389
+
362
390
  ```javascript
363
391
  const project = await sogni.projects.create({
364
392
  type: 'video',
@@ -374,16 +402,17 @@ const project = await sogni.projects.create({
374
402
  ```
375
403
 
376
404
  SOUND-TO-VIDEO (Lip Sync):
405
+
377
406
  ```javascript
378
407
  const project = await sogni.projects.create({
379
408
  type: 'video',
380
409
  network: 'fast',
381
410
  modelId: 'wan_v2.2-14b-fp8_s2v_lightx2v',
382
- positivePrompt: '', // Optional for s2v
411
+ positivePrompt: '', // Optional for s2v
383
412
  referenceImage: fs.readFileSync('./face.jpg'),
384
413
  referenceAudio: fs.readFileSync('./speech.m4a'),
385
- audioStart: 0, // Where to start in audio file (seconds)
386
- audioDuration: 5, // How much audio to use (seconds)
414
+ audioStart: 0, // Where to start in audio file (seconds)
415
+ audioDuration: 5, // How much audio to use (seconds)
387
416
  numberOfMedia: 1,
388
417
  duration: 5,
389
418
  fps: 16
@@ -391,15 +420,16 @@ const project = await sogni.projects.create({
391
420
  ```
392
421
 
393
422
  ANIMATE-MOVE (Motion Transfer):
423
+
394
424
  ```javascript
395
425
  const project = await sogni.projects.create({
396
426
  type: 'video',
397
427
  network: 'fast',
398
428
  modelId: 'wan_v2.2-14b-fp8_animate-move_lightx2v',
399
429
  positivePrompt: '',
400
- referenceImage: fs.readFileSync('./character.jpg'), // Subject to animate
430
+ referenceImage: fs.readFileSync('./character.jpg'), // Subject to animate
401
431
  referenceVideo: fs.readFileSync('./motion-source.mp4'), // Motion source
402
- videoStart: 0, // Where to start in video (seconds)
432
+ videoStart: 0, // Where to start in video (seconds)
403
433
  numberOfMedia: 1,
404
434
  duration: 5,
405
435
  fps: 16
@@ -407,13 +437,14 @@ const project = await sogni.projects.create({
407
437
  ```
408
438
 
409
439
  ANIMATE-REPLACE (Subject Replacement):
440
+
410
441
  ```javascript
411
442
  const project = await sogni.projects.create({
412
443
  type: 'video',
413
444
  network: 'fast',
414
445
  modelId: 'wan_v2.2-14b-fp8_animate-replace_lightx2v',
415
446
  positivePrompt: '',
416
- referenceImage: fs.readFileSync('./new-character.jpg'), // New subject
447
+ referenceImage: fs.readFileSync('./new-character.jpg'), // New subject
417
448
  referenceVideo: fs.readFileSync('./original-video.mp4'), // Video with subject to replace
418
449
  videoStart: 0,
419
450
  numberOfMedia: 1,
@@ -434,12 +465,11 @@ WAN 2.2 VIDEO PARAMETERS:
434
465
  | audioDuration | number | 1-30 | 30 | s2v: audio length to use |
435
466
  | videoStart | number | 0+ | 0 | animate: video start position |
436
467
 
437
- ================================================================================
438
- 6. VIDEO GENERATION - LTX-2.3 MODELS
468
+ ================================================================================ 6. VIDEO GENERATION - LTX-2.3 MODELS
439
469
  ================================================================================
440
470
 
441
- CRITICAL: LTX FPS BEHAVIOR (Different from WAN!)
442
- --------------------------------------------------
471
+ ## CRITICAL: LTX FPS BEHAVIOR (Different from WAN!)
472
+
443
473
  LTX-2.3 models generate at the ACTUAL specified FPS (1-60 range).
444
474
  There is NO post-render interpolation.
445
475
 
@@ -447,17 +477,18 @@ Frame calculation: duration * fps + 1
447
477
  Frame count must follow pattern: 1 + n*8 (valid: 1, 9, 17, 25, 33, 41, ...)
448
478
  The SDK automatically snaps to valid frame counts.
449
479
 
450
- Example: 5 seconds at 24fps = 121 frames (1 + 15*8 = 121)
480
+ Example: 5 seconds at 24fps = 121 frames (1 + 15\*8 = 121)
451
481
 
452
482
  LTX-2.3 22B MODELS (Recommended):
453
- - Speed models (with _distilled suffix): 8-step, faster, good quality
454
- - Quality models (with _dev suffix): 20-step, slower, best quality
455
483
 
456
- | Workflow | Model ID (Fast) | Model ID (Quality) |
457
- |----------|-----------------|---------------------|
458
- | Text-to-Video | ltx23-22b-fp8_t2v_distilled | ltx23-22b-fp8_t2v_dev |
459
- | Image-to-Video | ltx23-22b-fp8_i2v_distilled | ltx23-22b-fp8_i2v_dev |
460
- | Audio-to-Video | ltx23-22b-fp8_a2v_distilled | ltx23-22b-fp8_a2v_dev |
484
+ - Speed models (with \_distilled suffix): 8-step, faster, good quality
485
+ - Quality models (with \_dev suffix): 20-step, slower, best quality
486
+
487
+ | Workflow | Model ID (Fast) | Model ID (Quality) |
488
+ | -------------------- | ---------------------------- | ---------------------- |
489
+ | Text-to-Video | ltx23-22b-fp8_t2v_distilled | ltx23-22b-fp8_t2v_dev |
490
+ | Image-to-Video | ltx23-22b-fp8_i2v_distilled | ltx23-22b-fp8_i2v_dev |
491
+ | Audio-to-Video | ltx23-22b-fp8_a2v_distilled | ltx23-22b-fp8_a2v_dev |
461
492
  | Image+Audio-to-Video | ltx23-22b-fp8_ia2v_distilled | ltx23-22b-fp8_ia2v_dev |
462
493
 
463
494
  VIDEO-TO-VIDEO CONTROLNET (LTX-2.3):
@@ -466,6 +497,7 @@ VIDEO-TO-VIDEO CONTROLNET (LTX-2.3):
466
497
  | Video-to-Video (ControlNet) | ltx23-22b-fp8_v2v_distilled | ltx23-22b-fp8_v2v_dev |
467
498
 
468
499
  LTX-2.3 TEXT-TO-VIDEO:
500
+
469
501
  ```javascript
470
502
  const project = await sogni.projects.create({
471
503
  type: 'video',
@@ -479,6 +511,7 @@ const project = await sogni.projects.create({
479
511
  ```
480
512
 
481
513
  LTX-2.3 IMAGE-TO-VIDEO WITH KEYFRAMES:
514
+
482
515
  ```javascript
483
516
  const project = await sogni.projects.create({
484
517
  type: 'video',
@@ -487,8 +520,8 @@ const project = await sogni.projects.create({
487
520
  positivePrompt: 'Smooth camera movement',
488
521
  referenceImage: fs.readFileSync('./start.jpg'),
489
522
  referenceImageEnd: fs.readFileSync('./end.jpg'),
490
- firstFrameStrength: 0.6, // 0-1, how closely to match start
491
- lastFrameStrength: 0.6, // 0-1, how closely to match end
523
+ firstFrameStrength: 0.6, // 0-1, how closely to match start
524
+ lastFrameStrength: 0.6, // 0-1, how closely to match end
492
525
  numberOfMedia: 1,
493
526
  duration: 8,
494
527
  fps: 24
@@ -496,16 +529,17 @@ const project = await sogni.projects.create({
496
529
  ```
497
530
 
498
531
  LTX-2.3 VIDEO-TO-VIDEO WITH CONTROLNET:
532
+
499
533
  ```javascript
500
534
  const project = await sogni.projects.create({
501
535
  type: 'video',
502
536
  network: 'fast',
503
- modelId: 'ltx23-22b-fp8_v2v_distilled', // Must use v2v model for ControlNet
537
+ modelId: 'ltx23-22b-fp8_v2v_distilled', // Must use v2v model for ControlNet
504
538
  positivePrompt: 'Transform into anime style',
505
539
  referenceVideo: fs.readFileSync('./source-video.mp4'),
506
540
  controlNet: {
507
- name: 'canny', // 'canny', 'pose', 'depth', or 'detailer'
508
- strength: 0.6 // Recommended: canny/depth 0.6, pose/detailer 0.85
541
+ name: 'canny', // 'canny', 'pose', 'depth', or 'detailer'
542
+ strength: 0.6 // Recommended: canny/depth 0.6, pose/detailer 0.85
509
543
  },
510
544
  numberOfMedia: 1,
511
545
  duration: 5,
@@ -521,8 +555,106 @@ LTX VIDEO PARAMETERS:
521
555
  | firstFrameStrength | number | 0.0-1.0 | 0.6 | Keyframe matching |
522
556
  | lastFrameStrength | number | 0.0-1.0 | 0.6 | Keyframe matching |
523
557
 
558
+ ================================================================================ 7. VIDEO GENERATION - SEEDANCE 2.0 MODELS
524
559
  ================================================================================
525
- 7. AUDIO GENERATION - ACE-STEP 1.5 MODELS
560
+
561
+ Seedance 2.0 models use the external API path. They generate at fixed 24fps and
562
+ currently support 4-15 second direct SDK outputs. Seedance models are premium/external API models and are Spark-only.
563
+ Seedance 2.0 Fast accepts the same optional image, video, and audio references and caps output at 720p.
564
+ Seedance can combine reference media in one request: up to 9 image assets, 3 video
565
+ assets, 3 audio assets, and 12 total assets. Text+audio-only is unsupported; use at
566
+ least one image or video reference with audio references. Prompt-visible reference
567
+ tags use `@Image1`, `@Video1`, and `@Audio1`, counted independently by modality
568
+ in attachment order. Assign every useful reference a role and prefer positive
569
+ preservation constraints (state what to keep rather than what to avoid). Results that depend on
570
+ exact readable text/logos, lip-sync, voice cloning, or real-human references should be reviewed manually.
571
+
572
+ SEEDANCE MODEL IDS:
573
+ | Model ID | Context Assets | Notes |
574
+ |----------|----------------|-------|
575
+ | seedance-2-0 | Optional image, video, and audio refs | 24fps, 4-15s, 1080p |
576
+ | seedance-2-0-fast | Optional image, video, and audio refs | 24fps, 4-15s, 720p cap |
577
+
578
+ SEEDANCE TEXT-TO-VIDEO:
579
+
580
+ ```javascript
581
+ const project = await sogni.projects.create({
582
+ type: 'video',
583
+ network: 'fast',
584
+ modelId: 'seedance-2-0',
585
+ positivePrompt: 'A cinematic neon skyline time lapse',
586
+ numberOfMedia: 1,
587
+ duration: 5,
588
+ fps: 24,
589
+ width: 1920,
590
+ height: 1088,
591
+ tokenType: 'spark'
592
+ });
593
+ ```
594
+
595
+ SEEDANCE MULTIMODAL CONTEXT:
596
+
597
+ Use URL arrays for loose Seedance context references that should not lock first/last
598
+ frames. URLs must be HTTPS and reachable by the vendor. Use role-tagged prompts
599
+ such as `Use @Image1 for product identity, @Video1 for camera movement, and
600
+ @Audio1 for music rhythm. Keep the product silhouette and logo placement
601
+ consistent.`
602
+
603
+ ```javascript
604
+ const project = await sogni.projects.create({
605
+ type: 'video',
606
+ network: 'fast',
607
+ modelId: 'seedance-2-0',
608
+ positivePrompt: 'Use @Image1 as the product identity, @Image2 for detail inserts, @Video1 for camera movement, and @Audio1 for music rhythm. Create one cohesive launch spot with smooth continuity and crisp product preservation.',
609
+ duration: 8,
610
+ fps: 24,
611
+ width: 1920,
612
+ height: 1088,
613
+ referenceImageUrls: [
614
+ 'https://cdn.example.com/product-front.png',
615
+ 'https://cdn.example.com/product-detail.png'
616
+ ],
617
+ referenceVideoUrls: ['https://cdn.example.com/motion-reference.mp4'],
618
+ referenceAudioUrls: ['https://cdn.example.com/music-reference.m4a']
619
+ });
620
+ ```
621
+
622
+ SEEDANCE IMAGE-TO-VIDEO:
623
+
624
+ ```javascript
625
+ const project = await sogni.projects.create({
626
+ type: 'video',
627
+ network: 'fast',
628
+ modelId: 'seedance-2-0',
629
+ positivePrompt: 'slow push-in, soft parallax, cinematic lighting',
630
+ referenceImage: fs.readFileSync('./subject.jpg'),
631
+ duration: 5,
632
+ fps: 24
633
+ });
634
+ ```
635
+
636
+ SEEDANCE VIDEO-TO-VIDEO:
637
+
638
+ ```javascript
639
+ const project = await sogni.projects.create({
640
+ type: 'video',
641
+ network: 'fast',
642
+ modelId: 'seedance-2-0',
643
+ positivePrompt: 'Restyle this clip as a painted anime scene',
644
+ referenceVideo: fs.readFileSync('./source.mp4'),
645
+ duration: 5,
646
+ fps: 24
647
+ });
648
+ ```
649
+
650
+ Chat/tool aliases: `seedance2` and `seedance2-fast`. V2V uses `seedance2`
651
+ as the model selector and `seedance-v2v` as the control mode.
652
+
653
+ For focused endpoint coverage with server-side Seedance prompt expansion, see
654
+ `examples/workflow_partner_seedance_video.mjs`. It covers Seedance T2V, I2V,
655
+ IA2V, V2V, and multimodal context through the hosted Sogni tools and hosted workflow endpoint.
656
+
657
+ ================================================================================ 8. AUDIO GENERATION - ACE-STEP 1.5 MODELS
526
658
  ================================================================================
527
659
 
528
660
  ACE-Step 1.5 models generate music from text descriptions with optional lyrics.
@@ -535,14 +667,15 @@ MODEL VARIANTS:
535
667
  | ace_step_1.5_sft | More Control | More accurate lyrics, less stable |
536
668
 
537
669
  TEXT-TO-MUSIC (Instrumental):
670
+
538
671
  ```javascript
539
672
  const project = await sogni.projects.create({
540
673
  type: 'audio',
541
674
  modelId: 'ace_step_1.5_turbo',
542
675
  positivePrompt: 'Upbeat electronic dance music with synth leads and driving bass',
543
676
  numberOfMedia: 1,
544
- duration: 30, // seconds (10-600)
545
- bpm: 128, // beats per minute (30-300)
677
+ duration: 30, // seconds (10-600)
678
+ bpm: 128, // beats per minute (30-300)
546
679
  keyscale: 'C major',
547
680
  timesignature: '4', // 4/4 time
548
681
  steps: 8,
@@ -554,14 +687,16 @@ console.log(urls[0]); // Audio URL (valid 24 hours)
554
687
  ```
555
688
 
556
689
  TEXT-TO-MUSIC (With Lyrics):
690
+
557
691
  ```javascript
558
692
  const project = await sogni.projects.create({
559
693
  type: 'audio',
560
694
  modelId: 'ace_step_1.5_sft',
561
695
  positivePrompt: 'Soft acoustic folk ballad with fingerpicked guitar',
562
- lyrics: 'Verse 1:\nWalking down a quiet road\nWith the wind upon my face\n\nChorus:\nThis is where I find my peace\nIn this gentle, open space',
696
+ lyrics:
697
+ 'Verse 1:\nWalking down a quiet road\nWith the wind upon my face\n\nChorus:\nThis is where I find my peace\nIn this gentle, open space',
563
698
  language: 'en',
564
- numberOfMedia: 2, // Generate 2 versions
699
+ numberOfMedia: 2, // Generate 2 versions
565
700
  duration: 60,
566
701
  bpm: 90,
567
702
  keyscale: 'A minor',
@@ -598,6 +733,7 @@ C, C#, Db, D, D#, Eb, E, F, F#, Gb, G, G#, Ab, A, A#, Bb, B
598
733
  (each with major and minor variants, e.g., "C major", "A minor")
599
734
 
600
735
  TIME SIGNATURES:
736
+
601
737
  - 2 = 2/4 time (marches, polka)
602
738
  - 3 = 3/4 time (waltzes, ballads)
603
739
  - 4 = 4/4 time (most pop, rock, hip-hop)
@@ -610,8 +746,7 @@ pl (Polish), tr (Turkish), vi (Vietnamese), cs (Czech), fa (Persian),
610
746
  id (Indonesian), ko (Korean), uk (Ukrainian), hu (Hungarian), ar (Arabic),
611
747
  sv (Swedish), ro (Romanian), el (Greek), and more. Use 'unknown' for auto-detect.
612
748
 
613
- ================================================================================
614
- 8. LLM TEXT GENERATION
749
+ ================================================================================ 9. LLM TEXT GENERATION
615
750
  ================================================================================
616
751
 
617
752
  The Sogni SDK supports LLM text generation through the Supernet, providing an
@@ -621,6 +756,7 @@ thinking/reasoning mode, and tool calling.
621
756
  DEFAULT LLM MODEL: qwen3.6-35b-a3b-gguf-iq4xs
622
757
 
623
758
  CHAT COMPLETION (Non-Streaming):
759
+
624
760
  ```javascript
625
761
  const response = await sogni.projects.chatCompletion({
626
762
  model: 'qwen3.6-35b-a3b-gguf-iq4xs',
@@ -637,6 +773,7 @@ console.log(response.choices[0].message.content);
637
773
  ```
638
774
 
639
775
  STREAMING CHAT COMPLETION:
776
+
640
777
  ```javascript
641
778
  const stream = await sogni.projects.chatCompletionStream({
642
779
  model: 'qwen3.6-35b-a3b-gguf-iq4xs',
@@ -652,10 +789,9 @@ for await (const chunk of stream) {
652
789
  ```
653
790
 
654
791
  MULTI-TURN CONVERSATION:
792
+
655
793
  ```javascript
656
- const messages = [
657
- { role: 'system', content: 'You are a helpful assistant.' }
658
- ];
794
+ const messages = [{ role: 'system', content: 'You are a helpful assistant.' }];
659
795
 
660
796
  // Turn 1
661
797
  messages.push({ role: 'user', content: 'What is the Sogni Supernet?' });
@@ -675,6 +811,7 @@ const response2 = await sogni.projects.chatCompletion({
675
811
 
676
812
  THINKING MODE:
677
813
  Enable model reasoning/thinking via `chat_template_kwargs`:
814
+
678
815
  - When enabled, the model outputs `<think>...</think>` blocks showing its reasoning
679
816
  - When disabled (default), the model provides direct responses
680
817
 
@@ -693,13 +830,13 @@ LLM CHAT PARAMETERS:
693
830
  | tool_choice | string | auto | Tool selection: 'auto', 'none', or a specific tool |
694
831
 
695
832
  MESSAGE ROLES:
833
+
696
834
  - 'system': System instructions (first message)
697
835
  - 'user': User input
698
836
  - 'assistant': Model responses (and tool call requests)
699
837
  - 'tool': Tool execution results (with tool_call_id)
700
838
 
701
- ================================================================================
702
- 9. LLM TOOL CALLING & SOGNI PLATFORM TOOLS
839
+ ================================================================================
+ 10. LLM TOOL CALLING & SOGNI PLATFORM TOOLS
703
840
  ================================================================================
704
841
 
705
842
  TOOL CALLING (Function Calling):
@@ -708,6 +845,7 @@ when to use a tool, returns structured arguments, and you execute the function
708
845
  locally before feeding results back for a natural language answer.
709
846
 
710
847
  DEFINING TOOLS (OpenAI-compatible format):
848
+
711
849
  ```javascript
712
850
  const tools = [
713
851
  {
@@ -742,6 +880,7 @@ const tools = [
742
880
  ```
743
881
 
744
882
  TOOL CALLING FLOW:
883
+
745
884
  ```javascript
746
885
  // 1. Send message with tools
747
886
  const response = await sogni.projects.chatCompletion({
@@ -789,17 +928,64 @@ detects when a user wants to create media, enhances the prompt, and calls
789
928
  Sogni's generation APIs automatically:
790
929
 
791
930
  - Image Generation: "Create an image of a cyberpunk city at night"
931
+ - Image Editing / Reference-Guided Images: "Edit this portrait to look like a renaissance painting"
792
932
  - Video Generation: "Generate a video of ocean waves crashing at sunset"
933
+ - Sound-to-Video: "Turn this song into a music video"
934
+ - Video-to-Video: "Restyle this video as anime"
793
935
  - Music Generation: "Compose a jazz song about the rain"
794
936
 
795
- DEFAULT GENERATION MODELS (used by platform tools):
796
- | Media Type | Model ID | Description |
797
- |------------|----------|-------------|
798
- | Image | z_image_turbo_bf16 | Z-Image Turbo, ultra-fast 8-step generation |
799
- | Video | ltx23-22b-fp8_t2v_distilled | LTX-2.3 text-to-video, fast distilled |
800
- | Audio | ace_step_1.5_turbo | ACE-Step 1.5 Turbo, fast music generation |
937
+ SDK-LOCAL PUBLIC TOOL NAMES (`SogniTools.all`, client-managed execution):
938
+
939
+ - `sogni_generate_image`
940
+ - `sogni_edit_image`
941
+ - `sogni_generate_video`
942
+ - `sogni_sound_to_video`
943
+ - `sogni_video_to_video`
944
+ - `sogni_generate_music`
945
+
946
+ HOSTED CREATIVE TOOL FAMILIES:
947
+ The Sogni API can also inject hosted Sogni tool families into the same chat completions endpoint. The default `creative-tools` surface provides media, post-production, analysis, metadata, and synchronous composition tools; `sogni_tools: "creative-agent"` adds workflow control and asset-manifest tools:
948
+
949
+ - `generate_image`, `edit_image`, `generate_video`, `generate_music`, `sound_to_video`, `video_to_video` — core media-generation tools with creative-agent naming.
950
+ - `restore_photo`, `apply_style`, `refine_result`, `change_angle` — image-edit adapters backed by the shared image-edit workflow.
951
+ - `animate_photo` — image-to-video, with multi-source fan-out via `sourceImageIndices`. Fan-out (>1 source image) is composed into one MP4 by default; opt out with `stitched: false` for a flat clip list.
952
+ - `stitch_video` — concatenate selected video clips (with optional audio overlay via `audioIndex`) into one MP4.
953
+ - `orbit_video` — generate orbit clips around a subject and stitch them into one MP4.
954
+ - `dance_montage` — generate dance clips and stitch when multi-clip.
955
+ - `extend_video` — append new content to an existing video while preserving the original portion.
956
+ - `replace_video_segment` — replace a bounded window inside an existing video. Replacement sources can use `replacementStartSeconds` / `replacementEndSeconds` to splice a trimmed source slice.
957
+ - `overlay_video` — burn static text or image overlays onto an existing video.
958
+ - `add_subtitles` — burn subtitle cues onto an existing video.
959
+ - `enhance_prompt` — model-ready image/video/music/edit prompt expansion.
960
+ - `compose_script` — scripts, storyboards, trailers, social shorts, campaign beats, and video prompts.
961
+ - `compose_lyrics`, `compose_instrumental` — vocal lyrics or instrumental music structures.
962
+
963
+ PER-REQUEST MEDIA CONTEXT:
964
+ Generated images, videos, and audio are addressable by stable indices across tool rounds. Every hosted Sogni tool result returns `startIndex` and `indices` in its envelope so a later tool can target the result with `sourceImageIndex`, `videoIndices`, `audioIndex`, etc., without the model having to copy URLs back into prompt text.
965
+
966
+ COMPOSITION TOOLS:
967
+ `stitch_video`, `orbit_video`, `dance_montage`, and `animate_photo` fan-out return a single composed MP4 URL. Per-clip source URLs are preserved on the result envelope for inspection.
968
+
969
+ MEDIA-CONDITIONED TOOL INPUTS:
970
+
971
+ - `source_image_url`, `reference_image_urls`
972
+ - `reference_image_url`, `reference_image_end_url`
973
+ - `reference_audio_url`, `reference_audio_identity_url`
974
+ - `reference_video_url`
975
+ All media-conditioned tool inputs must be inline base64-encoded `data:` URIs. Remote `http(s)` URLs are not allowed. Tool image inputs accept PNG/JPEG only, tool audio inputs accept MP3/M4A/WAV only, and tool video inputs accept MP4 or MOV/QuickTime only.
976
+
977
+ DEFAULT GENERATION MODELS (used by SDK-local platform tools):
978
+ | Tool | Model ID | Description |
979
+ |------|----------|-------------|
980
+ | `sogni_generate_image` | z_image_turbo_bf16 | Z-Image Turbo, ultra-fast 8-step generation |
981
+ | `sogni_edit_image` | workflow/model-dependent | Reference-guided image editing with edit-capable Qwen/Flux models |
982
+ | `sogni_generate_video` | ltx23-22b-fp8_t2v_distilled / ltx23-22b-fp8_i2v_distilled / Seedance selectors | Text-to-video or image-to-video |
983
+ | `sogni_sound_to_video` | ltx23-22b-fp8_a2v_distilled / ltx23-22b-fp8_ia2v_distilled | Audio-driven video generation |
984
+ | `sogni_video_to_video` | ltx23-22b-fp8_v2v_distilled / seedance-2-0 / WAN animate models | Video transformation / motion transfer |
985
+ | `sogni_generate_music` | ace_step_1.5_turbo | ACE-Step 1.5 Turbo, fast music generation |
801
986
 
802
987
  PLATFORM TOOL WORKFLOW:
988
+
803
989
  1. User sends natural language request (e.g., "Make me an image of a dragon")
804
990
  2. LLM detects media generation intent
805
991
  3. LLM enhances the prompt for optimal output quality
@@ -807,10 +993,19 @@ PLATFORM TOOL WORKFLOW:
807
993
  5. Your code executes the Sogni project creation (image/video/audio)
808
994
  6. Result URL is returned and opened automatically
809
995
 
810
- See `workflow_text_chat_sogni_tools.mjs` for the complete implementation with
811
- image, video, and music generation tools wired to Sogni's Projects API.
996
+ See `workflow_text_chat_sogni_tools.mjs` for a composition-pipeline example
997
+ covering the core image/video/music flows. See `workflow_creative_agent_tools.mjs`
998
+ for server-side hosted Sogni tool injection with `sogni_tools`.
999
+ For the full built-in SDK tool surface, use `SogniTools.all` with
1000
+ `sogni.chat.tools.executeAll()` or `autoExecuteTools`.
1001
+
1002
+ DURABLE CREATIVE WORKFLOWS:
1003
+ Use `sogni.creativeWorkflows` for `/v1/creative-agent/workflows` coverage:
1004
+ `startImageToVideo`, `list`, `get`, `events`, `streamEvents`, and `cancel`.
1005
+ These endpoints require API-key auth. See `workflow_creative_agent_workflows.mjs`.
812
1006
 
813
1007
  EXAMPLE CLI USAGE:
1008
+
814
1009
  ```bash
815
1010
  # Generate an image through natural language
816
1011
  node workflow_text_chat_sogni_tools.mjs "Create an image of a cyberpunk city"
@@ -824,12 +1019,17 @@ node workflow_text_chat_sogni_tools.mjs "Compose a jazz song about the rain"
824
1019
  # Generate multiple items
825
1020
  node workflow_text_chat_sogni_tools.mjs -n 4 "Create images of fantasy landscapes"
826
1021
 
1022
+ # Server-side hosted creative tools
1023
+ node workflow_creative_agent_tools.mjs "Create an orbit video plan for a crystal perfume bottle"
1024
+
1025
+ # Durable creative-agent workflow
1026
+ node workflow_creative_agent_workflows.mjs "A chrome monorail over neon gardens" --watch
1027
+
827
1028
  # Tool calling with external tools (weather, time, math, conversions)
828
1029
  node workflow_text_chat_tool_calling.mjs "What's the weather in Austin, TX?"
829
1030
  ```
830
1031
 
831
- ================================================================================
832
- 10. VISION CHAT (MULTIMODAL IMAGE UNDERSTANDING)
1032
+ ================================================================================
+ 11. VISION CHAT (MULTIMODAL IMAGE UNDERSTANDING)
833
1033
  ================================================================================
834
1034
 
835
1035
  The Sogni SDK supports multimodal vision chat via VLM (Vision-Language Model)
@@ -840,10 +1040,10 @@ and multi-image comparison.
840
1040
  VLM MODEL:
841
1041
  | Model ID | Name | Description |
842
1042
  |----------|------|-------------|
843
- | qwen3.6-35b-a3b-gguf-iq4xs | Qwen3.6 35B VLM | Vision-language model with 128K context target |
1043
+ | qwen3.6-35b-a3b-gguf-iq4xs | Qwen3.6 35B VLM | Vision-language model with a native context length of 262,144 tokens |
844
1044
 
845
1045
  MULTIMODAL MESSAGE FORMAT:
846
- Images are sent as base64-encoded data URIs using the OpenAI-compatible
1046
+ Images are sent as base64-encoded JPEG or PNG data URIs using the OpenAI-compatible
847
1047
  `image_url` content type within a multipart user message:
848
1048
 
849
1049
  ```javascript
@@ -874,6 +1074,7 @@ for await (const chunk of stream) {
874
1074
  ```
875
1075
 
876
1076
  QWEN3.6 PRESET SELECTION:
1077
+
877
1078
  - `think: true, taskProfile: 'general'` -> thinking mode for general tasks
878
1079
  - `think: true, taskProfile: 'coding'` -> thinking mode for precise coding
879
1080
  - `think: false, taskProfile: 'general'` -> instruct / non-thinking general
@@ -886,6 +1087,7 @@ You can override the preset with explicit sampling params:
886
1087
 
887
1088
  MULTI-IMAGE COMPARISON:
888
1089
  Send two images in the same message for side-by-side analysis:
1090
+
889
1091
  ```javascript
890
1092
  const userMessage = {
891
1093
  role: 'user',
@@ -898,6 +1100,7 @@ const userMessage = {
898
1100
  ```
899
1101
 
900
1102
  LOADING LOCAL IMAGES:
1103
+
901
1104
  ```javascript
902
1105
  import * as fs from 'node:fs';
903
1106
  import * as path from 'node:path';
@@ -907,9 +1110,10 @@ const base64 = buffer.toString('base64');
907
1110
  const dataUri = `data:image/jpeg;base64,${base64}`;
908
1111
  ```
909
1112
 
910
- SUPPORTED IMAGE FORMATS: JPEG, PNG, WebP, GIF (max 20MB recommended)
1113
+ VISION INPUT LIMITS: JPEG or PNG only, max 20 images per request, max 10MB per image, longest side capped at 1024px. This 1024px dimension cap applies only to vision `image_url` inputs, not to media-generation tool image inputs.
911
1114
 
912
1115
  VISION CAPABILITIES:
1116
+
913
1117
  - Scene description: Main subject, background, colors, lighting, mood, spatial
914
1118
  relationships, composition, and visible text
915
1119
  - OCR / Text extraction: All visible text with approximate location and layout
@@ -922,6 +1126,7 @@ VISION CAPABILITIES:
922
1126
  composition, mood, quality, and notable differences
923
1127
 
924
1128
  EXAMPLE CLI USAGE:
1129
+
925
1130
  ```bash
926
1131
  # Interactive vision chat
927
1132
  node workflow_text_chat_vision.mjs
@@ -935,11 +1140,11 @@ node workflow_text_chat_vision.mjs --image photo.jpg
935
1140
  See `workflow_text_chat_vision.mjs` for a complete interactive vision chat
936
1141
  implementation with all commands and multi-turn conversation support.
937
1142
 
938
- ================================================================================
939
- 11. COMPLETE PARAMETER REFERENCE
1143
+ ================================================================================
+ 12. COMPLETE PARAMETER REFERENCE
940
1144
  ================================================================================
941
1145
 
942
1146
  BASE PARAMETERS (Both Image and Video):
1147
+
943
1148
  ```typescript
944
1149
  {
945
1150
  modelId: string; // Required - model identifier
@@ -959,6 +1164,7 @@ BASE PARAMETERS (Both Image and Video):
959
1164
  ```
960
1165
 
961
1166
  IMAGE-SPECIFIC PARAMETERS:
1167
+
962
1168
  ```typescript
963
1169
  {
964
1170
  type: 'image';
@@ -977,6 +1183,7 @@ IMAGE-SPECIFIC PARAMETERS:
977
1183
  ```
978
1184
 
979
1185
  VIDEO-SPECIFIC PARAMETERS:
1186
+
980
1187
  ```typescript
981
1188
  {
982
1189
  type: 'video';
@@ -1005,6 +1212,7 @@ VIDEO-SPECIFIC PARAMETERS:
1005
1212
  ```
1006
1213
 
1007
1214
  AUDIO-SPECIFIC PARAMETERS:
1215
+
1008
1216
  ```typescript
1009
1217
  {
1010
1218
  type: 'audio';
@@ -1024,11 +1232,11 @@ AUDIO-SPECIFIC PARAMETERS:
1024
1232
  }
1025
1233
  ```
1026
1234
 
1027
- ================================================================================
1028
- 12. EVENTS & PROGRESS TRACKING
1235
+ ================================================================================
+ 13. EVENTS & PROGRESS TRACKING
1029
1236
  ================================================================================
1030
1237
 
1031
1238
  PROMISE-BASED COMPLETION:
1239
+
1032
1240
  ```javascript
1033
1241
  const project = await sogni.projects.create({ ... });
1034
1242
  const urls = await project.waitForCompletion();
@@ -1036,6 +1244,7 @@ const urls = await project.waitForCompletion();
1036
1244
  ```
1037
1245
 
1038
1246
  PROJECT EVENTS:
1247
+
1039
1248
  ```javascript
1040
1249
  project.on('progress', (percent) => {
1041
1250
  console.log(`Overall: ${percent}%`);
@@ -1063,6 +1272,7 @@ project.on('failed', (error) => {
1063
1272
  ```
1064
1273
 
1065
1274
  JOB EVENTS:
1275
+
1066
1276
  ```javascript
1067
1277
  const job = project.jobs[0];
1068
1278
 
@@ -1083,36 +1293,43 @@ job.on('failed', (error) => {
1083
1293
  });
1084
1294
  ```
1085
1295
 
1296
+ External API-backed jobs may not report diffusion steps. The SDK keeps project/job
1297
+ progress finite by using provider progress or ETA-derived progress, and direct
1298
+ provider result URLs are preserved on `job.resultUrl` / `job.getResultUrl()`.
1299
+
1086
1300
  PROJECT PROPERTIES:
1301
+
1087
1302
  ```javascript
1088
- project.id // string
1089
- project.status // 'pending' | 'queued' | 'processing' | 'completed' | 'failed' | 'canceled'
1090
- project.progress // number (0-100)
1091
- project.queuePosition // number | null
1092
- project.eta // number | null (seconds)
1093
- project.jobs // Job[]
1094
- project.resultUrls // string[] (after completion)
1303
+ project.id; // string
1304
+ project.status; // 'pending' | 'queued' | 'processing' | 'completed' | 'failed' | 'canceled'
1305
+ project.progress; // number (0-100)
1306
+ project.queuePosition; // number | null
1307
+ project.eta; // number | null (seconds)
1308
+ project.jobs; // Job[]
1309
+ project.resultUrls; // string[] (after completion)
1095
1310
  ```
1096
1311
 
1097
1312
  JOB PROPERTIES:
1313
+
1098
1314
  ```javascript
1099
- job.id // string
1100
- job.status // 'pending' | 'initiating' | 'processing' | 'completed' | 'failed' | 'canceled'
1101
- job.progress // number (0-100)
1102
- job.step // number (current step)
1103
- job.stepCount // number (total steps)
1104
- job.seed // number
1105
- job.previewUrl // string | null
1106
- job.resultUrl // string | null
1107
- job.hasResultMedia // boolean
1108
- job.isNSFW // boolean
1109
- job.workerName // string
1110
- job.eta // string | null
1111
- job.etaSeconds // number | null
1112
- job.type // 'image' | 'video'
1315
+ job.id; // string
1316
+ job.status; // 'pending' | 'initiating' | 'processing' | 'completed' | 'failed' | 'canceled'
1317
+ job.progress; // number (0-100)
1318
+ job.step; // number (current step)
1319
+ job.stepCount; // number (total steps)
1320
+ job.seed; // number
1321
+ job.previewUrl; // string | null
1322
+ job.resultUrl; // string | null
1323
+ job.hasResultMedia; // boolean
1324
+ job.isNSFW; // boolean
1325
+ job.workerName; // string
1326
+ job.eta; // string | null
1327
+ job.etaSeconds; // number | null
1328
+ job.type; // 'image' | 'video'
1113
1329
  ```
1114
1330
 
1115
1331
  FETCHING RESULTS:
1332
+
1116
1333
  ```javascript
1117
1334
  // Get signed URL (valid ~1 hour)
1118
1335
  const signedUrl = await job.getResultUrl();
@@ -1121,17 +1338,18 @@ const signedUrl = await job.getResultUrl();
1121
1338
  const blob = await job.getResultData();
1122
1339
  ```
1123
1340
 
1124
- ================================================================================
1125
- 13. MODELS, PRESETS & OPTIONS
1341
+ ================================================================================
+ 14. MODELS, PRESETS & OPTIONS
1126
1342
  ================================================================================
1127
1343
 
1128
1344
  WAIT FOR MODELS:
1345
+
1129
1346
  ```javascript
1130
1347
  const models = await sogni.projects.waitForModels();
1131
1348
  // Returns AvailableModel[]
1132
1349
  ```
1133
1350
 
1134
1351
  GET AVAILABLE MODELS:
1352
+
1135
1353
  ```javascript
1136
1354
  const models = sogni.projects.availableModels;
1137
1355
  // or
@@ -1140,12 +1358,14 @@ const models = await sogni.projects.getAvailableModels('fast');
1140
1358
  ```
1141
1359
 
1142
1360
  GET ALL SUPPORTED MODELS:
1361
+
1143
1362
  ```javascript
1144
1363
  const allModels = await sogni.projects.getSupportedModels();
1145
1364
  // Returns full model list with tiers
1146
1365
  ```
1147
1366
 
1148
1367
  GET SIZE PRESETS:
1368
+
1149
1369
  ```javascript
1150
1370
  const presets = await sogni.projects.getSizePresets('fast', 'flux1-schnell-fp8');
1151
1371
  /*
@@ -1160,6 +1380,7 @@ Returns: [
1160
1380
  ```
1161
1381
 
1162
1382
  GET SAMPLER/SCHEDULER OPTIONS:
1383
+
1163
1384
  ```javascript
1164
1385
  const options = await sogni.projects.getModelOptions('flux1-schnell-fp8');
1165
1386
  /*
@@ -1176,11 +1397,11 @@ Returns: {
1176
1397
  */
1177
1398
  ```
1178
1399
 
1179
- ================================================================================
1180
- 14. COST ESTIMATION
1400
+ ================================================================================
+ 15. COST ESTIMATION
1181
1401
  ================================================================================
1182
1402
 
1183
1403
  ESTIMATE IMAGE COST:
1404
+
1184
1405
  ```javascript
1185
1406
  const cost = await sogni.projects.estimate({
1186
1407
  network: 'fast',
@@ -1192,12 +1413,13 @@ const cost = await sogni.projects.estimate({
1192
1413
  sizePreset: 'square_hd'
1193
1414
  });
1194
1415
 
1195
- console.log(cost.sogni); // Cost in Sogni tokens
1196
- console.log(cost.spark); // Cost in Spark tokens
1197
- console.log(cost.usd); // Cost in USD
1416
+ console.log(cost.sogni); // Cost in Sogni tokens
1417
+ console.log(cost.spark); // Cost in Spark tokens
1418
+ console.log(cost.usd); // Cost in USD
1198
1419
  ```
1199
1420
 
1200
1421
  ESTIMATE VIDEO COST:
1422
+
1201
1423
  ```javascript
1202
1424
  const cost = await sogni.projects.estimateVideo({
1203
1425
  tokenType: 'sogni',
@@ -1212,17 +1434,18 @@ const cost = await sogni.projects.estimateVideo({
1212
1434
  ```
1213
1435
 
1214
1436
  CHECK BALANCE:
1437
+
1215
1438
  ```javascript
1216
1439
  await sogni.account.refreshBalance();
1217
1440
  const balance = sogni.account.currentAccount.balance;
1218
1441
  console.log(balance.sogni, balance.spark);
1219
1442
  ```
1220
1443
 
1221
- ================================================================================
1222
- 15. ERROR HANDLING
1444
+ ================================================================================
+ 16. ERROR HANDLING
1223
1445
  ================================================================================
1224
1446
 
1225
1447
  TRY-CATCH PATTERN:
1448
+
1226
1449
  ```javascript
1227
1450
  try {
1228
1451
  const project = await sogni.projects.create({ ... });
@@ -1239,6 +1462,7 @@ try {
1239
1462
  ```
1240
1463
 
1241
1464
  EVENT-BASED ERROR HANDLING:
1465
+
1242
1466
  ```javascript
1243
1467
  project.on('failed', (error) => {
1244
1468
  console.error('Project failed:', error.code, error.message);
@@ -1250,6 +1474,7 @@ project.on('jobFailed', (job) => {
1250
1474
  ```
1251
1475
 
1252
1476
  CANCEL A PROJECT:
1477
+
1253
1478
  ```javascript
1254
1479
  await sogni.projects.cancel(project.id);
1255
1480
  // or
@@ -1257,6 +1482,7 @@ await project.cancel();
1257
1482
  ```
1258
1483
 
1259
1484
  COMMON ERROR SCENARIOS:
1485
+
1260
1486
  - Insufficient balance
1261
1487
  - Invalid model ID
1262
1488
  - Missing required parameters (e.g., referenceImage for i2v)
@@ -1264,11 +1490,11 @@ COMMON ERROR SCENARIOS:
1264
1490
  - NSFW content detected (if filter enabled)
1265
1491
  - Invalid dimensions or parameters
1266
1492
 
1267
- ================================================================================
1268
- 16. ADVANCED PATTERNS
1493
+ ================================================================================
+ 17. ADVANCED PATTERNS
1269
1494
  ================================================================================
1270
1495
 
1271
1496
  BATCH PROCESSING:
1497
+
1272
1498
  ```javascript
1273
1499
  const images = ['image1.jpg', 'image2.jpg', 'image3.jpg'];
1274
1500
  const projects = [];
@@ -1287,12 +1513,11 @@ for (const img of images) {
1287
1513
  projects.push(project);
1288
1514
  }
1289
1515
 
1290
- const results = await Promise.all(
1291
- projects.map(p => p.waitForCompletion())
1292
- );
1516
+ const results = await Promise.all(projects.map((p) => p.waitForCompletion()));
1293
1517
  ```
1294
1518
 
1295
1519
  IMAGE ENHANCEMENT:
1520
+
1296
1521
  ```javascript
1297
1522
  // After a job completes, enhance it
1298
1523
  const enhancedProject = await job.enhance('medium', {
@@ -1303,16 +1528,18 @@ const enhancedUrls = await enhancedProject.waitForCompletion();
1303
1528
  ```
1304
1529
 
1305
1530
  TOKEN MANAGEMENT:
1531
+
1306
1532
  ```javascript
1307
1533
  // Use Spark tokens instead of Sogni
1308
1534
  const project = await sogni.projects.create({
1309
1535
  type: 'image',
1310
- tokenType: 'spark',
1536
+ tokenType: 'spark'
1311
1537
  // ... other params
1312
1538
  });
1313
1539
  ```
1314
1540
 
1315
1541
  USING LORAS:
1542
+
1316
1543
  ```javascript
1317
1544
  const project = await sogni.projects.create({
1318
1545
  type: 'image',
@@ -1324,11 +1551,11 @@ const project = await sogni.projects.create({
1324
1551
  });
1325
1552
  ```
1326
1553
 
1327
- ================================================================================
1328
- 17. TYPE DEFINITIONS
1554
+ ================================================================================
+ 18. TYPE DEFINITIONS
1329
1555
  ================================================================================
1330
1556
 
1331
1557
  PROJECT PARAMS:
1558
+
1332
1559
  ```typescript
1333
1560
  type ProjectParams = ImageProjectParams | VideoProjectParams;
1334
1561
 
@@ -1365,7 +1592,7 @@ interface ImageProjectParams extends BaseProjectParams {
1365
1592
 
1366
1593
  interface VideoProjectParams extends BaseProjectParams {
1367
1594
  type: 'video';
1368
- frames?: number; // deprecated
1595
+ frames?: number; // deprecated
1369
1596
  duration?: number;
1370
1597
  fps?: number;
1371
1598
  shift?: number;
@@ -1390,12 +1617,24 @@ interface VideoProjectParams extends BaseProjectParams {
1390
1617
  ```
1391
1618
 
1392
1619
  CONTROLNET TYPES:
1620
+
1393
1621
  ```typescript
1394
1622
  type ControlNetName =
1395
- | 'canny' | 'depth' | 'inpaint' | 'instrp2p'
1396
- | 'lineart' | 'lineartanime' | 'mlsd' | 'normalbae'
1397
- | 'openpose' | 'scribble' | 'segmentation'
1398
- | 'shuffle' | 'softedge' | 'tile' | 'instantid';
1623
+ | 'canny'
1624
+ | 'depth'
1625
+ | 'inpaint'
1626
+ | 'instrp2p'
1627
+ | 'lineart'
1628
+ | 'lineartanime'
1629
+ | 'mlsd'
1630
+ | 'normalbae'
1631
+ | 'openpose'
1632
+ | 'scribble'
1633
+ | 'segmentation'
1634
+ | 'shuffle'
1635
+ | 'softedge'
1636
+ | 'tile'
1637
+ | 'instantid';
1399
1638
 
1400
1639
  type ControlNetMode = 'balanced' | 'prompt_priority' | 'cn_priority';
1401
1640
 
@@ -1417,6 +1656,7 @@ interface VideoControlNetParams {
1417
1656
  ```
1418
1657
 
1419
1658
  MODEL TYPES:
1659
+
1420
1660
  ```typescript
1421
1661
  interface AvailableModel {
1422
1662
  id: string;
@@ -1441,17 +1681,15 @@ interface SizePreset {
1441
1681
  ```
1442
1682
 
1443
1683
  STATUS TYPES:
1684
+
1444
1685
  ```typescript
1445
- type ProjectStatus =
1446
- | 'pending' | 'queued' | 'processing'
1447
- | 'completed' | 'failed' | 'canceled';
1686
+ type ProjectStatus = 'pending' | 'queued' | 'processing' | 'completed' | 'failed' | 'canceled';
1448
1687
 
1449
- type JobStatus =
1450
- | 'pending' | 'initiating' | 'processing'
1451
- | 'completed' | 'failed' | 'canceled';
1688
+ type JobStatus = 'pending' | 'initiating' | 'processing' | 'completed' | 'failed' | 'canceled';
1452
1689
  ```
1453
1690
 
1454
1691
  ERROR TYPE:
1692
+
1455
1693
  ```typescript
1456
1694
  interface ErrorData {
1457
1695
  code: number;
@@ -1464,6 +1702,7 @@ END OF DOCUMENT
1464
1702
  ================================================================================
1465
1703
 
1466
1704
  For additional resources:
1705
+
1467
1706
  - Examples: https://github.com/Sogni-AI/sogni-client/tree/main/examples
1468
1707
  - API Docs: https://sdk-docs.sogni.ai
1469
1708
  - Support: https://www.sogni.ai/support