@sogni-ai/sogni-client 4.2.0-alpha.2 → 4.2.0-alpha.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. package/CHANGELOG.md +148 -0
  2. package/CLAUDE.md +25 -3
  3. package/README.md +411 -136
  4. package/dist/Account/index.d.ts +4 -2
  5. package/dist/Account/index.js +27 -23
  6. package/dist/Account/index.js.map +1 -1
  7. package/dist/Account/types.d.ts +7 -0
  8. package/dist/ApiClient/WebSocketClient/BrowserWebSocketClient/index.d.ts +3 -1
  9. package/dist/ApiClient/WebSocketClient/BrowserWebSocketClient/index.js +26 -2
  10. package/dist/ApiClient/WebSocketClient/BrowserWebSocketClient/index.js.map +1 -1
  11. package/dist/ApiClient/WebSocketClient/eventSubscriptions.d.ts +33 -0
  12. package/dist/ApiClient/WebSocketClient/eventSubscriptions.js +39 -0
  13. package/dist/ApiClient/WebSocketClient/eventSubscriptions.js.map +1 -0
  14. package/dist/ApiClient/WebSocketClient/events.d.ts +24 -7
  15. package/dist/ApiClient/WebSocketClient/index.d.ts +5 -1
  16. package/dist/ApiClient/WebSocketClient/index.js +24 -1
  17. package/dist/ApiClient/WebSocketClient/index.js.map +1 -1
  18. package/dist/ApiClient/WebSocketClient/messages.d.ts +2 -0
  19. package/dist/ApiClient/WebSocketClient/types.d.ts +2 -0
  20. package/dist/ApiClient/index.d.ts +6 -1
  21. package/dist/ApiClient/index.js +7 -3
  22. package/dist/ApiClient/index.js.map +1 -1
  23. package/dist/Chat/ChatTools.d.ts +5 -49
  24. package/dist/Chat/ChatTools.js +311 -88
  25. package/dist/Chat/ChatTools.js.map +1 -1
  26. package/dist/Chat/index.d.ts +11 -2
  27. package/dist/Chat/index.js +78 -4
  28. package/dist/Chat/index.js.map +1 -1
  29. package/dist/Chat/modelRouting.d.ts +100 -0
  30. package/dist/Chat/modelRouting.js +441 -0
  31. package/dist/Chat/modelRouting.js.map +1 -0
  32. package/dist/Chat/sogniHostedTools.generated.json +529 -0
  33. package/dist/Chat/tools.d.ts +9 -55
  34. package/dist/Chat/tools.js +72 -228
  35. package/dist/Chat/tools.js.map +1 -1
  36. package/dist/Chat/types.d.ts +91 -2
  37. package/dist/CreativeWorkflows/index.d.ts +23 -0
  38. package/dist/CreativeWorkflows/index.js +274 -0
  39. package/dist/CreativeWorkflows/index.js.map +1 -0
  40. package/dist/CreativeWorkflows/types.d.ts +106 -0
  41. package/dist/CreativeWorkflows/types.js +3 -0
  42. package/dist/CreativeWorkflows/types.js.map +1 -0
  43. package/dist/Projects/Job.d.ts +6 -0
  44. package/dist/Projects/Job.js +60 -5
  45. package/dist/Projects/Job.js.map +1 -1
  46. package/dist/Projects/Project.js +15 -3
  47. package/dist/Projects/Project.js.map +1 -1
  48. package/dist/Projects/createJobRequestMessage.js +140 -6
  49. package/dist/Projects/createJobRequestMessage.js.map +1 -1
  50. package/dist/Projects/index.d.ts +10 -1
  51. package/dist/Projects/index.js +197 -58
  52. package/dist/Projects/index.js.map +1 -1
  53. package/dist/Projects/types/ModelOptions.d.ts +3 -3
  54. package/dist/Projects/types/ModelOptions.js +12 -5
  55. package/dist/Projects/types/ModelOptions.js.map +1 -1
  56. package/dist/Projects/types/ModelTiersRaw.d.ts +7 -7
  57. package/dist/Projects/types/RawProject.d.ts +2 -0
  58. package/dist/Projects/types/events.d.ts +5 -4
  59. package/dist/Projects/types/index.d.ts +77 -7
  60. package/dist/Projects/types/index.js.map +1 -1
  61. package/dist/Projects/utils/index.d.ts +8 -1
  62. package/dist/Projects/utils/index.js +22 -8
  63. package/dist/Projects/utils/index.js.map +1 -1
  64. package/dist/index.d.ts +28 -3
  65. package/dist/index.js +19 -1
  66. package/dist/index.js.map +1 -1
  67. package/dist/lib/RestClient.d.ts +4 -1
  68. package/dist/lib/RestClient.js +17 -9
  69. package/dist/lib/RestClient.js.map +1 -1
  70. package/dist/lib/mediaValidation.d.ts +16 -0
  71. package/dist/lib/mediaValidation.js +280 -0
  72. package/dist/lib/mediaValidation.js.map +1 -0
  73. package/dist/lib/validation.d.ts +6 -1
  74. package/dist/lib/validation.js +28 -2
  75. package/dist/lib/validation.js.map +1 -1
  76. package/llms-full.txt +372 -133
  77. package/llms.txt +197 -86
  78. package/package.json +13 -4
  79. package/src/Account/index.ts +22 -2
  80. package/src/Account/types.ts +7 -0
  81. package/src/ApiClient/WebSocketClient/BrowserWebSocketClient/index.ts +47 -3
  82. package/src/ApiClient/WebSocketClient/eventSubscriptions.ts +92 -0
  83. package/src/ApiClient/WebSocketClient/events.ts +25 -7
  84. package/src/ApiClient/WebSocketClient/index.ts +33 -1
  85. package/src/ApiClient/WebSocketClient/messages.ts +2 -0
  86. package/src/ApiClient/WebSocketClient/types.ts +2 -0
  87. package/src/ApiClient/index.ts +32 -2
  88. package/src/Chat/ChatTools.ts +395 -95
  89. package/src/Chat/index.ts +149 -5
  90. package/src/Chat/modelRouting.ts +602 -0
  91. package/src/Chat/sogniHostedTools.generated.json +529 -0
  92. package/src/Chat/tools.ts +98 -245
  93. package/src/Chat/types.ts +100 -2
  94. package/src/CreativeWorkflows/index.ts +290 -0
  95. package/src/CreativeWorkflows/types.ts +134 -0
  96. package/src/Projects/Job.ts +76 -5
  97. package/src/Projects/Project.ts +13 -3
  98. package/src/Projects/createJobRequestMessage.ts +152 -13
  99. package/src/Projects/index.ts +230 -52
  100. package/src/Projects/types/ModelOptions.ts +15 -8
  101. package/src/Projects/types/ModelTiersRaw.ts +7 -7
  102. package/src/Projects/types/RawProject.ts +2 -0
  103. package/src/Projects/types/events.ts +5 -4
  104. package/src/Projects/types/index.ts +86 -6
  105. package/src/Projects/utils/index.ts +24 -8
  106. package/src/index.ts +93 -0
  107. package/src/lib/RestClient.ts +15 -5
  108. package/src/lib/mediaValidation.ts +367 -0
  109. package/src/lib/validation.ts +38 -2
package/README.md CHANGED
@@ -1,10 +1,12 @@
1
1
  # Sogni SDK for JavaScript & Node.js
2
+
2
3
  This library provides an easy way to interact with the [Sogni Supernet](https://www.sogni.ai/supernet) - a DePIN protocol for creative AI inference. It is written in TypeScript and can be used
3
4
  in both TypeScript and JavaScript projects such as backend Node.js and browser environments.
4
5
 
5
6
  Behind the scenes this SDK uses a WebSocket connection for communication between clients, server, and workers. It harnesses an event-based API to interact with Supernet to make things super efficient.
6
7
 
7
8
  ## Features
9
+
8
10
  - 🎨 **Image Generation** - Create images with the latest frontier Open Source models like Stable Diffusion, Qwen Image, Z-Image Turbo, and Flux
9
11
  - 🎨 **Image Edit** - Modify, merge, restyle, and transform images using prompts and/or multiple reference images using powerful models like Qwen Image Edit.
10
12
  - 🎬 **Video Generation** - Generate videos using **Wan 2.2 14B FP8** models with five workflow types:
@@ -18,28 +20,40 @@ Behind the scenes this SDK uses a WebSocket connection for communication between
18
20
  - 🎯 **Advanced Controls** - Fine-tune generation with samplers, schedulers, ControlNets, and more
19
21
  - 🤖 **LLM Text Generation** - Chat completions with streaming, multi-turn conversations, and thinking/reasoning mode via OpenAI-compatible API
20
22
  - 🔧 **LLM Tool Calling** - Define custom tools (functions) that the LLM can invoke during conversations for real-time data and actions
21
- - 🎨🎬🎵 **Sogni Platform Tools** - Generate images, videos, and music through natural language chat — the LLM detects media intent, enhances prompts, and calls Sogni's generation APIs automatically
23
+ - 🎨🎬🎵 **Sogni Platform Tools** - Generate images, reference-guided image edits, videos, audio-driven videos, video transforms, and music through natural language chat
24
+ - 🤖 **Hosted Creative Tools** - Use the default `creative-tools` surface for media generation, editing, analysis, metadata, prompt enhancement, script writing, lyrics, and instrumental structures; set `sogni_tools: "creative-agent"` to add workflow control and asset-manifest tools
25
+ - ⏱️ **Durable Creative Workflows** - Persistent server-side multi-step workflows with SSE event streaming, `Last-Event-ID` resume, and cooperative cancellation via `/v1/creative-agent/workflows`
22
26
  - 👁️ **Vision Chat** - Multimodal image understanding with scene description, OCR, object detection, visual analysis, and multi-image comparison via Qwen3.6 VLM
27
+
23
28
  ## Migration notes
29
+
24
30
  ### v3.x.x to v4.x.x
31
+
25
32
  Version 4 adds support for video generation, including the new **Wan 2.2 14B FP8** model family with five workflow types (text-to-video, image-to-video, sound-to-video, animate-move, and animate-replace). There are the following breaking changes:
33
+
26
34
  - `type` is required when calling `sogni.projects.create(params)`, valid values are `image`, `video` and `audio`. See code examples below.
27
35
  - `numberOfImages` renamed to `numberOfMedia`
28
36
  - `hasResultImage` in `Job` class is now `hasResultMedia`
29
37
  - `Job` and `Project` classes now have `type` property that can be `image`, `video` or `audio`
30
38
 
31
39
  ## Installation
40
+
32
41
  Add library to your project using npm or yarn:
42
+
33
43
  ```bash
34
44
  npm install @sogni-ai/sogni-client
35
45
  ```
46
+
36
47
  or
48
+
37
49
  ```bash
38
50
  yarn add @sogni-ai/sogni-client
39
51
  ```
52
+
40
53
  ## Core concepts
54
+
41
55
  In order to use Sogni Supernet, you need an active Sogni account with a positive SOGNI or Spark token balance. You can authenticate using either an **API key** (recommended) or **username and password**.
42
- You can create a free account in our [Web App](https://app.sogni.ai) or [Mac App](https://www.sogni.ai/studio) which will give you tokens just for signing up and confirming your email. You can get daily bonus tokens by claiming them (under rewards) each 24-hours.
56
+ You can create a free account in our [Web App](https://app.sogni.ai) or [Mac App](https://www.sogni.ai/studio) which will give you tokens just for signing up and confirming your email. Each email-verified account is allowed 400 free Spark render credits per month. On the API, free credits can be used with Z-Image Turbo; paid credits can access all models and features.
43
57
 
44
58
  **To get your API key:** Log in to [dashboard.sogni.ai](https://dashboard.sogni.ai), click your **Username** dropdown in the top-right corner, and provision your API key.
45
59
 
@@ -48,13 +62,16 @@ Spark tokens can be purchased with a credit card in a Mac or Web app.
48
62
  Your account is tied to a [Base](https://www.base.org/) Wallet that is created during signup.
49
63
 
50
64
  ### Supernet Types
65
+
51
66
  There are 2 worker network types available:
67
+
52
68
  - `fast` - this network runs on high-end GPUs and is optimized for speed. It is more expensive than `relaxed` network. **Required for video generation**.
53
69
  - `relaxed` - this network runs on Apple Mac devices and is optimized for cost. It is cheaper than `fast` network. Supports image generation only.
54
70
 
55
71
  In both options, the more complex your query is (the more steps), the higher the cost in tokens.
56
72
 
57
73
  ### Inference definitions: Projects and Jobs
74
+
58
75
  One request for image or video generation is called a **Project**. A project can generate one or more images or videos.
59
76
  Each generated image or video is represented by a **Job**.
60
77
 
@@ -62,6 +79,7 @@ When you send a project to Supernet, it will be processed by one or more workers
62
79
  uploaded to Sogni servers where it will be stored for 24 hours. After this period, media files will be auto-deleted.
63
80
 
64
81
  ## Client initialization
82
+
65
83
  To initialize a client, you need to provide `appId`, and account credentials.
66
84
 
67
85
  ### Option 1: API Key Authentication (Recommended)
@@ -75,7 +93,7 @@ import { SogniClient } from '@sogni-ai/sogni-client';
75
93
 
76
94
  const sogni = await SogniClient.createInstance({
77
95
  appId: 'your-app-id', // Required, must be unique string, UUID is recommended
78
- network: 'fast', // Network to use, 'fast' or 'relaxed'
96
+ network: 'fast', // Network to use, 'fast' or 'relaxed'
79
97
  apiKey: 'your-api-key' // API key for authentication
80
98
  });
81
99
 
@@ -92,7 +110,7 @@ import { SogniClient } from '@sogni-ai/sogni-client';
92
110
 
93
111
  const sogni = await SogniClient.createInstance({
94
112
  appId: 'your-app-id',
95
- network: 'fast',
113
+ network: 'fast'
96
114
  });
97
115
 
98
116
  await sogni.account.login('your-username', 'your-password');
@@ -100,42 +118,81 @@ const models = await sogni.projects.waitForModels();
100
118
  ```
101
119
 
102
120
  **Important Note:**
121
+
103
122
  - These samples assume you are using ES modules, which allow `await` at the top level. If you are using CommonJS, you will need to wrap `await` calls in an async function.
104
123
  - `appId` must be unique string, UUID is recommended. It is used to identify your application.
105
124
  - Only one connection per `appId` is allowed. If you try to connect with the same `appId` multiple times, the previous connection will be closed.
106
125
 
126
+ ### Connection metadata and event subscriptions
127
+
128
+ Use `appSource` to identify the product or integration behind a client connection. The SDK forwards it during authentication and on socket-backed project/chat requests so server-side reporting can attribute usage consistently.
129
+
130
+ By default, the SDK receives the full socket event stream, including live model worker counts through `swarmModels` and `swarmLLMModels`. Proxy, server-side, or headless clients that do not need ongoing worker count updates can opt out of the grouped model availability stream:
131
+
132
+ ```javascript
133
+ const sogni = await SogniClient.createInstance({
134
+ appId: 'your-app-id',
135
+ appSource: 'my-integration',
136
+ network: 'fast',
137
+ apiKey: 'your-api-key',
138
+ socketEventSubscriptions: {
139
+ modelAvailability: false
140
+ }
141
+ });
142
+ ```
143
+
144
+ If your process needs the initial model list before submitting work, keep the default subscription, wait for models, then unsubscribe from future count updates:
145
+
146
+ ```javascript
147
+ const models = await sogni.projects.waitForModels();
148
+
149
+ await sogni.setSocketEventSubscriptions({
150
+ modelAvailability: false
151
+ });
152
+ ```
153
+
154
+ `modelAvailability` is a subscription group covering `swarmModels` and `swarmLLMModels`. You can also opt in or out of individual socket event names with the same boolean map, and omitted subscriptions preserve default server behavior.
155
+
156
+ **Group flags dominate individual flags.** Disabling a group (e.g. `modelAvailability: false`) suppresses every event in the group, and re-enabling a single event under that group later (e.g. `swarmModels: true`) does **not** override the group-level suppression — the group flag still wins. To re-enable a single event, re-enable the group it belongs to (or clear it via `setSocketEventSubscriptions({ reset: true })` before reapplying selective subscriptions). Subscriptions you do not list keep their current server-side state.
157
+
158
+ Runtime subscription changes made via `setSocketEventSubscriptions` are remembered locally and re-applied on every reconnect, so a long-lived client only needs to express its preference once.
159
+
107
160
  ## Usage
161
+
108
162
  After authentication, the client will have an active WebSocket connection to Sogni Supernet. Within a short period of time the
109
163
  client will receive the current balance and list of available models. After this you can start using the client to generate images or videos.
110
164
 
111
165
  It is advised to watch for `connected` and `disconnected` events on the client instance to be notified when the connection is established or lost:
166
+
112
167
  ```typescript
113
168
  // Will be triggered when the client is connected to Supernet
114
- sogni.client.on('connected', ({network}) => {
169
+ sogni.client.on('connected', ({ network }) => {
115
170
  console.log('Connected to Supernet:', network);
116
171
  });
117
172
 
118
173
  // Will be triggered when websocket connection is lost or the client is disconnected from Supernet
119
- sogni.client.on('disconnected', ({code, reason}) => {
174
+ sogni.client.on('disconnected', ({ code, reason }) => {
120
175
  console.log('Disconnected from Supernet:', code, reason);
121
176
  });
122
177
  ```
123
178
 
124
179
  ## Image Generation
125
180
 
126
- Sogni supports a wide range of models for image generation. You can find a list of available models in
181
+ Sogni supports a wide range of models for image generation. You can find a list of available models in
127
182
  `sogni.projects.availableModels` property during runtime or query it using `sogni.projects.getAvailableModels()` method.
128
183
 
129
184
  For a start, you can try FLUX.1 \[schnell\] with the following parameters:
185
+
130
186
  ```javascript
131
187
  const fluxDefaults = {
132
188
  modelId: 'flux1-schnell-fp8',
133
189
  steps: 4,
134
190
  guidance: 1
135
- }
191
+ };
136
192
  ```
137
193
 
138
194
  ### Creating an image project
195
+
139
196
  ```javascript
140
197
  // Find model that has the most workers
141
198
  const mostPopularModel = sogni.projects.availableModels.reduce((a, b) =>
@@ -149,22 +206,26 @@ const project = await sogni.projects.create({
149
206
  negativePrompt:
150
207
  'malformation, bad anatomy, bad hands, missing fingers, cropped, low quality, bad quality, jpeg artifacts, watermark',
151
208
  stylePrompt: 'anime',
152
- steps: 20,
153
- guidance: 7.5,
209
+ steps: 20,
210
+ guidance: 7.5,
154
211
  numberOfMedia: 1,
155
212
  outputFormat: 'jpg', // Can be 'png' or 'jpg', defaults to 'png'
156
213
  tokenType: 'spark', // 'sogni' or 'spark'
157
214
  network: 'fast' // 'fast' or 'relaxed'
158
215
  });
159
216
  ```
217
+
160
218
  **Note:** Full project parameter list can be found in [ProjectParams](https://sdk-docs.sogni.ai/interfaces/ProjectParams.html) docs.
161
219
 
162
220
  ### Getting project status and results
221
+
163
222
  In general, there are 2 ways to work with API:
223
+
164
224
  1. Using promises or `async/await` syntax.
165
225
  2. Listening to events on `Project` and `Job` class instances.
166
226
 
167
227
  #### Using promises
228
+
168
229
  ```javascript
169
230
  const project = await sogni.projects.create({
170
231
  type: 'image',
@@ -185,12 +246,13 @@ project.on('progress', (progress) => {
185
246
  });
186
247
 
187
248
  const imageUrls = await project.waitForCompletion();
188
- // Now you can use image URLs to download images.
249
+ // Now you can use image URLs to download images.
189
250
  // Note that images will be available for 24 hours only!
190
251
  console.log('Image URLs:', imageUrls);
191
252
  ```
192
253
 
193
254
  #### Using events
255
+
194
256
  ```javascript
195
257
  const project = await sogni.projects.create({
196
258
  type: 'image',
@@ -233,28 +295,32 @@ project.on('failed', async (errorData) => {
233
295
  });
234
296
  ```
235
297
 
298
+ External API-backed jobs may not report diffusion steps. For those jobs, SDK progress uses provider progress or ETA-derived progress and remains a finite 0-100 number. GPT Image 2 and Seedance results can arrive as direct hosted URLs; the SDK preserves those URLs on `job.resultUrl` and `job.getResultUrl()` returns the cached URL without requesting a Sogni signed download URL.
299
+
236
300
  ### Project parameters
301
+
237
302
  Here is a full list of project parameters that you can use:
303
+
238
304
  - `modelId` - ID of the model to use for image generation.
239
305
  - `positivePrompt` - text prompt that describes what you want to see in the image. Can be an empty string.
240
306
  - `negativePrompt` - text prompt that describes what you don't want to see in the image. Can be an empty string.
241
307
  - `stylePrompt` - text prompt that describes the style of the image. Can be an empty string.
242
308
  - `numberOfMedia` - number of images to generate (this parameter was named `numberOfImages` before v4).
243
- - `tokenType` - select token type to pay for render. Can be either `sogni` or `spark`.
244
- - `sizePreset` - optionally pass the ID of a size preset to use. If not passed, the default output is a square at
245
- either 512x512, 768x768 or 1024x1024 (SDXL and Flux) based on the default resolution of the selected model.
246
- See **Detecting available output presets** section below for available presets for your model. The token cost and
247
- render time of the job is heavily influenced by total pixel count where a 2048x2048 image is 4x the cost and render
248
- time of a 1024x1024 image as it is 4x the generated pixel count. You may also pass `custom` along with `width` and
249
- `height` project parameters to request a custom dimension. Note that not all size presets and custom aspect ratios
250
- produce consistently good results with all models. If your output features skewed anatomy or doubling of features
251
- you should experiment with a different model or output size.
309
+ - `tokenType` - select token type to pay for render. Can be either `sogni` or `spark`. External API-backed models such as GPT Image 2 and Seedance are Spark-only.
310
+ - `sizePreset` - optionally pass the ID of a size preset to use. If not passed, the default output is a square at
311
+ either 512x512, 768x768 or 1024x1024 (SDXL and Flux) based on the default resolution of the selected model.
312
+ See **Detecting available output presets** section below for available presets for your model. The token cost and
313
+ render time of the job is heavily influenced by total pixel count where a 2048x2048 image is 4x the cost and render
314
+ time of a 1024x1024 image as it is 4x the generated pixel count. You may also pass `custom` along with `width` and
315
+ `height` project parameters to request a custom dimension. Note that not all size presets and custom aspect ratios
316
+ produce consistently good results with all models. If your output features skewed anatomy or doubling of features
317
+ you should experiment with a different model or output size.
252
318
  - `width` - if 'sizePreset' is set to 'custom' you may pass a custom pixel width between 256 and 2048
253
319
  - `height` - if 'sizePreset' is set to 'custom' you may pass a custom pixel height between 256 and 2048
254
- - `steps` - number of inference steps between random pixels to final image. Higher steps generally lead to higher
255
- quality images and more details but varies by model, prompt, guidance, and desired look. For most Stable Diffusion
256
- models 20-40 steps is ideal with 20 being 2x faster to render than 40. For Flux 4 steps is optimal. Lightning,
257
- Turbo and LCM models are designed for quality output in as little as 1 step. ([More info](https://docs.sogni.ai/learn/basics/inference-steps)).
320
+ - `steps` - number of inference steps between random pixels to final image. Higher steps generally lead to higher
321
+ quality images and more details but varies by model, prompt, guidance, and desired look. For most Stable Diffusion
322
+ models 20-40 steps is ideal with 20 being 2x faster to render than 40. For Flux 4 steps is optimal. Lightning,
323
+ Turbo and LCM models are designed for quality output in as little as 1 step. ([More info](https://docs.sogni.ai/learn/basics/inference-steps)).
258
324
  - `guidance` - guidance scale. For most Stable Diffusion models, optimal value is 7.5 ([More info](https://docs.sogni.ai/learn/basics/guidance-scale)).
259
325
  - `network` - network type to use, `fast` or `relaxed`. This parameter lets you override the default network type for this project.
260
326
  - `disableNSFWFilter` - disable NSFW filter for this project. NSFW filter is enabled by default and workers won't upload resulting images if they are detected as NSFW.
@@ -263,108 +329,137 @@ Turbo and LCM models are designed for quality output in as little as 1 step. ([M
263
329
  - `sampler` - sampler algorithm ([More info](https://docs.sogni.ai/sogni-studio/advanced/samplers-and-schedulers)). For available options, see the **"Samplers"** section below.
264
330
  - `scheduler` - scheduler to use ([More info](https://docs.sogni.ai/sogni-studio/advanced/samplers-and-schedulers)). For available options, see the **"Schedulers"** section below.
265
331
  - `startingImage` - guide image in PNG format. Can be [File](https://developer.mozilla.org/en-US/docs/Web/API/File), [Blob](https://developer.mozilla.org/en-US/docs/Web/API/Blob) or [Buffer](https://nodejs.org/api/buffer.html)
266
- - `startingImageStrength` - strong effect of starting image should be. From 0 to 1, default 0.5.
332
+ - `startingImageStrength` - how strong the effect of the starting image should be. From 0 to 1, default 0.5.
267
333
  - `controlNet` - Stable Diffusion ControlNet parameters. See **ControlNets** section below for more info.
268
- - `outputFormat` - output image format. Can be `png` or `jpg`. If not specified, `png` will be used. JPG format results in smaller file sizes but may have slightly lower quality due to compression.
334
+ - `outputFormat` - output image format. Can be `png`, `jpg`, or `webp` for GPT Image 2; most native image models support `png` or `jpg`. If not specified, `png` will be used.
269
335
 
270
336
  TypeScript type definitions for project parameters can be found in [ProjectParams](https://sdk-docs.sogni.ai/interfaces/ProjectParams.html) docs.
271
337
 
338
+ ### GPT Image 2
339
+
340
+ GPT Image 2 is available through the normal image project API:
341
+
342
+ ```javascript
343
+ const project = await sogni.projects.create({
344
+ type: 'image',
345
+ network: 'fast',
346
+ modelId: 'gpt-image-2',
347
+ positivePrompt: 'A clean product render of translucent headphones on a white background',
348
+ numberOfMedia: 1,
349
+ width: 1024,
350
+ height: 1024,
351
+ gptImageQuality: 'high',
352
+ outputFormat: 'webp',
353
+ tokenType: 'spark'
354
+ });
355
+
356
+ const imageUrls = await project.waitForCompletion();
357
+ ```
358
+
359
+ For GPT Image 2 edits, pass `contextImages`; the SDK supports up to 16 context images for this model. Cost estimates can include `gptImageQuality`, `outputFormat`, and `contextImages` so external input-image pricing is represented.
360
+
272
361
  ### Detecting available output presets
362
+
273
363
  You can get a list of available output presets for a specific network and model using the `sogni.projects.getSizePresets` method.
364
+
274
365
  ```javascript
275
366
  const presets = await sogni.projects.getSizePresets('fast', 'flux1-schnell-fp8');
276
367
  console.log('Available output presets:', presets);
277
368
  ```
369
+
278
370
  Sample response:
371
+
279
372
  ```json
280
373
  [
281
- {
282
- "label": "Square",
283
- "id": "square",
284
- "width": 512,
285
- "height": 512,
286
- "ratio": "1:1",
287
- "aspect": "1"
288
- },
289
- {
290
- "label": "Square HD",
291
- "id": "square_hd",
292
- "width": 1024,
293
- "height": 1024,
294
- "ratio": "1:1",
295
- "aspect": "1"
296
- },
297
- {
298
- "label": "Portrait: Standard",
299
- "id": "portrait_7_9",
300
- "width": 896,
301
- "height": 1152,
302
- "ratio": "7:9",
303
- "aspect": "0.78"
304
- },
305
- {
306
- "label": "Portrait: 35mm",
307
- "id": "portrait_13_19",
308
- "width": 832,
309
- "height": 1216,
310
- "ratio": "13:19",
311
- "aspect": "0.68"
312
- },
313
- {
314
- "label": "Portrait: Mobile",
315
- "id": "portrait_4_7",
316
- "width": 768,
317
- "height": 1344,
318
- "ratio": "4:7",
319
- "aspect": "0.57"
320
- },
321
- {
322
- "label": "Portrait: Extended",
323
- "id": "portrait_5_12",
324
- "width": 640,
325
- "height": 1536,
326
- "ratio": "5:12",
327
- "aspect": "0.42"
328
- },
329
- {
330
- "label": "Landscape: Standard",
331
- "id": "landscape_9_7",
332
- "width": 1152,
333
- "height": 896,
334
- "ratio": "9:7",
335
- "aspect": "1.28"
336
- },
337
- {
338
- "label": "Landscape: 35mm",
339
- "id": "landscape_19_13",
340
- "width": 1216,
341
- "height": 832,
342
- "ratio": "19:13",
343
- "aspect": "1.46"
344
- },
345
- {
346
- "label": "Landscape: Widescreen",
347
- "id": "landscape_7_4",
348
- "width": 1344,
349
- "height": 768,
350
- "ratio": "7:4",
351
- "aspect": "1.75"
352
- },
353
- {
354
- "label": "Landscape: Ultrawide",
355
- "id": "landscape_12_5",
356
- "width": 1536,
357
- "height": 640,
358
- "ratio": "12:5",
359
- "aspect": "2.4"
360
- }
374
+ {
375
+ "label": "Square",
376
+ "id": "square",
377
+ "width": 512,
378
+ "height": 512,
379
+ "ratio": "1:1",
380
+ "aspect": "1"
381
+ },
382
+ {
383
+ "label": "Square HD",
384
+ "id": "square_hd",
385
+ "width": 1024,
386
+ "height": 1024,
387
+ "ratio": "1:1",
388
+ "aspect": "1"
389
+ },
390
+ {
391
+ "label": "Portrait: Standard",
392
+ "id": "portrait_7_9",
393
+ "width": 896,
394
+ "height": 1152,
395
+ "ratio": "7:9",
396
+ "aspect": "0.78"
397
+ },
398
+ {
399
+ "label": "Portrait: 35mm",
400
+ "id": "portrait_13_19",
401
+ "width": 832,
402
+ "height": 1216,
403
+ "ratio": "13:19",
404
+ "aspect": "0.68"
405
+ },
406
+ {
407
+ "label": "Portrait: Mobile",
408
+ "id": "portrait_4_7",
409
+ "width": 768,
410
+ "height": 1344,
411
+ "ratio": "4:7",
412
+ "aspect": "0.57"
413
+ },
414
+ {
415
+ "label": "Portrait: Extended",
416
+ "id": "portrait_5_12",
417
+ "width": 640,
418
+ "height": 1536,
419
+ "ratio": "5:12",
420
+ "aspect": "0.42"
421
+ },
422
+ {
423
+ "label": "Landscape: Standard",
424
+ "id": "landscape_9_7",
425
+ "width": 1152,
426
+ "height": 896,
427
+ "ratio": "9:7",
428
+ "aspect": "1.28"
429
+ },
430
+ {
431
+ "label": "Landscape: 35mm",
432
+ "id": "landscape_19_13",
433
+ "width": 1216,
434
+ "height": 832,
435
+ "ratio": "19:13",
436
+ "aspect": "1.46"
437
+ },
438
+ {
439
+ "label": "Landscape: Widescreen",
440
+ "id": "landscape_7_4",
441
+ "width": 1344,
442
+ "height": 768,
443
+ "ratio": "7:4",
444
+ "aspect": "1.75"
445
+ },
446
+ {
447
+ "label": "Landscape: Ultrawide",
448
+ "id": "landscape_12_5",
449
+ "width": 1536,
450
+ "height": 640,
451
+ "ratio": "12:5",
452
+ "aspect": "2.4"
453
+ }
361
454
  ]
362
455
  ```
363
456
 
364
457
  ### Samplers
458
+
365
459
  Samplers control the denoising process — the sequence of steps that transforms random noise into your final image.
366
460
 
367
461
  Available sampler options depend on the model. You can use the API to get the available samplers for a specific model:
462
+
368
463
  ```javascript
369
464
  const modelOptions = await sogni.projects.getModelOptions('flux1-schnell-fp8');
370
465
  console.log(modelOptions.sampler);
@@ -375,12 +470,15 @@ console.log(modelOptions.sampler);
375
470
  }
376
471
  */
377
472
  ```
473
+
378
474
  See [Samplers and Schedulers](https://docs.sogni.ai/sogni-studio/advanced/samplers-and-schedulers) docs for more info.
379
475
 
380
476
  ### Schedulers
477
+
381
478
  Control how steps are distributed. For more info see [Schedulers and Samplers](https://docs.sogni.ai/sogni-studio/advanced/samplers-and-schedulers#schedulers) docs.
382
479
 
383
480
  Available scheduler options depend on the model. You can use the API to get the available schedulers for a specific model:
481
+
384
482
  ```javascript
385
483
  const modelOptions = await sogni.projects.getModelOptions(modelId);
386
484
  console.log(modelOptions.scheduler);
@@ -402,12 +500,14 @@ console.log(modelOptions.scheduler);
402
500
  ```
403
501
 
404
502
  ### ControlNets
503
+
405
504
  **EXPERIMENTAL FEATURE:** This feature is still in development and may not work as expected. Use at your own risk.
406
505
 
407
- ControlNet is a neural network that controls image generation in Stable Diffusion by adding extra conditions. See more
506
+ ControlNet is a neural network that controls image generation in Stable Diffusion by adding extra conditions. See more
408
507
  info and usage samples in [ControlNets](https://docs.sogni.ai/learn/basics/controlnet) docs for Sogni Studio.
409
508
 
410
509
  To use ControlNet in your project, you need to provide `controlNet` object with the following properties:
510
+
411
511
  - `name` - name of the ControlNet to use. Currently supported:
412
512
  - `canny`
413
513
  - `depth`
@@ -434,6 +534,7 @@ To use ControlNet in your project, you need to provide `controlNet` object with
434
534
  - `guidanceEnd` - step when ControlNet last applied, 0 means first step, 1 means last step. Must be greater than guidanceStart
435
535
 
436
536
  Example:
537
+
437
538
  ```javascript
438
539
  const cnImage = fs.readFileSync('./cn.jpg');
439
540
  const project = await sogni.projects.create({
@@ -450,7 +551,9 @@ const project = await sogni.projects.create({
450
551
  }
451
552
  });
452
553
  ```
554
+
453
555
  Full ControlNet type definition:
556
+
454
557
  ```typescript
455
558
  export type ControlNetName =
456
559
  | 'canny'
@@ -480,7 +583,6 @@ export interface ControlNetParams {
480
583
  }
481
584
  ```
482
585
 
483
-
484
586
  ## Video Generation with Wan 2.2 Models
485
587
 
486
588
  The Sogni SDK supports advanced video generation workflows powered by **Wan 2.2 14B FP8** models. These models are available on the `fast` network and support various video generation workflows.
@@ -497,12 +599,15 @@ The Wan 2.2 model family supports five distinct video generation workflows:
497
599
 
498
600
  ### Model Variants
499
601
 
500
- Each workflow has two model variants optimized for different use cases:
602
+ WAN workflows have two model variants optimized for different use cases:
501
603
 
502
604
  - **Speed variant** (with `_lightx2v` suffix) - Faster inference (4-step), good quality
503
605
  - **Quality variant** (without `_lightx2v`) - Slower inference, best quality
504
606
 
607
+ LTX-2.3 models use `distilled` and `dev` variants for fast/high-quality generation with native audio. Seedance 2.0 models use the external API path and are available through two canonical multimodal model IDs: `seedance-2-0` and `seedance-2-0-fast`. Both accept optional image, video, and audio references, run at fixed 24fps, and require Spark billing; the Fast model caps output at 720p.
608
+
505
609
  Example model IDs:
610
+
506
611
  - `wan_v2.2-14b-fp8_t2v_lightx2v` (Text-to-Video, speed)
507
612
  - `wan_v2.2-14b-fp8_t2v` (Text-to-Video, quality)
508
613
  - `wan_v2.2-14b-fp8_i2v_lightx2v` (Image-to-Video, speed)
@@ -511,20 +616,32 @@ Example model IDs:
511
616
  - `wan_v2.2-14b-fp8_s2v` (Sound-to-Video, quality)
512
617
  - `wan_v2.2-14b-fp8_animate-move_lightx2v` (Animate-Move, speed)
513
618
  - `wan_v2.2-14b-fp8_animate-replace_lightx2v` (Animate-Replace, speed)
619
+ - `ltx23-22b-fp8_t2v_distilled` (LTX-2.3 Text-to-Video, fast)
620
+ - `ltx23-22b-fp8_i2v_distilled` (LTX-2.3 Image-to-Video, fast)
621
+ - `ltx23-22b-fp8_v2v_distilled` (LTX-2.3 Video-to-Video ControlNet, fast)
622
+ - `seedance-2-0` (Seedance 2.0 multimodal video, external API)
623
+ - `seedance-2-0-fast` (Seedance 2.0 Fast multimodal video, external API, 720p cap)
514
624
 
515
625
  ### Video Parameters
516
626
 
517
627
  When creating video projects, you can specify:
518
628
 
519
- - `fps` - Frames per second: 16 or 32 (default: 16)
520
- - `frames` - Number of frames: 17-161 (default: 81, which is ~5 seconds at 16fps)
629
+ - `duration` - Duration in seconds. WAN supports 1-10s, LTX-2.3 supports 4-20s, Seedance direct SDK projects currently support 4-15s.
630
+ - `fps` - Frames per second. WAN supports 16/32 output, LTX-2.3 supports 1-60 native FPS, Seedance is fixed at 24fps.
631
+ - `frames` - Number of frames. Prefer `duration`; the SDK calculates model-correct frame counts.
521
632
  - `width` - Video width in pixels
522
633
  - `height` - Video height in pixels
523
634
  - `steps` - Increase inference steps to increase quality
524
635
  - `seed` - Random seed for reproducibility
525
636
  - `referenceImage` - Reference image for workflows that require it (i2v, s2v, animate-move, animate-replace)
526
- - `referenceVideo` - Reference video for animate workflows (animate-move, animate-replace)
637
+ - `referenceVideo` - Reference video for animate and v2v workflows
527
638
  - `referenceAudio` - Reference audio for sound-to-video workflow
639
+ - `referenceImageUrls` - Seedance-only loose image context URLs; combined with `referenceImage`/`referenceImageEnd`, max 9 image assets
640
+ - `referenceVideoUrls` - Seedance-only video context URLs; combined with `referenceVideo`, max 3 video assets
641
+ - `referenceAudioUrls` - Seedance-only audio context URLs; combined with `referenceAudio`/`referenceAudioIdentity`, max 3 audio assets
642
+ - `hasVideoInput` - Estimate-only flag for `estimateVideoCost`; set this when estimating a canonical Seedance video-input job without passing `referenceVideo`/`referenceVideoUrls`
643
+
644
+ Seedance 2.0 can combine image, video, and audio reference assets in one external API request. Reference limits are up to 9 image assets, 3 video assets, 3 audio assets, and 12 asset files total. Text+audio without at least one image or video reference is not supported by Seedance. URL-array references must be HTTPS URLs that the vendor can fetch; local multi-reference files should be uploaded first, as shown in `examples/workflow_partner_seedance_video.mjs`. In prompts and creative briefs, refer to attachments by Seedance-style tags: `@Image1`, `@Video1`, and `@Audio1`, counted independently by modality in attachment order. Assign each useful reference a role, such as product identity, motion timing, camera path, edit rhythm, background music, or speech reference. Prefer positive preservation language like "maintain the same product silhouette and logo placement from @Image1"; exact readable text, logos, lip-sync, voice cloning, and real-human-reference behavior still need review. Seedance dispatch omits negative prompts; Wan 2.2 and LTX 2.3 video models can still use `negativePrompt`. Seedance jobs are Spark-only and should not use SOGNI token fallback.
528
645
 
529
646
  ### Text-to-Video Example
530
647
 
@@ -544,6 +661,49 @@ const videoUrls = await project.waitForCompletion();
544
661
  console.log('Video URL:', videoUrls[0]);
545
662
  ```
546
663
 
664
+ Seedance 2.0 example:
665
+
666
+ ```javascript
667
+ const project = await sogni.projects.create({
668
+ type: 'video',
669
+ network: 'fast',
670
+ modelId: 'seedance-2-0',
671
+ positivePrompt: 'A cinematic neon skyline time lapse, sweeping camera motion',
672
+ duration: 5,
673
+ fps: 24,
674
+ width: 1920,
675
+ height: 1088,
676
+ tokenType: 'spark'
677
+ });
678
+
679
+ const videoUrls = await project.waitForCompletion();
680
+ ```
681
+
682
+ Seedance multimodal context example:
683
+
684
+ ```javascript
685
+ const project = await sogni.projects.create({
686
+ type: 'video',
687
+ network: 'fast',
688
+ modelId: 'seedance-2-0',
689
+ positivePrompt:
690
+ 'Use @Image1 as the product identity, @Image2 for detail inserts, @Video1 for camera movement, and @Audio1 for music rhythm. Create one cohesive launch spot with smooth continuity and crisp product preservation.',
691
+ duration: 8,
692
+ fps: 24,
693
+ width: 1920,
694
+ height: 1088,
695
+ referenceImageUrls: [
696
+ 'https://cdn.example.com/product-front.png',
697
+ 'https://cdn.example.com/product-detail.png'
698
+ ],
699
+ referenceVideoUrls: ['https://cdn.example.com/motion-reference.mp4'],
700
+ referenceAudioUrls: ['https://cdn.example.com/music-reference.m4a'],
701
+ tokenType: 'spark'
702
+ });
703
+
704
+ const videoUrls = await project.waitForCompletion();
705
+ ```
706
+
547
707
  ### Image-to-Video Example
548
708
 
549
709
  ```javascript
@@ -691,37 +851,148 @@ const response = await sogni.chat.completions.create({
691
851
 
692
852
  ### Sogni Platform Tools — Generate Media via Chat
693
853
 
694
- Combine LLM intelligence with Sogni's media generation capabilities. The LLM detects when a user wants to create an image, video, or music, enhances the prompt, and calls Sogni's generation APIs — turning natural language into creative output:
854
+ Combine LLM intelligence with Sogni's media generation capabilities. The SDK exposes built-in public platform tool definitions for client-managed chat completions:
855
+
856
+ - **`sogni_generate_image`** — text-to-image generation
857
+ - **`sogni_edit_image`** — reference-guided image editing using `source_image_url` and `reference_image_urls`
858
+ - **`sogni_generate_video`** — text-to-video and image-to-video generation
859
+ - **`sogni_sound_to_video`** — audio-driven video generation using `reference_audio_url`
860
+ - **`sogni_video_to_video`** — video transformation / motion transfer using `reference_video_url`
861
+ - **`sogni_generate_music`** — music generation with optional lyrics and advanced controls
862
+
863
+ Use `SogniTools.all` to expose the full tool surface, then execute tool calls with `sogni.chat.tools.execute()` / `executeAll()` or `autoExecuteTools: true` for non-streaming flows.
864
+
865
+ For direct `/v1/chat/completions` hosted-tool execution, media-bearing tool arguments are intentionally constrained to inline `data:` URIs so the chat API remains OpenAI-compatible and does not implicitly fetch arbitrary user URLs. For user-uploaded image/audio/video inputs, use the SDK video project examples or `/v1/creative-agent/workflows` hosted tool sequences, which run outside the chat tool-selection loop and can consume HTTPS artifact URLs produced by Sogni's upload endpoints. Durable hosted sequences validate step arguments before the workflow starts and can bind request-level `mediaReferences` into tool arguments with `sourceStepId: "$input_media"`.
866
+
867
+ The `workflow_text_chat_sogni_tools.mjs` example demonstrates the core text-to-image, text-to-video, and text-to-music composition flows. Dedicated workflow examples like `workflow_image_edit.mjs`, `workflow_sound_to_video.mjs`, and `workflow_video_to_video.mjs` cover the asset-backed workflows directly.
868
+
869
+ ### Hosted Creative Tool Families (server-side)
695
870
 
696
- - **Image Generation** via tool call "Create an image of a cyberpunk city at night"
697
- - **Video Generation** via tool call — "Generate a video of ocean waves at sunset"
698
- - **Music Generation** via tool call — "Compose a jazz song about the rain"
871
+ The Sogni API can inject hosted creative tool surfaces through `sogni.chat.hosted.create()`, the SDK wrapper for `/v1/chat/completions`. The default `creative-tools` surface includes media generation, editing, post-production, analysis, metadata extraction, and synchronous composition tools; `sogni_tools: "creative-agent"` includes that base surface and adds workflow control and asset-manifest tools. Generated artifacts are threaded through a per-request media context so later rounds can reference them by index.
699
872
 
700
- See the `workflow_text_chat_sogni_tools.mjs` example for a complete implementation that wires LLM tool calling to Sogni's image, video, and audio generation APIs.
873
+ Use default `sogni_tools: true` or `sogni_tools: "creative-tools"` when you want the full creative media tool surface plus synchronous composition tools: `enhance_prompt`, `compose_script`, `compose_lyrics`, and `compose_instrumental`. The legacy `"rich"` alias is still accepted and maps to `"creative-tools"`.
874
+
875
+ Notable tools on the `creative-tools` surface include:
876
+
877
+ | Tool | Behavior |
878
+ | --------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
879
+ | `restore_photo`, `apply_style`, `refine_result`, `change_angle` | Image-edit adapters backed by the shared image-edit workflow |
880
+ | `animate_photo` | Image-to-video, with multi-source fan-out via `sourceImageIndices` (composed into one MP4 by default; opt out with `stitched: false`) |
881
+ | `stitch_video` | Compose selected video clips into one MP4 (with optional audio overlay) |
882
+ | `orbit_video` | Generate orbit clips around a subject and stitch them |
883
+ | `dance_montage` | Generate dance clips and stitch when multi-clip |
884
+ | `extend_video`, `replace_video_segment` | Extend or replace a bounded segment of an existing video; replacements may trim source windows with `replacementStartSeconds` / `replacementEndSeconds` |
885
+ | `overlay_video`, `add_subtitles` | Burn in text/logo overlays or subtitle cues onto existing videos via ffmpeg |
886
+ | `enhance_prompt` | Expand or adapt rough prompts into model-ready image, video, music, or edit prompts |
887
+ | `compose_script` | Draft scripts, storyboards, trailers, social shorts, campaign beats, or video prompts |
888
+ | `compose_lyrics` | Write vocal song lyrics and suggested musical parameters |
889
+ | `compose_instrumental` | Write instrumental structure and suggested musical parameters |
890
+
891
+ Composition and post-production tools (`stitch_video`, `orbit_video`, `dance_montage`, `animate_photo` fan-out, `extend_video`, `replace_video_segment`, `overlay_video`, and `add_subtitles`) return or update MP4 artifacts. `stitch_video` concatenates whole clips end-to-end; alternating or interleaved time slices should be represented as repeated `replace_video_segment` steps with bounded replacement source windows. See the LLM API reference for the full schema list.
892
+
893
+ ```javascript
894
+ import { SogniClient } from '@sogni-ai/sogni-client';
895
+
896
+ const sogni = await SogniClient.createInstance({
897
+ appId: 'creative-agent-demo',
898
+ apiKey: process.env.SOGNI_API_KEY,
899
+ network: 'fast'
900
+ });
901
+
902
+ const result = await sogni.chat.hosted.create({
903
+ model: 'qwen3.6-35b-a3b-gguf-iq4xs',
904
+ messages: [{ role: 'user', content: 'Create a 15s trailer-style launch video for this product concept' }],
905
+ sogni_tools: 'creative-tools',
906
+ sogni_tool_execution: true,
907
+ tokenType: 'spark'
908
+ });
909
+ ```
910
+
911
+ See `examples/workflow_creative_agent_tools.mjs` for a runnable REST API-key example that can toggle tool surface and server-side execution.
912
+
913
+ For focused partner Seedance video tests, `examples/workflow_partner_seedance_video.mjs` starts with a guided workflow picker when run with no arguments. The guided path covers T2V, I2V, IA2V, V2V, the full and fast Seedance tiers where available, hosted workflow execution, native audio, keyframe interpolation, multimodal context, and cost estimation. The command-line path still supports direct scripted calls:
914
+
915
+ ```bash
916
+ node examples/workflow_partner_seedance_video.mjs
917
+ node examples/workflow_partner_seedance_video.mjs "A glass whale swimming through a neon city" --duration 4
918
+ node examples/workflow_partner_seedance_video.mjs "A glass whale swimming through a neon city" --fast --duration 4
919
+ node examples/workflow_partner_seedance_video.mjs "slow cinematic reveal" --context examples/test-assets/placeholder.jpg
920
+ node examples/workflow_partner_seedance_video.mjs "slow cinematic reveal" --context examples/test-assets/placeholder.jpg --fast
921
+ node examples/workflow_partner_seedance_video.mjs "the portrait sings with stage lighting" --mode ia2v --context examples/test-assets/placeholder.jpg --audio examples/test-assets/placeholder.m4a
922
+ node examples/workflow_partner_seedance_video.mjs "turn the clip into a polished perfume commercial" --video examples/test-assets/placeholder.mp4
923
+ node examples/workflow_partner_seedance_video.mjs "Use @Video1 as the source clip, @Video2 for edit rhythm, @Image1 for product identity, @Image2 for palette, and @Audio1 as the music guide. Preserve the product silhouette and create one launch spot." --workflow --mode v2v --video examples/test-assets/placeholder.mp4 --video https://cdn.example.com/motion-2.mp4 --context examples/test-assets/placeholder.jpg --context examples/test-assets/placeholder2.jpg --audio examples/test-assets/placeholder.m4a
924
+ ```
925
+
926
+ Guided mode defaults text-to-video to `/v1/creative-agent/workflows` so the entrypoint exercises the durable hosted workflow API first. Scripted T2V calls without media still default to `/v1/chat/completions` unless `--workflow` is passed. Media modes default to `/v1/creative-agent/workflows` with `kind: "hosted_tool_sequence"` and upload local media from `examples/test-assets` automatically. Pass `--image`/`--context`, `--audio`, or `--video` repeatedly to use Seedance multimodal context; the example enforces the vendor limits of 9 image assets, 3 video assets, 3 audio assets, and 12 total assets. `--expand-prompt` is enabled by default and sends `expand_prompt: true` so the API runs the shared `@sogni/creative-agent` Seedance LLM prompt shaper before dispatch; pass `--no-expand-prompt` only when you want to submit the compact prompt directly. Keep prompts as compact creative briefs with explicit `@ImageN`/`@VideoN`/`@AudioN` role assignments rather than BytePlus JSON. `--no-execute` prints the workflow request without submitting it; local media is still uploaded first so the printed request contains real HTTPS media URLs. Use `--no-estimate` when you only want to inspect request construction.
927
+
928
+ ### Durable Creative Workflows (server-side)
929
+
930
+ Long-running multi-step creative workflows can be persisted on the server and observed independently of the chat completion that started them. The SDK exposes these API-key-only endpoints through `sogni.creativeWorkflows`:
931
+
932
+ - `sogni.creativeWorkflows.startImageToVideo(input, options)` — start a durable workflow
933
+ - `sogni.creativeWorkflows.startHostedToolSequence(input, options)` — run exact hosted Sogni tool steps as a durable backend workflow
934
+ - `sogni.creativeWorkflows.get(workflowId)` and `.list()` — inspect snapshots
935
+ - `sogni.creativeWorkflows.events(workflowId)` — poll event history
936
+ - `sogni.creativeWorkflows.streamEvents(workflowId, { after, lastEventId })` — SSE event stream with resume support
937
+ - `sogni.creativeWorkflows.cancel(workflowId)` — cooperative cancellation
938
+
939
+ `startHostedToolSequence()` accepts exact hosted-tool steps plus optional request-level `mediaReferences` / `media_references`. A dependency with `sourceStepId: "$input_media"` can inject the matching uploaded image, video, or audio URL or index into a later step. The API validates the compiled step arguments before accepting the workflow and again before each execution step, so shape errors fail before billing later media work.
940
+
941
+ ```javascript
942
+ const sogni = await SogniClient.createInstance({
943
+ appId: 'creative-workflow-demo',
944
+ apiKey: process.env.SOGNI_API_KEY,
945
+ disableSocket: true
946
+ });
947
+
948
+ const workflow = await sogni.creativeWorkflows.startImageToVideo(
949
+ {
950
+ prompt: 'A graphite sketch of a robot pianist in a smoky jazz club',
951
+ videoPrompt: 'slow dolly-in, warm stage lights, subtle hand motion',
952
+ imageModel: 'flux2',
953
+ videoModel: 'ltx23',
954
+ duration: 5
955
+ },
956
+ { tokenType: 'spark' }
957
+ );
958
+
959
+ for await (const event of sogni.creativeWorkflows.streamEvents(workflow.workflowId)) {
960
+ console.log(event.event, event.data);
961
+ }
962
+ ```
963
+
964
+ See `examples/workflow_creative_agent_workflows.mjs` for start/list/get/events/stream/cancel coverage. The underlying REST endpoints remain documented in the [LLM API durable workflows reference](https://github.com/Sogni-AI/sogni-api/blob/main/docs/llm-api.md#durable-creative-agent-workflows).
701
965
 
702
966
  ## Code Examples
703
967
 
704
968
  The [examples](https://github.com/Sogni-AI/sogni-client/tree/main/examples) directory contains working examples for all workflows:
705
969
 
706
970
  ### Image Workflow Examples
971
+
707
972
  - **`workflow_text_to_image.mjs`** - Text-to-image generation with multiple model options
708
973
  - **`workflow_image_edit.mjs`** - Reference-based image generation using context images
709
974
 
710
975
  ### Video Workflow Examples
711
- - **`workflow_text_to_video.mjs`** - Text-to-video generation with WAN 2.2 models
712
- - **`workflow_image_to_video.mjs`** - Animate static images into videos
976
+
977
+ - **`workflow_text_to_video.mjs`** - Text-to-video generation with WAN 2.2 and LTX-2.3
978
+ - **`workflow_image_to_video.mjs`** - Animate static images into videos with WAN 2.2 and LTX-2.3
713
979
  - **`workflow_sound_to_video.mjs`** - Audio-synchronized video generation with lip-sync
714
- - **`workflow_video_to_video.mjs`** - Motion transfer and character replacement (Animate-Move/Animate-Replace)
980
+ - **`workflow_video_to_video.mjs`** - Motion transfer, character replacement, and LTX-2.3 ControlNet v2v
715
981
 
716
982
  ### LLM Text Chat, Vision & Tool Calling Examples
983
+
717
984
  - **`workflow_text_chat.mjs`** - Single-turn chat completion (non-streaming)
718
985
  - **`workflow_text_chat_streaming.mjs`** - Streaming chat with token-by-token output
719
986
  - **`workflow_text_chat_multi_turn.mjs`** - Multi-turn conversation with history, in-chat commands, and session stats
720
987
  - **`workflow_text_chat_vision.mjs`** - Vision chat with multimodal image understanding (scene description, OCR, object detection, visual analysis, multi-image comparison)
721
988
  - **`workflow_text_chat_tool_calling.mjs`** - LLM tool calling with built-in tools (weather, time, unit conversion, math)
722
- - **`workflow_text_chat_sogni_tools.mjs`** - Generate images, videos, and music through natural language via LLM tool calling
989
+ - **`workflow_text_chat_sogni_tools.mjs`** - Core image/video/music generation through natural language via LLM tool calling
990
+ - **`workflow_creative_agent_tools.mjs`** - Server-side hosted Sogni tool injection for `/v1/chat/completions`
991
+ - **`workflow_creative_agent_workflows.mjs`** - Durable `/v1/creative-agent/workflows` start/list/get/events/stream/cancel through the SDK
992
+ - **`workflow_partner_seedance_video.mjs`** - Focused partner Seedance coverage: chat-completions T2V plus hosted-workflow T2V, I2V, IA2V, V2V, and multimodal context with uploaded media
723
993
 
724
994
  ### Basic Examples
995
+
725
996
  - **`promise_based.mjs`** - Image generation using promises/async-await
726
997
  - **`event_driven.js`** - Image generation using event listeners
727
998
 
@@ -729,16 +1000,19 @@ The [examples](https://github.com/Sogni-AI/sogni-client/tree/main/examples) dire
729
1000
 
730
1001
  The workflow examples showcase a few powerful open-source frontier models supported by Sogni Supernet:
731
1002
 
732
- | Model ID | Description | Use Case |
733
- |----------|-------------|----------|
734
- | `z_image_turbo_bf16` | **Z-Image Turbo** - Ultra-fast 8-step generation | Quick text-to-image prototyping and iteration |
735
- | `z_image_bf16` | **Z-Image** - High quality 20-step generation | Detailed, high quality image output |
736
- | `qwen_image_edit_2511_fp8_lightning` | **Qwen Image Edit Lightning** - Fast 4-step editing | Rapid reference-based image generation |
737
- | `qwen_image_edit_2511_fp8` | **Qwen Image Edit** - High quality 20-step editing | Professional image editing with context awareness |
738
- | `wan_v2.2-14b-fp8_t2v_lightx2v` | **Wan 2.2 T2V** - Text-to-video | Generate videos from text prompts |
739
- | `qwen3.6-35b-a3b-gguf-iq4xs` | **Qwen3.6 35B VLM** - LLM chat, tool calling & vision | Latest model with 128K context target, reasoning, tool calling, and multimodal image understanding |
1003
+ | Model ID | Description | Use Case |
1004
+ | ------------------------------------ | ----------------------------------------------------- | ------------------------------------------------------------------------------------------------------------ |
1005
+ | `z_image_turbo_bf16` | **Z-Image Turbo** - Ultra-fast 8-step generation | Quick text-to-image prototyping and iteration |
1006
+ | `z_image_bf16` | **Z-Image** - High quality 20-step generation | Detailed, high quality image output |
1007
+ | `qwen_image_edit_2511_fp8_lightning` | **Qwen Image Edit Lightning** - Fast 4-step editing | Rapid reference-based image generation |
1008
+ | `qwen_image_edit_2511_fp8` | **Qwen Image Edit** - High quality 20-step editing | Professional image editing with context awareness |
1009
+ | `wan_v2.2-14b-fp8_t2v_lightx2v` | **Wan 2.2 T2V** - Text-to-video | Generate videos from text prompts |
1010
+ | `seedance-2-0` | **Seedance 2.0** - External API multimodal video | Full Seedance 2.0 24fps video generation with optional image, video, and audio context |
1011
+ | `seedance-2-0-fast` | **Seedance 2.0 Fast** - 720p external API video | Faster 24fps video generation where fast tiers are enabled |
1012
+ | `qwen3.6-35b-a3b-gguf-iq4xs` | **Qwen3.6 35B VLM** - LLM chat, tool calling & vision | Latest model with 262,144 native context length, reasoning, tool calling, and multimodal image understanding |
740
1013
 
741
1014
  All workflow examples include:
1015
+
742
1016
  - Interactive model and parameter selection
743
1017
  - Balance checking and cost confirmation
744
1018
  - Real-time progress tracking with ETA
@@ -746,6 +1020,7 @@ All workflow examples include:
746
1020
  - Automatic file download and preview
747
1021
 
748
1022
  Run any workflow example:
1023
+
749
1024
  ```bash
750
1025
  cd examples
751
1026
  npm install
@@ -761,11 +1036,11 @@ node workflow_text_chat_vision.mjs --image photo.jpg
761
1036
 
762
1037
  This SDK provides documentation optimized for AI coding assistants like Claude Code, GitHub Copilot, Cursor, and [Open Claw](https://openclaw.ai/):
763
1038
 
764
- | File | Description |
765
- |------|-------------|
766
- | [`llms.txt`](./llms.txt) | Indexed quick reference with code examples |
1039
+ | File | Description |
1040
+ | ---------------------------------- | ------------------------------------------------------- |
1041
+ | [`llms.txt`](./llms.txt) | Indexed quick reference with code examples |
767
1042
  | [`llms-full.txt`](./llms-full.txt) | Comprehensive documentation with complete API reference |
768
- | [`CLAUDE.md`](./CLAUDE.md) | Claude Code-specific guidance and project context |
1043
+ | [`CLAUDE.md`](./CLAUDE.md) | Claude Code-specific guidance and project context |
769
1044
 
770
1045
  These files follow the [llms.txt convention](https://llmstxt.org/) for LLM-friendly documentation.
771
1046
 
@@ -777,9 +1052,9 @@ When helping users generate images, videos, or use LLM features with Sogni:
777
1052
  2. **Video generation**: Use `type: 'video'` with `network: 'fast'` (required)
778
1053
  3. **Audio generation**: Use `type: 'audio'` with ACE-Step 1.5 models
779
1054
  4. **LLM text chat**: Use `sogni.projects.chatCompletion()` for text generation with streaming and tool calling
780
- 5. **Sogni Platform Tools**: Combine LLM tool calling with Sogni media generation to create images, videos, and music from natural language
781
- 6. **Vision chat**: Use `qwen3.6-35b-a3b-gguf-iq4xs` VLM for multimodal image understanding with `image_url` content type
782
- 7. **WAN 2.2 vs LTX-2.3**: These model families have different FPS behaviors - see `llms-full.txt` for details
1055
+ 5. **Sogni Platform Tools**: Combine LLM tool calling with Sogni media generation to create images, image edits, videos, audio-driven videos, video transforms, and music from natural language
1056
+ 6. **Vision chat**: Use `qwen3.6-35b-a3b-gguf-iq4xs` VLM for multimodal image understanding with `image_url` content parts carrying inline base64 JPEG/PNG `data:` URIs. Vision requests allow up to 20 images, 10MB each, with longest side capped at 1024px. This 1024px dimension cap applies only to the vision `image_url` path, not to media-generation tool image inputs.
1057
+ 7. **WAN 2.2, LTX-2.3, and Seedance**: These video families have different duration/FPS behaviors - see `llms-full.txt` for details
783
1058
 
784
1059
  ## API Documentation
785
1060