@octavus/docs 2.5.0 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/content/03-client-sdk/10-client-tools.md +4 -0
- package/content/04-protocol/06-handlers.md +23 -1
- package/content/04-protocol/07-agent-config.md +13 -3
- package/dist/{chunk-SX6AIMRO.js → chunk-BY3IBD5N.js} +13 -13
- package/dist/chunk-BY3IBD5N.js.map +1 -0
- package/dist/{chunk-WQ7BTD5T.js → chunk-HMXCAQPP.js} +17 -17
- package/dist/chunk-HMXCAQPP.js.map +1 -0
- package/dist/{chunk-G5OOF4JJ.js → chunk-JBFWLUBN.js} +11 -11
- package/dist/chunk-JBFWLUBN.js.map +1 -0
- package/dist/chunk-TE6Q4675.js +1471 -0
- package/dist/chunk-TE6Q4675.js.map +1 -0
- package/dist/chunk-V35N3I3V.js +1471 -0
- package/dist/chunk-V35N3I3V.js.map +1 -0
- package/dist/content.js +1 -1
- package/dist/docs.json +6 -6
- package/dist/index.js +1 -1
- package/dist/search-index.json +1 -1
- package/dist/search.js +1 -1
- package/dist/search.js.map +1 -1
- package/dist/sections.json +6 -6
- package/package.json +1 -1
- package/dist/chunk-G5OOF4JJ.js.map +0 -1
- package/dist/chunk-SX6AIMRO.js.map +0 -1
- package/dist/chunk-WQ7BTD5T.js.map +0 -1
|
@@ -135,6 +135,7 @@ interface ClientToolContext {
|
|
|
135
135
|
toolCallId: string; // Unique ID for this call
|
|
136
136
|
toolName: string; // Name of the tool
|
|
137
137
|
signal: AbortSignal; // Aborted if user stops generation
|
|
138
|
+
addFile: (file: FileReference) => void; // Attach a file to the result
|
|
138
139
|
}
|
|
139
140
|
```
|
|
140
141
|
|
|
@@ -149,6 +150,8 @@ Use the signal to cancel long-running operations:
|
|
|
149
150
|
}
|
|
150
151
|
```
|
|
151
152
|
|
|
153
|
+
Tools that produce files (e.g., screenshots) can call `ctx.addFile()` to attach them to the result. Attached files are sent to the platform alongside the tool result so the LLM can see them as visual content on the next turn.
|
|
154
|
+
|
|
152
155
|
## Interactive Client Tools
|
|
153
156
|
|
|
154
157
|
Interactive tools require user input before completing. Use these for confirmations, forms, or any UI that needs user action.
|
|
@@ -541,6 +544,7 @@ interface ClientToolContext {
|
|
|
541
544
|
toolCallId: string;
|
|
542
545
|
toolName: string;
|
|
543
546
|
signal: AbortSignal;
|
|
547
|
+
addFile: (file: FileReference) => void;
|
|
544
548
|
}
|
|
545
549
|
|
|
546
550
|
// Interactive tool (with bound methods)
|
|
@@ -186,7 +186,29 @@ Generate image:
|
|
|
186
186
|
description: Generating your image... # Shown in UI
|
|
187
187
|
```
|
|
188
188
|
|
|
189
|
-
|
|
189
|
+
Edit an existing image using reference images:
|
|
190
|
+
|
|
191
|
+
```yaml
|
|
192
|
+
Edit image:
|
|
193
|
+
block: generate-image
|
|
194
|
+
prompt: EDIT_INSTRUCTIONS # e.g., "Remove the background"
|
|
195
|
+
referenceImages: [SOURCE_IMAGE_URL] # Variable(s) containing image URLs
|
|
196
|
+
imageModel: google/gemini-2.5-flash-image
|
|
197
|
+
output: EDITED_IMAGE
|
|
198
|
+
description: Editing image...
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
| Field | Required | Description |
|
|
202
|
+
| ----------------- | -------- | --------------------------------------------------------------- |
|
|
203
|
+
| `prompt` | Yes | Variable name containing the image prompt or edit instructions |
|
|
204
|
+
| `imageModel` | Yes | Image model identifier (e.g., `google/gemini-2.5-flash-image`) |
|
|
205
|
+
| `size` | No | Image dimensions: `1024x1024`, `1792x1024`, or `1024x1792` |
|
|
206
|
+
| `referenceImages` | No | Variable names containing image URLs for editing/transformation |
|
|
207
|
+
| `output` | No | Variable name to store the generated image URL |
|
|
208
|
+
| `thread` | No | Thread to associate the output file with |
|
|
209
|
+
| `description` | No | Description shown in the UI during generation |
|
|
210
|
+
|
|
211
|
+
This block is for deterministic image-generation pipelines where the prompt is constructed programmatically (e.g., via prompt engineering in a separate thread). When `referenceImages` is provided, the prompt describes how to modify those images.
|
|
190
212
|
|
|
191
213
|
For agentic image generation where the LLM decides when to generate, configure `imageModel` in the [agent config](/docs/protocol/agent-config#image-generation).
|
|
192
214
|
|
|
@@ -200,7 +200,7 @@ agent:
|
|
|
200
200
|
agentic: true
|
|
201
201
|
```
|
|
202
202
|
|
|
203
|
-
When `imageModel` is configured, the `octavus_generate_image` tool becomes available. The LLM can decide when to generate images based on user requests.
|
|
203
|
+
When `imageModel` is configured, the `octavus_generate_image` tool becomes available. The LLM can decide when to generate images based on user requests. The tool supports both text-to-image generation and image editing/transformation using reference images.
|
|
204
204
|
|
|
205
205
|
### Supported Image Providers
|
|
206
206
|
|
|
@@ -220,16 +220,26 @@ The tool supports three image sizes:
|
|
|
220
220
|
- `1792x1024` — Landscape (7:4, roughly 16:9)
|
|
221
221
|
- `1024x1792` — Portrait (4:7, roughly 9:16)
|
|
222
222
|
|
|
223
|
+
### Image Editing with Reference Images
|
|
224
|
+
|
|
225
|
+
Both the agentic tool and the `generate-image` block support reference images for editing and transformation. When reference images are provided, the prompt describes how to modify or use those images.
|
|
226
|
+
|
|
227
|
+
| Provider | Models | Reference Image Support |
|
|
228
|
+
| -------- | -------------------------------- | ----------------------- |
|
|
229
|
+
| OpenAI | `gpt-image-1` | Yes |
|
|
230
|
+
| Google | Gemini native (`gemini-*-image`) | Yes |
|
|
231
|
+
| Google | Imagen (`imagen-*`) | No |
|
|
232
|
+
|
|
223
233
|
### Agentic vs Deterministic
|
|
224
234
|
|
|
225
235
|
Use `imageModel` in agent config when:
|
|
226
236
|
|
|
227
|
-
- The LLM should decide when to generate images
|
|
237
|
+
- The LLM should decide when to generate or edit images
|
|
228
238
|
- Users ask for images in natural language
|
|
229
239
|
|
|
230
240
|
Use `generate-image` block (see [Handlers](/docs/protocol/handlers#generate-image)) when:
|
|
231
241
|
|
|
232
|
-
- You want explicit control over image generation
|
|
242
|
+
- You want explicit control over image generation or editing
|
|
233
243
|
- Building prompt engineering pipelines
|
|
234
244
|
- Images are generated at specific handler steps
|
|
235
245
|
|