@octavus/docs 3.1.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -180,6 +180,68 @@ await computer.stop();
180
180
 
181
181
  Always call `stop()` when the session ends to clean up MCP subprocesses. For managed processes (like Chrome), pass them in the config for automatic cleanup.
182
182
 
183
+ ## Dynamic Entries
184
+
185
+ You can add or remove MCP entries on a running `Computer` after `start()` has returned. This is useful when MCP configurations arrive after construction - for example, when a session manager receives per-session entries from a dispatch payload and wants to wire them into the existing computer instead of rebuilding it.
186
+
187
+ ### `addEntry(namespace, entry, options?)`
188
+
189
+ Registers a new MCP entry under `namespace`. By default, connects immediately:
190
+
191
+ ```typescript
192
+ await computer.addEntry(
193
+ 'github',
194
+ Computer.stdio('@modelcontextprotocol/server-github', [], {
195
+ env: { GITHUB_PERSONAL_ACCESS_TOKEN: process.env.GH_TOKEN! },
196
+ }),
197
+ );
198
+ ```
199
+
200
+ Pass `{ deferred: true }` to register the entry without connecting. The entry starts in a degraded state and connects on the next `restartEntry(namespace)` call - useful for lazy MCPs the agent activates on demand:
201
+
202
+ ```typescript
203
+ await computer.addEntry('github', githubEntry, { deferred: true });
204
+
205
+ // Later, when the agent decides it needs GitHub:
206
+ await computer.restartEntry('github');
207
+ ```
208
+
209
+ `addEntry` throws if the namespace already exists. To replace an entry, call `removeEntry` first.
210
+
211
+ If the immediate connection fails, `addEntry` does not throw - the entry is registered as degraded with the error message attached. Inspect the entry's state via `getHealth()`, or call `restartEntry()` to retry the connection.
212
+
213
+ ### `removeEntry(namespace)`
214
+
215
+ Closes the entry's connection (if any) and drops it from the configuration. No-op when the namespace doesn't exist:
216
+
217
+ ```typescript
218
+ await computer.removeEntry('github');
219
+ ```
220
+
221
+ ### `restartEntry(namespace)`
222
+
223
+ Closes the existing connection (if any) and reconnects with the current configuration:
224
+
225
+ ```typescript
226
+ await computer.restartEntry('github');
227
+ ```
228
+
229
+ Use this to bring a deferred entry online for the first time, or to recover an entry that became degraded mid-session.
230
+
231
+ ### Detecting dynamic-entry support
232
+
233
+ Consumers that work with arbitrary `ToolProvider` implementations can detect dynamic-entry capability with `isDynamicMcpProvider`:
234
+
235
+ ```typescript
236
+ import { isDynamicMcpProvider } from '@octavus/server-sdk';
237
+
238
+ if (isDynamicMcpProvider(provider)) {
239
+ await provider.addEntry('github', githubEntry);
240
+ }
241
+ ```
242
+
243
+ `Computer` always passes this check.
244
+
183
245
  ## Chrome Launch Helper
184
246
 
185
247
  For desktop applications that need to control a browser, `Computer.launchChrome()` launches Chrome with remote debugging enabled:
@@ -384,10 +446,38 @@ class Computer implements ToolProvider {
384
446
  start(): Promise<{ errors: string[] }>;
385
447
  stop(): Promise<void>;
386
448
 
449
+ // Dynamic entries
450
+ addEntry(namespace: string, entry: McpEntry, options?: { deferred?: boolean }): Promise<void>;
451
+ removeEntry(namespace: string): Promise<void>;
452
+ restartEntry(namespace: string): Promise<void>;
453
+ stopEntry(namespace: string): Promise<void>;
454
+
455
+ // Health
456
+ getHealth(): Promise<ComputerHealth>;
457
+ ensureReady(): Promise<EnsureReadyResult>;
458
+ retryDegraded(): Promise<{ recovered: string[]; stillDegraded: string[] }>;
459
+
387
460
  // ToolProvider implementation
388
461
  toolHandlers(): Record<string, ToolHandler>;
389
462
  toolSchemas(): ToolSchema[];
390
463
  }
464
+
465
+ interface ComputerHealth {
466
+ healthy: boolean;
467
+ entries: EntryHealth[];
468
+ totalTools: number;
469
+ }
470
+
471
+ interface EntryHealth {
472
+ name: string;
473
+ healthy: boolean;
474
+ error?: string;
475
+ }
476
+
477
+ interface EnsureReadyResult extends ComputerHealth {
478
+ recovered?: string[];
479
+ failedEntries?: string[];
480
+ }
391
481
  ```
392
482
 
393
483
  ### ComputerConfig
@@ -396,6 +486,8 @@ class Computer implements ToolProvider {
396
486
  interface ComputerConfig {
397
487
  mcpServers: Record<string, McpEntry>;
398
488
  managedProcesses?: { process: ChildProcess }[];
489
+ /** Namespaces to skip during start() - they begin as degraded and can be connected on demand via restartEntry(). */
490
+ deferredEntries?: string[];
399
491
  }
400
492
 
401
493
  type McpEntry = StdioConfig | HttpConfig | ShellConfig;
@@ -31,7 +31,9 @@ type UIMessagePart =
31
31
  | UIOperationPart
32
32
  | UISourcePart
33
33
  | UIFilePart
34
- | UIObjectPart;
34
+ | UIObjectPart
35
+ | UITodoPart
36
+ | UIWorkerPart;
35
37
 
36
38
  // Text content
37
39
  interface UITextPart {
@@ -107,6 +109,31 @@ interface UIObjectPart {
107
109
  error?: string;
108
110
  thread?: string;
109
111
  }
112
+
113
+ // Structured task list (when the agent uses octavus_todo_write)
114
+ interface UITodoPart {
115
+ type: 'todo';
116
+ todos: {
117
+ id: string;
118
+ content: string;
119
+ status: 'pending' | 'in_progress' | 'completed' | 'cancelled';
120
+ }[];
121
+ status: 'streaming' | 'done';
122
+ thread?: string;
123
+ }
124
+
125
+ // Sub-agent execution container (when an agent invokes a worker)
126
+ interface UIWorkerPart {
127
+ type: 'worker';
128
+ workerId: string;
129
+ workerSlug: string;
130
+ description?: string;
131
+ input?: Record<string, unknown>;
132
+ parts: UIMessagePart[]; // Nested parts from the worker (excluding nested workers)
133
+ output?: unknown;
134
+ error?: string;
135
+ status: 'running' | 'done' | 'error';
136
+ }
110
137
  ```
111
138
 
112
139
  ## Sending Messages
@@ -285,11 +285,17 @@ The `file` type is a built-in type representing uploaded files. Use `file[]` for
285
285
 
286
286
  ## Supported File Types
287
287
 
288
- | Type | Media Types |
289
- | --------- | -------------------------------------------------------------------- |
290
- | Images | `image/jpeg`, `image/png`, `image/gif`, `image/webp` |
291
- | Video | `video/mp4`, `video/webm`, `video/quicktime`, `video/mpeg` |
292
- | Documents | `application/pdf`, `text/plain`, `text/markdown`, `application/json` |
288
+ | Type | Media Types |
289
+ | ---------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
290
+ | Images | `image/jpeg`, `image/png`, `image/gif`, `image/webp` |
291
+ | Video | `video/mp4`, `video/webm`, `video/quicktime`, `video/mpeg` |
292
+ | Documents | `application/pdf`, `text/plain`, `text/markdown`, `text/csv`, `application/json` |
293
+ | Office documents | `application/vnd.openxmlformats-officedocument.wordprocessingml.document` (`.docx`), `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet` (`.xlsx`), `application/vnd.openxmlformats-officedocument.presentationml.presentation` (`.pptx`), `application/msword` (`.doc`), `application/vnd.ms-excel` (`.xls`), `application/vnd.ms-powerpoint` (`.ppt`) |
294
+
295
+ Images, video, PDFs, and text-based formats are sent directly to the model as
296
+ file parts. Office documents are not natively readable by LLM providers, so
297
+ they are surfaced to the agent as presigned download URLs - the agent fetches
298
+ and parses them with code or skills (e.g. via a sandboxed computer).
293
299
 
294
300
  ## File Limits
295
301
 
@@ -90,6 +90,7 @@ agent:
90
90
  skills: [qr-code] # Enable skills
91
91
  imageModel: google/gemini-2.5-flash-image # Enable image generation
92
92
  webSearch: true # Enable web search
93
+ todoList: true # Enable structured task tracking
93
94
  agentic: true # Allow multiple tool calls
94
95
  thinking: medium # Extended reasoning
95
96
 
@@ -5,7 +5,7 @@ description: Using Octavus skills for code execution and specialized capabilitie
5
5
 
6
6
  # Skills
7
7
 
8
- Skills are knowledge packages that enable agents to execute code and generate files in isolated sandbox environments. Unlike external tools (which you implement in your backend), skills are self-contained packages with documentation and scripts that run in secure sandboxes.
8
+ Skills are knowledge packages that enable agents to execute code and generate files. Unlike external tools (which you implement in your backend), skills are self-contained packages with documentation and scripts. By default, skills run in isolated sandbox environments, but they can also run directly on the agent's computer.
9
9
 
10
10
  ## Overview
11
11
 
@@ -15,8 +15,8 @@ Octavus Skills provide **provider-agnostic** code execution. They work with any
15
15
 
16
16
  1. **Skill Definition**: Skills are defined in the protocol's `skills:` section
17
17
  2. **Skill Resolution**: Skills are resolved from available sources (see below)
18
- 3. **Sandbox Execution**: When a skill is used, code runs in an isolated sandbox environment
19
- 4. **File Generation**: Files saved to `/output/` are automatically captured and made available for download
18
+ 3. **Execution**: Code runs in an isolated sandbox (default) or on the agent's computer
19
+ 4. **File Generation**: For sandbox skills only, files saved to `/output/` are automatically captured and made available for download
20
20
 
21
21
  ### Skill Sources
22
22
 
@@ -49,6 +49,7 @@ skills:
49
49
  | ------------- | -------- | ------------------------------------------------------------------------------------- |
50
50
  | `display` | No | How to show in UI: `hidden`, `name`, `description`, `stream` (default: `description`) |
51
51
  | `description` | No | Custom description shown to users (overrides skill's built-in description) |
52
+ | `execution` | No | Where the skill runs: `sandbox` (default) or `device` |
52
53
 
53
54
  ### Display Modes
54
55
 
@@ -107,19 +108,66 @@ This also works for named threads in interactive agents, allowing different thre
107
108
 
108
109
  When skills are enabled, the LLM has access to these tools:
109
110
 
110
- | Tool | Purpose | Availability |
111
- | -------------------- | --------------------------------------- | -------------------- |
112
- | `octavus_skill_read` | Read skill documentation (SKILL.md) | All skills |
113
- | `octavus_skill_list` | List available scripts in a skill | All skills |
114
- | `octavus_skill_run` | Execute a pre-built script from a skill | All skills |
115
- | `octavus_code_run` | Execute arbitrary Python/Bash code | Standard skills only |
116
- | `octavus_file_write` | Create files in the sandbox | Standard skills only |
117
- | `octavus_file_read` | Read files from the sandbox | Standard skills only |
111
+ | Tool | Purpose | Availability |
112
+ | --------------------- | ----------------------------------------------- | ------------------------------ |
113
+ | `octavus_skill_read` | Read skill documentation (SKILL.md) | All skills |
114
+ | `octavus_skill_list` | List available scripts in a skill | All skills |
115
+ | `octavus_skill_run` | Execute a pre-built script from a skill | All skills |
116
+ | `octavus_skill_setup` | Install a skill on the device for file browsing | Device skills only |
117
+ | `octavus_code_run` | Execute arbitrary Python/Bash code | Sandbox skills (standard) only |
118
+ | `octavus_file_write` | Create files in the sandbox | Sandbox skills (standard) only |
119
+ | `octavus_file_read` | Read files from the sandbox | Sandbox skills (standard) only |
118
120
 
119
121
  The LLM learns about available skills through system prompt injection and can use these tools to interact with skills.
120
122
 
121
123
  Skills that have [secrets](#skill-secrets) configured run in **secure mode**, where only `octavus_skill_read`, `octavus_skill_list`, and `octavus_skill_run` are available. See [Skill Secrets](#skill-secrets) below.
122
124
 
125
+ ## Device Execution
126
+
127
+ By default, skills run in an isolated sandbox. When `execution: device` is set, the skill runs on the agent's computer (VM or desktop) instead.
128
+
129
+ ```yaml
130
+ skills:
131
+ deploy-tool:
132
+ display: description
133
+ description: Deploy applications to production
134
+ execution: device
135
+ qr-code:
136
+ display: description
137
+ description: Generating QR codes
138
+ # execution defaults to sandbox
139
+ ```
140
+
141
+ ### How Device Skills Work
142
+
143
+ Device skills are installed on the agent's computer so the agent can browse their files and run their scripts directly. After attaching a skill via integrations, the agent uses `octavus_skill_setup` to install it on the device. Once installed, the agent can:
144
+
145
+ - Read the skill's documentation with `octavus_skill_read`
146
+ - List available scripts with `octavus_skill_list`
147
+ - Run pre-built scripts with `octavus_skill_run`
148
+
149
+ The generic workspace tools (`octavus_code_run`, `octavus_file_write`, `octavus_file_read`) are **not available** for device skills. Instead, the agent uses the device's own shell and filesystem MCP servers to interact with files and run commands.
150
+
151
+ ### Sandbox vs Device Skills
152
+
153
+ | Aspect | Sandbox (default) | Device |
154
+ | ------------------- | ---------------------------------- | ------------------------------------------------------ |
155
+ | **Environment** | Isolated sandbox | Agent's computer (VM or desktop) |
156
+ | **Available tools** | All 6 skill tools | `skill_read`, `skill_list`, `skill_run`, `skill_setup` |
157
+ | **File access** | Via `octavus_file_read/write` | Via device filesystem MCP |
158
+ | **Code execution** | Via `octavus_code_run` | Via device shell MCP |
159
+ | **Isolation** | Fully sandboxed | Runs alongside other device processes |
160
+ | **File output** | `/output/` directory auto-captured | Files written to device filesystem |
161
+
162
+ ### When to Use Device Execution
163
+
164
+ Use `execution: device` when the skill needs to:
165
+
166
+ - Access the agent's local filesystem or running processes
167
+ - Use tools or CLIs installed on the device
168
+ - Interact with services running on the device
169
+ - Persist files beyond a single execution cycle
170
+
123
171
  ## Example: QR Code Generation
124
172
 
125
173
  ```yaml
@@ -297,14 +345,14 @@ skills:
297
345
 
298
346
  ## Comparison: Skills vs Tools vs Provider Options
299
347
 
300
- | Feature | Octavus Skills | External Tools | Provider Tools/Skills |
301
- | ------------------ | ----------------- | ------------------- | --------------------- |
302
- | **Execution** | Isolated sandbox | Your backend | Provider servers |
303
- | **Provider** | Any (agnostic) | N/A | Provider-specific |
304
- | **Code Execution** | Yes | No | Yes (provider tools) |
305
- | **File Output** | Yes | No | Yes (provider skills) |
306
- | **Implementation** | Skill packages | Your code | Built-in |
307
- | **Cost** | Sandbox + LLM API | Your infrastructure | Included in API |
348
+ | Feature | Octavus Skills | External Tools | Provider Tools/Skills |
349
+ | ------------------ | --------------------------- | ------------------- | --------------------- |
350
+ | **Execution** | Sandbox or agent's computer | Your backend | Provider servers |
351
+ | **Provider** | Any (agnostic) | N/A | Provider-specific |
352
+ | **Code Execution** | Yes | No | Yes (provider tools) |
353
+ | **File Output** | Yes | No | Yes (provider skills) |
354
+ | **Implementation** | Skill packages | Your code | Built-in |
355
+ | **Cost** | Sandbox + LLM API | Your infrastructure | Included in API |
308
356
 
309
357
  ## Uploading Custom Skills
310
358
 
@@ -343,9 +391,21 @@ agent:
343
391
  skills: [my-skill]
344
392
  ```
345
393
 
394
+ ## On-Demand Skills
395
+
396
+ On-demand skills (`onDemandSkills`) also support the `execution` field:
397
+
398
+ ```yaml
399
+ onDemandSkills:
400
+ display: description
401
+ execution: device
402
+ ```
403
+
404
+ When `execution: device` is set on the on-demand skills declaration, any skill attached at runtime via integrations runs on the agent's computer instead of in a sandbox.
405
+
346
406
  ## Sandbox Timeout
347
407
 
348
- The default sandbox timeout is 5 minutes. You can configure a custom timeout using `sandboxTimeout` in the agent config or on individual `start-thread` blocks:
408
+ The default sandbox timeout is 5 minutes (applies to sandbox skills only). You can configure a custom timeout using `sandboxTimeout` in the agent config or on individual `start-thread` blocks:
349
409
 
350
410
  ```yaml
351
411
  # Agent-level timeout (applies to main thread)
@@ -436,7 +496,7 @@ For standard skills (without secrets), scripts receive input as CLI arguments. F
436
496
 
437
497
  ## Security
438
498
 
439
- Skills run in isolated sandbox environments:
499
+ Sandbox skills run in isolated environments:
440
500
 
441
501
  - **No network access** (unless explicitly configured)
442
502
  - **No persistent storage** (sandbox destroyed after each `next-message` execution)
@@ -444,6 +504,8 @@ Skills run in isolated sandbox environments:
444
504
  - **Time limits** enforced (5-minute default, configurable via `sandboxTimeout`)
445
505
  - **Secret redaction** - output from secure skills is automatically scanned for secret values
446
506
 
507
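+ Device skills run on the agent's computer and share its environment. They do not have sandbox isolation but benefit from restricted tool access: only the slug-bearing skill tools (`octavus_skill_read`, `octavus_skill_list`, `octavus_skill_run`, and `octavus_skill_setup`) are available.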
508
+
447
509
  ## Next Steps
448
510
 
449
511
  - [Agent Config](/docs/protocol/agent-config) - Configuring skills in agent settings
@@ -47,11 +47,11 @@ Specify models in `provider/model-id` format. Any model supported by the provide
47
47
 
48
48
  ### Supported Providers
49
49
 
50
- | Provider | Format | Examples |
51
- | --------- | ---------------------- | -------------------------------------------------------------------- |
52
- | Anthropic | `anthropic/{model-id}` | `claude-opus-4-5`, `claude-sonnet-4-5`, `claude-haiku-4-5` |
53
- | Google | `google/{model-id}` | `gemini-3-pro-preview`, `gemini-3-flash-preview`, `gemini-2.5-flash` |
54
- | OpenAI | `openai/{model-id}` | `gpt-5`, `gpt-4o`, `o4-mini`, `o3`, `o3-mini`, `o1` |
50
+ | Provider | Format | Examples |
51
+ | --------- | ---------------------- | -------------------------------------------------------------------------------------------------- |
52
+ | Anthropic | `anthropic/{model-id}` | `claude-opus-4-7`, `claude-opus-4-6`, `claude-sonnet-4-6`, `claude-sonnet-4-5`, `claude-haiku-4-5` |
53
+ | Google | `google/{model-id}` | `gemini-3-pro-preview`, `gemini-3-flash-preview`, `gemini-2.5-flash` |
54
+ | OpenAI | `openai/{model-id}` | `gpt-5`, `gpt-4o`, `o4-mini`, `o3`, `o3-mini`, `o1` |
55
55
 
56
56
  ### Examples
57
57
 
@@ -225,14 +225,28 @@ agent:
225
225
  thinking: medium # low | medium | high
226
226
  ```
227
227
 
228
- | Level | Token Budget | Use Case |
229
- | -------- | ------------ | ------------------- |
230
- | `low` | ~5,000 | Simple reasoning |
231
- | `medium` | ~10,000 | Moderate complexity |
232
- | `high` | ~20,000 | Complex analysis |
228
+ | Level | Use Case |
229
+ | -------- | ------------------- |
230
+ | `low` | Simple reasoning |
231
+ | `medium` | Moderate complexity |
232
+ | `high` | Complex analysis |
233
233
 
234
234
  Thinking content streams to the UI and can be displayed to users.
235
235
 
236
+ ### How levels are applied
237
+
238
+ Each provider translates `thinking` into its own reasoning controls:
239
+
240
+ | Provider | Level mapping |
241
+ | -------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- |
242
+ | Anthropic 4.6+ (`claude-opus-4-7`, `claude-opus-4-6`, `claude-sonnet-4-6`) | Adaptive thinking - the model decides how much to reason, guided by `effort: low / medium / high` |
243
+ | Anthropic older (4.5 and earlier) | Fixed token budgets: `low` ~5,000, `medium` ~10,000, `high` ~20,000 |
244
+ | OpenAI (GPT-5.x, o-series) | `reasoningEffort: low / medium / high` |
245
+ | Google (Gemini 3.x) | `thinkingLevel: low / high` (`medium` rounds up to `high`) |
246
+ | Google (Gemini 1.x / 2.x) | Token budgets: `low` 1,024, `medium` 8,192, `high` 24,576 |
247
+ | OpenRouter | Unified `reasoning.max_tokens` (translated upstream) |
248
+ | Vercel AI Gateway | Forwards the underlying provider's options |
249
+
236
250
  ## Prompt Caching
237
251
 
238
252
  Providers charge less for tokens served from their prompt cache (often 10% of the uncached rate). Octavus exposes a single `cache` field that picks the right retention policy per provider, so the stable prefix of your agent - tools, system prompt, and historical messages - gets billed at the cache-read rate on repeat requests.
@@ -400,6 +414,28 @@ Use cases:
400
414
  - Fact verification and documentation lookups
401
415
  - Any information that may have changed since the model's training
402
416
 
417
+ ## TODO List
418
+
419
+ Enable the LLM to maintain a structured task list while it works:
420
+
421
+ ```yaml
422
+ agent:
423
+ model: anthropic/claude-sonnet-4-5
424
+ system: system
425
+ todoList: true
426
+ agentic: true
427
+ ```
428
+
429
+ When `todoList` is enabled, the `octavus_todo_write` tool becomes available. The LLM creates and updates a list of items - each with `id`, `content`, and `status` (`pending`, `in_progress`, `completed`, `cancelled`) - and the platform emits a `todo-update` stream event with the resolved snapshot. The Client SDK accumulates updates into a single `UITodoPart` per assistant message, so consumers render an evolving "Plan" card without managing state themselves.
430
+
431
+ The list persists across messages: the LLM can use `merge=true` to update items by id (sending only the changed fields), or `merge=false` to replace the list entirely.
432
+
433
+ Use cases:
434
+
435
+ - Multi-step tasks where the user benefits from seeing progress
436
+ - Long-running agentic loops that should communicate intent
437
+ - Workflows where the agent plans before acting
438
+
403
439
  ## Temperature
404
440
 
405
441
  Control response randomness:
@@ -460,9 +496,10 @@ handlers:
460
496
  references: [escalation-policy] # Thread-specific references
461
497
  imageModel: google/gemini-2.5-flash-image # Thread-specific image model
462
498
  webSearch: true # Thread-specific web search
499
+ todoList: true # Thread-specific task list
463
500
  ```
464
501
 
465
- Each thread can have its own model, backup model, cache mode, MCP servers, skills, references, image model, and web search setting. Skills must be defined in the protocol's `skills:` section. References must exist in the agent's `references/` directory. Workers use this same pattern since they don't have a global `agent:` section.
502
+ Each thread can have its own model, backup model, cache mode, MCP servers, skills, references, image model, web search setting, and task list setting. Skills must be defined in the protocol's `skills:` section. References must exist in the agent's `references/` directory. Workers use this same pattern since they don't have a global `agent:` section.
466
503
 
467
504
  ## Full Example
468
505
 
@@ -520,6 +557,7 @@ agent:
520
557
  skills: [qr-code] # Octavus skills
521
558
  references: [support-policies] # On-demand context
522
559
  webSearch: true # Built-in web search
560
+ todoList: true # Structured task tracking
523
561
  agentic: true
524
562
  maxSteps: 10
525
563
  thinking: medium
@@ -66,6 +66,24 @@ steps:
66
66
  maxSteps: 10
67
67
  ```
68
68
 
69
+ ### Execution Mode
70
+
71
+ The `execution` field is set at the skill definition level and applies to all threads that use the skill:
72
+
73
+ ```yaml
74
+ skills:
75
+ deploy-tool:
76
+ display: description
77
+ description: Deploy applications
78
+ execution: device # All threads using this skill run it on the device
79
+ qr-code:
80
+ display: description
81
+ description: Generating QR codes
82
+ # Defaults to sandbox execution
83
+ ```
84
+
85
+ You don't set `execution` per-thread - a skill's execution mode is consistent wherever it's used.
86
+
69
87
  ### Match Skills to Use Cases
70
88
 
71
89
  Different threads can have different skills. Define all skills at the protocol level, then scope them to each thread:
@@ -311,15 +329,15 @@ Pattern:
311
329
 
312
330
  When a skill declares secrets and an organization configures them, the skill runs in secure mode with its own isolated sandbox.
313
331
 
314
- ### Standard vs Secure Skills
332
+ ### Standard vs Secure vs Device Skills
315
333
 
316
- | Aspect | Standard Skills | Secure Skills |
317
- | ------------------- | --------------------------------- | --------------------------------------------------- |
318
- | **Sandbox** | Shared with other standard skills | Isolated (one per skill) |
319
- | **Available tools** | All 6 skill tools | `skill_read`, `skill_list`, `skill_run` only |
320
- | **Script input** | CLI arguments via `args` | JSON via stdin (use `input` parameter) |
321
- | **Environment** | No secrets | Secrets as env vars |
322
- | **Output** | Raw stdout/stderr | Redacted (secret values replaced with `[REDACTED]`) |
334
+ | Aspect | Standard Skills | Secure Skills | Device Skills |
335
+ | ------------------- | ------------------------ | --------------------------------------------------- | ------------------------------------------------------ |
336
+ | **Environment** | Shared sandbox | Isolated sandbox (one per skill) | Agent's computer (VM or desktop) |
337
+ | **Available tools** | All 6 skill tools | `skill_read`, `skill_list`, `skill_run` only | `skill_read`, `skill_list`, `skill_run`, `skill_setup` |
338
+ | **Script input** | CLI arguments via `args` | JSON via stdin (use `input` parameter) | CLI arguments via `args` |
339
+ | **Secrets** | No secrets | Secrets as env vars | No secrets |
340
+ | **Output** | Raw stdout/stderr | Redacted (secret values replaced with `[REDACTED]`) | Raw stdout/stderr |
323
341
 
324
342
  ### Writing Scripts for Secure Skills
325
343
 
@@ -416,7 +416,9 @@ steps:
416
416
  maxSteps: 10
417
417
  ```
418
418
 
419
- Workers define their own skills independently -- they don't inherit skills from a parent interactive agent. Each thread gets its own sandbox scoped to only its listed skills.
419
+ Workers define their own skills independently - they don't inherit skills from a parent interactive agent. Each thread gets its own sandbox scoped to only its listed skills.
420
+
421
+ Skills with `execution: device` work the same way in workers as in interactive agents - the skill runs on the agent's computer. Workers resolve their device execution independently, so a worker can use device skills even if the parent agent does not.
420
422
 
421
423
  See [Skills](/docs/protocol/skills) for full documentation.
422
424
 
@@ -33,11 +33,13 @@ mcpServers:
33
33
 
34
34
  ### Fields
35
35
 
36
- | Field | Required | Description |
37
- | ------------- | -------- | ------------------------------------------------------------------------------------- |
38
- | `description` | Yes | What the MCP server provides |
39
- | `source` | Yes | `remote` (platform-managed) or `device` (consumer-provided) |
40
- | `display` | No | How tool calls appear in UI: `hidden`, `name`, `description` (default: `description`) |
36
+ | Field | Required | Description |
37
+ | ------------- | -------- | ------------------------------------------------------------------------------------------------------- |
38
+ | `description` | Yes | What the MCP server provides |
39
+ | `source` | Yes | `remote` (platform-managed) or `device` (consumer-provided) |
40
+ | `display` | No | How tool calls appear in UI: `hidden`, `name`, `description` (default: `description`) |
41
+ | `connection` | No | When to connect: `eager` or `lazy` (default: `lazy`). Remote only. |
42
+ | `execution` | No | Where the MCP process runs: `sandbox` (default) or `device`. See [Device Execution](#device-execution). |
41
43
 
42
44
  ### Display Modes
43
45
 
@@ -134,6 +136,34 @@ Configuration happens in the Octavus platform UI:
134
136
  2. The server's slug must match the namespace in your protocol
135
137
  3. The platform connects, discovers tools, and makes them available to the agent
136
138
 
139
+ ### Connection Modes
140
+
141
+ The `connection` field controls when the platform connects to a remote MCP server:
142
+
143
+ | Mode | Behavior |
144
+ | ------- | ---------------------------------------------------------------------------------------------------------------------- |
145
+ | `lazy` | (default) The agent activates integrations on demand at runtime. The agent starts responding immediately. |
146
+ | `eager` | The platform connects and discovers tools before the first LLM request. Tools are guaranteed available from message 1. |
147
+
148
+ ```yaml
149
+ mcpServers:
150
+ sentry:
151
+ source: remote
152
+ connection: eager # Always connected upfront
153
+ display: name
154
+
155
+ notion:
156
+ source: remote
157
+ # connection defaults to lazy - agent activates when needed
158
+ display: description
159
+ ```
160
+
161
+ With **lazy connection** (the default), the agent receives two built-in tools - one for listing available integrations and one for activating them. The agent decides which integrations it needs based on the conversation and activates them on demand. This avoids paying connection latency for integrations the agent doesn't end up using.
162
+
163
+ With **eager connection**, the platform connects to the MCP server before the first LLM request, exactly like a declared tool. Use this when the agent needs the MCP's tools from the very first message.
164
+
165
+ The `connection` field is only valid on `source: remote` MCPs - device MCPs (`source: device`) have their own connection mechanism through the server-sdk. For remote MCPs with `execution: device`, the `connection` field is respected the same way as for sandbox-executed MCPs.
166
+
137
167
  ### Authentication
138
168
 
139
169
  Remote MCP servers support multiple authentication methods:
@@ -147,6 +177,35 @@ Remote MCP servers support multiple authentication methods:
147
177
 
148
178
  Authentication is configured per-project - different projects can connect to the same MCP server with different credentials.
149
179
 
180
+ ## Device Execution
181
+
182
+ The `execution` field controls where a remote MCP server's STDIO process runs. By default (`execution: sandbox`), the process runs in the platform's sandbox. When set to `execution: device`, the STDIO process runs on the agent's computer (VM or desktop) instead.
183
+
184
+ ```yaml
185
+ mcpServers:
186
+ code-tools:
187
+ description: Code analysis and refactoring tools
188
+ source: remote
189
+ execution: device # STDIO process runs on the agent's computer
190
+ display: name
191
+
192
+ sentry:
193
+ description: Error tracking
194
+ source: remote
195
+ # execution defaults to sandbox - runs in the platform
196
+ display: name
197
+ ```
198
+
199
+ ### When to Use
200
+
201
+ Use `execution: device` when the MCP server needs access to the agent's local environment - for example, tools that read from the local filesystem, interact with running processes, or need CLIs installed on the device.
202
+
203
+ ### Rules
204
+
205
+ - `execution` is only meaningful for `source: remote` MCPs that use STDIO transport. HTTP-transport remote MCPs always connect from the platform regardless of the `execution` setting.
206
+ - `execution: device` is **invalid** on `source: device` MCPs (they already run on the device by definition). Using it produces a validation error.
207
+ - The `connection` field (`eager` or `lazy`) is respected for device-executed MCPs the same way as sandbox-executed MCPs.
208
+
150
209
  ## Device MCP Servers
151
210
 
152
211
  Device MCP servers (`source: device`) run on the consumer's machine. The consumer provides the MCP tools via the `@octavus/computer` package (or any `ToolProvider` implementation) through the server-sdk.
@@ -224,10 +283,13 @@ onDemandMcpServers:
224
283
  remote:
225
284
  description: Additional connected integrations
226
285
  display: name
286
+ execution: device # on-demand MCPs run on the agent's computer
227
287
  contextRetention:
228
288
  toolResults: { retainLast: 5 }
229
289
  ```
230
290
 
291
+ On-demand MCP definitions also support the `execution` field. When set, all MCPs matched by that on-demand source inherit the execution mode.
292
+
231
293
  ### Scope-level opt-in
232
294
 
233
295
  The agent and individual `start-thread` blocks each choose whether to pick up on-demand MCPs, by listing the sources they want:
@@ -295,6 +357,7 @@ mcpServers:
295
357
  figma:
296
358
  description: Figma design tool integration
297
359
  source: remote
360
+ connection: eager
298
361
  display: description
299
362
  sentry:
300
363
  description: Error tracking and debugging
@@ -355,10 +418,12 @@ mcpServers:
355
418
  figma:
356
419
  description: Figma design tool integration
357
420
  source: remote
421
+ connection: eager # Need design tools from message 1
358
422
  display: description
359
423
  sentry:
360
424
  description: Error tracking and debugging
361
425
  source: remote
426
+ # Lazy (default) - agent activates when debugging is needed
362
427
  display: name
363
428
 
364
429
  tools: