npm - @octavus/docs - Versions diffs - 3.1.0 → 3.3.0 - Mend

@octavus/docs 3.1.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/content/02-server-sdk/08-computer.md +92 -0
package/content/03-client-sdk/02-messages.md +28 -1
package/content/03-client-sdk/08-file-uploads.md +11 -5
package/content/04-protocol/01-overview.md +1 -0
package/content/04-protocol/05-skills.md +83 -21
package/content/04-protocol/07-agent-config.md +49 -11
package/content/04-protocol/09-skills-advanced.md +26 -8
package/content/04-protocol/11-workers.md +3 -1
package/content/04-protocol/13-mcp-servers.md +70 -5
package/dist/{chunk-PD34BHI2.js → chunk-WYXUBTV7.js} +27 -27
package/dist/chunk-WYXUBTV7.js.map +1 -0
package/dist/content.js +1 -1
package/dist/docs.json +13 -13
package/dist/index.js +1 -1
package/dist/search-index.json +1 -1
package/dist/search.js +1 -1
package/dist/search.js.map +1 -1
package/dist/sections.json +13 -13
package/package.json +1 -1
package/dist/chunk-PD34BHI2.js.map +0 -1

package/content/02-server-sdk/08-computer.md CHANGED Viewed

@@ -180,6 +180,68 @@ await computer.stop();
 Always call `stop()` when the session ends to clean up MCP subprocesses. For managed processes (like Chrome), pass them in the config for automatic cleanup.
+## Dynamic Entries
+You can add or remove MCP entries on a running `Computer` after `start()` has returned. This is useful when MCP configurations arrive after construction - for example, when a session manager receives per-session entries from a dispatch payload and wants to wire them into the existing computer instead of rebuilding it.
+### `addEntry(namespace, entry, options?)`
+Registers a new MCP entry under `namespace`. By default, connects immediately:
+```typescript
+await computer.addEntry(
+  'github',
+  Computer.stdio('@modelcontextprotocol/server-github', [], {
+    env: { GITHUB_PERSONAL_ACCESS_TOKEN: process.env.GH_TOKEN! },
+  }),
+);
+```
+Pass `{ deferred: true }` to register the entry without connecting. The entry starts in a degraded state and connects on the next `restartEntry(namespace)` call - useful for lazy MCPs the agent activates on demand:
+```typescript
+await computer.addEntry('github', githubEntry, { deferred: true });
+// Later, when the agent decides it needs GitHub:
+await computer.restartEntry('github');
+```
+`addEntry` throws if the namespace already exists. To replace an entry, call `removeEntry` first.
+If the immediate connection fails, `addEntry` does not throw - the entry is registered as degraded with the error message attached. Inspect it via `getHealth()`, or call `restartEntry(namespace)` to retry the connection.
+### `removeEntry(namespace)`
+Closes the entry's connection (if any) and drops it from the configuration. No-op when the namespace doesn't exist:
+```typescript
+await computer.removeEntry('github');
+```
+### `restartEntry(namespace)`
+Closes the existing connection (if any) and reconnects with the current configuration:
+```typescript
+await computer.restartEntry('github');
+```
+Use this to bring a deferred entry online for the first time, or to recover an entry that became degraded mid-session.
+### Detecting dynamic-entry support
+Consumers that work with arbitrary `ToolProvider` implementations can detect dynamic-entry capability with `isDynamicMcpProvider`:
+```typescript
+import { isDynamicMcpProvider } from '@octavus/server-sdk';
+if (isDynamicMcpProvider(provider)) {
+  await provider.addEntry('github', githubEntry);
+}
+```
+`Computer` always passes this check.
 ## Chrome Launch Helper
 For desktop applications that need to control a browser, `Computer.launchChrome()` launches Chrome with remote debugging enabled:
@@ -384,10 +446,38 @@ class Computer implements ToolProvider {
   start(): Promise<{ errors: string[] }>;
   stop(): Promise<void>;
+  // Dynamic entries
+  addEntry(namespace: string, entry: McpEntry, options?: { deferred?: boolean }): Promise<void>;
+  removeEntry(namespace: string): Promise<void>;
+  restartEntry(namespace: string): Promise<void>;
+  stopEntry(namespace: string): Promise<void>;
+  // Health
+  getHealth(): Promise<ComputerHealth>;
+  ensureReady(): Promise<EnsureReadyResult>;
+  retryDegraded(): Promise<{ recovered: string[]; stillDegraded: string[] }>;
   // ToolProvider implementation
   toolHandlers(): Record<string, ToolHandler>;
   toolSchemas(): ToolSchema[];
 }
+interface ComputerHealth {
+  healthy: boolean;
+  entries: EntryHealth[];
+  totalTools: number;
+}
+interface EntryHealth {
+  name: string;
+  healthy: boolean;
+  error?: string;
+}
+interface EnsureReadyResult extends ComputerHealth {
+  recovered?: string[];
+  failedEntries?: string[];
+}
 ```
 ### ComputerConfig
@@ -396,6 +486,8 @@ class Computer implements ToolProvider {
 interface ComputerConfig {
   mcpServers: Record<string, McpEntry>;
   managedProcesses?: { process: ChildProcess }[];
+  /** Namespaces to skip during start() - they begin as degraded and can be connected on demand via restartEntry(). */
+  deferredEntries?: string[];
 }
 type McpEntry = StdioConfig | HttpConfig | ShellConfig;

package/content/03-client-sdk/02-messages.md CHANGED Viewed

@@ -31,7 +31,9 @@ type UIMessagePart =
   | UIOperationPart
   | UISourcePart
   | UIFilePart
-  | UIObjectPart;
+  | UIObjectPart
+  | UITodoPart
+  | UIWorkerPart;
 // Text content
 interface UITextPart {
@@ -107,6 +109,31 @@ interface UIObjectPart {
   error?: string;
   thread?: string;
 }
+// Structured task list (when the agent uses octavus_todo_write)
+interface UITodoPart {
+  type: 'todo';
+  todos: {
+    id: string;
+    content: string;
+    status: 'pending' | 'in_progress' | 'completed' | 'cancelled';
+  }[];
+  status: 'streaming' | 'done';
+  thread?: string;
+}
+// Sub-agent execution container (when an agent invokes a worker)
+interface UIWorkerPart {
+  type: 'worker';
+  workerId: string;
+  workerSlug: string;
+  description?: string;
+  input?: Record<string, unknown>;
+  parts: UIMessagePart[]; // Nested parts from the worker (excluding nested workers)
+  output?: unknown;
+  error?: string;
+  status: 'running' | 'done' | 'error';
+}
 ```
 ## Sending Messages

package/content/03-client-sdk/08-file-uploads.md CHANGED Viewed

@@ -285,11 +285,17 @@ The `file` type is a built-in type representing uploaded files. Use `file[]` for
 ## Supported File Types
-| Type      | Media Types                                                          |
-| --------- | -------------------------------------------------------------------- |
-| Images    | `image/jpeg`, `image/png`, `image/gif`, `image/webp`                 |
-| Video     | `video/mp4`, `video/webm`, `video/quicktime`, `video/mpeg`           |
-| Documents | `application/pdf`, `text/plain`, `text/markdown`, `application/json` |
+| Type             | Media Types                                                                                                                                                                                                                                                                                                                                                             |
+| ---------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| Images           | `image/jpeg`, `image/png`, `image/gif`, `image/webp`                                                                                                                                                                                                                                                                                                                    |
+| Video            | `video/mp4`, `video/webm`, `video/quicktime`, `video/mpeg`                                                                                                                                                                                                                                                                                                              |
+| Documents        | `application/pdf`, `text/plain`, `text/markdown`, `text/csv`, `application/json`                                                                                                                                                                                                                                                                                        |
+| Office documents | `application/vnd.openxmlformats-officedocument.wordprocessingml.document` (`.docx`), `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet` (`.xlsx`), `application/vnd.openxmlformats-officedocument.presentationml.presentation` (`.pptx`), `application/msword` (`.doc`), `application/vnd.ms-excel` (`.xls`), `application/vnd.ms-powerpoint` (`.ppt`) |
+Images, video, PDFs, and text-based formats are sent directly to the model as
+file parts. Office documents are not natively readable by LLM providers, so
+they are surfaced to the agent as presigned download URLs - the agent fetches
+and parses them with code or skills (e.g. via a sandboxed computer).
 ## File Limits

package/content/04-protocol/01-overview.md CHANGED Viewed

@@ -90,6 +90,7 @@ agent:
   skills: [qr-code] # Enable skills
   imageModel: google/gemini-2.5-flash-image # Enable image generation
   webSearch: true # Enable web search
+  todoList: true # Enable structured task tracking
   agentic: true # Allow multiple tool calls
   thinking: medium # Extended reasoning

package/content/04-protocol/05-skills.md CHANGED Viewed

@@ -5,7 +5,7 @@ description: Using Octavus skills for code execution and specialized capabilitie
 # Skills
-Skills are knowledge packages that enable agents to execute code and generate files in isolated sandbox environments. Unlike external tools (which you implement in your backend), skills are self-contained packages with documentation and scripts that run in secure sandboxes.
+Skills are knowledge packages that enable agents to execute code and generate files. Unlike external tools (which you implement in your backend), skills are self-contained packages with documentation and scripts. By default, skills run in isolated sandbox environments, but they can also run directly on the agent's computer.
 ## Overview
@@ -15,8 +15,8 @@ Octavus Skills provide **provider-agnostic** code execution. They work with any
 1. **Skill Definition**: Skills are defined in the protocol's `skills:` section
 2. **Skill Resolution**: Skills are resolved from available sources (see below)
-3. **Sandbox Execution**: When a skill is used, code runs in an isolated sandbox environment
-4. **File Generation**: Files saved to `/output/` are automatically captured and made available for download
+3. **Execution**: Code runs in an isolated sandbox (default) or on the agent's computer
+4. **File Generation**: Files saved to `/output/` are automatically captured and made available for download (sandbox skills only)
 ### Skill Sources
@@ -49,6 +49,7 @@ skills:
 | ------------- | -------- | ------------------------------------------------------------------------------------- |
 | `display`     | No       | How to show in UI: `hidden`, `name`, `description`, `stream` (default: `description`) |
 | `description` | No       | Custom description shown to users (overrides skill's built-in description)            |
+| `execution`   | No       | Where the skill runs: `sandbox` (default) or `device`                                 |
 ### Display Modes
@@ -107,19 +108,66 @@ This also works for named threads in interactive agents, allowing different thre
 When skills are enabled, the LLM has access to these tools:
-| Tool                 | Purpose                                 | Availability         |
-| -------------------- | --------------------------------------- | -------------------- |
-| `octavus_skill_read` | Read skill documentation (SKILL.md)     | All skills           |
-| `octavus_skill_list` | List available scripts in a skill       | All skills           |
-| `octavus_skill_run`  | Execute a pre-built script from a skill | All skills           |
-| `octavus_code_run`   | Execute arbitrary Python/Bash code      | Standard skills only |
-| `octavus_file_write` | Create files in the sandbox             | Standard skills only |
-| `octavus_file_read`  | Read files from the sandbox             | Standard skills only |
+| Tool                  | Purpose                                         | Availability                   |
+| --------------------- | ----------------------------------------------- | ------------------------------ |
+| `octavus_skill_read`  | Read skill documentation (SKILL.md)             | All skills                     |
+| `octavus_skill_list`  | List available scripts in a skill               | All skills                     |
+| `octavus_skill_run`   | Execute a pre-built script from a skill         | All skills                     |
+| `octavus_skill_setup` | Install a skill on the device for file browsing | Device skills only             |
+| `octavus_code_run`    | Execute arbitrary Python/Bash code              | Sandbox skills (standard) only |
+| `octavus_file_write`  | Create files in the sandbox                     | Sandbox skills (standard) only |
+| `octavus_file_read`   | Read files from the sandbox                     | Sandbox skills (standard) only |
 The LLM learns about available skills through system prompt injection and can use these tools to interact with skills.
 Skills that have [secrets](#skill-secrets) configured run in **secure mode**, where only `octavus_skill_read`, `octavus_skill_list`, and `octavus_skill_run` are available. See [Skill Secrets](#skill-secrets) below.
+## Device Execution
+By default, skills run in an isolated sandbox. When `execution: device` is set, the skill runs on the agent's computer (VM or desktop) instead.
+```yaml
+skills:
+  deploy-tool:
+    display: description
+    description: Deploy applications to production
+    execution: device
+  qr-code:
+    display: description
+    description: Generating QR codes
+    # execution defaults to sandbox
+```
+### How Device Skills Work
+Device skills are installed on the agent's computer so the agent can browse their files and run their scripts directly. After attaching a skill via integrations, the agent uses `octavus_skill_setup` to install it on the device. Once installed, the agent can:
+- Read the skill's documentation with `octavus_skill_read`
+- List available scripts with `octavus_skill_list`
+- Run pre-built scripts with `octavus_skill_run`
+The generic workspace tools (`octavus_code_run`, `octavus_file_write`, `octavus_file_read`) are **not available** for device skills. Instead, the agent uses the device's own shell and filesystem MCP servers to interact with files and run commands.
+### Sandbox vs Device Skills
+| Aspect              | Sandbox (default)                  | Device                                                 |
+| ------------------- | ---------------------------------- | ------------------------------------------------------ |
+| **Environment**     | Isolated sandbox                   | Agent's computer (VM or desktop)                       |
+| **Available tools** | All 6 skill tools                  | `skill_read`, `skill_list`, `skill_run`, `skill_setup` |
+| **File access**     | Via `octavus_file_read/write`      | Via device filesystem MCP                              |
+| **Code execution**  | Via `octavus_code_run`             | Via device shell MCP                                   |
+| **Isolation**       | Fully sandboxed                    | Runs alongside other device processes                  |
+| **File output**     | `/output/` directory auto-captured | Files written to device filesystem                     |
+### When to Use Device Execution
+Use `execution: device` when the skill needs to:
+- Access the agent's local filesystem or running processes
+- Use tools or CLIs installed on the device
+- Interact with services running on the device
+- Persist files beyond a single execution cycle
 ## Example: QR Code Generation
 ```yaml
@@ -297,14 +345,14 @@ skills:
 ## Comparison: Skills vs Tools vs Provider Options
-| Feature            | Octavus Skills    | External Tools      | Provider Tools/Skills |
-| ------------------ | ----------------- | ------------------- | --------------------- |
-| **Execution**      | Isolated sandbox  | Your backend        | Provider servers      |
-| **Provider**       | Any (agnostic)    | N/A                 | Provider-specific     |
-| **Code Execution** | Yes               | No                  | Yes (provider tools)  |
-| **File Output**    | Yes               | No                  | Yes (provider skills) |
-| **Implementation** | Skill packages    | Your code           | Built-in              |
-| **Cost**           | Sandbox + LLM API | Your infrastructure | Included in API       |
+| Feature            | Octavus Skills              | External Tools      | Provider Tools/Skills |
+| ------------------ | --------------------------- | ------------------- | --------------------- |
+| **Execution**      | Sandbox or agent's computer | Your backend        | Provider servers      |
+| **Provider**       | Any (agnostic)              | N/A                 | Provider-specific     |
+| **Code Execution** | Yes                         | No                  | Yes (provider tools)  |
+| **File Output**    | Yes                         | No                  | Yes (provider skills) |
+| **Implementation** | Skill packages              | Your code           | Built-in              |
+| **Cost**           | Sandbox + LLM API           | Your infrastructure | Included in API       |
 ## Uploading Custom Skills
@@ -343,9 +391,21 @@ agent:
   skills: [my-skill]
 ```
+## On-Demand Skills
+On-demand skills (`onDemandSkills`) also support the `execution` field:
+```yaml
+onDemandSkills:
+  display: description
+  execution: device
+```
+When `execution: device` is set on the on-demand skills declaration, any skill attached at runtime via integrations runs on the agent's computer instead of in a sandbox.
 ## Sandbox Timeout
-The default sandbox timeout is 5 minutes. You can configure a custom timeout using `sandboxTimeout` in the agent config or on individual `start-thread` blocks:
+The default sandbox timeout is 5 minutes (applies to sandbox skills only). You can configure a custom timeout using `sandboxTimeout` in the agent config or on individual `start-thread` blocks:
 ```yaml
 # Agent-level timeout (applies to main thread)
@@ -436,7 +496,7 @@ For standard skills (without secrets), scripts receive input as CLI arguments. F
 ## Security
-Skills run in isolated sandbox environments:
+Sandbox skills run in isolated environments:
 - **No network access** (unless explicitly configured)
 - **No persistent storage** (sandbox destroyed after each `next-message` execution)
@@ -444,6 +504,8 @@ Skills run in isolated sandbox environments:
 - **Time limits** enforced (5-minute default, configurable via `sandboxTimeout`)
 - **Secret redaction** - output from secure skills is automatically scanned for secret values
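+Device skills run on the agent's computer and share its environment. They do not have sandbox isolation but benefit from restricted tool access: only the slug-bearing skill tools (`octavus_skill_read`, `octavus_skill_list`, `octavus_skill_run`, and `octavus_skill_setup`) are available, while the generic workspace tools (`octavus_code_run`, `octavus_file_write`, `octavus_file_read`) are not.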
 ## Next Steps
 - [Agent Config](/docs/protocol/agent-config) - Configuring skills in agent settings

package/content/04-protocol/07-agent-config.md CHANGED Viewed

@@ -47,11 +47,11 @@ Specify models in `provider/model-id` format. Any model supported by the provide
 ### Supported Providers
-| Provider  | Format                 | Examples                                                             |
-| --------- | ---------------------- | -------------------------------------------------------------------- |
-| Anthropic | `anthropic/{model-id}` | `claude-opus-4-5`, `claude-sonnet-4-5`, `claude-haiku-4-5`           |
-| Google    | `google/{model-id}`    | `gemini-3-pro-preview`, `gemini-3-flash-preview`, `gemini-2.5-flash` |
-| OpenAI    | `openai/{model-id}`    | `gpt-5`, `gpt-4o`, `o4-mini`, `o3`, `o3-mini`, `o1`                  |
+| Provider  | Format                 | Examples                                                                                           |
+| --------- | ---------------------- | -------------------------------------------------------------------------------------------------- |
+| Anthropic | `anthropic/{model-id}` | `claude-opus-4-7`, `claude-opus-4-6`, `claude-sonnet-4-6`, `claude-sonnet-4-5`, `claude-haiku-4-5` |
+| Google    | `google/{model-id}`    | `gemini-3-pro-preview`, `gemini-3-flash-preview`, `gemini-2.5-flash`                               |
+| OpenAI    | `openai/{model-id}`    | `gpt-5`, `gpt-4o`, `o4-mini`, `o3`, `o3-mini`, `o1`                                                |
 ### Examples
@@ -225,14 +225,28 @@ agent:
   thinking: medium # low | medium | high
 ```
-| Level    | Token Budget | Use Case            |
-| -------- | ------------ | ------------------- |
-| `low`    | ~5,000       | Simple reasoning    |
-| `medium` | ~10,000      | Moderate complexity |
-| `high`   | ~20,000      | Complex analysis    |
+| Level    | Use Case            |
+| -------- | ------------------- |
+| `low`    | Simple reasoning    |
+| `medium` | Moderate complexity |
+| `high`   | Complex analysis    |
 Thinking content streams to the UI and can be displayed to users.
+### How levels are applied
+Each provider translates `thinking` into its own reasoning controls:
+| Provider                                                                   | Level mapping                                                                                     |
+| -------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- |
+| Anthropic 4.6+ (`claude-opus-4-7`, `claude-opus-4-6`, `claude-sonnet-4-6`) | Adaptive thinking - the model decides how much to reason, guided by `effort: low / medium / high` |
+| Anthropic older (4.5 and earlier)                                          | Fixed token budgets: `low` ~5,000, `medium` ~10,000, `high` ~20,000                               |
+| OpenAI (GPT-5.x, o-series)                                                 | `reasoningEffort: low / medium / high`                                                            |
+| Google (Gemini 3.x)                                                        | `thinkingLevel: low / high` (`medium` rounds up to `high`)                                        |
+| Google (Gemini 1.x / 2.x)                                                  | Token budgets: `low` 1,024, `medium` 8,192, `high` 24,576                                         |
+| OpenRouter                                                                 | Unified `reasoning.max_tokens` (translated upstream)                                              |
+| Vercel AI Gateway                                                          | Forwards the underlying provider's options                                                        |
 ## Prompt Caching
 Providers charge less for tokens served from their prompt cache (often 10% of the uncached rate). Octavus exposes a single `cache` field that picks the right retention policy per provider, so the stable prefix of your agent - tools, system prompt, and historical messages - gets billed at the cache-read rate on repeat requests.
@@ -400,6 +414,28 @@ Use cases:
 - Fact verification and documentation lookups
 - Any information that may have changed since the model's training
+## TODO List
+Enable the LLM to maintain a structured task list while it works:
+```yaml
+agent:
+  model: anthropic/claude-sonnet-4-5
+  system: system
+  todoList: true
+  agentic: true
+```
+When `todoList` is enabled, the `octavus_todo_write` tool becomes available. The LLM creates and updates a list of items - each with `id`, `content`, and `status` (`pending`, `in_progress`, `completed`, `cancelled`) - and the platform emits a `todo-update` stream event with the resolved snapshot. The Client SDK accumulates updates into a single `UITodoPart` per assistant message, so consumers can render an evolving "Plan" card without managing state themselves.
+The list persists across messages: the LLM can use `merge=true` to update items by id (sending only the changed fields), or `merge=false` to replace the list entirely.
+Use cases:
+- Multi-step tasks where the user benefits from seeing progress
+- Long-running agentic loops that should communicate intent
+- Workflows where the agent plans before acting
 ## Temperature
 Control response randomness:
@@ -460,9 +496,10 @@ handlers:
       references: [escalation-policy] # Thread-specific references
       imageModel: google/gemini-2.5-flash-image # Thread-specific image model
       webSearch: true # Thread-specific web search
+      todoList: true # Thread-specific task list
 ```
-Each thread can have its own model, backup model, cache mode, MCP servers, skills, references, image model, and web search setting. Skills must be defined in the protocol's `skills:` section. References must exist in the agent's `references/` directory. Workers use this same pattern since they don't have a global `agent:` section.
+Each thread can have its own model, backup model, cache mode, MCP servers, skills, references, image model, web search setting, and task list setting. Skills must be defined in the protocol's `skills:` section. References must exist in the agent's `references/` directory. Workers use this same pattern since they don't have a global `agent:` section.
 ## Full Example
@@ -520,6 +557,7 @@ agent:
   skills: [qr-code] # Octavus skills
   references: [support-policies] # On-demand context
   webSearch: true # Built-in web search
+  todoList: true # Structured task tracking
   agentic: true
   maxSteps: 10
   thinking: medium

package/content/04-protocol/09-skills-advanced.md CHANGED Viewed

@@ -66,6 +66,24 @@ steps:
     maxSteps: 10
 ```
+### Execution Mode
+The `execution` field is set at the skill definition level and applies to all threads that use the skill:
+```yaml
+skills:
+  deploy-tool:
+    display: description
+    description: Deploy applications
+    execution: device # All threads using this skill run it on the device
+  qr-code:
+    display: description
+    description: Generating QR codes
+    # Defaults to sandbox execution
+```
+You don't set `execution` per-thread - a skill's execution mode is consistent wherever it's used.
 ### Match Skills to Use Cases
 Different threads can have different skills. Define all skills at the protocol level, then scope them to each thread:
@@ -311,15 +329,15 @@ Pattern:
 When a skill declares secrets and an organization configures them, the skill runs in secure mode with its own isolated sandbox.
-### Standard vs Secure Skills
+### Standard vs Secure vs Device Skills
-| Aspect              | Standard Skills                   | Secure Skills                                       |
-| ------------------- | --------------------------------- | --------------------------------------------------- |
-| **Sandbox**         | Shared with other standard skills | Isolated (one per skill)                            |
-| **Available tools** | All 6 skill tools                 | `skill_read`, `skill_list`, `skill_run` only        |
-| **Script input**    | CLI arguments via `args`          | JSON via stdin (use `input` parameter)              |
-| **Environment**     | No secrets                        | Secrets as env vars                                 |
-| **Output**          | Raw stdout/stderr                 | Redacted (secret values replaced with `[REDACTED]`) |
+| Aspect              | Standard Skills          | Secure Skills                                       | Device Skills                                          |
+| ------------------- | ------------------------ | --------------------------------------------------- | ------------------------------------------------------ |
+| **Environment**     | Shared sandbox           | Isolated sandbox (one per skill)                    | Agent's computer (VM or desktop)                       |
+| **Available tools** | All 6 skill tools        | `skill_read`, `skill_list`, `skill_run` only        | `skill_read`, `skill_list`, `skill_run`, `skill_setup` |
+| **Script input**    | CLI arguments via `args` | JSON via stdin (use `input` parameter)              | CLI arguments via `args`                               |
+| **Secrets**         | No secrets               | Secrets as env vars                                 | No secrets                                             |
+| **Output**          | Raw stdout/stderr        | Redacted (secret values replaced with `[REDACTED]`) | Raw stdout/stderr                                      |
 ### Writing Scripts for Secure Skills

package/content/04-protocol/11-workers.md CHANGED Viewed

@@ -416,7 +416,9 @@ steps:
     maxSteps: 10
 ```
-Workers define their own skills independently -- they don't inherit skills from a parent interactive agent. Each thread gets its own sandbox scoped to only its listed skills.
+Workers define their own skills independently - they don't inherit skills from a parent interactive agent. Each thread gets its own sandbox scoped to only its listed skills.
+Skills with `execution: device` work the same way in workers as in interactive agents - the skill runs on the agent's computer. Workers resolve their device execution independently, so a worker can use device skills even if the parent agent does not.
 See [Skills](/docs/protocol/skills) for full documentation.

package/content/04-protocol/13-mcp-servers.md CHANGED Viewed

@@ -33,11 +33,13 @@ mcpServers:
 ### Fields
-| Field         | Required | Description                                                                           |
-| ------------- | -------- | ------------------------------------------------------------------------------------- |
-| `description` | Yes      | What the MCP server provides                                                          |
-| `source`      | Yes      | `remote` (platform-managed) or `device` (consumer-provided)                           |
-| `display`     | No       | How tool calls appear in UI: `hidden`, `name`, `description` (default: `description`) |
+| Field         | Required | Description                                                                                             |
+| ------------- | -------- | ------------------------------------------------------------------------------------------------------- |
+| `description` | Yes      | What the MCP server provides                                                                            |
+| `source`      | Yes      | `remote` (platform-managed) or `device` (consumer-provided)                                             |
+| `display`     | No       | How tool calls appear in UI: `hidden`, `name`, `description` (default: `description`)                   |
+| `connection`  | No       | When to connect: `eager` or `lazy` (default: `lazy`). Remote only.                                      |
+| `execution`   | No       | Where the MCP process runs: `sandbox` (default) or `device`. See [Device Execution](#device-execution). |
 ### Display Modes
@@ -134,6 +136,34 @@ Configuration happens in the Octavus platform UI:
 2. The server's slug must match the namespace in your protocol
 3. The platform connects, discovers tools, and makes them available to the agent
+### Connection Modes
+The `connection` field controls when the platform connects to a remote MCP server:
+| Mode    | Behavior                                                                                                               |
+| ------- | ---------------------------------------------------------------------------------------------------------------------- |
+| `lazy`  | (default) The agent activates integrations on demand at runtime. The agent starts responding immediately.              |
+| `eager` | The platform connects and discovers tools before the first LLM request. Tools are guaranteed available from message 1. |
+```yaml
+mcpServers:
+  sentry:
+    source: remote
+    connection: eager # Always connected upfront
+    display: name
+  notion:
+    source: remote
+    # connection defaults to lazy - agent activates when needed
+    display: description
+```
+With **lazy connection** (the default), the agent receives two built-in tools - one for listing available integrations and one for activating them. The agent decides which integrations it needs based on the conversation and activates them on demand. This avoids paying connection latency for integrations the agent doesn't end up using.
+With **eager connection**, the platform connects to the MCP server before the first LLM request, exactly like a declared tool. Use this when the agent needs the MCP's tools from the very first message.
+The `connection` field is only valid on `source: remote` MCPs - device MCPs (`source: device`) have their own connection mechanism through the server-sdk. For remote MCPs with `execution: device`, the `connection` field is respected the same way as for sandbox-executed MCPs.
 ### Authentication
 Remote MCP servers support multiple authentication methods:
@@ -147,6 +177,35 @@ Remote MCP servers support multiple authentication methods:
 Authentication is configured per-project - different projects can connect to the same MCP server with different credentials.
+## Device Execution
+The `execution` field controls where a remote MCP server's STDIO process runs. By default (`execution: sandbox`), the process runs in the platform's sandbox. When set to `execution: device`, the STDIO process runs on the agent's computer (VM or desktop) instead.
+```yaml
+mcpServers:
+  code-tools:
+    description: Code analysis and refactoring tools
+    source: remote
+    execution: device # STDIO process runs on the agent's computer
+    display: name
+  sentry:
+    description: Error tracking
+    source: remote
+    # execution defaults to sandbox - runs in the platform
+    display: name
+```
+### When to Use
+Use `execution: device` when the MCP server needs access to the agent's local environment - for example, tools that read from the local filesystem, interact with running processes, or need CLIs installed on the device.
+### Rules
+- `execution` is only meaningful for `source: remote` MCPs that use STDIO transport. HTTP-transport remote MCPs always connect from the platform regardless of the `execution` setting.
+- `execution: device` is **invalid** on `source: device` MCPs (they already run on the device by definition). Using it produces a validation error.
+- The `connection` field (`eager` or `lazy`) is respected for device-executed MCPs the same way as for sandbox-executed MCPs.
 ## Device MCP Servers
 Device MCP servers (`source: device`) run on the consumer's machine. The consumer provides the MCP tools via the `@octavus/computer` package (or any `ToolProvider` implementation) through the server-sdk.
@@ -224,10 +283,13 @@ onDemandMcpServers:
   remote:
     description: Additional connected integrations
     display: name
+    execution: device # on-demand MCPs run on the agent's computer
     contextRetention:
       toolResults: { retainLast: 5 }
 ```
+On-demand MCP definitions also support the `execution` field. When set, all MCPs matched by that on-demand source inherit the execution mode.
 ### Scope-level opt-in
 The agent and individual `start-thread` blocks each choose whether to pick up on-demand MCPs, by listing the sources they want:
@@ -295,6 +357,7 @@ mcpServers:
   figma:
     description: Figma design tool integration
     source: remote
+    connection: eager
     display: description
   sentry:
     description: Error tracking and debugging
@@ -355,10 +418,12 @@ mcpServers:
   figma:
     description: Figma design tool integration
     source: remote
+    connection: eager # Need design tools from message 1
     display: description
   sentry:
     description: Error tracking and debugging
     source: remote
+    # Lazy (default) - agent activates when debugging is needed
     display: name
 tools: