npm - @octavus/docs - Versions diffs - 3.1.0 → 3.2.0 - Mend

@octavus/docs 3.1.0 → 3.2.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/content/03-client-sdk/02-messages.md +28 -1
package/content/04-protocol/01-overview.md +1 -0
package/content/04-protocol/07-agent-config.md +49 -11
package/content/04-protocol/13-mcp-servers.md +32 -0
package/dist/{chunk-PD34BHI2.js → chunk-R4UMXGAC.js} +17 -17
package/dist/chunk-R4UMXGAC.js.map +1 -0
package/dist/content.js +1 -1
package/dist/docs.json +8 -8
package/dist/index.js +1 -1
package/dist/search-index.json +1 -1
package/dist/search.js +1 -1
package/dist/search.js.map +1 -1
package/dist/sections.json +8 -8
package/package.json +1 -1
package/dist/chunk-PD34BHI2.js.map +0 -1

package/content/03-client-sdk/02-messages.md CHANGED Viewed

@@ -31,7 +31,9 @@ type UIMessagePart =
   | UIOperationPart
   | UISourcePart
   | UIFilePart
-  | UIObjectPart;
+  | UIObjectPart
+  | UITodoPart
+  | UIWorkerPart;
 // Text content
 interface UITextPart {
@@ -107,6 +109,31 @@ interface UIObjectPart {
   error?: string;
   thread?: string;
 }
+// Structured task list (when the agent uses octavus_todo_write)
+interface UITodoPart {
+  type: 'todo';
+  todos: {
+    id: string;
+    content: string;
+    status: 'pending' | 'in_progress' | 'completed' | 'cancelled';
+  }[];
+  status: 'streaming' | 'done';
+  thread?: string;
+}
+// Sub-agent execution container (when an agent invokes a worker)
+interface UIWorkerPart {
+  type: 'worker';
+  workerId: string;
+  workerSlug: string;
+  description?: string;
+  input?: Record<string, unknown>;
+  parts: UIMessagePart[]; // Nested parts from the worker (excluding nested workers)
+  output?: unknown;
+  error?: string;
+  status: 'running' | 'done' | 'error';
+}
 ```
 ## Sending Messages

package/content/04-protocol/01-overview.md CHANGED Viewed

@@ -90,6 +90,7 @@ agent:
   skills: [qr-code] # Enable skills
   imageModel: google/gemini-2.5-flash-image # Enable image generation
   webSearch: true # Enable web search
+  todoList: true # Enable structured task tracking
   agentic: true # Allow multiple tool calls
   thinking: medium # Extended reasoning

package/content/04-protocol/07-agent-config.md CHANGED Viewed

@@ -47,11 +47,11 @@ Specify models in `provider/model-id` format. Any model supported by the provide
 ### Supported Providers
-| Provider  | Format                 | Examples                                                             |
-| --------- | ---------------------- | -------------------------------------------------------------------- |
-| Anthropic | `anthropic/{model-id}` | `claude-opus-4-5`, `claude-sonnet-4-5`, `claude-haiku-4-5`           |
-| Google    | `google/{model-id}`    | `gemini-3-pro-preview`, `gemini-3-flash-preview`, `gemini-2.5-flash` |
-| OpenAI    | `openai/{model-id}`    | `gpt-5`, `gpt-4o`, `o4-mini`, `o3`, `o3-mini`, `o1`                  |
+| Provider  | Format                 | Examples                                                                                           |
+| --------- | ---------------------- | -------------------------------------------------------------------------------------------------- |
+| Anthropic | `anthropic/{model-id}` | `claude-opus-4-7`, `claude-opus-4-6`, `claude-sonnet-4-6`, `claude-sonnet-4-5`, `claude-haiku-4-5` |
+| Google    | `google/{model-id}`    | `gemini-3-pro-preview`, `gemini-3-flash-preview`, `gemini-2.5-flash`                               |
+| OpenAI    | `openai/{model-id}`    | `gpt-5`, `gpt-4o`, `o4-mini`, `o3`, `o3-mini`, `o1`                                                |
 ### Examples
@@ -225,14 +225,28 @@ agent:
   thinking: medium # low | medium | high
 ```
-| Level    | Token Budget | Use Case            |
-| -------- | ------------ | ------------------- |
-| `low`    | ~5,000       | Simple reasoning    |
-| `medium` | ~10,000      | Moderate complexity |
-| `high`   | ~20,000      | Complex analysis    |
+| Level    | Use Case            |
+| -------- | ------------------- |
+| `low`    | Simple reasoning    |
+| `medium` | Moderate complexity |
+| `high`   | Complex analysis    |
 Thinking content streams to the UI and can be displayed to users.
+### How levels are applied
+Each provider translates `thinking` into its own reasoning controls:
+| Provider                                                                   | Level mapping                                                                                     |
+| -------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- |
+| Anthropic 4.6+ (`claude-opus-4-7`, `claude-opus-4-6`, `claude-sonnet-4-6`) | Adaptive thinking - the model decides how much to reason, guided by `effort: low / medium / high` |
+| Anthropic older (4.5 and earlier)                                          | Fixed token budgets: `low` ~5,000, `medium` ~10,000, `high` ~20,000                               |
+| OpenAI (GPT-5.x, o-series)                                                 | `reasoningEffort: low / medium / high`                                                            |
+| Google (Gemini 3.x)                                                        | `thinkingLevel: low / high` (`medium` rounds up to `high`)                                        |
+| Google (Gemini 1.x / 2.x)                                                  | Token budgets: `low` 1,024, `medium` 8,192, `high` 24,576                                         |
+| OpenRouter                                                                 | Unified `reasoning.max_tokens` (translated upstream)                                              |
+| Vercel AI Gateway                                                          | Forwards the underlying provider's options                                                        |
 ## Prompt Caching
 Providers charge less for tokens served from their prompt cache (often 10% of the uncached rate). Octavus exposes a single `cache` field that picks the right retention policy per provider, so the stable prefix of your agent - tools, system prompt, and historical messages - gets billed at the cache-read rate on repeat requests.
@@ -400,6 +414,28 @@ Use cases:
 - Fact verification and documentation lookups
 - Any information that may have changed since the model's training
+## TODO List
+Enable the LLM to maintain a structured task list while it works:
+```yaml
+agent:
+  model: anthropic/claude-sonnet-4-5
+  system: system
+  todoList: true
+  agentic: true
+```
+When `todoList` is enabled, the `octavus_todo_write` tool becomes available. The LLM creates and updates a list of items - each with `id`, `content`, and `status` (`pending`, `in_progress`, `completed`, `cancelled`) - and the platform emits a `todo-update` stream event with the resolved snapshot. The Client SDK accumulates updates into a single `UITodoPart` per assistant message, so consumers render an evolving "Plan" card without managing state themselves.
+The list persists across messages: the LLM can use `merge=true` to update items by id (sending only the changed fields), or `merge=false` to replace the list entirely.
+Use cases:
+- Multi-step tasks where the user benefits from seeing progress
+- Long-running agentic loops that should communicate intent
+- Workflows where the agent plans before acting
 ## Temperature
 Control response randomness:
@@ -460,9 +496,10 @@ handlers:
       references: [escalation-policy] # Thread-specific references
       imageModel: google/gemini-2.5-flash-image # Thread-specific image model
       webSearch: true # Thread-specific web search
+      todoList: true # Thread-specific task list
 ```
-Each thread can have its own model, backup model, cache mode, MCP servers, skills, references, image model, and web search setting. Skills must be defined in the protocol's `skills:` section. References must exist in the agent's `references/` directory. Workers use this same pattern since they don't have a global `agent:` section.
+Each thread can have its own model, backup model, cache mode, MCP servers, skills, references, image model, web search setting, and task list setting. Skills must be defined in the protocol's `skills:` section. References must exist in the agent's `references/` directory. Workers use this same pattern since they don't have a global `agent:` section.
 ## Full Example
@@ -520,6 +557,7 @@ agent:
   skills: [qr-code] # Octavus skills
   references: [support-policies] # On-demand context
   webSearch: true # Built-in web search
+  todoList: true # Structured task tracking
   agentic: true
   maxSteps: 10
   thinking: medium

package/content/04-protocol/13-mcp-servers.md CHANGED Viewed

@@ -38,6 +38,7 @@ mcpServers:
 | `description` | Yes      | What the MCP server provides                                                          |
 | `source`      | Yes      | `remote` (platform-managed) or `device` (consumer-provided)                           |
 | `display`     | No       | How tool calls appear in UI: `hidden`, `name`, `description` (default: `description`) |
+| `connection`  | No       | When to connect: `eager` or `lazy` (default: `lazy`). Remote only.                    |
 ### Display Modes
@@ -134,6 +135,34 @@ Configuration happens in the Octavus platform UI:
 2. The server's slug must match the namespace in your protocol
 3. The platform connects, discovers tools, and makes them available to the agent
+### Connection Modes
+The `connection` field controls when the platform connects to a remote MCP server:
+| Mode    | Behavior                                                                                                               |
+| ------- | ---------------------------------------------------------------------------------------------------------------------- |
+| `lazy`  | (default) The agent activates integrations on demand at runtime. The agent starts responding immediately.              |
+| `eager` | The platform connects and discovers tools before the first LLM request. Tools are guaranteed available from message 1. |
+```yaml
+mcpServers:
+  sentry:
+    source: remote
+    connection: eager # Always connected upfront
+    display: name
+  notion:
+    source: remote
+    # connection defaults to lazy - agent activates when needed
+    display: description
+```
+With **lazy connection** (the default), the agent receives two built-in tools - one for listing available integrations and one for activating them. The agent decides which integrations it needs based on the conversation and activates them on demand. This avoids paying connection latency for integrations the agent doesn't end up using.
+With **eager connection**, the platform connects to the MCP server before the first LLM request, exactly like a declared tool. Use this when the agent needs the MCP server's tools from the very first message.
+The `connection` field is only valid for servers with `source: remote` - device MCP servers have their own connection mechanism through the server-sdk.
 ### Authentication
 Remote MCP servers support multiple authentication methods:
@@ -295,6 +324,7 @@ mcpServers:
   figma:
     description: Figma design tool integration
     source: remote
+    connection: eager
     display: description
   sentry:
     description: Error tracking and debugging
@@ -355,10 +385,12 @@ mcpServers:
   figma:
     description: Figma design tool integration
     source: remote
+    connection: eager # Need design tools from message 1
     display: description
   sentry:
     description: Error tracking and debugging
     source: remote
+    # Lazy (default) - agent activates when debugging is needed
     display: name
 tools: