npm - @vellumai/assistant - Versions diffs - 0.5.2 → 0.5.4 - Mend

@vellumai/assistant 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (144) hide show

package/ARCHITECTURE.md +109 -0
package/docs/architecture/memory.md +105 -0
package/docs/skills.md +100 -0
package/package.json +1 -1
package/src/__tests__/archive-recall.test.ts +560 -0
package/src/__tests__/conversation-agent-loop-overflow.test.ts +7 -0
package/src/__tests__/conversation-agent-loop.test.ts +7 -0
package/src/__tests__/conversation-clear-safety.test.ts +259 -0
package/src/__tests__/conversation-memory-dirty-tail.test.ts +150 -0
package/src/__tests__/conversation-provider-retry-repair.test.ts +7 -0
package/src/__tests__/conversation-switch-memory-reduction.test.ts +474 -0
package/src/__tests__/conversation-wipe.test.ts +226 -0
package/src/__tests__/db-memory-archive-migration.test.ts +372 -0
package/src/__tests__/db-memory-brief-state-migration.test.ts +213 -0
package/src/__tests__/db-memory-reducer-checkpoints.test.ts +273 -0
package/src/__tests__/db-schedule-syntax-migration.test.ts +3 -0
package/src/__tests__/inline-command-runner.test.ts +311 -0
package/src/__tests__/inline-skill-authoring-guard.test.ts +220 -0
package/src/__tests__/inline-skill-load-permissions.test.ts +435 -0
package/src/__tests__/list-messages-attachments.test.ts +96 -0
package/src/__tests__/memory-brief-open-loops.test.ts +530 -0
package/src/__tests__/memory-brief-time.test.ts +285 -0
package/src/__tests__/memory-brief-wrapper.test.ts +311 -0
package/src/__tests__/memory-chunk-archive.test.ts +400 -0
package/src/__tests__/memory-chunk-dual-write.test.ts +453 -0
package/src/__tests__/memory-episode-archive.test.ts +370 -0
package/src/__tests__/memory-episode-dual-write.test.ts +626 -0
package/src/__tests__/memory-observation-archive.test.ts +375 -0
package/src/__tests__/memory-observation-dual-write.test.ts +318 -0
package/src/__tests__/memory-recall-quality.test.ts +2 -2
package/src/__tests__/memory-reducer-job.test.ts +538 -0
package/src/__tests__/memory-reducer-scheduling.test.ts +473 -0
package/src/__tests__/memory-reducer-store.test.ts +728 -0
package/src/__tests__/memory-reducer-types.test.ts +707 -0
package/src/__tests__/memory-reducer.test.ts +704 -0
package/src/__tests__/memory-regressions.test.ts +30 -8
package/src/__tests__/memory-simplified-config.test.ts +281 -0
package/src/__tests__/parse-identity-fields.test.ts +129 -0
package/src/__tests__/simplified-memory-e2e.test.ts +666 -0
package/src/__tests__/simplified-memory-runtime.test.ts +616 -0
package/src/__tests__/skill-load-inline-command.test.ts +598 -0
package/src/__tests__/skill-load-inline-includes.test.ts +644 -0
package/src/__tests__/skills-inline-command-expansions.test.ts +301 -0
package/src/__tests__/skills-transitive-hash.test.ts +333 -0
package/src/__tests__/vellum-self-knowledge-inline-command.test.ts +320 -0
package/src/__tests__/workspace-migration-backfill-installation-id.test.ts +4 -4
package/src/cli/commands/conversations.ts +18 -0
package/src/config/bundled-skills/app-builder/SKILL.md +8 -8
package/src/config/bundled-skills/schedule/TOOLS.json +8 -0
package/src/config/bundled-skills/skill-management/SKILL.md +1 -1
package/src/config/bundled-skills/skill-management/TOOLS.json +2 -2
package/src/config/feature-flag-registry.json +16 -0
package/src/config/raw-config-utils.ts +28 -0
package/src/config/schema.ts +12 -0
package/src/config/schemas/memory-simplified.ts +101 -0
package/src/config/schemas/memory.ts +4 -0
package/src/config/skills.ts +50 -4
package/src/daemon/conversation-agent-loop-handlers.ts +8 -3
package/src/daemon/conversation-agent-loop.ts +71 -1
package/src/daemon/conversation-lifecycle.ts +11 -1
package/src/daemon/conversation-memory.ts +117 -0
package/src/daemon/conversation-runtime-assembly.ts +3 -1
package/src/daemon/conversation-surfaces.ts +31 -8
package/src/daemon/conversation.ts +40 -23
package/src/daemon/handlers/config-embeddings.ts +10 -2
package/src/daemon/handlers/config-model.ts +0 -9
package/src/daemon/handlers/conversations.ts +11 -0
package/src/daemon/handlers/identity.ts +12 -1
package/src/daemon/lifecycle.ts +52 -1
package/src/daemon/message-types/conversations.ts +0 -1
package/src/daemon/server.ts +1 -1
package/src/followups/followup-store.ts +47 -1
package/src/memory/archive-recall.ts +516 -0
package/src/memory/archive-store.ts +400 -0
package/src/memory/brief-formatting.ts +33 -0
package/src/memory/brief-open-loops.ts +266 -0
package/src/memory/brief-time.ts +162 -0
package/src/memory/brief.ts +75 -0
package/src/memory/conversation-crud.ts +455 -101
package/src/memory/conversation-key-store.ts +33 -4
package/src/memory/db-init.ts +16 -0
package/src/memory/indexer.ts +106 -15
package/src/memory/job-handlers/backfill-simplified-memory.ts +462 -0
package/src/memory/job-handlers/conversation-starters.ts +9 -3
package/src/memory/job-handlers/embedding.test.ts +1 -0
package/src/memory/job-handlers/embedding.ts +83 -0
package/src/memory/job-handlers/reduce-conversation-memory.ts +229 -0
package/src/memory/job-utils.ts +1 -1
package/src/memory/jobs-store.ts +8 -0
package/src/memory/jobs-worker.ts +20 -0
package/src/memory/migrations/036-normalize-phone-identities.ts +49 -14
package/src/memory/migrations/135-backfill-contact-interaction-stats.ts +9 -1
package/src/memory/migrations/141-rename-verification-table.ts +8 -0
package/src/memory/migrations/142-rename-verification-session-id-column.ts +7 -2
package/src/memory/migrations/174-rename-thread-starters-table.ts +8 -0
package/src/memory/migrations/185-memory-brief-state.ts +52 -0
package/src/memory/migrations/186-memory-archive.ts +109 -0
package/src/memory/migrations/187-memory-reducer-checkpoints.ts +19 -0
package/src/memory/migrations/188-schedule-quiet-flag.ts +13 -0
package/src/memory/migrations/index.ts +4 -0
package/src/memory/qdrant-client.ts +23 -4
package/src/memory/reducer-scheduler.ts +242 -0
package/src/memory/reducer-store.ts +271 -0
package/src/memory/reducer-types.ts +106 -0
package/src/memory/reducer.ts +467 -0
package/src/memory/schema/conversations.ts +3 -0
package/src/memory/schema/index.ts +2 -0
package/src/memory/schema/infrastructure.ts +1 -0
package/src/memory/schema/memory-archive.ts +121 -0
package/src/memory/schema/memory-brief.ts +55 -0
package/src/memory/search/semantic.ts +17 -4
package/src/oauth/oauth-store.ts +3 -1
package/src/permissions/checker.ts +89 -6
package/src/permissions/defaults.ts +14 -0
package/src/runtime/auth/route-policy.ts +10 -1
package/src/runtime/routes/conversation-management-routes.ts +94 -2
package/src/runtime/routes/conversation-query-routes.ts +7 -0
package/src/runtime/routes/conversation-routes.ts +52 -5
package/src/runtime/routes/guardian-bootstrap-routes.ts +19 -7
package/src/runtime/routes/identity-routes.ts +2 -35
package/src/runtime/routes/llm-context-normalization.ts +14 -1
package/src/runtime/routes/memory-item-routes.ts +90 -5
package/src/runtime/routes/secret-routes.ts +3 -0
package/src/runtime/routes/surface-action-routes.ts +68 -1
package/src/schedule/schedule-store.ts +28 -0
package/src/schedule/scheduler.ts +6 -2
package/src/skills/inline-command-expansions.ts +204 -0
package/src/skills/inline-command-render.ts +127 -0
package/src/skills/inline-command-runner.ts +242 -0
package/src/skills/transitive-version-hash.ts +88 -0
package/src/tasks/task-store.ts +43 -1
package/src/telemetry/usage-telemetry-reporter.ts +1 -1
package/src/tools/filesystem/edit.ts +6 -1
package/src/tools/filesystem/read.ts +6 -1
package/src/tools/filesystem/write.ts +6 -1
package/src/tools/memory/handlers.ts +129 -1
package/src/tools/permission-checker.ts +8 -1
package/src/tools/schedule/create.ts +3 -0
package/src/tools/schedule/list.ts +5 -1
package/src/tools/schedule/update.ts +6 -0
package/src/tools/skills/load.ts +140 -6
package/src/util/platform.ts +18 -0
package/src/workspace/migrations/{002-backfill-installation-id.ts → 011-backfill-installation-id.ts} +1 -1
package/src/workspace/migrations/registry.ts +1 -1

package/ARCHITECTURE.md CHANGED Viewed

@@ -1261,6 +1261,115 @@ graph TB
     TRUST -->|"Deny rule matches"| DENY["Blocked"]
 ```
+### Inline Skill Command Expansion
+Skills can embed dynamic shell output in their SKILL.md body using `!`command`` tokens. When `skill_load` processes a skill containing these tokens, the commands are executed at load time through a sandboxed runner and their output is substituted inline. This enables externally authored skills to include project-specific context (e.g., directory listings, config values) without requiring manual edits.
+**Feature flag:** `feature_flags.inline-skill-commands.enabled` (default: enabled). When disabled, loading a skill that contains `!`command`` tokens fails closed with an error rather than leaving raw tokens in the prompt.
+#### Syntax and Parsing
+The `!`command`` syntax is parsed by `parseInlineCommandExpansions()` from the SKILL.md body after frontmatter extraction. The parser:
+- Extracts all `!`command`` tokens outside fenced code blocks (documentation examples in fenced blocks are ignored)
+- Assigns each token a stable `placeholderId` (0-indexed encounter order)
+- Rejects malformed tokens fail-closed: empty commands, nested backticks, and unmatched opening backticks produce `InlineCommandExpansionError` entries rather than best-effort expansions
+#### Transitive Version Hash
+When a skill contains inline command expansions, the permission system computes a **transitive version hash** (`tv1:<sha256>`) that covers the root skill and all its included children (DFS pre-order). The hash folds:
+1. Each visited skill ID (graph structure)
+2. Each visited skill's directory content hash (file changes)
+Editing any file in the root skill or any included child invalidates the transitive hash, which forces re-approval. The hash is computed by `computeTransitiveSkillVersionHash()` and fails closed (`TransitiveHashError`) on missing children or cycles in the include graph.
+#### Permission Gating (`skill_load_dynamic:*`)
+Skills containing inline command expansions use a separate permission candidate namespace (`skill_load_dynamic:*`) instead of the normal `skill_load:*` namespace. This prevents them from falling through to the permissive default `skill_load:*` allow rule. The permission checker emits candidates in specificity order:
+1. `skill_load_dynamic:<skill-id>@<transitive-hash>` — version-pinned approval (most specific)
+2. `skill_load_dynamic:<skill-id>` — any-version approval
+A default ask rule at priority 200 (`default:ask-skill_load_dynamic-global`) catches these candidates, ensuring the guardian is always prompted before inline commands execute. The user can create a pinned trust rule for a specific transitive hash to auto-approve known-good versions. Non-interactive sessions (no human present) deny dynamic skill loads rather than silently auto-approving.
+```mermaid
+graph TB
+    LOAD["skill_load(selector)"] --> PARSE["Parse SKILL.md body"]
+    PARSE --> CHECK{"Has !\x60command\x60<br/>tokens?"}
+    CHECK -->|"No"| NORMAL["Normal skill_load:* candidate<br/>(auto-allowed)"]
+    CHECK -->|"Yes"| FLAG{"inline-skill-commands<br/>flag enabled?"}
+    FLAG -->|"No"| FAIL_FLAG["Fail closed:<br/>error returned"]
+    FLAG -->|"Yes"| SOURCE{"Eligible source?<br/>(bundled/managed/workspace)"}
+    SOURCE -->|"No (extra)"| FAIL_SOURCE["Fail closed:<br/>source not eligible"]
+    SOURCE -->|"Yes"| HASH["Compute transitive hash"]
+    HASH --> DYN["skill_load_dynamic:id@hash<br/>candidate emitted"]
+    DYN --> PERM["PermissionChecker"]
+    PERM --> RULE{"Trust rule?"}
+    RULE -->|"Pinned allow"| RENDER["Execute + render"]
+    RULE -->|"No rule"| PROMPT["Prompt guardian"]
+    RULE -->|"Deny"| DENY["Blocked"]
+```
+#### Sandbox-Only Execution
+Inline commands are executed through `runInlineCommand()`, a purpose-built sandbox runner with strict security constraints:
+- **Sandbox enforced**: The sandbox is always enabled with `networkMode: "off"` — no outbound network connections
+- **Sanitized environment**: Uses `buildSanitizedEnv()` — no API keys, tokens, credentials, gateway URLs, or workspace paths in the environment
+- **No host fallback**: Unlike the general `bash` tool, there is no fallback to host execution when the sandbox is unavailable
+- **No credential proxy**: No CES client, no credential materialization
+- **Timeout**: 10-second wall-clock limit (killed with SIGKILL on timeout)
+- **Output cap**: 20,000 characters maximum (truncated with `[output truncated]` marker)
+- **Binary rejection**: Output with >10% non-printable characters (after ANSI stripping) is rejected
+- **Stdout only**: stderr is discarded; ANSI escape sequences are stripped from stdout
+The runner returns a deterministic `InlineCommandResult` with machine-readable failure reasons (`timeout`, `non_zero_exit`, `binary_output`, `spawn_failure`) — raw stderr is never surfaced.
+#### Rendering Flow
+The `renderInlineCommands()` function processes expansions sequentially (not in parallel) to maintain deterministic order. Each `!`command`` token is replaced with an XML-wrapped result:
+- **Success**: `<inline_skill_command index="N">...output...</inline_skill_command>`
+- **Failure**: `<inline_skill_command index="N">[inline command unavailable: <reason>]</inline_skill_command>`
+Rendering applies at two levels during `skill_load`:
+1. **Root skill**: If the loaded skill has inline expansions, they are rendered before the skill body is emitted. A root skill with inline commands that fails the feature-flag or source-eligibility check returns an error (fail closed, no `<loaded_skill>` marker).
+2. **Included children**: Each included child skill's body is rendered independently. A render failure in one child does not prevent sibling rendering — the failed child's body falls back to raw (unexpanded) text with a warning log.
+#### v1 Source Restriction
+In the initial release, only skills from **bundled**, **managed**, and **workspace** sources are eligible for inline command expansion. Skills from **extra** (third-party) roots are explicitly rejected with an error message. The `INLINE_COMMAND_ELIGIBLE_SOURCES` set in `load.ts` enforces this restriction. Unknown or future source types also fail closed.
+#### Fail-Closed Behavior Summary
+Every layer in the pipeline defaults to rejection rather than silent degradation:
+| Layer            | Failure mode                                         | Behavior                                               |
+| ---------------- | ---------------------------------------------------- | ------------------------------------------------------ |
+| Parser           | Malformed token (empty, nested backtick, unmatched)  | Logged as error, not expanded                          |
+| Feature flag     | Flag disabled                                        | `skill_load` returns error, no `<loaded_skill>` marker |
+| Source check     | `extra` or unknown source                            | `skill_load` returns error, no `<loaded_skill>` marker |
+| Transitive hash  | Missing child or cycle in include graph              | `TransitiveHashError` thrown, permission check fails   |
+| Permission       | No trust rule and non-interactive                    | Denied (never silently auto-approved)                  |
+| Sandbox runner   | Timeout, non-zero exit, binary output, spawn failure | Deterministic stub rendered, no raw stderr             |
+| Renderer (root)  | Feature flag off or ineligible source                | Error returned from `skill_load`                       |
+| Renderer (child) | Exception during render                              | Raw body used, sibling rendering continues             |
+#### Key Source Files
+| File                                                | Role                                                                             |
+| --------------------------------------------------- | -------------------------------------------------------------------------------- |
+| `assistant/src/skills/inline-command-expansions.ts` | `parseInlineCommandExpansions()` — parser for `!`command`` tokens                |
+| `assistant/src/skills/inline-command-runner.ts`     | `runInlineCommand()` — sandbox-only command executor                             |
+| `assistant/src/skills/inline-command-render.ts`     | `renderInlineCommands()` — token replacement and XML wrapping                    |
+| `assistant/src/skills/transitive-version-hash.ts`   | `computeTransitiveSkillVersionHash()` — hash covering root + included children   |
+| `assistant/src/tools/skills/load.ts`                | `skill_load` execute path — feature flag check, source check, render integration |
+| `assistant/src/permissions/checker.ts`              | `skill_load_dynamic:*` candidate emission and allowlist options                  |
+| `assistant/src/permissions/defaults.ts`             | `default:ask-skill_load_dynamic-global` rule (priority 200)                      |
+| `meta/feature-flags/feature-flag-registry.json`     | `inline-skill-commands` flag definition                                          |
 ### Key Source Files
 | File                                                | Role                                                                                       |

package/docs/architecture/memory.md CHANGED Viewed

@@ -2,6 +2,111 @@
 Assistant memory and context-injection architecture details.
+## Simplified Memory System (Default)
+The simplified memory system replaces the legacy item/tier/staleness model with a two-layer architecture: a **brief** (time-relevant context + open loops) plus **archive recall** (observations, chunks, episodes). It is enabled by default via `memory.simplified.enabled: true`.
+### Architecture Overview
+```mermaid
+graph TB
+    subgraph "Write Path (Simplified)"
+        MSG["Incoming Message"] --> REDUCER["Memory Reducer<br/>(LLM-backed, delayed)"]
+        REDUCER --> TC["time_contexts<br/>(brief state)"]
+        REDUCER --> OL["open_loops<br/>(brief state)"]
+        REDUCER --> OBS_R["Archive Observations<br/>(reducer output)"]
+        REDUCER --> EP_R["Archive Episodes<br/>(reducer output)"]
+        MSG --> INDEXER["Dual-Write Indexer"]
+        INDEXER --> OBS["memory_observations"]
+        INDEXER --> CHK["memory_chunks<br/>(content-hash deduped)"]
+        COMPACT["Context Compaction"] --> EP["memory_episodes"]
+    end
+    subgraph "Read Path (Simplified)"
+        TURN["User Turn"] --> BRIEF["Memory Brief Compiler"]
+        BRIEF --> TC
+        BRIEF --> OL
+        BRIEF --> BRIEF_OUT["&lt;memory_brief&gt;<br/>Time contexts + Open loops"]
+        TURN --> RECALL_GATE["Archive Recall Gate<br/>(keyword + pattern match)"]
+        RECALL_GATE --> PREFETCH["Prefetch<br/>(episodes + observations)"]
+        RECALL_GATE --> DEEP["Deeper Recall<br/>(episodes + observations + chunks)"]
+        DEEP --> RECALL_OUT["&lt;supporting_recall&gt;<br/>Source-linked bullets"]
+        BRIEF_OUT --> INJECT["Runtime Injection<br/>(prepend to user message)"]
+        RECALL_OUT --> INJECT
+    end
+    subgraph "Memory Tools (Simplified)"
+        SAVE["memory_save"] --> OBS
+        RECALL_TOOL["memory_recall"] --> RECALL_GATE
+    end
+```
+### Tables
+| Table                 | Purpose                                         | Write source                                            |
+| --------------------- | ----------------------------------------------- | ------------------------------------------------------- |
+| `time_contexts`       | Bounded temporal windows for the brief          | Reducer                                                 |
+| `open_loops`          | Unresolved follow-up items for the brief        | Reducer                                                 |
+| `memory_observations` | Raw factual statements from conversation turns  | Indexer dual-write, reducer, memory_save tool, backfill |
+| `memory_chunks`       | Deduplicated content units for embedding/recall | Derived from observations, content-hash deduped         |
+| `memory_episodes`     | Narrative summaries of interaction spans        | Compaction, reducer, backfill                           |
+### Reducer
+The memory reducer is a provider-backed (LLM) background process that analyzes unreduced conversation turns and produces structured CRUD operations for brief-state tables and archive candidates. It runs on a delay after conversation idle or switch, scheduled via the `reduce_conversation_memory` job. The reducer is side-effect-free; results are applied transactionally via `applyReducerResult`.
+### Brief
+The memory brief is compiled fresh on every turn from active `time_contexts` and `open_loops`. It is rendered as `<memory_brief>` XML and injected as a text block prepended to the user message. Empty sections are omitted.
+### Archive Recall
+Archive recall runs when the user's turn triggers a recall gate (past-reference language, analogy/debugging patterns, or strong prefetch hits). It queries episodes, observations, and chunks via keyword matching and returns up to 3 source-linked bullets in `<supporting_recall>`. No recall tag is emitted when results are empty.
+### Backfill
+Existing users have legacy data in `memory_segments`, `memory_summaries`, and `memory_items`. The `backfill_simplified_memory` job migrates this data into the simplified tables:
+- `memory_segments` -> `memory_observations` + `memory_chunks`
+- `memory_summaries` -> `memory_episodes`
+- Active, high-confidence `memory_items` -> `memory_observations` + `memory_chunks`, with unambiguous items also mapped to `time_contexts` or `open_loops`
+The backfill is idempotent (content-hash dedup + checkpoint tracking), processes in batches of 200, and self-enqueues continuation jobs for large datasets.
+### Rollback Posture
+The legacy memory system remains fully available as a short-lived rollback path:
+- **Legacy tables are preserved**: `memory_segments`, `memory_items`, `memory_summaries`, and `memory_item_sources` remain in the schema and continue to receive writes from the legacy indexer/extraction pipeline.
+- **Flag-gated**: Setting `memory.simplified.enabled: false` reverts to the legacy item/tier/staleness model for both read and write paths.
+- **Memory tools**: `memory_save` and `memory_recall` check the flag at call time and route to the appropriate path (simplified observations or legacy items).
+- **No data loss**: The backfill copies data without deleting legacy rows. Both systems can coexist.
+### Key Files
+| File                                                              | Role                                             |
+| ----------------------------------------------------------------- | ------------------------------------------------ |
+| `assistant/src/config/schemas/memory-simplified.ts`               | Config schema with `enabled: true` default       |
+| `assistant/src/memory/reducer.ts`                                 | Provider-backed reducer (LLM call + parse)       |
+| `assistant/src/memory/reducer-store.ts`                           | Transactional result application                 |
+| `assistant/src/memory/reducer-scheduler.ts`                       | Idle-delay and conversation-switch scheduling    |
+| `assistant/src/memory/archive-store.ts`                           | Observation/chunk/episode write helpers          |
+| `assistant/src/memory/archive-recall.ts`                          | Prefetch + deeper recall over archive tables     |
+| `assistant/src/memory/brief.ts`                                   | Brief composer (time contexts + open loops)      |
+| `assistant/src/memory/job-handlers/backfill-simplified-memory.ts` | Legacy data migration handler                    |
+| `assistant/src/tools/memory/handlers.ts`                          | Memory tool handlers (simplified/legacy routing) |
+| `assistant/src/__tests__/simplified-memory-e2e.test.ts`           | End-to-end test suite                            |
+---
+## Legacy Memory System — Daemon Data Flow
+> **Note**: The legacy system below is retained as rollback support. New installations use the simplified system by default.
 ## Memory System — Daemon Data Flow
 ```mermaid

package/docs/skills.md CHANGED Viewed

@@ -156,3 +156,103 @@ Trust rules are stored in `~/.vellum/protected/trust.json`. You can inspect this
 ### "A skill tool keeps prompting even though I approved it."
 Check whether the rule has the correct `executionTarget` — a rule scoped to `sandbox` will not match a tool running on `host`.
+## Inline Command Expansions
+Skills can embed dynamic content by using the **inline command expansion** syntax. When a skill containing these tokens is loaded, each token is executed and replaced with its output before the skill body is delivered to the model. The syntax is shown in the fenced block below.
+This syntax is intentionally compatible with the convention established by [inline skill commands](https://x.com) for portable cross-agent skill authoring. Vellum adopts the exact same token format so that externally authored skills load without rewriting — but applies stricter execution constraints.
+### Syntax
+The canonical syntax is:
+```
+!`command`
+```
+Where `command` is any shell command string. The exclamation mark immediately precedes the opening backtick with no whitespace in between. Examples:
+```markdown
+Current branch: !`git branch --show-current`
+Recent changes: !`git log --oneline -5`
+Project info: !`cat package.json | jq '.name, .version'`
+```
+Tokens inside fenced code blocks (` ``` ` or `~~~`) are **not** expanded — they are treated as documentation examples. This allows skills to safely include syntax examples without triggering execution.
+### Parsing rules
+The parser (`parseInlineCommandExpansions`) enforces fail-closed semantics:
+| Condition                                         | Behavior               |
+| ------------------------------------------------- | ---------------------- |
+| Well-formed token outside fenced code             | Parsed as an expansion |
+| Token inside a fenced code block                  | Skipped (not expanded) |
+| Empty command text (no content between backticks) | Rejected as malformed  |
+| Whitespace-only command text                      | Rejected as malformed  |
+| Unmatched opening (no closing backtick found)     | Rejected as malformed  |
+| Nested backticks inside command text              | Rejected as malformed  |
+Malformed tokens do not silently pass through — they are collected as errors and logged. If a skill body contains any malformed tokens, the valid tokens are still expanded, but the errors are reported for diagnostics.
+### Feature flag
+Inline command expansion is gated by the `inline-skill-commands` feature flag (key: `feature_flags.inline-skill-commands.enabled`). The flag defaults to **enabled**.
+When the flag is disabled and a skill contains inline command expansion tokens, `skill_load` returns an error rather than delivering unexpanded tokens to the model. This fail-closed behavior prevents the LLM from seeing raw expansion tokens and attempting to interpret them.
+### Approval model
+Skills with inline command expansions use a separate permission namespace: `skill_load_dynamic:*`. This ensures they do not silently inherit the permissive default `skill_load:*` allow rule.
+When a user is prompted to approve a dynamic skill load, the allowlist options are:
+| Option         | Pattern                                     | Behavior                                                                                            |
+| -------------- | ------------------------------------------- | --------------------------------------------------------------------------------------------------- |
+| Version-pinned | `skill_load_dynamic:<id>@<transitive-hash>` | Approved for this exact version only. Any change to the skill or its includes invalidates the rule. |
+| Any-version    | `skill_load_dynamic:<id>`                   | Approved for all versions of this skill.                                                            |
+The transitive hash covers the skill's own content plus all included skills, so a change anywhere in the dependency graph triggers re-approval for version-pinned rules.
+### v1 execution limits
+In the initial implementation, inline command execution enforces these constraints:
+| Constraint       | Value                                                   |
+| ---------------- | ------------------------------------------------------- |
+| Execution target | Sandbox only (no host fallback)                         |
+| Network access   | Off (no outbound connections)                           |
+| Environment      | Sanitized (no API keys, tokens, or credentials)         |
+| Timeout          | 10 seconds per command                                  |
+| Output cap       | 20,000 characters (truncated with `[output truncated]`) |
+| Binary output    | Rejected if >10% non-printable characters               |
+| ANSI sequences   | Stripped before output processing                       |
+| stderr           | Discarded (only stdout is captured)                     |
+Commands that fail (timeout, non-zero exit, spawn failure, binary output) produce a deterministic stub in the rendered body rather than leaking raw error output:
+```
+<inline_skill_command index="0">[inline command unavailable: command timed out]</inline_skill_command>
+```
+### Eligible skill sources
+Only **bundled**, **managed**, and **workspace** skills may use inline command expansions. Third-party **extra** skill sources are explicitly rejected — `skill_load` returns an error if an extra-source skill contains inline expansion tokens.
+| Source      | Eligible | Reason                                 |
+| ----------- | -------- | -------------------------------------- |
+| `bundled`   | Yes      | Shipped with the application, trusted  |
+| `managed`   | Yes      | User-installed, subject to approval    |
+| `workspace` | Yes      | Project-local, subject to approval     |
+| `extra`     | No       | Third-party roots, out of scope for v1 |
+### Fail-closed summary
+The system fails closed at every layer:
+1. **Flag off** — skill_load returns an error, tokens never reach the model.
+2. **Malformed syntax** — rejected by the parser, logged as errors.
+3. **Unsupported source** — skill_load returns an error for extra-source skills.
+4. **Command failure** — deterministic stub replaces the token, no raw stderr.
+5. **No permission** — `skill_load_dynamic:*` namespace requires explicit approval.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@vellumai/assistant",
-  "version": "0.5.2",
+  "version": "0.5.4",
   "type": "module",
   "exports": {
     ".": "./src/index.ts"