@vellumai/assistant 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. package/ARCHITECTURE.md +109 -0
  2. package/docs/architecture/memory.md +105 -0
  3. package/docs/skills.md +100 -0
  4. package/package.json +1 -1
  5. package/src/__tests__/archive-recall.test.ts +560 -0
  6. package/src/__tests__/conversation-agent-loop-overflow.test.ts +7 -0
  7. package/src/__tests__/conversation-agent-loop.test.ts +7 -0
  8. package/src/__tests__/conversation-clear-safety.test.ts +259 -0
  9. package/src/__tests__/conversation-memory-dirty-tail.test.ts +150 -0
  10. package/src/__tests__/conversation-provider-retry-repair.test.ts +7 -0
  11. package/src/__tests__/conversation-switch-memory-reduction.test.ts +474 -0
  12. package/src/__tests__/conversation-wipe.test.ts +226 -0
  13. package/src/__tests__/db-memory-archive-migration.test.ts +372 -0
  14. package/src/__tests__/db-memory-brief-state-migration.test.ts +213 -0
  15. package/src/__tests__/db-memory-reducer-checkpoints.test.ts +273 -0
  16. package/src/__tests__/db-schedule-syntax-migration.test.ts +3 -0
  17. package/src/__tests__/inline-command-runner.test.ts +311 -0
  18. package/src/__tests__/inline-skill-authoring-guard.test.ts +220 -0
  19. package/src/__tests__/inline-skill-load-permissions.test.ts +435 -0
  20. package/src/__tests__/list-messages-attachments.test.ts +96 -0
  21. package/src/__tests__/memory-brief-open-loops.test.ts +530 -0
  22. package/src/__tests__/memory-brief-time.test.ts +285 -0
  23. package/src/__tests__/memory-brief-wrapper.test.ts +311 -0
  24. package/src/__tests__/memory-chunk-archive.test.ts +400 -0
  25. package/src/__tests__/memory-chunk-dual-write.test.ts +453 -0
  26. package/src/__tests__/memory-episode-archive.test.ts +370 -0
  27. package/src/__tests__/memory-episode-dual-write.test.ts +626 -0
  28. package/src/__tests__/memory-observation-archive.test.ts +375 -0
  29. package/src/__tests__/memory-observation-dual-write.test.ts +318 -0
  30. package/src/__tests__/memory-recall-quality.test.ts +2 -2
  31. package/src/__tests__/memory-reducer-job.test.ts +538 -0
  32. package/src/__tests__/memory-reducer-scheduling.test.ts +473 -0
  33. package/src/__tests__/memory-reducer-store.test.ts +728 -0
  34. package/src/__tests__/memory-reducer-types.test.ts +707 -0
  35. package/src/__tests__/memory-reducer.test.ts +704 -0
  36. package/src/__tests__/memory-regressions.test.ts +30 -8
  37. package/src/__tests__/memory-simplified-config.test.ts +281 -0
  38. package/src/__tests__/parse-identity-fields.test.ts +129 -0
  39. package/src/__tests__/simplified-memory-e2e.test.ts +666 -0
  40. package/src/__tests__/simplified-memory-runtime.test.ts +616 -0
  41. package/src/__tests__/skill-load-inline-command.test.ts +598 -0
  42. package/src/__tests__/skill-load-inline-includes.test.ts +644 -0
  43. package/src/__tests__/skills-inline-command-expansions.test.ts +301 -0
  44. package/src/__tests__/skills-transitive-hash.test.ts +333 -0
  45. package/src/__tests__/vellum-self-knowledge-inline-command.test.ts +320 -0
  46. package/src/__tests__/workspace-migration-backfill-installation-id.test.ts +4 -4
  47. package/src/cli/commands/conversations.ts +18 -0
  48. package/src/config/bundled-skills/app-builder/SKILL.md +8 -8
  49. package/src/config/bundled-skills/schedule/TOOLS.json +8 -0
  50. package/src/config/bundled-skills/skill-management/SKILL.md +1 -1
  51. package/src/config/bundled-skills/skill-management/TOOLS.json +2 -2
  52. package/src/config/feature-flag-registry.json +16 -0
  53. package/src/config/raw-config-utils.ts +28 -0
  54. package/src/config/schema.ts +12 -0
  55. package/src/config/schemas/memory-simplified.ts +101 -0
  56. package/src/config/schemas/memory.ts +4 -0
  57. package/src/config/skills.ts +50 -4
  58. package/src/daemon/conversation-agent-loop-handlers.ts +8 -3
  59. package/src/daemon/conversation-agent-loop.ts +71 -1
  60. package/src/daemon/conversation-lifecycle.ts +11 -1
  61. package/src/daemon/conversation-memory.ts +117 -0
  62. package/src/daemon/conversation-runtime-assembly.ts +3 -1
  63. package/src/daemon/conversation-surfaces.ts +31 -8
  64. package/src/daemon/conversation.ts +40 -23
  65. package/src/daemon/handlers/config-embeddings.ts +10 -2
  66. package/src/daemon/handlers/config-model.ts +0 -9
  67. package/src/daemon/handlers/conversations.ts +11 -0
  68. package/src/daemon/handlers/identity.ts +12 -1
  69. package/src/daemon/lifecycle.ts +52 -1
  70. package/src/daemon/message-types/conversations.ts +0 -1
  71. package/src/daemon/server.ts +1 -1
  72. package/src/followups/followup-store.ts +47 -1
  73. package/src/memory/archive-recall.ts +516 -0
  74. package/src/memory/archive-store.ts +400 -0
  75. package/src/memory/brief-formatting.ts +33 -0
  76. package/src/memory/brief-open-loops.ts +266 -0
  77. package/src/memory/brief-time.ts +162 -0
  78. package/src/memory/brief.ts +75 -0
  79. package/src/memory/conversation-crud.ts +455 -101
  80. package/src/memory/conversation-key-store.ts +33 -4
  81. package/src/memory/db-init.ts +16 -0
  82. package/src/memory/indexer.ts +106 -15
  83. package/src/memory/job-handlers/backfill-simplified-memory.ts +462 -0
  84. package/src/memory/job-handlers/conversation-starters.ts +9 -3
  85. package/src/memory/job-handlers/embedding.test.ts +1 -0
  86. package/src/memory/job-handlers/embedding.ts +83 -0
  87. package/src/memory/job-handlers/reduce-conversation-memory.ts +229 -0
  88. package/src/memory/job-utils.ts +1 -1
  89. package/src/memory/jobs-store.ts +8 -0
  90. package/src/memory/jobs-worker.ts +20 -0
  91. package/src/memory/migrations/036-normalize-phone-identities.ts +49 -14
  92. package/src/memory/migrations/135-backfill-contact-interaction-stats.ts +9 -1
  93. package/src/memory/migrations/141-rename-verification-table.ts +8 -0
  94. package/src/memory/migrations/142-rename-verification-session-id-column.ts +7 -2
  95. package/src/memory/migrations/174-rename-thread-starters-table.ts +8 -0
  96. package/src/memory/migrations/185-memory-brief-state.ts +52 -0
  97. package/src/memory/migrations/186-memory-archive.ts +109 -0
  98. package/src/memory/migrations/187-memory-reducer-checkpoints.ts +19 -0
  99. package/src/memory/migrations/188-schedule-quiet-flag.ts +13 -0
  100. package/src/memory/migrations/index.ts +4 -0
  101. package/src/memory/qdrant-client.ts +23 -4
  102. package/src/memory/reducer-scheduler.ts +242 -0
  103. package/src/memory/reducer-store.ts +271 -0
  104. package/src/memory/reducer-types.ts +106 -0
  105. package/src/memory/reducer.ts +467 -0
  106. package/src/memory/schema/conversations.ts +3 -0
  107. package/src/memory/schema/index.ts +2 -0
  108. package/src/memory/schema/infrastructure.ts +1 -0
  109. package/src/memory/schema/memory-archive.ts +121 -0
  110. package/src/memory/schema/memory-brief.ts +55 -0
  111. package/src/memory/search/semantic.ts +17 -4
  112. package/src/oauth/oauth-store.ts +3 -1
  113. package/src/permissions/checker.ts +89 -6
  114. package/src/permissions/defaults.ts +14 -0
  115. package/src/runtime/auth/route-policy.ts +10 -1
  116. package/src/runtime/routes/conversation-management-routes.ts +94 -2
  117. package/src/runtime/routes/conversation-query-routes.ts +7 -0
  118. package/src/runtime/routes/conversation-routes.ts +52 -5
  119. package/src/runtime/routes/guardian-bootstrap-routes.ts +19 -7
  120. package/src/runtime/routes/identity-routes.ts +2 -35
  121. package/src/runtime/routes/llm-context-normalization.ts +14 -1
  122. package/src/runtime/routes/memory-item-routes.ts +90 -5
  123. package/src/runtime/routes/secret-routes.ts +3 -0
  124. package/src/runtime/routes/surface-action-routes.ts +68 -1
  125. package/src/schedule/schedule-store.ts +28 -0
  126. package/src/schedule/scheduler.ts +6 -2
  127. package/src/skills/inline-command-expansions.ts +204 -0
  128. package/src/skills/inline-command-render.ts +127 -0
  129. package/src/skills/inline-command-runner.ts +242 -0
  130. package/src/skills/transitive-version-hash.ts +88 -0
  131. package/src/tasks/task-store.ts +43 -1
  132. package/src/telemetry/usage-telemetry-reporter.ts +1 -1
  133. package/src/tools/filesystem/edit.ts +6 -1
  134. package/src/tools/filesystem/read.ts +6 -1
  135. package/src/tools/filesystem/write.ts +6 -1
  136. package/src/tools/memory/handlers.ts +129 -1
  137. package/src/tools/permission-checker.ts +8 -1
  138. package/src/tools/schedule/create.ts +3 -0
  139. package/src/tools/schedule/list.ts +5 -1
  140. package/src/tools/schedule/update.ts +6 -0
  141. package/src/tools/skills/load.ts +140 -6
  142. package/src/util/platform.ts +18 -0
  143. package/src/workspace/migrations/{002-backfill-installation-id.ts → 011-backfill-installation-id.ts} +1 -1
  144. package/src/workspace/migrations/registry.ts +1 -1
package/ARCHITECTURE.md CHANGED
@@ -1261,6 +1261,115 @@ graph TB
1261
1261
  TRUST -->|"Deny rule matches"| DENY["Blocked"]
1262
1262
  ```
1263
1263
 
1264
+ ### Inline Skill Command Expansion
1265
+
1266
+ Skills can embed dynamic shell output in their SKILL.md body using `!`command`` tokens. When `skill_load` processes a skill containing these tokens, the commands are executed at load time through a sandboxed runner and their output is substituted inline. This enables externally authored skills to include project-specific context (e.g., directory listings, config values) without requiring manual edits.
1267
+
1268
+ **Feature flag:** `feature_flags.inline-skill-commands.enabled` (default: enabled). When disabled, loading a skill that contains `!`command`` tokens fails closed with an error rather than leaving raw tokens in the prompt.
1269
+
1270
+ #### Syntax and Parsing
1271
+
1272
+ The `!`command`` syntax is parsed by `parseInlineCommandExpansions()` from the SKILL.md body after frontmatter extraction. The parser:
1273
+
1274
+ - Extracts all `!`command`` tokens outside fenced code blocks (documentation examples in fenced blocks are ignored)
1275
+ - Assigns each token a stable `placeholderId` (0-indexed encounter order)
1276
+ - Rejects malformed tokens fail-closed: empty commands, nested backticks, and unmatched opening backticks produce `InlineCommandExpansionError` entries rather than best-effort expansions
1277
+
1278
+ #### Transitive Version Hash
1279
+
1280
+ When a skill contains inline command expansions, the permission system computes a **transitive version hash** (`tv1:<sha256>`) that covers the root skill and all its included children (DFS pre-order). The hash folds:
1281
+
1282
+ 1. Each visited skill ID (graph structure)
1283
+ 2. Each visited skill's directory content hash (file changes)
1284
+
1285
+ Editing any file in the root skill or any included child invalidates the transitive hash, which forces re-approval. The hash is computed by `computeTransitiveSkillVersionHash()` and fails closed (`TransitiveHashError`) on missing children or cycles in the include graph.
1286
+
1287
+ #### Permission Gating (`skill_load_dynamic:*`)
1288
+
1289
+ Skills containing inline command expansions use a separate permission candidate namespace (`skill_load_dynamic:*`) instead of the normal `skill_load:*` namespace. This prevents them from falling through to the permissive default `skill_load:*` allow rule. The permission checker emits candidates in specificity order:
1290
+
1291
+ 1. `skill_load_dynamic:<skill-id>@<transitive-hash>` — version-pinned approval (most specific)
1292
+ 2. `skill_load_dynamic:<skill-id>` — any-version approval
1293
+
1294
+ A default ask rule at priority 200 (`default:ask-skill_load_dynamic-global`) catches these candidates, ensuring the guardian is always prompted before inline commands execute. The user can create a pinned trust rule for a specific transitive hash to auto-approve known-good versions. Non-interactive sessions (no human present) deny dynamic skill loads rather than silently auto-approving.
1295
+
1296
+ ```mermaid
1297
+ graph TB
1298
+ LOAD["skill_load(selector)"] --> PARSE["Parse SKILL.md body"]
1299
+ PARSE --> CHECK{"Has !\x60command\x60<br/>tokens?"}
1300
+ CHECK -->|"No"| NORMAL["Normal skill_load:* candidate<br/>(auto-allowed)"]
1301
+ CHECK -->|"Yes"| FLAG{"inline-skill-commands<br/>flag enabled?"}
1302
+ FLAG -->|"No"| FAIL_FLAG["Fail closed:<br/>error returned"]
1303
+ FLAG -->|"Yes"| SOURCE{"Eligible source?<br/>(bundled/managed/workspace)"}
1304
+ SOURCE -->|"No (extra)"| FAIL_SOURCE["Fail closed:<br/>source not eligible"]
1305
+ SOURCE -->|"Yes"| HASH["Compute transitive hash"]
1306
+ HASH --> DYN["skill_load_dynamic:id@hash<br/>candidate emitted"]
1307
+ DYN --> PERM["PermissionChecker"]
1308
+ PERM --> RULE{"Trust rule?"}
1309
+ RULE -->|"Pinned allow"| RENDER["Execute + render"]
1310
+ RULE -->|"No rule"| PROMPT["Prompt guardian"]
1311
+ RULE -->|"Deny"| DENY["Blocked"]
1312
+ ```
1313
+
1314
+ #### Sandbox-Only Execution
1315
+
1316
+ Inline commands are executed through `runInlineCommand()`, a purpose-built sandbox runner with strict security constraints:
1317
+
1318
+ - **Sandbox enforced**: The sandbox is always enabled with `networkMode: "off"` — no outbound network connections
1319
+ - **Sanitized environment**: Uses `buildSanitizedEnv()` — no API keys, tokens, credentials, gateway URLs, or workspace paths in the environment
1320
+ - **No host fallback**: Unlike the general `bash` tool, there is no fallback to host execution when the sandbox is unavailable
1321
+ - **No credential proxy**: No CES client, no credential materialization
1322
+ - **Timeout**: 10-second wall-clock limit (killed with SIGKILL on timeout)
1323
+ - **Output cap**: 20,000 characters maximum (truncated with `[output truncated]` marker)
1324
+ - **Binary rejection**: Output with >10% non-printable characters (after ANSI stripping) is rejected
1325
+ - **Stdout only**: stderr is discarded; ANSI escape sequences are stripped from stdout
1326
+
1327
+ The runner returns a deterministic `InlineCommandResult` with machine-readable failure reasons (`timeout`, `non_zero_exit`, `binary_output`, `spawn_failure`) — raw stderr is never surfaced.
1328
+
1329
+ #### Rendering Flow
1330
+
1331
+ The `renderInlineCommands()` function processes expansions sequentially (not in parallel) to maintain deterministic order. Each `!`command`` token is replaced with an XML-wrapped result:
1332
+
1333
+ - **Success**: `<inline_skill_command index="N">...output...</inline_skill_command>`
1334
+ - **Failure**: `<inline_skill_command index="N">[inline command unavailable: <reason>]</inline_skill_command>`
1335
+
1336
+ Rendering applies at two levels during `skill_load`:
1337
+
1338
+ 1. **Root skill**: If the loaded skill has inline expansions, they are rendered before the skill body is emitted. A root skill with inline commands that fail the feature-flag or source-eligibility check returns an error (fail closed, no `<loaded_skill>` marker).
1339
+ 2. **Included children**: Each included child skill's body is rendered independently. A render failure in one child does not prevent sibling rendering — the failed child's body falls back to raw (unexpanded) text with a warning log.
1340
+
1341
+ #### v1 Source Restriction
1342
+
1343
+ In the initial release, only skills from **bundled**, **managed**, and **workspace** sources are eligible for inline command expansion. Skills from **extra** (third-party) roots are explicitly rejected with an error message. The `INLINE_COMMAND_ELIGIBLE_SOURCES` set in `load.ts` enforces this restriction. Unknown or future source types also fail closed.
1344
+
1345
+ #### Fail-Closed Behavior Summary
1346
+
1347
+ Every layer in the pipeline defaults to rejection rather than silent degradation:
1348
+
1349
+ | Layer | Failure mode | Behavior |
1350
+ | ---------------- | ---------------------------------------------------- | ------------------------------------------------------ |
1351
+ | Parser | Malformed token (empty, nested backtick, unmatched) | Logged as error, not expanded |
1352
+ | Feature flag | Flag disabled | `skill_load` returns error, no `<loaded_skill>` marker |
1353
+ | Source check | `extra` or unknown source | `skill_load` returns error, no `<loaded_skill>` marker |
1354
+ | Transitive hash | Missing child or cycle in include graph | `TransitiveHashError` thrown, permission check fails |
1355
+ | Permission | No trust rule and non-interactive | Denied (never silently auto-approved) |
1356
+ | Sandbox runner | Timeout, non-zero exit, binary output, spawn failure | Deterministic stub rendered, no raw stderr |
1357
+ | Renderer (root) | Feature flag off or ineligible source | Error returned from `skill_load` |
1358
+ | Renderer (child) | Exception during render | Raw body used, sibling rendering continues |
1359
+
1360
+ #### Key Source Files
1361
+
1362
+ | File | Role |
1363
+ | --------------------------------------------------- | -------------------------------------------------------------------------------- |
1364
+ | `assistant/src/skills/inline-command-expansions.ts` | `parseInlineCommandExpansions()` — parser for `!`command`` tokens |
1365
+ | `assistant/src/skills/inline-command-runner.ts` | `runInlineCommand()` — sandbox-only command executor |
1366
+ | `assistant/src/skills/inline-command-render.ts` | `renderInlineCommands()` — token replacement and XML wrapping |
1367
+ | `assistant/src/skills/transitive-version-hash.ts` | `computeTransitiveSkillVersionHash()` — hash covering root + included children |
1368
+ | `assistant/src/tools/skills/load.ts` | `skill_load` execute path — feature flag check, source check, render integration |
1369
+ | `assistant/src/permissions/checker.ts` | `skill_load_dynamic:*` candidate emission and allowlist options |
1370
+ | `assistant/src/permissions/defaults.ts` | `default:ask-skill_load_dynamic-global` rule (priority 200) |
1371
+ | `meta/feature-flags/feature-flag-registry.json` | `inline-skill-commands` flag definition |
1372
+
1264
1373
  ### Key Source Files
1265
1374
 
1266
1375
  | File | Role |
@@ -2,6 +2,111 @@
2
2
 
3
3
  Assistant memory and context-injection architecture details.
4
4
 
5
+ ## Simplified Memory System (Default)
6
+
7
+ The simplified memory system replaces the legacy item/tier/staleness model with a two-layer architecture: a **brief** (time-relevant context + open loops) plus **archive recall** (observations, chunks, episodes). It is enabled by default via `memory.simplified.enabled: true`.
8
+
9
+ ### Architecture Overview
10
+
11
+ ```mermaid
12
+ graph TB
13
+ subgraph "Write Path (Simplified)"
14
+ MSG["Incoming Message"] --> REDUCER["Memory Reducer<br/>(LLM-backed, delayed)"]
15
+ REDUCER --> TC["time_contexts<br/>(brief state)"]
16
+ REDUCER --> OL["open_loops<br/>(brief state)"]
17
+ REDUCER --> OBS_R["Archive Observations<br/>(reducer output)"]
18
+ REDUCER --> EP_R["Archive Episodes<br/>(reducer output)"]
19
+
20
+ MSG --> INDEXER["Dual-Write Indexer"]
21
+ INDEXER --> OBS["memory_observations"]
22
+ INDEXER --> CHK["memory_chunks<br/>(content-hash deduped)"]
23
+
24
+ COMPACT["Context Compaction"] --> EP["memory_episodes"]
25
+ end
26
+
27
+ subgraph "Read Path (Simplified)"
28
+ TURN["User Turn"] --> BRIEF["Memory Brief Compiler"]
29
+ BRIEF --> TC
30
+ BRIEF --> OL
31
+ BRIEF --> BRIEF_OUT["&lt;memory_brief&gt;<br/>Time contexts + Open loops"]
32
+
33
+ TURN --> RECALL_GATE["Archive Recall Gate<br/>(keyword + pattern match)"]
34
+ RECALL_GATE --> PREFETCH["Prefetch<br/>(episodes + observations)"]
35
+ RECALL_GATE --> DEEP["Deeper Recall<br/>(episodes + observations + chunks)"]
36
+ DEEP --> RECALL_OUT["&lt;supporting_recall&gt;<br/>Source-linked bullets"]
37
+
38
+ BRIEF_OUT --> INJECT["Runtime Injection<br/>(prepend to user message)"]
39
+ RECALL_OUT --> INJECT
40
+ end
41
+
42
+ subgraph "Memory Tools (Simplified)"
43
+ SAVE["memory_save"] --> OBS
44
+ RECALL_TOOL["memory_recall"] --> RECALL_GATE
45
+ end
46
+ ```
47
+
48
+ ### Tables
49
+
50
+ | Table | Purpose | Write source |
51
+ | --------------------- | ----------------------------------------------- | ------------------------------------------------------- |
52
+ | `time_contexts` | Bounded temporal windows for the brief | Reducer |
53
+ | `open_loops` | Unresolved follow-up items for the brief | Reducer |
54
+ | `memory_observations` | Raw factual statements from conversation turns | Indexer dual-write, reducer, memory_save tool, backfill |
55
+ | `memory_chunks` | Deduplicated content units for embedding/recall | Derived from observations, content-hash deduped |
56
+ | `memory_episodes` | Narrative summaries of interaction spans | Compaction, reducer, backfill |
57
+
58
+ ### Reducer
59
+
60
+ The memory reducer is a provider-backed (LLM) background process that analyzes unreduced conversation turns and produces structured CRUD operations for brief-state tables and archive candidates. It runs on a delay after conversation idle or switch, scheduled via the `reduce_conversation_memory` job. The reducer is side-effect-free; results are applied transactionally via `applyReducerResult`.
61
+
62
+ ### Brief
63
+
64
+ The memory brief is compiled fresh on every turn from active `time_contexts` and `open_loops`. It is rendered as `<memory_brief>` XML and injected as a text block prepended to the user message. Empty sections are omitted.
65
+
66
+ ### Archive Recall
67
+
68
+ Archive recall runs when the user's turn triggers a recall gate (past-reference language, analogy/debugging patterns, or strong prefetch hits). It queries episodes, observations, and chunks via keyword matching and returns up to 3 source-linked bullets in `<supporting_recall>`. No recall tag is emitted when results are empty.
69
+
70
+ ### Backfill
71
+
72
+ Existing users have legacy data in `memory_segments`, `memory_summaries`, and `memory_items`. The `backfill_simplified_memory` job migrates this data into the simplified tables:
73
+
74
+ - `memory_segments` -> `memory_observations` + `memory_chunks`
75
+ - `memory_summaries` -> `memory_episodes`
76
+ - Active, high-confidence `memory_items` -> `memory_observations` + `memory_chunks`, with unambiguous items also mapped to `time_contexts` or `open_loops`
77
+
78
+ The backfill is idempotent (content-hash dedup + checkpoint tracking), processes in batches of 200, and self-enqueues continuation jobs for large datasets.
79
+
80
+ ### Rollback Posture
81
+
82
+ The legacy memory system remains fully available as a short-lived rollback path:
83
+
84
+ - **Legacy tables are preserved**: `memory_segments`, `memory_items`, `memory_summaries`, and `memory_item_sources` remain in the schema and continue to receive writes from the legacy indexer/extraction pipeline.
85
+ - **Flag-gated**: Setting `memory.simplified.enabled: false` reverts to the legacy item/tier/staleness model for both read and write paths.
86
+ - **Memory tools**: `memory_save` and `memory_recall` check the flag at call time and route to the appropriate path (simplified observations or legacy items).
87
+ - **No data loss**: The backfill copies data without deleting legacy rows. Both systems can coexist.
88
+
89
+ ### Key Files
90
+
91
+ | File | Role |
92
+ | ----------------------------------------------------------------- | ------------------------------------------------ |
93
+ | `assistant/src/config/schemas/memory-simplified.ts` | Config schema with `enabled: true` default |
94
+ | `assistant/src/memory/reducer.ts` | Provider-backed reducer (LLM call + parse) |
95
+ | `assistant/src/memory/reducer-store.ts` | Transactional result application |
96
+ | `assistant/src/memory/reducer-scheduler.ts` | Idle-delay and conversation-switch scheduling |
97
+ | `assistant/src/memory/archive-store.ts` | Observation/chunk/episode write helpers |
98
+ | `assistant/src/memory/archive-recall.ts` | Prefetch + deeper recall over archive tables |
99
+ | `assistant/src/memory/brief.ts` | Brief composer (time contexts + open loops) |
100
+ | `assistant/src/memory/job-handlers/backfill-simplified-memory.ts` | Legacy data migration handler |
101
+ | `assistant/src/tools/memory/handlers.ts` | Memory tool handlers (simplified/legacy routing) |
102
+ | `assistant/src/__tests__/simplified-memory-e2e.test.ts` | End-to-end test suite |
103
+
104
+ ---
105
+
106
+ ## Legacy Memory System — Daemon Data Flow
107
+
108
+ > **Note**: The legacy system below is retained as rollback support. New installations use the simplified system by default.
109
+
5
110
  ## Memory System — Daemon Data Flow
6
111
 
7
112
  ```mermaid
package/docs/skills.md CHANGED
@@ -156,3 +156,103 @@ Trust rules are stored in `~/.vellum/protected/trust.json`. You can inspect this
156
156
  ### "A skill tool keeps prompting even though I approved it."
157
157
 
158
158
  Check whether the rule has the correct `executionTarget` — a rule scoped to `sandbox` will not match a tool running on `host`.
159
+
160
+ ## Inline Command Expansions
161
+
162
+ Skills can embed dynamic content by using the **inline command expansion** syntax. When a skill containing these tokens is loaded, each token is executed and replaced with its output before the skill body is delivered to the model. The syntax is shown in the fenced block below.
163
+
164
+ This syntax is intentionally compatible with the convention established by [inline skill commands](https://x.com) for portable cross-agent skill authoring. Vellum adopts the exact same token format so that externally authored skills load without rewriting — but applies stricter execution constraints.
165
+
166
+ ### Syntax
167
+
168
+ The canonical syntax is:
169
+
170
+ ```
171
+ !`command`
172
+ ```
173
+
174
+ Where `command` is any shell command string. The exclamation mark immediately precedes the opening backtick with no whitespace in between. Examples:
175
+
176
+ ```markdown
177
+ Current branch: !`git branch --show-current`
178
+ Recent changes: !`git log --oneline -5`
179
+ Project info: !`cat package.json | jq '.name, .version'`
180
+ ```
181
+
182
+ Tokens inside fenced code blocks (` ``` ` or `~~~`) are **not** expanded — they are treated as documentation examples. This allows skills to safely include syntax examples without triggering execution.
183
+
184
+ ### Parsing rules
185
+
186
+ The parser (`parseInlineCommandExpansions`) enforces fail-closed semantics:
187
+
188
+ | Condition | Behavior |
189
+ | ------------------------------------------------- | ---------------------- |
190
+ | Well-formed token outside fenced code | Parsed as an expansion |
191
+ | Token inside a fenced code block | Skipped (not expanded) |
192
+ | Empty command text (no content between backticks) | Rejected as malformed |
193
+ | Whitespace-only command text | Rejected as malformed |
194
+ | Unmatched opening (no closing backtick found) | Rejected as malformed |
195
+ | Nested backticks inside command text | Rejected as malformed |
196
+
197
+ Malformed tokens do not silently pass through — they are collected as errors and logged. If a skill body contains any malformed tokens, the valid tokens are still expanded, but the errors are reported for diagnostics.
198
+
199
+ ### Feature flag
200
+
201
+ Inline command expansion is gated by the `inline-skill-commands` feature flag (key: `feature_flags.inline-skill-commands.enabled`). The flag defaults to **enabled**.
202
+
203
+ When the flag is disabled and a skill contains inline command expansion tokens, `skill_load` returns an error rather than delivering unexpanded tokens to the model. This fail-closed behavior prevents the LLM from seeing raw expansion tokens and attempting to interpret them.
204
+
205
+ ### Approval model
206
+
207
+ Skills with inline command expansions use a separate permission namespace: `skill_load_dynamic:*`. This ensures they do not silently inherit the permissive default `skill_load:*` allow rule.
208
+
209
+ When a user is prompted to approve a dynamic skill load, the allowlist options are:
210
+
211
+ | Option | Pattern | Behavior |
212
+ | -------------- | ------------------------------------------- | --------------------------------------------------------------------------------------------------- |
213
+ | Version-pinned | `skill_load_dynamic:<id>@<transitive-hash>` | Approved for this exact version only. Any change to the skill or its includes invalidates the rule. |
214
+ | Any-version | `skill_load_dynamic:<id>` | Approved for all versions of this skill. |
215
+
216
+ The transitive hash covers the skill's own content plus all included skills, so a change anywhere in the dependency graph triggers re-approval for version-pinned rules.
217
+
218
+ ### v1 execution limits
219
+
220
+ In the initial implementation, inline command execution enforces these constraints:
221
+
222
+ | Constraint | Value |
223
+ | ---------------- | ------------------------------------------------------- |
224
+ | Execution target | Sandbox only (no host fallback) |
225
+ | Network access | Off (no outbound connections) |
226
+ | Environment | Sanitized (no API keys, tokens, or credentials) |
227
+ | Timeout | 10 seconds per command |
228
+ | Output cap | 20,000 characters (truncated with `[output truncated]`) |
229
+ | Binary output | Rejected if >10% non-printable characters |
230
+ | ANSI sequences | Stripped before output processing |
231
+ | stderr | Discarded (only stdout is captured) |
232
+
233
+ Commands that fail (timeout, non-zero exit, spawn failure, binary output) produce a deterministic stub in the rendered body rather than leaking raw error output:
234
+
235
+ ```
236
+ <inline_skill_command index="0">[inline command unavailable: command timed out]</inline_skill_command>
237
+ ```
238
+
239
+ ### Eligible skill sources
240
+
241
+ Only **bundled**, **managed**, and **workspace** skills may use inline command expansions. Third-party **extra** skill sources are explicitly rejected — `skill_load` returns an error if an extra-source skill contains inline expansion tokens.
242
+
243
+ | Source | Eligible | Reason |
244
+ | ----------- | -------- | -------------------------------------- |
245
+ | `bundled` | Yes | Shipped with the application, trusted |
246
+ | `managed` | Yes | User-installed, subject to approval |
247
+ | `workspace` | Yes | Project-local, subject to approval |
248
+ | `extra` | No | Third-party roots, out of scope for v1 |
249
+
250
+ ### Fail-closed summary
251
+
252
+ The system fails closed at every layer:
253
+
254
+ 1. **Flag off** — `skill_load` returns an error; tokens never reach the model.
255
+ 2. **Malformed syntax** — rejected by the parser, logged as errors.
256
+ 3. **Unsupported source** — `skill_load` returns an error for extra-source skills.
257
+ 4. **Command failure** — deterministic stub replaces the token, no raw stderr.
258
+ 5. **No permission** — `skill_load_dynamic:*` namespace requires explicit approval.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vellumai/assistant",
3
- "version": "0.5.2",
3
+ "version": "0.5.4",
4
4
  "type": "module",
5
5
  "exports": {
6
6
  ".": "./src/index.ts"