@vellumai/assistant 0.5.2 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +109 -0
- package/docs/skills.md +100 -0
- package/package.json +1 -1
- package/src/__tests__/conversation-agent-loop-overflow.test.ts +7 -0
- package/src/__tests__/conversation-agent-loop.test.ts +7 -0
- package/src/__tests__/conversation-memory-dirty-tail.test.ts +150 -0
- package/src/__tests__/conversation-provider-retry-repair.test.ts +7 -0
- package/src/__tests__/conversation-wipe.test.ts +226 -0
- package/src/__tests__/db-memory-archive-migration.test.ts +372 -0
- package/src/__tests__/db-memory-brief-state-migration.test.ts +213 -0
- package/src/__tests__/db-memory-reducer-checkpoints.test.ts +273 -0
- package/src/__tests__/inline-command-runner.test.ts +311 -0
- package/src/__tests__/inline-skill-authoring-guard.test.ts +220 -0
- package/src/__tests__/inline-skill-load-permissions.test.ts +435 -0
- package/src/__tests__/list-messages-attachments.test.ts +96 -0
- package/src/__tests__/memory-brief-open-loops.test.ts +530 -0
- package/src/__tests__/memory-brief-time.test.ts +285 -0
- package/src/__tests__/memory-brief-wrapper.test.ts +311 -0
- package/src/__tests__/memory-chunk-archive.test.ts +400 -0
- package/src/__tests__/memory-chunk-dual-write.test.ts +453 -0
- package/src/__tests__/memory-episode-archive.test.ts +370 -0
- package/src/__tests__/memory-episode-dual-write.test.ts +626 -0
- package/src/__tests__/memory-observation-archive.test.ts +375 -0
- package/src/__tests__/memory-observation-dual-write.test.ts +318 -0
- package/src/__tests__/memory-recall-quality.test.ts +2 -2
- package/src/__tests__/memory-reducer-store.test.ts +728 -0
- package/src/__tests__/memory-reducer-types.test.ts +699 -0
- package/src/__tests__/memory-reducer.test.ts +698 -0
- package/src/__tests__/memory-regressions.test.ts +6 -4
- package/src/__tests__/memory-simplified-config.test.ts +281 -0
- package/src/__tests__/parse-identity-fields.test.ts +129 -0
- package/src/__tests__/skill-load-inline-command.test.ts +598 -0
- package/src/__tests__/skill-load-inline-includes.test.ts +644 -0
- package/src/__tests__/skills-inline-command-expansions.test.ts +301 -0
- package/src/__tests__/skills-transitive-hash.test.ts +333 -0
- package/src/__tests__/vellum-self-knowledge-inline-command.test.ts +320 -0
- package/src/__tests__/workspace-migration-backfill-installation-id.test.ts +4 -4
- package/src/config/bundled-skills/app-builder/SKILL.md +8 -8
- package/src/config/bundled-skills/skill-management/SKILL.md +1 -1
- package/src/config/bundled-skills/skill-management/TOOLS.json +2 -2
- package/src/config/feature-flag-registry.json +16 -0
- package/src/config/loader.ts +1 -0
- package/src/config/raw-config-utils.ts +28 -0
- package/src/config/schema.ts +12 -0
- package/src/config/schemas/memory-simplified.ts +101 -0
- package/src/config/schemas/memory.ts +4 -0
- package/src/config/skills.ts +50 -4
- package/src/daemon/conversation-agent-loop-handlers.ts +8 -3
- package/src/daemon/conversation-agent-loop.ts +71 -1
- package/src/daemon/conversation-lifecycle.ts +11 -1
- package/src/daemon/conversation-runtime-assembly.ts +2 -1
- package/src/daemon/conversation-surfaces.ts +31 -8
- package/src/daemon/conversation.ts +40 -23
- package/src/daemon/handlers/config-embeddings.ts +10 -2
- package/src/daemon/handlers/config-model.ts +0 -9
- package/src/daemon/handlers/identity.ts +12 -1
- package/src/daemon/lifecycle.ts +9 -1
- package/src/daemon/message-types/conversations.ts +0 -1
- package/src/daemon/server.ts +1 -1
- package/src/followups/followup-store.ts +47 -1
- package/src/memory/archive-store.ts +400 -0
- package/src/memory/brief-formatting.ts +33 -0
- package/src/memory/brief-open-loops.ts +266 -0
- package/src/memory/brief-time.ts +161 -0
- package/src/memory/brief.ts +75 -0
- package/src/memory/conversation-crud.ts +245 -101
- package/src/memory/db-init.ts +12 -0
- package/src/memory/indexer.ts +106 -15
- package/src/memory/job-handlers/embedding.test.ts +1 -0
- package/src/memory/job-handlers/embedding.ts +83 -0
- package/src/memory/job-utils.ts +1 -1
- package/src/memory/jobs-store.ts +6 -0
- package/src/memory/jobs-worker.ts +12 -0
- package/src/memory/migrations/185-memory-brief-state.ts +52 -0
- package/src/memory/migrations/186-memory-archive.ts +109 -0
- package/src/memory/migrations/187-memory-reducer-checkpoints.ts +19 -0
- package/src/memory/migrations/index.ts +3 -0
- package/src/memory/qdrant-client.ts +23 -4
- package/src/memory/reducer-store.ts +271 -0
- package/src/memory/reducer-types.ts +99 -0
- package/src/memory/reducer.ts +453 -0
- package/src/memory/schema/conversations.ts +3 -0
- package/src/memory/schema/index.ts +2 -0
- package/src/memory/schema/memory-archive.ts +121 -0
- package/src/memory/schema/memory-brief.ts +55 -0
- package/src/memory/search/semantic.ts +17 -4
- package/src/oauth/oauth-store.ts +3 -1
- package/src/permissions/checker.ts +89 -6
- package/src/permissions/defaults.ts +14 -0
- package/src/runtime/routes/conversation-management-routes.ts +6 -0
- package/src/runtime/routes/conversation-query-routes.ts +7 -0
- package/src/runtime/routes/conversation-routes.ts +52 -5
- package/src/runtime/routes/identity-routes.ts +2 -35
- package/src/runtime/routes/llm-context-normalization.ts +14 -1
- package/src/runtime/routes/memory-item-routes.ts +90 -5
- package/src/runtime/routes/secret-routes.ts +2 -0
- package/src/runtime/routes/surface-action-routes.ts +68 -1
- package/src/schedule/schedule-store.ts +21 -0
- package/src/skills/inline-command-expansions.ts +204 -0
- package/src/skills/inline-command-render.ts +127 -0
- package/src/skills/inline-command-runner.ts +242 -0
- package/src/skills/transitive-version-hash.ts +88 -0
- package/src/tasks/task-store.ts +43 -1
- package/src/tools/permission-checker.ts +8 -1
- package/src/tools/skills/load.ts +140 -6
- package/src/util/platform.ts +18 -0
- package/src/workspace/migrations/{002-backfill-installation-id.ts → 011-backfill-installation-id.ts} +1 -1
- package/src/workspace/migrations/registry.ts +1 -1
package/ARCHITECTURE.md
CHANGED
|
@@ -1261,6 +1261,115 @@ graph TB
|
|
|
1261
1261
|
TRUST -->|"Deny rule matches"| DENY["Blocked"]
|
|
1262
1262
|
```
|
|
1263
1263
|
|
|
1264
|
+
### Inline Skill Command Expansion
|
|
1265
|
+
|
|
1266
|
+
Skills can embed dynamic shell output in their SKILL.md body using `` !`command` `` tokens. When `skill_load` processes a skill containing these tokens, the commands are executed at load time through a sandboxed runner and their output is substituted inline. This enables externally authored skills to include project-specific context (e.g., directory listings, config values) without requiring manual edits.
|
|
1267
|
+
|
|
1268
|
+
**Feature flag:** `feature_flags.inline-skill-commands.enabled` (default: enabled). When disabled, loading a skill that contains `!`command`` tokens fails closed with an error rather than leaving raw tokens in the prompt.
|
|
1269
|
+
|
|
1270
|
+
#### Syntax and Parsing
|
|
1271
|
+
|
|
1272
|
+
The `` !`command` `` syntax is parsed by `parseInlineCommandExpansions()` from the SKILL.md body after frontmatter extraction. The parser:
|
|
1273
|
+
|
|
1274
|
+
- Extracts all `!`command`` tokens outside fenced code blocks (documentation examples in fenced blocks are ignored)
|
|
1275
|
+
- Assigns each token a stable `placeholderId` (0-indexed encounter order)
|
|
1276
|
+
- Rejects malformed tokens fail-closed: empty commands, nested backticks, and unmatched opening backticks produce `InlineCommandExpansionError` entries rather than best-effort expansions
|
|
1277
|
+
|
|
1278
|
+
#### Transitive Version Hash
|
|
1279
|
+
|
|
1280
|
+
When a skill contains inline command expansions, the permission system computes a **transitive version hash** (`tv1:<sha256>`) that covers the root skill and all its included children (DFS pre-order). The hash folds:
|
|
1281
|
+
|
|
1282
|
+
1. Each visited skill ID (graph structure)
|
|
1283
|
+
2. Each visited skill's directory content hash (file changes)
|
|
1284
|
+
|
|
1285
|
+
Editing any file in the root skill or any included child invalidates the transitive hash, which forces re-approval. The hash is computed by `computeTransitiveSkillVersionHash()` and fails closed (`TransitiveHashError`) on missing children or cycles in the include graph.
|
|
1286
|
+
|
|
1287
|
+
#### Permission Gating (`skill_load_dynamic:*`)
|
|
1288
|
+
|
|
1289
|
+
Skills containing inline command expansions use a separate permission candidate namespace (`skill_load_dynamic:*`) instead of the normal `skill_load:*` namespace. This prevents them from falling through to the permissive default `skill_load:*` allow rule. The permission checker emits candidates in specificity order:
|
|
1290
|
+
|
|
1291
|
+
1. `skill_load_dynamic:<skill-id>@<transitive-hash>` — version-pinned approval (most specific)
|
|
1292
|
+
2. `skill_load_dynamic:<skill-id>` — any-version approval
|
|
1293
|
+
|
|
1294
|
+
A default ask rule at priority 200 (`default:ask-skill_load_dynamic-global`) catches these candidates, ensuring the guardian is always prompted before inline commands execute. The user can create a pinned trust rule for a specific transitive hash to auto-approve known-good versions. Non-interactive sessions (no human present) deny dynamic skill loads rather than silently auto-approving.
|
|
1295
|
+
|
|
1296
|
+
```mermaid
|
|
1297
|
+
graph TB
|
|
1298
|
+
LOAD["skill_load(selector)"] --> PARSE["Parse SKILL.md body"]
|
|
1299
|
+
PARSE --> CHECK{"Has !\x60command\x60<br/>tokens?"}
|
|
1300
|
+
CHECK -->|"No"| NORMAL["Normal skill_load:* candidate<br/>(auto-allowed)"]
|
|
1301
|
+
CHECK -->|"Yes"| FLAG{"inline-skill-commands<br/>flag enabled?"}
|
|
1302
|
+
FLAG -->|"No"| FAIL_FLAG["Fail closed:<br/>error returned"]
|
|
1303
|
+
FLAG -->|"Yes"| SOURCE{"Eligible source?<br/>(bundled/managed/workspace)"}
|
|
1304
|
+
SOURCE -->|"No (extra)"| FAIL_SOURCE["Fail closed:<br/>source not eligible"]
|
|
1305
|
+
SOURCE -->|"Yes"| HASH["Compute transitive hash"]
|
|
1306
|
+
HASH --> DYN["skill_load_dynamic:id@hash<br/>candidate emitted"]
|
|
1307
|
+
DYN --> PERM["PermissionChecker"]
|
|
1308
|
+
PERM --> RULE{"Trust rule?"}
|
|
1309
|
+
RULE -->|"Pinned allow"| RENDER["Execute + render"]
|
|
1310
|
+
RULE -->|"No rule"| PROMPT["Prompt guardian"]
|
|
1311
|
+
RULE -->|"Deny"| DENY["Blocked"]
|
|
1312
|
+
```
|
|
1313
|
+
|
|
1314
|
+
#### Sandbox-Only Execution
|
|
1315
|
+
|
|
1316
|
+
Inline commands are executed through `runInlineCommand()`, a purpose-built sandbox runner with strict security constraints:
|
|
1317
|
+
|
|
1318
|
+
- **Sandbox enforced**: The sandbox is always enabled with `networkMode: "off"` — no outbound network connections
|
|
1319
|
+
- **Sanitized environment**: Uses `buildSanitizedEnv()` — no API keys, tokens, credentials, gateway URLs, or workspace paths in the environment
|
|
1320
|
+
- **No host fallback**: Unlike the general `bash` tool, there is no fallback to host execution when the sandbox is unavailable
|
|
1321
|
+
- **No credential proxy**: No CES client, no credential materialization
|
|
1322
|
+
- **Timeout**: 10-second wall-clock limit (killed with SIGKILL on timeout)
|
|
1323
|
+
- **Output cap**: 20,000 characters maximum (truncated with `[output truncated]` marker)
|
|
1324
|
+
- **Binary rejection**: Output with >10% non-printable characters (after ANSI stripping) is rejected
|
|
1325
|
+
- **Stdout only**: stderr is discarded; ANSI escape sequences are stripped from stdout
|
|
1326
|
+
|
|
1327
|
+
The runner returns a deterministic `InlineCommandResult` with machine-readable failure reasons (`timeout`, `non_zero_exit`, `binary_output`, `spawn_failure`) — raw stderr is never surfaced.
|
|
1328
|
+
|
|
1329
|
+
#### Rendering Flow
|
|
1330
|
+
|
|
1331
|
+
The `renderInlineCommands()` function processes expansions sequentially (not in parallel) to maintain deterministic order. Each `!`command`` token is replaced with an XML-wrapped result:
|
|
1332
|
+
|
|
1333
|
+
- **Success**: `<inline_skill_command index="N">...output...</inline_skill_command>`
|
|
1334
|
+
- **Failure**: `<inline_skill_command index="N">[inline command unavailable: <reason>]</inline_skill_command>`
|
|
1335
|
+
|
|
1336
|
+
Rendering applies at two levels during `skill_load`:
|
|
1337
|
+
|
|
1338
|
+
1. **Root skill**: If the loaded skill has inline expansions, they are rendered before the skill body is emitted. A root skill with inline commands that fail the feature-flag or source-eligibility check returns an error (fail closed, no `<loaded_skill>` marker).
|
|
1339
|
+
2. **Included children**: Each included child skill's body is rendered independently. A render failure in one child does not prevent sibling rendering — the failed child's body falls back to raw (unexpanded) text with a warning log.
|
|
1340
|
+
|
|
1341
|
+
#### v1 Source Restriction
|
|
1342
|
+
|
|
1343
|
+
In the initial release, only skills from **bundled**, **managed**, and **workspace** sources are eligible for inline command expansion. Skills from **extra** (third-party) roots are explicitly rejected with an error message. The `INLINE_COMMAND_ELIGIBLE_SOURCES` set in `load.ts` enforces this restriction. Unknown or future source types also fail closed.
|
|
1344
|
+
|
|
1345
|
+
#### Fail-Closed Behavior Summary
|
|
1346
|
+
|
|
1347
|
+
Every layer in the pipeline defaults to rejection rather than silent degradation:
|
|
1348
|
+
|
|
1349
|
+
| Layer | Failure mode | Behavior |
|
|
1350
|
+
| ---------------- | ---------------------------------------------------- | ------------------------------------------------------ |
|
|
1351
|
+
| Parser | Malformed token (empty, nested backtick, unmatched) | Logged as error, not expanded |
|
|
1352
|
+
| Feature flag | Flag disabled | `skill_load` returns error, no `<loaded_skill>` marker |
|
|
1353
|
+
| Source check | `extra` or unknown source | `skill_load` returns error, no `<loaded_skill>` marker |
|
|
1354
|
+
| Transitive hash | Missing child or cycle in include graph | `TransitiveHashError` thrown, permission check fails |
|
|
1355
|
+
| Permission | No trust rule and non-interactive | Denied (never silently auto-approved) |
|
|
1356
|
+
| Sandbox runner | Timeout, non-zero exit, binary output, spawn failure | Deterministic stub rendered, no raw stderr |
|
|
1357
|
+
| Renderer (root) | Feature flag off or ineligible source | Error returned from `skill_load` |
|
|
1358
|
+
| Renderer (child) | Exception during render | Raw body used, sibling rendering continues |
|
|
1359
|
+
|
|
1360
|
+
#### Key Source Files
|
|
1361
|
+
|
|
1362
|
+
| File | Role |
|
|
1363
|
+
| --------------------------------------------------- | -------------------------------------------------------------------------------- |
|
|
1364
|
+
| `assistant/src/skills/inline-command-expansions.ts` | `parseInlineCommandExpansions()` — parser for `!`command`` tokens |
|
|
1365
|
+
| `assistant/src/skills/inline-command-runner.ts` | `runInlineCommand()` — sandbox-only command executor |
|
|
1366
|
+
| `assistant/src/skills/inline-command-render.ts` | `renderInlineCommands()` — token replacement and XML wrapping |
|
|
1367
|
+
| `assistant/src/skills/transitive-version-hash.ts` | `computeTransitiveSkillVersionHash()` — hash covering root + included children |
|
|
1368
|
+
| `assistant/src/tools/skills/load.ts` | `skill_load` execute path — feature flag check, source check, render integration |
|
|
1369
|
+
| `assistant/src/permissions/checker.ts` | `skill_load_dynamic:*` candidate emission and allowlist options |
|
|
1370
|
+
| `assistant/src/permissions/defaults.ts` | `default:ask-skill_load_dynamic-global` rule (priority 200) |
|
|
1371
|
+
| `meta/feature-flags/feature-flag-registry.json` | `inline-skill-commands` flag definition |
|
|
1372
|
+
|
|
1264
1373
|
### Key Source Files
|
|
1265
1374
|
|
|
1266
1375
|
| File | Role |
|
package/docs/skills.md
CHANGED
|
@@ -156,3 +156,103 @@ Trust rules are stored in `~/.vellum/protected/trust.json`. You can inspect this
|
|
|
156
156
|
### "A skill tool keeps prompting even though I approved it."
|
|
157
157
|
|
|
158
158
|
Check whether the rule has the correct `executionTarget` — a rule scoped to `sandbox` will not match a tool running on `host`.
|
|
159
|
+
|
|
160
|
+
## Inline Command Expansions
|
|
161
|
+
|
|
162
|
+
Skills can embed dynamic content by using the **inline command expansion** syntax. When a skill containing these tokens is loaded, each token is executed and replaced with its output before the skill body is delivered to the model. The syntax is shown in the fenced block below.
|
|
163
|
+
|
|
164
|
+
This syntax is intentionally compatible with the convention established by [inline skill commands](https://x.com) for portable cross-agent skill authoring. Vellum adopts the exact same token format so that externally authored skills load without rewriting — but applies stricter execution constraints.
|
|
165
|
+
|
|
166
|
+
### Syntax
|
|
167
|
+
|
|
168
|
+
The canonical syntax is:
|
|
169
|
+
|
|
170
|
+
```
|
|
171
|
+
!`command`
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
Where `command` is any shell command string. The exclamation mark immediately precedes the opening backtick with no whitespace in between. Examples:
|
|
175
|
+
|
|
176
|
+
```markdown
|
|
177
|
+
Current branch: !`git branch --show-current`
|
|
178
|
+
Recent changes: !`git log --oneline -5`
|
|
179
|
+
Project info: !`cat package.json | jq '.name, .version'`
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
Tokens inside fenced code blocks (` ``` ` or `~~~`) are **not** expanded — they are treated as documentation examples. This allows skills to safely include syntax examples without triggering execution.
|
|
183
|
+
|
|
184
|
+
### Parsing rules
|
|
185
|
+
|
|
186
|
+
The parser (`parseInlineCommandExpansions`) enforces fail-closed semantics:
|
|
187
|
+
|
|
188
|
+
| Condition | Behavior |
|
|
189
|
+
| ------------------------------------------------- | ---------------------- |
|
|
190
|
+
| Well-formed token outside fenced code | Parsed as an expansion |
|
|
191
|
+
| Token inside a fenced code block | Skipped (not expanded) |
|
|
192
|
+
| Empty command text (no content between backticks) | Rejected as malformed |
|
|
193
|
+
| Whitespace-only command text | Rejected as malformed |
|
|
194
|
+
| Unmatched opening (no closing backtick found) | Rejected as malformed |
|
|
195
|
+
| Nested backticks inside command text | Rejected as malformed |
|
|
196
|
+
|
|
197
|
+
Malformed tokens do not silently pass through — they are collected as errors and logged. If a skill body contains any malformed tokens, the valid tokens are still expanded, but the errors are reported for diagnostics.
|
|
198
|
+
|
|
199
|
+
### Feature flag
|
|
200
|
+
|
|
201
|
+
Inline command expansion is gated by the `inline-skill-commands` feature flag (key: `feature_flags.inline-skill-commands.enabled`). The flag defaults to **enabled**.
|
|
202
|
+
|
|
203
|
+
When the flag is disabled and a skill contains inline command expansion tokens, `skill_load` returns an error rather than delivering unexpanded tokens to the model. This fail-closed behavior prevents the LLM from seeing raw expansion tokens and attempting to interpret them.
|
|
204
|
+
|
|
205
|
+
### Approval model
|
|
206
|
+
|
|
207
|
+
Skills with inline command expansions use a separate permission namespace: `skill_load_dynamic:*`. This ensures they do not silently inherit the permissive default `skill_load:*` allow rule.
|
|
208
|
+
|
|
209
|
+
When a user is prompted to approve a dynamic skill load, the allowlist options are:
|
|
210
|
+
|
|
211
|
+
| Option | Pattern | Behavior |
|
|
212
|
+
| -------------- | ------------------------------------------- | --------------------------------------------------------------------------------------------------- |
|
|
213
|
+
| Version-pinned | `skill_load_dynamic:<id>@<transitive-hash>` | Approved for this exact version only. Any change to the skill or its includes invalidates the rule. |
|
|
214
|
+
| Any-version | `skill_load_dynamic:<id>` | Approved for all versions of this skill. |
|
|
215
|
+
|
|
216
|
+
The transitive hash covers the skill's own content plus all included skills, so a change anywhere in the dependency graph triggers re-approval for version-pinned rules.
|
|
217
|
+
|
|
218
|
+
### v1 execution limits
|
|
219
|
+
|
|
220
|
+
In the initial implementation, inline command execution enforces these constraints:
|
|
221
|
+
|
|
222
|
+
| Constraint | Value |
|
|
223
|
+
| ---------------- | ------------------------------------------------------- |
|
|
224
|
+
| Execution target | Sandbox only (no host fallback) |
|
|
225
|
+
| Network access | Off (no outbound connections) |
|
|
226
|
+
| Environment | Sanitized (no API keys, tokens, or credentials) |
|
|
227
|
+
| Timeout | 10 seconds per command |
|
|
228
|
+
| Output cap | 20,000 characters (truncated with `[output truncated]`) |
|
|
229
|
+
| Binary output | Rejected if >10% non-printable characters |
|
|
230
|
+
| ANSI sequences | Stripped before output processing |
|
|
231
|
+
| stderr | Discarded (only stdout is captured) |
|
|
232
|
+
|
|
233
|
+
Commands that fail (timeout, non-zero exit, spawn failure, binary output) produce a deterministic stub in the rendered body rather than leaking raw error output:
|
|
234
|
+
|
|
235
|
+
```
|
|
236
|
+
<inline_skill_command index="0">[inline command unavailable: command timed out]</inline_skill_command>
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
### Eligible skill sources
|
|
240
|
+
|
|
241
|
+
Only **bundled**, **managed**, and **workspace** skills may use inline command expansions. Third-party **extra** skill sources are explicitly rejected — `skill_load` returns an error if an extra-source skill contains inline expansion tokens.
|
|
242
|
+
|
|
243
|
+
| Source | Eligible | Reason |
|
|
244
|
+
| ----------- | -------- | -------------------------------------- |
|
|
245
|
+
| `bundled` | Yes | Shipped with the application, trusted |
|
|
246
|
+
| `managed` | Yes | User-installed, subject to approval |
|
|
247
|
+
| `workspace` | Yes | Project-local, subject to approval |
|
|
248
|
+
| `extra` | No | Third-party roots, out of scope for v1 |
|
|
249
|
+
|
|
250
|
+
### Fail-closed summary
|
|
251
|
+
|
|
252
|
+
The system fails closed at every layer:
|
|
253
|
+
|
|
254
|
+
1. **Flag off** — skill_load returns an error, tokens never reach the model.
|
|
255
|
+
2. **Malformed syntax** — rejected by the parser, logged as errors.
|
|
256
|
+
3. **Unsupported source** — skill_load returns an error for extra-source skills.
|
|
257
|
+
4. **Command failure** — deterministic stub replaces the token, no raw stderr.
|
|
258
|
+
5. **No permission** — `skill_load_dynamic:*` namespace requires explicit approval.
|
package/package.json
CHANGED
|
@@ -332,6 +332,13 @@ mock.module("../memory/llm-request-log-store.js", () => ({
|
|
|
332
332
|
backfillMessageIdOnLogs: () => {},
|
|
333
333
|
}));
|
|
334
334
|
|
|
335
|
+
mock.module("../memory/archive-store.js", () => ({
|
|
336
|
+
insertCompactionEpisode: () => ({
|
|
337
|
+
episodeId: "mock-episode-id",
|
|
338
|
+
jobId: "mock-job-id",
|
|
339
|
+
}),
|
|
340
|
+
}));
|
|
341
|
+
|
|
335
342
|
// ── Imports (after mocks) ────────────────────────────────────────────
|
|
336
343
|
|
|
337
344
|
import {
|
|
@@ -315,6 +315,13 @@ mock.module("../agent/message-types.js", () => ({
|
|
|
315
315
|
}),
|
|
316
316
|
}));
|
|
317
317
|
|
|
318
|
+
mock.module("../memory/archive-store.js", () => ({
|
|
319
|
+
insertCompactionEpisode: () => ({
|
|
320
|
+
episodeId: "mock-episode-id",
|
|
321
|
+
jobId: "mock-job-id",
|
|
322
|
+
}),
|
|
323
|
+
}));
|
|
324
|
+
|
|
318
325
|
mock.module("../memory/llm-request-log-store.js", () => ({
|
|
319
326
|
recordRequestLog: recordRequestLogMock,
|
|
320
327
|
backfillMessageIdOnLogs: () => {},
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import { mkdtempSync, rmSync } from "node:fs";
|
|
2
|
+
import { tmpdir } from "node:os";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { afterAll, beforeEach, describe, expect, mock, test } from "bun:test";
|
|
5
|
+
|
|
6
|
+
const testDir = mkdtempSync(join(tmpdir(), "conv-dirty-tail-test-"));
|
|
7
|
+
|
|
8
|
+
mock.module("../util/platform.js", () => ({
|
|
9
|
+
getDataDir: () => testDir,
|
|
10
|
+
isMacOS: () => process.platform === "darwin",
|
|
11
|
+
isLinux: () => process.platform === "linux",
|
|
12
|
+
isWindows: () => process.platform === "win32",
|
|
13
|
+
getPidPath: () => join(testDir, "test.pid"),
|
|
14
|
+
getDbPath: () => join(testDir, "test.db"),
|
|
15
|
+
getLogPath: () => join(testDir, "test.log"),
|
|
16
|
+
ensureDataDir: () => {},
|
|
17
|
+
}));
|
|
18
|
+
|
|
19
|
+
mock.module("../util/logger.js", () => ({
|
|
20
|
+
getLogger: () =>
|
|
21
|
+
new Proxy({} as Record<string, unknown>, {
|
|
22
|
+
get: () => () => {},
|
|
23
|
+
}),
|
|
24
|
+
}));
|
|
25
|
+
|
|
26
|
+
import {
|
|
27
|
+
addMessage,
|
|
28
|
+
createConversation,
|
|
29
|
+
getConversation,
|
|
30
|
+
getMessages,
|
|
31
|
+
markConversationMemoryDirty,
|
|
32
|
+
} from "../memory/conversation-crud.js";
|
|
33
|
+
import { getDb, initializeDb, resetDb } from "../memory/db.js";
|
|
34
|
+
|
|
35
|
+
initializeDb();
|
|
36
|
+
|
|
37
|
+
afterAll(() => {
|
|
38
|
+
resetDb();
|
|
39
|
+
try {
|
|
40
|
+
rmSync(testDir, { recursive: true });
|
|
41
|
+
} catch {
|
|
42
|
+
/* best effort */
|
|
43
|
+
}
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
describe("markConversationMemoryDirty", () => {
|
|
47
|
+
beforeEach(() => {
|
|
48
|
+
const db = getDb();
|
|
49
|
+
db.run(`DELETE FROM messages`);
|
|
50
|
+
db.run(`DELETE FROM conversations`);
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
test("first message marks the conversation dirty with its message ID", async () => {
|
|
54
|
+
const conv = createConversation("test");
|
|
55
|
+
const msg = await addMessage(conv.id, "user", "hello world", undefined, {
|
|
56
|
+
skipIndexing: true,
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
const updated = getConversation(conv.id);
|
|
60
|
+
expect(updated).not.toBeNull();
|
|
61
|
+
expect(updated!.memoryDirtyTailSinceMessageId).toBe(msg.id);
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
test("repeated messages preserve the original dirty boundary", async () => {
|
|
65
|
+
const conv = createConversation("test");
|
|
66
|
+
const msg1 = await addMessage(conv.id, "user", "first message", undefined, {
|
|
67
|
+
skipIndexing: true,
|
|
68
|
+
});
|
|
69
|
+
const msg2 = await addMessage(
|
|
70
|
+
conv.id,
|
|
71
|
+
"assistant",
|
|
72
|
+
"second message",
|
|
73
|
+
undefined,
|
|
74
|
+
{ skipIndexing: true },
|
|
75
|
+
);
|
|
76
|
+
|
|
77
|
+
const updated = getConversation(conv.id);
|
|
78
|
+
expect(updated).not.toBeNull();
|
|
79
|
+
// The dirty tail should still point to msg1, not msg2.
|
|
80
|
+
expect(updated!.memoryDirtyTailSinceMessageId).toBe(msg1.id);
|
|
81
|
+
// msg2 should still be persisted normally.
|
|
82
|
+
expect(msg2.id).not.toBe(msg1.id);
|
|
83
|
+
});
|
|
84
|
+
|
|
85
|
+
test("markConversationMemoryDirty is a no-op when already dirty", () => {
|
|
86
|
+
const conv = createConversation("test");
|
|
87
|
+
const firstMessageId = "first-msg-id";
|
|
88
|
+
const secondMessageId = "second-msg-id";
|
|
89
|
+
|
|
90
|
+
markConversationMemoryDirty(conv.id, firstMessageId);
|
|
91
|
+
const after1 = getConversation(conv.id);
|
|
92
|
+
expect(after1!.memoryDirtyTailSinceMessageId).toBe(firstMessageId);
|
|
93
|
+
|
|
94
|
+
markConversationMemoryDirty(conv.id, secondMessageId);
|
|
95
|
+
const after2 = getConversation(conv.id);
|
|
96
|
+
// Still points to the first message — boundary preserved.
|
|
97
|
+
expect(after2!.memoryDirtyTailSinceMessageId).toBe(firstMessageId);
|
|
98
|
+
});
|
|
99
|
+
|
|
100
|
+
test("message ordering and persistence semantics are unchanged", async () => {
|
|
101
|
+
const conv = createConversation("test");
|
|
102
|
+
const msg1 = await addMessage(conv.id, "user", "question", undefined, {
|
|
103
|
+
skipIndexing: true,
|
|
104
|
+
});
|
|
105
|
+
const msg2 = await addMessage(conv.id, "assistant", "answer", undefined, {
|
|
106
|
+
skipIndexing: true,
|
|
107
|
+
});
|
|
108
|
+
const msg3 = await addMessage(conv.id, "user", "follow-up", undefined, {
|
|
109
|
+
skipIndexing: true,
|
|
110
|
+
});
|
|
111
|
+
|
|
112
|
+
const allMessages = getMessages(conv.id);
|
|
113
|
+
expect(allMessages).toHaveLength(3);
|
|
114
|
+
// Messages are ordered by createdAt ascending.
|
|
115
|
+
expect(allMessages[0].id).toBe(msg1.id);
|
|
116
|
+
expect(allMessages[1].id).toBe(msg2.id);
|
|
117
|
+
expect(allMessages[2].id).toBe(msg3.id);
|
|
118
|
+
expect(allMessages[0].content).toBe("question");
|
|
119
|
+
expect(allMessages[1].content).toBe("answer");
|
|
120
|
+
expect(allMessages[2].content).toBe("follow-up");
|
|
121
|
+
// createdAt is monotonically increasing.
|
|
122
|
+
expect(allMessages[1].createdAt).toBeGreaterThan(allMessages[0].createdAt);
|
|
123
|
+
expect(allMessages[2].createdAt).toBeGreaterThan(allMessages[1].createdAt);
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
test("every persisted message marks the conversation dirty", async () => {
|
|
127
|
+
const conv = createConversation("test");
|
|
128
|
+
|
|
129
|
+
// Before any messages, the conversation is not dirty.
|
|
130
|
+
const before = getConversation(conv.id);
|
|
131
|
+
expect(before!.memoryDirtyTailSinceMessageId).toBeNull();
|
|
132
|
+
|
|
133
|
+
// After the first message, it becomes dirty.
|
|
134
|
+
const msg1 = await addMessage(conv.id, "user", "msg1", undefined, {
|
|
135
|
+
skipIndexing: true,
|
|
136
|
+
});
|
|
137
|
+
const after1 = getConversation(conv.id);
|
|
138
|
+
expect(after1!.memoryDirtyTailSinceMessageId).toBe(msg1.id);
|
|
139
|
+
|
|
140
|
+
// After subsequent messages, the dirty boundary stays on msg1.
|
|
141
|
+
await addMessage(conv.id, "assistant", "msg2", undefined, {
|
|
142
|
+
skipIndexing: true,
|
|
143
|
+
});
|
|
144
|
+
await addMessage(conv.id, "user", "msg3", undefined, {
|
|
145
|
+
skipIndexing: true,
|
|
146
|
+
});
|
|
147
|
+
const afterAll = getConversation(conv.id);
|
|
148
|
+
expect(afterAll!.memoryDirtyTailSinceMessageId).toBe(msg1.id);
|
|
149
|
+
});
|
|
150
|
+
});
|
|
@@ -27,6 +27,9 @@ mock.module("../providers/registry.js", () => ({
|
|
|
27
27
|
mock.module("../config/loader.js", () => ({
|
|
28
28
|
getConfig: () => ({
|
|
29
29
|
ui: {},
|
|
30
|
+
daemon: {
|
|
31
|
+
titleGenerationMaxTokens: 30,
|
|
32
|
+
},
|
|
30
33
|
|
|
31
34
|
provider: "mock-provider",
|
|
32
35
|
maxTokens: 4096,
|
|
@@ -174,6 +177,10 @@ mock.module("../memory/conversation-queries.js", () => ({
|
|
|
174
177
|
listConversations: () => [],
|
|
175
178
|
}));
|
|
176
179
|
|
|
180
|
+
mock.module("../memory/archive-store.js", () => ({
|
|
181
|
+
insertCompactionEpisode: () => {},
|
|
182
|
+
}));
|
|
183
|
+
|
|
177
184
|
mock.module("../memory/retriever.js", () => ({
|
|
178
185
|
buildMemoryRecall: async () => ({
|
|
179
186
|
enabled: false,
|
|
@@ -26,6 +26,7 @@ mock.module("../util/logger.js", () => ({
|
|
|
26
26
|
import {
|
|
27
27
|
addMessage,
|
|
28
28
|
createConversation,
|
|
29
|
+
deleteConversation,
|
|
29
30
|
getConversation,
|
|
30
31
|
getMessages,
|
|
31
32
|
wipeConversation,
|
|
@@ -436,3 +437,228 @@ describe("wipeConversation", () => {
|
|
|
436
437
|
expect(itemBRow).not.toBeNull();
|
|
437
438
|
});
|
|
438
439
|
});
|
|
440
|
+
|
|
441
|
+
describe("deleteConversation — private scope cleanup", () => {
|
|
442
|
+
beforeEach(() => {
|
|
443
|
+
const db = getDb();
|
|
444
|
+
db.run(`DELETE FROM conversation_starters`);
|
|
445
|
+
db.run(`DELETE FROM memory_item_sources`);
|
|
446
|
+
db.run(`DELETE FROM memory_segments`);
|
|
447
|
+
db.run(`DELETE FROM memory_items`);
|
|
448
|
+
db.run(`DELETE FROM memory_summaries`);
|
|
449
|
+
db.run(`DELETE FROM memory_embeddings`);
|
|
450
|
+
db.run(`DELETE FROM memory_jobs`);
|
|
451
|
+
db.run(`DELETE FROM tool_invocations`);
|
|
452
|
+
db.run(`DELETE FROM llm_request_logs`);
|
|
453
|
+
db.run(`DELETE FROM messages`);
|
|
454
|
+
db.run(`DELETE FROM conversations`);
|
|
455
|
+
});
|
|
456
|
+
|
|
457
|
+
test("sourceless items cleaned up", () => {
|
|
458
|
+
const conv = createConversation({ conversationType: "private" });
|
|
459
|
+
const scopeId = conv.memoryScopeId;
|
|
460
|
+
const now = Date.now();
|
|
461
|
+
|
|
462
|
+
const raw = (
|
|
463
|
+
getDb() as unknown as {
|
|
464
|
+
$client: import("bun:sqlite").Database;
|
|
465
|
+
}
|
|
466
|
+
).$client;
|
|
467
|
+
|
|
468
|
+
// Insert a memory item with matching scopeId but no memory_item_sources
|
|
469
|
+
raw
|
|
470
|
+
.query(
|
|
471
|
+
`INSERT INTO memory_items (id, status, kind, subject, statement, confidence, fingerprint, scope_id, first_seen_at, last_seen_at)
|
|
472
|
+
VALUES ('priv-item-1', 'active', 'fact', 'test', 'test fact', 0.8, 'fp-priv-1', ?, ?, ?)`,
|
|
473
|
+
)
|
|
474
|
+
.run(scopeId, now, now);
|
|
475
|
+
|
|
476
|
+
const result = deleteConversation(conv.id);
|
|
477
|
+
|
|
478
|
+
// Item should be gone
|
|
479
|
+
const itemRow = raw
|
|
480
|
+
.query("SELECT * FROM memory_items WHERE id = 'priv-item-1'")
|
|
481
|
+
.get();
|
|
482
|
+
expect(itemRow).toBeNull();
|
|
483
|
+
|
|
484
|
+
// Its ID should be in orphanedItemIds
|
|
485
|
+
expect(result.orphanedItemIds).toContain("priv-item-1");
|
|
486
|
+
});
|
|
487
|
+
|
|
488
|
+
test("summaries cleaned up", () => {
|
|
489
|
+
const conv = createConversation({ conversationType: "private" });
|
|
490
|
+
const scopeId = conv.memoryScopeId;
|
|
491
|
+
const now = Date.now();
|
|
492
|
+
|
|
493
|
+
const raw = (
|
|
494
|
+
getDb() as unknown as {
|
|
495
|
+
$client: import("bun:sqlite").Database;
|
|
496
|
+
}
|
|
497
|
+
).$client;
|
|
498
|
+
|
|
499
|
+
// Insert a memory summary with matching scopeId
|
|
500
|
+
raw
|
|
501
|
+
.query(
|
|
502
|
+
`INSERT INTO memory_summaries (id, scope, scope_key, summary, token_estimate, version, scope_id, start_at, end_at, created_at, updated_at)
|
|
503
|
+
VALUES ('priv-sum-1', 'global', 'all', 'private summary', 100, 1, ?, ?, ?, ?, ?)`,
|
|
504
|
+
)
|
|
505
|
+
.run(scopeId, now, now, now, now);
|
|
506
|
+
|
|
507
|
+
const result = deleteConversation(conv.id);
|
|
508
|
+
|
|
509
|
+
// Summary should be gone
|
|
510
|
+
const summaryRow = raw
|
|
511
|
+
.query("SELECT * FROM memory_summaries WHERE id = 'priv-sum-1'")
|
|
512
|
+
.get();
|
|
513
|
+
expect(summaryRow).toBeNull();
|
|
514
|
+
|
|
515
|
+
// Its ID should be in deletedSummaryIds
|
|
516
|
+
expect(result.deletedSummaryIds).toContain("priv-sum-1");
|
|
517
|
+
});
|
|
518
|
+
|
|
519
|
+
test("standard conversations unaffected", async () => {
|
|
520
|
+
const conv = createConversation("standard test");
|
|
521
|
+
const now = Date.now();
|
|
522
|
+
|
|
523
|
+
const raw = (
|
|
524
|
+
getDb() as unknown as {
|
|
525
|
+
$client: import("bun:sqlite").Database;
|
|
526
|
+
}
|
|
527
|
+
).$client;
|
|
528
|
+
|
|
529
|
+
// Insert items with scopeId = "default"
|
|
530
|
+
raw
|
|
531
|
+
.query(
|
|
532
|
+
`INSERT INTO memory_items (id, status, kind, subject, statement, confidence, fingerprint, scope_id, first_seen_at, last_seen_at)
|
|
533
|
+
VALUES ('default-item-1', 'active', 'fact', 'test', 'test fact', 0.8, 'fp-default', 'default', ?, ?)`,
|
|
534
|
+
)
|
|
535
|
+
.run(now, now);
|
|
536
|
+
|
|
537
|
+
deleteConversation(conv.id);
|
|
538
|
+
|
|
539
|
+
// Default-scope items should still exist
|
|
540
|
+
const itemRow = raw
|
|
541
|
+
.query("SELECT * FROM memory_items WHERE id = 'default-item-1'")
|
|
542
|
+
.get();
|
|
543
|
+
expect(itemRow).not.toBeNull();
|
|
544
|
+
});
|
|
545
|
+
|
|
546
|
+
test("embeddings cleaned up", () => {
|
|
547
|
+
const conv = createConversation({ conversationType: "private" });
|
|
548
|
+
const scopeId = conv.memoryScopeId;
|
|
549
|
+
const now = Date.now();
|
|
550
|
+
|
|
551
|
+
const raw = (
|
|
552
|
+
getDb() as unknown as {
|
|
553
|
+
$client: import("bun:sqlite").Database;
|
|
554
|
+
}
|
|
555
|
+
).$client;
|
|
556
|
+
|
|
557
|
+
// Insert a memory item with matching scopeId
|
|
558
|
+
raw
|
|
559
|
+
.query(
|
|
560
|
+
`INSERT INTO memory_items (id, status, kind, subject, statement, confidence, fingerprint, scope_id, first_seen_at, last_seen_at)
|
|
561
|
+
VALUES ('priv-item-emb', 'active', 'fact', 'test', 'test fact', 0.8, 'fp-priv-emb', ?, ?, ?)`,
|
|
562
|
+
)
|
|
563
|
+
.run(scopeId, now, now);
|
|
564
|
+
|
|
565
|
+
// Insert a corresponding embedding
|
|
566
|
+
raw
|
|
567
|
+
.query(
|
|
568
|
+
`INSERT INTO memory_embeddings (id, target_type, target_id, provider, model, dimensions, created_at, updated_at)
|
|
569
|
+
VALUES ('emb-priv-item', 'item', 'priv-item-emb', 'test', 'test', 384, ?, ?)`,
|
|
570
|
+
)
|
|
571
|
+
.run(now, now);
|
|
572
|
+
|
|
573
|
+
deleteConversation(conv.id);
|
|
574
|
+
|
|
575
|
+
// Both item and embedding should be deleted
|
|
576
|
+
const itemRow = raw
|
|
577
|
+
.query("SELECT * FROM memory_items WHERE id = 'priv-item-emb'")
|
|
578
|
+
.get();
|
|
579
|
+
expect(itemRow).toBeNull();
|
|
580
|
+
|
|
581
|
+
const embeddingRow = raw
|
|
582
|
+
.query("SELECT * FROM memory_embeddings WHERE id = 'emb-priv-item'")
|
|
583
|
+
.get();
|
|
584
|
+
expect(embeddingRow).toBeNull();
|
|
585
|
+
});
|
|
586
|
+
|
|
587
|
+
test("conversationStarters cleaned up", () => {
|
|
588
|
+
const conv = createConversation({ conversationType: "private" });
|
|
589
|
+
const scopeId = conv.memoryScopeId;
|
|
590
|
+
const now = Date.now();
|
|
591
|
+
|
|
592
|
+
const raw = (
|
|
593
|
+
getDb() as unknown as {
|
|
594
|
+
$client: import("bun:sqlite").Database;
|
|
595
|
+
}
|
|
596
|
+
).$client;
|
|
597
|
+
|
|
598
|
+
// Insert a conversation_starters row with the private scopeId
|
|
599
|
+
raw
|
|
600
|
+
.query(
|
|
601
|
+
`INSERT INTO conversation_starters (id, label, prompt, generation_batch, scope_id, card_type, created_at)
|
|
602
|
+
VALUES ('starter-1', 'Test starter', 'Tell me about tests', 1, ?, 'chip', ?)`,
|
|
603
|
+
)
|
|
604
|
+
.run(scopeId, now);
|
|
605
|
+
|
|
606
|
+
// Also insert a default-scope starter that should NOT be deleted
|
|
607
|
+
raw
|
|
608
|
+
.query(
|
|
609
|
+
`INSERT INTO conversation_starters (id, label, prompt, generation_batch, scope_id, card_type, created_at)
|
|
610
|
+
VALUES ('starter-default', 'Default starter', 'Hello', 1, 'default', 'chip', ?)`,
|
|
611
|
+
)
|
|
612
|
+
.run(now);
|
|
613
|
+
|
|
614
|
+
deleteConversation(conv.id);
|
|
615
|
+
|
|
616
|
+
// Private-scope starter should be gone
|
|
617
|
+
const starterRow = raw
|
|
618
|
+
.query("SELECT * FROM conversation_starters WHERE id = 'starter-1'")
|
|
619
|
+
.get();
|
|
620
|
+
expect(starterRow).toBeNull();
|
|
621
|
+
|
|
622
|
+
// Default-scope starter should still exist
|
|
623
|
+
const defaultStarterRow = raw
|
|
624
|
+
.query("SELECT * FROM conversation_starters WHERE id = 'starter-default'")
|
|
625
|
+
.get();
|
|
626
|
+
expect(defaultStarterRow).not.toBeNull();
|
|
627
|
+
});
|
|
628
|
+
|
|
629
|
+
test("no duplicate IDs", async () => {
|
|
630
|
+
const conv = createConversation({ conversationType: "private" });
|
|
631
|
+
const scopeId = conv.memoryScopeId;
|
|
632
|
+
const msg = await addMessage(conv.id, "user", "hello");
|
|
633
|
+
const now = Date.now();
|
|
634
|
+
|
|
635
|
+
const raw = (
|
|
636
|
+
getDb() as unknown as {
|
|
637
|
+
$client: import("bun:sqlite").Database;
|
|
638
|
+
}
|
|
639
|
+
).$client;
|
|
640
|
+
|
|
641
|
+
// Insert a memory item with the private scopeId AND a source linking to the message
|
|
642
|
+
raw
|
|
643
|
+
.query(
|
|
644
|
+
`INSERT INTO memory_items (id, status, kind, subject, statement, confidence, fingerprint, scope_id, first_seen_at, last_seen_at)
|
|
645
|
+
VALUES ('priv-item-dup', 'active', 'fact', 'test', 'test fact', 0.8, 'fp-priv-dup', ?, ?, ?)`,
|
|
646
|
+
)
|
|
647
|
+
.run(scopeId, now, now);
|
|
648
|
+
|
|
649
|
+
raw
|
|
650
|
+
.query(
|
|
651
|
+
`INSERT INTO memory_item_sources (memory_item_id, message_id, created_at) VALUES ('priv-item-dup', ?, ?)`,
|
|
652
|
+
)
|
|
653
|
+
.run(msg.id, now);
|
|
654
|
+
|
|
655
|
+
const result = deleteConversation(conv.id);
|
|
656
|
+
|
|
657
|
+
// The item ID should appear exactly once in orphanedItemIds (caught by
|
|
658
|
+
// source-based cleanup, not double-counted by scope sweep).
|
|
659
|
+
const count = result.orphanedItemIds.filter(
|
|
660
|
+
(id) => id === "priv-item-dup",
|
|
661
|
+
).length;
|
|
662
|
+
expect(count).toBe(1);
|
|
663
|
+
});
|
|
664
|
+
});
|