@vellumai/assistant 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/ARCHITECTURE.md +109 -0
  2. package/docs/skills.md +100 -0
  3. package/package.json +1 -1
  4. package/src/__tests__/conversation-agent-loop-overflow.test.ts +7 -0
  5. package/src/__tests__/conversation-agent-loop.test.ts +7 -0
  6. package/src/__tests__/conversation-memory-dirty-tail.test.ts +150 -0
  7. package/src/__tests__/conversation-provider-retry-repair.test.ts +7 -0
  8. package/src/__tests__/conversation-wipe.test.ts +226 -0
  9. package/src/__tests__/db-memory-archive-migration.test.ts +372 -0
  10. package/src/__tests__/db-memory-brief-state-migration.test.ts +213 -0
  11. package/src/__tests__/db-memory-reducer-checkpoints.test.ts +273 -0
  12. package/src/__tests__/inline-command-runner.test.ts +311 -0
  13. package/src/__tests__/inline-skill-authoring-guard.test.ts +220 -0
  14. package/src/__tests__/inline-skill-load-permissions.test.ts +435 -0
  15. package/src/__tests__/list-messages-attachments.test.ts +96 -0
  16. package/src/__tests__/memory-brief-open-loops.test.ts +530 -0
  17. package/src/__tests__/memory-brief-time.test.ts +285 -0
  18. package/src/__tests__/memory-brief-wrapper.test.ts +311 -0
  19. package/src/__tests__/memory-chunk-archive.test.ts +400 -0
  20. package/src/__tests__/memory-chunk-dual-write.test.ts +453 -0
  21. package/src/__tests__/memory-episode-archive.test.ts +370 -0
  22. package/src/__tests__/memory-episode-dual-write.test.ts +626 -0
  23. package/src/__tests__/memory-observation-archive.test.ts +375 -0
  24. package/src/__tests__/memory-observation-dual-write.test.ts +318 -0
  25. package/src/__tests__/memory-recall-quality.test.ts +2 -2
  26. package/src/__tests__/memory-reducer-store.test.ts +728 -0
  27. package/src/__tests__/memory-reducer-types.test.ts +699 -0
  28. package/src/__tests__/memory-reducer.test.ts +698 -0
  29. package/src/__tests__/memory-regressions.test.ts +6 -4
  30. package/src/__tests__/memory-simplified-config.test.ts +281 -0
  31. package/src/__tests__/parse-identity-fields.test.ts +129 -0
  32. package/src/__tests__/skill-load-inline-command.test.ts +598 -0
  33. package/src/__tests__/skill-load-inline-includes.test.ts +644 -0
  34. package/src/__tests__/skills-inline-command-expansions.test.ts +301 -0
  35. package/src/__tests__/skills-transitive-hash.test.ts +333 -0
  36. package/src/__tests__/vellum-self-knowledge-inline-command.test.ts +320 -0
  37. package/src/__tests__/workspace-migration-backfill-installation-id.test.ts +4 -4
  38. package/src/config/bundled-skills/app-builder/SKILL.md +8 -8
  39. package/src/config/bundled-skills/skill-management/SKILL.md +1 -1
  40. package/src/config/bundled-skills/skill-management/TOOLS.json +2 -2
  41. package/src/config/feature-flag-registry.json +16 -0
  42. package/src/config/loader.ts +1 -0
  43. package/src/config/raw-config-utils.ts +28 -0
  44. package/src/config/schema.ts +12 -0
  45. package/src/config/schemas/memory-simplified.ts +101 -0
  46. package/src/config/schemas/memory.ts +4 -0
  47. package/src/config/skills.ts +50 -4
  48. package/src/daemon/conversation-agent-loop-handlers.ts +8 -3
  49. package/src/daemon/conversation-agent-loop.ts +71 -1
  50. package/src/daemon/conversation-lifecycle.ts +11 -1
  51. package/src/daemon/conversation-runtime-assembly.ts +2 -1
  52. package/src/daemon/conversation-surfaces.ts +31 -8
  53. package/src/daemon/conversation.ts +40 -23
  54. package/src/daemon/handlers/config-embeddings.ts +10 -2
  55. package/src/daemon/handlers/config-model.ts +0 -9
  56. package/src/daemon/handlers/identity.ts +12 -1
  57. package/src/daemon/lifecycle.ts +9 -1
  58. package/src/daemon/message-types/conversations.ts +0 -1
  59. package/src/daemon/server.ts +1 -1
  60. package/src/followups/followup-store.ts +47 -1
  61. package/src/memory/archive-store.ts +400 -0
  62. package/src/memory/brief-formatting.ts +33 -0
  63. package/src/memory/brief-open-loops.ts +266 -0
  64. package/src/memory/brief-time.ts +161 -0
  65. package/src/memory/brief.ts +75 -0
  66. package/src/memory/conversation-crud.ts +245 -101
  67. package/src/memory/db-init.ts +12 -0
  68. package/src/memory/indexer.ts +106 -15
  69. package/src/memory/job-handlers/embedding.test.ts +1 -0
  70. package/src/memory/job-handlers/embedding.ts +83 -0
  71. package/src/memory/job-utils.ts +1 -1
  72. package/src/memory/jobs-store.ts +6 -0
  73. package/src/memory/jobs-worker.ts +12 -0
  74. package/src/memory/migrations/185-memory-brief-state.ts +52 -0
  75. package/src/memory/migrations/186-memory-archive.ts +109 -0
  76. package/src/memory/migrations/187-memory-reducer-checkpoints.ts +19 -0
  77. package/src/memory/migrations/index.ts +3 -0
  78. package/src/memory/qdrant-client.ts +23 -4
  79. package/src/memory/reducer-store.ts +271 -0
  80. package/src/memory/reducer-types.ts +99 -0
  81. package/src/memory/reducer.ts +453 -0
  82. package/src/memory/schema/conversations.ts +3 -0
  83. package/src/memory/schema/index.ts +2 -0
  84. package/src/memory/schema/memory-archive.ts +121 -0
  85. package/src/memory/schema/memory-brief.ts +55 -0
  86. package/src/memory/search/semantic.ts +17 -4
  87. package/src/oauth/oauth-store.ts +3 -1
  88. package/src/permissions/checker.ts +89 -6
  89. package/src/permissions/defaults.ts +14 -0
  90. package/src/runtime/routes/conversation-management-routes.ts +6 -0
  91. package/src/runtime/routes/conversation-query-routes.ts +7 -0
  92. package/src/runtime/routes/conversation-routes.ts +52 -5
  93. package/src/runtime/routes/identity-routes.ts +2 -35
  94. package/src/runtime/routes/llm-context-normalization.ts +14 -1
  95. package/src/runtime/routes/memory-item-routes.ts +90 -5
  96. package/src/runtime/routes/secret-routes.ts +2 -0
  97. package/src/runtime/routes/surface-action-routes.ts +68 -1
  98. package/src/schedule/schedule-store.ts +21 -0
  99. package/src/skills/inline-command-expansions.ts +204 -0
  100. package/src/skills/inline-command-render.ts +127 -0
  101. package/src/skills/inline-command-runner.ts +242 -0
  102. package/src/skills/transitive-version-hash.ts +88 -0
  103. package/src/tasks/task-store.ts +43 -1
  104. package/src/tools/permission-checker.ts +8 -1
  105. package/src/tools/skills/load.ts +140 -6
  106. package/src/util/platform.ts +18 -0
  107. package/src/workspace/migrations/{002-backfill-installation-id.ts → 011-backfill-installation-id.ts} +1 -1
  108. package/src/workspace/migrations/registry.ts +1 -1
package/ARCHITECTURE.md CHANGED
@@ -1261,6 +1261,115 @@ graph TB
1261
1261
  TRUST -->|"Deny rule matches"| DENY["Blocked"]
1262
1262
  ```
1263
1263
 
1264
+ ### Inline Skill Command Expansion
1265
+
1266
+ Skills can embed dynamic shell output in their SKILL.md body using `!`command`` tokens. When `skill_load` processes a skill containing these tokens, the commands are executed at load time through a sandboxed runner and their output is substituted inline. This enables externally authored skills to include project-specific context (e.g., directory listings, config values) without requiring manual edits.
1267
+
1268
+ **Feature flag:** `feature_flags.inline-skill-commands.enabled` (default: enabled). When disabled, loading a skill that contains `!`command`` tokens fails closed with an error rather than leaving raw tokens in the prompt.
1269
+
1270
+ #### Syntax and Parsing
1271
+
1272
+ The `!`command`` syntax is parsed by `parseInlineCommandExpansions()` from the SKILL.md body after frontmatter extraction. The parser:
1273
+
1274
+ - Extracts all `!`command`` tokens outside fenced code blocks (documentation examples in fenced blocks are ignored)
1275
+ - Assigns each token a stable `placeholderId` (0-indexed encounter order)
1276
+ - Rejects malformed tokens fail-closed: empty commands, nested backticks, and unmatched opening backticks produce `InlineCommandExpansionError` entries rather than best-effort expansions
1277
+
1278
+ #### Transitive Version Hash
1279
+
1280
+ When a skill contains inline command expansions, the permission system computes a **transitive version hash** (`tv1:<sha256>`) that covers the root skill and all its included children (DFS pre-order). The hash folds:
1281
+
1282
+ 1. Each visited skill ID (graph structure)
1283
+ 2. Each visited skill's directory content hash (file changes)
1284
+
1285
+ Editing any file in the root skill or any included child invalidates the transitive hash, which forces re-approval. The hash is computed by `computeTransitiveSkillVersionHash()` and fails closed (`TransitiveHashError`) on missing children or cycles in the include graph.
1286
+
1287
+ #### Permission Gating (`skill_load_dynamic:*`)
1288
+
1289
+ Skills containing inline command expansions use a separate permission candidate namespace (`skill_load_dynamic:*`) instead of the normal `skill_load:*` namespace. This prevents them from falling through to the permissive default `skill_load:*` allow rule. The permission checker emits candidates in specificity order:
1290
+
1291
+ 1. `skill_load_dynamic:<skill-id>@<transitive-hash>` — version-pinned approval (most specific)
1292
+ 2. `skill_load_dynamic:<skill-id>` — any-version approval
1293
+
1294
+ A default ask rule at priority 200 (`default:ask-skill_load_dynamic-global`) catches these candidates, ensuring that, unless a matching trust rule already applies, the guardian is prompted before inline commands execute. The user can create a pinned trust rule for a specific transitive hash to auto-approve known-good versions. Non-interactive sessions (no human present) deny dynamic skill loads rather than silently auto-approving.
1295
+
1296
+ ```mermaid
1297
+ graph TB
1298
+ LOAD["skill_load(selector)"] --> PARSE["Parse SKILL.md body"]
1299
+ PARSE --> CHECK{"Has !\x60command\x60<br/>tokens?"}
1300
+ CHECK -->|"No"| NORMAL["Normal skill_load:* candidate<br/>(auto-allowed)"]
1301
+ CHECK -->|"Yes"| FLAG{"inline-skill-commands<br/>flag enabled?"}
1302
+ FLAG -->|"No"| FAIL_FLAG["Fail closed:<br/>error returned"]
1303
+ FLAG -->|"Yes"| SOURCE{"Eligible source?<br/>(bundled/managed/workspace)"}
1304
+ SOURCE -->|"No (extra)"| FAIL_SOURCE["Fail closed:<br/>source not eligible"]
1305
+ SOURCE -->|"Yes"| HASH["Compute transitive hash"]
1306
+ HASH --> DYN["skill_load_dynamic:id@hash<br/>candidate emitted"]
1307
+ DYN --> PERM["PermissionChecker"]
1308
+ PERM --> RULE{"Trust rule?"}
1309
+ RULE -->|"Pinned allow"| RENDER["Execute + render"]
1310
+ RULE -->|"No rule"| PROMPT["Prompt guardian"]
1311
+ RULE -->|"Deny"| DENY["Blocked"]
1312
+ ```
1313
+
1314
+ #### Sandbox-Only Execution
1315
+
1316
+ Inline commands are executed through `runInlineCommand()`, a purpose-built sandbox runner with strict security constraints:
1317
+
1318
+ - **Sandbox enforced**: The sandbox is always enabled with `networkMode: "off"` — no outbound network connections
1319
+ - **Sanitized environment**: Uses `buildSanitizedEnv()` — no API keys, tokens, credentials, gateway URLs, or workspace paths in the environment
1320
+ - **No host fallback**: Unlike the general `bash` tool, there is no fallback to host execution when the sandbox is unavailable
1321
+ - **No credential proxy**: No CES client, no credential materialization
1322
+ - **Timeout**: 10-second wall-clock limit (killed with SIGKILL on timeout)
1323
+ - **Output cap**: 20,000 characters maximum (truncated with `[output truncated]` marker)
1324
+ - **Binary rejection**: Output with >10% non-printable characters (after ANSI stripping) is rejected
1325
+ - **Stdout only**: stderr is discarded; ANSI escape sequences are stripped from stdout
1326
+
1327
+ The runner returns a deterministic `InlineCommandResult` with machine-readable failure reasons (`timeout`, `non_zero_exit`, `binary_output`, `spawn_failure`) — raw stderr is never surfaced.
1328
+
1329
+ #### Rendering Flow
1330
+
1331
+ The `renderInlineCommands()` function processes expansions sequentially (not in parallel) to maintain deterministic order. Each `!`command`` token is replaced with an XML-wrapped result:
1332
+
1333
+ - **Success**: `<inline_skill_command index="N">...output...</inline_skill_command>`
1334
+ - **Failure**: `<inline_skill_command index="N">[inline command unavailable: <reason>]</inline_skill_command>`
1335
+
1336
+ Rendering applies at two levels during `skill_load`:
1337
+
1338
+ 1. **Root skill**: If the loaded skill has inline expansions, they are rendered before the skill body is emitted. A root skill with inline commands that fail the feature-flag or source-eligibility check returns an error (fail closed, no `<loaded_skill>` marker).
1339
+ 2. **Included children**: Each included child skill's body is rendered independently. A render failure in one child does not prevent sibling rendering — the failed child's body falls back to raw (unexpanded) text with a warning log.
1340
+
1341
+ #### v1 Source Restriction
1342
+
1343
+ In the initial release, only skills from **bundled**, **managed**, and **workspace** sources are eligible for inline command expansion. Skills from **extra** (third-party) roots are explicitly rejected with an error message. The `INLINE_COMMAND_ELIGIBLE_SOURCES` set in `load.ts` enforces this restriction. Unknown or future source types also fail closed.
1344
+
1345
+ #### Fail-Closed Behavior Summary
1346
+
1347
+ Every layer in the pipeline defaults to rejection rather than silent degradation:
1348
+
1349
+ | Layer | Failure mode | Behavior |
1350
+ | ---------------- | ---------------------------------------------------- | ------------------------------------------------------ |
1351
+ | Parser | Malformed token (empty, nested backtick, unmatched) | Logged as error, not expanded |
1352
+ | Feature flag | Flag disabled | `skill_load` returns error, no `<loaded_skill>` marker |
1353
+ | Source check | `extra` or unknown source | `skill_load` returns error, no `<loaded_skill>` marker |
1354
+ | Transitive hash | Missing child or cycle in include graph | `TransitiveHashError` thrown, permission check fails |
1355
+ | Permission | No trust rule and non-interactive | Denied (never silently auto-approved) |
1356
+ | Sandbox runner | Timeout, non-zero exit, binary output, spawn failure | Deterministic stub rendered, no raw stderr |
1357
+ | Renderer (root) | Feature flag off or ineligible source | Error returned from `skill_load` |
1358
+ | Renderer (child) | Exception during render | Raw body used, sibling rendering continues |
1359
+
1360
+ #### Key Source Files
1361
+
1362
+ | File | Role |
1363
+ | --------------------------------------------------- | -------------------------------------------------------------------------------- |
1364
+ | `assistant/src/skills/inline-command-expansions.ts` | `parseInlineCommandExpansions()` — parser for `!`command`` tokens |
1365
+ | `assistant/src/skills/inline-command-runner.ts` | `runInlineCommand()` — sandbox-only command executor |
1366
+ | `assistant/src/skills/inline-command-render.ts` | `renderInlineCommands()` — token replacement and XML wrapping |
1367
+ | `assistant/src/skills/transitive-version-hash.ts` | `computeTransitiveSkillVersionHash()` — hash covering root + included children |
1368
+ | `assistant/src/tools/skills/load.ts` | `skill_load` execute path — feature flag check, source check, render integration |
1369
+ | `assistant/src/permissions/checker.ts` | `skill_load_dynamic:*` candidate emission and allowlist options |
1370
+ | `assistant/src/permissions/defaults.ts` | `default:ask-skill_load_dynamic-global` rule (priority 200) |
1371
+ | `meta/feature-flags/feature-flag-registry.json` | `inline-skill-commands` flag definition |
1372
+
1264
1373
  ### Key Source Files
1265
1374
 
1266
1375
  | File | Role |
package/docs/skills.md CHANGED
@@ -156,3 +156,103 @@ Trust rules are stored in `~/.vellum/protected/trust.json`. You can inspect this
156
156
  ### "A skill tool keeps prompting even though I approved it."
157
157
 
158
158
  Check whether the rule has the correct `executionTarget` — a rule scoped to `sandbox` will not match a tool running on `host`.
159
+
160
+ ## Inline Command Expansions
161
+
162
+ Skills can embed dynamic content by using the **inline command expansion** syntax. When a skill containing these tokens is loaded, each token is executed and replaced with its output before the skill body is delivered to the model. The syntax is shown in the fenced block below.
163
+
164
+ This syntax is intentionally compatible with the convention established by [inline skill commands](https://x.com) for portable cross-agent skill authoring. Vellum adopts the exact same token format so that externally authored skills load without rewriting — but applies stricter execution constraints.
165
+
166
+ ### Syntax
167
+
168
+ The canonical syntax is:
169
+
170
+ ```
171
+ !`command`
172
+ ```
173
+
174
+ Where `command` is any shell command string. The exclamation mark immediately precedes the opening backtick with no whitespace in between. Examples:
175
+
176
+ ```markdown
177
+ Current branch: !`git branch --show-current`
178
+ Recent changes: !`git log --oneline -5`
179
+ Project info: !`cat package.json | jq '.name, .version'`
180
+ ```
181
+
182
+ Tokens inside fenced code blocks (` ``` ` or `~~~`) are **not** expanded — they are treated as documentation examples. This allows skills to safely include syntax examples without triggering execution.
183
+
184
+ ### Parsing rules
185
+
186
+ The parser (`parseInlineCommandExpansions`) enforces fail-closed semantics:
187
+
188
+ | Condition | Behavior |
189
+ | ------------------------------------------------- | ---------------------- |
190
+ | Well-formed token outside fenced code | Parsed as an expansion |
191
+ | Token inside a fenced code block | Skipped (not expanded) |
192
+ | Empty command text (no content between backticks) | Rejected as malformed |
193
+ | Whitespace-only command text | Rejected as malformed |
194
+ | Unmatched opening (no closing backtick found) | Rejected as malformed |
195
+ | Nested backticks inside command text | Rejected as malformed |
196
+
197
+ Malformed tokens do not silently pass through — they are collected as errors and logged. If a skill body contains any malformed tokens, the valid tokens are still expanded, but the errors are reported for diagnostics.
198
+
199
+ ### Feature flag
200
+
201
+ Inline command expansion is gated by the `inline-skill-commands` feature flag (key: `feature_flags.inline-skill-commands.enabled`). The flag defaults to **enabled**.
202
+
203
+ When the flag is disabled and a skill contains inline command expansion tokens, `skill_load` returns an error rather than delivering unexpanded tokens to the model. This fail-closed behavior prevents the LLM from seeing raw expansion tokens and attempting to interpret them.
204
+
205
+ ### Approval model
206
+
207
+ Skills with inline command expansions use a separate permission namespace: `skill_load_dynamic:*`. This ensures they do not silently inherit the permissive default `skill_load:*` allow rule.
208
+
209
+ When a user is prompted to approve a dynamic skill load, the allowlist options are:
210
+
211
+ | Option | Pattern | Behavior |
212
+ | -------------- | ------------------------------------------- | --------------------------------------------------------------------------------------------------- |
213
+ | Version-pinned | `skill_load_dynamic:<id>@<transitive-hash>` | Approved for this exact version only. Any change to the skill or its includes invalidates the rule. |
214
+ | Any-version | `skill_load_dynamic:<id>` | Approved for all versions of this skill. |
215
+
216
+ The transitive hash covers the skill's own content plus all included skills, so a change anywhere in the dependency graph triggers re-approval for version-pinned rules.
217
+
218
+ ### v1 execution limits
219
+
220
+ In the initial implementation, inline command execution enforces these constraints:
221
+
222
+ | Constraint | Value |
223
+ | ---------------- | ------------------------------------------------------- |
224
+ | Execution target | Sandbox only (no host fallback) |
225
+ | Network access | Off (no outbound connections) |
226
+ | Environment | Sanitized (no API keys, tokens, or credentials) |
227
+ | Timeout | 10 seconds per command |
228
+ | Output cap | 20,000 characters (truncated with `[output truncated]`) |
229
+ | Binary output | Rejected if >10% non-printable characters |
230
+ | ANSI sequences | Stripped before output processing |
231
+ | stderr | Discarded (only stdout is captured) |
232
+
233
+ Commands that fail (timeout, non-zero exit, spawn failure, binary output) produce a deterministic stub in the rendered body rather than leaking raw error output:
234
+
235
+ ```
236
+ <inline_skill_command index="0">[inline command unavailable: command timed out]</inline_skill_command>
237
+ ```
238
+
239
+ ### Eligible skill sources
240
+
241
+ Only **bundled**, **managed**, and **workspace** skills may use inline command expansions. Third-party **extra** skill sources are explicitly rejected — `skill_load` returns an error if an extra-source skill contains inline expansion tokens.
242
+
243
+ | Source | Eligible | Reason |
244
+ | ----------- | -------- | -------------------------------------- |
245
+ | `bundled` | Yes | Shipped with the application, trusted |
246
+ | `managed` | Yes | User-installed, subject to approval |
247
+ | `workspace` | Yes | Project-local, subject to approval |
248
+ | `extra` | No | Third-party roots, out of scope for v1 |
249
+
250
+ ### Fail-closed summary
251
+
252
+ The system fails closed at every layer:
253
+
254
+ 1. **Flag off** — skill_load returns an error, tokens never reach the model.
255
+ 2. **Malformed syntax** — rejected by the parser, logged as errors.
256
+ 3. **Unsupported source** — skill_load returns an error for extra-source skills.
257
+ 4. **Command failure** — deterministic stub replaces the token, no raw stderr.
258
+ 5. **No permission** — `skill_load_dynamic:*` namespace requires explicit approval.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vellumai/assistant",
3
- "version": "0.5.2",
3
+ "version": "0.5.3",
4
4
  "type": "module",
5
5
  "exports": {
6
6
  ".": "./src/index.ts"
@@ -332,6 +332,13 @@ mock.module("../memory/llm-request-log-store.js", () => ({
332
332
  backfillMessageIdOnLogs: () => {},
333
333
  }));
334
334
 
335
+ mock.module("../memory/archive-store.js", () => ({
336
+ insertCompactionEpisode: () => ({
337
+ episodeId: "mock-episode-id",
338
+ jobId: "mock-job-id",
339
+ }),
340
+ }));
341
+
335
342
  // ── Imports (after mocks) ────────────────────────────────────────────
336
343
 
337
344
  import {
@@ -315,6 +315,13 @@ mock.module("../agent/message-types.js", () => ({
315
315
  }),
316
316
  }));
317
317
 
318
+ mock.module("../memory/archive-store.js", () => ({
319
+ insertCompactionEpisode: () => ({
320
+ episodeId: "mock-episode-id",
321
+ jobId: "mock-job-id",
322
+ }),
323
+ }));
324
+
318
325
  mock.module("../memory/llm-request-log-store.js", () => ({
319
326
  recordRequestLog: recordRequestLogMock,
320
327
  backfillMessageIdOnLogs: () => {},
@@ -0,0 +1,150 @@
1
+ import { mkdtempSync, rmSync } from "node:fs";
2
+ import { tmpdir } from "node:os";
3
+ import { join } from "node:path";
4
+ import { afterAll, beforeEach, describe, expect, mock, test } from "bun:test";
5
+
6
+ const testDir = mkdtempSync(join(tmpdir(), "conv-dirty-tail-test-"));
7
+
8
+ mock.module("../util/platform.js", () => ({
9
+ getDataDir: () => testDir,
10
+ isMacOS: () => process.platform === "darwin",
11
+ isLinux: () => process.platform === "linux",
12
+ isWindows: () => process.platform === "win32",
13
+ getPidPath: () => join(testDir, "test.pid"),
14
+ getDbPath: () => join(testDir, "test.db"),
15
+ getLogPath: () => join(testDir, "test.log"),
16
+ ensureDataDir: () => {},
17
+ }));
18
+
19
+ mock.module("../util/logger.js", () => ({
20
+ getLogger: () =>
21
+ new Proxy({} as Record<string, unknown>, {
22
+ get: () => () => {},
23
+ }),
24
+ }));
25
+
26
+ import {
27
+ addMessage,
28
+ createConversation,
29
+ getConversation,
30
+ getMessages,
31
+ markConversationMemoryDirty,
32
+ } from "../memory/conversation-crud.js";
33
+ import { getDb, initializeDb, resetDb } from "../memory/db.js";
34
+
35
+ initializeDb();
36
+
37
+ afterAll(() => {
38
+ resetDb();
39
+ try {
40
+ rmSync(testDir, { recursive: true });
41
+ } catch {
42
+ /* best effort */
43
+ }
44
+ });
45
+
46
+ describe("markConversationMemoryDirty", () => {
47
+ beforeEach(() => {
48
+ const db = getDb();
49
+ db.run(`DELETE FROM messages`);
50
+ db.run(`DELETE FROM conversations`);
51
+ });
52
+
53
+ test("first message marks the conversation dirty with its message ID", async () => {
54
+ const conv = createConversation("test");
55
+ const msg = await addMessage(conv.id, "user", "hello world", undefined, {
56
+ skipIndexing: true,
57
+ });
58
+
59
+ const updated = getConversation(conv.id);
60
+ expect(updated).not.toBeNull();
61
+ expect(updated!.memoryDirtyTailSinceMessageId).toBe(msg.id);
62
+ });
63
+
64
+ test("repeated messages preserve the original dirty boundary", async () => {
65
+ const conv = createConversation("test");
66
+ const msg1 = await addMessage(conv.id, "user", "first message", undefined, {
67
+ skipIndexing: true,
68
+ });
69
+ const msg2 = await addMessage(
70
+ conv.id,
71
+ "assistant",
72
+ "second message",
73
+ undefined,
74
+ { skipIndexing: true },
75
+ );
76
+
77
+ const updated = getConversation(conv.id);
78
+ expect(updated).not.toBeNull();
79
+ // The dirty tail should still point to msg1, not msg2.
80
+ expect(updated!.memoryDirtyTailSinceMessageId).toBe(msg1.id);
81
+ // msg2 should still be persisted normally.
82
+ expect(msg2.id).not.toBe(msg1.id);
83
+ });
84
+
85
+ test("markConversationMemoryDirty is a no-op when already dirty", () => {
86
+ const conv = createConversation("test");
87
+ const firstMessageId = "first-msg-id";
88
+ const secondMessageId = "second-msg-id";
89
+
90
+ markConversationMemoryDirty(conv.id, firstMessageId);
91
+ const after1 = getConversation(conv.id);
92
+ expect(after1!.memoryDirtyTailSinceMessageId).toBe(firstMessageId);
93
+
94
+ markConversationMemoryDirty(conv.id, secondMessageId);
95
+ const after2 = getConversation(conv.id);
96
+ // Still points to the first message — boundary preserved.
97
+ expect(after2!.memoryDirtyTailSinceMessageId).toBe(firstMessageId);
98
+ });
99
+
100
+ test("message ordering and persistence semantics are unchanged", async () => {
101
+ const conv = createConversation("test");
102
+ const msg1 = await addMessage(conv.id, "user", "question", undefined, {
103
+ skipIndexing: true,
104
+ });
105
+ const msg2 = await addMessage(conv.id, "assistant", "answer", undefined, {
106
+ skipIndexing: true,
107
+ });
108
+ const msg3 = await addMessage(conv.id, "user", "follow-up", undefined, {
109
+ skipIndexing: true,
110
+ });
111
+
112
+ const allMessages = getMessages(conv.id);
113
+ expect(allMessages).toHaveLength(3);
114
+ // Messages are ordered by createdAt ascending.
115
+ expect(allMessages[0].id).toBe(msg1.id);
116
+ expect(allMessages[1].id).toBe(msg2.id);
117
+ expect(allMessages[2].id).toBe(msg3.id);
118
+ expect(allMessages[0].content).toBe("question");
119
+ expect(allMessages[1].content).toBe("answer");
120
+ expect(allMessages[2].content).toBe("follow-up");
121
+ // createdAt is monotonically increasing.
122
+ expect(allMessages[1].createdAt).toBeGreaterThan(allMessages[0].createdAt);
123
+ expect(allMessages[2].createdAt).toBeGreaterThan(allMessages[1].createdAt);
124
+ });
125
+
126
+ test("every persisted message marks the conversation dirty", async () => {
127
+ const conv = createConversation("test");
128
+
129
+ // Before any messages, the conversation is not dirty.
130
+ const before = getConversation(conv.id);
131
+ expect(before!.memoryDirtyTailSinceMessageId).toBeNull();
132
+
133
+ // After the first message, it becomes dirty.
134
+ const msg1 = await addMessage(conv.id, "user", "msg1", undefined, {
135
+ skipIndexing: true,
136
+ });
137
+ const after1 = getConversation(conv.id);
138
+ expect(after1!.memoryDirtyTailSinceMessageId).toBe(msg1.id);
139
+
140
+ // After subsequent messages, the dirty boundary stays on msg1.
141
+ await addMessage(conv.id, "assistant", "msg2", undefined, {
142
+ skipIndexing: true,
143
+ });
144
+ await addMessage(conv.id, "user", "msg3", undefined, {
145
+ skipIndexing: true,
146
+ });
147
+ const afterAll = getConversation(conv.id);
148
+ expect(afterAll!.memoryDirtyTailSinceMessageId).toBe(msg1.id);
149
+ });
150
+ });
@@ -27,6 +27,9 @@ mock.module("../providers/registry.js", () => ({
27
27
  mock.module("../config/loader.js", () => ({
28
28
  getConfig: () => ({
29
29
  ui: {},
30
+ daemon: {
31
+ titleGenerationMaxTokens: 30,
32
+ },
30
33
 
31
34
  provider: "mock-provider",
32
35
  maxTokens: 4096,
@@ -174,6 +177,10 @@ mock.module("../memory/conversation-queries.js", () => ({
174
177
  listConversations: () => [],
175
178
  }));
176
179
 
180
+ mock.module("../memory/archive-store.js", () => ({
181
+ insertCompactionEpisode: () => {},
182
+ }));
183
+
177
184
  mock.module("../memory/retriever.js", () => ({
178
185
  buildMemoryRecall: async () => ({
179
186
  enabled: false,
@@ -26,6 +26,7 @@ mock.module("../util/logger.js", () => ({
26
26
  import {
27
27
  addMessage,
28
28
  createConversation,
29
+ deleteConversation,
29
30
  getConversation,
30
31
  getMessages,
31
32
  wipeConversation,
@@ -436,3 +437,228 @@ describe("wipeConversation", () => {
436
437
  expect(itemBRow).not.toBeNull();
437
438
  });
438
439
  });
440
+
441
+ describe("deleteConversation — private scope cleanup", () => {
442
+ beforeEach(() => {
443
+ const db = getDb();
444
+ db.run(`DELETE FROM conversation_starters`);
445
+ db.run(`DELETE FROM memory_item_sources`);
446
+ db.run(`DELETE FROM memory_segments`);
447
+ db.run(`DELETE FROM memory_items`);
448
+ db.run(`DELETE FROM memory_summaries`);
449
+ db.run(`DELETE FROM memory_embeddings`);
450
+ db.run(`DELETE FROM memory_jobs`);
451
+ db.run(`DELETE FROM tool_invocations`);
452
+ db.run(`DELETE FROM llm_request_logs`);
453
+ db.run(`DELETE FROM messages`);
454
+ db.run(`DELETE FROM conversations`);
455
+ });
456
+
457
+ test("sourceless items cleaned up", () => {
458
+ const conv = createConversation({ conversationType: "private" });
459
+ const scopeId = conv.memoryScopeId;
460
+ const now = Date.now();
461
+
462
+ const raw = (
463
+ getDb() as unknown as {
464
+ $client: import("bun:sqlite").Database;
465
+ }
466
+ ).$client;
467
+
468
+ // Insert a memory item with matching scopeId but no memory_item_sources
469
+ raw
470
+ .query(
471
+ `INSERT INTO memory_items (id, status, kind, subject, statement, confidence, fingerprint, scope_id, first_seen_at, last_seen_at)
472
+ VALUES ('priv-item-1', 'active', 'fact', 'test', 'test fact', 0.8, 'fp-priv-1', ?, ?, ?)`,
473
+ )
474
+ .run(scopeId, now, now);
475
+
476
+ const result = deleteConversation(conv.id);
477
+
478
+ // Item should be gone
479
+ const itemRow = raw
480
+ .query("SELECT * FROM memory_items WHERE id = 'priv-item-1'")
481
+ .get();
482
+ expect(itemRow).toBeNull();
483
+
484
+ // Its ID should be in orphanedItemIds
485
+ expect(result.orphanedItemIds).toContain("priv-item-1");
486
+ });
487
+
488
+ test("summaries cleaned up", () => {
489
+ const conv = createConversation({ conversationType: "private" });
490
+ const scopeId = conv.memoryScopeId;
491
+ const now = Date.now();
492
+
493
+ const raw = (
494
+ getDb() as unknown as {
495
+ $client: import("bun:sqlite").Database;
496
+ }
497
+ ).$client;
498
+
499
+ // Insert a memory summary with matching scopeId
500
+ raw
501
+ .query(
502
+ `INSERT INTO memory_summaries (id, scope, scope_key, summary, token_estimate, version, scope_id, start_at, end_at, created_at, updated_at)
503
+ VALUES ('priv-sum-1', 'global', 'all', 'private summary', 100, 1, ?, ?, ?, ?, ?)`,
504
+ )
505
+ .run(scopeId, now, now, now, now);
506
+
507
+ const result = deleteConversation(conv.id);
508
+
509
+ // Summary should be gone
510
+ const summaryRow = raw
511
+ .query("SELECT * FROM memory_summaries WHERE id = 'priv-sum-1'")
512
+ .get();
513
+ expect(summaryRow).toBeNull();
514
+
515
+ // Its ID should be in deletedSummaryIds
516
+ expect(result.deletedSummaryIds).toContain("priv-sum-1");
517
+ });
518
+
519
+ test("standard conversations unaffected", async () => {
520
+ const conv = createConversation("standard test");
521
+ const now = Date.now();
522
+
523
+ const raw = (
524
+ getDb() as unknown as {
525
+ $client: import("bun:sqlite").Database;
526
+ }
527
+ ).$client;
528
+
529
+ // Insert items with scopeId = "default"
530
+ raw
531
+ .query(
532
+ `INSERT INTO memory_items (id, status, kind, subject, statement, confidence, fingerprint, scope_id, first_seen_at, last_seen_at)
533
+ VALUES ('default-item-1', 'active', 'fact', 'test', 'test fact', 0.8, 'fp-default', 'default', ?, ?)`,
534
+ )
535
+ .run(now, now);
536
+
537
+ deleteConversation(conv.id);
538
+
539
+ // Default-scope items should still exist
540
+ const itemRow = raw
541
+ .query("SELECT * FROM memory_items WHERE id = 'default-item-1'")
542
+ .get();
543
+ expect(itemRow).not.toBeNull();
544
+ });
545
+
546
+ test("embeddings cleaned up", () => {
547
+ const conv = createConversation({ conversationType: "private" });
548
+ const scopeId = conv.memoryScopeId;
549
+ const now = Date.now();
550
+
551
+ const raw = (
552
+ getDb() as unknown as {
553
+ $client: import("bun:sqlite").Database;
554
+ }
555
+ ).$client;
556
+
557
+ // Insert a memory item with matching scopeId
558
+ raw
559
+ .query(
560
+ `INSERT INTO memory_items (id, status, kind, subject, statement, confidence, fingerprint, scope_id, first_seen_at, last_seen_at)
561
+ VALUES ('priv-item-emb', 'active', 'fact', 'test', 'test fact', 0.8, 'fp-priv-emb', ?, ?, ?)`,
562
+ )
563
+ .run(scopeId, now, now);
564
+
565
+ // Insert a corresponding embedding
566
+ raw
567
+ .query(
568
+ `INSERT INTO memory_embeddings (id, target_type, target_id, provider, model, dimensions, created_at, updated_at)
569
+ VALUES ('emb-priv-item', 'item', 'priv-item-emb', 'test', 'test', 384, ?, ?)`,
570
+ )
571
+ .run(now, now);
572
+
573
+ deleteConversation(conv.id);
574
+
575
+ // Both item and embedding should be deleted
576
+ const itemRow = raw
577
+ .query("SELECT * FROM memory_items WHERE id = 'priv-item-emb'")
578
+ .get();
579
+ expect(itemRow).toBeNull();
580
+
581
+ const embeddingRow = raw
582
+ .query("SELECT * FROM memory_embeddings WHERE id = 'emb-priv-item'")
583
+ .get();
584
+ expect(embeddingRow).toBeNull();
585
+ });
586
+
587
+ test("conversationStarters cleaned up", () => {
588
+ const conv = createConversation({ conversationType: "private" });
589
+ const scopeId = conv.memoryScopeId;
590
+ const now = Date.now();
591
+
592
+ const raw = (
593
+ getDb() as unknown as {
594
+ $client: import("bun:sqlite").Database;
595
+ }
596
+ ).$client;
597
+
598
+ // Insert a conversation_starters row with the private scopeId
599
+ raw
600
+ .query(
601
+ `INSERT INTO conversation_starters (id, label, prompt, generation_batch, scope_id, card_type, created_at)
602
+ VALUES ('starter-1', 'Test starter', 'Tell me about tests', 1, ?, 'chip', ?)`,
603
+ )
604
+ .run(scopeId, now);
605
+
606
+ // Also insert a default-scope starter that should NOT be deleted
607
+ raw
608
+ .query(
609
+ `INSERT INTO conversation_starters (id, label, prompt, generation_batch, scope_id, card_type, created_at)
610
+ VALUES ('starter-default', 'Default starter', 'Hello', 1, 'default', 'chip', ?)`,
611
+ )
612
+ .run(now);
613
+
614
+ deleteConversation(conv.id);
615
+
616
+ // Private-scope starter should be gone
617
+ const starterRow = raw
618
+ .query("SELECT * FROM conversation_starters WHERE id = 'starter-1'")
619
+ .get();
620
+ expect(starterRow).toBeNull();
621
+
622
+ // Default-scope starter should still exist
623
+ const defaultStarterRow = raw
624
+ .query("SELECT * FROM conversation_starters WHERE id = 'starter-default'")
625
+ .get();
626
+ expect(defaultStarterRow).not.toBeNull();
627
+ });
628
+
629
+ test("no duplicate IDs", async () => {
630
+ const conv = createConversation({ conversationType: "private" });
631
+ const scopeId = conv.memoryScopeId;
632
+ const msg = await addMessage(conv.id, "user", "hello");
633
+ const now = Date.now();
634
+
635
+ const raw = (
636
+ getDb() as unknown as {
637
+ $client: import("bun:sqlite").Database;
638
+ }
639
+ ).$client;
640
+
641
+ // Insert a memory item with the private scopeId AND a source linking to the message
642
+ raw
643
+ .query(
644
+ `INSERT INTO memory_items (id, status, kind, subject, statement, confidence, fingerprint, scope_id, first_seen_at, last_seen_at)
645
+ VALUES ('priv-item-dup', 'active', 'fact', 'test', 'test fact', 0.8, 'fp-priv-dup', ?, ?, ?)`,
646
+ )
647
+ .run(scopeId, now, now);
648
+
649
+ raw
650
+ .query(
651
+ `INSERT INTO memory_item_sources (memory_item_id, message_id, created_at) VALUES ('priv-item-dup', ?, ?)`,
652
+ )
653
+ .run(msg.id, now);
654
+
655
+ const result = deleteConversation(conv.id);
656
+
657
+ // The item ID should appear exactly once in orphanedItemIds (caught by
658
+ // source-based cleanup, not double-counted by scope sweep).
659
+ const count = result.orphanedItemIds.filter(
660
+ (id) => id === "priv-item-dup",
661
+ ).length;
662
+ expect(count).toBe(1);
663
+ });
664
+ });