@vellumai/assistant 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (285) hide show
  1. package/AGENTS.md +4 -0
  2. package/ARCHITECTURE.md +68 -15
  3. package/Dockerfile +2 -2
  4. package/bun.lock +6 -2
  5. package/docker-entrypoint.sh +32 -1
  6. package/docs/architecture/integrations.md +1 -1
  7. package/docs/architecture/memory.md +21 -24
  8. package/openapi.yaml +538 -3
  9. package/package.json +5 -1
  10. package/src/__tests__/anthropic-provider.test.ts +160 -95
  11. package/src/__tests__/app-dir-path-guard.test.ts +1 -0
  12. package/src/__tests__/app-executors.test.ts +47 -1
  13. package/src/__tests__/app-source-watcher.test.ts +159 -0
  14. package/src/__tests__/checker.test.ts +38 -6
  15. package/src/__tests__/config-schema.test.ts +5 -0
  16. package/src/__tests__/conversation-agent-loop-overflow.test.ts +4 -6
  17. package/src/__tests__/conversation-agent-loop.test.ts +4 -51
  18. package/src/__tests__/conversation-history-web-search.test.ts +1 -1
  19. package/src/__tests__/conversation-runtime-assembly.test.ts +653 -832
  20. package/src/__tests__/conversation-runtime-workspace.test.ts +1 -93
  21. package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +17 -4
  22. package/src/__tests__/conversation-wipe.test.ts +2 -6
  23. package/src/__tests__/conversation-workspace-cache-state.test.ts +6 -12
  24. package/src/__tests__/conversation-workspace-injection.test.ts +25 -26
  25. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +1 -1
  26. package/src/__tests__/copy-composer-tc-templates.test.ts +335 -0
  27. package/src/__tests__/date-context.test.ts +76 -210
  28. package/src/__tests__/db-schedule-syntax-migration.test.ts +16 -1
  29. package/src/__tests__/file-list-tool.test.ts +219 -0
  30. package/src/__tests__/first-greeting.test.ts +1 -1
  31. package/src/__tests__/heartbeat-service.test.ts +180 -3
  32. package/src/__tests__/identity-routes.test.ts +328 -0
  33. package/src/__tests__/injection-block.test.ts +24 -0
  34. package/src/__tests__/install-skill-routing.test.ts +7 -6
  35. package/src/__tests__/jobs-store-qdrant-breaker.test.ts +15 -14
  36. package/src/__tests__/list-messages-tool-merge.test.ts +300 -0
  37. package/src/__tests__/llm-context-normalization.test.ts +18 -18
  38. package/src/__tests__/llm-context-route-provider.test.ts +101 -0
  39. package/src/__tests__/llm-request-log-turn-query.test.ts +162 -0
  40. package/src/__tests__/log-export-workspace.test.ts +72 -105
  41. package/src/__tests__/mcp-abort-signal.test.ts +5 -0
  42. package/src/__tests__/mcp-client-auth.test.ts +5 -0
  43. package/src/__tests__/memory-recall-log-store.test.ts +132 -0
  44. package/src/__tests__/migration-export-streaming.test.ts +304 -0
  45. package/src/__tests__/migration-import-commit-http.test.ts +11 -10
  46. package/src/__tests__/mock-fetch.ts +87 -0
  47. package/src/__tests__/notification-decision-recipient-context.test.ts +282 -0
  48. package/src/__tests__/onboarding-template-contract.test.ts +62 -14
  49. package/src/__tests__/parser.test.ts +32 -0
  50. package/src/__tests__/permission-checker-host-gate.test.ts +452 -0
  51. package/src/__tests__/permission-controls-v2-flag.test.ts +55 -0
  52. package/src/__tests__/permission-mode-sse.test.ts +418 -0
  53. package/src/__tests__/permission-mode-store.test.ts +277 -0
  54. package/src/__tests__/permission-mode.test.ts +101 -0
  55. package/src/__tests__/platform-bash-auto-approve.test.ts +359 -0
  56. package/src/__tests__/profiler-routes.test.ts +502 -0
  57. package/src/__tests__/profiler-run-store.test.ts +441 -0
  58. package/src/__tests__/proxy-approval-callback.test.ts +4 -75
  59. package/src/__tests__/registry.test.ts +1 -1
  60. package/src/__tests__/sandbox-host-parity.test.ts +5 -4
  61. package/src/__tests__/scheduler-reuse-conversation.test.ts +368 -0
  62. package/src/__tests__/scrub-corrupted-image-attachments.test.ts +278 -0
  63. package/src/__tests__/search-skills-unified.test.ts +4 -3
  64. package/src/__tests__/send-endpoint-busy.test.ts +42 -3
  65. package/src/__tests__/set-permission-mode.test.ts +274 -0
  66. package/src/__tests__/skill-load-feature-flag.test.ts +12 -0
  67. package/src/__tests__/skill-memory.test.ts +2 -783
  68. package/src/__tests__/strip-memory-injections.test.ts +187 -0
  69. package/src/__tests__/subagent-detail.test.ts +84 -0
  70. package/src/__tests__/subagent-disposal.test.ts +308 -0
  71. package/src/__tests__/subagent-manager-notify.test.ts +19 -10
  72. package/src/__tests__/subagent-notify-parent.test.ts +390 -0
  73. package/src/__tests__/subagent-role-registry.test.ts +108 -0
  74. package/src/__tests__/subagent-tool-filtering.test.ts +71 -0
  75. package/src/__tests__/subagent-tools.test.ts +464 -4
  76. package/src/__tests__/system-prompt-ask-mode.test.ts +139 -0
  77. package/src/__tests__/task-memory-cleanup.test.ts +12 -12
  78. package/src/__tests__/terminal-tools.test.ts +17 -27
  79. package/src/__tests__/test-preload.ts +4 -0
  80. package/src/__tests__/tool-executor.test.ts +4 -26
  81. package/src/__tests__/tool-side-effects-slack-dm.test.ts +1 -0
  82. package/src/__tests__/top-level-renderer.test.ts +10 -13
  83. package/src/__tests__/trusted-contact-lifecycle-notifications.test.ts +116 -2
  84. package/src/__tests__/workspace-migration-028-recover-conversations-from-disk-view.test.ts +387 -0
  85. package/src/agent/loop.ts +6 -0
  86. package/src/approvals/guardian-request-resolvers.ts +24 -0
  87. package/src/avatar/traits-png-sync.ts +3 -3
  88. package/src/cli/__tests__/run-assistant-command.ts +29 -0
  89. package/src/cli/commands/__tests__/email-download.test.ts +245 -0
  90. package/src/cli/commands/__tests__/email-list.test.ts +192 -0
  91. package/src/cli/commands/__tests__/email-register.test.ts +186 -0
  92. package/src/cli/commands/__tests__/email-send.test.ts +291 -0
  93. package/src/cli/commands/__tests__/email-status.test.ts +181 -0
  94. package/src/cli/commands/__tests__/email-unregister.test.ts +139 -0
  95. package/src/cli/commands/__tests__/routes.test.ts +562 -0
  96. package/src/cli/commands/conversations.ts +1 -8
  97. package/src/cli/commands/email.ts +584 -835
  98. package/src/cli/commands/memory.ts +1 -34
  99. package/src/cli/commands/notifications.ts +7 -2
  100. package/src/cli/commands/oauth/connect.ts +14 -5
  101. package/src/cli/commands/routes.ts +396 -0
  102. package/src/cli/commands/skills.ts +130 -20
  103. package/src/cli/program.ts +2 -0
  104. package/src/cli.ts +1 -120
  105. package/src/config/bundled-skills/app-builder/SKILL.md +4 -1
  106. package/src/config/bundled-skills/gmail/SKILL.md +2 -2
  107. package/src/config/bundled-skills/messaging/SKILL.md +7 -0
  108. package/src/config/bundled-skills/schedule/SKILL.md +22 -2
  109. package/src/config/bundled-skills/schedule/TOOLS.json +8 -0
  110. package/src/config/bundled-skills/settings/tools/avatar-get.ts +3 -13
  111. package/src/config/bundled-skills/settings/tools/avatar-remove.ts +2 -4
  112. package/src/config/bundled-skills/settings/tools/avatar-update.ts +5 -2
  113. package/src/config/bundled-skills/slack/SKILL.md +2 -0
  114. package/src/config/bundled-skills/subagent/SKILL.md +43 -3
  115. package/src/config/bundled-skills/subagent/TOOLS.json +29 -4
  116. package/src/config/env-registry.ts +63 -0
  117. package/src/config/feature-flag-registry.json +17 -1
  118. package/src/config/schema.ts +8 -0
  119. package/src/config/schemas/filing.ts +51 -0
  120. package/src/config/schemas/heartbeat.ts +15 -12
  121. package/src/config/schemas/memory-lifecycle.ts +12 -0
  122. package/src/config/schemas/security.ts +14 -0
  123. package/src/daemon/app-source-watcher.ts +93 -0
  124. package/src/daemon/config-watcher.ts +79 -1
  125. package/src/daemon/conversation-agent-loop-handlers.ts +20 -0
  126. package/src/daemon/conversation-agent-loop.ts +158 -65
  127. package/src/daemon/conversation-history.ts +4 -19
  128. package/src/daemon/conversation-lifecycle.ts +8 -14
  129. package/src/daemon/conversation-process.ts +13 -7
  130. package/src/daemon/conversation-runtime-assembly.ts +300 -306
  131. package/src/daemon/conversation-tool-setup.ts +44 -14
  132. package/src/daemon/conversation-workspace.ts +1 -2
  133. package/src/daemon/conversation.ts +18 -0
  134. package/src/daemon/date-context.ts +26 -53
  135. package/src/daemon/first-greeting.ts +1 -1
  136. package/src/daemon/handlers/conversations.ts +4 -7
  137. package/src/daemon/handlers/shared.test.ts +143 -0
  138. package/src/daemon/handlers/shared.ts +63 -5
  139. package/src/daemon/handlers/skills.ts +11 -18
  140. package/src/daemon/lifecycle.ts +199 -157
  141. package/src/daemon/message-types/conversations.ts +25 -6
  142. package/src/daemon/message-types/messages.ts +9 -1
  143. package/src/daemon/message-types/schedules.ts +1 -0
  144. package/src/daemon/message-types/settings.ts +6 -0
  145. package/src/daemon/profiler-run-store.ts +557 -0
  146. package/src/daemon/server.ts +89 -9
  147. package/src/daemon/shutdown-handlers.ts +5 -0
  148. package/src/daemon/tool-side-effects.ts +23 -3
  149. package/src/export/transcript-formatter.ts +148 -0
  150. package/src/filing/filing-service.ts +228 -0
  151. package/src/heartbeat/heartbeat-service.ts +96 -7
  152. package/src/mcp/client.ts +6 -0
  153. package/src/mcp/mcp-oauth-provider.ts +149 -27
  154. package/src/memory/admin.ts +33 -32
  155. package/src/memory/app-store.ts +69 -0
  156. package/src/memory/conversation-bootstrap.ts +1 -1
  157. package/src/memory/conversation-crud.ts +136 -107
  158. package/src/memory/conversation-group-migration.ts +1 -1
  159. package/src/memory/conversation-queries.ts +58 -12
  160. package/src/memory/conversation-title-service.ts +1 -0
  161. package/src/memory/db-init.ts +182 -376
  162. package/src/memory/graph/bootstrap.ts +75 -66
  163. package/src/memory/graph/capability-seed.ts +167 -15
  164. package/src/memory/graph/consolidation.ts +38 -4
  165. package/src/memory/graph/conversation-graph-memory.ts +133 -104
  166. package/src/memory/graph/extraction-job.ts +9 -4
  167. package/src/memory/graph/extraction.ts +66 -23
  168. package/src/memory/graph/graph-memory-state-store.ts +37 -0
  169. package/src/memory/graph/graph-search.ts +29 -15
  170. package/src/memory/graph/injection.ts +38 -8
  171. package/src/memory/graph/inspect.ts +12 -3
  172. package/src/memory/graph/retriever.ts +365 -262
  173. package/src/memory/graph/store.test.ts +48 -0
  174. package/src/memory/graph/store.ts +150 -11
  175. package/src/memory/graph/tool-handlers.ts +84 -209
  176. package/src/memory/graph/tools.ts +8 -52
  177. package/src/memory/graph/types.ts +24 -0
  178. package/src/memory/job-handlers/cleanup.ts +44 -1
  179. package/src/memory/jobs-store.ts +70 -60
  180. package/src/memory/jobs-worker.ts +44 -28
  181. package/src/memory/llm-request-log-store.ts +96 -12
  182. package/src/memory/memory-recall-log-store.ts +49 -5
  183. package/src/memory/migrations/203-drop-memory-items-tables.ts +33 -1
  184. package/src/memory/migrations/206-memory-graph-node-edits.ts +19 -0
  185. package/src/memory/migrations/206-scrub-corrupted-image-attachments.ts +131 -0
  186. package/src/memory/migrations/207-conversation-graph-memory-state.ts +20 -0
  187. package/src/memory/migrations/208-conversations-last-message-at.ts +35 -0
  188. package/src/memory/migrations/209-strip-thinking-from-consolidated.ts +85 -0
  189. package/src/memory/migrations/210-schedule-reuse-conversation.ts +13 -0
  190. package/src/memory/migrations/211-memory-recall-logs-query-context.ts +21 -0
  191. package/src/memory/migrations/212-llm-request-logs-created-at-index.ts +19 -0
  192. package/src/memory/migrations/index.ts +8 -0
  193. package/src/memory/migrations/registry.ts +8 -0
  194. package/src/memory/schema/conversations.ts +14 -0
  195. package/src/memory/schema/infrastructure.ts +8 -1
  196. package/src/memory/schema/memory-core.ts +0 -51
  197. package/src/memory/schema/memory-graph.ts +15 -0
  198. package/src/memory/task-memory-cleanup.ts +30 -11
  199. package/src/notifications/copy-composer.ts +86 -0
  200. package/src/notifications/decision-engine.ts +35 -0
  201. package/src/permissions/checker.ts +12 -1
  202. package/src/permissions/permission-mode-store.ts +180 -0
  203. package/src/permissions/permission-mode.ts +31 -0
  204. package/src/permissions/workspace-policy.ts +9 -0
  205. package/src/prompts/system-prompt.ts +59 -7
  206. package/src/prompts/templates/BOOTSTRAP-REFERENCE.md +100 -0
  207. package/src/prompts/templates/BOOTSTRAP.md +70 -165
  208. package/src/prompts/templates/HEARTBEAT.md +3 -1
  209. package/src/prompts/templates/SOUL.md +25 -4
  210. package/src/prompts/templates/UPDATES.md +8 -0
  211. package/src/providers/anthropic/client.ts +107 -219
  212. package/src/runtime/auth/route-policy.ts +23 -0
  213. package/src/runtime/http-server.ts +32 -2
  214. package/src/runtime/http-types.ts +12 -1
  215. package/src/runtime/migrations/vbundle-builder.ts +389 -3
  216. package/src/runtime/migrations/vbundle-importer.ts +8 -6
  217. package/src/runtime/routes/__tests__/user-route-dispatcher.test.ts +378 -0
  218. package/src/runtime/routes/app-management-routes.ts +1 -11
  219. package/src/runtime/routes/approval-strategies/guardian-callback-strategy.ts +26 -0
  220. package/src/runtime/routes/archive-utils.ts +29 -0
  221. package/src/runtime/routes/avatar-routes.ts +2 -9
  222. package/src/runtime/routes/btw-routes.ts +14 -1
  223. package/src/runtime/routes/conversation-analysis-routes.ts +173 -0
  224. package/src/runtime/routes/conversation-management-routes.ts +1 -14
  225. package/src/runtime/routes/conversation-query-routes.ts +49 -3
  226. package/src/runtime/routes/conversation-routes.ts +264 -44
  227. package/src/runtime/routes/heartbeat-routes.ts +4 -10
  228. package/src/runtime/routes/identity-routes.ts +53 -18
  229. package/src/runtime/routes/llm-context-normalization.ts +14 -10
  230. package/src/runtime/routes/log-export-routes.ts +23 -275
  231. package/src/runtime/routes/memory-item-routes.test.ts +168 -233
  232. package/src/runtime/routes/migration-routes.ts +18 -7
  233. package/src/runtime/routes/profiler-routes.ts +350 -0
  234. package/src/runtime/routes/schedule-routes.ts +27 -12
  235. package/src/runtime/routes/settings-routes.ts +95 -8
  236. package/src/runtime/routes/subagents-routes.ts +28 -7
  237. package/src/runtime/routes/user-route-dispatcher.ts +223 -0
  238. package/src/runtime/routes/user-routes.ts +41 -0
  239. package/src/runtime/routes/workspace-routes.ts +0 -1
  240. package/src/schedule/schedule-store.ts +30 -0
  241. package/src/schedule/scheduler.ts +45 -18
  242. package/src/skills/catalog-install.ts +10 -2
  243. package/src/skills/managed-store.ts +2 -2
  244. package/src/skills/skill-memory.ts +1 -293
  245. package/src/subagent/index.ts +13 -3
  246. package/src/subagent/manager.ts +308 -29
  247. package/src/subagent/types.ts +68 -0
  248. package/src/tasks/task-runner.ts +4 -4
  249. package/src/tools/apps/executors.ts +29 -4
  250. package/src/tools/filesystem/list.ts +93 -0
  251. package/src/tools/permission-checker.ts +78 -0
  252. package/src/tools/registry.ts +4 -0
  253. package/src/tools/schedule/create.ts +3 -0
  254. package/src/tools/schedule/list.ts +1 -0
  255. package/src/tools/schedule/update.ts +6 -0
  256. package/src/tools/shared/filesystem/errors.ts +5 -0
  257. package/src/tools/shared/filesystem/file-ops-service.ts +90 -2
  258. package/src/tools/shared/filesystem/types.ts +17 -0
  259. package/src/tools/shared/shell-output.ts +31 -2
  260. package/src/tools/subagent/abort.ts +12 -2
  261. package/src/tools/subagent/message.ts +9 -2
  262. package/src/tools/subagent/notify-parent.ts +79 -0
  263. package/src/tools/subagent/read.ts +29 -8
  264. package/src/tools/subagent/resolve.ts +21 -0
  265. package/src/tools/subagent/spawn.ts +2 -0
  266. package/src/tools/subagent/status.ts +11 -1
  267. package/src/tools/system/avatar-generator.ts +3 -3
  268. package/src/tools/system/register.ts +23 -0
  269. package/src/tools/system/set-permission-mode.ts +103 -0
  270. package/src/tools/terminal/parser.ts +30 -5
  271. package/src/tools/terminal/safe-env.ts +16 -1
  272. package/src/tools/tool-manifest.ts +6 -0
  273. package/src/tools/types.ts +2 -0
  274. package/src/util/logger.ts +1 -1
  275. package/src/util/platform.ts +50 -17
  276. package/src/workspace/migrations/023-move-config-files-to-workspace.ts +2 -2
  277. package/src/workspace/migrations/024-move-runtime-files-to-workspace.ts +2 -2
  278. package/src/workspace/migrations/028-recover-conversations-from-disk-view.ts +270 -0
  279. package/src/workspace/migrations/029-seed-pkb.ts +84 -0
  280. package/src/workspace/migrations/registry.ts +4 -0
  281. package/src/workspace/top-level-renderer.ts +5 -9
  282. package/src/__tests__/cli-memory.test.ts +0 -377
  283. package/src/__tests__/clipboard.test.ts +0 -88
  284. package/src/cli/cli-memory.ts +0 -179
  285. package/src/util/clipboard.ts +0 -34
@@ -0,0 +1,441 @@
1
+ /**
2
+ * Tests for the profiler run store: manifest management, retention sweep,
3
+ * active-run protection, oldest-first pruning, max-run-count pruning,
4
+ * active-run-over-budget signaling, and idempotent rescans.
5
+ */
6
+ import {
7
+ existsSync,
8
+ mkdirSync,
9
+ readFileSync,
10
+ rmSync,
11
+ writeFileSync,
12
+ } from "node:fs";
13
+ import { tmpdir } from "node:os";
14
+ import { join } from "node:path";
15
+ import { afterEach, beforeEach, describe, expect, test } from "bun:test";
16
+
17
+ import type { ProfilerRunManifest } from "../daemon/profiler-run-store.js";
18
+ import { rescanRuns, runProfilerSweep } from "../daemon/profiler-run-store.js";
19
+
20
+ // ── Test scaffolding ────────────────────────────────────────────────────
21
+
22
+ let testDir: string;
23
+ let runsDir: string;
24
+ let origEnv: Record<string, string | undefined>;
25
+
26
+ /**
27
+ * Create a fake profiler run directory with some payload files.
28
+ */
29
+ function createRun(
30
+ runId: string,
31
+ opts?: {
32
+ sizeBytes?: number;
33
+ manifest?: Partial<ProfilerRunManifest>;
34
+ },
35
+ ): string {
36
+ const dir = join(runsDir, runId);
37
+ mkdirSync(dir, { recursive: true });
38
+
39
+ // Write a payload file of the requested size
40
+ const size = opts?.sizeBytes ?? 1024;
41
+ writeFileSync(join(dir, "profile.cpuprofile"), Buffer.alloc(size));
42
+
43
+ // Optionally write a pre-existing manifest
44
+ if (opts?.manifest) {
45
+ const m: ProfilerRunManifest = {
46
+ runId,
47
+ status: opts.manifest.status ?? "completed",
48
+ createdAt: opts.manifest.createdAt ?? new Date().toISOString(),
49
+ updatedAt: opts.manifest.updatedAt ?? new Date().toISOString(),
50
+ totalBytes: opts.manifest.totalBytes ?? size,
51
+ };
52
+ writeFileSync(join(dir, "manifest.json"), JSON.stringify(m, null, 2));
53
+ }
54
+
55
+ return dir;
56
+ }
57
+
58
+ function readManifestFromDisk(runId: string): ProfilerRunManifest | null {
59
+ const manifestPath = join(runsDir, runId, "manifest.json");
60
+ try {
61
+ return JSON.parse(readFileSync(manifestPath, "utf-8"));
62
+ } catch {
63
+ return null;
64
+ }
65
+ }
66
+
67
+ beforeEach(() => {
68
+ testDir = join(
69
+ tmpdir(),
70
+ `vellum-profiler-test-${Date.now()}-${Math.random().toString(36).slice(2)}`,
71
+ );
72
+ runsDir = join(testDir, "data", "profiler", "runs");
73
+ mkdirSync(runsDir, { recursive: true });
74
+
75
+ // Save and override env
76
+ origEnv = {
77
+ VELLUM_WORKSPACE_DIR: process.env.VELLUM_WORKSPACE_DIR,
78
+ VELLUM_PROFILER_RUN_ID: process.env.VELLUM_PROFILER_RUN_ID,
79
+ VELLUM_PROFILER_MAX_BYTES: process.env.VELLUM_PROFILER_MAX_BYTES,
80
+ VELLUM_PROFILER_MAX_RUNS: process.env.VELLUM_PROFILER_MAX_RUNS,
81
+ VELLUM_PROFILER_MIN_FREE_MB: process.env.VELLUM_PROFILER_MIN_FREE_MB,
82
+ };
83
+
84
+ // Point workspace dir to our temp directory
85
+ process.env.VELLUM_WORKSPACE_DIR = testDir;
86
+
87
+ // Clear profiler env vars
88
+ delete process.env.VELLUM_PROFILER_RUN_ID;
89
+ delete process.env.VELLUM_PROFILER_MAX_BYTES;
90
+ delete process.env.VELLUM_PROFILER_MAX_RUNS;
91
+ delete process.env.VELLUM_PROFILER_MIN_FREE_MB;
92
+ });
93
+
94
+ afterEach(() => {
95
+ // Restore env
96
+ for (const [key, value] of Object.entries(origEnv)) {
97
+ if (value === undefined) {
98
+ delete process.env[key];
99
+ } else {
100
+ process.env[key] = value;
101
+ }
102
+ }
103
+
104
+ // Clean up temp directory
105
+ if (existsSync(testDir)) {
106
+ rmSync(testDir, { recursive: true, force: true });
107
+ }
108
+ });
109
+
110
+ // ── Tests ───────────────────────────────────────────────────────────────
111
+
112
+ describe("Profiler run store", () => {
113
+ describe("rescanRuns", () => {
114
+ test("returns empty array when no runs directory exists", () => {
115
+ // Remove the runs directory
116
+ rmSync(runsDir, { recursive: true, force: true });
117
+ const manifests = rescanRuns();
118
+ expect(manifests).toEqual([]);
119
+ });
120
+
121
+ test("returns empty array when runs directory is empty", () => {
122
+ const manifests = rescanRuns();
123
+ expect(manifests).toEqual([]);
124
+ });
125
+
126
+ test("creates manifests for run directories without existing manifests", () => {
127
+ createRun("run-001", { sizeBytes: 2048 });
128
+ createRun("run-002", { sizeBytes: 4096 });
129
+
130
+ const manifests = rescanRuns();
131
+ expect(manifests).toHaveLength(2);
132
+
133
+ const run1 = manifests.find((m) => m.runId === "run-001");
134
+ expect(run1).toBeDefined();
135
+ expect(run1!.status).toBe("completed");
136
+ // totalBytes includes manifest.json that rescan just wrote
137
+ expect(run1!.totalBytes).toBeGreaterThanOrEqual(2048);
138
+
139
+ const run2 = manifests.find((m) => m.runId === "run-002");
140
+ expect(run2).toBeDefined();
141
+ expect(run2!.status).toBe("completed");
142
+ expect(run2!.totalBytes).toBeGreaterThanOrEqual(4096);
143
+ });
144
+
145
+ test("marks the active run correctly", () => {
146
+ process.env.VELLUM_PROFILER_RUN_ID = "active-run";
147
+ createRun("active-run", { sizeBytes: 1024 });
148
+ createRun("old-run", { sizeBytes: 1024 });
149
+
150
+ const manifests = rescanRuns();
151
+ const active = manifests.find((m) => m.runId === "active-run");
152
+ const old = manifests.find((m) => m.runId === "old-run");
153
+
154
+ expect(active!.status).toBe("active");
155
+ expect(old!.status).toBe("completed");
156
+ });
157
+
158
+ test("transitions previously-active run to completed when no longer active", () => {
159
+ // Create a run with an "active" manifest
160
+ createRun("old-active", {
161
+ sizeBytes: 1024,
162
+ manifest: { status: "active", createdAt: "2025-01-01T00:00:00Z" },
163
+ });
164
+
165
+ // No VELLUM_PROFILER_RUN_ID set, so nothing is active
166
+ const manifests = rescanRuns();
167
+ const run = manifests.find((m) => m.runId === "old-active");
168
+
169
+ expect(run!.status).toBe("completed");
170
+
171
+ // Verify it was persisted to disk
172
+ const onDisk = readManifestFromDisk("old-active");
173
+ expect(onDisk!.status).toBe("completed");
174
+ });
175
+
176
+ test("is idempotent — repeated calls after initial scan produce the same result", () => {
177
+ createRun("run-a", { sizeBytes: 1024 });
178
+ process.env.VELLUM_PROFILER_RUN_ID = "run-a";
179
+
180
+ // First call writes the manifest, which changes totalBytes
181
+ rescanRuns();
182
+ // Second and third calls should be stable
183
+ const second = rescanRuns();
184
+ const third = rescanRuns();
185
+
186
+ expect(second).toHaveLength(1);
187
+ expect(third).toHaveLength(1);
188
+ expect(second[0]!.runId).toBe(third[0]!.runId);
189
+ expect(second[0]!.status).toBe(third[0]!.status);
190
+ expect(second[0]!.totalBytes).toBe(third[0]!.totalBytes);
191
+ });
192
+
193
+ test("preserves createdAt from existing manifest", () => {
194
+ const originalCreatedAt = "2024-06-15T12:00:00Z";
195
+ createRun("preserved-run", {
196
+ sizeBytes: 1024,
197
+ manifest: {
198
+ status: "completed",
199
+ createdAt: originalCreatedAt,
200
+ },
201
+ });
202
+
203
+ const manifests = rescanRuns();
204
+ const run = manifests.find((m) => m.runId === "preserved-run");
205
+ expect(run!.createdAt).toBe(originalCreatedAt);
206
+ });
207
+ });
208
+
209
+ describe("runProfilerSweep", () => {
210
+ test("no-ops when no runs exist", () => {
211
+ const result = runProfilerSweep();
212
+ expect(result.prunedCount).toBe(0);
213
+ expect(result.freedBytes).toBe(0);
214
+ expect(result.activeRunOverBudget).toBe(false);
215
+ expect(result.remainingRuns).toBe(0);
216
+ });
217
+
218
+ test("does not prune when under all budgets", () => {
219
+ process.env.VELLUM_PROFILER_MAX_BYTES = "1000000"; // 1 MB
220
+ process.env.VELLUM_PROFILER_MAX_RUNS = "10";
221
+
222
+ createRun("run-1", { sizeBytes: 1024 });
223
+ createRun("run-2", { sizeBytes: 1024 });
224
+
225
+ const result = runProfilerSweep();
226
+ expect(result.prunedCount).toBe(0);
227
+ expect(result.remainingRuns).toBe(2);
228
+
229
+ // Both directories still exist
230
+ expect(existsSync(join(runsDir, "run-1"))).toBe(true);
231
+ expect(existsSync(join(runsDir, "run-2"))).toBe(true);
232
+ });
233
+
234
+ test("prunes oldest completed runs when byte budget exceeded", () => {
235
+ // Set a very small byte budget
236
+ process.env.VELLUM_PROFILER_MAX_BYTES = "3000";
237
+ process.env.VELLUM_PROFILER_MAX_RUNS = "100";
238
+ process.env.VELLUM_PROFILER_MIN_FREE_MB = "0";
239
+
240
+ // Create runs with explicit timestamps for ordering
241
+ createRun("oldest", {
242
+ sizeBytes: 2000,
243
+ manifest: {
244
+ status: "completed",
245
+ createdAt: "2025-01-01T00:00:00Z",
246
+ },
247
+ });
248
+ createRun("middle", {
249
+ sizeBytes: 2000,
250
+ manifest: {
251
+ status: "completed",
252
+ createdAt: "2025-02-01T00:00:00Z",
253
+ },
254
+ });
255
+ createRun("newest", {
256
+ sizeBytes: 2000,
257
+ manifest: {
258
+ status: "completed",
259
+ createdAt: "2025-03-01T00:00:00Z",
260
+ },
261
+ });
262
+
263
+ const result = runProfilerSweep();
264
+
265
+ // Should prune until total bytes fit within 3000.
266
+ // Each run is ~2000 payload + manifest overhead. The sweep recomputes
267
+ // sizes so actual totals include the manifest file. At least 1 run
268
+ // should be pruned (the oldest).
269
+ expect(result.prunedCount).toBeGreaterThanOrEqual(1);
270
+ expect(result.freedBytes).toBeGreaterThan(0);
271
+
272
+ // The oldest should be gone
273
+ expect(existsSync(join(runsDir, "oldest"))).toBe(false);
274
+ });
275
+
276
+ test("prunes oldest completed runs when max-run-count exceeded", () => {
277
+ process.env.VELLUM_PROFILER_MAX_BYTES = "999999999";
278
+ process.env.VELLUM_PROFILER_MAX_RUNS = "2";
279
+ process.env.VELLUM_PROFILER_MIN_FREE_MB = "0";
280
+
281
+ createRun("run-a", {
282
+ sizeBytes: 100,
283
+ manifest: {
284
+ status: "completed",
285
+ createdAt: "2025-01-01T00:00:00Z",
286
+ },
287
+ });
288
+ createRun("run-b", {
289
+ sizeBytes: 100,
290
+ manifest: {
291
+ status: "completed",
292
+ createdAt: "2025-02-01T00:00:00Z",
293
+ },
294
+ });
295
+ createRun("run-c", {
296
+ sizeBytes: 100,
297
+ manifest: {
298
+ status: "completed",
299
+ createdAt: "2025-03-01T00:00:00Z",
300
+ },
301
+ });
302
+ createRun("run-d", {
303
+ sizeBytes: 100,
304
+ manifest: {
305
+ status: "completed",
306
+ createdAt: "2025-04-01T00:00:00Z",
307
+ },
308
+ });
309
+
310
+ const result = runProfilerSweep();
311
+
312
+ // 4 completed runs, max 2: should prune 2 oldest
313
+ expect(result.prunedCount).toBe(2);
314
+ expect(existsSync(join(runsDir, "run-a"))).toBe(false);
315
+ expect(existsSync(join(runsDir, "run-b"))).toBe(false);
316
+ expect(existsSync(join(runsDir, "run-c"))).toBe(true);
317
+ expect(existsSync(join(runsDir, "run-d"))).toBe(true);
318
+ expect(result.remainingRuns).toBe(2);
319
+ });
320
+
321
+ test("never deletes the active run", () => {
322
+ process.env.VELLUM_PROFILER_RUN_ID = "current";
323
+ process.env.VELLUM_PROFILER_MAX_BYTES = "500";
324
+ process.env.VELLUM_PROFILER_MAX_RUNS = "1";
325
+ process.env.VELLUM_PROFILER_MIN_FREE_MB = "0";
326
+
327
+ createRun("current", { sizeBytes: 2000 });
328
+ createRun("old-completed", {
329
+ sizeBytes: 2000,
330
+ manifest: {
331
+ status: "completed",
332
+ createdAt: "2025-01-01T00:00:00Z",
333
+ },
334
+ });
335
+
336
+ const result = runProfilerSweep();
337
+
338
+ // old-completed should be pruned, current should survive
339
+ expect(existsSync(join(runsDir, "current"))).toBe(true);
340
+ expect(existsSync(join(runsDir, "old-completed"))).toBe(false);
341
+ expect(result.prunedCount).toBe(1);
342
+ });
343
+
344
+ test("signals active-run-over-budget when active run exceeds byte budget", () => {
345
+ process.env.VELLUM_PROFILER_RUN_ID = "big-active";
346
+ process.env.VELLUM_PROFILER_MAX_BYTES = "500";
347
+ process.env.VELLUM_PROFILER_MAX_RUNS = "100";
348
+ process.env.VELLUM_PROFILER_MIN_FREE_MB = "0";
349
+
350
+ createRun("big-active", { sizeBytes: 10000 });
351
+
352
+ const result = runProfilerSweep();
353
+
354
+ expect(result.activeRunOverBudget).toBe(true);
355
+ // Active run must still exist
356
+ expect(existsSync(join(runsDir, "big-active"))).toBe(true);
357
+ expect(result.remainingRuns).toBe(1);
358
+ });
359
+
360
+ test("deletes single oversized completed run to recover space", () => {
361
+ process.env.VELLUM_PROFILER_MAX_BYTES = "100";
362
+ process.env.VELLUM_PROFILER_MAX_RUNS = "100";
363
+ process.env.VELLUM_PROFILER_MIN_FREE_MB = "0";
364
+
365
+ createRun("huge-completed", {
366
+ sizeBytes: 50000,
367
+ manifest: {
368
+ status: "completed",
369
+ createdAt: "2025-01-01T00:00:00Z",
370
+ },
371
+ });
372
+
373
+ const result = runProfilerSweep();
374
+
375
+ expect(result.prunedCount).toBe(1);
376
+ expect(result.freedBytes).toBeGreaterThanOrEqual(50000);
377
+ expect(existsSync(join(runsDir, "huge-completed"))).toBe(false);
378
+ });
379
+
380
+ test("creates profiler directories on first sweep if missing", () => {
381
+ // Remove everything
382
+ rmSync(join(testDir, "data", "profiler"), {
383
+ recursive: true,
384
+ force: true,
385
+ });
386
+
387
+ const result = runProfilerSweep();
388
+ expect(result.prunedCount).toBe(0);
389
+ expect(existsSync(runsDir)).toBe(true);
390
+ });
391
+
392
+ test("sweep is idempotent — repeated calls produce consistent state", () => {
393
+ process.env.VELLUM_PROFILER_MAX_BYTES = "999999";
394
+ process.env.VELLUM_PROFILER_MAX_RUNS = "10";
395
+ process.env.VELLUM_PROFILER_MIN_FREE_MB = "0";
396
+
397
+ createRun("stable-1", { sizeBytes: 1024 });
398
+ createRun("stable-2", { sizeBytes: 1024 });
399
+
400
+ const first = runProfilerSweep();
401
+ const second = runProfilerSweep();
402
+
403
+ expect(first.prunedCount).toBe(0);
404
+ expect(second.prunedCount).toBe(0);
405
+ expect(first.remainingRuns).toBe(second.remainingRuns);
406
+ });
407
+
408
+ test("active run is not counted against max completed runs", () => {
409
+ process.env.VELLUM_PROFILER_RUN_ID = "live";
410
+ process.env.VELLUM_PROFILER_MAX_BYTES = "999999";
411
+ process.env.VELLUM_PROFILER_MAX_RUNS = "2";
412
+ process.env.VELLUM_PROFILER_MIN_FREE_MB = "0";
413
+
414
+ createRun("live", { sizeBytes: 100 });
415
+ createRun("done-1", {
416
+ sizeBytes: 100,
417
+ manifest: {
418
+ status: "completed",
419
+ createdAt: "2025-01-01T00:00:00Z",
420
+ },
421
+ });
422
+ createRun("done-2", {
423
+ sizeBytes: 100,
424
+ manifest: {
425
+ status: "completed",
426
+ createdAt: "2025-02-01T00:00:00Z",
427
+ },
428
+ });
429
+
430
+ const result = runProfilerSweep();
431
+
432
+ // 2 completed runs = max, so nothing should be pruned
433
+ expect(result.prunedCount).toBe(0);
434
+ // Active + 2 completed = 3 remaining
435
+ expect(result.remainingRuns).toBe(3);
436
+ expect(existsSync(join(runsDir, "live"))).toBe(true);
437
+ expect(existsSync(join(runsDir, "done-1"))).toBe(true);
438
+ expect(existsSync(join(runsDir, "done-2"))).toBe(true);
439
+ });
440
+ });
441
+ });
@@ -189,7 +189,7 @@ describe("createProxyApprovalCallback", () => {
189
189
  expect(prompterSendToClient).not.toHaveBeenCalled();
190
190
  });
191
191
 
192
- test("high-risk with plain allow rule (no allowHighRisk) falls through to prompt", async () => {
192
+ test("ask_missing_credential with allow rule auto-allows (medium risk)", async () => {
193
193
  findHighestPriorityRuleMock.mockReturnValue({
194
194
  id: "rule-hr-1",
195
195
  tool: "network_request",
@@ -198,53 +198,17 @@ describe("createProxyApprovalCallback", () => {
198
198
  decision: "allow" as const,
199
199
  priority: 100,
200
200
  createdAt: Date.now(),
201
- // No allowHighRisk — should NOT auto-allow for high-risk decisions
202
201
  });
203
202
 
204
203
  const ctx = makeContext();
205
204
  const prompterSendToClient = mock(() => {});
206
205
  const prompter = new PermissionPrompter(prompterSendToClient);
207
206
 
208
- const originalPrompt = prompter.prompt.bind(prompter);
209
- prompter.prompt = async (...args) => {
210
- const p = originalPrompt(...args);
211
- await new Promise((r) => setTimeout(r, 10));
212
- const call = (prompterSendToClient.mock.calls as unknown[][])[0];
213
- const msg = call[0] as { requestId: string };
214
- prompter.resolveConfirmation(msg.requestId, "allow");
215
- return p;
216
- };
217
-
218
207
  const callback = createProxyApprovalCallback(prompter, ctx);
219
- // ask_missing_credential is high risk
220
208
  const result = await callback(makeAskMissingCredentialRequest());
221
209
 
222
210
  expect(result).toBe(true);
223
- // Prompter SHOULD have been called — plain allow rule doesn't auto-allow high-risk
224
- expect(prompterSendToClient).toHaveBeenCalled();
225
- });
226
-
227
- test("high-risk with allowHighRisk allow rule auto-allows without prompting", async () => {
228
- findHighestPriorityRuleMock.mockReturnValue({
229
- id: "rule-hr-2",
230
- tool: "network_request",
231
- pattern: "network_request:https://api.fal.ai:443/*",
232
- scope: "/tmp/test-project",
233
- decision: "allow" as const,
234
- priority: 100,
235
- createdAt: Date.now(),
236
- allowHighRisk: true,
237
- });
238
-
239
- const ctx = makeContext();
240
- const prompterSendToClient = mock(() => {});
241
- const prompter = new PermissionPrompter(prompterSendToClient);
242
-
243
- const callback = createProxyApprovalCallback(prompter, ctx);
244
- const result = await callback(makeAskMissingCredentialRequest());
245
-
246
- expect(result).toBe(true);
247
- // Prompter should NOT have been called — allowHighRisk rule auto-allows
211
+ // Plain allow rule auto-allows medium-risk requests
248
212
  expect(prompterSendToClient).not.toHaveBeenCalled();
249
213
  });
250
214
 
@@ -436,7 +400,7 @@ describe("createProxyApprovalCallback", () => {
436
400
  await callback(makeAskUnauthenticatedRequest());
437
401
  });
438
402
 
439
- test("uses high risk level for ask_missing_credential decisions", async () => {
403
+ test("uses medium risk level for ask_missing_credential decisions", async () => {
440
404
  const ctx = makeContext();
441
405
  const prompterSendToClient = mock(() => {});
442
406
  const prompter = new PermissionPrompter(prompterSendToClient);
@@ -447,8 +411,7 @@ describe("createProxyApprovalCallback", () => {
447
411
  await new Promise((r) => setTimeout(r, 10));
448
412
  const call = (prompterSendToClient.mock.calls as unknown[][])[0];
449
413
  const msg = call[0] as { requestId: string; riskLevel: string };
450
- // Missing credential prompts are high risk — the target wants auth
451
- expect(msg.riskLevel).toBe("high");
414
+ expect(msg.riskLevel).toBe("medium");
452
415
  prompter.resolveConfirmation(msg.requestId, "allow");
453
416
  return p;
454
417
  };
@@ -523,40 +486,6 @@ describe("createProxyApprovalCallback", () => {
523
486
  // in contrast to the proxied bash activation path which CANNOT (tested
524
487
  // in tool-executor.test.ts).
525
488
 
526
- test("always_allow_high_risk persists rule with allowHighRisk flag", async () => {
527
- const ctx = makeContext();
528
- const prompterSendToClient = mock(() => {});
529
- const prompter = new PermissionPrompter(prompterSendToClient);
530
-
531
- const originalPrompt = prompter.prompt.bind(prompter);
532
- prompter.prompt = async (...args) => {
533
- const p = originalPrompt(...args);
534
- await new Promise((r) => setTimeout(r, 10));
535
- const call = (prompterSendToClient.mock.calls as unknown[][])[0];
536
- const msg = call[0] as { requestId: string };
537
- prompter.resolveConfirmation(
538
- msg.requestId,
539
- "always_allow_high_risk",
540
- "network_request:https://api.fal.ai:443/*",
541
- "/tmp/test-project",
542
- );
543
- return p;
544
- };
545
-
546
- const callback = createProxyApprovalCallback(prompter, ctx);
547
- const result = await callback(makeAskMissingCredentialRequest());
548
-
549
- expect(result).toBe(true);
550
- expect(addRuleMock).toHaveBeenCalledWith(
551
- "network_request",
552
- "network_request:https://api.fal.ai:443/*",
553
- "/tmp/test-project",
554
- "allow",
555
- 100,
556
- { allowHighRisk: true },
557
- );
558
- });
559
-
560
489
  test("one-time allow does NOT persist any rule", async () => {
561
490
  const ctx = makeContext();
562
491
  const prompterSendToClient = mock(() => {});
@@ -109,7 +109,7 @@ describe("tool registry dynamic-tools tools", () => {
109
109
 
110
110
  describe("tool manifest", () => {
111
111
  test("eager module tool names list contains expected count", () => {
112
- expect(eagerModuleToolNames.length).toBe(9);
112
+ expect(eagerModuleToolNames.length).toBe(11);
113
113
  });
114
114
 
115
115
  test("explicit tools list includes memory and credential tools", () => {
@@ -705,10 +705,11 @@ describe("Terminal output format: formatShellOutput shared by sandbox and host",
705
705
  const longOutput = "x".repeat(MAX_OUTPUT_LENGTH + 100);
706
706
  const result = formatShellOutput(longOutput, "", 0, false, 120);
707
707
 
708
- expect(result.content.length).toBe(
709
- MAX_OUTPUT_LENGTH + 1 + '<output_truncated limit="50K" />'.length,
710
- );
711
- expect(result.content).toContain("<output_truncated");
708
+ expect(result.content).toContain('limit="20K"');
709
+ expect(result.content).toContain('file="');
710
+ // The <output_truncated tag starts right after MAX_OUTPUT_LENGTH chars + 1 newline
711
+ const tagStart = result.content.indexOf("<output_truncated");
712
+ expect(tagStart).toBe(MAX_OUTPUT_LENGTH + 1);
712
713
  });
713
714
 
714
715
  test("timed-out command appends timeout tag and sets isError", () => {