@vellumai/assistant 0.7.3 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. package/ARCHITECTURE.md +29 -28
  2. package/Dockerfile +1 -0
  3. package/__tests__/permissions/gateway-threshold-reader.test.ts +236 -9
  4. package/bun.lock +3 -0
  5. package/knip.json +1 -0
  6. package/node_modules/@vellumai/ipc-server-utils/bun.lock +24 -0
  7. package/node_modules/@vellumai/ipc-server-utils/package.json +18 -0
  8. package/node_modules/@vellumai/ipc-server-utils/src/index.ts +6 -0
  9. package/node_modules/@vellumai/ipc-server-utils/src/socket-watchdog.test.ts +430 -0
  10. package/node_modules/@vellumai/ipc-server-utils/src/socket-watchdog.ts +221 -0
  11. package/node_modules/@vellumai/ipc-server-utils/tsconfig.json +20 -0
  12. package/openapi.yaml +22 -4
  13. package/package.json +3 -1
  14. package/src/__tests__/annotate-risk-options.test.ts +291 -0
  15. package/src/__tests__/approval-cascade.test.ts +8 -16
  16. package/src/__tests__/approval-routes-http.test.ts +6 -0
  17. package/src/__tests__/auto-analysis-end-to-end.test.ts +12 -25
  18. package/src/__tests__/call-constants.test.ts +10 -1
  19. package/src/__tests__/call-controller.test.ts +127 -0
  20. package/src/__tests__/cli-memory-v2-reembed-skills.test.ts +58 -28
  21. package/src/__tests__/config-loader-platform-defaults.test.ts +284 -1
  22. package/src/__tests__/context-search-memory-source.test.ts +3 -26
  23. package/src/__tests__/context-search-pkb-source.test.ts +12 -6
  24. package/src/__tests__/conversation-abort-tool-results.test.ts +1 -6
  25. package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +1 -1
  26. package/src/__tests__/conversation-agent-loop-overflow.test.ts +1 -1
  27. package/src/__tests__/conversation-agent-loop.test.ts +3 -3
  28. package/src/__tests__/conversation-confirmation-signals.test.ts +5 -13
  29. package/src/__tests__/conversation-init.benchmark.test.ts +1 -1
  30. package/src/__tests__/conversation-process-callsite.test.ts +1 -6
  31. package/src/__tests__/conversation-provider-retry-repair.test.ts +1 -6
  32. package/src/__tests__/conversation-runtime-assembly.test.ts +15 -6
  33. package/src/__tests__/conversation-slash-unknown.test.ts +1 -6
  34. package/src/__tests__/conversation-surfaces-action-delivery.test.ts +170 -9
  35. package/src/__tests__/conversation-surfaces-data-persist.test.ts +73 -1
  36. package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +59 -0
  37. package/src/__tests__/conversation-workspace-injection.test.ts +1 -7
  38. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +1 -7
  39. package/src/__tests__/filing-service.test.ts +2 -19
  40. package/src/__tests__/handlers-skills-memory-v2-reseed.test.ts +10 -26
  41. package/src/__tests__/injector-chain.test.ts +24 -16
  42. package/src/__tests__/injector-pkb-v2-silenced.test.ts +10 -7
  43. package/src/__tests__/lifecycle-memory-v2-seed.test.ts +154 -67
  44. package/src/__tests__/notification-decision-fallback.test.ts +91 -0
  45. package/src/__tests__/notification-decision-strategy.test.ts +22 -0
  46. package/src/__tests__/oauth-cli.test.ts +121 -0
  47. package/src/__tests__/relay-server.test.ts +46 -2
  48. package/src/__tests__/secret-prompt-log-hygiene.test.ts +7 -5
  49. package/src/__tests__/secret-prompter-channel-fallback.test.ts +7 -5
  50. package/src/__tests__/secret-response-routing.test.ts +7 -5
  51. package/src/__tests__/server-history-render.test.ts +82 -0
  52. package/src/__tests__/skill-include-graph.test.ts +31 -0
  53. package/src/__tests__/skill-load-tool.test.ts +44 -16
  54. package/src/__tests__/skills.test.ts +39 -0
  55. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -42
  56. package/src/__tests__/tool-executor.test.ts +155 -0
  57. package/src/__tests__/voice-session-bridge.test.ts +3 -0
  58. package/src/__tests__/workspace-migration-069-seed-onboarding-threads.test.ts +120 -0
  59. package/src/__tests__/workspace-migration-071-remove-safe-storage-release-note.test.ts +206 -0
  60. package/src/__tests__/workspace-migration-safe-storage-limits-release.test.ts +15 -27
  61. package/src/agent/loop.ts +11 -0
  62. package/src/approvals/guardian-decision-primitive.ts +0 -13
  63. package/src/approvals/guardian-request-resolvers.ts +4 -32
  64. package/src/calls/call-constants.ts +5 -8
  65. package/src/calls/call-controller.ts +130 -67
  66. package/src/calls/relay-server.ts +7 -1
  67. package/src/calls/voice-session-bridge.ts +1 -1
  68. package/src/cli/commands/memory-v2.ts +7 -7
  69. package/src/cli/commands/oauth/__tests__/connect.test.ts +0 -254
  70. package/src/cli/commands/oauth/connect.ts +10 -52
  71. package/src/config/bundled-skills/app-builder/SKILL.md +1 -3
  72. package/src/config/feature-flag-registry.json +1 -17
  73. package/src/config/loader.ts +72 -19
  74. package/src/config/schemas/memory-v2.ts +1 -1
  75. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +32 -0
  76. package/src/daemon/conversation-agent-loop-handlers.ts +32 -0
  77. package/src/daemon/conversation-agent-loop.ts +13 -10
  78. package/src/daemon/conversation-lifecycle.ts +22 -8
  79. package/src/daemon/conversation-surfaces.ts +16 -14
  80. package/src/daemon/conversation-tool-setup.ts +9 -5
  81. package/src/daemon/conversation.ts +1 -1
  82. package/src/daemon/handlers/shared.ts +26 -0
  83. package/src/daemon/host-bash-proxy.ts +1 -1
  84. package/src/daemon/host-browser-proxy.ts +1 -1
  85. package/src/daemon/host-cu-proxy.ts +1 -1
  86. package/src/daemon/host-file-proxy.ts +1 -1
  87. package/src/daemon/host-transfer-proxy.ts +2 -2
  88. package/src/daemon/lifecycle.ts +88 -73
  89. package/src/daemon/memory-v2-startup.ts +55 -14
  90. package/src/daemon/message-types/messages.ts +19 -1
  91. package/src/documents/document-store.ts +35 -1
  92. package/src/filing/filing-service.ts +2 -3
  93. package/src/heartbeat/heartbeat-service.ts +1 -1
  94. package/src/ipc/assistant-server.ts +93 -36
  95. package/src/ipc/skill-server.ts +99 -42
  96. package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +10 -57
  97. package/src/memory/context-search/sources/memory-v2.ts +1 -17
  98. package/src/memory/context-search/sources/memory.ts +2 -2
  99. package/src/memory/context-search/sources/pkb.ts +2 -3
  100. package/src/memory/graph/__tests__/conversation-graph-memory-v2-routing.test.ts +104 -61
  101. package/src/memory/graph/__tests__/handle-remember-v2.test.ts +11 -26
  102. package/src/memory/graph/conversation-graph-memory.ts +32 -9
  103. package/src/memory/graph/graph-search.test.ts +6 -5
  104. package/src/memory/graph/graph-search.ts +3 -4
  105. package/src/memory/graph/retriever.test.ts +12 -7
  106. package/src/memory/graph/retriever.ts +4 -5
  107. package/src/memory/graph/tool-handlers.ts +3 -4
  108. package/src/memory/graph/tools.ts +4 -4
  109. package/src/memory/indexer.ts +1 -2
  110. package/src/memory/jobs/__tests__/embed-concept-page.test.ts +116 -0
  111. package/src/memory/jobs/embed-concept-page.ts +223 -87
  112. package/src/memory/jobs-worker.ts +8 -4
  113. package/src/memory/pkb/pkb-search.test.ts +6 -5
  114. package/src/memory/pkb/pkb-search.ts +4 -5
  115. package/src/memory/qdrant-client.ts +3 -0
  116. package/src/memory/search/semantic.ts +4 -5
  117. package/src/memory/v2/__tests__/activation.test.ts +35 -5
  118. package/src/memory/v2/__tests__/consolidation-job.test.ts +21 -32
  119. package/src/memory/v2/__tests__/injection.test.ts +140 -23
  120. package/src/memory/v2/__tests__/qdrant.test.ts +310 -9
  121. package/src/memory/v2/__tests__/sim.test.ts +118 -7
  122. package/src/memory/v2/__tests__/static-context.test.ts +1 -13
  123. package/src/memory/v2/__tests__/sweep-job.test.ts +19 -33
  124. package/src/memory/v2/consolidation-job.ts +7 -8
  125. package/src/memory/v2/injection.ts +32 -12
  126. package/src/memory/v2/page-store.ts +39 -0
  127. package/src/memory/v2/prompts/consolidation.ts +5 -0
  128. package/src/memory/v2/qdrant.ts +209 -48
  129. package/src/memory/v2/sim.ts +67 -26
  130. package/src/memory/v2/static-context.ts +4 -8
  131. package/src/memory/v2/sweep-job.ts +5 -6
  132. package/src/memory/v2/types.ts +7 -0
  133. package/src/notifications/copy-composer.ts +46 -12
  134. package/src/notifications/decision-engine.ts +46 -0
  135. package/src/permissions/gateway-threshold-reader.ts +116 -8
  136. package/src/permissions/prompter.ts +86 -96
  137. package/src/permissions/secret-prompter.ts +31 -31
  138. package/src/plugins/defaults/injectors.ts +1 -2
  139. package/src/proactive-artifact/job.test.ts +51 -4
  140. package/src/proactive-artifact/job.ts +16 -2
  141. package/src/proactive-artifact/message-copy.ts +18 -1
  142. package/src/prompts/templates/SOUL.md +13 -28
  143. package/src/runtime/auth/route-policy.ts +1 -0
  144. package/src/runtime/channel-approvals.ts +3 -2
  145. package/src/runtime/guardian-reply-router.ts +0 -10
  146. package/src/runtime/pending-interactions.ts +19 -15
  147. package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +147 -0
  148. package/src/runtime/routes/approval-routes.ts +7 -3
  149. package/src/runtime/routes/consolidation-routes.ts +8 -9
  150. package/src/runtime/routes/conversation-query-routes.ts +44 -1
  151. package/src/runtime/routes/debug-bash-routes.ts +2 -0
  152. package/src/runtime/routes/filing-routes.ts +2 -3
  153. package/src/runtime/routes/inbound-stages/guardian-reply-intercept.ts +0 -3
  154. package/src/runtime/routes/memory-item-routes.test.ts +3 -9
  155. package/src/runtime/routes/memory-item-routes.ts +5 -6
  156. package/src/runtime/routes/memory-v2-routes.ts +103 -17
  157. package/src/skills/include-graph.ts +35 -13
  158. package/src/tools/document/document-tool.ts +20 -0
  159. package/src/tools/executor.ts +18 -2
  160. package/src/tools/memory/register.test.ts +7 -5
  161. package/src/tools/permission-checker.ts +15 -0
  162. package/src/tools/skills/load.ts +24 -20
  163. package/src/tools/tool-name-aliases.ts +19 -0
  164. package/src/tools/types.ts +19 -1
  165. package/src/workspace/migrations/067-release-notes-safe-storage-limits.ts +4 -62
  166. package/src/workspace/migrations/069-seed-onboarding-threads.ts +28 -0
  167. package/src/workspace/migrations/070-memory-v2-summary-schema-rebuild.ts +31 -0
  168. package/src/workspace/migrations/071-remove-safe-storage-release-note.ts +111 -0
  169. package/src/workspace/migrations/registry.ts +6 -0
@@ -362,7 +362,9 @@ function getLatestAssistantText(conversationId: string): string | null {
362
362
  if (Array.isArray(parsed)) {
363
363
  return parsed
364
364
  .filter(
365
- (block): block is {
365
+ (
366
+ block,
367
+ ): block is {
366
368
  type: string;
367
369
  text?: string;
368
370
  surfaceType?: string;
@@ -2332,7 +2334,7 @@ describe("relay-server", () => {
2332
2334
 
2333
2335
  expect(relay.getConnectionState()).toBe("awaiting_name");
2334
2336
 
2335
- // Fallback prompt should NOT include assistant name but should include guardian label
2337
+ // Fallback prompt should use the existing guardian-label wording.
2336
2338
  const textMessages = ws.sentMessages
2337
2339
  .map((raw) => JSON.parse(raw) as { type: string; token?: string })
2338
2340
  .filter((m) => m.type === "text");
@@ -2348,6 +2350,48 @@ describe("relay-server", () => {
2348
2350
  }
2349
2351
  });
2350
2352
 
2353
+ test("inbound voice: unknown caller name capture does not speak a UUID assistant name", async () => {
2354
+ const prevName = mockAssistantName;
2355
+ mockAssistantName = "11111111-2222-4333-8444-555555555555";
2356
+ const db = getDb();
2357
+ db.run("DELETE FROM contact_channels");
2358
+ db.run("DELETE FROM contacts");
2359
+ try {
2360
+ ensureConversation("conv-invite-uuid-name");
2361
+ const session = createCallSession({
2362
+ conversationId: "conv-invite-uuid-name",
2363
+ provider: "twilio",
2364
+ fromNumber: "+12125550157",
2365
+ toNumber: "+12125550111",
2366
+ });
2367
+
2368
+ const { ws, relay } = createMockWs(session.id);
2369
+
2370
+ await relay.handleMessage(
2371
+ JSON.stringify({
2372
+ type: "setup",
2373
+ callSid: "CA_invite_uuid_name",
2374
+ from: "+12125550157",
2375
+ to: "+12125550111",
2376
+ }),
2377
+ );
2378
+
2379
+ expect(relay.getConnectionState()).toBe("awaiting_name");
2380
+
2381
+ const promptText = ws.sentMessages
2382
+ .map((raw) => JSON.parse(raw) as { type: string; token?: string })
2383
+ .filter((m) => m.type === "text")
2384
+ .map((m) => m.token ?? "")
2385
+ .join("");
2386
+ expect(promptText).toContain("Hi, this is my human's assistant.");
2387
+ expect(promptText).not.toContain("11111111-2222-4333-8444-555555555555");
2388
+
2389
+ relay.destroy();
2390
+ } finally {
2391
+ mockAssistantName = prevName;
2392
+ }
2393
+ });
2394
+
2351
2395
  // ── Friend-initiated in-call guardian approval flow ────────────────────
2352
2396
 
2353
2397
  test("name capture flow: caller provides name and enters guardian decision wait", async () => {
@@ -39,15 +39,17 @@ mock.module("../runtime/assistant-event-hub.js", () => ({
39
39
  }));
40
40
 
41
41
  // Stub pendingInteractions — SecretPrompter registers/resolves there now
42
+ // Use a real Map so SecretPrompter can store and retrieve promptResolve/promptReject callbacks.
43
+ const _piStore = new Map<string, object>();
42
44
  mock.module("../runtime/pending-interactions.js", () => ({
43
- register: () => {},
44
- resolve: () => undefined,
45
- get: () => undefined,
46
- getAll: () => [],
45
+ register: (id: string, entry: object) => _piStore.set(id, entry),
46
+ resolve: (id: string) => { const e = _piStore.get(id); _piStore.delete(id); return e; },
47
+ get: (id: string) => _piStore.get(id),
48
+ getAll: () => [..._piStore.values()],
47
49
  getByConversation: () => [],
48
50
  getByKind: () => [],
49
51
  removeByConversation: () => {},
50
- clear: () => {},
52
+ clear: () => _piStore.clear(),
51
53
  }));
52
54
 
53
55
  // Use a tiny timeout so the setTimeout branch fires quickly in tests
@@ -38,15 +38,17 @@ mock.module("../runtime/assistant-event-hub.js", () => ({
38
38
  broadcastMessage: (msg: ServerMessage) => broadcastMessages.push(msg),
39
39
  }));
40
40
 
41
+ // Use a real Map so SecretPrompter can store and retrieve promptResolve/promptReject callbacks.
42
+ const _piStore = new Map<string, object>();
41
43
  mock.module("../runtime/pending-interactions.js", () => ({
42
- register: () => {},
43
- resolve: () => undefined,
44
- get: () => undefined,
45
- getAll: () => [],
44
+ register: (id: string, entry: object) => _piStore.set(id, entry),
45
+ resolve: (id: string) => { const e = _piStore.get(id); _piStore.delete(id); return e; },
46
+ get: (id: string) => _piStore.get(id),
47
+ getAll: () => [..._piStore.values()],
46
48
  getByConversation: () => [],
47
49
  getByKind: () => [],
48
50
  removeByConversation: () => {},
49
- clear: () => {},
51
+ clear: () => _piStore.clear(),
50
52
  }));
51
53
 
52
54
  const { SecretPrompter } = await import("../permissions/secret-prompter.js");
@@ -11,15 +11,17 @@ mock.module("../runtime/assistant-event-hub.js", () => ({
11
11
  broadcastMessage: (msg: ServerMessage) => broadcastedMessages.push(msg),
12
12
  }));
13
13
 
14
+ // Use a real Map so SecretPrompter can store and retrieve promptResolve/promptReject callbacks.
15
+ const _piStore = new Map<string, object>();
14
16
  mock.module("../runtime/pending-interactions.js", () => ({
15
- register: () => {},
16
- resolve: () => undefined,
17
- get: () => undefined,
18
- getAll: () => [],
17
+ register: (id: string, entry: object) => _piStore.set(id, entry),
18
+ resolve: (id: string) => { const e = _piStore.get(id); _piStore.delete(id); return e; },
19
+ get: (id: string) => _piStore.get(id),
20
+ getAll: () => [..._piStore.values()],
19
21
  getByConversation: () => [],
20
22
  getByKind: () => [],
21
23
  removeByConversation: () => {},
22
- clear: () => {},
24
+ clear: () => _piStore.clear(),
23
25
  }));
24
26
 
25
27
  const { SecretPrompter } = await import("../permissions/secret-prompter.js");
@@ -154,6 +154,88 @@ describe("renderHistoryContent", () => {
154
154
  ]);
155
155
  });
156
156
 
157
+ // ── Persisted risk-option ladders (Phase B of conflation track) ─────────────
158
+
159
+ test("hydrates persisted _risk*Options annotations onto tool calls", () => {
160
+ // Mirrors what `annotatePersistedAssistantMessage` writes to the DB so the
161
+ // rule editor's chip ladder survives chat-history reload. Without these,
162
+ // hydrated chips fall back to the synthesized `*` allowlist (see web's
163
+ // `synthesizeFallbackOption` in RuleEditorModal.tsx).
164
+ const scopeOptions = [
165
+ { pattern: "exact", label: "exact: rm -rf /tmp" },
166
+ { pattern: "by-program", label: "All rm" },
167
+ ];
168
+ const allowlistOptions = [
169
+ { label: "exact", description: "exact match", pattern: "rm -rf /tmp" },
170
+ { label: "All rm", description: "All rm commands", pattern: "rm *" },
171
+ ];
172
+ const directoryScopeOptions = [
173
+ { scope: "/Users/me/code", label: "in code/" },
174
+ { scope: "everywhere", label: "Everywhere" },
175
+ ];
176
+
177
+ const output = renderHistoryContent([
178
+ {
179
+ type: "tool_use",
180
+ id: "tu_1",
181
+ name: "bash",
182
+ input: { command: "rm -rf /tmp" },
183
+ _riskLevel: "high",
184
+ _matchedTrustRuleId: "rule_42",
185
+ _riskScopeOptions: scopeOptions,
186
+ _riskAllowlistOptions: allowlistOptions,
187
+ _riskDirectoryScopeOptions: directoryScopeOptions,
188
+ },
189
+ ]);
190
+
191
+ const [entry] = output.toolCalls;
192
+ expect(entry.riskLevel).toBe("high");
193
+ expect(entry.matchedTrustRuleId).toBe("rule_42");
194
+ expect(entry.riskScopeOptions).toEqual(scopeOptions);
195
+ expect(entry.riskAllowlistOptions).toEqual(allowlistOptions);
196
+ expect(entry.riskDirectoryScopeOptions).toEqual(directoryScopeOptions);
197
+ });
198
+
199
+ test("ignores non-array _risk*Options annotations", () => {
200
+ // Defensive: a malformed persisted block should not throw or coerce.
201
+ const output = renderHistoryContent([
202
+ {
203
+ type: "tool_use",
204
+ id: "tu_1",
205
+ name: "bash",
206
+ input: { command: "ls" },
207
+ _riskLevel: "low",
208
+ _riskScopeOptions: "not an array",
209
+ _riskAllowlistOptions: { not: "an array" },
210
+ _riskDirectoryScopeOptions: 42,
211
+ },
212
+ ]);
213
+
214
+ const [entry] = output.toolCalls;
215
+ expect(entry.riskLevel).toBe("low");
216
+ expect(entry.riskScopeOptions).toBeUndefined();
217
+ expect(entry.riskAllowlistOptions).toBeUndefined();
218
+ expect(entry.riskDirectoryScopeOptions).toBeUndefined();
219
+ });
220
+
221
+ test("omits absent _risk*Options annotations", () => {
222
+ const output = renderHistoryContent([
223
+ {
224
+ type: "tool_use",
225
+ id: "tu_1",
226
+ name: "bash",
227
+ input: { command: "ls" },
228
+ _riskLevel: "low",
229
+ },
230
+ ]);
231
+
232
+ const [entry] = output.toolCalls;
233
+ expect(entry.riskLevel).toBe("low");
234
+ expect(entry.riskScopeOptions).toBeUndefined();
235
+ expect(entry.riskAllowlistOptions).toBeUndefined();
236
+ expect(entry.riskDirectoryScopeOptions).toBeUndefined();
237
+ });
238
+
157
239
  test("handles mixed text and tool blocks", () => {
158
240
  const output = renderHistoryContent([
159
241
  { type: "text", text: "Let me look that up." },
@@ -6,6 +6,7 @@ import {
6
6
  getImmediateChildren,
7
7
  indexCatalogById,
8
8
  traverseIncludes,
9
+ validateIncludeCycles,
9
10
  validateIncludes,
10
11
  } from "../skills/include-graph.js";
11
12
 
@@ -299,6 +300,36 @@ describe("validateIncludes — cycle detection", () => {
299
300
  });
300
301
  });
301
302
 
303
+ describe("validateIncludeCycles", () => {
304
+ test("skips missing children while still detecting available cycles", () => {
305
+ const catalog = [
306
+ makeSkill("root", ["missing", "a"]),
307
+ makeSkill("a", ["b"]),
308
+ makeSkill("b", ["a"]),
309
+ ];
310
+ const index = indexCatalogById(catalog);
311
+
312
+ const result = validateIncludeCycles("root", index);
313
+
314
+ expect(result.ok).toBe(false);
315
+ if (!result.ok && result.error === "cycle") {
316
+ expect(result.cyclePath).toEqual(["a", "b", "a"]);
317
+ }
318
+ });
319
+
320
+ test("returns success when the only invalid edges are missing children", () => {
321
+ const catalog = [makeSkill("root", ["missing"])];
322
+ const index = indexCatalogById(catalog);
323
+
324
+ const result = validateIncludeCycles("root", index);
325
+
326
+ expect(result.ok).toBe(true);
327
+ if (result.ok) {
328
+ expect(result.visited).toEqual(["root"]);
329
+ }
330
+ });
331
+ });
332
+
302
333
  describe("collectAllMissing", () => {
303
334
  test("returns empty set when skill has no includes", () => {
304
335
  const catalog = [makeSkill("root")];
@@ -266,17 +266,19 @@ describe("skill_load tool", () => {
266
266
  expect(markers.length).toBe(1);
267
267
  });
268
268
 
269
- test("returns error when skill has missing include", async () => {
269
+ test("continues when skill has missing include", async () => {
270
270
  writeSkillWithIncludes("parent", "Parent", "Has missing child", "Body", [
271
271
  "missing-child",
272
272
  ]);
273
273
  writeFileSync(join(TEST_DIR, "skills", "SKILLS.md"), "- parent\n");
274
274
 
275
275
  const result = await executeSkillLoad({ skill: "parent" });
276
- expect(result.isError).toBe(true);
276
+ expect(result.isError).toBe(false);
277
+ expect(result.content).toContain("Skill: Parent");
278
+ expect(result.content).toContain("Suggested Included Skills (not loaded):");
277
279
  expect(result.content).toContain("missing-child");
278
- expect(result.content).toContain("not found");
279
- expect(result.content).not.toContain("<loaded_skill");
280
+ expect(result.content).toContain('<loaded_skill id="parent"');
281
+ expect(result.content).not.toContain('<loaded_skill id="missing-child"');
280
282
  });
281
283
 
282
284
  test("returns error when skill has circular include", async () => {
@@ -317,7 +319,7 @@ describe("skill_load tool", () => {
317
319
  expect(result.content).toContain("<loaded_skill");
318
320
  });
319
321
 
320
- test("failed include validation (missing) emits no loaded_skill marker", async () => {
322
+ test("missing include emits only the parent loaded_skill marker", async () => {
321
323
  const skillDir = join(TEST_DIR, "skills", "marker-missing");
322
324
  mkdirSync(skillDir, { recursive: true });
323
325
  writeFileSync(
@@ -327,9 +329,13 @@ describe("skill_load tool", () => {
327
329
  writeFileSync(join(TEST_DIR, "skills", "SKILLS.md"), "- marker-missing\n");
328
330
 
329
331
  const result = await executeSkillLoad({ skill: "marker-missing" });
330
- expect(result.isError).toBe(true);
331
- expect(result.content).not.toContain("<loaded_skill");
332
- expect(result.content).not.toMatch(/<loaded_skill\s/);
332
+ expect(result.isError).toBe(false);
333
+ expect(result.content).toContain("Suggested Included Skills (not loaded):");
334
+ expect(result.content).toContain("nonexistent");
335
+ const markers = result.content.match(/<loaded_skill/g) || [];
336
+ expect(markers.length).toBe(1);
337
+ expect(result.content).toContain('<loaded_skill id="marker-missing"');
338
+ expect(result.content).not.toContain('<loaded_skill id="nonexistent"');
333
339
  });
334
340
 
335
341
  test("failed include validation (cycle) emits no loaded_skill marker", async () => {
@@ -365,6 +371,28 @@ describe("skill_load tool", () => {
365
371
  expect(result.content).toContain("Skill: No Includes");
366
372
  });
367
373
 
374
+ test("bundled app-builder loads when frontend-design is unavailable", async () => {
375
+ const result = await executeSkillLoad({ skill: "app-builder" });
376
+
377
+ expect(result.isError).toBe(false);
378
+ expect(result.content).toContain("Skill: App Builder");
379
+ expect(result.content).toContain("Suggested Included Skills (not loaded):");
380
+ expect(result.content).toContain("frontend-design");
381
+ expect(result.content).toContain('<loaded_skill id="app-builder"');
382
+ expect(result.content).not.toContain('<loaded_skill id="frontend-design"');
383
+ });
384
+
385
+ test("bundled phone-calls loads when setup includes are unavailable", async () => {
386
+ const result = await executeSkillLoad({ skill: "phone-calls" });
387
+
388
+ expect(result.isError).toBe(false);
389
+ expect(result.content).toContain("Skill: Phone Calls");
390
+ expect(result.content).toContain("Suggested Included Skills (not loaded):");
391
+ expect(result.content).toContain("twilio-setup");
392
+ expect(result.content).toContain('<loaded_skill id="phone-calls"');
393
+ expect(result.content).not.toContain('<loaded_skill id="twilio-setup"');
394
+ });
395
+
368
396
  test("skill_load output includes immediate child metadata", async () => {
369
397
  writeSkill("child-skill", "Child Skill", "A child skill", "Child body");
370
398
  const parentDir = join(TEST_DIR, "skills", "parent-with-children");
@@ -883,7 +911,7 @@ describe("skill_load tool", () => {
883
911
  expect(mockAutoInstall).toHaveBeenCalledWith("trans-c");
884
912
  });
885
913
 
886
- test("returns error when auto-install of missing include fails", async () => {
914
+ test("continues when auto-install of missing include fails", async () => {
887
915
  writeSkillWithIncludes(
888
916
  "fail-parent",
889
917
  "Fail Parent",
@@ -902,10 +930,12 @@ describe("skill_load tool", () => {
902
930
  });
903
931
 
904
932
  const result = await executeSkillLoad({ skill: "fail-parent" });
905
- expect(result.isError).toBe(true);
933
+ expect(result.isError).toBe(false);
934
+ expect(result.content).toContain("Skill: Fail Parent");
935
+ expect(result.content).toContain("Suggested Included Skills (not loaded):");
906
936
  expect(result.content).toContain("dep-x");
907
- expect(result.content).toContain("not found");
908
- expect(result.content).not.toContain("<loaded_skill");
937
+ expect(result.content).toContain('<loaded_skill id="fail-parent"');
938
+ expect(result.content).not.toContain('<loaded_skill id="dep-x"');
909
939
  });
910
940
 
911
941
  test("stops after MAX_INSTALL_ROUNDS", async () => {
@@ -941,10 +971,8 @@ describe("skill_load tool", () => {
941
971
  });
942
972
 
943
973
  const result = await executeSkillLoad({ skill: "loop-root" });
944
- // Should terminate with an error (the final dep is still missing)
945
- expect(result.isError).toBe(true);
946
- expect(result.content).toContain("not found");
947
- // Should have terminated — installCount should be bounded by MAX_INSTALL_ROUNDS (5)
974
+ expect(result.isError).toBe(false);
975
+ expect(result.content).toContain("Suggested Included Skills (not loaded):");
948
976
  expect(installCount).toBeLessThanOrEqual(5);
949
977
  });
950
978
  });
@@ -1,6 +1,7 @@
1
1
  import {
2
2
  existsSync,
3
3
  mkdirSync,
4
+ readdirSync,
4
5
  readFileSync,
5
6
  rmSync,
6
7
  symlinkSync,
@@ -678,3 +679,41 @@ describe("bundled computer-use skill", () => {
678
679
  ]);
679
680
  });
680
681
  });
682
+
683
+ describe("skill source ownership", () => {
684
+ const BUNDLED_SKILLS_DIR = join(
685
+ import.meta.dir,
686
+ "..",
687
+ "config",
688
+ "bundled-skills",
689
+ );
690
+ const FIRST_PARTY_SKILLS_DIR = join(
691
+ import.meta.dir,
692
+ "..",
693
+ "..",
694
+ "..",
695
+ "skills",
696
+ );
697
+
698
+ function collectSourceSkillIds(rootDir: string): string[] {
699
+ return readdirSync(rootDir, { withFileTypes: true })
700
+ .filter(
701
+ (entry) =>
702
+ entry.isDirectory() &&
703
+ existsSync(join(rootDir, entry.name, "SKILL.md")),
704
+ )
705
+ .map((entry) => entry.name)
706
+ .sort((a, b) => a.localeCompare(b));
707
+ }
708
+
709
+ test("bundled skills are not duplicated in the first-party source catalog", () => {
710
+ const firstPartyIds = new Set(
711
+ collectSourceSkillIds(FIRST_PARTY_SKILLS_DIR),
712
+ );
713
+ const duplicates = collectSourceSkillIds(BUNDLED_SKILLS_DIR).filter((id) =>
714
+ firstPartyIds.has(id),
715
+ );
716
+
717
+ expect(duplicates).toEqual([]);
718
+ });
719
+ });
@@ -30,16 +30,6 @@ mock.module("../util/logger.js", () => ({
30
30
  }),
31
31
  }));
32
32
 
33
- // Allow toggling between no-rule and matched-rule paths
34
- let mockRuleResponse: import("../permissions/types.js").TrustRule | null = null;
35
-
36
- mock.module("../permissions/trust-store.js", () => ({
37
- addRule: () => {},
38
- findHighestPriorityRule: () => mockRuleResponse,
39
- onRulesChanged: () => {},
40
- clearCache: () => {},
41
- }));
42
-
43
33
  mock.module("../config/loader.js", () => ({
44
34
  getConfig: () => ({
45
35
  ui: {},
@@ -302,38 +292,6 @@ describe("Tool execution pipeline benchmark", () => {
302
292
  expect(results[0].decision).toBe("allow");
303
293
  });
304
294
 
305
- test("check: matched allow-rule path for medium-risk tool", async () => {
306
- // Exercise the code path where findHighestPriorityRule returns a matching
307
- // allow rule, rather than always falling through to the no-rule default.
308
- mockRuleResponse = {
309
- id: "bench:allow-file_write",
310
- tool: "file_write",
311
- pattern: "**",
312
- scope: "/tmp",
313
- decision: "allow",
314
- priority: 90,
315
- createdAt: Date.now(),
316
- };
317
-
318
- try {
319
- const { timings, results } = await benchmarkAsync(
320
- () => check("file_write", { path: "/tmp/out.txt" }, "/tmp"),
321
- ITERATIONS,
322
- );
323
-
324
- const p50 = percentile(timings, 50);
325
- const p95 = percentile(timings, 95);
326
-
327
- expect(p50).toBeLessThan(10);
328
- expect(p95).toBeLessThan(20);
329
- // Medium-risk with a matching allow rule should auto-allow
330
- expect(results[0].decision).toBe("allow");
331
- expect(results[0].matchedRule?.id).toBe("bench:allow-file_write");
332
- } finally {
333
- mockRuleResponse = null;
334
- }
335
- });
336
-
337
295
  test("check: permission cost is stable across different input paths", async () => {
338
296
  // Verify that the permission check cost doesn't vary with input path length/complexity.
339
297
  // Actual tool-execution-time independence is tested in the ToolExecutor section below.
@@ -73,6 +73,11 @@ let cachedAssessmentOverride:
73
73
  riskLevel: string;
74
74
  reason: string;
75
75
  scopeOptions: Array<{ pattern: string; label: string }>;
76
+ allowlistOptions?: Array<{
77
+ label: string;
78
+ description: string;
79
+ pattern: string;
80
+ }>;
76
81
  directoryScopeOptions?: Array<{ scope: string; label: string }>;
77
82
  matchType: string;
78
83
  }
@@ -202,6 +207,32 @@ describe("ToolExecutor allowedToolNames gating", () => {
202
207
  expect(result.content).toBe("ok");
203
208
  });
204
209
 
210
+ test("canonicalizes app-builder create_app alias before active-tool gating", async () => {
211
+ const executor = new ToolExecutor(makePrompter());
212
+ const allowed = new Set(["app_create"]);
213
+ const result = await executor.execute(
214
+ "create_app",
215
+ { name: "Calculator" },
216
+ makeContext({ allowedToolNames: allowed }),
217
+ );
218
+
219
+ expect(result.isError).toBe(false);
220
+ expect(result.content).toBe("ok");
221
+ });
222
+
223
+ test("preserves exact active create_app tool before applying compatibility aliases", async () => {
224
+ const executor = new ToolExecutor(makePrompter());
225
+ const allowed = new Set(["create_app", "app_create"]);
226
+ const result = await executor.execute(
227
+ "create_app",
228
+ { name: "Custom App" },
229
+ makeContext({ allowedToolNames: allowed }),
230
+ );
231
+
232
+ expect(result.isError).toBe(false);
233
+ expect(lastCheckArgs?.toolName).toBe("create_app");
234
+ });
235
+
205
236
  test("blocks execution when tool is NOT in the allowed set", async () => {
206
237
  const executor = new ToolExecutor(makePrompter());
207
238
  const allowed = new Set(["file_read", "bash"]);
@@ -1123,4 +1154,128 @@ describe("ToolExecutionResult includes risk metadata from classifier assessment"
1123
1154
  { scope: "/tmp", label: "Anywhere in tmp/" },
1124
1155
  ]);
1125
1156
  });
1157
+
1158
+ test("auto-approved tool result includes riskAllowlistOptions when classifier emits them (Minimatch-glob shape for save path)", async () => {
1159
+ cachedAssessmentOverride = {
1160
+ riskLevel: "medium",
1161
+ reason: "Reads workspace files",
1162
+ // Display ladder (regex shape — not for save).
1163
+ scopeOptions: [
1164
+ { pattern: "^echo\\b.*hello$", label: "echo hello" },
1165
+ { pattern: "^echo\\b", label: "echo *" },
1166
+ ],
1167
+ // Save ladder (Minimatch-glob shape — what gateway matches against).
1168
+ allowlistOptions: [
1169
+ {
1170
+ label: "echo hello",
1171
+ description: "This exact command",
1172
+ pattern: "echo hello",
1173
+ },
1174
+ {
1175
+ label: "echo *",
1176
+ description: "Any echo command",
1177
+ pattern: "action:echo",
1178
+ },
1179
+ ],
1180
+ matchType: "registry",
1181
+ };
1182
+
1183
+ const executor = new ToolExecutor(makePrompter());
1184
+ const result = await executor.execute(
1185
+ "file_read",
1186
+ { path: "README.md" },
1187
+ makeContext({ requireFreshApproval: true }),
1188
+ );
1189
+
1190
+ expect(result.isError).toBe(false);
1191
+ // Both shapes flow through independently — same labels, different patterns.
1192
+ expect(result.riskScopeOptions).toEqual([
1193
+ { pattern: "^echo\\b.*hello$", label: "echo hello" },
1194
+ { pattern: "^echo\\b", label: "echo *" },
1195
+ ]);
1196
+ expect(result.riskAllowlistOptions).toEqual([
1197
+ {
1198
+ label: "echo hello",
1199
+ description: "This exact command",
1200
+ pattern: "echo hello",
1201
+ },
1202
+ {
1203
+ label: "echo *",
1204
+ description: "Any echo command",
1205
+ pattern: "action:echo",
1206
+ },
1207
+ ]);
1208
+ });
1209
+
1210
+ test("riskAllowlistOptions is undefined when classifier did not produce allowlist (e.g. web-risk classifier)", async () => {
1211
+ cachedAssessmentOverride = {
1212
+ riskLevel: "low",
1213
+ reason: "GET request to public URL",
1214
+ scopeOptions: [{ pattern: "https://example.com/.*", label: "example.com" }],
1215
+ // allowlistOptions intentionally omitted — some classifiers don't emit them.
1216
+ matchType: "registry",
1217
+ };
1218
+
1219
+ const executor = new ToolExecutor(makePrompter());
1220
+ const result = await executor.execute(
1221
+ "file_read",
1222
+ { path: "README.md" },
1223
+ makeContext({ requireFreshApproval: true }),
1224
+ );
1225
+
1226
+ expect(result.isError).toBe(false);
1227
+ // Display ladder still flows; save ladder is absent so the client must
1228
+ // fall back to a synthesized option (or omit save).
1229
+ expect(result.riskScopeOptions).toEqual([
1230
+ { pattern: "https://example.com/.*", label: "example.com" },
1231
+ ]);
1232
+ expect(result.riskAllowlistOptions).toBeUndefined();
1233
+ });
1234
+
1235
+ test("riskAllowlistOptions is undefined when no classifier ran (MCP tools)", async () => {
1236
+ // cachedAssessmentOverride is undefined — no classifier ran.
1237
+ const executor = new ToolExecutor(makePrompter());
1238
+ const result = await executor.execute(
1239
+ "file_read",
1240
+ { path: "README.md" },
1241
+ makeContext(),
1242
+ );
1243
+
1244
+ expect(result.isError).toBe(false);
1245
+ expect(result.riskScopeOptions).toBeUndefined();
1246
+ expect(result.riskAllowlistOptions).toBeUndefined();
1247
+ });
1248
+
1249
+ test("denied tool result still carries riskAllowlistOptions for the rule editor save path", async () => {
1250
+ checkResultOverride = { decision: "deny", reason: "Blocked by deny rule" };
1251
+ cachedAssessmentOverride = {
1252
+ riskLevel: "high",
1253
+ reason: "Recursive force delete",
1254
+ scopeOptions: [{ pattern: "^rm\\s+-rf", label: "rm -rf *" }],
1255
+ allowlistOptions: [
1256
+ {
1257
+ label: "rm -rf *",
1258
+ description: "Any rm -rf command",
1259
+ pattern: "action:rm",
1260
+ },
1261
+ ],
1262
+ matchType: "registry",
1263
+ };
1264
+
1265
+ const executor = new ToolExecutor(makePrompter());
1266
+ const result = await executor.execute(
1267
+ "file_read",
1268
+ { path: "anything" },
1269
+ makeContext({ requireFreshApproval: true }),
1270
+ );
1271
+
1272
+ expect(result.isError).toBe(true);
1273
+ expect(result.riskAllowlistOptions).toEqual([
1274
+ {
1275
+ label: "rm -rf *",
1276
+ description: "Any rm -rf command",
1277
+ pattern: "action:rm",
1278
+ },
1279
+ ]);
1280
+ });
1126
1281
  });
@@ -652,6 +652,9 @@ describe("voice-session-bridge", () => {
652
652
  expect(prompt).toContain(
653
653
  "If your assistant name is not known, skip the name and just identify yourself as the guardian's assistant.",
654
654
  );
655
+ expect(prompt).toContain(
656
+ "Never use a UUID-shaped internal assistant ID as your spoken name.",
657
+ );
655
658
  expect(prompt).toContain(
656
659
  'Do NOT say "I\'m calling" or "I\'m calling on behalf of".',
657
660
  );