@vellumai/assistant 0.4.11 → 0.4.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. package/ARCHITECTURE.md +401 -385
  2. package/package.json +1 -1
  3. package/src/__tests__/guardian-verify-setup-skill-regression.test.ts +75 -61
  4. package/src/__tests__/registry.test.ts +235 -187
  5. package/src/__tests__/secure-keys.test.ts +27 -0
  6. package/src/__tests__/session-agent-loop.test.ts +521 -256
  7. package/src/__tests__/session-surfaces-task-progress.test.ts +1 -0
  8. package/src/__tests__/session-tool-setup-app-refresh.test.ts +1 -0
  9. package/src/__tests__/session-tool-setup-memory-scope.test.ts +1 -0
  10. package/src/__tests__/session-tool-setup-side-effect-flag.test.ts +1 -0
  11. package/src/__tests__/skills.test.ts +334 -276
  12. package/src/__tests__/slack-skill.test.ts +124 -0
  13. package/src/__tests__/starter-task-flow.test.ts +7 -17
  14. package/src/agent/loop.ts +10 -3
  15. package/src/config/bundled-skills/chatgpt-import/tools/chatgpt-import.ts +449 -0
  16. package/src/config/bundled-skills/doordash/SKILL.md +171 -0
  17. package/src/config/bundled-skills/doordash/__tests__/doordash-client.test.ts +203 -0
  18. package/src/config/bundled-skills/doordash/__tests__/doordash-session.test.ts +164 -0
  19. package/src/config/bundled-skills/doordash/doordash-cli.ts +1193 -0
  20. package/src/config/bundled-skills/doordash/doordash-entry.ts +22 -0
  21. package/src/config/bundled-skills/doordash/lib/cart-queries.ts +787 -0
  22. package/src/config/bundled-skills/doordash/lib/client.ts +1071 -0
  23. package/src/config/bundled-skills/doordash/lib/order-queries.ts +85 -0
  24. package/src/config/bundled-skills/doordash/lib/queries.ts +28 -0
  25. package/src/config/bundled-skills/doordash/lib/query-extractor.ts +94 -0
  26. package/src/config/bundled-skills/doordash/lib/search-queries.ts +203 -0
  27. package/src/config/bundled-skills/doordash/lib/session.ts +93 -0
  28. package/src/config/bundled-skills/doordash/lib/shared/errors.ts +61 -0
  29. package/src/config/bundled-skills/doordash/lib/shared/ipc.ts +32 -0
  30. package/src/config/bundled-skills/doordash/lib/shared/network-recorder.ts +380 -0
  31. package/src/config/bundled-skills/doordash/lib/shared/platform.ts +35 -0
  32. package/src/config/bundled-skills/doordash/lib/shared/recording-store.ts +43 -0
  33. package/src/config/bundled-skills/doordash/lib/shared/recording-types.ts +49 -0
  34. package/src/config/bundled-skills/doordash/lib/shared/truncate.ts +6 -0
  35. package/src/config/bundled-skills/doordash/lib/store-queries.ts +246 -0
  36. package/src/config/bundled-skills/doordash/lib/types.ts +367 -0
  37. package/src/config/bundled-skills/google-calendar/SKILL.md +4 -5
  38. package/src/config/bundled-skills/google-oauth-setup/SKILL.md +41 -41
  39. package/src/config/bundled-skills/messaging/SKILL.md +59 -42
  40. package/src/config/bundled-skills/messaging/TOOLS.json +14 -92
  41. package/src/config/bundled-skills/messaging/tools/gmail-archive-by-query.ts +5 -1
  42. package/src/config/bundled-skills/messaging/tools/gmail-batch-archive.ts +11 -2
  43. package/src/config/bundled-skills/messaging/tools/gmail-outreach-scan.ts +8 -1
  44. package/src/config/bundled-skills/messaging/tools/gmail-sender-digest.ts +12 -4
  45. package/src/config/bundled-skills/messaging/tools/gmail-unsubscribe.ts +5 -1
  46. package/src/config/bundled-skills/messaging/tools/messaging-archive-by-sender.ts +5 -1
  47. package/src/config/bundled-skills/messaging/tools/messaging-sender-digest.ts +5 -2
  48. package/src/config/bundled-skills/notion/SKILL.md +240 -0
  49. package/src/config/bundled-skills/notion-oauth-setup/SKILL.md +127 -0
  50. package/src/config/bundled-skills/oauth-setup/SKILL.md +144 -0
  51. package/src/config/bundled-skills/phone-calls/SKILL.md +76 -45
  52. package/src/config/bundled-skills/skills-catalog/SKILL.md +32 -29
  53. package/src/config/bundled-skills/slack/SKILL.md +49 -0
  54. package/src/config/bundled-skills/slack/TOOLS.json +167 -0
  55. package/src/config/bundled-skills/slack/tools/shared.ts +23 -0
  56. package/src/config/bundled-skills/{messaging → slack}/tools/slack-add-reaction.ts +2 -5
  57. package/src/config/bundled-skills/slack/tools/slack-channel-details.ts +33 -0
  58. package/src/config/bundled-skills/slack/tools/slack-configure-channels.ts +75 -0
  59. package/src/config/bundled-skills/{messaging → slack}/tools/slack-delete-message.ts +2 -5
  60. package/src/config/bundled-skills/{messaging → slack}/tools/slack-leave-channel.ts +2 -5
  61. package/src/config/bundled-skills/slack/tools/slack-scan-digest.ts +193 -0
  62. package/src/config/{vellum-skills → bundled-skills}/sms-setup/SKILL.md +29 -22
  63. package/src/config/{vellum-skills → bundled-skills}/telegram-setup/SKILL.md +17 -14
  64. package/src/config/{vellum-skills → bundled-skills}/twilio-setup/SKILL.md +20 -5
  65. package/src/config/bundled-tool-registry.ts +292 -267
  66. package/src/config/schema.ts +1 -1
  67. package/src/daemon/handlers/skills.ts +334 -234
  68. package/src/daemon/ipc-contract/messages.ts +2 -0
  69. package/src/daemon/ipc-contract/surfaces.ts +2 -0
  70. package/src/daemon/lifecycle.ts +358 -221
  71. package/src/daemon/response-tier.ts +2 -0
  72. package/src/daemon/server.ts +453 -193
  73. package/src/daemon/session-agent-loop-handlers.ts +43 -2
  74. package/src/daemon/session-agent-loop.ts +3 -0
  75. package/src/daemon/session-lifecycle.ts +3 -0
  76. package/src/daemon/session-process.ts +1 -0
  77. package/src/daemon/session-surfaces.ts +22 -20
  78. package/src/daemon/session-tool-setup.ts +1 -0
  79. package/src/daemon/session.ts +5 -2
  80. package/src/messaging/outreach-classifier.ts +12 -5
  81. package/src/messaging/provider-types.ts +5 -0
  82. package/src/messaging/provider.ts +1 -1
  83. package/src/messaging/providers/gmail/adapter.ts +11 -5
  84. package/src/messaging/providers/gmail/client.ts +2 -0
  85. package/src/messaging/providers/slack/adapter.ts +1 -0
  86. package/src/messaging/providers/slack/client.ts +8 -0
  87. package/src/messaging/providers/slack/types.ts +5 -0
  88. package/src/runtime/http-errors.ts +33 -20
  89. package/src/runtime/http-server.ts +706 -291
  90. package/src/runtime/http-types.ts +26 -16
  91. package/src/runtime/routes/secret-routes.ts +57 -2
  92. package/src/runtime/routes/surface-action-routes.ts +66 -0
  93. package/src/runtime/routes/trust-rules-routes.ts +140 -0
  94. package/src/security/keychain-to-encrypted-migration.ts +59 -0
  95. package/src/security/secure-keys.ts +17 -0
  96. package/src/skills/frontmatter.ts +9 -7
  97. package/src/tools/apps/executors.ts +2 -1
  98. package/src/tools/tool-manifest.ts +44 -42
  99. package/src/tools/types.ts +9 -0
  100. package/src/__tests__/skill-mirror-parity.test.ts +0 -176
  101. package/src/config/vellum-skills/catalog.json +0 -63
  102. package/src/config/vellum-skills/chatgpt-import/tools/chatgpt-import.ts +0 -295
  103. package/src/skills/vellum-catalog-remote.ts +0 -166
  104. package/src/tools/skills/vellum-catalog.ts +0 -168
  105. /package/src/config/{vellum-skills → bundled-skills}/chatgpt-import/SKILL.md +0 -0
  106. /package/src/config/{vellum-skills → bundled-skills}/chatgpt-import/TOOLS.json +0 -0
  107. /package/src/config/{vellum-skills → bundled-skills}/deploy-fullstack-vercel/SKILL.md +0 -0
  108. /package/src/config/{vellum-skills → bundled-skills}/document-writer/SKILL.md +0 -0
  109. /package/src/config/{vellum-skills → bundled-skills}/guardian-verify-setup/SKILL.md +0 -0
  110. /package/src/config/{vellum-skills → bundled-skills}/slack-oauth-setup/SKILL.md +0 -0
  111. /package/src/config/{vellum-skills → bundled-skills}/trusted-contacts/SKILL.md +0 -0
@@ -1,7 +1,7 @@
1
- import { afterAll,beforeEach, describe, expect, test } from 'bun:test';
1
+ import { afterAll, beforeEach, describe, expect, test } from "bun:test";
2
2
 
3
- import { RiskLevel } from '../permissions/types.js';
4
- import type { ToolDefinition } from '../providers/types.js';
3
+ import { RiskLevel } from "../permissions/types.js";
4
+ import type { ToolDefinition } from "../providers/types.js";
5
5
  // We cannot import the private LazyTool class directly, so we test through
6
6
  // registerLazyTool + getTool which exercise the same code path.
7
7
  import {
@@ -17,29 +17,38 @@ import {
17
17
  registerSkillTools,
18
18
  registerTool,
19
19
  unregisterSkillTools,
20
- } from '../tools/registry.js';
21
- import { eagerModuleToolNames, explicitTools, lazyTools } from '../tools/tool-manifest.js';
22
- import type { Tool, ToolContext, ToolExecutionResult } from '../tools/types.js';
20
+ } from "../tools/registry.js";
21
+ import {
22
+ eagerModuleToolNames,
23
+ explicitTools,
24
+ lazyTools,
25
+ } from "../tools/tool-manifest.js";
26
+ import type { Tool, ToolContext, ToolExecutionResult } from "../tools/types.js";
23
27
 
24
28
  // Clean up global registry after this file completes to prevent
25
29
  // contamination of subsequent test files in combined runs.
26
- afterAll(() => { __resetRegistryForTesting(); });
30
+ afterAll(() => {
31
+ __resetRegistryForTesting();
32
+ });
27
33
 
28
34
  function makeFakeTool(name: string): Tool {
29
35
  return {
30
36
  name,
31
37
  description: `Fake ${name}`,
32
- category: 'test',
38
+ category: "test",
33
39
  defaultRiskLevel: RiskLevel.Low,
34
40
  getDefinition(): ToolDefinition {
35
41
  return {
36
42
  name,
37
43
  description: `Fake ${name}`,
38
- input_schema: { type: 'object', properties: {}, required: [] },
44
+ input_schema: { type: "object", properties: {}, required: [] },
39
45
  };
40
46
  },
41
- async execute(_input: Record<string, unknown>, _context: ToolContext): Promise<ToolExecutionResult> {
42
- return { content: 'ok', isError: false };
47
+ async execute(
48
+ _input: Record<string, unknown>,
49
+ _context: ToolContext,
50
+ ): Promise<ToolExecutionResult> {
51
+ return { content: "ok", isError: false };
43
52
  },
44
53
  };
45
54
  }
@@ -47,56 +56,63 @@ function makeFakeTool(name: string): Tool {
47
56
  function makeSkillTool(name: string, ownerSkillId: string): Tool {
48
57
  return {
49
58
  ...makeFakeTool(name),
50
- origin: 'skill' as const,
59
+ origin: "skill" as const,
51
60
  ownerSkillId,
52
61
  };
53
62
  }
54
63
 
55
- describe('LazyTool', () => {
56
- test('clears cached promise on load failure so subsequent call can retry', async () => {
64
+ describe("LazyTool", () => {
65
+ test("clears cached promise on load failure so subsequent call can retry", async () => {
57
66
  let callCount = 0;
58
67
 
59
68
  registerLazyTool({
60
- name: 'test-retry-tool',
61
- description: 'A tool that fails on first load then succeeds',
62
- category: 'test',
69
+ name: "test-retry-tool",
70
+ description: "A tool that fails on first load then succeeds",
71
+ category: "test",
63
72
  defaultRiskLevel: RiskLevel.Low,
64
73
  definition: {
65
- name: 'test-retry-tool',
66
- description: 'A tool that fails on first load then succeeds',
67
- input_schema: { type: 'object', properties: {}, required: [] },
74
+ name: "test-retry-tool",
75
+ description: "A tool that fails on first load then succeeds",
76
+ input_schema: { type: "object", properties: {}, required: [] },
68
77
  },
69
78
  loader: async () => {
70
79
  callCount++;
71
80
  if (callCount === 1) {
72
- throw new Error('transient load failure');
81
+ throw new Error("transient load failure");
73
82
  }
74
- return makeFakeTool('test-retry-tool');
83
+ return makeFakeTool("test-retry-tool");
75
84
  },
76
85
  });
77
86
 
78
- const tool = getTool('test-retry-tool')!;
87
+ const tool = getTool("test-retry-tool")!;
79
88
  expect(tool).toBeDefined();
80
89
 
81
90
  const dummyContext = {} as ToolContext;
82
91
 
83
92
  // First call should throw the transient error
84
- await expect(tool.execute({}, dummyContext)).rejects.toThrow('transient load failure');
93
+ await expect(tool.execute({}, dummyContext)).rejects.toThrow(
94
+ "transient load failure",
95
+ );
85
96
  expect(callCount).toBe(1);
86
97
 
87
98
  // Second call should retry the loader and succeed
88
99
  const result = await tool.execute({}, dummyContext);
89
- expect(result.content).toBe('ok');
100
+ expect(result.content).toBe("ok");
90
101
  expect(result.isError).toBe(false);
91
102
  expect(callCount).toBe(2);
92
103
  });
93
104
  });
94
105
 
95
- describe('tool registry host tools', () => {
96
- test('registers host tools and exposes them in tool definitions', async () => {
106
+ describe("tool registry host tools", () => {
107
+ test("registers host tools and exposes them in tool definitions", async () => {
97
108
  await initializeTools();
98
109
 
99
- const hostToolNames = ['host_file_read', 'host_file_write', 'host_file_edit', 'host_bash'] as const;
110
+ const hostToolNames = [
111
+ "host_file_read",
112
+ "host_file_write",
113
+ "host_file_edit",
114
+ "host_bash",
115
+ ] as const;
100
116
 
101
117
  for (const toolName of hostToolNames) {
102
118
  const tool = getTool(toolName);
@@ -111,14 +127,14 @@ describe('tool registry host tools', () => {
111
127
  });
112
128
  });
113
129
 
114
- describe('tool registry dynamic-tools tools', () => {
115
- test('registers scaffold, delete, and skill_load tools', async () => {
130
+ describe("tool registry dynamic-tools tools", () => {
131
+ test("registers scaffold, delete, and skill_load tools", async () => {
116
132
  await initializeTools();
117
133
 
118
134
  const dynamicToolNames = [
119
- 'scaffold_managed_skill',
120
- 'delete_managed_skill',
121
- 'skill_load',
135
+ "scaffold_managed_skill",
136
+ "delete_managed_skill",
137
+ "skill_load",
122
138
  ] as const;
123
139
 
124
140
  for (const toolName of dynamicToolNames) {
@@ -132,25 +148,25 @@ describe('tool registry dynamic-tools tools', () => {
132
148
  }
133
149
  });
134
150
 
135
- test('scaffold and delete are registered as High risk', async () => {
151
+ test("scaffold and delete are registered as High risk", async () => {
136
152
  await initializeTools();
137
- for (const name of ['scaffold_managed_skill', 'delete_managed_skill']) {
153
+ for (const name of ["scaffold_managed_skill", "delete_managed_skill"]) {
138
154
  const tool = getTool(name);
139
155
  expect(tool).toBeDefined();
140
156
  expect(tool?.defaultRiskLevel).toBe(RiskLevel.High);
141
157
  }
142
158
  });
143
159
 
144
- test('skill_load is registered as Low risk', async () => {
160
+ test("skill_load is registered as Low risk", async () => {
145
161
  await initializeTools();
146
- const tool = getTool('skill_load');
162
+ const tool = getTool("skill_load");
147
163
  expect(tool).toBeDefined();
148
164
  expect(tool?.defaultRiskLevel).toBe(RiskLevel.Low);
149
165
  });
150
166
  });
151
167
 
152
- describe('tool manifest', () => {
153
- test('all manifest lazy tools are registered after init', async () => {
168
+ describe("tool manifest", () => {
169
+ test("all manifest lazy tools are registered after init", async () => {
154
170
  await initializeTools();
155
171
  const registered = new Set(getAllTools().map((t) => t.name));
156
172
 
@@ -159,99 +175,122 @@ describe('tool manifest', () => {
159
175
  }
160
176
  });
161
177
 
162
- test('manifest declares expected core lazy tools', () => {
178
+ test("manifest declares expected core lazy tools", () => {
163
179
  // bash and swarm_delegate moved from lazy to eager registration
164
180
  const lazyNames = new Set(lazyTools.map((t) => t.name));
165
- expect(lazyNames.has('bash')).toBe(false);
166
- expect(lazyNames.has('evaluate_typescript_code')).toBe(false);
167
- expect(lazyNames.has('claude_code')).toBe(false);
168
- expect(lazyNames.has('swarm_delegate')).toBe(false);
181
+ expect(lazyNames.has("bash")).toBe(false);
182
+ expect(lazyNames.has("evaluate_typescript_code")).toBe(false);
183
+ expect(lazyNames.has("claude_code")).toBe(false);
184
+ expect(lazyNames.has("swarm_delegate")).toBe(false);
169
185
  // Verify they are in eager tools instead
170
- expect(eagerModuleToolNames).toContain('bash');
171
- expect(eagerModuleToolNames).toContain('swarm_delegate');
186
+ expect(eagerModuleToolNames).toContain("bash");
187
+ expect(eagerModuleToolNames).toContain("swarm_delegate");
172
188
  });
173
189
 
174
- test('eager module tool names list contains expected count', () => {
190
+ test("eager module tool names list contains expected count", () => {
175
191
  expect(eagerModuleToolNames.length).toBe(15);
176
192
  });
177
193
 
178
- test('explicit tools list includes memory, credential, watch, and catalog tools', () => {
194
+ test("explicit tools list includes memory, credential, and watch tools", () => {
179
195
  const names = explicitTools.map((t) => t.name);
180
- expect(names).toContain('memory_search');
181
- expect(names).toContain('memory_save');
182
- expect(names).toContain('memory_update');
183
- expect(names).toContain('credential_store');
184
- expect(names).toContain('account_manage');
185
- expect(names).toContain('start_screen_watch');
186
- expect(names).toContain('vellum_skills_catalog');
196
+ expect(names).toContain("memory_search");
197
+ expect(names).toContain("memory_save");
198
+ expect(names).toContain("memory_update");
199
+ expect(names).toContain("credential_store");
200
+ expect(names).toContain("account_manage");
201
+ expect(names).toContain("start_screen_watch");
187
202
  });
188
203
 
189
- test('registered tool count is at least eager + lazy + host', async () => {
204
+ test("registered tool count is at least eager + lazy + host", async () => {
190
205
  await initializeTools();
191
206
  const tools = getAllTools();
192
- expect(tools.length).toBeGreaterThanOrEqual(eagerModuleToolNames.length + lazyTools.length);
207
+ expect(tools.length).toBeGreaterThanOrEqual(
208
+ eagerModuleToolNames.length + lazyTools.length,
209
+ );
193
210
  });
194
211
  });
195
212
 
196
- describe('baseline characterization: hardcoded tool loading', () => {
197
- test('gmail tools are NOT registered in the global registry after initializeTools()', async () => {
213
+ describe("baseline characterization: hardcoded tool loading", () => {
214
+ test("gmail tools are NOT registered in the global registry after initializeTools()", async () => {
198
215
  await initializeTools();
199
216
  const allTools = getAllTools();
200
- const toolNames = allTools.map(t => t.name);
201
-
202
- const gmailTools = ['gmail_search', 'gmail_list_messages', 'gmail_get_message', 'gmail_mark_read',
203
- 'gmail_draft', 'gmail_archive', 'gmail_batch_archive', 'gmail_label', 'gmail_batch_label',
204
- 'gmail_trash', 'gmail_send', 'gmail_unsubscribe'];
217
+ const toolNames = allTools.map((t) => t.name);
218
+
219
+ const gmailTools = [
220
+ "gmail_search",
221
+ "gmail_list_messages",
222
+ "gmail_get_message",
223
+ "gmail_mark_read",
224
+ "gmail_draft",
225
+ "gmail_archive",
226
+ "gmail_batch_archive",
227
+ "gmail_label",
228
+ "gmail_batch_label",
229
+ "gmail_trash",
230
+ "gmail_send",
231
+ "gmail_unsubscribe",
232
+ ];
205
233
  for (const name of gmailTools) {
206
234
  expect(toolNames).not.toContain(name);
207
235
  }
208
236
  });
209
237
 
210
- test('gmail tool names are NOT in eagerModuleToolNames manifest', () => {
211
- const gmailTools = ['gmail_search', 'gmail_list_messages', 'gmail_get_message', 'gmail_mark_read',
212
- 'gmail_draft', 'gmail_archive', 'gmail_batch_archive', 'gmail_label', 'gmail_batch_label',
213
- 'gmail_trash', 'gmail_send', 'gmail_unsubscribe'];
238
+ test("gmail tool names are NOT in eagerModuleToolNames manifest", () => {
239
+ const gmailTools = [
240
+ "gmail_search",
241
+ "gmail_list_messages",
242
+ "gmail_get_message",
243
+ "gmail_mark_read",
244
+ "gmail_draft",
245
+ "gmail_archive",
246
+ "gmail_batch_archive",
247
+ "gmail_label",
248
+ "gmail_batch_label",
249
+ "gmail_trash",
250
+ "gmail_send",
251
+ "gmail_unsubscribe",
252
+ ];
214
253
  for (const name of gmailTools) {
215
254
  expect(eagerModuleToolNames).not.toContain(name);
216
255
  }
217
256
  });
218
257
 
219
- test('weather tool is NOT in global registry after initializeTools()', async () => {
258
+ test("weather tool is NOT in global registry after initializeTools()", async () => {
220
259
  await initializeTools();
221
- const tool = getTool('get_weather');
260
+ const tool = getTool("get_weather");
222
261
  expect(tool).toBeUndefined();
223
262
  });
224
263
 
225
- test('weather tool name is NOT in eagerModuleToolNames manifest', () => {
226
- expect(eagerModuleToolNames).not.toContain('get_weather');
264
+ test("weather tool name is NOT in eagerModuleToolNames manifest", () => {
265
+ expect(eagerModuleToolNames).not.toContain("get_weather");
227
266
  });
228
267
 
229
- test('claude_code is NOT in global registry after initializeTools()', async () => {
268
+ test("claude_code is NOT in global registry after initializeTools()", async () => {
230
269
  await initializeTools();
231
- const tool = getTool('claude_code');
270
+ const tool = getTool("claude_code");
232
271
  expect(tool).toBeUndefined();
233
272
  });
234
273
 
235
- test('claude_code is NOT in lazyTools manifest', () => {
236
- const lazyNames = lazyTools.map(t => t.name);
237
- expect(lazyNames).not.toContain('claude_code');
274
+ test("claude_code is NOT in lazyTools manifest", () => {
275
+ const lazyNames = lazyTools.map((t) => t.name);
276
+ expect(lazyNames).not.toContain("claude_code");
238
277
  });
239
278
  });
240
279
 
241
- describe('baseline characterization: core app tool surface', () => {
242
- test('non-proxy app tools are NOT in core registry (now skill-provided)', async () => {
280
+ describe("baseline characterization: core app tool surface", () => {
281
+ test("non-proxy app tools are NOT in core registry (now skill-provided)", async () => {
243
282
  await initializeTools();
244
283
 
245
284
  const nonProxyAppTools = [
246
- 'app_create',
247
- 'app_list',
248
- 'app_query',
249
- 'app_update',
250
- 'app_delete',
251
- 'app_file_list',
252
- 'app_file_read',
253
- 'app_file_edit',
254
- 'app_file_write',
285
+ "app_create",
286
+ "app_list",
287
+ "app_query",
288
+ "app_update",
289
+ "app_delete",
290
+ "app_file_list",
291
+ "app_file_read",
292
+ "app_file_edit",
293
+ "app_file_write",
255
294
  ];
256
295
 
257
296
  for (const name of nonProxyAppTools) {
@@ -265,225 +304,232 @@ describe('baseline characterization: core app tool surface', () => {
265
304
  }
266
305
  });
267
306
 
268
- test('core registry includes app_open proxy tool', async () => {
307
+ test("core registry includes app_open proxy tool", async () => {
269
308
  await initializeTools();
270
309
 
271
- const tool = getTool('app_open');
310
+ const tool = getTool("app_open");
272
311
  expect(tool).toBeDefined();
273
- expect(tool?.executionMode).toBe('proxy');
312
+ expect(tool?.executionMode).toBe("proxy");
274
313
 
275
314
  // Proxy tools are excluded from getAllToolDefinitions() by design
276
315
  const definitionNames = getAllToolDefinitions().map((def) => def.name);
277
- expect(definitionNames).not.toContain('app_open');
316
+ expect(definitionNames).not.toContain("app_open");
278
317
  });
279
318
 
280
- test('bundled app-builder skill has TOOLS.json manifest', async () => {
281
- const path = await import('node:path');
282
- const fs = await import('node:fs');
319
+ test("bundled app-builder skill has TOOLS.json manifest", async () => {
320
+ const path = await import("node:path");
321
+ const fs = await import("node:fs");
283
322
 
284
323
  // Resolve the bundled skill directory relative to the source config
285
324
  const skillDir = path.resolve(
286
325
  import.meta.dirname,
287
- '../config/bundled-skills/app-builder',
326
+ "../config/bundled-skills/app-builder",
288
327
  );
289
- const toolsJsonPath = path.join(skillDir, 'TOOLS.json');
328
+ const toolsJsonPath = path.join(skillDir, "TOOLS.json");
290
329
 
291
330
  expect(fs.existsSync(toolsJsonPath)).toBe(true);
292
331
  });
293
332
  });
294
333
 
295
- describe('tool origin metadata', () => {
296
- beforeEach(() => { __resetRegistryForTesting(); });
334
+ describe("tool origin metadata", () => {
335
+ beforeEach(() => {
336
+ __resetRegistryForTesting();
337
+ });
297
338
 
298
- test('registers a skill-origin tool and preserves metadata via getTool()', () => {
339
+ test("registers a skill-origin tool and preserves metadata via getTool()", () => {
299
340
  const skillTool: Tool = {
300
- ...makeFakeTool('test-skill-origin-tool'),
301
- origin: 'skill',
302
- ownerSkillId: 'test-skill',
341
+ ...makeFakeTool("test-skill-origin-tool"),
342
+ origin: "skill",
343
+ ownerSkillId: "test-skill",
303
344
  };
304
345
 
305
346
  registerTool(skillTool);
306
347
 
307
- const retrieved = getTool('test-skill-origin-tool');
348
+ const retrieved = getTool("test-skill-origin-tool");
308
349
  expect(retrieved).toBeDefined();
309
- expect(retrieved?.origin).toBe('skill');
310
- expect(retrieved?.ownerSkillId).toBe('test-skill');
350
+ expect(retrieved?.origin).toBe("skill");
351
+ expect(retrieved?.ownerSkillId).toBe("test-skill");
311
352
  });
312
353
 
313
- test('core tools default to no origin metadata (undefined)', async () => {
354
+ test("core tools default to no origin metadata (undefined)", async () => {
314
355
  await initializeTools();
315
356
 
316
- const coreTool = getTool('host_file_read');
357
+ const coreTool = getTool("host_file_read");
317
358
  expect(coreTool).toBeDefined();
318
359
  expect(coreTool?.origin).toBeUndefined();
319
360
  expect(coreTool?.ownerSkillId).toBeUndefined();
320
361
  });
321
362
  });
322
363
 
323
- describe('dynamic skill tool registry', () => {
324
- beforeEach(() => { __resetRegistryForTesting(); });
364
+ describe("dynamic skill tool registry", () => {
365
+ beforeEach(() => {
366
+ __resetRegistryForTesting();
367
+ });
325
368
 
326
- test('registers skill tools and retrieves them', () => {
369
+ test("registers skill tools and retrieves them", () => {
327
370
  const tools = [
328
- makeSkillTool('sk_tool_a', 'my-skill'),
329
- makeSkillTool('sk_tool_b', 'my-skill'),
371
+ makeSkillTool("sk_tool_a", "my-skill"),
372
+ makeSkillTool("sk_tool_b", "my-skill"),
330
373
  ];
331
374
  registerSkillTools(tools);
332
375
 
333
- expect(getTool('sk_tool_a')).toBeDefined();
334
- expect(getTool('sk_tool_a')?.origin).toBe('skill');
335
- expect(getTool('sk_tool_a')?.ownerSkillId).toBe('my-skill');
376
+ expect(getTool("sk_tool_a")).toBeDefined();
377
+ expect(getTool("sk_tool_a")?.origin).toBe("skill");
378
+ expect(getTool("sk_tool_a")?.ownerSkillId).toBe("my-skill");
336
379
 
337
- expect(getTool('sk_tool_b')).toBeDefined();
338
- expect(getTool('sk_tool_b')?.origin).toBe('skill');
380
+ expect(getTool("sk_tool_b")).toBeDefined();
381
+ expect(getTool("sk_tool_b")?.origin).toBe("skill");
339
382
  });
340
383
 
341
- test('skips skill tool that collides with a core tool without throwing', async () => {
384
+ test("skips skill tool that collides with a core tool without throwing", async () => {
342
385
  await initializeTools();
343
386
 
344
387
  // host_file_read is a core tool registered during init
345
- const colliding = makeSkillTool('host_file_read', 'rogue-skill');
388
+ const colliding = makeSkillTool("host_file_read", "rogue-skill");
346
389
  const accepted = registerSkillTools([colliding]);
347
390
 
348
391
  // The colliding tool should be silently skipped
349
392
  expect(accepted).toHaveLength(0);
350
393
  // The core tool should still be in place (not overwritten)
351
- const retrieved = getTool('host_file_read');
394
+ const retrieved = getTool("host_file_read");
352
395
  expect(retrieved?.origin).toBeUndefined(); // core tools have no origin
353
396
  });
354
397
 
355
- test('allows replacement within the same owning skill', () => {
356
- const original = makeSkillTool('sk_replaceable', 'owner-skill');
398
+ test("allows replacement within the same owning skill", () => {
399
+ const original = makeSkillTool("sk_replaceable", "owner-skill");
357
400
  registerSkillTools([original]);
358
401
 
359
402
  const replacement: Tool = {
360
- ...makeSkillTool('sk_replaceable', 'owner-skill'),
361
- description: 'Updated description',
403
+ ...makeSkillTool("sk_replaceable", "owner-skill"),
404
+ description: "Updated description",
362
405
  };
363
406
  // Should not throw
364
407
  registerSkillTools([replacement]);
365
408
 
366
- const retrieved = getTool('sk_replaceable');
367
- expect(retrieved?.description).toBe('Updated description');
409
+ const retrieved = getTool("sk_replaceable");
410
+ expect(retrieved?.description).toBe("Updated description");
368
411
  });
369
412
 
370
- test('rejects replacement from a different owning skill', () => {
371
- const original = makeSkillTool('sk_owned', 'skill-alpha');
413
+ test("rejects replacement from a different owning skill", () => {
414
+ const original = makeSkillTool("sk_owned", "skill-alpha");
372
415
  registerSkillTools([original]);
373
416
 
374
- const intruder = makeSkillTool('sk_owned', 'skill-beta');
417
+ const intruder = makeSkillTool("sk_owned", "skill-beta");
375
418
  expect(() => registerSkillTools([intruder])).toThrow(
376
419
  'already registered by skill "skill-alpha"',
377
420
  );
378
421
  });
379
422
 
380
- test('unregisterSkillTools removes all tools for a skill', () => {
423
+ test("unregisterSkillTools removes all tools for a skill", () => {
381
424
  const tools = [
382
- makeSkillTool('sk_rm_1', 'removable-skill'),
383
- makeSkillTool('sk_rm_2', 'removable-skill'),
425
+ makeSkillTool("sk_rm_1", "removable-skill"),
426
+ makeSkillTool("sk_rm_2", "removable-skill"),
384
427
  ];
385
428
  registerSkillTools(tools);
386
- expect(getTool('sk_rm_1')).toBeDefined();
387
- expect(getTool('sk_rm_2')).toBeDefined();
429
+ expect(getTool("sk_rm_1")).toBeDefined();
430
+ expect(getTool("sk_rm_2")).toBeDefined();
388
431
 
389
- unregisterSkillTools('removable-skill');
432
+ unregisterSkillTools("removable-skill");
390
433
 
391
- expect(getTool('sk_rm_1')).toBeUndefined();
392
- expect(getTool('sk_rm_2')).toBeUndefined();
434
+ expect(getTool("sk_rm_1")).toBeUndefined();
435
+ expect(getTool("sk_rm_2")).toBeUndefined();
393
436
  });
394
437
 
395
- test('unregisterSkillTools does not affect tools from other skills', () => {
396
- registerSkillTools([makeSkillTool('sk_keep', 'keep-skill')]);
397
- registerSkillTools([makeSkillTool('sk_remove', 'nuke-skill')]);
438
+ test("unregisterSkillTools does not affect tools from other skills", () => {
439
+ registerSkillTools([makeSkillTool("sk_keep", "keep-skill")]);
440
+ registerSkillTools([makeSkillTool("sk_remove", "nuke-skill")]);
398
441
 
399
- unregisterSkillTools('nuke-skill');
442
+ unregisterSkillTools("nuke-skill");
400
443
 
401
- expect(getTool('sk_keep')).toBeDefined();
402
- expect(getTool('sk_remove')).toBeUndefined();
444
+ expect(getTool("sk_keep")).toBeDefined();
445
+ expect(getTool("sk_remove")).toBeUndefined();
403
446
  });
404
447
 
405
- test('getSkillToolNames returns only skill tool names', async () => {
448
+ test("getSkillToolNames returns only skill tool names", async () => {
406
449
  await initializeTools();
407
450
 
408
451
  registerSkillTools([
409
- makeSkillTool('sk_names_a', 'names-skill'),
410
- makeSkillTool('sk_names_b', 'names-skill'),
452
+ makeSkillTool("sk_names_a", "names-skill"),
453
+ makeSkillTool("sk_names_b", "names-skill"),
411
454
  ]);
412
455
 
413
456
  const skillNames = getSkillToolNames();
414
- expect(skillNames).toContain('sk_names_a');
415
- expect(skillNames).toContain('sk_names_b');
457
+ expect(skillNames).toContain("sk_names_a");
458
+ expect(skillNames).toContain("sk_names_b");
416
459
  // Core tools should not appear
417
- expect(skillNames).not.toContain('host_file_read');
418
- expect(skillNames).not.toContain('bash');
460
+ expect(skillNames).not.toContain("host_file_read");
461
+ expect(skillNames).not.toContain("bash");
419
462
  });
420
463
 
421
- test('registerSkillTools skips core-colliding tools but registers the rest', async () => {
464
+ test("registerSkillTools skips core-colliding tools but registers the rest", async () => {
422
465
  await initializeTools();
423
466
 
424
467
  const tools = [
425
- makeSkillTool('sk_atomic_ok', 'atomic-skill'),
426
- makeSkillTool('host_file_read', 'atomic-skill'), // collides with core
468
+ makeSkillTool("sk_atomic_ok", "atomic-skill"),
469
+ makeSkillTool("host_file_read", "atomic-skill"), // collides with core
427
470
  ];
428
471
 
429
472
  const accepted = registerSkillTools(tools);
430
473
  // Only the non-colliding tool should be accepted
431
474
  expect(accepted).toHaveLength(1);
432
- expect(accepted[0].name).toBe('sk_atomic_ok');
475
+ expect(accepted[0].name).toBe("sk_atomic_ok");
433
476
  // The non-colliding tool should be registered
434
- expect(getTool('sk_atomic_ok')).toBeDefined();
477
+ expect(getTool("sk_atomic_ok")).toBeDefined();
435
478
  // The core tool should be untouched
436
- expect(getTool('host_file_read')?.origin).toBeUndefined();
479
+ expect(getTool("host_file_read")?.origin).toBeUndefined();
437
480
  });
438
481
  });
439
482
 
440
- describe('skill tool reference counting', () => {
441
- beforeEach(() => { __resetRegistryForTesting(); });
483
+ describe("skill tool reference counting", () => {
484
+ beforeEach(() => {
485
+ __resetRegistryForTesting();
486
+ });
442
487
 
443
- test('ref count increments on each registerSkillTools call', () => {
444
- registerSkillTools([makeSkillTool('rc_a', 'rc-skill')]);
445
- expect(getSkillRefCount('rc-skill')).toBe(1);
488
+ test("ref count increments on each registerSkillTools call", () => {
489
+ registerSkillTools([makeSkillTool("rc_a", "rc-skill")]);
490
+ expect(getSkillRefCount("rc-skill")).toBe(1);
446
491
 
447
492
  // Second session registers the same skill (same ownerSkillId allows replacement)
448
- registerSkillTools([makeSkillTool('rc_a', 'rc-skill')]);
449
- expect(getSkillRefCount('rc-skill')).toBe(2);
493
+ registerSkillTools([makeSkillTool("rc_a", "rc-skill")]);
494
+ expect(getSkillRefCount("rc-skill")).toBe(2);
450
495
  });
451
496
 
452
- test('unregister decrements ref count but keeps tools when count > 0', () => {
453
- registerSkillTools([makeSkillTool('rc_keep', 'rc-multi')]);
454
- registerSkillTools([makeSkillTool('rc_keep', 'rc-multi')]);
455
- expect(getSkillRefCount('rc-multi')).toBe(2);
497
+ test("unregister decrements ref count but keeps tools when count > 0", () => {
498
+ registerSkillTools([makeSkillTool("rc_keep", "rc-multi")]);
499
+ registerSkillTools([makeSkillTool("rc_keep", "rc-multi")]);
500
+ expect(getSkillRefCount("rc-multi")).toBe(2);
456
501
 
457
- unregisterSkillTools('rc-multi');
458
- expect(getSkillRefCount('rc-multi')).toBe(1);
502
+ unregisterSkillTools("rc-multi");
503
+ expect(getSkillRefCount("rc-multi")).toBe(1);
459
504
  // Tools still present
460
- expect(getTool('rc_keep')).toBeDefined();
505
+ expect(getTool("rc_keep")).toBeDefined();
461
506
  });
462
507
 
463
- test('tools are removed only when last reference is unregistered', () => {
464
- registerSkillTools([makeSkillTool('rc_last', 'rc-final')]);
465
- registerSkillTools([makeSkillTool('rc_last', 'rc-final')]);
508
+ test("tools are removed only when last reference is unregistered", () => {
509
+ registerSkillTools([makeSkillTool("rc_last", "rc-final")]);
510
+ registerSkillTools([makeSkillTool("rc_last", "rc-final")]);
466
511
 
467
- unregisterSkillTools('rc-final');
468
- expect(getTool('rc_last')).toBeDefined();
512
+ unregisterSkillTools("rc-final");
513
+ expect(getTool("rc_last")).toBeDefined();
469
514
 
470
- unregisterSkillTools('rc-final');
471
- expect(getTool('rc_last')).toBeUndefined();
472
- expect(getSkillRefCount('rc-final')).toBe(0);
515
+ unregisterSkillTools("rc-final");
516
+ expect(getTool("rc_last")).toBeUndefined();
517
+ expect(getSkillRefCount("rc-final")).toBe(0);
473
518
  });
474
519
 
475
- test('unregister with no prior registration is a no-op', () => {
476
- unregisterSkillTools('nonexistent-skill');
477
- expect(getSkillRefCount('nonexistent-skill')).toBe(0);
520
+ test("unregister with no prior registration is a no-op", () => {
521
+ unregisterSkillTools("nonexistent-skill");
522
+ expect(getSkillRefCount("nonexistent-skill")).toBe(0);
478
523
  });
479
524
  });
480
525
 
481
- describe('computer-use registration split', () => {
526
+ describe("computer-use registration split", () => {
482
527
  // Start each test from a completely empty registry so assertions are
483
528
  // non-vacuous — the split functions must actually register tools.
484
529
 
485
- test('registerComputerUseActionTools registers all 12 CU action tools and nothing else', async () => {
486
- const { registerComputerUseActionTools } = await import('../tools/computer-use/registry.js');
530
+ test("registerComputerUseActionTools registers all 12 CU action tools and nothing else", async () => {
531
+ const { registerComputerUseActionTools } =
532
+ await import("../tools/computer-use/registry.js");
487
533
 
488
534
  __clearRegistryForTesting();
489
535
  expect(getAllTools()).toHaveLength(0);
@@ -492,7 +538,9 @@ describe('computer-use registration split', () => {
492
538
 
493
539
  const registered = getAllTools();
494
540
  expect(registered).toHaveLength(12);
495
- expect(registered.every((t) => t.name.startsWith('computer_use_'))).toBe(true);
496
- expect(getTool('computer_use_request_control')).toBeUndefined();
541
+ expect(registered.every((t) => t.name.startsWith("computer_use_"))).toBe(
542
+ true,
543
+ );
544
+ expect(getTool("computer_use_request_control")).toBeUndefined();
497
545
  });
498
546
  });