@vellumai/assistant 0.4.11 → 0.4.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +401 -385
- package/package.json +1 -1
- package/src/__tests__/guardian-verify-setup-skill-regression.test.ts +75 -61
- package/src/__tests__/registry.test.ts +235 -187
- package/src/__tests__/secure-keys.test.ts +27 -0
- package/src/__tests__/session-agent-loop.test.ts +521 -256
- package/src/__tests__/session-surfaces-task-progress.test.ts +1 -0
- package/src/__tests__/session-tool-setup-app-refresh.test.ts +1 -0
- package/src/__tests__/session-tool-setup-memory-scope.test.ts +1 -0
- package/src/__tests__/session-tool-setup-side-effect-flag.test.ts +1 -0
- package/src/__tests__/skills.test.ts +334 -276
- package/src/__tests__/slack-skill.test.ts +124 -0
- package/src/__tests__/starter-task-flow.test.ts +7 -17
- package/src/agent/loop.ts +10 -3
- package/src/config/bundled-skills/chatgpt-import/tools/chatgpt-import.ts +449 -0
- package/src/config/bundled-skills/doordash/SKILL.md +171 -0
- package/src/config/bundled-skills/doordash/__tests__/doordash-client.test.ts +203 -0
- package/src/config/bundled-skills/doordash/__tests__/doordash-session.test.ts +164 -0
- package/src/config/bundled-skills/doordash/doordash-cli.ts +1193 -0
- package/src/config/bundled-skills/doordash/doordash-entry.ts +22 -0
- package/src/config/bundled-skills/doordash/lib/cart-queries.ts +787 -0
- package/src/config/bundled-skills/doordash/lib/client.ts +1071 -0
- package/src/config/bundled-skills/doordash/lib/order-queries.ts +85 -0
- package/src/config/bundled-skills/doordash/lib/queries.ts +28 -0
- package/src/config/bundled-skills/doordash/lib/query-extractor.ts +94 -0
- package/src/config/bundled-skills/doordash/lib/search-queries.ts +203 -0
- package/src/config/bundled-skills/doordash/lib/session.ts +93 -0
- package/src/config/bundled-skills/doordash/lib/shared/errors.ts +61 -0
- package/src/config/bundled-skills/doordash/lib/shared/ipc.ts +32 -0
- package/src/config/bundled-skills/doordash/lib/shared/network-recorder.ts +380 -0
- package/src/config/bundled-skills/doordash/lib/shared/platform.ts +35 -0
- package/src/config/bundled-skills/doordash/lib/shared/recording-store.ts +43 -0
- package/src/config/bundled-skills/doordash/lib/shared/recording-types.ts +49 -0
- package/src/config/bundled-skills/doordash/lib/shared/truncate.ts +6 -0
- package/src/config/bundled-skills/doordash/lib/store-queries.ts +246 -0
- package/src/config/bundled-skills/doordash/lib/types.ts +367 -0
- package/src/config/bundled-skills/google-calendar/SKILL.md +4 -5
- package/src/config/bundled-skills/google-oauth-setup/SKILL.md +41 -41
- package/src/config/bundled-skills/messaging/SKILL.md +59 -42
- package/src/config/bundled-skills/messaging/TOOLS.json +14 -92
- package/src/config/bundled-skills/messaging/tools/gmail-archive-by-query.ts +5 -1
- package/src/config/bundled-skills/messaging/tools/gmail-batch-archive.ts +11 -2
- package/src/config/bundled-skills/messaging/tools/gmail-outreach-scan.ts +8 -1
- package/src/config/bundled-skills/messaging/tools/gmail-sender-digest.ts +12 -4
- package/src/config/bundled-skills/messaging/tools/gmail-unsubscribe.ts +5 -1
- package/src/config/bundled-skills/messaging/tools/messaging-archive-by-sender.ts +5 -1
- package/src/config/bundled-skills/messaging/tools/messaging-sender-digest.ts +5 -2
- package/src/config/bundled-skills/notion/SKILL.md +240 -0
- package/src/config/bundled-skills/notion-oauth-setup/SKILL.md +127 -0
- package/src/config/bundled-skills/oauth-setup/SKILL.md +144 -0
- package/src/config/bundled-skills/phone-calls/SKILL.md +76 -45
- package/src/config/bundled-skills/skills-catalog/SKILL.md +32 -29
- package/src/config/bundled-skills/slack/SKILL.md +49 -0
- package/src/config/bundled-skills/slack/TOOLS.json +167 -0
- package/src/config/bundled-skills/slack/tools/shared.ts +23 -0
- package/src/config/bundled-skills/{messaging → slack}/tools/slack-add-reaction.ts +2 -5
- package/src/config/bundled-skills/slack/tools/slack-channel-details.ts +33 -0
- package/src/config/bundled-skills/slack/tools/slack-configure-channels.ts +75 -0
- package/src/config/bundled-skills/{messaging → slack}/tools/slack-delete-message.ts +2 -5
- package/src/config/bundled-skills/{messaging → slack}/tools/slack-leave-channel.ts +2 -5
- package/src/config/bundled-skills/slack/tools/slack-scan-digest.ts +193 -0
- package/src/config/{vellum-skills → bundled-skills}/sms-setup/SKILL.md +29 -22
- package/src/config/{vellum-skills → bundled-skills}/telegram-setup/SKILL.md +17 -14
- package/src/config/{vellum-skills → bundled-skills}/twilio-setup/SKILL.md +20 -5
- package/src/config/bundled-tool-registry.ts +292 -267
- package/src/config/schema.ts +1 -1
- package/src/daemon/handlers/skills.ts +334 -234
- package/src/daemon/ipc-contract/messages.ts +2 -0
- package/src/daemon/ipc-contract/surfaces.ts +2 -0
- package/src/daemon/lifecycle.ts +358 -221
- package/src/daemon/response-tier.ts +2 -0
- package/src/daemon/server.ts +453 -193
- package/src/daemon/session-agent-loop-handlers.ts +43 -2
- package/src/daemon/session-agent-loop.ts +3 -0
- package/src/daemon/session-lifecycle.ts +3 -0
- package/src/daemon/session-process.ts +1 -0
- package/src/daemon/session-surfaces.ts +22 -20
- package/src/daemon/session-tool-setup.ts +1 -0
- package/src/daemon/session.ts +5 -2
- package/src/messaging/outreach-classifier.ts +12 -5
- package/src/messaging/provider-types.ts +5 -0
- package/src/messaging/provider.ts +1 -1
- package/src/messaging/providers/gmail/adapter.ts +11 -5
- package/src/messaging/providers/gmail/client.ts +2 -0
- package/src/messaging/providers/slack/adapter.ts +1 -0
- package/src/messaging/providers/slack/client.ts +8 -0
- package/src/messaging/providers/slack/types.ts +5 -0
- package/src/runtime/http-errors.ts +33 -20
- package/src/runtime/http-server.ts +706 -291
- package/src/runtime/http-types.ts +26 -16
- package/src/runtime/routes/secret-routes.ts +57 -2
- package/src/runtime/routes/surface-action-routes.ts +66 -0
- package/src/runtime/routes/trust-rules-routes.ts +140 -0
- package/src/security/keychain-to-encrypted-migration.ts +59 -0
- package/src/security/secure-keys.ts +17 -0
- package/src/skills/frontmatter.ts +9 -7
- package/src/tools/apps/executors.ts +2 -1
- package/src/tools/tool-manifest.ts +44 -42
- package/src/tools/types.ts +9 -0
- package/src/__tests__/skill-mirror-parity.test.ts +0 -176
- package/src/config/vellum-skills/catalog.json +0 -63
- package/src/config/vellum-skills/chatgpt-import/tools/chatgpt-import.ts +0 -295
- package/src/skills/vellum-catalog-remote.ts +0 -166
- package/src/tools/skills/vellum-catalog.ts +0 -168
- /package/src/config/{vellum-skills → bundled-skills}/chatgpt-import/SKILL.md +0 -0
- /package/src/config/{vellum-skills → bundled-skills}/chatgpt-import/TOOLS.json +0 -0
- /package/src/config/{vellum-skills → bundled-skills}/deploy-fullstack-vercel/SKILL.md +0 -0
- /package/src/config/{vellum-skills → bundled-skills}/document-writer/SKILL.md +0 -0
- /package/src/config/{vellum-skills → bundled-skills}/guardian-verify-setup/SKILL.md +0 -0
- /package/src/config/{vellum-skills → bundled-skills}/slack-oauth-setup/SKILL.md +0 -0
- /package/src/config/{vellum-skills → bundled-skills}/trusted-contacts/SKILL.md +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { afterAll,beforeEach, describe, expect, test } from
|
|
1
|
+
import { afterAll, beforeEach, describe, expect, test } from "bun:test";
|
|
2
2
|
|
|
3
|
-
import { RiskLevel } from
|
|
4
|
-
import type { ToolDefinition } from
|
|
3
|
+
import { RiskLevel } from "../permissions/types.js";
|
|
4
|
+
import type { ToolDefinition } from "../providers/types.js";
|
|
5
5
|
// We cannot import the private LazyTool class directly, so we test through
|
|
6
6
|
// registerLazyTool + getTool which exercise the same code path.
|
|
7
7
|
import {
|
|
@@ -17,29 +17,38 @@ import {
|
|
|
17
17
|
registerSkillTools,
|
|
18
18
|
registerTool,
|
|
19
19
|
unregisterSkillTools,
|
|
20
|
-
} from
|
|
21
|
-
import {
|
|
22
|
-
|
|
20
|
+
} from "../tools/registry.js";
|
|
21
|
+
import {
|
|
22
|
+
eagerModuleToolNames,
|
|
23
|
+
explicitTools,
|
|
24
|
+
lazyTools,
|
|
25
|
+
} from "../tools/tool-manifest.js";
|
|
26
|
+
import type { Tool, ToolContext, ToolExecutionResult } from "../tools/types.js";
|
|
23
27
|
|
|
24
28
|
// Clean up global registry after this file completes to prevent
|
|
25
29
|
// contamination of subsequent test files in combined runs.
|
|
26
|
-
afterAll(() => {
|
|
30
|
+
afterAll(() => {
|
|
31
|
+
__resetRegistryForTesting();
|
|
32
|
+
});
|
|
27
33
|
|
|
28
34
|
function makeFakeTool(name: string): Tool {
|
|
29
35
|
return {
|
|
30
36
|
name,
|
|
31
37
|
description: `Fake ${name}`,
|
|
32
|
-
category:
|
|
38
|
+
category: "test",
|
|
33
39
|
defaultRiskLevel: RiskLevel.Low,
|
|
34
40
|
getDefinition(): ToolDefinition {
|
|
35
41
|
return {
|
|
36
42
|
name,
|
|
37
43
|
description: `Fake ${name}`,
|
|
38
|
-
input_schema: { type:
|
|
44
|
+
input_schema: { type: "object", properties: {}, required: [] },
|
|
39
45
|
};
|
|
40
46
|
},
|
|
41
|
-
async execute(
|
|
42
|
-
|
|
47
|
+
async execute(
|
|
48
|
+
_input: Record<string, unknown>,
|
|
49
|
+
_context: ToolContext,
|
|
50
|
+
): Promise<ToolExecutionResult> {
|
|
51
|
+
return { content: "ok", isError: false };
|
|
43
52
|
},
|
|
44
53
|
};
|
|
45
54
|
}
|
|
@@ -47,56 +56,63 @@ function makeFakeTool(name: string): Tool {
|
|
|
47
56
|
function makeSkillTool(name: string, ownerSkillId: string): Tool {
|
|
48
57
|
return {
|
|
49
58
|
...makeFakeTool(name),
|
|
50
|
-
origin:
|
|
59
|
+
origin: "skill" as const,
|
|
51
60
|
ownerSkillId,
|
|
52
61
|
};
|
|
53
62
|
}
|
|
54
63
|
|
|
55
|
-
describe(
|
|
56
|
-
test(
|
|
64
|
+
describe("LazyTool", () => {
|
|
65
|
+
test("clears cached promise on load failure so subsequent call can retry", async () => {
|
|
57
66
|
let callCount = 0;
|
|
58
67
|
|
|
59
68
|
registerLazyTool({
|
|
60
|
-
name:
|
|
61
|
-
description:
|
|
62
|
-
category:
|
|
69
|
+
name: "test-retry-tool",
|
|
70
|
+
description: "A tool that fails on first load then succeeds",
|
|
71
|
+
category: "test",
|
|
63
72
|
defaultRiskLevel: RiskLevel.Low,
|
|
64
73
|
definition: {
|
|
65
|
-
name:
|
|
66
|
-
description:
|
|
67
|
-
input_schema: { type:
|
|
74
|
+
name: "test-retry-tool",
|
|
75
|
+
description: "A tool that fails on first load then succeeds",
|
|
76
|
+
input_schema: { type: "object", properties: {}, required: [] },
|
|
68
77
|
},
|
|
69
78
|
loader: async () => {
|
|
70
79
|
callCount++;
|
|
71
80
|
if (callCount === 1) {
|
|
72
|
-
throw new Error(
|
|
81
|
+
throw new Error("transient load failure");
|
|
73
82
|
}
|
|
74
|
-
return makeFakeTool(
|
|
83
|
+
return makeFakeTool("test-retry-tool");
|
|
75
84
|
},
|
|
76
85
|
});
|
|
77
86
|
|
|
78
|
-
const tool = getTool(
|
|
87
|
+
const tool = getTool("test-retry-tool")!;
|
|
79
88
|
expect(tool).toBeDefined();
|
|
80
89
|
|
|
81
90
|
const dummyContext = {} as ToolContext;
|
|
82
91
|
|
|
83
92
|
// First call should throw the transient error
|
|
84
|
-
await expect(tool.execute({}, dummyContext)).rejects.toThrow(
|
|
93
|
+
await expect(tool.execute({}, dummyContext)).rejects.toThrow(
|
|
94
|
+
"transient load failure",
|
|
95
|
+
);
|
|
85
96
|
expect(callCount).toBe(1);
|
|
86
97
|
|
|
87
98
|
// Second call should retry the loader and succeed
|
|
88
99
|
const result = await tool.execute({}, dummyContext);
|
|
89
|
-
expect(result.content).toBe(
|
|
100
|
+
expect(result.content).toBe("ok");
|
|
90
101
|
expect(result.isError).toBe(false);
|
|
91
102
|
expect(callCount).toBe(2);
|
|
92
103
|
});
|
|
93
104
|
});
|
|
94
105
|
|
|
95
|
-
describe(
|
|
96
|
-
test(
|
|
106
|
+
describe("tool registry host tools", () => {
|
|
107
|
+
test("registers host tools and exposes them in tool definitions", async () => {
|
|
97
108
|
await initializeTools();
|
|
98
109
|
|
|
99
|
-
const hostToolNames = [
|
|
110
|
+
const hostToolNames = [
|
|
111
|
+
"host_file_read",
|
|
112
|
+
"host_file_write",
|
|
113
|
+
"host_file_edit",
|
|
114
|
+
"host_bash",
|
|
115
|
+
] as const;
|
|
100
116
|
|
|
101
117
|
for (const toolName of hostToolNames) {
|
|
102
118
|
const tool = getTool(toolName);
|
|
@@ -111,14 +127,14 @@ describe('tool registry host tools', () => {
|
|
|
111
127
|
});
|
|
112
128
|
});
|
|
113
129
|
|
|
114
|
-
describe(
|
|
115
|
-
test(
|
|
130
|
+
describe("tool registry dynamic-tools tools", () => {
|
|
131
|
+
test("registers scaffold, delete, and skill_load tools", async () => {
|
|
116
132
|
await initializeTools();
|
|
117
133
|
|
|
118
134
|
const dynamicToolNames = [
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
135
|
+
"scaffold_managed_skill",
|
|
136
|
+
"delete_managed_skill",
|
|
137
|
+
"skill_load",
|
|
122
138
|
] as const;
|
|
123
139
|
|
|
124
140
|
for (const toolName of dynamicToolNames) {
|
|
@@ -132,25 +148,25 @@ describe('tool registry dynamic-tools tools', () => {
|
|
|
132
148
|
}
|
|
133
149
|
});
|
|
134
150
|
|
|
135
|
-
test(
|
|
151
|
+
test("scaffold and delete are registered as High risk", async () => {
|
|
136
152
|
await initializeTools();
|
|
137
|
-
for (const name of [
|
|
153
|
+
for (const name of ["scaffold_managed_skill", "delete_managed_skill"]) {
|
|
138
154
|
const tool = getTool(name);
|
|
139
155
|
expect(tool).toBeDefined();
|
|
140
156
|
expect(tool?.defaultRiskLevel).toBe(RiskLevel.High);
|
|
141
157
|
}
|
|
142
158
|
});
|
|
143
159
|
|
|
144
|
-
test(
|
|
160
|
+
test("skill_load is registered as Low risk", async () => {
|
|
145
161
|
await initializeTools();
|
|
146
|
-
const tool = getTool(
|
|
162
|
+
const tool = getTool("skill_load");
|
|
147
163
|
expect(tool).toBeDefined();
|
|
148
164
|
expect(tool?.defaultRiskLevel).toBe(RiskLevel.Low);
|
|
149
165
|
});
|
|
150
166
|
});
|
|
151
167
|
|
|
152
|
-
describe(
|
|
153
|
-
test(
|
|
168
|
+
describe("tool manifest", () => {
|
|
169
|
+
test("all manifest lazy tools are registered after init", async () => {
|
|
154
170
|
await initializeTools();
|
|
155
171
|
const registered = new Set(getAllTools().map((t) => t.name));
|
|
156
172
|
|
|
@@ -159,99 +175,122 @@ describe('tool manifest', () => {
|
|
|
159
175
|
}
|
|
160
176
|
});
|
|
161
177
|
|
|
162
|
-
test(
|
|
178
|
+
test("manifest declares expected core lazy tools", () => {
|
|
163
179
|
// bash and swarm_delegate moved from lazy to eager registration
|
|
164
180
|
const lazyNames = new Set(lazyTools.map((t) => t.name));
|
|
165
|
-
expect(lazyNames.has(
|
|
166
|
-
expect(lazyNames.has(
|
|
167
|
-
expect(lazyNames.has(
|
|
168
|
-
expect(lazyNames.has(
|
|
181
|
+
expect(lazyNames.has("bash")).toBe(false);
|
|
182
|
+
expect(lazyNames.has("evaluate_typescript_code")).toBe(false);
|
|
183
|
+
expect(lazyNames.has("claude_code")).toBe(false);
|
|
184
|
+
expect(lazyNames.has("swarm_delegate")).toBe(false);
|
|
169
185
|
// Verify they are in eager tools instead
|
|
170
|
-
expect(eagerModuleToolNames).toContain(
|
|
171
|
-
expect(eagerModuleToolNames).toContain(
|
|
186
|
+
expect(eagerModuleToolNames).toContain("bash");
|
|
187
|
+
expect(eagerModuleToolNames).toContain("swarm_delegate");
|
|
172
188
|
});
|
|
173
189
|
|
|
174
|
-
test(
|
|
190
|
+
test("eager module tool names list contains expected count", () => {
|
|
175
191
|
expect(eagerModuleToolNames.length).toBe(15);
|
|
176
192
|
});
|
|
177
193
|
|
|
178
|
-
test(
|
|
194
|
+
test("explicit tools list includes memory, credential, and watch tools", () => {
|
|
179
195
|
const names = explicitTools.map((t) => t.name);
|
|
180
|
-
expect(names).toContain(
|
|
181
|
-
expect(names).toContain(
|
|
182
|
-
expect(names).toContain(
|
|
183
|
-
expect(names).toContain(
|
|
184
|
-
expect(names).toContain(
|
|
185
|
-
expect(names).toContain(
|
|
186
|
-
expect(names).toContain('vellum_skills_catalog');
|
|
196
|
+
expect(names).toContain("memory_search");
|
|
197
|
+
expect(names).toContain("memory_save");
|
|
198
|
+
expect(names).toContain("memory_update");
|
|
199
|
+
expect(names).toContain("credential_store");
|
|
200
|
+
expect(names).toContain("account_manage");
|
|
201
|
+
expect(names).toContain("start_screen_watch");
|
|
187
202
|
});
|
|
188
203
|
|
|
189
|
-
test(
|
|
204
|
+
test("registered tool count is at least eager + lazy + host", async () => {
|
|
190
205
|
await initializeTools();
|
|
191
206
|
const tools = getAllTools();
|
|
192
|
-
expect(tools.length).toBeGreaterThanOrEqual(
|
|
207
|
+
expect(tools.length).toBeGreaterThanOrEqual(
|
|
208
|
+
eagerModuleToolNames.length + lazyTools.length,
|
|
209
|
+
);
|
|
193
210
|
});
|
|
194
211
|
});
|
|
195
212
|
|
|
196
|
-
describe(
|
|
197
|
-
test(
|
|
213
|
+
describe("baseline characterization: hardcoded tool loading", () => {
|
|
214
|
+
test("gmail tools are NOT registered in the global registry after initializeTools()", async () => {
|
|
198
215
|
await initializeTools();
|
|
199
216
|
const allTools = getAllTools();
|
|
200
|
-
const toolNames = allTools.map(t => t.name);
|
|
201
|
-
|
|
202
|
-
const gmailTools = [
|
|
203
|
-
|
|
204
|
-
|
|
217
|
+
const toolNames = allTools.map((t) => t.name);
|
|
218
|
+
|
|
219
|
+
const gmailTools = [
|
|
220
|
+
"gmail_search",
|
|
221
|
+
"gmail_list_messages",
|
|
222
|
+
"gmail_get_message",
|
|
223
|
+
"gmail_mark_read",
|
|
224
|
+
"gmail_draft",
|
|
225
|
+
"gmail_archive",
|
|
226
|
+
"gmail_batch_archive",
|
|
227
|
+
"gmail_label",
|
|
228
|
+
"gmail_batch_label",
|
|
229
|
+
"gmail_trash",
|
|
230
|
+
"gmail_send",
|
|
231
|
+
"gmail_unsubscribe",
|
|
232
|
+
];
|
|
205
233
|
for (const name of gmailTools) {
|
|
206
234
|
expect(toolNames).not.toContain(name);
|
|
207
235
|
}
|
|
208
236
|
});
|
|
209
237
|
|
|
210
|
-
test(
|
|
211
|
-
const gmailTools = [
|
|
212
|
-
|
|
213
|
-
|
|
238
|
+
test("gmail tool names are NOT in eagerModuleToolNames manifest", () => {
|
|
239
|
+
const gmailTools = [
|
|
240
|
+
"gmail_search",
|
|
241
|
+
"gmail_list_messages",
|
|
242
|
+
"gmail_get_message",
|
|
243
|
+
"gmail_mark_read",
|
|
244
|
+
"gmail_draft",
|
|
245
|
+
"gmail_archive",
|
|
246
|
+
"gmail_batch_archive",
|
|
247
|
+
"gmail_label",
|
|
248
|
+
"gmail_batch_label",
|
|
249
|
+
"gmail_trash",
|
|
250
|
+
"gmail_send",
|
|
251
|
+
"gmail_unsubscribe",
|
|
252
|
+
];
|
|
214
253
|
for (const name of gmailTools) {
|
|
215
254
|
expect(eagerModuleToolNames).not.toContain(name);
|
|
216
255
|
}
|
|
217
256
|
});
|
|
218
257
|
|
|
219
|
-
test(
|
|
258
|
+
test("weather tool is NOT in global registry after initializeTools()", async () => {
|
|
220
259
|
await initializeTools();
|
|
221
|
-
const tool = getTool(
|
|
260
|
+
const tool = getTool("get_weather");
|
|
222
261
|
expect(tool).toBeUndefined();
|
|
223
262
|
});
|
|
224
263
|
|
|
225
|
-
test(
|
|
226
|
-
expect(eagerModuleToolNames).not.toContain(
|
|
264
|
+
test("weather tool name is NOT in eagerModuleToolNames manifest", () => {
|
|
265
|
+
expect(eagerModuleToolNames).not.toContain("get_weather");
|
|
227
266
|
});
|
|
228
267
|
|
|
229
|
-
test(
|
|
268
|
+
test("claude_code is NOT in global registry after initializeTools()", async () => {
|
|
230
269
|
await initializeTools();
|
|
231
|
-
const tool = getTool(
|
|
270
|
+
const tool = getTool("claude_code");
|
|
232
271
|
expect(tool).toBeUndefined();
|
|
233
272
|
});
|
|
234
273
|
|
|
235
|
-
test(
|
|
236
|
-
const lazyNames = lazyTools.map(t => t.name);
|
|
237
|
-
expect(lazyNames).not.toContain(
|
|
274
|
+
test("claude_code is NOT in lazyTools manifest", () => {
|
|
275
|
+
const lazyNames = lazyTools.map((t) => t.name);
|
|
276
|
+
expect(lazyNames).not.toContain("claude_code");
|
|
238
277
|
});
|
|
239
278
|
});
|
|
240
279
|
|
|
241
|
-
describe(
|
|
242
|
-
test(
|
|
280
|
+
describe("baseline characterization: core app tool surface", () => {
|
|
281
|
+
test("non-proxy app tools are NOT in core registry (now skill-provided)", async () => {
|
|
243
282
|
await initializeTools();
|
|
244
283
|
|
|
245
284
|
const nonProxyAppTools = [
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
285
|
+
"app_create",
|
|
286
|
+
"app_list",
|
|
287
|
+
"app_query",
|
|
288
|
+
"app_update",
|
|
289
|
+
"app_delete",
|
|
290
|
+
"app_file_list",
|
|
291
|
+
"app_file_read",
|
|
292
|
+
"app_file_edit",
|
|
293
|
+
"app_file_write",
|
|
255
294
|
];
|
|
256
295
|
|
|
257
296
|
for (const name of nonProxyAppTools) {
|
|
@@ -265,225 +304,232 @@ describe('baseline characterization: core app tool surface', () => {
|
|
|
265
304
|
}
|
|
266
305
|
});
|
|
267
306
|
|
|
268
|
-
test(
|
|
307
|
+
test("core registry includes app_open proxy tool", async () => {
|
|
269
308
|
await initializeTools();
|
|
270
309
|
|
|
271
|
-
const tool = getTool(
|
|
310
|
+
const tool = getTool("app_open");
|
|
272
311
|
expect(tool).toBeDefined();
|
|
273
|
-
expect(tool?.executionMode).toBe(
|
|
312
|
+
expect(tool?.executionMode).toBe("proxy");
|
|
274
313
|
|
|
275
314
|
// Proxy tools are excluded from getAllToolDefinitions() by design
|
|
276
315
|
const definitionNames = getAllToolDefinitions().map((def) => def.name);
|
|
277
|
-
expect(definitionNames).not.toContain(
|
|
316
|
+
expect(definitionNames).not.toContain("app_open");
|
|
278
317
|
});
|
|
279
318
|
|
|
280
|
-
test(
|
|
281
|
-
const path = await import(
|
|
282
|
-
const fs = await import(
|
|
319
|
+
test("bundled app-builder skill has TOOLS.json manifest", async () => {
|
|
320
|
+
const path = await import("node:path");
|
|
321
|
+
const fs = await import("node:fs");
|
|
283
322
|
|
|
284
323
|
// Resolve the bundled skill directory relative to the source config
|
|
285
324
|
const skillDir = path.resolve(
|
|
286
325
|
import.meta.dirname,
|
|
287
|
-
|
|
326
|
+
"../config/bundled-skills/app-builder",
|
|
288
327
|
);
|
|
289
|
-
const toolsJsonPath = path.join(skillDir,
|
|
328
|
+
const toolsJsonPath = path.join(skillDir, "TOOLS.json");
|
|
290
329
|
|
|
291
330
|
expect(fs.existsSync(toolsJsonPath)).toBe(true);
|
|
292
331
|
});
|
|
293
332
|
});
|
|
294
333
|
|
|
295
|
-
describe(
|
|
296
|
-
beforeEach(() => {
|
|
334
|
+
describe("tool origin metadata", () => {
|
|
335
|
+
beforeEach(() => {
|
|
336
|
+
__resetRegistryForTesting();
|
|
337
|
+
});
|
|
297
338
|
|
|
298
|
-
test(
|
|
339
|
+
test("registers a skill-origin tool and preserves metadata via getTool()", () => {
|
|
299
340
|
const skillTool: Tool = {
|
|
300
|
-
...makeFakeTool(
|
|
301
|
-
origin:
|
|
302
|
-
ownerSkillId:
|
|
341
|
+
...makeFakeTool("test-skill-origin-tool"),
|
|
342
|
+
origin: "skill",
|
|
343
|
+
ownerSkillId: "test-skill",
|
|
303
344
|
};
|
|
304
345
|
|
|
305
346
|
registerTool(skillTool);
|
|
306
347
|
|
|
307
|
-
const retrieved = getTool(
|
|
348
|
+
const retrieved = getTool("test-skill-origin-tool");
|
|
308
349
|
expect(retrieved).toBeDefined();
|
|
309
|
-
expect(retrieved?.origin).toBe(
|
|
310
|
-
expect(retrieved?.ownerSkillId).toBe(
|
|
350
|
+
expect(retrieved?.origin).toBe("skill");
|
|
351
|
+
expect(retrieved?.ownerSkillId).toBe("test-skill");
|
|
311
352
|
});
|
|
312
353
|
|
|
313
|
-
test(
|
|
354
|
+
test("core tools default to no origin metadata (undefined)", async () => {
|
|
314
355
|
await initializeTools();
|
|
315
356
|
|
|
316
|
-
const coreTool = getTool(
|
|
357
|
+
const coreTool = getTool("host_file_read");
|
|
317
358
|
expect(coreTool).toBeDefined();
|
|
318
359
|
expect(coreTool?.origin).toBeUndefined();
|
|
319
360
|
expect(coreTool?.ownerSkillId).toBeUndefined();
|
|
320
361
|
});
|
|
321
362
|
});
|
|
322
363
|
|
|
323
|
-
describe(
|
|
324
|
-
beforeEach(() => {
|
|
364
|
+
describe("dynamic skill tool registry", () => {
|
|
365
|
+
beforeEach(() => {
|
|
366
|
+
__resetRegistryForTesting();
|
|
367
|
+
});
|
|
325
368
|
|
|
326
|
-
test(
|
|
369
|
+
test("registers skill tools and retrieves them", () => {
|
|
327
370
|
const tools = [
|
|
328
|
-
makeSkillTool(
|
|
329
|
-
makeSkillTool(
|
|
371
|
+
makeSkillTool("sk_tool_a", "my-skill"),
|
|
372
|
+
makeSkillTool("sk_tool_b", "my-skill"),
|
|
330
373
|
];
|
|
331
374
|
registerSkillTools(tools);
|
|
332
375
|
|
|
333
|
-
expect(getTool(
|
|
334
|
-
expect(getTool(
|
|
335
|
-
expect(getTool(
|
|
376
|
+
expect(getTool("sk_tool_a")).toBeDefined();
|
|
377
|
+
expect(getTool("sk_tool_a")?.origin).toBe("skill");
|
|
378
|
+
expect(getTool("sk_tool_a")?.ownerSkillId).toBe("my-skill");
|
|
336
379
|
|
|
337
|
-
expect(getTool(
|
|
338
|
-
expect(getTool(
|
|
380
|
+
expect(getTool("sk_tool_b")).toBeDefined();
|
|
381
|
+
expect(getTool("sk_tool_b")?.origin).toBe("skill");
|
|
339
382
|
});
|
|
340
383
|
|
|
341
|
-
test(
|
|
384
|
+
test("skips skill tool that collides with a core tool without throwing", async () => {
|
|
342
385
|
await initializeTools();
|
|
343
386
|
|
|
344
387
|
// host_file_read is a core tool registered during init
|
|
345
|
-
const colliding = makeSkillTool(
|
|
388
|
+
const colliding = makeSkillTool("host_file_read", "rogue-skill");
|
|
346
389
|
const accepted = registerSkillTools([colliding]);
|
|
347
390
|
|
|
348
391
|
// The colliding tool should be silently skipped
|
|
349
392
|
expect(accepted).toHaveLength(0);
|
|
350
393
|
// The core tool should still be in place (not overwritten)
|
|
351
|
-
const retrieved = getTool(
|
|
394
|
+
const retrieved = getTool("host_file_read");
|
|
352
395
|
expect(retrieved?.origin).toBeUndefined(); // core tools have no origin
|
|
353
396
|
});
|
|
354
397
|
|
|
355
|
-
test(
|
|
356
|
-
const original = makeSkillTool(
|
|
398
|
+
test("allows replacement within the same owning skill", () => {
|
|
399
|
+
const original = makeSkillTool("sk_replaceable", "owner-skill");
|
|
357
400
|
registerSkillTools([original]);
|
|
358
401
|
|
|
359
402
|
const replacement: Tool = {
|
|
360
|
-
...makeSkillTool(
|
|
361
|
-
description:
|
|
403
|
+
...makeSkillTool("sk_replaceable", "owner-skill"),
|
|
404
|
+
description: "Updated description",
|
|
362
405
|
};
|
|
363
406
|
// Should not throw
|
|
364
407
|
registerSkillTools([replacement]);
|
|
365
408
|
|
|
366
|
-
const retrieved = getTool(
|
|
367
|
-
expect(retrieved?.description).toBe(
|
|
409
|
+
const retrieved = getTool("sk_replaceable");
|
|
410
|
+
expect(retrieved?.description).toBe("Updated description");
|
|
368
411
|
});
|
|
369
412
|
|
|
370
|
-
test(
|
|
371
|
-
const original = makeSkillTool(
|
|
413
|
+
test("rejects replacement from a different owning skill", () => {
|
|
414
|
+
const original = makeSkillTool("sk_owned", "skill-alpha");
|
|
372
415
|
registerSkillTools([original]);
|
|
373
416
|
|
|
374
|
-
const intruder = makeSkillTool(
|
|
417
|
+
const intruder = makeSkillTool("sk_owned", "skill-beta");
|
|
375
418
|
expect(() => registerSkillTools([intruder])).toThrow(
|
|
376
419
|
'already registered by skill "skill-alpha"',
|
|
377
420
|
);
|
|
378
421
|
});
|
|
379
422
|
|
|
380
|
-
test(
|
|
423
|
+
test("unregisterSkillTools removes all tools for a skill", () => {
|
|
381
424
|
const tools = [
|
|
382
|
-
makeSkillTool(
|
|
383
|
-
makeSkillTool(
|
|
425
|
+
makeSkillTool("sk_rm_1", "removable-skill"),
|
|
426
|
+
makeSkillTool("sk_rm_2", "removable-skill"),
|
|
384
427
|
];
|
|
385
428
|
registerSkillTools(tools);
|
|
386
|
-
expect(getTool(
|
|
387
|
-
expect(getTool(
|
|
429
|
+
expect(getTool("sk_rm_1")).toBeDefined();
|
|
430
|
+
expect(getTool("sk_rm_2")).toBeDefined();
|
|
388
431
|
|
|
389
|
-
unregisterSkillTools(
|
|
432
|
+
unregisterSkillTools("removable-skill");
|
|
390
433
|
|
|
391
|
-
expect(getTool(
|
|
392
|
-
expect(getTool(
|
|
434
|
+
expect(getTool("sk_rm_1")).toBeUndefined();
|
|
435
|
+
expect(getTool("sk_rm_2")).toBeUndefined();
|
|
393
436
|
});
|
|
394
437
|
|
|
395
|
-
test(
|
|
396
|
-
registerSkillTools([makeSkillTool(
|
|
397
|
-
registerSkillTools([makeSkillTool(
|
|
438
|
+
test("unregisterSkillTools does not affect tools from other skills", () => {
|
|
439
|
+
registerSkillTools([makeSkillTool("sk_keep", "keep-skill")]);
|
|
440
|
+
registerSkillTools([makeSkillTool("sk_remove", "nuke-skill")]);
|
|
398
441
|
|
|
399
|
-
unregisterSkillTools(
|
|
442
|
+
unregisterSkillTools("nuke-skill");
|
|
400
443
|
|
|
401
|
-
expect(getTool(
|
|
402
|
-
expect(getTool(
|
|
444
|
+
expect(getTool("sk_keep")).toBeDefined();
|
|
445
|
+
expect(getTool("sk_remove")).toBeUndefined();
|
|
403
446
|
});
|
|
404
447
|
|
|
405
|
-
test(
|
|
448
|
+
test("getSkillToolNames returns only skill tool names", async () => {
|
|
406
449
|
await initializeTools();
|
|
407
450
|
|
|
408
451
|
registerSkillTools([
|
|
409
|
-
makeSkillTool(
|
|
410
|
-
makeSkillTool(
|
|
452
|
+
makeSkillTool("sk_names_a", "names-skill"),
|
|
453
|
+
makeSkillTool("sk_names_b", "names-skill"),
|
|
411
454
|
]);
|
|
412
455
|
|
|
413
456
|
const skillNames = getSkillToolNames();
|
|
414
|
-
expect(skillNames).toContain(
|
|
415
|
-
expect(skillNames).toContain(
|
|
457
|
+
expect(skillNames).toContain("sk_names_a");
|
|
458
|
+
expect(skillNames).toContain("sk_names_b");
|
|
416
459
|
// Core tools should not appear
|
|
417
|
-
expect(skillNames).not.toContain(
|
|
418
|
-
expect(skillNames).not.toContain(
|
|
460
|
+
expect(skillNames).not.toContain("host_file_read");
|
|
461
|
+
expect(skillNames).not.toContain("bash");
|
|
419
462
|
});
|
|
420
463
|
|
|
421
|
-
test(
|
|
464
|
+
test("registerSkillTools skips core-colliding tools but registers the rest", async () => {
|
|
422
465
|
await initializeTools();
|
|
423
466
|
|
|
424
467
|
const tools = [
|
|
425
|
-
makeSkillTool(
|
|
426
|
-
makeSkillTool(
|
|
468
|
+
makeSkillTool("sk_atomic_ok", "atomic-skill"),
|
|
469
|
+
makeSkillTool("host_file_read", "atomic-skill"), // collides with core
|
|
427
470
|
];
|
|
428
471
|
|
|
429
472
|
const accepted = registerSkillTools(tools);
|
|
430
473
|
// Only the non-colliding tool should be accepted
|
|
431
474
|
expect(accepted).toHaveLength(1);
|
|
432
|
-
expect(accepted[0].name).toBe(
|
|
475
|
+
expect(accepted[0].name).toBe("sk_atomic_ok");
|
|
433
476
|
// The non-colliding tool should be registered
|
|
434
|
-
expect(getTool(
|
|
477
|
+
expect(getTool("sk_atomic_ok")).toBeDefined();
|
|
435
478
|
// The core tool should be untouched
|
|
436
|
-
expect(getTool(
|
|
479
|
+
expect(getTool("host_file_read")?.origin).toBeUndefined();
|
|
437
480
|
});
|
|
438
481
|
});
|
|
439
482
|
|
|
440
|
-
describe(
|
|
441
|
-
beforeEach(() => {
|
|
483
|
+
describe("skill tool reference counting", () => {
|
|
484
|
+
beforeEach(() => {
|
|
485
|
+
__resetRegistryForTesting();
|
|
486
|
+
});
|
|
442
487
|
|
|
443
|
-
test(
|
|
444
|
-
registerSkillTools([makeSkillTool(
|
|
445
|
-
expect(getSkillRefCount(
|
|
488
|
+
test("ref count increments on each registerSkillTools call", () => {
|
|
489
|
+
registerSkillTools([makeSkillTool("rc_a", "rc-skill")]);
|
|
490
|
+
expect(getSkillRefCount("rc-skill")).toBe(1);
|
|
446
491
|
|
|
447
492
|
// Second session registers the same skill (same ownerSkillId allows replacement)
|
|
448
|
-
registerSkillTools([makeSkillTool(
|
|
449
|
-
expect(getSkillRefCount(
|
|
493
|
+
registerSkillTools([makeSkillTool("rc_a", "rc-skill")]);
|
|
494
|
+
expect(getSkillRefCount("rc-skill")).toBe(2);
|
|
450
495
|
});
|
|
451
496
|
|
|
452
|
-
test(
|
|
453
|
-
registerSkillTools([makeSkillTool(
|
|
454
|
-
registerSkillTools([makeSkillTool(
|
|
455
|
-
expect(getSkillRefCount(
|
|
497
|
+
test("unregister decrements ref count but keeps tools when count > 0", () => {
|
|
498
|
+
registerSkillTools([makeSkillTool("rc_keep", "rc-multi")]);
|
|
499
|
+
registerSkillTools([makeSkillTool("rc_keep", "rc-multi")]);
|
|
500
|
+
expect(getSkillRefCount("rc-multi")).toBe(2);
|
|
456
501
|
|
|
457
|
-
unregisterSkillTools(
|
|
458
|
-
expect(getSkillRefCount(
|
|
502
|
+
unregisterSkillTools("rc-multi");
|
|
503
|
+
expect(getSkillRefCount("rc-multi")).toBe(1);
|
|
459
504
|
// Tools still present
|
|
460
|
-
expect(getTool(
|
|
505
|
+
expect(getTool("rc_keep")).toBeDefined();
|
|
461
506
|
});
|
|
462
507
|
|
|
463
|
-
test(
|
|
464
|
-
registerSkillTools([makeSkillTool(
|
|
465
|
-
registerSkillTools([makeSkillTool(
|
|
508
|
+
test("tools are removed only when last reference is unregistered", () => {
|
|
509
|
+
registerSkillTools([makeSkillTool("rc_last", "rc-final")]);
|
|
510
|
+
registerSkillTools([makeSkillTool("rc_last", "rc-final")]);
|
|
466
511
|
|
|
467
|
-
unregisterSkillTools(
|
|
468
|
-
expect(getTool(
|
|
512
|
+
unregisterSkillTools("rc-final");
|
|
513
|
+
expect(getTool("rc_last")).toBeDefined();
|
|
469
514
|
|
|
470
|
-
unregisterSkillTools(
|
|
471
|
-
expect(getTool(
|
|
472
|
-
expect(getSkillRefCount(
|
|
515
|
+
unregisterSkillTools("rc-final");
|
|
516
|
+
expect(getTool("rc_last")).toBeUndefined();
|
|
517
|
+
expect(getSkillRefCount("rc-final")).toBe(0);
|
|
473
518
|
});
|
|
474
519
|
|
|
475
|
-
test(
|
|
476
|
-
unregisterSkillTools(
|
|
477
|
-
expect(getSkillRefCount(
|
|
520
|
+
test("unregister with no prior registration is a no-op", () => {
|
|
521
|
+
unregisterSkillTools("nonexistent-skill");
|
|
522
|
+
expect(getSkillRefCount("nonexistent-skill")).toBe(0);
|
|
478
523
|
});
|
|
479
524
|
});
|
|
480
525
|
|
|
481
|
-
describe(
|
|
526
|
+
describe("computer-use registration split", () => {
|
|
482
527
|
// Start each test from a completely empty registry so assertions are
|
|
483
528
|
// non-vacuous — the split functions must actually register tools.
|
|
484
529
|
|
|
485
|
-
test(
|
|
486
|
-
const { registerComputerUseActionTools } =
|
|
530
|
+
test("registerComputerUseActionTools registers all 12 CU action tools and nothing else", async () => {
|
|
531
|
+
const { registerComputerUseActionTools } =
|
|
532
|
+
await import("../tools/computer-use/registry.js");
|
|
487
533
|
|
|
488
534
|
__clearRegistryForTesting();
|
|
489
535
|
expect(getAllTools()).toHaveLength(0);
|
|
@@ -492,7 +538,9 @@ describe('computer-use registration split', () => {
|
|
|
492
538
|
|
|
493
539
|
const registered = getAllTools();
|
|
494
540
|
expect(registered).toHaveLength(12);
|
|
495
|
-
expect(registered.every((t) => t.name.startsWith(
|
|
496
|
-
|
|
541
|
+
expect(registered.every((t) => t.name.startsWith("computer_use_"))).toBe(
|
|
542
|
+
true,
|
|
543
|
+
);
|
|
544
|
+
expect(getTool("computer_use_request_control")).toBeUndefined();
|
|
497
545
|
});
|
|
498
546
|
});
|