@vellumai/assistant 0.3.19 → 0.3.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +151 -15
- package/Dockerfile +1 -0
- package/README.md +40 -4
- package/bun.lock +139 -2
- package/docs/architecture/integrations.md +7 -11
- package/package.json +2 -1
- package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +54 -0
- package/src/__tests__/approval-primitive.test.ts +540 -0
- package/src/__tests__/assistant-feature-flag-guard.test.ts +206 -0
- package/src/__tests__/assistant-feature-flag-guardrails.test.ts +198 -0
- package/src/__tests__/assistant-feature-flags-integration.test.ts +272 -0
- package/src/__tests__/call-controller.test.ts +439 -108
- package/src/__tests__/channel-invite-transport.test.ts +264 -0
- package/src/__tests__/cli.test.ts +42 -1
- package/src/__tests__/config-schema.test.ts +11 -127
- package/src/__tests__/config-watcher.test.ts +0 -8
- package/src/__tests__/daemon-lifecycle.test.ts +1 -0
- package/src/__tests__/daemon-server-session-init.test.ts +8 -2
- package/src/__tests__/diff.test.ts +22 -0
- package/src/__tests__/guardian-action-copy-generator.test.ts +5 -0
- package/src/__tests__/guardian-action-grant-mint-consume.test.ts +300 -32
- package/src/__tests__/guardian-action-late-reply.test.ts +546 -1
- package/src/__tests__/guardian-actions-endpoint.test.ts +774 -0
- package/src/__tests__/guardian-control-plane-policy.test.ts +36 -3
- package/src/__tests__/guardian-dispatch.test.ts +124 -0
- package/src/__tests__/guardian-grant-minting.test.ts +6 -17
- package/src/__tests__/inbound-invite-redemption.test.ts +367 -0
- package/src/__tests__/invite-redemption-service.test.ts +306 -0
- package/src/__tests__/ipc-snapshot.test.ts +57 -0
- package/src/__tests__/notification-decision-fallback.test.ts +88 -0
- package/src/__tests__/sandbox-diagnostics.test.ts +6 -249
- package/src/__tests__/sandbox-host-parity.test.ts +6 -13
- package/src/__tests__/scoped-approval-grants.test.ts +6 -6
- package/src/__tests__/scoped-grant-security-matrix.test.ts +5 -4
- package/src/__tests__/script-proxy-session-manager.test.ts +1 -19
- package/src/__tests__/session-load-history-repair.test.ts +169 -2
- package/src/__tests__/session-runtime-assembly.test.ts +33 -5
- package/src/__tests__/skill-feature-flags-integration.test.ts +171 -0
- package/src/__tests__/skill-feature-flags.test.ts +188 -0
- package/src/__tests__/skill-load-feature-flag.test.ts +141 -0
- package/src/__tests__/skill-mirror-parity.test.ts +1 -0
- package/src/__tests__/skill-projection-feature-flag.test.ts +363 -0
- package/src/__tests__/system-prompt.test.ts +1 -1
- package/src/__tests__/terminal-sandbox.test.ts +142 -9
- package/src/__tests__/terminal-tools.test.ts +2 -93
- package/src/__tests__/thread-seed-composer.test.ts +18 -0
- package/src/__tests__/tool-approval-handler.test.ts +350 -0
- package/src/__tests__/trusted-contact-lifecycle-notifications.test.ts +8 -10
- package/src/__tests__/voice-scoped-grant-consumer.test.ts +46 -84
- package/src/agent/loop.ts +36 -1
- package/src/approvals/approval-primitive.ts +381 -0
- package/src/approvals/guardian-decision-primitive.ts +191 -0
- package/src/calls/call-controller.ts +252 -209
- package/src/calls/call-domain.ts +44 -6
- package/src/calls/guardian-dispatch.ts +48 -0
- package/src/calls/types.ts +1 -1
- package/src/calls/voice-session-bridge.ts +46 -30
- package/src/cli/core-commands.ts +0 -4
- package/src/cli/mcp.ts +58 -0
- package/src/cli.ts +76 -34
- package/src/config/__tests__/feature-flag-registry-guard.test.ts +179 -0
- package/src/config/assistant-feature-flags.ts +162 -0
- package/src/config/bundled-skills/api-mapping/icon.svg +18 -0
- package/src/config/bundled-skills/messaging/TOOLS.json +30 -0
- package/src/config/bundled-skills/messaging/tools/slack-delete-message.ts +24 -0
- package/src/config/bundled-skills/notifications/SKILL.md +1 -1
- package/src/config/bundled-skills/reminder/SKILL.md +49 -2
- package/src/config/bundled-skills/time-based-actions/SKILL.md +49 -2
- package/src/config/bundled-skills/voice-setup/SKILL.md +122 -0
- package/src/config/core-schema.ts +1 -1
- package/src/config/env-registry.ts +10 -0
- package/src/config/feature-flag-registry.json +61 -0
- package/src/config/loader.ts +22 -1
- package/src/config/mcp-schema.ts +46 -0
- package/src/config/sandbox-schema.ts +0 -39
- package/src/config/schema.ts +18 -2
- package/src/config/skill-state.ts +34 -0
- package/src/config/skills-schema.ts +0 -1
- package/src/config/skills.ts +9 -0
- package/src/config/system-prompt.ts +110 -46
- package/src/config/templates/SOUL.md +1 -1
- package/src/config/types.ts +19 -1
- package/src/config/vellum-skills/catalog.json +1 -1
- package/src/config/vellum-skills/guardian-verify-setup/SKILL.md +1 -0
- package/src/config/vellum-skills/sms-setup/SKILL.md +1 -1
- package/src/config/vellum-skills/telegram-setup/SKILL.md +6 -5
- package/src/config/vellum-skills/trusted-contacts/SKILL.md +105 -3
- package/src/config/vellum-skills/twilio-setup/SKILL.md +1 -1
- package/src/daemon/config-watcher.ts +0 -1
- package/src/daemon/daemon-control.ts +1 -1
- package/src/daemon/guardian-invite-intent.ts +124 -0
- package/src/daemon/handlers/avatar.ts +68 -0
- package/src/daemon/handlers/browser.ts +2 -2
- package/src/daemon/handlers/guardian-actions.ts +120 -0
- package/src/daemon/handlers/index.ts +4 -0
- package/src/daemon/handlers/sessions.ts +19 -0
- package/src/daemon/handlers/shared.ts +3 -1
- package/src/daemon/install-cli-launchers.ts +58 -13
- package/src/daemon/ipc-contract/guardian-actions.ts +53 -0
- package/src/daemon/ipc-contract/sessions.ts +8 -2
- package/src/daemon/ipc-contract/settings.ts +25 -2
- package/src/daemon/ipc-contract-inventory.json +10 -0
- package/src/daemon/ipc-contract.ts +4 -0
- package/src/daemon/lifecycle.ts +14 -2
- package/src/daemon/main.ts +1 -0
- package/src/daemon/providers-setup.ts +26 -1
- package/src/daemon/server.ts +1 -0
- package/src/daemon/session-lifecycle.ts +52 -7
- package/src/daemon/session-memory.ts +45 -0
- package/src/daemon/session-process.ts +258 -432
- package/src/daemon/session-runtime-assembly.ts +12 -0
- package/src/daemon/session-skill-tools.ts +14 -1
- package/src/daemon/session-tool-setup.ts +5 -0
- package/src/daemon/session.ts +11 -0
- package/src/daemon/shutdown-handlers.ts +11 -0
- package/src/daemon/tool-side-effects.ts +35 -9
- package/src/index.ts +2 -2
- package/src/mcp/client.ts +152 -0
- package/src/mcp/manager.ts +139 -0
- package/src/memory/conversation-display-order-migration.ts +44 -0
- package/src/memory/conversation-queries.ts +2 -0
- package/src/memory/conversation-store.ts +91 -0
- package/src/memory/db-init.ts +5 -1
- package/src/memory/embedding-local.ts +13 -8
- package/src/memory/guardian-action-store.ts +125 -2
- package/src/memory/ingress-invite-store.ts +95 -1
- package/src/memory/migrations/035-guardian-action-supersession.ts +23 -0
- package/src/memory/migrations/index.ts +2 -1
- package/src/memory/schema.ts +5 -1
- package/src/memory/scoped-approval-grants.ts +14 -5
- package/src/messaging/providers/slack/client.ts +12 -0
- package/src/messaging/providers/slack/types.ts +5 -0
- package/src/notifications/decision-engine.ts +49 -12
- package/src/notifications/emit-signal.ts +7 -0
- package/src/notifications/signal.ts +7 -0
- package/src/notifications/thread-seed-composer.ts +2 -1
- package/src/runtime/channel-approval-types.ts +16 -6
- package/src/runtime/channel-approvals.ts +19 -15
- package/src/runtime/channel-invite-transport.ts +85 -0
- package/src/runtime/channel-invite-transports/telegram.ts +105 -0
- package/src/runtime/guardian-action-grant-minter.ts +92 -35
- package/src/runtime/guardian-action-message-composer.ts +30 -0
- package/src/runtime/guardian-decision-types.ts +91 -0
- package/src/runtime/http-server.ts +23 -1
- package/src/runtime/ingress-service.ts +22 -0
- package/src/runtime/invite-redemption-service.ts +181 -0
- package/src/runtime/invite-redemption-templates.ts +39 -0
- package/src/runtime/routes/call-routes.ts +2 -1
- package/src/runtime/routes/guardian-action-routes.ts +206 -0
- package/src/runtime/routes/guardian-approval-interception.ts +66 -190
- package/src/runtime/routes/identity-routes.ts +73 -0
- package/src/runtime/routes/inbound-message-handler.ts +486 -394
- package/src/runtime/routes/pairing-routes.ts +4 -0
- package/src/security/encrypted-store.ts +31 -17
- package/src/security/keychain.ts +176 -2
- package/src/security/secure-keys.ts +97 -0
- package/src/security/tool-approval-digest.ts +1 -1
- package/src/tools/browser/browser-execution.ts +2 -2
- package/src/tools/browser/browser-manager.ts +46 -32
- package/src/tools/browser/browser-screencast.ts +2 -2
- package/src/tools/calls/call-start.ts +1 -1
- package/src/tools/executor.ts +22 -17
- package/src/tools/mcp/mcp-tool-factory.ts +100 -0
- package/src/tools/network/script-proxy/session-manager.ts +1 -5
- package/src/tools/registry.ts +64 -1
- package/src/tools/skills/load.ts +22 -8
- package/src/tools/system/avatar-generator.ts +119 -0
- package/src/tools/system/navigate-settings.ts +65 -0
- package/src/tools/system/open-system-settings.ts +75 -0
- package/src/tools/system/voice-config.ts +121 -32
- package/src/tools/terminal/backends/native.ts +40 -19
- package/src/tools/terminal/backends/types.ts +3 -3
- package/src/tools/terminal/parser.ts +1 -1
- package/src/tools/terminal/sandbox-diagnostics.ts +6 -87
- package/src/tools/terminal/sandbox.ts +1 -12
- package/src/tools/terminal/shell.ts +3 -31
- package/src/tools/tool-approval-handler.ts +141 -3
- package/src/tools/tool-manifest.ts +6 -0
- package/src/tools/types.ts +10 -2
- package/src/util/diff.ts +36 -13
- package/Dockerfile.sandbox +0 -5
- package/src/__tests__/doordash-client.test.ts +0 -187
- package/src/__tests__/doordash-session.test.ts +0 -154
- package/src/__tests__/signup-e2e.test.ts +0 -354
- package/src/__tests__/terminal-sandbox-docker.test.ts +0 -1065
- package/src/__tests__/terminal-sandbox.integration.test.ts +0 -180
- package/src/cli/doordash.ts +0 -1057
- package/src/config/bundled-skills/doordash/SKILL.md +0 -163
- package/src/config/templates/LOOKS.md +0 -25
- package/src/doordash/cart-queries.ts +0 -787
- package/src/doordash/client.ts +0 -1016
- package/src/doordash/order-queries.ts +0 -85
- package/src/doordash/queries.ts +0 -13
- package/src/doordash/query-extractor.ts +0 -94
- package/src/doordash/search-queries.ts +0 -203
- package/src/doordash/session.ts +0 -84
- package/src/doordash/store-queries.ts +0 -246
- package/src/doordash/types.ts +0 -367
- package/src/tools/terminal/backends/docker.ts +0 -379
|
@@ -335,11 +335,6 @@ describe('buildChannelAwarenessSection', () => {
|
|
|
335
335
|
expect(section).toContain('computer-control permissions on non-dashboard');
|
|
336
336
|
});
|
|
337
337
|
|
|
338
|
-
test('includes guardian context contract for channel actors', () => {
|
|
339
|
-
const section = buildChannelAwarenessSection();
|
|
340
|
-
expect(section).toContain('<guardian_context>');
|
|
341
|
-
expect(section).toContain('Never infer guardian status');
|
|
342
|
-
});
|
|
343
338
|
});
|
|
344
339
|
|
|
345
340
|
// ---------------------------------------------------------------------------
|
|
@@ -569,6 +564,39 @@ describe('injectGuardianContext', () => {
|
|
|
569
564
|
expect(text).toContain('source_channel: sms');
|
|
570
565
|
expect(text).toContain('</guardian_context>');
|
|
571
566
|
});
|
|
567
|
+
|
|
568
|
+
test('includes behavioral guidance for non-guardian actors', () => {
|
|
569
|
+
const ctx: GuardianRuntimeContext = {
|
|
570
|
+
sourceChannel: 'telegram',
|
|
571
|
+
actorRole: 'non-guardian',
|
|
572
|
+
guardianExternalUserId: 'guardian-user-1',
|
|
573
|
+
guardianChatId: 'chat-1',
|
|
574
|
+
requesterIdentifier: '@someone',
|
|
575
|
+
requesterExternalUserId: 'other-user-1',
|
|
576
|
+
requesterChatId: 'chat-2',
|
|
577
|
+
};
|
|
578
|
+
|
|
579
|
+
const result = injectGuardianContext(baseUserMessage, ctx);
|
|
580
|
+
const text = (result.content[0] as { type: 'text'; text: string }).text;
|
|
581
|
+
expect(text).toContain('non-guardian account');
|
|
582
|
+
expect(text).toContain('Do not explain the verification system');
|
|
583
|
+
});
|
|
584
|
+
|
|
585
|
+
test('omits non-guardian behavioral guidance for guardian actors', () => {
|
|
586
|
+
const ctx: GuardianRuntimeContext = {
|
|
587
|
+
sourceChannel: 'telegram',
|
|
588
|
+
actorRole: 'guardian',
|
|
589
|
+
guardianExternalUserId: 'guardian-user-1',
|
|
590
|
+
guardianChatId: 'chat-1',
|
|
591
|
+
requesterIdentifier: '@guardian',
|
|
592
|
+
requesterExternalUserId: 'guardian-user-1',
|
|
593
|
+
requesterChatId: 'chat-1',
|
|
594
|
+
};
|
|
595
|
+
|
|
596
|
+
const result = injectGuardianContext(baseUserMessage, ctx);
|
|
597
|
+
const text = (result.content[0] as { type: 'text'; text: string }).text;
|
|
598
|
+
expect(text).not.toContain('non-guardian account');
|
|
599
|
+
});
|
|
572
600
|
});
|
|
573
601
|
|
|
574
602
|
describe('stripGuardianContext', () => {
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Integration tests for skill feature flag enforcement at system prompt,
|
|
3
|
+
* skill_load, and session-skill-tools projection layers.
|
|
4
|
+
*/
|
|
5
|
+
import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
|
|
6
|
+
import { tmpdir } from 'node:os';
|
|
7
|
+
import { join } from 'node:path';
|
|
8
|
+
|
|
9
|
+
import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test';
|
|
10
|
+
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
// Test-scoped temp directory and config state
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
|
|
15
|
+
const TEST_DIR = join(tmpdir(), `vellum-skill-flags-test-${crypto.randomUUID()}`);
|
|
16
|
+
|
|
17
|
+
let currentConfig: Record<string, unknown> = {
|
|
18
|
+
sandbox: { enabled: false, backend: 'native' },
|
|
19
|
+
featureFlags: {},
|
|
20
|
+
};
|
|
21
|
+
|
|
22
|
+
const DECLARED_SKILL_ID = 'hatch-new-assistant';
|
|
23
|
+
const DECLARED_LEGACY_KEY = 'skills.hatch-new-assistant.enabled';
|
|
24
|
+
|
|
25
|
+
mock.module('../util/platform.js', () => ({
|
|
26
|
+
getRootDir: () => TEST_DIR,
|
|
27
|
+
getDataDir: () => TEST_DIR,
|
|
28
|
+
getWorkspaceDir: () => TEST_DIR,
|
|
29
|
+
getWorkspaceConfigPath: () => join(TEST_DIR, 'config.json'),
|
|
30
|
+
getWorkspaceSkillsDir: () => join(TEST_DIR, 'skills'),
|
|
31
|
+
getWorkspaceHooksDir: () => join(TEST_DIR, 'hooks'),
|
|
32
|
+
getWorkspacePromptPath: (file: string) => join(TEST_DIR, file),
|
|
33
|
+
ensureDataDir: () => {},
|
|
34
|
+
getSocketPath: () => join(TEST_DIR, 'vellum.sock'),
|
|
35
|
+
getPidPath: () => join(TEST_DIR, 'vellum.pid'),
|
|
36
|
+
getDbPath: () => join(TEST_DIR, 'data', 'assistant.db'),
|
|
37
|
+
getLogPath: () => join(TEST_DIR, 'logs', 'vellum.log'),
|
|
38
|
+
getHistoryPath: () => join(TEST_DIR, 'history'),
|
|
39
|
+
getHooksDir: () => join(TEST_DIR, 'hooks'),
|
|
40
|
+
getIpcBlobDir: () => join(TEST_DIR, 'ipc-blobs'),
|
|
41
|
+
getSandboxRootDir: () => join(TEST_DIR, 'sandbox'),
|
|
42
|
+
getSandboxWorkingDir: () => TEST_DIR,
|
|
43
|
+
getInterfacesDir: () => join(TEST_DIR, 'interfaces'),
|
|
44
|
+
isMacOS: () => false,
|
|
45
|
+
isLinux: () => false,
|
|
46
|
+
isWindows: () => false,
|
|
47
|
+
getPlatformName: () => 'linux',
|
|
48
|
+
getClipboardCommand: () => null,
|
|
49
|
+
removeSocketFile: () => {},
|
|
50
|
+
migratePath: () => {},
|
|
51
|
+
migrateToWorkspaceLayout: () => {},
|
|
52
|
+
migrateToDataLayout: () => {},
|
|
53
|
+
}));
|
|
54
|
+
|
|
55
|
+
mock.module('../util/logger.js', () => ({
|
|
56
|
+
getLogger: () => new Proxy({} as Record<string, unknown>, {
|
|
57
|
+
get: () => () => {},
|
|
58
|
+
}),
|
|
59
|
+
isDebug: () => false,
|
|
60
|
+
truncateForLog: (v: string) => v,
|
|
61
|
+
}));
|
|
62
|
+
|
|
63
|
+
mock.module('../config/loader.js', () => ({
|
|
64
|
+
getConfig: () => currentConfig,
|
|
65
|
+
}));
|
|
66
|
+
|
|
67
|
+
mock.module('../config/user-reference.js', () => ({
|
|
68
|
+
resolveUserReference: () => 'TestUser',
|
|
69
|
+
}));
|
|
70
|
+
|
|
71
|
+
mock.module('../security/parental-control-store.js', () => ({
|
|
72
|
+
getParentalControlSettings: () => ({ enabled: false, contentRestrictions: [], blockedToolCategories: [] }),
|
|
73
|
+
}));
|
|
74
|
+
|
|
75
|
+
mock.module('../tools/credentials/metadata-store.js', () => ({
|
|
76
|
+
listCredentialMetadata: () => [],
|
|
77
|
+
}));
|
|
78
|
+
|
|
79
|
+
const { buildSystemPrompt } = await import('../config/system-prompt.js');
|
|
80
|
+
|
|
81
|
+
// ---------------------------------------------------------------------------
|
|
82
|
+
// Setup / Teardown
|
|
83
|
+
// ---------------------------------------------------------------------------
|
|
84
|
+
|
|
85
|
+
beforeEach(() => {
|
|
86
|
+
mkdirSync(TEST_DIR, { recursive: true });
|
|
87
|
+
// Reset config to defaults before each test
|
|
88
|
+
currentConfig = {
|
|
89
|
+
sandbox: { enabled: false, backend: 'native' },
|
|
90
|
+
featureFlags: {},
|
|
91
|
+
};
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
afterEach(() => {
|
|
95
|
+
if (existsSync(TEST_DIR)) {
|
|
96
|
+
rmSync(TEST_DIR, { recursive: true, force: true });
|
|
97
|
+
}
|
|
98
|
+
});
|
|
99
|
+
|
|
100
|
+
// ---------------------------------------------------------------------------
|
|
101
|
+
// Helpers
|
|
102
|
+
// ---------------------------------------------------------------------------
|
|
103
|
+
|
|
104
|
+
function createSkillOnDisk(id: string, name: string, description: string): void {
|
|
105
|
+
const skillsDir = join(TEST_DIR, 'skills');
|
|
106
|
+
mkdirSync(join(skillsDir, id), { recursive: true });
|
|
107
|
+
writeFileSync(
|
|
108
|
+
join(skillsDir, id, 'SKILL.md'),
|
|
109
|
+
`---\nname: "${name}"\ndescription: "${description}"\n---\n\nInstructions for ${id}.\n`,
|
|
110
|
+
);
|
|
111
|
+
// Ensure SKILLS.md index references the skill
|
|
112
|
+
const indexPath = join(skillsDir, 'SKILLS.md');
|
|
113
|
+
const existing = existsSync(indexPath) ? readFileSync(indexPath, 'utf-8') : '';
|
|
114
|
+
writeFileSync(indexPath, existing + `- ${id}\n`);
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
// ---------------------------------------------------------------------------
|
|
118
|
+
// System prompt — feature flag filtering
|
|
119
|
+
// ---------------------------------------------------------------------------
|
|
120
|
+
|
|
121
|
+
describe('buildSystemPrompt feature flag filtering', () => {
|
|
122
|
+
test('flag OFF skill does not appear in <available_skills> section', () => {
|
|
123
|
+
createSkillOnDisk(DECLARED_SKILL_ID, 'Hatch New Assistant', 'Toggle hatch new assistant behavior');
|
|
124
|
+
createSkillOnDisk('twitter', 'Twitter', 'Post to X/Twitter');
|
|
125
|
+
|
|
126
|
+
currentConfig = {
|
|
127
|
+
sandbox: { enabled: false, backend: 'native' },
|
|
128
|
+
featureFlags: { [DECLARED_LEGACY_KEY]: false },
|
|
129
|
+
};
|
|
130
|
+
|
|
131
|
+
const result = buildSystemPrompt();
|
|
132
|
+
|
|
133
|
+
// twitter should be visible, declared flagged skill should not
|
|
134
|
+
expect(result).toContain('id="twitter"');
|
|
135
|
+
expect(result).not.toContain(`id="${DECLARED_SKILL_ID}"`);
|
|
136
|
+
});
|
|
137
|
+
|
|
138
|
+
test('all skills visible when featureFlags is empty', () => {
|
|
139
|
+
createSkillOnDisk(DECLARED_SKILL_ID, 'Hatch New Assistant', 'Toggle hatch new assistant behavior');
|
|
140
|
+
createSkillOnDisk('twitter', 'Twitter', 'Post to X/Twitter');
|
|
141
|
+
|
|
142
|
+
currentConfig = {
|
|
143
|
+
sandbox: { enabled: false, backend: 'native' },
|
|
144
|
+
featureFlags: {},
|
|
145
|
+
};
|
|
146
|
+
|
|
147
|
+
const result = buildSystemPrompt();
|
|
148
|
+
|
|
149
|
+
expect(result).toContain(`id="${DECLARED_SKILL_ID}"`);
|
|
150
|
+
expect(result).toContain('id="twitter"');
|
|
151
|
+
});
|
|
152
|
+
|
|
153
|
+
test('flagged-off skills hidden even when all workspace skill flags are OFF', () => {
|
|
154
|
+
createSkillOnDisk(DECLARED_SKILL_ID, 'Hatch New Assistant', 'Toggle hatch new assistant behavior');
|
|
155
|
+
createSkillOnDisk('twitter', 'Twitter', 'Post to X/Twitter');
|
|
156
|
+
|
|
157
|
+
currentConfig = {
|
|
158
|
+
sandbox: { enabled: false, backend: 'native' },
|
|
159
|
+
featureFlags: {
|
|
160
|
+
[DECLARED_LEGACY_KEY]: false,
|
|
161
|
+
'skills.twitter.enabled': false,
|
|
162
|
+
},
|
|
163
|
+
};
|
|
164
|
+
|
|
165
|
+
const result = buildSystemPrompt();
|
|
166
|
+
|
|
167
|
+
// Both are hidden: declared skill via registry, undeclared via persisted override.
|
|
168
|
+
expect(result).not.toContain(`id="${DECLARED_SKILL_ID}"`);
|
|
169
|
+
expect(result).not.toContain('id="twitter"');
|
|
170
|
+
});
|
|
171
|
+
});
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
import { describe, expect, test } from 'bun:test';
|
|
2
|
+
|
|
3
|
+
import { isAssistantFeatureFlagEnabled } from '../config/assistant-feature-flags.js';
|
|
4
|
+
import type { AssistantConfig } from '../config/schema.js';
|
|
5
|
+
import { isSkillFeatureEnabled, resolveSkillStates } from '../config/skill-state.js';
|
|
6
|
+
import type { SkillSummary } from '../config/skills.js';
|
|
7
|
+
|
|
8
|
+
const DECLARED_FLAG_KEY = 'feature_flags.hatch-new-assistant.enabled';
|
|
9
|
+
const DECLARED_SKILL_ID = 'hatch-new-assistant';
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
// Helpers
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
|
|
14
|
+
/** Create a minimal AssistantConfig with optional feature flag values. */
|
|
15
|
+
function makeConfig(overrides: Partial<AssistantConfig> = {}): AssistantConfig {
|
|
16
|
+
return {
|
|
17
|
+
skills: {
|
|
18
|
+
entries: {},
|
|
19
|
+
load: { extraDirs: [], watch: true, watchDebounceMs: 250 },
|
|
20
|
+
install: { nodeManager: 'npm' },
|
|
21
|
+
allowBundled: null,
|
|
22
|
+
remoteProviders: { skillssh: { enabled: true }, clawhub: { enabled: true } },
|
|
23
|
+
remotePolicy: { blockSuspicious: true, blockMalware: true, maxSkillsShRisk: 'medium' },
|
|
24
|
+
},
|
|
25
|
+
...overrides,
|
|
26
|
+
} as AssistantConfig;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
/** Create a minimal SkillSummary for testing. */
|
|
30
|
+
function makeSkill(id: string, source: 'bundled' | 'managed' = 'bundled'): SkillSummary {
|
|
31
|
+
return {
|
|
32
|
+
id,
|
|
33
|
+
name: `${id} skill`,
|
|
34
|
+
description: `Description for ${id}`,
|
|
35
|
+
directoryPath: `/fake/skills/${id}`,
|
|
36
|
+
skillFilePath: `/fake/skills/${id}/SKILL.md`,
|
|
37
|
+
bundled: source === 'bundled',
|
|
38
|
+
userInvocable: true,
|
|
39
|
+
disableModelInvocation: false,
|
|
40
|
+
source,
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
45
|
+
// isSkillFeatureEnabled (legacy wrapper — backward compat)
|
|
46
|
+
// ---------------------------------------------------------------------------
|
|
47
|
+
|
|
48
|
+
describe('isSkillFeatureEnabled', () => {
|
|
49
|
+
test('returns true when no flag overrides', () => {
|
|
50
|
+
const config = makeConfig();
|
|
51
|
+
expect(isSkillFeatureEnabled(DECLARED_SKILL_ID, config)).toBe(true);
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
test('returns true when skill key is explicitly true', () => {
|
|
55
|
+
const config = makeConfig({
|
|
56
|
+
assistantFeatureFlagValues: { [DECLARED_FLAG_KEY]: true },
|
|
57
|
+
});
|
|
58
|
+
expect(isSkillFeatureEnabled(DECLARED_SKILL_ID, config)).toBe(true);
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
test('returns false when skill key is explicitly false', () => {
|
|
62
|
+
const config = makeConfig({
|
|
63
|
+
assistantFeatureFlagValues: { [DECLARED_FLAG_KEY]: false },
|
|
64
|
+
});
|
|
65
|
+
expect(isSkillFeatureEnabled(DECLARED_SKILL_ID, config)).toBe(false);
|
|
66
|
+
});
|
|
67
|
+
});
|
|
68
|
+
|
|
69
|
+
// ---------------------------------------------------------------------------
|
|
70
|
+
// isAssistantFeatureFlagEnabled (full canonical key)
|
|
71
|
+
// ---------------------------------------------------------------------------
|
|
72
|
+
|
|
73
|
+
describe('isAssistantFeatureFlagEnabled', () => {
|
|
74
|
+
test('returns true for unknown flags (open by default)', () => {
|
|
75
|
+
const config = makeConfig();
|
|
76
|
+
expect(isAssistantFeatureFlagEnabled('feature_flags.unknown.enabled', config)).toBe(true);
|
|
77
|
+
});
|
|
78
|
+
|
|
79
|
+
test('assistantFeatureFlagValues overrides registry default', () => {
|
|
80
|
+
const config = {
|
|
81
|
+
...makeConfig(),
|
|
82
|
+
assistantFeatureFlagValues: { [DECLARED_FLAG_KEY]: false },
|
|
83
|
+
} as AssistantConfig;
|
|
84
|
+
expect(isAssistantFeatureFlagEnabled(DECLARED_FLAG_KEY, config)).toBe(false);
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
test('falls back to registry default when no override', () => {
|
|
88
|
+
const config = makeConfig();
|
|
89
|
+
// hatch-new-assistant defaults to true in the registry
|
|
90
|
+
expect(isAssistantFeatureFlagEnabled(DECLARED_FLAG_KEY, config)).toBe(true);
|
|
91
|
+
});
|
|
92
|
+
|
|
93
|
+
test('respects persisted overrides for undeclared keys', () => {
|
|
94
|
+
const config = makeConfig({
|
|
95
|
+
assistantFeatureFlagValues: { 'feature_flags.browser.enabled': false },
|
|
96
|
+
});
|
|
97
|
+
expect(isAssistantFeatureFlagEnabled('feature_flags.browser.enabled', config)).toBe(false);
|
|
98
|
+
});
|
|
99
|
+
|
|
100
|
+
test('undeclared keys with no persisted override default to enabled', () => {
|
|
101
|
+
const config = makeConfig();
|
|
102
|
+
expect(isAssistantFeatureFlagEnabled('feature_flags.browser.enabled', config)).toBe(true);
|
|
103
|
+
});
|
|
104
|
+
});
|
|
105
|
+
|
|
106
|
+
// ---------------------------------------------------------------------------
|
|
107
|
+
// resolveSkillStates — feature flag filtering
|
|
108
|
+
// ---------------------------------------------------------------------------
|
|
109
|
+
|
|
110
|
+
describe('resolveSkillStates with feature flags', () => {
|
|
111
|
+
test('flag OFF skill does not appear in resolved list', () => {
|
|
112
|
+
const catalog = [makeSkill(DECLARED_SKILL_ID), makeSkill('twitter')];
|
|
113
|
+
const config = makeConfig({
|
|
114
|
+
assistantFeatureFlagValues: { [DECLARED_FLAG_KEY]: false },
|
|
115
|
+
});
|
|
116
|
+
|
|
117
|
+
const resolved = resolveSkillStates(catalog, config);
|
|
118
|
+
const ids = resolved.map((r) => r.summary.id);
|
|
119
|
+
|
|
120
|
+
expect(ids).not.toContain(DECLARED_SKILL_ID);
|
|
121
|
+
expect(ids).toContain('twitter');
|
|
122
|
+
});
|
|
123
|
+
|
|
124
|
+
test('flag ON skill appears normally', () => {
|
|
125
|
+
const catalog = [makeSkill(DECLARED_SKILL_ID), makeSkill('twitter')];
|
|
126
|
+
const config = makeConfig({
|
|
127
|
+
assistantFeatureFlagValues: {
|
|
128
|
+
[DECLARED_FLAG_KEY]: true,
|
|
129
|
+
'feature_flags.twitter.enabled': true,
|
|
130
|
+
},
|
|
131
|
+
});
|
|
132
|
+
|
|
133
|
+
const resolved = resolveSkillStates(catalog, config);
|
|
134
|
+
const ids = resolved.map((r) => r.summary.id);
|
|
135
|
+
|
|
136
|
+
expect(ids).toContain(DECLARED_SKILL_ID);
|
|
137
|
+
expect(ids).toContain('twitter');
|
|
138
|
+
});
|
|
139
|
+
|
|
140
|
+
test('missing flag key defaults to enabled', () => {
|
|
141
|
+
const catalog = [makeSkill(DECLARED_SKILL_ID)];
|
|
142
|
+
const config = makeConfig();
|
|
143
|
+
|
|
144
|
+
const resolved = resolveSkillStates(catalog, config);
|
|
145
|
+
expect(resolved.length).toBe(1);
|
|
146
|
+
expect(resolved[0].summary.id).toBe(DECLARED_SKILL_ID);
|
|
147
|
+
});
|
|
148
|
+
|
|
149
|
+
test('feature flag OFF takes precedence over user-enabled config entry', () => {
|
|
150
|
+
const catalog = [makeSkill(DECLARED_SKILL_ID)];
|
|
151
|
+
const config = makeConfig({
|
|
152
|
+
assistantFeatureFlagValues: { [DECLARED_FLAG_KEY]: false },
|
|
153
|
+
skills: {
|
|
154
|
+
entries: { [DECLARED_SKILL_ID]: { enabled: true } },
|
|
155
|
+
load: { extraDirs: [], watch: true, watchDebounceMs: 250 },
|
|
156
|
+
install: { nodeManager: 'npm' },
|
|
157
|
+
allowBundled: null,
|
|
158
|
+
remoteProviders: { skillssh: { enabled: true }, clawhub: { enabled: true } },
|
|
159
|
+
remotePolicy: { blockSuspicious: true, blockMalware: true, maxSkillsShRisk: 'medium' },
|
|
160
|
+
},
|
|
161
|
+
});
|
|
162
|
+
|
|
163
|
+
const resolved = resolveSkillStates(catalog, config);
|
|
164
|
+
// The skill should not appear at all — feature flag is a higher-priority gate
|
|
165
|
+
expect(resolved.length).toBe(0);
|
|
166
|
+
});
|
|
167
|
+
|
|
168
|
+
test('multiple skills with mixed flags — persisted overrides respected', () => {
|
|
169
|
+
const catalog = [
|
|
170
|
+
makeSkill(DECLARED_SKILL_ID),
|
|
171
|
+
makeSkill('twitter'),
|
|
172
|
+
makeSkill('deploy'),
|
|
173
|
+
];
|
|
174
|
+
const config = makeConfig({
|
|
175
|
+
assistantFeatureFlagValues: {
|
|
176
|
+
[DECLARED_FLAG_KEY]: false,
|
|
177
|
+
'feature_flags.deploy.enabled': false,
|
|
178
|
+
},
|
|
179
|
+
});
|
|
180
|
+
|
|
181
|
+
const resolved = resolveSkillStates(catalog, config);
|
|
182
|
+
const ids = resolved.map((r) => r.summary.id);
|
|
183
|
+
|
|
184
|
+
// Both declared (hatch-new-assistant) and undeclared (deploy) skills with
|
|
185
|
+
// persisted false overrides are filtered out; only twitter remains.
|
|
186
|
+
expect(ids).toEqual(['twitter']);
|
|
187
|
+
});
|
|
188
|
+
});
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests that skill_load rejects loading a skill whose feature flag is OFF
|
|
3
|
+
* with a deterministic error message.
|
|
4
|
+
*/
|
|
5
|
+
import { existsSync, mkdirSync, rmSync, writeFileSync } from 'node:fs';
|
|
6
|
+
import { tmpdir } from 'node:os';
|
|
7
|
+
import { join } from 'node:path';
|
|
8
|
+
|
|
9
|
+
import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test';
|
|
10
|
+
|
|
11
|
+
const TEST_DIR = join(tmpdir(), `vellum-skill-load-flag-test-${crypto.randomUUID()}`);
|
|
12
|
+
|
|
13
|
+
let currentConfig: Record<string, unknown> = {
|
|
14
|
+
featureFlags: {},
|
|
15
|
+
};
|
|
16
|
+
|
|
17
|
+
const DECLARED_SKILL_ID = 'hatch-new-assistant';
|
|
18
|
+
const DECLARED_LEGACY_KEY = 'skills.hatch-new-assistant.enabled';
|
|
19
|
+
|
|
20
|
+
const platformOverrides: Record<string, (...args: unknown[]) => unknown> = {
|
|
21
|
+
getRootDir: () => TEST_DIR,
|
|
22
|
+
getDataDir: () => TEST_DIR,
|
|
23
|
+
ensureDataDir: () => {},
|
|
24
|
+
getSocketPath: () => join(TEST_DIR, 'vellum.sock'),
|
|
25
|
+
getPidPath: () => join(TEST_DIR, 'vellum.pid'),
|
|
26
|
+
getDbPath: () => join(TEST_DIR, 'data', 'assistant.db'),
|
|
27
|
+
getLogPath: () => join(TEST_DIR, 'logs', 'vellum.log'),
|
|
28
|
+
getWorkspaceDir: () => join(TEST_DIR, 'workspace'),
|
|
29
|
+
getWorkspaceSkillsDir: () => join(TEST_DIR, 'skills'),
|
|
30
|
+
getWorkspaceConfigPath: () => join(TEST_DIR, 'workspace', 'config.json'),
|
|
31
|
+
getWorkspaceHooksDir: () => join(TEST_DIR, 'workspace', 'hooks'),
|
|
32
|
+
getWorkspacePromptPath: (f: unknown) => join(TEST_DIR, 'workspace', String(f)),
|
|
33
|
+
getInterfacesDir: () => join(TEST_DIR, 'interfaces'),
|
|
34
|
+
getHooksDir: () => join(TEST_DIR, 'hooks'),
|
|
35
|
+
getIpcBlobDir: () => join(TEST_DIR, 'blobs'),
|
|
36
|
+
getSandboxRootDir: () => join(TEST_DIR, 'sandbox'),
|
|
37
|
+
getSandboxWorkingDir: () => join(TEST_DIR, 'sandbox', 'work'),
|
|
38
|
+
getHistoryPath: () => join(TEST_DIR, 'history'),
|
|
39
|
+
getSessionTokenPath: () => join(TEST_DIR, 'session-token'),
|
|
40
|
+
readSessionToken: () => null,
|
|
41
|
+
getClipboardCommand: () => null,
|
|
42
|
+
isMacOS: () => process.platform === 'darwin',
|
|
43
|
+
isLinux: () => process.platform === 'linux',
|
|
44
|
+
isWindows: () => process.platform === 'win32',
|
|
45
|
+
getPlatformName: () => process.platform,
|
|
46
|
+
migratePath: () => {},
|
|
47
|
+
migrateToWorkspaceLayout: () => {},
|
|
48
|
+
migrateToDataLayout: () => {},
|
|
49
|
+
removeSocketFile: () => {},
|
|
50
|
+
};
|
|
51
|
+
mock.module('../util/platform.js', () => platformOverrides);
|
|
52
|
+
|
|
53
|
+
mock.module('../util/logger.js', () => ({
|
|
54
|
+
getLogger: () => new Proxy({} as Record<string, unknown>, {
|
|
55
|
+
get: () => () => {},
|
|
56
|
+
}),
|
|
57
|
+
}));
|
|
58
|
+
|
|
59
|
+
mock.module('../config/loader.js', () => ({
|
|
60
|
+
getConfig: () => currentConfig,
|
|
61
|
+
}));
|
|
62
|
+
|
|
63
|
+
await import('../tools/skills/load.js');
|
|
64
|
+
const { getTool } = await import('../tools/registry.js');
|
|
65
|
+
|
|
66
|
+
function writeSkill(skillId: string, name: string, description: string, body: string): void {
|
|
67
|
+
const skillDir = join(TEST_DIR, 'skills', skillId);
|
|
68
|
+
mkdirSync(skillDir, { recursive: true });
|
|
69
|
+
writeFileSync(
|
|
70
|
+
join(skillDir, 'SKILL.md'),
|
|
71
|
+
`---\nname: "${name}"\ndescription: "${description}"\n---\n\n${body}\n`,
|
|
72
|
+
);
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
async function executeSkillLoad(input: Record<string, unknown>): Promise<{ content: string; isError: boolean }> {
|
|
76
|
+
const tool = getTool('skill_load');
|
|
77
|
+
if (!tool) throw new Error('skill_load tool was not registered');
|
|
78
|
+
|
|
79
|
+
const result = await tool.execute(input, {
|
|
80
|
+
workingDir: '/tmp',
|
|
81
|
+
sessionId: 'session-1',
|
|
82
|
+
conversationId: 'conversation-1',
|
|
83
|
+
});
|
|
84
|
+
return { content: result.content, isError: result.isError };
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
describe('skill_load feature flag enforcement', () => {
|
|
88
|
+
beforeEach(() => {
|
|
89
|
+
mkdirSync(join(TEST_DIR, 'skills'), { recursive: true });
|
|
90
|
+
currentConfig = { featureFlags: {} };
|
|
91
|
+
});
|
|
92
|
+
|
|
93
|
+
afterEach(() => {
|
|
94
|
+
if (existsSync(TEST_DIR)) {
|
|
95
|
+
rmSync(TEST_DIR, { recursive: true, force: true });
|
|
96
|
+
}
|
|
97
|
+
});
|
|
98
|
+
|
|
99
|
+
test('returns deterministic error for flag OFF skill', async () => {
|
|
100
|
+
writeSkill(DECLARED_SKILL_ID, 'Hatch New Assistant', 'Toggle hatch new assistant behavior', 'Use the feature.');
|
|
101
|
+
writeFileSync(join(TEST_DIR, 'skills', 'SKILLS.md'), `- ${DECLARED_SKILL_ID}\n`);
|
|
102
|
+
|
|
103
|
+
currentConfig = {
|
|
104
|
+
featureFlags: { [DECLARED_LEGACY_KEY]: false },
|
|
105
|
+
};
|
|
106
|
+
|
|
107
|
+
const result = await executeSkillLoad({ skill: DECLARED_SKILL_ID });
|
|
108
|
+
|
|
109
|
+
expect(result.isError).toBe(true);
|
|
110
|
+
expect(result.content).toContain('disabled by feature flag');
|
|
111
|
+
expect(result.content).toContain(DECLARED_SKILL_ID);
|
|
112
|
+
});
|
|
113
|
+
|
|
114
|
+
test('loads skill normally when flag is ON', async () => {
|
|
115
|
+
writeSkill(DECLARED_SKILL_ID, 'Hatch New Assistant', 'Toggle hatch new assistant behavior', 'Use the feature.');
|
|
116
|
+
writeFileSync(join(TEST_DIR, 'skills', 'SKILLS.md'), `- ${DECLARED_SKILL_ID}\n`);
|
|
117
|
+
|
|
118
|
+
currentConfig = {
|
|
119
|
+
featureFlags: { [DECLARED_LEGACY_KEY]: true },
|
|
120
|
+
};
|
|
121
|
+
|
|
122
|
+
const result = await executeSkillLoad({ skill: DECLARED_SKILL_ID });
|
|
123
|
+
|
|
124
|
+
expect(result.isError).toBe(false);
|
|
125
|
+
expect(result.content).toContain('Skill: Hatch New Assistant');
|
|
126
|
+
});
|
|
127
|
+
|
|
128
|
+
test('loads skill normally when flag key is absent (defaults to enabled)', async () => {
|
|
129
|
+
writeSkill(DECLARED_SKILL_ID, 'Hatch New Assistant', 'Toggle hatch new assistant behavior', 'Use the feature.');
|
|
130
|
+
writeFileSync(join(TEST_DIR, 'skills', 'SKILLS.md'), `- ${DECLARED_SKILL_ID}\n`);
|
|
131
|
+
|
|
132
|
+
currentConfig = {
|
|
133
|
+
featureFlags: {},
|
|
134
|
+
};
|
|
135
|
+
|
|
136
|
+
const result = await executeSkillLoad({ skill: DECLARED_SKILL_ID });
|
|
137
|
+
|
|
138
|
+
expect(result.isError).toBe(false);
|
|
139
|
+
expect(result.content).toContain('Skill: Hatch New Assistant');
|
|
140
|
+
});
|
|
141
|
+
});
|
|
@@ -36,6 +36,7 @@ const TOPLEVEL_CATALOG = join(TOPLEVEL_SKILLS_DIR, 'catalog.json');
|
|
|
36
36
|
// ---------------------------------------------------------------------------
|
|
37
37
|
|
|
38
38
|
const TOPLEVEL_ONLY_SKILLS = new Set([
|
|
39
|
+
'doordash',
|
|
39
40
|
'google-oauth-setup',
|
|
40
41
|
'notion',
|
|
41
42
|
'notion-oauth-setup',
|