@vellumai/assistant 0.5.2 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +109 -0
- package/docs/skills.md +100 -0
- package/package.json +1 -1
- package/src/__tests__/conversation-agent-loop-overflow.test.ts +7 -0
- package/src/__tests__/conversation-agent-loop.test.ts +7 -0
- package/src/__tests__/conversation-memory-dirty-tail.test.ts +150 -0
- package/src/__tests__/conversation-provider-retry-repair.test.ts +7 -0
- package/src/__tests__/conversation-wipe.test.ts +226 -0
- package/src/__tests__/db-memory-archive-migration.test.ts +372 -0
- package/src/__tests__/db-memory-brief-state-migration.test.ts +213 -0
- package/src/__tests__/db-memory-reducer-checkpoints.test.ts +273 -0
- package/src/__tests__/inline-command-runner.test.ts +311 -0
- package/src/__tests__/inline-skill-authoring-guard.test.ts +220 -0
- package/src/__tests__/inline-skill-load-permissions.test.ts +435 -0
- package/src/__tests__/list-messages-attachments.test.ts +96 -0
- package/src/__tests__/memory-brief-open-loops.test.ts +530 -0
- package/src/__tests__/memory-brief-time.test.ts +285 -0
- package/src/__tests__/memory-brief-wrapper.test.ts +311 -0
- package/src/__tests__/memory-chunk-archive.test.ts +400 -0
- package/src/__tests__/memory-chunk-dual-write.test.ts +453 -0
- package/src/__tests__/memory-episode-archive.test.ts +370 -0
- package/src/__tests__/memory-episode-dual-write.test.ts +626 -0
- package/src/__tests__/memory-observation-archive.test.ts +375 -0
- package/src/__tests__/memory-observation-dual-write.test.ts +318 -0
- package/src/__tests__/memory-recall-quality.test.ts +2 -2
- package/src/__tests__/memory-reducer-store.test.ts +728 -0
- package/src/__tests__/memory-reducer-types.test.ts +699 -0
- package/src/__tests__/memory-reducer.test.ts +698 -0
- package/src/__tests__/memory-regressions.test.ts +6 -4
- package/src/__tests__/memory-simplified-config.test.ts +281 -0
- package/src/__tests__/parse-identity-fields.test.ts +129 -0
- package/src/__tests__/skill-load-inline-command.test.ts +598 -0
- package/src/__tests__/skill-load-inline-includes.test.ts +644 -0
- package/src/__tests__/skills-inline-command-expansions.test.ts +301 -0
- package/src/__tests__/skills-transitive-hash.test.ts +333 -0
- package/src/__tests__/vellum-self-knowledge-inline-command.test.ts +320 -0
- package/src/__tests__/workspace-migration-backfill-installation-id.test.ts +4 -4
- package/src/config/bundled-skills/app-builder/SKILL.md +8 -8
- package/src/config/bundled-skills/skill-management/SKILL.md +1 -1
- package/src/config/bundled-skills/skill-management/TOOLS.json +2 -2
- package/src/config/feature-flag-registry.json +16 -0
- package/src/config/loader.ts +1 -0
- package/src/config/raw-config-utils.ts +28 -0
- package/src/config/schema.ts +12 -0
- package/src/config/schemas/memory-simplified.ts +101 -0
- package/src/config/schemas/memory.ts +4 -0
- package/src/config/skills.ts +50 -4
- package/src/daemon/conversation-agent-loop-handlers.ts +8 -3
- package/src/daemon/conversation-agent-loop.ts +71 -1
- package/src/daemon/conversation-lifecycle.ts +11 -1
- package/src/daemon/conversation-runtime-assembly.ts +2 -1
- package/src/daemon/conversation-surfaces.ts +31 -8
- package/src/daemon/conversation.ts +40 -23
- package/src/daemon/handlers/config-embeddings.ts +10 -2
- package/src/daemon/handlers/config-model.ts +0 -9
- package/src/daemon/handlers/identity.ts +12 -1
- package/src/daemon/lifecycle.ts +9 -1
- package/src/daemon/message-types/conversations.ts +0 -1
- package/src/daemon/server.ts +1 -1
- package/src/followups/followup-store.ts +47 -1
- package/src/memory/archive-store.ts +400 -0
- package/src/memory/brief-formatting.ts +33 -0
- package/src/memory/brief-open-loops.ts +266 -0
- package/src/memory/brief-time.ts +161 -0
- package/src/memory/brief.ts +75 -0
- package/src/memory/conversation-crud.ts +245 -101
- package/src/memory/db-init.ts +12 -0
- package/src/memory/indexer.ts +106 -15
- package/src/memory/job-handlers/embedding.test.ts +1 -0
- package/src/memory/job-handlers/embedding.ts +83 -0
- package/src/memory/job-utils.ts +1 -1
- package/src/memory/jobs-store.ts +6 -0
- package/src/memory/jobs-worker.ts +12 -0
- package/src/memory/migrations/185-memory-brief-state.ts +52 -0
- package/src/memory/migrations/186-memory-archive.ts +109 -0
- package/src/memory/migrations/187-memory-reducer-checkpoints.ts +19 -0
- package/src/memory/migrations/index.ts +3 -0
- package/src/memory/qdrant-client.ts +23 -4
- package/src/memory/reducer-store.ts +271 -0
- package/src/memory/reducer-types.ts +99 -0
- package/src/memory/reducer.ts +453 -0
- package/src/memory/schema/conversations.ts +3 -0
- package/src/memory/schema/index.ts +2 -0
- package/src/memory/schema/memory-archive.ts +121 -0
- package/src/memory/schema/memory-brief.ts +55 -0
- package/src/memory/search/semantic.ts +17 -4
- package/src/oauth/oauth-store.ts +3 -1
- package/src/permissions/checker.ts +89 -6
- package/src/permissions/defaults.ts +14 -0
- package/src/runtime/routes/conversation-management-routes.ts +6 -0
- package/src/runtime/routes/conversation-query-routes.ts +7 -0
- package/src/runtime/routes/conversation-routes.ts +52 -5
- package/src/runtime/routes/identity-routes.ts +2 -35
- package/src/runtime/routes/llm-context-normalization.ts +14 -1
- package/src/runtime/routes/memory-item-routes.ts +90 -5
- package/src/runtime/routes/secret-routes.ts +2 -0
- package/src/runtime/routes/surface-action-routes.ts +68 -1
- package/src/schedule/schedule-store.ts +21 -0
- package/src/skills/inline-command-expansions.ts +204 -0
- package/src/skills/inline-command-render.ts +127 -0
- package/src/skills/inline-command-runner.ts +242 -0
- package/src/skills/transitive-version-hash.ts +88 -0
- package/src/tasks/task-store.ts +43 -1
- package/src/tools/permission-checker.ts +8 -1
- package/src/tools/skills/load.ts +140 -6
- package/src/util/platform.ts +18 -0
- package/src/workspace/migrations/{002-backfill-installation-id.ts → 011-backfill-installation-id.ts} +1 -1
- package/src/workspace/migrations/registry.ts +1 -1
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests that vellum-self-knowledge uses inline command expansion to inject
|
|
3
|
+
* the current assistant info at skill_load time.
|
|
4
|
+
*
|
|
5
|
+
* Validates that:
|
|
6
|
+
* - The `!\`bun run .../self-info.ts\`` token in SKILL.md is replaced by an
|
|
7
|
+
* `<inline_skill_command>` block containing the runner's output.
|
|
8
|
+
* - The rest of the skill body (architecture, config, references, critical rule)
|
|
9
|
+
* remains unchanged.
|
|
10
|
+
* - The inline command token does NOT appear verbatim in the loaded output
|
|
11
|
+
* (i.e. the model is never told to shell out manually).
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import {
|
|
15
|
+
copyFileSync,
|
|
16
|
+
existsSync,
|
|
17
|
+
mkdirSync,
|
|
18
|
+
mkdtempSync,
|
|
19
|
+
rmSync,
|
|
20
|
+
} from "node:fs";
|
|
21
|
+
import { tmpdir } from "node:os";
|
|
22
|
+
import { join } from "node:path";
|
|
23
|
+
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
|
24
|
+
|
|
25
|
+
// ── Paths ──────────────────────────────────────────────────────────────────
|
|
26
|
+
|
|
27
|
+
/** Scratch root for this test file; the mocked platform paths below live under it. */
const TEST_DIR = mkdtempSync(
  join(tmpdir(), "vellum-self-knowledge-inline-test-"),
);

/** Resolve the real skill directory so we can copy SKILL.md into the test. */
const SKILL_SRC_DIR = join(
  import.meta.dirname ?? __dirname,
  "..",
  "..",
  "..",
  "skills",
  "vellum-self-knowledge",
);

// ── Mocks (must be declared before any imports from the project) ──────────

/**
 * Replacement exports for `../util/platform.js`.
 *
 * Path getters resolve under TEST_DIR, token/lockfile/clipboard readers
 * return null, writers are no-ops, and the OS predicates mirror
 * `process.platform`.
 */
const platformOverrides: Record<string, (...args: unknown[]) => unknown> = {
  getRootDir: () => TEST_DIR,
  getDataDir: () => join(TEST_DIR, "data"),
  ensureDataDir: () => {},
  getPidPath: () => join(TEST_DIR, "vellum.pid"),
  getDbPath: () => join(TEST_DIR, "data", "assistant.db"),
  getLogPath: () => join(TEST_DIR, "logs", "vellum.log"),
  getWorkspaceDir: () => join(TEST_DIR, "workspace"),
  getWorkspaceSkillsDir: () => join(TEST_DIR, "skills"),
  getWorkspaceConfigPath: () => join(TEST_DIR, "workspace", "config.json"),
  getWorkspaceHooksDir: () => join(TEST_DIR, "workspace", "hooks"),
  getWorkspacePromptPath: (f: unknown) =>
    join(TEST_DIR, "workspace", String(f)),
  getInterfacesDir: () => join(TEST_DIR, "interfaces"),
  getHooksDir: () => join(TEST_DIR, "hooks"),
  getSandboxRootDir: () => join(TEST_DIR, "sandbox"),
  getSandboxWorkingDir: () => join(TEST_DIR, "sandbox", "work"),
  getHistoryPath: () => join(TEST_DIR, "history"),
  getSessionTokenPath: () => join(TEST_DIR, "session-token"),
  readSessionToken: () => null,
  getClipboardCommand: () => null,
  readLockfile: () => null,
  normalizeAssistantId: (id: unknown) => String(id),
  writeLockfile: () => {},
  getEmbeddingModelsDir: () => join(TEST_DIR, "embedding-models"),
  getTCPPort: () => 8765,
  isTCPEnabled: () => false,
  getTCPHost: () => "127.0.0.1",
  isIOSPairingEnabled: () => false,
  getPlatformTokenPath: () => join(TEST_DIR, "platform-token"),
  readPlatformToken: () => null,
  isMacOS: () => process.platform === "darwin",
  isLinux: () => process.platform === "linux",
  isWindows: () => process.platform === "win32",
  getPlatformName: () => process.platform,
  getWorkspaceDirDisplay: () => "~/.vellum/workspace",
  getConversationsDir: () => join(TEST_DIR, "conversations"),
};
mock.module("../util/platform.js", () => platformOverrides);
|
|
82
|
+
|
|
83
|
+
// Silence logging: any property read on the returned logger yields a no-op
// function, and truncateForLog simply stringifies its argument.
mock.module("../util/logger.js", () => ({
  getLogger: () =>
    new Proxy({} as Record<string, unknown>, {
      get: () => () => {},
    }),
  truncateForLog: (s: unknown) => String(s),
}));
|
|
90
|
+
|
|
91
|
+
// Track inline command runner calls

/** Arguments captured from one invocation of the mocked runner. */
interface RunInlineCommandCall {
  command: string;
  workingDir: string;
}

/** Every call seen by the mocked runner, in invocation order. */
const runInlineCommandCalls: RunInlineCommandCall[] = [];

/** Return type matching InlineCommandResult from the runner module. */
interface MockInlineCommandResult {
  output: string;
  ok: boolean;
  failureReason?:
    | "timeout"
    | "non_zero_exit"
    | "binary_output"
    | "spawn_failure";
}

/** Signature of the mocked runner: command plus working directory in, result out. */
type MockRunFn = (
  command: string,
  workingDir: string,
) => Promise<MockInlineCommandResult>;
|
|
113
|
+
|
|
114
|
+
// Default: commands succeed, returning a realistic self-info summary
const MOCK_SELF_INFO_OUTPUT =
  "You are running as Claude Opus 4.6 via Anthropic (your-own API key).";

/**
 * Current runner implementation. Declared with `let` so it can be swapped
 * for a fresh mock; the module mock below reads this binding on every call.
 */
let mockRunInlineCommand = mock<MockRunFn>(
  (command: string, workingDir: string) => {
    runInlineCommandCalls.push({ command, workingDir });
    return Promise.resolve({
      output: MOCK_SELF_INFO_OUTPUT,
      ok: true,
    });
  },
);

// Route the real runner export through the swappable mock; the third
// (options) argument is accepted but intentionally not forwarded.
mock.module("../skills/inline-command-runner.js", () => ({
  runInlineCommand: (command: string, workingDir: string, _options?: unknown) =>
    mockRunInlineCommand(command, workingDir),
}));
|
|
132
|
+
|
|
133
|
+
// Mock autoInstallFromCatalog: by default reports that nothing was installed,
// and the catalog itself resolves to an empty list.
const mockAutoInstall = mock((_skillId: string) => Promise.resolve(false));
mock.module("../skills/catalog-install.js", () => ({
  autoInstallFromCatalog: (skillId: string) => mockAutoInstall(skillId),
  resolveCatalog: (_skillId?: string) => Promise.resolve([]),
}));
|
|
139
|
+
|
|
140
|
+
/** Minimal config shape handed out by the mocked config loader. */
interface TestConfig {
  permissions: { mode: "strict" | "workspace" };
  skills: { load: { extraDirs: string[] } };
  sandbox: { enabled: boolean };
  assistantFeatureFlagValues?: Record<string, boolean>;
  [key: string]: unknown;
}

/**
 * Shared, mutable config object. The loader mock returns this same
 * reference, so later assignments to its fields are visible to the code
 * under test.
 */
const testConfig: TestConfig = {
  permissions: { mode: "workspace" },
  skills: { load: { extraDirs: [] } },
  sandbox: { enabled: true },
  assistantFeatureFlagValues: {
    "feature_flags.inline-skill-commands.enabled": true,
  },
};

// Config loader stand-in: reads return testConfig (or empty values),
// writes and cache invalidation are no-ops.
mock.module("../config/loader.js", () => ({
  getConfig: () => testConfig,
  loadConfig: () => testConfig,
  invalidateConfigCache: () => {},
  saveConfig: () => {},
  loadRawConfig: () => ({}),
  saveRawConfig: () => {},
  getNestedValue: () => undefined,
  setNestedValue: () => {},
}));
|
|
167
|
+
|
|
168
|
+
// ── Imports (after mocks) ────────────────────────────────────────────────
|
|
169
|
+
|
|
170
|
+
await import("../tools/skills/load.js");
|
|
171
|
+
const { getTool } = await import("../tools/registry.js");
|
|
172
|
+
|
|
173
|
+
// ── Helpers ──────────────────────────────────────────────────────────────
|
|
174
|
+
|
|
175
|
+
/**
 * Copy the real vellum-self-knowledge SKILL.md into the test skills dir.
 *
 * When the source skill has a `references/` directory, its `inference.md`
 * is copied as well so that the reference listing still works.
 */
function installSelfKnowledgeSkill(): void {
  const destDir = join(TEST_DIR, "skills", "vellum-self-knowledge");
  mkdirSync(destDir, { recursive: true });
  copyFileSync(join(SKILL_SRC_DIR, "SKILL.md"), join(destDir, "SKILL.md"));

  const refsSrc = join(SKILL_SRC_DIR, "references");
  if (!existsSync(refsSrc)) return;

  const refsDir = join(destDir, "references");
  mkdirSync(refsDir, { recursive: true });
  copyFileSync(join(refsSrc, "inference.md"), join(refsDir, "inference.md"));
}
|
|
188
|
+
|
|
189
|
+
/**
 * Run the registered `skill_load` tool with a fixed test context.
 *
 * @param input - Tool input, e.g. `{ skill: "vellum-self-knowledge" }`.
 * @param workingDir - Working directory passed in the tool context.
 * @returns The tool result's `content` and `isError` fields.
 * @throws Error if `skill_load` is not present in the tool registry.
 */
async function executeSkillLoad(
  input: Record<string, unknown>,
  workingDir = "/tmp",
): Promise<{ content: string; isError: boolean }> {
  const tool = getTool("skill_load");
  if (!tool) throw new Error("skill_load tool was not registered");

  const { content, isError } = await tool.execute(input, {
    workingDir,
    conversationId: "conversation-1",
    trustClass: "guardian",
  });
  return { content, isError };
}
|
|
203
|
+
|
|
204
|
+
// ── Tests ────────────────────────────────────────────────────────────────
|
|
205
|
+
|
|
206
|
+
describe("vellum-self-knowledge inline command expansion", () => {
  // Each test starts from a freshly installed skill, an empty call log, and
  // default mocks. TEST_DIR is removed in afterEach and recreated here by
  // the recursive mkdir.
  beforeEach(() => {
    mkdirSync(join(TEST_DIR, "skills"), { recursive: true });
    runInlineCommandCalls.length = 0;
    mockAutoInstall.mockReset();
    mockAutoInstall.mockImplementation(() => Promise.resolve(false));

    // Reset to default: commands succeed with self-info output
    mockRunInlineCommand = mock<MockRunFn>(
      (command: string, workingDir: string) => {
        runInlineCommandCalls.push({ command, workingDir });
        return Promise.resolve({
          output: MOCK_SELF_INFO_OUTPUT,
          ok: true,
        });
      },
    );
    mock.module("../skills/inline-command-runner.js", () => ({
      runInlineCommand: (
        command: string,
        workingDir: string,
        _options?: unknown,
      ) => mockRunInlineCommand(command, workingDir),
    }));

    // Enable the feature flag
    testConfig.assistantFeatureFlagValues = {
      "feature_flags.inline-skill-commands.enabled": true,
    };
    testConfig.skills = { load: { extraDirs: [] } };

    installSelfKnowledgeSkill();
  });

  afterEach(() => {
    if (existsSync(TEST_DIR)) {
      rmSync(TEST_DIR, { recursive: true, force: true });
    }
  });

  // ── Inline token replacement ─────────────────────────────────────────

  test("inline token is replaced by an <inline_skill_command> block", async () => {
    const result = await executeSkillLoad({ skill: "vellum-self-knowledge" });
    expect(result.isError).toBe(false);
    expect(result.content).toContain(
      `<inline_skill_command index="0">${MOCK_SELF_INFO_OUTPUT}</inline_skill_command>`,
    );
  });

  test("the raw inline token does not appear in the loaded output", async () => {
    const result = await executeSkillLoad({ skill: "vellum-self-knowledge" });
    expect(result.isError).toBe(false);
    // The original `!\`...\`` token must be fully replaced
    expect(result.content).not.toContain("!`bun run");
    expect(result.content).not.toContain("scripts/self-info.ts`");
  });

  test("the model is not told to shell out manually", async () => {
    const result = await executeSkillLoad({ skill: "vellum-self-knowledge" });
    expect(result.isError).toBe(false);
    // The old instruction "Always run this script" should be gone
    expect(result.content).not.toContain("Always run this script");
    // No code block instructing manual execution
    expect(result.content).not.toContain("```bash\nbun run");
  });

  // ── Runner invocation ────────────────────────────────────────────────

  test("invokes the inline command runner with the self-info script command", async () => {
    await executeSkillLoad({ skill: "vellum-self-knowledge" });
    expect(runInlineCommandCalls).toHaveLength(1);
    expect(runInlineCommandCalls[0].command).toContain("bun run");
    expect(runInlineCommandCalls[0].command).toContain("scripts/self-info.ts");
  });

  // ── Rest of skill body preserved ─────────────────────────────────────

  test("architecture section is preserved", async () => {
    const result = await executeSkillLoad({ skill: "vellum-self-knowledge" });
    expect(result.isError).toBe(false);
    expect(result.content).toContain("## Architecture at a Glance");
    expect(result.content).toContain("AgentLoop");
  });

  test("configuration section is preserved", async () => {
    const result = await executeSkillLoad({ skill: "vellum-self-knowledge" });
    expect(result.isError).toBe(false);
    expect(result.content).toContain("## Configuration System");
    expect(result.content).toContain("assistant config get");
  });

  test("references section is preserved", async () => {
    const result = await executeSkillLoad({ skill: "vellum-self-knowledge" });
    expect(result.isError).toBe(false);
    expect(result.content).toContain("## When to Consult References");
    expect(result.content).toContain("references/inference.md");
  });

  test("critical rule section is preserved", async () => {
    const result = await executeSkillLoad({ skill: "vellum-self-knowledge" });
    expect(result.isError).toBe(false);
    expect(result.content).toContain("## Critical Rule");
    expect(result.content).toContain(
      "populated at skill-load time and reflects the live configuration",
    );
  });

  test("what is vellum section is preserved", async () => {
    const result = await executeSkillLoad({ skill: "vellum-self-knowledge" });
    expect(result.isError).toBe(false);
    expect(result.content).toContain("## What is Vellum");
    expect(result.content).toContain("personal AI assistant platform");
  });
});
|
|
@@ -41,7 +41,7 @@ mock.module("../runtime/auth/external-assistant-id.js", () => ({
|
|
|
41
41
|
}));
|
|
42
42
|
|
|
43
43
|
// Import after mocking
|
|
44
|
-
import { backfillInstallationIdMigration } from "../workspace/migrations/002-backfill-installation-id.js";
|
|
44
|
+
import { backfillInstallationIdMigration } from "../workspace/migrations/011-backfill-installation-id.js";
|
|
45
45
|
|
|
46
46
|
// ---------------------------------------------------------------------------
|
|
47
47
|
// Helpers
|
|
@@ -68,7 +68,7 @@ function setupFs(fileContents: Record<string, string>): void {
|
|
|
68
68
|
// Tests
|
|
69
69
|
// ---------------------------------------------------------------------------
|
|
70
70
|
|
|
71
|
-
describe("002-backfill-installation-id migration", () => {
|
|
71
|
+
describe("011-backfill-installation-id migration", () => {
|
|
72
72
|
beforeEach(() => {
|
|
73
73
|
existsSyncFn.mockClear();
|
|
74
74
|
readFileSyncFn.mockClear();
|
|
@@ -320,9 +320,9 @@ describe("002-backfill-installation-id migration", () => {
|
|
|
320
320
|
expect(parsed.assistants[1].installationId).toBe("sqlite-id");
|
|
321
321
|
});
|
|
322
322
|
|
|
323
|
-
test("has migration id 002-backfill-installation-id", () => {
|
|
323
|
+
test("has migration id 011-backfill-installation-id", () => {
|
|
324
324
|
expect(backfillInstallationIdMigration.id).toBe(
|
|
325
|
-
"002-backfill-installation-id",
|
|
325
|
+
"011-backfill-installation-id",
|
|
326
326
|
);
|
|
327
327
|
});
|
|
328
328
|
});
|
|
@@ -20,10 +20,10 @@ You are an expert app builder and visual designer. When the user asks you to cre
|
|
|
20
20
|
|
|
21
21
|
## Filesystem Layout
|
|
22
22
|
|
|
23
|
-
Apps live under
|
|
23
|
+
Apps live under `{workspaceDir}/data/apps/`. Each app has a slug-based layout:
|
|
24
24
|
|
|
25
25
|
```
|
|
26
|
-
|
|
26
|
+
{workspaceDir}/data/apps/
|
|
27
27
|
<slug>.json # App metadata
|
|
28
28
|
<slug>/ # App directory (contains all app files)
|
|
29
29
|
index.html # Main page (entry point rendered in WebView)
|
|
@@ -190,20 +190,20 @@ useEffect(() => {
|
|
|
190
190
|
**Example - creating a multi-file project** (assuming app slug is `project-tracker`):
|
|
191
191
|
|
|
192
192
|
```
|
|
193
|
-
file_write("
|
|
193
|
+
file_write("{workspaceDir}/data/apps/project-tracker/src/index.html", `<!DOCTYPE html>
|
|
194
194
|
<html lang="en">
|
|
195
195
|
<head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
196
196
|
<title>Project Tracker</title></head>
|
|
197
197
|
<body><div id="app"></div></body>
|
|
198
198
|
</html>`)
|
|
199
199
|
|
|
200
|
-
file_write("
|
|
200
|
+
file_write("{workspaceDir}/data/apps/project-tracker/src/main.tsx", `import { render } from 'preact';
|
|
201
201
|
import { App } from './components/App';
|
|
202
202
|
import './styles.css';
|
|
203
203
|
|
|
204
204
|
render(<App />, document.getElementById('app')!);`)
|
|
205
205
|
|
|
206
|
-
file_write("
|
|
206
|
+
file_write("{workspaceDir}/data/apps/project-tracker/src/components/App.tsx", `import { FunctionComponent } from 'preact';
|
|
207
207
|
import { useState, useEffect } from 'preact/hooks';
|
|
208
208
|
import { Header } from './Header';
|
|
209
209
|
|
|
@@ -222,7 +222,7 @@ export const App: FunctionComponent = () => {
|
|
|
222
222
|
);
|
|
223
223
|
};`)
|
|
224
224
|
|
|
225
|
-
file_write("
|
|
225
|
+
file_write("{workspaceDir}/data/apps/project-tracker/src/components/Header.tsx", `import { FunctionComponent } from 'preact';
|
|
226
226
|
|
|
227
227
|
interface HeaderProps {
|
|
228
228
|
title: string;
|
|
@@ -236,7 +236,7 @@ export const Header: FunctionComponent<HeaderProps> = ({ title, count }) => (
|
|
|
236
236
|
</header>
|
|
237
237
|
);`)
|
|
238
238
|
|
|
239
|
-
file_write("
|
|
239
|
+
file_write("{workspaceDir}/data/apps/project-tracker/src/styles.css", `.app { padding: var(--v-spacing-lg); }
|
|
240
240
|
.header { display: flex; justify-content: space-between; align-items: center; }
|
|
241
241
|
.badge { background: var(--v-accent); color: white; padding: var(--v-spacing-xs) var(--v-spacing-sm); border-radius: var(--v-radius-pill); }`)
|
|
242
242
|
|
|
@@ -532,7 +532,7 @@ The app is NOT opened in a workspace panel automatically - users open it via the
|
|
|
532
532
|
|
|
533
533
|
When the user requests changes, prefer **`file_edit`** over rewriting the entire file.
|
|
534
534
|
|
|
535
|
-
- **`file_edit`** - preferred for targeted changes (styles, bugs, features). Provide the full file path (e.g.
|
|
535
|
+
- **`file_edit`** - preferred for targeted changes (styles, bugs, features). Provide the full file path (e.g. `{workspaceDir}/data/apps/<slug>/src/components/App.tsx`).
|
|
536
536
|
- **`file_write`** - for creating new files or full rewrites.
|
|
537
537
|
- **`app_refresh`** - call ONCE after all file changes are complete to trigger compilation and surface refresh.
|
|
538
538
|
- For metadata changes (`name`, `description`, `schemaJson`, etc.), edit the `<slug>.json` file directly with `file_edit`, then call `app_refresh`.
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"tools": [
|
|
4
4
|
{
|
|
5
5
|
"name": "scaffold_managed_skill",
|
|
6
|
-
"description": "Create or update a managed skill in
|
|
6
|
+
"description": "Create or update a managed skill in {workspaceDir}/skills. The skill becomes available for skill_load immediately. Never persist a skill without explicit user consent. Before persisting, test the snippet: write to a temp file with bash and run with `bun run /tmp/vellum-eval/snippet.ts`. Iterate up to 3 attempts, then ask the user. Clean up temp files after. Do not use file_write for temp files outside the working directory. After a skill is written, the next turn may run in a recreated conversation due to file-watcher eviction - continue normally.",
|
|
7
7
|
"category": "skills",
|
|
8
8
|
"risk": "high",
|
|
9
9
|
"input_schema": {
|
|
@@ -54,7 +54,7 @@
|
|
|
54
54
|
},
|
|
55
55
|
{
|
|
56
56
|
"name": "delete_managed_skill",
|
|
57
|
-
"description": "Delete a managed skill from
|
|
57
|
+
"description": "Delete a managed skill from {workspaceDir}/skills and remove it from the SKILLS.md index. Never delete a skill without explicit user confirmation. After deletion, the next turn may run in a recreated conversation due to file-watcher eviction - continue normally.",
|
|
58
58
|
"category": "skills",
|
|
59
59
|
"risk": "high",
|
|
60
60
|
"input_schema": {
|
|
@@ -257,6 +257,14 @@
|
|
|
257
257
|
"description": "Show the Google OAuth service card in Models & Services settings",
|
|
258
258
|
"defaultEnabled": false
|
|
259
259
|
},
|
|
260
|
+
{
|
|
261
|
+
"id": "settings-embedding-provider",
|
|
262
|
+
"scope": "assistant",
|
|
263
|
+
"key": "feature_flags.settings-embedding-provider.enabled",
|
|
264
|
+
"label": "Embedding Provider Settings",
|
|
265
|
+
"description": "Show the Embedding service card in Models & Services settings",
|
|
266
|
+
"defaultEnabled": false
|
|
267
|
+
},
|
|
260
268
|
{
|
|
261
269
|
"id": "quick-input",
|
|
262
270
|
"scope": "macos",
|
|
@@ -272,6 +280,14 @@
|
|
|
272
280
|
"label": "Expand Completed Steps",
|
|
273
281
|
"description": "Auto-expand completed tool call step groups instead of showing them collapsed",
|
|
274
282
|
"defaultEnabled": false
|
|
283
|
+
},
|
|
284
|
+
{
|
|
285
|
+
"id": "inline-skill-commands",
|
|
286
|
+
"scope": "assistant",
|
|
287
|
+
"key": "feature_flags.inline-skill-commands.enabled",
|
|
288
|
+
"label": "Inline Skill Command Expansion",
|
|
289
|
+
"description": "Enable secure inline skill command expansion via !`command` syntax, with version-pinned approval and sandboxed execution at skill load time",
|
|
290
|
+
"defaultEnabled": true
|
|
275
291
|
}
|
|
276
292
|
]
|
|
277
293
|
}
|
package/src/config/loader.ts
CHANGED
|
@@ -58,3 +58,31 @@ export function setMemoryEmbeddingField(
|
|
|
58
58
|
memory.embeddings = embeddings;
|
|
59
59
|
raw.memory = memory;
|
|
60
60
|
}
|
|
61
|
+
|
|
62
|
+
/**
 * Safely delete a nested field from a raw config object's `memory.embeddings`
 * map, allowing Zod schema defaults to take effect on the next config reload.
 *
 * The call is a no-op when `raw.memory` or `raw.memory.embeddings` is
 * missing, null, an array, or not an object. `raw` is mutated in place.
 *
 * @param raw - Raw (unvalidated) config object.
 * @param field - Key to remove from `raw.memory.embeddings`.
 */
export function deleteMemoryEmbeddingField(
  raw: Record<string, unknown>,
  field: string,
): void {
  // Local guard so both nesting levels share one definition of "plain object".
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    value != null && typeof value === "object" && !Array.isArray(value);

  const memory = raw.memory;
  if (!isRecord(memory)) return;

  const embeddings = memory.embeddings;
  if (!isRecord(embeddings)) return;

  delete embeddings[field];
}
|
package/src/config/schema.ts
CHANGED
|
@@ -106,6 +106,18 @@ export {
|
|
|
106
106
|
MemoryDynamicBudgetConfigSchema,
|
|
107
107
|
MemoryRetrievalConfigSchema,
|
|
108
108
|
} from "./schemas/memory-retrieval.js";
|
|
109
|
+
export type {
|
|
110
|
+
MemorySimplifiedArchiveRecallConfig,
|
|
111
|
+
MemorySimplifiedBriefConfig,
|
|
112
|
+
MemorySimplifiedConfig,
|
|
113
|
+
MemorySimplifiedReducerConfig,
|
|
114
|
+
} from "./schemas/memory-simplified.js";
|
|
115
|
+
export {
|
|
116
|
+
MemorySimplifiedArchiveRecallConfigSchema,
|
|
117
|
+
MemorySimplifiedBriefConfigSchema,
|
|
118
|
+
MemorySimplifiedConfigSchema,
|
|
119
|
+
MemorySimplifiedReducerConfigSchema,
|
|
120
|
+
} from "./schemas/memory-simplified.js";
|
|
109
121
|
export type {
|
|
110
122
|
MemoryEmbeddingsConfig,
|
|
111
123
|
MemorySegmentationConfig,
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
|
|
3
|
+
/**
 * Schema for `memory.simplified.brief`.
 *
 * `maxTokens` must be a positive integer and defaults to 4000.
 */
export const MemorySimplifiedBriefConfigSchema = z
  .object({
    maxTokens: z
      .number({
        error: "memory.simplified.brief.maxTokens must be a number",
      })
      .int("memory.simplified.brief.maxTokens must be an integer")
      .positive("memory.simplified.brief.maxTokens must be a positive integer")
      .default(4000)
      .describe(
        "Maximum token budget for the memory brief injected into conversation context",
      ),
  })
  .describe("Controls the memory brief that is injected into conversations");
|
|
17
|
+
|
|
18
|
+
/**
 * Schema for `memory.simplified.reducer`.
 *
 * Both delays are positive integers in milliseconds: `idleDelayMs`
 * defaults to 30_000 and `switchWaitMs` defaults to 5_000.
 */
export const MemorySimplifiedReducerConfigSchema = z
  .object({
    idleDelayMs: z
      .number({
        error: "memory.simplified.reducer.idleDelayMs must be a number",
      })
      .int("memory.simplified.reducer.idleDelayMs must be an integer")
      .positive(
        "memory.simplified.reducer.idleDelayMs must be a positive integer",
      )
      .default(30_000)
      .describe(
        "Milliseconds of idle time before the reducer processes new conversation turns into memory",
      ),
    switchWaitMs: z
      .number({
        error: "memory.simplified.reducer.switchWaitMs must be a number",
      })
      .int("memory.simplified.reducer.switchWaitMs must be an integer")
      .positive(
        "memory.simplified.reducer.switchWaitMs must be a positive integer",
      )
      .default(5_000)
      .describe(
        "Milliseconds to wait after a conversation switch before running the reducer",
      ),
  })
  .describe(
    "Controls when the memory reducer runs to process conversation turns into persistent memory",
  );
|
|
48
|
+
|
|
49
|
+
/**
 * Schema for `memory.simplified.archiveRecall`.
 *
 * `maxSnippets` must be a positive integer and defaults to 10.
 */
export const MemorySimplifiedArchiveRecallConfigSchema = z
  .object({
    maxSnippets: z
      .number({
        error: "memory.simplified.archiveRecall.maxSnippets must be a number",
      })
      .int("memory.simplified.archiveRecall.maxSnippets must be an integer")
      .positive(
        "memory.simplified.archiveRecall.maxSnippets must be a positive integer",
      )
      .default(10)
      .describe(
        "Maximum number of archive snippets to recall when supplementing the brief with semantic search",
      ),
  })
  .describe(
    "Controls how archived memory snippets are recalled via semantic search",
  );
|
|
67
|
+
|
|
68
|
+
/**
 * Schema for `memory.simplified`.
 *
 * `enabled` defaults to false. Each nested section defaults to the result
 * of parsing an empty object with its own schema, so an omitted section
 * still receives that section's field defaults.
 */
export const MemorySimplifiedConfigSchema = z
  .object({
    enabled: z
      .boolean({
        error: "memory.simplified.enabled must be a boolean",
      })
      .default(false)
      .describe("Whether the simplified memory system is enabled"),
    brief: MemorySimplifiedBriefConfigSchema.default(
      MemorySimplifiedBriefConfigSchema.parse({}),
    ),
    reducer: MemorySimplifiedReducerConfigSchema.default(
      MemorySimplifiedReducerConfigSchema.parse({}),
    ),
    archiveRecall: MemorySimplifiedArchiveRecallConfigSchema.default(
      MemorySimplifiedArchiveRecallConfigSchema.parse({}),
    ),
  })
  .describe(
    "Simplified two-layer memory system — a brief plus archive recall, replacing the legacy item/tier/staleness model",
  );
|
|
89
|
+
|
|
90
|
+
/** Parsed shape of `memory.simplified`, inferred from its schema. */
export type MemorySimplifiedConfig = z.infer<
  typeof MemorySimplifiedConfigSchema
>;
/** Parsed shape of `memory.simplified.brief`, inferred from its schema. */
export type MemorySimplifiedBriefConfig = z.infer<
  typeof MemorySimplifiedBriefConfigSchema
>;
/** Parsed shape of `memory.simplified.reducer`, inferred from its schema. */
export type MemorySimplifiedReducerConfig = z.infer<
  typeof MemorySimplifiedReducerConfigSchema
>;
/** Parsed shape of `memory.simplified.archiveRecall`, inferred from its schema. */
export type MemorySimplifiedArchiveRecallConfig = z.infer<
  typeof MemorySimplifiedArchiveRecallConfigSchema
>;
|
|
@@ -10,6 +10,7 @@ import {
|
|
|
10
10
|
MemorySummarizationConfigSchema,
|
|
11
11
|
} from "./memory-processing.js";
|
|
12
12
|
import { MemoryRetrievalConfigSchema } from "./memory-retrieval.js";
|
|
13
|
+
import { MemorySimplifiedConfigSchema } from "./memory-simplified.js";
|
|
13
14
|
import {
|
|
14
15
|
MemoryEmbeddingsConfigSchema,
|
|
15
16
|
MemorySegmentationConfigSchema,
|
|
@@ -45,6 +46,9 @@ export const MemoryConfigSchema = z
|
|
|
45
46
|
summarization: MemorySummarizationConfigSchema.default(
|
|
46
47
|
MemorySummarizationConfigSchema.parse({}),
|
|
47
48
|
),
|
|
49
|
+
simplified: MemorySimplifiedConfigSchema.default(
|
|
50
|
+
MemorySimplifiedConfigSchema.parse({}),
|
|
51
|
+
),
|
|
48
52
|
})
|
|
49
53
|
.describe(
|
|
50
54
|
"Long-term memory system — stores, retrieves, and manages persistent knowledge across conversations",
|