@vellumai/assistant 0.4.57 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/__tests__/assistant-feature-flags-integration.test.ts +7 -9
- package/src/__tests__/conversation-runtime-assembly.test.ts +28 -21
- package/src/__tests__/credential-execution-feature-gates.test.ts +3 -3
- package/src/__tests__/encrypted-store.test.ts +24 -12
- package/src/__tests__/file-read-tool.test.ts +40 -0
- package/src/__tests__/filesystem-tools.test.ts +4 -2
- package/src/__tests__/history-repair.test.ts +71 -0
- package/src/__tests__/host-file-read-tool.test.ts +87 -0
- package/src/__tests__/identity-intro-cache.test.ts +209 -0
- package/src/__tests__/model-intents.test.ts +1 -1
- package/src/__tests__/non-member-access-request.test.ts +3 -3
- package/src/__tests__/skill-feature-flags-integration.test.ts +18 -17
- package/src/__tests__/skill-feature-flags.test.ts +13 -13
- package/src/__tests__/skill-load-feature-flag.test.ts +4 -4
- package/src/__tests__/skill-memory.test.ts +14 -12
- package/src/__tests__/system-prompt.test.ts +8 -0
- package/src/config/feature-flag-registry.json +9 -1
- package/src/daemon/conversation-agent-loop-handlers.ts +2 -39
- package/src/daemon/conversation-runtime-assembly.ts +4 -3
- package/src/daemon/history-repair.ts +28 -8
- package/src/daemon/trace-emitter.ts +3 -2
- package/src/memory/search/staleness.ts +4 -1
- package/src/notifications/decision-engine.ts +43 -2
- package/src/notifications/emit-signal.ts +1 -0
- package/src/permissions/checker.ts +0 -20
- package/src/prompts/system-prompt.ts +2 -0
- package/src/prompts/templates/BOOTSTRAP.md +10 -4
- package/src/prompts/templates/IDENTITY.md +1 -2
- package/src/providers/anthropic/client.ts +5 -17
- package/src/runtime/access-request-helper.ts +15 -1
- package/src/runtime/guardian-vellum-migration.ts +1 -3
- package/src/runtime/routes/btw-routes.ts +84 -0
- package/src/runtime/routes/identity-intro-cache.ts +105 -0
- package/src/runtime/routes/identity-routes.ts +51 -0
- package/src/runtime/routes/settings-routes.ts +1 -1
- package/src/security/encrypted-store.ts +1 -2
- package/src/skills/skill-memory.ts +5 -3
- package/src/telemetry/usage-telemetry-reporter.test.ts +6 -1
- package/src/telemetry/usage-telemetry-reporter.ts +2 -0
- package/src/tools/filesystem/read.ts +14 -3
- package/src/tools/host-filesystem/read.ts +17 -1
- package/src/tools/shared/filesystem/format-diff.ts +4 -16
- package/src/util/pricing.ts +4 -0
|
@@ -623,7 +623,7 @@ describe("access-request-helper unit tests", () => {
|
|
|
623
623
|
expect(telegram!.status).toBe("sent");
|
|
624
624
|
});
|
|
625
625
|
|
|
626
|
-
test("notifyGuardianOfAccessRequest
|
|
626
|
+
test("notifyGuardianOfAccessRequest skips vellum fallback for same-channel-only routing (telegram)", async () => {
|
|
627
627
|
mockEmitResult = {
|
|
628
628
|
signalId: "sig-no-vellum",
|
|
629
629
|
deduplicated: false,
|
|
@@ -657,8 +657,8 @@ describe("access-request-helper unit tests", () => {
|
|
|
657
657
|
(d) => d.destinationChannel === "telegram",
|
|
658
658
|
);
|
|
659
659
|
|
|
660
|
-
|
|
661
|
-
expect(vellum
|
|
660
|
+
// Same-channel routing skips vellum delivery entirely — no fallback record
|
|
661
|
+
expect(vellum).toBeUndefined();
|
|
662
662
|
expect(telegram).toBeDefined();
|
|
663
663
|
expect(telegram!.destinationChatId).toBe("guardian-chat-456");
|
|
664
664
|
expect(telegram!.status).toBe("sent");
|
|
@@ -138,14 +138,15 @@ describe("frontmatter feature-flag integration", () => {
|
|
|
138
138
|
expect(key).toBeUndefined();
|
|
139
139
|
});
|
|
140
140
|
|
|
141
|
-
test("resolveSkillStates
|
|
141
|
+
test("resolveSkillStates includes skill with featureFlag when flag defaults to ON", () => {
|
|
142
142
|
const skill = buildSkillSummary("contacts", SKILL_MD_WITH_FLAG)!;
|
|
143
|
-
// "contacts" is in the registry with defaultEnabled:
|
|
143
|
+
// "contacts" is in the registry with defaultEnabled: true
|
|
144
144
|
const config = makeConfig();
|
|
145
145
|
|
|
146
146
|
const resolved = resolveSkillStates([skill], config);
|
|
147
|
-
// Flag defaults to
|
|
148
|
-
expect(resolved.length).toBe(
|
|
147
|
+
// Flag defaults to true → skill passes through
|
|
148
|
+
expect(resolved.length).toBe(1);
|
|
149
|
+
expect(resolved[0].summary.id).toBe("contacts");
|
|
149
150
|
});
|
|
150
151
|
|
|
151
152
|
test("resolveSkillStates includes skill with featureFlag when flag is ON", () => {
|
|
@@ -192,22 +193,22 @@ describe("frontmatter feature-flag integration", () => {
|
|
|
192
193
|
const key = skillFlagKey(skill);
|
|
193
194
|
expect(key).toBe("feature_flags.contacts.enabled");
|
|
194
195
|
|
|
195
|
-
// Step 4: Check flag state — "contacts" has defaultEnabled:
|
|
196
|
-
const
|
|
197
|
-
expect(isAssistantFeatureFlagEnabled(key!,
|
|
196
|
+
// Step 4: Check flag state — "contacts" has defaultEnabled: true in registry
|
|
197
|
+
const configDefault = makeConfig();
|
|
198
|
+
expect(isAssistantFeatureFlagEnabled(key!, configDefault)).toBe(true);
|
|
198
199
|
|
|
199
|
-
// Step 5: resolveSkillStates
|
|
200
|
-
const
|
|
201
|
-
expect(
|
|
200
|
+
// Step 5: resolveSkillStates includes it by default
|
|
201
|
+
const resolvedDefault = resolveSkillStates([skill], configDefault);
|
|
202
|
+
expect(resolvedDefault.length).toBe(1);
|
|
203
|
+
expect(resolvedDefault[0].summary.id).toBe("contacts");
|
|
202
204
|
|
|
203
|
-
// Step 6: With override
|
|
204
|
-
const
|
|
205
|
-
assistantFeatureFlagValues: { [key!]:
|
|
205
|
+
// Step 6: With override disabled, skill is filtered out
|
|
206
|
+
const configOff = makeConfig({
|
|
207
|
+
assistantFeatureFlagValues: { [key!]: false },
|
|
206
208
|
});
|
|
207
|
-
expect(isAssistantFeatureFlagEnabled(key!,
|
|
209
|
+
expect(isAssistantFeatureFlagEnabled(key!, configOff)).toBe(false);
|
|
208
210
|
|
|
209
|
-
const
|
|
210
|
-
expect(
|
|
211
|
-
expect(resolvedOn[0].summary.id).toBe("contacts");
|
|
211
|
+
const resolvedOff = resolveSkillStates([skill], configOff);
|
|
212
|
+
expect(resolvedOff.length).toBe(0);
|
|
212
213
|
});
|
|
213
214
|
});
|
|
@@ -81,14 +81,14 @@ describe("skillFlagKey", () => {
|
|
|
81
81
|
// ---------------------------------------------------------------------------
|
|
82
82
|
|
|
83
83
|
describe("isAssistantFeatureFlagEnabled with skillFlagKey", () => {
|
|
84
|
-
test("returns
|
|
84
|
+
test("returns true when no flag overrides (registry default is true)", () => {
|
|
85
85
|
const config = makeConfig();
|
|
86
86
|
expect(
|
|
87
87
|
isAssistantFeatureFlagEnabled(
|
|
88
88
|
skillFlagKey({ featureFlag: DECLARED_FLAG_ID })!,
|
|
89
89
|
config,
|
|
90
90
|
),
|
|
91
|
-
).toBe(
|
|
91
|
+
).toBe(true);
|
|
92
92
|
});
|
|
93
93
|
|
|
94
94
|
test("returns true when skill key is explicitly true", () => {
|
|
@@ -140,10 +140,8 @@ describe("isAssistantFeatureFlagEnabled", () => {
|
|
|
140
140
|
|
|
141
141
|
test("falls back to registry default when no override", () => {
|
|
142
142
|
const config = makeConfig();
|
|
143
|
-
// contacts defaults to
|
|
144
|
-
expect(isAssistantFeatureFlagEnabled(DECLARED_FLAG_KEY, config)).toBe(
|
|
145
|
-
false,
|
|
146
|
-
);
|
|
143
|
+
// contacts defaults to true in the registry
|
|
144
|
+
expect(isAssistantFeatureFlagEnabled(DECLARED_FLAG_KEY, config)).toBe(true);
|
|
147
145
|
});
|
|
148
146
|
|
|
149
147
|
test("respects persisted overrides for undeclared keys", () => {
|
|
@@ -207,13 +205,14 @@ describe("resolveSkillStates with feature flags", () => {
|
|
|
207
205
|
expect(ids).toContain("browser");
|
|
208
206
|
});
|
|
209
207
|
|
|
210
|
-
test("declared flag key defaults to registry value (
|
|
208
|
+
test("declared flag key defaults to registry value (true)", () => {
|
|
211
209
|
const catalog = [makeSkill(DECLARED_SKILL_ID, "bundled", DECLARED_FLAG_ID)];
|
|
212
210
|
const config = makeConfig();
|
|
213
211
|
|
|
214
212
|
const resolved = resolveSkillStates(catalog, config);
|
|
215
|
-
// contacts registry default is
|
|
216
|
-
expect(resolved.length).toBe(
|
|
213
|
+
// contacts registry default is true, so it passes through
|
|
214
|
+
expect(resolved.length).toBe(1);
|
|
215
|
+
expect(resolved[0].summary.id).toBe(DECLARED_SKILL_ID);
|
|
217
216
|
});
|
|
218
217
|
|
|
219
218
|
test("skill without featureFlag is never flag-gated", () => {
|
|
@@ -280,14 +279,15 @@ describe("resolveSkillStates with feature flags", () => {
|
|
|
280
279
|
// ---------------------------------------------------------------------------
|
|
281
280
|
|
|
282
281
|
describe("resolveSkillStates with frontmatter featureFlag", () => {
|
|
283
|
-
test("skill with featureFlag (defaultEnabled:
|
|
284
|
-
// contacts has defaultEnabled:
|
|
282
|
+
test("skill with featureFlag (defaultEnabled: true) is included when no config override", () => {
|
|
283
|
+
// contacts has defaultEnabled: true in the registry
|
|
285
284
|
const catalog = [makeSkill(DECLARED_SKILL_ID, "bundled", DECLARED_FLAG_ID)];
|
|
286
285
|
const config = makeConfig();
|
|
287
286
|
|
|
288
287
|
const resolved = resolveSkillStates(catalog, config);
|
|
289
|
-
// No override, registry default is
|
|
290
|
-
expect(resolved.length).toBe(
|
|
288
|
+
// No override, registry default is true → passes through
|
|
289
|
+
expect(resolved.length).toBe(1);
|
|
290
|
+
expect(resolved[0].summary.id).toBe(DECLARED_SKILL_ID);
|
|
291
291
|
});
|
|
292
292
|
|
|
293
293
|
test("skill with featureFlag is included when config override enables it", () => {
|
|
@@ -166,7 +166,7 @@ describe("skill_load feature flag enforcement", () => {
|
|
|
166
166
|
expect(result.content).toContain("Skill: Contacts");
|
|
167
167
|
});
|
|
168
168
|
|
|
169
|
-
test("
|
|
169
|
+
test("loads skill when flag key is absent (registry defaults to enabled)", async () => {
|
|
170
170
|
writeSkill(
|
|
171
171
|
DECLARED_SKILL_ID,
|
|
172
172
|
"Contacts",
|
|
@@ -184,8 +184,8 @@ describe("skill_load feature flag enforcement", () => {
|
|
|
184
184
|
|
|
185
185
|
const result = await executeSkillLoad({ skill: DECLARED_SKILL_ID });
|
|
186
186
|
|
|
187
|
-
// contacts is declared in the registry with defaultEnabled:
|
|
188
|
-
expect(result.isError).toBe(
|
|
189
|
-
expect(result.content).toContain("
|
|
187
|
+
// contacts is declared in the registry with defaultEnabled: true
|
|
188
|
+
expect(result.isError).toBe(false);
|
|
189
|
+
expect(result.content).toContain("Skill: Contacts");
|
|
190
190
|
});
|
|
191
191
|
});
|
|
@@ -1,14 +1,7 @@
|
|
|
1
1
|
import { mkdtempSync, rmSync } from "node:fs";
|
|
2
2
|
import { tmpdir } from "node:os";
|
|
3
3
|
import { join } from "node:path";
|
|
4
|
-
import {
|
|
5
|
-
afterAll,
|
|
6
|
-
beforeEach,
|
|
7
|
-
describe,
|
|
8
|
-
expect,
|
|
9
|
-
mock,
|
|
10
|
-
test,
|
|
11
|
-
} from "bun:test";
|
|
4
|
+
import { afterAll, beforeEach, describe, expect, mock, test } from "bun:test";
|
|
12
5
|
|
|
13
6
|
import { eq } from "drizzle-orm";
|
|
14
7
|
|
|
@@ -46,8 +39,9 @@ mock.module("../memory/qdrant-client.js", () => ({
|
|
|
46
39
|
}));
|
|
47
40
|
|
|
48
41
|
// Controllable mock for resolveCatalog used by seedCatalogSkillMemories
|
|
49
|
-
let mockResolveCatalog: () => Promise<
|
|
50
|
-
|
|
42
|
+
let mockResolveCatalog: () => Promise<
|
|
43
|
+
import("../skills/catalog-install.js").CatalogSkill[]
|
|
44
|
+
> = async () => [];
|
|
51
45
|
|
|
52
46
|
mock.module("../skills/catalog-install.js", () => ({
|
|
53
47
|
resolveCatalog: (..._args: unknown[]) => mockResolveCatalog(),
|
|
@@ -453,7 +447,11 @@ describe("seedCatalogSkillMemories", () => {
|
|
|
453
447
|
|
|
454
448
|
test("skips skills whose feature flag is disabled", async () => {
|
|
455
449
|
const skills: CatalogSkill[] = [
|
|
456
|
-
makeSkill({
|
|
450
|
+
makeSkill({
|
|
451
|
+
id: "unflagged-skill",
|
|
452
|
+
name: "Unflagged",
|
|
453
|
+
description: "No flag",
|
|
454
|
+
}),
|
|
457
455
|
makeSkill({
|
|
458
456
|
id: "flagged-skill",
|
|
459
457
|
name: "Flagged",
|
|
@@ -485,7 +483,11 @@ describe("seedCatalogSkillMemories", () => {
|
|
|
485
483
|
test("prunes pre-existing capability for a skill whose flag becomes disabled", async () => {
|
|
486
484
|
// First seed with both skills, all flags enabled
|
|
487
485
|
const skills: CatalogSkill[] = [
|
|
488
|
-
makeSkill({
|
|
486
|
+
makeSkill({
|
|
487
|
+
id: "unflagged-skill",
|
|
488
|
+
name: "Unflagged",
|
|
489
|
+
description: "No flag",
|
|
490
|
+
}),
|
|
489
491
|
makeSkill({
|
|
490
492
|
id: "flagged-skill",
|
|
491
493
|
name: "Flagged",
|
|
@@ -237,6 +237,14 @@ describe("buildSystemPrompt", () => {
|
|
|
237
237
|
expect(result).toContain("browser automation as last resort");
|
|
238
238
|
});
|
|
239
239
|
|
|
240
|
+
test("includes inline media attachment guidance", () => {
|
|
241
|
+
const result = buildSystemPrompt();
|
|
242
|
+
expect(result).toContain(
|
|
243
|
+
"Image and video attachments can render inline in chat.",
|
|
244
|
+
);
|
|
245
|
+
expect(result).toContain("attach it instead of only printing its path");
|
|
246
|
+
});
|
|
247
|
+
|
|
240
248
|
test("does not include removed sections", () => {
|
|
241
249
|
const result = buildSystemPrompt();
|
|
242
250
|
expect(result).not.toContain("## External Communications Identity");
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
"key": "feature_flags.contacts.enabled",
|
|
24
24
|
"label": "Contacts",
|
|
25
25
|
"description": "Show the Contacts tab in Settings for viewing and managing contacts",
|
|
26
|
-
"defaultEnabled":
|
|
26
|
+
"defaultEnabled": true
|
|
27
27
|
},
|
|
28
28
|
{
|
|
29
29
|
"id": "email-channel",
|
|
@@ -256,6 +256,14 @@
|
|
|
256
256
|
"label": "Quick Input",
|
|
257
257
|
"description": "Enable the Quick Input popover on right-click of the menu bar icon",
|
|
258
258
|
"defaultEnabled": false
|
|
259
|
+
},
|
|
260
|
+
{
|
|
261
|
+
"id": "expand-completed-steps",
|
|
262
|
+
"scope": "macos",
|
|
263
|
+
"key": "expand_completed_steps",
|
|
264
|
+
"label": "Expand Completed Steps",
|
|
265
|
+
"description": "Auto-expand completed tool call step groups instead of showing them collapsed",
|
|
266
|
+
"defaultEnabled": false
|
|
259
267
|
}
|
|
260
268
|
]
|
|
261
269
|
}
|
|
@@ -167,30 +167,6 @@ export function emitLlmCallStartedIfNeeded(
|
|
|
167
167
|
);
|
|
168
168
|
}
|
|
169
169
|
|
|
170
|
-
// ── Client Payload Size Caps ─────────────────────────────────────────
|
|
171
|
-
// The client truncates tool results anyway (20 000 chars in ChatViewModel),
|
|
172
|
-
// but the full string can be megabytes (file_read, bash output). Capping
|
|
173
|
-
// here avoids sending oversized payloads which get decoded on the
|
|
174
|
-
// client's main thread.
|
|
175
|
-
|
|
176
|
-
const TOOL_RESULT_MAX_CHARS = 20_000;
|
|
177
|
-
const TOOL_RESULT_TRUNCATION_SUFFIX = "...[truncated]";
|
|
178
|
-
|
|
179
|
-
// tool_input_delta streams accumulated JSON as tools run. For non-app
|
|
180
|
-
// tools the client discards it (extractCodePreview only handles app tools),
|
|
181
|
-
// so we cap it aggressively to avoid excessive client traffic.
|
|
182
|
-
const TOOL_INPUT_DELTA_MAX_CHARS = 50_000;
|
|
183
|
-
const APP_TOOL_NAMES = new Set(["app_create", "app_update"]);
|
|
184
|
-
|
|
185
|
-
function truncateForClient(
|
|
186
|
-
value: string,
|
|
187
|
-
maxChars: number,
|
|
188
|
-
suffix: string,
|
|
189
|
-
): string {
|
|
190
|
-
if (value.length <= maxChars) return value;
|
|
191
|
-
return value.slice(0, maxChars - suffix.length) + suffix;
|
|
192
|
-
}
|
|
193
|
-
|
|
194
170
|
// ── Friendly Tool Names ──────────────────────────────────────────────
|
|
195
171
|
|
|
196
172
|
const TOOL_FRIENDLY_NAMES: Record<string, string> = {
|
|
@@ -409,19 +385,10 @@ export function handleInputJsonDelta(
|
|
|
409
385
|
deps: EventHandlerDeps,
|
|
410
386
|
event: Extract<AgentEvent, { type: "input_json_delta" }>,
|
|
411
387
|
): void {
|
|
412
|
-
// Cap non-app tool input deltas — the client only uses this data for
|
|
413
|
-
// app_create/app_update code previews; all other tools discard it.
|
|
414
|
-
const content = APP_TOOL_NAMES.has(event.toolName)
|
|
415
|
-
? event.accumulatedJson
|
|
416
|
-
: truncateForClient(
|
|
417
|
-
event.accumulatedJson,
|
|
418
|
-
TOOL_INPUT_DELTA_MAX_CHARS,
|
|
419
|
-
TOOL_RESULT_TRUNCATION_SUFFIX,
|
|
420
|
-
);
|
|
421
388
|
deps.onEvent({
|
|
422
389
|
type: "tool_input_delta",
|
|
423
390
|
toolName: event.toolName,
|
|
424
|
-
content,
|
|
391
|
+
content: event.accumulatedJson,
|
|
425
392
|
conversationId: deps.ctx.conversationId,
|
|
426
393
|
toolUseId: event.toolUseId,
|
|
427
394
|
});
|
|
@@ -438,11 +405,7 @@ export function handleToolResult(
|
|
|
438
405
|
deps.onEvent({
|
|
439
406
|
type: "tool_result",
|
|
440
407
|
toolName: "",
|
|
441
|
-
result:
|
|
442
|
-
event.content,
|
|
443
|
-
TOOL_RESULT_MAX_CHARS,
|
|
444
|
-
TOOL_RESULT_TRUNCATION_SUFFIX,
|
|
445
|
-
),
|
|
408
|
+
result: event.content,
|
|
446
409
|
isError: event.isError,
|
|
447
410
|
diff: event.diff,
|
|
448
411
|
status: event.status,
|
|
@@ -655,7 +655,6 @@ export function injectTurnContext(
|
|
|
655
655
|
};
|
|
656
656
|
}
|
|
657
657
|
|
|
658
|
-
|
|
659
658
|
/**
|
|
660
659
|
* Build the `<inbound_actor_context>` text block used for model grounding.
|
|
661
660
|
*
|
|
@@ -737,7 +736,10 @@ export function buildInboundActorContextBlock(
|
|
|
737
736
|
}
|
|
738
737
|
// Contact metadata - only included when the sender has a contact record
|
|
739
738
|
// with non-default values.
|
|
740
|
-
if (
|
|
739
|
+
if (
|
|
740
|
+
ctx.contactNotes &&
|
|
741
|
+
sanitizeInlineContextValue(ctx.contactNotes) !== ctx.trustClass
|
|
742
|
+
) {
|
|
741
743
|
lines.push(
|
|
742
744
|
`contact_notes: ${sanitizeInlineContextValue(ctx.contactNotes)}`,
|
|
743
745
|
);
|
|
@@ -932,7 +934,6 @@ export interface InterfaceTurnContextParams {
|
|
|
932
934
|
conversationOriginInterface: InterfaceId | null;
|
|
933
935
|
}
|
|
934
936
|
|
|
935
|
-
|
|
936
937
|
/** Strip interface turn context blocks (both legacy separate and unified). */
|
|
937
938
|
export function stripInterfaceTurnContext(messages: Message[]): Message[] {
|
|
938
939
|
return stripUserTextBlocksByPrefix(messages, [
|
|
@@ -69,7 +69,10 @@ export function repairHistory(messages: Message[]): RepairResult {
|
|
|
69
69
|
}
|
|
70
70
|
|
|
71
71
|
// Ensure every server_tool_use has a paired web_search_tool_result
|
|
72
|
-
// in the same assistant message (handles interrupted streams)
|
|
72
|
+
// in the same assistant message (handles interrupted streams).
|
|
73
|
+
// Synthetic results are inserted IMMEDIATELY AFTER their corresponding
|
|
74
|
+
// server_tool_use block — not appended to the end — so that
|
|
75
|
+
// ensureToolPairing's split at tool_use boundaries cannot separate them.
|
|
73
76
|
const serverToolIds = new Set(
|
|
74
77
|
cleanedContent
|
|
75
78
|
.filter(
|
|
@@ -82,18 +85,35 @@ export function repairHistory(messages: Message[]): RepairResult {
|
|
|
82
85
|
.filter((b) => b.type === "web_search_tool_result")
|
|
83
86
|
.map((b) => (b as { tool_use_id: string }).tool_use_id),
|
|
84
87
|
);
|
|
88
|
+
const orphanedServerIds = new Set<string>();
|
|
85
89
|
for (const id of serverToolIds) {
|
|
86
90
|
if (!matchedServerIds.has(id)) {
|
|
87
|
-
|
|
88
|
-
type: "web_search_tool_result",
|
|
89
|
-
tool_use_id: id,
|
|
90
|
-
content: SYNTHETIC_WEB_SEARCH_ERROR,
|
|
91
|
-
});
|
|
92
|
-
stats.missingToolResultsInserted++;
|
|
91
|
+
orphanedServerIds.add(id);
|
|
93
92
|
}
|
|
94
93
|
}
|
|
95
94
|
|
|
96
|
-
|
|
95
|
+
let repairedContent: ContentBlock[];
|
|
96
|
+
if (orphanedServerIds.size > 0) {
|
|
97
|
+
repairedContent = [];
|
|
98
|
+
for (const block of cleanedContent) {
|
|
99
|
+
repairedContent.push(block);
|
|
100
|
+
if (
|
|
101
|
+
block.type === "server_tool_use" &&
|
|
102
|
+
orphanedServerIds.has(block.id)
|
|
103
|
+
) {
|
|
104
|
+
repairedContent.push({
|
|
105
|
+
type: "web_search_tool_result",
|
|
106
|
+
tool_use_id: block.id,
|
|
107
|
+
content: SYNTHETIC_WEB_SEARCH_ERROR,
|
|
108
|
+
});
|
|
109
|
+
stats.missingToolResultsInserted++;
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
} else {
|
|
113
|
+
repairedContent = cleanedContent;
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
result.push({ role: "assistant", content: repairedContent });
|
|
97
117
|
|
|
98
118
|
// Only track client-side tool_use IDs as pending (not server_tool_use)
|
|
99
119
|
pendingToolUseIds = new Set(
|
|
@@ -67,13 +67,14 @@ export class TraceEmitter {
|
|
|
67
67
|
attributes,
|
|
68
68
|
};
|
|
69
69
|
|
|
70
|
+
// Send to client first so synchronous DB writes don't block SSE delivery.
|
|
71
|
+
this.sendToClient(event);
|
|
72
|
+
|
|
70
73
|
try {
|
|
71
74
|
persistTraceEvent(event as TraceEvent);
|
|
72
75
|
} catch (err) {
|
|
73
76
|
log.warn({ err, eventId }, "Failed to persist trace event");
|
|
74
77
|
}
|
|
75
|
-
|
|
76
|
-
this.sendToClient(event);
|
|
77
78
|
}
|
|
78
79
|
}
|
|
79
80
|
|
|
@@ -22,7 +22,10 @@ export function computeStaleness(
|
|
|
22
22
|
now: number,
|
|
23
23
|
): { level: StalenessLevel; ratio: number } {
|
|
24
24
|
const baseLifetime = BASE_LIFETIME_MS[item.kind] ?? DEFAULT_LIFETIME_MS;
|
|
25
|
-
const reinforcement = Math.max(
|
|
25
|
+
const reinforcement = Math.max(
|
|
26
|
+
1,
|
|
27
|
+
1 + 0.3 * (item.sourceConversationCount - 1),
|
|
28
|
+
);
|
|
26
29
|
const effectiveLifetime = baseLifetime * reinforcement;
|
|
27
30
|
const age = now - item.firstSeenAt;
|
|
28
31
|
const ratio = age / effectiveLifetime;
|
|
@@ -852,17 +852,58 @@ async function classifyWithLLM(
|
|
|
852
852
|
*
|
|
853
853
|
* - `all_channels`: force selected channels to all connected channels.
|
|
854
854
|
* - `multi_channel`: ensure at least 2 channels when 2+ are connected.
|
|
855
|
-
* - `single_channel`:
|
|
855
|
+
* - `single_channel`: cap to a single channel. When explicitly set, reduces
|
|
856
|
+
* selected channels to one — preferring the source channel if present.
|
|
856
857
|
*/
|
|
857
858
|
export function enforceRoutingIntent(
|
|
858
859
|
decision: NotificationDecision,
|
|
859
860
|
routingIntent: RoutingIntent | undefined,
|
|
860
861
|
connectedChannels: NotificationChannel[],
|
|
862
|
+
sourceChannel?: string,
|
|
861
863
|
): NotificationDecision {
|
|
862
|
-
if (!routingIntent
|
|
864
|
+
if (!routingIntent) {
|
|
863
865
|
return decision;
|
|
864
866
|
}
|
|
865
867
|
|
|
868
|
+
if (routingIntent === "single_channel") {
|
|
869
|
+
if (!decision.shouldNotify) {
|
|
870
|
+
return decision;
|
|
871
|
+
}
|
|
872
|
+
|
|
873
|
+
// Force delivery to the source channel only. If the source channel
|
|
874
|
+
// is among the connected channels, use it regardless of what the LLM
|
|
875
|
+
// picked (even if the LLM picked exactly one wrong channel).
|
|
876
|
+
// Otherwise fall back to capping at the first selected channel.
|
|
877
|
+
const sourceIsConnected =
|
|
878
|
+
sourceChannel &&
|
|
879
|
+
connectedChannels.includes(sourceChannel as NotificationChannel);
|
|
880
|
+
const preferred = sourceIsConnected
|
|
881
|
+
? (sourceChannel as NotificationChannel)
|
|
882
|
+
: decision.selectedChannels[0];
|
|
883
|
+
|
|
884
|
+
// No change needed if the decision already matches.
|
|
885
|
+
if (
|
|
886
|
+
decision.selectedChannels.length === 1 &&
|
|
887
|
+
decision.selectedChannels[0] === preferred
|
|
888
|
+
) {
|
|
889
|
+
return decision;
|
|
890
|
+
}
|
|
891
|
+
|
|
892
|
+
const enforced = { ...decision };
|
|
893
|
+
enforced.selectedChannels = [preferred];
|
|
894
|
+
enforced.reasoningSummary = `${decision.reasoningSummary} [routing_intent=single_channel enforced: capped to ${preferred}]`;
|
|
895
|
+
log.info(
|
|
896
|
+
{
|
|
897
|
+
routingIntent,
|
|
898
|
+
sourceChannel,
|
|
899
|
+
originalChannels: decision.selectedChannels,
|
|
900
|
+
enforcedChannel: preferred,
|
|
901
|
+
},
|
|
902
|
+
"Routing intent enforcement: single_channel → capped to one channel",
|
|
903
|
+
);
|
|
904
|
+
return enforced;
|
|
905
|
+
}
|
|
906
|
+
|
|
866
907
|
if (!decision.shouldNotify) {
|
|
867
908
|
return decision;
|
|
868
909
|
}
|
|
@@ -197,15 +197,6 @@ const LOW_RISK_GIT_SUBCOMMANDS = new Set([
|
|
|
197
197
|
"reflog",
|
|
198
198
|
]);
|
|
199
199
|
|
|
200
|
-
// Vellum/assistant CLI subcommands that are low-risk (read-only)
|
|
201
|
-
const LOW_RISK_CLI_SUBCOMMANDS = new Set([
|
|
202
|
-
"ps",
|
|
203
|
-
"doctor",
|
|
204
|
-
"audit",
|
|
205
|
-
"completions",
|
|
206
|
-
"map",
|
|
207
|
-
]);
|
|
208
|
-
|
|
209
200
|
// Commands that wrap another program — the real program appears as the first
|
|
210
201
|
// non-flag argument. When one of these is the segment program we look through
|
|
211
202
|
// its args to find the effective program (e.g. `env curl …` → curl).
|
|
@@ -671,17 +662,6 @@ async function classifyRiskUncached(
|
|
|
671
662
|
continue;
|
|
672
663
|
}
|
|
673
664
|
|
|
674
|
-
if (prog === "vellum" || prog === "assistant") {
|
|
675
|
-
const subcommand = firstPositionalArg(seg.args);
|
|
676
|
-
if (subcommand && LOW_RISK_CLI_SUBCOMMANDS.has(subcommand)) {
|
|
677
|
-
// Read-only subcommands stay at current risk
|
|
678
|
-
continue;
|
|
679
|
-
}
|
|
680
|
-
// Mutating subcommands are medium
|
|
681
|
-
maxRisk = RiskLevel.Medium;
|
|
682
|
-
continue;
|
|
683
|
-
}
|
|
684
|
-
|
|
685
665
|
if (!LOW_RISK_PROGRAMS.has(prog)) {
|
|
686
666
|
// Unknown program → medium
|
|
687
667
|
if (maxRisk === RiskLevel.Low) {
|
|
@@ -206,6 +206,8 @@ function buildAttachmentSection(): string {
|
|
|
206
206
|
"",
|
|
207
207
|
'Use `source="host"` with an absolute path for host filesystem files. Optional attributes: `filename` (display name override), `mime_type` (override auto-detection).',
|
|
208
208
|
"",
|
|
209
|
+
"Image and video attachments can render inline in chat. If the user asks to preview a media file here, attach it instead of only printing its path.",
|
|
210
|
+
"",
|
|
209
211
|
"Embed images/GIFs inline using markdown: `![description](URL)`.",
|
|
210
212
|
].join("\n");
|
|
211
213
|
}
|
|
@@ -37,14 +37,16 @@ Onboarding has two phases. Phase 1 is about proving value. Phase 2 is about maki
|
|
|
37
37
|
|
|
38
38
|
### Phase 1: Prove It (Priority: HIGH)
|
|
39
39
|
|
|
40
|
-
**Goal:**
|
|
40
|
+
**Goal:** Complete whatever task the user wants to do. Once they've gotten initial value, bridge to Phase 2. Phase 1 is done when the task is done, and the user is thinking "oh, this thing is actually useful."
|
|
41
41
|
|
|
42
42
|
**Keep Phase 1 tasks small and fast.** The goal is to show value quickly, not to impress with depth. A quick file summary, a fast web lookup, a simple app or tool, a short piece of writing. Do NOT kick off long research tasks, deep multi-step pipelines, or anything that takes more than a minute or two. If the user asks for something heavyweight, acknowledge it and suggest a lighter first win instead: "That's a bigger one. Let me show you something quick first so you can see how I work, then we'll dig in." New users start with $5 of AI credits. The full onboarding should fit comfortably within that budget, so bias toward lighter tasks.
|
|
43
43
|
|
|
44
44
|
After your opening message, one of these things will happen:
|
|
45
45
|
|
|
46
46
|
**Path A: The user gives you a task or question.**
|
|
47
|
-
Great. Do it. Do it well. This is your audition. While you work on their task, quietly observe what you can learn about them (name, interests, work context, communication style). Save what you learn to USER.md silently.
|
|
47
|
+
Great. Do it. Do it well. This is your audition. While you work on their task, quietly observe what you can learn about them (name, interests, work context, communication style). Save what you learn to USER.md silently. Once the task is done, bridge to Phase 2 immediately — in that same response or the very next one. Do NOT wait for the user to ask for more. Do NOT treat "that's all" or "thanks" as a goodbye. Treat it as your cue to bridge.
|
|
48
|
+
|
|
49
|
+
If the user's first message is vague (e.g. "I'm new here, can you help with that?"), you may ask one clarifying question to scope the task. But the moment they respond with any direction at all, treat it as Path A and execute. Do not keep probing.
|
|
48
50
|
|
|
49
51
|
**Path B: The user asks "what can you do?" or seems unsure.**
|
|
50
52
|
Don't dump a paragraph of capabilities. Instead, use the `ui_show` tool to show them a structured card. You MUST call the `ui_show` tool (not write prose or a list). Present the actions in the exact order shown below. Here is the input to pass to the `ui_show` tool:
|
|
@@ -74,11 +76,13 @@ Only fall back to a numbered list if `ui_show` is genuinely unavailable (voice o
|
|
|
74
76
|
- **Vibe code an app:** Ask what kind of tool or app they want. Build it using the app builder skill. Make it look great.
|
|
75
77
|
- **Photo or video:** Use the media processing or image studio skills. They can analyze a video, pull insights from a photo, or generate something new. Ask what they have and what they want to do with it.
|
|
76
78
|
|
|
79
|
+
Once the task is complete, bridge to Phase 2 immediately — in that same response or the very next one. Do NOT wait for the user to ask for more. Do NOT treat "that's all" or "thanks" as a goodbye. Treat it as your cue to bridge.
|
|
80
|
+
|
|
77
81
|
**Path C: The user wants to chat or explore.**
|
|
78
|
-
That's fine. Roll with it. Be interesting. But steer toward action within 3-4 exchanges. You can weave in something like: "I'm enjoying this, but I'm itching to actually do something for you. Got anything I can sink my teeth into?"
|
|
82
|
+
That's fine. Roll with it. Be interesting. But steer toward action within 3-4 exchanges. You can weave in something like: "I'm enjoying this, but I'm itching to actually do something for you. Got anything I can sink my teeth into?" At that point, follow Path A instructions.
|
|
79
83
|
|
|
80
84
|
**Path D: The user immediately wants to set up your identity/name.**
|
|
81
|
-
Great, skip to Phase 2. Some people want the personality game first. Let them lead.
|
|
85
|
+
Great, skip to Phase 2. Some people want the personality game first. Let them lead. If you go down this path come back to Phase 1 after that.
|
|
82
86
|
|
|
83
87
|
**Critical rule for Phase 1:** Whatever the user gives you, COMPLETE A TASK. Even a small one. Summarize something, look something up, build something quick. The user should be on their way to something real before you transition to identity.
|
|
84
88
|
|
|
@@ -196,6 +200,8 @@ Do it quietly. Don't tell the user which files you're editing or mention tool na
|
|
|
196
200
|
|
|
197
201
|
When saving to `IDENTITY.md`, be specific about the tone, energy, and conversational style you discovered during onboarding. This file persists after onboarding, so everything about how you should come across needs to be captured there. Not just your name, but the full vibe: how you talk, how much energy you bring, whether you're blunt or gentle, funny or serious.
|
|
198
202
|
|
|
203
|
+
When saving to `SOUL.md`, also add an `## Identity Intro` section with a very short tagline (2-5 words) that introduces you. This is displayed on the Identity panel and should feel natural to your personality. Examples: "It's [name].", "[name] here.", "[name], at your service." Write it as a single line under the heading (not a bullet list). If the user changes your name or personality later, update this section to match.
|
|
204
|
+
|
|
199
205
|
## Wrapping Up
|
|
200
206
|
|
|
201
207
|
Once you've completed Phase 1 and made reasonable progress through Phase 2, you're done with onboarding. Use your best judgment on when the conversation has naturally moved past the bootstrap stage. There's no hard checklist. The goal is that the user feels set up and ready to work, not that every box is ticked.
|
|
@@ -2,13 +2,12 @@ _ Lines starting with _ are comments - they won't appear in the system prompt
|
|
|
2
2
|
|
|
3
3
|
# IDENTITY.md
|
|
4
4
|
|
|
5
|
-
This file is yours. Add sections, restructure it, make it reflect who you are. Name, Emoji, Role, Personality
|
|
5
|
+
This file is yours. Add sections, restructure it, make it reflect who you are. Name, Emoji, Role, Personality are parsed by the app - keep their `- **Label:**` format. Everything else is freeform.
|
|
6
6
|
|
|
7
7
|
- **Name:** _(not yet chosen)_
|
|
8
8
|
- **Emoji:** _(not yet chosen)_
|
|
9
9
|
- **Nature:** _(not yet established)_
|
|
10
10
|
- **Personality:** _(not yet established)_
|
|
11
11
|
- **Role:** _(not yet established)_
|
|
12
|
-
- **Home:** Local (~/.vellum/workspace)
|
|
13
12
|
|
|
14
13
|
## Avatar
|