@vellumai/assistant 0.10.1-dev.202606240206.7c2bca6 → 0.10.1-dev.202606240317.ea25efe
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/__tests__/disk-pressure-guard.test.ts +41 -0
- package/src/__tests__/provider-usage-tracking.test.ts +39 -0
- package/src/__tests__/registry.test.ts +3 -0
- package/src/__tests__/workspace-tool-loader.test.ts +3 -0
- package/src/agent/loop-exclusive-tool.test.ts +150 -0
- package/src/agent/loop.ts +56 -0
- package/src/daemon/conversation.ts +4 -1
- package/src/daemon/disk-pressure-guard.ts +12 -2
- package/src/plugins/defaults/advisor/__tests__/consult.test.ts +44 -0
- package/src/plugins/defaults/advisor/__tests__/context-pack-gating.test.ts +106 -0
- package/src/plugins/defaults/advisor/__tests__/context-pack.test.ts +60 -0
- package/src/plugins/defaults/advisor/consult.ts +48 -6
- package/src/plugins/defaults/advisor/context-pack.ts +288 -0
- package/src/plugins/defaults/advisor/steering.ts +14 -2
- package/src/plugins/defaults/advisor/tools/advisor.ts +28 -5
- package/src/providers/anthropic/client.ts +5 -0
- package/src/providers/call-site-routing.ts +4 -0
- package/src/providers/openai/responses-provider.ts +5 -0
- package/src/providers/openrouter/client.ts +5 -0
- package/src/providers/provider-send-message.ts +4 -0
- package/src/providers/ratelimit.ts +4 -0
- package/src/providers/retry.ts +4 -0
- package/src/providers/types.ts +9 -0
- package/src/providers/usage-tracking.ts +4 -0
- package/src/tools/tool-defaults.ts +2 -0
- package/src/tools/types.ts +17 -2
package/package.json
CHANGED
|
@@ -38,6 +38,7 @@ mock.module("../runtime/assistant-event-hub.js", () => ({
|
|
|
38
38
|
|
|
39
39
|
const {
|
|
40
40
|
DISK_PRESSURE_CLEAR_THRESHOLD_PERCENT,
|
|
41
|
+
DISK_PRESSURE_MIN_FREE_FLOOR_MB,
|
|
41
42
|
DISK_PRESSURE_OVERRIDE_CONFIRMATION,
|
|
42
43
|
DISK_PRESSURE_THRESHOLD_PERCENT,
|
|
43
44
|
DISK_PRESSURE_WARNING_CLEAR_THRESHOLD_PERCENT,
|
|
@@ -342,4 +343,44 @@ describe("disk pressure guard", () => {
|
|
|
342
343
|
setDiskUsage(DISK_PRESSURE_WARNING_CLEAR_THRESHOLD_PERCENT - 1);
|
|
343
344
|
expect(evaluateDiskPressureNow().state).toBe("ok");
|
|
344
345
|
});
|
|
346
|
+
|
|
347
|
+
test("stays ok at a critical usage percentage while ample free space remains", () => {
|
|
348
|
+
// 99% used of a large volume still leaves gigabytes free — above the floor.
|
|
349
|
+
const totalMb = 1_000_000;
|
|
350
|
+
const usedMb = Math.round(totalMb * 0.99); // freeMb ~= 10_000 MiB
|
|
351
|
+
setDiskUsage(usedMb, totalMb);
|
|
352
|
+
expect(diskSample!.freeMb).toBeGreaterThanOrEqual(
|
|
353
|
+
DISK_PRESSURE_MIN_FREE_FLOOR_MB,
|
|
354
|
+
);
|
|
355
|
+
|
|
356
|
+
const status = evaluateDiskPressureNow();
|
|
357
|
+
|
|
358
|
+
expect(status.state).toBe("ok");
|
|
359
|
+
expect(status.locked).toBe(false);
|
|
360
|
+
expect(status.effectivelyLocked).toBe(false);
|
|
361
|
+
});
|
|
362
|
+
|
|
363
|
+
test("stays ok at a warning usage percentage while ample free space remains", () => {
|
|
364
|
+
const totalMb = 1_000_000;
|
|
365
|
+
const usedMb = Math.round(totalMb * 0.85); // 85% used, freeMb ~= 150_000 MiB
|
|
366
|
+
setDiskUsage(usedMb, totalMb);
|
|
367
|
+
|
|
368
|
+
const status = evaluateDiskPressureNow();
|
|
369
|
+
|
|
370
|
+
expect(status.state).toBe("ok");
|
|
371
|
+
});
|
|
372
|
+
|
|
373
|
+
test("locks at a critical usage percentage once free space drops below the floor", () => {
|
|
374
|
+
// High percentage AND little absolute headroom: floor does not apply.
|
|
375
|
+
const totalMb = 100_000;
|
|
376
|
+
const freeMb = DISK_PRESSURE_MIN_FREE_FLOOR_MB - 1;
|
|
377
|
+
setDiskUsage(totalMb - freeMb, totalMb);
|
|
378
|
+
expect(diskSample!.freeMb).toBeLessThan(DISK_PRESSURE_MIN_FREE_FLOOR_MB);
|
|
379
|
+
|
|
380
|
+
const status = evaluateDiskPressureNow();
|
|
381
|
+
|
|
382
|
+
expect(status.state).toBe("critical");
|
|
383
|
+
expect(status.locked).toBe(true);
|
|
384
|
+
expect(status.effectivelyLocked).toBe(true);
|
|
385
|
+
});
|
|
345
386
|
});
|
|
@@ -200,3 +200,42 @@ describe("UsageTrackingProvider", () => {
|
|
|
200
200
|
});
|
|
201
201
|
});
|
|
202
202
|
});
|
|
203
|
+
|
|
204
|
+
describe("native web-search capability survives the wrapper chain", () => {
|
|
205
|
+
function leaf(supports: boolean | undefined): Provider {
|
|
206
|
+
return {
|
|
207
|
+
name: "anthropic",
|
|
208
|
+
...(supports === undefined ? {} : { supportsNativeWebSearch: supports }),
|
|
209
|
+
async sendMessage(): Promise<ProviderResponse> {
|
|
210
|
+
return {
|
|
211
|
+
content: [{ type: "text", text: "" }],
|
|
212
|
+
model: "m",
|
|
213
|
+
usage: { inputTokens: 0, outputTokens: 0 },
|
|
214
|
+
stopReason: "end_turn",
|
|
215
|
+
};
|
|
216
|
+
},
|
|
217
|
+
};
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
test("UsageTrackingProvider forwards supportsNativeWebSearch", () => {
|
|
221
|
+
expect(new UsageTrackingProvider(leaf(true)).supportsNativeWebSearch).toBe(
|
|
222
|
+
true,
|
|
223
|
+
);
|
|
224
|
+
expect(new UsageTrackingProvider(leaf(false)).supportsNativeWebSearch).toBe(
|
|
225
|
+
false,
|
|
226
|
+
);
|
|
227
|
+
expect(
|
|
228
|
+
new UsageTrackingProvider(leaf(undefined)).supportsNativeWebSearch,
|
|
229
|
+
).toBeUndefined();
|
|
230
|
+
});
|
|
231
|
+
|
|
232
|
+
test("CallSiteConfiguredProvider forwards it through a nested wrapper", () => {
|
|
233
|
+
// The exact chain getConfiguredProvider returns: CallSiteConfigured →
|
|
234
|
+
// UsageTracking → leaf. The advisor consult reads the flag off the top.
|
|
235
|
+
const wrapped = new CallSiteConfiguredProvider(
|
|
236
|
+
new UsageTrackingProvider(leaf(true)),
|
|
237
|
+
"advisor",
|
|
238
|
+
);
|
|
239
|
+
expect(wrapped.supportsNativeWebSearch).toBe(true);
|
|
240
|
+
});
|
|
241
|
+
});
|
|
@@ -36,6 +36,9 @@ function makeFakeTool(name: string): Tool {
|
|
|
36
36
|
category: "test",
|
|
37
37
|
defaultRiskLevel: RiskLevel.Low,
|
|
38
38
|
executionTarget: "sandbox",
|
|
39
|
+
// Match the finalized shape the registry stores, so identity comparisons
|
|
40
|
+
// (`getTool(name)` toEqual coreTool) hold after registration fills defaults.
|
|
41
|
+
exclusive: false,
|
|
39
42
|
input_schema: { type: "object", properties: {}, required: [] },
|
|
40
43
|
async execute(
|
|
41
44
|
_input: Record<string, unknown>,
|
|
@@ -94,6 +94,9 @@ function makeFakeCoreTool(name: string): Tool {
|
|
|
94
94
|
category: "test",
|
|
95
95
|
defaultRiskLevel: RiskLevel.Low,
|
|
96
96
|
executionTarget: "sandbox",
|
|
97
|
+
// Match the finalized shape the registry stores (defaults filled), so
|
|
98
|
+
// `getCoreToolOverride(name)` toEqual comparisons hold after registration.
|
|
99
|
+
exclusive: false,
|
|
97
100
|
input_schema: { type: "object", properties: {}, required: [] },
|
|
98
101
|
async execute(
|
|
99
102
|
_input: Record<string, unknown>,
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Verifies the agent loop's exclusive-tool dispatch: when a tool the loop is
|
|
3
|
+
* told is exclusive (e.g. the advisor) appears in a multi-call turn, only that
|
|
4
|
+
* tool runs and the siblings are deferred un-run with a benign result — so the
|
|
5
|
+
* model incorporates the exclusive tool's output before acting on anything
|
|
6
|
+
* else. Drives the REAL loop, mocking only the provider boundary.
|
|
7
|
+
*/
|
|
8
|
+
import { describe, expect, test } from "bun:test";
|
|
9
|
+
|
|
10
|
+
import { createMockProvider } from "../__tests__/helpers/mock-provider.js";
|
|
11
|
+
import type { ContentBlock, ProviderResponse } from "../providers/types.js";
|
|
12
|
+
import { AgentLoop } from "./loop.js";
|
|
13
|
+
|
|
14
|
+
const endTurn = (text: string): ProviderResponse => ({
|
|
15
|
+
content: [{ type: "text", text }],
|
|
16
|
+
model: "mock-model",
|
|
17
|
+
usage: { inputTokens: 1, outputTokens: 1 },
|
|
18
|
+
stopReason: "end_turn",
|
|
19
|
+
});
|
|
20
|
+
|
|
21
|
+
const toolUseTurn = (
|
|
22
|
+
blocks: Array<{ id: string; name: string }>,
|
|
23
|
+
): ProviderResponse => ({
|
|
24
|
+
content: [
|
|
25
|
+
{ type: "text", text: "working" },
|
|
26
|
+
...blocks.map((b) => ({
|
|
27
|
+
type: "tool_use" as const,
|
|
28
|
+
id: b.id,
|
|
29
|
+
name: b.name,
|
|
30
|
+
input: {},
|
|
31
|
+
})),
|
|
32
|
+
],
|
|
33
|
+
model: "mock-model",
|
|
34
|
+
usage: { inputTokens: 1, outputTokens: 1 },
|
|
35
|
+
stopReason: "tool_use",
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
function toolResults(history: { content: ContentBlock[] }[]) {
|
|
39
|
+
return history
|
|
40
|
+
.flatMap((m) => m.content)
|
|
41
|
+
.filter(
|
|
42
|
+
(b): b is Extract<ContentBlock, { type: "tool_result" }> =>
|
|
43
|
+
b.type === "tool_result",
|
|
44
|
+
);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
const baseRun = {
|
|
48
|
+
requestId: "req-excl",
|
|
49
|
+
onEvent: () => {},
|
|
50
|
+
callSite: "mainAgent" as const,
|
|
51
|
+
trust: { sourceChannel: "vellum" as const, trustClass: "unknown" as const },
|
|
52
|
+
};
|
|
53
|
+
|
|
54
|
+
describe("AgentLoop — exclusive tool deferral", () => {
|
|
55
|
+
test("runs the exclusive tool alone and defers sibling calls un-run", async () => {
|
|
56
|
+
const { provider } = createMockProvider([
|
|
57
|
+
toolUseTurn([
|
|
58
|
+
{ id: "call-advisor", name: "advisor" },
|
|
59
|
+
{ id: "call-edit", name: "write_file" },
|
|
60
|
+
]),
|
|
61
|
+
endTurn("done"),
|
|
62
|
+
]);
|
|
63
|
+
|
|
64
|
+
const executed: string[] = [];
|
|
65
|
+
const loop = new AgentLoop({
|
|
66
|
+
provider,
|
|
67
|
+
systemPrompt: "sys",
|
|
68
|
+
conversationId: "excl-1",
|
|
69
|
+
tools: [
|
|
70
|
+
{ name: "advisor", description: "", input_schema: { type: "object" } },
|
|
71
|
+
{
|
|
72
|
+
name: "write_file",
|
|
73
|
+
description: "",
|
|
74
|
+
input_schema: { type: "object" },
|
|
75
|
+
},
|
|
76
|
+
],
|
|
77
|
+
toolExecutor: async (name) => {
|
|
78
|
+
executed.push(name);
|
|
79
|
+
return { content: `ran ${name}`, isError: false };
|
|
80
|
+
},
|
|
81
|
+
isExclusiveTool: (name) => name === "advisor",
|
|
82
|
+
});
|
|
83
|
+
|
|
84
|
+
const { history } = await loop.run({
|
|
85
|
+
...baseRun,
|
|
86
|
+
messages: [{ role: "user", content: [{ type: "text", text: "do it" }] }],
|
|
87
|
+
});
|
|
88
|
+
|
|
89
|
+
// Only the exclusive tool actually executed.
|
|
90
|
+
expect(executed).toEqual(["advisor"]);
|
|
91
|
+
|
|
92
|
+
const results = toolResults(history);
|
|
93
|
+
const advisorResult = results.find(
|
|
94
|
+
(b) => b.tool_use_id === "call-advisor",
|
|
95
|
+
)!;
|
|
96
|
+
const editResult = results.find((b) => b.tool_use_id === "call-edit")!;
|
|
97
|
+
|
|
98
|
+
// The advisor ran; the sibling came back un-run (not an error) so the model
|
|
99
|
+
// can re-issue it after reading the guidance.
|
|
100
|
+
expect(advisorResult.content).toBe("ran advisor");
|
|
101
|
+
expect(editResult.content).toContain("not run");
|
|
102
|
+
expect(editResult.content).toContain("advisor");
|
|
103
|
+
expect(editResult.is_error).toBe(false);
|
|
104
|
+
});
|
|
105
|
+
|
|
106
|
+
test("runs sibling tools normally when no exclusive tool is present", async () => {
|
|
107
|
+
const { provider } = createMockProvider([
|
|
108
|
+
toolUseTurn([
|
|
109
|
+
{ id: "call-read", name: "read_file" },
|
|
110
|
+
{ id: "call-write", name: "write_file" },
|
|
111
|
+
]),
|
|
112
|
+
endTurn("done"),
|
|
113
|
+
]);
|
|
114
|
+
|
|
115
|
+
const executed: string[] = [];
|
|
116
|
+
const loop = new AgentLoop({
|
|
117
|
+
provider,
|
|
118
|
+
systemPrompt: "sys",
|
|
119
|
+
conversationId: "excl-2",
|
|
120
|
+
tools: [
|
|
121
|
+
{
|
|
122
|
+
name: "read_file",
|
|
123
|
+
description: "",
|
|
124
|
+
input_schema: { type: "object" },
|
|
125
|
+
},
|
|
126
|
+
{
|
|
127
|
+
name: "write_file",
|
|
128
|
+
description: "",
|
|
129
|
+
input_schema: { type: "object" },
|
|
130
|
+
},
|
|
131
|
+
],
|
|
132
|
+
toolExecutor: async (name) => {
|
|
133
|
+
executed.push(name);
|
|
134
|
+
return { content: `ran ${name}`, isError: false };
|
|
135
|
+
},
|
|
136
|
+
isExclusiveTool: (name) => name === "advisor",
|
|
137
|
+
});
|
|
138
|
+
|
|
139
|
+
const { history } = await loop.run({
|
|
140
|
+
...baseRun,
|
|
141
|
+
messages: [{ role: "user", content: [{ type: "text", text: "do it" }] }],
|
|
142
|
+
});
|
|
143
|
+
|
|
144
|
+
// Both non-exclusive tools ran; nothing was deferred.
|
|
145
|
+
expect(executed.sort()).toEqual(["read_file", "write_file"]);
|
|
146
|
+
for (const result of toolResults(history)) {
|
|
147
|
+
expect(result.content).not.toContain("not run");
|
|
148
|
+
}
|
|
149
|
+
});
|
|
150
|
+
});
|
package/src/agent/loop.ts
CHANGED
|
@@ -625,6 +625,20 @@ export type LoopToolExecutor = (
|
|
|
625
625
|
activityMetadata?: ToolActivityMetadata;
|
|
626
626
|
}>;
|
|
627
627
|
|
|
628
|
+
/**
|
|
629
|
+
* The benign result returned for a sibling tool call that was deferred because
|
|
630
|
+
* an exclusive tool ran in the same turn. Phrased so the model treats it as a
|
|
631
|
+
* "not run yet" signal — read the exclusive tool's output, then re-issue this
|
|
632
|
+
* call if it is still the right next step.
|
|
633
|
+
*/
|
|
634
|
+
function deferredForExclusiveMessage(exclusiveToolName: string): string {
|
|
635
|
+
return (
|
|
636
|
+
`(not run: \`${exclusiveToolName}\` was called this turn and runs first, on its own, ` +
|
|
637
|
+
`so the rest of your tool calls were held back. Read its output, then call this tool ` +
|
|
638
|
+
`again if it is still the right next step.)`
|
|
639
|
+
);
|
|
640
|
+
}
|
|
641
|
+
|
|
628
642
|
export interface AgentLoopConstructorOptions {
|
|
629
643
|
/** LLM provider the loop issues every call through. */
|
|
630
644
|
provider: Provider;
|
|
@@ -634,6 +648,14 @@ export interface AgentLoopConstructorOptions {
|
|
|
634
648
|
tools?: ToolDefinition[];
|
|
635
649
|
toolExecutor?: LoopToolExecutor;
|
|
636
650
|
resolveTools?: (history: Message[]) => ToolDefinition[];
|
|
651
|
+
/**
|
|
652
|
+
* Decide whether a tool runs exclusively in its turn (see
|
|
653
|
+
* {@link ToolDefinition.exclusive}). When it returns true for a tool present
|
|
654
|
+
* in a multi-call turn, the loop runs only that tool and defers the siblings
|
|
655
|
+
* un-run. Injected by the conversation wiring, which can read the tool
|
|
656
|
+
* registry; lightweight loops that omit it never defer.
|
|
657
|
+
*/
|
|
658
|
+
isExclusiveTool?: (toolName: string) => boolean;
|
|
637
659
|
/**
|
|
638
660
|
* Conversation this loop drives. Scopes the loop-held compaction circuit
|
|
639
661
|
* breaker and is the source of truth the loop's pipeline contexts and
|
|
@@ -659,6 +681,7 @@ export class AgentLoop {
|
|
|
659
681
|
private tools: ToolDefinition[];
|
|
660
682
|
private resolveTools: ((history: Message[]) => ToolDefinition[]) | null;
|
|
661
683
|
private toolExecutor: LoopToolExecutor | null;
|
|
684
|
+
private isExclusiveTool: ((toolName: string) => boolean) | null;
|
|
662
685
|
|
|
663
686
|
/**
|
|
664
687
|
* Conversation this loop drives. Source of truth for the `conversationId`
|
|
@@ -688,6 +711,7 @@ export class AgentLoop {
|
|
|
688
711
|
tools,
|
|
689
712
|
toolExecutor,
|
|
690
713
|
resolveTools,
|
|
714
|
+
isExclusiveTool,
|
|
691
715
|
conversationId,
|
|
692
716
|
resolveConversationDir,
|
|
693
717
|
} = options;
|
|
@@ -697,6 +721,7 @@ export class AgentLoop {
|
|
|
697
721
|
this.tools = tools ?? [];
|
|
698
722
|
this.resolveTools = resolveTools ?? null;
|
|
699
723
|
this.toolExecutor = toolExecutor ?? null;
|
|
724
|
+
this.isExclusiveTool = isExclusiveTool ?? null;
|
|
700
725
|
this.conversationId = conversationId;
|
|
701
726
|
this.resolveConversationDir = resolveConversationDir ?? null;
|
|
702
727
|
this.compactionCircuit = new CompactionCircuit(this.conversationId);
|
|
@@ -1883,8 +1908,39 @@ export class AgentLoop {
|
|
|
1883
1908
|
"Tool execution start",
|
|
1884
1909
|
);
|
|
1885
1910
|
|
|
1911
|
+
// When an exclusive tool (e.g. the advisor) is among this turn's calls,
|
|
1912
|
+
// it must run alone: the model should incorporate its output before
|
|
1913
|
+
// acting on anything else. Run only the first exclusive call and defer
|
|
1914
|
+
// the siblings with a benign, un-run result so the model re-issues them
|
|
1915
|
+
// next turn if still needed. Every tool_use still gets a matching
|
|
1916
|
+
// tool_result, so history stays well-formed.
|
|
1917
|
+
const exclusiveBlock = this.isExclusiveTool
|
|
1918
|
+
? toolUseBlocks.find((block) => this.isExclusiveTool!(block.name))
|
|
1919
|
+
: undefined;
|
|
1920
|
+
const deferSiblings =
|
|
1921
|
+
exclusiveBlock !== undefined && toolUseBlocks.length > 1;
|
|
1922
|
+
if (deferSiblings) {
|
|
1923
|
+
rlog.info(
|
|
1924
|
+
{
|
|
1925
|
+
turn: toolUseTurns,
|
|
1926
|
+
exclusiveTool: exclusiveBlock!.name,
|
|
1927
|
+
deferred: toolUseBlocks
|
|
1928
|
+
.filter((block) => block !== exclusiveBlock)
|
|
1929
|
+
.map((block) => block.name),
|
|
1930
|
+
},
|
|
1931
|
+
"Exclusive tool present — running it alone and deferring sibling tool calls this turn",
|
|
1932
|
+
);
|
|
1933
|
+
}
|
|
1934
|
+
|
|
1886
1935
|
const toolExecutionPromise = Promise.all(
|
|
1887
1936
|
toolUseBlocks.map(async (toolUse) => {
|
|
1937
|
+
if (deferSiblings && toolUse !== exclusiveBlock) {
|
|
1938
|
+
const result: Awaited<ReturnType<LoopToolExecutor>> = {
|
|
1939
|
+
content: deferredForExclusiveMessage(exclusiveBlock!.name),
|
|
1940
|
+
isError: false,
|
|
1941
|
+
};
|
|
1942
|
+
return { toolUse, result };
|
|
1943
|
+
}
|
|
1888
1944
|
const result = await this.toolExecutor!(
|
|
1889
1945
|
toolUse.name,
|
|
1890
1946
|
toolUse.input,
|
|
@@ -91,7 +91,7 @@ import {
|
|
|
91
91
|
isActivationMomentParam,
|
|
92
92
|
} from "../telemetry/activation-funnel.js";
|
|
93
93
|
import { ToolExecutor } from "../tools/executor.js";
|
|
94
|
-
import { getAllToolDefinitions } from "../tools/registry.js";
|
|
94
|
+
import { getAllToolDefinitions, getTool } from "../tools/registry.js";
|
|
95
95
|
import type { ToolLifecycleEvent } from "../tools/types.js";
|
|
96
96
|
import type { OnboardingContext } from "../types/onboarding-context.js";
|
|
97
97
|
import type { AbortReason } from "../util/abort-reasons.js";
|
|
@@ -702,6 +702,9 @@ export class Conversation {
|
|
|
702
702
|
tools: toolDefs.length > 0 ? toolDefs : undefined,
|
|
703
703
|
toolExecutor: toolDefs.length > 0 ? toolExecutor : undefined,
|
|
704
704
|
resolveTools,
|
|
705
|
+
// A tool the registry marks exclusive (e.g. `advisor`) runs alone in its
|
|
706
|
+
// turn; the loop defers any sibling calls until the next turn.
|
|
707
|
+
isExclusiveTool: (name) => getTool(name)?.exclusive === true,
|
|
705
708
|
resolveConversationDir: () => {
|
|
706
709
|
const conv = getConversation(this.conversationId);
|
|
707
710
|
if (!conv) return null;
|
|
@@ -24,6 +24,12 @@ export const DISK_PRESSURE_CLEAR_THRESHOLD_PERCENT = 90;
|
|
|
24
24
|
// clears the warning state, which discards the banner's (state-scoped) dismissal
|
|
25
25
|
// so it re-appears the moment usage ticks back up.
|
|
26
26
|
export const DISK_PRESSURE_WARNING_CLEAR_THRESHOLD_PERCENT = 77;
|
|
27
|
+
// Absolute free-space floor (MiB). Regardless of usage percentage, never enter
|
|
28
|
+
// the warning or critical state while at least this much space remains free. A
|
|
29
|
+
// high usage percentage on a large disk can still leave many gigabytes
|
|
30
|
+
// available, where locking is pointless. Small volumes (where a high percentage
|
|
31
|
+
// genuinely means near-full) drop below the floor and remain protected.
|
|
32
|
+
export const DISK_PRESSURE_MIN_FREE_FLOOR_MB = 2048;
|
|
27
33
|
export const DISK_PRESSURE_CHECK_INTERVAL_MS = 60_000;
|
|
28
34
|
export const DISK_PRESSURE_OVERRIDE_CONFIRMATION = "I understand the risks";
|
|
29
35
|
export const DISK_PRESSURE_BLOCKED_CAPABILITIES = [
|
|
@@ -219,7 +225,10 @@ export function evaluateDiskPressureNow(): DiskPressureStatus {
|
|
|
219
225
|
const criticalThreshold = state.status.locked
|
|
220
226
|
? DISK_PRESSURE_CLEAR_THRESHOLD_PERCENT
|
|
221
227
|
: DISK_PRESSURE_THRESHOLD_PERCENT;
|
|
222
|
-
|
|
228
|
+
// Absolute free-space floor overrides the percentage thresholds: while ample
|
|
229
|
+
// space remains free, report "ok" no matter how full the volume is by percent.
|
|
230
|
+
const hasAmpleFreeSpace = usageInfo.freeMb >= DISK_PRESSURE_MIN_FREE_FLOOR_MB;
|
|
231
|
+
const isCritical = !hasAmpleFreeSpace && usagePercent >= criticalThreshold;
|
|
223
232
|
// Mirror the critical deadband for the warning band: once in an active
|
|
224
233
|
// pressure state (warning or critical), hold warning until usage clears the
|
|
225
234
|
// lower warning-clear threshold. Treating "critical" as active here matters
|
|
@@ -235,7 +244,8 @@ export function evaluateDiskPressureNow(): DiskPressureStatus {
|
|
|
235
244
|
const warningThreshold = inActivePressureState
|
|
236
245
|
? DISK_PRESSURE_WARNING_CLEAR_THRESHOLD_PERCENT
|
|
237
246
|
: DISK_PRESSURE_WARNING_THRESHOLD_PERCENT;
|
|
238
|
-
const isWarning =
|
|
247
|
+
const isWarning =
|
|
248
|
+
!hasAmpleFreeSpace && !isCritical && usagePercent >= warningThreshold;
|
|
239
249
|
const lastCheckedAt = new Date().toISOString();
|
|
240
250
|
|
|
241
251
|
if (!isCritical && !isWarning) {
|
|
@@ -8,10 +8,14 @@ let sendMessageArgs: Record<string, unknown> | null = null;
|
|
|
8
8
|
let responseText = "Use a channel-based worker pool; drain on shutdown.";
|
|
9
9
|
let sendMessageError: Error | null = null;
|
|
10
10
|
let providerResolves = true;
|
|
11
|
+
let providerSupportsWeb = false;
|
|
11
12
|
let streamDeltas: string[] = [];
|
|
12
13
|
|
|
13
14
|
const fakeProvider = {
|
|
14
15
|
name: "mock-advisor-provider",
|
|
16
|
+
get supportsNativeWebSearch() {
|
|
17
|
+
return providerSupportsWeb;
|
|
18
|
+
},
|
|
15
19
|
async sendMessage(messages: unknown, options: unknown) {
|
|
16
20
|
sendMessageArgs = { messages, options } as Record<string, unknown>;
|
|
17
21
|
if (sendMessageError) throw sendMessageError;
|
|
@@ -36,6 +40,14 @@ mock.module("../../../../providers/provider-send-message.js", () => ({
|
|
|
36
40
|
getConfiguredProvider: async () => (providerResolves ? fakeProvider : null),
|
|
37
41
|
}));
|
|
38
42
|
|
|
43
|
+
// Keep the tool tests focused on the consult wiring: stub the context pack so
|
|
44
|
+
// they don't reach into the registry / workspace / memory sources (those have
|
|
45
|
+
// their own coverage). The consult itself never imports this module.
|
|
46
|
+
mock.module("../context-pack.js", () => ({
|
|
47
|
+
buildAdvisorContext: async () => null,
|
|
48
|
+
deriveRecallQuery: () => null,
|
|
49
|
+
}));
|
|
50
|
+
|
|
39
51
|
const { consultAdvisor } = await import("../consult.js");
|
|
40
52
|
const advisorTool = (await import("../tools/advisor.js")).default;
|
|
41
53
|
const { recordSystemPrompt, recordMessages, resetAdvisorStateForTests } =
|
|
@@ -56,6 +68,7 @@ beforeEach(() => {
|
|
|
56
68
|
responseText = "Use a channel-based worker pool; drain on shutdown.";
|
|
57
69
|
sendMessageError = null;
|
|
58
70
|
providerResolves = true;
|
|
71
|
+
providerSupportsWeb = false;
|
|
59
72
|
streamDeltas = [];
|
|
60
73
|
resetAdvisorStateForTests();
|
|
61
74
|
});
|
|
@@ -108,6 +121,37 @@ describe("consultAdvisor", () => {
|
|
|
108
121
|
expect(options.systemPrompt).toContain("You are a coding agent.");
|
|
109
122
|
});
|
|
110
123
|
|
|
124
|
+
test("stays tool-less when the provider has no native web search", async () => {
|
|
125
|
+
providerSupportsWeb = false;
|
|
126
|
+
await consultAdvisor({ systemPrompt: null, messages: [userMsg("hi")] });
|
|
127
|
+
const options = sendMessageArgs?.options as { tools?: unknown };
|
|
128
|
+
expect(options.tools).toBeUndefined();
|
|
129
|
+
expect(optionConfig().tool_choice).toEqual({ type: "none" });
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
test("enables native web search when the provider supports it", async () => {
|
|
133
|
+
providerSupportsWeb = true;
|
|
134
|
+
await consultAdvisor({ systemPrompt: null, messages: [userMsg("hi")] });
|
|
135
|
+
|
|
136
|
+
const options = sendMessageArgs?.options as {
|
|
137
|
+
tools?: Array<{ name: string }>;
|
|
138
|
+
};
|
|
139
|
+
expect(options.tools?.map((t) => t.name)).toEqual(["web_search"]);
|
|
140
|
+
// tool_choice must not be `none`, or the provider suppresses its server tool.
|
|
141
|
+
expect(optionConfig().tool_choice).toEqual({ type: "auto" });
|
|
142
|
+
});
|
|
143
|
+
|
|
144
|
+
test("embeds the runtime context in the advisor system prompt", async () => {
|
|
145
|
+
await consultAdvisor({
|
|
146
|
+
systemPrompt: "You are a coding agent.",
|
|
147
|
+
messages: [userMsg("hi")],
|
|
148
|
+
runtimeContext: "## Available tools\n- bash — run commands",
|
|
149
|
+
});
|
|
150
|
+
const options = sendMessageArgs?.options as { systemPrompt: string };
|
|
151
|
+
expect(options.systemPrompt).toContain("<agent_runtime_context>");
|
|
152
|
+
expect(options.systemPrompt).toContain("- bash — run commands");
|
|
153
|
+
});
|
|
154
|
+
|
|
111
155
|
test("soft-fails when no provider is configured", async () => {
|
|
112
156
|
providerResolves = false;
|
|
113
157
|
const advice = await consultAdvisor({
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Personal-memory gating for the advisor context pack: NOW.md and PKB must only
|
|
3
|
+
* reach the advisor when the turn's trust admits personal memory (and, for
|
|
4
|
+
* NOW.md, when the scratchpad-injection toggle is on) — the same policy the
|
|
5
|
+
* runtime memory injectors apply. Without it, a low-risk advisor consult on a
|
|
6
|
+
* remote/trusted-contact turn could forward private content the main agent
|
|
7
|
+
* would never receive.
|
|
8
|
+
*
|
|
9
|
+
* Mocks are isolated to this file (the test runner runs each file in its own
|
|
10
|
+
* process), so the broad module stubs here don't leak into other suites.
|
|
11
|
+
*/
|
|
12
|
+
import { beforeEach, describe, expect, mock, test } from "bun:test";
|
|
13
|
+
|
|
14
|
+
let personalAllowed = false;
|
|
15
|
+
let scratchpadEnabled = true;
|
|
16
|
+
let gateArg: unknown = null;
|
|
17
|
+
|
|
18
|
+
mock.module("../../../../daemon/trust-context.js", () => ({
|
|
19
|
+
isPersonalMemoryAllowed: (trust: unknown) => {
|
|
20
|
+
gateArg = trust;
|
|
21
|
+
return personalAllowed;
|
|
22
|
+
},
|
|
23
|
+
}));
|
|
24
|
+
mock.module("../../../../daemon/now-scratchpad.js", () => ({
|
|
25
|
+
readNowScratchpad: () => "NOW-CONTENT",
|
|
26
|
+
}));
|
|
27
|
+
mock.module("../../../../memory/pkb/context.js", () => ({
|
|
28
|
+
readPkbContext: () => "PKB-CONTENT",
|
|
29
|
+
}));
|
|
30
|
+
mock.module("../../../../config/loader.js", () => ({
|
|
31
|
+
getConfig: () => ({
|
|
32
|
+
memory: {
|
|
33
|
+
retrieval: { scratchpadInjection: { enabled: scratchpadEnabled } },
|
|
34
|
+
},
|
|
35
|
+
llm: {},
|
|
36
|
+
}),
|
|
37
|
+
}));
|
|
38
|
+
// Keep every other section empty so the assertions isolate NOW.md / PKB.
|
|
39
|
+
mock.module("../../../../daemon/conversation-workspace.js", () => ({
|
|
40
|
+
resolveWorkspaceTopLevelContext: () => null,
|
|
41
|
+
}));
|
|
42
|
+
mock.module("../../../../daemon/conversation-runtime-assembly.js", () => ({
|
|
43
|
+
buildActiveDocuments: () => null,
|
|
44
|
+
}));
|
|
45
|
+
mock.module("../../../../runtime/capabilities.js", () => ({
|
|
46
|
+
resolveCapabilities: () => ({ canAccessMemory: false }),
|
|
47
|
+
}));
|
|
48
|
+
mock.module("../../../../config/skills.js", () => ({
|
|
49
|
+
loadSkillCatalog: () => [],
|
|
50
|
+
}));
|
|
51
|
+
|
|
52
|
+
const { buildAdvisorContext } = await import("../context-pack.js");
|
|
53
|
+
|
|
54
|
+
const sources = {
|
|
55
|
+
conversationId: "c1",
|
|
56
|
+
workingDir: "/tmp",
|
|
57
|
+
// A remote, non-guardian per-turn snapshot — the case the live-state read
|
|
58
|
+
// could have wrongly elevated.
|
|
59
|
+
trustClass: "unknown" as const,
|
|
60
|
+
sourceChannel: "telegram",
|
|
61
|
+
transcript: [],
|
|
62
|
+
allowedToolNames: new Set<string>(),
|
|
63
|
+
};
|
|
64
|
+
|
|
65
|
+
beforeEach(() => {
|
|
66
|
+
personalAllowed = false;
|
|
67
|
+
scratchpadEnabled = true;
|
|
68
|
+
gateArg = null;
|
|
69
|
+
});
|
|
70
|
+
|
|
71
|
+
describe("advisor context pack — personal-memory gating", () => {
|
|
72
|
+
test("withholds NOW.md and PKB when personal memory is disallowed", async () => {
|
|
73
|
+
personalAllowed = false;
|
|
74
|
+
const ctx = (await buildAdvisorContext(sources)) ?? "";
|
|
75
|
+
expect(ctx).not.toContain("NOW-CONTENT");
|
|
76
|
+
expect(ctx).not.toContain("PKB-CONTENT");
|
|
77
|
+
});
|
|
78
|
+
|
|
79
|
+
test("includes NOW.md and PKB when allowed and the scratchpad toggle is on", async () => {
|
|
80
|
+
personalAllowed = true;
|
|
81
|
+
scratchpadEnabled = true;
|
|
82
|
+
const ctx = await buildAdvisorContext(sources);
|
|
83
|
+
expect(ctx).toContain("NOW-CONTENT");
|
|
84
|
+
expect(ctx).toContain("PKB-CONTENT");
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
test("withholds NOW.md when the scratchpad toggle is off, PKB still allowed", async () => {
|
|
88
|
+
personalAllowed = true;
|
|
89
|
+
scratchpadEnabled = false;
|
|
90
|
+
const ctx = (await buildAdvisorContext(sources)) ?? "";
|
|
91
|
+
expect(ctx).not.toContain("NOW-CONTENT");
|
|
92
|
+
expect(ctx).toContain("PKB-CONTENT");
|
|
93
|
+
});
|
|
94
|
+
|
|
95
|
+
test("feeds the gate the per-turn trust snapshot, not live conversation state", async () => {
|
|
96
|
+
personalAllowed = true;
|
|
97
|
+
await buildAdvisorContext(sources);
|
|
98
|
+
// The gate must see exactly the snapshot threaded from ToolContext —
|
|
99
|
+
// trustClass + executionChannel — so a concurrent live-trust change can't
|
|
100
|
+
// elevate this invocation.
|
|
101
|
+
expect(gateArg).toEqual({
|
|
102
|
+
sourceChannel: "telegram",
|
|
103
|
+
trustClass: "unknown",
|
|
104
|
+
});
|
|
105
|
+
});
|
|
106
|
+
});
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import { describe, expect, test } from "bun:test";
|
|
2
|
+
|
|
3
|
+
import type { Message } from "../../../../providers/types.js";
|
|
4
|
+
import { buildAdvisorContext, deriveRecallQuery } from "../context-pack.js";
|
|
5
|
+
|
|
6
|
+
const userMsg = (t: string): Message => ({
|
|
7
|
+
role: "user",
|
|
8
|
+
content: [{ type: "text", text: t }],
|
|
9
|
+
});
|
|
10
|
+
|
|
11
|
+
describe("deriveRecallQuery", () => {
|
|
12
|
+
test("returns the most recent user message text", () => {
|
|
13
|
+
const query = deriveRecallQuery([
|
|
14
|
+
userMsg("the original task"),
|
|
15
|
+
{ role: "assistant", content: [{ type: "text", text: "ok" }] },
|
|
16
|
+
userMsg("the latest question"),
|
|
17
|
+
]);
|
|
18
|
+
expect(query).toBe("the latest question");
|
|
19
|
+
});
|
|
20
|
+
|
|
21
|
+
test("returns null when there is no user text", () => {
|
|
22
|
+
expect(
|
|
23
|
+
deriveRecallQuery([
|
|
24
|
+
{ role: "assistant", content: [{ type: "text", text: "hi" }] },
|
|
25
|
+
]),
|
|
26
|
+
).toBeNull();
|
|
27
|
+
expect(deriveRecallQuery([])).toBeNull();
|
|
28
|
+
});
|
|
29
|
+
});
|
|
30
|
+
|
|
31
|
+
describe("buildAdvisorContext", () => {
|
|
32
|
+
test("lists the agent's available tools, skipping the advisor itself", async () => {
|
|
33
|
+
const context = await buildAdvisorContext({
|
|
34
|
+
conversationId: "ctx-1",
|
|
35
|
+
workingDir: "/tmp/does-not-exist",
|
|
36
|
+
allowedToolNames: new Set(["bash", "advisor", "read_file"]),
|
|
37
|
+
trustClass: "unknown",
|
|
38
|
+
transcript: [userMsg("hi")],
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
expect(context).toContain("## Available tools");
|
|
42
|
+
expect(context).toContain("- bash");
|
|
43
|
+
expect(context).toContain("- read_file");
|
|
44
|
+
// The advisor advises; it never tells the agent to consult itself.
|
|
45
|
+
expect(context).not.toContain("- advisor");
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
test("omits the tools section when no tools are available", async () => {
|
|
49
|
+
const context = await buildAdvisorContext({
|
|
50
|
+
conversationId: "ctx-2",
|
|
51
|
+
workingDir: "/tmp/does-not-exist",
|
|
52
|
+
allowedToolNames: new Set(),
|
|
53
|
+
trustClass: "unknown",
|
|
54
|
+
transcript: [],
|
|
55
|
+
});
|
|
56
|
+
// Other sources (e.g. the skills catalog) may still contribute, but with no
|
|
57
|
+
// allowed tools the tools section must not appear.
|
|
58
|
+
if (context !== null) expect(context).not.toContain("## Available tools");
|
|
59
|
+
});
|
|
60
|
+
});
|