@vellumai/assistant 0.4.19 → 0.4.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/__tests__/system-prompt.test.ts +2 -7
- package/src/__tests__/tool-execution-abort-cleanup.test.ts +0 -1
- package/src/agent/loop.ts +324 -163
- package/src/config/bundled-skills/app-builder/SKILL.md +7 -5
- package/src/config/bundled-skills/app-builder/TOOLS.json +2 -2
- package/src/config/system-prompt.ts +563 -539
- package/src/daemon/session-surfaces.ts +28 -0
- package/src/daemon/session.ts +255 -191
- package/src/daemon/tool-side-effects.ts +3 -13
- package/src/security/secure-keys.ts +27 -3
- package/src/tools/apps/definitions.ts +5 -0
- package/src/tools/apps/executors.ts +18 -22
- package/src/__tests__/response-tier.test.ts +0 -195
- package/src/daemon/response-tier.ts +0 -250
|
@@ -10,15 +10,11 @@
|
|
|
10
10
|
import { join } from 'node:path';
|
|
11
11
|
|
|
12
12
|
import { updatePublishedAppDeployment } from '../services/published-app-updater.js';
|
|
13
|
-
import { openAppViaSurface } from '../tools/apps/open-proxy.js';
|
|
14
13
|
import type { ToolExecutionResult } from '../tools/types.js';
|
|
15
14
|
import { getWorkspaceDir } from '../util/platform.js';
|
|
16
15
|
import { isDoordashCommand, updateDoordashProgress } from './doordash-steps.js';
|
|
17
16
|
import type { ServerMessage } from './ipc-protocol.js';
|
|
18
|
-
import {
|
|
19
|
-
refreshSurfacesForApp,
|
|
20
|
-
surfaceProxyResolver,
|
|
21
|
-
} from './session-surfaces.js';
|
|
17
|
+
import { refreshSurfacesForApp } from './session-surfaces.js';
|
|
22
18
|
import type { ToolSetupContext } from './session-tool-setup.js';
|
|
23
19
|
|
|
24
20
|
// ── Types ────────────────────────────────────────────────────────────
|
|
@@ -37,20 +33,16 @@ export type PostExecutionHook = (
|
|
|
37
33
|
|
|
38
34
|
// ── Helpers ──────────────────────────────────────────────────────────
|
|
39
35
|
|
|
40
|
-
/** Shared logic for refreshing app surfaces, broadcasting changes, and auto-
|
|
36
|
+
/** Shared logic for refreshing app surfaces, broadcasting changes, and triggering auto-deploy. */
|
|
41
37
|
function handleAppChange(
|
|
42
38
|
ctx: ToolSetupContext,
|
|
43
39
|
appId: string,
|
|
44
40
|
broadcastToAllClients: ((msg: ServerMessage) => void) | undefined,
|
|
45
41
|
opts?: { fileChange?: boolean; status?: string },
|
|
46
42
|
): void {
|
|
47
|
-
|
|
43
|
+
refreshSurfacesForApp(ctx, appId, opts);
|
|
48
44
|
broadcastToAllClients?.({ type: 'app_files_changed', appId });
|
|
49
45
|
void updatePublishedAppDeployment(appId);
|
|
50
|
-
if (!refreshed && !ctx.hasNoClient && !ctx.headlessLock) {
|
|
51
|
-
const resolver = (tn: string, pi: Record<string, unknown>) => surfaceProxyResolver(ctx, tn, pi);
|
|
52
|
-
void openAppViaSurface(appId, resolver);
|
|
53
|
-
}
|
|
54
46
|
}
|
|
55
47
|
|
|
56
48
|
// ── Registry ─────────────────────────────────────────────────────────
|
|
@@ -82,7 +74,6 @@ registerHook('app_create', (_name, _input, result, { ctx, broadcastToAllClients
|
|
|
82
74
|
});
|
|
83
75
|
|
|
84
76
|
// Auto-refresh workspace surfaces when a persisted app is updated.
|
|
85
|
-
// If no surface is currently showing the app, auto-open it.
|
|
86
77
|
registerHook('app_update', (_name, input, _result, { ctx, broadcastToAllClients }) => {
|
|
87
78
|
const appId = input.app_id as string | undefined;
|
|
88
79
|
if (appId) {
|
|
@@ -109,7 +100,6 @@ registerHook(
|
|
|
109
100
|
);
|
|
110
101
|
|
|
111
102
|
// Auto-refresh workspace surfaces when app files are edited.
|
|
112
|
-
// If no surface is currently showing the app, auto-open it.
|
|
113
103
|
registerHook(
|
|
114
104
|
['app_file_edit', 'app_file_write'],
|
|
115
105
|
(_name, input, _result, { ctx, broadcastToAllClients }) => {
|
|
@@ -158,7 +158,13 @@ export function setSecureKey(account: string, value: string): boolean {
|
|
|
158
158
|
// keychain first) does not read an outdated value.
|
|
159
159
|
if (result && downgradedFromKeychain && getBackend() === "encrypted") {
|
|
160
160
|
keychainMissCache.delete(account);
|
|
161
|
-
try {
|
|
161
|
+
try {
|
|
162
|
+
// Only attempt deletion if the key actually exists in keychain to
|
|
163
|
+
// avoid spawning a subprocess on every write.
|
|
164
|
+
if (keychain.getKey(account) !== undefined) {
|
|
165
|
+
keychain.deleteKey(account);
|
|
166
|
+
}
|
|
167
|
+
} catch { /* best-effort */ }
|
|
162
168
|
}
|
|
163
169
|
return result;
|
|
164
170
|
}
|
|
@@ -291,7 +297,14 @@ export async function setSecureKeyAsync(
|
|
|
291
297
|
// Clean up stale keychain entry (mirrors setSecureKey logic).
|
|
292
298
|
if (result && downgradedFromKeychain) {
|
|
293
299
|
keychainMissCache.delete(account);
|
|
294
|
-
try {
|
|
300
|
+
try {
|
|
301
|
+
// Only attempt deletion if the key actually exists in keychain to
|
|
302
|
+
// avoid spawning a subprocess on every write.
|
|
303
|
+
const exists = await keychain.getKeyAsync(account);
|
|
304
|
+
if (exists !== undefined) {
|
|
305
|
+
await keychain.deleteKeyAsync(account);
|
|
306
|
+
}
|
|
307
|
+
} catch { /* best-effort */ }
|
|
295
308
|
}
|
|
296
309
|
return result;
|
|
297
310
|
}
|
|
@@ -304,7 +317,18 @@ export async function setSecureKeyAsync(
|
|
|
304
317
|
);
|
|
305
318
|
resolvedBackend = "encrypted";
|
|
306
319
|
downgradedFromKeychain = true;
|
|
307
|
-
|
|
320
|
+
const fallbackResult = encryptedStore.setKey(account, value);
|
|
321
|
+
// Clean up stale keychain entry after runtime downgrade
|
|
322
|
+
if (fallbackResult) {
|
|
323
|
+
keychainMissCache.delete(account);
|
|
324
|
+
try {
|
|
325
|
+
const exists = await keychain.getKeyAsync(account);
|
|
326
|
+
if (exists !== undefined) {
|
|
327
|
+
await keychain.deleteKeyAsync(account);
|
|
328
|
+
}
|
|
329
|
+
} catch { /* best-effort */ }
|
|
330
|
+
}
|
|
331
|
+
return fallbackResult;
|
|
308
332
|
}
|
|
309
333
|
return result;
|
|
310
334
|
}
|
|
@@ -43,6 +43,11 @@ const appOpenTool: Tool = {
|
|
|
43
43
|
type: 'string',
|
|
44
44
|
description: 'The ID of the app to open',
|
|
45
45
|
},
|
|
46
|
+
open_mode: {
|
|
47
|
+
type: 'string',
|
|
48
|
+
enum: ['preview', 'workspace'],
|
|
49
|
+
description: "Display mode. 'preview' shows an inline preview card in chat. 'workspace' opens the full app in a workspace panel. Defaults to 'workspace'.",
|
|
50
|
+
},
|
|
46
51
|
},
|
|
47
52
|
required: ['app_id'],
|
|
48
53
|
},
|
|
@@ -11,7 +11,6 @@
|
|
|
11
11
|
import { setHomeBaseAppLink } from '../../home-base/app-link-store.js';
|
|
12
12
|
import type { AppDefinition } from '../../memory/app-store.js';
|
|
13
13
|
import type { EditEngineResult } from '../../memory/app-store.js';
|
|
14
|
-
import { openAppViaSurface } from './open-proxy.js';
|
|
15
14
|
|
|
16
15
|
// ---------------------------------------------------------------------------
|
|
17
16
|
// Shared result type
|
|
@@ -123,33 +122,30 @@ export async function executeAppCreate(
|
|
|
123
122
|
setHomeBaseAppLink(app.id, 'personalized');
|
|
124
123
|
}
|
|
125
124
|
|
|
126
|
-
//
|
|
125
|
+
// Emit the inline preview card via the proxy without opening a workspace panel.
|
|
126
|
+
// open_mode: "preview" signals to the client that this should be shown inline only.
|
|
127
127
|
if (autoOpen && proxyToolResolver) {
|
|
128
128
|
const createPreview = { ...(preview ?? {}), context: 'app_create' as const };
|
|
129
|
-
const extraInput = { preview: createPreview };
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
129
|
+
const extraInput = { preview: createPreview, open_mode: 'preview' };
|
|
130
|
+
try {
|
|
131
|
+
const openResult = await proxyToolResolver('app_open', { app_id: app.id, ...extraInput });
|
|
132
|
+
if (openResult.isError) {
|
|
133
|
+
return {
|
|
134
|
+
content: JSON.stringify({ ...app, auto_opened: false, auto_open_error: openResult.content }),
|
|
135
|
+
isError: false,
|
|
136
|
+
};
|
|
137
|
+
}
|
|
138
|
+
return {
|
|
139
|
+
content: JSON.stringify({ ...app, auto_opened: true, open_result: openResult.content }),
|
|
140
|
+
isError: false,
|
|
141
|
+
};
|
|
142
|
+
} catch {
|
|
143
|
+
// Preview emission failure is non-fatal — the app was created successfully.
|
|
135
144
|
return {
|
|
136
|
-
content: JSON.stringify({
|
|
137
|
-
...app,
|
|
138
|
-
auto_opened: true,
|
|
139
|
-
open_result: openResultText,
|
|
140
|
-
}),
|
|
145
|
+
content: JSON.stringify({ ...app, auto_opened: false, auto_open_error: 'Failed to auto-open app. Use app_open to open it manually.' }),
|
|
141
146
|
isError: false,
|
|
142
147
|
};
|
|
143
148
|
}
|
|
144
|
-
|
|
145
|
-
return {
|
|
146
|
-
content: JSON.stringify({
|
|
147
|
-
...app,
|
|
148
|
-
auto_opened: false,
|
|
149
|
-
auto_open_error: openResultText,
|
|
150
|
-
}),
|
|
151
|
-
isError: false,
|
|
152
|
-
};
|
|
153
149
|
}
|
|
154
150
|
|
|
155
151
|
return { content: JSON.stringify(app), isError: false };
|
|
@@ -1,195 +0,0 @@
|
|
|
1
|
-
import { describe, expect, test } from "bun:test";
|
|
2
|
-
|
|
3
|
-
import {
|
|
4
|
-
classifyResponseTierAsync,
|
|
5
|
-
classifyResponseTierDetailed,
|
|
6
|
-
resolveWithHint,
|
|
7
|
-
type SessionTierHint,
|
|
8
|
-
type TierClassification,
|
|
9
|
-
} from "../daemon/response-tier.js";
|
|
10
|
-
|
|
11
|
-
// ── classifyResponseTierDetailed ──────────────────────────────────────
|
|
12
|
-
|
|
13
|
-
describe("classifyResponseTierDetailed", () => {
|
|
14
|
-
describe("high confidence → high tier", () => {
|
|
15
|
-
test("long messages (>500 chars)", () => {
|
|
16
|
-
const result = classifyResponseTierDetailed("x".repeat(501), 0);
|
|
17
|
-
expect(result.tier).toBe("high");
|
|
18
|
-
expect(result.confidence).toBe("high");
|
|
19
|
-
});
|
|
20
|
-
|
|
21
|
-
test("code fences", () => {
|
|
22
|
-
const result = classifyResponseTierDetailed(
|
|
23
|
-
"Here is some code:\n```\nconst x = 1;\n```",
|
|
24
|
-
0,
|
|
25
|
-
);
|
|
26
|
-
expect(result.tier).toBe("high");
|
|
27
|
-
expect(result.confidence).toBe("high");
|
|
28
|
-
});
|
|
29
|
-
|
|
30
|
-
test("file paths", () => {
|
|
31
|
-
const result = classifyResponseTierDetailed("Look at ./src/index.ts", 0);
|
|
32
|
-
expect(result.tier).toBe("high");
|
|
33
|
-
expect(result.confidence).toBe("high");
|
|
34
|
-
});
|
|
35
|
-
|
|
36
|
-
test("multi-paragraph", () => {
|
|
37
|
-
const result = classifyResponseTierDetailed(
|
|
38
|
-
"First paragraph.\n\nSecond paragraph.",
|
|
39
|
-
0,
|
|
40
|
-
);
|
|
41
|
-
expect(result.tier).toBe("high");
|
|
42
|
-
expect(result.confidence).toBe("high");
|
|
43
|
-
});
|
|
44
|
-
|
|
45
|
-
test("build keyword imperatives", () => {
|
|
46
|
-
const result = classifyResponseTierDetailed(
|
|
47
|
-
"Build a REST API for user management",
|
|
48
|
-
0,
|
|
49
|
-
);
|
|
50
|
-
expect(result.tier).toBe("high");
|
|
51
|
-
expect(result.confidence).toBe("high");
|
|
52
|
-
});
|
|
53
|
-
});
|
|
54
|
-
|
|
55
|
-
describe("high confidence → low tier", () => {
|
|
56
|
-
test("pure greetings under 40 chars", () => {
|
|
57
|
-
const result = classifyResponseTierDetailed("hey", 0);
|
|
58
|
-
expect(result.tier).toBe("low");
|
|
59
|
-
expect(result.confidence).toBe("high");
|
|
60
|
-
});
|
|
61
|
-
|
|
62
|
-
test("short messages without build keywords", () => {
|
|
63
|
-
const result = classifyResponseTierDetailed("sounds good", 0);
|
|
64
|
-
expect(result.tier).toBe("low");
|
|
65
|
-
expect(result.confidence).toBe("high");
|
|
66
|
-
});
|
|
67
|
-
});
|
|
68
|
-
|
|
69
|
-
describe("low confidence → medium tier", () => {
|
|
70
|
-
test("questions with build keywords fall to medium/low-confidence", () => {
|
|
71
|
-
const result = classifyResponseTierDetailed(
|
|
72
|
-
"how do I build authentication?",
|
|
73
|
-
0,
|
|
74
|
-
);
|
|
75
|
-
expect(result.tier).toBe("medium");
|
|
76
|
-
expect(result.confidence).toBe("low");
|
|
77
|
-
});
|
|
78
|
-
|
|
79
|
-
test("ambiguous medium-length message", () => {
|
|
80
|
-
const result = classifyResponseTierDetailed(
|
|
81
|
-
"what do you think about the current approach to handling errors in the codebase?",
|
|
82
|
-
0,
|
|
83
|
-
);
|
|
84
|
-
expect(result.tier).toBe("medium");
|
|
85
|
-
expect(result.confidence).toBe("low");
|
|
86
|
-
});
|
|
87
|
-
});
|
|
88
|
-
});
|
|
89
|
-
|
|
90
|
-
// ── resolveWithHint ───────────────────────────────────────────────────
|
|
91
|
-
|
|
92
|
-
describe("resolveWithHint", () => {
|
|
93
|
-
const lowConfMedium: TierClassification = {
|
|
94
|
-
tier: "medium",
|
|
95
|
-
reason: "default",
|
|
96
|
-
confidence: "low",
|
|
97
|
-
};
|
|
98
|
-
const highConfLow: TierClassification = {
|
|
99
|
-
tier: "low",
|
|
100
|
-
reason: "short_no_keywords",
|
|
101
|
-
confidence: "high",
|
|
102
|
-
};
|
|
103
|
-
const highConfHigh: TierClassification = {
|
|
104
|
-
tier: "high",
|
|
105
|
-
reason: "build_keyword",
|
|
106
|
-
confidence: "high",
|
|
107
|
-
};
|
|
108
|
-
|
|
109
|
-
test("high confidence: ignores hint that would downgrade", () => {
|
|
110
|
-
const hint: SessionTierHint = {
|
|
111
|
-
tier: "low",
|
|
112
|
-
turn: 5,
|
|
113
|
-
timestamp: Date.now(),
|
|
114
|
-
};
|
|
115
|
-
expect(resolveWithHint(highConfHigh, hint, 6)).toBe("high");
|
|
116
|
-
});
|
|
117
|
-
|
|
118
|
-
test("high confidence: upgrades when hint is higher", () => {
|
|
119
|
-
const hint: SessionTierHint = {
|
|
120
|
-
tier: "medium",
|
|
121
|
-
turn: 5,
|
|
122
|
-
timestamp: Date.now(),
|
|
123
|
-
};
|
|
124
|
-
expect(resolveWithHint(highConfLow, hint, 6)).toBe("medium");
|
|
125
|
-
});
|
|
126
|
-
|
|
127
|
-
test("high confidence: upgrades to high when hint is high", () => {
|
|
128
|
-
const hint: SessionTierHint = {
|
|
129
|
-
tier: "high",
|
|
130
|
-
turn: 5,
|
|
131
|
-
timestamp: Date.now(),
|
|
132
|
-
};
|
|
133
|
-
expect(resolveWithHint(highConfLow, hint, 6)).toBe("high");
|
|
134
|
-
});
|
|
135
|
-
|
|
136
|
-
test("returns regex tier when no hint available", () => {
|
|
137
|
-
expect(resolveWithHint(lowConfMedium, null, 0)).toBe("medium");
|
|
138
|
-
});
|
|
139
|
-
|
|
140
|
-
test("defers to hint when confidence is low and hint is fresh", () => {
|
|
141
|
-
const hint: SessionTierHint = {
|
|
142
|
-
tier: "high",
|
|
143
|
-
turn: 5,
|
|
144
|
-
timestamp: Date.now(),
|
|
145
|
-
};
|
|
146
|
-
expect(resolveWithHint(lowConfMedium, hint, 6)).toBe("high");
|
|
147
|
-
});
|
|
148
|
-
|
|
149
|
-
test("ignores stale hint (too many turns old)", () => {
|
|
150
|
-
const hint: SessionTierHint = {
|
|
151
|
-
tier: "high",
|
|
152
|
-
turn: 0,
|
|
153
|
-
timestamp: Date.now(),
|
|
154
|
-
};
|
|
155
|
-
// 5 turns later exceeds HINT_MAX_TURN_AGE of 4
|
|
156
|
-
expect(resolveWithHint(lowConfMedium, hint, 5)).toBe("medium");
|
|
157
|
-
});
|
|
158
|
-
|
|
159
|
-
test("ignores stale hint (too old by time)", () => {
|
|
160
|
-
const fiveMinutesAgo = Date.now() - 5 * 60 * 1000 - 1;
|
|
161
|
-
const hint: SessionTierHint = {
|
|
162
|
-
tier: "high",
|
|
163
|
-
turn: 3,
|
|
164
|
-
timestamp: fiveMinutesAgo,
|
|
165
|
-
};
|
|
166
|
-
expect(resolveWithHint(lowConfMedium, hint, 4)).toBe("medium");
|
|
167
|
-
});
|
|
168
|
-
|
|
169
|
-
test("uses hint at exact boundary (4 turns, within time)", () => {
|
|
170
|
-
const hint: SessionTierHint = {
|
|
171
|
-
tier: "high",
|
|
172
|
-
turn: 1,
|
|
173
|
-
timestamp: Date.now(),
|
|
174
|
-
};
|
|
175
|
-
// 5 - 1 = 4, which is not > 4, so hint is still valid
|
|
176
|
-
expect(resolveWithHint(lowConfMedium, hint, 5)).toBe("high");
|
|
177
|
-
});
|
|
178
|
-
});
|
|
179
|
-
|
|
180
|
-
// ── classifyResponseTierAsync ─────────────────────────────────────────
|
|
181
|
-
|
|
182
|
-
describe("classifyResponseTierAsync", () => {
|
|
183
|
-
test("returns null when no provider is available", async () => {
|
|
184
|
-
// getConfiguredProvider returns null when no API key is set
|
|
185
|
-
// We can't easily mock it here, but we can verify the function handles it
|
|
186
|
-
const result = await classifyResponseTierAsync(["hello"]);
|
|
187
|
-
// In test environment without a configured provider, should return null
|
|
188
|
-
expect(
|
|
189
|
-
result === undefined ||
|
|
190
|
-
result === "low" ||
|
|
191
|
-
result === "medium" ||
|
|
192
|
-
result === "high",
|
|
193
|
-
).toBe(true);
|
|
194
|
-
});
|
|
195
|
-
});
|
|
@@ -1,250 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Per-turn response tier classification.
|
|
3
|
-
*
|
|
4
|
-
* Classifies each user message into a tier that controls:
|
|
5
|
-
* - maxTokens budget for the LLM response
|
|
6
|
-
* - Which system prompt sections are included
|
|
7
|
-
*
|
|
8
|
-
* Two layers:
|
|
9
|
-
* 1. Deterministic regex/heuristic (zero latency, runs every turn)
|
|
10
|
-
* 2. Background Haiku classification (fire-and-forget, advises future turns)
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
import { createTimeout, extractText, getConfiguredProvider, userMessage } from '../providers/provider-send-message.js';
|
|
14
|
-
import { getLogger } from '../util/logger.js';
|
|
15
|
-
|
|
16
|
-
const log = getLogger('response-tier');
|
|
17
|
-
|
|
18
|
-
export type ResponseTier = 'low' | 'medium' | 'high';
|
|
19
|
-
|
|
20
|
-
export type TierConfidence = 'high' | 'low';
|
|
21
|
-
|
|
22
|
-
export interface TierClassification {
|
|
23
|
-
tier: ResponseTier;
|
|
24
|
-
reason: string;
|
|
25
|
-
confidence: TierConfidence;
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
export interface SessionTierHint {
|
|
29
|
-
tier: ResponseTier;
|
|
30
|
-
turn: number;
|
|
31
|
-
timestamp: number;
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
// ── Patterns ──────────────────────────────────────────────────────────
|
|
35
|
-
|
|
36
|
-
const GREETING_PATTERNS = /^(hey|hi|hello|yo|sup|hiya|howdy|what'?s up|thanks|thank you|thx|ty|cheers|what can you|who are you|how are you)\b/i;
|
|
37
|
-
|
|
38
|
-
const BUILD_KEYWORDS = /\b(build|implement|create|refactor|debug|deploy|migrate|scaffold|architect|redesign|generate|write|develop|fix|convert|add|remove|update|modify|change|delete|replace|integrate|setup|install|configure|optimize|rewrite)\b/i;
|
|
39
|
-
|
|
40
|
-
const SURFACE_ACTION = /^\[User action on \w+ surface:/;
|
|
41
|
-
const CODE_FENCE = /```/;
|
|
42
|
-
const FILE_PATH = /(?:^|[\s"'(])(?:\/|~\/|\.\/)\S/;
|
|
43
|
-
const MULTI_PARAGRAPH = /\n\s*\n/;
|
|
44
|
-
|
|
45
|
-
// ── Confidence thresholds ─────────────────────────────────────────────
|
|
46
|
-
|
|
47
|
-
const HINT_MAX_TURN_AGE = 4;
|
|
48
|
-
const HINT_MAX_AGE_MS = 5 * 60 * 1000; // 5 minutes
|
|
49
|
-
|
|
50
|
-
/**
|
|
51
|
-
* Classify the complexity tier of a user message (backward-compat wrapper).
|
|
52
|
-
*/
|
|
53
|
-
export function classifyResponseTier(message: string, _turnCount: number): ResponseTier {
|
|
54
|
-
return classifyResponseTierDetailed(message, _turnCount).tier;
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
/**
|
|
58
|
-
* Classify with confidence scoring. High confidence means the regex
|
|
59
|
-
* matched an unambiguous signal; low confidence means the message
|
|
60
|
-
* fell through to the default medium bucket.
|
|
61
|
-
*/
|
|
62
|
-
export function classifyResponseTierDetailed(message: string, _turnCount: number): TierClassification {
|
|
63
|
-
const trimmed = message.trim();
|
|
64
|
-
const len = trimmed.length;
|
|
65
|
-
|
|
66
|
-
const isPoliteImperative = /^(can|could|would|will)\s+you\s+/i.test(trimmed) && BUILD_KEYWORDS.test(trimmed);
|
|
67
|
-
|
|
68
|
-
const isQuestion = !isPoliteImperative && (
|
|
69
|
-
/\?$/.test(trimmed) || /^(what|who|where|when|why|how|which|can|could|should|would|is|are|do|does|did|will|has|have)\b/i.test(trimmed)
|
|
70
|
-
);
|
|
71
|
-
|
|
72
|
-
// ── High signals (any match → high tier, high confidence) ──
|
|
73
|
-
if (SURFACE_ACTION.test(trimmed)) return tagged('high', 'surface_action', 'high');
|
|
74
|
-
if (len > 500) return tagged('high', 'length>500', 'high');
|
|
75
|
-
if (CODE_FENCE.test(trimmed)) return tagged('high', 'code_fence', 'high');
|
|
76
|
-
if (FILE_PATH.test(trimmed)) return tagged('high', 'file_path', 'high');
|
|
77
|
-
if (MULTI_PARAGRAPH.test(trimmed)) return tagged('high', 'multi_paragraph', 'high');
|
|
78
|
-
if (!isQuestion && BUILD_KEYWORDS.test(trimmed)) return tagged('high', 'build_keyword', 'high');
|
|
79
|
-
|
|
80
|
-
// ── Low signals (any match → low tier, high confidence) ──
|
|
81
|
-
if (GREETING_PATTERNS.test(trimmed) && len < 40 && !BUILD_KEYWORDS.test(trimmed)) return tagged('low', 'greeting', 'high');
|
|
82
|
-
if (len < 80 && !BUILD_KEYWORDS.test(trimmed)) return tagged('low', 'short_no_keywords', 'high');
|
|
83
|
-
|
|
84
|
-
// ── Default (low confidence — ambiguous) ──
|
|
85
|
-
return tagged('medium', 'default', 'low');
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
const TIER_RANK: Record<ResponseTier, number> = { low: 0, medium: 1, high: 2 };
|
|
89
|
-
|
|
90
|
-
/**
|
|
91
|
-
* Resolve the final tier using the regex classification and an optional
|
|
92
|
-
* session hint from a previous background Haiku call.
|
|
93
|
-
*
|
|
94
|
-
* - When confidence is low, defer to a fresh hint (upgrade or downgrade).
|
|
95
|
-
* - When confidence is high, still upgrade if the hint is higher (the
|
|
96
|
-
* conversation trajectory outranks a short-message heuristic), but
|
|
97
|
-
* never downgrade.
|
|
98
|
-
*/
|
|
99
|
-
export function resolveWithHint(
|
|
100
|
-
classification: TierClassification,
|
|
101
|
-
hint: SessionTierHint | null,
|
|
102
|
-
currentTurn: number,
|
|
103
|
-
): ResponseTier {
|
|
104
|
-
if (!hint) {
|
|
105
|
-
return classification.tier;
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
const turnAge = currentTurn - hint.turn;
|
|
109
|
-
const timeAge = Date.now() - hint.timestamp;
|
|
110
|
-
|
|
111
|
-
if (turnAge > HINT_MAX_TURN_AGE || timeAge > HINT_MAX_AGE_MS) {
|
|
112
|
-
log.debug({ turnAge, timeAge }, 'Session tier hint is stale, ignoring');
|
|
113
|
-
return classification.tier;
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
if (classification.confidence === 'low') {
|
|
117
|
-
// Low confidence: fully defer to hint
|
|
118
|
-
log.info(
|
|
119
|
-
{ regexTier: classification.tier, hintTier: hint.tier, turnAge },
|
|
120
|
-
'Deferring to session tier hint (low confidence)',
|
|
121
|
-
);
|
|
122
|
-
return hint.tier;
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
// High confidence: only upgrade, never downgrade
|
|
126
|
-
if (TIER_RANK[hint.tier] > TIER_RANK[classification.tier]) {
|
|
127
|
-
log.info(
|
|
128
|
-
{ regexTier: classification.tier, hintTier: hint.tier, turnAge },
|
|
129
|
-
'Upgrading tier via session hint',
|
|
130
|
-
);
|
|
131
|
-
return hint.tier;
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
return classification.tier;
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
// ── Async Haiku classification ────────────────────────────────────────
|
|
138
|
-
|
|
139
|
-
const ASYNC_CLASSIFICATION_TIMEOUT_MS = 8_000;
|
|
140
|
-
|
|
141
|
-
const TIER_SYSTEM_PROMPT =
|
|
142
|
-
'Classify the overall complexity of this conversation. ' +
|
|
143
|
-
'Output ONLY one word, nothing else.\n' +
|
|
144
|
-
'low — greetings, thanks, short acknowledgements\n' +
|
|
145
|
-
'medium — simple questions, short requests, clarifications\n' +
|
|
146
|
-
'high — build/implement/refactor requests, multi-step tasks, code-heavy work';
|
|
147
|
-
|
|
148
|
-
/**
|
|
149
|
-
* Fire-and-forget Haiku call to classify the conversation trajectory.
|
|
150
|
-
* Returns the classified tier, or undefined when no provider is configured
|
|
151
|
-
* or on any failure.
|
|
152
|
-
*/
|
|
153
|
-
export async function classifyResponseTierAsync(
|
|
154
|
-
recentUserTexts: string[],
|
|
155
|
-
): Promise<ResponseTier | undefined> {
|
|
156
|
-
const provider = getConfiguredProvider();
|
|
157
|
-
if (!provider) {
|
|
158
|
-
log.debug('No provider available for async tier classification');
|
|
159
|
-
return undefined;
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
const combined = recentUserTexts
|
|
163
|
-
.map((t, i) => `[Message ${i + 1}]: ${t}`)
|
|
164
|
-
.join('\n');
|
|
165
|
-
|
|
166
|
-
try {
|
|
167
|
-
const { signal, cleanup } = createTimeout(ASYNC_CLASSIFICATION_TIMEOUT_MS);
|
|
168
|
-
try {
|
|
169
|
-
const response = await provider.sendMessage(
|
|
170
|
-
[userMessage(combined)],
|
|
171
|
-
undefined,
|
|
172
|
-
TIER_SYSTEM_PROMPT,
|
|
173
|
-
{
|
|
174
|
-
config: {
|
|
175
|
-
modelIntent: 'latency-optimized',
|
|
176
|
-
max_tokens: 8,
|
|
177
|
-
},
|
|
178
|
-
signal,
|
|
179
|
-
},
|
|
180
|
-
);
|
|
181
|
-
cleanup();
|
|
182
|
-
|
|
183
|
-
const raw = extractText(response).toLowerCase();
|
|
184
|
-
const match = raw.match(/\b(low|medium|high)\b/);
|
|
185
|
-
if (match) {
|
|
186
|
-
const tier = match[1] as ResponseTier;
|
|
187
|
-
log.debug({ tier, raw }, 'Async tier classification result');
|
|
188
|
-
return tier;
|
|
189
|
-
}
|
|
190
|
-
|
|
191
|
-
log.debug({ raw }, 'Async tier classification returned unexpected value');
|
|
192
|
-
return undefined;
|
|
193
|
-
} finally {
|
|
194
|
-
cleanup();
|
|
195
|
-
}
|
|
196
|
-
} catch (err) {
|
|
197
|
-
const message = err instanceof Error ? err.message : String(err);
|
|
198
|
-
log.debug({ err: message }, 'Async tier classification failed');
|
|
199
|
-
return undefined;
|
|
200
|
-
}
|
|
201
|
-
}
|
|
202
|
-
|
|
203
|
-
function tagged(tier: ResponseTier, reason: string, confidence: TierConfidence): TierClassification {
|
|
204
|
-
log.debug({ tier, reason, confidence }, 'Classified response tier');
|
|
205
|
-
return { tier, reason, confidence };
|
|
206
|
-
}
|
|
207
|
-
|
|
208
|
-
// ── Token scaling ─────────────────────────────────────────────────────
|
|
209
|
-
|
|
210
|
-
const TIER_SCALE: Record<ResponseTier, number> = {
|
|
211
|
-
low: 0.125,
|
|
212
|
-
medium: 0.375,
|
|
213
|
-
high: 1,
|
|
214
|
-
};
|
|
215
|
-
|
|
216
|
-
/**
|
|
217
|
-
* Scale the configured max tokens ceiling by the tier multiplier.
|
|
218
|
-
*
|
|
219
|
-
* Examples with configuredMax = 16000:
|
|
220
|
-
* low → 2000
|
|
221
|
-
* medium → 6000
|
|
222
|
-
* high → 16000
|
|
223
|
-
*/
|
|
224
|
-
export function tierMaxTokens(tier: ResponseTier, configuredMax: number): number {
|
|
225
|
-
return Math.round(configuredMax * TIER_SCALE[tier]);
|
|
226
|
-
}
|
|
227
|
-
|
|
228
|
-
// ── Model routing ─────────────────────────────────────────────────────
|
|
229
|
-
|
|
230
|
-
/**
|
|
231
|
-
* Map for Anthropic provider: tier → model.
|
|
232
|
-
* low → sonnet (balanced)
|
|
233
|
-
* medium → sonnet (balanced)
|
|
234
|
-
* high → undefined (use configured default, typically opus)
|
|
235
|
-
*/
|
|
236
|
-
const ANTHROPIC_TIER_MODELS: Record<ResponseTier, string | undefined> = {
|
|
237
|
-
low: 'claude-sonnet-4-6',
|
|
238
|
-
medium: 'claude-sonnet-4-6',
|
|
239
|
-
high: undefined, // use configured default
|
|
240
|
-
};
|
|
241
|
-
|
|
242
|
-
/**
|
|
243
|
-
* Returns a model override for the given tier, or undefined to use the
|
|
244
|
-
* provider's configured default. Only applies model downgrading for
|
|
245
|
-
* the Anthropic provider.
|
|
246
|
-
*/
|
|
247
|
-
export function tierModel(tier: ResponseTier, providerName: string): string | undefined {
|
|
248
|
-
if (providerName !== 'anthropic') return undefined;
|
|
249
|
-
return ANTHROPIC_TIER_MODELS[tier];
|
|
250
|
-
}
|