@vellumai/assistant 0.3.13 → 0.3.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +17 -3
- package/Dockerfile +1 -1
- package/README.md +2 -0
- package/docs/architecture/scheduling.md +81 -0
- package/package.json +1 -1
- package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +22 -0
- package/src/__tests__/channel-policy.test.ts +19 -0
- package/src/__tests__/guardian-control-plane-policy.test.ts +582 -0
- package/src/__tests__/guardian-outbound-http.test.ts +8 -8
- package/src/__tests__/intent-routing.test.ts +22 -0
- package/src/__tests__/ipc-snapshot.test.ts +10 -0
- package/src/__tests__/notification-routing-intent.test.ts +185 -0
- package/src/__tests__/recording-handler.test.ts +191 -31
- package/src/__tests__/recording-intent-fallback.test.ts +180 -0
- package/src/__tests__/recording-intent-handler.test.ts +597 -74
- package/src/__tests__/recording-intent.test.ts +738 -342
- package/src/__tests__/recording-state-machine.test.ts +1109 -0
- package/src/__tests__/reminder-store.test.ts +20 -18
- package/src/__tests__/reminder.test.ts +2 -1
- package/src/channels/config.ts +1 -1
- package/src/config/bundled-skills/phone-calls/SKILL.md +1 -11
- package/src/config/bundled-skills/screen-recording/SKILL.md +91 -12
- package/src/config/system-prompt.ts +5 -0
- package/src/config/vellum-skills/guardian-verify-setup/SKILL.md +1 -0
- package/src/daemon/handlers/config-channels.ts +6 -6
- package/src/daemon/handlers/index.ts +1 -1
- package/src/daemon/handlers/misc.ts +258 -102
- package/src/daemon/handlers/recording.ts +417 -5
- package/src/daemon/handlers/sessions.ts +142 -68
- package/src/daemon/ipc-contract/computer-use.ts +23 -3
- package/src/daemon/ipc-contract/messages.ts +3 -1
- package/src/daemon/ipc-contract/shared.ts +6 -0
- package/src/daemon/ipc-contract-inventory.json +2 -0
- package/src/daemon/lifecycle.ts +2 -0
- package/src/daemon/recording-executor.ts +180 -0
- package/src/daemon/recording-intent-fallback.ts +132 -0
- package/src/daemon/recording-intent.ts +306 -15
- package/src/daemon/session-tool-setup.ts +4 -0
- package/src/memory/conversation-attention-store.ts +5 -5
- package/src/notifications/README.md +69 -1
- package/src/notifications/adapters/sms.ts +80 -0
- package/src/notifications/broadcaster.ts +1 -0
- package/src/notifications/copy-composer.ts +3 -3
- package/src/notifications/decision-engine.ts +70 -1
- package/src/notifications/decisions-store.ts +24 -0
- package/src/notifications/destination-resolver.ts +2 -1
- package/src/notifications/emit-signal.ts +35 -3
- package/src/notifications/signal.ts +6 -0
- package/src/notifications/types.ts +3 -0
- package/src/runtime/guardian-outbound-actions.ts +9 -9
- package/src/runtime/http-server.ts +7 -7
- package/src/runtime/routes/conversation-attention-routes.ts +3 -3
- package/src/runtime/routes/integration-routes.ts +5 -5
- package/src/schedule/scheduler.ts +15 -3
- package/src/tools/executor.ts +29 -0
- package/src/tools/guardian-control-plane-policy.ts +141 -0
- package/src/tools/types.ts +2 -0
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
// LLM-based fallback classifier for recording intent detection.
|
|
2
|
+
// Fires only when the deterministic resolver returns `none` but the text
|
|
3
|
+
// contains recording-related keywords that suggest an intent the regex missed.
|
|
4
|
+
// Safety: returns `{ action: 'none', confidence: 'low' }` on any failure —
|
|
5
|
+
// never triggers a recording action on error.
|
|
6
|
+
|
|
7
|
+
import { createTimeout, extractText, getConfiguredProvider, userMessage } from '../providers/provider-send-message.js';
|
|
8
|
+
import { getLogger } from '../util/logger.js';
|
|
9
|
+
|
|
10
|
+
const log = getLogger('recording-intent-fallback');
|
|
11
|
+
|
|
12
|
+
/** Timeout budget, in milliseconds, handed to `createTimeout` for the fallback LLM call. */
const FALLBACK_TIMEOUT_MS = 5000;

/** Every action label the fallback classifier may report; `'none'` means no recording action. */
export type RecordingFallbackAction =
  | 'start'
  | 'stop'
  | 'restart'
  | 'pause'
  | 'resume'
  | 'none';

/** Validated outcome of one fallback classification. */
export interface RecordingFallbackResult {
  /** Action label reported by the classifier. */
  action: RecordingFallbackAction;
  /** Confidence label reported alongside the action. */
  confidence: 'high' | 'medium' | 'low';
}

/** Result handed back whenever classification cannot be completed or validated. */
const SAFE_DEFAULT: RecordingFallbackResult = { action: 'none', confidence: 'low' };

/**
 * Lower-case substrings that gate the fallback: an LLM call is only worth
 * spending when the text contains at least one of them.
 */
const RECORDING_KEYWORDS = [
  'record',
  'recording',
  'screen capture',
  'screencast',
  'capture screen',
  'capture my screen',
  'screen rec',
];

/** System prompt for the classifier; sentences are joined with a single space. */
const SYSTEM_PROMPT = [
  'You are classifying user messages for a screen recording assistant.',
  'Determine if the user wants to: start a recording, stop a recording, restart a recording,',
  'pause a recording, resume a recording, or none of these.',
  'Only classify as an action if the user is giving an imperative command.',
  'Questions about recording (e.g., "how do I record?", "what does recording do?") should be classified as "none".',
  'Respond with a JSON object: {"action": "start|stop|restart|pause|resume|none", "confidence": "high|medium|low"}',
].join(' ');

/** Action labels accepted from the LLM reply. */
const VALID_ACTIONS = new Set<RecordingFallbackAction>([
  'start',
  'stop',
  'restart',
  'pause',
  'resume',
  'none',
]);

/** Confidence labels accepted from the LLM reply. */
const VALID_CONFIDENCES = new Set<string>(['high', 'medium', 'low']);
|
|
44
|
+
|
|
45
|
+
/**
 * Cheap pre-filter for the LLM fallback: reports whether `text` contains any
 * entry of `RECORDING_KEYWORDS` as a substring, ignoring letter case.
 *
 * @param text - Raw user message.
 * @returns `true` when at least one keyword occurs in the lower-cased text.
 */
export function containsRecordingKeywords(text: string): boolean {
  const haystack = text.toLowerCase();
  for (const keyword of RECORDING_KEYWORDS) {
    if (haystack.includes(keyword)) {
      return true;
    }
  }
  return false;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Asks the configured LLM provider to classify `text` as a recording action
 * that the deterministic resolver may have missed.
 *
 * The request sends the text as a single user message with no tools, the
 * classifier system prompt, a `latency-optimized` model intent, a 64-token
 * cap, and an abort signal created with a `FALLBACK_TIMEOUT_MS` budget.
 * The reply text is validated by `parseClassificationResponse`.
 *
 * Whatever action/confidence pair comes back is what the model reported; the
 * prompt asks it to answer `none` for informational questions, but this
 * function does not enforce that itself.
 *
 * @param text - Raw user message to classify.
 * @returns The validated classification, or the safe default
 *   (`action: 'none'`, `confidence: 'low'`) when no provider is configured,
 *   the call throws or is aborted, or the reply fails validation. Never rejects.
 */
export async function classifyRecordingIntentFallback(
  text: string,
): Promise<RecordingFallbackResult> {
  const provider = getConfiguredProvider();
  if (!provider) {
    log.debug('No configured provider available for fallback classification');
    return SAFE_DEFAULT;
  }

  try {
    const { signal, cleanup } = createTimeout(FALLBACK_TIMEOUT_MS);
    try {
      const response = await provider.sendMessage(
        [userMessage(text)],
        [], // no tools
        SYSTEM_PROMPT,
        {
          config: {
            modelIntent: 'latency-optimized',
            max_tokens: 64,
          },
          signal,
        },
      );

      return parseClassificationResponse(extractText(response));
    } finally {
      // Single release point for the timeout: runs on success, throw and abort alike.
      cleanup();
    }
  } catch (err) {
    // Any failure degrades to the safe default so an error can never start/stop a recording.
    const message = err instanceof Error ? err.message : String(err);
    log.warn({ err: message }, 'LLM fallback classification failed');
    return SAFE_DEFAULT;
  }
}
|
|
98
|
+
|
|
99
|
+
/**
 * Converts the raw LLM reply into a validated `RecordingFallbackResult`.
 *
 * The first `{...}` span without a nested closing brace is extracted (the
 * model may wrap the JSON in prose), parsed, and its `action` / `confidence`
 * fields are checked against `VALID_ACTIONS` / `VALID_CONFIDENCES`.
 *
 * @param raw - Text content of the LLM reply.
 * @returns The validated result, or the safe default when no JSON object is
 *   found, parsing throws, or either field is missing or unrecognised.
 */
function parseClassificationResponse(raw: string): RecordingFallbackResult {
  // Runtime guards: the parsed JSON is untrusted, so narrow instead of asserting its shape.
  const isAction = (value: unknown): value is RecordingFallbackAction =>
    typeof value === 'string' && VALID_ACTIONS.has(value as RecordingFallbackAction);
  const isConfidence = (value: unknown): value is RecordingFallbackResult['confidence'] =>
    typeof value === 'string' && VALID_CONFIDENCES.has(value);

  try {
    // Extract JSON from the response — the LLM may include surrounding text
    const jsonMatch = raw.match(/\{[^}]*\}/);
    if (!jsonMatch) {
      log.debug({ raw }, 'No JSON object found in LLM fallback response');
      return SAFE_DEFAULT;
    }

    const parsed: unknown = JSON.parse(jsonMatch[0]);
    // Defensive: a `{...}` match parses to an object, but keep the narrowing explicit.
    if (typeof parsed !== 'object' || parsed === null) {
      log.debug({ raw }, 'No JSON object found in LLM fallback response');
      return SAFE_DEFAULT;
    }

    const { action, confidence } = parsed as Record<string, unknown>;

    if (!isAction(action)) {
      log.debug({ raw, action }, 'Invalid action in LLM fallback response');
      return SAFE_DEFAULT;
    }

    if (!isConfidence(confidence)) {
      log.debug({ raw, confidence }, 'Invalid confidence in LLM fallback response');
      return SAFE_DEFAULT;
    }

    return { action, confidence };
  } catch (err) {
    log.debug({ err, raw }, 'Failed to parse LLM fallback response as JSON');
    return SAFE_DEFAULT;
  }
}
|
|
@@ -1,8 +1,26 @@
|
|
|
1
|
-
// Recording intent
|
|
2
|
-
//
|
|
1
|
+
// Recording intent resolution for standalone screen recording routing.
|
|
2
|
+
// Exports `resolveRecordingIntent` as the single public entry point for
|
|
3
|
+
// text-based intent detection. Handlers use this (or structured
|
|
4
|
+
// `commandIntent` payloads) to intercept recording-related prompts
|
|
3
5
|
// before they reach the classifier or create a CU session.
|
|
4
|
-
|
|
5
|
-
|
|
6
|
+
//
|
|
7
|
+
// Internal helpers (detect/strip/classify) are kept as private utilities
|
|
8
|
+
// consumed only by `resolveRecordingIntent`.
|
|
9
|
+
|
|
10
|
+
/** Coarse classification label used as the return type of the internal classifier. */
type RecordingIntentClass = 'start_only' | 'stop_only' | 'mixed' | 'none';

/** Variant that carries nothing besides its discriminant. */
type IntentOnly<K extends string> = { kind: K };

/** Variant that also carries the text left over once recording clauses are removed. */
type IntentWithRemainder<K extends string> = { kind: K; remainder: string };

/**
 * Structured outcome of `resolveRecordingIntent`, discriminated on `kind`.
 * The `*_with_remainder` variants carry the leftover task text in `remainder`.
 */
export type RecordingIntentResult =
  | IntentOnly<'none'>
  | IntentOnly<'start_only'>
  | IntentOnly<'stop_only'>
  | IntentWithRemainder<'start_with_remainder'>
  | IntentWithRemainder<'stop_with_remainder'>
  | IntentOnly<'start_and_stop_only'>
  | IntentWithRemainder<'start_and_stop_with_remainder'>
  | IntentOnly<'restart_only'>
  | IntentWithRemainder<'restart_with_remainder'>
  | IntentOnly<'pause_only'>
  | IntentOnly<'resume_only'>;
|
|
6
24
|
|
|
7
25
|
// ─── Start recording patterns ────────────────────────────────────────────────
|
|
8
26
|
|
|
@@ -25,6 +43,29 @@ const STOP_RECORDING_PATTERNS: RegExp[] = [
|
|
|
25
43
|
/\bhalt\s+(the\s+)?recording\b/i,
|
|
26
44
|
];
|
|
27
45
|
|
|
46
|
+
// ─── Restart recording patterns (compound: stop + start a new one) ──────────

/** Phrases treated as "restart the recording"; any single match is enough. */
const RESTART_RECORDING_PATTERNS: RegExp[] = [
  // Direct verbs: "restart (the) recording", "redo (the) recording"
  /\brestart\s+(the\s+)?recording\b/i,
  /\bredo\s+(the\s+)?recording\b/i,

  // "stop (the) recording and start/begin (a) new|fresh|another recording|one"
  /\bstop\s+(the\s+)?recording\s+and\s+(start|begin)\s+(a\s+)?(new|fresh|another)\s+(recording|one)\b/i,
  // Same compound with the noun omitted at the end of the text
  /\bstop\s+(the\s+)?recording\s+and\s+(start|begin)\s+(a\s+)?(new|fresh|another)[.!?\s]*$/i,

  // "stop and restart (the) recording"
  /\bstop\s+and\s+restart\s+(the\s+)?recording\b/i,

  // Narrower "stop recording and start ..." spellings of the compound forms
  /\bstop\s+recording\s+and\s+start\s+(a\s+)?(new|another|fresh)\s+(recording|one)\b/i,
  /\bstop\s+recording\s+and\s+start\s+(a\s+)?(new|another|fresh)[.!?\s]*$/i,
];

// ─── Pause/resume recording patterns ────────────────────────────────────────

/** Phrases treated as "pause the recording". */
const PAUSE_RECORDING_PATTERNS: RegExp[] = [
  /\bpause\s+(the\s+)?recording\b/i,
];

/** Phrases treated as "resume the recording" ("resume" or "unpause"). */
const RESUME_RECORDING_PATTERNS: RegExp[] = [
  /\bresume\s+(the\s+)?recording\b/i,
  /\bunpause\s+(the\s+)?recording\b/i,
];
|
|
68
|
+
|
|
28
69
|
// ─── Stop-recording clause removal for mixed-intent prompts ─────────────────
|
|
29
70
|
|
|
30
71
|
const STOP_RECORDING_CLAUSE_PATTERNS: RegExp[] = [
|
|
@@ -47,17 +88,41 @@ const RECORDING_CLAUSE_PATTERNS: RegExp[] = [
|
|
|
47
88
|
/\brecord\s+(my\s+|the\s+)?screen\s+while\b/i,
|
|
48
89
|
];
|
|
49
90
|
|
|
91
|
+
// ─── Restart clause removal ─────────────────────────────────────────────────

/**
 * Clauses removed from the text when extracting what remains after a restart
 * command. Order matters: each pattern is applied once, in sequence.
 */
const RESTART_RECORDING_CLAUSE_PATTERNS: RegExp[] = [
  // Longer compound patterns first — avoids partial matches by shorter patterns
  /\bstop\s+(the\s+)?recording\s+and\s+(start|begin)\s+(a\s+)?(new|fresh|another)\s+(recording|one)\b/i,
  /\bstop\s+(the\s+)?recording\s+and\s+(start|begin)\s+(a\s+)?(new|fresh|another)[.!?\s]*$/i,
  /\bstop\s+and\s+restart\s+(the\s+)?recording\b/i,
  /\bstop\s+recording\s+and\s+start\s+(a\s+)?(new|another|fresh)\s+(recording|one)\b/i,
  /\bstop\s+recording\s+and\s+start\s+(a\s+)?(new|another|fresh)[.!?\s]*$/i,

  // Short forms, optionally introduced by "and" / "also"
  /\b(and\s+)?(also\s+)?restart\s+(the\s+)?recording\b/i,
  /\b(and\s+)?(also\s+)?redo\s+(the\s+)?recording\b/i,
];

// ─── Pause/resume clause removal ────────────────────────────────────────────

/** Pause clause, optionally introduced by "and" / "also". */
const PAUSE_RECORDING_CLAUSE_PATTERNS: RegExp[] = [
  /\b(and\s+)?(also\s+)?pause\s+(the\s+)?recording\b/i,
];

/** Resume/unpause clauses, optionally introduced by "and" / "also". */
const RESUME_RECORDING_CLAUSE_PATTERNS: RegExp[] = [
  /\b(and\s+)?(also\s+)?resume\s+(the\s+)?recording\b/i,
  /\b(and\s+)?(also\s+)?unpause\s+(the\s+)?recording\b/i,
];
|
|
114
|
+
|
|
50
115
|
/** Common polite/filler words stripped before checking intent-only status. */
|
|
51
116
|
const FILLER_PATTERN =
|
|
52
117
|
/\b(please|pls|plz|can\s+you|could\s+you|would\s+you|now|right\s+now|thanks|thank\s+you|thx|ty|for\s+me|ok(ay)?|hey|hi|just)\b/gi;
|
|
53
118
|
|
|
54
|
-
// ───
|
|
119
|
+
// ─── Internal helpers ────────────────────────────────────────────────────────
|
|
55
120
|
|
|
56
121
|
/**
 * Reports whether `taskText` matches at least one start-recording pattern.
 * A match says nothing about whether the prompt also carries other work.
 */
function detectRecordingIntent(taskText: string): boolean {
  for (const pattern of START_RECORDING_PATTERNS) {
    if (pattern.test(taskText)) {
      return true;
    }
  }
  return false;
}
|
|
63
128
|
|
|
@@ -67,7 +132,7 @@ export function detectRecordingIntent(taskText: string): boolean {
|
|
|
67
132
|
* "record my screen while I work" -> false (has CU task component)
|
|
68
133
|
* "open Chrome and record my screen" -> false (has CU task component)
|
|
69
134
|
*/
|
|
70
|
-
|
|
135
|
+
function isRecordingOnly(taskText: string): boolean {
|
|
71
136
|
if (!detectRecordingIntent(taskText)) return false;
|
|
72
137
|
|
|
73
138
|
// Strip the recording clause and check if anything substantive remains
|
|
@@ -84,16 +149,31 @@ export function isRecordingOnly(taskText: string): boolean {
|
|
|
84
149
|
* Requires explicit "stop/end/finish/halt recording" phrasing --
|
|
85
150
|
* bare "stop", "end it", or "quit" are too ambiguous and will not match.
|
|
86
151
|
*/
|
|
87
|
-
|
|
152
|
+
function detectStopRecordingIntent(taskText: string): boolean {
  // True as soon as any stop-recording pattern matches the text.
  for (const pattern of STOP_RECORDING_PATTERNS) {
    if (pattern.test(taskText)) {
      return true;
    }
  }
  return false;
}
|
|
90
155
|
|
|
156
|
+
/** Reports whether `taskText` matches at least one restart pattern. */
function detectRestartRecordingIntent(taskText: string): boolean {
  for (const pattern of RESTART_RECORDING_PATTERNS) {
    if (pattern.test(taskText)) {
      return true;
    }
  }
  return false;
}
|
|
160
|
+
|
|
161
|
+
/** Reports whether `taskText` matches at least one pause pattern. */
function detectPauseRecordingIntent(taskText: string): boolean {
  for (const pattern of PAUSE_RECORDING_PATTERNS) {
    if (pattern.test(taskText)) {
      return true;
    }
  }
  return false;
}
|
|
165
|
+
|
|
166
|
+
/** Reports whether `taskText` matches at least one resume/unpause pattern. */
function detectResumeRecordingIntent(taskText: string): boolean {
  for (const pattern of RESUME_RECORDING_PATTERNS) {
    if (pattern.test(taskText)) {
      return true;
    }
  }
  return false;
}
|
|
170
|
+
|
|
91
171
|
/**
|
|
92
172
|
* Removes recording-related clauses from a task, returning the cleaned text.
|
|
93
173
|
* Used when a recording intent is embedded in a broader CU task so the
|
|
94
174
|
* recording portion can be handled separately while the task continues.
|
|
95
175
|
*/
|
|
96
|
-
|
|
176
|
+
function stripRecordingIntent(taskText: string): string {
|
|
97
177
|
let result = taskText;
|
|
98
178
|
for (const pattern of RECORDING_CLAUSE_PATTERNS) {
|
|
99
179
|
result = result.replace(pattern, '');
|
|
@@ -106,7 +186,7 @@ export function stripRecordingIntent(taskText: string): string {
|
|
|
106
186
|
* Removes stop-recording clauses from a message, returning the cleaned text.
|
|
107
187
|
* Analogous to stripRecordingIntent but for stop-recording phrases.
|
|
108
188
|
*/
|
|
109
|
-
|
|
189
|
+
function stripStopRecordingIntent(taskText: string): string {
|
|
110
190
|
let result = taskText;
|
|
111
191
|
for (const pattern of STOP_RECORDING_CLAUSE_PATTERNS) {
|
|
112
192
|
result = result.replace(pattern, '');
|
|
@@ -114,6 +194,33 @@ export function stripStopRecordingIntent(taskText: string): string {
|
|
|
114
194
|
return result.replace(/\s{2,}/g, ' ').trim();
|
|
115
195
|
}
|
|
116
196
|
|
|
197
|
+
/**
 * Deletes the first match of each restart clause pattern (applied in list
 * order), then collapses runs of whitespace and trims the result.
 */
function stripRestartRecordingIntent(taskText: string): string {
  const withoutClauses = RESTART_RECORDING_CLAUSE_PATTERNS.reduce(
    (remaining, clause) => remaining.replace(clause, ''),
    taskText,
  );
  return withoutClauses.replace(/\s{2,}/g, ' ').trim();
}
|
|
205
|
+
|
|
206
|
+
/**
 * Deletes the first match of each pause clause pattern (applied in list
 * order), then collapses runs of whitespace and trims the result.
 */
function stripPauseRecordingIntent(taskText: string): string {
  const withoutClauses = PAUSE_RECORDING_CLAUSE_PATTERNS.reduce(
    (remaining, clause) => remaining.replace(clause, ''),
    taskText,
  );
  return withoutClauses.replace(/\s{2,}/g, ' ').trim();
}
|
|
214
|
+
|
|
215
|
+
/**
 * Deletes the first match of each resume/unpause clause pattern (applied in
 * list order), then collapses runs of whitespace and trims the result.
 */
function stripResumeRecordingIntent(taskText: string): string {
  const withoutClauses = RESUME_RECORDING_CLAUSE_PATTERNS.reduce(
    (remaining, clause) => remaining.replace(clause, ''),
    taskText,
  );
  return withoutClauses.replace(/\s{2,}/g, ' ').trim();
}
|
|
223
|
+
|
|
117
224
|
/**
|
|
118
225
|
* Returns true if the prompt is purely about stopping recording with no
|
|
119
226
|
* additional task. Analogous to isRecordingOnly but for stop-recording.
|
|
@@ -121,7 +228,7 @@ export function stripStopRecordingIntent(taskText: string): string {
|
|
|
121
228
|
* "how do I stop recording?" -> false (has additional context)
|
|
122
229
|
* "stop recording and close the browser" -> false (has CU task component)
|
|
123
230
|
*/
|
|
124
|
-
|
|
231
|
+
function isStopRecordingOnly(taskText: string): boolean {
|
|
125
232
|
if (!detectStopRecordingIntent(taskText)) return false;
|
|
126
233
|
|
|
127
234
|
const stripped = stripStopRecordingIntent(taskText);
|
|
@@ -130,6 +237,30 @@ export function isStopRecordingOnly(taskText: string): boolean {
|
|
|
130
237
|
return withoutFillers.replace(/[.,;!?\s]+/g, '').length === 0;
|
|
131
238
|
}
|
|
132
239
|
|
|
240
|
+
/**
 * Reports whether `taskText` is a restart command and nothing else: after
 * removing restart clauses, filler words, punctuation and whitespace, no
 * characters may remain.
 */
function isRestartRecordingOnly(taskText: string): boolean {
  if (!detectRestartRecordingIntent(taskText)) {
    return false;
  }
  const residue = stripRestartRecordingIntent(taskText)
    .replace(FILLER_PATTERN, '')
    .replace(/[.,;!?\s]+/g, '');
  return residue.length === 0;
}
|
|
247
|
+
|
|
248
|
+
/**
 * Reports whether `taskText` is a pause command and nothing else: after
 * removing pause clauses, filler words, punctuation and whitespace, no
 * characters may remain.
 */
function isPauseRecordingOnly(taskText: string): boolean {
  if (!detectPauseRecordingIntent(taskText)) {
    return false;
  }
  const residue = stripPauseRecordingIntent(taskText)
    .replace(FILLER_PATTERN, '')
    .replace(/[.,;!?\s]+/g, '');
  return residue.length === 0;
}
|
|
255
|
+
|
|
256
|
+
/**
 * Reports whether `taskText` is a resume command and nothing else: after
 * removing resume clauses, filler words, punctuation and whitespace, no
 * characters may remain.
 */
function isResumeRecordingOnly(taskText: string): boolean {
  if (!detectResumeRecordingIntent(taskText)) {
    return false;
  }
  const residue = stripResumeRecordingIntent(taskText)
    .replace(FILLER_PATTERN, '')
    .replace(/[.,;!?\s]+/g, '');
  return residue.length === 0;
}
|
|
263
|
+
|
|
133
264
|
// ─── Dynamic name normalization ─────────────────────────────────────────────
|
|
134
265
|
|
|
135
266
|
/**
|
|
@@ -159,7 +290,7 @@ export function stripDynamicNames(text: string, dynamicNames: string[]): string
|
|
|
159
290
|
* punctuation, and dynamic assistant names. Used to determine whether
|
|
160
291
|
* remaining text after stripping recording clauses needs further processing.
|
|
161
292
|
*/
|
|
162
|
-
|
|
293
|
+
function hasSubstantiveContent(text: string, dynamicNames?: string[]): boolean {
|
|
163
294
|
let cleaned = text;
|
|
164
295
|
if (dynamicNames && dynamicNames.length > 0) {
|
|
165
296
|
cleaned = stripDynamicNames(cleaned, dynamicNames);
|
|
@@ -175,22 +306,68 @@ export function hasSubstantiveContent(text: string, dynamicNames?: string[]): bo
|
|
|
175
306
|
* triggering recording side effects in the mixed handler. */
|
|
176
307
|
const WH_INTERROGATIVE = /^\s*(how|what|why|when|where|who|which)\b/i;
|
|
177
308
|
|
|
309
|
+
/**
 * Start-anchored patterns for prompts that ask *about* recording instead of
 * commanding it, where the question word sits behind a polite opener or an
 * informational verb:
 *
 *   - "can you tell me how to stop recording?"
 *   - "explain how to stop the recording"
 *   - "is there a way to stop recording?"
 *   - "I'd like to know how to pause the recording"
 *   - "do you know how to start recording?"
 *
 * Polite imperatives such as "can you stop recording?" must NOT match: every
 * pattern requires an informational verb or phrase (tell/explain/describe/show
 * + how, "is there a way", "want to know", ...) before the recording verb.
 */
const INDIRECT_INFORMATIONAL_PATTERNS: RegExp[] = [
  // Optional "can/could/would you", then tell|explain|describe|show (me) how
  /^\s*(can\s+you\s+|could\s+you\s+|would\s+you\s+)?(tell|explain|describe|show)\s+(me\s+)?how\b/i,

  // Capability questions: "is there a way to...", "are there any options to..."
  /^\s*(is\s+there|are\s+there)\s+(a\s+|any\s+)?(ways?|methods?|options?|means)\s+to\b/i,

  // Stated wish to know: "I'd like to know...", "I would like to know...", "I want to know..."
  /^\s*(i('d|\s+would)\s+like\s+to\s+know|i\s+want\s+to\s+know)\b/i,

  // "do you know how to...", "can I learn (how) to..."
  /^\s*(do\s+you\s+know\s+how|can\s+i\s+learn(\s+how)?)\s+to\b/i,

  // explain|describe, optionally "to me", then how: "explain to me how..."
  /^\s*(explain|describe)\s+(to\s+me\s+)?how\b/i,

  // tell|explain|describe|show (me) (about) + question word: "tell me what...", "tell me about how..."
  /^\s*(tell|explain|describe|show)\s+(me\s+)?(about\s+)?(how|what|why|when|where)\b/i,
];
|
|
338
|
+
|
|
178
339
|
/**
 * Returns true if the text appears to be a question about recording rather
 * than an imperative command that includes recording.
 *
 * Leading dynamic assistant names (when provided) and all stacked leading
 * polite tokens ("hey, please ...") are removed before testing, so the
 * question word or informational phrase is checked at the true start of the
 * request.
 *
 *   "how do I stop recording?"                  → true  (question — don't trigger side effects)
 *   "can you tell me how to stop recording?"    → true  (informational — don't trigger)
 *   "hey, please tell me how to stop recording" → true  (informational behind two polite tokens)
 *   "explain how to stop the recording"         → true  (informational — don't trigger)
 *   "is there a way to stop recording?"         → true  (capability question — don't trigger)
 *   "open Chrome and record my screen"          → false (command — trigger recording)
 *   "can you record my screen?"                 → false (polite imperative — trigger recording)
 *   "can you stop recording?"                   → false (polite imperative — trigger stop)
 *
 * @param text - User message to inspect.
 * @param dynamicNames - Optional assistant names to strip before testing.
 * @returns `true` when the cleaned text starts with a WH-word or matches an
 *   indirect informational pattern.
 */
function isInterrogative(text: string, dynamicNames?: string[]): boolean {
  let cleaned = text;
  if (dynamicNames && dynamicNames.length > 0) {
    cleaned = stripDynamicNames(cleaned, dynamicNames);
  }

  // Strip every leading polite token, not just the first: with a single pass,
  // "hey, please tell me how..." kept "please" in front and hid the question.
  cleaned = cleaned.replace(/^\s*(?:(?:hey|hi|hello|please|pls|plz)[,\s]+)+/i, '');

  // Direct WH-questions (how/what/why/when/where/who/which)
  if (WH_INTERROGATIVE.test(cleaned)) {
    return true;
  }

  // Indirect informational patterns ("tell me how...", "is there a way to...")
  return INDIRECT_INFORMATIONAL_PATTERNS.some((p) => p.test(cleaned));
}
|
|
195
372
|
|
|
196
373
|
// ─── Unified classification ─────────────────────────────────────────────────
|
|
@@ -206,7 +383,7 @@ export function isInterrogative(text: string, dynamicNames?: string[]): boolean
|
|
|
206
383
|
* If `dynamicNames` are provided, they are stripped from the beginning of the
|
|
207
384
|
* text before classification (e.g., "Nova, record my screen" -> "record my screen").
|
|
208
385
|
*/
|
|
209
|
-
|
|
386
|
+
function _classifyRecordingIntent(
|
|
210
387
|
taskText: string,
|
|
211
388
|
dynamicNames?: string[],
|
|
212
389
|
): RecordingIntentClass {
|
|
@@ -231,3 +408,117 @@ export function classifyRecordingIntent(
|
|
|
231
408
|
|
|
232
409
|
return 'none';
|
|
233
410
|
}
|
|
411
|
+
|
|
412
|
+
// ─── Structured intent resolver ─────────────────────────────────────────────
|
|
413
|
+
|
|
414
|
+
/**
 * Turns user text into a structured recording intent, separating pure
 * recording commands from commands that also carry other task text.
 *
 * Detection runs on a normalized copy of the text (leading dynamic assistant
 * name and one leading polite token removed); remainders are always cut from
 * the ORIGINAL `text` so the user's phrasing is preserved.
 *
 * Order of checks:
 *   1. Questions / informational prompts            → `none`
 *   2. Restart phrasing (before separate start/stop) → `restart_only` | `restart_with_remainder`
 *   3. Pause or resume with nothing else             → `pause_only` | `resume_only`
 *      (pause/resume plus extra text falls through to the checks below)
 *   4. Start and stop both present                   → `start_and_stop_only` | `start_and_stop_with_remainder`
 *   5. Start only                                    → `start_only` | `start_with_remainder`
 *   6. Stop only                                     → `stop_only` | `stop_with_remainder`
 *   7. Otherwise                                     → `none`
 *
 * @param text - Raw user message.
 * @param dynamicNames - Optional assistant names to strip as a leading vocative.
 * @returns The resolved intent; `remainder` is set only on `*_with_remainder` kinds.
 */
export function resolveRecordingIntent(
  text: string,
  dynamicNames?: string[],
): RecordingIntentResult {
  // Normalize: drop the assistant name, then a leading polite wrapper.
  const withoutNames =
    dynamicNames && dynamicNames.length > 0
      ? stripDynamicNames(text, dynamicNames)
      : text;
  const normalized = withoutNames.replace(/^\s*(hey|hi|hello|please|pls|plz)[,\s]+/i, '');

  // Questions are never commands.
  if (isInterrogative(normalized, dynamicNames)) {
    return { kind: 'none' };
  }

  // Restart is checked first so "stop recording and start a new one" is not
  // split into independent stop and start intents.
  if (detectRestartRecordingIntent(normalized)) {
    if (isRestartRecordingOnly(normalized)) {
      return { kind: 'restart_only' };
    }
    const remainder = stripRestartRecordingIntent(text);
    return hasSubstantiveContent(remainder, dynamicNames)
      ? { kind: 'restart_with_remainder', remainder }
      : { kind: 'restart_only' };
  }

  // Pause / resume are only honoured as standalone commands; with extra text
  // they fall through to the start/stop handling.
  if (detectPauseRecordingIntent(normalized) && isPauseRecordingOnly(normalized)) {
    return { kind: 'pause_only' };
  }
  if (detectResumeRecordingIntent(normalized) && isResumeRecordingOnly(normalized)) {
    return { kind: 'resume_only' };
  }

  const hasStart = detectRecordingIntent(normalized);
  const hasStop = detectStopRecordingIntent(normalized);

  if (hasStart && hasStop) {
    // Nothing but start + stop phrasing once the start clause is removed.
    if (
      isRecordingOnly(normalized) &&
      isStopRecordingOnly(stripRecordingIntent(normalized))
    ) {
      return { kind: 'start_and_stop_only' };
    }
    const remainder = stripStopRecordingIntent(stripRecordingIntent(text));
    return hasSubstantiveContent(remainder, dynamicNames)
      ? { kind: 'start_and_stop_with_remainder', remainder }
      : { kind: 'start_and_stop_only' };
  }

  if (hasStart) {
    if (isRecordingOnly(normalized)) {
      return { kind: 'start_only' };
    }
    const remainder = stripRecordingIntent(text);
    return hasSubstantiveContent(remainder, dynamicNames)
      ? { kind: 'start_with_remainder', remainder }
      : { kind: 'start_only' };
  }

  if (hasStop) {
    if (isStopRecordingOnly(normalized)) {
      return { kind: 'stop_only' };
    }
    const remainder = stripStopRecordingIntent(text);
    return hasSubstantiveContent(remainder, dynamicNames)
      ? { kind: 'stop_with_remainder', remainder }
      : { kind: 'stop_only' };
  }

  return { kind: 'none' };
}
|
|
@@ -25,6 +25,7 @@ import { requestComputerControlTool } from '../tools/computer-use/request-comput
|
|
|
25
25
|
import type { ProxyApprovalCallback, ProxyApprovalRequest } from '../tools/network/script-proxy/index.js';
|
|
26
26
|
import { getAllToolDefinitions } from '../tools/registry.js';
|
|
27
27
|
import { allUiSurfaceTools } from '../tools/ui-surface/definitions.js';
|
|
28
|
+
import type { GuardianRuntimeContext } from './session-runtime-assembly.js';
|
|
28
29
|
import { projectSkillTools, type SkillProjectionCache } from './session-skill-tools.js';
|
|
29
30
|
import type { SurfaceSessionContext } from './session-surfaces.js';
|
|
30
31
|
import {
|
|
@@ -55,6 +56,8 @@ export interface ToolSetupContext extends SurfaceSessionContext {
|
|
|
55
56
|
headlessLock?: boolean;
|
|
56
57
|
/** When set, this session is executing a task run. Used to retrieve ephemeral permission rules. */
|
|
57
58
|
taskRunId?: string;
|
|
59
|
+
/** Guardian runtime context for the session — actorRole is propagated into ToolContext for control-plane policy enforcement. */
|
|
60
|
+
guardianContext?: GuardianRuntimeContext;
|
|
58
61
|
}
|
|
59
62
|
|
|
60
63
|
// ── buildToolDefinitions ─────────────────────────────────────────────
|
|
@@ -105,6 +108,7 @@ export function createToolExecutor(
|
|
|
105
108
|
assistantId: ctx.assistantId,
|
|
106
109
|
requestId: ctx.currentRequestId,
|
|
107
110
|
taskRunId: ctx.taskRunId,
|
|
111
|
+
guardianActorRole: ctx.guardianContext?.actorRole,
|
|
108
112
|
onOutput,
|
|
109
113
|
signal: ctx.abortController?.signal,
|
|
110
114
|
sandboxOverride: ctx.sandboxOverride,
|
|
@@ -13,7 +13,7 @@ import { getLogger } from '../util/logger.js';
|
|
|
13
13
|
import { getDb } from './db.js';
|
|
14
14
|
import { conversationAssistantAttentionState, conversationAttentionEvents, conversations, messages } from './schema.js';
|
|
15
15
|
|
|
16
|
-
const
|
|
16
|
+
const _log = getLogger('conversation-attention-store');
|
|
17
17
|
|
|
18
18
|
// ── Types ────────────────────────────────────────────────────────────
|
|
19
19
|
|
|
@@ -138,7 +138,7 @@ export function projectAssistantMessage(params: {
|
|
|
138
138
|
}
|
|
139
139
|
|
|
140
140
|
// Monotonic: only advance if the new message is strictly later
|
|
141
|
-
if (existing.latestAssistantMessageAt
|
|
141
|
+
if (existing.latestAssistantMessageAt != null && messageAt <= existing.latestAssistantMessageAt) {
|
|
142
142
|
return;
|
|
143
143
|
}
|
|
144
144
|
|
|
@@ -251,8 +251,8 @@ export function recordConversationSeenSignal(params: {
|
|
|
251
251
|
// Only advance the seen cursor if there is a latest assistant message to mark as seen,
|
|
252
252
|
// and the seen cursor hasn't already reached or passed it (monotonic invariant).
|
|
253
253
|
const shouldAdvanceSeen =
|
|
254
|
-
state.latestAssistantMessageAt
|
|
255
|
-
(state.lastSeenAssistantMessageAt
|
|
254
|
+
state.latestAssistantMessageAt != null &&
|
|
255
|
+
(state.lastSeenAssistantMessageAt == null ||
|
|
256
256
|
state.latestAssistantMessageAt > state.lastSeenAssistantMessageAt);
|
|
257
257
|
|
|
258
258
|
// Guard seen metadata monotonicity: only update lastSeen* metadata when the
|
|
@@ -260,7 +260,7 @@ export function recordConversationSeenSignal(params: {
|
|
|
260
260
|
// Out-of-order delivery (e.g. delayed channel callbacks) must not regress
|
|
261
261
|
// the projected channel/source/confidence metadata.
|
|
262
262
|
const isNewerSignal =
|
|
263
|
-
state.lastSeenEventAt
|
|
263
|
+
state.lastSeenEventAt == null || eventObservedAt >= state.lastSeenEventAt;
|
|
264
264
|
|
|
265
265
|
const updates: Record<string, unknown> = {
|
|
266
266
|
updatedAt: now,
|