@vellumai/assistant 0.3.13 → 0.3.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/ARCHITECTURE.md +17 -3
  2. package/Dockerfile +1 -1
  3. package/README.md +2 -0
  4. package/docs/architecture/scheduling.md +81 -0
  5. package/package.json +1 -1
  6. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +22 -0
  7. package/src/__tests__/channel-policy.test.ts +19 -0
  8. package/src/__tests__/guardian-control-plane-policy.test.ts +582 -0
  9. package/src/__tests__/guardian-outbound-http.test.ts +8 -8
  10. package/src/__tests__/intent-routing.test.ts +22 -0
  11. package/src/__tests__/ipc-snapshot.test.ts +10 -0
  12. package/src/__tests__/notification-routing-intent.test.ts +185 -0
  13. package/src/__tests__/recording-handler.test.ts +191 -31
  14. package/src/__tests__/recording-intent-fallback.test.ts +180 -0
  15. package/src/__tests__/recording-intent-handler.test.ts +597 -74
  16. package/src/__tests__/recording-intent.test.ts +738 -342
  17. package/src/__tests__/recording-state-machine.test.ts +1109 -0
  18. package/src/__tests__/reminder-store.test.ts +20 -18
  19. package/src/__tests__/reminder.test.ts +2 -1
  20. package/src/channels/config.ts +1 -1
  21. package/src/config/bundled-skills/phone-calls/SKILL.md +1 -11
  22. package/src/config/bundled-skills/screen-recording/SKILL.md +91 -12
  23. package/src/config/system-prompt.ts +5 -0
  24. package/src/config/vellum-skills/guardian-verify-setup/SKILL.md +1 -0
  25. package/src/daemon/handlers/config-channels.ts +6 -6
  26. package/src/daemon/handlers/index.ts +1 -1
  27. package/src/daemon/handlers/misc.ts +258 -102
  28. package/src/daemon/handlers/recording.ts +417 -5
  29. package/src/daemon/handlers/sessions.ts +142 -68
  30. package/src/daemon/ipc-contract/computer-use.ts +23 -3
  31. package/src/daemon/ipc-contract/messages.ts +3 -1
  32. package/src/daemon/ipc-contract/shared.ts +6 -0
  33. package/src/daemon/ipc-contract-inventory.json +2 -0
  34. package/src/daemon/lifecycle.ts +2 -0
  35. package/src/daemon/recording-executor.ts +180 -0
  36. package/src/daemon/recording-intent-fallback.ts +132 -0
  37. package/src/daemon/recording-intent.ts +306 -15
  38. package/src/daemon/session-tool-setup.ts +4 -0
  39. package/src/memory/conversation-attention-store.ts +5 -5
  40. package/src/notifications/README.md +69 -1
  41. package/src/notifications/adapters/sms.ts +80 -0
  42. package/src/notifications/broadcaster.ts +1 -0
  43. package/src/notifications/copy-composer.ts +3 -3
  44. package/src/notifications/decision-engine.ts +70 -1
  45. package/src/notifications/decisions-store.ts +24 -0
  46. package/src/notifications/destination-resolver.ts +2 -1
  47. package/src/notifications/emit-signal.ts +35 -3
  48. package/src/notifications/signal.ts +6 -0
  49. package/src/notifications/types.ts +3 -0
  50. package/src/runtime/guardian-outbound-actions.ts +9 -9
  51. package/src/runtime/http-server.ts +7 -7
  52. package/src/runtime/routes/conversation-attention-routes.ts +3 -3
  53. package/src/runtime/routes/integration-routes.ts +5 -5
  54. package/src/schedule/scheduler.ts +15 -3
  55. package/src/tools/executor.ts +29 -0
  56. package/src/tools/guardian-control-plane-policy.ts +141 -0
  57. package/src/tools/types.ts +2 -0
@@ -0,0 +1,132 @@
1
+ // LLM-based fallback classifier for recording intent detection.
2
+ // Fires only when the deterministic resolver returns `none` but the text
3
+ // contains recording-related keywords that suggest an intent the regex missed.
4
+ // Safety: returns `{ action: 'none', confidence: 'low' }` on any failure —
5
+ // never triggers a recording action on error.
6
+
7
+ import { createTimeout, extractText, getConfiguredProvider, userMessage } from '../providers/provider-send-message.js';
8
+ import { getLogger } from '../util/logger.js';
9
+
10
+ const log = getLogger('recording-intent-fallback');
11
+
12
+ const FALLBACK_TIMEOUT_MS = 5000;
13
+
14
+ export type RecordingFallbackAction = 'start' | 'stop' | 'restart' | 'pause' | 'resume' | 'none';
15
+
16
+ export interface RecordingFallbackResult {
17
+ action: RecordingFallbackAction;
18
+ confidence: 'high' | 'medium' | 'low';
19
+ }
20
+
21
+ const SAFE_DEFAULT: RecordingFallbackResult = { action: 'none', confidence: 'low' };
22
+
23
+ /** Keywords that gate whether we spend an LLM call on fallback classification. */
24
+ const RECORDING_KEYWORDS = [
25
+ 'record',
26
+ 'recording',
27
+ 'screen capture',
28
+ 'screencast',
29
+ 'capture screen',
30
+ 'capture my screen',
31
+ 'screen rec',
32
+ ];
33
+
34
+ const SYSTEM_PROMPT =
35
+ 'You are classifying user messages for a screen recording assistant. ' +
36
+ 'Determine if the user wants to: start a recording, stop a recording, restart a recording, ' +
37
+ 'pause a recording, resume a recording, or none of these. ' +
38
+ 'Only classify as an action if the user is giving an imperative command. ' +
39
+ 'Questions about recording (e.g., "how do I record?", "what does recording do?") should be classified as "none". ' +
40
+ 'Respond with a JSON object: {"action": "start|stop|restart|pause|resume|none", "confidence": "high|medium|low"}';
41
+
42
+ const VALID_ACTIONS = new Set<RecordingFallbackAction>(['start', 'stop', 'restart', 'pause', 'resume', 'none']);
43
+ const VALID_CONFIDENCES = new Set<string>(['high', 'medium', 'low']);
44
+
45
/**
 * Cheap pre-filter for the LLM fallback classifier.
 *
 * Performs a case-insensitive substring scan of `text` against
 * `RECORDING_KEYWORDS` and reports whether at least one keyword occurs.
 * Callers use the result to decide whether an LLM call is worth making.
 */
export function containsRecordingKeywords(text: string): boolean {
  const haystack = text.toLowerCase();
  for (const keyword of RECORDING_KEYWORDS) {
    if (haystack.includes(keyword)) {
      return true;
    }
  }
  return false;
}
53
+
54
/**
 * Uses a lightweight LLM call to classify whether `text` contains a recording
 * intent that the deterministic resolver missed.
 *
 * The model is instructed (via `SYSTEM_PROMPT`) to answer `none` for
 * informational questions and to classify only imperative commands; whatever
 * it answers is validated by `parseClassificationResponse` before being
 * returned.
 *
 * This function never rejects: a missing provider, a provider lookup failure,
 * a timeout/abort, a transport error, or an unparsable reply all resolve to
 * `SAFE_DEFAULT` (`{ action: 'none', confidence: 'low' }`), so a failure can
 * never trigger a recording action.
 *
 * @param text - The raw user message to classify.
 * @returns The validated classification, or `SAFE_DEFAULT` on any failure.
 */
export async function classifyRecordingIntentFallback(
  text: string,
): Promise<RecordingFallbackResult> {
  try {
    // Provider lookup lives inside the try so that a throwing lookup also
    // degrades to the safe default instead of rejecting the promise.
    const provider = getConfiguredProvider();
    if (!provider) {
      log.debug('No configured provider available for fallback classification');
      return SAFE_DEFAULT;
    }

    const { signal, cleanup } = createTimeout(FALLBACK_TIMEOUT_MS);
    try {
      const response = await provider.sendMessage(
        [userMessage(text)],
        [], // no tools
        SYSTEM_PROMPT,
        {
          config: {
            modelIntent: 'latency-optimized',
            max_tokens: 64,
          },
          signal,
        },
      );

      const raw = extractText(response);
      return parseClassificationResponse(raw);
    } finally {
      // Single release point for the timeout, on success and failure alike.
      cleanup();
    }
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    log.warn({ err: message }, 'LLM fallback classification failed');
    return SAFE_DEFAULT;
  }
}
98
+
99
/**
 * Turns the model's raw text reply into a validated RecordingFallbackResult.
 *
 * The first brace-delimited span in `raw` is treated as the JSON payload
 * (the model may wrap it in prose). The payload must carry an `action` from
 * `VALID_ACTIONS` and a `confidence` from `VALID_CONFIDENCES`; anything else,
 * including a JSON syntax error, yields `SAFE_DEFAULT`.
 */
function parseClassificationResponse(raw: string): RecordingFallbackResult {
  try {
    // Pull out the first `{...}` span; surrounding chatter is ignored.
    const [jsonText] = raw.match(/\{[^}]*\}/) ?? [];
    if (jsonText === undefined) {
      log.debug({ raw }, 'No JSON object found in LLM fallback response');
      return SAFE_DEFAULT;
    }

    const payload = JSON.parse(jsonText) as { action?: string; confidence?: string };
    const action = payload.action as RecordingFallbackAction | undefined;
    const confidence = payload.confidence;

    const actionIsValid = Boolean(action) && VALID_ACTIONS.has(action as RecordingFallbackAction);
    if (!actionIsValid) {
      log.debug({ raw, action }, 'Invalid action in LLM fallback response');
      return SAFE_DEFAULT;
    }

    const confidenceIsValid = Boolean(confidence) && VALID_CONFIDENCES.has(confidence as string);
    if (!confidenceIsValid) {
      log.debug({ raw, confidence }, 'Invalid confidence in LLM fallback response');
      return SAFE_DEFAULT;
    }

    return {
      action: action as RecordingFallbackAction,
      confidence: confidence as RecordingFallbackResult['confidence'],
    };
  } catch (err) {
    log.debug({ err, raw }, 'Failed to parse LLM fallback response as JSON');
    return SAFE_DEFAULT;
  }
}
@@ -1,8 +1,26 @@
1
- // Recording intent detection for standalone screen recording routing.
2
- // Used by task/message handlers to intercept recording-related prompts
1
+ // Recording intent resolution for standalone screen recording routing.
2
+ // Exports `resolveRecordingIntent` as the single public entry point for
3
+ // text-based intent detection. Handlers use this (or structured
4
+ // `commandIntent` payloads) to intercept recording-related prompts
3
5
  // before they reach the classifier or create a CU session.
4
-
5
- export type RecordingIntentClass = 'start_only' | 'stop_only' | 'mixed' | 'none';
6
+ //
7
+ // Internal helpers (detect/strip/classify) are kept as private utilities
8
+ // consumed only by `resolveRecordingIntent`.
9
+
10
+ type RecordingIntentClass = 'start_only' | 'stop_only' | 'mixed' | 'none';
11
+
12
+ export type RecordingIntentResult =
13
+ | { kind: 'none' }
14
+ | { kind: 'start_only' }
15
+ | { kind: 'stop_only' }
16
+ | { kind: 'start_with_remainder'; remainder: string }
17
+ | { kind: 'stop_with_remainder'; remainder: string }
18
+ | { kind: 'start_and_stop_only' }
19
+ | { kind: 'start_and_stop_with_remainder'; remainder: string }
20
+ | { kind: 'restart_only' }
21
+ | { kind: 'restart_with_remainder'; remainder: string }
22
+ | { kind: 'pause_only' }
23
+ | { kind: 'resume_only' };
6
24
 
7
25
  // ─── Start recording patterns ────────────────────────────────────────────────
8
26
 
@@ -25,6 +43,29 @@ const STOP_RECORDING_PATTERNS: RegExp[] = [
25
43
  /\bhalt\s+(the\s+)?recording\b/i,
26
44
  ];
27
45
 
46
+ // ─── Restart recording patterns (compound: stop + start a new one) ──────────
47
+
48
/**
 * Detection patterns for "restart" phrasing (stop the current recording and
 * begin a new one). Consumed with `.some((p) => p.test(text))`.
 *
 * Each compound pattern is written in its most general form: the article
 * before "recording" is optional and either "start" or "begin" is accepted.
 * Narrower duplicates (e.g. a "stop recording and start ..." variant without
 * the optional article) would match a strict subset of these and are
 * deliberately not listed.
 */
const RESTART_RECORDING_PATTERNS: RegExp[] = [
  // "restart (the) recording"
  /\brestart\s+(the\s+)?recording\b/i,
  // "redo (the) recording"
  /\bredo\s+(the\s+)?recording\b/i,
  // "stop (the) recording and start|begin (a) new|fresh|another recording|one"
  /\bstop\s+(the\s+)?recording\s+and\s+(start|begin)\s+(a\s+)?(new|fresh|another)\s+(recording|one)\b/i,
  // Same, with the trailing noun elided at the end of the message:
  // "stop recording and start another!"
  /\bstop\s+(the\s+)?recording\s+and\s+(start|begin)\s+(a\s+)?(new|fresh|another)[.!?\s]*$/i,
  // "stop and restart (the) recording"
  /\bstop\s+and\s+restart\s+(the\s+)?recording\b/i,
];
57
+
58
+ // ─── Pause/resume recording patterns ────────────────────────────────────────
59
+
60
+ const PAUSE_RECORDING_PATTERNS: RegExp[] = [
61
+ /\bpause\s+(the\s+)?recording\b/i,
62
+ ];
63
+
64
+ const RESUME_RECORDING_PATTERNS: RegExp[] = [
65
+ /\bresume\s+(the\s+)?recording\b/i,
66
+ /\bunpause\s+(the\s+)?recording\b/i,
67
+ ];
68
+
28
69
  // ─── Stop-recording clause removal for mixed-intent prompts ─────────────────
29
70
 
30
71
  const STOP_RECORDING_CLAUSE_PATTERNS: RegExp[] = [
@@ -47,17 +88,41 @@ const RECORDING_CLAUSE_PATTERNS: RegExp[] = [
47
88
  /\brecord\s+(my\s+|the\s+)?screen\s+while\b/i,
48
89
  ];
49
90
 
91
+ // ─── Restart clause removal ─────────────────────────────────────────────────
92
+
93
+ const RESTART_RECORDING_CLAUSE_PATTERNS: RegExp[] = [
94
+ // Longer compound patterns first — avoids partial matches by shorter patterns
95
+ /\bstop\s+(the\s+)?recording\s+and\s+(start|begin)\s+(a\s+)?(new|fresh|another)\s+(recording|one)\b/i,
96
+ /\bstop\s+(the\s+)?recording\s+and\s+(start|begin)\s+(a\s+)?(new|fresh|another)[.!?\s]*$/i,
97
+ /\bstop\s+and\s+restart\s+(the\s+)?recording\b/i,
98
+ /\bstop\s+recording\s+and\s+start\s+(a\s+)?(new|another|fresh)\s+(recording|one)\b/i,
99
+ /\bstop\s+recording\s+and\s+start\s+(a\s+)?(new|another|fresh)[.!?\s]*$/i,
100
+ /\b(and\s+)?(also\s+)?restart\s+(the\s+)?recording\b/i,
101
+ /\b(and\s+)?(also\s+)?redo\s+(the\s+)?recording\b/i,
102
+ ];
103
+
104
+ // ─── Pause/resume clause removal ────────────────────────────────────────────
105
+
106
+ const PAUSE_RECORDING_CLAUSE_PATTERNS: RegExp[] = [
107
+ /\b(and\s+)?(also\s+)?pause\s+(the\s+)?recording\b/i,
108
+ ];
109
+
110
+ const RESUME_RECORDING_CLAUSE_PATTERNS: RegExp[] = [
111
+ /\b(and\s+)?(also\s+)?resume\s+(the\s+)?recording\b/i,
112
+ /\b(and\s+)?(also\s+)?unpause\s+(the\s+)?recording\b/i,
113
+ ];
114
+
50
115
  /** Common polite/filler words stripped before checking intent-only status. */
51
116
  const FILLER_PATTERN =
52
117
  /\b(please|pls|plz|can\s+you|could\s+you|would\s+you|now|right\s+now|thanks|thank\s+you|thx|ty|for\s+me|ok(ay)?|hey|hi|just)\b/gi;
53
118
 
54
- // ─── Public API ──────────────────────────────────────────────────────────────
119
+ // ─── Internal helpers ────────────────────────────────────────────────────────
55
120
 
56
121
  /**
57
122
  * Returns true if the user's message includes any recording-related phrases.
58
123
  * Does not distinguish between recording-only and mixed-intent prompts.
59
124
  */
60
- export function detectRecordingIntent(taskText: string): boolean {
125
+ function detectRecordingIntent(taskText: string): boolean {
61
126
  return START_RECORDING_PATTERNS.some((p) => p.test(taskText));
62
127
  }
63
128
 
@@ -67,7 +132,7 @@ export function detectRecordingIntent(taskText: string): boolean {
67
132
  * "record my screen while I work" -> false (has CU task component)
68
133
  * "open Chrome and record my screen" -> false (has CU task component)
69
134
  */
70
- export function isRecordingOnly(taskText: string): boolean {
135
+ function isRecordingOnly(taskText: string): boolean {
71
136
  if (!detectRecordingIntent(taskText)) return false;
72
137
 
73
138
  // Strip the recording clause and check if anything substantive remains
@@ -84,16 +149,31 @@ export function isRecordingOnly(taskText: string): boolean {
84
149
  * Requires explicit "stop/end/finish/halt recording" phrasing --
85
150
  * bare "stop", "end it", or "quit" are too ambiguous and will not match.
86
151
  */
87
- export function detectStopRecordingIntent(taskText: string): boolean {
152
+ function detectStopRecordingIntent(taskText: string): boolean {
88
153
  return STOP_RECORDING_PATTERNS.some((p) => p.test(taskText));
89
154
  }
90
155
 
156
/** Reports whether `taskText` matches at least one restart-recording pattern. */
function detectRestartRecordingIntent(taskText: string): boolean {
  for (const pattern of RESTART_RECORDING_PATTERNS) {
    if (pattern.test(taskText)) {
      return true;
    }
  }
  return false;
}
160
+
161
/** Reports whether `taskText` matches at least one pause-recording pattern. */
function detectPauseRecordingIntent(taskText: string): boolean {
  for (const pattern of PAUSE_RECORDING_PATTERNS) {
    if (pattern.test(taskText)) {
      return true;
    }
  }
  return false;
}
165
+
166
/** Reports whether `taskText` matches at least one resume/unpause-recording pattern. */
function detectResumeRecordingIntent(taskText: string): boolean {
  for (const pattern of RESUME_RECORDING_PATTERNS) {
    if (pattern.test(taskText)) {
      return true;
    }
  }
  return false;
}
170
+
91
171
  /**
92
172
  * Removes recording-related clauses from a task, returning the cleaned text.
93
173
  * Used when a recording intent is embedded in a broader CU task so the
94
174
  * recording portion can be handled separately while the task continues.
95
175
  */
96
- export function stripRecordingIntent(taskText: string): string {
176
+ function stripRecordingIntent(taskText: string): string {
97
177
  let result = taskText;
98
178
  for (const pattern of RECORDING_CLAUSE_PATTERNS) {
99
179
  result = result.replace(pattern, '');
@@ -106,7 +186,7 @@ export function stripRecordingIntent(taskText: string): string {
106
186
  * Removes stop-recording clauses from a message, returning the cleaned text.
107
187
  * Analogous to stripRecordingIntent but for stop-recording phrases.
108
188
  */
109
- export function stripStopRecordingIntent(taskText: string): string {
189
+ function stripStopRecordingIntent(taskText: string): string {
110
190
  let result = taskText;
111
191
  for (const pattern of STOP_RECORDING_CLAUSE_PATTERNS) {
112
192
  result = result.replace(pattern, '');
@@ -114,6 +194,33 @@ export function stripStopRecordingIntent(taskText: string): string {
114
194
  return result.replace(/\s{2,}/g, ' ').trim();
115
195
  }
116
196
 
197
/**
 * Deletes the first match of each restart-recording clause pattern from
 * `taskText` (patterns applied in declaration order), then collapses runs of
 * whitespace to a single space and trims the ends.
 */
function stripRestartRecordingIntent(taskText: string): string {
  const withoutClauses = RESTART_RECORDING_CLAUSE_PATTERNS.reduce(
    (remaining, clause) => remaining.replace(clause, ''),
    taskText,
  );
  return withoutClauses.replace(/\s{2,}/g, ' ').trim();
}
205
+
206
/**
 * Deletes the first match of each pause-recording clause pattern from
 * `taskText`, then collapses runs of whitespace to a single space and trims
 * the ends.
 */
function stripPauseRecordingIntent(taskText: string): string {
  const withoutClauses = PAUSE_RECORDING_CLAUSE_PATTERNS.reduce(
    (remaining, clause) => remaining.replace(clause, ''),
    taskText,
  );
  return withoutClauses.replace(/\s{2,}/g, ' ').trim();
}
214
+
215
/**
 * Deletes the first match of each resume/unpause-recording clause pattern
 * from `taskText`, then collapses runs of whitespace to a single space and
 * trims the ends.
 */
function stripResumeRecordingIntent(taskText: string): string {
  const withoutClauses = RESUME_RECORDING_CLAUSE_PATTERNS.reduce(
    (remaining, clause) => remaining.replace(clause, ''),
    taskText,
  );
  return withoutClauses.replace(/\s{2,}/g, ' ').trim();
}
223
+
117
224
  /**
118
225
  * Returns true if the prompt is purely about stopping recording with no
119
226
  * additional task. Analogous to isRecordingOnly but for stop-recording.
@@ -121,7 +228,7 @@ export function stripStopRecordingIntent(taskText: string): string {
121
228
  * "how do I stop recording?" -> false (has additional context)
122
229
  * "stop recording and close the browser" -> false (has CU task component)
123
230
  */
124
- export function isStopRecordingOnly(taskText: string): boolean {
231
+ function isStopRecordingOnly(taskText: string): boolean {
125
232
  if (!detectStopRecordingIntent(taskText)) return false;
126
233
 
127
234
  const stripped = stripStopRecordingIntent(taskText);
@@ -130,6 +237,30 @@ export function isStopRecordingOnly(taskText: string): boolean {
130
237
  return withoutFillers.replace(/[.,;!?\s]+/g, '').length === 0;
131
238
  }
132
239
 
240
/**
 * True when `taskText` is a restart command and nothing else: after removing
 * the restart clause, filler words, and punctuation/whitespace, no characters
 * are left.
 */
function isRestartRecordingOnly(taskText: string): boolean {
  if (detectRestartRecordingIntent(taskText)) {
    const leftover = stripRestartRecordingIntent(taskText)
      .replace(FILLER_PATTERN, '')
      .replace(/[.,;!?\s]+/g, '');
    return leftover.length === 0;
  }
  return false;
}
247
+
248
/**
 * True when `taskText` is a pause command and nothing else: after removing
 * the pause clause, filler words, and punctuation/whitespace, no characters
 * are left.
 */
function isPauseRecordingOnly(taskText: string): boolean {
  if (detectPauseRecordingIntent(taskText)) {
    const leftover = stripPauseRecordingIntent(taskText)
      .replace(FILLER_PATTERN, '')
      .replace(/[.,;!?\s]+/g, '');
    return leftover.length === 0;
  }
  return false;
}
255
+
256
/**
 * True when `taskText` is a resume command and nothing else: after removing
 * the resume clause, filler words, and punctuation/whitespace, no characters
 * are left.
 */
function isResumeRecordingOnly(taskText: string): boolean {
  if (detectResumeRecordingIntent(taskText)) {
    const leftover = stripResumeRecordingIntent(taskText)
      .replace(FILLER_PATTERN, '')
      .replace(/[.,;!?\s]+/g, '');
    return leftover.length === 0;
  }
  return false;
}
263
+
133
264
  // ─── Dynamic name normalization ─────────────────────────────────────────────
134
265
 
135
266
  /**
@@ -159,7 +290,7 @@ export function stripDynamicNames(text: string, dynamicNames: string[]): string
159
290
  * punctuation, and dynamic assistant names. Used to determine whether
160
291
  * remaining text after stripping recording clauses needs further processing.
161
292
  */
162
- export function hasSubstantiveContent(text: string, dynamicNames?: string[]): boolean {
293
+ function hasSubstantiveContent(text: string, dynamicNames?: string[]): boolean {
163
294
  let cleaned = text;
164
295
  if (dynamicNames && dynamicNames.length > 0) {
165
296
  cleaned = stripDynamicNames(cleaned, dynamicNames);
@@ -175,22 +306,68 @@ export function hasSubstantiveContent(text: string, dynamicNames?: string[]): bo
175
306
  * triggering recording side effects in the mixed handler. */
176
307
  const WH_INTERROGATIVE = /^\s*(how|what|why|when|where|who|which)\b/i;
177
308
 
309
/**
 * Indirect informational patterns: the user is asking *about* recording
 * rather than commanding a recording action, but the WH-word is buried after
 * polite filler or an informational verb:
 *
 *   - "can you tell me how to stop recording?"
 *   - "could you explain why the recording stopped?"
 *   - "can you please tell me how to stop recording?"
 *   - "explain how to stop the recording"
 *   - "is there a way to stop recording?"
 *   - "I'd like to know how to pause the recording"
 *   - "do you know how to start recording?"
 *
 * Critical: these must NOT match polite imperatives such as "can you stop
 * recording?". The distinguishing feature is the intermediary informational
 * verb/phrase (tell/explain/describe/show followed by a WH-word, "is there a
 * way to", "I want to know", ...). Every pattern is anchored at the start of
 * the text, so callers are expected to strip greetings/vocatives first.
 */
const INDIRECT_INFORMATIONAL_PATTERNS: RegExp[] = [
  // [can|could|would you] [please] tell|explain|describe|show [me|to me] [about] <WH-word>
  // The optional polite prefix applies to every WH-word, not only "how", so
  // "can you tell me what stop recording does?" is treated as a question.
  /^\s*(?:(?:can|could|would)\s+you\s+)?(?:please\s+)?(?:tell|explain|describe|show)\s+(?:me\s+|to\s+me\s+)?(?:about\s+)?(?:how|what|why|when|where)\b/i,
  // "is there a way to...", "are there any ways to..."
  /^\s*(is\s+there|are\s+there)\s+(a\s+|any\s+)?(ways?|methods?|options?|means)\s+to\b/i,
  // "I'd like to know...", "I would like to know...", "I want to know..."
  // Accepts both the ASCII and the typographic (U+2019) apostrophe.
  /^\s*(i(['\u2019]d|\s+would)\s+like\s+to\s+know|i\s+want\s+to\s+know)\b/i,
  // "do you know how to...", "can I learn (how) to..."
  /^\s*(do\s+you\s+know\s+how|can\s+i\s+learn(\s+how)?)\s+to\b/i,
];
338
+
178
339
  /**
179
340
  * Returns true if the text appears to be a question about recording rather
180
341
  * than an imperative command that includes recording.
181
342
  *
182
343
  * "how do I stop recording?" → true (question — don't trigger side effects)
344
+ * "can you tell me how to stop recording?" → true (informational — don't trigger)
345
+ * "explain how to stop the recording" → true (informational — don't trigger)
346
+ * "is there a way to stop recording?" → true (capability question — don't trigger)
183
347
  * "open Chrome and record my screen" → false (command — trigger recording)
184
348
  * "can you record my screen?" → false (polite imperative — trigger recording)
349
+ * "can you stop recording?" → false (polite imperative — trigger stop)
185
350
  */
186
- export function isInterrogative(text: string, dynamicNames?: string[]): boolean {
351
function isInterrogative(text: string, dynamicNames?: string[]): boolean {
  // Purpose: decide whether `text` is a question *about* recording (return
  // true, so no recording side effect fires) or a command (return false).
  // `dynamicNames`, when given, are assistant names removed before matching.
  let cleaned = text;
  if (dynamicNames && dynamicNames.length > 0) {
    cleaned = stripDynamicNames(cleaned, dynamicNames);
  }

  // Strip polite prefixes that don't change interrogative status. The group
  // repeats so stacked greetings ("hey, please, how do I ...") are all
  // removed; the question patterns below are anchored at the start of the
  // text and would otherwise be defeated by a second greeting word.
  cleaned = cleaned.replace(/^\s*(?:(?:hey|hi|hello|please|pls|plz)[,\s]+)+/i, '');

  // Direct WH-questions (how/what/why/when/where/who/which)
  if (WH_INTERROGATIVE.test(cleaned)) {
    return true;
  }

  // Indirect informational phrasing ("tell me how ...", "is there a way to
  // ..."), checked on the same prefix-stripped text.
  return INDIRECT_INFORMATIONAL_PATTERNS.some((p) => p.test(cleaned));
}
195
372
 
196
373
  // ─── Unified classification ─────────────────────────────────────────────────
@@ -206,7 +383,7 @@ export function isInterrogative(text: string, dynamicNames?: string[]): boolean
206
383
  * If `dynamicNames` are provided, they are stripped from the beginning of the
207
384
  * text before classification (e.g., "Nova, record my screen" -> "record my screen").
208
385
  */
209
- export function classifyRecordingIntent(
386
+ function _classifyRecordingIntent(
210
387
  taskText: string,
211
388
  dynamicNames?: string[],
212
389
  ): RecordingIntentClass {
@@ -231,3 +408,117 @@ export function classifyRecordingIntent(
231
408
 
232
409
  return 'none';
233
410
  }
411
+
412
+ // ─── Structured intent resolver ─────────────────────────────────────────────
413
+
414
/**
 * Resolves recording intent from user text into a structured result that
 * distinguishes pure recording commands from commands with remaining task text.
 *
 * Pipeline:
 *   1. Strip dynamic assistant names (leading vocative).
 *   2. Strip leading polite wrappers (all stacked greetings, not just one).
 *   3. Interrogative gate: questions return `none`.
 *   3.5. Restart compound detection (before independent start/stop).
 *   3.6. Pause/resume detection: only a pure pause/resume command resolves
 *        here; pause/resume mixed with other text falls through.
 *   4. Detect start and stop patterns.
 *   5. Resolve: when both start and stop are present the combined
 *      `start_and_stop_*` variants are returned; otherwise the start or stop
 *      variants. Remainders are stripped from the ORIGINAL text to preserve
 *      the user's exact phrasing.
 *
 * @param text - Raw user message.
 * @param dynamicNames - Optional assistant names to ignore as vocatives.
 * @returns The resolved intent; `{ kind: 'none' }` when no command is found.
 */
export function resolveRecordingIntent(
  text: string,
  dynamicNames?: string[],
): RecordingIntentResult {
  // Step 1: Strip dynamic assistant names for normalization
  let normalized =
    dynamicNames && dynamicNames.length > 0
      ? stripDynamicNames(text, dynamicNames)
      : text;

  // Step 2: Strip leading polite wrappers for normalization. The group
  // repeats so "hey hello, stop recording" normalizes to "stop recording";
  // stripping a single greeting would leave "hello", which is not a filler
  // word and would be misread as additional task text.
  normalized = normalized.replace(/^\s*(?:(?:hey|hi|hello|please|pls|plz)[,\s]+)+/i, '');

  // Step 3: Interrogative gate: questions (WH-words and indirect
  // informational patterns) are not commands
  if (isInterrogative(normalized, dynamicNames)) {
    return { kind: 'none' };
  }

  // Step 3.5: Restart compound detection: check BEFORE independent start/stop
  // so "stop recording and start a new one" is recognized as restart, not
  // as separate stop + start patterns.
  if (detectRestartRecordingIntent(normalized)) {
    if (isRestartRecordingOnly(normalized)) {
      return { kind: 'restart_only' };
    }
    // Strip from the ORIGINAL text to preserve user's exact phrasing
    const remainder = stripRestartRecordingIntent(text);
    if (hasSubstantiveContent(remainder, dynamicNames)) {
      return { kind: 'restart_with_remainder', remainder };
    }
    return { kind: 'restart_only' };
  }

  // Step 3.6: Pause/resume detection: check before start/stop
  if (detectPauseRecordingIntent(normalized)) {
    if (isPauseRecordingOnly(normalized)) {
      return { kind: 'pause_only' };
    }
    // Pause with additional text falls through to normal processing
  }

  if (detectResumeRecordingIntent(normalized)) {
    if (isResumeRecordingOnly(normalized)) {
      return { kind: 'resume_only' };
    }
    // Resume with additional text falls through to normal processing
  }

  // Step 4: Detect start and stop patterns on the normalized text
  const hasStart = detectRecordingIntent(normalized);
  const hasStop = detectStopRecordingIntent(normalized);

  // Step 5: Resolve
  if (hasStart) {
    if (hasStop) {
      // Both start and stop detected: use combined variants
      if (isRecordingOnly(normalized)) {
        // Check if stop-only after stripping start patterns
        const withoutStart = stripRecordingIntent(normalized);
        if (isStopRecordingOnly(withoutStart)) {
          return { kind: 'start_and_stop_only' };
        }
      }
      // Strip both clause families from the ORIGINAL text
      let remainder = stripRecordingIntent(text);
      remainder = stripStopRecordingIntent(remainder);
      if (hasSubstantiveContent(remainder, dynamicNames)) {
        return { kind: 'start_and_stop_with_remainder', remainder };
      }
      return { kind: 'start_and_stop_only' };
    }
    // Only start detected
    if (isRecordingOnly(normalized)) {
      return { kind: 'start_only' };
    }
    // Strip from the ORIGINAL text to preserve user's exact phrasing
    const remainder = stripRecordingIntent(text);
    if (hasSubstantiveContent(remainder, dynamicNames)) {
      return { kind: 'start_with_remainder', remainder };
    }
    return { kind: 'start_only' };
  }

  if (hasStop) {
    if (isStopRecordingOnly(normalized)) {
      return { kind: 'stop_only' };
    }
    // Strip from the ORIGINAL text to preserve user's exact phrasing
    const remainder = stripStopRecordingIntent(text);
    if (hasSubstantiveContent(remainder, dynamicNames)) {
      return { kind: 'stop_with_remainder', remainder };
    }
    return { kind: 'stop_only' };
  }

  return { kind: 'none' };
}
@@ -25,6 +25,7 @@ import { requestComputerControlTool } from '../tools/computer-use/request-comput
25
25
  import type { ProxyApprovalCallback, ProxyApprovalRequest } from '../tools/network/script-proxy/index.js';
26
26
  import { getAllToolDefinitions } from '../tools/registry.js';
27
27
  import { allUiSurfaceTools } from '../tools/ui-surface/definitions.js';
28
+ import type { GuardianRuntimeContext } from './session-runtime-assembly.js';
28
29
  import { projectSkillTools, type SkillProjectionCache } from './session-skill-tools.js';
29
30
  import type { SurfaceSessionContext } from './session-surfaces.js';
30
31
  import {
@@ -55,6 +56,8 @@ export interface ToolSetupContext extends SurfaceSessionContext {
55
56
  headlessLock?: boolean;
56
57
  /** When set, this session is executing a task run. Used to retrieve ephemeral permission rules. */
57
58
  taskRunId?: string;
59
+ /** Guardian runtime context for the session — actorRole is propagated into ToolContext for control-plane policy enforcement. */
60
+ guardianContext?: GuardianRuntimeContext;
58
61
  }
59
62
 
60
63
  // ── buildToolDefinitions ─────────────────────────────────────────────
@@ -105,6 +108,7 @@ export function createToolExecutor(
105
108
  assistantId: ctx.assistantId,
106
109
  requestId: ctx.currentRequestId,
107
110
  taskRunId: ctx.taskRunId,
111
+ guardianActorRole: ctx.guardianContext?.actorRole,
108
112
  onOutput,
109
113
  signal: ctx.abortController?.signal,
110
114
  sandboxOverride: ctx.sandboxOverride,
@@ -13,7 +13,7 @@ import { getLogger } from '../util/logger.js';
13
13
  import { getDb } from './db.js';
14
14
  import { conversationAssistantAttentionState, conversationAttentionEvents, conversations, messages } from './schema.js';
15
15
 
16
- const log = getLogger('conversation-attention-store');
16
+ const _log = getLogger('conversation-attention-store');
17
17
 
18
18
  // ── Types ────────────────────────────────────────────────────────────
19
19
 
@@ -138,7 +138,7 @@ export function projectAssistantMessage(params: {
138
138
  }
139
139
 
140
140
  // Monotonic: only advance if the new message is strictly later
141
- if (existing.latestAssistantMessageAt !== null && messageAt <= existing.latestAssistantMessageAt) {
141
+ if (existing.latestAssistantMessageAt != null && messageAt <= existing.latestAssistantMessageAt) {
142
142
  return;
143
143
  }
144
144
 
@@ -251,8 +251,8 @@ export function recordConversationSeenSignal(params: {
251
251
  // Only advance the seen cursor if there is a latest assistant message to mark as seen,
252
252
  // and the seen cursor hasn't already reached or passed it (monotonic invariant).
253
253
  const shouldAdvanceSeen =
254
- state.latestAssistantMessageAt !== null &&
255
- (state.lastSeenAssistantMessageAt === null ||
254
+ state.latestAssistantMessageAt != null &&
255
+ (state.lastSeenAssistantMessageAt == null ||
256
256
  state.latestAssistantMessageAt > state.lastSeenAssistantMessageAt);
257
257
 
258
258
  // Guard seen metadata monotonicity: only update lastSeen* metadata when the
@@ -260,7 +260,7 @@ export function recordConversationSeenSignal(params: {
260
260
  // Out-of-order delivery (e.g. delayed channel callbacks) must not regress
261
261
  // the projected channel/source/confidence metadata.
262
262
  const isNewerSignal =
263
- state.lastSeenEventAt === null || eventObservedAt >= state.lastSeenEventAt;
263
+ state.lastSeenEventAt == null || eventObservedAt >= state.lastSeenEventAt;
264
264
 
265
265
  const updates: Record<string, unknown> = {
266
266
  updatedAt: now,