@vellumai/assistant 0.4.3 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183) hide show
  1. package/.env.example +3 -0
  2. package/ARCHITECTURE.md +40 -3
  3. package/README.md +43 -35
  4. package/package.json +1 -1
  5. package/scripts/ipc/generate-swift.ts +1 -0
  6. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +58 -120
  7. package/src/__tests__/actor-token-service.test.ts +1099 -0
  8. package/src/__tests__/agent-loop.test.ts +51 -0
  9. package/src/__tests__/approval-routes-http.test.ts +2 -0
  10. package/src/__tests__/assistant-events-sse-hardening.test.ts +7 -5
  11. package/src/__tests__/assistant-id-boundary-guard.test.ts +125 -0
  12. package/src/__tests__/call-controller.test.ts +49 -0
  13. package/src/__tests__/call-pointer-message-composer.test.ts +171 -0
  14. package/src/__tests__/call-pointer-messages.test.ts +93 -3
  15. package/src/__tests__/call-pointer-no-hardcoded-copy.guard.test.ts +42 -0
  16. package/src/__tests__/callback-handoff-copy.test.ts +186 -0
  17. package/src/__tests__/channel-approval-routes.test.ts +133 -12
  18. package/src/__tests__/channel-guardian.test.ts +0 -87
  19. package/src/__tests__/channel-readiness-service.test.ts +10 -16
  20. package/src/__tests__/checker.test.ts +33 -12
  21. package/src/__tests__/config-schema.test.ts +4 -0
  22. package/src/__tests__/confirmation-request-guardian-bridge.test.ts +410 -0
  23. package/src/__tests__/conversation-routes-guardian-reply.test.ts +256 -0
  24. package/src/__tests__/conversation-routes.test.ts +12 -3
  25. package/src/__tests__/credential-security-invariants.test.ts +1 -1
  26. package/src/__tests__/daemon-server-session-init.test.ts +4 -0
  27. package/src/__tests__/guardian-actions-endpoint.test.ts +19 -14
  28. package/src/__tests__/guardian-dispatch.test.ts +8 -0
  29. package/src/__tests__/guardian-outbound-http.test.ts +4 -4
  30. package/src/__tests__/guardian-question-mode.test.ts +200 -0
  31. package/src/__tests__/guardian-routing-invariants.test.ts +178 -0
  32. package/src/__tests__/guardian-routing-state.test.ts +525 -0
  33. package/src/__tests__/handle-user-message-secret-resume.test.ts +2 -0
  34. package/src/__tests__/handlers-telegram-config.test.ts +0 -83
  35. package/src/__tests__/handlers-user-message-approval-consumption.test.ts +55 -0
  36. package/src/__tests__/headless-browser-navigate.test.ts +2 -0
  37. package/src/__tests__/ipc-snapshot.test.ts +18 -51
  38. package/src/__tests__/non-member-access-request.test.ts +131 -8
  39. package/src/__tests__/notification-decision-fallback.test.ts +129 -4
  40. package/src/__tests__/notification-decision-strategy.test.ts +62 -2
  41. package/src/__tests__/notification-guardian-path.test.ts +3 -0
  42. package/src/__tests__/recording-intent-handler.test.ts +1 -0
  43. package/src/__tests__/relay-server.test.ts +841 -39
  44. package/src/__tests__/send-endpoint-busy.test.ts +5 -0
  45. package/src/__tests__/session-agent-loop.test.ts +1 -0
  46. package/src/__tests__/session-confirmation-signals.test.ts +523 -0
  47. package/src/__tests__/session-init.benchmark.test.ts +0 -1
  48. package/src/__tests__/session-surfaces-task-progress.test.ts +1 -1
  49. package/src/__tests__/session-tool-setup-app-refresh.test.ts +81 -2
  50. package/src/__tests__/session-tool-setup-memory-scope.test.ts +1 -1
  51. package/src/__tests__/session-tool-setup-side-effect-flag.test.ts +1 -1
  52. package/src/__tests__/tool-executor.test.ts +21 -2
  53. package/src/__tests__/tool-grant-request-escalation.test.ts +333 -27
  54. package/src/__tests__/trusted-contact-approval-notifier.test.ts +678 -0
  55. package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +1064 -0
  56. package/src/__tests__/twilio-config.test.ts +2 -13
  57. package/src/agent/loop.ts +1 -1
  58. package/src/approvals/guardian-decision-primitive.ts +10 -2
  59. package/src/approvals/guardian-request-resolvers.ts +128 -9
  60. package/src/calls/call-constants.ts +21 -0
  61. package/src/calls/call-controller.ts +9 -2
  62. package/src/calls/call-domain.ts +28 -7
  63. package/src/calls/call-pointer-message-composer.ts +154 -0
  64. package/src/calls/call-pointer-messages.ts +106 -27
  65. package/src/calls/guardian-dispatch.ts +4 -2
  66. package/src/calls/relay-server.ts +424 -12
  67. package/src/calls/twilio-config.ts +4 -11
  68. package/src/calls/twilio-routes.ts +1 -1
  69. package/src/calls/types.ts +3 -1
  70. package/src/cli.ts +5 -4
  71. package/src/config/bundled-skills/agentmail/SKILL.md +4 -0
  72. package/src/config/bundled-skills/app-builder/SKILL.md +146 -10
  73. package/src/config/bundled-skills/app-builder/TOOLS.json +1 -1
  74. package/src/config/bundled-skills/email-setup/SKILL.md +1 -1
  75. package/src/config/bundled-skills/google-oauth-setup/SKILL.md +105 -81
  76. package/src/config/bundled-skills/messaging/SKILL.md +61 -12
  77. package/src/config/bundled-skills/messaging/TOOLS.json +58 -0
  78. package/src/config/bundled-skills/messaging/tools/gmail-sender-digest.ts +6 -1
  79. package/src/config/bundled-skills/messaging/tools/messaging-archive-by-sender.ts +35 -0
  80. package/src/config/bundled-skills/messaging/tools/messaging-sender-digest.ts +52 -0
  81. package/src/config/bundled-skills/phone-calls/SKILL.md +30 -39
  82. package/src/config/bundled-skills/twitter/SKILL.md +3 -3
  83. package/src/config/bundled-skills/vercel-token-setup/SKILL.md +1 -0
  84. package/src/config/calls-schema.ts +24 -0
  85. package/src/config/env.ts +22 -0
  86. package/src/config/feature-flag-registry.json +8 -0
  87. package/src/config/schema.ts +2 -2
  88. package/src/config/skills.ts +11 -0
  89. package/src/config/system-prompt.ts +11 -1
  90. package/src/config/templates/SOUL.md +2 -0
  91. package/src/config/vellum-skills/sms-setup/SKILL.md +71 -82
  92. package/src/config/vellum-skills/trusted-contacts/SKILL.md +10 -9
  93. package/src/config/vellum-skills/twilio-setup/SKILL.md +88 -73
  94. package/src/daemon/call-pointer-generators.ts +59 -0
  95. package/src/daemon/computer-use-session.ts +2 -5
  96. package/src/daemon/handlers/apps.ts +76 -20
  97. package/src/daemon/handlers/config-channels.ts +5 -55
  98. package/src/daemon/handlers/config-inbox.ts +9 -3
  99. package/src/daemon/handlers/config-ingress.ts +28 -3
  100. package/src/daemon/handlers/config-telegram.ts +12 -0
  101. package/src/daemon/handlers/config.ts +2 -6
  102. package/src/daemon/handlers/pairing.ts +2 -0
  103. package/src/daemon/handlers/sessions.ts +48 -3
  104. package/src/daemon/handlers/shared.ts +17 -2
  105. package/src/daemon/ipc-contract/integrations.ts +1 -99
  106. package/src/daemon/ipc-contract/messages.ts +47 -1
  107. package/src/daemon/ipc-contract/notifications.ts +11 -0
  108. package/src/daemon/ipc-contract-inventory.json +2 -4
  109. package/src/daemon/lifecycle.ts +17 -0
  110. package/src/daemon/server.ts +14 -1
  111. package/src/daemon/session-agent-loop-handlers.ts +20 -0
  112. package/src/daemon/session-agent-loop.ts +22 -11
  113. package/src/daemon/session-lifecycle.ts +1 -1
  114. package/src/daemon/session-process.ts +11 -1
  115. package/src/daemon/session-runtime-assembly.ts +3 -0
  116. package/src/daemon/session-surfaces.ts +3 -2
  117. package/src/daemon/session.ts +88 -1
  118. package/src/daemon/tool-side-effects.ts +22 -0
  119. package/src/home-base/prebuilt/brain-graph.html +1483 -0
  120. package/src/home-base/prebuilt/index.html +40 -0
  121. package/src/inbound/platform-callback-registration.ts +157 -0
  122. package/src/memory/canonical-guardian-store.ts +1 -1
  123. package/src/memory/db-init.ts +4 -0
  124. package/src/memory/migrations/038-actor-token-records.ts +39 -0
  125. package/src/memory/migrations/index.ts +1 -0
  126. package/src/memory/schema.ts +16 -0
  127. package/src/messaging/provider-types.ts +24 -0
  128. package/src/messaging/provider.ts +7 -0
  129. package/src/messaging/providers/gmail/adapter.ts +127 -0
  130. package/src/messaging/providers/sms/adapter.ts +40 -37
  131. package/src/notifications/adapters/macos.ts +45 -2
  132. package/src/notifications/broadcaster.ts +16 -0
  133. package/src/notifications/copy-composer.ts +39 -1
  134. package/src/notifications/decision-engine.ts +22 -9
  135. package/src/notifications/destination-resolver.ts +16 -2
  136. package/src/notifications/emit-signal.ts +16 -8
  137. package/src/notifications/guardian-question-mode.ts +419 -0
  138. package/src/notifications/signal.ts +14 -3
  139. package/src/permissions/checker.ts +13 -1
  140. package/src/permissions/prompter.ts +14 -0
  141. package/src/providers/anthropic/client.ts +20 -0
  142. package/src/providers/provider-send-message.ts +15 -3
  143. package/src/runtime/access-request-helper.ts +71 -1
  144. package/src/runtime/actor-token-service.ts +234 -0
  145. package/src/runtime/actor-token-store.ts +236 -0
  146. package/src/runtime/channel-approvals.ts +5 -3
  147. package/src/runtime/channel-readiness-service.ts +23 -64
  148. package/src/runtime/channel-readiness-types.ts +3 -4
  149. package/src/runtime/channel-retry-sweep.ts +4 -1
  150. package/src/runtime/confirmation-request-guardian-bridge.ts +197 -0
  151. package/src/runtime/guardian-action-followup-executor.ts +1 -1
  152. package/src/runtime/guardian-context-resolver.ts +82 -0
  153. package/src/runtime/guardian-outbound-actions.ts +0 -3
  154. package/src/runtime/guardian-reply-router.ts +67 -30
  155. package/src/runtime/guardian-vellum-migration.ts +57 -0
  156. package/src/runtime/http-server.ts +65 -12
  157. package/src/runtime/http-types.ts +13 -0
  158. package/src/runtime/invite-redemption-service.ts +8 -0
  159. package/src/runtime/local-actor-identity.ts +76 -0
  160. package/src/runtime/middleware/actor-token.ts +271 -0
  161. package/src/runtime/routes/approval-routes.ts +82 -7
  162. package/src/runtime/routes/brain-graph-routes.ts +222 -0
  163. package/src/runtime/routes/channel-readiness-routes.ts +71 -0
  164. package/src/runtime/routes/conversation-routes.ts +140 -52
  165. package/src/runtime/routes/events-routes.ts +20 -5
  166. package/src/runtime/routes/guardian-action-routes.ts +45 -3
  167. package/src/runtime/routes/guardian-approval-interception.ts +29 -0
  168. package/src/runtime/routes/guardian-bootstrap-routes.ts +145 -0
  169. package/src/runtime/routes/inbound-message-handler.ts +143 -2
  170. package/src/runtime/routes/integration-routes.ts +7 -15
  171. package/src/runtime/routes/pairing-routes.ts +163 -0
  172. package/src/runtime/routes/twilio-routes.ts +934 -0
  173. package/src/runtime/tool-grant-request-helper.ts +3 -1
  174. package/src/security/oauth2.ts +27 -2
  175. package/src/security/token-manager.ts +46 -10
  176. package/src/tools/browser/browser-execution.ts +4 -3
  177. package/src/tools/browser/browser-handoff.ts +10 -18
  178. package/src/tools/browser/browser-manager.ts +80 -25
  179. package/src/tools/browser/browser-screencast.ts +35 -119
  180. package/src/tools/permission-checker.ts +15 -4
  181. package/src/tools/tool-approval-handler.ts +242 -18
  182. package/src/__tests__/handlers-twilio-config.test.ts +0 -1928
  183. package/src/daemon/handlers/config-twilio.ts +0 -1082
@@ -81,6 +81,7 @@ function makeCompletingSession(): Session {
81
81
  setCommandIntent: () => {},
82
82
  setTurnChannelContext: () => {},
83
83
  setTurnInterfaceContext: () => {},
84
+ setStateSignalListener: () => {},
84
85
  updateClient: () => {},
85
86
  hasAnyPendingConfirmation: () => false,
86
87
  hasPendingConfirmation: () => false,
@@ -116,6 +117,7 @@ function makeHangingSession(): Session {
116
117
  setCommandIntent: () => {},
117
118
  setTurnChannelContext: () => {},
118
119
  setTurnInterfaceContext: () => {},
120
+ setStateSignalListener: () => {},
119
121
  updateClient: () => {},
120
122
  hasAnyPendingConfirmation: () => false,
121
123
  hasPendingConfirmation: () => false,
@@ -172,10 +174,13 @@ function makePendingApprovalSession(
172
174
  setCommandIntent: () => {},
173
175
  setTurnChannelContext: () => {},
174
176
  setTurnInterfaceContext: () => {},
177
+ setStateSignalListener: () => {},
175
178
  updateClient: () => {},
176
179
  hasAnyPendingConfirmation: () => pending.size > 0,
177
180
  hasPendingConfirmation: (candidateRequestId: string) => pending.has(candidateRequestId),
178
181
  denyAllPendingConfirmations: denyAllPendingConfirmationsMock,
182
+ emitConfirmationStateChanged: () => {},
183
+ emitActivityState: () => {},
179
184
  getQueueDepth: () => queueDepth,
180
185
  enqueueMessage: enqueueMessageMock,
181
186
  runAgentLoop: runAgentLoopMock,
@@ -311,6 +311,7 @@ function makeCtx(overrides?: Partial<AgentLoopSessionContext> & { agentLoopRun?:
311
311
 
312
312
  refreshWorkspaceTopLevelContextIfNeeded: () => {},
313
313
  markWorkspaceTopLevelDirty: () => {},
314
+ emitActivityState: () => {},
314
315
  getQueueDepth: () => 0,
315
316
  hasQueuedMessages: () => false,
316
317
  canHandoffAtCheckpoint: () => false,
@@ -0,0 +1,523 @@
1
+ /**
2
+ * Behavioral tests for centralized confirmation state emissions and
3
+ * activity version ordering.
4
+ *
5
+ * Covers:
6
+ * - handleConfirmationResponse emits both confirmation_state_changed and
7
+ * assistant_activity_state events centrally
8
+ * - emitActivityState produces monotonically increasing activityVersion
9
+ * - setStateSignalListener routes signals to an external callback (HTTP/SSE)
10
+ * - "deny" decisions produce 'denied' state, "allow" produces 'approved'
11
+ */
12
+ import { mkdtempSync, rmSync } from 'node:fs';
13
+ import { tmpdir } from 'node:os';
14
+ import { join } from 'node:path';
15
+
16
+ import { afterAll, describe, expect, mock, test } from 'bun:test';
17
+
18
+ import type { AgentEvent, CheckpointDecision, CheckpointInfo } from '../agent/loop.js';
19
+ import type { ServerMessage } from '../daemon/ipc-protocol.js';
20
+ import type { Message, ProviderResponse } from '../providers/types.js';
21
+
22
+ const testDir = mkdtempSync(join(tmpdir(), 'session-confirmation-signals-test-'));
23
+
24
+ // ---------------------------------------------------------------------------
25
+ // Mocks — must precede Session import
26
+ // ---------------------------------------------------------------------------
27
+
28
/**
 * Builds a stand-in logger object.
 *
 * Every level method is a no-op; `child()` returns a brand-new stub of the
 * same shape, so arbitrarily nested child loggers keep working.
 */
function makeLoggerStub(): Record<string, unknown> {
  const levelNames = ['info', 'warn', 'error', 'debug', 'trace', 'fatal', 'silent'];
  // One distinct no-op per level, in the same key order as the level list.
  const entries: Array<[string, unknown]> = levelNames.map((level) => [level, () => {}]);
  // `child` goes last and builds its stub lazily, only when invoked.
  entries.push(['child', () => makeLoggerStub()]);
  return Object.fromEntries(entries);
}
35
+
36
// Module mocks. Each factory returns the replacement exports for one module;
// all of them are registered here, ahead of the Session import further down.

// Logging: every logger handed out is a no-op stub.
mock.module('../util/logger.js', () => ({
  getLogger() {
    return makeLoggerStub();
  },
}));

// Platform paths are redirected into the per-run temp directory.
mock.module('../util/platform.js', () => ({
  getSocketPath() {
    return join(testDir, 'test.sock');
  },
  getDataDir() {
    return testDir;
  },
}));

// Guardian action store: nothing pending, resolving is a no-op.
mock.module('../memory/guardian-action-store.js', () => ({
  getPendingDeliveryByConversation() {
    return null;
  },
  getGuardianActionRequest() {
    return null;
  },
  resolveGuardianActionRequest() {},
}));

// Provider registry: a single named placeholder provider.
mock.module('../providers/registry.js', () => ({
  getProvider() {
    return { name: 'mock-provider' };
  },
  initializeProviders() {},
}));

// Configuration: a fixed config object; persistence helpers do nothing.
mock.module('../config/loader.js', () => ({
  getConfig() {
    return {
      ui: {},
      provider: 'mock-provider',
      maxTokens: 4096,
      thinking: false,
      contextWindow: {
        maxInputTokens: 100000,
        thresholdTokens: 80000,
        preserveRecentMessages: 6,
        summaryModel: 'mock-model',
        maxSummaryTokens: 512,
      },
      rateLimit: { maxRequestsPerMinute: 0, maxTokensPerSession: 0 },
      timeouts: { permissionTimeoutSec: 1 },
      apiKeys: {},
      skills: { entries: {}, allowBundled: true },
      memory: { retrieval: { injectionStrategy: 'inline' } },
      permissions: { mode: 'legacy' },
    };
  },
  loadRawConfig() {
    return {};
  },
  saveRawConfig() {},
  invalidateConfigCache() {},
}));

mock.module('../config/system-prompt.js', () => ({
  buildSystemPrompt() {
    return 'system prompt';
  },
}));

// Skills: empty catalog, no skill resolved, no icon.
mock.module('../config/skills.js', () => ({
  loadSkillCatalog() {
    return [];
  },
  loadSkillBySelector() {
    return { skill: null };
  },
  async ensureSkillIcon() {
    return null;
  },
}));

mock.module('../config/skill-state.js', () => ({
  resolveSkillStates() {
    return [];
  },
}));

// Slash commands: every input is reported as "not a slash command".
mock.module('../skills/slash-commands.js', () => ({
  buildInvocableSlashCatalog() {
    return new Map();
  },
  resolveSlashSkillCommand() {
    return { kind: 'not_slash' };
  },
  rewriteKnownSlashCommandPrompt() {
    return '';
  },
  parseSlashCandidate() {
    return { kind: 'not_slash' };
  },
}));

// Trust store: no rules exist and writes are ignored.
mock.module('../permissions/trust-store.js', () => ({
  addRule() {},
  findHighestPriorityRule() {
    return null;
  },
  clearCache() {},
}));

mock.module('../security/secret-allowlist.js', () => ({
  resetAllowlist() {},
}));

// Memory admin: all conflict/cleanup counters are zero.
mock.module('../memory/admin.js', () => ({
  getMemoryConflictAndCleanupStats() {
    return {
      conflicts: { pending: 0, resolved: 0, oldestPendingAgeMs: null },
      cleanup: {
        resolvedBacklog: 0,
        supersededBacklog: 0,
        resolvedCompleted24h: 0,
        supersededCompleted24h: 0,
      },
    };
  },
}));

// Conversation store: one canned conversation ('conv-1'), no stored messages.
mock.module('../memory/conversation-store.js', () => ({
  getConversationThreadType() {
    return 'default';
  },
  setConversationOriginChannelIfUnset() {},
  updateConversationContextWindow() {},
  deleteMessageById() {},
  provenanceFromGuardianContext() {
    return { source: 'user', guardianContext: undefined };
  },
  getConversationOriginInterface() {
    return null;
  },
  getConversationOriginChannel() {
    return null;
  },
  getMessages() {
    return [];
  },
  getConversation() {
    return {
      id: 'conv-1',
      contextSummary: null,
      contextCompactedMessageCount: 0,
      totalInputTokens: 0,
      totalOutputTokens: 0,
      totalEstimatedCost: 0,
    };
  },
  createConversation() {
    return { id: 'conv-1' };
  },
  listConversations() {
    return [];
  },
  addMessage() {
    return { id: `msg-${Date.now()}` };
  },
  updateConversationUsage() {},
  updateConversationTitle() {},
}));

mock.module('../memory/attachments-store.js', () => ({
  uploadAttachment() {
    return { id: `att-${Date.now()}` };
  },
  linkAttachmentToMessage() {},
}));

// Memory recall: reported as disabled; messages pass through untouched.
mock.module('../memory/retriever.js', () => ({
  async buildMemoryRecall() {
    return {
      enabled: false,
      degraded: false,
      injectedText: '',
      lexicalHits: 0,
      semanticHits: 0,
      recencyHits: 0,
      injectedTokens: 0,
      latencyMs: 0,
    };
  },
  injectMemoryRecallIntoUserMessage(msg: Message) {
    return msg;
  },
  stripMemoryRecallMessages(msgs: Message[]) {
    return msgs;
  },
}));

// Context window manager: never compacts.
mock.module('../context/window-manager.js', () => ({
  ContextWindowManager: class {
    constructor() {}

    async maybeCompact() {
      return { compacted: false };
    }
  },
  createContextSummaryMessage() {
    return { role: 'user', content: [{ type: 'text', text: 'summary' }] };
  },
  getSummaryFromContextMessage() {
    return null;
  },
}));

mock.module('../memory/llm-usage-store.js', () => ({
  recordUsageEvent() {
    return { id: 'mock-id', createdAt: Date.now() };
  },
  listUsageEvents() {
    return [];
  },
}));

// Agent loop: run() resolves immediately with no messages.
mock.module('../agent/loop.js', () => ({
  AgentLoop: class {
    constructor() {}

    async run(
      _messages: Message[],
      _onEvent: (event: AgentEvent) => void,
      _signal?: AbortSignal,
      _requestId?: string,
      _onCheckpoint?: (checkpoint: CheckpointInfo) => CheckpointDecision,
    ): Promise<Message[]> {
      return [];
    }
  },
}));

// Canonical guardian store: empty listings, canned ids, no-op updates.
mock.module('../memory/canonical-guardian-store.js', () => ({
  listPendingCanonicalGuardianRequestsByDestinationConversation() {
    return [];
  },
  listCanonicalGuardianRequests() {
    return [];
  },
  createCanonicalGuardianRequest() {
    return { id: 'mock-cg-id', code: 'MOCK', status: 'pending' };
  },
  getCanonicalGuardianRequest() {
    return null;
  },
  getCanonicalGuardianRequestByCode() {
    return null;
  },
  updateCanonicalGuardianRequest() {},
  resolveCanonicalGuardianRequest() {},
  createCanonicalGuardianDelivery() {
    return { id: 'mock-cgd-id' };
  },
  listCanonicalGuardianDeliveries() {
    return [];
  },
  listPendingCanonicalGuardianRequestsByDestinationChat() {
    return [];
  },
  updateCanonicalGuardianDelivery() {},
  generateCanonicalRequestCode() {
    return 'MOCK-CODE';
  },
}));
206
+
207
+ // ---------------------------------------------------------------------------
208
+ // Import Session AFTER mocks
209
+ // ---------------------------------------------------------------------------
210
+
211
+ import { Session } from '../daemon/session.js';
212
+
213
+ // ---------------------------------------------------------------------------
214
+ // Helpers
215
+ // ---------------------------------------------------------------------------
216
+
217
/**
 * Creates a minimal provider named 'mock' whose `sendMessage` resolves to an
 * empty `end_turn` response with zero token usage.
 */
function makeProvider() {
  // A fresh response object is built on every call.
  const sendMessage = async (): Promise<ProviderResponse> => ({
    content: [],
    model: 'mock',
    usage: { inputTokens: 0, outputTokens: 0 },
    stopReason: 'end_turn',
  });

  return { name: 'mock', sendMessage };
}
230
+
231
/**
 * Constructs the Session under test for conversation 'conv-signals-test'.
 *
 * @param sendToClient - Optional sink for outbound server messages; when
 *   omitted, messages are discarded by a no-op.
 */
function makeSession(sendToClient?: (msg: ServerMessage) => void): Session {
  const discard = (): void => {};
  const provider = makeProvider();
  const deliver = sendToClient ?? discard;

  return new Session('conv-signals-test', provider, 'system prompt', 4096, deliver, testDir);
}
241
+
242
+ /**
243
+ * Seed a pending confirmation directly in the prompter's internal map.
244
+ * This avoids calling `prompt()` which has complex side effects (sends
245
+ * a confirmation_request message, needs allowlistOptions, etc.).
246
+ */
247
+ function seedPendingConfirmation(session: Session, requestId: string): void {
248
+ const prompter = session['prompter'] as unknown as {
249
+ pending: Map<string, { resolve: (...args: unknown[]) => void; reject: (...args: unknown[]) => void; timer: ReturnType<typeof setTimeout> }>;
250
+ };
251
+ prompter.pending.set(requestId, {
252
+ resolve: () => {},
253
+ reject: () => {},
254
+ timer: setTimeout(() => {}, 60_000),
255
+ });
256
+ }
257
+
258
// Remove the temp directory once the whole file has run. Failures are
// swallowed on purpose: cleanup is best effort and must not fail the suite.
afterAll(() => {
  try {
    rmSync(testDir, { force: true, recursive: true });
  } catch {
    /* best effort */
  }
});
261
+
262
+ // ---------------------------------------------------------------------------
263
+ // Tests
264
+ // ---------------------------------------------------------------------------
265
+
266
describe('centralized confirmation emissions', () => {
  /** Creates a session together with the array recording everything it sends to the client. */
  function captureSession(): { session: Session; emitted: ServerMessage[] } {
    const emitted: ServerMessage[] = [];
    const session = makeSession((msg) => emitted.push(msg));
    return { session, emitted };
  }

  /** True when `m` is a confirmation_state_changed message carrying `requestId`. */
  function isConfirmationFor(m: ServerMessage, requestId: string): boolean {
    if (m.type !== 'confirmation_state_changed') return false;
    return 'requestId' in m && (m as { requestId: string }).requestId === requestId;
  }

  /** True when `m` has a `state` field equal to `state`. */
  function hasState(m: ServerMessage, state: string): boolean {
    return 'state' in m && (m as { state: string }).state === state;
  }

  /** True when `m` is the activity message emitted with reason 'confirmation_resolved'. */
  function isResolvedActivity(m: ServerMessage): boolean {
    if (m.type !== 'assistant_activity_state') return false;
    return 'reason' in m && (m as { reason: string }).reason === 'confirmation_resolved';
  }

  test('handleConfirmationResponse emits confirmation_state_changed with approved state for allow decision', () => {
    const { session, emitted } = captureSession();

    seedPendingConfirmation(session, 'req-allow-1');
    session.handleConfirmationResponse('req-allow-1', 'allow');

    // Only the explicitly requested emission counts here, not any
    // pending/timed_out messages coming from the prompter.
    const approved = emitted.find(
      (m) => isConfirmationFor(m, 'req-allow-1') && hasState(m, 'approved'),
    );
    expect(approved).toBeDefined();
    expect(approved).toMatchObject({
      type: 'confirmation_state_changed',
      sessionId: 'conv-signals-test',
      requestId: 'req-allow-1',
      state: 'approved',
      source: 'button',
    });
  });

  test('handleConfirmationResponse emits confirmation_state_changed with denied state for deny decision', () => {
    const { session, emitted } = captureSession();

    seedPendingConfirmation(session, 'req-deny-1');
    session.handleConfirmationResponse('req-deny-1', 'deny');

    const denied = emitted.find(
      (m) => isConfirmationFor(m, 'req-deny-1') && hasState(m, 'denied'),
    );
    expect(denied).toBeDefined();
    expect(denied).toMatchObject({
      type: 'confirmation_state_changed',
      requestId: 'req-deny-1',
      state: 'denied',
      source: 'button',
    });
  });

  test('handleConfirmationResponse emits assistant_activity_state with thinking phase', () => {
    const { session, emitted } = captureSession();

    seedPendingConfirmation(session, 'req-activity-1');
    session.handleConfirmationResponse('req-activity-1', 'allow');

    const activity = emitted.find(isResolvedActivity);
    expect(activity).toBeDefined();
    expect(activity).toMatchObject({
      type: 'assistant_activity_state',
      sessionId: 'conv-signals-test',
      phase: 'thinking',
      reason: 'confirmation_resolved',
      anchor: 'assistant_turn',
    });
  });

  test('handleConfirmationResponse passes emissionContext source', () => {
    const { session, emitted } = captureSession();

    seedPendingConfirmation(session, 'req-ctx-1');
    session.handleConfirmationResponse('req-ctx-1', 'allow', undefined, undefined, undefined, {
      source: 'inline_nl',
      decisionText: 'yes please',
    });

    const withContext = emitted.find((m) => isConfirmationFor(m, 'req-ctx-1'));
    expect(withContext).toBeDefined();
    expect(withContext).toMatchObject({
      source: 'inline_nl',
      decisionText: 'yes please',
    });
  });

  test('always_deny produces denied state', () => {
    const { session, emitted } = captureSession();

    seedPendingConfirmation(session, 'req-always-deny');
    session.handleConfirmationResponse('req-always-deny', 'always_deny');

    const outcome = emitted.find((m) => isConfirmationFor(m, 'req-always-deny'));
    expect(outcome).toBeDefined();
    expect(outcome).toMatchObject({ state: 'denied' });
  });

  test('always_allow produces approved state', () => {
    const { session, emitted } = captureSession();

    seedPendingConfirmation(session, 'req-always-allow');
    session.handleConfirmationResponse('req-always-allow', 'always_allow');

    const outcome = emitted.find((m) => isConfirmationFor(m, 'req-always-allow'));
    expect(outcome).toBeDefined();
    expect(outcome).toMatchObject({ state: 'approved' });
  });
});
387
+
388
describe('activity version ordering', () => {
  type VersionedActivity = ServerMessage & { activityVersion: number; reason: string };

  /** Keeps only assistant_activity_state messages, preserving emission order. */
  function activityOnly(msgs: ServerMessage[]): VersionedActivity[] {
    return msgs.filter((m) => m.type === 'assistant_activity_state') as VersionedActivity[];
  }

  test('emitActivityState produces monotonically increasing activityVersion', () => {
    const emitted: ServerMessage[] = [];
    const session = makeSession((msg) => emitted.push(msg));

    session.emitActivityState('thinking', 'message_dequeued', 'assistant_turn');
    session.emitActivityState('streaming', 'first_text_delta', 'assistant_turn');
    session.emitActivityState('tool_running', 'tool_use_start', 'assistant_turn');
    session.emitActivityState('idle', 'message_complete', 'global');

    const versions = activityOnly(emitted).map((m) => m.activityVersion);
    expect(versions).toHaveLength(4);

    // Each version must be strictly greater than the one before it.
    versions.forEach((version, idx) => {
      if (idx === 0) return;
      expect(version).toBeGreaterThan(versions[idx - 1]);
    });

    // The very first version must be at least 1.
    expect(versions[0]).toBeGreaterThanOrEqual(1);
  });

  test('handleConfirmationResponse increments activityVersion for its activity emission', () => {
    const emitted: ServerMessage[] = [];
    const session = makeSession((msg) => emitted.push(msg));

    // Baseline: one activity emission before any confirmation is handled.
    session.emitActivityState('thinking', 'message_dequeued', 'assistant_turn');
    const baselineVersion = activityOnly(emitted)[0].activityVersion;

    // Resolve a confirmation, which emits its own activity state.
    seedPendingConfirmation(session, 'req-version-1');
    session.handleConfirmationResponse('req-version-1', 'allow');

    // The confirmation_resolved emission must carry a higher version.
    const resolvedMsg = activityOnly(emitted).find((m) => m.reason === 'confirmation_resolved');
    expect(resolvedMsg).toBeDefined();
    expect(resolvedMsg!.activityVersion).toBeGreaterThan(baselineVersion);
  });
});
442
+
443
describe('state signal listener', () => {
  /** Returns the messages of the given `type`, in the order they were recorded. */
  function ofType(msgs: ServerMessage[], type: string): ServerMessage[] {
    return msgs.filter((m) => m.type === type);
  }

  test('setStateSignalListener routes emitActivityState to external callback', () => {
    const clientMsgs: ServerMessage[] = [];
    const signalMsgs: ServerMessage[] = [];
    const session = makeSession((msg) => clientMsgs.push(msg));
    session.setStateSignalListener((msg) => signalMsgs.push(msg));

    session.emitActivityState('thinking', 'message_dequeued', 'assistant_turn');

    // The client sink and the signal listener each see exactly one message.
    const clientActivity = ofType(clientMsgs, 'assistant_activity_state');
    const signalActivity = ofType(signalMsgs, 'assistant_activity_state');
    expect(clientActivity).toHaveLength(1);
    expect(signalActivity).toHaveLength(1);

    // Both sides must have received the same payload.
    expect(clientActivity[0]).toEqual(signalActivity[0]);
  });

  test('setStateSignalListener routes emitConfirmationStateChanged to external callback', () => {
    const clientMsgs: ServerMessage[] = [];
    const signalMsgs: ServerMessage[] = [];
    const session = makeSession((msg) => clientMsgs.push(msg));
    session.setStateSignalListener((msg) => signalMsgs.push(msg));

    session.emitConfirmationStateChanged({
      sessionId: 'conv-signals-test',
      requestId: 'req-signal-1',
      state: 'approved',
      source: 'button',
    });

    expect(ofType(clientMsgs, 'confirmation_state_changed')).toHaveLength(1);
    expect(ofType(signalMsgs, 'confirmation_state_changed')).toHaveLength(1);
  });

  test('without state signal listener, only sendToClient receives messages', () => {
    const clientMsgs: ServerMessage[] = [];
    // Deliberately no setStateSignalListener call on this session.
    const session = makeSession((msg) => clientMsgs.push(msg));

    session.emitActivityState('idle', 'message_complete', 'global');

    expect(ofType(clientMsgs, 'assistant_activity_state')).toHaveLength(1);
  });

  test('state signal listener receives handleConfirmationResponse emissions', () => {
    const signalMsgs: ServerMessage[] = [];

    // A no-op sendToClient simulates an HTTP session with no socket.
    const session = makeSession(() => {});
    session.setStateSignalListener((msg) => signalMsgs.push(msg));

    seedPendingConfirmation(session, 'req-signal-confirm');
    session.handleConfirmationResponse('req-signal-confirm', 'allow');

    const confirmSignal = ofType(signalMsgs, 'confirmation_state_changed').find(
      (m) => 'requestId' in m && (m as { requestId: string }).requestId === 'req-signal-confirm',
    );
    const activitySignal = ofType(signalMsgs, 'assistant_activity_state').find(
      (m) => 'reason' in m && (m as { reason: string }).reason === 'confirmation_resolved',
    );

    expect(confirmSignal).toBeDefined();
    expect(confirmSignal).toMatchObject({
      state: 'approved',
      requestId: 'req-signal-confirm',
    });

    expect(activitySignal).toBeDefined();
    expect(activitySignal).toMatchObject({
      phase: 'thinking',
      reason: 'confirmation_resolved',
    });
  });
});
@@ -263,7 +263,6 @@ mock.module('../tools/browser/browser-screencast.js', () => ({
263
263
  stopAllScreencasts: () => Promise.resolve(),
264
264
  isScreencastActive: () => false,
265
265
  getSender: () => undefined,
266
- getScreencastSurfaceId: () => null,
267
266
  }));
268
267
 
269
268
  mock.module('../services/published-app-updater.js', () => ({
@@ -26,7 +26,7 @@ function makeContext(
26
26
  sendToClient: (msg) => sent.push(msg),
27
27
  pendingSurfaceActions: new Map<string, { surfaceType: SurfaceType }>(),
28
28
  lastSurfaceAction: new Map<string, { actionId: string; data?: Record<string, unknown> }>(),
29
- surfaceState: new Map<string, { surfaceType: SurfaceType; data: SurfaceData }>(),
29
+ surfaceState: new Map<string, { surfaceType: SurfaceType; data: SurfaceData; title?: string }>(),
30
30
  surfaceUndoStacks: new Map<string, string[]>(),
31
31
  currentTurnSurfaces: [],
32
32
  isProcessing: () => false,
@@ -62,7 +62,7 @@ function makeCtx(overrides: Partial<ToolSetupContext> = {}): ToolSetupContext {
62
62
  sendToClient: mock(() => {}),
63
63
  pendingSurfaceActions: new Map(),
64
64
  lastSurfaceAction: new Map(),
65
- surfaceState: new Map<string, { surfaceType: SurfaceType; data: SurfaceData }>(),
65
+ surfaceState: new Map<string, { surfaceType: SurfaceType; data: SurfaceData; title?: string }>(),
66
66
  surfaceUndoStacks: new Map(),
67
67
  currentTurnSurfaces: [],
68
68
  isProcessing: () => false,
@@ -392,6 +392,85 @@ describe('session-tool-setup app refresh side effects', () => {
392
392
  });
393
393
  });
394
394
 
395
+ // ── app_create side effects ─────────────────────────────────────────
396
+
397
+ describe('app_create side effects', () => {
398
+ test('broadcasts app_files_changed after app_create', async () => {
399
+ const ctx = makeCtx();
400
+ const executor = makeFakeExecutor({
401
+ content: JSON.stringify({ id: 'new-app-1', name: 'My App' }),
402
+ isError: false,
403
+ });
404
+ const broadcastSpy = mock(() => {});
405
+
406
+ const toolFn = createToolExecutor(
407
+ executor as unknown as ToolExecutor, noopPrompter, noopSecretPrompter,
408
+ ctx, noopLifecycleHandler, broadcastSpy,
409
+ );
410
+
411
+ await toolFn('app_create', { name: 'My App', html: '<h1>hi</h1>' });
412
+
413
+ expect(broadcastSpy).toHaveBeenCalledTimes(1);
414
+ expect((broadcastSpy.mock.calls as unknown[][])[0][0]).toEqual({
415
+ type: 'app_files_changed',
416
+ appId: 'new-app-1',
417
+ });
418
+ });
419
+
420
+ test('skips side effects when app_create result is an error', async () => {
421
+ const ctx = makeCtx();
422
+ const executor = makeFakeExecutor({ content: 'Error', isError: true });
423
+ const broadcastSpy = mock(() => {});
424
+
425
+ const toolFn = createToolExecutor(
426
+ executor as unknown as ToolExecutor, noopPrompter, noopSecretPrompter,
427
+ ctx, noopLifecycleHandler, broadcastSpy,
428
+ );
429
+
430
+ await toolFn('app_create', { name: 'Bad', html: '' });
431
+
432
+ expect(broadcastSpy).not.toHaveBeenCalled();
433
+ });
434
+ });
435
+
436
+ // ── app_delete side effects ────────────────────────────────────────
437
+
438
+ describe('app_delete side effects', () => {
439
+ test('broadcasts app_files_changed after app_delete', async () => {
440
+ const ctx = makeCtx();
441
+ const executor = makeFakeExecutor({ content: '{}', isError: false });
442
+ const broadcastSpy = mock(() => {});
443
+
444
+ const toolFn = createToolExecutor(
445
+ executor as unknown as ToolExecutor, noopPrompter, noopSecretPrompter,
446
+ ctx, noopLifecycleHandler, broadcastSpy,
447
+ );
448
+
449
+ await toolFn('app_delete', { app_id: 'del-app-1' });
450
+
451
+ expect(broadcastSpy).toHaveBeenCalledTimes(1);
452
+ expect((broadcastSpy.mock.calls as unknown[][])[0][0]).toEqual({
453
+ type: 'app_files_changed',
454
+ appId: 'del-app-1',
455
+ });
456
+ });
457
+
458
+ test('skips side effects when app_delete result is an error', async () => {
459
+ const ctx = makeCtx();
460
+ const executor = makeFakeExecutor({ content: 'Error', isError: true });
461
+ const broadcastSpy = mock(() => {});
462
+
463
+ const toolFn = createToolExecutor(
464
+ executor as unknown as ToolExecutor, noopPrompter, noopSecretPrompter,
465
+ ctx, noopLifecycleHandler, broadcastSpy,
466
+ );
467
+
468
+ await toolFn('app_delete', { app_id: 'del-err' });
469
+
470
+ expect(broadcastSpy).not.toHaveBeenCalled();
471
+ });
472
+ });
473
+
395
474
  // ── Name-based hook targeting (skill-origin tools) ──────────────────
396
475
 
397
476
  describe('name-based hooks fire for skill-origin tools', () => {
@@ -437,7 +516,7 @@ describe('session-tool-setup app refresh side effects', () => {
437
516
  ctx, noopLifecycleHandler, broadcastSpy,
438
517
  );
439
518
 
440
- for (const toolName of ['read_file', 'write_file', 'shell', 'app_create', 'app_list', 'app_delete']) {
519
+ for (const toolName of ['read_file', 'write_file', 'shell', 'app_list']) {
441
520
  refreshSpy.mockClear();
442
521
  broadcastSpy.mockClear();
443
522
  updatePublishedSpy.mockClear();
@@ -49,7 +49,7 @@ function makeCtx(overrides: Partial<ToolSetupContext> = {}): ToolSetupContext {
49
49
  sendToClient: mock(() => {}),
50
50
  pendingSurfaceActions: new Map(),
51
51
  lastSurfaceAction: new Map(),
52
- surfaceState: new Map<string, { surfaceType: SurfaceType; data: SurfaceData }>(),
52
+ surfaceState: new Map<string, { surfaceType: SurfaceType; data: SurfaceData; title?: string }>(),
53
53
  surfaceUndoStacks: new Map(),
54
54
  currentTurnSurfaces: [],
55
55
  isProcessing: () => false,