@vellumai/assistant 0.4.11 → 0.4.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. package/ARCHITECTURE.md +401 -385
  2. package/package.json +1 -1
  3. package/src/__tests__/guardian-verify-setup-skill-regression.test.ts +75 -61
  4. package/src/__tests__/registry.test.ts +235 -187
  5. package/src/__tests__/secure-keys.test.ts +27 -0
  6. package/src/__tests__/session-agent-loop.test.ts +521 -256
  7. package/src/__tests__/session-surfaces-task-progress.test.ts +1 -0
  8. package/src/__tests__/session-tool-setup-app-refresh.test.ts +1 -0
  9. package/src/__tests__/session-tool-setup-memory-scope.test.ts +1 -0
  10. package/src/__tests__/session-tool-setup-side-effect-flag.test.ts +1 -0
  11. package/src/__tests__/skills.test.ts +334 -276
  12. package/src/__tests__/slack-skill.test.ts +124 -0
  13. package/src/__tests__/starter-task-flow.test.ts +7 -17
  14. package/src/agent/loop.ts +10 -3
  15. package/src/config/bundled-skills/chatgpt-import/tools/chatgpt-import.ts +449 -0
  16. package/src/config/bundled-skills/doordash/SKILL.md +171 -0
  17. package/src/config/bundled-skills/doordash/__tests__/doordash-client.test.ts +203 -0
  18. package/src/config/bundled-skills/doordash/__tests__/doordash-session.test.ts +164 -0
  19. package/src/config/bundled-skills/doordash/doordash-cli.ts +1193 -0
  20. package/src/config/bundled-skills/doordash/doordash-entry.ts +22 -0
  21. package/src/config/bundled-skills/doordash/lib/cart-queries.ts +787 -0
  22. package/src/config/bundled-skills/doordash/lib/client.ts +1071 -0
  23. package/src/config/bundled-skills/doordash/lib/order-queries.ts +85 -0
  24. package/src/config/bundled-skills/doordash/lib/queries.ts +28 -0
  25. package/src/config/bundled-skills/doordash/lib/query-extractor.ts +94 -0
  26. package/src/config/bundled-skills/doordash/lib/search-queries.ts +203 -0
  27. package/src/config/bundled-skills/doordash/lib/session.ts +93 -0
  28. package/src/config/bundled-skills/doordash/lib/shared/errors.ts +61 -0
  29. package/src/config/bundled-skills/doordash/lib/shared/ipc.ts +32 -0
  30. package/src/config/bundled-skills/doordash/lib/shared/network-recorder.ts +380 -0
  31. package/src/config/bundled-skills/doordash/lib/shared/platform.ts +35 -0
  32. package/src/config/bundled-skills/doordash/lib/shared/recording-store.ts +43 -0
  33. package/src/config/bundled-skills/doordash/lib/shared/recording-types.ts +49 -0
  34. package/src/config/bundled-skills/doordash/lib/shared/truncate.ts +6 -0
  35. package/src/config/bundled-skills/doordash/lib/store-queries.ts +246 -0
  36. package/src/config/bundled-skills/doordash/lib/types.ts +367 -0
  37. package/src/config/bundled-skills/google-calendar/SKILL.md +4 -5
  38. package/src/config/bundled-skills/google-oauth-setup/SKILL.md +41 -41
  39. package/src/config/bundled-skills/messaging/SKILL.md +59 -42
  40. package/src/config/bundled-skills/messaging/TOOLS.json +14 -92
  41. package/src/config/bundled-skills/messaging/tools/gmail-archive-by-query.ts +5 -1
  42. package/src/config/bundled-skills/messaging/tools/gmail-batch-archive.ts +11 -2
  43. package/src/config/bundled-skills/messaging/tools/gmail-outreach-scan.ts +8 -1
  44. package/src/config/bundled-skills/messaging/tools/gmail-sender-digest.ts +12 -4
  45. package/src/config/bundled-skills/messaging/tools/gmail-unsubscribe.ts +5 -1
  46. package/src/config/bundled-skills/messaging/tools/messaging-archive-by-sender.ts +5 -1
  47. package/src/config/bundled-skills/messaging/tools/messaging-sender-digest.ts +5 -2
  48. package/src/config/bundled-skills/notion/SKILL.md +240 -0
  49. package/src/config/bundled-skills/notion-oauth-setup/SKILL.md +127 -0
  50. package/src/config/bundled-skills/oauth-setup/SKILL.md +144 -0
  51. package/src/config/bundled-skills/phone-calls/SKILL.md +76 -45
  52. package/src/config/bundled-skills/skills-catalog/SKILL.md +32 -29
  53. package/src/config/bundled-skills/slack/SKILL.md +49 -0
  54. package/src/config/bundled-skills/slack/TOOLS.json +167 -0
  55. package/src/config/bundled-skills/slack/tools/shared.ts +23 -0
  56. package/src/config/bundled-skills/{messaging → slack}/tools/slack-add-reaction.ts +2 -5
  57. package/src/config/bundled-skills/slack/tools/slack-channel-details.ts +33 -0
  58. package/src/config/bundled-skills/slack/tools/slack-configure-channels.ts +75 -0
  59. package/src/config/bundled-skills/{messaging → slack}/tools/slack-delete-message.ts +2 -5
  60. package/src/config/bundled-skills/{messaging → slack}/tools/slack-leave-channel.ts +2 -5
  61. package/src/config/bundled-skills/slack/tools/slack-scan-digest.ts +193 -0
  62. package/src/config/{vellum-skills → bundled-skills}/sms-setup/SKILL.md +29 -22
  63. package/src/config/{vellum-skills → bundled-skills}/telegram-setup/SKILL.md +17 -14
  64. package/src/config/{vellum-skills → bundled-skills}/twilio-setup/SKILL.md +20 -5
  65. package/src/config/bundled-tool-registry.ts +292 -267
  66. package/src/config/schema.ts +1 -1
  67. package/src/daemon/handlers/skills.ts +334 -234
  68. package/src/daemon/ipc-contract/messages.ts +2 -0
  69. package/src/daemon/ipc-contract/surfaces.ts +2 -0
  70. package/src/daemon/lifecycle.ts +358 -221
  71. package/src/daemon/response-tier.ts +2 -0
  72. package/src/daemon/server.ts +453 -193
  73. package/src/daemon/session-agent-loop-handlers.ts +43 -2
  74. package/src/daemon/session-agent-loop.ts +3 -0
  75. package/src/daemon/session-lifecycle.ts +3 -0
  76. package/src/daemon/session-process.ts +1 -0
  77. package/src/daemon/session-surfaces.ts +22 -20
  78. package/src/daemon/session-tool-setup.ts +1 -0
  79. package/src/daemon/session.ts +5 -2
  80. package/src/messaging/outreach-classifier.ts +12 -5
  81. package/src/messaging/provider-types.ts +5 -0
  82. package/src/messaging/provider.ts +1 -1
  83. package/src/messaging/providers/gmail/adapter.ts +11 -5
  84. package/src/messaging/providers/gmail/client.ts +2 -0
  85. package/src/messaging/providers/slack/adapter.ts +1 -0
  86. package/src/messaging/providers/slack/client.ts +8 -0
  87. package/src/messaging/providers/slack/types.ts +5 -0
  88. package/src/runtime/http-errors.ts +33 -20
  89. package/src/runtime/http-server.ts +706 -291
  90. package/src/runtime/http-types.ts +26 -16
  91. package/src/runtime/routes/secret-routes.ts +57 -2
  92. package/src/runtime/routes/surface-action-routes.ts +66 -0
  93. package/src/runtime/routes/trust-rules-routes.ts +140 -0
  94. package/src/security/keychain-to-encrypted-migration.ts +59 -0
  95. package/src/security/secure-keys.ts +17 -0
  96. package/src/skills/frontmatter.ts +9 -7
  97. package/src/tools/apps/executors.ts +2 -1
  98. package/src/tools/tool-manifest.ts +44 -42
  99. package/src/tools/types.ts +9 -0
  100. package/src/__tests__/skill-mirror-parity.test.ts +0 -176
  101. package/src/config/vellum-skills/catalog.json +0 -63
  102. package/src/config/vellum-skills/chatgpt-import/tools/chatgpt-import.ts +0 -295
  103. package/src/skills/vellum-catalog-remote.ts +0 -166
  104. package/src/tools/skills/vellum-catalog.ts +0 -168
  105. /package/src/config/{vellum-skills → bundled-skills}/chatgpt-import/SKILL.md +0 -0
  106. /package/src/config/{vellum-skills → bundled-skills}/chatgpt-import/TOOLS.json +0 -0
  107. /package/src/config/{vellum-skills → bundled-skills}/deploy-fullstack-vercel/SKILL.md +0 -0
  108. /package/src/config/{vellum-skills → bundled-skills}/document-writer/SKILL.md +0 -0
  109. /package/src/config/{vellum-skills → bundled-skills}/guardian-verify-setup/SKILL.md +0 -0
  110. /package/src/config/{vellum-skills → bundled-skills}/slack-oauth-setup/SKILL.md +0 -0
  111. /package/src/config/{vellum-skills → bundled-skills}/trusted-contacts/SKILL.md +0 -0
@@ -1,30 +1,35 @@
1
- import { beforeEach,describe, expect, mock, test } from 'bun:test';
1
+ import { beforeEach, describe, expect, mock, test } from "bun:test";
2
2
 
3
- import type { AgentEvent, CheckpointDecision, CheckpointInfo } from '../agent/loop.js';
4
- import type { ServerMessage } from '../daemon/ipc-protocol.js';
5
- import type { ContentBlock,Message } from '../providers/types.js';
3
+ import type {
4
+ AgentEvent,
5
+ CheckpointDecision,
6
+ CheckpointInfo,
7
+ } from "../agent/loop.js";
8
+ import type { ServerMessage } from "../daemon/ipc-protocol.js";
9
+ import type { ContentBlock, Message } from "../providers/types.js";
6
10
 
7
11
  // ── Module mocks (must precede imports of the module under test) ─────
8
12
 
9
- mock.module('../util/logger.js', () => ({
10
- getLogger: () => new Proxy({} as Record<string, unknown>, { get: () => () => {} }),
13
+ mock.module("../util/logger.js", () => ({
14
+ getLogger: () =>
15
+ new Proxy({} as Record<string, unknown>, { get: () => () => {} }),
11
16
  }));
12
17
 
13
- mock.module('../util/platform.js', () => ({
14
- getSocketPath: () => '/tmp/test.sock',
15
- getDataDir: () => '/tmp',
18
+ mock.module("../util/platform.js", () => ({
19
+ getSocketPath: () => "/tmp/test.sock",
20
+ getDataDir: () => "/tmp",
16
21
  }));
17
22
 
18
- mock.module('../config/loader.js', () => ({
23
+ mock.module("../config/loader.js", () => ({
19
24
  getConfig: () => ({
20
- provider: 'mock-provider',
25
+ provider: "mock-provider",
21
26
  maxTokens: 4096,
22
27
  thinking: false,
23
28
  contextWindow: {
24
29
  maxInputTokens: 100000,
25
30
  thresholdTokens: 80000,
26
31
  preserveRecentMessages: 6,
27
- summaryModel: 'mock-model',
32
+ summaryModel: "mock-model",
28
33
  maxSummaryTokens: 512,
29
34
  },
30
35
  rateLimit: { maxRequestsPerMinute: 0, maxTokensPerSession: 0 },
@@ -38,12 +43,12 @@ mock.module('../config/loader.js', () => ({
38
43
  }));
39
44
 
40
45
  let hookBlocked = false;
41
- let hookBlockedBy = '';
46
+ let hookBlockedBy = "";
42
47
 
43
- mock.module('../hooks/manager.js', () => ({
48
+ mock.module("../hooks/manager.js", () => ({
44
49
  getHookManager: () => ({
45
50
  trigger: async (hookName: string) => {
46
- if (hookName === 'pre-message' && hookBlocked) {
51
+ if (hookName === "pre-message" && hookBlocked) {
47
52
  return { blocked: true, blockedBy: hookBlockedBy };
48
53
  }
49
54
  return { blocked: false };
@@ -51,13 +56,13 @@ mock.module('../hooks/manager.js', () => ({
51
56
  }),
52
57
  }));
53
58
 
54
- mock.module('../memory/conversation-store.js', () => ({
55
- getConversationThreadType: () => 'default',
59
+ mock.module("../memory/conversation-store.js", () => ({
60
+ getConversationThreadType: () => "default",
56
61
  setConversationOriginChannelIfUnset: () => {},
57
62
  updateConversationUsage: () => {},
58
63
  getMessages: () => [],
59
64
  getConversation: () => ({
60
- id: 'conv-1',
65
+ id: "conv-1",
61
66
  contextSummary: null,
62
67
  contextCompactedMessageCount: 0,
63
68
  totalInputTokens: 0,
@@ -65,20 +70,23 @@ mock.module('../memory/conversation-store.js', () => ({
65
70
  totalEstimatedCost: 0,
66
71
  title: null,
67
72
  }),
68
- provenanceFromGuardianContext: () => ({ source: 'user', guardianContext: undefined }),
73
+ provenanceFromGuardianContext: () => ({
74
+ source: "user",
75
+ guardianContext: undefined,
76
+ }),
69
77
  getConversationOriginInterface: () => null,
70
- addMessage: () => ({ id: 'mock-msg-id' }),
78
+ addMessage: () => ({ id: "mock-msg-id" }),
71
79
  deleteMessageById: () => {},
72
80
  updateConversationContextWindow: () => {},
73
81
  updateConversationTitle: () => {},
74
82
  getConversationOriginChannel: () => null,
75
83
  }));
76
84
 
77
- mock.module('../memory/retriever.js', () => ({
85
+ mock.module("../memory/retriever.js", () => ({
78
86
  buildMemoryRecall: async () => ({
79
87
  enabled: false,
80
88
  degraded: false,
81
- injectedText: '',
89
+ injectedText: "",
82
90
  lexicalHits: 0,
83
91
  semanticHits: 0,
84
92
  recencyHits: 0,
@@ -89,65 +97,87 @@ mock.module('../memory/retriever.js', () => ({
89
97
  stripMemoryRecallMessages: (msgs: Message[]) => msgs,
90
98
  }));
91
99
 
92
- mock.module('../memory/app-store.js', () => ({
100
+ mock.module("../memory/app-store.js", () => ({
93
101
  getApp: () => null,
94
102
  listAppFiles: () => [],
95
- getAppsDir: () => '/tmp/apps',
103
+ getAppsDir: () => "/tmp/apps",
96
104
  }));
97
105
 
98
- mock.module('../memory/app-git-service.js', () => ({
106
+ mock.module("../memory/app-git-service.js", () => ({
99
107
  commitAppTurnChanges: () => Promise.resolve(),
100
108
  }));
101
109
 
102
- mock.module('../daemon/session-memory.js', () => ({
103
- prepareMemoryContext: async (_ctx: unknown, _content: string, _id: string, _signal: AbortSignal) => ({
110
+ mock.module("../daemon/session-memory.js", () => ({
111
+ prepareMemoryContext: async (
112
+ _ctx: unknown,
113
+ _content: string,
114
+ _id: string,
115
+ _signal: AbortSignal,
116
+ ) => ({
104
117
  runMessages: [],
105
- recall: { enabled: false, degraded: false, injectedText: '', lexicalHits: 0, semanticHits: 0, recencyHits: 0, injectedTokens: 0, latencyMs: 0 },
106
- dynamicProfile: { text: '' },
118
+ recall: {
119
+ enabled: false,
120
+ degraded: false,
121
+ injectedText: "",
122
+ lexicalHits: 0,
123
+ semanticHits: 0,
124
+ recencyHits: 0,
125
+ injectedTokens: 0,
126
+ latencyMs: 0,
127
+ },
128
+ dynamicProfile: { text: "" },
107
129
  softConflictInstruction: null,
108
- recallInjectionStrategy: 'prepend_user_block' as const,
130
+ recallInjectionStrategy: "prepend_user_block" as const,
109
131
  conflictClarification: null,
110
132
  }),
111
133
  }));
112
134
 
113
- mock.module('../daemon/session-runtime-assembly.js', () => ({
135
+ mock.module("../daemon/session-runtime-assembly.js", () => ({
114
136
  applyRuntimeInjections: (msgs: Message[]) => msgs,
115
137
  stripInjectedContext: (msgs: Message[]) => msgs,
116
138
  }));
117
139
 
118
- mock.module('../daemon/session-dynamic-profile.js', () => ({
140
+ mock.module("../daemon/session-dynamic-profile.js", () => ({
119
141
  stripDynamicProfileMessages: (msgs: Message[]) => msgs,
120
142
  injectDynamicProfileIntoUserMessage: (msg: Message) => msg,
121
143
  }));
122
144
 
123
- mock.module('../daemon/date-context.js', () => ({
145
+ mock.module("../daemon/date-context.js", () => ({
124
146
  buildTemporalContext: () => null,
125
147
  }));
126
148
 
127
- mock.module('../daemon/history-repair.js', () => ({
128
- repairHistory: (msgs: Message[]) => ({ messages: msgs, stats: { assistantToolResultsMigrated: 0, missingToolResultsInserted: 0, orphanToolResultsDowngraded: 0, consecutiveSameRoleMerged: 0 } }),
149
+ mock.module("../daemon/history-repair.js", () => ({
150
+ repairHistory: (msgs: Message[]) => ({
151
+ messages: msgs,
152
+ stats: {
153
+ assistantToolResultsMigrated: 0,
154
+ missingToolResultsInserted: 0,
155
+ orphanToolResultsDowngraded: 0,
156
+ consecutiveSameRoleMerged: 0,
157
+ },
158
+ }),
129
159
  deepRepairHistory: (msgs: Message[]) => ({ messages: msgs, stats: {} }),
130
160
  }));
131
161
 
132
- mock.module('../daemon/session-history.js', () => ({
162
+ mock.module("../daemon/session-history.js", () => ({
133
163
  consolidateAssistantMessages: () => {},
134
164
  }));
135
165
 
136
- mock.module('../daemon/session-usage.js', () => ({
166
+ mock.module("../daemon/session-usage.js", () => ({
137
167
  recordUsage: () => {},
138
168
  }));
139
169
 
140
- mock.module('../daemon/session-attachments.js', () => ({
170
+ mock.module("../daemon/session-attachments.js", () => ({
141
171
  resolveAssistantAttachments: async () => ({
142
172
  assistantAttachments: [],
143
173
  emittedAttachments: [],
144
174
  directiveWarnings: [],
145
175
  }),
146
176
  approveHostAttachmentRead: async () => true,
147
- formatAttachmentWarnings: () => '',
177
+ formatAttachmentWarnings: () => "",
148
178
  }));
149
179
 
150
- mock.module('../daemon/assistant-attachments.js', () => ({
180
+ mock.module("../daemon/assistant-attachments.js", () => ({
151
181
  cleanAssistantContent: (content: unknown[]) => ({
152
182
  cleanedContent: content,
153
183
  directives: [],
@@ -155,72 +185,79 @@ mock.module('../daemon/assistant-attachments.js', () => ({
155
185
  }),
156
186
  drainDirectiveDisplayBuffer: (buffer: string) => ({
157
187
  emitText: buffer,
158
- bufferedRemainder: '',
188
+ bufferedRemainder: "",
159
189
  }),
160
190
  }));
161
191
 
162
- mock.module('../daemon/session-media-retry.js', () => ({
192
+ mock.module("../daemon/session-media-retry.js", () => ({
163
193
  stripMediaPayloadsForRetry: (msgs: Message[]) => ({
164
194
  messages: msgs,
165
195
  modified: false,
166
196
  replacedBlocks: 0,
167
197
  latestUserIndex: null,
168
198
  }),
169
- raceWithTimeout: async () => 'completed' as const,
199
+ raceWithTimeout: async () => "completed" as const,
170
200
  }));
171
201
 
172
- mock.module('../workspace/turn-commit.js', () => ({
202
+ mock.module("../workspace/turn-commit.js", () => ({
173
203
  commitTurnChanges: async () => {},
174
204
  }));
175
205
 
176
- mock.module('../workspace/git-service.js', () => ({
206
+ mock.module("../workspace/git-service.js", () => ({
177
207
  getWorkspaceGitService: () => ({
178
208
  ensureInitialized: async () => {},
179
209
  }),
180
210
  }));
181
211
 
182
- mock.module('../daemon/session-error.js', () => ({
212
+ mock.module("../daemon/session-error.js", () => ({
183
213
  classifySessionError: (_err: unknown, _ctx: unknown) => ({
184
- code: 'SESSION_PROCESSING_FAILED',
185
- userMessage: 'Something went wrong processing your message.',
214
+ code: "SESSION_PROCESSING_FAILED",
215
+ userMessage: "Something went wrong processing your message.",
186
216
  retryable: false,
187
217
  }),
188
218
  isUserCancellation: (err: unknown, ctx: { aborted?: boolean }) => {
189
219
  if (!ctx.aborted) return false;
190
- if (err instanceof DOMException && err.name === 'AbortError') return true;
191
- if (err instanceof Error && err.name === 'AbortError') return true;
220
+ if (err instanceof DOMException && err.name === "AbortError") return true;
221
+ if (err instanceof Error && err.name === "AbortError") return true;
192
222
  return false;
193
223
  },
194
- buildSessionErrorMessage: (sessionId: string, classified: Record<string, unknown>) => ({
195
- type: 'session_error',
224
+ buildSessionErrorMessage: (
225
+ sessionId: string,
226
+ classified: Record<string, unknown>,
227
+ ) => ({
228
+ type: "session_error",
196
229
  sessionId,
197
230
  ...classified,
198
231
  }),
199
232
  isContextTooLarge: (msg: string) => /context.?length.?exceeded/i.test(msg),
200
233
  }));
201
234
 
202
- mock.module('../daemon/session-slash.js', () => ({
203
- isProviderOrderingError: (msg: string) => /ordering|before.*after|messages.*order/i.test(msg),
235
+ mock.module("../daemon/session-slash.js", () => ({
236
+ isProviderOrderingError: (msg: string) =>
237
+ /ordering|before.*after|messages.*order/i.test(msg),
204
238
  }));
205
239
 
206
- mock.module('../util/truncate.js', () => ({
240
+ mock.module("../util/truncate.js", () => ({
207
241
  truncate: (s: string) => s,
208
242
  }));
209
243
 
210
- mock.module('../agent/message-types.js', () => ({
244
+ mock.module("../agent/message-types.js", () => ({
211
245
  createAssistantMessage: (text: string) => ({
212
- role: 'assistant' as const,
213
- content: [{ type: 'text', text }],
246
+ role: "assistant" as const,
247
+ content: [{ type: "text", text }],
214
248
  }),
215
249
  }));
216
250
 
217
- mock.module('../memory/llm-request-log-store.js', () => ({
251
+ mock.module("../memory/llm-request-log-store.js", () => ({
218
252
  recordRequestLog: () => {},
219
253
  }));
220
254
 
221
255
  // ── Imports (after mocks) ────────────────────────────────────────────
222
256
 
223
- import { type AgentLoopSessionContext,runAgentLoopImpl } from '../daemon/session-agent-loop.js';
257
+ import {
258
+ type AgentLoopSessionContext,
259
+ runAgentLoopImpl,
260
+ } from "../daemon/session-agent-loop.js";
224
261
 
225
262
  // ── Test helpers ─────────────────────────────────────────────────────
226
263
 
@@ -232,51 +269,63 @@ type AgentLoopRun = (
232
269
  onCheckpoint?: (checkpoint: CheckpointInfo) => CheckpointDecision,
233
270
  ) => Promise<Message[]>;
234
271
 
235
- function makeCtx(overrides?: Partial<AgentLoopSessionContext> & { agentLoopRun?: AgentLoopRun }): AgentLoopSessionContext {
236
- const agentLoopRun = overrides?.agentLoopRun ?? (async (messages: Message[]) => [
237
- ...messages,
238
- { role: 'assistant' as const, content: [{ type: 'text' as const, text: 'response' }] },
239
- ]);
272
+ function makeCtx(
273
+ overrides?: Partial<AgentLoopSessionContext> & {
274
+ agentLoopRun?: AgentLoopRun;
275
+ },
276
+ ): AgentLoopSessionContext {
277
+ const agentLoopRun =
278
+ overrides?.agentLoopRun ??
279
+ (async (messages: Message[]) => [
280
+ ...messages,
281
+ {
282
+ role: "assistant" as const,
283
+ content: [{ type: "text" as const, text: "response" }],
284
+ },
285
+ ]);
240
286
 
241
287
  return {
242
- conversationId: 'test-conv',
288
+ conversationId: "test-conv",
243
289
  messages: [
244
- { role: 'user', content: [{ type: 'text', text: 'Hello' }] },
290
+ { role: "user", content: [{ type: "text", text: "Hello" }] },
245
291
  ] as Message[],
246
292
  processing: true,
247
293
  abortController: new AbortController(),
248
- currentRequestId: 'test-req',
294
+ currentRequestId: "test-req",
249
295
 
250
296
  agentLoop: {
251
297
  run: agentLoopRun,
252
- } as unknown as AgentLoopSessionContext['agentLoop'],
298
+ } as unknown as AgentLoopSessionContext["agentLoop"],
253
299
  provider: {
254
- name: 'mock-provider',
300
+ name: "mock-provider",
255
301
  sendMessage: async () => ({
256
- content: [{ type: 'text', text: 'title' }],
257
- model: 'mock',
302
+ content: [{ type: "text", text: "title" }],
303
+ model: "mock",
258
304
  usage: { inputTokens: 0, outputTokens: 0 },
259
- stopReason: 'end_turn',
305
+ stopReason: "end_turn",
260
306
  }),
261
- } as unknown as AgentLoopSessionContext['provider'],
262
- systemPrompt: 'system prompt',
307
+ } as unknown as AgentLoopSessionContext["provider"],
308
+ systemPrompt: "system prompt",
263
309
 
264
310
  contextWindowManager: {
265
311
  maybeCompact: async () => ({ compacted: false }),
266
- } as unknown as AgentLoopSessionContext['contextWindowManager'],
312
+ } as unknown as AgentLoopSessionContext["contextWindowManager"],
267
313
  contextCompactedMessageCount: 0,
268
314
  contextCompactedAt: null,
269
315
 
270
- conflictGate: { evaluate: async () => null } as unknown as AgentLoopSessionContext['conflictGate'],
271
- memoryPolicy: { scopeId: 'default', includeDefaultFallback: true },
316
+ conflictGate: {
317
+ evaluate: async () => null,
318
+ } as unknown as AgentLoopSessionContext["conflictGate"],
319
+ memoryPolicy: { scopeId: "default", includeDefaultFallback: true },
272
320
 
273
321
  currentActiveSurfaceId: undefined,
274
322
  currentPage: undefined,
275
323
  surfaceState: new Map(),
276
324
  pendingSurfaceActions: new Map(),
325
+ surfaceActionRequestIds: new Set<string>(),
277
326
  currentTurnSurfaces: [],
278
327
 
279
- workingDir: '/tmp',
328
+ workingDir: "/tmp",
280
329
  workspaceTopLevelContext: null,
281
330
  workspaceTopLevelDirty: false,
282
331
  channelCapabilities: undefined,
@@ -287,24 +336,30 @@ function makeCtx(overrides?: Partial<AgentLoopSessionContext> & { agentLoopRun?:
287
336
  allowedToolNames: undefined,
288
337
  preactivatedSkillIds: undefined,
289
338
  skillProjectionState: new Map(),
290
- skillProjectionCache: new Map() as unknown as AgentLoopSessionContext['skillProjectionCache'],
339
+ skillProjectionCache:
340
+ new Map() as unknown as AgentLoopSessionContext["skillProjectionCache"],
291
341
 
292
342
  traceEmitter: {
293
343
  emit: () => {},
294
- } as unknown as AgentLoopSessionContext['traceEmitter'],
344
+ } as unknown as AgentLoopSessionContext["traceEmitter"],
295
345
  profiler: {
296
346
  startRequest: () => {},
297
347
  emitSummary: () => {},
298
- } as unknown as AgentLoopSessionContext['profiler'],
299
- usageStats: { totalInputTokens: 0, totalOutputTokens: 0, totalEstimatedCost: 0, model: '' },
348
+ } as unknown as AgentLoopSessionContext["profiler"],
349
+ usageStats: {
350
+ totalInputTokens: 0,
351
+ totalOutputTokens: 0,
352
+ totalEstimatedCost: 0,
353
+ model: "",
354
+ },
300
355
  turnCount: 0,
301
356
 
302
357
  lastAssistantAttachments: [],
303
358
  lastAttachmentWarnings: [],
304
359
 
305
360
  hasNoClient: false,
306
- prompter: {} as unknown as AgentLoopSessionContext['prompter'],
307
- queue: {} as unknown as AgentLoopSessionContext['queue'],
361
+ prompter: {} as unknown as AgentLoopSessionContext["prompter"],
362
+ queue: {} as unknown as AgentLoopSessionContext["queue"],
308
363
 
309
364
  getWorkspaceGitService: () => ({ ensureInitialized: async () => {} }),
310
365
  commitTurnChanges: async () => {},
@@ -318,133 +373,160 @@ function makeCtx(overrides?: Partial<AgentLoopSessionContext> & { agentLoopRun?:
318
373
  drainQueue: () => {},
319
374
  getTurnInterfaceContext: () => null,
320
375
  getTurnChannelContext: () => ({
321
- userMessageChannel: 'vellum' as const,
322
- assistantMessageChannel: 'vellum' as const,
376
+ userMessageChannel: "vellum" as const,
377
+ assistantMessageChannel: "vellum" as const,
323
378
  }),
324
379
 
325
380
  ...overrides,
326
381
  } as AgentLoopSessionContext;
327
382
  }
328
383
 
329
-
330
384
  // ── Tests ────────────────────────────────────────────────────────────
331
385
 
332
386
  beforeEach(() => {
333
387
  hookBlocked = false;
334
- hookBlockedBy = '';
388
+ hookBlockedBy = "";
335
389
  });
336
390
 
337
- describe('session-agent-loop', () => {
338
-
339
- describe('pre-flight checks', () => {
340
- test('throws if called without an abortController', async () => {
391
+ describe("session-agent-loop", () => {
392
+ describe("pre-flight checks", () => {
393
+ test("throws if called without an abortController", async () => {
341
394
  const ctx = makeCtx();
342
395
  ctx.abortController = null;
343
396
  await expect(
344
- runAgentLoopImpl(ctx, 'hello', 'msg-1', () => {}),
345
- ).rejects.toThrow('runAgentLoop called without prior persistUserMessage');
397
+ runAgentLoopImpl(ctx, "hello", "msg-1", () => {}),
398
+ ).rejects.toThrow("runAgentLoop called without prior persistUserMessage");
346
399
  });
347
400
  });
348
401
 
349
- describe('pre-message hook blocking', () => {
350
- test('emits error and returns early when pre-message hook blocks', async () => {
402
+ describe("pre-message hook blocking", () => {
403
+ test("emits error and returns early when pre-message hook blocks", async () => {
351
404
  hookBlocked = true;
352
- hookBlockedBy = 'test-hook';
405
+ hookBlockedBy = "test-hook";
353
406
  const events: ServerMessage[] = [];
354
407
  const ctx = makeCtx();
355
408
 
356
- await runAgentLoopImpl(ctx, 'hello', 'msg-1', (msg) => events.push(msg));
409
+ await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
357
410
 
358
- const errorEvent = events.find((e) => e.type === 'error');
411
+ const errorEvent = events.find((e) => e.type === "error");
359
412
  expect(errorEvent).toBeDefined();
360
- expect((errorEvent as { message: string }).message).toContain('test-hook');
413
+ expect((errorEvent as { message: string }).message).toContain(
414
+ "test-hook",
415
+ );
361
416
  });
362
417
 
363
- test('removes user message when hook blocks without skipPreMessageRollback', async () => {
418
+ test("removes user message when hook blocks without skipPreMessageRollback", async () => {
364
419
  hookBlocked = true;
365
- hookBlockedBy = 'guard';
420
+ hookBlockedBy = "guard";
366
421
  const ctx = makeCtx();
367
422
  const originalLength = ctx.messages.length;
368
423
 
369
- await runAgentLoopImpl(ctx, 'hello', 'msg-1', () => {});
424
+ await runAgentLoopImpl(ctx, "hello", "msg-1", () => {});
370
425
 
371
426
  expect(ctx.messages.length).toBe(originalLength - 1);
372
427
  });
373
428
 
374
- test('keeps user message when hook blocks with skipPreMessageRollback', async () => {
429
+ test("keeps user message when hook blocks with skipPreMessageRollback", async () => {
375
430
  hookBlocked = true;
376
- hookBlockedBy = 'guard';
431
+ hookBlockedBy = "guard";
377
432
  const ctx = makeCtx();
378
433
  const originalLength = ctx.messages.length;
379
434
 
380
- await runAgentLoopImpl(ctx, 'hello', 'msg-1', () => {}, { skipPreMessageRollback: true });
435
+ await runAgentLoopImpl(ctx, "hello", "msg-1", () => {}, {
436
+ skipPreMessageRollback: true,
437
+ });
381
438
 
382
439
  expect(ctx.messages.length).toBe(originalLength);
383
440
  });
384
441
  });
385
442
 
386
- describe('tool execution errors via agent loop', () => {
387
- test('error events from agent loop are classified and emitted', async () => {
443
+ describe("tool execution errors via agent loop", () => {
444
+ test("error events from agent loop are classified and emitted", async () => {
388
445
  const events: ServerMessage[] = [];
389
446
 
390
447
  const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
391
448
  // Simulate tool_use + error during execution
392
- onEvent({ type: 'tool_use', id: 'tu-1', name: 'bash', input: { cmd: 'ls' } });
393
449
  onEvent({
394
- type: 'error',
395
- error: new Error('Tool execution failed: permission denied'),
450
+ type: "tool_use",
451
+ id: "tu-1",
452
+ name: "bash",
453
+ input: { cmd: "ls" },
396
454
  });
397
455
  onEvent({
398
- type: 'message_complete',
456
+ type: "error",
457
+ error: new Error("Tool execution failed: permission denied"),
458
+ });
459
+ onEvent({
460
+ type: "message_complete",
399
461
  message: {
400
- role: 'assistant',
401
- content: [{ type: 'text', text: 'I encountered an error' }],
462
+ role: "assistant",
463
+ content: [{ type: "text", text: "I encountered an error" }],
402
464
  },
403
465
  });
404
- onEvent({ type: 'usage', inputTokens: 100, outputTokens: 50, model: 'test-model', providerDurationMs: 200 });
466
+ onEvent({
467
+ type: "usage",
468
+ inputTokens: 100,
469
+ outputTokens: 50,
470
+ model: "test-model",
471
+ providerDurationMs: 200,
472
+ });
405
473
  return [
406
474
  ...messages,
407
- { role: 'assistant' as const, content: [{ type: 'text', text: 'I encountered an error' }] as ContentBlock[] },
475
+ {
476
+ role: "assistant" as const,
477
+ content: [
478
+ { type: "text", text: "I encountered an error" },
479
+ ] as ContentBlock[],
480
+ },
408
481
  ];
409
482
  };
410
483
 
411
484
  const ctx = makeCtx({ agentLoopRun });
412
- await runAgentLoopImpl(ctx, 'run ls', 'msg-1', (msg) => events.push(msg));
485
+ await runAgentLoopImpl(ctx, "run ls", "msg-1", (msg) => events.push(msg));
413
486
 
414
- const sessionError = events.find((e) => e.type === 'session_error');
487
+ const sessionError = events.find((e) => e.type === "session_error");
415
488
  expect(sessionError).toBeDefined();
416
489
  });
417
490
 
418
- test('non-error agent loop completion does not emit session_error', async () => {
491
+ test("non-error agent loop completion does not emit session_error", async () => {
419
492
  const events: ServerMessage[] = [];
420
493
 
421
494
  const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
422
495
  onEvent({
423
- type: 'message_complete',
496
+ type: "message_complete",
424
497
  message: {
425
- role: 'assistant',
426
- content: [{ type: 'text', text: 'All good' }],
498
+ role: "assistant",
499
+ content: [{ type: "text", text: "All good" }],
427
500
  },
428
501
  });
429
- onEvent({ type: 'usage', inputTokens: 50, outputTokens: 25, model: 'test-model', providerDurationMs: 100 });
502
+ onEvent({
503
+ type: "usage",
504
+ inputTokens: 50,
505
+ outputTokens: 25,
506
+ model: "test-model",
507
+ providerDurationMs: 100,
508
+ });
430
509
  return [
431
510
  ...messages,
432
- { role: 'assistant' as const, content: [{ type: 'text', text: 'All good' }] as ContentBlock[] },
511
+ {
512
+ role: "assistant" as const,
513
+ content: [{ type: "text", text: "All good" }] as ContentBlock[],
514
+ },
433
515
  ];
434
516
  };
435
517
 
436
518
  const ctx = makeCtx({ agentLoopRun });
437
- await runAgentLoopImpl(ctx, 'hello', 'msg-1', (msg) => events.push(msg));
519
+ await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
438
520
 
439
- const sessionError = events.find((e) => e.type === 'session_error');
521
+ const sessionError = events.find((e) => e.type === "session_error");
440
522
  expect(sessionError).toBeUndefined();
441
- const complete = events.find((e) => e.type === 'message_complete');
523
+ const complete = events.find((e) => e.type === "message_complete");
442
524
  expect(complete).toBeDefined();
443
525
  });
444
526
  });
445
527
 
446
- describe('context window exhaustion (context-too-large recovery)', () => {
447
- test('triggers forced compaction when context-too-large is detected', async () => {
528
+ describe("context window exhaustion (context-too-large recovery)", () => {
529
+ test("triggers forced compaction when context-too-large is detected", async () => {
448
530
  const events: ServerMessage[] = [];
449
531
  let callCount = 0;
450
532
  let compactForceCalled = false;
@@ -454,35 +536,59 @@ describe('session-agent-loop', () => {
454
536
  if (callCount === 1) {
455
537
  // First call: emit context_too_large error, return same messages (no progress)
456
538
  onEvent({
457
- type: 'error',
458
- error: new Error('context_length_exceeded'),
539
+ type: "error",
540
+ error: new Error("context_length_exceeded"),
541
+ });
542
+ onEvent({
543
+ type: "usage",
544
+ inputTokens: 100,
545
+ outputTokens: 0,
546
+ model: "test-model",
547
+ providerDurationMs: 50,
459
548
  });
460
- onEvent({ type: 'usage', inputTokens: 100, outputTokens: 0, model: 'test-model', providerDurationMs: 50 });
461
549
  return messages;
462
550
  }
463
551
  // Second call (after compaction): succeed
464
552
  onEvent({
465
- type: 'message_complete',
466
- message: { role: 'assistant', content: [{ type: 'text', text: 'recovered' }] },
553
+ type: "message_complete",
554
+ message: {
555
+ role: "assistant",
556
+ content: [{ type: "text", text: "recovered" }],
557
+ },
558
+ });
559
+ onEvent({
560
+ type: "usage",
561
+ inputTokens: 50,
562
+ outputTokens: 25,
563
+ model: "test-model",
564
+ providerDurationMs: 100,
467
565
  });
468
- onEvent({ type: 'usage', inputTokens: 50, outputTokens: 25, model: 'test-model', providerDurationMs: 100 });
469
566
  return [
470
567
  ...messages,
471
- { role: 'assistant' as const, content: [{ type: 'text', text: 'recovered' }] as ContentBlock[] },
568
+ {
569
+ role: "assistant" as const,
570
+ content: [{ type: "text", text: "recovered" }] as ContentBlock[],
571
+ },
472
572
  ];
473
573
  };
474
574
 
475
575
  const ctx = makeCtx({
476
576
  agentLoopRun,
477
577
  contextWindowManager: {
478
- maybeCompact: async (_msgs: Message[], _signal: AbortSignal, opts?: { force?: boolean }) => {
578
+ maybeCompact: async (
579
+ _msgs: Message[],
580
+ _signal: AbortSignal,
581
+ opts?: { force?: boolean },
582
+ ) => {
479
583
  if (opts?.force) {
480
584
  compactForceCalled = true;
481
585
  return {
482
586
  compacted: true,
483
- messages: [{ role: 'user', content: [{ type: 'text', text: 'Hello' }] }] as Message[],
587
+ messages: [
588
+ { role: "user", content: [{ type: "text", text: "Hello" }] },
589
+ ] as Message[],
484
590
  compactedPersistedMessages: 5,
485
- summaryText: 'Summary of prior conversation',
591
+ summaryText: "Summary of prior conversation",
486
592
  previousEstimatedInputTokens: 90000,
487
593
  estimatedInputTokens: 30000,
488
594
  maxInputTokens: 100000,
@@ -491,31 +597,37 @@ describe('session-agent-loop', () => {
491
597
  summaryCalls: 1,
492
598
  summaryInputTokens: 500,
493
599
  summaryOutputTokens: 200,
494
- summaryModel: 'mock-model',
600
+ summaryModel: "mock-model",
495
601
  };
496
602
  }
497
603
  return { compacted: false };
498
604
  },
499
- } as unknown as AgentLoopSessionContext['contextWindowManager'],
605
+ } as unknown as AgentLoopSessionContext["contextWindowManager"],
500
606
  });
501
607
 
502
- await runAgentLoopImpl(ctx, 'hello', 'msg-1', (msg) => events.push(msg));
608
+ await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
503
609
 
504
610
  expect(compactForceCalled).toBe(true);
505
611
  expect(callCount).toBe(2);
506
- const compactEvent = events.find((e) => e.type === 'context_compacted');
612
+ const compactEvent = events.find((e) => e.type === "context_compacted");
507
613
  expect(compactEvent).toBeDefined();
508
614
  });
509
615
 
510
- test('emits session_error when context stays too large after all recovery attempts', async () => {
616
+ test("emits session_error when context stays too large after all recovery attempts", async () => {
511
617
  const events: ServerMessage[] = [];
512
618
 
513
619
  const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
514
620
  onEvent({
515
- type: 'error',
516
- error: new Error('context_length_exceeded'),
621
+ type: "error",
622
+ error: new Error("context_length_exceeded"),
623
+ });
624
+ onEvent({
625
+ type: "usage",
626
+ inputTokens: 100,
627
+ outputTokens: 0,
628
+ model: "test-model",
629
+ providerDurationMs: 50,
517
630
  });
518
- onEvent({ type: 'usage', inputTokens: 100, outputTokens: 0, model: 'test-model', providerDurationMs: 50 });
519
631
  return messages;
520
632
  };
521
633
 
@@ -525,9 +637,11 @@ describe('session-agent-loop', () => {
525
637
  // Compaction succeeds but context is still too large
526
638
  maybeCompact: async () => ({
527
639
  compacted: true,
528
- messages: [{ role: 'user', content: [{ type: 'text', text: 'Hello' }] }] as Message[],
640
+ messages: [
641
+ { role: "user", content: [{ type: "text", text: "Hello" }] },
642
+ ] as Message[],
529
643
  compactedPersistedMessages: 5,
530
- summaryText: 'Summary',
644
+ summaryText: "Summary",
531
645
  previousEstimatedInputTokens: 90000,
532
646
  estimatedInputTokens: 85000,
533
647
  maxInputTokens: 100000,
@@ -536,20 +650,20 @@ describe('session-agent-loop', () => {
536
650
  summaryCalls: 1,
537
651
  summaryInputTokens: 500,
538
652
  summaryOutputTokens: 200,
539
- summaryModel: 'mock-model',
653
+ summaryModel: "mock-model",
540
654
  }),
541
- } as unknown as AgentLoopSessionContext['contextWindowManager'],
655
+ } as unknown as AgentLoopSessionContext["contextWindowManager"],
542
656
  });
543
657
 
544
- await runAgentLoopImpl(ctx, 'hello', 'msg-1', (msg) => events.push(msg));
658
+ await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
545
659
 
546
- const sessionError = events.find((e) => e.type === 'session_error');
660
+ const sessionError = events.find((e) => e.type === "session_error");
547
661
  expect(sessionError).toBeDefined();
548
662
  });
549
663
  });
550
664
 
551
- describe('provider ordering error retry', () => {
552
- test('retries with deep repair when ordering error is detected', async () => {
665
+ describe("provider ordering error retry", () => {
666
+ test("retries with deep repair when ordering error is detected", async () => {
553
667
  const events: ServerMessage[] = [];
554
668
  let callCount = 0;
555
669
 
@@ -557,80 +671,129 @@ describe('session-agent-loop', () => {
557
671
  callCount++;
558
672
  if (callCount === 1) {
559
673
  onEvent({
560
- type: 'error',
561
- error: new Error('messages ordering error'),
674
+ type: "error",
675
+ error: new Error("messages ordering error"),
676
+ });
677
+ onEvent({
678
+ type: "usage",
679
+ inputTokens: 100,
680
+ outputTokens: 0,
681
+ model: "test-model",
682
+ providerDurationMs: 50,
562
683
  });
563
- onEvent({ type: 'usage', inputTokens: 100, outputTokens: 0, model: 'test-model', providerDurationMs: 50 });
564
684
  return messages;
565
685
  }
566
686
  // Retry succeeds
567
687
  onEvent({
568
- type: 'message_complete',
569
- message: { role: 'assistant', content: [{ type: 'text', text: 'fixed' }] },
688
+ type: "message_complete",
689
+ message: {
690
+ role: "assistant",
691
+ content: [{ type: "text", text: "fixed" }],
692
+ },
693
+ });
694
+ onEvent({
695
+ type: "usage",
696
+ inputTokens: 50,
697
+ outputTokens: 25,
698
+ model: "test-model",
699
+ providerDurationMs: 100,
570
700
  });
571
- onEvent({ type: 'usage', inputTokens: 50, outputTokens: 25, model: 'test-model', providerDurationMs: 100 });
572
701
  return [
573
702
  ...messages,
574
- { role: 'assistant' as const, content: [{ type: 'text', text: 'fixed' }] as ContentBlock[] },
703
+ {
704
+ role: "assistant" as const,
705
+ content: [{ type: "text", text: "fixed" }] as ContentBlock[],
706
+ },
575
707
  ];
576
708
  };
577
709
 
578
710
  const ctx = makeCtx({ agentLoopRun });
579
- await runAgentLoopImpl(ctx, 'hello', 'msg-1', (msg) => events.push(msg));
711
+ await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
580
712
 
581
713
  expect(callCount).toBe(2);
582
714
  });
583
715
 
584
- test('emits deferred ordering error when retry also fails', async () => {
716
+ test("emits deferred ordering error when retry also fails", async () => {
585
717
  const events: ServerMessage[] = [];
586
718
 
587
719
  const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
588
720
  onEvent({
589
- type: 'error',
590
- error: new Error('messages ordering error'),
721
+ type: "error",
722
+ error: new Error("messages ordering error"),
723
+ });
724
+ onEvent({
725
+ type: "usage",
726
+ inputTokens: 100,
727
+ outputTokens: 0,
728
+ model: "test-model",
729
+ providerDurationMs: 50,
591
730
  });
592
- onEvent({ type: 'usage', inputTokens: 100, outputTokens: 0, model: 'test-model', providerDurationMs: 50 });
593
731
  return messages;
594
732
  };
595
733
 
596
734
  const ctx = makeCtx({ agentLoopRun });
597
- await runAgentLoopImpl(ctx, 'hello', 'msg-1', (msg) => events.push(msg));
735
+ await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
598
736
 
599
- const sessionError = events.find((e) => e.type === 'session_error');
737
+ const sessionError = events.find((e) => e.type === "session_error");
600
738
  expect(sessionError).toBeDefined();
601
739
  });
602
740
  });
603
741
 
604
- describe('checkpoint handoff (infinite loop prevention)', () => {
605
- test('yields at checkpoint when canHandoffAtCheckpoint returns true', async () => {
742
+ describe("checkpoint handoff (infinite loop prevention)", () => {
743
+ test("yields at checkpoint when canHandoffAtCheckpoint returns true", async () => {
606
744
  const events: ServerMessage[] = [];
607
745
 
608
- const agentLoopRun: AgentLoopRun = async (messages, onEvent, _signal, _reqId, onCheckpoint) => {
746
+ const agentLoopRun: AgentLoopRun = async (
747
+ messages,
748
+ onEvent,
749
+ _signal,
750
+ _reqId,
751
+ onCheckpoint,
752
+ ) => {
609
753
  // Simulate tool use followed by checkpoint
610
- onEvent({ type: 'tool_use', id: 'tu-1', name: 'file_read', input: {} });
754
+ onEvent({ type: "tool_use", id: "tu-1", name: "file_read", input: {} });
611
755
  onEvent({
612
- type: 'tool_result',
613
- toolUseId: 'tu-1',
614
- content: 'file content',
756
+ type: "tool_result",
757
+ toolUseId: "tu-1",
758
+ content: "file content",
615
759
  isError: false,
616
760
  });
617
761
  onEvent({
618
- type: 'message_complete',
619
- message: { role: 'assistant', content: [{ type: 'text', text: 'partial' }] },
762
+ type: "message_complete",
763
+ message: {
764
+ role: "assistant",
765
+ content: [{ type: "text", text: "partial" }],
766
+ },
767
+ });
768
+ onEvent({
769
+ type: "usage",
770
+ inputTokens: 100,
771
+ outputTokens: 50,
772
+ model: "test-model",
773
+ providerDurationMs: 100,
620
774
  });
621
- onEvent({ type: 'usage', inputTokens: 100, outputTokens: 50, model: 'test-model', providerDurationMs: 100 });
622
775
  if (onCheckpoint) {
623
- const decision = onCheckpoint({ turnIndex: 0, toolCount: 1, hasToolUse: true });
624
- if (decision === 'yield') {
776
+ const decision = onCheckpoint({
777
+ turnIndex: 0,
778
+ toolCount: 1,
779
+ hasToolUse: true,
780
+ });
781
+ if (decision === "yield") {
625
782
  return [
626
783
  ...messages,
627
- { role: 'assistant' as const, content: [{ type: 'text', text: 'partial' }] as ContentBlock[] },
784
+ {
785
+ role: "assistant" as const,
786
+ content: [{ type: "text", text: "partial" }] as ContentBlock[],
787
+ },
628
788
  ];
629
789
  }
630
790
  }
631
791
  return [
632
792
  ...messages,
633
- { role: 'assistant' as const, content: [{ type: 'text', text: 'partial' }] as ContentBlock[] },
793
+ {
794
+ role: "assistant" as const,
795
+ content: [{ type: "text", text: "partial" }] as ContentBlock[],
796
+ },
634
797
  ];
635
798
  };
636
799
 
@@ -639,34 +802,52 @@ describe('session-agent-loop', () => {
639
802
  canHandoffAtCheckpoint: () => true,
640
803
  } as unknown as Partial<AgentLoopSessionContext>);
641
804
 
642
- await runAgentLoopImpl(ctx, 'hello', 'msg-1', (msg) => events.push(msg));
805
+ await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
643
806
 
644
- const handoff = events.find((e) => e.type === 'generation_handoff');
807
+ const handoff = events.find((e) => e.type === "generation_handoff");
645
808
  expect(handoff).toBeDefined();
646
809
  });
647
810
 
648
- test('continues when canHandoffAtCheckpoint returns false', async () => {
811
+ test("continues when canHandoffAtCheckpoint returns false", async () => {
649
812
  const events: ServerMessage[] = [];
650
813
 
651
- const agentLoopRun: AgentLoopRun = async (messages, onEvent, _signal, _reqId, onCheckpoint) => {
652
- onEvent({ type: 'tool_use', id: 'tu-1', name: 'file_read', input: {} });
814
+ const agentLoopRun: AgentLoopRun = async (
815
+ messages,
816
+ onEvent,
817
+ _signal,
818
+ _reqId,
819
+ onCheckpoint,
820
+ ) => {
821
+ onEvent({ type: "tool_use", id: "tu-1", name: "file_read", input: {} });
653
822
  onEvent({
654
- type: 'tool_result',
655
- toolUseId: 'tu-1',
656
- content: 'content',
823
+ type: "tool_result",
824
+ toolUseId: "tu-1",
825
+ content: "content",
657
826
  isError: false,
658
827
  });
659
828
  onEvent({
660
- type: 'message_complete',
661
- message: { role: 'assistant', content: [{ type: 'text', text: 'done' }] },
829
+ type: "message_complete",
830
+ message: {
831
+ role: "assistant",
832
+ content: [{ type: "text", text: "done" }],
833
+ },
834
+ });
835
+ onEvent({
836
+ type: "usage",
837
+ inputTokens: 100,
838
+ outputTokens: 50,
839
+ model: "test-model",
840
+ providerDurationMs: 100,
662
841
  });
663
- onEvent({ type: 'usage', inputTokens: 100, outputTokens: 50, model: 'test-model', providerDurationMs: 100 });
664
842
  if (onCheckpoint) {
665
843
  onCheckpoint({ turnIndex: 0, toolCount: 1, hasToolUse: true });
666
844
  }
667
845
  return [
668
846
  ...messages,
669
- { role: 'assistant' as const, content: [{ type: 'text', text: 'done' }] as ContentBlock[] },
847
+ {
848
+ role: "assistant" as const,
849
+ content: [{ type: "text", text: "done" }] as ContentBlock[],
850
+ },
670
851
  ];
671
852
  };
672
853
 
@@ -675,37 +856,60 @@ describe('session-agent-loop', () => {
675
856
  canHandoffAtCheckpoint: () => false,
676
857
  } as unknown as Partial<AgentLoopSessionContext>);
677
858
 
678
- await runAgentLoopImpl(ctx, 'hello', 'msg-1', (msg) => events.push(msg));
859
+ await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
679
860
 
680
- const handoff = events.find((e) => e.type === 'generation_handoff');
861
+ const handoff = events.find((e) => e.type === "generation_handoff");
681
862
  expect(handoff).toBeUndefined();
682
- const complete = events.find((e) => e.type === 'message_complete');
863
+ const complete = events.find((e) => e.type === "message_complete");
683
864
  expect(complete).toBeDefined();
684
865
  });
685
866
 
686
- test('does not yield during browser flow even when handoff is available', async () => {
867
+ test("does not yield during browser flow even when handoff is available", async () => {
687
868
  const events: ServerMessage[] = [];
688
869
 
689
- const agentLoopRun: AgentLoopRun = async (messages, onEvent, _signal, _reqId, onCheckpoint) => {
870
+ const agentLoopRun: AgentLoopRun = async (
871
+ messages,
872
+ onEvent,
873
+ _signal,
874
+ _reqId,
875
+ onCheckpoint,
876
+ ) => {
690
877
  // All tool uses are browser_ prefixed
691
- onEvent({ type: 'tool_use', id: 'tu-1', name: 'browser_navigate', input: {} });
692
878
  onEvent({
693
- type: 'tool_result',
694
- toolUseId: 'tu-1',
695
- content: 'navigated',
879
+ type: "tool_use",
880
+ id: "tu-1",
881
+ name: "browser_navigate",
882
+ input: {},
883
+ });
884
+ onEvent({
885
+ type: "tool_result",
886
+ toolUseId: "tu-1",
887
+ content: "navigated",
696
888
  isError: false,
697
889
  });
698
890
  onEvent({
699
- type: 'message_complete',
700
- message: { role: 'assistant', content: [{ type: 'text', text: 'browsing' }] },
891
+ type: "message_complete",
892
+ message: {
893
+ role: "assistant",
894
+ content: [{ type: "text", text: "browsing" }],
895
+ },
896
+ });
897
+ onEvent({
898
+ type: "usage",
899
+ inputTokens: 100,
900
+ outputTokens: 50,
901
+ model: "test-model",
902
+ providerDurationMs: 100,
701
903
  });
702
- onEvent({ type: 'usage', inputTokens: 100, outputTokens: 50, model: 'test-model', providerDurationMs: 100 });
703
904
  if (onCheckpoint) {
704
905
  onCheckpoint({ turnIndex: 0, toolCount: 1, hasToolUse: true });
705
906
  }
706
907
  return [
707
908
  ...messages,
708
- { role: 'assistant' as const, content: [{ type: 'text', text: 'browsing' }] as ContentBlock[] },
909
+ {
910
+ role: "assistant" as const,
911
+ content: [{ type: "text", text: "browsing" }] as ContentBlock[],
912
+ },
709
913
  ];
710
914
  };
711
915
 
@@ -714,99 +918,135 @@ describe('session-agent-loop', () => {
714
918
  canHandoffAtCheckpoint: () => true,
715
919
  } as unknown as Partial<AgentLoopSessionContext>);
716
920
 
717
- await runAgentLoopImpl(ctx, 'hello', 'msg-1', (msg) => events.push(msg));
921
+ await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
718
922
 
719
923
  // Browser flows should NOT yield
720
- const handoff = events.find((e) => e.type === 'generation_handoff');
924
+ const handoff = events.find((e) => e.type === "generation_handoff");
721
925
  expect(handoff).toBeUndefined();
722
926
  });
723
927
  });
724
928
 
725
- describe('user cancellation', () => {
726
- test('emits generation_cancelled when abort signal fires', async () => {
929
+ describe("user cancellation", () => {
930
+ test("emits generation_cancelled when abort signal fires", async () => {
727
931
  const events: ServerMessage[] = [];
728
932
  const abortController = new AbortController();
729
933
 
730
934
  const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
731
935
  onEvent({
732
- type: 'message_complete',
733
- message: { role: 'assistant', content: [{ type: 'text', text: 'partial' }] },
936
+ type: "message_complete",
937
+ message: {
938
+ role: "assistant",
939
+ content: [{ type: "text", text: "partial" }],
940
+ },
941
+ });
942
+ onEvent({
943
+ type: "usage",
944
+ inputTokens: 100,
945
+ outputTokens: 50,
946
+ model: "test-model",
947
+ providerDurationMs: 100,
734
948
  });
735
- onEvent({ type: 'usage', inputTokens: 100, outputTokens: 50, model: 'test-model', providerDurationMs: 100 });
736
949
  // Simulate abort after processing
737
950
  abortController.abort();
738
951
  return [
739
952
  ...messages,
740
- { role: 'assistant' as const, content: [{ type: 'text', text: 'partial' }] as ContentBlock[] },
953
+ {
954
+ role: "assistant" as const,
955
+ content: [{ type: "text", text: "partial" }] as ContentBlock[],
956
+ },
741
957
  ];
742
958
  };
743
959
 
744
960
  const ctx = makeCtx({ agentLoopRun, abortController });
745
- await runAgentLoopImpl(ctx, 'hello', 'msg-1', (msg) => events.push(msg));
961
+ await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
746
962
 
747
- const cancelled = events.find((e) => e.type === 'generation_cancelled');
963
+ const cancelled = events.find((e) => e.type === "generation_cancelled");
748
964
  expect(cancelled).toBeDefined();
749
965
  });
750
966
 
751
- test('handles AbortError thrown from agent loop as user cancellation', async () => {
967
+ test("handles AbortError thrown from agent loop as user cancellation", async () => {
752
968
  const events: ServerMessage[] = [];
753
969
  const abortController = new AbortController();
754
970
 
755
971
  const agentLoopRun: AgentLoopRun = async () => {
756
972
  abortController.abort();
757
- const err = new DOMException('The operation was aborted', 'AbortError');
973
+ const err = new DOMException("The operation was aborted", "AbortError");
758
974
  throw err;
759
975
  };
760
976
 
761
977
  const ctx = makeCtx({ agentLoopRun, abortController });
762
- await runAgentLoopImpl(ctx, 'hello', 'msg-1', (msg) => events.push(msg));
978
+ await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
763
979
 
764
- const cancelled = events.find((e) => e.type === 'generation_cancelled');
980
+ const cancelled = events.find((e) => e.type === "generation_cancelled");
765
981
  expect(cancelled).toBeDefined();
766
982
  // Should NOT emit a session_error for user cancellation
767
- const sessionError = events.find((e) => e.type === 'session_error');
983
+ const sessionError = events.find((e) => e.type === "session_error");
768
984
  expect(sessionError).toBeUndefined();
769
985
  });
770
986
  });
771
987
 
772
- describe('finally block cleanup', () => {
773
- test('increments turnCount after successful run', async () => {
988
+ describe("finally block cleanup", () => {
989
+ test("increments turnCount after successful run", async () => {
774
990
  const ctx = makeCtx({
775
991
  agentLoopRun: async (messages, onEvent) => {
776
992
  onEvent({
777
- type: 'message_complete',
778
- message: { role: 'assistant', content: [{ type: 'text', text: 'hi' }] },
993
+ type: "message_complete",
994
+ message: {
995
+ role: "assistant",
996
+ content: [{ type: "text", text: "hi" }],
997
+ },
998
+ });
999
+ onEvent({
1000
+ type: "usage",
1001
+ inputTokens: 10,
1002
+ outputTokens: 5,
1003
+ model: "test",
1004
+ providerDurationMs: 50,
779
1005
  });
780
- onEvent({ type: 'usage', inputTokens: 10, outputTokens: 5, model: 'test', providerDurationMs: 50 });
781
1006
  return [
782
1007
  ...messages,
783
- { role: 'assistant' as const, content: [{ type: 'text', text: 'hi' }] as ContentBlock[] },
1008
+ {
1009
+ role: "assistant" as const,
1010
+ content: [{ type: "text", text: "hi" }] as ContentBlock[],
1011
+ },
784
1012
  ];
785
1013
  },
786
1014
  });
787
1015
  expect(ctx.turnCount).toBe(0);
788
1016
 
789
- await runAgentLoopImpl(ctx, 'hi', 'msg-1', () => {});
1017
+ await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
790
1018
 
791
1019
  expect(ctx.turnCount).toBe(1);
792
1020
  });
793
1021
 
794
- test('clears processing state and abort controller', async () => {
1022
+ test("clears processing state and abort controller", async () => {
795
1023
  const ctx = makeCtx({
796
1024
  agentLoopRun: async (messages, onEvent) => {
797
1025
  onEvent({
798
- type: 'message_complete',
799
- message: { role: 'assistant', content: [{ type: 'text', text: 'hi' }] },
1026
+ type: "message_complete",
1027
+ message: {
1028
+ role: "assistant",
1029
+ content: [{ type: "text", text: "hi" }],
1030
+ },
1031
+ });
1032
+ onEvent({
1033
+ type: "usage",
1034
+ inputTokens: 10,
1035
+ outputTokens: 5,
1036
+ model: "test",
1037
+ providerDurationMs: 50,
800
1038
  });
801
- onEvent({ type: 'usage', inputTokens: 10, outputTokens: 5, model: 'test', providerDurationMs: 50 });
802
1039
  return [
803
1040
  ...messages,
804
- { role: 'assistant' as const, content: [{ type: 'text', text: 'hi' }] as ContentBlock[] },
1041
+ {
1042
+ role: "assistant" as const,
1043
+ content: [{ type: "text", text: "hi" }] as ContentBlock[],
1044
+ },
805
1045
  ];
806
1046
  },
807
1047
  });
808
1048
 
809
- await runAgentLoopImpl(ctx, 'hi', 'msg-1', () => {});
1049
+ await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
810
1050
 
811
1051
  expect(ctx.processing).toBe(false);
812
1052
  expect(ctx.abortController).toBeNull();
@@ -814,62 +1054,87 @@ describe('session-agent-loop', () => {
814
1054
  expect(ctx.commandIntent).toBeUndefined();
815
1055
  });
816
1056
 
817
- test('clears state even when agent loop throws', async () => {
1057
+ test("clears state even when agent loop throws", async () => {
818
1058
  const ctx = makeCtx({
819
1059
  agentLoopRun: async () => {
820
- throw new Error('unexpected crash');
1060
+ throw new Error("unexpected crash");
821
1061
  },
822
1062
  });
823
1063
 
824
- await runAgentLoopImpl(ctx, 'hi', 'msg-1', () => {});
1064
+ await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
825
1065
 
826
1066
  expect(ctx.processing).toBe(false);
827
1067
  expect(ctx.abortController).toBeNull();
828
1068
  });
829
1069
 
830
- test('drains queue after completion', async () => {
1070
+ test("drains queue after completion", async () => {
831
1071
  let drainReason: string | undefined;
832
1072
  const ctx = makeCtx({
833
- agentLoopRun: async (messages: Message[], onEvent: (event: AgentEvent) => void) => {
1073
+ agentLoopRun: async (
1074
+ messages: Message[],
1075
+ onEvent: (event: AgentEvent) => void,
1076
+ ) => {
1077
+ onEvent({
1078
+ type: "message_complete",
1079
+ message: {
1080
+ role: "assistant",
1081
+ content: [{ type: "text", text: "ok" }],
1082
+ },
1083
+ });
834
1084
  onEvent({
835
- type: 'message_complete',
836
- message: { role: 'assistant', content: [{ type: 'text', text: 'ok' }] },
1085
+ type: "usage",
1086
+ inputTokens: 10,
1087
+ outputTokens: 5,
1088
+ model: "test",
1089
+ providerDurationMs: 50,
837
1090
  });
838
- onEvent({ type: 'usage', inputTokens: 10, outputTokens: 5, model: 'test', providerDurationMs: 50 });
839
1091
  return [
840
1092
  ...messages,
841
- { role: 'assistant' as const, content: [{ type: 'text', text: 'ok' }] as ContentBlock[] },
1093
+ {
1094
+ role: "assistant" as const,
1095
+ content: [{ type: "text", text: "ok" }] as ContentBlock[],
1096
+ },
842
1097
  ];
843
1098
  },
844
- drainQueue: (reason: string) => { drainReason = reason; },
1099
+ drainQueue: (reason: string) => {
1100
+ drainReason = reason;
1101
+ },
845
1102
  } as unknown as Partial<AgentLoopSessionContext>);
846
1103
 
847
- await runAgentLoopImpl(ctx, 'hi', 'msg-1', () => {});
1104
+ await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
848
1105
 
849
- expect(drainReason).toBe('loop_complete');
1106
+ expect(drainReason).toBe("loop_complete");
850
1107
  });
851
1108
  });
852
1109
 
853
- describe('error-only response with no assistant text', () => {
854
- test('synthesizes error assistant message when provider returns no response', async () => {
1110
+ describe("error-only response with no assistant text", () => {
1111
+ test("synthesizes error assistant message when provider returns no response", async () => {
855
1112
  const events: ServerMessage[] = [];
856
1113
 
857
1114
  const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
858
1115
  // Emit a non-ordering, non-context-too-large error that sets providerErrorUserMessage
859
1116
  onEvent({
860
- type: 'error',
861
- error: new Error('Internal processing failure'),
1117
+ type: "error",
1118
+ error: new Error("Internal processing failure"),
1119
+ });
1120
+ onEvent({
1121
+ type: "usage",
1122
+ inputTokens: 100,
1123
+ outputTokens: 0,
1124
+ model: "test-model",
1125
+ providerDurationMs: 50,
862
1126
  });
863
- onEvent({ type: 'usage', inputTokens: 100, outputTokens: 0, model: 'test-model', providerDurationMs: 50 });
864
1127
  // Return same messages (no assistant message appended)
865
1128
  return messages;
866
1129
  };
867
1130
 
868
1131
  const ctx = makeCtx({ agentLoopRun });
869
- await runAgentLoopImpl(ctx, 'hello', 'msg-1', (msg) => events.push(msg));
1132
+ await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
870
1133
 
871
1134
  // The providerErrorUserMessage should trigger a synthesized assistant_text_delta
872
- const textDeltas = events.filter((e) => e.type === 'assistant_text_delta');
1135
+ const textDeltas = events.filter(
1136
+ (e) => e.type === "assistant_text_delta",
1137
+ );
873
1138
  expect(textDeltas.length).toBeGreaterThanOrEqual(1);
874
1139
  });
875
1140
  });