@vellumai/assistant 0.8.2 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. package/ARCHITECTURE.md +11 -12
  2. package/docker-entrypoint.sh +13 -1
  3. package/docker-init-apt-root.sh +79 -6
  4. package/openapi.yaml +336 -21
  5. package/package.json +1 -1
  6. package/src/__tests__/agent-loop-exit-reason.test.ts +272 -0
  7. package/src/__tests__/agent-loop-provider-error-recording.test.ts +195 -0
  8. package/src/__tests__/compactor-tail-resolution.test.ts +107 -1
  9. package/src/__tests__/config-get-vision-flag.test.ts +136 -0
  10. package/src/__tests__/config-loader-backfill.test.ts +115 -18
  11. package/src/__tests__/context-token-estimator.test.ts +30 -65
  12. package/src/__tests__/conversation-agent-loop.test.ts +57 -1
  13. package/src/__tests__/conversation-media-retry.test.ts +19 -8
  14. package/src/__tests__/conversation-runtime-assembly.test.ts +26 -4
  15. package/src/__tests__/date-context.test.ts +45 -0
  16. package/src/__tests__/external-plugin-loader.test.ts +91 -19
  17. package/src/__tests__/guardian-action-no-hardcoded-copy.test.ts +0 -1
  18. package/src/__tests__/guardian-dispatch.test.ts +1 -0
  19. package/src/__tests__/heartbeat-service.test.ts +24 -164
  20. package/src/__tests__/helpers/channel-test-adapter.ts +0 -2
  21. package/src/__tests__/host-app-control-proxy.test.ts +241 -0
  22. package/src/__tests__/host-proxy-preactivation.test.ts +200 -13
  23. package/src/__tests__/injector-background-turn.test.ts +153 -0
  24. package/src/__tests__/injector-chain.test.ts +5 -0
  25. package/src/__tests__/lifecycle-memory-v2-seed.test.ts +9 -2
  26. package/src/__tests__/llm-callsite-catalog.test.ts +25 -0
  27. package/src/__tests__/llm-catalog-parity.test.ts +3 -0
  28. package/src/__tests__/llm-request-log-agent-loop-exit-reason.test.ts +116 -0
  29. package/src/__tests__/llm-request-log-error-payload.test.ts +138 -0
  30. package/src/__tests__/llm-request-log-source-clickhouse.test.ts +2 -0
  31. package/src/__tests__/llm-resolver.test.ts +255 -2
  32. package/src/__tests__/managed-profile-guard.test.ts +10 -0
  33. package/src/__tests__/notification-decision-fallback.test.ts +0 -91
  34. package/src/__tests__/notification-decision-strategy.test.ts +14 -31
  35. package/src/__tests__/notification-deep-link.test.ts +15 -0
  36. package/src/__tests__/notification-guardian-path.test.ts +1 -2
  37. package/src/__tests__/notification-platform-adapter.test.ts +5 -4
  38. package/src/__tests__/notification-telegram-adapter.test.ts +1 -0
  39. package/src/__tests__/notification-vellum-adapter.test.ts +113 -0
  40. package/src/__tests__/openai-provider.test.ts +218 -3
  41. package/src/__tests__/openai-responses-cutover-guard.test.ts +3 -3
  42. package/src/__tests__/openrouter-provider-only.test.ts +51 -3
  43. package/src/__tests__/openrouter-token-estimation.test.ts +34 -25
  44. package/src/__tests__/platform-proxy-context.test.ts +6 -1
  45. package/src/__tests__/plugin-tool-contribution.test.ts +3 -3
  46. package/src/__tests__/plugin-types.test.ts +2 -2
  47. package/src/__tests__/provider-catalog-visibility.test.ts +16 -0
  48. package/src/__tests__/provider-platform-proxy-integration.test.ts +27 -25
  49. package/src/__tests__/secret-routes-platform-proxy.test.ts +1 -1
  50. package/src/__tests__/system-prompt.test.ts +6 -73
  51. package/src/__tests__/workspace-migration-087-memory-router-balanced-profile.test.ts +228 -0
  52. package/src/a2a/__tests__/agent-card.test.ts +98 -0
  53. package/src/a2a/__tests__/e2e-a2a-channel.test.ts +597 -0
  54. package/src/a2a/__tests__/protocol-helpers.test.ts +113 -0
  55. package/src/a2a/__tests__/task-store.test.ts +246 -0
  56. package/src/a2a/agent-card.ts +58 -0
  57. package/src/a2a/feature-gate.ts +8 -0
  58. package/src/a2a/protocol-constants.ts +21 -0
  59. package/src/a2a/protocol-errors.ts +50 -0
  60. package/src/a2a/protocol-types.ts +162 -0
  61. package/src/a2a/task-store.ts +168 -0
  62. package/src/agent/loop.ts +167 -18
  63. package/src/channels/config.ts +9 -0
  64. package/src/channels/types.ts +14 -0
  65. package/src/cli/{__tests__ → commands/__tests__}/notifications.test.ts +201 -28
  66. package/src/cli/commands/__tests__/schedules.test.ts +469 -0
  67. package/src/cli/commands/notifications.ts +65 -35
  68. package/src/cli/commands/plugins.ts +67 -0
  69. package/src/cli/commands/schedules.ts +297 -5
  70. package/src/cli/lib/__tests__/search-plugins.test.ts +261 -0
  71. package/src/cli/lib/install-from-github.ts +8 -9
  72. package/src/cli/lib/search-plugins.ts +163 -0
  73. package/src/cli/program.ts +14 -0
  74. package/src/config/assistant-feature-flags.ts +24 -54
  75. package/src/config/bundled-skills/app-builder/SKILL.md +117 -1
  76. package/src/config/bundled-skills/phone-calls/SKILL.md +1 -1
  77. package/src/config/call-site-defaults.ts +105 -0
  78. package/src/config/feature-flag-registry.json +21 -29
  79. package/src/config/llm-resolver.ts +52 -1
  80. package/src/config/schema.ts +2 -0
  81. package/src/config/schemas/__tests__/memory-v2.test.ts +3 -3
  82. package/src/config/schemas/channels.ts +9 -0
  83. package/src/config/schemas/conversations.ts +10 -0
  84. package/src/config/schemas/heartbeat.ts +14 -0
  85. package/src/config/schemas/llm.ts +1 -3
  86. package/src/config/schemas/memory-retrospective.ts +1 -1
  87. package/src/config/schemas/memory-v2.ts +4 -4
  88. package/src/config/schemas/memory.ts +3 -1
  89. package/src/config/seed-inference-profiles.ts +99 -29
  90. package/src/context/compactor.ts +72 -12
  91. package/src/context/token-estimator.ts +32 -34
  92. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +3 -22
  93. package/src/daemon/conversation-agent-loop-handlers.ts +78 -0
  94. package/src/daemon/conversation-agent-loop.ts +29 -2
  95. package/src/daemon/conversation-runtime-assembly.ts +9 -0
  96. package/src/daemon/conversation.ts +0 -7
  97. package/src/daemon/date-context.ts +40 -0
  98. package/src/daemon/guardian-action-generators.ts +1 -125
  99. package/src/daemon/handlers/__tests__/config-a2a-complete.test.ts +248 -0
  100. package/src/daemon/handlers/__tests__/config-a2a-invite.test.ts +154 -0
  101. package/src/daemon/handlers/__tests__/config-a2a-redeem.test.ts +133 -0
  102. package/src/daemon/handlers/__tests__/config-a2a.test.ts +95 -0
  103. package/src/daemon/handlers/config-a2a.ts +289 -0
  104. package/src/daemon/handlers/conversations.ts +1 -0
  105. package/src/daemon/host-app-control-proxy.ts +69 -18
  106. package/src/daemon/host-proxy-preactivation.ts +85 -18
  107. package/src/daemon/lifecycle.ts +49 -61
  108. package/src/daemon/memory-v2-startup.ts +49 -13
  109. package/src/daemon/message-types/notifications.ts +21 -0
  110. package/src/daemon/pkb-reminder-builder.test.ts +10 -53
  111. package/src/daemon/pkb-reminder-builder.ts +4 -19
  112. package/src/daemon/process-message.ts +3 -0
  113. package/src/daemon/skill-memory-refresh.ts +5 -1
  114. package/src/daemon/wake-target-adapter.ts +2 -0
  115. package/src/export/__tests__/transcript-formatter.test.ts +121 -0
  116. package/src/export/transcript-formatter.ts +54 -20
  117. package/src/heartbeat/__tests__/heartbeat-service.test.ts +44 -0
  118. package/src/heartbeat/heartbeat-service.ts +34 -191
  119. package/src/home/__tests__/feed-types.test.ts +40 -0
  120. package/src/home/feed-types.ts +14 -2
  121. package/src/ipc/cli-client.ts +147 -45
  122. package/src/memory/__tests__/conversation-queries.test.ts +220 -0
  123. package/src/memory/__tests__/memory-retrospective-enqueue.test.ts +2 -50
  124. package/src/memory/__tests__/memory-retrospective-job.test.ts +87 -4
  125. package/src/memory/conversation-queries.ts +87 -1
  126. package/src/memory/conversation-title-service.ts +26 -4
  127. package/src/memory/db-init.ts +6 -0
  128. package/src/memory/graph/__tests__/conversation-graph-memory-v2-routing.test.ts +84 -3
  129. package/src/memory/graph/conversation-graph-memory.ts +18 -6
  130. package/src/memory/graph/tools.ts +6 -37
  131. package/src/memory/invite-store.ts +53 -0
  132. package/src/memory/llm-request-log-source-clickhouse.ts +7 -2
  133. package/src/memory/llm-request-log-store.ts +92 -1
  134. package/src/memory/memory-retrospective-enqueue.ts +1 -20
  135. package/src/memory/memory-retrospective-job.ts +33 -6
  136. package/src/memory/migrations/250-provider-connection-base-url-and-models.ts +28 -0
  137. package/src/memory/migrations/251-a2a-tasks.ts +49 -0
  138. package/src/memory/migrations/252-llm-request-log-agent-loop-exit-reason.ts +32 -0
  139. package/src/memory/migrations/index.ts +3 -0
  140. package/src/memory/migrations/registry.ts +8 -0
  141. package/src/memory/schema/a2a.ts +15 -0
  142. package/src/memory/schema/index.ts +1 -0
  143. package/src/memory/schema/inference.ts +2 -0
  144. package/src/memory/schema/infrastructure.ts +1 -0
  145. package/src/memory/v2/__tests__/activation-store.test.ts +25 -23
  146. package/src/memory/v2/__tests__/cli-command-store.test.ts +404 -0
  147. package/src/memory/v2/__tests__/frontmatter-sweep.test.ts +25 -4
  148. package/src/memory/v2/__tests__/injection.test.ts +190 -3
  149. package/src/memory/v2/__tests__/static-context.test.ts +12 -1
  150. package/src/memory/v2/activation-store.ts +14 -16
  151. package/src/memory/v2/cli-command-content.ts +19 -0
  152. package/src/memory/v2/cli-command-store.ts +304 -0
  153. package/src/memory/v2/frontmatter-sweep.ts +7 -1
  154. package/src/memory/v2/injection.ts +49 -20
  155. package/src/memory/v2/page-index.ts +38 -13
  156. package/src/memory/v2/static-context.ts +4 -4
  157. package/src/memory/v2/types.ts +23 -0
  158. package/src/messaging/providers/a2a/__tests__/deliver.test.ts +274 -0
  159. package/src/messaging/providers/a2a/deliver.ts +156 -0
  160. package/src/messaging/providers/gmail/client.ts +9 -2
  161. package/src/messaging/providers/index.ts +11 -2
  162. package/src/notifications/__tests__/broadcaster.test.ts +203 -0
  163. package/src/notifications/__tests__/decision-engine.test.ts +283 -0
  164. package/src/notifications/__tests__/deterministic-checks.test.ts +286 -0
  165. package/src/notifications/__tests__/emit-signal-home-feed.test.ts +1 -0
  166. package/src/notifications/__tests__/home-feed-side-effect.test.ts +430 -7
  167. package/src/notifications/adapters/macos.ts +12 -2
  168. package/src/notifications/broadcaster.ts +29 -4
  169. package/src/notifications/copy-composer.ts +17 -64
  170. package/src/notifications/decision-engine.ts +111 -44
  171. package/src/notifications/deterministic-checks.ts +96 -0
  172. package/src/notifications/emit-signal.ts +1 -0
  173. package/src/notifications/home-feed-side-effect.ts +85 -6
  174. package/src/notifications/signal.ts +0 -4
  175. package/src/notifications/types.ts +8 -0
  176. package/src/oauth/platform-connection.test.ts +43 -3
  177. package/src/oauth/platform-connection.ts +13 -4
  178. package/src/plugins/defaults/injectors.ts +38 -19
  179. package/src/plugins/external-plugin-loader.ts +82 -10
  180. package/src/plugins/types.ts +16 -7
  181. package/src/prompts/__tests__/system-prompt.test.ts +6 -51
  182. package/src/prompts/__tests__/task-progress-hint-section.test.ts +4 -8
  183. package/src/prompts/system-prompt.ts +0 -8
  184. package/src/prompts/templates/BOOTSTRAP.md +5 -5
  185. package/src/prompts/templates/system-sections.ts +0 -9
  186. package/src/providers/__tests__/inference.test.ts +2 -0
  187. package/src/providers/call-site-routing.ts +24 -6
  188. package/src/providers/connection-resolution.ts +63 -13
  189. package/src/providers/inference/__tests__/adapter-factory-openai-compatible.test.ts +74 -0
  190. package/src/providers/inference/__tests__/connections-openai-compatible.test.ts +175 -0
  191. package/src/providers/inference/__tests__/connections-status-label.test.ts +15 -0
  192. package/src/providers/inference/adapter-factory.ts +9 -20
  193. package/src/providers/inference/auth.ts +12 -0
  194. package/src/providers/inference/backfill.ts +14 -1
  195. package/src/providers/inference/connections.ts +85 -5
  196. package/src/providers/inference/resolve-auth.ts +2 -0
  197. package/src/providers/model-catalog.ts +199 -244
  198. package/src/providers/model-intents.ts +3 -3
  199. package/src/providers/openai/__tests__/chat-completions-provider-reasoning.test.ts +235 -0
  200. package/src/providers/openai/chat-completions-provider.ts +159 -6
  201. package/src/providers/openrouter/client.ts +42 -4
  202. package/src/providers/platform-proxy/constants.ts +3 -4
  203. package/src/providers/provider-catalog-visibility.ts +3 -1
  204. package/src/providers/provider-send-message.ts +27 -12
  205. package/src/providers/registry.ts +30 -1
  206. package/src/runtime/agent-wake.ts +61 -1
  207. package/src/runtime/auth/route-policy.ts +13 -0
  208. package/src/runtime/http-server.ts +7 -16
  209. package/src/runtime/http-types.ts +0 -47
  210. package/src/runtime/routes/__tests__/consolidation-routes.test.ts +258 -0
  211. package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +66 -4
  212. package/src/runtime/routes/__tests__/inference-provider-connection-routes.test.ts +275 -44
  213. package/src/runtime/routes/__tests__/llm-call-sites-routes.test.ts +12 -0
  214. package/src/runtime/routes/channel-availability-routes.ts +5 -0
  215. package/src/runtime/routes/consolidation-routes.ts +100 -0
  216. package/src/runtime/routes/conversation-query-routes.ts +70 -11
  217. package/src/runtime/routes/conversation-routes.ts +7 -0
  218. package/src/runtime/routes/index.ts +2 -0
  219. package/src/runtime/routes/inference-provider-connection-routes.ts +134 -1
  220. package/src/runtime/routes/integrations/a2a.ts +235 -0
  221. package/src/runtime/routes/llm-call-sites-routes.ts +11 -1
  222. package/src/runtime/routes/subagents-routes.ts +41 -0
  223. package/src/subagent/manager.ts +2 -0
  224. package/src/tools/memory/register.ts +1 -9
  225. package/src/tools/registry.ts +2 -2
  226. package/src/tools/types.ts +37 -2
  227. package/src/workspace/migrations/087-memory-router-balanced-profile.ts +91 -0
  228. package/src/workspace/migrations/registry.ts +2 -0
  229. package/src/__tests__/guardian-action-conversation-turn.test.ts +0 -441
  230. package/src/memory/graph/__tests__/remember-description.test.ts +0 -55
  231. package/src/runtime/guardian-action-conversation-turn.ts +0 -99
@@ -0,0 +1,272 @@
1
+ /**
2
+ * Tests for the `agent_loop_exit` event that `AgentLoop.run` emits when a run ends.
3
+ *
4
+ * Coverage targets:
5
+ * 1. **One emit per run** — the idempotency guard fires once, even if the
6
+ * code path would otherwise reach two emit sites (the empty-response
7
+ * throw → catch-block fallback case).
8
+ * 2. **Reason matches break site** — for each reachable break site, the
9
+ * emitted reason is the one documented in `AgentLoopExitReason`.
10
+ * 3. **Always the last AgentEvent of the run** — consumers can rely on
11
+ * positional ordering to find it.
12
+ *
13
+ * Sites not exercised here (`empty_response_exhausted`, `aborted_via_error`)
14
+ * require deeper provider fakery and are best covered by integration tests
15
+ * once we wire up the empty-response pipeline mock.
16
+ */
17
+ import { describe, expect, test } from "bun:test";
18
+
19
+ import type {
20
+ AgentEvent,
21
+ CheckpointDecision,
22
+ CheckpointInfo,
23
+ } from "../agent/loop.js";
24
+ import { AgentLoop } from "../agent/loop.js";
25
+ import type {
26
+ Message,
27
+ Provider,
28
+ ProviderResponse,
29
+ SendMessageOptions,
30
+ ToolDefinition,
31
+ } from "../providers/types.js";
32
+
33
+ // ---------------------------------------------------------------------------
34
+ // Helpers (mirrored from agent-loop.test.ts so this file is self-contained)
35
+ // ---------------------------------------------------------------------------
36
+
37
/**
 * Creates a scripted provider for tests.
 *
 * Each `sendMessage` call returns the next entry of `responses`; once the
 * script is exhausted, the final entry is returned for every further call.
 * All arguments passed to `sendMessage` are ignored.
 */
function createMockProvider(responses: ProviderResponse[]): {
  provider: Provider;
} {
  // Number of sendMessage calls served so far.
  let served = 0;

  const sendMessage = async (
    _messages: Message[],
    _tools?: ToolDefinition[],
    _systemPrompt?: string,
    _options?: SendMessageOptions,
  ): Promise<ProviderResponse> => {
    const scripted = responses[served];
    served += 1;
    // Past the end of the script: keep replaying the last response.
    return scripted ?? responses[responses.length - 1];
  };

  return { provider: { name: "mock", sendMessage } };
}
56
+
57
/**
 * Builds a canned provider response containing a single text block with
 * `text`, fixed usage numbers, and an `end_turn` stop reason.
 */
function textResponse(text: string): ProviderResponse {
  const content: ProviderResponse["content"] = [{ type: "text", text }];
  const usage = { inputTokens: 10, outputTokens: 5 };
  return { content, model: "mock-model", usage, stopReason: "end_turn" };
}
65
+
66
/**
 * Builds a canned provider response containing a single `tool_use` block
 * (with the given id, tool name, and input), fixed usage numbers, and a
 * `tool_use` stop reason.
 */
function toolUseResponse(
  id: string,
  name: string,
  input: Record<string, unknown>,
): ProviderResponse {
  const content: ProviderResponse["content"] = [
    { type: "tool_use", id, name, input },
  ];
  const usage = { inputTokens: 10, outputTokens: 5 };
  return { content, model: "mock-model", usage, stopReason: "tool_use" };
}
78
+
79
// The only tool definition handed to tool-enabled loops in this file.
const readFileTool: ToolDefinition = {
  name: "read_file",
  description: "Read a file",
  input_schema: {
    type: "object",
    properties: {
      path: { type: "string" },
    },
  },
};

// Tool list passed to AgentLoop in the tool-driven tests.
const dummyTools: ToolDefinition[] = [readFileTool];

// Single-turn user input used as the starting history for every run.
const userMessage: Message = {
  role: "user",
  content: [
    {
      type: "text",
      text: "Hello",
    },
  ],
};
91
+
92
/**
 * Returns the LAST `agent_loop_exit` event in `events`, or `undefined` when
 * no such event was recorded.
 *
 * The scan runs from the end of the array so the result matches the helper's
 * name even if more than one exit event is present; a front-to-back search
 * would report the first one instead.
 */
function lastExitEvent(
  events: AgentEvent[],
): Extract<AgentEvent, { type: "agent_loop_exit" }> | undefined {
  const isExit = (
    e: AgentEvent,
  ): e is Extract<AgentEvent, { type: "agent_loop_exit" }> =>
    e.type === "agent_loop_exit";

  for (let i = events.length - 1; i >= 0; i--) {
    const candidate = events[i];
    if (candidate !== undefined && isExit(candidate)) {
      return candidate;
    }
  }
  return undefined;
}
100
+
101
/** Counts how many `agent_loop_exit` events appear in `events`. */
function countExitEvents(events: AgentEvent[]): number {
  let total = 0;
  for (const event of events) {
    if (event.type === "agent_loop_exit") {
      total += 1;
    }
  }
  return total;
}
104
+
105
+ // ---------------------------------------------------------------------------
106
+ // Tests
107
+ // ---------------------------------------------------------------------------
108
+
109
describe("AgentLoop exit-reason instrumentation", () => {
  /**
   * Creates an event sink for one run: `onEvent` appends every event it
   * receives to `events`, in emission order.
   */
  const recordEvents = (): {
    events: AgentEvent[];
    onEvent: (event: AgentEvent) => void;
  } => {
    const events: AgentEvent[] = [];
    return {
      events,
      onEvent: (event) => {
        events.push(event);
      },
    };
  };

  test("emits exit event exactly once with 'no_tool_calls' on plain text response", async () => {
    const { provider } = createMockProvider([textResponse("Hi there!")]);
    const loop = new AgentLoop(provider, "system prompt");
    const { events, onEvent } = recordEvents();

    await loop.run([userMessage], onEvent);

    expect(countExitEvents(events)).toBe(1);
    expect(lastExitEvent(events)?.reason).toBe("no_tool_calls");
  });

  test("agent_loop_exit is the last event emitted", async () => {
    const { provider } = createMockProvider([textResponse("Hi there!")]);
    const loop = new AgentLoop(provider, "system prompt");
    const { events, onEvent } = recordEvents();

    await loop.run([userMessage], onEvent);

    // Guard first so the positional check below cannot pass vacuously.
    expect(events.length).toBeGreaterThan(0);
    expect(events[events.length - 1].type).toBe("agent_loop_exit");
  });

  test("emits 'aborted_pre_call' when signal is already aborted at run start", async () => {
    const { provider } = createMockProvider([textResponse("never sent")]);
    const loop = new AgentLoop(provider, "system prompt");

    // Abort before the run starts.
    const controller = new AbortController();
    controller.abort();

    const { events, onEvent } = recordEvents();
    await loop.run([userMessage], onEvent, controller.signal);

    expect(countExitEvents(events)).toBe(1);
    expect(lastExitEvent(events)?.reason).toBe("aborted_pre_call");
  });

  test("emits 'yield_to_user' when tool result requests yieldToUser", async () => {
    const { provider } = createMockProvider([
      toolUseResponse("t1", "read_file", { path: "/a.txt" }),
    ]);
    const toolExecutor = async () => ({
      content: "ok",
      isError: false,
      yieldToUser: true,
    });
    const loop = new AgentLoop(provider, "system", {}, dummyTools, toolExecutor);

    const { events, onEvent } = recordEvents();
    await loop.run([userMessage], onEvent);

    expect(countExitEvents(events)).toBe(1);
    expect(lastExitEvent(events)?.reason).toBe("yield_to_user");
  });

  test("emits 'checkpoint_yield' when onCheckpoint returns 'yield'", async () => {
    const { provider } = createMockProvider([
      toolUseResponse("t1", "read_file", { path: "/a.txt" }),
      textResponse("never reached"),
    ]);
    const toolExecutor = async () => ({ content: "ok", isError: false });
    const loop = new AgentLoop(provider, "system", {}, dummyTools, toolExecutor);

    // Always ask the loop to yield at the checkpoint.
    const onCheckpoint = (_info: CheckpointInfo): CheckpointDecision => "yield";

    const { events, onEvent } = recordEvents();
    await loop.run([userMessage], onEvent, undefined, undefined, onCheckpoint);

    expect(countExitEvents(events)).toBe(1);
    expect(lastExitEvent(events)?.reason).toBe("checkpoint_yield");
  });

  test("emits 'error' when provider throws an unhandled error", async () => {
    const provider: Provider = {
      name: "broken",
      async sendMessage(): Promise<ProviderResponse> {
        throw new Error("provider exploded");
      },
    };
    const loop = new AgentLoop(provider, "system prompt");

    const { events, onEvent } = recordEvents();
    await loop.run([userMessage], onEvent);

    expect(countExitEvents(events)).toBe(1);
    expect(lastExitEvent(events)?.reason).toBe("error");
  });

  test("does not double-emit when multiple exit conditions stack", async () => {
    // Tool returns yieldToUser AND the controller is aborted post-response,
    // so two exit conditions are true at once. Which reason wins is
    // deliberately not asserted here; only that exactly one exit event is
    // emitted for the run.
    const controller = new AbortController();
    const { provider } = createMockProvider([
      toolUseResponse("t1", "read_file", { path: "/a.txt" }),
    ]);
    const toolExecutor = async () => {
      // Abort while the tool runs, i.e. after the provider has responded.
      controller.abort();
      return { content: "ok", isError: false, yieldToUser: true };
    };
    const loop = new AgentLoop(provider, "system", {}, dummyTools, toolExecutor);

    const { events, onEvent } = recordEvents();
    await loop.run([userMessage], onEvent, controller.signal);

    expect(countExitEvents(events)).toBe(1);
  });

  test("emits 'aborted_during_tools' when signal aborts after tool execution", async () => {
    const controller = new AbortController();
    const { provider } = createMockProvider([
      toolUseResponse("t1", "read_file", { path: "/a.txt" }),
    ]);
    // Abort the signal inside the tool executor so by the time the loop
    // re-checks signal.aborted post-tools the abort has landed.
    const toolExecutor = async () => {
      controller.abort();
      return { content: "ok", isError: false };
    };
    const loop = new AgentLoop(provider, "system", {}, dummyTools, toolExecutor);

    const { events, onEvent } = recordEvents();
    await loop.run([userMessage], onEvent, controller.signal);

    expect(countExitEvents(events)).toBe(1);
    expect(lastExitEvent(events)?.reason).toBe("aborted_during_tools");
  });
});
@@ -0,0 +1,195 @@
1
+ /**
2
+ * Integration tests for the agent loop's `provider_error` recording path.
3
+ *
4
+ * When the `llmCall` pipeline throws (provider rejected the request before
5
+ * returning a usable response), the loop must emit a `provider_error` event
6
+ * carrying the loop-level raw request and the thrown error so downstream
7
+ * consumers can persist an `llm_request_logs` row. Without this, rejected
8
+ * calls leave nothing in the LLM inspector — only a pino log line.
9
+ *
10
+ * Coverage:
11
+ * - Emits `provider_error` with `rawRequest`, `error`, and `actualProvider`
12
+ * when the provider throws a `ProviderError`.
13
+ * - `rawRequest` carries the message history, tools, and system prompt the
14
+ * loop attempted to send — so the row replays/debugs cleanly.
15
+ * - `actualProvider` echoes `ProviderError.provider` when available, falling
16
+ * back to `provider.name` for non-ProviderError throws.
17
+ * - The error is still re-thrown internally (the existing `error` event
18
+ * still fires after the new `provider_error` event), preserving the
19
+ * outer-catch behavior (abort/Sentry/break).
20
+ * - Skips emission on user-aborted runs — there is no provider rejection
21
+ * worth recording when the user cancelled.
22
+ */
23
+
24
+ import { describe, expect, test } from "bun:test";
25
+
26
+ import type { AgentEvent } from "../agent/loop.js";
27
+ import { AgentLoop } from "../agent/loop.js";
28
+ import type {
29
+ Message,
30
+ Provider,
31
+ ProviderResponse,
32
+ SendMessageOptions,
33
+ ToolDefinition,
34
+ } from "../providers/types.js";
35
+ import { ProviderError } from "../util/errors.js";
36
+
37
/**
 * Creates a provider named `name` whose `sendMessage` always rejects.
 *
 * Every call is logged to `calls` first (a shallow copy of the messages
 * array plus the tools and system prompt it was given), then the error
 * produced by `throwFn()` is thrown. Tests read `calls` to see what the
 * loop attempted to send.
 */
function makeThrowingProvider(
  name: string,
  throwFn: () => Error,
): {
  provider: Provider;
  calls: Array<{
    messages: Message[];
    tools?: ToolDefinition[];
    systemPrompt?: string;
  }>;
} {
  const calls: ReturnType<typeof makeThrowingProvider>["calls"] = [];

  const sendMessage = async (
    messages: Message[],
    tools?: ToolDefinition[],
    systemPrompt?: string,
    _options?: SendMessageOptions,
  ): Promise<ProviderResponse> => {
    // Record the attempt before failing; the messages array is copied so
    // the entry keeps its own array even if the caller's array changes.
    const attempt = { messages: messages.slice(), tools, systemPrompt };
    calls.push(attempt);
    throw throwFn();
  };

  return { provider: { name, sendMessage }, calls };
}
72
+
73
describe("AgentLoop provider_error event emission", () => {
  /** Fresh single-turn history ("hi" from the user) for each run. */
  const hiHistory = (): Message[] => [
    { role: "user", content: [{ type: "text", text: "hi" }] },
  ];

  /** Runs `loop` on the "hi" history and returns every event it emitted. */
  const runAndCollect = async (
    loop: AgentLoop,
    signal?: AbortSignal,
  ): Promise<AgentEvent[]> => {
    const seen: AgentEvent[] = [];
    const sink = (e: AgentEvent): void => {
      seen.push(e);
    };
    // Only pass a third argument when a signal was supplied, so the
    // no-signal runs call `run` with exactly two arguments.
    if (signal === undefined) {
      await loop.run(hiHistory(), sink);
    } else {
      await loop.run(hiHistory(), sink, signal);
    }
    return seen;
  };

  test("emits provider_error with loop-level rawRequest when provider throws ProviderError", async () => {
    const thrown = new ProviderError(
      "Anthropic API error (429): rate limited",
      "anthropic",
      429,
      { retryAfterMs: 1500 },
    );
    const { provider, calls } = makeThrowingProvider("anthropic", () => thrown);
    const loop = new AgentLoop(provider, "you are a helpful assistant");

    const events = await runAndCollect(loop);

    // The provider was invoked exactly once.
    expect(calls).toHaveLength(1);

    const recorded = events.find((e) => e.type === "provider_error");
    expect(recorded).toBeDefined();
    if (recorded?.type !== "provider_error") {
      throw new Error("type narrowing");
    }
    expect(recorded.error).toBe(thrown);
    expect(recorded.actualProvider).toBe("anthropic");

    // rawRequest is expected to carry the loop-level abstract shape
    // (messages, system prompt, dispatching provider name) rather than any
    // provider-specific payload, since the provider threw before
    // returning one.
    const raw = recorded.rawRequest as Record<string, unknown>;
    expect(raw.provider).toBe("anthropic");
    expect(raw.systemPrompt).toBe("you are a helpful assistant");
    expect(Array.isArray(raw.messages)).toBe(true);
    expect((raw.messages as Message[])[0].role).toBe("user");
  });

  test("error event still fires after provider_error (outer catch behavior unchanged)", async () => {
    const thrown = new ProviderError(
      "Gemini API error (500): internal",
      "gemini",
      500,
    );
    const { provider } = makeThrowingProvider("gemini", () => thrown);
    const loop = new AgentLoop(provider, "system");

    const events = await runAndCollect(loop);

    const positionOf = (kind: AgentEvent["type"]): number =>
      events.findIndex((e) => e.type === kind);
    const providerErrorIdx = positionOf("provider_error");
    const errorIdx = positionOf("error");

    expect(providerErrorIdx).toBeGreaterThanOrEqual(0);
    expect(errorIdx).toBeGreaterThanOrEqual(0);
    // Ordering matters: a consumer that stops listening on the generic
    // `error` event must already have received the `provider_error`
    // event for the rejected call.
    expect(providerErrorIdx).toBeLessThan(errorIdx);
  });

  test("falls back to provider.name when a non-ProviderError is thrown", async () => {
    const thrown = new Error("unexpected SDK boom");
    const { provider } = makeThrowingProvider("openai", () => thrown);
    const loop = new AgentLoop(provider, "system");

    const events = await runAndCollect(loop);

    const recorded = events.find((e) => e.type === "provider_error");
    expect(recorded).toBeDefined();
    if (recorded?.type !== "provider_error") {
      throw new Error("type narrowing");
    }
    // A plain Error has no `.provider` field, so the event is expected to
    // report the dispatching provider's `name` instead.
    expect(recorded.actualProvider).toBe("openai");
    expect(recorded.error).toBe(thrown);
  });

  test("does NOT emit provider_error on user-aborted runs", async () => {
    const controller = new AbortController();
    const thrown = new Error("aborted");
    const { provider } = makeThrowingProvider("anthropic", () => {
      // Abort first, then hand back the error, so the signal is already
      // aborted by the time the loop observes the throw.
      controller.abort();
      return thrown;
    });
    const loop = new AgentLoop(provider, "system");

    const events = await runAndCollect(loop, controller.signal);

    // A cancelled run must not produce a provider_error event.
    const recorded = events.find((e) => e.type === "provider_error");
    expect(recorded).toBeUndefined();
  });
});
@@ -1,6 +1,46 @@
1
1
  import { describe, expect, it } from "bun:test";
2
2
 
3
- import { canonicalDateTimeKey } from "../context/compactor.js";
3
+ import {
4
+ adjustTailIndexForToolPairing,
5
+ canonicalDateTimeKey,
6
+ } from "../context/compactor.js";
7
+ import type { Message } from "../providers/types.js";
8
+
9
/** Builds a user message holding a single text block. */
function userText(text: string): Message {
  return { role: "user", content: [{ type: "text", text }] };
}
13
+
14
/** Builds a user message holding a single `tool_result` block for `toolUseId`. */
function userToolResult(toolUseId: string): Message {
  return {
    role: "user",
    content: [{ type: "tool_result", tool_use_id: toolUseId, content: "ok" }],
  };
}
18
+
19
/**
 * Builds a user message holding a `tool_result` block for `toolUseId`
 * followed by a text block.
 */
function userToolResultAndText(toolUseId: string, text: string): Message {
  const content: Message["content"] = [
    { type: "tool_result", tool_use_id: toolUseId, content: "ok" },
    { type: "text", text },
  ];
  return { role: "user", content };
}
26
+
27
/** Builds an assistant message holding a single `Bash` `tool_use` block with empty input. */
function assistantToolUse(id: string): Message {
  return {
    role: "assistant",
    content: [{ type: "tool_use", id, name: "Bash", input: {} }],
  };
}
31
+
32
/** Builds an assistant message holding a single text block. */
function assistantText(text: string): Message {
  return { role: "assistant", content: [{ type: "text", text }] };
}
36
+
37
/**
 * Builds an assistant message holding a `server_tool_use` block for
 * `web_search` and its matching `web_search_tool_result` block, both keyed
 * by `id`.
 */
function assistantWebSearch(id: string): Message {
  const content: Message["content"] = [
    { type: "server_tool_use", id, name: "web_search", input: {} },
    { type: "web_search_tool_result", tool_use_id: id, content: [] },
  ];
  return { role: "assistant", content };
}
4
44
 
5
45
  describe("canonicalDateTimeKey", () => {
6
46
  const stored = "2026-04-02 (Thursday) 01:52:33 -05:00 (America/Chicago)";
@@ -39,3 +79,69 @@ describe("canonicalDateTimeKey", () => {
39
79
  expect(canonicalDateTimeKey("01:52:33")).toBeNull();
40
80
  });
41
81
  });
82
+
83
describe("adjustTailIndexForToolPairing", () => {
  it("returns tailIndex unchanged when the tail starts on a clean user turn", () => {
    const history: Message[] = [
      userText("hi"),
      assistantText("hello"),
      userText("how are you"),
    ];

    const adjusted = adjustTailIndexForToolPairing(history, 2);

    expect(adjusted).toBe(2);
  });

  it("walks back past the orphan tool_result cluster to the prior user turn", () => {
    // Index 4 is a tool_result; the expected tail start is the plain user
    // turn at index 2.
    const history: Message[] = [
      userText("setup"),
      assistantText("ok"),
      userText("run a command"),
      assistantToolUse("X"),
      userToolResult("X"),
    ];

    const adjusted = adjustTailIndexForToolPairing(history, 4);

    expect(adjusted).toBe(2);
  });

  it("keeps walking when the candidate also leads with tool_result", () => {
    // Two consecutive tool_use/tool_result pairs sit after the plain user
    // turn at index 1.
    const history: Message[] = [
      userText("first"),
      userText("second"),
      assistantToolUse("X1"),
      userToolResult("X1"),
      assistantToolUse("X2"),
      userToolResult("X2"),
    ];

    const adjusted = adjustTailIndexForToolPairing(history, 5);

    expect(adjusted).toBe(1);
  });

  it("returns 0 when the walk falls off the front of the array", () => {
    // No plain-text user turn exists anywhere in this history.
    const history: Message[] = [
      userToolResult("X"),
      assistantToolUse("Y"),
      userToolResult("Y"),
    ];

    const adjusted = adjustTailIndexForToolPairing(history, 2);

    expect(adjusted).toBe(0);
  });

  it("ignores server-side web_search_tool_result blocks", () => {
    const history: Message[] = [
      userText("look it up"),
      assistantWebSearch("WS1"),
      userText("thanks"),
    ];

    const adjusted = adjustTailIndexForToolPairing(history, 2);

    expect(adjusted).toBe(2);
  });

  it("treats mixed tool_result + text user messages as unsafe", () => {
    const history: Message[] = [
      userText("kick off"),
      assistantToolUse("X"),
      userToolResultAndText("X", "and here is more text"),
    ];

    const adjusted = adjustTailIndexForToolPairing(history, 2);

    expect(adjusted).toBe(0);
  });

  it("returns tailIndex unchanged when tailIndex is 0", () => {
    const history: Message[] = [userText("only one")];

    const adjusted = adjustTailIndexForToolPairing(history, 0);

    expect(adjusted).toBe(0);
  });
});