@runtypelabs/persona 3.21.2 → 3.22.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/README.md +67 -0
  2. package/dist/animations/glyph-cycle.d.cts +1 -1
  3. package/dist/animations/glyph-cycle.d.ts +1 -1
  4. package/dist/animations/{types-CWPIj66R.d.cts → types-BZVr1YOV.d.cts} +10 -0
  5. package/dist/animations/{types-CWPIj66R.d.ts → types-BZVr1YOV.d.ts} +10 -0
  6. package/dist/animations/wipe.d.cts +1 -1
  7. package/dist/animations/wipe.d.ts +1 -1
  8. package/dist/index.cjs +50 -43
  9. package/dist/index.cjs.map +1 -1
  10. package/dist/index.d.cts +474 -6
  11. package/dist/index.d.ts +474 -6
  12. package/dist/index.global.js +98 -88
  13. package/dist/index.global.js.map +1 -1
  14. package/dist/index.js +48 -41
  15. package/dist/index.js.map +1 -1
  16. package/dist/smart-dom-reader.cjs +1875 -0
  17. package/dist/smart-dom-reader.d.cts +4521 -0
  18. package/dist/smart-dom-reader.d.ts +4521 -0
  19. package/dist/smart-dom-reader.js +1848 -0
  20. package/dist/theme-editor.cjs +2282 -90
  21. package/dist/theme-editor.d.cts +348 -1
  22. package/dist/theme-editor.d.ts +348 -1
  23. package/dist/theme-editor.js +2267 -90
  24. package/package.json +9 -2
  25. package/src/client.test.ts +165 -0
  26. package/src/client.ts +144 -23
  27. package/src/components/composer-parts.test.ts +34 -0
  28. package/src/components/composer-parts.ts +9 -6
  29. package/src/index.ts +26 -0
  30. package/src/session.test.ts +258 -0
  31. package/src/session.ts +886 -30
  32. package/src/session.webmcp.test.ts +815 -0
  33. package/src/smart-dom-reader.test.ts +135 -0
  34. package/src/smart-dom-reader.ts +135 -0
  35. package/src/theme-editor/color-utils.test.ts +59 -0
  36. package/src/theme-editor/color-utils.ts +38 -2
  37. package/src/theme-editor/index.ts +35 -0
  38. package/src/theme-editor/webmcp/coerce.test.ts +86 -0
  39. package/src/theme-editor/webmcp/coerce.ts +286 -0
  40. package/src/theme-editor/webmcp/index.ts +45 -0
  41. package/src/theme-editor/webmcp/summary.ts +324 -0
  42. package/src/theme-editor/webmcp/tools.test.ts +205 -0
  43. package/src/theme-editor/webmcp/tools.ts +795 -0
  44. package/src/theme-editor/webmcp/types.ts +87 -0
  45. package/src/types.ts +186 -0
  46. package/src/ui.composer-keyboard.test.ts +229 -0
  47. package/src/ui.ts +127 -5
  48. package/src/utils/composer-history.test.ts +128 -0
  49. package/src/utils/composer-history.ts +113 -0
  50. package/src/utils/message-fingerprint.test.ts +20 -0
  51. package/src/utils/message-fingerprint.ts +2 -0
  52. package/src/utils/smart-dom-adapter.test.ts +257 -0
  53. package/src/utils/smart-dom-adapter.ts +217 -0
  54. package/{LICENSE → src/vendor/smart-dom-reader/LICENSE} +2 -2
  55. package/src/vendor/smart-dom-reader/README.md +61 -0
  56. package/src/vendor/smart-dom-reader/index.d.ts +476 -0
  57. package/src/vendor/smart-dom-reader/index.js +1618 -0
  58. package/src/webmcp-bridge.test.ts +429 -0
  59. package/src/webmcp-bridge.ts +547 -0
@@ -0,0 +1,815 @@
1
+ import { describe, it, expect, vi, beforeEach } from "vitest";
2
+
3
+ import { AgentWidgetSession } from "./session";
4
+ import type {
5
+ AgentWidgetMessage,
6
+ WebMcpConfirmInfo,
7
+ WebMcpToolResult,
8
+ } from "./types";
9
+
10
+ // Build a session whose client has WebMCP methods overridden by spies.
11
+ const makeSession = (overrides?: {
12
+ executeReturn?: WebMcpToolResult;
13
+ resumeOk?: boolean;
14
+ isOperational?: boolean;
15
+ executeImpl?: () => Promise<WebMcpToolResult>;
16
+ }) => {
17
+ const session = new AgentWidgetSession(
18
+ { apiUrl: "http://test", webmcp: { enabled: true } },
19
+ {
20
+ onMessagesChanged: () => undefined,
21
+ onStatusChanged: () => undefined,
22
+ onStreamingChanged: () => undefined,
23
+ },
24
+ );
25
+
26
+ const client = (session as unknown as { client: Record<string, unknown> })
27
+ .client;
28
+
29
+ const executeSpy = vi.fn(
30
+ overrides?.executeImpl ??
31
+ (async (): Promise<WebMcpToolResult> =>
32
+ overrides?.executeReturn ?? {
33
+ content: [{ type: "text", text: "ok" }],
34
+ }),
35
+ );
36
+ // Mimic AgentWidgetClient.executeWebMcpToolCall — returns null when the bridge
37
+ // is not configured. Toggled via `overrides.isOperational`, which also drives `isWebMcpOperational` below.
38
+ client.executeWebMcpToolCall = vi.fn(
39
+ () =>
40
+ overrides?.isOperational === false
41
+ ? null
42
+ : executeSpy(),
43
+ );
44
+ client.isWebMcpOperational = vi.fn(
45
+ () => overrides?.isOperational !== false,
46
+ );
47
+
48
+ const resumeSpy = vi.fn(async () => new Response(new Blob([""]), {
49
+ status: overrides?.resumeOk === false ? 500 : 200,
50
+ }));
51
+ client.resumeFlow = resumeSpy;
52
+
53
+ // Stub `connectStream` so we don't try to parse the empty body.
54
+ (session as unknown as { connectStream: () => Promise<void> }).connectStream =
55
+ vi.fn(async () => undefined);
56
+
57
+ return { session, executeSpy, resumeSpy, client };
58
+ };
59
+
60
+ const awaitingMessage = (
61
+ id: string,
62
+ name: string,
63
+ executionId: string = "exec-1",
64
+ ): AgentWidgetMessage => ({
65
+ id: `msg-${id}-${executionId}`,
66
+ role: "assistant",
67
+ content: "",
68
+ createdAt: new Date().toISOString(),
69
+ agentMetadata: { executionId, awaitingLocalTool: true },
70
+ toolCall: {
71
+ id,
72
+ name,
73
+ status: "complete",
74
+ args: { q: "shoes" },
75
+ },
76
+ });
77
+
78
+ describe("AgentWidgetSession — WebMCP resolve", () => {
79
+ beforeEach(() => {
80
+ vi.clearAllMocks();
81
+ });
82
+
83
+ it("posts result to /resume on the happy path", async () => {
84
+ const { session, executeSpy, resumeSpy } = makeSession({
85
+ executeReturn: {
86
+ content: [{ type: "text", text: "hi" }],
87
+ },
88
+ });
89
+ await session.resolveWebMcpToolCall(
90
+ awaitingMessage("tool-1", "webmcp:search"),
91
+ );
92
+ expect(executeSpy).toHaveBeenCalledTimes(1);
93
+ expect(resumeSpy).toHaveBeenCalledWith(
94
+ "exec-1",
95
+ { "webmcp:search": { content: [{ type: "text", text: "hi" }] } },
96
+ expect.objectContaining({ signal: expect.any(AbortSignal) }),
97
+ );
98
+ });
99
+
100
+ it("still resumes (with isError) when the bridge is not operational", async () => {
101
+ // BugBot finding #1: previously, handleEvent skipped resolveWebMcpToolCall
102
+ // entirely when `isWebMcpOperational()` was false — leaving the dispatch
103
+ // hung. The session must surface an actionable error to /resume instead.
104
+ const { session, resumeSpy } = makeSession({
105
+ isOperational: false,
106
+ });
107
+ await session.resolveWebMcpToolCall(
108
+ awaitingMessage("tool-1", "webmcp:search"),
109
+ );
110
+ expect(resumeSpy).toHaveBeenCalledTimes(1);
111
+ const call = resumeSpy.mock.calls[0]!;
112
+ const payload = (call as unknown[])[1] as {
113
+ "webmcp:search": WebMcpToolResult;
114
+ };
115
+ expect(payload["webmcp:search"].isError).toBe(true);
116
+ });
117
+
118
+ it("dedupes re-emitted step_await for the same toolCall.id", async () => {
119
+ // BugBot finding #2: an SSE re-emit of the same step_await message would
120
+ // re-set `awaitingLocalTool: true` after the local upsertMessage cleared
121
+ // it. The handled-set must outlive the resolve round-trip.
122
+ const { session, executeSpy, resumeSpy } = makeSession();
123
+ const msg = awaitingMessage("tool-1", "webmcp:search");
124
+
125
+ await session.resolveWebMcpToolCall(msg);
126
+ await session.resolveWebMcpToolCall(msg); // re-emit
127
+ await session.resolveWebMcpToolCall(msg); // re-emit again
128
+
129
+ expect(executeSpy).toHaveBeenCalledTimes(1);
130
+ expect(resumeSpy).toHaveBeenCalledTimes(1);
131
+ });
132
+
133
+ it("allows retry on the same toolCall.id when /resume fails", async () => {
134
+ // BugBot finding #4: a permanent handled-set would block the only retry
135
+ // path when `/resume` itself fails (network / server). The dedupe should
136
+ // promote to "resolved" only AFTER /resume succeeds; failures stay
137
+ // retryable on the next step_await re-emit.
138
+ const { session, executeSpy, resumeSpy, client } = makeSession();
139
+ // First attempt: resume throws.
140
+ (client.resumeFlow as ReturnType<typeof vi.fn>).mockImplementationOnce(
141
+ async () => {
142
+ throw new Error("network down");
143
+ },
144
+ );
145
+ // Second attempt: resume succeeds.
146
+ (client.resumeFlow as ReturnType<typeof vi.fn>).mockImplementationOnce(
147
+ async () =>
148
+ new Response(new Blob([""]), {
149
+ status: 200,
150
+ }),
151
+ );
152
+
153
+ const msg = awaitingMessage("tool-1", "webmcp:search");
154
+ await session.resolveWebMcpToolCall(msg);
155
+ await session.resolveWebMcpToolCall(msg); // retry — must be allowed
156
+ await session.resolveWebMcpToolCall(msg); // post-success — must be blocked
157
+
158
+ expect(executeSpy).toHaveBeenCalledTimes(2);
159
+ expect(resumeSpy).toHaveBeenCalledTimes(2);
160
+ });
161
+
162
+ it("threads an AbortSignal into resumeFlow", async () => {
163
+ // BugBot finding #6: cancel() needs to propagate into /resume.
164
+ const { session, resumeSpy } = makeSession();
165
+ await session.resolveWebMcpToolCall(
166
+ awaitingMessage("tool-1", "webmcp:search"),
167
+ );
168
+ const call = resumeSpy.mock.calls[0]!;
169
+ const opts = (call as unknown[])[2] as { signal?: AbortSignal } | undefined;
170
+ expect(opts?.signal).toBeInstanceOf(AbortSignal);
171
+ expect(opts!.signal!.aborted).toBe(false);
172
+ });
173
+
174
+ it("aborts an in-flight resolve when cancel() is called", async () => {
175
+ // BugBot finding #6 (cont.) — the bridge execute race should reject on
176
+ // cancel so the dispatch doesn't fire a stale /resume after the user
177
+ // stops.
178
+ let release: () => void = () => undefined;
179
+ const stuck = new Promise<WebMcpToolResult>((resolve) => {
180
+ release = () =>
181
+ resolve({ content: [{ type: "text", text: "late" }] });
182
+ });
183
+ const { session, resumeSpy } = makeSession({ executeImpl: () => stuck });
184
+ const inflight = session.resolveWebMcpToolCall(
185
+ awaitingMessage("tool-1", "webmcp:slow"),
186
+ );
187
+ session.cancel();
188
+ // Resolve the parked execute after cancel() so `inflight` can settle; the late result must not reach /resume.
189
+ release();
190
+ await inflight;
191
+ expect(resumeSpy).not.toHaveBeenCalled();
192
+ });
193
+
194
+ it("does NOT abort the shared session abortController", async () => {
195
+ // The chained-turn fix: a webmcp resolve must leave `this.abortController`
196
+ // untouched. In a chain (tool A → /resume → tool B) that shared controller
197
+ // is still piping A's resume SSE — the very stream that just delivered B's
198
+ // step_await — so aborting it strands B (it never executes; its /resume is
199
+ // never POSTed; the dispatch hangs forever). Resolves use a dedicated
200
+ // per-call controller tracked in `webMcpResolveControllers` instead.
201
+ const { session } = makeSession();
202
+ const shared = new AbortController();
203
+ (session as unknown as { abortController: AbortController | null })
204
+ .abortController = shared;
205
+ await session.resolveWebMcpToolCall(
206
+ awaitingMessage("tool-1", "webmcp:search"),
207
+ );
208
+ expect(shared.signal.aborted).toBe(false);
209
+ expect(
210
+ (session as unknown as { abortController: AbortController | null })
211
+ .abortController,
212
+ ).toBe(shared);
213
+ });
214
+
215
+ it("a second resolve does not abort the first (chained / parallel)", () => {
216
+ // Two `webmcp:*` resolves in one turn each own a controller; neither aborts
217
+ // the other. (Previously the second pre-aborted the shared controller,
218
+ // killing the first / the in-flight resume stream.)
219
+ const stuck = new Promise<WebMcpToolResult>(() => undefined);
220
+ const { session } = makeSession({ executeImpl: () => stuck });
221
+ const set = (
222
+ session as unknown as { webMcpResolveControllers: Set<AbortController> }
223
+ ).webMcpResolveControllers;
224
+
225
+ void session.resolveWebMcpToolCall(awaitingMessage("tool-1", "webmcp:search"));
226
+ const first = [...set][0]!;
227
+ void session.resolveWebMcpToolCall(awaitingMessage("tool-2", "webmcp:add"));
228
+
229
+ expect(set.size).toBe(2);
230
+ expect(first.signal.aborted).toBe(false);
231
+ });
232
+
233
+ it("cancel() aborts and clears every in-flight resolve controller", () => {
234
+ const stuck = new Promise<WebMcpToolResult>(() => undefined);
235
+ const { session } = makeSession({ executeImpl: () => stuck });
236
+ const set = (
237
+ session as unknown as { webMcpResolveControllers: Set<AbortController> }
238
+ ).webMcpResolveControllers;
239
+
240
+ void session.resolveWebMcpToolCall(awaitingMessage("tool-1", "webmcp:search"));
241
+ void session.resolveWebMcpToolCall(awaitingMessage("tool-2", "webmcp:add"));
242
+ const controllers = [...set];
243
+ expect(controllers).toHaveLength(2);
244
+
245
+ session.cancel();
246
+ expect(set.size).toBe(0);
247
+ for (const c of controllers) expect(c.signal.aborted).toBe(true);
248
+ });
249
+
250
+ it("clearMessages() tears down in-flight resolve controllers", () => {
251
+ const stuck = new Promise<WebMcpToolResult>(() => undefined);
252
+ const { session } = makeSession({ executeImpl: () => stuck });
253
+ const set = (
254
+ session as unknown as { webMcpResolveControllers: Set<AbortController> }
255
+ ).webMcpResolveControllers;
256
+
257
+ void session.resolveWebMcpToolCall(awaitingMessage("tool-1", "webmcp:search"));
258
+ const c = [...set][0]!;
259
+
260
+ session.clearMessages();
261
+ expect(set.size).toBe(0);
262
+ expect(c.signal.aborted).toBe(true);
263
+ });
264
+
265
+ it("a microtask-deferred resolve bails if a teardown bumped the epoch", async () => {
266
+ // Problem #3 from the reverted iter-10: a resolve deferred via
267
+ // queueMicrotask must not escape a teardown that happened between queue and
268
+ // run. handleEvent captures the epoch; clearMessages bumps it; the deferred
269
+ // resolve sees the mismatch and never executes the page tool.
270
+ const { session, executeSpy } = makeSession();
271
+ (
272
+ session as unknown as { handleEvent: (e: unknown) => void }
273
+ ).handleEvent({
274
+ type: "message",
275
+ message: awaitingMessage("tool-1", "webmcp:search"),
276
+ });
277
+ // Teardown BEFORE the queued microtask runs.
278
+ session.clearMessages();
279
+ // Flush the microtask queue.
280
+ await Promise.resolve();
281
+ await Promise.resolve();
282
+ expect(executeSpy).not.toHaveBeenCalled();
283
+ });
284
+
285
+ it("a stale step_await re-emit does not resurrect awaitingLocalTool once resolved", () => {
286
+ // BugBot: a duplicate step_await (awaitingLocalTool:true) for an
287
+ // already-resolved webmcp tool must not flip the message back to awaiting
288
+ // and show a stuck local-tool wait. upsertMessage clears it when the
289
+ // tool's `${executionId}:${toolCallId}` key is inflight/resolved.
290
+ const session = makeSession().session;
291
+ const s = session as unknown as {
292
+ webMcpResolvedKeys: Set<string>;
293
+ upsertMessage: (m: AgentWidgetMessage) => void;
294
+ messages: AgentWidgetMessage[];
295
+ };
296
+ s.webMcpResolvedKeys.add("exec-1:tool-1");
297
+ // Baseline: the resolved message with awaiting already cleared.
298
+ s.upsertMessage({
299
+ ...awaitingMessage("tool-1", "webmcp:search"),
300
+ agentMetadata: { executionId: "exec-1", awaitingLocalTool: false },
301
+ });
302
+ // Stale re-emit flips awaiting back to true on the wire.
303
+ s.upsertMessage(awaitingMessage("tool-1", "webmcp:search"));
304
+ const stored = s.messages.find((m) => m.toolCall?.id === "tool-1");
305
+ expect(stored?.agentMetadata?.awaitingLocalTool).toBe(false);
306
+ });
307
+
308
+ it("an error event does not clear streaming while a webmcp resolve is in flight", () => {
309
+ // BugBot: the error handler mirrors the idle handler — it must not tear
310
+ // down streaming while a sibling/successor resolve is still executing.
311
+ const stuck = new Promise<WebMcpToolResult>(() => undefined);
312
+ const session = makeSession({ executeImpl: () => stuck }).session;
313
+ void session.resolveWebMcpToolCall(awaitingMessage("tool-1", "webmcp:search"));
314
+ (
315
+ session as unknown as { handleEvent: (e: unknown) => void }
316
+ ).handleEvent({ type: "error", error: new Error("stream blip") });
317
+ expect((session as unknown as { streaming: boolean }).streaming).toBe(true);
318
+ });
319
+
320
+ it("connectStream error does not clear streaming while a webmcp resolve is in flight", async () => {
321
+ // BugBot: connectStream's catch mirrors the error/idle handlers — a failed
322
+ // resume stream must not tear down streaming while another resolve runs.
323
+ const session = new AgentWidgetSession(
324
+ { apiUrl: "http://test", webmcp: { enabled: true } },
325
+ {
326
+ onMessagesChanged: () => undefined,
327
+ onStatusChanged: () => undefined,
328
+ onStreamingChanged: () => undefined,
329
+ },
330
+ );
331
+ const s = session as unknown as {
332
+ client: { processStream: (...a: unknown[]) => Promise<void> };
333
+ webMcpResolveControllers: Set<AbortController>;
334
+ streaming: boolean;
335
+ };
336
+ s.client.processStream = vi.fn(async () => {
337
+ throw new Error("stream blip");
338
+ });
339
+ // Simulate a resolve still in flight.
340
+ s.webMcpResolveControllers.add(new AbortController());
341
+ await session.connectStream(new ReadableStream(), { allowReentry: true });
342
+ expect(s.streaming).toBe(true);
343
+ });
344
+
345
+ it("forwards the abort signal into client.executeWebMcpToolCall", async () => {
346
+ // BugBot finding #12: the session must thread its signal INTO the
347
+ // bridge so cancel() can short-circuit the confirm bubble AND the
348
+ // execute() race, not just abort the await on the session side.
349
+ const { session, client } = makeSession();
350
+ await session.resolveWebMcpToolCall(
351
+ awaitingMessage("tool-1", "webmcp:search"),
352
+ );
353
+ const spy = client.executeWebMcpToolCall as ReturnType<typeof vi.fn>;
354
+ expect(spy).toHaveBeenCalledTimes(1);
355
+ const call = spy.mock.calls[0]!;
356
+ // Args: (wireToolName, args, signal?)
357
+ expect(call[0]).toBe("webmcp:search");
358
+ expect(call[2]).toBeInstanceOf(AbortSignal);
359
+ });
360
+
361
+ it("does not construct the bridge when config.webmcp.enabled is false", () => {
362
+ // BugBot finding #14: previously the bridge was constructed whenever a
363
+ // `webmcp` block existed, regardless of `enabled`. That left
364
+ // `executeWebMcpToolCall` returning a non-null promise even when WebMCP
365
+ // was explicitly disabled — making the session's "WebMCP not enabled"
366
+ // resume branch dead code. Constructor now gates on `enabled === true`.
367
+ const session = new AgentWidgetSession(
368
+ { apiUrl: "http://test", webmcp: { enabled: false } },
369
+ {
370
+ onMessagesChanged: () => undefined,
371
+ onStatusChanged: () => undefined,
372
+ onStreamingChanged: () => undefined,
373
+ },
374
+ );
375
+ const client = (
376
+ session as unknown as {
377
+ client: {
378
+ executeWebMcpToolCall: (
379
+ n: string,
380
+ a: unknown,
381
+ s?: AbortSignal,
382
+ ) => unknown;
383
+ };
384
+ }
385
+ ).client;
386
+ expect(client.executeWebMcpToolCall("webmcp:x", {})).toBeNull();
387
+ });
388
+
389
+ it("does not construct the bridge when config.webmcp is omitted", () => {
390
+ const session = new AgentWidgetSession(
391
+ { apiUrl: "http://test" },
392
+ {
393
+ onMessagesChanged: () => undefined,
394
+ onStatusChanged: () => undefined,
395
+ onStreamingChanged: () => undefined,
396
+ },
397
+ );
398
+ const client = (
399
+ session as unknown as {
400
+ client: {
401
+ executeWebMcpToolCall: (
402
+ n: string,
403
+ a: unknown,
404
+ s?: AbortSignal,
405
+ ) => unknown;
406
+ };
407
+ }
408
+ ).client;
409
+ expect(client.executeWebMcpToolCall("webmcp:x", {})).toBeNull();
410
+ });
411
+
412
+ it("marks resolved on HTTP /resume success, not on stream completion", async () => {
413
+ // BugBot finding #8: if the resume HTTP response is OK but the downstream
414
+ // SSE stream errors, we still want dedupe to block re-emits — the server
415
+ // has already accepted the answer.
416
+ const { session, resumeSpy, executeSpy } = makeSession();
417
+ // Make connectStream throw to simulate a broken downstream SSE.
418
+ (session as unknown as { connectStream: () => Promise<void> })
419
+ .connectStream = vi.fn(async () => {
420
+ throw new Error("stream broken");
421
+ });
422
+
423
+ const msg = awaitingMessage("tool-1", "webmcp:search");
424
+ await session.resolveWebMcpToolCall(msg);
425
+ await session.resolveWebMcpToolCall(msg); // re-emit — must be blocked
426
+
427
+ expect(executeSpy).toHaveBeenCalledTimes(1);
428
+ expect(resumeSpy).toHaveBeenCalledTimes(1);
429
+ });
430
+
431
+ it("scopes dedupe by executionId so a different dispatch with the same toolCall.id is not blocked", async () => {
432
+ // BugBot finding #9: a later dispatch (different executionId) that
433
+ // happens to emit a colliding `toolCall.id` must NOT be silently
434
+ // blocked. Dedupe keys are `${executionId}:${toolCallId}` so they
435
+ // naturally segregate.
436
+ const { session, executeSpy, resumeSpy } = makeSession();
437
+ await session.resolveWebMcpToolCall(
438
+ awaitingMessage("tool-1", "webmcp:search", "exec-1"),
439
+ );
440
+ // Different execution, same toolCall.id — must be allowed.
441
+ await session.resolveWebMcpToolCall(
442
+ awaitingMessage("tool-1", "webmcp:search", "exec-2"),
443
+ );
444
+ expect(executeSpy).toHaveBeenCalledTimes(2);
445
+ expect(resumeSpy).toHaveBeenCalledTimes(2);
446
+ });
447
+
448
+ it("blocks stale re-emits of an old executionId even after a new dispatch starts", async () => {
449
+ // BugBot finding #11: clearing the resolved set on sendMessage would
450
+ // let a stale step_await from the prior /resume's still-active SSE
451
+ // re-trigger execute(). With executionId-scoped keys, the prior
452
+ // execution's resolved entries persist — so stale re-emits stay blocked.
453
+ const { session, executeSpy, resumeSpy } = makeSession();
454
+ await session.resolveWebMcpToolCall(
455
+ awaitingMessage("tool-1", "webmcp:search", "exec-1"),
456
+ );
457
+ // Stale re-emit from exec-1 — still blocked. NOTE(review): no new dispatch is actually started between these two calls; confirm whether a sendMessage step was intended.
458
+ await session.resolveWebMcpToolCall(
459
+ awaitingMessage("tool-1", "webmcp:search", "exec-1"),
460
+ );
461
+ expect(executeSpy).toHaveBeenCalledTimes(1);
462
+ expect(resumeSpy).toHaveBeenCalledTimes(1);
463
+ });
464
+
465
+ it("surfaces onError when a step_await is missing executionId", async () => {
466
+ // BugBot finding #17: silently returning here strands the server-side
467
+ // dispatch with no recovery path. Persona can't /resume without an
468
+ // executionId, but it CAN surface the failure to the host so an
469
+ // operator notices.
470
+ const onError = vi.fn();
471
+ const session = new AgentWidgetSession(
472
+ { apiUrl: "http://test", webmcp: { enabled: true } },
473
+ {
474
+ onMessagesChanged: () => undefined,
475
+ onStatusChanged: () => undefined,
476
+ onStreamingChanged: () => undefined,
477
+ onError,
478
+ },
479
+ );
480
+ const broken: AgentWidgetMessage = {
481
+ id: "msg-broken",
482
+ role: "assistant",
483
+ content: "",
484
+ createdAt: new Date().toISOString(),
485
+ toolCall: { id: "tool-x", name: "webmcp:search", status: "complete" },
486
+ // executionId missing
487
+ };
488
+ await session.resolveWebMcpToolCall(broken);
489
+ expect(onError).toHaveBeenCalledTimes(1);
490
+ expect((onError.mock.calls[0]![0] as Error).message).toMatch(
491
+ /executionId/i,
492
+ );
493
+ });
494
+
495
+ it("posts isError /resume when a step_await is missing toolCall.id", async () => {
496
+ // BugBot finding #17 (cont.): when executionId is present but toolCall.id
497
+ // is missing, we can still advance the server-side dispatch by posting
498
+ // an isError /resume for the tool name. Dedupe cannot key on toolCall.id here, but the
499
+ // dispatch doesn't hang.
500
+ const { session, resumeSpy } = makeSession();
501
+ const partial: AgentWidgetMessage = {
502
+ id: "msg-no-toolid",
503
+ role: "assistant",
504
+ content: "",
505
+ createdAt: new Date().toISOString(),
506
+ agentMetadata: { executionId: "exec-x", awaitingLocalTool: true },
507
+ toolCall: {
508
+ id: "",
509
+ name: "webmcp:search",
510
+ status: "complete",
511
+ },
512
+ };
513
+ await session.resolveWebMcpToolCall(partial);
514
+ expect(resumeSpy).toHaveBeenCalledTimes(1);
515
+ const payload = (resumeSpy.mock.calls[0]! as unknown[])[1] as {
516
+ "webmcp:search": WebMcpToolResult;
517
+ };
518
+ expect(payload["webmcp:search"].isError).toBe(true);
519
+ });
520
+
521
+ it("dedupes repeated malformed (missing toolCall.id) re-emits", async () => {
522
+ // BugBot iter 9: posting an isError /resume for a no-toolCallId message
523
+ // is recovery, not a license to repeat. Identical re-emits of the same
524
+ // malformed step_await (same executionId + wireToolName) must collapse
525
+ // to a single POST.
526
+ const { session, resumeSpy } = makeSession();
527
+ const partial = (): AgentWidgetMessage => ({
528
+ id: `msg-${Math.random()}`,
529
+ role: "assistant",
530
+ content: "",
531
+ createdAt: new Date().toISOString(),
532
+ agentMetadata: { executionId: "exec-x", awaitingLocalTool: true },
533
+ toolCall: { id: "", name: "webmcp:search", status: "complete" },
534
+ });
535
+ await session.resolveWebMcpToolCall(partial());
536
+ await session.resolveWebMcpToolCall(partial());
537
+ await session.resolveWebMcpToolCall(partial());
538
+ expect(resumeSpy).toHaveBeenCalledTimes(1);
539
+ });
540
+
541
+ it("keys a single call's /resume by webMcpToolCallId when present", async () => {
542
+ // core#3878: when the server emits a per-call id, the single-call path keys
543
+ // /resume by it (server prefers id over name) — not by the wire tool name.
544
+ const { session, resumeSpy } = makeSession({
545
+ executeReturn: { content: [{ type: "text", text: "added" }] },
546
+ });
547
+ const msg: AgentWidgetMessage = {
548
+ id: "msg-single",
549
+ role: "assistant",
550
+ content: "",
551
+ createdAt: new Date().toISOString(),
552
+ agentMetadata: {
553
+ executionId: "exec-1",
554
+ awaitingLocalTool: true,
555
+ webMcpToolCallId: "toolu_AAA",
556
+ },
557
+ toolCall: {
558
+ id: "toolu_AAA",
559
+ name: "webmcp:add_to_cart",
560
+ status: "complete",
561
+ args: { sku: "SHOE-001" },
562
+ },
563
+ };
564
+ await session.resolveWebMcpToolCall(msg);
565
+ expect(resumeSpy).toHaveBeenCalledTimes(1);
566
+ const payload = (resumeSpy.mock.calls[0]! as unknown[])[1] as Record<
567
+ string,
568
+ unknown
569
+ >;
570
+ expect(Object.keys(payload)).toEqual(["toolu_AAA"]);
571
+ });
572
+ });
573
+
574
+ describe("AgentWidgetSession — WebMCP parallel batched resume (core#3878)", () => {
575
+ beforeEach(() => {
576
+ vi.clearAllMocks();
577
+ });
578
+
579
+ // A `step_await(local_tool_required)` message as client.ts emits it for a
580
+ // PARALLEL local-tool call: the per-call `toolCallId` is both the toolCall.id
581
+ // AND `agentMetadata.webMcpToolCallId`. Two of these for one executionId share
582
+ // a tool name but differ by id (the whole point of core#3878).
583
+ const parallelAwait = (
584
+ toolCallId: string,
585
+ sku: string,
586
+ executionId = "exec-par",
587
+ ): AgentWidgetMessage => ({
588
+ id: `tool-${toolCallId}`,
589
+ role: "assistant",
590
+ content: "",
591
+ createdAt: new Date().toISOString(),
592
+ agentMetadata: {
593
+ executionId,
594
+ awaitingLocalTool: true,
595
+ webMcpToolCallId: toolCallId,
596
+ },
597
+ toolCall: {
598
+ id: toolCallId,
599
+ name: "webmcp:add_to_cart",
600
+ status: "complete",
601
+ args: { sku },
602
+ },
603
+ });
604
+
605
+ const feed = (session: AgentWidgetSession, msg: AgentWidgetMessage) =>
606
+ (session as unknown as { handleEvent: (e: unknown) => void }).handleEvent({
607
+ type: "message",
608
+ message: msg,
609
+ });
610
+
611
+ // The batch flushes only when the stream that delivered the awaits ENDS —
612
+ // the client emits `status: idle` at stream end. Simulate that.
613
+ const endStream = (session: AgentWidgetSession) =>
614
+ (session as unknown as { handleEvent: (e: unknown) => void }).handleEvent({
615
+ type: "status",
616
+ status: "idle",
617
+ });
618
+
619
+ const flushMicrotasks = async () => {
620
+ // idle → queueMicrotask(flush) → resolveWebMcpToolCallBatch (async).
621
+ for (let i = 0; i < 6; i++) await Promise.resolve();
622
+ };
623
+
624
+ it("two parallel same-tool awaits → both execute → exactly ONE batched /resume keyed by toolCallId", async () => {
625
+ const { session, executeSpy, resumeSpy } = makeSession({
626
+ // No-op override (falls back to the default execute); the sku echo that maps each call's output to its id is installed via mockImplementation below.
627
+ executeImpl: undefined,
628
+ });
629
+ // Make execute return a per-call result derived from its args.
630
+ const client = (session as unknown as { client: Record<string, unknown> })
631
+ .client;
632
+ (client.executeWebMcpToolCall as ReturnType<typeof vi.fn>).mockImplementation(
633
+ (_name: string, args: { sku: string }) => {
634
+ executeSpy();
635
+ return Promise.resolve({
636
+ content: [{ type: "text", text: `added ${args.sku}` }],
637
+ });
638
+ },
639
+ );
640
+
641
+ // Two parallel step_awaits arrive in the SAME tick (one paused execution),
642
+ // then the stream ends.
643
+ feed(session, parallelAwait("toolu_A", "SHOE-001"));
644
+ feed(session, parallelAwait("toolu_B", "SHOE-007"));
645
+ endStream(session);
646
+ await flushMicrotasks();
647
+
648
+ // Both page tools ran.
649
+ expect(executeSpy).toHaveBeenCalledTimes(2);
650
+
651
+ // Exactly ONE /resume for the shared execution — not one per tool.
652
+ expect(resumeSpy).toHaveBeenCalledTimes(1);
653
+ const [execId, toolOutputs] = resumeSpy.mock.calls[0]! as unknown as [
654
+ string,
655
+ Record<string, { content: { text: string }[] }>,
656
+ ];
657
+ expect(execId).toBe("exec-par");
658
+ // Keyed by per-call toolCallId, with each call's own output.
659
+ expect(Object.keys(toolOutputs).sort()).toEqual(["toolu_A", "toolu_B"]);
660
+ expect(toolOutputs["toolu_A"].content[0].text).toBe("added SHOE-001");
661
+ expect(toolOutputs["toolu_B"].content[0].text).toBe("added SHOE-007");
662
+ });
663
+
664
+ it("executes siblings concurrently — one call's gate Promise does not block the other", async () => {
665
+ // The native approval bubble parks each call's execute on a Promise. A
666
+ // gated sibling must not head-of-line-block the others: both executes
667
+ // should be in flight before either completes.
668
+ let releaseA: (r: WebMcpToolResult) => void = () => undefined; // no-op placeholder; replaced by pA's resolve function below
669
+ let releaseB: (r: WebMcpToolResult) => void = () => undefined; // no-op placeholder; replaced by pB's resolve function below
670
+ const pA = new Promise<WebMcpToolResult>((r) => (releaseA = r)); // stays pending until releaseA(...) is called
671
+ const pB = new Promise<WebMcpToolResult>((r) => (releaseB = r)); // stays pending until releaseB(...) is called
672
+ const started: string[] = []; // sku of every execute call, recorded at the moment the mock is invoked
673
+
674
+ const { session, resumeSpy } = makeSession();
675
+ const client = (session as unknown as { client: Record<string, unknown> })
676
+ .client; // structural cast to reach the session's `client` field (presumably non-public — confirm)
677
+ (client.executeWebMcpToolCall as ReturnType<typeof vi.fn>).mockImplementation(
678
+ (_name: string, args: { sku: string }) => {
679
+ started.push(args.sku); // proves the call started, independent of when it settles
680
+ return args.sku === "SHOE-001" ? pA : pB; // each call parks on its own manually-released Promise
681
+ },
682
+ );
683
+
684
+ feed(session, parallelAwait("toolu_A", "SHOE-001"));
685
+ feed(session, parallelAwait("toolu_B", "SHOE-007"));
686
+ endStream(session);
687
+ await flushMicrotasks(); // helper defined outside this chunk; presumably lets the deferred batch flush run — confirm
688
+
689
+ // Both executes are in flight even though neither has resolved → no
690
+ // head-of-line blocking. No /resume yet (both still parked).
691
+ expect(started.sort()).toEqual(["SHOE-001", "SHOE-007"]); // sort() reorders `started` in place; harmless, it is not read again
692
+ expect(resumeSpy).not.toHaveBeenCalled();
693
+
694
+ // Release out of order; the batched resume waits for BOTH.
695
+ releaseB({ content: [{ type: "text", text: "b" }] });
696
+ await flushMicrotasks();
697
+ expect(resumeSpy).not.toHaveBeenCalled(); // only B has settled; A is still pending
698
+ releaseA({ content: [{ type: "text", text: "a" }] });
699
+ await flushMicrotasks();
700
+ expect(resumeSpy).toHaveBeenCalledTimes(1); // exactly one resume once both calls have settled
701
+ const toolOutputs = (resumeSpy.mock.calls[0]! as unknown[])[1] as Record< // second argument of the first (only) resume call
702
+ string,
703
+ unknown
704
+ >;
705
+ expect(Object.keys(toolOutputs).sort()).toEqual(["toolu_A", "toolu_B"]); // one output entry per tool-call id
706
+ });
707
+
708
+ it("dedupes a duplicate parallel await within the same batch", async () => { // feeds three awaits but only two distinct ids
709
+ const { session, executeSpy, resumeSpy } = makeSession();
710
+ feed(session, parallelAwait("toolu_A", "SHOE-001"));
711
+ feed(session, parallelAwait("toolu_A", "SHOE-001")); // duplicate id — same id and sku as the line above
712
+ feed(session, parallelAwait("toolu_B", "SHOE-007"));
713
+ endStream(session);
714
+ await flushMicrotasks(); // helper defined outside this chunk; presumably lets the batch flush and resume run — confirm
715
+ expect(executeSpy).toHaveBeenCalledTimes(2); // A once, B once — the repeated toolu_A must not execute twice
716
+ expect(resumeSpy).toHaveBeenCalledTimes(1); // and the batch must still produce exactly one resume call
717
+ });
718
+
719
+ it("keeps the run 'running' at stream-end while a batch is still pending flush", () => { // synchronous test: nothing is awaited
720
+ // BugBot (PR #214): the batch flush is deferred to a microtask after the
721
+ // idle handler, so webMcpResolveControllers is empty when idle runs. The
722
+ // run must NOT be marked complete while local tools are still outstanding.
723
+ const { session } = makeSession();
724
+ const s = session as unknown as { // structural view used only to read agentExecution (presumably non-public — confirm)
725
+ agentExecution: { status: string } | null;
726
+ };
727
+ feed(session, parallelAwait("toolu_A", "SHOE-001"));
728
+ feed(session, parallelAwait("toolu_B", "SHOE-007"));
729
+ endStream(session); // idle arrives BEFORE the deferred batch flush
730
+ expect(s.agentExecution?.status).toBe("running"); // checked immediately, with no microtask flush in between
731
+ });
732
+
733
+ it("updateConfig tears down buffered batches and pending approvals", async () => {
734
+ // BugBot (PR #214): updateConfig swaps the client; a buffered batch or
735
+ // parked approval flushed afterward would target the fresh (session-less)
736
+ // client and strand the paused turn. updateConfig must reset WebMCP state.
737
+ const { session } = makeSession();
738
+ const s = session as unknown as { // structural view used only to read the sizes of the two maps below
739
+ webMcpAwaitBatches: Map<string, unknown>;
740
+ webMcpApprovalResolvers: Map<string, unknown>;
741
+ };
742
+ feed(session, parallelAwait("toolu_A", "SHOE-001"));
743
+ feed(session, parallelAwait("toolu_B", "SHOE-007")); // no endStream here, so the awaits stay buffered
744
+ const pending = session.requestWebMcpApproval({ // result is awaited below; expected to resolve to false after updateConfig
745
+ toolName: "add_to_cart",
746
+ args: { sku: "SHOE-001" },
747
+ } as WebMcpConfirmInfo); // cast: only toolName and args are supplied in this literal
748
+ expect(s.webMcpAwaitBatches.size).toBe(1); // one batch keyed by executionId
749
+ expect(s.webMcpApprovalResolvers.size).toBe(1); // precondition: exactly one approval is parked
750
+
751
+ session.updateConfig({ apiUrl: "http://test", webmcp: { enabled: true } }); // the action under test
752
+
753
+ expect(s.webMcpAwaitBatches.size).toBe(0); // buffered batch is gone
754
+ await expect(pending).resolves.toBe(false); // parked approval settles as false instead of hanging
755
+ expect(s.webMcpApprovalResolvers.size).toBe(0); // resolver map is emptied
756
+ });
757
+
758
+ it("a teardown before the stream-end flush strands the batch", async () => { // "strands" here = the batch is dropped: no execute, no resume
759
+ const { session, executeSpy, resumeSpy } = makeSession();
760
+ feed(session, parallelAwait("toolu_A", "SHOE-001"));
761
+ feed(session, parallelAwait("toolu_B", "SHOE-007"));
762
+ // Teardown clears the buffered batch (and bumps the epoch) BEFORE the
763
+ // stream-end flush — even if a late idle arrives, nothing should resolve.
764
+ session.clearMessages(); // the teardown under test, issued before endStream
765
+ endStream(session); // the "late idle" mentioned above
766
+ await flushMicrotasks(); // helper defined outside this chunk; presumably gives any deferred flush a chance to run — confirm
767
+ expect(executeSpy).not.toHaveBeenCalled(); // neither buffered tool call was executed
768
+ expect(resumeSpy).not.toHaveBeenCalled(); // and nothing was sent to resume
769
+ });
770
+
771
+ it("settles pending approval bubbles on teardown so a parked resolve can't hang", async () => {
772
+ // BugBot (PR #214): the bridge parks a resolve on `await requestConfirm`
773
+ // and only re-checks signal.aborted AFTER that await. If a teardown
774
+ // (cancel/clearMessages/hydrate/sendMessage) happens while an approval
775
+ // bubble is still awaiting a click, the resolver must be settled or the
776
+ // bridge execute / its /resume / the resolve's finally all hang forever.
777
+ const { session } = makeSession();
778
+ const s = session as unknown as { // structural view used to read the resolver map and the message list
779
+ webMcpApprovalResolvers: Map<string, (b: boolean) => void>;
780
+ messages: AgentWidgetMessage[];
781
+ };
782
+
783
+ // No autoApprove → the gate parks on a pending Promise.
784
+ const pending = session.requestWebMcpApproval({ // result is awaited below; expected to resolve to false after cancel()
785
+ toolName: "add_to_cart",
786
+ args: { sku: "SHOE-001" },
787
+ } as WebMcpConfirmInfo); // cast: only toolName and args are supplied in this literal
788
+ expect(s.webMcpApprovalResolvers.size).toBe(1); // precondition: exactly one approval is parked
789
+
790
+ session.cancel(); // the teardown under test
791
+
792
+ // The parked confirm Promise resolves false (declined) and the map clears.
793
+ await expect(pending).resolves.toBe(false);
794
+ expect(s.webMcpApprovalResolvers.size).toBe(0);
795
+ // The bubble must not be left visually "pending" — it flips to denied so no
796
+ // stale Approve/Deny remains clickable.
797
+ const bubble = s.messages.find((m) => m.variant === "approval"); // first message with variant "approval"
798
+ expect(bubble?.approval?.status).toBe("denied"); // also fails if no such message exists (undefined !== "denied")
799
+ });
800
+
801
+ it("clearMessages() also settles pending approval bubbles", async () => { // same check as for cancel(), but via clearMessages()
802
+ const { session } = makeSession();
803
+ const s = session as unknown as { // structural view used only to read the resolver map's size
804
+ webMcpApprovalResolvers: Map<string, (b: boolean) => void>;
805
+ };
806
+ const pending = session.requestWebMcpApproval({ // result is awaited below; expected to resolve to false after clearMessages()
807
+ toolName: "add_to_cart",
808
+ args: { sku: "SHOE-007" },
809
+ } as WebMcpConfirmInfo); // cast: only toolName and args are supplied in this literal
810
+ expect(s.webMcpApprovalResolvers.size).toBe(1); // precondition: exactly one approval is parked
811
+ session.clearMessages(); // the teardown under test
812
+ await expect(pending).resolves.toBe(false); // parked approval settles as false instead of hanging
813
+ expect(s.webMcpApprovalResolvers.size).toBe(0); // resolver map is emptied
814
+ });
815
+ });