@vellumai/assistant 0.4.48 → 0.4.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (252) hide show
  1. package/ARCHITECTURE.md +2 -2
  2. package/README.md +2 -23
  3. package/docs/architecture/integrations.md +45 -41
  4. package/docs/architecture/keychain-broker.md +3 -3
  5. package/docs/runbook-trusted-contacts.md +3 -8
  6. package/hook-templates/debug-prompt-logger/hook.json +1 -1
  7. package/hook-templates/debug-prompt-logger/run.sh +1 -3
  8. package/package.json +1 -1
  9. package/src/__tests__/actor-token-service.test.ts +0 -1
  10. package/src/__tests__/anthropic-provider.test.ts +156 -0
  11. package/src/__tests__/approval-cascade.test.ts +810 -0
  12. package/src/__tests__/approval-primitive.test.ts +0 -1
  13. package/src/__tests__/approval-routes-http.test.ts +2 -0
  14. package/src/__tests__/assistant-attachments.test.ts +12 -34
  15. package/src/__tests__/assistant-feature-flag-guardrails.test.ts +76 -0
  16. package/src/__tests__/assistant-feature-flags-integration.test.ts +0 -1
  17. package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +2 -2
  18. package/src/__tests__/channel-guardian.test.ts +0 -2
  19. package/src/__tests__/channel-readiness-routes.test.ts +15 -6
  20. package/src/__tests__/channel-readiness-service.test.ts +10 -9
  21. package/src/__tests__/checker.test.ts +9 -29
  22. package/src/__tests__/computer-use-skill-manifest-regression.test.ts +1 -1
  23. package/src/__tests__/computer-use-tools.test.ts +2 -19
  24. package/src/__tests__/config-watcher.test.ts +0 -1
  25. package/src/__tests__/confirmation-request-guardian-bridge.test.ts +0 -1
  26. package/src/__tests__/context-image-dimensions.test.ts +332 -0
  27. package/src/__tests__/context-token-estimator.test.ts +196 -13
  28. package/src/__tests__/conversation-attention-store.test.ts +0 -1
  29. package/src/__tests__/conversation-attention-telegram.test.ts +0 -1
  30. package/src/__tests__/conversation-routes-guardian-reply.test.ts +144 -0
  31. package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
  32. package/src/__tests__/credential-metadata-store.test.ts +64 -73
  33. package/src/__tests__/credential-security-invariants.test.ts +13 -7
  34. package/src/__tests__/credential-vault-unit.test.ts +280 -49
  35. package/src/__tests__/credential-vault.test.ts +138 -16
  36. package/src/__tests__/credentials-cli.test.ts +71 -0
  37. package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +0 -1
  38. package/src/__tests__/ephemeral-permissions.test.ts +3 -3
  39. package/src/__tests__/gateway-only-guard.test.ts +0 -1
  40. package/src/__tests__/guardian-action-grant-mint-consume.test.ts +0 -1
  41. package/src/__tests__/guardian-decision-primitive-canonical.test.ts +0 -1
  42. package/src/__tests__/guardian-routing-invariants.test.ts +0 -1
  43. package/src/__tests__/guardian-verification-voice-binding.test.ts +0 -1
  44. package/src/__tests__/handlers-user-message-approval-consumption.test.ts +0 -39
  45. package/src/__tests__/heartbeat-service.test.ts +0 -1
  46. package/src/__tests__/host-cu-proxy.test.ts +629 -0
  47. package/src/__tests__/host-shell-tool.test.ts +27 -15
  48. package/src/__tests__/http-user-message-parity.test.ts +1 -0
  49. package/src/__tests__/ingress-url-consistency.test.ts +14 -21
  50. package/src/__tests__/integration-status.test.ts +32 -51
  51. package/src/__tests__/intent-routing.test.ts +0 -1
  52. package/src/__tests__/invite-routes-http.test.ts +10 -9
  53. package/src/__tests__/keychain-broker-client.test.ts +11 -43
  54. package/src/__tests__/notification-routing-intent.test.ts +0 -1
  55. package/src/__tests__/oauth-cli.test.ts +373 -14
  56. package/src/__tests__/oauth-provider-profiles.test.ts +9 -9
  57. package/src/__tests__/oauth-scope-policy.test.ts +4 -6
  58. package/src/__tests__/oauth-store.test.ts +756 -0
  59. package/src/__tests__/onboarding-starter-tasks.test.ts +0 -1
  60. package/src/__tests__/provider-error-scenarios.test.ts +0 -1
  61. package/src/__tests__/provider-streaming.benchmark.test.ts +0 -1
  62. package/src/__tests__/public-ingress-urls.test.ts +15 -21
  63. package/src/__tests__/recording-handler.test.ts +3 -4
  64. package/src/__tests__/registry.test.ts +2 -2
  65. package/src/__tests__/runtime-events-sse.test.ts +55 -7
  66. package/src/__tests__/schedule-store.test.ts +0 -1
  67. package/src/__tests__/scheduler-recurrence.test.ts +0 -1
  68. package/src/__tests__/scoped-approval-grants.test.ts +0 -1
  69. package/src/__tests__/scoped-grant-security-matrix.test.ts +0 -1
  70. package/src/__tests__/secret-ingress-handler.test.ts +0 -1
  71. package/src/__tests__/send-endpoint-busy.test.ts +21 -6
  72. package/src/__tests__/sequence-store.test.ts +0 -1
  73. package/src/__tests__/session-init.benchmark.test.ts +4 -5
  74. package/src/__tests__/skill-include-graph.test.ts +66 -0
  75. package/src/__tests__/skill-load-feature-flag.test.ts +0 -1
  76. package/src/__tests__/skill-load-tool.test.ts +149 -1
  77. package/src/__tests__/skill-projection-feature-flag.test.ts +0 -1
  78. package/src/__tests__/skills-uninstall.test.ts +1 -1
  79. package/src/__tests__/skills.test.ts +3 -3
  80. package/src/__tests__/slack-channel-config.test.ts +67 -3
  81. package/src/__tests__/slack-share-routes.test.ts +17 -19
  82. package/src/__tests__/system-prompt.test.ts +0 -1
  83. package/src/__tests__/telegram-invite-adapter.test.ts +18 -22
  84. package/src/__tests__/terminal-tools.test.ts +4 -3
  85. package/src/__tests__/test-support/computer-use-skill-harness.ts +3 -2
  86. package/src/__tests__/tool-approval-handler.test.ts +0 -1
  87. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -1
  88. package/src/__tests__/tool-executor-lifecycle-events.test.ts +0 -1
  89. package/src/__tests__/tool-executor-shell-integration.test.ts +0 -1
  90. package/src/__tests__/tool-executor.test.ts +0 -1
  91. package/src/__tests__/tool-grant-request-escalation.test.ts +0 -1
  92. package/src/__tests__/trust-store-pattern-matches.test.ts +29 -0
  93. package/src/__tests__/trust-store.test.ts +1 -22
  94. package/src/__tests__/trusted-contact-approval-notifier.test.ts +0 -1
  95. package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +0 -1
  96. package/src/__tests__/twilio-routes.test.ts +0 -16
  97. package/src/__tests__/verification-control-plane-policy.test.ts +0 -1
  98. package/src/__tests__/voice-scoped-grant-consumer.test.ts +0 -1
  99. package/src/agent/ax-tree-compaction.test.ts +235 -0
  100. package/src/agent/loop.ts +76 -130
  101. package/src/calls/call-domain.ts +1 -6
  102. package/src/calls/relay-server.ts +9 -13
  103. package/src/calls/twilio-config.ts +2 -7
  104. package/src/calls/twilio-routes.ts +1 -2
  105. package/src/calls/voice-ingress-preflight.ts +1 -1
  106. package/src/cli/commands/browser-relay.ts +18 -12
  107. package/src/cli/commands/completions.ts +0 -3
  108. package/src/cli/commands/credentials.ts +101 -15
  109. package/src/cli/commands/oauth/apps.ts +255 -0
  110. package/src/cli/commands/oauth/connections.ts +299 -0
  111. package/src/cli/commands/oauth/index.ts +52 -0
  112. package/src/cli/commands/oauth/providers.ts +242 -0
  113. package/src/cli/commands/skills.ts +4 -338
  114. package/src/cli/program.ts +1 -5
  115. package/src/cli/reference.ts +1 -3
  116. package/src/config/assistant-feature-flags.ts +0 -3
  117. package/src/config/bundled-skills/_shared/CLI_RETRIEVAL_PATTERN.md +1 -1
  118. package/src/config/bundled-skills/computer-use/SKILL.md +3 -6
  119. package/src/config/bundled-skills/computer-use/TOOLS.json +22 -4
  120. package/src/config/bundled-skills/google-calendar/calendar-client.ts +21 -16
  121. package/src/config/bundled-skills/messaging/tools/shared.ts +1 -4
  122. package/src/config/bundled-skills/settings/SKILL.md +1 -1
  123. package/src/config/bundled-skills/settings/TOOLS.json +2 -8
  124. package/src/config/bundled-skills/settings/tools/voice-config-update.ts +5 -33
  125. package/src/config/env-registry.ts +14 -83
  126. package/src/config/env.ts +11 -50
  127. package/src/config/feature-flag-registry.json +16 -16
  128. package/src/config/loader.ts +0 -6
  129. package/src/config/schema.ts +3 -1
  130. package/src/config/skills.ts +21 -2
  131. package/src/context/image-dimensions.ts +229 -0
  132. package/src/context/token-estimator.ts +75 -12
  133. package/src/context/window-manager.ts +49 -10
  134. package/src/daemon/assistant-attachments.ts +1 -13
  135. package/src/daemon/handlers/config-ingress.ts +8 -33
  136. package/src/daemon/handlers/config-slack-channel.ts +49 -46
  137. package/src/daemon/handlers/config-telegram.ts +32 -16
  138. package/src/daemon/handlers/sessions.ts +10 -24
  139. package/src/daemon/handlers/shared.ts +0 -130
  140. package/src/daemon/host-cu-proxy.ts +401 -0
  141. package/src/daemon/lifecycle.ts +36 -68
  142. package/src/daemon/message-protocol.ts +3 -0
  143. package/src/daemon/message-types/computer-use.ts +2 -119
  144. package/src/daemon/message-types/host-cu.ts +19 -0
  145. package/src/daemon/message-types/messages.ts +3 -0
  146. package/src/daemon/server.ts +14 -21
  147. package/src/daemon/session-agent-loop-handlers.ts +2 -0
  148. package/src/daemon/session-attachments.ts +1 -2
  149. package/src/daemon/session-slash.ts +1 -1
  150. package/src/daemon/session-surfaces.ts +40 -28
  151. package/src/daemon/session-tool-setup.ts +2 -9
  152. package/src/daemon/session.ts +138 -15
  153. package/src/daemon/tool-side-effects.ts +2 -8
  154. package/src/daemon/watch-handler.ts +2 -2
  155. package/src/events/tool-metrics-listener.ts +2 -2
  156. package/src/hooks/manager.ts +1 -4
  157. package/src/inbound/public-ingress-urls.ts +7 -7
  158. package/src/logfire.ts +16 -5
  159. package/src/memory/conversation-key-store.ts +21 -0
  160. package/src/memory/db-init.ts +4 -0
  161. package/src/memory/migrations/149-oauth-tables.ts +60 -0
  162. package/src/memory/migrations/index.ts +1 -0
  163. package/src/memory/schema/index.ts +1 -0
  164. package/src/memory/schema/oauth.ts +65 -0
  165. package/src/messaging/provider.ts +4 -4
  166. package/src/messaging/providers/gmail/client.ts +82 -2
  167. package/src/messaging/providers/gmail/people-client.ts +10 -10
  168. package/src/messaging/providers/telegram-bot/adapter.ts +17 -17
  169. package/src/messaging/providers/whatsapp/adapter.ts +11 -8
  170. package/src/messaging/registry.ts +2 -32
  171. package/src/notifications/copy-composer.ts +0 -5
  172. package/src/notifications/signal.ts +4 -5
  173. package/src/oauth/byo-connection.test.ts +126 -25
  174. package/src/oauth/byo-connection.ts +22 -6
  175. package/src/oauth/connect-orchestrator.ts +113 -57
  176. package/src/oauth/connect-types.ts +17 -23
  177. package/src/oauth/connection-resolver.ts +35 -11
  178. package/src/oauth/connection.ts +1 -1
  179. package/src/oauth/manual-token-connection.ts +104 -0
  180. package/src/oauth/oauth-store.ts +496 -0
  181. package/src/oauth/platform-connection.test.ts +29 -0
  182. package/src/oauth/platform-connection.ts +6 -5
  183. package/src/oauth/provider-behaviors.ts +124 -0
  184. package/src/oauth/scope-policy.ts +9 -2
  185. package/src/oauth/seed-providers.ts +161 -0
  186. package/src/oauth/token-persistence.ts +74 -78
  187. package/src/permissions/checker.ts +3 -3
  188. package/src/permissions/defaults.ts +0 -1
  189. package/src/permissions/prompter.ts +10 -1
  190. package/src/permissions/trust-store.ts +13 -0
  191. package/src/prompts/__tests__/build-cli-reference-section.test.ts +3 -1
  192. package/src/prompts/system-prompt.ts +28 -40
  193. package/src/providers/anthropic/client.ts +133 -24
  194. package/src/providers/retry.ts +1 -27
  195. package/src/runtime/auth/route-policy.ts +0 -3
  196. package/src/runtime/channel-reply-delivery.ts +0 -40
  197. package/src/runtime/gateway-client.ts +0 -7
  198. package/src/runtime/http-server.ts +8 -6
  199. package/src/runtime/http-types.ts +2 -2
  200. package/src/runtime/middleware/twilio-validation.ts +1 -11
  201. package/src/runtime/pending-interactions.ts +14 -12
  202. package/src/runtime/routes/channel-delivery-routes.ts +0 -1
  203. package/src/runtime/routes/conversation-routes.ts +73 -19
  204. package/src/runtime/routes/events-routes.ts +21 -11
  205. package/src/runtime/routes/host-cu-routes.ts +97 -0
  206. package/src/runtime/routes/inbound-stages/background-dispatch.ts +12 -111
  207. package/src/runtime/routes/integrations/slack/share.ts +6 -7
  208. package/src/runtime/routes/log-export-routes.ts +126 -8
  209. package/src/runtime/routes/settings-routes.ts +55 -48
  210. package/src/runtime/routes/surface-action-routes.ts +1 -1
  211. package/src/runtime/routes/watch-routes.ts +128 -0
  212. package/src/schedule/integration-status.ts +10 -9
  213. package/src/security/credential-key.ts +0 -156
  214. package/src/security/keychain-broker-client.ts +5 -6
  215. package/src/security/oauth2.ts +1 -1
  216. package/src/security/token-manager.ts +119 -46
  217. package/src/skills/catalog-install.ts +358 -0
  218. package/src/skills/include-graph.ts +32 -0
  219. package/src/telegram/bot-username.ts +2 -3
  220. package/src/tools/browser/network-recorder.ts +1 -1
  221. package/src/tools/browser/network-recording-types.ts +1 -1
  222. package/src/tools/computer-use/definitions.ts +46 -11
  223. package/src/tools/computer-use/registry.ts +4 -5
  224. package/src/tools/credentials/broker.ts +1 -2
  225. package/src/tools/credentials/metadata-store.ts +17 -121
  226. package/src/tools/credentials/vault.ts +94 -167
  227. package/src/tools/registry.ts +2 -7
  228. package/src/tools/skills/load.ts +62 -3
  229. package/src/tools/watch/watch-state.ts +0 -12
  230. package/src/util/logger.ts +7 -41
  231. package/src/util/platform.ts +9 -28
  232. package/src/watcher/providers/google-calendar.ts +2 -1
  233. package/src/__tests__/computer-use-session-compaction.test.ts +0 -143
  234. package/src/__tests__/computer-use-session-lifecycle.test.ts +0 -322
  235. package/src/__tests__/computer-use-session-working-dir.test.ts +0 -166
  236. package/src/__tests__/computer-use-skill-baseline.test.ts +0 -78
  237. package/src/__tests__/computer-use-skill-endstate.test.ts +0 -105
  238. package/src/__tests__/computer-use-skill-lifecycle-cleanup.test.ts +0 -249
  239. package/src/__tests__/ride-shotgun-handler.test.ts +0 -452
  240. package/src/cli/commands/dev.ts +0 -129
  241. package/src/cli/commands/map.ts +0 -391
  242. package/src/cli/commands/oauth.ts +0 -77
  243. package/src/config/bundled-skills/computer-use/tools/computer-use-request-control.ts +0 -16
  244. package/src/daemon/computer-use-session.ts +0 -1026
  245. package/src/daemon/ride-shotgun-handler.ts +0 -569
  246. package/src/oauth/provider-base-urls.ts +0 -21
  247. package/src/oauth/provider-profiles.ts +0 -192
  248. package/src/prompts/computer-use-prompt.ts +0 -98
  249. package/src/runtime/routes/computer-use-routes.ts +0 -641
  250. package/src/runtime/telegram-streaming-delivery.test.ts +0 -729
  251. package/src/runtime/telegram-streaming-delivery.ts +0 -393
  252. package/src/tools/computer-use/request-computer-control.ts +0 -56
@@ -1,729 +0,0 @@
1
- import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
2
-
3
- import type { ApprovalUIMetadata } from "./channel-approval-types.js";
4
- import type { ChannelDeliveryResult } from "./gateway-client.js";
5
-
6
- // ---------------------------------------------------------------------------
7
- // Mocks
8
- // ---------------------------------------------------------------------------
9
-
10
- let callCount = 0;
11
- const mockDeliverChannelReply = mock(
12
- async (): Promise<ChannelDeliveryResult> => {
13
- callCount++;
14
- return { ok: true, messageId: 99 + callCount };
15
- },
16
- );
17
-
18
- mock.module("./gateway-client.js", () => ({
19
- deliverChannelReply: mockDeliverChannelReply,
20
- }));
21
-
22
- mock.module("../util/logger.js", () => ({
23
- getLogger: () =>
24
- new Proxy({} as Record<string, unknown>, {
25
- get: () => () => {},
26
- }),
27
- }));
28
-
29
- import { TelegramStreamingDelivery } from "./telegram-streaming-delivery.js";
30
-
31
- // ---------------------------------------------------------------------------
32
- // Helpers
33
- // ---------------------------------------------------------------------------
34
-
35
- type CallArgs = [string, Record<string, unknown>, string];
36
-
37
- /** Extract the payload (second argument) from the Nth mock call. */
38
- function callPayload(n: number): Record<string, unknown> {
39
- const args = mockDeliverChannelReply.mock.calls[n] as unknown as CallArgs;
40
- return args[1];
41
- }
42
-
43
- function createDelivery(): TelegramStreamingDelivery {
44
- return new TelegramStreamingDelivery({
45
- callbackUrl: "http://test/deliver",
46
- chatId: "123",
47
- mintBearerToken: () => "test-token",
48
- });
49
- }
50
-
51
- /** Flush all pending microtasks / promise callbacks. */
52
- async function flushPromises(): Promise<void> {
53
- // Multiple rounds to handle chained .then() callbacks
54
- for (let i = 0; i < 10; i++) {
55
- await new Promise((resolve) => setTimeout(resolve, 0));
56
- }
57
- }
58
-
59
- // ---------------------------------------------------------------------------
60
- // Tests
61
- // ---------------------------------------------------------------------------
62
-
63
- describe("TelegramStreamingDelivery", () => {
64
- beforeEach(() => {
65
- callCount = 0;
66
- mockDeliverChannelReply.mockReset();
67
- mockDeliverChannelReply.mockImplementation(
68
- async (): Promise<ChannelDeliveryResult> => {
69
- callCount++;
70
- return { ok: true, messageId: 99 + callCount };
71
- },
72
- );
73
- });
74
-
75
- afterEach(() => {
76
- mockDeliverChannelReply.mockReset();
77
- });
78
-
79
- // ── Test 1: initial send when buffer reaches MIN_INITIAL_CHARS ──────
80
- test("sends initial message when buffer reaches MIN_INITIAL_CHARS", async () => {
81
- const delivery = createDelivery();
82
- // MIN_INITIAL_CHARS is 20; send 25 chars
83
- delivery.onEvent({
84
- type: "assistant_text_delta",
85
- text: "a".repeat(25),
86
- });
87
-
88
- await flushPromises();
89
-
90
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(1);
91
- const payload = callPayload(0);
92
- expect(payload.text).toBe("a".repeat(25));
93
- // Initial send should NOT have a messageId (it's a new message)
94
- expect(payload.messageId).toBeUndefined();
95
- });
96
-
97
- // ── Test 2: edits message with accumulated text on finish() ─────────
98
- test("edits message with accumulated text on finish()", async () => {
99
- const delivery = createDelivery();
100
- // First: send enough to trigger initial send
101
- delivery.onEvent({
102
- type: "assistant_text_delta",
103
- text: "a".repeat(25),
104
- });
105
- await flushPromises();
106
-
107
- // Then add more text and finish
108
- delivery.onEvent({
109
- type: "assistant_text_delta",
110
- text: "b".repeat(10),
111
- });
112
- await delivery.finish();
113
-
114
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(2);
115
-
116
- // First call: new message (no messageId)
117
- const firstPayload = callPayload(0);
118
- expect(firstPayload.messageId).toBeUndefined();
119
-
120
- // Second call: edit (with messageId from first call)
121
- const secondPayload = callPayload(1);
122
- expect(secondPayload.messageId).toBe(100); // first call returns messageId: 100
123
- expect(secondPayload.text).toBe("a".repeat(25) + "b".repeat(10));
124
- });
125
-
126
- // ── Test 3: sends remainder as new message when messageId missing ───
127
- test("sends remainder as new message when messageId is missing", async () => {
128
- // First call: no messageId in response; second call: with messageId
129
- mockDeliverChannelReply.mockReset();
130
- let localCallCount = 0;
131
- mockDeliverChannelReply.mockImplementation(
132
- async (): Promise<ChannelDeliveryResult> => {
133
- localCallCount++;
134
- if (localCallCount === 1) return { ok: true }; // no messageId
135
- return { ok: true, messageId: 200 };
136
- },
137
- );
138
-
139
- const delivery = createDelivery();
140
- delivery.onEvent({
141
- type: "assistant_text_delta",
142
- text: "a".repeat(25),
143
- });
144
- await flushPromises();
145
-
146
- delivery.onEvent({
147
- type: "assistant_text_delta",
148
- text: "b".repeat(10),
149
- });
150
- await delivery.finish();
151
-
152
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(2);
153
-
154
- // The initial text was already delivered (just without a messageId),
155
- // so the second call should contain only the remainder (buffer text)
156
- const secondPayload = callPayload(1);
157
- expect(secondPayload.text).toBe("b".repeat(10));
158
- // It's sent as a new message (no messageId in payload) since the first
159
- // call didn't return one
160
- expect(secondPayload.messageId).toBeUndefined();
161
- });
162
-
163
- // ── Test 4: sends full text when initial send fails ─────────────────
164
- test("sends full text when initial send fails", async () => {
165
- mockDeliverChannelReply.mockReset();
166
- let localCallCount = 0;
167
- mockDeliverChannelReply.mockImplementation(
168
- async (): Promise<ChannelDeliveryResult> => {
169
- localCallCount++;
170
- if (localCallCount === 1) throw new Error("Network error");
171
- return { ok: true, messageId: 300 };
172
- },
173
- );
174
-
175
- const delivery = createDelivery();
176
- delivery.onEvent({
177
- type: "assistant_text_delta",
178
- text: "a".repeat(25),
179
- });
180
- await flushPromises();
181
-
182
- // The initial send failed; buffer should be restored
183
- await delivery.finish();
184
-
185
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(2);
186
-
187
- // The finish() call should send the complete accumulated text
188
- const secondPayload = callPayload(1);
189
- expect(secondPayload.text).toBe("a".repeat(25));
190
- expect(delivery.finishSucceeded).toBe(true);
191
- });
192
-
193
- // ── Test 5: tool_use_start between text segments produces single message ─
194
- test("tool_use_start between text segments produces single message", async () => {
195
- const delivery = createDelivery();
196
-
197
- // Send enough text to trigger initial message (>= MIN_INITIAL_CHARS=20)
198
- delivery.onEvent({
199
- type: "assistant_text_delta",
200
- text: "Yeah, still here! ", // 18 chars
201
- });
202
- delivery.onEvent({
203
- type: "assistant_text_delta",
204
- text: "aa", // Push past 20 chars
205
- });
206
- await flushPromises();
207
-
208
- // Initial message sent
209
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(1);
210
- const initialPayload = callPayload(0);
211
- expect(initialPayload.messageId).toBeUndefined(); // new message
212
-
213
- // tool_use_start — should NOT finalize/reset message state
214
- delivery.onEvent({
215
- type: "tool_use_start",
216
- toolName: "memory_recall",
217
- input: {},
218
- });
219
- await flushPromises();
220
-
221
- // More text after the tool call
222
- delivery.onEvent({
223
- type: "assistant_text_delta",
224
- text: "What do you need?",
225
- });
226
-
227
- await delivery.finish();
228
-
229
- // The final edit should be to the SAME message (same messageId),
230
- // containing the full combined text
231
- const lastCallIndex = mockDeliverChannelReply.mock.calls.length - 1;
232
- const lastPayload = callPayload(lastCallIndex);
233
- expect(lastPayload.messageId).toBe(100); // same messageId as initial
234
- expect(lastPayload.text).toBe("Yeah, still here! aaWhat do you need?");
235
- });
236
-
237
- // ── Test 5b: multiple tool calls between text segments ──────────────
238
- test("multiple tool calls between text segments produce single message", async () => {
239
- const delivery = createDelivery();
240
-
241
- delivery.onEvent({
242
- type: "assistant_text_delta",
243
- text: "a".repeat(25),
244
- });
245
- await flushPromises();
246
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(1);
247
-
248
- // Two consecutive tool calls
249
- delivery.onEvent({ type: "tool_use_start", toolName: "tool1", input: {} });
250
- delivery.onEvent({ type: "tool_use_start", toolName: "tool2", input: {} });
251
-
252
- // More text after both tool calls
253
- delivery.onEvent({
254
- type: "assistant_text_delta",
255
- text: "b".repeat(10),
256
- });
257
- await delivery.finish();
258
-
259
- // All text should be in the same message
260
- const lastCallIndex = mockDeliverChannelReply.mock.calls.length - 1;
261
- const lastPayload = callPayload(lastCallIndex);
262
- expect(lastPayload.messageId).toBe(100);
263
- expect(lastPayload.text).toBe("a".repeat(25) + "b".repeat(10));
264
- });
265
-
266
- // ── Test 5c: tool_use_start before any text is a no-op ─────────────
267
- test("tool_use_start before any text is a no-op", async () => {
268
- const delivery = createDelivery();
269
-
270
- delivery.onEvent({
271
- type: "tool_use_start",
272
- toolName: "init_tool",
273
- input: {},
274
- });
275
- await flushPromises();
276
-
277
- // No messages should have been sent
278
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(0);
279
-
280
- // Subsequent text should work normally
281
- delivery.onEvent({
282
- type: "assistant_text_delta",
283
- text: "a".repeat(25),
284
- });
285
- await flushPromises();
286
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(1);
287
-
288
- await delivery.finish();
289
- expect(delivery.finishSucceeded).toBe(true);
290
- });
291
-
292
- // ── Test 5d: tool_use_start at end of response finalizes on finish ──
293
- test("tool_use_start at end of response finalizes on finish", async () => {
294
- const delivery = createDelivery();
295
-
296
- delivery.onEvent({
297
- type: "assistant_text_delta",
298
- text: "a".repeat(25),
299
- });
300
- await flushPromises();
301
-
302
- delivery.onEvent({
303
- type: "tool_use_start",
304
- toolName: "final_tool",
305
- input: {},
306
- });
307
- await delivery.finish();
308
-
309
- // The text should have been delivered via the initial message + a final edit
310
- expect(delivery.finishSucceeded).toBe(true);
311
- expect(delivery.hasDeliveredText).toBe(true);
312
- });
313
-
314
- // ── Test 5e: text exceeding max length after tool pause splits at length boundary ─
315
- test("text exceeding max length after tool pause splits at length boundary", async () => {
316
- const delivery = createDelivery();
317
-
318
- // Send ~3900 chars
319
- delivery.onEvent({
320
- type: "assistant_text_delta",
321
- text: "a".repeat(3900),
322
- });
323
- await flushPromises();
324
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(1);
325
-
326
- // Tool call (should not split)
327
- delivery.onEvent({ type: "tool_use_start", toolName: "lookup", input: {} });
328
-
329
- // Send 200 more chars (total 4100 > 4000 limit)
330
- delivery.onEvent({
331
- type: "assistant_text_delta",
332
- text: "b".repeat(200),
333
- });
334
-
335
- await delivery.finish();
336
- await flushPromises();
337
-
338
- // Should have split at 4000-char boundary:
339
- // 1. Initial message (3900 chars)
340
- // 2. Edit at boundary (4000 chars)
341
- // 3. Overflow new message (100 chars)
342
- const calls = mockDeliverChannelReply.mock.calls.length;
343
- expect(calls).toBe(3);
344
-
345
- // Edit at boundary
346
- const editPayload = callPayload(1);
347
- expect((editPayload.text as string).length).toBe(4000);
348
- expect(editPayload.messageId).toBeDefined();
349
-
350
- // Overflow as new message
351
- const overflowPayload = callPayload(2);
352
- expect((overflowPayload.text as string).length).toBe(100);
353
- expect(overflowPayload.messageId).toBeUndefined();
354
- });
355
-
356
- // ── Test 5f: preserves below-threshold text across tool_use_start ───
357
- test("preserves below-threshold text across tool_use_start", async () => {
358
- const delivery = createDelivery();
359
-
360
- // Send text below MIN_INITIAL_CHARS threshold
361
- delivery.onEvent({
362
- type: "assistant_text_delta",
363
- text: "Hi! ", // 4 chars, well below 20
364
- });
365
- await flushPromises();
366
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(0); // not sent yet
367
-
368
- // tool_use_start
369
- delivery.onEvent({
370
- type: "tool_use_start",
371
- toolName: "memory_recall",
372
- input: {},
373
- });
374
- await flushPromises();
375
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(0); // still not sent
376
-
377
- // More text after tool (enough to trigger initial send when combined)
378
- delivery.onEvent({
379
- type: "assistant_text_delta",
380
- text: "What can I help with?", // 21 chars, combined = 25 >= 20
381
- });
382
- await flushPromises();
383
-
384
- // Should have sent initial message with ALL text (pre-tool + post-tool)
385
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(1);
386
- const payload = callPayload(0);
387
- expect(payload.text).toBe("Hi! What can I help with?");
388
-
389
- await delivery.finish();
390
- expect(delivery.finishSucceeded).toBe(true);
391
- });
392
-
393
- // ── Test 5g: delivers below-threshold text when tool_use_start is followed by finish ─
394
- test("delivers below-threshold text when tool_use_start is followed by finish", async () => {
395
- const delivery = createDelivery();
396
-
397
- delivery.onEvent({
398
- type: "assistant_text_delta",
399
- text: "Hi!", // 3 chars
400
- });
401
- delivery.onEvent({
402
- type: "tool_use_start",
403
- toolName: "lookup",
404
- input: {},
405
- });
406
-
407
- await delivery.finish();
408
-
409
- // The "Hi!" should have been sent as a new message during finish
410
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(1);
411
- const payload = callPayload(0);
412
- expect(payload.text).toBe("Hi!");
413
- expect(delivery.finishSucceeded).toBe(true);
414
- });
415
-
416
- // ── Test 5h: no-messageId response doesn't cause duplicate messages on continued deltas ─
417
- test("no-messageId response doesn't cause duplicate messages on continued deltas", async () => {
418
- // Simulate the exact bug from the screenshot: initial send succeeds
419
- // without messageId, then more deltas create overlapping new messages
420
- mockDeliverChannelReply.mockReset();
421
- mockDeliverChannelReply.mockImplementation(
422
- async (): Promise<ChannelDeliveryResult> => {
423
- // All sends return no messageId (simulates gateway omitting it)
424
- return { ok: true };
425
- },
426
- );
427
-
428
- const delivery = createDelivery();
429
-
430
- // First batch: triggers sendInitialMessage (>= 20 chars)
431
- delivery.onEvent({
432
- type: "assistant_text_delta",
433
- text: "Alright, hit me with something",
434
- });
435
- await flushPromises();
436
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(1);
437
- expect(callPayload(0).text).toBe("Alright, hit me with something");
438
-
439
- // More deltas arrive — should NOT trigger another sendInitialMessage
440
- delivery.onEvent({
441
- type: "assistant_text_delta",
442
- text: " longer and let's see if it comes through as one",
443
- });
444
- await flushPromises();
445
- // Still only 1 call — text accumulates in buffer
446
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(1);
447
-
448
- delivery.onEvent({
449
- type: "assistant_text_delta",
450
- text: " message now!",
451
- });
452
- await delivery.finish();
453
-
454
- // finish() should send the remainder as a single new message
455
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(2);
456
- const finishPayload = callPayload(1);
457
- expect(finishPayload.text).toBe(
458
- " longer and let's see if it comes through as one message now!",
459
- );
460
- expect(finishPayload.messageId).toBeUndefined();
461
- expect(delivery.finishSucceeded).toBe(true);
462
- });
463
-
464
- // ── Test 5i: combined threshold accounts for pre-tool currentMessageText ─
465
- test("combined threshold accounts for pre-tool currentMessageText", async () => {
466
- const delivery = createDelivery();
467
-
468
- // Send 15 chars (below 20 threshold)
469
- delivery.onEvent({
470
- type: "assistant_text_delta",
471
- text: "Hello, world!! ", // 15 chars
472
- });
473
- await flushPromises();
474
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(0);
475
-
476
- // tool_use_start moves 15 chars to currentMessageText
477
- delivery.onEvent({
478
- type: "tool_use_start",
479
- toolName: "lookup",
480
- input: {},
481
- });
482
-
483
- // Send only 6 more chars — buffer alone (6) < 20, but combined (21) >= 20
484
- delivery.onEvent({
485
- type: "assistant_text_delta",
486
- text: "Great!",
487
- });
488
- await flushPromises();
489
-
490
- // Should have triggered initial send with combined text
491
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(1);
492
- expect(callPayload(0).text).toBe("Hello, world!! Great!");
493
-
494
- await delivery.finish();
495
- expect(delivery.finishSucceeded).toBe(true);
496
- });
497
-
498
- // ── Test 5j: no-messageId + tool_use_start + finish delivers post-tool text ─
499
- test("no-messageId + tool_use_start + finish delivers post-tool text", async () => {
500
- // Scenario from Devin review: initial send succeeds without messageId,
501
- // more deltas arrive, tool_use_start fires, finish() must deliver post-tool text.
502
- mockDeliverChannelReply.mockReset();
503
- let localCallCount = 0;
504
- mockDeliverChannelReply.mockImplementation(
505
- async (): Promise<ChannelDeliveryResult> => {
506
- localCallCount++;
507
- if (localCallCount === 1) return { ok: true }; // no messageId
508
- return { ok: true, messageId: 400 };
509
- },
510
- );
511
-
512
- const delivery = createDelivery();
513
-
514
- // Step 1: initial send (>= 20 chars), succeeds without messageId
515
- delivery.onEvent({
516
- type: "assistant_text_delta",
517
- text: "a".repeat(25),
518
- });
519
- await flushPromises();
520
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(1);
521
-
522
- // Step 2: more deltas arrive — stuck in buffer (onTextDelta skips both branches)
523
- delivery.onEvent({
524
- type: "assistant_text_delta",
525
- text: "post-tool text",
526
- });
527
- await flushPromises();
528
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(1); // no new call
529
-
530
- // Step 3: tool_use_start — buffer should NOT be moved to currentMessageText
531
- delivery.onEvent({
532
- type: "tool_use_start",
533
- toolName: "some_tool",
534
- input: {},
535
- });
536
-
537
- // Step 4: finish() — should deliver the post-tool text as a new message
538
- await delivery.finish();
539
-
540
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(2);
541
- const secondPayload = callPayload(1);
542
- expect(secondPayload.text).toBe("post-tool text");
543
- expect(secondPayload.messageId).toBeUndefined(); // new message, not edit
544
- expect(delivery.finishSucceeded).toBe(true);
545
- });
546
-
547
- // ── Test 5k: no-messageId + finish with approval sends approval as new message ─
548
- test("no-messageId + finish with approval sends approval as new message", async () => {
549
- // Scenario from Codex review: initial send succeeds without messageId,
550
- // no additional buffer, but finish(approval) must still deliver approval buttons.
551
- mockDeliverChannelReply.mockReset();
552
- mockDeliverChannelReply.mockImplementation(
553
- async (): Promise<ChannelDeliveryResult> => {
554
- return { ok: true }; // no messageId
555
- },
556
- );
557
-
558
- const delivery = createDelivery();
559
-
560
- // Initial send succeeds without messageId
561
- delivery.onEvent({
562
- type: "assistant_text_delta",
563
- text: "a".repeat(25),
564
- });
565
- await flushPromises();
566
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(1);
567
-
568
- // finish() with approval — approval must not be silently dropped
569
- const approval: ApprovalUIMetadata = {
570
- requestId: "test-req",
571
- actions: [{ id: "approve_once", label: "Approve" }],
572
- plainTextFallback: "Reply APPROVE or REJECT",
573
- };
574
- await delivery.finish(approval);
575
-
576
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(2);
577
- const secondPayload = callPayload(1);
578
- // Approval buttons sent as a new message
579
- expect(secondPayload.approval).toEqual(approval);
580
- expect(secondPayload.messageId).toBeUndefined();
581
- expect(delivery.finishSucceeded).toBe(true);
582
- });
583
-
584
- // ── Test 5l: no-messageId + buffer + finish with approval delivers both ─
585
- test("no-messageId + buffer + finish with approval delivers both text and approval", async () => {
586
- // Combined scenario: no-messageId initial send, buffered text, and approval buttons.
587
- mockDeliverChannelReply.mockReset();
588
- let localCallCount = 0;
589
- mockDeliverChannelReply.mockImplementation(
590
- async (): Promise<ChannelDeliveryResult> => {
591
- localCallCount++;
592
- if (localCallCount === 1) return { ok: true }; // no messageId
593
- return { ok: true, messageId: 500 };
594
- },
595
- );
596
-
597
- const delivery = createDelivery();
598
-
599
- // Initial send succeeds without messageId
600
- delivery.onEvent({
601
- type: "assistant_text_delta",
602
- text: "a".repeat(25),
603
- });
604
- await flushPromises();
605
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(1);
606
-
607
- // More deltas arrive
608
- delivery.onEvent({
609
- type: "assistant_text_delta",
610
- text: "remainder",
611
- });
612
-
613
- // finish() with approval — should deliver buffer text + approval together
614
- const approval: ApprovalUIMetadata = {
615
- requestId: "test-req",
616
- actions: [{ id: "approve_once", label: "Approve" }],
617
- plainTextFallback: "Reply APPROVE or REJECT",
618
- };
619
- await delivery.finish(approval);
620
-
621
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(2);
622
- const secondPayload = callPayload(1);
623
- expect(secondPayload.text).toBe("remainder");
624
- expect(secondPayload.approval).toEqual(approval);
625
- expect(secondPayload.messageId).toBeUndefined();
626
- expect(delivery.finishSucceeded).toBe(true);
627
- });
628
-
629
- // ── Test 6: skips final edit when text hasn't changed ───────────────
630
- test("skips final edit when text hasn't changed", async () => {
631
- const delivery = createDelivery();
632
-
633
- // Feed exactly MIN_INITIAL_CHARS (20) to trigger initial send
634
- delivery.onEvent({
635
- type: "assistant_text_delta",
636
- text: "a".repeat(20),
637
- });
638
- await flushPromises();
639
-
640
- // Initial send should have fired
641
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(1);
642
-
643
- // Call finish() with no additional text
644
- await delivery.finish();
645
-
646
- // Should NOT have made a second call since text hasn't changed
647
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(1);
648
- expect(delivery.finishSucceeded).toBe(true);
649
- });
650
-
651
- // ── Test 7: splits message at TELEGRAM_MAX_TEXT_LEN boundary ────────
652
- test("splits message at TELEGRAM_MAX_TEXT_LEN boundary", async () => {
653
- const delivery = createDelivery();
654
-
655
- // Send initial chunk to start a message (>= 20 chars)
656
- delivery.onEvent({
657
- type: "assistant_text_delta",
658
- text: "a".repeat(25),
659
- });
660
- await flushPromises();
661
-
662
- // Initial send fired
663
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(1);
664
-
665
- // Now send enough to exceed TELEGRAM_MAX_TEXT_LEN (4000) when combined
666
- // with the initial 25 chars. The edit is throttled, so the buffer
667
- // accumulates until finish() flushes it. finish() has its own overflow
668
- // handling that splits at the 4000-char boundary.
669
- delivery.onEvent({
670
- type: "assistant_text_delta",
671
- text: "b".repeat(4500),
672
- });
673
-
674
- // Call finish() to flush — this triggers the overflow split in finish()
675
- await delivery.finish();
676
- await flushPromises();
677
-
678
- // finish() should have: (1) edited the current message with up to 4000
679
- // chars, then (2) sent the overflow as a new message.
680
- // Total calls: 1 (initial) + 1 (edit at boundary) + 1 (overflow new message) = 3
681
- expect(mockDeliverChannelReply.mock.calls.length).toBe(3);
682
-
683
- // The second call (edit at boundary) should have text of length 4000
684
- const editPayload = callPayload(1);
685
- expect((editPayload.text as string).length).toBe(4000);
686
- expect(editPayload.messageId).toBeDefined();
687
-
688
- // The third call (overflow) should be a new message (no messageId in payload)
689
- const overflowPayload = callPayload(2);
690
- expect(overflowPayload.messageId).toBeUndefined();
691
- // Overflow should contain the remainder: 25 + 4500 - 4000 = 525 chars
692
- expect((overflowPayload.text as string).length).toBe(525);
693
-
694
- expect(delivery.finishSucceeded).toBe(true);
695
- });
696
-
697
- // ── Test 8: ignores events after finish() is called ─────────────────
698
- test("ignores events after finish() is called", async () => {
699
- const delivery = createDelivery();
700
-
701
- // Send initial text to trigger a message
702
- delivery.onEvent({
703
- type: "assistant_text_delta",
704
- text: "a".repeat(25),
705
- });
706
- await flushPromises();
707
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(1);
708
-
709
- // Finish
710
- await delivery.finish();
711
-
712
- const callsAfterFinish = mockDeliverChannelReply.mock.calls.length;
713
-
714
- // Now send more events -- they should be ignored
715
- delivery.onEvent({
716
- type: "assistant_text_delta",
717
- text: "ignored text",
718
- });
719
- delivery.onEvent({
720
- type: "tool_use_start",
721
- toolName: "ignored_tool",
722
- input: {},
723
- });
724
- await flushPromises();
725
-
726
- // No additional calls should have been made
727
- expect(mockDeliverChannelReply).toHaveBeenCalledTimes(callsAfterFinish);
728
- });
729
- });