@amodalai/runtime 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (209)
  1. package/dist/src/__fixtures__/README.md +4 -0
  2. package/dist/src/__fixtures__/e2e.test.d.ts +6 -0
  3. package/dist/src/__fixtures__/e2e.test.js +211 -0
  4. package/dist/src/__fixtures__/e2e.test.js.map +1 -0
  5. package/dist/src/__fixtures__/smoke-agent/automations/delivery-callback-test.json +9 -0
  6. package/dist/src/__fixtures__/smoke-agent/connections/mock-mcp/spec.json +1 -1
  7. package/dist/src/__fixtures__/smoke.test.js +715 -29
  8. package/dist/src/__fixtures__/smoke.test.js.map +1 -1
  9. package/dist/src/__fixtures__/test-env.d.ts +27 -0
  10. package/dist/src/__fixtures__/test-env.js +64 -0
  11. package/dist/src/__fixtures__/test-env.js.map +1 -0
  12. package/dist/src/__fixtures__/test-helpers.d.ts +30 -0
  13. package/dist/src/__fixtures__/test-helpers.js +120 -0
  14. package/dist/src/__fixtures__/test-helpers.js.map +1 -0
  15. package/dist/src/agent/agent-types.d.ts +22 -0
  16. package/dist/src/agent/agent-types.js.map +1 -1
  17. package/dist/src/agent/automation-bridge.d.ts +9 -0
  18. package/dist/src/agent/automation-bridge.js +26 -0
  19. package/dist/src/agent/automation-bridge.js.map +1 -1
  20. package/dist/src/agent/automation-bridge.test.js +63 -0
  21. package/dist/src/agent/automation-bridge.test.js.map +1 -1
  22. package/dist/src/agent/local-server.d.ts +0 -7
  23. package/dist/src/agent/local-server.js +230 -86
  24. package/dist/src/agent/local-server.js.map +1 -1
  25. package/dist/src/agent/local-server.test.js +14 -8
  26. package/dist/src/agent/local-server.test.js.map +1 -1
  27. package/dist/src/agent/loop-types.d.ts +81 -2
  28. package/dist/src/agent/loop-types.js +4 -0
  29. package/dist/src/agent/loop-types.js.map +1 -1
  30. package/dist/src/agent/loop.js +16 -3
  31. package/dist/src/agent/loop.js.map +1 -1
  32. package/dist/src/agent/loop.test.js +572 -8
  33. package/dist/src/agent/loop.test.js.map +1 -1
  34. package/dist/src/agent/proactive/delivery-router.d.ts +68 -0
  35. package/dist/src/agent/proactive/delivery-router.js +337 -0
  36. package/dist/src/agent/proactive/delivery-router.js.map +1 -0
  37. package/dist/src/agent/proactive/delivery-router.test.d.ts +6 -0
  38. package/dist/src/agent/proactive/delivery-router.test.js +455 -0
  39. package/dist/src/agent/proactive/delivery-router.test.js.map +1 -0
  40. package/dist/src/agent/proactive/proactive-runner.d.ts +23 -1
  41. package/dist/src/agent/proactive/proactive-runner.js +42 -10
  42. package/dist/src/agent/proactive/proactive-runner.js.map +1 -1
  43. package/dist/src/agent/proactive/proactive-runner.test.js +0 -2
  44. package/dist/src/agent/proactive/proactive-runner.test.js.map +1 -1
  45. package/dist/src/agent/routes/admin-chat-abort.test.d.ts +6 -0
  46. package/dist/src/agent/routes/admin-chat-abort.test.js +206 -0
  47. package/dist/src/agent/routes/admin-chat-abort.test.js.map +1 -0
  48. package/dist/src/agent/routes/admin-chat.js +0 -2
  49. package/dist/src/agent/routes/admin-chat.js.map +1 -1
  50. package/dist/src/agent/routes/task.test.js +0 -2
  51. package/dist/src/agent/routes/task.test.js.map +1 -1
  52. package/dist/src/agent/snapshot-server.js +0 -2
  53. package/dist/src/agent/snapshot-server.js.map +1 -1
  54. package/dist/src/agent/states/compacting.js +5 -3
  55. package/dist/src/agent/states/compacting.js.map +1 -1
  56. package/dist/src/agent/states/confirming.js +3 -0
  57. package/dist/src/agent/states/confirming.js.map +1 -1
  58. package/dist/src/agent/states/dispatching.js +45 -1
  59. package/dist/src/agent/states/dispatching.js.map +1 -1
  60. package/dist/src/agent/states/executing.js +225 -81
  61. package/dist/src/agent/states/executing.js.map +1 -1
  62. package/dist/src/agent/states/streaming.js +14 -0
  63. package/dist/src/agent/states/streaming.js.map +1 -1
  64. package/dist/src/agent/states/thinking.d.ts +1 -1
  65. package/dist/src/agent/states/thinking.js +246 -29
  66. package/dist/src/agent/states/thinking.js.map +1 -1
  67. package/dist/src/agent/token-estimate.d.ts +20 -6
  68. package/dist/src/agent/token-estimate.js +24 -3
  69. package/dist/src/agent/token-estimate.js.map +1 -1
  70. package/dist/src/agent/token-estimate.test.d.ts +6 -0
  71. package/dist/src/agent/token-estimate.test.js +44 -0
  72. package/dist/src/agent/token-estimate.test.js.map +1 -0
  73. package/dist/src/api/create-agent.js +0 -3
  74. package/dist/src/api/create-agent.js.map +1 -1
  75. package/dist/src/api/types.d.ts +0 -2
  76. package/dist/src/env-ref.d.ts +13 -0
  77. package/dist/src/env-ref.js +31 -0
  78. package/dist/src/env-ref.js.map +1 -0
  79. package/dist/src/env-ref.test.d.ts +6 -0
  80. package/dist/src/env-ref.test.js +34 -0
  81. package/dist/src/env-ref.test.js.map +1 -0
  82. package/dist/src/errors.d.ts +15 -0
  83. package/dist/src/errors.js +22 -0
  84. package/dist/src/errors.js.map +1 -1
  85. package/dist/src/errors.test.js +2 -2
  86. package/dist/src/errors.test.js.map +1 -1
  87. package/dist/src/events/event-bus.d.ts +54 -0
  88. package/dist/src/events/event-bus.js +84 -0
  89. package/dist/src/events/event-bus.js.map +1 -0
  90. package/dist/src/events/event-bus.test.d.ts +6 -0
  91. package/dist/src/events/event-bus.test.js +112 -0
  92. package/dist/src/events/event-bus.test.js.map +1 -0
  93. package/dist/src/events/events-route.d.ts +36 -0
  94. package/dist/src/events/events-route.js +80 -0
  95. package/dist/src/events/events-route.js.map +1 -0
  96. package/dist/src/events/events-route.test.d.ts +6 -0
  97. package/dist/src/events/events-route.test.js +134 -0
  98. package/dist/src/events/events-route.test.js.map +1 -0
  99. package/dist/src/events/store-event-wrapper.d.ts +19 -0
  100. package/dist/src/events/store-event-wrapper.js +57 -0
  101. package/dist/src/events/store-event-wrapper.js.map +1 -0
  102. package/dist/src/events/store-event-wrapper.test.d.ts +6 -0
  103. package/dist/src/events/store-event-wrapper.test.js +91 -0
  104. package/dist/src/events/store-event-wrapper.test.js.map +1 -0
  105. package/dist/src/middleware/auth.d.ts +0 -2
  106. package/dist/src/middleware/auth.js.map +1 -1
  107. package/dist/src/providers/search-provider.d.ts +64 -0
  108. package/dist/src/providers/search-provider.js +174 -0
  109. package/dist/src/providers/search-provider.js.map +1 -0
  110. package/dist/src/providers/types.d.ts +8 -0
  111. package/dist/src/routes/ai-stream.d.ts +15 -0
  112. package/dist/src/routes/ai-stream.js +9 -0
  113. package/dist/src/routes/ai-stream.js.map +1 -1
  114. package/dist/src/routes/chat-stream.d.ts +6 -0
  115. package/dist/src/routes/chat-stream.js +2 -0
  116. package/dist/src/routes/chat-stream.js.map +1 -1
  117. package/dist/src/routes/chat.d.ts +6 -0
  118. package/dist/src/routes/chat.js +2 -0
  119. package/dist/src/routes/chat.js.map +1 -1
  120. package/dist/src/routes/session-resolver.d.ts +5 -0
  121. package/dist/src/routes/session-resolver.js +1 -15
  122. package/dist/src/routes/session-resolver.js.map +1 -1
  123. package/dist/src/routes/session-resolver.test.js +7 -6
  124. package/dist/src/routes/session-resolver.test.js.map +1 -1
  125. package/dist/src/server.d.ts +6 -0
  126. package/dist/src/server.js +2 -0
  127. package/dist/src/server.js.map +1 -1
  128. package/dist/src/session/drizzle-session-store.d.ts +56 -0
  129. package/dist/src/session/drizzle-session-store.js +203 -0
  130. package/dist/src/session/drizzle-session-store.js.map +1 -0
  131. package/dist/src/session/manager.d.ts +6 -3
  132. package/dist/src/session/manager.js +46 -16
  133. package/dist/src/session/manager.js.map +1 -1
  134. package/dist/src/session/manager.test.js +12 -18
  135. package/dist/src/session/manager.test.js.map +1 -1
  136. package/dist/src/session/pglite-session-store.d.ts +23 -0
  137. package/dist/src/session/pglite-session-store.js +70 -0
  138. package/dist/src/session/pglite-session-store.js.map +1 -0
  139. package/dist/src/session/postgres-session-store.d.ts +44 -0
  140. package/dist/src/session/postgres-session-store.js +138 -0
  141. package/dist/src/session/postgres-session-store.js.map +1 -0
  142. package/dist/src/session/session-builder.d.ts +0 -2
  143. package/dist/src/session/session-builder.js +22 -2
  144. package/dist/src/session/session-builder.js.map +1 -1
  145. package/dist/src/session/session-builder.test.js +0 -2
  146. package/dist/src/session/session-builder.test.js.map +1 -1
  147. package/dist/src/session/session-store-selector.d.ts +49 -0
  148. package/dist/src/session/session-store-selector.js +60 -0
  149. package/dist/src/session/session-store-selector.js.map +1 -0
  150. package/dist/src/session/session-store-selector.test.d.ts +6 -0
  151. package/dist/src/session/session-store-selector.test.js +79 -0
  152. package/dist/src/session/session-store-selector.test.js.map +1 -0
  153. package/dist/src/session/store.d.ts +146 -32
  154. package/dist/src/session/store.js +126 -138
  155. package/dist/src/session/store.js.map +1 -1
  156. package/dist/src/session/store.test.js +385 -107
  157. package/dist/src/session/store.test.js.map +1 -1
  158. package/dist/src/session/tool-context-factory.d.ts +3 -2
  159. package/dist/src/session/tool-context-factory.js +1 -2
  160. package/dist/src/session/tool-context-factory.js.map +1 -1
  161. package/dist/src/session/tool-context-factory.test.js +1 -4
  162. package/dist/src/session/tool-context-factory.test.js.map +1 -1
  163. package/dist/src/session/types.d.ts +13 -6
  164. package/dist/src/stores/schema.d.ts +0 -34
  165. package/dist/src/stores/schema.js +6 -4
  166. package/dist/src/stores/schema.js.map +1 -1
  167. package/dist/src/tools/admin-file-tools.d.ts +29 -0
  168. package/dist/src/tools/admin-file-tools.js +525 -11
  169. package/dist/src/tools/admin-file-tools.js.map +1 -1
  170. package/dist/src/tools/admin-file-tools.test.js +373 -4
  171. package/dist/src/tools/admin-file-tools.test.js.map +1 -1
  172. package/dist/src/tools/custom-tool-adapter.test.js +0 -1
  173. package/dist/src/tools/custom-tool-adapter.test.js.map +1 -1
  174. package/dist/src/tools/dispatch-tool.d.ts +4 -4
  175. package/dist/src/tools/fetch-url-tool.d.ts +23 -0
  176. package/dist/src/tools/fetch-url-tool.js +333 -0
  177. package/dist/src/tools/fetch-url-tool.js.map +1 -0
  178. package/dist/src/tools/fetch-url-tool.test.d.ts +6 -0
  179. package/dist/src/tools/fetch-url-tool.test.js +228 -0
  180. package/dist/src/tools/fetch-url-tool.test.js.map +1 -0
  181. package/dist/src/tools/mcp-tool-adapter.test.js +0 -1
  182. package/dist/src/tools/mcp-tool-adapter.test.js.map +1 -1
  183. package/dist/src/tools/registry.test.js +0 -1
  184. package/dist/src/tools/registry.test.js.map +1 -1
  185. package/dist/src/tools/request-tool.test.js +0 -1
  186. package/dist/src/tools/request-tool.test.js.map +1 -1
  187. package/dist/src/tools/store-tools.test.js +0 -1
  188. package/dist/src/tools/store-tools.test.js.map +1 -1
  189. package/dist/src/tools/types.d.ts +20 -2
  190. package/dist/src/tools/web-search-tool.d.ts +31 -0
  191. package/dist/src/tools/web-search-tool.js +170 -0
  192. package/dist/src/tools/web-search-tool.js.map +1 -0
  193. package/dist/src/tools/web-search-tool.test.d.ts +6 -0
  194. package/dist/src/tools/web-search-tool.test.js +153 -0
  195. package/dist/src/tools/web-search-tool.test.js.map +1 -0
  196. package/dist/src/tools/web-tools-shared.d.ts +21 -0
  197. package/dist/src/tools/web-tools-shared.js +32 -0
  198. package/dist/src/tools/web-tools-shared.js.map +1 -0
  199. package/dist/src/types.d.ts +20 -0
  200. package/dist/src/types.js +13 -0
  201. package/dist/src/types.js.map +1 -1
  202. package/dist/tsconfig.tsbuildinfo +1 -1
  203. package/package.json +17 -3
  204. package/dist/src/agent/session-store.d.ts +0 -71
  205. package/dist/src/agent/session-store.js +0 -151
  206. package/dist/src/agent/session-store.js.map +0 -1
  207. package/dist/src/session/admin-file-tools.d.ts +0 -136
  208. package/dist/src/session/admin-file-tools.js +0 -240
  209. package/dist/src/session/admin-file-tools.js.map +0 -1
@@ -13,26 +13,12 @@
13
13
  import { describe, it, expect, beforeAll, afterAll } from 'vitest';
14
14
  import { fork } from 'node:child_process';
15
15
  import { resolve } from 'node:path';
16
- import { readFileSync, writeFileSync, rmSync } from 'node:fs';
17
- // Load API keys from repo root .env.test if not already set.
18
- // To run smoke tests: create .env.test at the repo root with ANTHROPIC_API_KEY=sk-ant-...
19
- // This file is gitignored — never commit API keys.
20
- if (!process.env['ANTHROPIC_API_KEY']) {
21
- try {
22
- const envPath = resolve(__dirname, '../../../../.env.test');
23
- const envContent = readFileSync(envPath, 'utf-8');
24
- for (const line of envContent.split('\n')) {
25
- const match = line.match(/^([^#=]+)=(.*)$/);
26
- if (match) {
27
- const [, key, value] = match;
28
- if (key && value && !process.env[key.trim()]) {
29
- process.env[key.trim()] = value.trim();
30
- }
31
- }
32
- }
33
- }
34
- catch { /* no .env.test — tests will skip */ }
35
- }
16
+ import { readFileSync, writeFileSync, rmSync, readdirSync } from 'node:fs';
17
+ import { expectDoneReason, expectTotalTokens } from './test-helpers.js';
18
+ import { loadTestEnv, defaultTargetName } from './test-env.js';
19
+ // Pull API keys out of <repo-root>/.env.test (gitignored). Missing keys
20
+ // cause the describe block below to skip with a reason.
21
+ loadTestEnv();
36
22
  // ---------------------------------------------------------------------------
37
23
  // Config
38
24
  // ---------------------------------------------------------------------------
@@ -42,6 +28,18 @@ const AGENT_DIR = resolve(__dirname, 'smoke-agent');
42
28
  const REST_SERVER = resolve(__dirname, 'smoke-rest-server.mjs');
43
29
  const MCP_SERVER = resolve(__dirname, 'smoke-mcp-server.mjs');
44
30
  const TIMEOUT = 45_000; // per-test timeout for LLM calls
31
+ const SMOKE_TARGETS = {
32
+ anthropic: { provider: 'anthropic', model: 'claude-sonnet-4-20250514', apiKeyEnv: 'ANTHROPIC_API_KEY' },
33
+ google: { provider: 'google', model: 'gemini-2.5-flash', apiKeyEnv: 'GOOGLE_API_KEY' },
34
+ openai: { provider: 'openai', model: 'gpt-4o-mini', apiKeyEnv: 'OPENAI_API_KEY' },
35
+ groq: { provider: 'groq', model: 'llama-3.3-70b-versatile', apiKeyEnv: 'GROQ_API_KEY' },
36
+ };
37
+ function pickSmokeTarget() {
38
+ const override = process.env['SMOKE_TARGET'];
39
+ const name = override ?? defaultTargetName(SMOKE_TARGETS);
40
+ return { name, target: SMOKE_TARGETS[name] };
41
+ }
42
+ const { name: smokeTargetName, target: smokeTarget } = pickSmokeTarget();
45
43
  // ---------------------------------------------------------------------------
46
44
  // Helpers
47
45
  // ---------------------------------------------------------------------------
@@ -58,10 +56,12 @@ async function waitForServer(port, maxMs = 15_000) {
58
56
  }
59
57
  throw new Error(`Server on port ${port} did not start within ${maxMs}ms`);
60
58
  }
61
- async function chat(message, sessionId) {
59
+ async function chat(message, sessionId, opts) {
62
60
  const body = { message };
63
61
  if (sessionId)
64
62
  body['session_id'] = sessionId;
63
+ if (opts?.maxSessionTokens !== undefined)
64
+ body['max_session_tokens'] = opts.maxSessionTokens;
65
65
  const res = await fetch(`http://localhost:${AGENT_PORT}/chat`, {
66
66
  method: 'POST',
67
67
  headers: { 'Content-Type': 'application/json' },
@@ -102,14 +102,45 @@ function allText(events) {
102
102
  // ---------------------------------------------------------------------------
103
103
  let restServer = null;
104
104
  let agentServer = null;
105
- const skipReason = process.env['ANTHROPIC_API_KEY'] ? '' : 'ANTHROPIC_API_KEY not set';
106
- describe.skipIf(!!skipReason)('smoke tests', () => {
105
+ /** Captures payloads delivered to callback-type targets for assertion. */
106
+ const receivedAutomationResults = [];
107
+ const skipReason = !smokeTarget
108
+ ? `unknown SMOKE_TARGET "${smokeTargetName}"; known: ${Object.keys(SMOKE_TARGETS).join(', ')}`
109
+ : process.env[smokeTarget.apiKeyEnv]
110
+ ? ''
111
+ : `${smokeTarget.apiKeyEnv} not set`;
112
+ describe.skipIf(!!skipReason)(`smoke tests [${smokeTargetName}]`, () => {
113
+ // Stash fixture files so afterAll can restore them; otherwise the
114
+ // per-run rewrites (provider + absolute MCP path) leak into the repo.
115
+ const amodalPath = resolve(AGENT_DIR, 'amodal.json');
116
+ const mcpSpecPath = resolve(AGENT_DIR, 'connections/mock-mcp/spec.json');
117
+ const originalAmodalJson = readFileSync(amodalPath, 'utf-8');
118
+ const originalMcpSpec = readFileSync(mcpSpecPath, 'utf-8');
107
119
  beforeAll(async () => {
108
120
  // 0. Nuke prior state — clean slate for every run
109
121
  rmSync(resolve(AGENT_DIR, '.amodal/store-data'), { recursive: true, force: true });
110
- rmSync(resolve(AGENT_DIR, '.amodal/sessions'), { recursive: true, force: true });
111
- // 2. Write MCP server spec with absolute path (loadRepo reads this as-is)
112
- writeFileSync(resolve(AGENT_DIR, 'connections/mock-mcp/spec.json'), JSON.stringify({ protocol: 'mcp', transport: 'stdio', command: 'node', args: [MCP_SERVER] }, null, 2));
122
+ // 1. Rewrite amodal.json with the selected provider/model.
123
+ // smokeTarget is guaranteed defined here — skipReason above gates
124
+ // the describe block when it's undefined or missing a key.
125
+ if (!smokeTarget)
126
+ throw new Error('unreachable: smokeTarget is undefined under skipReason guard');
127
+ const amodalConfig = JSON.parse(originalAmodalJson);
128
+ amodalConfig['models'] = {
129
+ main: { provider: smokeTarget.provider, model: smokeTarget.model },
130
+ };
131
+ // Enable web_search + fetch_url tools when a Google API key is available.
132
+ // Key resolution happens in the core config parser via env: prefix.
133
+ if (process.env['GOOGLE_API_KEY']) {
134
+ amodalConfig['webTools'] = {
135
+ provider: 'google',
136
+ apiKey: 'env:GOOGLE_API_KEY',
137
+ model: 'gemini-3-flash-preview',
138
+ };
139
+ }
140
+ writeFileSync(amodalPath, JSON.stringify(amodalConfig, null, 2));
141
+ // 2. Write MCP server spec with absolute path (loadRepo reads this as-is).
142
+ // Restored in afterAll so the env-specific path doesn't leak into git.
143
+ writeFileSync(mcpSpecPath, JSON.stringify({ protocol: 'mcp', transport: 'stdio', command: 'node', args: [MCP_SERVER] }, null, 2));
113
144
  // 3. Start mock REST server
114
145
  restServer = fork(REST_SERVER, [], {
115
146
  env: { ...process.env, SMOKE_REST_PORT: String(REST_PORT) },
@@ -122,6 +153,9 @@ describe.skipIf(!!skipReason)('smoke tests', () => {
122
153
  repoPath: AGENT_DIR,
123
154
  port: AGENT_PORT,
124
155
  hotReload: false,
156
+ onAutomationResult: (payload) => {
157
+ receivedAutomationResults.push(payload);
158
+ },
125
159
  });
126
160
  await agentServer.start();
127
161
  await waitForServer(AGENT_PORT);
@@ -133,6 +167,10 @@ describe.skipIf(!!skipReason)('smoke tests', () => {
133
167
  if (restServer) {
134
168
  restServer.kill('SIGTERM');
135
169
  }
170
+ // Restore fixture files so the per-run rewrites stay test-local and
171
+ // don't show up in git status afterwards.
172
+ writeFileSync(amodalPath, originalAmodalJson);
173
+ writeFileSync(mcpSpecPath, originalMcpSpec);
136
174
  });
137
175
  // -------------------------------------------------------------------------
138
176
  // 1. Server lifecycle
@@ -272,9 +310,205 @@ describe.skipIf(!!skipReason)('smoke tests', () => {
272
310
  const toolStarts = findEvents(events, 'tool_call_start');
273
311
  const readTool = toolStarts.find((e) => e['tool_name'] === 'read_repo_file');
274
312
  expect(readTool).toBeDefined();
313
+ // The matching result should be a success — validates the full
314
+ // tool_call_start → execute → tool_call_result SSE round-trip.
315
+ const toolResults = findEvents(events, 'tool_call_result');
316
+ const readResult = toolResults.find((e) => e['tool_id'] === readTool?.['tool_id']);
317
+ expect(readResult).toBeDefined();
318
+ expect(readResult?.['status']).toBe('success');
275
319
  const responseText = allText(events);
276
320
  expect(responseText.toLowerCase()).toContain('test');
277
321
  }, TIMEOUT);
322
+ // End-to-end: the "reduce emojis in formatting rules" scenario from the
323
+ // admin-agent regression. Before the discovery + edit tools existed, the
324
+ // agent guessed wrong paths and often created a new skill file instead
325
+ // of editing the existing knowledge doc. With list_repo_files /
326
+ // glob_repo_files / grep_repo_files / edit_repo_file available, it
327
+ // should discover knowledge/formatting-rules.md and edit it in place.
328
+ it('admin agent discovers and edits the right file (emoji-reduction scenario)', async () => {
329
+ const formattingRulesPath = resolve(AGENT_DIR, 'knowledge', 'formatting-rules.md');
330
+ const emojiHeavyBody = [
331
+ '# Formatting Rules 🎨',
332
+ '',
333
+ 'Use emojis liberally to make the output more engaging! 🎉🎉🎉',
334
+ '',
335
+ '## Tone 💬',
336
+ '',
337
+ "Drop a 🚀 when celebrating a win, a 🔥 when highlighting risk, and a ✨ when introducing a new feature. Don't hold back! 🙌",
338
+ '',
339
+ 'Every bullet point should start with an emoji. 📝 Every heading should have one too. 🏷️',
340
+ '',
341
+ '## Examples 📚',
342
+ '- ✅ "Deployment succeeded 🎉"',
343
+ '- ❌ "Deployment failed 💥"',
344
+ '',
345
+ ].join('\n');
346
+ const emojiCount = (s) => (s.match(/\p{Emoji_Presentation}/gu) ?? []).length;
347
+ const initialEmojis = emojiCount(emojiHeavyBody);
348
+ expect(initialEmojis).toBeGreaterThan(5);
349
+ writeFileSync(formattingRulesPath, emojiHeavyBody);
350
+ // Snapshot skills/ so we can assert the agent didn't create a bogus skill.
351
+ const skillsDir = resolve(AGENT_DIR, 'skills');
352
+ const skillsBefore = new Set(readdirSync(skillsDir));
353
+ try {
354
+ const res = await fetch(`http://localhost:${AGENT_PORT}/config/chat`, {
355
+ method: 'POST',
356
+ headers: { 'Content-Type': 'application/json' },
357
+ body: JSON.stringify({
358
+ message: 'I want to use emojis less often in my formatting rules. Find where they are defined in my repo and reduce the emoji guidance — remove most emoji usage from the instructions, keep the document but make it plain text. Work carefully: first look around to find the right file, then edit it in place. Do not create any new skills.',
359
+ }),
360
+ signal: AbortSignal.timeout(TIMEOUT * 2),
361
+ });
362
+ const text = await res.text();
363
+ const events = parseSSE(text);
364
+ const toolStarts = findEvents(events, 'tool_call_start');
365
+ const toolNames = toolStarts.map((e) => String(e['tool_name']));
366
+ // Discovery: the agent should have used at least one of the new
367
+ // discovery tools to find formatting-rules.md instead of guessing.
368
+ const usedDiscovery = toolNames.some((n) => n === 'list_repo_files' || n === 'glob_repo_files' || n === 'grep_repo_files');
369
+ expect(usedDiscovery).toBe(true);
370
+ // Action: the agent should modify the existing file rather than create
371
+ // a new skill. edit_repo_file is preferred; write_repo_file is also
372
+ // accepted, though this check does not verify which path it wrote to.
373
+ const editedInPlace = toolNames.includes('edit_repo_file');
374
+ const rewroteFile = toolNames.includes('write_repo_file');
375
+ expect(editedInPlace || rewroteFile).toBe(true);
376
+ // Regression guard: agent must NOT have created a new skill.
377
+ const skillsAfter = new Set(readdirSync(skillsDir));
378
+ const newSkills = [...skillsAfter].filter((s) => !skillsBefore.has(s));
379
+ expect(newSkills).toEqual([]);
380
+ // Outcome: the file should still exist and contain significantly
381
+ // fewer emojis than before.
382
+ const after = readFileSync(formattingRulesPath, 'utf-8');
383
+ expect(after.length).toBeGreaterThan(0);
384
+ const afterEmojis = emojiCount(after);
385
+ expect(afterEmojis).toBeLessThan(initialEmojis);
386
+ }
387
+ finally {
388
+ // Clean up — remove the formatting-rules.md fixture regardless of pass/fail.
389
+ rmSync(formattingRulesPath, { force: true });
390
+ }
391
+ }, TIMEOUT * 2);
392
+ // Pagination end-to-end: drop a 3000-line file with a sentinel on line
393
+ // 2800, ask the admin agent to report what's there verbatim. The default
394
+ // read cap is 2000 lines, so the agent MUST either paginate via offset
395
+ // or use grep. Verifies the new line_start/line_end/total_lines/
396
+ // truncated response shape is actually usable by a real LLM.
397
+ it('admin agent paginates a long file to reach content past the default cap', async () => {
398
+ const bigFilePath = resolve(AGENT_DIR, 'knowledge', 'big-file.md');
399
+ // Sentinel must be distinct enough that the agent can quote it back.
400
+ const SENTINEL = 'TARGET:CONTENT:ABCD1234:the-answer-is-42';
401
+ const TARGET_LINE = 2800;
402
+ const TOTAL_LINES = 3000;
403
+ const body = Array.from({ length: TOTAL_LINES }, (_, i) => {
404
+ const n = i + 1;
405
+ return n === TARGET_LINE ? `line ${String(n)}: ${SENTINEL}` : `line ${String(n)}: filler`;
406
+ }).join('\n');
407
+ writeFileSync(bigFilePath, body);
408
+ try {
409
+ const res = await fetch(`http://localhost:${AGENT_PORT}/config/chat`, {
410
+ method: 'POST',
411
+ headers: { 'Content-Type': 'application/json' },
412
+ body: JSON.stringify({
413
+ message: `I just added a long file at knowledge/big-file.md. Tell me exactly what's on line ${String(TARGET_LINE)} — report the full line content verbatim. Just give me the line, no summary.`,
414
+ }),
415
+ signal: AbortSignal.timeout(TIMEOUT * 2),
416
+ });
417
+ const text = await res.text();
418
+ const events = parseSSE(text);
419
+ const toolStarts = findEvents(events, 'tool_call_start');
420
+ const toolNames = toolStarts.map((e) => String(e['tool_name']));
421
+ // The agent needs to touch the file — either read_repo_file or
422
+ // grep_repo_files would work to find the target line.
423
+ const touchedFile = toolNames.some((n) => n === 'read_repo_file' || n === 'grep_repo_files');
424
+ expect(touchedFile).toBe(true);
425
+ // If the agent used read_repo_file, at least one call must have
426
+ // specified an offset/limit that covers line 2800 (the default
427
+ // 2000-line window doesn't reach it, so the agent HAS to adapt).
428
+ const readCalls = toolStarts.filter((e) => e['tool_name'] === 'read_repo_file');
429
+ if (readCalls.length > 0) {
430
+ const usedPagination = readCalls.some((e) => {
431
+ const params = e['parameters'];
432
+ if (!params)
433
+ return false;
434
+ const offset = typeof params['offset'] === 'number' ? params['offset'] : 1;
435
+ const limit = typeof params['limit'] === 'number' ? params['limit'] : 2000;
436
+ // Covers line TARGET_LINE if offset <= TARGET_LINE AND
437
+ // offset + limit - 1 >= TARGET_LINE.
438
+ return offset <= TARGET_LINE && offset + limit - 1 >= TARGET_LINE;
439
+ });
440
+ expect(usedPagination).toBe(true);
441
+ }
442
+ // Hard assertion: the response contains the sentinel verbatim.
443
+ const responseText = allText(events);
444
+ expect(responseText).toContain(SENTINEL);
445
+ }
446
+ finally {
447
+ rmSync(bigFilePath, { force: true });
448
+ }
449
+ }, TIMEOUT * 2);
450
+ // Multi-chunk pagination: sentinels spread across a 5000-line file so no
451
+ // single default read (2000 lines) can cover all of them. Verifies the
452
+ // agent either (a) chains multiple reads following the truncated: true
453
+ // signal, or (b) uses grep. Either is acceptable — what matters is that
454
+ // the agent finds content past the default window.
455
+ it('admin agent finds content scattered across a long file via pagination or grep', async () => {
456
+ const bigFilePath = resolve(AGENT_DIR, 'knowledge', 'scatter.md');
457
+ const MARKER = 'MARKER-ZXCV9876';
458
+ const MARKER_LINES = [500, 2500, 4500];
459
+ const TOTAL_LINES = 5000;
460
+ const body = Array.from({ length: TOTAL_LINES }, (_, i) => {
461
+ const n = i + 1;
462
+ return MARKER_LINES.includes(n) ? `line ${String(n)}: ${MARKER}` : `line ${String(n)}: filler`;
463
+ }).join('\n');
464
+ writeFileSync(bigFilePath, body);
465
+ try {
466
+ const res = await fetch(`http://localhost:${AGENT_PORT}/config/chat`, {
467
+ method: 'POST',
468
+ headers: { 'Content-Type': 'application/json' },
469
+ body: JSON.stringify({
470
+ message: `Read knowledge/scatter.md and quote the exact content of line 500, line 2500, and line 4500 verbatim. Report each line's full text.`,
471
+ }),
472
+ signal: AbortSignal.timeout(TIMEOUT * 2),
473
+ });
474
+ const text = await res.text();
475
+ const events = parseSSE(text);
476
+ const toolStarts = findEvents(events, 'tool_call_start');
477
+ const toolNames = toolStarts.map((e) => String(e['tool_name']));
478
+ // Agent must have touched the file.
479
+ const touchedFile = toolNames.some((n) => n === 'read_repo_file' || n === 'grep_repo_files');
480
+ expect(touchedFile).toBe(true);
481
+ // If the agent committed to read-only discovery (no grep), verify at
482
+ // least one read_repo_file call reached past the default 2000-line
483
+ // cap — otherwise it couldn't have seen markers at lines 2500 or
484
+ // 4500. When grep is used first, pagination isn't required because
485
+ // the agent may have used read_repo_file only to confirm a line it
486
+ // already found via grep.
487
+ const usedGrep = toolNames.includes('grep_repo_files');
488
+ const readCalls = toolStarts.filter((e) => e['tool_name'] === 'read_repo_file');
489
+ if (readCalls.length > 0 && !usedGrep) {
490
+ const reachedPastCap = readCalls.some((e) => {
491
+ const params = e['parameters'];
492
+ if (!params)
493
+ return false;
494
+ const offset = typeof params['offset'] === 'number' ? params['offset'] : 1;
495
+ const limit = typeof params['limit'] === 'number' ? params['limit'] : 2000;
496
+ // A single read covers up to line_end = offset + limit - 1.
497
+ return offset + limit - 1 > 2000;
498
+ });
499
+ expect(reachedPastCap).toBe(true);
500
+ }
501
+ // Hard assertion: final response identifies all three marker line
502
+ // numbers. LLMs paraphrase, so search the response for each number.
503
+ const responseText = allText(events);
504
+ for (const n of MARKER_LINES) {
505
+ expect(responseText).toContain(String(n));
506
+ }
507
+ }
508
+ finally {
509
+ rmSync(bigFilePath, { force: true });
510
+ }
511
+ }, TIMEOUT * 2);
278
512
  // -------------------------------------------------------------------------
279
513
  // 10. Write intent enforcement (G8)
280
514
  // -------------------------------------------------------------------------
@@ -372,6 +606,44 @@ describe.skipIf(!!skipReason)('smoke tests', () => {
372
606
  expect(mentionsAny).toBe(true);
373
607
  }, TIMEOUT * 3);
374
608
  // -------------------------------------------------------------------------
609
+ // 11e. Parallel tool calls — batched read-only execution
610
+ // -------------------------------------------------------------------------
611
it('batches parallel read-only tool calls in a single turn', async () => {
    // Seed three distinct items so the follow-up prompt has one record to
    // fetch per key.
    const seed = [
        ['parallel-alpha', 'Alpha Marker'],
        ['parallel-beta', 'Beta Marker'],
        ['parallel-gamma', 'Gamma Marker'],
    ];
    const writes = await Promise.all(seed.map(([id, name]) => chat(`Write to test-items store: item_id="${id}", name="${name}", status="active".`)));
    const allWritesOk = writes.every((w) => findEvents(w.events, 'tool_call_result').some((e) => e['status'] === 'success'));
    if (!allWritesOk) {
        // Soft skip, but say so — a silent return is indistinguishable from a pass.
        // eslint-disable-next-line no-console -- intentional test diagnostic
        console.warn('[smoke] Seeding test-items failed — parallel-batch path not exercised this run');
        return;
    }
    // Ask the model to fetch all three in parallel. Models sometimes split
    // this across turns; when they emit a single-turn parallel batch we
    // verify the runtime handled it correctly end-to-end.
    const { events } = await chat('Fetch all three of these items from the test-items store in parallel ' +
        'using three concurrent query_store tool calls (one per key): ' +
        '"parallel-alpha", "parallel-beta", "parallel-gamma". Then list the name ' +
        'field of each item in your response.');
    const queryStoreStarts = findEvents(events, 'tool_call_start').filter((e) => e['tool_name'] === 'query_store');
    if (queryStoreStarts.length < 2) {
        // eslint-disable-next-line no-console -- intentional test diagnostic
        console.warn(`[smoke] Model emitted ${String(queryStoreStarts.length)} query_store call(s) — ` +
            'parallel-batch path not exercised this run (LLM non-determinism)');
        return;
    }
    // Every query_store start must have its OWN successful result, paired by
    // tool_id. Counting successes across all tools would let an unrelated
    // tool's success mask a dropped or failed query_store result.
    const successfulIds = new Set(findEvents(events, 'tool_call_result')
        .filter((e) => e['status'] === 'success')
        .map((e) => e['tool_id']));
    const unmatched = queryStoreStarts
        .filter((start) => !successfulIds.has(start['tool_id']))
        .map((start) => String(start['tool_id']));
    expect(unmatched).toEqual([]);
}, TIMEOUT * 3);
646
+ // -------------------------------------------------------------------------
375
647
  // 12. Concurrent sessions don't bleed context
376
648
  // -------------------------------------------------------------------------
377
649
  it('concurrent sessions are isolated', async () => {
@@ -558,9 +830,6 @@ describe.skipIf(!!skipReason)('smoke tests', () => {
558
830
  // 23. Sessions — listing and history
559
831
  // -------------------------------------------------------------------------
560
832
  it('sessions endpoint returns a sessions array', async () => {
561
- // Chat sessions in local dev don't auto-populate the legacy session store
562
- // used by /sessions (only automation runs do), so we just verify the
563
- // endpoint returns the expected shape.
564
833
  const res = await fetch(`http://localhost:${AGENT_PORT}/sessions`, { signal: AbortSignal.timeout(5000) });
565
834
  const body = await res.json();
566
835
  expect(res.status).toBe(200);
@@ -570,6 +839,79 @@ describe.skipIf(!!skipReason)('smoke tests', () => {
570
839
  const res = await fetch(`http://localhost:${AGENT_PORT}/session/nonexistent-id`, { signal: AbortSignal.timeout(5000) });
571
840
  expect(res.status).toBe(404);
572
841
  });
842
it('persists chat session through full list/get/patch/delete lifecycle', async () => {
    // Walks the dev-UI session history loop end to end:
    // create (via chat) → list → get → rename → delete.
    const baseUrl = `http://localhost:${AGENT_PORT}`;
    const shortTimeout = () => AbortSignal.timeout(5000);
    const { sessionId } = await chat('Say "ok" in one word.');
    expect(sessionId).toBeTruthy();
    const sessionUrl = `${baseUrl}/session/${sessionId}`;
    const isThisSession = (s) => s['id'] === sessionId;

    // Step 1 — the new session is listed with the shape the UI expects.
    const listRes = await fetch(`${baseUrl}/sessions`, { signal: shortTimeout() });
    const listBody = await listRes.json();
    expect(listRes.status).toBe(200);
    const found = listBody.sessions.find(isThisSession);
    expect(found).toBeDefined();
    if (!found) {
        throw new Error('unreachable');
    }
    expect(found['appId']).toBe('local');
    expect(typeof found['summary']).toBe('string');
    expect(String(found['summary']).length).toBeGreaterThan(0);
    for (const stampKey of ['createdAt', 'lastAccessedAt']) {
        expect(typeof found[stampKey]).toBe('number');
    }
    expect(found['automationName']).toBeUndefined();

    // Step 2 — fetching the session by id returns its message history.
    const getRes = await fetch(sessionUrl, { signal: shortTimeout() });
    const getBody = await getRes.json();
    expect(getRes.status).toBe(200);
    expect(getBody.session_id).toBe(sessionId);
    expect(getBody.messages.length).toBeGreaterThan(0);
    const [firstMessage] = getBody.messages;
    expect(firstMessage.role).toBe('user');
    expect(firstMessage.text).toContain('Say "ok"');

    // Step 3 — renaming via PATCH is reflected in the next listing, both as
    // the title and as the summary.
    const patchRes = await fetch(sessionUrl, {
        method: 'PATCH',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ title: 'smoke renamed' }),
        signal: shortTimeout(),
    });
    expect(patchRes.status).toBe(200);
    const relistRes = await fetch(`${baseUrl}/sessions`, { signal: shortTimeout() });
    const relistBody = await relistRes.json();
    const renamed = relistBody.sessions.find(isThisSession);
    expect(renamed?.['title']).toBe('smoke renamed');
    expect(renamed?.['summary']).toBe('smoke renamed');

    // Step 4 — DELETE succeeds and the session is gone afterwards.
    const delRes = await fetch(sessionUrl, { method: 'DELETE', signal: shortTimeout() });
    expect(delRes.status).toBe(200);
    const afterDeleteRes = await fetch(sessionUrl, { signal: shortTimeout() });
    expect(afterDeleteRes.status).toBe(404);
}, TIMEOUT);
890
it('preserves tool-call chips in /session/:id history', async () => {
    // The history endpoint is expected to expose an assistant message's tool
    // calls as a toolCalls[] array so the dev-UI can render tool-call chips
    // alongside the reply text.
    const { sessionId } = await chat('Use the request tool to GET /items from the mock-api connection with intent "read".');
    expect(sessionId).toBeTruthy();
    const historyUrl = `http://localhost:${AGENT_PORT}/session/${sessionId}`;
    const getRes = await fetch(historyUrl, { signal: AbortSignal.timeout(5000) });
    const getBody = await getRes.json();
    expect(getRes.status).toBe(200);
    const hasToolCalls = (m) => m.role === 'assistant' && m.toolCalls && m.toolCalls.length > 0;
    const assistantWithTools = getBody.messages.find(hasToolCalls);
    // Soft assertion: whether the model calls a tool on a given turn is not
    // deterministic. Only when it did do we require the round-trip shape.
    if (!assistantWithTools?.toolCalls) {
        // eslint-disable-next-line no-console -- intentional test diagnostic
        console.warn('[smoke] Model did not call a tool for the request prompt — LLM non-determinism, skipping toolCall round-trip assertion');
        return;
    }
    const [call] = assistantWithTools.toolCalls;
    expect(call.toolId).toBeTruthy();
    expect(call.toolName).toBeTruthy();
    expect(typeof call.parameters).toBe('object');
}, TIMEOUT);
573
915
  // -------------------------------------------------------------------------
574
916
  // 24. Files — browser and editor
575
917
  // -------------------------------------------------------------------------
@@ -699,6 +1041,268 @@ describe.skipIf(!!skipReason)('smoke tests', () => {
699
1041
  });
700
1042
  expect(res.status).toBe(400);
701
1043
  });
1044
+ // -------------------------------------------------------------------------
1045
+ // Agent loop safety features (budget, done reason)
1046
+ // -------------------------------------------------------------------------
1047
it('done event carries reason=model_stop on normal completion', async () => {
    // A trivial prompt should end with the model stopping on its own.
    const result = await chat('Reply with just the word "ok".');
    expectDoneReason(result.events, 'model_stop');
});
1051
it('max_session_tokens budget terminates the loop with reason=budget_exceeded', async () => {
    // The budget is deliberately tiny: a single turn plus a tool call is
    // expected to consume more than 200 tokens, tripping the budget check.
    const tokenBudget = 200;
    const prompt = 'Echo these strings one at a time, calling echo_tool for each: alpha, bravo, charlie, delta, echo, foxtrot.';
    const result = await chat(prompt, undefined, { maxSessionTokens: tokenBudget });
    expectDoneReason(result.events, 'budget_exceeded');
    expectTotalTokens(result.events, { atLeast: tokenBudget });
});
1058
+ // -------------------------------------------------------------------------
1059
+ // 25. Runtime event bus (/api/events SSE stream)
1060
+ // -------------------------------------------------------------------------
1061
it('emits session_created when a new chat session is created', async () => {
    // Subscribe before chatting so the creation event is captured.
    const stream = await openEventStream();
    try {
        const { sessionId } = await chat('Say "hi" and nothing else.');
        const isCreationOfOurSession = (e) => e['type'] === 'session_created' && e['sessionId'] === sessionId;
        const created = await stream.waitFor(isCreationOfOurSession, TIMEOUT);
        expect(created['type']).toBe('session_created');
        expect(created['sessionId']).toBe(sessionId);
        expect(created['seq']).toBeGreaterThan(0);
        expect(typeof created['timestamp']).toBe('string');
    }
    finally {
        stream.close();
    }
}, TIMEOUT);
1075
it('emits session_updated on follow-up messages in an existing session', async () => {
    // Create the session first; only the follow-up turn is observed.
    const { sessionId } = await chat('Remember the number 7.');
    const stream = await openEventStream();
    try {
        await chat('Reply with just "ok".', sessionId);
        const isUpdateOfOurSession = (e) => e['type'] === 'session_updated' && e['sessionId'] === sessionId;
        const updated = await stream.waitFor(isUpdateOfOurSession, TIMEOUT);
        expect(updated['type']).toBe('session_updated');
        expect(updated['sessionId']).toBe(sessionId);
    }
    finally {
        stream.close();
    }
}, TIMEOUT * 2);
1088
it('emits session_deleted when a session is DELETEd', async () => {
    const { sessionId } = await chat('Say "ok".');
    const sessionUrl = `http://localhost:${AGENT_PORT}/session/${sessionId}`;
    const stream = await openEventStream();
    try {
        // Delete over REST, then expect the matching bus event.
        const res = await fetch(sessionUrl, { method: 'DELETE', signal: AbortSignal.timeout(5000) });
        expect(res.status).toBe(200);
        const isDeletionOfOurSession = (e) => e['type'] === 'session_deleted' && e['sessionId'] === sessionId;
        const deleted = await stream.waitFor(isDeletionOfOurSession, 5000);
        expect(deleted['type']).toBe('session_deleted');
        expect(deleted['sessionId']).toBe(sessionId);
    }
    finally {
        stream.close();
    }
}, TIMEOUT);
1105
it('emits automation_triggered and automation_completed on manual run', async () => {
    // The automation's registered name is derived from the filename
    // (automations/test-auto.md → "test-auto"), not the frontmatter.
    const automationName = 'test-auto';
    const stream = await openEventStream();
    try {
        // Start the run without awaiting it so the bus events can be observed
        // while the HTTP request is still in flight.
        const runPromise = fetch(`http://localhost:${AGENT_PORT}/automations/${automationName}/run`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: '{}',
            signal: AbortSignal.timeout(TIMEOUT),
        });
        // Mark the promise as handled: if a waitFor below throws first, the
        // request would otherwise be left floating and a later timeout/abort
        // would surface as an unhandled rejection. Awaiting runPromise further
        // down still observes the original outcome.
        runPromise.catch(() => { });
        const triggered = await stream.waitFor((e) => e['type'] === 'automation_triggered' && e['name'] === automationName, 5000);
        expect(triggered['source']).toBeDefined();
        // Wait for either terminal event so a failure is reported as a wrong
        // type rather than as a timeout.
        const completed = await stream.waitFor((e) => (e['type'] === 'automation_completed' || e['type'] === 'automation_failed') &&
            e['name'] === automationName, TIMEOUT);
        expect(completed['type']).toBe('automation_completed');
        expect(typeof completed['durationMs']).toBe('number');
        const runRes = await runPromise;
        expect([200, 500]).toContain(runRes.status);
    }
    finally {
        stream.close();
    }
}, TIMEOUT + 10_000);
1130
it('delivers automation result via onAutomationResult callback (full chain)', async () => {
    // End-to-end wiring check: running the delivery-callback-test automation
    // should result in a new entry in receivedAutomationResults (per the
    // original notes, filled by the onAutomationResult callback configured
    // in beforeAll for automations with a `callback` delivery target).
    const automation = 'delivery-callback-test';
    const pause = (ms) => new Promise((wake) => setTimeout(wake, ms));
    const before = receivedAutomationResults.length;
    const runRes = await fetch(`http://localhost:${AGENT_PORT}/automations/delivery-callback-test/run`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: '{}',
        signal: AbortSignal.timeout(TIMEOUT),
    });
    expect([200, 500]).toContain(runRes.status);
    // Delivery may land slightly after the HTTP response; poll for up to 5s.
    const deadline = Date.now() + 5000;
    while (receivedAutomationResults.length === before && Date.now() < deadline) {
        await pause(100);
    }
    expect(receivedAutomationResults.length).toBeGreaterThan(before);
    const [payload] = receivedAutomationResults.slice(-1);
    expect(payload?.['automation']).toBe(automation);
    expect(payload?.['status']).toBe('success');
    // Only the automation name is asserted in the rendered message; the rest
    // of the template output depends on LLM-produced values.
    const message = String(payload?.['message'] ?? '');
    expect(message).toContain(automation);
}, TIMEOUT + 10_000);
1155
it('replays buffered events via Last-Event-ID on reconnect', async () => {
    // Phase 1: observe a live event and remember its sequence number.
    const liveStream = await openEventStream();
    let capturedSeq = 0;
    try {
        await chat('Say "ok".');
        const created = await liveStream.waitFor((e) => e['type'] === 'session_created', TIMEOUT);
        capturedSeq = Number(created['seq']);
        expect(capturedSeq).toBeGreaterThan(0);
    }
    finally {
        liveStream.close();
    }
    // Phase 2: reconnect claiming the previous seq as the last one seen, and
    // expect the captured event to be delivered again.
    const resumeFrom = String(capturedSeq - 1);
    const replayStream = await openEventStream({ lastEventId: resumeFrom });
    try {
        const hasCapturedSeq = (e) => Number(e['seq']) === capturedSeq;
        const replayed = await replayStream.waitFor(hasCapturedSeq, 5000);
        expect(Number(replayed['seq'])).toBe(capturedSeq);
    }
    finally {
        replayStream.close();
    }
}, TIMEOUT);
1178
it('emits session_updated when title is PATCHed', async () => {
    const newTitle = 'my renamed session';
    const { sessionId } = await chat('Say "ok".');
    const stream = await openEventStream();
    try {
        // Rename over REST, then expect an update event carrying the new title.
        const res = await fetch(`http://localhost:${AGENT_PORT}/session/${sessionId}`, {
            method: 'PATCH',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ title: newTitle }),
            signal: AbortSignal.timeout(5000),
        });
        expect(res.status).toBe(200);
        const isRenameOfOurSession = (e) => e['type'] === 'session_updated'
            && e['sessionId'] === sessionId
            && e['title'] === newTitle;
        const updated = await stream.waitFor(isRenameOfOurSession, 5000);
        expect(updated['title']).toBe(newTitle);
    }
    finally {
        stream.close();
    }
}, TIMEOUT);
1196
it('emits store_updated when a tool writes to a store', async () => {
    const stream = await openEventStream();
    try {
        // Ask the agent to write to test-items store. Agent non-determinism
        // means it might not actually call the tool; we soft-check the event.
        await chat('Write an item to the test-items store with id="evt-smoke-1" and name="smoke event test".');
        // The bus event arrives on a separate SSE connection from the chat
        // response, so it may not be buffered yet when chat() resolves.
        // Wait for it with a short bound instead of inspecting stream.events
        // synchronously; a timeout means "not emitted" (soft skip).
        let event;
        try {
            event = await stream.waitFor((e) => e['type'] === 'store_updated' && e['storeName'] === 'test-items', 2000);
        }
        catch {
            event = undefined;
        }
        if (event) {
            expect(event['operation']).toBe('put');
        }
        else {
            // Model may have chosen not to call the store tool — this test is
            // soft (logged, not asserted) because it depends on LLM behavior.
            // eslint-disable-next-line no-console -- intentional test diagnostic
            console.warn('[smoke] store_updated not emitted — LLM may have declined to call store_write');
        }
    }
    finally {
        stream.close();
    }
}, TIMEOUT);
1217
it('emits store_updated when a direct REST write happens', async () => {
    // No LLM involved on this path, so every check is a hard assertion.
    const storeName = 'test-items';
    const stream = await openEventStream();
    try {
        const record = { id: 'rest-smoke-1', name: 'direct rest write' };
        const res = await fetch(`http://localhost:${AGENT_PORT}/api/stores/test-items`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(record),
            signal: AbortSignal.timeout(5000),
        });
        expect(res.status).toBe(201);
        const isPutOnStore = (e) => e['type'] === 'store_updated'
            && e['storeName'] === storeName
            && e['operation'] === 'put';
        const updated = await stream.waitFor(isPutOnStore, 5000);
        expect(updated['operation']).toBe('put');
    }
    finally {
        stream.close();
    }
}, TIMEOUT);
1235
it('emits automation_started and automation_stopped', async () => {
    // The smoke agent's test-auto has no cron schedule, so start will fail.
    // That's fine — we want to verify the happy path when a schedulable
    // automation exists. Skip if none are available.
    const listRes = await fetch(`http://localhost:${AGENT_PORT}/automations`, { signal: AbortSignal.timeout(5000) });
    const listBody = await listRes.json();
    const schedulable = listBody.automations.find((a) => a.schedule);
    if (!schedulable) {
        return; // smoke agent has no scheduled automation — skip
    }
    const automationUrl = `http://localhost:${AGENT_PORT}/automations/${schedulable.name}`;
    const stream = await openEventStream();
    // Tracks whether this test started the automation and has not yet asked
    // for it to stop, so the finally block can clean up after a failure.
    let needsStop = false;
    try {
        const startRes = await fetch(`${automationUrl}/start`, { method: 'POST', signal: AbortSignal.timeout(5000) });
        if (startRes.status !== 200)
            return; // not a schedulable automation
        needsStop = true;
        const started = await stream.waitFor((e) => e['type'] === 'automation_started' && e['name'] === schedulable.name, 5000);
        expect(typeof started['intervalMs']).toBe('number');
        await fetch(`${automationUrl}/stop`, { method: 'POST', signal: AbortSignal.timeout(5000) });
        needsStop = false;
        const stopped = await stream.waitFor((e) => e['type'] === 'automation_stopped' && e['name'] === schedulable.name, 5000);
        expect(stopped['name']).toBe(schedulable.name);
    }
    finally {
        if (needsStop) {
            // Best-effort cleanup: don't leave a scheduled automation running
            // for the remaining tests when an assertion above threw.
            await fetch(`${automationUrl}/stop`, { method: 'POST', signal: AbortSignal.timeout(5000) }).catch(() => { });
        }
        stream.close();
    }
}, TIMEOUT);
1260
it('fans out the same event to all concurrent clients (two-tab case)', async () => {
    // Two independent SSE subscribers stand in for two browser tabs. The same
    // logical event must arrive on both with identical seq/timestamp/sessionId.
    const streams = await Promise.all([openEventStream(), openEventStream()]);
    try {
        const { sessionId } = await chat('Say "ok".');
        const isCreationOfOurSession = (e) => e['type'] === 'session_created' && e['sessionId'] === sessionId;
        const [fromFirst, fromSecond] = await Promise.all(streams.map((s) => s.waitFor(isCreationOfOurSession, TIMEOUT)));
        for (const field of ['seq', 'timestamp', 'sessionId']) {
            expect(fromFirst[field]).toBe(fromSecond[field]);
        }
    }
    finally {
        for (const s of streams) {
            s.close();
        }
    }
}, TIMEOUT);
1281
+ // -------------------------------------------------------------------------
1282
+ // Web tools (web_search, fetch_url) — gated on GOOGLE_API_KEY.
1283
+ //
1284
+ // When the smoke target is Anthropic/OpenAI but GOOGLE_API_KEY is set,
1285
+ // these tests exercise the cross-provider case: the main agent runs on
1286
+ // one provider, but web_search routes through the dedicated Gemini
1287
+ // backend. beforeAll injects the webTools config when the key is set.
1288
+ // -------------------------------------------------------------------------
1289
const hasGoogleKey = Boolean(process.env['GOOGLE_API_KEY']);
it.skipIf(!hasGoogleKey)('web_search tool is invoked for a current-information question', async () => {
    const prompt = 'Use the web_search tool to find an authoritative source for the current stable version of Node.js. Reply with just the version number.';
    const { events } = await chat(prompt);
    // The model must have started a web_search call…
    const webSearchStart = findEvents(events, 'tool_call_start').find((e) => e['tool_name'] === 'web_search');
    expect(webSearchStart).toBeDefined();
    // …and the result paired with it by tool_id must report success.
    const searchToolId = webSearchStart?.['tool_id'];
    const webSearchResult = findEvents(events, 'tool_call_result').find((e) => e['tool_id'] === searchToolId);
    expect(webSearchResult).toBeDefined();
    expect(webSearchResult?.['status']).toBe('success');
    // The turn should end normally and produce some text.
    const done = findEvent(events, 'done');
    expect(done?.['reason']).toBe('model_stop');
    const responseText = allText(events);
    expect(responseText.length).toBeGreaterThan(0);
}, TIMEOUT);
702
1306
  });
703
1307
  // ---------------------------------------------------------------------------
704
1308
  // SSE parser helper
@@ -715,4 +1319,86 @@ function parseSSE(text) {
715
1319
  }
716
1320
  return events;
717
1321
  }
1322
/**
 * Open an SSE connection to `/api/events` on the local agent and drain it in
 * the background, JSON-parsing the `data` payload of every frame.
 *
 * @param {{ lastEventId?: string }} [options] When `lastEventId` is set it is
 *   sent as the `Last-Event-ID` request header.
 * @returns {Promise<{
 *   events: unknown[],
 *   waitFor: (predicate: (event: any) => boolean, timeoutMs?: number) => Promise<any>,
 *   close: () => void,
 * }>} `events` is the live array of every event parsed so far. `waitFor`
 *   resolves with the first buffered or future event matching `predicate`,
 *   and rejects on timeout, when `predicate` throws on an incoming event, or
 *   when the stream is closed first. `close` aborts the request and stops
 *   draining.
 * @throws {Error} If the endpoint answers with a non-2xx status or without a
 *   response body.
 */
async function openEventStream(options = {}) {
    const controller = new AbortController();
    const headers = { Accept: 'text/event-stream' };
    const { lastEventId } = options;
    if (lastEventId !== undefined && lastEventId !== null && lastEventId !== '') {
        headers['Last-Event-ID'] = String(lastEventId);
    }
    const res = await fetch(`http://localhost:${AGENT_PORT}/api/events`, {
        headers,
        signal: controller.signal,
    });
    if (!res.ok) {
        // Fail fast: an error response carries no events, so every waitFor
        // would otherwise just run into its timeout.
        controller.abort();
        throw new Error(`/api/events responded with HTTP ${String(res.status)}`);
    }
    if (!res.body) {
        throw new Error('no response body from /api/events');
    }
    const events = [];
    // Pending waitFor() registrations. Each entry removes itself (and clears
    // its timer) when it settles, so entries are tracked by identity.
    const waiters = new Set();
    let closed = false;

    // Record an event and settle every waiter whose predicate accepts it.
    // Iterates over a copy because settling mutates the set.
    const dispatch = (event) => {
        events.push(event);
        for (const waiter of [...waiters]) {
            let matched = false;
            try {
                matched = Boolean(waiter.predicate(event));
            }
            catch (err) {
                // A throwing predicate is a bug in the caller's test — surface
                // it on that waiter instead of silently dropping it.
                waiter.reject(err);
                continue;
            }
            if (matched) {
                waiter.resolve(event);
            }
        }
    };

    // Extract the data payload of one SSE frame: every `data:` line, with one
    // optional leading space stripped, joined by newlines. Returns undefined
    // when the frame has no data line (comment / keep-alive frames).
    const framePayload = (frame) => {
        const dataLines = [];
        for (const line of frame.split('\n')) {
            if (!line.startsWith('data:')) {
                continue;
            }
            const value = line.slice(5);
            dataLines.push(value.startsWith(' ') ? value.slice(1) : value);
        }
        return dataLines.length > 0 ? dataLines.join('\n') : undefined;
    };

    const reader = res.body.getReader();
    const decoder = new TextDecoder();
    let buffer = '';
    // Background drain loop; intentionally not awaited.
    void (async () => {
        try {
            while (!closed) {
                const { done, value } = await reader.read();
                if (done) {
                    break;
                }
                // Normalise CRLF so frames are always delimited by '\n\n'. A
                // '\r' split across chunks stays in the buffer and is fixed up
                // when the following '\n' arrives.
                buffer = (buffer + decoder.decode(value, { stream: true })).replace(/\r\n/g, '\n');
                let idx;
                while ((idx = buffer.indexOf('\n\n')) !== -1) {
                    const payload = framePayload(buffer.slice(0, idx));
                    buffer = buffer.slice(idx + 2);
                    if (payload === undefined) {
                        continue;
                    }
                    let event;
                    try {
                        event = JSON.parse(payload);
                    }
                    catch {
                        continue; // malformed frame — skip it, keep draining
                    }
                    dispatch(event);
                }
            }
        }
        catch { /* aborted or connection closed */ }
    })();

    return {
        events,
        waitFor(predicate, timeoutMs = 5000) {
            // Check already-buffered events first
            const already = events.find(predicate);
            if (already) {
                return Promise.resolve(already);
            }
            if (closed) {
                return Promise.reject(new Error('event stream closed before a matching event arrived'));
            }
            return new Promise((resolve, reject) => {
                let timer;
                const release = () => {
                    clearTimeout(timer);
                    waiters.delete(waiter);
                };
                const waiter = {
                    predicate,
                    resolve: (event) => {
                        release();
                        resolve(event);
                    },
                    reject: (err) => {
                        release();
                        reject(err);
                    },
                };
                timer = setTimeout(() => {
                    waiter.reject(new Error(`waitFor timed out after ${String(timeoutMs)}ms`));
                }, timeoutMs);
                waiters.add(waiter);
            });
        },
        close() {
            if (closed) {
                return;
            }
            closed = true;
            // Nothing more will arrive: settle pending waiters now so their
            // timers don't linger after the stream is gone.
            for (const waiter of [...waiters]) {
                waiter.reject(new Error('event stream closed before a matching event arrived'));
            }
            controller.abort();
            reader.cancel().catch(() => { });
        },
    };
}
718
1404
  //# sourceMappingURL=smoke.test.js.map