@amodalai/runtime 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/__fixtures__/README.md +4 -0
- package/dist/src/__fixtures__/e2e.test.d.ts +6 -0
- package/dist/src/__fixtures__/e2e.test.js +211 -0
- package/dist/src/__fixtures__/e2e.test.js.map +1 -0
- package/dist/src/__fixtures__/smoke-agent/automations/delivery-callback-test.json +9 -0
- package/dist/src/__fixtures__/smoke-agent/connections/mock-mcp/spec.json +1 -1
- package/dist/src/__fixtures__/smoke.test.js +715 -29
- package/dist/src/__fixtures__/smoke.test.js.map +1 -1
- package/dist/src/__fixtures__/test-env.d.ts +27 -0
- package/dist/src/__fixtures__/test-env.js +64 -0
- package/dist/src/__fixtures__/test-env.js.map +1 -0
- package/dist/src/__fixtures__/test-helpers.d.ts +30 -0
- package/dist/src/__fixtures__/test-helpers.js +120 -0
- package/dist/src/__fixtures__/test-helpers.js.map +1 -0
- package/dist/src/agent/agent-types.d.ts +22 -0
- package/dist/src/agent/agent-types.js.map +1 -1
- package/dist/src/agent/automation-bridge.d.ts +9 -0
- package/dist/src/agent/automation-bridge.js +26 -0
- package/dist/src/agent/automation-bridge.js.map +1 -1
- package/dist/src/agent/automation-bridge.test.js +63 -0
- package/dist/src/agent/automation-bridge.test.js.map +1 -1
- package/dist/src/agent/local-server.d.ts +0 -7
- package/dist/src/agent/local-server.js +230 -86
- package/dist/src/agent/local-server.js.map +1 -1
- package/dist/src/agent/local-server.test.js +14 -8
- package/dist/src/agent/local-server.test.js.map +1 -1
- package/dist/src/agent/loop-types.d.ts +81 -2
- package/dist/src/agent/loop-types.js +4 -0
- package/dist/src/agent/loop-types.js.map +1 -1
- package/dist/src/agent/loop.js +16 -3
- package/dist/src/agent/loop.js.map +1 -1
- package/dist/src/agent/loop.test.js +572 -8
- package/dist/src/agent/loop.test.js.map +1 -1
- package/dist/src/agent/proactive/delivery-router.d.ts +68 -0
- package/dist/src/agent/proactive/delivery-router.js +337 -0
- package/dist/src/agent/proactive/delivery-router.js.map +1 -0
- package/dist/src/agent/proactive/delivery-router.test.d.ts +6 -0
- package/dist/src/agent/proactive/delivery-router.test.js +455 -0
- package/dist/src/agent/proactive/delivery-router.test.js.map +1 -0
- package/dist/src/agent/proactive/proactive-runner.d.ts +23 -1
- package/dist/src/agent/proactive/proactive-runner.js +42 -10
- package/dist/src/agent/proactive/proactive-runner.js.map +1 -1
- package/dist/src/agent/proactive/proactive-runner.test.js +0 -2
- package/dist/src/agent/proactive/proactive-runner.test.js.map +1 -1
- package/dist/src/agent/routes/admin-chat-abort.test.d.ts +6 -0
- package/dist/src/agent/routes/admin-chat-abort.test.js +206 -0
- package/dist/src/agent/routes/admin-chat-abort.test.js.map +1 -0
- package/dist/src/agent/routes/admin-chat.js +0 -2
- package/dist/src/agent/routes/admin-chat.js.map +1 -1
- package/dist/src/agent/routes/task.test.js +0 -2
- package/dist/src/agent/routes/task.test.js.map +1 -1
- package/dist/src/agent/snapshot-server.js +0 -2
- package/dist/src/agent/snapshot-server.js.map +1 -1
- package/dist/src/agent/states/compacting.js +5 -3
- package/dist/src/agent/states/compacting.js.map +1 -1
- package/dist/src/agent/states/confirming.js +3 -0
- package/dist/src/agent/states/confirming.js.map +1 -1
- package/dist/src/agent/states/dispatching.js +45 -1
- package/dist/src/agent/states/dispatching.js.map +1 -1
- package/dist/src/agent/states/executing.js +225 -81
- package/dist/src/agent/states/executing.js.map +1 -1
- package/dist/src/agent/states/streaming.js +14 -0
- package/dist/src/agent/states/streaming.js.map +1 -1
- package/dist/src/agent/states/thinking.d.ts +1 -1
- package/dist/src/agent/states/thinking.js +246 -29
- package/dist/src/agent/states/thinking.js.map +1 -1
- package/dist/src/agent/token-estimate.d.ts +20 -6
- package/dist/src/agent/token-estimate.js +24 -3
- package/dist/src/agent/token-estimate.js.map +1 -1
- package/dist/src/agent/token-estimate.test.d.ts +6 -0
- package/dist/src/agent/token-estimate.test.js +44 -0
- package/dist/src/agent/token-estimate.test.js.map +1 -0
- package/dist/src/api/create-agent.js +0 -3
- package/dist/src/api/create-agent.js.map +1 -1
- package/dist/src/api/types.d.ts +0 -2
- package/dist/src/env-ref.d.ts +13 -0
- package/dist/src/env-ref.js +31 -0
- package/dist/src/env-ref.js.map +1 -0
- package/dist/src/env-ref.test.d.ts +6 -0
- package/dist/src/env-ref.test.js +34 -0
- package/dist/src/env-ref.test.js.map +1 -0
- package/dist/src/errors.d.ts +15 -0
- package/dist/src/errors.js +22 -0
- package/dist/src/errors.js.map +1 -1
- package/dist/src/errors.test.js +2 -2
- package/dist/src/errors.test.js.map +1 -1
- package/dist/src/events/event-bus.d.ts +54 -0
- package/dist/src/events/event-bus.js +84 -0
- package/dist/src/events/event-bus.js.map +1 -0
- package/dist/src/events/event-bus.test.d.ts +6 -0
- package/dist/src/events/event-bus.test.js +112 -0
- package/dist/src/events/event-bus.test.js.map +1 -0
- package/dist/src/events/events-route.d.ts +36 -0
- package/dist/src/events/events-route.js +80 -0
- package/dist/src/events/events-route.js.map +1 -0
- package/dist/src/events/events-route.test.d.ts +6 -0
- package/dist/src/events/events-route.test.js +134 -0
- package/dist/src/events/events-route.test.js.map +1 -0
- package/dist/src/events/store-event-wrapper.d.ts +19 -0
- package/dist/src/events/store-event-wrapper.js +57 -0
- package/dist/src/events/store-event-wrapper.js.map +1 -0
- package/dist/src/events/store-event-wrapper.test.d.ts +6 -0
- package/dist/src/events/store-event-wrapper.test.js +91 -0
- package/dist/src/events/store-event-wrapper.test.js.map +1 -0
- package/dist/src/middleware/auth.d.ts +0 -2
- package/dist/src/middleware/auth.js.map +1 -1
- package/dist/src/providers/search-provider.d.ts +64 -0
- package/dist/src/providers/search-provider.js +174 -0
- package/dist/src/providers/search-provider.js.map +1 -0
- package/dist/src/providers/types.d.ts +8 -0
- package/dist/src/routes/ai-stream.d.ts +15 -0
- package/dist/src/routes/ai-stream.js +9 -0
- package/dist/src/routes/ai-stream.js.map +1 -1
- package/dist/src/routes/chat-stream.d.ts +6 -0
- package/dist/src/routes/chat-stream.js +2 -0
- package/dist/src/routes/chat-stream.js.map +1 -1
- package/dist/src/routes/chat.d.ts +6 -0
- package/dist/src/routes/chat.js +2 -0
- package/dist/src/routes/chat.js.map +1 -1
- package/dist/src/routes/session-resolver.d.ts +5 -0
- package/dist/src/routes/session-resolver.js +1 -15
- package/dist/src/routes/session-resolver.js.map +1 -1
- package/dist/src/routes/session-resolver.test.js +7 -6
- package/dist/src/routes/session-resolver.test.js.map +1 -1
- package/dist/src/server.d.ts +6 -0
- package/dist/src/server.js +2 -0
- package/dist/src/server.js.map +1 -1
- package/dist/src/session/drizzle-session-store.d.ts +56 -0
- package/dist/src/session/drizzle-session-store.js +203 -0
- package/dist/src/session/drizzle-session-store.js.map +1 -0
- package/dist/src/session/manager.d.ts +6 -3
- package/dist/src/session/manager.js +46 -16
- package/dist/src/session/manager.js.map +1 -1
- package/dist/src/session/manager.test.js +12 -18
- package/dist/src/session/manager.test.js.map +1 -1
- package/dist/src/session/pglite-session-store.d.ts +23 -0
- package/dist/src/session/pglite-session-store.js +70 -0
- package/dist/src/session/pglite-session-store.js.map +1 -0
- package/dist/src/session/postgres-session-store.d.ts +44 -0
- package/dist/src/session/postgres-session-store.js +138 -0
- package/dist/src/session/postgres-session-store.js.map +1 -0
- package/dist/src/session/session-builder.d.ts +0 -2
- package/dist/src/session/session-builder.js +22 -2
- package/dist/src/session/session-builder.js.map +1 -1
- package/dist/src/session/session-builder.test.js +0 -2
- package/dist/src/session/session-builder.test.js.map +1 -1
- package/dist/src/session/session-store-selector.d.ts +49 -0
- package/dist/src/session/session-store-selector.js +60 -0
- package/dist/src/session/session-store-selector.js.map +1 -0
- package/dist/src/session/session-store-selector.test.d.ts +6 -0
- package/dist/src/session/session-store-selector.test.js +79 -0
- package/dist/src/session/session-store-selector.test.js.map +1 -0
- package/dist/src/session/store.d.ts +146 -32
- package/dist/src/session/store.js +126 -138
- package/dist/src/session/store.js.map +1 -1
- package/dist/src/session/store.test.js +385 -107
- package/dist/src/session/store.test.js.map +1 -1
- package/dist/src/session/tool-context-factory.d.ts +3 -2
- package/dist/src/session/tool-context-factory.js +1 -2
- package/dist/src/session/tool-context-factory.js.map +1 -1
- package/dist/src/session/tool-context-factory.test.js +1 -4
- package/dist/src/session/tool-context-factory.test.js.map +1 -1
- package/dist/src/session/types.d.ts +13 -6
- package/dist/src/stores/schema.d.ts +0 -34
- package/dist/src/stores/schema.js +6 -4
- package/dist/src/stores/schema.js.map +1 -1
- package/dist/src/tools/admin-file-tools.d.ts +29 -0
- package/dist/src/tools/admin-file-tools.js +525 -11
- package/dist/src/tools/admin-file-tools.js.map +1 -1
- package/dist/src/tools/admin-file-tools.test.js +373 -4
- package/dist/src/tools/admin-file-tools.test.js.map +1 -1
- package/dist/src/tools/custom-tool-adapter.test.js +0 -1
- package/dist/src/tools/custom-tool-adapter.test.js.map +1 -1
- package/dist/src/tools/dispatch-tool.d.ts +4 -4
- package/dist/src/tools/fetch-url-tool.d.ts +23 -0
- package/dist/src/tools/fetch-url-tool.js +333 -0
- package/dist/src/tools/fetch-url-tool.js.map +1 -0
- package/dist/src/tools/fetch-url-tool.test.d.ts +6 -0
- package/dist/src/tools/fetch-url-tool.test.js +228 -0
- package/dist/src/tools/fetch-url-tool.test.js.map +1 -0
- package/dist/src/tools/mcp-tool-adapter.test.js +0 -1
- package/dist/src/tools/mcp-tool-adapter.test.js.map +1 -1
- package/dist/src/tools/registry.test.js +0 -1
- package/dist/src/tools/registry.test.js.map +1 -1
- package/dist/src/tools/request-tool.test.js +0 -1
- package/dist/src/tools/request-tool.test.js.map +1 -1
- package/dist/src/tools/store-tools.test.js +0 -1
- package/dist/src/tools/store-tools.test.js.map +1 -1
- package/dist/src/tools/types.d.ts +20 -2
- package/dist/src/tools/web-search-tool.d.ts +31 -0
- package/dist/src/tools/web-search-tool.js +170 -0
- package/dist/src/tools/web-search-tool.js.map +1 -0
- package/dist/src/tools/web-search-tool.test.d.ts +6 -0
- package/dist/src/tools/web-search-tool.test.js +153 -0
- package/dist/src/tools/web-search-tool.test.js.map +1 -0
- package/dist/src/tools/web-tools-shared.d.ts +21 -0
- package/dist/src/tools/web-tools-shared.js +32 -0
- package/dist/src/tools/web-tools-shared.js.map +1 -0
- package/dist/src/types.d.ts +20 -0
- package/dist/src/types.js +13 -0
- package/dist/src/types.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +17 -3
- package/dist/src/agent/session-store.d.ts +0 -71
- package/dist/src/agent/session-store.js +0 -151
- package/dist/src/agent/session-store.js.map +0 -1
- package/dist/src/session/admin-file-tools.d.ts +0 -136
- package/dist/src/session/admin-file-tools.js +0 -240
- package/dist/src/session/admin-file-tools.js.map +0 -1
|
@@ -13,26 +13,12 @@
|
|
|
13
13
|
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
|
|
14
14
|
import { fork } from 'node:child_process';
|
|
15
15
|
import { resolve } from 'node:path';
|
|
16
|
-
import { readFileSync, writeFileSync, rmSync } from 'node:fs';
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
//
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
const envPath = resolve(__dirname, '../../../../.env.test');
|
|
23
|
-
const envContent = readFileSync(envPath, 'utf-8');
|
|
24
|
-
for (const line of envContent.split('\n')) {
|
|
25
|
-
const match = line.match(/^([^#=]+)=(.*)$/);
|
|
26
|
-
if (match) {
|
|
27
|
-
const [, key, value] = match;
|
|
28
|
-
if (key && value && !process.env[key.trim()]) {
|
|
29
|
-
process.env[key.trim()] = value.trim();
|
|
30
|
-
}
|
|
31
|
-
}
|
|
32
|
-
}
|
|
33
|
-
}
|
|
34
|
-
catch { /* no .env.test — tests will skip */ }
|
|
35
|
-
}
|
|
16
|
+
import { readFileSync, writeFileSync, rmSync, readdirSync } from 'node:fs';
|
|
17
|
+
import { expectDoneReason, expectTotalTokens } from './test-helpers.js';
|
|
18
|
+
import { loadTestEnv, defaultTargetName } from './test-env.js';
|
|
19
|
+
// Pull API keys out of <repo-root>/.env.test (gitignored). Missing keys
|
|
20
|
+
// cause the describe block below to skip with a reason.
|
|
21
|
+
loadTestEnv();
|
|
36
22
|
// ---------------------------------------------------------------------------
|
|
37
23
|
// Config
|
|
38
24
|
// ---------------------------------------------------------------------------
|
|
@@ -42,6 +28,18 @@ const AGENT_DIR = resolve(__dirname, 'smoke-agent');
|
|
|
42
28
|
const REST_SERVER = resolve(__dirname, 'smoke-rest-server.mjs');
|
|
43
29
|
const MCP_SERVER = resolve(__dirname, 'smoke-mcp-server.mjs');
|
|
44
30
|
const TIMEOUT = 45_000; // per-test timeout for LLM calls
|
|
31
|
+
/**
 * Provider/model pairs the smoke suite can run against, keyed by target name.
 *
 * Each entry names the provider, the model id written into amodal.json, and
 * the environment variable that must hold the provider's API key.
 *
 * The table and its entries are frozen: this is shared module-level
 * configuration that is only ever read, so an accidental write inside a test
 * should fail loudly instead of silently changing the target for later reads.
 */
const SMOKE_TARGETS = Object.freeze({
  anthropic: Object.freeze({ provider: 'anthropic', model: 'claude-sonnet-4-20250514', apiKeyEnv: 'ANTHROPIC_API_KEY' }),
  google: Object.freeze({ provider: 'google', model: 'gemini-2.5-flash', apiKeyEnv: 'GOOGLE_API_KEY' }),
  openai: Object.freeze({ provider: 'openai', model: 'gpt-4o-mini', apiKeyEnv: 'OPENAI_API_KEY' }),
  groq: Object.freeze({ provider: 'groq', model: 'llama-3.3-70b-versatile', apiKeyEnv: 'GROQ_API_KEY' }),
});
|
|
37
|
+
/**
 * Resolve which provider/model pair this smoke run targets.
 *
 * A non-blank `SMOKE_TARGET` environment variable wins; otherwise the choice
 * is delegated to `defaultTargetName(SMOKE_TARGETS)`. A blank value is treated
 * as "not set" so that an empty CI variable does not disable the whole suite.
 *
 * @returns {{ name: string, target: object | undefined }} The resolved target
 *   name and its `SMOKE_TARGETS` entry. `target` is `undefined` when the name
 *   is not a known target, which the caller uses to build a skip reason.
 */
function pickSmokeTarget() {
  const override = process.env['SMOKE_TARGET']?.trim();
  const name = override ? override : defaultTargetName(SMOKE_TARGETS);
  // Own-property check: a name such as "constructor" must not resolve to an
  // inherited Object.prototype member and pass for a real target.
  const target = Object.hasOwn(SMOKE_TARGETS, name) ? SMOKE_TARGETS[name] : undefined;
  return { name, target };
}
|
|
42
|
+
const { name: smokeTargetName, target: smokeTarget } = pickSmokeTarget();
|
|
45
43
|
// ---------------------------------------------------------------------------
|
|
46
44
|
// Helpers
|
|
47
45
|
// ---------------------------------------------------------------------------
|
|
@@ -58,10 +56,12 @@ async function waitForServer(port, maxMs = 15_000) {
|
|
|
58
56
|
}
|
|
59
57
|
throw new Error(`Server on port ${port} did not start within ${maxMs}ms`);
|
|
60
58
|
}
|
|
61
|
-
async function chat(message, sessionId) {
|
|
59
|
+
async function chat(message, sessionId, opts) {
|
|
62
60
|
const body = { message };
|
|
63
61
|
if (sessionId)
|
|
64
62
|
body['session_id'] = sessionId;
|
|
63
|
+
if (opts?.maxSessionTokens !== undefined)
|
|
64
|
+
body['max_session_tokens'] = opts.maxSessionTokens;
|
|
65
65
|
const res = await fetch(`http://localhost:${AGENT_PORT}/chat`, {
|
|
66
66
|
method: 'POST',
|
|
67
67
|
headers: { 'Content-Type': 'application/json' },
|
|
@@ -102,14 +102,45 @@ function allText(events) {
|
|
|
102
102
|
// ---------------------------------------------------------------------------
|
|
103
103
|
let restServer = null;
|
|
104
104
|
let agentServer = null;
|
|
105
|
-
|
|
106
|
-
|
|
105
|
+
/** Captures payloads delivered to callback-type targets for assertion. */
|
|
106
|
+
const receivedAutomationResults = [];
|
|
107
|
+
// Reason the suite cannot run, or '' when it can. An unknown target name is
// reported first; otherwise the target's API-key environment variable must be
// set to a non-empty value.
const skipReason = (() => {
  if (!smokeTarget) {
    return `unknown SMOKE_TARGET "${smokeTargetName}"; known: ${Object.keys(SMOKE_TARGETS).join(', ')}`;
  }
  if (!process.env[smokeTarget.apiKeyEnv]) {
    return `${smokeTarget.apiKeyEnv} not set`;
  }
  return '';
})();
|
|
112
|
+
describe.skipIf(!!skipReason)(`smoke tests [${smokeTargetName}]`, () => {
|
|
113
|
+
// Stash fixture files so afterAll can restore them; otherwise the
|
|
114
|
+
// per-run rewrites (provider + absolute MCP path) leak into the repo.
|
|
115
|
+
const amodalPath = resolve(AGENT_DIR, 'amodal.json');
|
|
116
|
+
const mcpSpecPath = resolve(AGENT_DIR, 'connections/mock-mcp/spec.json');
|
|
117
|
+
const originalAmodalJson = readFileSync(amodalPath, 'utf-8');
|
|
118
|
+
const originalMcpSpec = readFileSync(mcpSpecPath, 'utf-8');
|
|
107
119
|
beforeAll(async () => {
|
|
108
120
|
// 0. Nuke prior state — clean slate for every run
|
|
109
121
|
rmSync(resolve(AGENT_DIR, '.amodal/store-data'), { recursive: true, force: true });
|
|
110
|
-
|
|
111
|
-
//
|
|
112
|
-
|
|
122
|
+
// 1. Rewrite amodal.json with the selected provider/model.
|
|
123
|
+
// smokeTarget is guaranteed defined here — skipReason above gates
|
|
124
|
+
// the describe block when it's undefined or missing a key.
|
|
125
|
+
if (!smokeTarget)
|
|
126
|
+
throw new Error('unreachable: smokeTarget is undefined under skipReason guard');
|
|
127
|
+
const amodalConfig = JSON.parse(originalAmodalJson);
|
|
128
|
+
amodalConfig['models'] = {
|
|
129
|
+
main: { provider: smokeTarget.provider, model: smokeTarget.model },
|
|
130
|
+
};
|
|
131
|
+
// Enable web_search + fetch_url tools when a Google API key is available.
|
|
132
|
+
// Key resolution happens in the core config parser via env: prefix.
|
|
133
|
+
if (process.env['GOOGLE_API_KEY']) {
|
|
134
|
+
amodalConfig['webTools'] = {
|
|
135
|
+
provider: 'google',
|
|
136
|
+
apiKey: 'env:GOOGLE_API_KEY',
|
|
137
|
+
model: 'gemini-3-flash-preview',
|
|
138
|
+
};
|
|
139
|
+
}
|
|
140
|
+
writeFileSync(amodalPath, JSON.stringify(amodalConfig, null, 2));
|
|
141
|
+
// 2. Write MCP server spec with absolute path (loadRepo reads this as-is).
|
|
142
|
+
// Restored in afterAll so the env-specific path doesn't leak into git.
|
|
143
|
+
writeFileSync(mcpSpecPath, JSON.stringify({ protocol: 'mcp', transport: 'stdio', command: 'node', args: [MCP_SERVER] }, null, 2));
|
|
113
144
|
// 3. Start mock REST server
|
|
114
145
|
restServer = fork(REST_SERVER, [], {
|
|
115
146
|
env: { ...process.env, SMOKE_REST_PORT: String(REST_PORT) },
|
|
@@ -122,6 +153,9 @@ describe.skipIf(!!skipReason)('smoke tests', () => {
|
|
|
122
153
|
repoPath: AGENT_DIR,
|
|
123
154
|
port: AGENT_PORT,
|
|
124
155
|
hotReload: false,
|
|
156
|
+
onAutomationResult: (payload) => {
|
|
157
|
+
receivedAutomationResults.push(payload);
|
|
158
|
+
},
|
|
125
159
|
});
|
|
126
160
|
await agentServer.start();
|
|
127
161
|
await waitForServer(AGENT_PORT);
|
|
@@ -133,6 +167,10 @@ describe.skipIf(!!skipReason)('smoke tests', () => {
|
|
|
133
167
|
if (restServer) {
|
|
134
168
|
restServer.kill('SIGTERM');
|
|
135
169
|
}
|
|
170
|
+
// Restore fixture files so the per-run rewrites stay test-local and
|
|
171
|
+
// don't show up in git status afterwards.
|
|
172
|
+
writeFileSync(amodalPath, originalAmodalJson);
|
|
173
|
+
writeFileSync(mcpSpecPath, originalMcpSpec);
|
|
136
174
|
});
|
|
137
175
|
// -------------------------------------------------------------------------
|
|
138
176
|
// 1. Server lifecycle
|
|
@@ -272,9 +310,205 @@ describe.skipIf(!!skipReason)('smoke tests', () => {
|
|
|
272
310
|
const toolStarts = findEvents(events, 'tool_call_start');
|
|
273
311
|
const readTool = toolStarts.find((e) => e['tool_name'] === 'read_repo_file');
|
|
274
312
|
expect(readTool).toBeDefined();
|
|
313
|
+
// The matching result should be a success — validates the full
|
|
314
|
+
// tool_call_start → execute → tool_call_result SSE round-trip.
|
|
315
|
+
const toolResults = findEvents(events, 'tool_call_result');
|
|
316
|
+
const readResult = toolResults.find((e) => e['tool_id'] === readTool?.['tool_id']);
|
|
317
|
+
expect(readResult).toBeDefined();
|
|
318
|
+
expect(readResult?.['status']).toBe('success');
|
|
275
319
|
const responseText = allText(events);
|
|
276
320
|
expect(responseText.toLowerCase()).toContain('test');
|
|
277
321
|
}, TIMEOUT);
|
|
322
|
+
// End-to-end: the "reduce emojis in formatting rules" scenario from the
|
|
323
|
+
// admin-agent regression. Before the discovery + edit tools existed, the
|
|
324
|
+
// agent guessed wrong paths and often created a new skill file instead
|
|
325
|
+
// of editing the existing knowledge doc. With list_repo_files /
|
|
326
|
+
// glob_repo_files / grep_repo_files / edit_repo_file available, it
|
|
327
|
+
// should discover knowledge/formatting-rules.md and edit it in place.
|
|
328
|
+
/**
 * Emoji-reduction scenario: seeds knowledge/formatting-rules.md with an
 * emoji-heavy document, asks the admin agent (POST /config/chat) to tone it
 * down, and then checks that the agent
 *   1. used at least one discovery tool (list/glob/grep),
 *   2. called edit_repo_file or write_repo_file,
 *   3. added no new entries under skills/, and
 *   4. left a non-empty file with fewer emojis than the seeded one.
 */
it('admin agent discovers and edits the right file (emoji-reduction scenario)', async () => {
  const formattingRulesPath = resolve(AGENT_DIR, 'knowledge', 'formatting-rules.md');
  const skillsDir = resolve(AGENT_DIR, 'skills');
  const emojiHeavyBody = [
    '# Formatting Rules 🎨',
    '',
    'Use emojis liberally to make the output more engaging! 🎉🎉🎉',
    '',
    '## Tone 💬',
    '',
    "Drop a 🚀 when celebrating a win, a 🔥 when highlighting risk, and a ✨ when introducing a new feature. Don't hold back! 🙌",
    '',
    'Every bullet point should start with an emoji. 📝 Every heading should have one too. 🏷️',
    '',
    '## Examples 📚',
    '- ✅ "Deployment succeeded 🎉"',
    '- ❌ "Deployment failed 💥"',
    '',
  ].join('\n');
  // Counts matches of the Unicode Emoji_Presentation property.
  const emojiCount = (s) => (s.match(/\p{Emoji_Presentation}/gu) ?? []).length;
  const initialEmojis = emojiCount(emojiHeavyBody);
  // Sanity check on the fixture itself before involving the agent.
  expect(initialEmojis).toBeGreaterThan(5);
  try {
    // Setup lives inside the try so the fixture is removed even when a setup
    // step throws (for example, skills/ cannot be listed).
    writeFileSync(formattingRulesPath, emojiHeavyBody);
    // Snapshot skills/ so we can assert the agent didn't create a bogus skill.
    const skillsBefore = new Set(readdirSync(skillsDir));
    const res = await fetch(`http://localhost:${AGENT_PORT}/config/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        message: 'I want to use emojis less often in my formatting rules. Find where they are defined in my repo and reduce the emoji guidance — remove most emoji usage from the instructions, keep the document but make it plain text. Work carefully: first look around to find the right file, then edit it in place. Do not create any new skills.',
      }),
      signal: AbortSignal.timeout(TIMEOUT * 2),
    });
    const text = await res.text();
    const events = parseSSE(text);
    const toolStarts = findEvents(events, 'tool_call_start');
    const toolNames = toolStarts.map((e) => String(e['tool_name']));
    // Discovery: the agent should have used at least one of the discovery
    // tools to find formatting-rules.md instead of guessing.
    const usedDiscovery = toolNames.some((n) => n === 'list_repo_files' || n === 'glob_repo_files' || n === 'grep_repo_files');
    expect(usedDiscovery).toBe(true);
    // Action: edit_repo_file is preferred, write_repo_file is tolerated.
    // NOTE(review): the target path of the write is not checked here.
    const editedInPlace = toolNames.includes('edit_repo_file');
    const rewroteFile = toolNames.includes('write_repo_file');
    expect(editedInPlace || rewroteFile).toBe(true);
    // Regression guard: agent must NOT have created a new skill.
    const skillsAfter = new Set(readdirSync(skillsDir));
    const newSkills = [...skillsAfter].filter((s) => !skillsBefore.has(s));
    expect(newSkills).toEqual([]);
    // Outcome: the file should still exist and contain fewer emojis.
    const after = readFileSync(formattingRulesPath, 'utf-8');
    expect(after.length).toBeGreaterThan(0);
    const afterEmojis = emojiCount(after);
    expect(afterEmojis).toBeLessThan(initialEmojis);
  }
  finally {
    // Clean up — remove the formatting-rules.md fixture regardless of pass/fail.
    rmSync(formattingRulesPath, { force: true });
  }
}, TIMEOUT * 2);
|
|
392
|
+
// Pagination end-to-end: drop a 3000-line file with a sentinel on line
|
|
393
|
+
// 2800, ask the admin agent to report what's there verbatim. The default
|
|
394
|
+
// read cap is 2000 lines, so the agent MUST either paginate via offset
|
|
395
|
+
// or use grep. Verifies the new line_start/line_end/total_lines/
|
|
396
|
+
// truncated response shape is actually usable by a real LLM.
|
|
397
|
+
/**
 * Pagination scenario: writes a 3000-line knowledge/big-file.md whose line
 * 2800 carries a sentinel, asks the admin agent (POST /config/chat) to quote
 * that line, and checks that
 *   - read_repo_file or grep_repo_files was called,
 *   - if read_repo_file was called, at least one call's offset/limit window
 *     includes line 2800 (missing values are treated as offset 1, limit 2000),
 *   - the streamed response text contains the sentinel verbatim.
 * The fixture file is removed afterwards regardless of outcome.
 */
it('admin agent paginates a long file to reach content past the default cap', async () => {
  // Sentinel must be distinct enough that the agent can quote it back.
  const SENTINEL = 'TARGET:CONTENT:ABCD1234:the-answer-is-42';
  const TARGET_LINE = 2800;
  const TOTAL_LINES = 3000;
  const bigFilePath = resolve(AGENT_DIR, 'knowledge', 'big-file.md');

  const fileLines = [];
  for (let lineNo = 1; lineNo <= TOTAL_LINES; lineNo += 1) {
    const content = lineNo === TARGET_LINE ? SENTINEL : 'filler';
    fileLines.push(`line ${String(lineNo)}: ${content}`);
  }
  writeFileSync(bigFilePath, fileLines.join('\n'));

  // True when a read_repo_file start event's window [first, last] holds TARGET_LINE.
  const windowCoversTarget = (startEvent) => {
    const args = startEvent['parameters'];
    if (!args) {
      return false;
    }
    const first = typeof args['offset'] === 'number' ? args['offset'] : 1;
    const count = typeof args['limit'] === 'number' ? args['limit'] : 2000;
    const last = first + count - 1;
    return first <= TARGET_LINE && last >= TARGET_LINE;
  };

  try {
    const res = await fetch(`http://localhost:${AGENT_PORT}/config/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        message: `I just added a long file at knowledge/big-file.md. Tell me exactly what's on line ${String(TARGET_LINE)} — report the full line content verbatim. Just give me the line, no summary.`,
      }),
      signal: AbortSignal.timeout(TIMEOUT * 2),
    });
    const events = parseSSE(await res.text());
    const toolStarts = findEvents(events, 'tool_call_start');
    const calledTools = new Set(toolStarts.map((e) => String(e['tool_name'])));

    // Either tool can locate the target line.
    expect(calledTools.has('read_repo_file') || calledTools.has('grep_repo_files')).toBe(true);

    // The pagination requirement only applies when the agent chose to read.
    const readCalls = toolStarts.filter((e) => e['tool_name'] === 'read_repo_file');
    if (readCalls.length > 0) {
      expect(readCalls.some((e) => windowCoversTarget(e))).toBe(true);
    }

    // Hard assertion: the response contains the sentinel verbatim.
    expect(allText(events)).toContain(SENTINEL);
  }
  finally {
    rmSync(bigFilePath, { force: true });
  }
}, TIMEOUT * 2);
|
|
450
|
+
// Multi-chunk pagination: sentinels spread across a 5000-line file so no
|
|
451
|
+
// single default read (2000 lines) can cover all of them. Verifies the
|
|
452
|
+
// agent either (a) chains multiple reads following the truncated: true
|
|
453
|
+
// signal, or (b) uses grep. Either is acceptable — what matters is that
|
|
454
|
+
// the agent finds content past the default window.
|
|
455
|
+
/**
 * Multi-chunk scenario: writes a 5000-line knowledge/scatter.md with the same
 * marker on lines 500, 2500 and 4500, asks the admin agent (POST /config/chat)
 * to quote those lines, and checks that
 *   - read_repo_file or grep_repo_files was called,
 *   - when only read_repo_file was used (no grep), at least one read window
 *     ends past line 2000 (missing values count as offset 1, limit 2000),
 *   - the response mentions every marker line number AND the marker text.
 * The fixture file is removed afterwards regardless of outcome.
 */
it('admin agent finds content scattered across a long file via pagination or grep', async () => {
  const bigFilePath = resolve(AGENT_DIR, 'knowledge', 'scatter.md');
  const MARKER = 'MARKER-ZXCV9876';
  const MARKER_LINES = [500, 2500, 4500];
  const TOTAL_LINES = 5000;
  const body = Array.from({ length: TOTAL_LINES }, (_, i) => {
    const n = i + 1;
    return MARKER_LINES.includes(n) ? `line ${String(n)}: ${MARKER}` : `line ${String(n)}: filler`;
  }).join('\n');
  writeFileSync(bigFilePath, body);
  try {
    const res = await fetch(`http://localhost:${AGENT_PORT}/config/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        message: `Read knowledge/scatter.md and quote the exact content of line 500, line 2500, and line 4500 verbatim. Report each line's full text.`,
      }),
      signal: AbortSignal.timeout(TIMEOUT * 2),
    });
    const text = await res.text();
    const events = parseSSE(text);
    const toolStarts = findEvents(events, 'tool_call_start');
    const toolNames = toolStarts.map((e) => String(e['tool_name']));
    // Agent must have touched the file.
    const touchedFile = toolNames.some((n) => n === 'read_repo_file' || n === 'grep_repo_files');
    expect(touchedFile).toBe(true);
    // Only enforce pagination when the agent relied on reads alone; with grep
    // in play a read may just be confirming a line grep already located.
    const usedGrep = toolNames.includes('grep_repo_files');
    const readCalls = toolStarts.filter((e) => e['tool_name'] === 'read_repo_file');
    if (readCalls.length > 0 && !usedGrep) {
      const reachedPastCap = readCalls.some((e) => {
        const params = e['parameters'];
        if (!params)
          return false;
        const offset = typeof params['offset'] === 'number' ? params['offset'] : 1;
        const limit = typeof params['limit'] === 'number' ? params['limit'] : 2000;
        // A single read covers up to line_end = offset + limit - 1.
        return offset + limit - 1 > 2000;
      });
      expect(reachedPastCap).toBe(true);
    }
    const responseText = allText(events);
    // The response should identify all three marker line numbers.
    for (const n of MARKER_LINES) {
      expect(responseText).toContain(String(n));
    }
    // The line numbers alone prove nothing: they all appear in the prompt,
    // and "500" is a substring of "2500" and "4500". The marker text exists
    // only in the file, so its presence shows the agent read the content.
    expect(responseText).toContain(MARKER);
  }
  finally {
    rmSync(bigFilePath, { force: true });
  }
}, TIMEOUT * 2);
|
|
278
512
|
// -------------------------------------------------------------------------
|
|
279
513
|
// 10. Write intent enforcement (G8)
|
|
280
514
|
// -------------------------------------------------------------------------
|
|
@@ -372,6 +606,44 @@ describe.skipIf(!!skipReason)('smoke tests', () => {
|
|
|
372
606
|
expect(mentionsAny).toBe(true);
|
|
373
607
|
}, TIMEOUT * 3);
|
|
374
608
|
// -------------------------------------------------------------------------
|
|
609
|
+
// 11e. Parallel tool calls — batched read-only execution
|
|
610
|
+
// -------------------------------------------------------------------------
|
|
611
|
+
it('batches parallel read-only tool calls in a single turn', async () => {
    // Seed three distinct items with unique names so the follow-up prompt
    // has three separate keys to query.
    const seed = [
        ['parallel-alpha', 'Alpha Marker'],
        ['parallel-beta', 'Beta Marker'],
        ['parallel-gamma', 'Gamma Marker'],
    ];
    const writes = await Promise.all(seed.map(([id, name]) => chat(`Write to test-items store: item_id="${id}", name="${name}", status="active".`)));
    const allWritesOk = writes.every((w) => findEvents(w.events, 'tool_call_result').some((e) => e['status'] === 'success'));
    if (!allWritesOk) {
        // Seeding depends on the model calling the store tool; say so
        // instead of passing silently.
        // eslint-disable-next-line no-console -- intentional test diagnostic
        console.warn('[smoke] Seeding test-items failed — parallel-batch path not exercised this run (LLM non-determinism)');
        return;
    }
    // Ask the model to fetch all three in parallel. Models sometimes split
    // this across turns; when they emit a single-turn parallel batch we
    // verify the runtime handled it correctly end-to-end.
    const { events } = await chat('Fetch all three of these items from the test-items store in parallel ' +
        'using three concurrent query_store tool calls (one per key): ' +
        '"parallel-alpha", "parallel-beta", "parallel-gamma". Then list the name ' +
        'field of each item in your response.');
    const queryStoreStarts = findEvents(events, 'tool_call_start').filter((e) => e['tool_name'] === 'query_store');
    if (queryStoreStarts.length < 2) {
        // eslint-disable-next-line no-console -- intentional test diagnostic
        console.warn(`[smoke] Model emitted ${String(queryStoreStarts.length)} query_store call(s) — ` +
            'parallel-batch path not exercised this run (LLM non-determinism)');
        return;
    }
    // Every query_store start must have its OWN successful result. Pair
    // starts and results by tool_id: a plain count comparison would let a
    // success from an unrelated tool hide a dropped query_store result.
    // (Content coverage of the response is LLM-variable and not the
    // batcher's job.)
    const successfulToolIds = new Set(findEvents(events, 'tool_call_result')
        .filter((e) => e['status'] === 'success')
        .map((e) => e['tool_id']));
    const startsWithoutSuccess = queryStoreStarts.filter((start) => !successfulToolIds.has(start['tool_id']));
    expect(startsWithoutSuccess).toEqual([]);
}, TIMEOUT * 3);
|
|
646
|
+
// -------------------------------------------------------------------------
|
|
375
647
|
// 12. Concurrent sessions don't bleed context
|
|
376
648
|
// -------------------------------------------------------------------------
|
|
377
649
|
it('concurrent sessions are isolated', async () => {
|
|
@@ -558,9 +830,6 @@ describe.skipIf(!!skipReason)('smoke tests', () => {
|
|
|
558
830
|
// 23. Sessions — listing and history
|
|
559
831
|
// -------------------------------------------------------------------------
|
|
560
832
|
it('sessions endpoint returns a sessions array', async () => {
|
|
561
|
-
// Chat sessions in local dev don't auto-populate the legacy session store
|
|
562
|
-
// used by /sessions (only automation runs do), so we just verify the
|
|
563
|
-
// endpoint returns the expected shape.
|
|
564
833
|
const res = await fetch(`http://localhost:${AGENT_PORT}/sessions`, { signal: AbortSignal.timeout(5000) });
|
|
565
834
|
const body = await res.json();
|
|
566
835
|
expect(res.status).toBe(200);
|
|
@@ -570,6 +839,79 @@ describe.skipIf(!!skipReason)('smoke tests', () => {
|
|
|
570
839
|
const res = await fetch(`http://localhost:${AGENT_PORT}/session/nonexistent-id`, { signal: AbortSignal.timeout(5000) });
|
|
571
840
|
expect(res.status).toBe(404);
|
|
572
841
|
});
|
|
842
|
+
it('persists chat session through full list/get/patch/delete lifecycle', async () => {
    // Walks one chat session through every session endpoint exercised by
    // the dev-UI history panel: list, fetch, rename, delete.
    const { sessionId } = await chat('Say "ok" in one word.');
    expect(sessionId).toBeTruthy();

    const listUrl = `http://localhost:${AGENT_PORT}/sessions`;
    const sessionUrl = `http://localhost:${AGENT_PORT}/session/${sessionId}`;
    const shortTimeout = () => AbortSignal.timeout(5000);
    const isThisSession = (s) => s['id'] === sessionId;

    // Step 1: the new session is listed with the fields the UI reads.
    const listRes = await fetch(listUrl, { signal: shortTimeout() });
    const listBody = await listRes.json();
    expect(listRes.status).toBe(200);
    const listed = listBody.sessions.find(isThisSession);
    expect(listed).toBeDefined();
    if (!listed) {
        // The expect above already failed in this case; this only narrows
        // the value for the property reads below.
        throw new Error('unreachable');
    }
    expect(listed['appId']).toBe('local');
    expect(typeof listed['summary']).toBe('string');
    expect(String(listed['summary']).length).toBeGreaterThan(0);
    expect(typeof listed['createdAt']).toBe('number');
    expect(typeof listed['lastAccessedAt']).toBe('number');
    expect(listed['automationName']).toBeUndefined();

    // Step 2: fetching the session returns its conversation, starting
    // with the user prompt sent above.
    const getRes = await fetch(sessionUrl, { signal: shortTimeout() });
    const getBody = await getRes.json();
    expect(getRes.status).toBe(200);
    expect(getBody.session_id).toBe(sessionId);
    expect(getBody.messages.length).toBeGreaterThan(0);
    const [firstMessage] = [getBody.messages[0]];
    expect(firstMessage.role).toBe('user');
    expect(firstMessage.text).toContain('Say "ok"');

    // Step 3: renaming via PATCH is reflected by the next listing, in
    // both `title` and `summary`.
    const patchRes = await fetch(sessionUrl, {
        method: 'PATCH',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ title: 'smoke renamed' }),
        signal: shortTimeout(),
    });
    expect(patchRes.status).toBe(200);
    const relistRes = await fetch(listUrl, { signal: shortTimeout() });
    const relistBody = await relistRes.json();
    const renamed = relistBody.sessions.find(isThisSession);
    expect(renamed?.['title']).toBe('smoke renamed');
    expect(renamed?.['summary']).toBe('smoke renamed');

    // Step 4: after DELETE the session can no longer be fetched.
    const delRes = await fetch(sessionUrl, {
        method: 'DELETE',
        signal: shortTimeout(),
    });
    expect(delRes.status).toBe(200);
    const afterDeleteRes = await fetch(sessionUrl, { signal: shortTimeout() });
    expect(afterDeleteRes.status).toBe(404);
}, TIMEOUT);
|
|
890
|
+
it('preserves tool-call chips in /session/:id history', async () => {
    // The history endpoint is expected to expose an assistant message's
    // tool calls as `toolCalls[]`; without that the history view would
    // show the reply text but lose the tool-call chips.
    const { sessionId } = await chat('Use the request tool to GET /items from the mock-api connection with intent "read".');
    expect(sessionId).toBeTruthy();
    const historyRes = await fetch(`http://localhost:${AGENT_PORT}/session/${sessionId}`, { signal: AbortSignal.timeout(5000) });
    const history = await historyRes.json();
    expect(historyRes.status).toBe(200);

    const carriesToolCalls = (m) => m.role === 'assistant' && m.toolCalls && m.toolCalls.length > 0;
    const recordedCalls = history.messages.find(carriesToolCalls)?.toolCalls;

    // Soft assertion: the model is free not to call a tool on a given
    // turn. Only when it did do we require the round-trip to be intact.
    if (!recordedCalls) {
        // eslint-disable-next-line no-console -- intentional test diagnostic
        console.warn('[smoke] Model did not call a tool for the request prompt — LLM non-determinism, skipping toolCall round-trip assertion');
        return;
    }
    const firstCall = recordedCalls[0];
    expect(firstCall.toolId).toBeTruthy();
    expect(firstCall.toolName).toBeTruthy();
    expect(typeof firstCall.parameters).toBe('object');
}, TIMEOUT);
|
|
573
915
|
// -------------------------------------------------------------------------
|
|
574
916
|
// 24. Files — browser and editor
|
|
575
917
|
// -------------------------------------------------------------------------
|
|
@@ -699,6 +1041,268 @@ describe.skipIf(!!skipReason)('smoke tests', () => {
|
|
|
699
1041
|
});
|
|
700
1042
|
expect(res.status).toBe(400);
|
|
701
1043
|
});
|
|
1044
|
+
// -------------------------------------------------------------------------
|
|
1045
|
+
// Agent loop safety features (budget, done reason)
|
|
1046
|
+
// -------------------------------------------------------------------------
|
|
1047
|
+
it('done event carries reason=model_stop on normal completion', async () => {
    // A trivial one-word reply should finish with the model stopping on
    // its own.
    const reply = await chat('Reply with just the word "ok".');
    expectDoneReason(reply.events, 'model_stop');
});
|
|
1051
|
+
it('max_session_tokens budget terminates the loop with reason=budget_exceeded', async () => {
    // The 200-token cap is meant to be far smaller than a single turn
    // with a tool call, so the budget check should trip after turn one.
    const prompt = 'Echo these strings one at a time, calling echo_tool for each: alpha, bravo, charlie, delta, echo, foxtrot.';
    const tightBudget = { maxSessionTokens: 200 };
    const result = await chat(prompt, undefined, tightBudget);
    expectDoneReason(result.events, 'budget_exceeded');
    expectTotalTokens(result.events, { atLeast: 200 });
});
|
|
1058
|
+
// -------------------------------------------------------------------------
|
|
1059
|
+
// 25. Runtime event bus (/api/events SSE stream)
|
|
1060
|
+
// -------------------------------------------------------------------------
|
|
1061
|
+
it('emits session_created when a new chat session is created', async () => {
    // Subscribe before chatting so the creation event is captured.
    const stream = await openEventStream();
    try {
        const { sessionId: newSessionId } = await chat('Say "hi" and nothing else.');
        const isCreationOfNewSession = (e) => e['type'] === 'session_created' && e['sessionId'] === newSessionId;
        const created = await stream.waitFor(isCreationOfNewSession, TIMEOUT);
        expect(created['type']).toBe('session_created');
        expect(created['sessionId']).toBe(newSessionId);
        // Bus envelope fields: a positive sequence number and a string
        // timestamp.
        expect(created['seq']).toBeGreaterThan(0);
        expect(typeof created['timestamp']).toBe('string');
    }
    finally {
        stream.close();
    }
}, TIMEOUT);
|
|
1075
|
+
it('emits session_updated on follow-up messages in an existing session', async () => {
    // Create the session first; the stream is opened afterwards so it
    // only needs to observe the follow-up turn.
    const { sessionId: existingId } = await chat('Remember the number 7.');
    const stream = await openEventStream();
    const isUpdateOfExisting = (e) => e['type'] === 'session_updated' && e['sessionId'] === existingId;
    try {
        await chat('Reply with just "ok".', existingId);
        const updated = await stream.waitFor(isUpdateOfExisting, TIMEOUT);
        expect(updated['type']).toBe('session_updated');
        expect(updated['sessionId']).toBe(existingId);
    }
    finally {
        stream.close();
    }
}, TIMEOUT * 2);
|
|
1088
|
+
it('emits session_deleted when a session is DELETEd', async () => {
    const { sessionId } = await chat('Say "ok".');
    const stream = await openEventStream();
    const isDeletionOfSession = (e) => e['type'] === 'session_deleted' && e['sessionId'] === sessionId;
    try {
        const deleteRes = await fetch(`http://localhost:${AGENT_PORT}/session/${sessionId}`, {
            method: 'DELETE',
            signal: AbortSignal.timeout(5000),
        });
        expect(deleteRes.status).toBe(200);
        // The DELETE has already returned, so a short wait is enough.
        const deleted = await stream.waitFor(isDeletionOfSession, 5000);
        expect(deleted['type']).toBe('session_deleted');
        expect(deleted['sessionId']).toBe(sessionId);
    }
    finally {
        stream.close();
    }
}, TIMEOUT);
|
|
1105
|
+
it('emits automation_triggered and automation_completed on manual run', async () => {
    // The automation's registered name is derived from the filename
    // (automations/test-auto.md → "test-auto"), not the frontmatter.
    const automationName = 'test-auto';
    const stream = await openEventStream();
    try {
        // Start the run without awaiting it: the events are observed
        // while the request is still in flight.
        const runPromise = fetch(`http://localhost:${AGENT_PORT}/automations/${automationName}/run`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: '{}',
            signal: AbortSignal.timeout(TIMEOUT),
        });
        // Mark the promise as handled. If a waitFor below throws first,
        // a later rejection of the fetch (timeout, connection reset)
        // would otherwise surface as an unhandled rejection outside
        // this test. The `await runPromise` further down still rethrows
        // the rejection on the normal path.
        runPromise.catch(() => { });
        const triggered = await stream.waitFor((e) => e['type'] === 'automation_triggered' && e['name'] === automationName, 5000);
        expect(triggered['source']).toBeDefined();
        // Wait for either terminal event so a failed run is reported as
        // a wrong event type rather than as a timeout.
        const completed = await stream.waitFor((e) => (e['type'] === 'automation_completed' || e['type'] === 'automation_failed') &&
            e['name'] === automationName, TIMEOUT);
        expect(completed['type']).toBe('automation_completed');
        expect(typeof completed['durationMs']).toBe('number');
        const runRes = await runPromise;
        expect([200, 500]).toContain(runRes.status);
    }
    finally {
        stream.close();
    }
}, TIMEOUT + 10_000);
|
|
1130
|
+
it('delivers automation result via onAutomationResult callback (full chain)', async () => {
    // Runs the delivery-callback-test automation and checks that a new
    // entry lands in receivedAutomationResults, which the suite setup
    // is expected to fill from the onAutomationResult callback.
    const baseline = receivedAutomationResults.length;
    const runUrl = `http://localhost:${AGENT_PORT}/automations/delivery-callback-test/run`;
    const runRes = await fetch(runUrl, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: '{}',
        signal: AbortSignal.timeout(TIMEOUT),
    });
    expect([200, 500]).toContain(runRes.status);

    // Delivery may trail the HTTP response slightly, so poll for up to
    // five seconds in 100 ms steps.
    const giveUpAt = Date.now() + 5000;
    const pause = () => new Promise((r) => setTimeout(r, 100));
    for (;;) {
        if (receivedAutomationResults.length !== baseline)
            break;
        if (Date.now() >= giveUpAt)
            break;
        await pause();
    }
    expect(receivedAutomationResults.length).toBeGreaterThan(baseline);

    const latest = receivedAutomationResults[receivedAutomationResults.length - 1];
    expect(latest?.['automation']).toBe('delivery-callback-test');
    expect(latest?.['status']).toBe('success');
    // Only the automation name is asserted in the rendered message; the
    // rest of the text depends on LLM output.
    const rendered = String(latest?.['message'] ?? '');
    expect(rendered).toContain('delivery-callback-test');
}, TIMEOUT + 10_000);
|
|
1155
|
+
it('replays buffered events via Last-Event-ID on reconnect', async () => {
    // Phase 1: observe a live event and remember its sequence number.
    const liveStream = await openEventStream();
    let seenSeq = 0;
    try {
        await chat('Say "ok".');
        const created = await liveStream.waitFor((e) => e['type'] === 'session_created', TIMEOUT);
        seenSeq = Number(created['seq']);
        expect(seenSeq).toBeGreaterThan(0);
    }
    finally {
        liveStream.close();
    }

    // Phase 2: reconnect claiming the previous sequence number was the
    // last one seen, so the remembered event should be sent again.
    const resumeFrom = String(seenSeq - 1);
    const replayStream = await openEventStream({ lastEventId: resumeFrom });
    const carriesSeenSeq = (e) => Number(e['seq']) === seenSeq;
    try {
        const replayed = await replayStream.waitFor(carriesSeenSeq, 5000);
        expect(Number(replayed['seq'])).toBe(seenSeq);
    }
    finally {
        replayStream.close();
    }
}, TIMEOUT);
|
|
1178
|
+
it('emits session_updated when title is PATCHed', async () => {
    const { sessionId } = await chat('Say "ok".');
    const stream = await openEventStream();
    // Match on the title as well, so an update event from the chat turn
    // itself cannot satisfy the wait.
    const isRenameEvent = (e) => e['type'] === 'session_updated' && e['sessionId'] === sessionId && e['title'] === 'my renamed session';
    try {
        const patchRes = await fetch(`http://localhost:${AGENT_PORT}/session/${sessionId}`, {
            method: 'PATCH',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ title: 'my renamed session' }),
            signal: AbortSignal.timeout(5000),
        });
        expect(patchRes.status).toBe(200);
        const renameEvent = await stream.waitFor(isRenameEvent, 5000);
        expect(renameEvent['title']).toBe('my renamed session');
    }
    finally {
        stream.close();
    }
}, TIMEOUT);
|
|
1196
|
+
it('emits store_updated when a tool writes to a store', async () => {
    const stream = await openEventStream();
    const isTestItemsUpdate = (e) => e['type'] === 'store_updated' && e['storeName'] === 'test-items';
    try {
        // Whether the agent actually calls the store tool is up to the
        // model, so the event is only checked when one was captured.
        await chat('Write an item to the test-items store with id="evt-smoke-1" and name="smoke event test".');
        const update = stream.events.find(isTestItemsUpdate);
        if (!update) {
            // Soft path: log instead of failing, since this depends on
            // LLM behavior.
            // eslint-disable-next-line no-console -- intentional test diagnostic
            console.warn('[smoke] store_updated not emitted — LLM may have declined to call store_write');
            return;
        }
        expect(update['operation']).toBe('put');
    }
    finally {
        stream.close();
    }
}, TIMEOUT);
|
|
1217
|
+
it('emits store_updated when a direct REST write happens', async () => {
    // No LLM involved on this path, so the event is a hard assertion.
    const stream = await openEventStream();
    const isTestItemsPut = (e) => e['type'] === 'store_updated' && e['storeName'] === 'test-items' && e['operation'] === 'put';
    try {
        const item = { id: 'rest-smoke-1', name: 'direct rest write' };
        const writeRes = await fetch(`http://localhost:${AGENT_PORT}/api/stores/test-items`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(item),
            signal: AbortSignal.timeout(5000),
        });
        expect(writeRes.status).toBe(201);
        const putEvent = await stream.waitFor(isTestItemsPut, 5000);
        expect(putEvent['operation']).toBe('put');
    }
    finally {
        stream.close();
    }
}, TIMEOUT);
|
|
1235
|
+
it('emits automation_started and automation_stopped', async () => {
    // This test needs an automation that has a schedule. It picks the
    // first one the /automations listing reports with a `schedule` and
    // returns early (effectively a skip) when there is none or when the
    // start request is not accepted.
    const listRes = await fetch(`http://localhost:${AGENT_PORT}/automations`, { signal: AbortSignal.timeout(5000) });
    const listBody = await listRes.json();
    const schedulable = listBody.automations.find((a) => a.schedule);
    if (!schedulable) {
        return; // smoke agent has no scheduled automation — skip
    }
    const stopUrl = `http://localhost:${AGENT_PORT}/automations/${schedulable.name}/stop`;
    const stream = await openEventStream();
    // True from the moment the server accepted the start until a stop
    // request has gone through; drives the cleanup in `finally`.
    let stopPending = false;
    try {
        const startRes = await fetch(`http://localhost:${AGENT_PORT}/automations/${schedulable.name}/start`, { method: 'POST', signal: AbortSignal.timeout(5000) });
        if (startRes.status !== 200)
            return; // not a schedulable automation
        stopPending = true;
        const started = await stream.waitFor((e) => e['type'] === 'automation_started' && e['name'] === schedulable.name, 5000);
        expect(typeof started['intervalMs']).toBe('number');
        await fetch(stopUrl, { method: 'POST', signal: AbortSignal.timeout(5000) });
        stopPending = false;
        const stopped = await stream.waitFor((e) => e['type'] === 'automation_stopped' && e['name'] === schedulable.name, 5000);
        expect(stopped['name']).toBe(schedulable.name);
    }
    finally {
        if (stopPending) {
            // An assertion failed after the automation was started.
            // Stop it on a best-effort basis so it does not keep firing
            // during the remaining tests; the original failure is what
            // gets reported.
            await fetch(stopUrl, { method: 'POST', signal: AbortSignal.timeout(5000) }).catch(() => { });
        }
        stream.close();
    }
}, TIMEOUT);
|
|
1260
|
+
it('fans out the same event to all concurrent clients (two-tab case)', async () => {
    // Two separate SSE connections stand in for two browser tabs. Both
    // must receive the same event, with the same seq and timestamp.
    const [tabA, tabB] = await Promise.all([openEventStream(), openEventStream()]);
    try {
        const { sessionId: newSessionId } = await chat('Say "ok".');
        const isCreationOfNewSession = (e) => e['type'] === 'session_created' && e['sessionId'] === newSessionId;
        const [seenByA, seenByB] = await Promise.all([
            tabA.waitFor(isCreationOfNewSession, TIMEOUT),
            tabB.waitFor(isCreationOfNewSession, TIMEOUT),
        ]);
        // Equal envelope fields mean both clients saw one logical event.
        expect(seenByA['seq']).toBe(seenByB['seq']);
        expect(seenByA['timestamp']).toBe(seenByB['timestamp']);
        expect(seenByA['sessionId']).toBe(seenByB['sessionId']);
    }
    finally {
        tabA.close();
        tabB.close();
    }
}, TIMEOUT);
|
|
1281
|
+
// -------------------------------------------------------------------------
|
|
1282
|
+
// Web tools (web_search, fetch_url) — gated on GOOGLE_API_KEY.
|
|
1283
|
+
//
|
|
1284
|
+
// When the smoke target is Anthropic/OpenAI but GOOGLE_API_KEY is set,
|
|
1285
|
+
// these tests exercise the cross-provider case: the main agent runs on
|
|
1286
|
+
// one provider, but web_search routes through the dedicated Gemini
|
|
1287
|
+
// backend. beforeAll injects the webTools config when the key is set.
|
|
1288
|
+
// -------------------------------------------------------------------------
|
|
1289
|
+
// The web-tool test below only runs when GOOGLE_API_KEY is set.
const hasGoogleKey = Boolean(process.env['GOOGLE_API_KEY']);
it.skipIf(!hasGoogleKey)('web_search tool is invoked for a current-information question', async () => {
    const { events } = await chat('Use the web_search tool to find an authoritative source for the current stable version of Node.js. Reply with just the version number.');
    const starts = findEvents(events, 'tool_call_start');
    const results = findEvents(events, 'tool_call_result');

    const searchStart = starts.find((e) => e['tool_name'] === 'web_search');
    expect(searchStart).toBeDefined();

    // Pair the start with its result through the shared tool_id and
    // require that result to be a success.
    const searchToolId = searchStart?.['tool_id'];
    const searchResult = results.find((e) => e['tool_id'] === searchToolId);
    expect(searchResult).toBeDefined();
    expect(searchResult?.['status']).toBe('success');

    // The turn should end normally and contain some text output.
    const doneEvent = findEvent(events, 'done');
    expect(doneEvent?.['reason']).toBe('model_stop');
    expect(allText(events).length).toBeGreaterThan(0);
}, TIMEOUT);
|
|
702
1306
|
});
|
|
703
1307
|
// ---------------------------------------------------------------------------
|
|
704
1308
|
// SSE parser helper
|
|
@@ -715,4 +1319,86 @@ function parseSSE(text) {
|
|
|
715
1319
|
}
|
|
716
1320
|
return events;
|
|
717
1321
|
}
|
|
1322
|
+
/**
 * Opens a Server-Sent Events connection to `/api/events` on the agent
 * under test and drains it in the background.
 *
 * @param {{ lastEventId?: string }} [options] - When `lastEventId` is set
 *   it is sent as the `Last-Event-ID` request header.
 * @returns {Promise<{
 *   events: object[],
 *   waitFor: (predicate: (event: object) => boolean, timeoutMs?: number) => Promise<object>,
 *   close: () => void,
 * }>} `events` is the live array of every event parsed so far. `waitFor`
 *   resolves with the first event (already buffered or future) matching
 *   `predicate`, and rejects on timeout, when the predicate throws, or
 *   when the stream ends or is closed first. `close` aborts the
 *   connection and rejects all pending waiters.
 * @throws {Error} If the endpoint answers with a non-2xx status or
 *   without a response body.
 */
async function openEventStream(options = {}) {
    const controller = new AbortController();
    const headers = { Accept: 'text/event-stream' };
    if (options.lastEventId)
        headers['Last-Event-ID'] = options.lastEventId;
    const res = await fetch(`http://localhost:${AGENT_PORT}/api/events`, {
        headers,
        signal: controller.signal,
    });
    if (!res.ok) {
        // An error response would otherwise look like a stream that
        // never emits, and callers would only see a waitFor timeout.
        controller.abort();
        throw new Error(`/api/events responded with HTTP ${String(res.status)}`);
    }
    if (!res.body)
        throw new Error('no response body from /api/events');
    const events = [];
    const waiters = [];
    const reader = res.body.getReader();
    const decoder = new TextDecoder();
    let buffer = '';
    // Set once no further events can arrive (closed by caller or ended).
    let finished = false;

    // Rejects every pending waiter so callers fail right away instead of
    // sitting on their timeout after the stream is gone.
    const failPending = (reason) => {
        finished = true;
        for (const waiter of waiters.splice(0)) {
            waiter.reject(new Error(reason));
        }
    };

    // Records an event and settles the waiters whose predicate accepts
    // it. Iterates backwards so splicing does not disturb the indices
    // still to be visited.
    const dispatch = (event) => {
        events.push(event);
        for (let i = waiters.length - 1; i >= 0; i--) {
            const waiter = waiters[i];
            let matched;
            try {
                matched = waiter.predicate(event);
            }
            catch (err) {
                // A broken predicate fails its own waiter only; the
                // remaining waiters still get to see this event.
                waiters.splice(i, 1);
                waiter.reject(err);
                continue;
            }
            if (matched) {
                waiters.splice(i, 1);
                waiter.resolve(event);
            }
        }
    };

    // Background drain: split the byte stream into SSE frames and
    // dispatch the JSON payload of each frame.
    void (async () => {
        try {
            while (!finished) {
                const { done, value } = await reader.read();
                if (done)
                    break;
                // Normalise CRLF so frames are found for either line
                // ending. Re-running over the whole buffer also covers
                // a "\r\n" pair split across two chunks.
                buffer = (buffer + decoder.decode(value, { stream: true })).replace(/\r\n/g, '\n');
                let boundary;
                while ((boundary = buffer.indexOf('\n\n')) !== -1) {
                    const frame = buffer.slice(0, boundary);
                    buffer = buffer.slice(boundary + 2);
                    // Accept "data:" with or without the optional space
                    // and join multi-line payloads with "\n".
                    const data = frame
                        .split('\n')
                        .filter((line) => line.startsWith('data:'))
                        .map((line) => line.slice(5).replace(/^ /, ''))
                        .join('\n');
                    if (data === '')
                        continue;
                    let event;
                    try {
                        event = JSON.parse(data);
                    }
                    catch {
                        continue; // malformed frame
                    }
                    dispatch(event);
                }
            }
        }
        catch { /* aborted or connection closed */ }
        finally {
            failPending('event stream ended before a matching event arrived');
        }
    })();

    return {
        events,
        waitFor(predicate, timeoutMs = 5000) {
            // Check already-buffered events first
            const already = events.find(predicate);
            if (already)
                return Promise.resolve(already);
            if (finished)
                return Promise.reject(new Error('event stream is closed; no matching event was buffered'));
            return new Promise((resolve, reject) => {
                const waiter = { predicate };
                const timer = setTimeout(() => {
                    // Remove this exact waiter. Looking it up by
                    // predicate identity would pick the wrong entry
                    // when the same function is waited on twice.
                    const idx = waiters.indexOf(waiter);
                    if (idx !== -1)
                        waiters.splice(idx, 1);
                    reject(new Error(`waitFor timed out after ${String(timeoutMs)}ms`));
                }, timeoutMs);
                waiter.resolve = (event) => {
                    clearTimeout(timer);
                    resolve(event);
                };
                waiter.reject = (err) => {
                    clearTimeout(timer);
                    reject(err);
                };
                waiters.push(waiter);
            });
        },
        close() {
            failPending('event stream closed before a matching event arrived');
            controller.abort();
            reader.cancel().catch(() => { });
        },
    };
}
|
|
718
1404
|
//# sourceMappingURL=smoke.test.js.map
|