@amodalai/runtime 0.3.17 → 0.3.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/__fixtures__/smoke.test.js +56 -295
- package/dist/src/__fixtures__/smoke.test.js.map +1 -1
- package/dist/src/__tests__/studio-integration.test.js +10 -3
- package/dist/src/__tests__/studio-integration.test.js.map +1 -1
- package/dist/src/agent/local-server.js +32 -12
- package/dist/src/agent/local-server.js.map +1 -1
- package/dist/src/agent/local-server.test.js +10 -3
- package/dist/src/agent/local-server.test.js.map +1 -1
- package/dist/src/context/compiler.js +9 -0
- package/dist/src/context/compiler.js.map +1 -1
- package/dist/src/context/compiler.test.js +47 -1
- package/dist/src/context/compiler.test.js.map +1 -1
- package/dist/src/context/types.d.ts +3 -1
- package/dist/src/index.d.ts +2 -0
- package/dist/src/index.js +2 -0
- package/dist/src/index.js.map +1 -1
- package/dist/src/routes/session-resolver.d.ts +5 -0
- package/dist/src/routes/session-resolver.js +19 -5
- package/dist/src/routes/session-resolver.js.map +1 -1
- package/dist/src/session/session-builder.d.ts +14 -1
- package/dist/src/session/session-builder.js +38 -2
- package/dist/src/session/session-builder.js.map +1 -1
- package/dist/src/session/session-builder.test.js +81 -0
- package/dist/src/session/session-builder.test.js.map +1 -1
- package/dist/src/tools/memory-tool.d.ts +38 -0
- package/dist/src/tools/memory-tool.js +258 -0
- package/dist/src/tools/memory-tool.js.map +1 -0
- package/dist/src/tools/memory-tool.test.d.ts +6 -0
- package/dist/src/tools/memory-tool.test.js +155 -0
- package/dist/src/tools/memory-tool.test.js.map +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +4 -4
|
@@ -13,10 +13,11 @@
|
|
|
13
13
|
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
|
|
14
14
|
import { fork } from 'node:child_process';
|
|
15
15
|
import { resolve } from 'node:path';
|
|
16
|
-
import { readFileSync, writeFileSync, rmSync, readdirSync } from 'node:fs';
|
|
16
|
+
import { readFileSync, writeFileSync, rmSync } from 'node:fs';
|
|
17
17
|
import { expectDoneReason, expectTotalTokens } from './test-helpers.js';
|
|
18
18
|
import { loadTestEnv, defaultTargetName } from './test-env.js';
|
|
19
19
|
import { VISION_PROVIDERS } from '../providers/types.js';
|
|
20
|
+
import { getDb, agentMemoryEntries, eq } from '@amodalai/db';
|
|
20
21
|
// Pull API keys out of <repo-root>/.env.test (gitignored). Missing keys
|
|
21
22
|
// cause the describe block below to skip with a reason.
|
|
22
23
|
loadTestEnv();
|
|
@@ -131,6 +132,8 @@ describe.skipIf(!!skipReason)(`smoke tests [${smokeTargetName}]`, () => {
|
|
|
131
132
|
amodalConfig['models'] = {
|
|
132
133
|
main: { provider: smokeTarget.provider, model: smokeTarget.model },
|
|
133
134
|
};
|
|
135
|
+
// Enable agent memory so the memory smoke test can verify the full pipeline.
|
|
136
|
+
amodalConfig['memory'] = { enabled: true };
|
|
134
137
|
// Enable web_search + fetch_url tools when a Google API key is available.
|
|
135
138
|
// Key resolution happens in the core config parser via env: prefix.
|
|
136
139
|
if (process.env['GOOGLE_API_KEY']) {
|
|
@@ -251,6 +254,52 @@ describe.skipIf(!!skipReason)(`smoke tests [${smokeTargetName}]`, () => {
|
|
|
251
254
|
expect(responseText).toContain('SMOKE7742');
|
|
252
255
|
}, TIMEOUT * 2);
|
|
253
256
|
// -------------------------------------------------------------------------
|
|
257
|
+
// 4b. Agent memory — injected into system prompt from DB
|
|
258
|
+
// -------------------------------------------------------------------------
|
|
259
|
+
it('agent responds with facts only present in memory', async () => {
|
|
260
|
+
// Seed the memory entries table directly — avoids depending on the LLM
|
|
261
|
+
// to call memory.add (non-deterministic). The server already migrated
|
|
262
|
+
// the schema, so the table exists. getDb() returns the same singleton.
|
|
263
|
+
const db = getDb();
|
|
264
|
+
// The agent name from amodal.json is used as appId (matches runtime behavior).
|
|
265
|
+
const AGENT_NAME = 'smoke-test-agent';
|
|
266
|
+
const MEMORY_SENTINEL = 'MEMORY_SMOKE_SENTINEL_XK92';
|
|
267
|
+
await db
|
|
268
|
+
.insert(agentMemoryEntries)
|
|
269
|
+
.values({ appId: AGENT_NAME, content: `The user's favorite color is ${MEMORY_SENTINEL}.` });
|
|
270
|
+
// New session — memory entries are loaded fresh from the DB during session creation.
|
|
271
|
+
const { events } = await chat('What is my favorite color? Reply with just the color value, nothing else.');
|
|
272
|
+
const responseText = allText(events);
|
|
273
|
+
expect(responseText).toContain(MEMORY_SENTINEL);
|
|
274
|
+
// Clean up — remove memory entries so other tests start clean
|
|
275
|
+
await db.delete(agentMemoryEntries).where(eq(agentMemoryEntries.appId, AGENT_NAME));
|
|
276
|
+
}, TIMEOUT);
|
|
277
|
+
it('memory round-trip: agent writes entry, next session reads it', async () => {
|
|
278
|
+
// Ask the agent to remember a unique fact. The agent should call memory.add.
|
|
279
|
+
const ROUND_TRIP_SENTINEL = 'ROUNDTRIP_CODE_7749';
|
|
280
|
+
const first = await chat(`Please remember this exact code for me: ${ROUND_TRIP_SENTINEL}. Save it to memory using the memory tool with action "add".`);
|
|
281
|
+
// Verify the agent called the memory tool
|
|
282
|
+
const toolCalls = findEvents(first.events, 'tool_call_start');
|
|
283
|
+
const memoryCall = toolCalls.find((e) => e['tool_name'] === 'memory');
|
|
284
|
+
expect(memoryCall).toBeDefined();
|
|
285
|
+
// Verify the entry was written with the correct appId
|
|
286
|
+
const db = getDb();
|
|
287
|
+
const AGENT_NAME = 'smoke-test-agent';
|
|
288
|
+
const rows = await db
|
|
289
|
+
.select({ appId: agentMemoryEntries.appId, content: agentMemoryEntries.content })
|
|
290
|
+
.from(agentMemoryEntries)
|
|
291
|
+
.where(eq(agentMemoryEntries.appId, AGENT_NAME));
|
|
292
|
+
const match = rows.find((r) => r.content.includes(ROUND_TRIP_SENTINEL));
|
|
293
|
+
expect(match).toBeDefined();
|
|
294
|
+
expect(match?.appId).toBe(AGENT_NAME);
|
|
295
|
+
// New session — the agent should know the code from memory without being told
|
|
296
|
+
const second = await chat('What code did I ask you to remember? Reply with just the code, nothing else.');
|
|
297
|
+
const responseText = allText(second.events);
|
|
298
|
+
expect(responseText).toContain(ROUND_TRIP_SENTINEL);
|
|
299
|
+
// Clean up
|
|
300
|
+
await db.delete(agentMemoryEntries).where(eq(agentMemoryEntries.appId, AGENT_NAME));
|
|
301
|
+
}, TIMEOUT * 3);
|
|
302
|
+
// -------------------------------------------------------------------------
|
|
254
303
|
// 5. Tool call — store
|
|
255
304
|
// -------------------------------------------------------------------------
|
|
256
305
|
it('makes at least one tool call across chat interactions', async () => {
|
|
@@ -292,249 +341,11 @@ describe.skipIf(!!skipReason)(`smoke tests [${smokeTargetName}]`, () => {
|
|
|
292
341
|
const errorResult = toolResults.find((e) => e['status'] === 'error');
|
|
293
342
|
expect(errorResult).toBeDefined();
|
|
294
343
|
}, TIMEOUT);
|
|
344
|
+
// Tests for eval run, admin chat, admin file discovery, and admin
|
|
345
|
+
// pagination were removed — those routes moved to Studio. See
|
|
346
|
+
// packages/studio/src/__tests__/smoke.test.ts for Studio-side coverage.
|
|
295
347
|
// -------------------------------------------------------------------------
|
|
296
|
-
// 8.
|
|
297
|
-
// -------------------------------------------------------------------------
|
|
298
|
-
it('runs eval and returns results', async () => {
|
|
299
|
-
const res = await fetch(`http://localhost:${AGENT_PORT}/api/evals/run`, {
|
|
300
|
-
method: 'POST',
|
|
301
|
-
headers: { 'Content-Type': 'application/json' },
|
|
302
|
-
body: JSON.stringify({ evalNames: ['basic-eval'] }),
|
|
303
|
-
signal: AbortSignal.timeout(60_000),
|
|
304
|
-
});
|
|
305
|
-
const text = await res.text();
|
|
306
|
-
const events = [];
|
|
307
|
-
for (const line of text.split('\n')) {
|
|
308
|
-
if (!line.startsWith('data: '))
|
|
309
|
-
continue;
|
|
310
|
-
try {
|
|
311
|
-
events.push(JSON.parse(line.slice(6)));
|
|
312
|
-
}
|
|
313
|
-
catch { /* skip */ }
|
|
314
|
-
}
|
|
315
|
-
const complete = findEvent(events, 'eval_complete');
|
|
316
|
-
expect(complete).toBeDefined();
|
|
317
|
-
expect(complete?.['passed']).toBe(true);
|
|
318
|
-
}, 60_000);
|
|
319
|
-
// -------------------------------------------------------------------------
|
|
320
|
-
// 9. Admin chat — reads repo files
|
|
321
|
-
// -------------------------------------------------------------------------
|
|
322
|
-
it('admin agent can read skill files', async () => {
|
|
323
|
-
const res = await fetch(`http://localhost:${AGENT_PORT}/config/chat`, {
|
|
324
|
-
method: 'POST',
|
|
325
|
-
headers: { 'Content-Type': 'application/json' },
|
|
326
|
-
body: JSON.stringify({ message: 'Read the test-skill skill file and tell me what it says. Be brief.' }),
|
|
327
|
-
signal: AbortSignal.timeout(TIMEOUT),
|
|
328
|
-
});
|
|
329
|
-
const text = await res.text();
|
|
330
|
-
const events = parseSSE(text);
|
|
331
|
-
const init = findEvent(events, 'init');
|
|
332
|
-
expect(init).toBeDefined();
|
|
333
|
-
// Admin agent should use read_repo_file tool
|
|
334
|
-
const toolStarts = findEvents(events, 'tool_call_start');
|
|
335
|
-
const readTool = toolStarts.find((e) => e['tool_name'] === 'read_repo_file');
|
|
336
|
-
expect(readTool).toBeDefined();
|
|
337
|
-
// The matching result should be a success — validates the full
|
|
338
|
-
// tool_call_start → execute → tool_call_result SSE round-trip.
|
|
339
|
-
const toolResults = findEvents(events, 'tool_call_result');
|
|
340
|
-
const readResult = toolResults.find((e) => e['tool_id'] === readTool?.['tool_id']);
|
|
341
|
-
expect(readResult).toBeDefined();
|
|
342
|
-
expect(readResult?.['status']).toBe('success');
|
|
343
|
-
const responseText = allText(events);
|
|
344
|
-
expect(responseText.toLowerCase()).toContain('test');
|
|
345
|
-
}, TIMEOUT);
|
|
346
|
-
// End-to-end: the "reduce emojis in formatting rules" scenario from the
|
|
347
|
-
// admin-agent regression. Before the discovery + edit tools existed, the
|
|
348
|
-
// agent guessed wrong paths and often created a new skill file instead
|
|
349
|
-
// of editing the existing knowledge doc. With list_repo_files /
|
|
350
|
-
// glob_repo_files / grep_repo_files / edit_repo_file available, it
|
|
351
|
-
// should discover knowledge/formatting-rules.md and edit it in place.
|
|
352
|
-
it('admin agent discovers and edits the right file (emoji-reduction scenario)', async () => {
|
|
353
|
-
const formattingRulesPath = resolve(AGENT_DIR, 'knowledge', 'formatting-rules.md');
|
|
354
|
-
const emojiHeavyBody = [
|
|
355
|
-
'# Formatting Rules 🎨',
|
|
356
|
-
'',
|
|
357
|
-
'Use emojis liberally to make the output more engaging! 🎉🎉🎉',
|
|
358
|
-
'',
|
|
359
|
-
'## Tone 💬',
|
|
360
|
-
'',
|
|
361
|
-
"Drop a 🚀 when celebrating a win, a 🔥 when highlighting risk, and a ✨ when introducing a new feature. Don't hold back! 🙌",
|
|
362
|
-
'',
|
|
363
|
-
'Every bullet point should start with an emoji. 📝 Every heading should have one too. 🏷️',
|
|
364
|
-
'',
|
|
365
|
-
'## Examples 📚',
|
|
366
|
-
'- ✅ "Deployment succeeded 🎉"',
|
|
367
|
-
'- ❌ "Deployment failed 💥"',
|
|
368
|
-
'',
|
|
369
|
-
].join('\n');
|
|
370
|
-
const emojiCount = (s) => (s.match(/\p{Emoji_Presentation}/gu) ?? []).length;
|
|
371
|
-
const initialEmojis = emojiCount(emojiHeavyBody);
|
|
372
|
-
expect(initialEmojis).toBeGreaterThan(5);
|
|
373
|
-
writeFileSync(formattingRulesPath, emojiHeavyBody);
|
|
374
|
-
// Snapshot skills/ so we can assert the agent didn't create a bogus skill.
|
|
375
|
-
const skillsDir = resolve(AGENT_DIR, 'skills');
|
|
376
|
-
const skillsBefore = new Set(readdirSync(skillsDir));
|
|
377
|
-
try {
|
|
378
|
-
const res = await fetch(`http://localhost:${AGENT_PORT}/config/chat`, {
|
|
379
|
-
method: 'POST',
|
|
380
|
-
headers: { 'Content-Type': 'application/json' },
|
|
381
|
-
body: JSON.stringify({
|
|
382
|
-
message: 'I want to use emojis less often in my formatting rules. Find where they are defined in my repo and reduce the emoji guidance — remove most emoji usage from the instructions, keep the document but make it plain text. Work carefully: first look around to find the right file, then edit it in place. Do not create any new skills.',
|
|
383
|
-
}),
|
|
384
|
-
signal: AbortSignal.timeout(TIMEOUT * 2),
|
|
385
|
-
});
|
|
386
|
-
const text = await res.text();
|
|
387
|
-
const events = parseSSE(text);
|
|
388
|
-
const toolStarts = findEvents(events, 'tool_call_start');
|
|
389
|
-
const toolNames = toolStarts.map((e) => String(e['tool_name']));
|
|
390
|
-
// Discovery: the agent should have used at least one of the new
|
|
391
|
-
// discovery tools to find formatting-rules.md instead of guessing.
|
|
392
|
-
const usedDiscovery = toolNames.some((n) => n === 'list_repo_files' || n === 'glob_repo_files' || n === 'grep_repo_files');
|
|
393
|
-
expect(usedDiscovery).toBe(true);
|
|
394
|
-
// Action: should edit in place, NOT rewrite the whole file or create
|
|
395
|
-
// a new skill. We allow either edit_repo_file (preferred) or
|
|
396
|
-
// write_repo_file targeting the same path (acceptable).
|
|
397
|
-
const editedInPlace = toolNames.includes('edit_repo_file');
|
|
398
|
-
const rewroteFile = toolNames.includes('write_repo_file');
|
|
399
|
-
expect(editedInPlace || rewroteFile).toBe(true);
|
|
400
|
-
// Regression guard: agent must NOT have created a new skill.
|
|
401
|
-
const skillsAfter = new Set(readdirSync(skillsDir));
|
|
402
|
-
const newSkills = [...skillsAfter].filter((s) => !skillsBefore.has(s));
|
|
403
|
-
expect(newSkills).toEqual([]);
|
|
404
|
-
// Outcome: the file should still exist and contain significantly
|
|
405
|
-
// fewer emojis than before.
|
|
406
|
-
const after = readFileSync(formattingRulesPath, 'utf-8');
|
|
407
|
-
expect(after.length).toBeGreaterThan(0);
|
|
408
|
-
const afterEmojis = emojiCount(after);
|
|
409
|
-
expect(afterEmojis).toBeLessThan(initialEmojis);
|
|
410
|
-
}
|
|
411
|
-
finally {
|
|
412
|
-
// Clean up — remove the formatting-rules.md fixture regardless of pass/fail.
|
|
413
|
-
rmSync(formattingRulesPath, { force: true });
|
|
414
|
-
}
|
|
415
|
-
}, TIMEOUT * 2);
|
|
416
|
-
// Pagination end-to-end: drop a 3000-line file with a sentinel on line
|
|
417
|
-
// 2800, ask the admin agent to report what's there verbatim. The default
|
|
418
|
-
// read cap is 2000 lines, so the agent MUST either paginate via offset
|
|
419
|
-
// or use grep. Verifies the new line_start/line_end/total_lines/
|
|
420
|
-
// truncated response shape is actually usable by a real LLM.
|
|
421
|
-
it('admin agent paginates a long file to reach content past the default cap', async () => {
|
|
422
|
-
const bigFilePath = resolve(AGENT_DIR, 'knowledge', 'big-file.md');
|
|
423
|
-
// Sentinel must be distinct enough that the agent can quote it back.
|
|
424
|
-
const SENTINEL = 'TARGET:CONTENT:ABCD1234:the-answer-is-42';
|
|
425
|
-
const TARGET_LINE = 2800;
|
|
426
|
-
const TOTAL_LINES = 3000;
|
|
427
|
-
const body = Array.from({ length: TOTAL_LINES }, (_, i) => {
|
|
428
|
-
const n = i + 1;
|
|
429
|
-
return n === TARGET_LINE ? `line ${String(n)}: ${SENTINEL}` : `line ${String(n)}: filler`;
|
|
430
|
-
}).join('\n');
|
|
431
|
-
writeFileSync(bigFilePath, body);
|
|
432
|
-
try {
|
|
433
|
-
const res = await fetch(`http://localhost:${AGENT_PORT}/config/chat`, {
|
|
434
|
-
method: 'POST',
|
|
435
|
-
headers: { 'Content-Type': 'application/json' },
|
|
436
|
-
body: JSON.stringify({
|
|
437
|
-
message: `I just added a long file at knowledge/big-file.md. Tell me exactly what's on line ${String(TARGET_LINE)} — report the full line content verbatim. Just give me the line, no summary.`,
|
|
438
|
-
}),
|
|
439
|
-
signal: AbortSignal.timeout(TIMEOUT * 2),
|
|
440
|
-
});
|
|
441
|
-
const text = await res.text();
|
|
442
|
-
const events = parseSSE(text);
|
|
443
|
-
const toolStarts = findEvents(events, 'tool_call_start');
|
|
444
|
-
const toolNames = toolStarts.map((e) => String(e['tool_name']));
|
|
445
|
-
// The agent needs to touch the file — either read_repo_file or
|
|
446
|
-
// grep_repo_files would work to find the target line.
|
|
447
|
-
const touchedFile = toolNames.some((n) => n === 'read_repo_file' || n === 'grep_repo_files');
|
|
448
|
-
expect(touchedFile).toBe(true);
|
|
449
|
-
// If the agent used read_repo_file, at least one call must have
|
|
450
|
-
// specified an offset/limit that covers line 2800 (the default
|
|
451
|
-
// 2000-line window doesn't reach it, so the agent HAS to adapt).
|
|
452
|
-
const readCalls = toolStarts.filter((e) => e['tool_name'] === 'read_repo_file');
|
|
453
|
-
if (readCalls.length > 0) {
|
|
454
|
-
const usedPagination = readCalls.some((e) => {
|
|
455
|
-
const params = e['parameters'];
|
|
456
|
-
if (!params)
|
|
457
|
-
return false;
|
|
458
|
-
const offset = typeof params['offset'] === 'number' ? params['offset'] : 1;
|
|
459
|
-
const limit = typeof params['limit'] === 'number' ? params['limit'] : 2000;
|
|
460
|
-
// Covers line TARGET_LINE if offset <= TARGET_LINE AND
|
|
461
|
-
// offset + limit - 1 >= TARGET_LINE.
|
|
462
|
-
return offset <= TARGET_LINE && offset + limit - 1 >= TARGET_LINE;
|
|
463
|
-
});
|
|
464
|
-
expect(usedPagination).toBe(true);
|
|
465
|
-
}
|
|
466
|
-
// Hard assertion: the response contains the sentinel verbatim.
|
|
467
|
-
const responseText = allText(events);
|
|
468
|
-
expect(responseText).toContain(SENTINEL);
|
|
469
|
-
}
|
|
470
|
-
finally {
|
|
471
|
-
rmSync(bigFilePath, { force: true });
|
|
472
|
-
}
|
|
473
|
-
}, TIMEOUT * 2);
|
|
474
|
-
// Multi-chunk pagination: sentinels spread across a 5000-line file so no
|
|
475
|
-
// single default read (2000 lines) can cover all of them. Verifies the
|
|
476
|
-
// agent either (a) chains multiple reads following the truncated: true
|
|
477
|
-
// signal, or (b) uses grep. Either is acceptable — what matters is that
|
|
478
|
-
// the agent finds content past the default window.
|
|
479
|
-
it('admin agent finds content scattered across a long file via pagination or grep', async () => {
|
|
480
|
-
const bigFilePath = resolve(AGENT_DIR, 'knowledge', 'scatter.md');
|
|
481
|
-
const MARKER = 'MARKER-ZXCV9876';
|
|
482
|
-
const MARKER_LINES = [500, 2500, 4500];
|
|
483
|
-
const TOTAL_LINES = 5000;
|
|
484
|
-
const body = Array.from({ length: TOTAL_LINES }, (_, i) => {
|
|
485
|
-
const n = i + 1;
|
|
486
|
-
return MARKER_LINES.includes(n) ? `line ${String(n)}: ${MARKER}` : `line ${String(n)}: filler`;
|
|
487
|
-
}).join('\n');
|
|
488
|
-
writeFileSync(bigFilePath, body);
|
|
489
|
-
try {
|
|
490
|
-
const res = await fetch(`http://localhost:${AGENT_PORT}/config/chat`, {
|
|
491
|
-
method: 'POST',
|
|
492
|
-
headers: { 'Content-Type': 'application/json' },
|
|
493
|
-
body: JSON.stringify({
|
|
494
|
-
message: `Read knowledge/scatter.md and quote the exact content of line 500, line 2500, and line 4500 verbatim. Report each line's full text.`,
|
|
495
|
-
}),
|
|
496
|
-
signal: AbortSignal.timeout(TIMEOUT * 2),
|
|
497
|
-
});
|
|
498
|
-
const text = await res.text();
|
|
499
|
-
const events = parseSSE(text);
|
|
500
|
-
const toolStarts = findEvents(events, 'tool_call_start');
|
|
501
|
-
const toolNames = toolStarts.map((e) => String(e['tool_name']));
|
|
502
|
-
// Agent must have touched the file.
|
|
503
|
-
const touchedFile = toolNames.some((n) => n === 'read_repo_file' || n === 'grep_repo_files');
|
|
504
|
-
expect(touchedFile).toBe(true);
|
|
505
|
-
// If the agent committed to read-only discovery (no grep), verify at
|
|
506
|
-
// least one read_repo_file call reached past the default 2000-line
|
|
507
|
-
// cap — otherwise it couldn't have seen markers at lines 2500 or
|
|
508
|
-
// 4500. When grep is used first, pagination isn't required because
|
|
509
|
-
// the agent may have used read_repo_file only to confirm a line it
|
|
510
|
-
// already found via grep.
|
|
511
|
-
const usedGrep = toolNames.includes('grep_repo_files');
|
|
512
|
-
const readCalls = toolStarts.filter((e) => e['tool_name'] === 'read_repo_file');
|
|
513
|
-
if (readCalls.length > 0 && !usedGrep) {
|
|
514
|
-
const reachedPastCap = readCalls.some((e) => {
|
|
515
|
-
const params = e['parameters'];
|
|
516
|
-
if (!params)
|
|
517
|
-
return false;
|
|
518
|
-
const offset = typeof params['offset'] === 'number' ? params['offset'] : 1;
|
|
519
|
-
const limit = typeof params['limit'] === 'number' ? params['limit'] : 2000;
|
|
520
|
-
// A single read covers up to line_end = offset + limit - 1.
|
|
521
|
-
return offset + limit - 1 > 2000;
|
|
522
|
-
});
|
|
523
|
-
expect(reachedPastCap).toBe(true);
|
|
524
|
-
}
|
|
525
|
-
// Hard assertion: final response identifies all three marker line
|
|
526
|
-
// numbers. LLMs paraphrase, so search the response for each number.
|
|
527
|
-
const responseText = allText(events);
|
|
528
|
-
for (const n of MARKER_LINES) {
|
|
529
|
-
expect(responseText).toContain(String(n));
|
|
530
|
-
}
|
|
531
|
-
}
|
|
532
|
-
finally {
|
|
533
|
-
rmSync(bigFilePath, { force: true });
|
|
534
|
-
}
|
|
535
|
-
}, TIMEOUT * 2);
|
|
536
|
-
// -------------------------------------------------------------------------
|
|
537
|
-
// 10. Write intent enforcement (G8)
|
|
348
|
+
// 8. Write intent enforcement (G8)
|
|
538
349
|
// -------------------------------------------------------------------------
|
|
539
350
|
it('rejects POST with intent "read"', async () => {
|
|
540
351
|
const { events } = await chat('Use the request tool to call POST /items on mock-api with intent "read" and data {"name": "test"}. Do not use "write" intent — use exactly "read".');
|
|
@@ -695,17 +506,7 @@ describe.skipIf(!!skipReason)(`smoke tests [${smokeTargetName}]`, () => {
|
|
|
695
506
|
expect(responseText).toMatch(/2|two/i);
|
|
696
507
|
}, TIMEOUT);
|
|
697
508
|
// -------------------------------------------------------------------------
|
|
698
|
-
// 15.
|
|
699
|
-
// -------------------------------------------------------------------------
|
|
700
|
-
it('lists eval suites from repo', async () => {
|
|
701
|
-
const res = await fetch(`http://localhost:${AGENT_PORT}/api/evals/suites`, { signal: AbortSignal.timeout(5000) });
|
|
702
|
-
const body = await res.json();
|
|
703
|
-
expect(res.status).toBe(200);
|
|
704
|
-
expect(body.suites.length).toBeGreaterThan(0);
|
|
705
|
-
expect(body.suites[0]?.['name']).toBe('basic-eval');
|
|
706
|
-
});
|
|
707
|
-
// -------------------------------------------------------------------------
|
|
708
|
-
// 16. Inspect endpoint — connection health
|
|
509
|
+
// 15. Inspect endpoint — connection health
|
|
709
510
|
// -------------------------------------------------------------------------
|
|
710
511
|
it('inspect shows connection status', async () => {
|
|
711
512
|
const res = await fetch(`http://localhost:${AGENT_PORT}/inspect/context`, { signal: AbortSignal.timeout(10000) });
|
|
@@ -864,7 +665,7 @@ describe.skipIf(!!skipReason)(`smoke tests [${smokeTargetName}]`, () => {
|
|
|
864
665
|
expect(found).toBeDefined();
|
|
865
666
|
if (!found)
|
|
866
667
|
throw new Error('unreachable');
|
|
867
|
-
expect(found['appId']).toBe('
|
|
668
|
+
expect(found['appId']).toBe('smoke-test-agent');
|
|
868
669
|
expect(typeof found['summary']).toBe('string');
|
|
869
670
|
expect(String(found['summary']).length).toBeGreaterThan(0);
|
|
870
671
|
expect(typeof found['createdAt']).toBe('number');
|
|
@@ -1173,31 +974,6 @@ describe.skipIf(!!skipReason)(`smoke tests [${smokeTargetName}]`, () => {
|
|
|
1173
974
|
stream.close();
|
|
1174
975
|
}
|
|
1175
976
|
}, TIMEOUT);
|
|
1176
|
-
it('emits automation_started and automation_stopped', async () => {
|
|
1177
|
-
// The smoke agent's test-auto has no cron schedule, so start will fail.
|
|
1178
|
-
// That's fine — we want to verify the happy path when a schedulable
|
|
1179
|
-
// automation exists. Skip if none are available.
|
|
1180
|
-
const listRes = await fetch(`http://localhost:${AGENT_PORT}/automations`);
|
|
1181
|
-
const listBody = await listRes.json();
|
|
1182
|
-
const schedulable = listBody.automations.find((a) => a.schedule);
|
|
1183
|
-
if (!schedulable) {
|
|
1184
|
-
return; // smoke agent has no scheduled automation — skip
|
|
1185
|
-
}
|
|
1186
|
-
const stream = await openEventStream();
|
|
1187
|
-
try {
|
|
1188
|
-
const startRes = await fetch(`http://localhost:${AGENT_PORT}/automations/${schedulable.name}/start`, { method: 'POST', signal: AbortSignal.timeout(5000) });
|
|
1189
|
-
if (startRes.status !== 200)
|
|
1190
|
-
return; // not a schedulable automation
|
|
1191
|
-
const started = await stream.waitFor((e) => e['type'] === 'automation_started' && e['name'] === schedulable.name, 5000);
|
|
1192
|
-
expect(typeof started['intervalMs']).toBe('number');
|
|
1193
|
-
await fetch(`http://localhost:${AGENT_PORT}/automations/${schedulable.name}/stop`, { method: 'POST', signal: AbortSignal.timeout(5000) });
|
|
1194
|
-
const stopped = await stream.waitFor((e) => e['type'] === 'automation_stopped' && e['name'] === schedulable.name, 5000);
|
|
1195
|
-
expect(stopped['name']).toBe(schedulable.name);
|
|
1196
|
-
}
|
|
1197
|
-
finally {
|
|
1198
|
-
stream.close();
|
|
1199
|
-
}
|
|
1200
|
-
}, TIMEOUT);
|
|
1201
977
|
it('fans out the same event to all concurrent clients (two-tab case)', async () => {
|
|
1202
978
|
// Two independent SSE connections — the "two browser tabs" scenario.
|
|
1203
979
|
// Every event emitted by the server should reach BOTH clients with
|
|
@@ -1311,21 +1087,6 @@ describe.skipIf(!!skipReason)(`smoke tests [${smokeTargetName}]`, () => {
|
|
|
1311
1087
|
expect(res.status).toBe(400);
|
|
1312
1088
|
});
|
|
1313
1089
|
});
|
|
1314
|
-
// ---------------------------------------------------------------------------
|
|
1315
|
-
// SSE parser helper
|
|
1316
|
-
// ---------------------------------------------------------------------------
|
|
1317
|
-
function parseSSE(text) {
|
|
1318
|
-
const events = [];
|
|
1319
|
-
for (const line of text.split('\n')) {
|
|
1320
|
-
if (!line.startsWith('data: '))
|
|
1321
|
-
continue;
|
|
1322
|
-
try {
|
|
1323
|
-
events.push(JSON.parse(line.slice(6)));
|
|
1324
|
-
}
|
|
1325
|
-
catch { /* skip */ }
|
|
1326
|
-
}
|
|
1327
|
-
return events;
|
|
1328
|
-
}
|
|
1329
1090
|
async function openEventStream(options = {}) {
|
|
1330
1091
|
const controller = new AbortController();
|
|
1331
1092
|
const headers = { Accept: 'text/event-stream' };
|