@amodalai/runtime 0.2.10 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/__fixtures__/e2e.test.js +2 -2
- package/dist/src/__fixtures__/e2e.test.js.map +1 -1
- package/dist/src/__fixtures__/smoke.test.js +0 -88
- package/dist/src/__fixtures__/smoke.test.js.map +1 -1
- package/dist/src/__tests__/studio-integration.test.js +298 -0
- package/dist/src/__tests__/studio-integration.test.js.map +1 -0
- package/dist/src/agent/agent-types.d.ts +4 -0
- package/dist/src/agent/feedback-store.d.ts +11 -10
- package/dist/src/agent/feedback-store.js +147 -75
- package/dist/src/agent/feedback-store.js.map +1 -1
- package/dist/src/agent/local-server.js +30 -111
- package/dist/src/agent/local-server.js.map +1 -1
- package/dist/src/agent/local-server.test.js +17 -1
- package/dist/src/agent/local-server.test.js.map +1 -1
- package/dist/src/agent/routes/context.d.ts +24 -0
- package/dist/src/agent/routes/context.js +30 -0
- package/dist/src/agent/routes/context.js.map +1 -0
- package/dist/src/agent/routes/feedback.js +28 -56
- package/dist/src/agent/routes/feedback.js.map +1 -1
- package/dist/src/api/create-agent.js +8 -4
- package/dist/src/api/create-agent.js.map +1 -1
- package/dist/src/api/types.d.ts +1 -1
- package/dist/src/channels/channel-session-mapper.js +1 -1
- package/dist/src/channels/channel-session-mapper.js.map +1 -1
- package/dist/src/config.d.ts +2 -2
- package/dist/src/config.js +2 -1
- package/dist/src/config.js.map +1 -1
- package/dist/src/config.test.js +1 -1
- package/dist/src/config.test.js.map +1 -1
- package/dist/src/errors.d.ts +2 -2
- package/dist/src/errors.js +2 -2
- package/dist/src/index.d.ts +0 -3
- package/dist/src/index.js +0 -3
- package/dist/src/index.js.map +1 -1
- package/dist/src/session/drizzle-session-store.d.ts +4 -6
- package/dist/src/session/drizzle-session-store.js +15 -5
- package/dist/src/session/drizzle-session-store.js.map +1 -1
- package/dist/src/session/manager.js +1 -1
- package/dist/src/session/manager.test.js +7 -5
- package/dist/src/session/manager.test.js.map +1 -1
- package/dist/src/session/postgres-session-store.d.ts +3 -24
- package/dist/src/session/postgres-session-store.js +9 -128
- package/dist/src/session/postgres-session-store.js.map +1 -1
- package/dist/src/session/session-builder.d.ts +0 -4
- package/dist/src/session/session-builder.js +2 -9
- package/dist/src/session/session-builder.js.map +1 -1
- package/dist/src/session/session-builder.test.js +0 -25
- package/dist/src/session/session-builder.test.js.map +1 -1
- package/dist/src/session/session-store-selector.d.ts +11 -26
- package/dist/src/session/session-store-selector.js +3 -48
- package/dist/src/session/session-store-selector.js.map +1 -1
- package/dist/src/session/session-store-selector.test.js +5 -57
- package/dist/src/session/session-store-selector.test.js.map +1 -1
- package/dist/src/session/store.d.ts +8 -14
- package/dist/src/session/store.js +8 -10
- package/dist/src/session/store.js.map +1 -1
- package/dist/src/session/store.test.js +6 -126
- package/dist/src/session/store.test.js.map +1 -1
- package/dist/src/session/tool-context-factory.js +1 -1
- package/dist/src/session/tool-context-factory.js.map +1 -1
- package/dist/src/stores/drizzle-store-backend.d.ts +5 -0
- package/dist/src/stores/drizzle-store-backend.js +23 -3
- package/dist/src/stores/drizzle-store-backend.js.map +1 -1
- package/dist/src/stores/drizzle-store-backend.test.js +10 -58
- package/dist/src/stores/drizzle-store-backend.test.js.map +1 -1
- package/dist/src/stores/index.d.ts +0 -2
- package/dist/src/stores/index.js +0 -1
- package/dist/src/stores/index.js.map +1 -1
- package/dist/src/stores/postgres-store-backend.d.ts +5 -15
- package/dist/src/stores/postgres-store-backend.js +14 -72
- package/dist/src/stores/postgres-store-backend.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +4 -6
- package/dist/src/agent/automation-bridge.d.ts +0 -33
- package/dist/src/agent/automation-bridge.js +0 -50
- package/dist/src/agent/automation-bridge.js.map +0 -1
- package/dist/src/agent/automation-bridge.test.d.ts +0 -6
- package/dist/src/agent/automation-bridge.test.js +0 -130
- package/dist/src/agent/automation-bridge.test.js.map +0 -1
- package/dist/src/agent/eval-store.d.ts +0 -50
- package/dist/src/agent/eval-store.js +0 -137
- package/dist/src/agent/eval-store.js.map +0 -1
- package/dist/src/agent/proactive/delivery-router.d.ts +0 -68
- package/dist/src/agent/proactive/delivery-router.js +0 -337
- package/dist/src/agent/proactive/delivery-router.js.map +0 -1
- package/dist/src/agent/proactive/delivery-router.test.js +0 -455
- package/dist/src/agent/proactive/delivery-router.test.js.map +0 -1
- package/dist/src/agent/proactive/delivery.d.ts +0 -21
- package/dist/src/agent/proactive/delivery.js +0 -68
- package/dist/src/agent/proactive/delivery.js.map +0 -1
- package/dist/src/agent/proactive/delivery.test.d.ts +0 -6
- package/dist/src/agent/proactive/delivery.test.js +0 -65
- package/dist/src/agent/proactive/delivery.test.js.map +0 -1
- package/dist/src/agent/proactive/proactive-runner.d.ts +0 -129
- package/dist/src/agent/proactive/proactive-runner.js +0 -301
- package/dist/src/agent/proactive/proactive-runner.js.map +0 -1
- package/dist/src/agent/proactive/proactive-runner.test.d.ts +0 -6
- package/dist/src/agent/proactive/proactive-runner.test.js +0 -250
- package/dist/src/agent/proactive/proactive-runner.test.js.map +0 -1
- package/dist/src/agent/routes/admin-chat-abort.test.d.ts +0 -6
- package/dist/src/agent/routes/admin-chat-abort.test.js +0 -207
- package/dist/src/agent/routes/admin-chat-abort.test.js.map +0 -1
- package/dist/src/agent/routes/admin-chat.d.ts +0 -28
- package/dist/src/agent/routes/admin-chat.js +0 -110
- package/dist/src/agent/routes/admin-chat.js.map +0 -1
- package/dist/src/agent/routes/automations.d.ts +0 -19
- package/dist/src/agent/routes/automations.js +0 -86
- package/dist/src/agent/routes/automations.js.map +0 -1
- package/dist/src/agent/routes/automations.test.d.ts +0 -6
- package/dist/src/agent/routes/automations.test.js +0 -117
- package/dist/src/agent/routes/automations.test.js.map +0 -1
- package/dist/src/agent/routes/evals.d.ts +0 -17
- package/dist/src/agent/routes/evals.js +0 -389
- package/dist/src/agent/routes/evals.js.map +0 -1
- package/dist/src/agent/routes/webhooks.d.ts +0 -17
- package/dist/src/agent/routes/webhooks.js +0 -63
- package/dist/src/agent/routes/webhooks.js.map +0 -1
- package/dist/src/agent/routes/webhooks.test.d.ts +0 -6
- package/dist/src/agent/routes/webhooks.test.js +0 -100
- package/dist/src/agent/routes/webhooks.test.js.map +0 -1
- package/dist/src/session/pglite-session-store.d.ts +0 -23
- package/dist/src/session/pglite-session-store.js +0 -92
- package/dist/src/session/pglite-session-store.js.map +0 -1
- package/dist/src/stores/pglite-store-backend.d.ts +0 -39
- package/dist/src/stores/pglite-store-backend.js +0 -128
- package/dist/src/stores/pglite-store-backend.js.map +0 -1
- package/dist/src/stores/pglite-store-backend.test.d.ts +0 -6
- package/dist/src/stores/pglite-store-backend.test.js +0 -150
- package/dist/src/stores/pglite-store-backend.test.js.map +0 -1
- package/dist/src/stores/schema.d.ts +0 -593
- package/dist/src/stores/schema.js +0 -75
- package/dist/src/stores/schema.js.map +0 -1
- package/dist/src/tools/admin-file-tools.d.ts +0 -42
- package/dist/src/tools/admin-file-tools.js +0 -714
- package/dist/src/tools/admin-file-tools.js.map +0 -1
- package/dist/src/tools/admin-file-tools.test.d.ts +0 -6
- package/dist/src/tools/admin-file-tools.test.js +0 -523
- package/dist/src/tools/admin-file-tools.test.js.map +0 -1
- /package/dist/src/{agent/proactive/delivery-router.test.d.ts → __tests__/studio-integration.test.d.ts} +0 -0
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @license
|
|
3
|
-
* Copyright 2025 Amodal Labs, Inc.
|
|
4
|
-
* SPDX-License-Identifier: MIT
|
|
5
|
-
*/
|
|
6
|
-
import { Router } from 'express';
|
|
7
|
-
import type { ProactiveRunner } from '../proactive/proactive-runner.js';
|
|
8
|
-
export interface AutomationRouterOptions {
|
|
9
|
-
runner: ProactiveRunner;
|
|
10
|
-
}
|
|
11
|
-
/**
|
|
12
|
-
* Creates routes for listing, starting, stopping, and triggering automations.
|
|
13
|
-
*
|
|
14
|
-
* GET /automations — list all registered automations
|
|
15
|
-
* POST /automations/:name/start — start a cron automation
|
|
16
|
-
* POST /automations/:name/stop — stop a running cron automation
|
|
17
|
-
* POST /automations/:name/run — manually trigger an automation
|
|
18
|
-
*/
|
|
19
|
-
export declare function createAutomationRouter(options: AutomationRouterOptions): Router;
|
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @license
|
|
3
|
-
* Copyright 2025 Amodal Labs, Inc.
|
|
4
|
-
* SPDX-License-Identifier: MIT
|
|
5
|
-
*/
|
|
6
|
-
import { Router } from 'express';
|
|
7
|
-
import { asyncHandler } from '../../routes/route-helpers.js';
|
|
8
|
-
/**
|
|
9
|
-
* Creates routes for listing, starting, stopping, and triggering automations.
|
|
10
|
-
*
|
|
11
|
-
* GET /automations — list all registered automations
|
|
12
|
-
* POST /automations/:name/start — start a cron automation
|
|
13
|
-
* POST /automations/:name/stop — stop a running cron automation
|
|
14
|
-
* POST /automations/:name/run — manually trigger an automation
|
|
15
|
-
*/
|
|
16
|
-
export function createAutomationRouter(options) {
|
|
17
|
-
const router = Router();
|
|
18
|
-
router.get('/automations', (_req, res) => {
|
|
19
|
-
const automations = options.runner.listAutomations();
|
|
20
|
-
res.json({ automations });
|
|
21
|
-
});
|
|
22
|
-
router.post('/automations/:name/start', (req, res) => {
|
|
23
|
-
const name = req.params['name'] ?? '';
|
|
24
|
-
const result = options.runner.startAutomation(name);
|
|
25
|
-
if (!result.success) {
|
|
26
|
-
res.status(400).json({ error: result.error });
|
|
27
|
-
return;
|
|
28
|
-
}
|
|
29
|
-
res.json({ status: 'started', automation: name });
|
|
30
|
-
});
|
|
31
|
-
router.post('/automations/:name/stop', (req, res) => {
|
|
32
|
-
const name = req.params['name'] ?? '';
|
|
33
|
-
const result = options.runner.stopAutomation(name);
|
|
34
|
-
if (!result.success) {
|
|
35
|
-
res.status(400).json({ error: result.error });
|
|
36
|
-
return;
|
|
37
|
-
}
|
|
38
|
-
res.json({ status: 'stopped', automation: name });
|
|
39
|
-
});
|
|
40
|
-
router.post('/automations/:name/run', asyncHandler(async (req, res) => {
|
|
41
|
-
const name = req.params['name'] ?? '';
|
|
42
|
-
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion -- Express body parsing
|
|
43
|
-
const payload = (req.body ?? {});
|
|
44
|
-
try {
|
|
45
|
-
const result = await options.runner.triggerAutomation(name, payload);
|
|
46
|
-
if (!result.success) {
|
|
47
|
-
res.status(result.error?.toLowerCase().includes('not found') ? 404 : 500).json({ status: 'error', automation: name, error: result.error });
|
|
48
|
-
return;
|
|
49
|
-
}
|
|
50
|
-
res.json({ status: 'completed', automation: name });
|
|
51
|
-
}
|
|
52
|
-
catch (err) {
|
|
53
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
54
|
-
res.status(500).json({ error: msg });
|
|
55
|
-
}
|
|
56
|
-
}));
|
|
57
|
-
// SSE streaming endpoint for live automation runs
|
|
58
|
-
router.post('/automations/:name/stream', asyncHandler(async (req, res) => {
|
|
59
|
-
const name = req.params['name'] ?? '';
|
|
60
|
-
// SSE headers
|
|
61
|
-
res.writeHead(200, {
|
|
62
|
-
'Content-Type': 'text/event-stream',
|
|
63
|
-
'Cache-Control': 'no-cache',
|
|
64
|
-
'Connection': 'keep-alive',
|
|
65
|
-
});
|
|
66
|
-
try {
|
|
67
|
-
const stream = options.runner.streamAutomation(name);
|
|
68
|
-
if (!stream) {
|
|
69
|
-
res.write(`data: ${JSON.stringify({ type: 'error', message: `Automation "${name}" not found` })}\n\n`);
|
|
70
|
-
res.end();
|
|
71
|
-
return;
|
|
72
|
-
}
|
|
73
|
-
for await (const event of stream) {
|
|
74
|
-
res.write(`data: ${JSON.stringify(event)}\n\n`);
|
|
75
|
-
}
|
|
76
|
-
}
|
|
77
|
-
catch (err) {
|
|
78
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
79
|
-
res.write(`data: ${JSON.stringify({ type: 'error', message: msg })}\n\n`);
|
|
80
|
-
}
|
|
81
|
-
res.write(`data: ${JSON.stringify({ type: 'done', timestamp: new Date().toISOString() })}\n\n`);
|
|
82
|
-
res.end();
|
|
83
|
-
}));
|
|
84
|
-
return router;
|
|
85
|
-
}
|
|
86
|
-
//# sourceMappingURL=automations.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"automations.js","sourceRoot":"","sources":["../../../../src/agent/routes/automations.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAC,MAAM,EAAC,MAAM,SAAS,CAAC;AAG/B,OAAO,EAAC,YAAY,EAAC,MAAM,+BAA+B,CAAC;AAM3D;;;;;;;GAOG;AACH,MAAM,UAAU,sBAAsB,CAAC,OAAgC;IACrE,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC;IAExB,MAAM,CAAC,GAAG,CAAC,cAAc,EAAE,CAAC,IAAa,EAAE,GAAa,EAAE,EAAE;QAC1D,MAAM,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,eAAe,EAAE,CAAC;QACrD,GAAG,CAAC,IAAI,CAAC,EAAC,WAAW,EAAC,CAAC,CAAC;IAC1B,CAAC,CAAC,CAAC;IAEH,MAAM,CAAC,IAAI,CAAC,0BAA0B,EAAE,CAAC,GAAY,EAAE,GAAa,EAAE,EAAE;QACtE,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QACtC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;QACpD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACpB,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,EAAC,KAAK,EAAE,MAAM,CAAC,KAAK,EAAC,CAAC,CAAC;YAC5C,OAAO;QACT,CAAC;QACD,GAAG,CAAC,IAAI,CAAC,EAAC,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,IAAI,EAAC,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,MAAM,CAAC,IAAI,CAAC,yBAAyB,EAAE,CAAC,GAAY,EAAE,GAAa,EAAE,EAAE;QACrE,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QACtC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;QACnD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACpB,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,EAAC,KAAK,EAAE,MAAM,CAAC,KAAK,EAAC,CAAC,CAAC;YAC5C,OAAO;QACT,CAAC;QACD,GAAG,CAAC,IAAI,CAAC,EAAC,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,IAAI,EAAC,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,MAAM,CAAC,IAAI,CAAC,wBAAwB,EAAE,YAAY,CAAC,KAAK,EAAE,GAAY,EAAE,GAAa,EAAE,EAAE;QACvF,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QACtC,+FAA+F;QAC/F,MAAM,OAAO,GAAG,CAAC,GAAG,CAAC,IAAI,IAAI,EAAE,CAA4B,CAAC;QAE5D,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,MAAM,CAAC,iBAAiB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YACrE,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;gBACpB,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,EAAE,WAAW,EAAE,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,EAAC,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,
EAAC,CAAC,CAAC;gBACzI,OAAO;YACT,CAAC;YACD,GAAG,CAAC,IAAI,CAAC,EAAC,MAAM,EAAE,WAAW,EAAE,UAAU,EAAE,IAAI,EAAC,CAAC,CAAC;QACpD,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC7D,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,EAAC,KAAK,EAAE,GAAG,EAAC,CAAC,CAAC;QACrC,CAAC;IACH,CAAC,CAAC,CAAC,CAAC;IAEJ,kDAAkD;IAClD,MAAM,CAAC,IAAI,CAAC,2BAA2B,EAAE,YAAY,CAAC,KAAK,EAAE,GAAY,EAAE,GAAa,EAAE,EAAE;QAC1F,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QAEtC,cAAc;QACd,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE;YACjB,cAAc,EAAE,mBAAmB;YACnC,eAAe,EAAE,UAAU;YAC3B,YAAY,EAAE,YAAY;SAC3B,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC;YACrD,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,GAAG,CAAC,KAAK,CAAC,SAAS,IAAI,CAAC,SAAS,CAAC,EAAC,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,eAAe,IAAI,aAAa,EAAC,CAAC,MAAM,CAAC,CAAC;gBACrG,GAAG,CAAC,GAAG,EAAE,CAAC;gBACV,OAAO;YACT,CAAC;YAED,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBACjC,GAAG,CAAC,KAAK,CAAC,SAAS,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;YAClD,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC7D,GAAG,CAAC,KAAK,CAAC,SAAS,IAAI,CAAC,SAAS,CAAC,EAAC,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,GAAG,EAAC,CAAC,MAAM,CAAC,CAAC;QAC1E,CAAC;QAED,GAAG,CAAC,KAAK,CAAC,SAAS,IAAI,CAAC,SAAS,CAAC,EAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAC,CAAC,MAAM,CAAC,CAAC;QAC9F,GAAG,CAAC,GAAG,EAAE,CAAC;IACZ,CAAC,CAAC,CAAC,CAAC;IAEJ,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -1,117 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @license
|
|
3
|
-
* Copyright 2025 Amodal Labs, Inc.
|
|
4
|
-
* SPDX-License-Identifier: MIT
|
|
5
|
-
*/
|
|
6
|
-
import { describe, it, expect, vi } from 'vitest';
|
|
7
|
-
import express from 'express';
|
|
8
|
-
import request from 'supertest';
|
|
9
|
-
import { createAutomationRouter } from './automations.js';
|
|
10
|
-
function makeMockRunner(overrides) {
|
|
11
|
-
return {
|
|
12
|
-
start: vi.fn(),
|
|
13
|
-
stop: vi.fn(),
|
|
14
|
-
startAutomation: vi.fn().mockReturnValue({ success: true }),
|
|
15
|
-
stopAutomation: vi.fn().mockReturnValue({ success: true }),
|
|
16
|
-
listAutomations: vi.fn().mockReturnValue([
|
|
17
|
-
{ name: 'daily-check', title: 'Daily Check', schedule: '0 9 * * *', webhookTriggered: false, running: false },
|
|
18
|
-
{ name: 'alert-handler', title: 'Alert Handler', webhookTriggered: true, running: true },
|
|
19
|
-
]),
|
|
20
|
-
handleWebhook: vi.fn().mockResolvedValue({ matched: true }),
|
|
21
|
-
triggerAutomation: vi.fn().mockResolvedValue({ success: true }),
|
|
22
|
-
...overrides,
|
|
23
|
-
};
|
|
24
|
-
}
|
|
25
|
-
function createApp(runner) {
|
|
26
|
-
const app = express();
|
|
27
|
-
app.use(express.json());
|
|
28
|
-
app.use(createAutomationRouter({ runner }));
|
|
29
|
-
return app;
|
|
30
|
-
}
|
|
31
|
-
describe('repo-automations routes', () => {
|
|
32
|
-
it('should list automations with running state', async () => {
|
|
33
|
-
const runner = makeMockRunner();
|
|
34
|
-
const app = createApp(runner);
|
|
35
|
-
const res = await request(app).get('/automations');
|
|
36
|
-
expect(res.status).toBe(200);
|
|
37
|
-
expect(res.body.automations).toHaveLength(2);
|
|
38
|
-
expect(res.body.automations[0].name).toBe('daily-check');
|
|
39
|
-
expect(res.body.automations[0].running).toBe(false);
|
|
40
|
-
expect(res.body.automations[1].running).toBe(true);
|
|
41
|
-
});
|
|
42
|
-
it('should start an automation', async () => {
|
|
43
|
-
const runner = makeMockRunner();
|
|
44
|
-
const app = createApp(runner);
|
|
45
|
-
const res = await request(app).post('/automations/daily-check/start');
|
|
46
|
-
expect(res.status).toBe(200);
|
|
47
|
-
expect(res.body.status).toBe('started');
|
|
48
|
-
expect(runner.startAutomation).toHaveBeenCalledWith('daily-check');
|
|
49
|
-
});
|
|
50
|
-
it('should return 400 when start fails', async () => {
|
|
51
|
-
const runner = makeMockRunner({
|
|
52
|
-
startAutomation: vi.fn().mockReturnValue({ success: false, error: 'Already running' }),
|
|
53
|
-
});
|
|
54
|
-
const app = createApp(runner);
|
|
55
|
-
const res = await request(app).post('/automations/daily-check/start');
|
|
56
|
-
expect(res.status).toBe(400);
|
|
57
|
-
expect(res.body.error).toBe('Already running');
|
|
58
|
-
});
|
|
59
|
-
it('should stop an automation', async () => {
|
|
60
|
-
const runner = makeMockRunner();
|
|
61
|
-
const app = createApp(runner);
|
|
62
|
-
const res = await request(app).post('/automations/daily-check/stop');
|
|
63
|
-
expect(res.status).toBe(200);
|
|
64
|
-
expect(res.body.status).toBe('stopped');
|
|
65
|
-
expect(runner.stopAutomation).toHaveBeenCalledWith('daily-check');
|
|
66
|
-
});
|
|
67
|
-
it('should return 400 when stop fails', async () => {
|
|
68
|
-
const runner = makeMockRunner({
|
|
69
|
-
stopAutomation: vi.fn().mockReturnValue({ success: false, error: 'Not running' }),
|
|
70
|
-
});
|
|
71
|
-
const app = createApp(runner);
|
|
72
|
-
const res = await request(app).post('/automations/daily-check/stop');
|
|
73
|
-
expect(res.status).toBe(400);
|
|
74
|
-
expect(res.body.error).toBe('Not running');
|
|
75
|
-
});
|
|
76
|
-
it('should trigger automation manually', async () => {
|
|
77
|
-
const runner = makeMockRunner();
|
|
78
|
-
const app = createApp(runner);
|
|
79
|
-
const res = await request(app)
|
|
80
|
-
.post('/automations/daily-check/run')
|
|
81
|
-
.send({});
|
|
82
|
-
expect(res.status).toBe(200);
|
|
83
|
-
expect(res.body.status).toBe('completed');
|
|
84
|
-
expect(runner.triggerAutomation).toHaveBeenCalledWith('daily-check', {});
|
|
85
|
-
});
|
|
86
|
-
it('should return 404 for unknown automation', async () => {
|
|
87
|
-
const runner = makeMockRunner({
|
|
88
|
-
triggerAutomation: vi.fn().mockResolvedValue({ success: false, error: 'Not found' }),
|
|
89
|
-
});
|
|
90
|
-
const app = createApp(runner);
|
|
91
|
-
const res = await request(app)
|
|
92
|
-
.post('/automations/unknown/run')
|
|
93
|
-
.send({});
|
|
94
|
-
expect(res.status).toBe(404);
|
|
95
|
-
expect(res.body.error).toContain('Not found');
|
|
96
|
-
});
|
|
97
|
-
it('should pass payload to triggerAutomation', async () => {
|
|
98
|
-
const runner = makeMockRunner();
|
|
99
|
-
const app = createApp(runner);
|
|
100
|
-
await request(app)
|
|
101
|
-
.post('/automations/daily-check/run')
|
|
102
|
-
.send({ context: 'manual-trigger' });
|
|
103
|
-
expect(runner.triggerAutomation).toHaveBeenCalledWith('daily-check', { context: 'manual-trigger' });
|
|
104
|
-
});
|
|
105
|
-
it('should handle trigger errors', async () => {
|
|
106
|
-
const runner = makeMockRunner({
|
|
107
|
-
triggerAutomation: vi.fn().mockRejectedValue(new Error('Runtime failure')),
|
|
108
|
-
});
|
|
109
|
-
const app = createApp(runner);
|
|
110
|
-
const res = await request(app)
|
|
111
|
-
.post('/automations/daily-check/run')
|
|
112
|
-
.send({});
|
|
113
|
-
expect(res.status).toBe(500);
|
|
114
|
-
expect(res.body.error).toBe('Runtime failure');
|
|
115
|
-
});
|
|
116
|
-
});
|
|
117
|
-
//# sourceMappingURL=automations.test.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"automations.test.js","sourceRoot":"","sources":["../../../../src/agent/routes/automations.test.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAC,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAC,MAAM,QAAQ,CAAC;AAChD,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,OAAO,MAAM,WAAW,CAAC;AAChC,OAAO,EAAC,sBAAsB,EAAC,MAAM,kBAAkB,CAAC;AAGxD,SAAS,cAAc,CAAC,SAAoC;IAC1D,OAAO;QACL,KAAK,EAAE,EAAE,CAAC,EAAE,EAAE;QACd,IAAI,EAAE,EAAE,CAAC,EAAE,EAAE;QACb,eAAe,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,EAAC,OAAO,EAAE,IAAI,EAAC,CAAC;QACzD,cAAc,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,EAAC,OAAO,EAAE,IAAI,EAAC,CAAC;QACxD,eAAe,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC;YACvC,EAAC,IAAI,EAAE,aAAa,EAAE,KAAK,EAAE,aAAa,EAAE,QAAQ,EAAE,WAAW,EAAE,gBAAgB,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAC;YAC3G,EAAC,IAAI,EAAE,eAAe,EAAE,KAAK,EAAE,eAAe,EAAE,gBAAgB,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAC;SACvF,CAAC;QACF,aAAa,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,EAAC,OAAO,EAAE,IAAI,EAAC,CAAC;QACzD,iBAAiB,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,EAAC,OAAO,EAAE,IAAI,EAAC,CAAC;QAC7D,GAAG,SAAS;KACiB,CAAC;AAClC,CAAC;AAED,SAAS,SAAS,CAAC,MAAuB;IACxC,MAAM,GAAG,GAAG,OAAO,EAAE,CAAC;IACtB,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IACxB,GAAG,CAAC,GAAG,CAAC,sBAAsB,CAAC,EAAC,MAAM,EAAC,CAAC,CAAC,CAAC;IAC1C,OAAO,GAAG,CAAC;AACb,CAAC;AAED,QAAQ,CAAC,yBAAyB,EAAE,GAAG,EAAE;IACvC,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;QAC1D,MAAM,MAAM,GAAG,cAAc,EAAE,CAAC;QAChC,MAAM,GAAG,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;QAE9B,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;QAEnD,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC7B,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7C,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QACzD,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACpD,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;IAE
H,EAAE,CAAC,4BAA4B,EAAE,KAAK,IAAI,EAAE;QAC1C,MAAM,MAAM,GAAG,cAAc,EAAE,CAAC;QAChC,MAAM,GAAG,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;QAE9B,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,gCAAgC,CAAC,CAAC;QAEtE,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC7B,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACxC,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,oBAAoB,CAAC,aAAa,CAAC,CAAC;IACrE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;QAClD,MAAM,MAAM,GAAG,cAAc,CAAC;YAC5B,eAAe,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,EAAC,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,iBAAiB,EAAC,CAAC;SAC9C,CAAC,CAAC;QAC1C,MAAM,GAAG,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;QAE9B,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,gCAAgC,CAAC,CAAC;QAEtE,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC7B,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;IACjD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2BAA2B,EAAE,KAAK,IAAI,EAAE;QACzC,MAAM,MAAM,GAAG,cAAc,EAAE,CAAC;QAChC,MAAM,GAAG,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;QAE9B,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;QAErE,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC7B,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACxC,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,oBAAoB,CAAC,aAAa,CAAC,CAAC;IACpE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mCAAmC,EAAE,KAAK,IAAI,EAAE;QACjD,MAAM,MAAM,GAAG,cAAc,CAAC;YAC5B,cAAc,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,EAAC,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,aAAa,EAAC,CAAC;SACzC,CAAC,CAAC;QAC1C,MAAM,GAAG,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;QAE9B,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;QAErE,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC7B,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IAC7C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;QAClD,MAAM,MAAM,GAAG,cAAc,EAAE,CAAC;QAChC,MAAM,GAAG,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;QAE9B,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;aAC3B,IAA
I,CAAC,8BAA8B,CAAC;aACpC,IAAI,CAAC,EAAE,CAAC,CAAC;QAEZ,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC7B,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAC1C,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,oBAAoB,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC;IAC3E,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;QACxD,MAAM,MAAM,GAAG,cAAc,CAAC;YAC5B,iBAAiB,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,EAAC,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,WAAW,EAAC,CAAC;SAC5C,CAAC,CAAC;QAC1C,MAAM,GAAG,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;QAE9B,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;aAC3B,IAAI,CAAC,0BAA0B,CAAC;aAChC,IAAI,CAAC,EAAE,CAAC,CAAC;QAEZ,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC7B,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;QACxD,MAAM,MAAM,GAAG,cAAc,EAAE,CAAC;QAChC,MAAM,GAAG,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;QAE9B,MAAM,OAAO,CAAC,GAAG,CAAC;aACf,IAAI,CAAC,8BAA8B,CAAC;aACpC,IAAI,CAAC,EAAC,OAAO,EAAE,gBAAgB,EAAC,CAAC,CAAC;QAErC,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,oBAAoB,CAAC,aAAa,EAAE,EAAC,OAAO,EAAE,gBAAgB,EAAC,CAAC,CAAC;IACpG,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8BAA8B,EAAE,KAAK,IAAI,EAAE;QAC5C,MAAM,MAAM,GAAG,cAAc,CAAC;YAC5B,iBAAiB,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,KAAK,CAAC,iBAAiB,CAAC,CAAC;SACpC,CAAC,CAAC;QAC1C,MAAM,GAAG,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;QAE9B,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;aAC3B,IAAI,CAAC,8BAA8B,CAAC;aACpC,IAAI,CAAC,EAAE,CAAC,CAAC;QAEZ,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC7B,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;IACjD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @license
|
|
3
|
-
* Copyright 2026 Amodal Labs, Inc.
|
|
4
|
-
* SPDX-License-Identifier: MIT
|
|
5
|
-
*/
|
|
6
|
-
import { Router } from 'express';
|
|
7
|
-
import type { AgentBundle } from '@amodalai/types';
|
|
8
|
-
import type { EvalStore } from '../eval-store.js';
|
|
9
|
-
export interface EvalRouterOptions {
|
|
10
|
-
/** Returns the current agent bundle (replaces sessionManager.getBundle()) */
|
|
11
|
-
getBundle: () => AgentBundle | undefined;
|
|
12
|
-
evalStore: EvalStore;
|
|
13
|
-
repoPath: string;
|
|
14
|
-
/** Port the server is listening on — used by eval query provider to call /chat */
|
|
15
|
-
getPort: () => number | null;
|
|
16
|
-
}
|
|
17
|
-
export declare function createEvalRouter(options: EvalRouterOptions): Router;
|
|
@@ -1,389 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @license
|
|
3
|
-
* Copyright 2026 Amodal Labs, Inc.
|
|
4
|
-
* SPDX-License-Identifier: MIT
|
|
5
|
-
*/
|
|
6
|
-
import { Router } from 'express';
|
|
7
|
-
import { buildEvalRun, judgeAllAssertions, computeEvalCost, aggregateRunCost, createRuntimeProvider } from '@amodalai/core';
|
|
8
|
-
import { SSEEventType } from '../../types.js';
|
|
9
|
-
import { asyncHandler } from '../../routes/route-helpers.js';
|
|
10
|
-
async function streamQuery(baseUrl, message, evalRes, evalName, appId, sessionId) {
|
|
11
|
-
const chatRes = await fetch(`${baseUrl}/chat`, {
|
|
12
|
-
method: 'POST',
|
|
13
|
-
headers: { 'Content-Type': 'application/json' },
|
|
14
|
-
body: JSON.stringify({ message, app_id: appId ?? 'eval-runner', ...(sessionId ? { session_id: sessionId } : {}) }),
|
|
15
|
-
});
|
|
16
|
-
const text = await chatRes.text();
|
|
17
|
-
const lines = text.split('\n');
|
|
18
|
-
let fullResponse = '';
|
|
19
|
-
const toolCalls = [];
|
|
20
|
-
const toolResults = [];
|
|
21
|
-
let usage;
|
|
22
|
-
let queryError;
|
|
23
|
-
for (const line of lines) {
|
|
24
|
-
if (!line.startsWith('data: '))
|
|
25
|
-
continue;
|
|
26
|
-
try {
|
|
27
|
-
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion -- SSE parsing
|
|
28
|
-
const event = JSON.parse(line.substring(6));
|
|
29
|
-
const eventType = String(event['type'] ?? '');
|
|
30
|
-
if (eventType === SSEEventType.TextDelta) {
|
|
31
|
-
const content = String(event['content'] ?? '');
|
|
32
|
-
fullResponse += content;
|
|
33
|
-
writeSSE(evalRes, { type: 'agent_text', evalName, content });
|
|
34
|
-
}
|
|
35
|
-
else if (eventType === SSEEventType.ToolCallStart) {
|
|
36
|
-
const params = (event['parameters'] ?? {}); // eslint-disable-line @typescript-eslint/no-unsafe-type-assertion
|
|
37
|
-
toolCalls.push({ name: String(event['tool_name'] ?? ''), parameters: params });
|
|
38
|
-
writeSSE(evalRes, { type: 'agent_tool', evalName, toolName: event['tool_name'], parameters: params });
|
|
39
|
-
}
|
|
40
|
-
else if (eventType === SSEEventType.ToolCallResult) {
|
|
41
|
-
const resultRaw = String(event['result'] ?? event['error'] ?? '');
|
|
42
|
-
toolResults.push(`${String(event['tool_name'] ?? 'request')}: ${resultRaw}`);
|
|
43
|
-
writeSSE(evalRes, { type: 'agent_tool_result', evalName, toolName: event['tool_name'] ?? 'request', status: event['status'], durationMs: event['duration_ms'] });
|
|
44
|
-
}
|
|
45
|
-
else if (eventType === SSEEventType.Error) {
|
|
46
|
-
queryError = String(event['message'] ?? event['error'] ?? 'Unknown error');
|
|
47
|
-
writeSSE(evalRes, { type: 'agent_error', evalName, error: queryError });
|
|
48
|
-
}
|
|
49
|
-
else if (eventType === SSEEventType.Done) {
|
|
50
|
-
const u = (event['usage'] ?? {});
|
|
51
|
-
// Accumulate tokens across multiple done events (multi-turn agent loops
|
|
52
|
-
// may emit one done per turn in the session runner)
|
|
53
|
-
if ((u.input_tokens ?? 0) > 0 || (u.output_tokens ?? 0) > 0 || (u.cached_tokens ?? 0) > 0) {
|
|
54
|
-
if (!usage) {
|
|
55
|
-
usage = { inputTokens: 0, outputTokens: 0 };
|
|
56
|
-
}
|
|
57
|
-
usage.inputTokens += u.input_tokens ?? 0;
|
|
58
|
-
usage.outputTokens += u.output_tokens ?? 0;
|
|
59
|
-
if (u.cached_tokens) {
|
|
60
|
-
usage.cacheReadInputTokens = (usage.cacheReadInputTokens ?? 0) + u.cached_tokens;
|
|
61
|
-
}
|
|
62
|
-
if (u.cache_creation_tokens) {
|
|
63
|
-
usage.cacheCreationInputTokens = (usage.cacheCreationInputTokens ?? 0) + u.cache_creation_tokens;
|
|
64
|
-
}
|
|
65
|
-
}
|
|
66
|
-
}
|
|
67
|
-
}
|
|
68
|
-
catch {
|
|
69
|
-
/* Malformed SSE line — skip non-JSON data lines (e.g. partial chunks, comments) */
|
|
70
|
-
}
|
|
71
|
-
}
|
|
72
|
-
if (!usage) {
|
|
73
|
-
const outputChars = fullResponse.length;
|
|
74
|
-
const estimatedOutput = Math.ceil(outputChars / 4);
|
|
75
|
-
usage = { inputTokens: estimatedOutput * 3, outputTokens: estimatedOutput };
|
|
76
|
-
}
|
|
77
|
-
return { response: fullResponse, toolCalls, toolResults, usage, ...(queryError ? { error: queryError } : {}) };
|
|
78
|
-
}
|
|
79
|
-
/**
 * Create a JudgeProvider that calls the LLM directly — no session, no tools,
 * no system prompt overhead. Just a simple prompt→response for each assertion.
 * This is ~10x cheaper than routing through /chat with the full agent context.
 *
 * @param modelConfig Model configuration handed to `createRuntimeProvider`;
 *   its `model` field is sent with every judge request.
 * @returns An object with a `judge(prompt)` method plus running
 *   `totalInputTokens` / `totalOutputTokens` counters that callers can diff
 *   before/after a judging pass to attribute token usage.
 */
function createDirectJudgeProvider(modelConfig) {
    const provider = createRuntimeProvider(modelConfig);
    const tracked = {
        totalInputTokens: 0,
        totalOutputTokens: 0,
        /**
         * Send one judging prompt and return the concatenated text blocks of
         * the reply. Never rejects: failures are returned as a
         * `Judge error: …` string so one bad call does not abort the run.
         */
        judge: async (prompt) => {
            try {
                const response = await provider.chat({
                    model: modelConfig.model,
                    systemPrompt: 'You are an eval judge. Be concise.',
                    messages: [{ role: 'user', content: prompt }],
                    tools: [],
                    maxTokens: 256,
                });
                const text = response.content
                    .filter((b) => b.type === 'text')
                    .map((b) => b.text)
                    .join('');
                const reported = response.usage;
                if (reported) {
                    // Every field falls back to 0: a single usage report that
                    // omits a counter must not turn the running totals into
                    // NaN for the remainder of the run.
                    tracked.totalInputTokens +=
                        (reported.inputTokens ?? 0) +
                        (reported.cacheReadInputTokens ?? 0) +
                        (reported.cacheCreationInputTokens ?? 0);
                    tracked.totalOutputTokens += reported.outputTokens ?? 0;
                }
                return text;
            }
            catch (err) {
                return `Judge error: ${err instanceof Error ? err.message : String(err)}`;
            }
        },
    };
    return tracked;
}
|
|
115
|
-
/**
 * Write one Server-Sent Events frame to `res`: the JSON encoding of `data`
 * on a single `data:` line, terminated by the blank line that ends a frame.
 */
function writeSSE(res, data) {
    const payload = JSON.stringify(data);
    const frame = 'data: ' + payload + '\n\n';
    res.write(frame);
}
|
|
118
|
-
/**
 * Build the router that serves the eval endpoints: listing eval definitions,
 * listing/loading/deleting saved runs, running a suite as an SSE stream, and
 * reporting the arena model list.
 *
 * @param options Accessors used by the routes:
 *   - `getBundle()` returns the loaded bundle (`evals`, optional `config`) or a falsy value;
 *   - `getPort()` returns the port of the local server, or a falsy value while not ready;
 *   - `evalStore` provides `list`, `load`, `delete`, `save` and `listByEval`.
 * @returns The configured router.
 */
export function createEvalRouter(options) {
    const router = Router();
    // Fallback arena model list, used when the bundle config has no `arena.models`.
    const defaultArenaModels = [
        { provider: 'anthropic', model: 'claude-opus-4-6', label: 'Claude Opus 4.6' },
        { provider: 'anthropic', model: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6' },
        { provider: 'anthropic', model: 'claude-haiku-4-5-20251001', label: 'Claude Haiku 4.5' },
        { provider: 'openai', model: 'gpt-4o', label: 'GPT-4o' },
        { provider: 'openai', model: 'gpt-4o-mini', label: 'GPT-4o Mini' },
        { provider: 'openai', model: 'gpt-4.1', label: 'GPT-4.1' },
        { provider: 'openai', model: 'gpt-4.1-mini', label: 'GPT-4.1 Mini' },
        { provider: 'google', model: 'gemini-2.5-pro', label: 'Gemini 2.5 Pro' },
        { provider: 'google', model: 'gemini-2.5-flash', label: 'Gemini 2.5 Flash' },
        { provider: 'deepseek', model: 'deepseek-chat', label: 'DeepSeek Chat' },
        { provider: 'deepseek', model: 'deepseek-reasoner', label: 'DeepSeek Reasoner' },
        { provider: 'groq', model: 'llama-3.3-70b-versatile', label: 'Llama 3.3 70B (Groq)' },
        { provider: 'groq', model: 'llama-3.1-8b-instant', label: 'Llama 3.1 8B (Groq)' },
        { provider: 'groq', model: 'meta-llama/llama-4-scout-17b-16e-instruct', label: 'Llama 4 Scout (Groq)' },
        { provider: 'groq', model: 'qwen/qwen3-32b', label: 'Qwen 3 32B (Groq)' },
        { provider: 'groq', model: 'moonshotai/kimi-k2-instruct', label: 'Kimi K2 (Groq)' },
        { provider: 'groq', model: 'openai/gpt-oss-120b', label: 'GPT-OSS 120B (Groq)' },
        { provider: 'mistral', model: 'mistral-large-latest', label: 'Mistral Large' },
        { provider: 'mistral', model: 'mistral-small-latest', label: 'Mistral Small' },
        { provider: 'mistral', model: 'codestral-latest', label: 'Codestral' },
        { provider: 'xai', model: 'grok-3', label: 'Grok 3' },
        { provider: 'xai', model: 'grok-3-mini', label: 'Grok 3 Mini' },
    ];
    /** List eval definitions from the repo */
    router.get('/api/evals/suites', (_req, res) => {
        const repo = options.getBundle();
        if (!repo) {
            res.status(500).json({ error: 'No bundle available' });
            return;
        }
        const suites = repo.evals.map((e) => ({
            name: e.name,
            title: e.title,
            description: e.description,
            query: e.query,
            assertionCount: e.assertions.length,
            assertions: e.assertions.map((a) => ({ text: a.text, negated: a.negated })),
            location: e.location,
        }));
        res.json({ suites });
    });
    /** List saved eval runs */
    router.get('/api/evals/runs', (_req, res) => {
        const runs = options.evalStore.list();
        res.json({ runs });
    });
    /** Get a single eval run */
    router.get('/api/evals/runs/:id', (req, res) => {
        const run = options.evalStore.load(req.params['id'] ?? '');
        if (!run) {
            res.status(404).json({ error: 'Run not found' });
            return;
        }
        res.json(run);
    });
    /** Delete an eval run */
    router.delete('/api/evals/runs/:id', (req, res) => {
        const deleted = options.evalStore.delete(req.params['id'] ?? '');
        if (!deleted) {
            res.status(404).json({ error: 'Run not found' });
            return;
        }
        res.json({ ok: true });
    });
    /**
     * Run eval suite — SSE stream with full per-eval results.
     *
     * Body (all optional): `evalNames` — array of eval names to run (all evals
     * when absent or empty); `model` — `{ provider, model }` override for the
     * main model while queries run. Malformed values are rejected with 400
     * before the stream starts.
     */
    router.post('/api/evals/run', asyncHandler(async (req, res) => {
        const port = options.getPort();
        if (!port) {
            res.status(503).json({ error: 'Server not ready' });
            return;
        }
        const baseUrl = `http://127.0.0.1:${port}`;
        const repo = options.getBundle();
        if (!repo) {
            res.status(500).json({ error: 'No bundle available' });
            return;
        }
        // Read optional eval names and model override from POST body
        const body = (req.body ?? {});
        const evalNames = body['evalNames'];
        const modelOverride = body['model'];
        // The body is untrusted: a string would otherwise pass the length /
        // includes checks and select evals by substring match.
        if (evalNames && !Array.isArray(evalNames)) {
            res.status(400).json({ error: 'evalNames must be an array of eval names' });
            return;
        }
        // A partial override would install { provider: undefined, model: undefined }
        // as the main model, so reject it up front.
        if (modelOverride && (typeof modelOverride.provider !== 'string' || typeof modelOverride.model !== 'string')) {
            res.status(400).json({ error: 'model override must include string provider and model' });
            return;
        }
        let evals = repo.evals;
        if (evalNames && evalNames.length > 0) {
            const wanted = new Set(evalNames);
            evals = evals.filter((e) => wanted.has(e.name));
        }
        if (evals.length === 0) {
            res.status(400).json({ error: 'No evals defined' });
            return;
        }
        res.writeHead(200, {
            'Content-Type': 'text/event-stream',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
        });
        // Save original model config for restoration after override
        const originalModelConfig = repo.config?.models?.['main'];
        let overrideApplied = false;
        // Swap the main model for the requested override (no-op without an
        // override or without a models table to write into).
        // NOTE(review): this mutates the shared bundle config, so concurrent
        // runs with different overrides would interfere — confirm whether
        // concurrent runs are expected.
        const applyOverride = () => {
            if (modelOverride && repo.config?.models) {
                repo.config.models['main'] = {
                    provider: modelOverride.provider,
                    model: modelOverride.model,
                };
                overrideApplied = true;
            }
        };
        // Put the main model back the way it was before this run, including
        // removing it again when the bundle had no main model to begin with.
        const restoreModel = () => {
            const models = repo.config?.models;
            if (!models) {
                return;
            }
            if (originalModelConfig) {
                models['main'] = originalModelConfig;
            }
            else if (overrideApplied) {
                delete models['main'];
            }
        };
        try {
            applyOverride();
            const evalSessionId = `eval-${Date.now()}`;
            // The judge is always built from the original model, never the override.
            const judgeProvider = createDirectJudgeProvider(originalModelConfig);
            // Captured after the override so it reports the model under test.
            const modelInfo = repo.config ? {
                provider: repo.config.models?.['main']?.provider ?? 'unknown',
                model: repo.config.models?.['main']?.model ?? 'unknown',
            } : { provider: 'unknown', model: 'unknown' };
            const results = [];
            const perCaseCosts = [];
            const startTime = Date.now();
            for (let i = 0; i < evals.length; i++) {
                const ev = evals[i];
                writeSSE(res, { type: 'eval_start', evalName: ev.name, current: i + 1, total: evals.length });
                const evalStart = Date.now();
                try {
                    // Run the query — streams agent events to client
                    const { response, toolCalls, toolResults, usage, error: queryError } = await streamQuery(baseUrl, ev.query, res, ev.name, ev.setup.app, evalSessionId);
                    // Restore original model for judging
                    restoreModel();
                    let assertions = [];
                    let passed = false;
                    let judgeCost;
                    // Skip judging if query had an error
                    if (!queryError) {
                        // Build enriched response for the judge — include tool results so it knows data was fetched
                        let enriched = response;
                        if (toolCalls.length > 0) {
                            enriched += '\n\n[Tool calls made: ' + toolCalls.map((tc) => `${tc.name}(${JSON.stringify(tc.parameters)})`).join(', ') + ']';
                        }
                        if (toolResults.length > 0) {
                            enriched += '\n\n[Tool results received:\n' + toolResults.join('\n') + ']';
                        }
                        // Judge assertions — track judge tokens separately
                        const judgeInputBefore = judgeProvider.totalInputTokens;
                        const judgeOutputBefore = judgeProvider.totalOutputTokens;
                        assertions = await judgeAllAssertions(enriched, ev.assertions, judgeProvider);
                        passed = assertions.every((a) => a.passed);
                        const judgeInputUsed = judgeProvider.totalInputTokens - judgeInputBefore;
                        const judgeOutputUsed = judgeProvider.totalOutputTokens - judgeOutputBefore;
                        judgeCost = judgeInputUsed > 0 ? computeEvalCost(judgeInputUsed, judgeOutputUsed, originalModelConfig?.model ?? modelInfo.model) : undefined;
                    }
                    const queryCost = usage ? computeEvalCost(usage.inputTokens, usage.outputTokens, modelInfo.model, usage.cacheReadInputTokens, usage.cacheCreationInputTokens) : undefined;
                    if (queryCost) {
                        perCaseCosts.push(queryCost);
                    }
                    const result = {
                        eval: ev,
                        response,
                        toolCalls,
                        assertions,
                        passed,
                        durationMs: Date.now() - evalStart,
                        cost: queryCost,
                        ...(queryError ? { error: queryError } : {}),
                    };
                    results.push(result);
                    // Send full result with eval_complete — separate query and judge costs
                    writeSSE(res, {
                        type: 'eval_complete',
                        evalName: ev.name,
                        passed,
                        current: i + 1,
                        total: evals.length,
                        result: {
                            response,
                            toolCalls,
                            toolResults,
                            assertions,
                            durationMs: result.durationMs,
                            queryCost,
                            judgeCost,
                            ...(queryError ? { error: queryError } : {}),
                        },
                    });
                }
                catch (err) {
                    // A failing eval is recorded and reported; the suite keeps going.
                    const msg = err instanceof Error ? err.message : String(err);
                    const result = {
                        eval: ev,
                        response: '',
                        toolCalls: [],
                        assertions: [],
                        passed: false,
                        durationMs: Date.now() - evalStart,
                        error: msg,
                    };
                    results.push(result);
                    writeSSE(res, {
                        type: 'eval_complete',
                        evalName: ev.name,
                        passed: false,
                        current: i + 1,
                        total: evals.length,
                        result: { response: '', toolCalls: [], assertions: [], durationMs: result.durationMs, error: msg },
                    });
                }
                finally {
                    // Re-apply model override for next eval query (it was
                    // lifted for judging on the success path).
                    applyOverride();
                }
            }
            // Restore original model config
            restoreModel();
            // Build suite result
            const totalCost = perCaseCosts.length > 0 ? aggregateRunCost(perCaseCosts) : undefined;
            const suiteResult = {
                results,
                totalPassed: results.filter((r) => r.passed).length,
                totalFailed: results.filter((r) => !r.passed).length,
                totalSkipped: 0,
                totalDurationMs: Date.now() - startTime,
                totalCost,
                model: modelInfo,
                timestamp: new Date().toISOString(),
            };
            const run = buildEvalRun(suiteResult, modelInfo, { orgId: 'local', triggeredBy: 'manual' });
            options.evalStore.save(run);
            writeSSE(res, { type: 'run_complete', run });
            writeSSE(res, { type: 'done' });
            res.end();
        }
        finally {
            // Whatever went wrong above (judge construction, persistence, a
            // write failure), never leave the override installed on the
            // shared bundle config. The error itself still propagates.
            restoreModel();
        }
    }));
    /** Get eval history for a specific eval */
    router.get('/api/evals/runs/by-eval/:evalName', (req, res) => {
        const evalName = req.params['evalName'] ?? '';
        const entries = options.evalStore.listByEval(evalName);
        res.json({ entries });
    });
    /** Get arena model config */
    router.get('/api/evals/arena/models', (_req, res) => {
        const repo = options.getBundle();
        if (!repo) {
            res.status(500).json({ error: 'No bundle available' });
            return;
        }
        // The bundle config is optional elsewhere in this router, so read it
        // null-safely here too instead of throwing when it is absent.
        const rawConfig = repo.config;
        const arena = rawConfig?.['arena'];
        const configModels = arena?.['models'];
        const models = configModels ?? defaultArenaModels;
        res.json({ models });
    });
    return router;
}
|
|
389
|
-
//# sourceMappingURL=evals.js.map
|