@amodalai/runtime 0.2.9 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. package/dist/src/__fixtures__/e2e.test.js +2 -2
  2. package/dist/src/__fixtures__/e2e.test.js.map +1 -1
  3. package/dist/src/__fixtures__/smoke.test.js +0 -88
  4. package/dist/src/__fixtures__/smoke.test.js.map +1 -1
  5. package/dist/src/__tests__/studio-integration.test.js +298 -0
  6. package/dist/src/__tests__/studio-integration.test.js.map +1 -0
  7. package/dist/src/agent/agent-types.d.ts +4 -0
  8. package/dist/src/agent/feedback-store.d.ts +11 -10
  9. package/dist/src/agent/feedback-store.js +147 -75
  10. package/dist/src/agent/feedback-store.js.map +1 -1
  11. package/dist/src/agent/local-server.js +30 -111
  12. package/dist/src/agent/local-server.js.map +1 -1
  13. package/dist/src/agent/local-server.test.js +17 -1
  14. package/dist/src/agent/local-server.test.js.map +1 -1
  15. package/dist/src/agent/routes/context.d.ts +24 -0
  16. package/dist/src/agent/routes/context.js +30 -0
  17. package/dist/src/agent/routes/context.js.map +1 -0
  18. package/dist/src/agent/routes/feedback.js +28 -56
  19. package/dist/src/agent/routes/feedback.js.map +1 -1
  20. package/dist/src/api/create-agent.js +8 -4
  21. package/dist/src/api/create-agent.js.map +1 -1
  22. package/dist/src/api/types.d.ts +1 -1
  23. package/dist/src/channels/channel-session-mapper.js +1 -1
  24. package/dist/src/channels/channel-session-mapper.js.map +1 -1
  25. package/dist/src/config.d.ts +2 -2
  26. package/dist/src/config.js +2 -1
  27. package/dist/src/config.js.map +1 -1
  28. package/dist/src/config.test.js +1 -1
  29. package/dist/src/config.test.js.map +1 -1
  30. package/dist/src/errors.d.ts +2 -2
  31. package/dist/src/errors.js +2 -2
  32. package/dist/src/index.d.ts +0 -3
  33. package/dist/src/index.js +0 -3
  34. package/dist/src/index.js.map +1 -1
  35. package/dist/src/server.d.ts +2 -0
  36. package/dist/src/server.js +1 -0
  37. package/dist/src/server.js.map +1 -1
  38. package/dist/src/session/drizzle-session-store.d.ts +4 -6
  39. package/dist/src/session/drizzle-session-store.js +15 -5
  40. package/dist/src/session/drizzle-session-store.js.map +1 -1
  41. package/dist/src/session/manager.js +1 -1
  42. package/dist/src/session/manager.test.js +7 -5
  43. package/dist/src/session/manager.test.js.map +1 -1
  44. package/dist/src/session/postgres-session-store.d.ts +3 -24
  45. package/dist/src/session/postgres-session-store.js +9 -128
  46. package/dist/src/session/postgres-session-store.js.map +1 -1
  47. package/dist/src/session/session-builder.d.ts +0 -4
  48. package/dist/src/session/session-builder.js +2 -9
  49. package/dist/src/session/session-builder.js.map +1 -1
  50. package/dist/src/session/session-builder.test.js +0 -25
  51. package/dist/src/session/session-builder.test.js.map +1 -1
  52. package/dist/src/session/session-store-selector.d.ts +11 -26
  53. package/dist/src/session/session-store-selector.js +3 -48
  54. package/dist/src/session/session-store-selector.js.map +1 -1
  55. package/dist/src/session/session-store-selector.test.js +5 -57
  56. package/dist/src/session/session-store-selector.test.js.map +1 -1
  57. package/dist/src/session/store.d.ts +8 -14
  58. package/dist/src/session/store.js +8 -10
  59. package/dist/src/session/store.js.map +1 -1
  60. package/dist/src/session/store.test.js +6 -126
  61. package/dist/src/session/store.test.js.map +1 -1
  62. package/dist/src/session/tool-context-factory.js +1 -1
  63. package/dist/src/session/tool-context-factory.js.map +1 -1
  64. package/dist/src/stores/drizzle-store-backend.d.ts +5 -0
  65. package/dist/src/stores/drizzle-store-backend.js +23 -3
  66. package/dist/src/stores/drizzle-store-backend.js.map +1 -1
  67. package/dist/src/stores/drizzle-store-backend.test.js +10 -58
  68. package/dist/src/stores/drizzle-store-backend.test.js.map +1 -1
  69. package/dist/src/stores/index.d.ts +0 -2
  70. package/dist/src/stores/index.js +0 -1
  71. package/dist/src/stores/index.js.map +1 -1
  72. package/dist/src/stores/postgres-store-backend.d.ts +5 -15
  73. package/dist/src/stores/postgres-store-backend.js +14 -72
  74. package/dist/src/stores/postgres-store-backend.js.map +1 -1
  75. package/dist/tsconfig.tsbuildinfo +1 -1
  76. package/package.json +4 -6
  77. package/dist/src/agent/automation-bridge.d.ts +0 -33
  78. package/dist/src/agent/automation-bridge.js +0 -50
  79. package/dist/src/agent/automation-bridge.js.map +0 -1
  80. package/dist/src/agent/automation-bridge.test.d.ts +0 -6
  81. package/dist/src/agent/automation-bridge.test.js +0 -130
  82. package/dist/src/agent/automation-bridge.test.js.map +0 -1
  83. package/dist/src/agent/eval-store.d.ts +0 -50
  84. package/dist/src/agent/eval-store.js +0 -137
  85. package/dist/src/agent/eval-store.js.map +0 -1
  86. package/dist/src/agent/proactive/delivery-router.d.ts +0 -68
  87. package/dist/src/agent/proactive/delivery-router.js +0 -337
  88. package/dist/src/agent/proactive/delivery-router.js.map +0 -1
  89. package/dist/src/agent/proactive/delivery-router.test.js +0 -455
  90. package/dist/src/agent/proactive/delivery-router.test.js.map +0 -1
  91. package/dist/src/agent/proactive/delivery.d.ts +0 -21
  92. package/dist/src/agent/proactive/delivery.js +0 -68
  93. package/dist/src/agent/proactive/delivery.js.map +0 -1
  94. package/dist/src/agent/proactive/delivery.test.d.ts +0 -6
  95. package/dist/src/agent/proactive/delivery.test.js +0 -65
  96. package/dist/src/agent/proactive/delivery.test.js.map +0 -1
  97. package/dist/src/agent/proactive/proactive-runner.d.ts +0 -129
  98. package/dist/src/agent/proactive/proactive-runner.js +0 -301
  99. package/dist/src/agent/proactive/proactive-runner.js.map +0 -1
  100. package/dist/src/agent/proactive/proactive-runner.test.d.ts +0 -6
  101. package/dist/src/agent/proactive/proactive-runner.test.js +0 -250
  102. package/dist/src/agent/proactive/proactive-runner.test.js.map +0 -1
  103. package/dist/src/agent/routes/admin-chat-abort.test.d.ts +0 -6
  104. package/dist/src/agent/routes/admin-chat-abort.test.js +0 -207
  105. package/dist/src/agent/routes/admin-chat-abort.test.js.map +0 -1
  106. package/dist/src/agent/routes/admin-chat.d.ts +0 -28
  107. package/dist/src/agent/routes/admin-chat.js +0 -110
  108. package/dist/src/agent/routes/admin-chat.js.map +0 -1
  109. package/dist/src/agent/routes/automations.d.ts +0 -19
  110. package/dist/src/agent/routes/automations.js +0 -86
  111. package/dist/src/agent/routes/automations.js.map +0 -1
  112. package/dist/src/agent/routes/automations.test.d.ts +0 -6
  113. package/dist/src/agent/routes/automations.test.js +0 -117
  114. package/dist/src/agent/routes/automations.test.js.map +0 -1
  115. package/dist/src/agent/routes/evals.d.ts +0 -17
  116. package/dist/src/agent/routes/evals.js +0 -389
  117. package/dist/src/agent/routes/evals.js.map +0 -1
  118. package/dist/src/agent/routes/webhooks.d.ts +0 -17
  119. package/dist/src/agent/routes/webhooks.js +0 -63
  120. package/dist/src/agent/routes/webhooks.js.map +0 -1
  121. package/dist/src/agent/routes/webhooks.test.d.ts +0 -6
  122. package/dist/src/agent/routes/webhooks.test.js +0 -100
  123. package/dist/src/agent/routes/webhooks.test.js.map +0 -1
  124. package/dist/src/session/pglite-session-store.d.ts +0 -23
  125. package/dist/src/session/pglite-session-store.js +0 -92
  126. package/dist/src/session/pglite-session-store.js.map +0 -1
  127. package/dist/src/stores/pglite-store-backend.d.ts +0 -39
  128. package/dist/src/stores/pglite-store-backend.js +0 -128
  129. package/dist/src/stores/pglite-store-backend.js.map +0 -1
  130. package/dist/src/stores/pglite-store-backend.test.d.ts +0 -6
  131. package/dist/src/stores/pglite-store-backend.test.js +0 -150
  132. package/dist/src/stores/pglite-store-backend.test.js.map +0 -1
  133. package/dist/src/stores/schema.d.ts +0 -593
  134. package/dist/src/stores/schema.js +0 -75
  135. package/dist/src/stores/schema.js.map +0 -1
  136. package/dist/src/tools/admin-file-tools.d.ts +0 -42
  137. package/dist/src/tools/admin-file-tools.js +0 -714
  138. package/dist/src/tools/admin-file-tools.js.map +0 -1
  139. package/dist/src/tools/admin-file-tools.test.d.ts +0 -6
  140. package/dist/src/tools/admin-file-tools.test.js +0 -523
  141. package/dist/src/tools/admin-file-tools.test.js.map +0 -1
  142. /package/dist/src/{agent/proactive/delivery-router.test.d.ts → __tests__/studio-integration.test.d.ts} +0 -0
@@ -1,19 +0,0 @@
1
- /**
2
- * @license
3
- * Copyright 2025 Amodal Labs, Inc.
4
- * SPDX-License-Identifier: MIT
5
- */
6
- import { Router } from 'express';
7
- import type { ProactiveRunner } from '../proactive/proactive-runner.js';
8
- export interface AutomationRouterOptions {
9
- runner: ProactiveRunner;
10
- }
11
- /**
12
- * Creates routes for listing, starting, stopping, and triggering automations.
13
- *
14
- * GET /automations — list all registered automations
15
- * POST /automations/:name/start — start a cron automation
16
- * POST /automations/:name/stop — stop a running cron automation
17
- * POST /automations/:name/run — manually trigger an automation
18
- */
19
- export declare function createAutomationRouter(options: AutomationRouterOptions): Router;
@@ -1,86 +0,0 @@
1
- /**
2
- * @license
3
- * Copyright 2025 Amodal Labs, Inc.
4
- * SPDX-License-Identifier: MIT
5
- */
6
- import { Router } from 'express';
7
- import { asyncHandler } from '../../routes/route-helpers.js';
8
- /**
9
- * Creates routes for listing, starting, stopping, and triggering automations.
10
- *
11
- * GET /automations — list all registered automations
12
- * POST /automations/:name/start — start a cron automation
13
- * POST /automations/:name/stop — stop a running cron automation
14
- * POST /automations/:name/run — manually trigger an automation
15
- */
16
- export function createAutomationRouter(options) {
17
- const router = Router();
18
- router.get('/automations', (_req, res) => {
19
- const automations = options.runner.listAutomations();
20
- res.json({ automations });
21
- });
22
- router.post('/automations/:name/start', (req, res) => {
23
- const name = req.params['name'] ?? '';
24
- const result = options.runner.startAutomation(name);
25
- if (!result.success) {
26
- res.status(400).json({ error: result.error });
27
- return;
28
- }
29
- res.json({ status: 'started', automation: name });
30
- });
31
- router.post('/automations/:name/stop', (req, res) => {
32
- const name = req.params['name'] ?? '';
33
- const result = options.runner.stopAutomation(name);
34
- if (!result.success) {
35
- res.status(400).json({ error: result.error });
36
- return;
37
- }
38
- res.json({ status: 'stopped', automation: name });
39
- });
40
- router.post('/automations/:name/run', asyncHandler(async (req, res) => {
41
- const name = req.params['name'] ?? '';
42
- // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion -- Express body parsing
43
- const payload = (req.body ?? {});
44
- try {
45
- const result = await options.runner.triggerAutomation(name, payload);
46
- if (!result.success) {
47
- res.status(result.error?.toLowerCase().includes('not found') ? 404 : 500).json({ status: 'error', automation: name, error: result.error });
48
- return;
49
- }
50
- res.json({ status: 'completed', automation: name });
51
- }
52
- catch (err) {
53
- const msg = err instanceof Error ? err.message : String(err);
54
- res.status(500).json({ error: msg });
55
- }
56
- }));
57
- // SSE streaming endpoint for live automation runs
58
- router.post('/automations/:name/stream', asyncHandler(async (req, res) => {
59
- const name = req.params['name'] ?? '';
60
- // SSE headers
61
- res.writeHead(200, {
62
- 'Content-Type': 'text/event-stream',
63
- 'Cache-Control': 'no-cache',
64
- 'Connection': 'keep-alive',
65
- });
66
- try {
67
- const stream = options.runner.streamAutomation(name);
68
- if (!stream) {
69
- res.write(`data: ${JSON.stringify({ type: 'error', message: `Automation "${name}" not found` })}\n\n`);
70
- res.end();
71
- return;
72
- }
73
- for await (const event of stream) {
74
- res.write(`data: ${JSON.stringify(event)}\n\n`);
75
- }
76
- }
77
- catch (err) {
78
- const msg = err instanceof Error ? err.message : String(err);
79
- res.write(`data: ${JSON.stringify({ type: 'error', message: msg })}\n\n`);
80
- }
81
- res.write(`data: ${JSON.stringify({ type: 'done', timestamp: new Date().toISOString() })}\n\n`);
82
- res.end();
83
- }));
84
- return router;
85
- }
86
- //# sourceMappingURL=automations.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"automations.js","sourceRoot":"","sources":["../../../../src/agent/routes/automations.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAC,MAAM,EAAC,MAAM,SAAS,CAAC;AAG/B,OAAO,EAAC,YAAY,EAAC,MAAM,+BAA+B,CAAC;AAM3D;;;;;;;GAOG;AACH,MAAM,UAAU,sBAAsB,CAAC,OAAgC;IACrE,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC;IAExB,MAAM,CAAC,GAAG,CAAC,cAAc,EAAE,CAAC,IAAa,EAAE,GAAa,EAAE,EAAE;QAC1D,MAAM,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,eAAe,EAAE,CAAC;QACrD,GAAG,CAAC,IAAI,CAAC,EAAC,WAAW,EAAC,CAAC,CAAC;IAC1B,CAAC,CAAC,CAAC;IAEH,MAAM,CAAC,IAAI,CAAC,0BAA0B,EAAE,CAAC,GAAY,EAAE,GAAa,EAAE,EAAE;QACtE,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QACtC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;QACpD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACpB,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,EAAC,KAAK,EAAE,MAAM,CAAC,KAAK,EAAC,CAAC,CAAC;YAC5C,OAAO;QACT,CAAC;QACD,GAAG,CAAC,IAAI,CAAC,EAAC,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,IAAI,EAAC,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,MAAM,CAAC,IAAI,CAAC,yBAAyB,EAAE,CAAC,GAAY,EAAE,GAAa,EAAE,EAAE;QACrE,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QACtC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;QACnD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACpB,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,EAAC,KAAK,EAAE,MAAM,CAAC,KAAK,EAAC,CAAC,CAAC;YAC5C,OAAO;QACT,CAAC;QACD,GAAG,CAAC,IAAI,CAAC,EAAC,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,IAAI,EAAC,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,MAAM,CAAC,IAAI,CAAC,wBAAwB,EAAE,YAAY,CAAC,KAAK,EAAE,GAAY,EAAE,GAAa,EAAE,EAAE;QACvF,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QACtC,+FAA+F;QAC/F,MAAM,OAAO,GAAG,CAAC,GAAG,CAAC,IAAI,IAAI,EAAE,CAA4B,CAAC;QAE5D,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,MAAM,CAAC,iBAAiB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YACrE,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;gBACpB,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,EAAE,WAAW,EAAE,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,EAAC,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,CAAC,KAA
K,EAAC,CAAC,CAAC;gBACzI,OAAO;YACT,CAAC;YACD,GAAG,CAAC,IAAI,CAAC,EAAC,MAAM,EAAE,WAAW,EAAE,UAAU,EAAE,IAAI,EAAC,CAAC,CAAC;QACpD,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC7D,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,EAAC,KAAK,EAAE,GAAG,EAAC,CAAC,CAAC;QACrC,CAAC;IACH,CAAC,CAAC,CAAC,CAAC;IAEJ,kDAAkD;IAClD,MAAM,CAAC,IAAI,CAAC,2BAA2B,EAAE,YAAY,CAAC,KAAK,EAAE,GAAY,EAAE,GAAa,EAAE,EAAE;QAC1F,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QAEtC,cAAc;QACd,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE;YACjB,cAAc,EAAE,mBAAmB;YACnC,eAAe,EAAE,UAAU;YAC3B,YAAY,EAAE,YAAY;SAC3B,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC;YACrD,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,GAAG,CAAC,KAAK,CAAC,SAAS,IAAI,CAAC,SAAS,CAAC,EAAC,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,eAAe,IAAI,aAAa,EAAC,CAAC,MAAM,CAAC,CAAC;gBACrG,GAAG,CAAC,GAAG,EAAE,CAAC;gBACV,OAAO;YACT,CAAC;YAED,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBACjC,GAAG,CAAC,KAAK,CAAC,SAAS,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;YAClD,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC7D,GAAG,CAAC,KAAK,CAAC,SAAS,IAAI,CAAC,SAAS,CAAC,EAAC,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,GAAG,EAAC,CAAC,MAAM,CAAC,CAAC;QAC1E,CAAC;QAED,GAAG,CAAC,KAAK,CAAC,SAAS,IAAI,CAAC,SAAS,CAAC,EAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAC,CAAC,MAAM,CAAC,CAAC;QAC9F,GAAG,CAAC,GAAG,EAAE,CAAC;IACZ,CAAC,CAAC,CAAC,CAAC;IAEJ,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -1,6 +0,0 @@
1
- /**
2
- * @license
3
- * Copyright 2025 Amodal Labs, Inc.
4
- * SPDX-License-Identifier: MIT
5
- */
6
- export {};
@@ -1,117 +0,0 @@
1
- /**
2
- * @license
3
- * Copyright 2025 Amodal Labs, Inc.
4
- * SPDX-License-Identifier: MIT
5
- */
6
- import { describe, it, expect, vi } from 'vitest';
7
- import express from 'express';
8
- import request from 'supertest';
9
- import { createAutomationRouter } from './automations.js';
10
- function makeMockRunner(overrides) {
11
- return {
12
- start: vi.fn(),
13
- stop: vi.fn(),
14
- startAutomation: vi.fn().mockReturnValue({ success: true }),
15
- stopAutomation: vi.fn().mockReturnValue({ success: true }),
16
- listAutomations: vi.fn().mockReturnValue([
17
- { name: 'daily-check', title: 'Daily Check', schedule: '0 9 * * *', webhookTriggered: false, running: false },
18
- { name: 'alert-handler', title: 'Alert Handler', webhookTriggered: true, running: true },
19
- ]),
20
- handleWebhook: vi.fn().mockResolvedValue({ matched: true }),
21
- triggerAutomation: vi.fn().mockResolvedValue({ success: true }),
22
- ...overrides,
23
- };
24
- }
25
- function createApp(runner) {
26
- const app = express();
27
- app.use(express.json());
28
- app.use(createAutomationRouter({ runner }));
29
- return app;
30
- }
31
- describe('repo-automations routes', () => {
32
- it('should list automations with running state', async () => {
33
- const runner = makeMockRunner();
34
- const app = createApp(runner);
35
- const res = await request(app).get('/automations');
36
- expect(res.status).toBe(200);
37
- expect(res.body.automations).toHaveLength(2);
38
- expect(res.body.automations[0].name).toBe('daily-check');
39
- expect(res.body.automations[0].running).toBe(false);
40
- expect(res.body.automations[1].running).toBe(true);
41
- });
42
- it('should start an automation', async () => {
43
- const runner = makeMockRunner();
44
- const app = createApp(runner);
45
- const res = await request(app).post('/automations/daily-check/start');
46
- expect(res.status).toBe(200);
47
- expect(res.body.status).toBe('started');
48
- expect(runner.startAutomation).toHaveBeenCalledWith('daily-check');
49
- });
50
- it('should return 400 when start fails', async () => {
51
- const runner = makeMockRunner({
52
- startAutomation: vi.fn().mockReturnValue({ success: false, error: 'Already running' }),
53
- });
54
- const app = createApp(runner);
55
- const res = await request(app).post('/automations/daily-check/start');
56
- expect(res.status).toBe(400);
57
- expect(res.body.error).toBe('Already running');
58
- });
59
- it('should stop an automation', async () => {
60
- const runner = makeMockRunner();
61
- const app = createApp(runner);
62
- const res = await request(app).post('/automations/daily-check/stop');
63
- expect(res.status).toBe(200);
64
- expect(res.body.status).toBe('stopped');
65
- expect(runner.stopAutomation).toHaveBeenCalledWith('daily-check');
66
- });
67
- it('should return 400 when stop fails', async () => {
68
- const runner = makeMockRunner({
69
- stopAutomation: vi.fn().mockReturnValue({ success: false, error: 'Not running' }),
70
- });
71
- const app = createApp(runner);
72
- const res = await request(app).post('/automations/daily-check/stop');
73
- expect(res.status).toBe(400);
74
- expect(res.body.error).toBe('Not running');
75
- });
76
- it('should trigger automation manually', async () => {
77
- const runner = makeMockRunner();
78
- const app = createApp(runner);
79
- const res = await request(app)
80
- .post('/automations/daily-check/run')
81
- .send({});
82
- expect(res.status).toBe(200);
83
- expect(res.body.status).toBe('completed');
84
- expect(runner.triggerAutomation).toHaveBeenCalledWith('daily-check', {});
85
- });
86
- it('should return 404 for unknown automation', async () => {
87
- const runner = makeMockRunner({
88
- triggerAutomation: vi.fn().mockResolvedValue({ success: false, error: 'Not found' }),
89
- });
90
- const app = createApp(runner);
91
- const res = await request(app)
92
- .post('/automations/unknown/run')
93
- .send({});
94
- expect(res.status).toBe(404);
95
- expect(res.body.error).toContain('Not found');
96
- });
97
- it('should pass payload to triggerAutomation', async () => {
98
- const runner = makeMockRunner();
99
- const app = createApp(runner);
100
- await request(app)
101
- .post('/automations/daily-check/run')
102
- .send({ context: 'manual-trigger' });
103
- expect(runner.triggerAutomation).toHaveBeenCalledWith('daily-check', { context: 'manual-trigger' });
104
- });
105
- it('should handle trigger errors', async () => {
106
- const runner = makeMockRunner({
107
- triggerAutomation: vi.fn().mockRejectedValue(new Error('Runtime failure')),
108
- });
109
- const app = createApp(runner);
110
- const res = await request(app)
111
- .post('/automations/daily-check/run')
112
- .send({});
113
- expect(res.status).toBe(500);
114
- expect(res.body.error).toBe('Runtime failure');
115
- });
116
- });
117
- //# sourceMappingURL=automations.test.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"automations.test.js","sourceRoot":"","sources":["../../../../src/agent/routes/automations.test.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAC,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAC,MAAM,QAAQ,CAAC;AAChD,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,OAAO,MAAM,WAAW,CAAC;AAChC,OAAO,EAAC,sBAAsB,EAAC,MAAM,kBAAkB,CAAC;AAGxD,SAAS,cAAc,CAAC,SAAoC;IAC1D,OAAO;QACL,KAAK,EAAE,EAAE,CAAC,EAAE,EAAE;QACd,IAAI,EAAE,EAAE,CAAC,EAAE,EAAE;QACb,eAAe,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,EAAC,OAAO,EAAE,IAAI,EAAC,CAAC;QACzD,cAAc,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,EAAC,OAAO,EAAE,IAAI,EAAC,CAAC;QACxD,eAAe,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC;YACvC,EAAC,IAAI,EAAE,aAAa,EAAE,KAAK,EAAE,aAAa,EAAE,QAAQ,EAAE,WAAW,EAAE,gBAAgB,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAC;YAC3G,EAAC,IAAI,EAAE,eAAe,EAAE,KAAK,EAAE,eAAe,EAAE,gBAAgB,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAC;SACvF,CAAC;QACF,aAAa,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,EAAC,OAAO,EAAE,IAAI,EAAC,CAAC;QACzD,iBAAiB,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,EAAC,OAAO,EAAE,IAAI,EAAC,CAAC;QAC7D,GAAG,SAAS;KACiB,CAAC;AAClC,CAAC;AAED,SAAS,SAAS,CAAC,MAAuB;IACxC,MAAM,GAAG,GAAG,OAAO,EAAE,CAAC;IACtB,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IACxB,GAAG,CAAC,GAAG,CAAC,sBAAsB,CAAC,EAAC,MAAM,EAAC,CAAC,CAAC,CAAC;IAC1C,OAAO,GAAG,CAAC;AACb,CAAC;AAED,QAAQ,CAAC,yBAAyB,EAAE,GAAG,EAAE;IACvC,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;QAC1D,MAAM,MAAM,GAAG,cAAc,EAAE,CAAC;QAChC,MAAM,GAAG,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;QAE9B,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;QAEnD,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC7B,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7C,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QACzD,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACpD,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;I
AEH,EAAE,CAAC,4BAA4B,EAAE,KAAK,IAAI,EAAE;QAC1C,MAAM,MAAM,GAAG,cAAc,EAAE,CAAC;QAChC,MAAM,GAAG,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;QAE9B,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,gCAAgC,CAAC,CAAC;QAEtE,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC7B,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACxC,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,oBAAoB,CAAC,aAAa,CAAC,CAAC;IACrE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;QAClD,MAAM,MAAM,GAAG,cAAc,CAAC;YAC5B,eAAe,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,EAAC,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,iBAAiB,EAAC,CAAC;SAC9C,CAAC,CAAC;QAC1C,MAAM,GAAG,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;QAE9B,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,gCAAgC,CAAC,CAAC;QAEtE,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC7B,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;IACjD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2BAA2B,EAAE,KAAK,IAAI,EAAE;QACzC,MAAM,MAAM,GAAG,cAAc,EAAE,CAAC;QAChC,MAAM,GAAG,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;QAE9B,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;QAErE,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC7B,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACxC,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,oBAAoB,CAAC,aAAa,CAAC,CAAC;IACpE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mCAAmC,EAAE,KAAK,IAAI,EAAE;QACjD,MAAM,MAAM,GAAG,cAAc,CAAC;YAC5B,cAAc,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,EAAC,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,aAAa,EAAC,CAAC;SACzC,CAAC,CAAC;QAC1C,MAAM,GAAG,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;QAE9B,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;QAErE,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC7B,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IAC7C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;QAClD,MAAM,MAAM,GAAG,cAAc,EAAE,CAAC;QAChC,MAAM,GAAG,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;QAE9B,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;aAC3B,I
AAI,CAAC,8BAA8B,CAAC;aACpC,IAAI,CAAC,EAAE,CAAC,CAAC;QAEZ,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC7B,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAC1C,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,oBAAoB,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC;IAC3E,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;QACxD,MAAM,MAAM,GAAG,cAAc,CAAC;YAC5B,iBAAiB,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,EAAC,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,WAAW,EAAC,CAAC;SAC5C,CAAC,CAAC;QAC1C,MAAM,GAAG,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;QAE9B,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;aAC3B,IAAI,CAAC,0BAA0B,CAAC;aAChC,IAAI,CAAC,EAAE,CAAC,CAAC;QAEZ,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC7B,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;QACxD,MAAM,MAAM,GAAG,cAAc,EAAE,CAAC;QAChC,MAAM,GAAG,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;QAE9B,MAAM,OAAO,CAAC,GAAG,CAAC;aACf,IAAI,CAAC,8BAA8B,CAAC;aACpC,IAAI,CAAC,EAAC,OAAO,EAAE,gBAAgB,EAAC,CAAC,CAAC;QAErC,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,oBAAoB,CAAC,aAAa,EAAE,EAAC,OAAO,EAAE,gBAAgB,EAAC,CAAC,CAAC;IACpG,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8BAA8B,EAAE,KAAK,IAAI,EAAE;QAC5C,MAAM,MAAM,GAAG,cAAc,CAAC;YAC5B,iBAAiB,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,KAAK,CAAC,iBAAiB,CAAC,CAAC;SACpC,CAAC,CAAC;QAC1C,MAAM,GAAG,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;QAE9B,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;aAC3B,IAAI,CAAC,8BAA8B,CAAC;aACpC,IAAI,CAAC,EAAE,CAAC,CAAC;QAEZ,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC7B,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;IACjD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -1,17 +0,0 @@
1
- /**
2
- * @license
3
- * Copyright 2026 Amodal Labs, Inc.
4
- * SPDX-License-Identifier: MIT
5
- */
6
- import { Router } from 'express';
7
- import type { AgentBundle } from '@amodalai/types';
8
- import type { EvalStore } from '../eval-store.js';
9
- export interface EvalRouterOptions {
10
- /** Returns the current agent bundle (replaces sessionManager.getBundle()) */
11
- getBundle: () => AgentBundle | undefined;
12
- evalStore: EvalStore;
13
- repoPath: string;
14
- /** Port the server is listening on — used by eval query provider to call /chat */
15
- getPort: () => number | null;
16
- }
17
- export declare function createEvalRouter(options: EvalRouterOptions): Router;
@@ -1,389 +0,0 @@
1
- /**
2
- * @license
3
- * Copyright 2026 Amodal Labs, Inc.
4
- * SPDX-License-Identifier: MIT
5
- */
6
- import { Router } from 'express';
7
- import { buildEvalRun, judgeAllAssertions, computeEvalCost, aggregateRunCost, createRuntimeProvider } from '@amodalai/core';
8
- import { SSEEventType } from '../../types.js';
9
- import { asyncHandler } from '../../routes/route-helpers.js';
10
/**
 * Fold the token counts of one `done` event into the running usage total.
 *
 * Events that report no input, output or cached tokens are ignored so that an
 * all-zero `done` event does not suppress the caller's fallback estimate.
 *
 * @param {object|undefined} total - Usage accumulated so far, or `undefined` if none yet.
 * @param {object} reported - The event's `usage` payload (snake_case token fields).
 * @returns {object|undefined} The updated total (same object when one already existed).
 */
function accumulateUsage(total, reported) {
    const hasTokens = (reported.input_tokens ?? 0) > 0
        || (reported.output_tokens ?? 0) > 0
        || (reported.cached_tokens ?? 0) > 0;
    if (!hasTokens) {
        return total;
    }
    const next = total ?? { inputTokens: 0, outputTokens: 0 };
    next.inputTokens += reported.input_tokens ?? 0;
    next.outputTokens += reported.output_tokens ?? 0;
    if (reported.cached_tokens) {
        next.cacheReadInputTokens = (next.cacheReadInputTokens ?? 0) + reported.cached_tokens;
    }
    if (reported.cache_creation_tokens) {
        next.cacheCreationInputTokens = (next.cacheCreationInputTokens ?? 0) + reported.cache_creation_tokens;
    }
    return next;
}
/**
 * Send one eval query to the local `/chat` endpoint, relay the agent's SSE
 * events to the eval client, and collect the outcome.
 *
 * The whole `/chat` response body is read first and then parsed line by line;
 * only `data: ` lines holding a JSON object are considered.
 *
 * @param {string} baseUrl - Origin of the running server (no trailing slash).
 * @param {string} message - The eval query text.
 * @param {object} evalRes - Response object of the eval SSE stream; passed to `writeSSE`.
 * @param {string} evalName - Name of the eval, attached to every relayed event.
 * @param {string} [appId] - App id for the chat request; defaults to `'eval-runner'`.
 * @param {string} [sessionId] - Optional session id forwarded as `session_id`.
 * @returns {Promise<object>} `{ response, toolCalls, toolResults, usage }` plus `error`
 *   when the agent reported one or the HTTP request itself failed.
 */
async function streamQuery(baseUrl, message, evalRes, evalName, appId, sessionId) {
    const chatRes = await fetch(`${baseUrl}/chat`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ message, app_id: appId ?? 'eval-runner', ...(sessionId ? { session_id: sessionId } : {}) }),
    });
    const text = await chatRes.text();
    let fullResponse = '';
    const toolCalls = [];
    const toolResults = [];
    let usage;
    let queryError;
    for (const line of text.split('\n')) {
        if (!line.startsWith('data: ')) {
            continue;
        }
        let event;
        try {
            event = JSON.parse(line.substring(6));
        }
        catch {
            /* Malformed SSE line — skip non-JSON data lines (e.g. partial chunks, comments) */
            continue;
        }
        if (event === null || typeof event !== 'object') {
            // Valid JSON but not an event object (e.g. `data: null`) — nothing to relay.
            continue;
        }
        switch (String(event['type'] ?? '')) {
            case SSEEventType.TextDelta: {
                const content = String(event['content'] ?? '');
                fullResponse += content;
                writeSSE(evalRes, { type: 'agent_text', evalName, content });
                break;
            }
            case SSEEventType.ToolCallStart: {
                const params = event['parameters'] ?? {};
                toolCalls.push({ name: String(event['tool_name'] ?? ''), parameters: params });
                writeSSE(evalRes, { type: 'agent_tool', evalName, toolName: event['tool_name'], parameters: params });
                break;
            }
            case SSEEventType.ToolCallResult: {
                const resultRaw = String(event['result'] ?? event['error'] ?? '');
                toolResults.push(`${String(event['tool_name'] ?? 'request')}: ${resultRaw}`);
                writeSSE(evalRes, { type: 'agent_tool_result', evalName, toolName: event['tool_name'] ?? 'request', status: event['status'], durationMs: event['duration_ms'] });
                break;
            }
            case SSEEventType.Error: {
                queryError = String(event['message'] ?? event['error'] ?? 'Unknown error');
                writeSSE(evalRes, { type: 'agent_error', evalName, error: queryError });
                break;
            }
            case SSEEventType.Done: {
                // Accumulate tokens across multiple done events (multi-turn agent loops
                // may emit one done per turn in the session runner)
                usage = accumulateUsage(usage, event['usage'] ?? {});
                break;
            }
            default:
                break;
        }
    }
    // A non-2xx reply (e.g. a JSON error body instead of an SSE stream) must not be
    // mistaken for an empty-but-successful answer. An error the agent already
    // reported through the stream takes precedence over the generic HTTP message.
    if (!chatRes.ok && !queryError) {
        const detail = text.trim().slice(0, 200);
        queryError = `Chat request failed with HTTP ${chatRes.status}${detail ? `: ${detail}` : ''}`;
        writeSSE(evalRes, { type: 'agent_error', evalName, error: queryError });
    }
    if (!usage) {
        // No usage reported: estimate roughly 4 characters per output token and
        // three input tokens per output token.
        const estimatedOutput = Math.ceil(fullResponse.length / 4);
        usage = { inputTokens: estimatedOutput * 3, outputTokens: estimatedOutput };
    }
    return { response: fullResponse, toolCalls, toolResults, usage, ...(queryError ? { error: queryError } : {}) };
}
79
/**
 * Build a judge that talks to the LLM directly instead of going through /chat.
 *
 * Each assertion is judged with a single prompt→response call: no session, no
 * tools, and only a one-line system prompt, which keeps judging far cheaper
 * than routing through the full agent context. The returned object also keeps
 * running totals of the tokens the judge has consumed.
 *
 * @param {object} modelConfig - Model configuration; passed to `createRuntimeProvider`, and its `model` field names the model to call.
 * @returns {object} `{ totalInputTokens, totalOutputTokens, judge }` where `judge(prompt)`
 *   resolves to the model's text, or to a `Judge error: …` string if the call fails.
 */
function createDirectJudgeProvider(modelConfig) {
    const llm = createRuntimeProvider(modelConfig);
    const tracked = { totalInputTokens: 0, totalOutputTokens: 0 };
    tracked.judge = async (prompt) => {
        let reply;
        try {
            reply = await llm.chat({
                model: modelConfig.model,
                systemPrompt: 'You are an eval judge. Be concise.',
                messages: [{ role: 'user', content: prompt }],
                tools: [],
                maxTokens: 256,
            });
            // Keep only the text blocks of the reply, in order.
            const pieces = [];
            for (const block of reply.content) {
                if (block.type === 'text') {
                    pieces.push(block.text);
                }
            }
            const verdict = pieces.join('');
            const used = reply.usage;
            if (used) {
                // Cache reads and cache writes count towards the judge's input total.
                const cacheRead = used.cacheReadInputTokens ?? 0;
                const cacheWrite = used.cacheCreationInputTokens ?? 0;
                tracked.totalInputTokens += used.inputTokens + cacheRead + cacheWrite;
                tracked.totalOutputTokens += used.outputTokens;
            }
            return verdict;
        }
        catch (err) {
            // Best effort: a failed judge call is reported as text rather than thrown.
            const reason = err instanceof Error ? err.message : String(err);
            return `Judge error: ${reason}`;
        }
    };
    return tracked;
}
115
/**
 * Write one server-sent event to the response.
 *
 * @param {object} res - Response (or any object with a `write` method) to send the event on.
 * @param {*} data - Payload; serialized with `JSON.stringify` into a single `data:` line.
 */
function writeSSE(res, data) {
    const payload = JSON.stringify(data);
    // A blank line terminates the event in the SSE wire format.
    const frame = 'data: ' + payload + '\n\n';
    res.write(frame);
}
118
/**
 * Fallback model list served by `GET /api/evals/arena/models` when the bundle
 * config has no `arena.models` entry.
 */
const DEFAULT_ARENA_MODELS = [
    { provider: 'anthropic', model: 'claude-opus-4-6', label: 'Claude Opus 4.6' },
    { provider: 'anthropic', model: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6' },
    { provider: 'anthropic', model: 'claude-haiku-4-5-20251001', label: 'Claude Haiku 4.5' },
    { provider: 'openai', model: 'gpt-4o', label: 'GPT-4o' },
    { provider: 'openai', model: 'gpt-4o-mini', label: 'GPT-4o Mini' },
    { provider: 'openai', model: 'gpt-4.1', label: 'GPT-4.1' },
    { provider: 'openai', model: 'gpt-4.1-mini', label: 'GPT-4.1 Mini' },
    { provider: 'google', model: 'gemini-2.5-pro', label: 'Gemini 2.5 Pro' },
    { provider: 'google', model: 'gemini-2.5-flash', label: 'Gemini 2.5 Flash' },
    { provider: 'deepseek', model: 'deepseek-chat', label: 'DeepSeek Chat' },
    { provider: 'deepseek', model: 'deepseek-reasoner', label: 'DeepSeek Reasoner' },
    { provider: 'groq', model: 'llama-3.3-70b-versatile', label: 'Llama 3.3 70B (Groq)' },
    { provider: 'groq', model: 'llama-3.1-8b-instant', label: 'Llama 3.1 8B (Groq)' },
    { provider: 'groq', model: 'meta-llama/llama-4-scout-17b-16e-instruct', label: 'Llama 4 Scout (Groq)' },
    { provider: 'groq', model: 'qwen/qwen3-32b', label: 'Qwen 3 32B (Groq)' },
    { provider: 'groq', model: 'moonshotai/kimi-k2-instruct', label: 'Kimi K2 (Groq)' },
    { provider: 'groq', model: 'openai/gpt-oss-120b', label: 'GPT-OSS 120B (Groq)' },
    { provider: 'mistral', model: 'mistral-large-latest', label: 'Mistral Large' },
    { provider: 'mistral', model: 'mistral-small-latest', label: 'Mistral Small' },
    { provider: 'mistral', model: 'codestral-latest', label: 'Codestral' },
    { provider: 'xai', model: 'grok-3', label: 'Grok 3' },
    { provider: 'xai', model: 'grok-3-mini', label: 'Grok 3 Mini' },
];
/**
 * Normalize the `evalNames` request field to an array.
 *
 * A bare string is treated as a single name (it must not be substring-matched
 * against eval names); anything that is neither a string nor an array means
 * "no filter".
 */
function normalizeEvalNames(raw) {
    if (typeof raw === 'string') {
        return [raw];
    }
    return Array.isArray(raw) ? raw : [];
}
/** Append the tool calls and tool results to the agent's answer so the judge can see that data was fetched. */
function buildJudgeInput(response, toolCalls, toolResults) {
    let enriched = response;
    if (toolCalls.length > 0) {
        const calls = toolCalls.map((tc) => `${tc.name}(${JSON.stringify(tc.parameters)})`).join(', ');
        enriched = `${enriched}\n\n[Tool calls made: ${calls}]`;
    }
    if (toolResults.length > 0) {
        enriched = `${enriched}\n\n[Tool results received:\n${toolResults.join('\n')}]`;
    }
    return enriched;
}
/**
 * Create the Express router serving the `/api/evals/*` endpoints: listing eval
 * suites, listing/loading/deleting saved runs, running a suite as an SSE
 * stream, per-eval history, and the arena model list.
 *
 * @param {object} options - Router dependencies.
 * @param {Function} options.getBundle - Returns the current agent bundle, or a falsy value when none is loaded.
 * @param {object} options.evalStore - Store used via `list`, `load`, `delete`, `save` and `listByEval`.
 * @param {Function} options.getPort - Returns the port the server listens on, or a falsy value when not ready.
 * @returns {object} The configured router.
 */
export function createEvalRouter(options) {
    const router = Router();
    /** List eval definitions from the repo */
    router.get('/api/evals/suites', (_req, res) => {
        const repo = options.getBundle();
        if (!repo) {
            res.status(500).json({ error: 'No bundle available' });
            return;
        }
        const suites = repo.evals.map((e) => ({
            name: e.name,
            title: e.title,
            description: e.description,
            query: e.query,
            assertionCount: e.assertions.length,
            assertions: e.assertions.map((a) => ({ text: a.text, negated: a.negated })),
            location: e.location,
        }));
        res.json({ suites });
    });
    /** List saved eval runs */
    router.get('/api/evals/runs', (_req, res) => {
        const runs = options.evalStore.list();
        res.json({ runs });
    });
    /** Get a single eval run */
    router.get('/api/evals/runs/:id', (req, res) => {
        const run = options.evalStore.load(req.params['id'] ?? '');
        if (!run) {
            res.status(404).json({ error: 'Run not found' });
            return;
        }
        res.json(run);
    });
    /** Delete an eval run */
    router.delete('/api/evals/runs/:id', (req, res) => {
        const deleted = options.evalStore.delete(req.params['id'] ?? '');
        if (!deleted) {
            res.status(404).json({ error: 'Run not found' });
            return;
        }
        res.json({ ok: true });
    });
    /** Run eval suite — SSE stream with full per-eval results */
    router.post('/api/evals/run', asyncHandler(async (req, res) => {
        const port = options.getPort();
        if (!port) {
            res.status(503).json({ error: 'Server not ready' });
            return;
        }
        const baseUrl = `http://127.0.0.1:${port}`;
        const repo = options.getBundle();
        if (!repo) {
            res.status(500).json({ error: 'No bundle available' });
            return;
        }
        // Read optional eval names and model override from POST body
        const body = req.body ?? {};
        const evalNames = normalizeEvalNames(body['evalNames']);
        const modelOverride = body['model'];
        let evals = repo.evals;
        if (evalNames.length > 0) {
            const wanted = new Set(evalNames);
            evals = evals.filter((e) => wanted.has(e.name));
        }
        if (evals.length === 0) {
            res.status(400).json({ error: 'No evals defined' });
            return;
        }
        res.writeHead(200, {
            'Content-Type': 'text/event-stream',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
        });
        // The override is applied by mutating the bundle's shared `models.main`
        // entry, so it has to be rolled back on every exit path.
        const models = repo.config?.models;
        const originalModelConfig = models?.['main'];
        const overrideActive = Boolean(modelOverride && models);
        const applyOverride = () => {
            if (overrideActive) {
                models['main'] = { provider: modelOverride.provider, model: modelOverride.model };
            }
        };
        const restoreModel = () => {
            if (!overrideActive) {
                return;
            }
            if (originalModelConfig === undefined) {
                // There was no `main` model before the override: remove the entry again
                // instead of leaving the override behind.
                delete models['main'];
            }
            else {
                models['main'] = originalModelConfig;
            }
        };
        // The model the eval queries run against (override if active, else the configured one).
        const effectiveModel = overrideActive ? modelOverride : originalModelConfig;
        const modelInfo = {
            provider: effectiveModel?.provider ?? 'unknown',
            model: effectiveModel?.model ?? 'unknown',
        };
        const evalSessionId = `eval-${Date.now()}`;
        const results = [];
        const perCaseCosts = [];
        const startTime = Date.now();
        try {
            applyOverride();
            const judgeProvider = createDirectJudgeProvider(originalModelConfig);
            for (const [index, ev] of evals.entries()) {
                const current = index + 1;
                writeSSE(res, { type: 'eval_start', evalName: ev.name, current, total: evals.length });
                const evalStart = Date.now();
                try {
                    // Run the query — streams agent events to client. `setup` may be absent;
                    // streamQuery falls back to its default app id in that case.
                    const { response, toolCalls, toolResults, usage, error: queryError } = await streamQuery(baseUrl, ev.query, res, ev.name, ev.setup?.app, evalSessionId);
                    // Restore original model for judging
                    restoreModel();
                    let assertions = [];
                    let passed = false;
                    let judgeCost;
                    // Skip judging if query had an error
                    if (!queryError) {
                        const enriched = buildJudgeInput(response, toolCalls, toolResults);
                        // Judge assertions — track judge tokens separately
                        const judgeInputBefore = judgeProvider.totalInputTokens;
                        const judgeOutputBefore = judgeProvider.totalOutputTokens;
                        assertions = await judgeAllAssertions(enriched, ev.assertions, judgeProvider);
                        passed = assertions.every((a) => a.passed);
                        const judgeInputUsed = judgeProvider.totalInputTokens - judgeInputBefore;
                        const judgeOutputUsed = judgeProvider.totalOutputTokens - judgeOutputBefore;
                        judgeCost = judgeInputUsed > 0 ? computeEvalCost(judgeInputUsed, judgeOutputUsed, originalModelConfig?.model ?? modelInfo.model) : undefined;
                    }
                    // Re-apply model override for next eval query
                    applyOverride();
                    const queryCost = usage ? computeEvalCost(usage.inputTokens, usage.outputTokens, modelInfo.model, usage.cacheReadInputTokens, usage.cacheCreationInputTokens) : undefined;
                    if (queryCost) {
                        perCaseCosts.push(queryCost);
                    }
                    const result = {
                        eval: ev,
                        response,
                        toolCalls,
                        assertions,
                        passed,
                        durationMs: Date.now() - evalStart,
                        cost: queryCost,
                        ...(queryError ? { error: queryError } : {}),
                    };
                    results.push(result);
                    // Send full result with eval_complete — separate query and judge costs
                    writeSSE(res, {
                        type: 'eval_complete',
                        evalName: ev.name,
                        passed,
                        current,
                        total: evals.length,
                        result: {
                            response,
                            toolCalls,
                            toolResults,
                            assertions,
                            durationMs: result.durationMs,
                            queryCost,
                            judgeCost,
                            ...(queryError ? { error: queryError } : {}),
                        },
                    });
                }
                catch (err) {
                    const msg = err instanceof Error ? err.message : String(err);
                    // The failure may have happened while the original model was restored
                    // for judging; make sure the next query runs against the override again.
                    applyOverride();
                    const result = {
                        eval: ev,
                        response: '',
                        toolCalls: [],
                        assertions: [],
                        passed: false,
                        durationMs: Date.now() - evalStart,
                        error: msg,
                    };
                    results.push(result);
                    writeSSE(res, {
                        type: 'eval_complete',
                        evalName: ev.name,
                        passed: false,
                        current,
                        total: evals.length,
                        result: { response: '', toolCalls: [], assertions: [], durationMs: result.durationMs, error: msg },
                    });
                }
            }
        }
        finally {
            // Restore original model config, even if something above threw.
            restoreModel();
        }
        // Build suite result
        const totalCost = perCaseCosts.length > 0 ? aggregateRunCost(perCaseCosts) : undefined;
        const suiteResult = {
            results,
            totalPassed: results.filter((r) => r.passed).length,
            totalFailed: results.filter((r) => !r.passed).length,
            totalSkipped: 0,
            totalDurationMs: Date.now() - startTime,
            totalCost,
            model: modelInfo,
            timestamp: new Date().toISOString(),
        };
        const run = buildEvalRun(suiteResult, modelInfo, { orgId: 'local', triggeredBy: 'manual' });
        options.evalStore.save(run);
        writeSSE(res, { type: 'run_complete', run });
        writeSSE(res, { type: 'done' });
        res.end();
    }));
    /** Get eval history for a specific eval */
    router.get('/api/evals/runs/by-eval/:evalName', (req, res) => {
        const evalName = req.params['evalName'] ?? '';
        const entries = options.evalStore.listByEval(evalName);
        res.json({ entries });
    });
    /** Get arena model config */
    router.get('/api/evals/arena/models', (_req, res) => {
        const repo = options.getBundle();
        if (!repo) {
            res.status(500).json({ error: 'No bundle available' });
            return;
        }
        // A bundle without a config (or without an arena section) gets the defaults.
        const configModels = repo.config?.['arena']?.['models'];
        const models = configModels ?? DEFAULT_ARENA_MODELS;
        res.json({ models });
    });
    return router;
}
389
- //# sourceMappingURL=evals.js.map