opc-agent 4.0.44 → 4.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250) hide show
  1. package/.github/ISSUE_TEMPLATE/bug_report.md +20 -20
  2. package/.github/ISSUE_TEMPLATE/feature_request.md +14 -14
  3. package/.github/PULL_REQUEST_TEMPLATE.md +13 -13
  4. package/CHANGELOG.md +48 -48
  5. package/CONTRIBUTING.md +36 -36
  6. package/README.zh-CN.md +497 -497
  7. package/dist/channels/wechat.js +6 -6
  8. package/dist/cli.js +2 -2
  9. package/dist/core/runtime.js +18 -0
  10. package/dist/deploy/index.js +56 -56
  11. package/dist/providers/index.js +39 -13
  12. package/dist/studio/server.js +211 -20
  13. package/dist/studio-ui/index.html +279 -24
  14. package/dist/ui/components.js +105 -105
  15. package/examples/README.md +22 -22
  16. package/examples/basic-agent.ts +90 -90
  17. package/examples/brain-integration.ts +71 -71
  18. package/examples/multi-channel.ts +74 -74
  19. package/fix-sidebar.mjs +188 -188
  20. package/install.ps1 +154 -154
  21. package/install.sh +164 -164
  22. package/package.json +1 -1
  23. package/scripts/install.ps1 +31 -31
  24. package/scripts/install.sh +40 -40
  25. package/serve-studio.js +13 -13
  26. package/serve-test.js +25 -25
  27. package/src/channels/dingtalk.ts +46 -46
  28. package/src/channels/email.ts +351 -351
  29. package/src/channels/feishu.ts +349 -349
  30. package/src/channels/googlechat.ts +42 -42
  31. package/src/channels/imessage.ts +31 -31
  32. package/src/channels/irc.ts +82 -82
  33. package/src/channels/line.ts +32 -32
  34. package/src/channels/matrix.ts +33 -33
  35. package/src/channels/mattermost.ts +57 -57
  36. package/src/channels/msteams.ts +32 -32
  37. package/src/channels/nostr.ts +32 -32
  38. package/src/channels/qq.ts +33 -33
  39. package/src/channels/signal.ts +32 -32
  40. package/src/channels/sms.ts +33 -33
  41. package/src/channels/telegram.ts +616 -616
  42. package/src/channels/twitch.ts +65 -65
  43. package/src/channels/voice-call.ts +100 -100
  44. package/src/channels/websocket.ts +399 -399
  45. package/src/channels/wechat.ts +329 -329
  46. package/src/channels/whatsapp.ts +32 -32
  47. package/src/cli/chat.ts +99 -99
  48. package/src/cli/setup.ts +314 -314
  49. package/src/cli.ts +2 -2
  50. package/src/core/agent.ts +476 -476
  51. package/src/core/api-server.ts +277 -277
  52. package/src/core/audio.ts +98 -98
  53. package/src/core/collaboration.ts +275 -275
  54. package/src/core/context-discovery.ts +85 -85
  55. package/src/core/context-refs.ts +140 -140
  56. package/src/core/gateway.ts +106 -106
  57. package/src/core/heartbeat.ts +51 -51
  58. package/src/core/hooks.ts +105 -105
  59. package/src/core/ide-bridge.ts +133 -133
  60. package/src/core/node-network.ts +86 -86
  61. package/src/core/profiles.ts +122 -122
  62. package/src/core/runtime.ts +18 -0
  63. package/src/core/scheduler.ts +187 -187
  64. package/src/core/session-manager.ts +137 -137
  65. package/src/core/subagent.ts +98 -98
  66. package/src/core/vision.ts +180 -180
  67. package/src/core/workflow-graph.ts +365 -365
  68. package/src/daemon.ts +96 -96
  69. package/src/deploy/index.ts +255 -255
  70. package/src/doctor.ts +156 -156
  71. package/src/eval/index.ts +211 -211
  72. package/src/eval/suites/basic.json +16 -16
  73. package/src/eval/suites/memory.json +12 -12
  74. package/src/eval/suites/safety.json +14 -14
  75. package/src/hub/brain-seed.ts +54 -54
  76. package/src/hub/client.ts +60 -60
  77. package/src/mcp/servers/calculator-mcp.ts +65 -65
  78. package/src/mcp/servers/crypto-mcp.ts +73 -73
  79. package/src/mcp/servers/database-mcp.ts +72 -72
  80. package/src/mcp/servers/datetime-mcp.ts +69 -69
  81. package/src/mcp/servers/filesystem.ts +66 -66
  82. package/src/mcp/servers/github-mcp.ts +58 -58
  83. package/src/mcp/servers/index.ts +63 -63
  84. package/src/mcp/servers/json-mcp.ts +102 -102
  85. package/src/mcp/servers/memory-mcp.ts +56 -56
  86. package/src/mcp/servers/regex-mcp.ts +53 -53
  87. package/src/mcp/servers/web-mcp.ts +49 -49
  88. package/src/memory/context-compressor.ts +189 -189
  89. package/src/memory/seed-loader.ts +212 -212
  90. package/src/memory/user-profiler.ts +215 -215
  91. package/src/plugins/content-filter.ts +23 -23
  92. package/src/plugins/logger.ts +18 -18
  93. package/src/plugins/rate-limiter.ts +38 -38
  94. package/src/protocols/a2a/client.ts +132 -132
  95. package/src/protocols/a2a/index.ts +8 -8
  96. package/src/protocols/a2a/server.ts +333 -333
  97. package/src/protocols/a2a/types.ts +88 -88
  98. package/src/protocols/a2a/utils.ts +50 -50
  99. package/src/protocols/agui/client.ts +83 -83
  100. package/src/protocols/agui/index.ts +4 -4
  101. package/src/protocols/agui/server.ts +218 -218
  102. package/src/protocols/agui/types.ts +153 -153
  103. package/src/protocols/index.ts +2 -2
  104. package/src/protocols/mcp/agent-tools.ts +134 -134
  105. package/src/protocols/mcp/index.ts +8 -8
  106. package/src/protocols/mcp/server.ts +262 -262
  107. package/src/protocols/mcp/types.ts +69 -69
  108. package/src/providers/index.ts +632 -608
  109. package/src/publish/index.ts +376 -376
  110. package/src/scheduler/cron-engine.ts +191 -191
  111. package/src/scheduler/index.ts +2 -2
  112. package/src/schema/oad.ts +217 -217
  113. package/src/security/approval.ts +131 -131
  114. package/src/security/approvals.ts +143 -143
  115. package/src/security/elevated.ts +105 -105
  116. package/src/security/guardrails.ts +248 -248
  117. package/src/security/index.ts +9 -9
  118. package/src/security/keys.ts +87 -87
  119. package/src/security/secrets.ts +129 -129
  120. package/src/skills/builtin/index.ts +408 -408
  121. package/src/skills/marketplace.ts +113 -113
  122. package/src/skills/types.ts +42 -42
  123. package/src/studio/server.ts +209 -22
  124. package/src/studio/templates-data.ts +178 -178
  125. package/src/studio-ui/index.html +279 -24
  126. package/src/telemetry/index.ts +324 -324
  127. package/src/tools/builtin/browser.ts +299 -299
  128. package/src/tools/builtin/datetime.ts +41 -41
  129. package/src/tools/builtin/file.ts +107 -107
  130. package/src/tools/builtin/home-assistant.ts +116 -116
  131. package/src/tools/builtin/rl-tools.ts +243 -243
  132. package/src/tools/builtin/shell.ts +43 -43
  133. package/src/tools/builtin/vision.ts +64 -64
  134. package/src/tools/builtin/web-search.ts +126 -126
  135. package/src/tools/builtin/web.ts +35 -35
  136. package/src/tools/document-processor.ts +213 -213
  137. package/src/tools/image-generator.ts +150 -150
  138. package/src/tools/integrations/calendar.ts +73 -73
  139. package/src/tools/integrations/code-exec.ts +39 -39
  140. package/src/tools/integrations/csv-analyzer.ts +92 -92
  141. package/src/tools/integrations/database.ts +44 -44
  142. package/src/tools/integrations/email-send.ts +76 -76
  143. package/src/tools/integrations/git-tool.ts +42 -42
  144. package/src/tools/integrations/github-tool.ts +76 -76
  145. package/src/tools/integrations/image-gen.ts +56 -56
  146. package/src/tools/integrations/index.ts +92 -92
  147. package/src/tools/integrations/jira.ts +83 -83
  148. package/src/tools/integrations/notion.ts +71 -71
  149. package/src/tools/integrations/npm-tool.ts +48 -48
  150. package/src/tools/integrations/pdf-reader.ts +58 -58
  151. package/src/tools/integrations/slack.ts +65 -65
  152. package/src/tools/integrations/summarizer.ts +49 -49
  153. package/src/tools/integrations/translator.ts +48 -48
  154. package/src/tools/integrations/trello.ts +60 -60
  155. package/src/tools/integrations/vector-search.ts +42 -42
  156. package/src/tools/integrations/web-scraper.ts +47 -47
  157. package/src/tools/integrations/web-search.ts +58 -58
  158. package/src/tools/integrations/webhook.ts +38 -38
  159. package/src/tools/mcp-client.ts +131 -131
  160. package/src/tools/web-scraper.ts +179 -179
  161. package/src/tools/web-search.ts +180 -180
  162. package/src/ui/components.ts +127 -127
  163. package/srv-out.txt +1 -1
  164. package/templates/ecommerce-assistant/README.md +45 -45
  165. package/templates/ecommerce-assistant/oad.yaml +47 -47
  166. package/templates/tech-support/README.md +43 -43
  167. package/templates/tech-support/oad.yaml +45 -45
  168. package/test-agent/Dockerfile +9 -9
  169. package/test-agent/README.md +50 -50
  170. package/test-agent/agent.yaml +23 -23
  171. package/test-agent/docker-compose.yml +11 -11
  172. package/test-agent/oad.yaml +31 -31
  173. package/test-agent/package-lock.json +1492 -1492
  174. package/test-agent/package.json +17 -17
  175. package/test-agent/src/index.ts +24 -24
  176. package/test-agent/src/skills/echo.ts +15 -15
  177. package/test-agent/tsconfig.json +24 -24
  178. package/test-full.js +43 -43
  179. package/test-sidebar.js +22 -22
  180. package/test-studio3.js +75 -75
  181. package/test-studio4.js +41 -41
  182. package/tests/a2a-protocol.test.ts +285 -285
  183. package/tests/agui-protocol.test.ts +246 -246
  184. package/tests/api-server.test.ts +148 -148
  185. package/tests/approvals.test.ts +89 -89
  186. package/tests/audio.test.ts +40 -40
  187. package/tests/brain-seed-extended.test.ts +490 -490
  188. package/tests/brain-seed.test.ts +239 -239
  189. package/tests/browser.test.ts +179 -179
  190. package/tests/channels/discord.test.ts +79 -79
  191. package/tests/channels/email.test.ts +148 -148
  192. package/tests/channels/feishu.test.ts +123 -123
  193. package/tests/channels/telegram.test.ts +129 -129
  194. package/tests/channels/websocket.test.ts +53 -53
  195. package/tests/channels/wechat.test.ts +170 -170
  196. package/tests/channels-extra.test.ts +45 -45
  197. package/tests/chat-cli.test.ts +160 -160
  198. package/tests/cli.test.ts +46 -46
  199. package/tests/context-compressor.test.ts +172 -172
  200. package/tests/context-refs.test.ts +121 -121
  201. package/tests/cron-engine.test.ts +101 -101
  202. package/tests/daemon.test.ts +135 -135
  203. package/tests/deepbrain-wire.test.ts +234 -234
  204. package/tests/deploy-and-dag.test.ts +196 -196
  205. package/tests/doctor.test.ts +38 -38
  206. package/tests/document-processor.test.ts +69 -69
  207. package/tests/e2e-nocode.test.ts +442 -442
  208. package/tests/elevated.test.ts +69 -69
  209. package/tests/eval.test.ts +173 -173
  210. package/tests/gateway.test.ts +63 -63
  211. package/tests/guardrails.test.ts +177 -177
  212. package/tests/home-assistant.test.ts +40 -40
  213. package/tests/hooks.test.ts +79 -79
  214. package/tests/ide-bridge.test.ts +38 -38
  215. package/tests/image-generator.test.ts +84 -84
  216. package/tests/init-role.test.ts +124 -124
  217. package/tests/integrations.test.ts +249 -249
  218. package/tests/mcp-client.test.ts +92 -92
  219. package/tests/mcp-server.test.ts +178 -178
  220. package/tests/mcp-servers.test.ts +260 -260
  221. package/tests/node-network.test.ts +74 -74
  222. package/tests/plugin-a2a-enhanced.test.ts +230 -230
  223. package/tests/profiles.test.ts +61 -61
  224. package/tests/publish.test.ts +231 -231
  225. package/tests/rl-tools.test.ts +93 -93
  226. package/tests/sandbox-manager.test.ts +46 -46
  227. package/tests/scheduler.test.ts +200 -200
  228. package/tests/secrets.test.ts +107 -107
  229. package/tests/security-enhanced.test.ts +233 -233
  230. package/tests/settings-api.test.ts +148 -148
  231. package/tests/setup.test.ts +73 -73
  232. package/tests/subagent.test.ts +193 -193
  233. package/tests/telegram-discord.test.ts +60 -60
  234. package/tests/telemetry.test.ts +186 -186
  235. package/tests/user-profiler.test.ts +169 -169
  236. package/tests/v090-features.test.ts +254 -254
  237. package/tests/vision.test.ts +61 -61
  238. package/tests/voice-call.test.ts +47 -47
  239. package/tests/voice-enhanced.test.ts +169 -169
  240. package/tests/voice-interaction.test.ts +38 -38
  241. package/tests/web-search.test.ts +155 -155
  242. package/tests/workflow-graph.test.ts +279 -279
  243. package/tutorial/customer-service-agent/README.md +612 -612
  244. package/tutorial/customer-service-agent/SOUL.md +26 -26
  245. package/tutorial/customer-service-agent/agent.yaml +63 -63
  246. package/tutorial/customer-service-agent/package.json +19 -19
  247. package/tutorial/customer-service-agent/src/index.ts +69 -69
  248. package/tutorial/customer-service-agent/src/skills/faq.ts +27 -27
  249. package/tutorial/customer-service-agent/src/skills/ticket.ts +22 -22
  250. package/tutorial/customer-service-agent/tsconfig.json +14 -14
@@ -1,69 +1,69 @@
1
- import { describe, it, expect } from 'vitest';
2
- import { ElevatedManager } from '../src/security/elevated';
3
-
4
- describe('ElevatedManager', () => {
5
- it('should default to ask mode', () => {
6
- const mgr = new ElevatedManager();
7
- expect(mgr.getMode()).toBe('ask');
8
- expect(mgr.isElevated()).toBe(false);
9
- mgr.destroy();
10
- });
11
-
12
- it('should elevate and revoke', () => {
13
- const mgr = new ElevatedManager();
14
- expect(mgr.elevate('test')).toBe(true);
15
- expect(mgr.isElevated()).toBe(true);
16
- mgr.revoke();
17
- expect(mgr.isElevated()).toBe(false);
18
- mgr.destroy();
19
- });
20
-
21
- it('should not elevate in off mode', () => {
22
- const mgr = new ElevatedManager({ mode: 'off' });
23
- expect(mgr.elevate()).toBe(false);
24
- expect(mgr.isElevated()).toBe(false);
25
- mgr.destroy();
26
- });
27
-
28
- it('should allow commands in allowedCommands list', () => {
29
- const mgr = new ElevatedManager({ allowedCommands: [/^git\s/] });
30
- expect(mgr.isCommandAllowed('git pull')).toBe(true);
31
- expect(mgr.isCommandAllowed('rm -rf /')).toBe(false);
32
- mgr.destroy();
33
- });
34
-
35
- it('should auto-execute in on mode', () => {
36
- const mgr = new ElevatedManager({ mode: 'on' });
37
- const result = mgr.canExecute('sudo reboot');
38
- expect(result.allowed).toBe(true);
39
- expect(mgr.isElevated()).toBe(true);
40
- mgr.destroy();
41
- });
42
-
43
- it('should require elevation in ask mode when not elevated', () => {
44
- const mgr = new ElevatedManager({ mode: 'ask' });
45
- const result = mgr.canExecute('sudo reboot');
46
- expect(result.allowed).toBe(false);
47
- expect(result.needsElevation).toBe(true);
48
- mgr.destroy();
49
- });
50
-
51
- it('should allow execution in ask mode when elevated', () => {
52
- const mgr = new ElevatedManager({ mode: 'ask' });
53
- mgr.elevate();
54
- const result = mgr.canExecute('sudo reboot');
55
- expect(result.allowed).toBe(true);
56
- mgr.destroy();
57
- });
58
-
59
- it('should keep audit log', () => {
60
- const mgr = new ElevatedManager();
61
- mgr.elevate('testing');
62
- mgr.revoke('done');
63
- const log = mgr.getAuditLog();
64
- expect(log).toHaveLength(2);
65
- expect(log[0].action).toBe('elevate');
66
- expect(log[1].action).toBe('revoke');
67
- mgr.destroy();
68
- });
69
- });
1
+ import { describe, it, expect } from 'vitest';
2
+ import { ElevatedManager } from '../src/security/elevated';
3
+
4
+ describe('ElevatedManager', () => {
5
+ it('should default to ask mode', () => {
6
+ const mgr = new ElevatedManager();
7
+ expect(mgr.getMode()).toBe('ask');
8
+ expect(mgr.isElevated()).toBe(false);
9
+ mgr.destroy();
10
+ });
11
+
12
+ it('should elevate and revoke', () => {
13
+ const mgr = new ElevatedManager();
14
+ expect(mgr.elevate('test')).toBe(true);
15
+ expect(mgr.isElevated()).toBe(true);
16
+ mgr.revoke();
17
+ expect(mgr.isElevated()).toBe(false);
18
+ mgr.destroy();
19
+ });
20
+
21
+ it('should not elevate in off mode', () => {
22
+ const mgr = new ElevatedManager({ mode: 'off' });
23
+ expect(mgr.elevate()).toBe(false);
24
+ expect(mgr.isElevated()).toBe(false);
25
+ mgr.destroy();
26
+ });
27
+
28
+ it('should allow commands in allowedCommands list', () => {
29
+ const mgr = new ElevatedManager({ allowedCommands: [/^git\s/] });
30
+ expect(mgr.isCommandAllowed('git pull')).toBe(true);
31
+ expect(mgr.isCommandAllowed('rm -rf /')).toBe(false);
32
+ mgr.destroy();
33
+ });
34
+
35
+ it('should auto-execute in on mode', () => {
36
+ const mgr = new ElevatedManager({ mode: 'on' });
37
+ const result = mgr.canExecute('sudo reboot');
38
+ expect(result.allowed).toBe(true);
39
+ expect(mgr.isElevated()).toBe(true);
40
+ mgr.destroy();
41
+ });
42
+
43
+ it('should require elevation in ask mode when not elevated', () => {
44
+ const mgr = new ElevatedManager({ mode: 'ask' });
45
+ const result = mgr.canExecute('sudo reboot');
46
+ expect(result.allowed).toBe(false);
47
+ expect(result.needsElevation).toBe(true);
48
+ mgr.destroy();
49
+ });
50
+
51
+ it('should allow execution in ask mode when elevated', () => {
52
+ const mgr = new ElevatedManager({ mode: 'ask' });
53
+ mgr.elevate();
54
+ const result = mgr.canExecute('sudo reboot');
55
+ expect(result.allowed).toBe(true);
56
+ mgr.destroy();
57
+ });
58
+
59
+ it('should keep audit log', () => {
60
+ const mgr = new ElevatedManager();
61
+ mgr.elevate('testing');
62
+ mgr.revoke('done');
63
+ const log = mgr.getAuditLog();
64
+ expect(log).toHaveLength(2);
65
+ expect(log[0].action).toBe('elevate');
66
+ expect(log[1].action).toBe('revoke');
67
+ mgr.destroy();
68
+ });
69
+ });
@@ -1,173 +1,173 @@
1
- import { describe, it, expect } from 'vitest';
2
- import { AgentEvaluator } from '../src/eval';
3
- import type { EvalCase, EvalSuite, EvalReport } from '../src/eval';
4
- import * as path from 'path';
5
- import * as fs from 'fs';
6
- import * as os from 'os';
7
-
8
- // Mock agent
9
- const mockAgent = {
10
- chat: async (input: string) => {
11
- if (!input) return 'Hello! How can I help?';
12
- if (input.includes('capital of France')) return 'The capital of France is Paris.';
13
- if (input.includes('Hello')) return 'Hello there! How can I help you?';
14
- return `Response to: ${input}`;
15
- },
16
- };
17
-
18
- describe('AgentEvaluator', () => {
19
- const evaluator = new AgentEvaluator(mockAgent);
20
-
21
- it('should score exact match correctly', async () => {
22
- const result = await evaluator.evalCase({
23
- id: 'test-1',
24
- input: 'What is the capital of France?',
25
- expectedOutput: 'The capital of France is Paris.',
26
- });
27
- expect(result.scores.exact_match).toBe(1);
28
- expect(result.passed).toBe(true);
29
- });
30
-
31
- it('should score exact match failure', async () => {
32
- const result = await evaluator.evalCase({
33
- id: 'test-2',
34
- input: 'Hello!',
35
- expectedOutput: 'Goodbye!',
36
- });
37
- expect(result.scores.exact_match).toBe(0);
38
- expect(result.passed).toBe(false);
39
- });
40
-
41
- it('should score contains correctly', async () => {
42
- const result = await evaluator.evalCase({
43
- id: 'test-3',
44
- input: 'What is the capital of France?',
45
- expectedContains: ['Paris', 'capital'],
46
- });
47
- expect(result.scores.contains).toBe(1);
48
- expect(result.passed).toBe(true);
49
- });
50
-
51
- it('should score partial contains', async () => {
52
- const result = await evaluator.evalCase({
53
- id: 'test-4',
54
- input: 'What is the capital of France?',
55
- expectedContains: ['Paris', 'London'],
56
- });
57
- expect(result.scores.contains).toBe(0.5);
58
- expect(result.passed).toBe(true); // 0.5 >= 0.5 threshold
59
- });
60
-
61
- it('should score not_contains correctly', async () => {
62
- const result = await evaluator.evalCase({
63
- id: 'test-5',
64
- input: 'Hello!',
65
- expectedNotContains: ['error', 'crash'],
66
- });
67
- expect(result.scores.not_contains).toBe(1);
68
- expect(result.passed).toBe(true);
69
- });
70
-
71
- it('should score not_contains failure', async () => {
72
- const result = await evaluator.evalCase({
73
- id: 'test-6',
74
- input: 'Hello!',
75
- expectedNotContains: ['Hello', 'crash'],
76
- });
77
- expect(result.scores.not_contains).toBe(0.5);
78
- });
79
-
80
- it('should load suite from JSON', () => {
81
- const suitePath = path.join(__dirname, '..', 'src', 'eval', 'suites', 'basic.json');
82
- const suite = AgentEvaluator.loadSuite(suitePath);
83
- expect(suite.name).toBe('basic');
84
- expect(suite.cases.length).toBe(10);
85
- });
86
-
87
- it('should load all built-in suites', () => {
88
- const suites = AgentEvaluator.builtinSuites();
89
- expect(suites.length).toBeGreaterThanOrEqual(3);
90
- const names = suites.map(s => s.name);
91
- expect(names).toContain('basic');
92
- expect(names).toContain('safety');
93
- expect(names).toContain('memory');
94
- });
95
-
96
- it('should have correct case counts for built-in suites', () => {
97
- const suites = AgentEvaluator.builtinSuites();
98
- const basic = suites.find(s => s.name === 'basic');
99
- const safety = suites.find(s => s.name === 'safety');
100
- const memory = suites.find(s => s.name === 'memory');
101
- expect(basic?.caseCount).toBe(10);
102
- expect(safety?.caseCount).toBe(8);
103
- expect(memory?.caseCount).toBe(6);
104
- });
105
-
106
- it('should compare reports and detect regression', () => {
107
- const baseline: EvalReport = {
108
- suite: 'test', timestamp: '', totalCases: 2, passed: 2, failed: 0, passRate: 1, avgLatency: 10, p95Latency: 15, summary: '',
109
- results: [
110
- { caseId: 'a', input: '', output: '', scores: { latency_ms: 10 }, passed: true },
111
- { caseId: 'b', input: '', output: '', scores: { latency_ms: 10 }, passed: true },
112
- ],
113
- };
114
- const current: EvalReport = {
115
- suite: 'test', timestamp: '', totalCases: 2, passed: 1, failed: 1, passRate: 0.5, avgLatency: 10, p95Latency: 15, summary: '',
116
- results: [
117
- { caseId: 'a', input: '', output: '', scores: { latency_ms: 10 }, passed: true },
118
- { caseId: 'b', input: '', output: '', scores: { latency_ms: 10 }, passed: false },
119
- ],
120
- };
121
- const cmp = AgentEvaluator.compare(baseline, current);
122
- expect(cmp.regressed).toContain('b');
123
- expect(cmp.delta).toBe(-0.5);
124
- });
125
-
126
- it('should compare reports and detect improvement', () => {
127
- const baseline: EvalReport = {
128
- suite: 'test', timestamp: '', totalCases: 2, passed: 1, failed: 1, passRate: 0.5, avgLatency: 10, p95Latency: 15, summary: '',
129
- results: [
130
- { caseId: 'a', input: '', output: '', scores: { latency_ms: 10 }, passed: true },
131
- { caseId: 'b', input: '', output: '', scores: { latency_ms: 10 }, passed: false },
132
- ],
133
- };
134
- const current: EvalReport = {
135
- suite: 'test', timestamp: '', totalCases: 2, passed: 2, failed: 0, passRate: 1, avgLatency: 10, p95Latency: 15, summary: '',
136
- results: [
137
- { caseId: 'a', input: '', output: '', scores: { latency_ms: 10 }, passed: true },
138
- { caseId: 'b', input: '', output: '', scores: { latency_ms: 10 }, passed: true },
139
- ],
140
- };
141
- const cmp = AgentEvaluator.compare(baseline, current);
142
- expect(cmp.improved).toContain('b');
143
- expect(cmp.delta).toBe(0.5);
144
- });
145
-
146
- it('should save and load report', () => {
147
- const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'eval-'));
148
- const reportPath = path.join(tmpDir, 'report.json');
149
- const report: EvalReport = {
150
- suite: 'test', timestamp: new Date().toISOString(), totalCases: 1, passed: 1, failed: 0, passRate: 1, avgLatency: 5, p95Latency: 5, summary: 'ok',
151
- results: [{ caseId: 'x', input: 'hi', output: 'hello', scores: { latency_ms: 5 }, passed: true }],
152
- };
153
- AgentEvaluator.saveReport(report, reportPath);
154
- const loaded = JSON.parse(fs.readFileSync(reportPath, 'utf-8'));
155
- expect(loaded.suite).toBe('test');
156
- expect(loaded.results.length).toBe(1);
157
- fs.rmSync(tmpDir, { recursive: true });
158
- });
159
-
160
- it('should run evalSuite and produce report', async () => {
161
- const suite: EvalSuite = {
162
- name: 'mini',
163
- cases: [
164
- { id: 't1', input: 'Hello!', expectedContains: ['hello', 'hi'] },
165
- { id: 't2', input: 'What is the capital of France?', expectedContains: ['Paris'] },
166
- ],
167
- };
168
- const report = await evaluator.evalSuite(suite);
169
- expect(report.totalCases).toBe(2);
170
- expect(report.passRate).toBeGreaterThanOrEqual(0);
171
- expect(report.summary).toContain('mini');
172
- });
173
- });
1
+ import { describe, it, expect } from 'vitest';
2
+ import { AgentEvaluator } from '../src/eval';
3
+ import type { EvalCase, EvalSuite, EvalReport } from '../src/eval';
4
+ import * as path from 'path';
5
+ import * as fs from 'fs';
6
+ import * as os from 'os';
7
+
8
+ // Mock agent
9
+ const mockAgent = {
10
+ chat: async (input: string) => {
11
+ if (!input) return 'Hello! How can I help?';
12
+ if (input.includes('capital of France')) return 'The capital of France is Paris.';
13
+ if (input.includes('Hello')) return 'Hello there! How can I help you?';
14
+ return `Response to: ${input}`;
15
+ },
16
+ };
17
+
18
+ describe('AgentEvaluator', () => {
19
+ const evaluator = new AgentEvaluator(mockAgent);
20
+
21
+ it('should score exact match correctly', async () => {
22
+ const result = await evaluator.evalCase({
23
+ id: 'test-1',
24
+ input: 'What is the capital of France?',
25
+ expectedOutput: 'The capital of France is Paris.',
26
+ });
27
+ expect(result.scores.exact_match).toBe(1);
28
+ expect(result.passed).toBe(true);
29
+ });
30
+
31
+ it('should score exact match failure', async () => {
32
+ const result = await evaluator.evalCase({
33
+ id: 'test-2',
34
+ input: 'Hello!',
35
+ expectedOutput: 'Goodbye!',
36
+ });
37
+ expect(result.scores.exact_match).toBe(0);
38
+ expect(result.passed).toBe(false);
39
+ });
40
+
41
+ it('should score contains correctly', async () => {
42
+ const result = await evaluator.evalCase({
43
+ id: 'test-3',
44
+ input: 'What is the capital of France?',
45
+ expectedContains: ['Paris', 'capital'],
46
+ });
47
+ expect(result.scores.contains).toBe(1);
48
+ expect(result.passed).toBe(true);
49
+ });
50
+
51
+ it('should score partial contains', async () => {
52
+ const result = await evaluator.evalCase({
53
+ id: 'test-4',
54
+ input: 'What is the capital of France?',
55
+ expectedContains: ['Paris', 'London'],
56
+ });
57
+ expect(result.scores.contains).toBe(0.5);
58
+ expect(result.passed).toBe(true); // 0.5 >= 0.5 threshold
59
+ });
60
+
61
+ it('should score not_contains correctly', async () => {
62
+ const result = await evaluator.evalCase({
63
+ id: 'test-5',
64
+ input: 'Hello!',
65
+ expectedNotContains: ['error', 'crash'],
66
+ });
67
+ expect(result.scores.not_contains).toBe(1);
68
+ expect(result.passed).toBe(true);
69
+ });
70
+
71
+ it('should score not_contains failure', async () => {
72
+ const result = await evaluator.evalCase({
73
+ id: 'test-6',
74
+ input: 'Hello!',
75
+ expectedNotContains: ['Hello', 'crash'],
76
+ });
77
+ expect(result.scores.not_contains).toBe(0.5);
78
+ });
79
+
80
+ it('should load suite from JSON', () => {
81
+ const suitePath = path.join(__dirname, '..', 'src', 'eval', 'suites', 'basic.json');
82
+ const suite = AgentEvaluator.loadSuite(suitePath);
83
+ expect(suite.name).toBe('basic');
84
+ expect(suite.cases.length).toBe(10);
85
+ });
86
+
87
+ it('should load all built-in suites', () => {
88
+ const suites = AgentEvaluator.builtinSuites();
89
+ expect(suites.length).toBeGreaterThanOrEqual(3);
90
+ const names = suites.map(s => s.name);
91
+ expect(names).toContain('basic');
92
+ expect(names).toContain('safety');
93
+ expect(names).toContain('memory');
94
+ });
95
+
96
+ it('should have correct case counts for built-in suites', () => {
97
+ const suites = AgentEvaluator.builtinSuites();
98
+ const basic = suites.find(s => s.name === 'basic');
99
+ const safety = suites.find(s => s.name === 'safety');
100
+ const memory = suites.find(s => s.name === 'memory');
101
+ expect(basic?.caseCount).toBe(10);
102
+ expect(safety?.caseCount).toBe(8);
103
+ expect(memory?.caseCount).toBe(6);
104
+ });
105
+
106
+ it('should compare reports and detect regression', () => {
107
+ const baseline: EvalReport = {
108
+ suite: 'test', timestamp: '', totalCases: 2, passed: 2, failed: 0, passRate: 1, avgLatency: 10, p95Latency: 15, summary: '',
109
+ results: [
110
+ { caseId: 'a', input: '', output: '', scores: { latency_ms: 10 }, passed: true },
111
+ { caseId: 'b', input: '', output: '', scores: { latency_ms: 10 }, passed: true },
112
+ ],
113
+ };
114
+ const current: EvalReport = {
115
+ suite: 'test', timestamp: '', totalCases: 2, passed: 1, failed: 1, passRate: 0.5, avgLatency: 10, p95Latency: 15, summary: '',
116
+ results: [
117
+ { caseId: 'a', input: '', output: '', scores: { latency_ms: 10 }, passed: true },
118
+ { caseId: 'b', input: '', output: '', scores: { latency_ms: 10 }, passed: false },
119
+ ],
120
+ };
121
+ const cmp = AgentEvaluator.compare(baseline, current);
122
+ expect(cmp.regressed).toContain('b');
123
+ expect(cmp.delta).toBe(-0.5);
124
+ });
125
+
126
+ it('should compare reports and detect improvement', () => {
127
+ const baseline: EvalReport = {
128
+ suite: 'test', timestamp: '', totalCases: 2, passed: 1, failed: 1, passRate: 0.5, avgLatency: 10, p95Latency: 15, summary: '',
129
+ results: [
130
+ { caseId: 'a', input: '', output: '', scores: { latency_ms: 10 }, passed: true },
131
+ { caseId: 'b', input: '', output: '', scores: { latency_ms: 10 }, passed: false },
132
+ ],
133
+ };
134
+ const current: EvalReport = {
135
+ suite: 'test', timestamp: '', totalCases: 2, passed: 2, failed: 0, passRate: 1, avgLatency: 10, p95Latency: 15, summary: '',
136
+ results: [
137
+ { caseId: 'a', input: '', output: '', scores: { latency_ms: 10 }, passed: true },
138
+ { caseId: 'b', input: '', output: '', scores: { latency_ms: 10 }, passed: true },
139
+ ],
140
+ };
141
+ const cmp = AgentEvaluator.compare(baseline, current);
142
+ expect(cmp.improved).toContain('b');
143
+ expect(cmp.delta).toBe(0.5);
144
+ });
145
+
146
+ it('should save and load report', () => {
147
+ const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'eval-'));
148
+ const reportPath = path.join(tmpDir, 'report.json');
149
+ const report: EvalReport = {
150
+ suite: 'test', timestamp: new Date().toISOString(), totalCases: 1, passed: 1, failed: 0, passRate: 1, avgLatency: 5, p95Latency: 5, summary: 'ok',
151
+ results: [{ caseId: 'x', input: 'hi', output: 'hello', scores: { latency_ms: 5 }, passed: true }],
152
+ };
153
+ AgentEvaluator.saveReport(report, reportPath);
154
+ const loaded = JSON.parse(fs.readFileSync(reportPath, 'utf-8'));
155
+ expect(loaded.suite).toBe('test');
156
+ expect(loaded.results.length).toBe(1);
157
+ fs.rmSync(tmpDir, { recursive: true });
158
+ });
159
+
160
+ it('should run evalSuite and produce report', async () => {
161
+ const suite: EvalSuite = {
162
+ name: 'mini',
163
+ cases: [
164
+ { id: 't1', input: 'Hello!', expectedContains: ['hello', 'hi'] },
165
+ { id: 't2', input: 'What is the capital of France?', expectedContains: ['Paris'] },
166
+ ],
167
+ };
168
+ const report = await evaluator.evalSuite(suite);
169
+ expect(report.totalCases).toBe(2);
170
+ expect(report.passRate).toBeGreaterThanOrEqual(0);
171
+ expect(report.summary).toContain('mini');
172
+ });
173
+ });
@@ -1,63 +1,63 @@
1
- import { describe, it, expect, beforeEach } from 'vitest';
2
- import { Gateway } from '../src/core/gateway';
3
-
4
- describe('Gateway', () => {
5
- let gw: Gateway;
6
-
7
- beforeEach(() => {
8
- gw = new Gateway({
9
- port: 3000,
10
- agents: [{ id: 'agent-1', name: 'Test Agent' }],
11
- channels: [{ id: 'ch-1', type: 'web' }],
12
- });
13
- });
14
-
15
- it('should start and stop', async () => {
16
- await gw.start();
17
- expect(gw.getStatus().agents).toBe(1);
18
- await gw.stop();
19
- });
20
-
21
- it('should throw on double start', async () => {
22
- await gw.start();
23
- await expect(gw.start()).rejects.toThrow('already running');
24
- await gw.stop();
25
- });
26
-
27
- it('should route messages', async () => {
28
- await gw.start();
29
- const agentId = await gw.routeMessage({ id: '1', content: 'hi', channel: 'web', timestamp: Date.now() }, 'web');
30
- expect(agentId).toBe('agent-1');
31
- await gw.stop();
32
- });
33
-
34
- it('should add and remove agents', async () => {
35
- gw.addAgent({ id: 'agent-2', name: 'Agent 2' });
36
- expect(gw.getStatus().agents).toBe(2);
37
- gw.removeAgent('agent-2');
38
- expect(gw.getStatus().agents).toBe(1);
39
- });
40
-
41
- it('should throw removing unknown agent', () => {
42
- expect(() => gw.removeAgent('unknown')).toThrow('not found');
43
- });
44
-
45
- it('should track status', async () => {
46
- await gw.start();
47
- const status = gw.getStatus();
48
- expect(status.agents).toBe(1);
49
- expect(status.channels).toBe(1);
50
- expect(status.messagesProcessed).toBe(0);
51
- expect(status.uptime).toBeGreaterThanOrEqual(0);
52
- await gw.stop();
53
- });
54
-
55
- it('should report metrics', async () => {
56
- await gw.start();
57
- await gw.routeMessage({ id: '1', content: 'test', channel: 'web', timestamp: Date.now() }, 'web');
58
- const metrics = gw.getMetrics();
59
- expect(metrics.messagesPerMinute).toBeGreaterThan(0);
60
- expect(metrics.errorRate).toBe(0);
61
- await gw.stop();
62
- });
63
- });
1
+ import { describe, it, expect, beforeEach } from 'vitest';
2
+ import { Gateway } from '../src/core/gateway';
3
+
4
+ describe('Gateway', () => {
5
+ let gw: Gateway;
6
+
7
+ beforeEach(() => {
8
+ gw = new Gateway({
9
+ port: 3000,
10
+ agents: [{ id: 'agent-1', name: 'Test Agent' }],
11
+ channels: [{ id: 'ch-1', type: 'web' }],
12
+ });
13
+ });
14
+
15
+ it('should start and stop', async () => {
16
+ await gw.start();
17
+ expect(gw.getStatus().agents).toBe(1);
18
+ await gw.stop();
19
+ });
20
+
21
+ it('should throw on double start', async () => {
22
+ await gw.start();
23
+ await expect(gw.start()).rejects.toThrow('already running');
24
+ await gw.stop();
25
+ });
26
+
27
+ it('should route messages', async () => {
28
+ await gw.start();
29
+ const agentId = await gw.routeMessage({ id: '1', content: 'hi', channel: 'web', timestamp: Date.now() }, 'web');
30
+ expect(agentId).toBe('agent-1');
31
+ await gw.stop();
32
+ });
33
+
34
+ it('should add and remove agents', async () => {
35
+ gw.addAgent({ id: 'agent-2', name: 'Agent 2' });
36
+ expect(gw.getStatus().agents).toBe(2);
37
+ gw.removeAgent('agent-2');
38
+ expect(gw.getStatus().agents).toBe(1);
39
+ });
40
+
41
+ it('should throw removing unknown agent', () => {
42
+ expect(() => gw.removeAgent('unknown')).toThrow('not found');
43
+ });
44
+
45
+ it('should track status', async () => {
46
+ await gw.start();
47
+ const status = gw.getStatus();
48
+ expect(status.agents).toBe(1);
49
+ expect(status.channels).toBe(1);
50
+ expect(status.messagesProcessed).toBe(0);
51
+ expect(status.uptime).toBeGreaterThanOrEqual(0);
52
+ await gw.stop();
53
+ });
54
+
55
+ it('should report metrics', async () => {
56
+ await gw.start();
57
+ await gw.routeMessage({ id: '1', content: 'test', channel: 'web', timestamp: Date.now() }, 'web');
58
+ const metrics = gw.getMetrics();
59
+ expect(metrics.messagesPerMinute).toBeGreaterThan(0);
60
+ expect(metrics.errorRate).toBe(0);
61
+ await gw.stop();
62
+ });
63
+ });